[med-svn] [mia] 01/04: New upstream version 2.4.5

Tue Oct 17 23:47:56 UTC 2017

This is an automated email from the git hooks/post-receive script.

gewo pushed a commit to branch master
in repository mia.

commit b49c14e0b611e68a0470a7dd0d8b0c43b1177a6b
Author: Gert Wollny <gw.fossdev at gmail.com>
Date:   Mon Oct 16 12:12:27 2017 +0200

    New upstream version 2.4.5
---
 .travis.yml                   |  65 ++-
 CMakeLists.txt                |   9 +-
 ChangeLog                     |   8 +
 cmake/macros.cmake            |  13 +-
 cmake/pluginmacro.cmake       |  60 +--
 mia/2d/CMakeLists.txt         |   2 +
 mia/2d/maskedcost/lncc.cc     | 327 +++++++--------
 mia/2d/maskedcost/lncc.hh     |   3 +-
 mia/2d/maskedcost/ncc.cc      | 213 +++++-----
 mia/2d/maskedcost/ncc.hh      |   4 +-
 mia/3d/CMakeLists.txt         |   2 +
 mia/3d/maskedcost/lncc.cc     |  23 +-
 mia/3d/maskedcost/lncc.hh     |   4 +-
 mia/3d/maskedcost/ncc.cc      |  38 +-
 mia/3d/maskedcost/ncc.hh      |   2 +
 mia/core/CMakeLists.txt       |  20 +-
 mia/mesh/CMakeLists.txt       |   7 +-
 src/3dsegment-local-cmeans.cc | 923 +++++++++++++++++++++---------------------
 18 files changed, 903 insertions(+), 820 deletions(-)

diff --git a/.travis.yml b/.travis.yml
index 606d06c..acf0bea 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -1,4 +1,4 @@
-sudo: required
+sudo: false
 dist: trusty
 language: generic
 
@@ -11,29 +11,58 @@ env:
   - CTEST_PARALLEL_LEVEL=2 
   - secure: GJh7knKOeyjofQFswCHI4H1VeRBYDmGrZjBiW/yNoGqLvvAZi32HF2d5gHj2vrsGBqto/5mQrydo1rH6Q85VuZ7ZNZmcmiMVA5htv61WKfxzc50mSrkSfvDTb0MOmR8QcpWpK4YAfCYd4r8drPfpbRdHUvTfEk9tS8I/1LGLuos3cLRU+OojEuvndUC8rGHtCRtDswFmngbc96gkAuHfAd7LiaJ5EY+pqOmHY4xMgi5Ev4FnGdkUczUcpisxee/48QIite9lw5E0pemPepKuhrwUJRvnzb9Tgsp94R42mzymvnKyLsttdUQtUItTQshTqEtWSFf/6GO9GPyUXAgR+BDsQ9ODlxOC6+Cip0HqxCvgDfIZarVaj5/2/ZwiyjocEinhSzFiIJF9SShpqSpP9MUgS/ZL/9y6GMiDXx19zkxBJ9kXP+7vH/G/7qFZ6f1aVuDemgKit5styDQDnLyd/FlBrz3oWftj [...]
 
-install:
-- sudo add-apt-repository ppa:gert-die/trusty-mia -y 
-- sudo apt-get update -qq
-- sudo apt-get install -o APT::Get::Install-Recommends=false -o APT::Get::Install-Suggests=false -y libmaxflow-dev libdcmtk2-dev libeigen3-dev libfftw3-dev libgsl0-dev libgts-dev libhdf5-dev libitpp-dev libnifti-dev libnlopt-dev libopenexr-dev libpng-dev libtbb-dev libtiff-dev libvtk6-dev libvistaio-dev libxml2-dev xsltproc docbook-xsl doxygen  graphviz libblas-dev libboost-filesystem-dev libboost-regex-dev libboost-serialization-dev libboost-system-dev libboost-test-dev python-lxml
-- pip install --user cpp-coveralls
+addons:
+  apt:
+    sources:
+    - sourceline: 'ppa:gert-die/trusty-mia'
+    packages:
+    - libmaxflow-dev
+    - libdcmtk2-dev
+    - libeigen3-dev
+    - libfftw3-dev
+    - libgsl0-dev
+    - libgts-dev
+    - libhdf5-dev
+    - libitpp-dev
+    - libnifti-dev
+    - libnlopt-dev
+    - libopenexr-dev
+    - libpng-dev
+    - libtbb-dev
+    - libtiff-dev
+    - libvtk6-dev
+    - libvistaio-dev
+    - libxml2-dev
+    - xsltproc
+    - docbook-xsl
+    - doxygen
+    - graphviz
+    - libblas-dev
+    - libboost-filesystem-dev
+    - libboost-regex-dev
+    - libboost-serialization-dev
+    - libboost-system-dev
+    - libboost-test-dev
+    - python-lxml
+  coverity_scan:
+    project:
+      name: gerddie/mia
+      version: 2.2.7+
+      description: Medical imaga analysis library
+    notification_email: gw.fossdev at gmail.com
+    build_command: make -j3
+    branch_pattern: coverity_scan
+
 before_script:
 - mkdir build
 - cd build
 - echo COVERAGE=$COVERAGE EXTRA=$EXTRA
 - cmake .. -DCMAKE_BUILD_TYPE=${BUILD_TYPE} -DALWAYS_CREATE_DOC=$EXTRA -DSTRICT_DEPENDECIES=ON -DMIA_CREATE_MANPAGES=$EXTRA -DMIA_CREATE_NIPYPE_INTERFACES=$EXTRA -DENABLE_COVERAGE=$COVERAGE -DDISABLE_PROGRAMS=$COVERAGE -DUSE_MATHJAX=YES  -DMIA_USE_BOOST_REGEX=YES
 script:
-- make -j2
+- cat /proc/cpuinfo
+- free -h
+- make -j3
 after_success:
 - make test
 - cd .. 
-- if test "x$COVERAGE" = "xON"; then coveralls --exclude CMakeFiles --exclude src --gcov-options '\-lp' -b $(pwd)/build 2>&1 >/dev/null ; fi 
-
-addons:
-  coverity_scan:
-    project:
-      name: gerddie/mia
-      version: 2.2.7+
-      description: Medical imaga analysis library
-    notification_email: gw.fossdev at gmail.com
-    build_command: make -j3 
-    branch_pattern: coverity_scan
+- if test "x$COVERAGE" = "xON"; then pip install --user cpp-coveralls; coveralls --exclude CMakeFiles --exclude src --gcov-options '\-lp' -b $(pwd)/build 2>&1 >/dev/null ; fi 
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 82d1641..941bff1 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -53,7 +53,7 @@ SET(VENDOR "Gert Wollny")
 SET(PACKAGE_NAME "mia")
 SET(MAJOR_VERSION 2)
 SET(MINOR_VERSION 4)
-SET(MICRO_VERSION 4)
+SET(MICRO_VERSION 5)
 SET(INTERFACE_AGE 0)
 SET(BINARY_AGE    0)
 
@@ -82,7 +82,7 @@ OPTION(ENABLE_DEBUG_MESSAGES "Enable debug and trace outputs" TRUE)
 OPTION(ENABLE_COVERAGE "Enable code coverage tests" FALSE)
 OPTION(DISABLE_PROGRAMS "Don't build the programs nor documentation (only for testing purposes)" FALSE)
 OPTION(MIA_CREATE_USERDOC "Enable creation of html user documentation" TRUE)
-
+OPTION(MIA_ENABLE_TESTING "Enable compiling and running the test suite" TRUE)
 
 
 
@@ -438,8 +438,9 @@ ADD_DEFINITIONS(-DHAVE_CONFIG_H)
 INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR})
 INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR})
 
-
-ENABLE_TESTING()
+IF(MIA_ENABLE_TESTING)
+  ENABLE_TESTING()
+ENDIF()
 
 SET(PLUGIN_TEST_ROOT ${CMAKE_CURRENT_BINARY_DIR}/plugintest)
 
diff --git a/ChangeLog b/ChangeLog
index 4ea21bb..308781a 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,11 @@
+2.4.5
+
+   * Fix that mia-3dsegment-local-cmeans would fail when the number of
+     slices N was relatwed to the grid size G according to k*G+1
+     (k integer)
+   * Reduce the amount of template instanciations created by the *ncc cost
+     functions. This should reduce the memory requirements during build time.
+
 2.4.4
    Issues fixed: 
 
diff --git a/cmake/macros.cmake b/cmake/macros.cmake
index dc13a5b..8c5352f 100644
--- a/cmake/macros.cmake
+++ b/cmake/macros.cmake
@@ -166,15 +166,18 @@ MACRO(NEW_TEST_BASE name libs)
     TARGET_LINK_LIBRARIES(${EXENAME} ${Boost_UNIT_TEST_FRAMEWORK_LIBRARY})
   ENDIF (NOT WIN32)
   ADD_DEPENDENCIES(${EXENAME} plugin_test_links)
-
 ENDMACRO(NEW_TEST_BASE)
 
 MACRO(NEW_TEST name libs)
-  NEW_TEST_BASE(${name} "${libs}")
-  ADD_TEST(${name} ${EXENAME})
+  IF(MIA_ENABLE_TESTING)
+    NEW_TEST_BASE(${name} "${libs}")
+    ADD_TEST(${name} ${EXENAME})
+  ENDIF()
 ENDMACRO(NEW_TEST)
 
 MACRO(NEW_TEST_WITH_PARAM name libs param)
-  NEW_TEST_BASE(${name} "${libs}")
-  ADD_TEST(${name} ${EXENAME} ${param})
+  IF(MIA_ENABLE_TESTING)
+    NEW_TEST_BASE(${name} "${libs}")
+    ADD_TEST(${name} ${EXENAME} ${param})
+  ENDIF()
 ENDMACRO(NEW_TEST_WITH_PARAM)
diff --git a/cmake/pluginmacro.cmake b/cmake/pluginmacro.cmake
index 2c89421..07df8f9 100644
--- a/cmake/pluginmacro.cmake
+++ b/cmake/pluginmacro.cmake
@@ -79,18 +79,20 @@ ENDMACRO(CREATE_PLUGIN_MODULE plugname)
 
 
 MACRO(CREATE_PLUGIN_TEST plugname file libs)
-  PARSE_ARGUMENTS(PLUGIN "TESTLIBS" "" ${ARGN})
-  add_executable(test-${plugname} ${file} $<TARGET_OBJECTS:${plugname}-common>)
-  IF(NOT WIN32)
-    set_target_properties(test-${plugname} PROPERTIES 
-      COMPILE_FLAGS -DVSTREAM_DOMAIN='"${plugname}"' 
-      COMPILE_FLAGS -DBOOST_TEST_DYN_LINK)
-  ELSE(NOT WIN32)
-    set_target_properties(test-${plugname} PROPERTIES
-      COMPILE_FLAGS -DBOOST_TEST_DYN_LINK)
-  ENDIF(NOT WIN32)
-  target_link_libraries(test-${plugname} ${libs} ${BOOST_UNITTEST} "${PLUGIN_TESTLIBS}")
-  add_test(${plugname} test-${plugname})
+  IF(MIA_ENABLE_TESTING)
+    PARSE_ARGUMENTS(PLUGIN "TESTLIBS" "" ${ARGN})
+    add_executable(test-${plugname} ${file} $<TARGET_OBJECTS:${plugname}-common>)
+    IF(NOT WIN32)
+      set_target_properties(test-${plugname} PROPERTIES
+        COMPILE_FLAGS -DVSTREAM_DOMAIN='"${plugname}"'
+        COMPILE_FLAGS -DBOOST_TEST_DYN_LINK)
+    ELSE(NOT WIN32)
+      set_target_properties(test-${plugname} PROPERTIES
+        COMPILE_FLAGS -DBOOST_TEST_DYN_LINK)
+    ENDIF(NOT WIN32)
+    target_link_libraries(test-${plugname} ${libs} ${BOOST_UNITTEST} "${PLUGIN_TESTLIBS}")
+    add_test(${plugname} test-${plugname})
+  ENDIF()
 ENDMACRO(CREATE_PLUGIN_TEST plugname file)
 
 MACRO(PLUGIN_WITH_TEST plugname file libs plugindir)
@@ -121,7 +123,7 @@ MACRO(TEST_PREFIX type data)
   string(COMPARE EQUAL  "x${${type}_${data}_prefix}" "x" UNDEFINED_PREFIX)
   IF(UNDEFINED_PREFIX)
     MESSAGE(FATAL_ERROR "PLUGIN_WITH_TEST_AND_PREFIX2: the prefix for ${type}-${data} is not defined")
-  ENDIF(UNDEFINED_PREFIX)
+ENDIF(UNDEFINED_PREFIX)
 ENDMACRO(TEST_PREFIX type data)
 
 MACRO(PLUGIN_WITH_PREFIX2 type data plugname libs)
@@ -169,6 +171,7 @@ ENDMACRO(PLUGINGROUP_WITH_TEST_AND_PREFIX2)
 
 
 MACRO(PLUGIN_WITH_TEST_MULTISOURCE name type data src libs) 
+
   PARSE_ARGUMENTS(PLUGIN "TESTLIBS" "" ${ARGN})
   TEST_PREFIX(${type} ${data})
   SET(install_path ${${type}_${data}_dir})
@@ -196,21 +199,22 @@ MACRO(PLUGIN_WITH_TEST_MULTISOURCE name type data src libs)
   ADD_DEPENDENCIES(plugin_test_links ${plugname})
   
   # create tests
-  PARSE_ARGUMENTS(PLUGIN "TESTLIBS" "" ${ARGN})
-
-  add_executable(test-${plugname} $<TARGET_OBJECTS:${plugname}-common> test_${name}.cc)
-  IF(NOT WIN32)
-    set_target_properties(test-${plugname} PROPERTIES 
-      COMPILE_FLAGS -DVSTREAM_DOMAIN='"${plugname}"' 
-      COMPILE_FLAGS -DBOOST_TEST_DYN_LINK)
-  ELSE(NOT WIN32)
-    set_target_properties(test-${plugname} PROPERTIES
-      COMPILE_FLAGS -DBOOST_TEST_DYN_LINK)
-  ENDIF(NOT WIN32)
-  target_link_libraries(test-${plugname} ${libs})
-  target_link_libraries(test-${plugname} ${BOOST_UNITTEST} "${PLUGIN_TESTLIBS}")
-  add_test(${plugname} test-${plugname})
-  
+  IF(MIA_ENABLE_TESTING)
+    PARSE_ARGUMENTS(PLUGIN "TESTLIBS" "" ${ARGN})
+
+    add_executable(test-${plugname} $<TARGET_OBJECTS:${plugname}-common> test_${name}.cc)
+    IF(NOT WIN32)
+      set_target_properties(test-${plugname} PROPERTIES
+        COMPILE_FLAGS -DVSTREAM_DOMAIN='"${plugname}"'
+        COMPILE_FLAGS -DBOOST_TEST_DYN_LINK)
+    ELSE(NOT WIN32)
+      set_target_properties(test-${plugname} PROPERTIES
+        COMPILE_FLAGS -DBOOST_TEST_DYN_LINK)
+    ENDIF(NOT WIN32)
+    target_link_libraries(test-${plugname} ${libs})
+    target_link_libraries(test-${plugname} ${BOOST_UNITTEST} "${PLUGIN_TESTLIBS}")
+    add_test(${plugname} test-${plugname})
+  endif()
   INSTALL(TARGETS ${plugname} LIBRARY DESTINATION ${install_path})
 ENDMACRO(PLUGIN_WITH_TEST_MULTISOURCE) 
 
diff --git a/mia/2d/CMakeLists.txt b/mia/2d/CMakeLists.txt
index f1501ed..f8a9f01 100644
--- a/mia/2d/CMakeLists.txt
+++ b/mia/2d/CMakeLists.txt
@@ -250,11 +250,13 @@ ENDIF(ITPP_FOUND)
 
 set(testperflibs mia2dmyocardperf mia2dtest)
 
+IF(MIA_ENABLE_TESTING)
 TEST_2DMIA(groundtruthproblem mia2dmyocardperf)
 TEST_2DMIA(correlation_weight mia2dmyocardperf)
 TEST_2DMIA(segmentation mia2dmyocardperf)
 TEST_2DMIA(segframe "${testperflibs}")
 TEST_2DMIA(segpoint mia2dmyocardperf)
+ENDIF()
 
 
 #
diff --git a/mia/2d/maskedcost/lncc.cc b/mia/2d/maskedcost/lncc.cc
index d5db691..ec4531c 100644
--- a/mia/2d/maskedcost/lncc.cc
+++ b/mia/2d/maskedcost/lncc.cc
@@ -1,6 +1,6 @@
 /* -*- mia-c++  -*-
  *
- * This file is part of MIA - a toolbox for medical image analysis 
+ * This file is part of MIA - a toolbox for medical image analysis
  * Copyright (c) Leipzig, Madrid 1999-2017 Gert Wollny
  *
  * MIA is free software; you can redistribute it and/or modify
@@ -19,214 +19,223 @@
  */
 
 #include <mia/2d/maskedcost/lncc.hh>
-#include <mia/core/nccsum.hh> 
+#include <mia/core/nccsum.hh>
 #include <mia/core/threadedmsg.hh>
 #include <mia/core/parallel.hh>
 
 
 NS_BEGIN(NS)
 
-using namespace mia; 
-using std::vector; 
-using std::pair; 
-using std::make_pair; 
+using namespace mia;
+using std::vector;
+using std::pair;
+using std::make_pair;
 
 CLNCC2DImageCost::CLNCC2DImageCost(int hw):
 m_hwidth(hw)
 {
+   m_copy_to_double = produce_2dimage_filter("convert:repn=double,map=copy");
 }
 
-inline pair<C2DBounds, C2DBounds> prepare_range(const C2DBounds& size, int cx, int cy, int hw) 
+inline pair<C2DBounds, C2DBounds> prepare_range(const C2DBounds& size, int cx, int cy, int hw)
 {
-	int yb = cy - hw;
-	if (yb < 0) yb = 0; 
-	unsigned ye = cy + hw + 1; 
-	if (ye > size.y) ye = size.y; 
-	
-	int xb = cx - hw;
-	if (xb < 0) xb = 0; 
-	unsigned xe = cx + hw + 1; 
-	if (xe > size.x) xe = size.x; 
-	
-	return make_pair(C2DBounds(xb,yb), C2DBounds(xe,ye)); 
+   int yb = cy - hw;
+   if (yb < 0) yb = 0;
+   unsigned ye = cy + hw + 1;
+   if (ye > size.y) ye = size.y;
+
+   int xb = cx - hw;
+   if (xb < 0) xb = 0;
+   unsigned xe = cx + hw + 1;
+   if (xe > size.x) xe = size.x;
+
+   return make_pair(C2DBounds(xb,yb), C2DBounds(xe,ye));
 }
 
 
 
 class FEvalCost : public TFilter<float> {
-	int m_hw;
-	const C2DBitImage& m_mask; 
+   int m_hw;
+   const C2DBitImage& m_mask;
 public:
-	FEvalCost(int hw, const C2DBitImage& mask):
-		m_hw(hw), 
-		m_mask(mask)
-		{}
-	
-	template <typename T, typename R> 
-	float operator () ( const T& mov, const R& ref) const {
-		auto evaluate_local_cost = [this, &mov, &ref](const C1DParallelRange& range, const pair<float, int>& result) -> pair<float, int> {
-			CThreadMsgStream msks; 
-			float lresult = 0.0; 
-			int count = 0; 
-			const int max_length = 2 * m_hw +1; 
-			vector<float> a_buffer( max_length * max_length * max_length); 
-			vector<float> b_buffer( max_length * max_length * max_length); 
-			
-			for (auto y = range.begin(); y != range.end(); ++y) {
-				auto imask  = m_mask.begin_at(0,y);
-				for (size_t x = 0; x < mov.get_size().x; ++x, ++imask) {
-					if (!*imask) 
-						continue; 
-					
-					auto c_block = prepare_range(mov.get_size(), x, y, m_hw); 
-
-					NCCSums sum; 
-					for (unsigned iy = c_block.first.y; iy < c_block.second.y; ++iy) {
-						auto ia = mov.begin_at(0,iy); 
-						auto ib = ref.begin_at(0, iy); 
-						auto im = m_mask.begin_at(0, iy); 
-						for (unsigned ix = c_block.first.x; ix < c_block.second.x; ++ix) {
-							
-							// make a local copy 
-							if (im[ix]) {
-								sum.add(ia[ix], ib[ix]);
-							}
-						}
-					}
-					
-					if (sum.has_samples()) {
-						lresult += sum.value(); 
-						++count; 
-					}
-					
-					
-				}
-			}
-			return make_pair(result.first + lresult, result.second + count); 
-		};
-		
-		pair<float,int> init{0, 0}; 
-		auto r = preduce(C1DParallelRange(0, mov.get_size().y, 1), init, evaluate_local_cost, 
-				 [](const pair<float,int>& x, const pair<float,int>& y){
-					 return make_pair(x.first + y.first, x.second + y.second);
-				 });	
-		cvdebug() << "result={" << r.first << " /  " <<  r.second << "\n"; 
-		return r.second > 0 ? r.first / r.second : 0.0; 
-	}
-}; 
+   FEvalCost(int hw, const C2DBitImage& mask):
+      m_hw(hw),
+      m_mask(mask)
+      {}
+
+   float operator () ( const C2DDImage& mov, const C2DDImage& ref) const {
+      auto evaluate_local_cost = [this, &mov, &ref](const C1DParallelRange& range, const pair<float, int>& result) -> pair<float, int> {
+         CThreadMsgStream msks;
+         float lresult = 0.0;
+         int count = 0;
+         const int max_length = 2 * m_hw +1;
+         vector<float> a_buffer( max_length * max_length * max_length);
+         vector<float> b_buffer( max_length * max_length * max_length);
+
+         for (auto y = range.begin(); y != range.end(); ++y) {
+            auto imask  = m_mask.begin_at(0,y);
+            for (size_t x = 0; x < mov.get_size().x; ++x, ++imask) {
+               if (!*imask)
+                  continue;
+
+               auto c_block = prepare_range(mov.get_size(), x, y, m_hw);
+
+               NCCSums sum;
+               for (unsigned iy = c_block.first.y; iy < c_block.second.y; ++iy) {
+                  auto ia = mov.begin_at(0,iy);
+                  auto ib = ref.begin_at(0, iy);
+                  auto im = m_mask.begin_at(0, iy);
+                  for (unsigned ix = c_block.first.x; ix < c_block.second.x; ++ix) {
+
+                     // make a local copy
+                     if (im[ix]) {
+                        sum.add(ia[ix], ib[ix]);
+                     }
+                  }
+               }
+
+               if (sum.has_samples()) {
+                  lresult += sum.value();
+                  ++count;
+               }
+
+
+            }
+         }
+         return make_pair(result.first + lresult, result.second + count);
+      };
+
+      pair<float,int> init{0, 0};
+      auto r = preduce(C1DParallelRange(0, mov.get_size().y, 1), init, evaluate_local_cost,
+             [](const pair<float,int>& x, const pair<float,int>& y){
+                return make_pair(x.first + y.first, x.second + y.second);
+             });
+      cvdebug() << "result={" << r.first << " /  " <<  r.second << "\n";
+      return r.second > 0 ? r.first / r.second : 0.0;
+   }
+};
 
 
 double CLNCC2DImageCost::do_value(const Data& a, const Data& b, const Mask& m) const
 {
-	FEvalCost ecost(m_hwidth, m); 
-	return mia::filter(ecost, a, b); 
+   auto a_double_ptr = m_copy_to_double->filter(a);
+   auto b_double_ptr = m_copy_to_double->filter(b);
+   const auto& mov = static_cast<const C2DDImage&>(*a_double_ptr);
+   const auto& ref = static_cast<const C2DDImage&>(*b_double_ptr);
+
+   FEvalCost ecost(m_hwidth, m);
+   return ecost(mov, ref);
 }
 
 
 class FEvalCostForce : public TFilter<float> {
-	int m_hw;
-	const C2DBitImage& m_mask; 
-	C2DFVectorfield& m_force; 
-public: 
-	FEvalCostForce(int hw, const C2DBitImage& mask, C2DFVectorfield& force):
-		m_hw(hw), 
-		m_mask(mask), 
-		m_force(force)
-		{}
-	
-	template <typename T, typename R> 
-	float operator () ( const T& mov, const R& ref) const {
-		auto ag = get_gradient(mov); 
-		auto evaluate_local_cost_force = [this, &mov, &ref, &ag](const C1DParallelRange& range, 
-									 const pair<float, int>& result) -> pair<float, int> {
-			
-			CThreadMsgStream msks; 		
-			float lresult = 0.0; 
-			int count = 0; 
-			const int max_length = 2 * m_hw + 1;
-			vector<float> a_buffer( max_length * max_length * max_length); 
-			vector<float> b_buffer( max_length * max_length * max_length); 
-
-			for (auto y = range.begin(); y != range.end(); ++y) {
-                        
-				auto iforce = m_force.begin_at(0,y);
-				auto imask = m_mask.begin_at(0,y);
-				auto ig = ag.begin_at(0,y);
-				auto imov = mov.begin_at(0,y);
-				auto iref = ref.begin_at(0,y);
-                        
-				for (size_t x = 0; x < mov.get_size().x; ++x, ++iforce, ++imask, ++ig, ++iref, ++imov) {
-					if (!*imask) 
-						continue; 
-                                        
-					auto c_block = prepare_range(mov.get_size(), x, y, m_hw); 
-					
-					
-					NCCSums sum; 
-					for (unsigned iy = c_block.first.y; iy < c_block.second.y; ++iy) {
-						auto ia = mov.begin_at(0,iy); 
-						auto ib = ref.begin_at(0, iy); 
-						auto im = m_mask.begin_at(0, iy); 
-						for (unsigned ix = c_block.first.x; ix < c_block.second.x; ++ix) {
-							
-							// make a local copy 
-							if (im[ix]) {
-								sum.add(ia[ix], ib[ix]);
-							}
-						}
-					}
-					
-					if (sum.has_samples()) {
-						auto res = sum.get_grad_helper(); 
-						lresult += res.first;
-						*iforce = res.second.get_gradient_scale(*imov, *iref) * *ig; 
-						++count; 
-					}
-					
-				}
-			}
-			return make_pair(result.first + lresult, result.second + count); 
-		};
-		pair<float,int> init{0, 0}; 		
-		auto r = preduce(C1DParallelRange(0, mov.get_size().y, 1), init, evaluate_local_cost_force, 
-				 [](const pair<float,int>& x, const pair<float,int>& y){
-					 return make_pair(x.first + y.first, x.second + y.second);
-				 });
-		
-		return r.second > 0 ? r.first / r.second : 0.0; 
-	}
-	
+   int m_hw;
+   const C2DBitImage& m_mask;
+   C2DFVectorfield& m_force;
+public:
+   FEvalCostForce(int hw, const C2DBitImage& mask, C2DFVectorfield& force):
+      m_hw(hw),
+      m_mask(mask),
+      m_force(force)
+      {}
+
+   float operator () ( const C2DDImage& mov, const C2DDImage& ref) const {
+      auto ag = get_gradient(mov);
+      auto evaluate_local_cost_force = [this, &mov, &ref, &ag](const C1DParallelRange& range,
+                            const pair<float, int>& result) -> pair<float, int> {
+
+         CThreadMsgStream msks;
+         float lresult = 0.0;
+         int count = 0;
+         const int max_length = 2 * m_hw + 1;
+         vector<float> a_buffer( max_length * max_length * max_length);
+         vector<float> b_buffer( max_length * max_length * max_length);
+
+         for (auto y = range.begin(); y != range.end(); ++y) {
+
+            auto iforce = m_force.begin_at(0,y);
+            auto imask = m_mask.begin_at(0,y);
+            auto ig = ag.begin_at(0,y);
+            auto imov = mov.begin_at(0,y);
+            auto iref = ref.begin_at(0,y);
+
+            for (size_t x = 0; x < mov.get_size().x; ++x, ++iforce, ++imask, ++ig, ++iref, ++imov) {
+               if (!*imask)
+                  continue;
+
+               auto c_block = prepare_range(mov.get_size(), x, y, m_hw);
+
+
+               NCCSums sum;
+               for (unsigned iy = c_block.first.y; iy < c_block.second.y; ++iy) {
+                  auto ia = mov.begin_at(0,iy);
+                  auto ib = ref.begin_at(0, iy);
+                  auto im = m_mask.begin_at(0, iy);
+                  for (unsigned ix = c_block.first.x; ix < c_block.second.x; ++ix) {
+
+                     // make a local copy
+                     if (im[ix]) {
+                        sum.add(ia[ix], ib[ix]);
+                     }
+                  }
+               }
+
+               if (sum.has_samples()) {
+                  auto res = sum.get_grad_helper();
+                  lresult += res.first;
+                  *iforce = res.second.get_gradient_scale(*imov, *iref) * *ig;
+                  ++count;
+               }
+
+            }
+         }
+         return make_pair(result.first + lresult, result.second + count);
+      };
+      pair<float,int> init{0, 0};
+      auto r = preduce(C1DParallelRange(0, mov.get_size().y, 1), init, evaluate_local_cost_force,
+             [](const pair<float,int>& x, const pair<float,int>& y){
+                return make_pair(x.first + y.first, x.second + y.second);
+             });
+
+      return r.second > 0 ? r.first / r.second : 0.0;
+   }
+
 };
 
 double CLNCC2DImageCost::do_evaluate_force(const Data& a, const Data& b, const Mask& m, Force& force) const
 {
-	FEvalCostForce ecostforce(m_hwidth, m, force); 
-	return mia::filter(ecostforce, a, b); 
+   auto a_double_ptr = m_copy_to_double->filter(a);
+   auto b_double_ptr = m_copy_to_double->filter(b);
+   const auto& mov = static_cast<const C2DDImage&>(*a_double_ptr);
+   const auto& ref = static_cast<const C2DDImage&>(*b_double_ptr);
+
+   FEvalCostForce ecostforce(m_hwidth, m, force);
+   return ecostforce(mov, ref);
 }
 
 
 CLNCC2DImageCostPlugin::CLNCC2DImageCostPlugin():
-C2DMaskedImageCostPlugin("lncc"), 
+C2DMaskedImageCostPlugin("lncc"),
      m_hw(5)
 {
-	this->add_parameter("w", make_ci_param(m_hw, 1, 256, false, 
-					       "half width of the window used for evaluating the localized cross correlation")); 
+   this->add_parameter("w", make_ci_param(m_hw, 1, 256, false,
+                      "half width of the window used for evaluating the localized cross correlation"));
 }
 
 C2DMaskedImageCost *CLNCC2DImageCostPlugin::do_create() const
 {
-	return new CLNCC2DImageCost(m_hw);
+   return new CLNCC2DImageCost(m_hw);
 }
 
 const std::string CLNCC2DImageCostPlugin::do_get_descr() const
 {
-	return "local normalized cross correlation with masking support."; 
+   return "local normalized cross correlation with masking support.";
 }
 
 
 extern "C" EXPORT CPluginBase *get_plugin_interface()
 {
-	return new CLNCC2DImageCostPlugin();
+   return new CLNCC2DImageCostPlugin();
 }
 
 NS_END
diff --git a/mia/2d/maskedcost/lncc.hh b/mia/2d/maskedcost/lncc.hh
index 23cc91a..c467fc1 100644
--- a/mia/2d/maskedcost/lncc.hh
+++ b/mia/2d/maskedcost/lncc.hh
@@ -22,6 +22,7 @@
 #define mia_2d_maskedcost_lncc_hh
 
 #include <mia/2d/maskedcost.hh>
+#include <mia/2d/filter.hh>
 
 #define NS mia_2d_maskedlncc
 
@@ -37,7 +38,7 @@ public:
 private: 
 	virtual double do_value(const Data& a, const Data& b, const Mask& m) const; 
 	virtual double do_evaluate_force(const Data& a, const Data& b, const Mask& m, Force& force) const; 
-
+  	mia::P2DFilter m_copy_to_double; 
         int m_hwidth; 
 };
 
diff --git a/mia/2d/maskedcost/ncc.cc b/mia/2d/maskedcost/ncc.cc
index d527794..e53cfde 100644
--- a/mia/2d/maskedcost/ncc.cc
+++ b/mia/2d/maskedcost/ncc.cc
@@ -1,6 +1,6 @@
 /* -*- mia-c++  -*-
  *
- * This file is part of MIA - a toolbox for medical image analysis 
+ * This file is part of MIA - a toolbox for medical image analysis
  * Copyright (c) Leipzig, Madrid 1999-2017 Gert Wollny
  *
  * MIA is free software; you can redistribute it and/or modify
@@ -21,142 +21,147 @@
 #include <mia/2d/maskedcost/ncc.hh>
 
 #include <mia/core/threadedmsg.hh>
-#include <mia/core/nccsum.hh> 
+#include <mia/core/nccsum.hh>
 #include <mia/core/parallel.hh>
 
 
 NS_BEGIN(NS)
 
-using namespace mia; 
+using namespace mia;
 
 
 CNCC2DImageCost::CNCC2DImageCost()
 {
+   m_copy_to_double = produce_2dimage_filter("convert:repn=double,map=copy");
 }
 
-template <typename T, typename S> 
 struct FEvaluateNCCSum {
-	FEvaluateNCCSum(const C2DBitImage& mask, const T& mov, const S& ref); 
-	NCCSums operator ()(const C1DParallelRange& range, const NCCSums& sumacc) const; 
-private: 
-	C2DBitImage m_mask; 
-	T m_mov; 
-	S m_ref; 
+   FEvaluateNCCSum(const C2DBitImage& mask, const C2DDImage& mov, const C2DDImage& ref);
+   NCCSums operator ()(const C1DParallelRange& range, const NCCSums& sumacc) const;
+private:
+   const C2DBitImage& m_mask;
+   const C2DDImage& m_mov;
+   const C2DDImage& m_ref;
 };
 
 
-template <typename T, typename S> 
-FEvaluateNCCSum<T,S>::FEvaluateNCCSum(const C2DBitImage& mask, const T& mov, const S& ref):
-	m_mask(mask),  m_mov(mov), m_ref(ref) 
+FEvaluateNCCSum::FEvaluateNCCSum(const C2DBitImage& mask, const C2DDImage& mov, const C2DDImage& ref):
+   m_mask(mask),  m_mov(mov), m_ref(ref)
 {
-	
+
 }
 
-template <typename T, typename S> 
-NCCSums FEvaluateNCCSum<T,S>::operator ()(const C1DParallelRange& range, const NCCSums& sumacc) const
+NCCSums FEvaluateNCCSum::operator ()(const C1DParallelRange& range, const NCCSums& sumacc) const
 {
-	CThreadMsgStream msks; 
-	
-	NCCSums sum;
-	for (auto z = range.begin(); z != range.end(); ++z) {
-		auto imask  = m_mask.begin_at(0,z);
-		auto ia = m_mov.begin_at(0, z); 
-		auto ib = m_ref.begin_at(0, z); 
-		auto eb = m_ref.begin_at(0, z + 1); 
-		
-		while (ib != eb) {
-			if (*imask) {
-				sum.add(*ia, *ib); 
-			}
-			++ia; ++ib; ++imask; 
-		}
-	}
-	return sum + sumacc; 
+   CThreadMsgStream msks;
+
+   NCCSums sum;
+   for (auto z = range.begin(); z != range.end(); ++z) {
+      auto imask  = m_mask.begin_at(0,z);
+      auto ia = m_mov.begin_at(0, z);
+      auto ib = m_ref.begin_at(0, z);
+      auto eb = m_ref.begin_at(0, z + 1);
+
+      while (ib != eb) {
+         if (*imask) {
+            sum.add(*ia, *ib);
+         }
+         ++ia; ++ib; ++imask;
+      }
+   }
+   return sum + sumacc;
 };
 
 
 class FEvalCost : public TFilter<float> {
-	const C2DBitImage& m_mask; 
+   const C2DBitImage& m_mask;
 public:
-	FEvalCost(const C2DBitImage& mask):
-		m_mask(mask)
-		{}
-	
-	template <typename T, typename R> 
-	float operator () ( const T& mov, const R& ref) const {
-
-		FEvaluateNCCSum<T,R> ev(m_mask, mov, ref); 
-		NCCSums sum; 
-		sum = preduce(C1DParallelRange(0, mov.get_size().y, 1), sum, ev, 
-			      [](const NCCSums& x, const NCCSums& y){
-				      return x + y;
-			      });
-		return sum.value(); 
-	}
-}; 
+   FEvalCost(const C2DBitImage& mask):
+      m_mask(mask)
+      {}
+
+        float operator () ( const C2DDImage& mov, const C2DDImage& ref) const {
+
+      FEvaluateNCCSum ev(m_mask, mov, ref);
+      NCCSums sum;
+      sum = preduce(C1DParallelRange(0, mov.get_size().y, 1), sum, ev,
+               [](const NCCSums& x, const NCCSums& y){
+                  return x + y;
+               });
+      return sum.value();
+   }
+};
 
 
 double CNCC2DImageCost::do_value(const Data& a, const Data& b, const Mask& m) const
 {
-	FEvalCost ecost(m); 
-	return mia::filter(ecost, a, b); 
+        auto a_double_ptr = m_copy_to_double->filter(a);
+   auto b_double_ptr = m_copy_to_double->filter(b);
+   const auto& mov = static_cast<const C2DDImage&>(*a_double_ptr);
+   const auto& ref = static_cast<const C2DDImage&>(*b_double_ptr);
+   FEvalCost ecost(m);
+   return ecost(mov, ref);
 }
 
 
 class FEvalCostForce : public TFilter<float> {
-	const C2DBitImage& m_mask; 
-	C2DFVectorfield& m_force; 
-public: 
-	FEvalCostForce(const C2DBitImage& mask, C2DFVectorfield& force):
-		m_mask(mask), 
-		m_force(force)
-		{}
-	
-	template <typename T, typename R> 
-	float operator () ( const T& mov, const R& ref) const {
-		CThreadMsgStream msks;
-		
-		NCCSums sum; 
-		FEvaluateNCCSum<T,R> ev(m_mask, mov, ref); 
-		sum = preduce(C1DParallelRange(0, mov.get_size().y, 1), sum, ev, 
-			      [](const NCCSums& x, const NCCSums& y){
-				      return x + y;
-			      });
-		
-		auto geval = sum.get_grad_helper(); 
-
-		auto grad = get_gradient(mov); 
-		auto grad_eval = [this, &mov, &ref, &grad, &geval](const C1DParallelRange& range) {
-			for (auto z = range.begin(); z != range.end(); ++z) {
-				auto ig = grad.begin_at(0,z); 
-				auto iforce = m_force.begin_at(0,z); 
-				auto im = m_mask.begin_at(0,z); 
-				auto ia = mov.begin_at(0,z); 
-				auto ib = ref.begin_at(0,z); 
-				auto eb = ref.begin_at(0,z+1); 
-				
-				while (ib != eb) {
-					if (*im) {
-						*iforce = geval.second.get_gradient_scale(*ia, *ib) * *ig; 
-					}
-					++ig; 
-					++iforce; 
-					++ia; ++ib; ++im; 
-				}
-			}; 
-		}; 
-		
-		pfor(C1DParallelRange(0, mov.get_size().y, 1), grad_eval); 
-
-		return geval.first; 
-	}
-	
+   const C2DBitImage& m_mask;
+   C2DFVectorfield& m_force;
+public:
+   FEvalCostForce(const C2DBitImage& mask, C2DFVectorfield& force):
+      m_mask(mask),
+      m_force(force)
+      {}
+
+   float operator () ( const C2DDImage& mov, const C2DDImage& ref) const {
+      CThreadMsgStream msks;
+
+      NCCSums sum;
+      FEvaluateNCCSum ev(m_mask, mov, ref);
+      sum = preduce(C1DParallelRange(0, mov.get_size().y, 1), sum, ev,
+               [](const NCCSums& x, const NCCSums& y){
+                  return x + y;
+               });
+
+      auto geval = sum.get_grad_helper();
+
+      auto grad = get_gradient(mov);
+      auto grad_eval = [this, &mov, &ref, &grad, &geval](const C1DParallelRange& range) {
+         for (auto z = range.begin(); z != range.end(); ++z) {
+            auto ig = grad.begin_at(0,z);
+            auto iforce = m_force.begin_at(0,z);
+            auto im = m_mask.begin_at(0,z);
+            auto ia = mov.begin_at(0,z);
+            auto ib = ref.begin_at(0,z);
+            auto eb = ref.begin_at(0,z+1);
+
+            while (ib != eb) {
+               if (*im) {
+                  *iforce = geval.second.get_gradient_scale(*ia, *ib) * *ig;
+               }
+               ++ig;
+               ++iforce;
+               ++ia; ++ib; ++im;
+            }
+         };
+      };
+
+      pfor(C1DParallelRange(0, mov.get_size().y, 1), grad_eval);
+
+      return geval.first;
+   }
+
 };
 
 double CNCC2DImageCost::do_evaluate_force(const Data& a, const Data& b, const Mask& m, Force& force) const
 {
-	FEvalCostForce ecostforce(m, force); 
-	return mia::filter(ecostforce, a, b); 
+        auto a_double_ptr = m_copy_to_double->filter(a);
+   auto b_double_ptr = m_copy_to_double->filter(b);
+   const auto& mov = static_cast<const C2DDImage&>(*a_double_ptr);
+   const auto& ref = static_cast<const C2DDImage&>(*b_double_ptr);
+
+   FEvalCostForce ecostforce(m, force);
+   return ecostforce(mov, ref);
 }
 
 
@@ -167,17 +172,17 @@ C2DMaskedImageCostPlugin("ncc")
 
 C2DMaskedImageCost *CNCC2DImageCostPlugin::do_create() const
 {
-	return new CNCC2DImageCost();
+   return new CNCC2DImageCost();
 }
 
 const std::string CNCC2DImageCostPlugin::do_get_descr() const
 {
-	return "normalized cross correlation with masking support."; 
+   return "normalized cross correlation with masking support.";
 }
 
 extern "C" EXPORT CPluginBase *get_plugin_interface()
 {
-	return new CNCC2DImageCostPlugin();
+   return new CNCC2DImageCostPlugin();
 }
 
 NS_END
diff --git a/mia/2d/maskedcost/ncc.hh b/mia/2d/maskedcost/ncc.hh
index 91d36a2..e09af04 100644
--- a/mia/2d/maskedcost/ncc.hh
+++ b/mia/2d/maskedcost/ncc.hh
@@ -22,6 +22,7 @@
 #define mia_2d_maskedcost_ncc_hh
 
 #include <mia/2d/maskedcost.hh>
+#include <mia/2d/filter.hh>
 
 #define NS mia_2d_maskedncc
 
@@ -36,7 +37,8 @@ public:
 	CNCC2DImageCost();
 private: 
 	virtual double do_value(const Data& a, const Data& b, const Mask& m) const; 
-	virtual double do_evaluate_force(const Data& a, const Data& b, const Mask& m, Force& force) const; 
+	virtual double do_evaluate_force(const Data& a, const Data& b, const Mask& m, Force& force) const;
+    	mia::P2DFilter m_copy_to_double; 
 };
 
 class CNCC2DImageCostPlugin: public mia::C2DMaskedImageCostPlugin {
diff --git a/mia/3d/CMakeLists.txt b/mia/3d/CMakeLists.txt
index 672bb48..0c3cf7b 100644
--- a/mia/3d/CMakeLists.txt
+++ b/mia/3d/CMakeLists.txt
@@ -176,6 +176,7 @@ ENDIF(APPLE)
 #
 # Testing 
 #
+IF(MIA_ENABLE_TESTING)
 ADD_EXECUTABLE(test-3d ${MIA3DTEST_SRC})
 SET_TARGET_PROPERTIES(test-3d PROPERTIES COMPILE_FLAGS -DBOOST_TEST_DYN_LINK)
 TARGET_LINK_LIBRARIES(test-3d mia3dtest ${MIA3DLIBS} ${BOOST_UNITTEST})
@@ -218,6 +219,7 @@ TEST_3D(rot rot)
 TEST_3D(splinetransformpenalty splinetransformpenalty)
 
 TEST_3D(imageio imageio)
+ENDIF()
 #
 # installation 
 #
diff --git a/mia/3d/maskedcost/lncc.cc b/mia/3d/maskedcost/lncc.cc
index 90b3174..5bc9ee2 100644
--- a/mia/3d/maskedcost/lncc.cc
+++ b/mia/3d/maskedcost/lncc.cc
@@ -34,6 +34,7 @@ using std::make_pair;
 CLNCC3DImageCost::CLNCC3DImageCost(int hw):
 m_hwidth(hw)
 {
+        m_copy_to_double = produce_3dimage_filter("convert:repn=double,map=copy");  
 }
 
 inline pair<C3DBounds, C3DBounds> prepare_range(const C3DBounds& size, int cx, int cy, int cz, int hw) 
@@ -70,8 +71,7 @@ public:
 		m_mask(mask)
 		{}
 	
-	template <typename T, typename R> 
-	float operator () ( const T& mov, const R& ref) const {
+        float operator () ( const C3DDImage& mov, const C3DDImage& ref) const {
 		auto evaluate_local_cost = [this, &mov, &ref](const C1DParallelRange& range, const pair<float, int>& result) -> pair<float, int> {
 			CThreadMsgStream msks; 
 			float lresult = 0.0; 
@@ -130,8 +130,13 @@ public:
 
 double CLNCC3DImageCost::do_value(const Data& a, const Data& b, const Mask& m) const
 {
+	auto a_double_ptr = m_copy_to_double->filter(a);
+	auto b_double_ptr = m_copy_to_double->filter(b);
+	const C3DDImage& mov = static_cast<const C3DDImage&>(*a_double_ptr);
+	const C3DDImage& ref = static_cast<const C3DDImage&>(*b_double_ptr);
+
 	FEvalCost ecost(m_hwidth, m); 
-	return mia::filter(ecost, a, b); 
+	return ecost(mov, ref); 
 }
 
 
@@ -146,8 +151,7 @@ public:
 		m_force(force)
 		{}
 	
-	template <typename T, typename R> 
-	float operator () ( const T& mov, const R& ref) const {
+	float operator () ( const C3DDImage& mov, const C3DDImage& ref) const {
 		auto ag = get_gradient(mov); 
 		auto evaluate_local_cost_force = [this, &mov, &ref, &ag](const C1DParallelRange& range, 
 									 const pair<float, int>& result) -> pair<float, int> {
@@ -214,8 +218,13 @@ public:
 
 double CLNCC3DImageCost::do_evaluate_force(const Data& a, const Data& b, const Mask& m, Force& force) const
 {
-	FEvalCostForce ecostforce(m_hwidth, m, force); 
-	return mia::filter(ecostforce, a, b); 
+	auto a_double_ptr = m_copy_to_double->filter(a);
+	auto b_double_ptr = m_copy_to_double->filter(b);
+	const C3DDImage& mov = static_cast<const C3DDImage&>(*a_double_ptr);
+	const C3DDImage& ref = static_cast<const C3DDImage&>(*b_double_ptr);
+
+        FEvalCostForce ecostforce(m_hwidth, m, force); 
+	return ecostforce(mov, ref); 
 }
 
 
diff --git a/mia/3d/maskedcost/lncc.hh b/mia/3d/maskedcost/lncc.hh
index 82c67cf..bb68f01 100644
--- a/mia/3d/maskedcost/lncc.hh
+++ b/mia/3d/maskedcost/lncc.hh
@@ -22,6 +22,7 @@
 #define mia_3d_maskedcost_lncc_hh
 
 #include <mia/3d/maskedcost.hh>
+#include <mia/3d/filter.hh>
 
 #define NS mia_3d_maskedlncc
 
@@ -37,7 +38,8 @@ public:
 private: 
 	virtual double do_value(const Data& a, const Data& b, const Mask& m) const; 
 	virtual double do_evaluate_force(const Data& a, const Data& b, const Mask& m, Force& force) const; 
-        int m_hwidth; 
+        int m_hwidth;
+  	mia::P3DFilter m_copy_to_double; 
 };
 
 class CLNCC3DImageCostPlugin: public mia::C3DMaskedImageCostPlugin {
diff --git a/mia/3d/maskedcost/ncc.cc b/mia/3d/maskedcost/ncc.cc
index e34fd52..49e81f1 100644
--- a/mia/3d/maskedcost/ncc.cc
+++ b/mia/3d/maskedcost/ncc.cc
@@ -31,28 +31,26 @@ using namespace mia;
 
 CNCC3DImageCost::CNCC3DImageCost()
 {
+        m_copy_to_double = produce_3dimage_filter("convert:repn=double,map=copy"); 
 }
 
-template <typename T, typename S> 
 struct FEvaluateNCCSum {
-	FEvaluateNCCSum(const C3DBitImage& mask, const T& mov, const S& ref); 
+	FEvaluateNCCSum(const C3DBitImage& mask, const C3DDImage& mov, const C3DDImage& ref); 
 	NCCSums operator ()(const C1DParallelRange& range, const NCCSums& sumacc) const; 
 private: 
 	C3DBitImage m_mask; 
-	T m_mov; 
-	S m_ref; 
+	const C3DDImage& m_mov; 
+	const C3DDImage& m_ref; 
 };
 
 
-template <typename T, typename S> 
-FEvaluateNCCSum<T,S>::FEvaluateNCCSum(const C3DBitImage& mask, const T& mov, const S& ref):
+FEvaluateNCCSum::FEvaluateNCCSum(const C3DBitImage& mask, const C3DDImage& mov, const C3DDImage& ref):
 	m_mask(mask),  m_mov(mov), m_ref(ref) 
 {
 	
 }
 
-template <typename T, typename S> 
-NCCSums FEvaluateNCCSum<T,S>::operator ()(const C1DParallelRange& range, const NCCSums& sumacc) const
+NCCSums FEvaluateNCCSum::operator ()(const C1DParallelRange& range, const NCCSums& sumacc) const
 {
 	CThreadMsgStream msks; 
 	
@@ -81,10 +79,9 @@ public:
 		m_mask(mask)
 		{}
 	
-	template <typename T, typename R> 
-	float operator () ( const T& mov, const R& ref) const {
+                float operator () ( const C3DDImage& mov, const C3DDImage& ref) const {
 
-		FEvaluateNCCSum<T,R> ev(m_mask, mov, ref); 
+		FEvaluateNCCSum ev(m_mask, mov, ref); 
 		NCCSums sum; 
 		sum = preduce(C1DParallelRange(0, mov.get_size().z, 1), sum, ev, 
 			      [](const NCCSums& x, const NCCSums& y){
@@ -97,8 +94,13 @@ public:
 
 double CNCC3DImageCost::do_value(const Data& a, const Data& b, const Mask& m) const
 {
+	auto a_double_ptr = m_copy_to_double->filter(a);
+	auto b_double_ptr = m_copy_to_double->filter(b);
+	const C3DDImage& mov = static_cast<const C3DDImage&>(*a_double_ptr);
+	const C3DDImage& ref = static_cast<const C3DDImage&>(*b_double_ptr);
+
 	FEvalCost ecost(m); 
-	return mia::filter(ecost, a, b); 
+	return ecost(mov, ref); 
 }
 
 
@@ -111,12 +113,11 @@ public:
 		m_force(force)
 		{}
 	
-	template <typename T, typename R> 
-	float operator () ( const T& mov, const R& ref) const {
+	float operator () (const C3DDImage& mov, const C3DDImage& ref) const {
 		CThreadMsgStream msks;
 		
 		NCCSums sum; 
-		FEvaluateNCCSum<T,R> ev(m_mask, mov, ref); 
+		FEvaluateNCCSum ev(m_mask, mov, ref); 
 		sum = preduce(C1DParallelRange(0, mov.get_size().z, 1), sum, ev, 
 			      [](const NCCSums& x, const NCCSums& y){
 				      return x + y;
@@ -154,8 +155,13 @@ public:
 
 double CNCC3DImageCost::do_evaluate_force(const Data& a, const Data& b, const Mask& m, Force& force) const
 {
+	auto a_double_ptr = m_copy_to_double->filter(a);
+	auto b_double_ptr = m_copy_to_double->filter(b);
+	const C3DDImage& mov = static_cast<const C3DDImage&>(*a_double_ptr);
+	const C3DDImage& ref = static_cast<const C3DDImage&>(*b_double_ptr);
+
 	FEvalCostForce ecostforce(m, force); 
-	return mia::filter(ecostforce, a, b); 
+	return ecostforce(mov, ref); 
 }
 
 
diff --git a/mia/3d/maskedcost/ncc.hh b/mia/3d/maskedcost/ncc.hh
index 25c1ac6..8515215 100644
--- a/mia/3d/maskedcost/ncc.hh
+++ b/mia/3d/maskedcost/ncc.hh
@@ -22,6 +22,7 @@
 #define mia_3d_maskedcost_ncc_hh
 
 #include <mia/3d/maskedcost.hh>
+#include <mia/3d/filter.hh>
 
 #define NS mia_3d_maskedncc
 
@@ -37,6 +38,7 @@ public:
 private: 
 	virtual double do_value(const Data& a, const Data& b, const Mask& m) const; 
 	virtual double do_evaluate_force(const Data& a, const Data& b, const Mask& m, Force& force) const; 
+	mia::P3DFilter m_copy_to_double; 
 };
 
 class CNCC3DImageCostPlugin: public mia::C3DMaskedImageCostPlugin {
diff --git a/mia/core/CMakeLists.txt b/mia/core/CMakeLists.txt
index 0588d55..8aceacc 100644
--- a/mia/core/CMakeLists.txt
+++ b/mia/core/CMakeLists.txt
@@ -246,15 +246,6 @@ IF(PWPDF_FOUND)
     )
 ENDIF(PWPDF_FOUND) 
 
-MACRO(CORE_TEST name)
-  ADD_EXECUTABLE(test-${name} test_${name}.cc)  
-  TARGET_LINK_LIBRARIES(test-${name} ${MIACORE} ${BOOST_UNITTEST})
-  ADD_TEST(core-${name} test-${name})
-  IF(WIN32)
-    SET_TARGET_PROPERTIES(test-${name} PROPERTIES LINKER_FLAGS "/NODEFAULTLIB:MSVCRT")
-  ENDIF(WIN32)
-ENDMACRO(CORE_TEST name)
-
 #
 # create the revision retrival code
 #
@@ -290,14 +281,16 @@ SET_TARGET_PROPERTIES(miacore PROPERTIES
 # the test targets 
 #
 
-ADD_EXECUTABLE(test-core ${MIACORETEST_SRC})
-ADD_TEST(core test-core)
+IF(MIA_ENABLE_TESTING)
+  ADD_EXECUTABLE(test-core ${MIACORETEST_SRC})
+  ADD_TEST(core test-core)
 
 TARGET_LINK_LIBRARIES(test-core ${MIACORE} ${BOOST_UNITTEST})
 
 IF(WIN32)
   SET_TARGET_PROPERTIES(test-core PROPERTIES LINKER_FLAGS "/NODEFAULTLIB:MSVCRT")
 ENDIF(WIN32)
+ENDIF()
 
 #IF (NOT WIN32)
 #  CORE_TEST(history)
@@ -313,11 +306,12 @@ ADD_SUBDIRECTORY(splinekernel  )
 ADD_SUBDIRECTORY(splinebc  ) 
 ADD_SUBDIRECTORY(testplug      )
 
+IF(MIA_ENABLE_TESTING)
+
 #tests that fork themselfs 
 SET(cmdxmlhelp_params  -r 1 --other o)
 NEW_TEST_WITH_PARAM(cmdxmlhelp miacore "${cmdxmlhelp_params}")
 
-
 NEW_TEST(Vector miacore)
 NEW_TEST(attributes miacore)
 NEW_TEST(boundary_conditions miacore)
@@ -381,6 +375,8 @@ ENDIF(FFTWF_FOUND AND WITH_FFTWF)
 IF(PWPDF_FOUND) 
   NEW_TEST(pwh miacore)
 ENDIF(PWPDF_FOUND) 
+
+ENDIF()
 #
 #installation 
 #
diff --git a/mia/mesh/CMakeLists.txt b/mia/mesh/CMakeLists.txt
index 30f948f..db140fe 100644
--- a/mia/mesh/CMakeLists.txt
+++ b/mia/mesh/CMakeLists.txt
@@ -50,6 +50,7 @@ SET(MIAMESHLIBS miamesh)
 ADD_SUBDIRECTORY(io)
 ADD_SUBDIRECTORY(filter)
 
-TEST_MESH(triangulate)
-TEST_MESH(triangle_neighbourhood)
-
+IF(MIA_ENABLE_TESTING)
+  TEST_MESH(triangulate)
+  TEST_MESH(triangle_neighbourhood)
+ENDIF()
diff --git a/src/3dsegment-local-cmeans.cc b/src/3dsegment-local-cmeans.cc
index 19d2a42..0129edf 100644
--- a/src/3dsegment-local-cmeans.cc
+++ b/src/3dsegment-local-cmeans.cc
@@ -1,6 +1,6 @@
 /* -*- mia-c++  -*-
  *
- * This file is part of MIA - a toolbox for medical image analysis 
+ * This file is part of MIA - a toolbox for medical image analysis
  * Copyright (c) Leipzig, Madrid 1999-2017 Gert Wollny
  *
  * MIA is free software; you can redistribute it and/or modify
@@ -19,12 +19,9 @@
  */
 
 /*
-  Todo: 
-  - parallelize local cmeans runs 
-  - adaptive filter sizes? 
+  Todo:
+  - adaptive filter sizes?
   - Consider different tresholds for larger filter width
- 
-
 */
 
 #ifdef HAVE_CONFIG_H
@@ -43,518 +40,522 @@
 #include <numeric>
 
 using namespace mia;
-using namespace std; 
+using namespace std;
 
 typedef vector<C3DFImage> C3DFImageVec;
 
 const SProgramDescription g_description = {
-        {pdi_group, "Analysis, filtering, combining, and segmentation of 3D images"}, 
-	{pdi_short, "Run a c-means segmentation of a 3D image."}, 
-	{pdi_description, "This program runs the segmentation of a 3D image by applying "
-	 "a localized c-means approach that helps to overcome intensity inhomogeneities "
-	 "in the image. The approach evaluates a global c-means clustering, and then "
-	 "separates the image into overlapping regions where more c-means iterations "
-	 "are run only including the locally present classes, i.e. the classes that "
-	 "relatively contain more pixels than a given threshold."}, 
-	{pdi_example_descr, "Run the segmentation on image test.png using three classes, "
-	 "local regions of 40 pixels (grid width 20 pixels), and a class ignore threshold of 0.01." }, 
-	{pdi_example_code, "-i test.png -o label.png -n 3 -g 20 -t 0.01"}
-}; 
+        {pdi_group, "Analysis, filtering, combining, and segmentation of 3D images"},
+        {pdi_short, "Run a c-means segmentation of a 3D image."},
+        {pdi_description, "This program runs the segmentation of a 3D image by applying "
+         "a localized c-means approach that helps to overcome intensity inhomogeneities "
+         "in the image. The approach evaluates a global c-means clustering, and then "
+         "separates the image into overlapping regions where more c-means iterations "
+         "are run only including the locally present classes, i.e. the classes that "
+         "relatively contain more pixels than a given threshold."},
+        {pdi_example_descr, "Run the segmentation on image test.png using three classes, "
+         "local regions of 40 pixels (grid width 20 pixels), and a class ignore threshold of 0.01." },
+        {pdi_example_code, "-i test.png -o label.png -n 3 -g 20 -t 0.01"}
+};
 
 class FRunHistogram : public TFilter<void> {
-public: 
-	template <typename T> 
-	void operator()(const T3DImage<T>& image);
+public:
+        template <typename T>
+        void operator()(const T3DImage<T>& image);
 
-	CSparseHistogram::Compressed get_histogram() const; 
-	
-private: 
-	CSparseHistogram m_sparse_histogram; 
-}; 
+        CSparseHistogram::Compressed get_histogram() const;
+
+private:
+        CSparseHistogram m_sparse_histogram;
+};
 
 struct ProtectedProbBuffer {
-	CMutex mutex;
-	vector<C3DFDatafield> probmap;
+        CMutex mutex;
+        vector<C3DFDatafield> probmap;
 
-	ProtectedProbBuffer(int n_classes, const C3DBounds& size);
+        ProtectedProbBuffer(int n_classes, const C3DBounds& size);
 
-	ProtectedProbBuffer(const ProtectedProbBuffer& orig);
-}; 
+        ProtectedProbBuffer(const ProtectedProbBuffer& orig);
+};
 
 
 class FLocalCMeans: public TFilter<void> {
-public: 
-	typedef map<int, CMeans::DVector> Probmap; 
-
-	
-	FLocalCMeans(float epsilon, const vector<double>& global_class_centers,
-		     const C3DBounds& start, const C3DBounds& end,
-		     const Probmap& global_probmap,
-		     float rel_cluster_threshold, 
-		     const map<int, unsigned>& segmap, 
-		     ProtectedProbBuffer& prob_buffer,
-		     bool partition_with_background);
-	
-	template <typename T> 
-	void operator()(const T3DImage<T>& image);
+public:
+        typedef map<int, CMeans::DVector> Probmap;
+
+
+        FLocalCMeans(float epsilon, const vector<double>& global_class_centers,
+                     const C3DBounds& start, const C3DBounds& end,
+                     const Probmap& global_probmap,
+                     float rel_cluster_threshold,
+                     const map<int, unsigned>& segmap,
+                     ProtectedProbBuffer& prob_buffer,
+                     bool partition_with_background);
+
+        template <typename T>
+        void operator()(const T3DImage<T>& image);
 private:
 
-	const float m_epsilon;
-	const vector<double>& m_global_class_centers;
-	const C3DBounds m_start;
-	const C3DBounds m_end;
-	const Probmap& m_global_probmap;
-	const float m_rel_cluster_threshold; 
-	const map<int, unsigned>& m_segmap;
+        const float m_epsilon;
+        const vector<double>& m_global_class_centers;
+        const C3DBounds m_start;
+        const C3DBounds m_end;
+        const Probmap& m_global_probmap;
+        const float m_rel_cluster_threshold;
+        const map<int, unsigned>& m_segmap;
 
-	ProtectedProbBuffer& m_prob_buffer;
-	size_t m_count; 
-	bool m_partition_with_background; 
-}; 
+        ProtectedProbBuffer& m_prob_buffer;
+        size_t m_count;
+        bool m_partition_with_background;
+};
 
 class FCrispSeg: public TFilter<P3DImage> {
 public:
-	FCrispSeg(const map<int, unsigned>& segmap):
-		m_segmap(segmap)
-		{
-		}
-
-	template <typename T> 
-	P3DImage operator()(const T3DImage<T>& image) const {
-		P3DImage out_image = make_shared<C3DUBImage>(image.get_size());
-		C3DUBImage *help = static_cast<C3DUBImage*>(out_image.get());
-		transform(image.begin(), image.end(), help->begin(),
-			  [this](T x){return m_segmap.at(x);}); 
-		return out_image; 
-	}
+        FCrispSeg(const map<int, unsigned>& segmap):
+                m_segmap(segmap)
+        {
+        }
+
+        template <typename T>
+        P3DImage operator()(const T3DImage<T>& image) const {
+                P3DImage out_image = make_shared<C3DUBImage>(image.get_size());
+                C3DUBImage *help = static_cast<C3DUBImage*>(out_image.get());
+                transform(image.begin(), image.end(), help->begin(),
+                          [this](T x){return m_segmap.at(x);});
+                return out_image;
+        }
 private:
-	const map<int, unsigned>& m_segmap;
-}; 
+        const map<int, unsigned>& m_segmap;
+};
 
 
 
 int do_main(int argc, char *argv[])
 {
 
-	string in_filename; 
-	string out_filename;
-	string out_filename2;
-	string cls_filename;
-	string debug_filename; 
-	
+        string in_filename;
+        string out_filename;
+        string out_filename2;
+        string cls_filename;
+        string debug_filename;
+
         int blocksize = 15;
-	double rel_cluster_threshold = 0.02;
-
-	float cmeans_epsilon = 0.0001;
-	float class_label_thresh = 0.0f; 
-
-	bool ignore_partition_with_background = false; 
-	
-	CMeans::PInitializer class_center_initializer; 
-		
-	const auto& image3dio = C3DImageIOPluginHandler::instance();
-
-	CCmdOptionList opts(g_description);
-        opts.set_group("File-IO"); 
-	opts.add(make_opt( in_filename, "in-file", 'i', "image to be segmented",
-			   CCmdOptionFlags::required_input, &image3dio )); 
-	opts.add(make_opt( out_filename, "out-file", 'o', "class label image based on "
-			   "merging local labels", CCmdOptionFlags::output, &image3dio ));
-
-	opts.add(make_opt( out_filename2, "out-global-crisp", 'G', "class label image based on "
-			   "global segmentation", CCmdOptionFlags::output, &image3dio ));
-	opts.add(make_opt( cls_filename, "class-prob", 'C', "class probability image file, filetype "
-			   "must support floating point multi-frame images", CCmdOptionFlags::output, &image3dio ));
-	
-        opts.set_group("Parameters"); 
-	opts.add(make_opt( blocksize, EParameterBounds::bf_min_closed, {3},
-			   "grid-spacing", 'g', "Spacing of the grid used to modulate "
-			   "the intensity inhomogeneities"));
-	opts.add(make_opt( class_center_initializer, "kmeans:nc=3",
-			   "cmeans", 'c', "c-means initializer"));
-	opts.add(make_opt( cmeans_epsilon, EParameterBounds::bf_min_open,
-			   {0.0}, "c-means-epsilon", 'e', "c-means breaking condition for update tolerance"));
-	opts.add(make_opt( rel_cluster_threshold, EParameterBounds::bf_min_closed | EParameterBounds::bf_max_open,
-			   {0.0,1.0}, "relative-cluster-threshold", 't', "threshhold to ignore classes when initializing"
-			   " the local cmeans from the global one.")); 
-	opts.add(make_opt(ignore_partition_with_background, "ignore-background", 'B',
-			  "Don't take background probablities into account when desiding whether classes are to be ignored"));
-	opts.add(make_opt(class_label_thresh, 
-			  EParameterBounds::bf_min_closed | EParameterBounds::bf_max_closed,
-			  {0.0f, 1.0f},
-			  "label-threshold", 'L',
-			  "for values <= 0.5: create segmentation based on highest class probability, "
-			  "labels staat at 0. For values >0.5: create labels only for voxels with a "
-			  "class probability higher than theg given value, labels start at 1 and voxels "
-			  "without an according class probability are set to 0; this output is suitable "
-			  "for the seeded watershed filter.")); 
-	
-	if (opts.parse(argc, argv) != CCmdOptionList::hr_no)
-		return EXIT_SUCCESS; 
-
-	if (out_filename.empty() && out_filename2.empty()) {
-		throw invalid_argument("You must specify at least one output file"); 
-	}
-	
-	auto in_image = load_image3d(in_filename);
-
-	FRunHistogram global_histogram; 
-
-	mia::accumulate(global_histogram, *in_image);
-	
-	CMeans globale_cmeans(cmeans_epsilon, class_center_initializer); 
-
-	auto gh = global_histogram.get_histogram();
-
-	CMeans::DVector global_class_centers;
-	auto global_sparse_probmap = globale_cmeans.run(gh, global_class_centers, false);
-
-	cvinfo() << "Histogram range: [" << gh[0].first << ", " << gh[gh.size()-1].first << "]\n"; 
-	cvmsg() << "Global class centers: " << global_class_centers << "\n";
-	cvinfo() << "Probmap size = " << global_sparse_probmap.size()
-		 << " weight number " << global_sparse_probmap[0].second.size() << "\n"; 
-
-	const unsigned n_classes = global_class_centers.size(); 
-	
-	// need the normalized class centers
-	
-	
-	
-	map<int, unsigned> segmap;
-	for_each(global_sparse_probmap.begin(), global_sparse_probmap.end(),
-		 [&segmap](const CMeans::SparseProbmap::value_type& x) {
-			 int c = 0;
-			 float max_prob = 0.0f;
-			 for (unsigned i = 0; i< x.second.size(); ++i) {
-				 auto f = x.second[i]; 
-				 if (f > max_prob) {
-					 max_prob = f;
-					 c = i; 
-				 };
-			 }
-			 segmap[x.first] = c; 
-		 }); 
-
-
-	FLocalCMeans::Probmap global_probmap;
-	for (auto k : global_sparse_probmap) {
-		global_probmap[k.first] = k.second; 
-	}; 
-	
-	if (!out_filename2.empty()) {
-		FCrispSeg cs(segmap);
-		auto crisp_global_seg = mia::filter(cs, *in_image);
-		if (!save_image (out_filename2, crisp_global_seg)) {
-			cverr() << "Unable to save to '" << out_filename2 << "'\n"; 
-		}
-	}
-	
-	int  nx = (in_image->get_size().x + blocksize - 1) / blocksize + 1;
-	int  ny = (in_image->get_size().y + blocksize - 1) / blocksize + 1;
-	int  nz = (in_image->get_size().z + blocksize - 1) / blocksize + 1;
-	int  start_x = - static_cast<int>(nx * blocksize - in_image->get_size().x) / 2; 
-	int  start_y = - static_cast<int>(ny * blocksize - in_image->get_size().y) / 2;
-	int  start_z = - static_cast<int>(nz * blocksize - in_image->get_size().z) / 2; 
-
-	cvdebug() << "Start at " << start_x << ", " << start_y << ", " << start_z << "\n"; 
-	
-	
-	int max_threads = CMaxTasks::get_max_tasks();
-	assert(max_threads > 0); 
-
-	CMutex current_probbuffer_mutex; 
-	int current_probbuffer = 0; 
-	
-	vector<ProtectedProbBuffer> prob_buffers(max_threads,
-						 ProtectedProbBuffer(global_class_centers.size(),
-								     in_image->get_size())); 
-
-	auto block_runner = [&](const C1DParallelRange& z_range) -> void {
-		CThreadMsgStream msg_stream; 
-		current_probbuffer_mutex.lock();
-		int my_prob_buffer = current_probbuffer++;
-		if (current_probbuffer >= max_threads)
-			current_probbuffer = 0; 
-		current_probbuffer_mutex.unlock();
-		
-		for (int  i = z_range.begin(); i < z_range.end(); ++i) {
-			int iz_base = start_z + i * blocksize; 
-			unsigned iz = iz_base < 0 ? 0 : iz_base;
-			unsigned iz_end = iz_base +  2 * blocksize;
-			if (iz_end > in_image->get_size().z)
-				iz_end = in_image->get_size().z;
-
-			cvmsg() << "Run slices " << iz_base << " - " <<  iz_end
-				<< " with buffer " << my_prob_buffer << "\n"; 
-			
-			for (int  iy_base = start_y; iy_base < (int)in_image->get_size().y; iy_base +=  blocksize) {
-				unsigned iy = iy_base < 0 ? 0 : iy_base;
-				unsigned iy_end = iy_base +  2 * blocksize;
-				if (iy_end > in_image->get_size().y)
-					iy_end = in_image->get_size().y; 
-				
-				for (int ix_base = start_x; ix_base < (int)in_image->get_size().x; ix_base +=  blocksize) {
-					unsigned ix = ix_base < 0 ? 0 : ix_base;
-					unsigned ix_end = ix_base +  2 * blocksize;
-					if (ix_end > in_image->get_size().x)
-						ix_end = in_image->get_size().x; 
-					
-					
-					FLocalCMeans lcm(cmeans_epsilon, global_class_centers,
-							 C3DBounds(ix, iy, iz),
-							 C3DBounds(ix_end, iy_end, iz_end),
-							 global_probmap,
-							 rel_cluster_threshold,
-							 segmap, 
-							 prob_buffers[my_prob_buffer],
-							 !ignore_partition_with_background);
-					
-					mia::accumulate(lcm, *in_image); 
-				}
-			}
-		}
-		
-	}; 
-
-	pfor(C1DParallelRange(0, nz, 1), block_runner); 
-	
-	
-	// sum the probabilities 
-	for (unsigned i = 1; i < prob_buffers.size(); ++i) {
-		for (unsigned c = 0; c < n_classes; ++c) {
-			transform(prob_buffers[i].probmap[c].begin(), prob_buffers[i].probmap[c].end(),
-				  prob_buffers[0].probmap[c].begin(), prob_buffers[0].probmap[c].begin(),
-				  [](float x, float y){return x+y;}); 
-		}
-	}
-	
-	
-	// normalize probability images
-	vector<C3DFDatafield>& prob_buffer = prob_buffers[0].probmap; 
-		
-	C3DFImage sum(prob_buffer[0]);
-	for (unsigned c = 1; c < n_classes; ++c) {
-		transform(sum.begin(), sum.end(), prob_buffer[c].begin(), sum.begin(),
-			  [](float x, float y){return x+y;}); 
-	}
-	for (unsigned c = 0; c < n_classes; ++c) {
-		transform(sum.begin(), sum.end(), prob_buffer[c].begin(), prob_buffer[c].begin(),
-			  [](float s, float p){return p/s;});
-	}
-	if (!cls_filename.empty()) {
-		C3DImageIOPluginHandler::Instance::Data classes;
-
-		for (unsigned c = 0; c < n_classes; ++c)
-			classes.push_back(make_shared<C3DFImage>(prob_buffer[c]));
-
-		if (!C3DImageIOPluginHandler::instance().save(cls_filename, classes))
-			cverr() << "Error writing class images to '" << cls_filename << "'\n"; 
-	}
-	
-	
-	// now for each pixel we have a probability sum that should take inhomogeinities
-	// into account
-
-	C3DUBImage out_image(in_image->get_size(), *in_image);
-	fill(out_image.begin(), out_image.end(), 0);
-
-	if (class_label_thresh <= 0.5f) {
-
-		for (unsigned c = 1; c < n_classes; ++c) {
-			auto iout = out_image.begin();
-			auto eout = out_image.end();
-			
-			auto itest = prob_buffer[0].begin();
-			auto iprob = prob_buffer[c].begin();
-			
-			while ( iout != eout ){
-				if (*itest < *iprob) {
-					*itest = *iprob;
-					*iout = c; 
-				}
-				++iout; ++itest; ++iprob; 
-			}
-		}
-	}else{
-		for (unsigned c = 0; c < n_classes; ++c) {
-			auto iout = out_image.begin();
-			auto eout = out_image.end();
-			auto iprob = prob_buffer[c].begin();
-			
-			while ( iout != eout ){
-				if (class_label_thresh < *iprob) {
-					*iout = c +1; 
-				}
-				++iout;  ++iprob; 
-			}
-		}
-	}
-	return save_image(out_filename, out_image) ? EXIT_SUCCESS : EXIT_FAILURE; 
+        double rel_cluster_threshold = 0.02;
+
+        float cmeans_epsilon = 0.0001;
+        float class_label_thresh = 0.0f;
+
+        bool ignore_partition_with_background = false;
+
+        CMeans::PInitializer class_center_initializer;
+
+        const auto& image3dio = C3DImageIOPluginHandler::instance();
+
+        CCmdOptionList opts(g_description);
+        opts.set_group("File-IO");
+        opts.add(make_opt( in_filename, "in-file", 'i', "image to be segmented",
+                           CCmdOptionFlags::required_input, &image3dio ));
+        opts.add(make_opt( out_filename, "out-file", 'o', "class label image based on "
+                                                          "merging local labels", CCmdOptionFlags::output, &image3dio ));
+
+        opts.add(make_opt( out_filename2, "out-global-crisp", 'G', "class label image based on "
+                                                                   "global segmentation", CCmdOptionFlags::output, &image3dio ));
+        opts.add(make_opt( cls_filename, "class-prob", 'C', "class probability image file, filetype "
+                                                            "must support floating point multi-frame images", CCmdOptionFlags::output, &image3dio ));
+
+        opts.set_group("Parameters");
+        opts.add(make_opt( blocksize, EParameterBounds::bf_min_closed, {3},
+                           "grid-spacing", 'g', "Spacing of the grid used to modulate "
+                                                "the intensity inhomogeneities"));
+        opts.add(make_opt( class_center_initializer, "kmeans:nc=3",
+                           "cmeans", 'c', "c-means initializer"));
+        opts.add(make_opt( cmeans_epsilon, EParameterBounds::bf_min_open,
+        {0.0}, "c-means-epsilon", 'e', "c-means breaking condition for update tolerance"));
+        opts.add(make_opt( rel_cluster_threshold, EParameterBounds::bf_min_closed | EParameterBounds::bf_max_open,
+        {0.0,1.0}, "relative-cluster-threshold", 't', "threshhold to ignore classes when initializing"
+                                                      " the local cmeans from the global one."));
+        opts.add(make_opt(ignore_partition_with_background, "ignore-background", 'B',
+                          "Don't take background probablities into account when desiding whether classes are to be ignored"));
+        opts.add(make_opt(class_label_thresh,
+                          EParameterBounds::bf_min_closed | EParameterBounds::bf_max_closed,
+        {0.0f, 1.0f},
+                          "label-threshold", 'L',
+                          "for values <= 0.5: create segmentation based on highest class probability, "
+                          "labels start at 0. For values >0.5: create labels only for voxels with a "
+                          "class probability higher than the given value, labels start at 1 and voxels "
+                          "without an according class probability are set to 0; this output is suitable "
+                          "for the seeded watershed filter."));
+
+        if (opts.parse(argc, argv) != CCmdOptionList::hr_no)
+                return EXIT_SUCCESS;
+
+        if (out_filename.empty() && out_filename2.empty()) {
+                throw invalid_argument("You must specify at least one output file");
+        }
+
+        auto in_image = load_image3d(in_filename);
+
+        FRunHistogram global_histogram;
+
+        mia::accumulate(global_histogram, *in_image);
+
+        CMeans globale_cmeans(cmeans_epsilon, class_center_initializer);
+
+        auto gh = global_histogram.get_histogram();
+
+        CMeans::DVector global_class_centers;
+        auto global_sparse_probmap = globale_cmeans.run(gh, global_class_centers, false);
+
+        cvinfo() << "Histogram range: [" << gh[0].first << ", " << gh[gh.size()-1].first << "]\n";
+        cvmsg() << "Global class centers: " << global_class_centers << "\n";
+        cvinfo() << "Probmap size = " << global_sparse_probmap.size()
+                 << " weight number " << global_sparse_probmap[0].second.size() << "\n";
+
+        const unsigned n_classes = global_class_centers.size();
+
+        // need the normalized class centers
+
+
+
+        map<int, unsigned> segmap;
+        for_each(global_sparse_probmap.begin(), global_sparse_probmap.end(),
+                 [&segmap](const CMeans::SparseProbmap::value_type& x) {
+                int c = 0;
+                float max_prob = 0.0f;
+                for (unsigned i = 0; i< x.second.size(); ++i) {
+                        auto f = x.second[i];
+                        if (f > max_prob) {
+                                max_prob = f;
+                                c = i;
+                        };
+                }
+                segmap[x.first] = c;
+        });
+
+
+        FLocalCMeans::Probmap global_probmap;
+        for (auto k : global_sparse_probmap) {
+                global_probmap[k.first] = k.second;
+        };
+
+        if (!out_filename2.empty()) {
+                FCrispSeg cs(segmap);
+                auto crisp_global_seg = mia::filter(cs, *in_image);
+                if (!save_image (out_filename2, crisp_global_seg)) {
+                        cverr() << "Unable to save to '" << out_filename2 << "'\n";
+                }
+        }
+
+        int  nx = (in_image->get_size().x + blocksize - 1) / blocksize + 1;
+        int  ny = (in_image->get_size().y + blocksize - 1) / blocksize + 1;
+        int  nz = (in_image->get_size().z + blocksize - 1) / blocksize + 1;
+        int  start_x = - static_cast<int>(nx * blocksize - in_image->get_size().x) / 2;
+        int  start_y = - static_cast<int>(ny * blocksize - in_image->get_size().y) / 2;
+        int  start_z = - static_cast<int>(nz * blocksize - in_image->get_size().z) / 2;
+
+        cvdebug() << "Start at " << start_x << ", " << start_y << ", " << start_z << "\n";
+
+
+        int max_threads = CMaxTasks::get_max_tasks();
+        assert(max_threads > 0);
+
+        CMutex current_probbuffer_mutex;
+        int current_probbuffer = 0;
+
+        vector<ProtectedProbBuffer> prob_buffers(max_threads,
+                                                 ProtectedProbBuffer(global_class_centers.size(),
+                                                                     in_image->get_size()));
+
+        auto block_runner = [&](const C1DParallelRange& z_range) -> void {
+                CThreadMsgStream msg_stream;
+                current_probbuffer_mutex.lock();
+                int my_prob_buffer = current_probbuffer++;
+                if (current_probbuffer >= max_threads)
+                        current_probbuffer = 0;
+                current_probbuffer_mutex.unlock();
+
+                for (int  i = z_range.begin(); i < z_range.end(); ++i) {
+                        int iz_base = start_z + i * blocksize;
+                        unsigned iz = iz_base < 0 ? 0 : iz_base;
+
+                        /* Work around the case that the image size (k*blocksize+1) */
+                        if (iz >= in_image->get_size().z)
+                                break;
+                        unsigned iz_end = iz_base +  2 * blocksize;
+                        if (iz_end > in_image->get_size().z)
+                                iz_end = in_image->get_size().z;
+
+                        cvmsg() << "Run slices " << iz_base << " - " <<  iz_end
+                                << " with buffer " << my_prob_buffer << "\n";
+
+                        for (int  iy_base = start_y; iy_base < (int)in_image->get_size().y; iy_base +=  blocksize) {
+                                unsigned iy = iy_base < 0 ? 0 : iy_base;
+                                unsigned iy_end = iy_base +  2 * blocksize;
+                                if (iy_end > in_image->get_size().y)
+                                        iy_end = in_image->get_size().y;
+
+                                for (int ix_base = start_x; ix_base < (int)in_image->get_size().x; ix_base +=  blocksize) {
+                                        unsigned ix = ix_base < 0 ? 0 : ix_base;
+                                        unsigned ix_end = ix_base +  2 * blocksize;
+                                        if (ix_end > in_image->get_size().x)
+                                                ix_end = in_image->get_size().x;
+
+
+                                        FLocalCMeans lcm(cmeans_epsilon, global_class_centers,
+                                                         C3DBounds(ix, iy, iz),
+                                                         C3DBounds(ix_end, iy_end, iz_end),
+                                                         global_probmap,
+                                                         rel_cluster_threshold,
+                                                         segmap,
+                                                         prob_buffers[my_prob_buffer],
+                                                         !ignore_partition_with_background);
+
+                                        mia::accumulate(lcm, *in_image);
+                                }
+                        }
+                }
+
+        };
+
+        pfor(C1DParallelRange(0, nz, 1), block_runner);
+
+
+        // sum the probabilities
+        for (unsigned i = 1; i < prob_buffers.size(); ++i) {
+                for (unsigned c = 0; c < n_classes; ++c) {
+                        transform(prob_buffers[i].probmap[c].begin(), prob_buffers[i].probmap[c].end(),
+                                  prob_buffers[0].probmap[c].begin(), prob_buffers[0].probmap[c].begin(),
+                                        [](float x, float y){return x+y;});
+                }
+        }
+
+
+        // normalize probability images
+        vector<C3DFDatafield>& prob_buffer = prob_buffers[0].probmap;
+
+        C3DFImage sum(prob_buffer[0]);
+        for (unsigned c = 1; c < n_classes; ++c) {
+                transform(sum.begin(), sum.end(), prob_buffer[c].begin(), sum.begin(),
+                          [](float x, float y){return x+y;});
+        }
+        for (unsigned c = 0; c < n_classes; ++c) {
+                transform(sum.begin(), sum.end(), prob_buffer[c].begin(), prob_buffer[c].begin(),
+                          [](float s, float p){return p/s;});
+        }
+        if (!cls_filename.empty()) {
+                C3DImageIOPluginHandler::Instance::Data classes;
+
+                for (unsigned c = 0; c < n_classes; ++c)
+                        classes.push_back(make_shared<C3DFImage>(prob_buffer[c]));
+
+                if (!C3DImageIOPluginHandler::instance().save(cls_filename, classes))
+                        cverr() << "Error writing class images to '" << cls_filename << "'\n";
+        }
+
+
+        // now for each pixel we have a probability sum that should take inhomogeinities
+        // into account
+
+        C3DUBImage out_image(in_image->get_size(), *in_image);
+        fill(out_image.begin(), out_image.end(), 0);
+
+        if (class_label_thresh <= 0.5f) {
+
+                for (unsigned c = 1; c < n_classes; ++c) {
+                        auto iout = out_image.begin();
+                        auto eout = out_image.end();
+
+                        auto itest = prob_buffer[0].begin();
+                        auto iprob = prob_buffer[c].begin();
+
+                        while ( iout != eout ){
+                                if (*itest < *iprob) {
+                                        *itest = *iprob;
+                                        *iout = c;
+                                }
+                                ++iout; ++itest; ++iprob;
+                        }
+                }
+        }else{
+                for (unsigned c = 0; c < n_classes; ++c) {
+                        auto iout = out_image.begin();
+                        auto eout = out_image.end();
+                        auto iprob = prob_buffer[c].begin();
+
+                        while ( iout != eout ){
+                                if (class_label_thresh < *iprob) {
+                                        *iout = c +1;
+                                }
+                                ++iout;  ++iprob;
+                        }
+                }
+        }
+        return save_image(out_filename, out_image) ? EXIT_SUCCESS : EXIT_FAILURE;
 }
 
 
 ProtectedProbBuffer::ProtectedProbBuffer(int n_classes, const C3DBounds& size):
-	probmap(n_classes, C3DFDatafield(size))
+        probmap(n_classes, C3DFDatafield(size))
 {
 }
 
 ProtectedProbBuffer::ProtectedProbBuffer(const ProtectedProbBuffer& orig):
-	probmap(orig.probmap)
+        probmap(orig.probmap)
 {
 }
 
-template <typename T> 
+template <typename T>
 void FRunHistogram::operator()(const T3DImage<T>& image)
 {
-	m_sparse_histogram(image.begin(), image.end()); 
+        m_sparse_histogram(image.begin(), image.end());
 }
 
 CSparseHistogram::Compressed FRunHistogram::get_histogram() const
 {
-	return m_sparse_histogram.get_compressed_histogram(); 
+        return m_sparse_histogram.get_compressed_histogram();
 }
 
 
 FLocalCMeans::FLocalCMeans(float epsilon, const vector<double>& global_class_centers,
-			   const C3DBounds& start, const C3DBounds& end,
-			   const Probmap& global_probmap,
-			   float rel_cluster_threshold, 
-			   const map<int, unsigned>& segmap, 
-			   ProtectedProbBuffer& prob_buffer,
-			   bool partition_with_background):
-	m_epsilon(epsilon),
-	m_global_class_centers(global_class_centers),
-	m_start(start),
-	m_end(end),
-	m_global_probmap(global_probmap),
-	m_rel_cluster_threshold(rel_cluster_threshold),
-	m_segmap(segmap), 
-	m_prob_buffer(prob_buffer),
-	m_count((m_end - m_start).product()),
-	m_partition_with_background(partition_with_background)
+                           const C3DBounds& start, const C3DBounds& end,
+                           const Probmap& global_probmap,
+                           float rel_cluster_threshold,
+                           const map<int, unsigned>& segmap,
+                           ProtectedProbBuffer& prob_buffer,
+                           bool partition_with_background):
+        m_epsilon(epsilon),
+        m_global_class_centers(global_class_centers),
+        m_start(start),
+        m_end(end),
+        m_global_probmap(global_probmap),
+        m_rel_cluster_threshold(rel_cluster_threshold),
+        m_segmap(segmap),
+        m_prob_buffer(prob_buffer),
+        m_count((m_end - m_start).product()),
+        m_partition_with_background(partition_with_background)
 {
 }
 
 
 
-template <typename T> 
+template <typename T>
 void FLocalCMeans::operator()(const T3DImage<T>& image)
 {
-	cvinfo() << "Run subrange ["<< m_start << "]-["<< m_end<<"]\n";
-	
-	
-	// obtain the histogram for the current patch 
-	CSparseHistogram histogram;
-	histogram(image.begin_range(m_start, m_end), 
-		  image.end_range(m_start, m_end));
-	auto ch = histogram.get_compressed_histogram();
-
-	// calculate the class avaliability in the patch
-	vector<double> partition(m_global_class_centers.size());
-	
-	for (auto idx: ch) {
-		const double n = idx.second; 
-		auto v = m_global_probmap.at(idx.first);
-		transform(partition.begin(), partition.end(), v.begin(),
-			  partition.begin(), [n](double p, double value){return p + n * value;}); 
-	}
-
-	// don't count background class in partition
-	int start_class = m_partition_with_background ? 0 : 1;
-
-	auto part_thresh = std::accumulate(partition.begin() + start_class, partition.end(), 0.0) * m_rel_cluster_threshold; 
-	
-	cvinfo() << "Partition=" << partition
-		 << ", thresh="  << part_thresh
-		 << "\n";
-
-	// select the classes that should be used further on
-	vector<double> retained_class_centers;
-	vector<unsigned> used_classed; 
-	for (unsigned i = 0; i < partition.size(); ++i) {
-		if (partition[i] >= part_thresh) {
-			retained_class_centers.push_back(m_global_class_centers[i]);
-			used_classed.push_back(i);
-		}
-	}
-	
-
-	// prepare linear interpolation summing 
-	auto center = C3DFVector((m_end + m_start)) / 2.0f;
-	auto max_distance = C3DFVector((m_end - m_start)) / 2.0f;
-	
-	
-	if (retained_class_centers.size() > 1)  {
-		
-		ostringstream cci_descr;
-		cci_descr << "predefined:cc=[" << retained_class_centers<<"]";
-		cvinfo() << "Initializing local cmeans with '" << cci_descr.str()
-			 << " for retained classes " << used_classed << "'\n"; 
-		auto cci = CMeansInitializerPluginHandler::instance().produce(cci_descr.str()); 
-
-
-		// remove data that was globally assigned to now unused class
-		CSparseHistogram::Compressed cleaned_histogram;
-		cleaned_histogram.reserve(ch.size());
-		
-		// copy used intensities 
-		for (auto c: used_classed) {
-			for (auto ich: ch) {
-				if ( m_segmap.at(ich.first) == c) {
-					cleaned_histogram.push_back(ich);
-				}
-			}
-		}
-		
-		// evluate the local c-means classification 
-		CMeans local_cmeans(m_epsilon, cci);
-		auto local_map = local_cmeans.run(cleaned_histogram,  retained_class_centers);
-
-		// create a lookup map intensity -> probability vector 
-		map<unsigned short, CMeans::DVector> mapper;
-		for (auto i: local_map) {
-			mapper[i.first] = i.second; 
-		}
-
-
-		// now add the new probabilities to the global maps.
-		auto ii = image.begin_range(m_start, m_end);
-		auto ie = image.end_range(m_start, m_end);
-		CScopedLock prob_lock(m_prob_buffer.mutex); 
-		while (ii != ie) {
-			auto probs = mapper.find(*ii);
-			auto delta = (C3DFVector(ii.pos()) - center) / max_distance;
-			float lin_scale = (1.0 - std::fabs(delta.x))* (1.0 - std::fabs(delta.y));
-			
-			if (probs != mapper.end()) {
-				for (unsigned c = 0; c < used_classed.size();  ++c) {
-					m_prob_buffer.probmap[used_classed[c]](ii.pos()) += lin_scale * probs->second[c];
-				}
-			}else{ // not in local map: retain global probabilities 
-				auto v = m_global_probmap.at(*ii);
-				for (unsigned c = 0; c < v.size();  ++c) {
-					m_prob_buffer.probmap[c](ii.pos()) += lin_scale * v[c];
-				}
-			}
-			++ii;
-		}
-		
-		
-	}else{// only one class retained, add 1.0 to probabilities, linearly smoothed 
-		cvinfo() << "Only one class used:" << used_classed[0] << "\n"; 
-		auto ii = m_prob_buffer.probmap[used_classed[0]].begin_range(m_start, m_end);
-		auto ie = m_prob_buffer.probmap[used_classed[0]].end_range(m_start, m_end);
-		CScopedLock prob_lock(m_prob_buffer.mutex); 
-		while (ii != ie)  {
-			auto delta = (C3DFVector(ii.pos()) - center) / max_distance;
-			*ii += (1.0 - std::fabs(delta.x))* (1.0 - std::fabs(delta.y)); ;
-			++ii; 
-		}
-		
-	}
-	cvinfo() << "Done subrange ["<< m_start << "]-["<< m_end<<"]\n"; 
+        cvinfo() << "Run subrange ["<< m_start << "]-["<< m_end<<"]\n";
+
+
+        // obtain the histogram for the current patch
+        CSparseHistogram histogram;
+        histogram(image.begin_range(m_start, m_end),
+                  image.end_range(m_start, m_end));
+        auto ch = histogram.get_compressed_histogram();
+
+        // calculate the class avaliability in the patch
+        vector<double> partition(m_global_class_centers.size());
+
+        for (auto idx: ch) {
+                const double n = idx.second;
+                auto v = m_global_probmap.at(idx.first);
+                transform(partition.begin(), partition.end(), v.begin(),
+                          partition.begin(), [n](double p, double value){return p + n * value;});
+        }
+
+        // don't count background class in partition
+        int start_class = m_partition_with_background ? 0 : 1;
+
+        auto part_thresh = std::accumulate(partition.begin() + start_class, partition.end(), 0.0) * m_rel_cluster_threshold;
+
+        cvinfo() << "Partition=" << partition
+                 << ", thresh="  << part_thresh
+                 << "\n";
+
+        // select the classes that should be used further on
+        vector<double> retained_class_centers;
+        vector<unsigned> used_classed;
+        for (unsigned i = 0; i < partition.size(); ++i) {
+                if (partition[i] >= part_thresh) {
+                        retained_class_centers.push_back(m_global_class_centers[i]);
+                        used_classed.push_back(i);
+                }
+        }
+
+
+        // prepare linear interpolation summing
+        auto center = C3DFVector((m_end + m_start)) / 2.0f;
+        auto max_distance = C3DFVector((m_end - m_start)) / 2.0f;
+
+
+        if (retained_class_centers.size() > 1)  {
+
+                ostringstream cci_descr;
+                cci_descr << "predefined:cc=[" << retained_class_centers<<"]";
+                cvinfo() << "Initializing local cmeans with '" << cci_descr.str()
+                         << " for retained classes " << used_classed << "'\n";
+                auto cci = CMeansInitializerPluginHandler::instance().produce(cci_descr.str());
+
+
+                // remove data that was globally assigned to now unused class
+                CSparseHistogram::Compressed cleaned_histogram;
+                cleaned_histogram.reserve(ch.size());
+
+                // copy used intensities
+                for (auto c: used_classed) {
+                        for (auto ich: ch) {
+                                if ( m_segmap.at(ich.first) == c) {
+                                        cleaned_histogram.push_back(ich);
+                                }
+                        }
+                }
+
+                // evluate the local c-means classification
+                CMeans local_cmeans(m_epsilon, cci);
+                auto local_map = local_cmeans.run(cleaned_histogram,  retained_class_centers);
+
+                // create a lookup map intensity -> probability vector
+                map<unsigned short, CMeans::DVector> mapper;
+                for (auto i: local_map) {
+                        mapper[i.first] = i.second;
+                }
+
+
+                // now add the new probabilities to the global maps.
+                auto ii = image.begin_range(m_start, m_end);
+                auto ie = image.end_range(m_start, m_end);
+                CScopedLock prob_lock(m_prob_buffer.mutex);
+                while (ii != ie) {
+                        auto probs = mapper.find(*ii);
+                        auto delta = (C3DFVector(ii.pos()) - center) / max_distance;
+                        float lin_scale = (1.0 - std::fabs(delta.x))* (1.0 - std::fabs(delta.y));
+
+                        if (probs != mapper.end()) {
+                                for (unsigned c = 0; c < used_classed.size();  ++c) {
+                                        m_prob_buffer.probmap[used_classed[c]](ii.pos()) += lin_scale * probs->second[c];
+                                }
+                        }else{ // not in local map: retain global probabilities
+                                auto v = m_global_probmap.at(*ii);
+                                for (unsigned c = 0; c < v.size();  ++c) {
+                                        m_prob_buffer.probmap[c](ii.pos()) += lin_scale * v[c];
+                                }
+                        }
+                        ++ii;
+                }
+
+
+        }else{// only one class retained, add 1.0 to probabilities, linearly smoothed
+                cvinfo() << "Only one class used:" << used_classed[0] << "\n";
+                auto ii = m_prob_buffer.probmap[used_classed[0]].begin_range(m_start, m_end);
+                auto ie = m_prob_buffer.probmap[used_classed[0]].end_range(m_start, m_end);
+                CScopedLock prob_lock(m_prob_buffer.mutex);
+                while (ii != ie)  {
+                        auto delta = (C3DFVector(ii.pos()) - center) / max_distance;
+                        *ii += (1.0 - std::fabs(delta.x))* (1.0 - std::fabs(delta.y)); ;
+                        ++ii;
+                }
+
+        }
+        cvinfo() << "Done subrange ["<< m_start << "]-["<< m_end<<"]\n";
 }
 
 #include <mia/internal/main.hh>
-MIA_MAIN(do_main); 
+MIA_MAIN(do_main);
 

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/mia.git