[pktools] 31/375: added OptFactory.h and pkopt_svm.cc

Bas Couwenberg sebastic at xs4all.nl
Wed Dec 3 21:53:55 UTC 2014


This is an automated email from the git hooks/post-receive script.

sebastic-guest pushed a commit to branch upstream-master
in repository pktools.

commit 9c4dbcc97e5a732185d8fd71da9d7e23fbbe6383
Author: Pieter Kempeneers <kempenep at gmail.com>
Date:   Wed Jan 16 15:56:07 2013 +0100

    added OptFactory.h and pkopt_svm.cc
---
 src/algorithms/OptFactory.h | 215 ++++++++++++++++++
 src/apps/pkopt_svm.cc       | 530 ++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 745 insertions(+)

diff --git a/src/algorithms/OptFactory.h b/src/algorithms/OptFactory.h
new file mode 100644
index 0000000..d13ba23
--- /dev/null
+++ b/src/algorithms/OptFactory.h
@@ -0,0 +1,215 @@
+/**********************************************************************
+OptFactory.h: factory class for nlopt::opt (selecting algorithm via string)
+Copyright (C) 2008-2013 Pieter Kempeneers
+
+This file is part of pktools
+
+pktools is free software: you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation, either version 3 of the License, or
+(at your option) any later version.
+
+pktools is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with pktools.  If not, see <http://www.gnu.org/licenses/>.
+***********************************************************************/
+#ifndef _OPTFACTORY_H_
+#define _OPTFACTORY_H_
+
+#include <map>
+
+/**
+ * OptFactory: maps an algorithm name string (e.g. "LN_COBYLA") to a
+ * configured nlopt::opt optimizer instance.  Only the derivative-free
+ * algorithms ([GL]N_*) are supported; requesting a derivative-based
+ * algorithm, or an unknown name, throws a std::string error message.
+ */
+class OptFactory
+{
+private:
+  //! Fill the lookup table from algorithm name to nlopt::algorithm enum.
+  static void initMap(std::map<std::string, nlopt::algorithm>& m_algMap){
+    //initialize selMap
+    m_algMap["GN_DIRECT"]=nlopt::GN_DIRECT;
+    m_algMap["GN_DIRECT_L"]=nlopt::GN_DIRECT_L;
+    m_algMap["GN_DIRECT_L_RAND"]=nlopt::GN_DIRECT_L_RAND;
+    m_algMap["GN_DIRECT_NOSCAL"]=nlopt::GN_DIRECT_NOSCAL;
+    m_algMap["GN_DIRECT_L_NOSCAL"]=nlopt::GN_DIRECT_L_NOSCAL;
+    m_algMap["GN_DIRECT_L_RAND_NOSCAL"]=nlopt::GN_DIRECT_L_RAND_NOSCAL;
+    m_algMap["GN_ORIG_DIRECT"]=nlopt::GN_ORIG_DIRECT;
+    m_algMap["GN_ORIG_DIRECT_L"]=nlopt::GN_ORIG_DIRECT_L;
+    m_algMap["GD_STOGO"]=nlopt::GD_STOGO;
+    m_algMap["GD_STOGO_RAND"]=nlopt::GD_STOGO_RAND;
+    m_algMap["LD_LBFGS_NOCEDAL"]=nlopt::LD_LBFGS_NOCEDAL;
+    m_algMap["LD_LBFGS"]=nlopt::LD_LBFGS;
+    m_algMap["LN_PRAXIS"]=nlopt::LN_PRAXIS;
+    m_algMap["LD_VAR1"]=nlopt::LD_VAR1;
+    m_algMap["LD_VAR2"]=nlopt::LD_VAR2;
+    m_algMap["LD_TNEWTON"]=nlopt::LD_TNEWTON;
+    m_algMap["LD_TNEWTON_RESTART"]=nlopt::LD_TNEWTON_RESTART;
+    m_algMap["LD_TNEWTON_PRECOND"]=nlopt::LD_TNEWTON_PRECOND;
+    m_algMap["LD_TNEWTON_PRECOND_RESTART"]=nlopt::LD_TNEWTON_PRECOND_RESTART;
+    m_algMap["GN_CRS2_LM"]=nlopt::GN_CRS2_LM;
+    m_algMap["GN_MLSL"]=nlopt::GN_MLSL;
+    m_algMap["GD_MLSL"]=nlopt::GD_MLSL;
+    m_algMap["GN_MLSL_LDS"]=nlopt::GN_MLSL_LDS;
+    m_algMap["GD_MLSL_LDS"]=nlopt::GD_MLSL_LDS;
+    m_algMap["LD_MMA"]=nlopt::LD_MMA;
+    m_algMap["LN_COBYLA"]=nlopt::LN_COBYLA;
+    m_algMap["LN_NEWUOA"]=nlopt::LN_NEWUOA;
+    m_algMap["LN_NEWUOA_BOUND"]=nlopt::LN_NEWUOA_BOUND;
+    m_algMap["LN_NELDERMEAD"]=nlopt::LN_NELDERMEAD;
+    m_algMap["LN_SBPLX"]=nlopt::LN_SBPLX;
+    m_algMap["LN_AUGLAG"]=nlopt::LN_AUGLAG;
+    m_algMap["LD_AUGLAG"]=nlopt::LD_AUGLAG;
+    m_algMap["LN_AUGLAG_EQ"]=nlopt::LN_AUGLAG_EQ;
+    m_algMap["LD_AUGLAG_EQ"]=nlopt::LD_AUGLAG_EQ;
+    m_algMap["LN_BOBYQA"]=nlopt::LN_BOBYQA;
+    m_algMap["GN_ISRES"]=nlopt::GN_ISRES;
+    m_algMap["AUGLAG"]=nlopt::AUGLAG;
+    m_algMap["AUGLAG_EQ"]=nlopt::AUGLAG_EQ;
+    m_algMap["G_MLSL"]=nlopt::G_MLSL;
+    m_algMap["G_MLSL_LDS"]=nlopt::G_MLSL_LDS;
+    //fixed: key had a trailing space ("LD_SLSQP "), so the real algorithm
+    //name could never be matched
+    m_algMap["LD_SLSQP"]=nlopt::LD_SLSQP;
+  }
+public:
+  OptFactory(){
+  };
+  ~OptFactory(){};
+  /**
+   * Create an optimizer for npar parameters.
+   * @param algorithmString name of the nlopt algorithm (e.g. "LN_COBYLA")
+   * @param npar number of parameters to optimize
+   * @return nlopt::opt instance for the requested derivative-free algorithm
+   * @throws std::string when the name is unknown or names a derivative-based
+   *         algorithm (only [GL]N_* algorithms are supported)
+   */
+  static nlopt::opt getOptimizer(const std::string& algorithmString, unsigned int npar){
+    std::map<std::string, nlopt::algorithm> m_algMap;
+    initMap(m_algMap);
+    //fixed: use find() instead of operator[]. operator[] default-inserts
+    //value 0 for an unknown key, and nlopt::GN_DIRECT is enum value 0, so an
+    //unknown or misspelled algorithm name silently produced a GN_DIRECT
+    //optimizer instead of an error
+    std::map<std::string, nlopt::algorithm>::const_iterator ait=m_algMap.find(algorithmString);
+    if(ait!=m_algMap.end()){
+      switch(ait->second){
+      //the derivative-free algorithms all construct the optimizer the same
+      //way, so they share a single return (the original repeated this body
+      //21 times)
+      case(nlopt::GN_DIRECT):
+      case(nlopt::GN_DIRECT_L):
+      case(nlopt::GN_DIRECT_L_RAND):
+      case(nlopt::GN_DIRECT_NOSCAL):
+      case(nlopt::GN_DIRECT_L_NOSCAL):
+      case(nlopt::GN_DIRECT_L_RAND_NOSCAL):
+      case(nlopt::GN_ORIG_DIRECT):
+      case(nlopt::GN_ORIG_DIRECT_L):
+      case(nlopt::LN_PRAXIS):
+      case(nlopt::GN_CRS2_LM):
+      case(nlopt::GN_MLSL):
+      case(nlopt::GN_MLSL_LDS):
+      case(nlopt::LN_COBYLA):
+      case(nlopt::LN_NEWUOA):
+      case(nlopt::LN_NEWUOA_BOUND):
+      case(nlopt::LN_NELDERMEAD):
+      case(nlopt::LN_SBPLX):
+      case(nlopt::LN_AUGLAG):
+      case(nlopt::LN_AUGLAG_EQ):
+      case(nlopt::LN_BOBYQA):
+      case(nlopt::GN_ISRES):{
+        nlopt::opt theOptimizer(ait->second,npar);
+        return(theOptimizer);
+      }
+      default:
+        //derivative-based algorithm: fall through to the error below
+        break;
+      }
+    }
+    std::string errorString="Error: derivative optimization algorithm ";
+    errorString+= algorithmString;
+    errorString+= " not supported, select derivative-free algorithm ([GL]N_*)";
+    throw(errorString);
+  };
+};
+#endif /* _OPTFACTORY_H_ */
diff --git a/src/apps/pkopt_svm.cc b/src/apps/pkopt_svm.cc
new file mode 100644
index 0000000..517f321
--- /dev/null
+++ b/src/apps/pkopt_svm.cc
@@ -0,0 +1,530 @@
+/**********************************************************************
+pkopt_svm.cc: program to optimize parameters for SVM classification
+Copyright (C) 2008-2013 Pieter Kempeneers
+
+This file is part of pktools
+
+pktools is free software: you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation, either version 3 of the License, or
+(at your option) any later version.
+
+pktools is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with pktools.  If not, see <http://www.gnu.org/licenses/>.
+***********************************************************************/
+#include <iostream>
+#include <sstream>
+#include <fstream>
+#include <vector>
+#include <math.h>
+#include <nlopt.hpp>
+#include "base/Optionpk.h"
+#include "base/Optionpk.h"
+#include "algorithms/ConfusionMatrix.h"
+#include "algorithms/FeatureSelector.h"
+#include "algorithms/OptFactory.h"
+#include "algorithms/svm.h"
+#include "pkclassify_nn.h"
+
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+//C-style allocation helper matching the idiom used in libsvm's example code
+#define Malloc(type,n) (type *)malloc((n)*sizeof(type))
+                                    //declare objective function
+double objFunction(const std::vector<double> &x, std::vector<double> &grad, void *my_func_data);
+
+//global parameters used in objective function
+//(global because nlopt's callback only forwards a single void* of user data,
+//which is used here for the training features; everything else comes from
+//these command line options)
+Optionpk<unsigned short> svm_type_opt("svmt", "svmtype", "type of SVM (0: C-SVC, 1: nu-SVC, 2: one-class SVM, 3: epsilon-SVR,	4: nu-SVR)",0);
+Optionpk<unsigned short> kernel_type_opt("kt", "kerneltype", "type of kernel function (0: linear: u'*v, 1: polynomial: (gamma*u'*v + coef0)^degree, 2: radial basis function: exp(-gamma*|u-v|^2), 3: sigmoid: tanh(gamma*u'*v + coef0), 4: precomputed kernel (kernel values in training_set_file)",2);
+Optionpk<unsigned short> kernel_degree_opt("kd", "kd", "degree in kernel function",3);
+Optionpk<float> coef0_opt("c0", "coef0", "coef0 in kernel function",0);
+Optionpk<float> nu_opt("nu", "nu", "the parameter nu of nu-SVC, one-class SVM, and nu-SVR",0.5);
+Optionpk<float> epsilon_loss_opt("eloss", "eloss", "the epsilon in loss function of epsilon-SVR",0.1);
+Optionpk<int> cache_opt("cache", "cache", "cache memory size in MB",100);
+Optionpk<float> epsilon_tol_opt("etol", "etol", "the tolerance of termination criterion",0.001);
+Optionpk<bool> shrinking_opt("shrink", "shrink", "whether to use the shrinking heuristics",false);
+Optionpk<bool> prob_est_opt("pe", "probest", "whether to train a SVC or SVR model for probability estimates",false);
+// Optionpk<bool> weight_opt("wi", "wi", "set the parameter C of class i to weight*C, for C-SVC",true);
+Optionpk<unsigned short> cv_opt("cv", "cv", "n-fold cross validation mode",2);
+Optionpk<short> verbose_opt("v", "verbose", "set to: 0 (results only), 1 (confusion matrix), 2 (debug)",0);
+
+/**
+ * Objective function minimized by nlopt: runs an n-fold SVM cross validation
+ * with the candidate parameters x and returns the inverse of the overall
+ * accuracy, so that minimizing the objective maximizes accuracy.
+ * @param x candidate parameters: x[0]=ccost (C of C-SVC), x[1]=gamma
+ * @param grad gradient vector, must be empty (derivative-free algorithms only)
+ * @param my_func_data pointer to the training features [class][sample][band]
+ * @return 1.0/overall accuracy, or 1.0/etol when accuracy is zero
+ * @throws std::string when svm_check_parameter rejects the parameters
+ */
+double objFunction(const std::vector<double> &x, std::vector<double> &grad, void *my_func_data){
+  assert(grad.empty());//only derivative-free algorithms are supported
+  vector<Vector2d<float> > *tf=reinterpret_cast<vector<Vector2d<float> >*> (my_func_data);
+  //fixed: x[0] is ccost and x[1] is gamma, matching the bounds and initial
+  //values set up in main (lb/init/ub) and the verbose output below; the
+  //original read them in the swapped order, so the C boundaries were applied
+  //to gamma and vice versa
+  float ccost=x[0];
+  float gamma=x[1];
+  double error=1.0/epsilon_tol_opt[0];
+  double kappa=1.0;
+  double oa=1.0;
+  //todo: calculate kappa using cross validation
+  unsigned short nclass=tf->size();
+  unsigned int ntraining=0;
+  for(int iclass=0;iclass<nclass;++iclass)
+    ntraining+=(*tf)[iclass].size();
+  unsigned short nFeatures=(*tf)[0][0].size();
+  //fill the libsvm parameter structure from the global command line options
+  struct svm_parameter param;
+  param.svm_type = svm_type_opt[0];
+  param.kernel_type = kernel_type_opt[0];
+  param.degree = kernel_degree_opt[0];
+  param.gamma = gamma;
+  param.coef0 = coef0_opt[0];
+  param.nu = nu_opt[0];
+  param.cache_size = cache_opt[0];
+  param.C = ccost;
+  param.eps = epsilon_tol_opt[0];
+  param.p = epsilon_loss_opt[0];
+  param.shrinking = (shrinking_opt[0])? 1 : 0;
+  param.probability = (prob_est_opt[0])? 1 : 0;
+  param.nr_weight = 0;//not used: I use priors and balancing
+  param.weight_label = NULL;
+  param.weight = NULL;
+  param.verbose=(verbose_opt[0]>2)? true:false;
+  //build the libsvm problem: one node per feature per sample, each sample
+  //terminated by a sentinel node with index -1
+  struct svm_model* svm;
+  struct svm_problem prob;
+  struct svm_node* x_space;
+  prob.l=ntraining;
+  prob.y = Malloc(double,prob.l);
+  prob.x = Malloc(struct svm_node *,prob.l);
+  x_space = Malloc(struct svm_node,(nFeatures+1)*ntraining);
+  unsigned long int spaceIndex=0;
+  int lIndex=0;
+  for(int iclass=0;iclass<nclass;++iclass){
+    for(int isample=0;isample<(*tf)[iclass].size();++isample){
+      prob.x[lIndex]=&(x_space[spaceIndex]);
+      for(int ifeature=0;ifeature<nFeatures;++ifeature){
+        x_space[spaceIndex].index=ifeature+1;//libsvm feature indices are 1-based
+        x_space[spaceIndex].value=(*tf)[iclass][isample][ifeature];
+        ++spaceIndex;
+      }
+      x_space[spaceIndex++].index=-1;//sentinel closing this sample
+      prob.y[lIndex]=iclass;
+      ++lIndex;
+    }
+  }
+
+  assert(lIndex==prob.l);
+  if(verbose_opt[0]>2)
+    std::cout << "checking parameters" << std::endl;
+  //fixed: check the result of svm_check_parameter instead of discarding it;
+  //a non-NULL return is an error message describing the invalid parameter
+  const char *parameterError=svm_check_parameter(&prob,&param);
+  if(parameterError){
+    free(prob.y);
+    free(prob.x);
+    free(x_space);
+    throw(std::string(parameterError));
+  }
+  if(verbose_opt[0]>2)
+    std::cout << "parameters ok, training" << std::endl;
+  //note: this trained model is not used for the accuracy estimate below
+  //(svm_cross_validation trains its own per-fold models); it is kept only to
+  //preserve the original behavior and verbose output
+  svm=svm_train(&prob,&param);
+  if(verbose_opt[0]>2)
+    std::cout << "SVM is now trained" << std::endl;
+
+  //n-fold cross validation: accumulate fold predictions in a confusion matrix
+  ConfusionMatrix cm(nclass);
+  double *target = Malloc(double,prob.l);
+  svm_cross_validation(&prob,&param,cv_opt[0],target);
+  assert(param.svm_type != EPSILON_SVR&&param.svm_type != NU_SVR);//only for regression
+  int total_correct=0;
+  for(int i=0;i<prob.l;i++)
+    cm.incrementResult(cm.getClass(prob.y[i]),cm.getClass(target[i]),1);
+  assert(cm.nReference());
+  free(target);
+  free(prob.y);
+  free(prob.x);
+  free(x_space);
+  svm_free_and_destroy_model(&(svm));
+  if(verbose_opt[0]>2)
+    std::cout << cm << std::endl;
+  kappa=cm.kappa();
+  oa=cm.oa();
+  if(verbose_opt[0]>1){
+    std::cout << " --ccost " << x[0];
+    std::cout << " --gamma " << x[1];
+    std::cout << std::endl;
+    std::cout << "oa: " << oa << std::endl;
+    std::cout << "kappa: " << kappa << std::endl;
+  }
+  //objective value: inverse of overall accuracy (guard against oa==0)
+  if(oa)
+    error=1.0/oa;
+  return(error);
+}
+
+/**
+ * main: reads training samples from a shape file, optionally reclasses,
+ * balances and rescales them, then searches the SVM (ccost, gamma)
+ * parameters with an NLopt derivative-free optimizer, maximizing
+ * cross-validated accuracy via objFunction. Prints the optimized
+ * --ccost and --gamma to stdout.
+ */
+int main(int argc, char *argv[])
+{
+  map<short,int> reclassMap;
+  vector<int> vreclass;
+  std::string versionString="version ";
+  versionString+=VERSION;
+  versionString+=", Copyright (C) 2008-2012 Pieter Kempeneers.\n\
+   This program comes with ABSOLUTELY NO WARRANTY; for details type use option -h.\n\
+   This is free software, and you are welcome to redistribute it\n\
+   under certain conditions; use option --license for details.";
+  //command line options local to main (the SVM parameters themselves are
+  //file-level globals shared with objFunction)
+  Optionpk<bool> version_opt("\0","version",versionString,false);
+  Optionpk<bool> license_opt("lic","license","show license information",false);
+  Optionpk<bool> todo_opt("\0","todo","",false);
+  Optionpk<bool> help_opt("h","help","shows this help info",false);
+  Optionpk<string> training_opt("t", "training", "training shape file. A single shape file contains all training features (must be set as: B0, B1, B2,...) for all classes (class numbers identified by label option)."); 
+  Optionpk<string> label_opt("\0", "label", "identifier for class label in training shape file.","label"); 
+  Optionpk<unsigned short> reclass_opt("\0", "rc", "reclass code (e.g. --rc=12 --rc=23 to reclass first two classes to 12 and 23 resp.).", 0);
+  Optionpk<unsigned int> balance_opt("\0", "balance", "balance the input data to this number of samples for each class", 0);
+  Optionpk<int> minSize_opt("m", "min", "if number of training pixels is less then min, do not take this class into account", 0);
+  Optionpk<double> start_opt("s", "start", "start band sequence number (set to 0)",0); 
+  Optionpk<double> end_opt("e", "end", "end band sequence number (set to 0 for all bands)", 0); 
+  Optionpk<short> band_opt("b", "band", "band index (starting from 0, either use band option or use start to end)");
+  Optionpk<double> offset_opt("\0", "offset", "offset value for each spectral band input features: refl[band]=(DN[band]-offset[band])/scale[band]", 0.0);
+  Optionpk<double> scale_opt("\0", "scale", "scale value for each spectral band input features: refl=(DN[band]-offset[band])/scale[band] (use 0 if scale min and max in each band to -1.0 and 1.0)", 0.0);
+  Optionpk<float> gamma_opt("g", "gamma", "min max boundaries for gamma in kernel function (optional: initial value)",0);
+  Optionpk<float> ccost_opt("cc", "ccost", "min and max boundaries the parameter C of C-SVC, epsilon-SVR, and nu-SVR (optional: initial value)",1);
+  Optionpk<unsigned int> maxit_opt("maxit","maxit","maximum number of iterations",500);
+  Optionpk<string> algorithm_opt("a", "algorithm", "optimization algorithm (see http://ab-initio.mit.edu/wiki/index.php/NLopt_Algorithms)","LN_COBYLA"); 
+  Optionpk<double> tolerance_opt("tol","tolerance","relative tolerance for stopping criterion",0.0001);
+
+  //parse command line arguments for every option (including the globals)
+  version_opt.retrieveOption(argc,argv);
+  license_opt.retrieveOption(argc,argv);
+  help_opt.retrieveOption(argc,argv);
+  todo_opt.retrieveOption(argc,argv);
+  training_opt.retrieveOption(argc,argv);
+  label_opt.retrieveOption(argc,argv);
+  reclass_opt.retrieveOption(argc,argv);
+  balance_opt.retrieveOption(argc,argv);
+  minSize_opt.retrieveOption(argc,argv);
+  start_opt.retrieveOption(argc,argv);
+  end_opt.retrieveOption(argc,argv);
+  band_opt.retrieveOption(argc,argv);
+  offset_opt.retrieveOption(argc,argv);
+  scale_opt.retrieveOption(argc,argv);
+  svm_type_opt.retrieveOption(argc,argv);
+  kernel_type_opt.retrieveOption(argc,argv);
+  kernel_degree_opt.retrieveOption(argc,argv);
+  gamma_opt.retrieveOption(argc,argv);
+  coef0_opt.retrieveOption(argc,argv);
+  ccost_opt.retrieveOption(argc,argv);
+  nu_opt.retrieveOption(argc,argv);
+  epsilon_loss_opt.retrieveOption(argc,argv);
+  cache_opt.retrieveOption(argc,argv);
+  epsilon_tol_opt.retrieveOption(argc,argv);
+  shrinking_opt.retrieveOption(argc,argv);
+  prob_est_opt.retrieveOption(argc,argv);
+  cv_opt.retrieveOption(argc,argv);
+  maxit_opt.retrieveOption(argc,argv);
+  tolerance_opt.retrieveOption(argc,argv);
+  algorithm_opt.retrieveOption(argc,argv);
+  verbose_opt.retrieveOption(argc,argv);
+
+  if(version_opt[0]||todo_opt[0]){
+    std::cout << version_opt.getHelp() << std::endl;
+    std::cout << "todo: " << todo_opt.getHelp() << std::endl;
+    exit(0);
+  }
+  if(license_opt[0]){
+    std::cout << Optionpk<bool>::getGPLv3License() << std::endl;
+    exit(0);
+  }
+  if(help_opt[0]){
+    std::cout << "usage: pkopt_svm -t training [OPTIONS]" << std::endl;
+    exit(0);
+  }
+
+
+  //NOTE(review): aborts via assert instead of a user-facing error when the
+  //mandatory -t option is missing
+  assert(training_opt[0].size());
+  if(verbose_opt[0]>=1)
+    std::cout << "training shape file: " << training_opt[0] << std::endl;
+
+  unsigned int totalSamples=0;
+  int nreclass=0;
+  vector<int> vcode;//unique class codes in recode string
+
+  unsigned short nclass=0;
+  int nband=0;
+  int startBand=2;//first two bands represent X and Y pos
+
+  vector<double> offset;
+  vector<double> scale;
+  vector< Vector2d<float> > trainingPixels;//[class][sample][band]
+
+  //apply the reclass codes given on the command line (one per input class)
+  if(reclass_opt.size()>1){
+    vreclass.resize(reclass_opt.size());
+    for(int iclass=0;iclass<reclass_opt.size();++iclass){
+      reclassMap[iclass]=reclass_opt[iclass];
+      vreclass[iclass]=reclass_opt[iclass];
+    }
+  }
+  // if(priors_opt.size()>1){//priors from argument list
+  //   priors.resize(priors_opt.size());
+  //   double normPrior=0;
+  //   for(int iclass=0;iclass<priors_opt.size();++iclass){
+  //     priors[iclass]=priors_opt[iclass];
+  //     normPrior+=priors[iclass];
+  //   }
+  //   //normalize
+  //   for(int iclass=0;iclass<priors_opt.size();++iclass)
+  //     priors[iclass]/=normPrior;
+  // }
+
+  //sort bands
+  if(band_opt.size())
+    std::sort(band_opt.begin(),band_opt.end());
+  //----------------------------------- Training -------------------------------
+  struct svm_problem prob;
+  vector<string> fields;
+  //organize training data
+  trainingPixels.clear();
+  map<int,Vector2d<float> > trainingMap;
+  if(verbose_opt[0]>=1)
+    std::cout << "reading imageShape file " << training_opt[0] << std::endl;
+  try{
+    //read samples per class, either by explicit band list or by band range
+    if(band_opt.size())
+      totalSamples=readDataImageShape(training_opt[0],trainingMap,fields,band_opt,label_opt[0],verbose_opt[0]);
+    else
+      totalSamples=readDataImageShape(training_opt[0],trainingMap,fields,start_opt[0],end_opt[0],label_opt[0],verbose_opt[0]);
+    if(trainingMap.size()<2){
+      string errorstring="Error: could not read at least two classes from training file";
+      throw(errorstring);
+    }
+  }
+  catch(string error){
+    cerr << error << std::endl;
+    exit(1);
+  }
+  catch(...){
+    cerr << "error catched" << std::endl;
+    exit(1);
+  }
+  //delete class 0
+  if(verbose_opt[0]>=1)
+    std::cout << "erasing class 0 from training set (" << trainingMap[0].size() << " from " << totalSamples << ") samples" << std::endl;
+  totalSamples-=trainingMap[0].size();
+  trainingMap.erase(0);
+  //convert map to vector
+  short iclass=0;
+  if(reclass_opt.size()==1){//no reclass option, read classes from shape
+    reclassMap.clear();
+    vreclass.clear();
+  }
+  if(verbose_opt[0]>1)
+    std::cout << "training pixels: " << std::endl;
+  map<int,Vector2d<float> >::iterator mapit=trainingMap.begin();
+  while(mapit!=trainingMap.end()){
+    //       for(map<int,Vector2d<float> >::const_iterator mapit=trainingMap.begin();mapit!=trainingMap.end();++mapit){
+    //delete small classes
+    if((mapit->second).size()<minSize_opt[0]){
+      //NOTE(review): map::erase invalidates mapit, but the loop condition
+      //reuses it after this continue — undefined behavior; presumably
+      //trainingMap.erase(mapit++) was intended. Confirm and fix.
+      trainingMap.erase(mapit);
+      continue;
+      //todo: beware of reclass option: delete this reclass if no samples are left in this classes!!
+    }
+    if(reclass_opt.size()==1){//no reclass option, read classes from shape
+      reclassMap[iclass]=(mapit->first);
+      vreclass.push_back(mapit->first);
+    }
+    trainingPixels.push_back(mapit->second);
+    if(verbose_opt[0]>1)
+      std::cout << mapit->first << ": " << (mapit->second).size() << " samples" << std::endl;
+    ++iclass;
+    ++mapit;
+  }
+  nclass=trainingPixels.size();
+  nband=trainingPixels[0][0].size()-2;//X and Y//trainingPixels[0][0].size();
+  assert(reclassMap.size()==nclass);
+
+  //do not remove outliers here: could easily be obtained through ogr2ogr -where 'B2<110' output.shp input.shp
+  //balance training data
+  if(balance_opt[0]>0){
+    //NOTE(review): 'random' here resolves to the C library function pointer,
+    //which is always non-null, so this condition is always true — presumably
+    //a command line option was intended. Confirm.
+    if(random)
+      srand(time(NULL));
+    totalSamples=0;
+    //downsample large classes and resample (with replacement) small ones so
+    //every class ends up with exactly balance_opt[0] samples
+    for(int iclass=0;iclass<nclass;++iclass){
+      if(trainingPixels[iclass].size()>balance_opt[0]){
+        while(trainingPixels[iclass].size()>balance_opt[0]){
+          int index=rand()%trainingPixels[iclass].size();
+          trainingPixels[iclass].erase(trainingPixels[iclass].begin()+index);
+        }
+      }
+      else{
+        int oldsize=trainingPixels[iclass].size();
+        for(int isample=trainingPixels[iclass].size();isample<balance_opt[0];++isample){
+          int index = rand()%oldsize;
+          trainingPixels[iclass].push_back(trainingPixels[iclass][index]);
+        }
+      }
+      totalSamples+=trainingPixels[iclass].size();
+    }
+    assert(totalSamples==nclass*balance_opt[0]);
+  }
+    
+  //set scale and offset
+  offset.resize(nband);
+  scale.resize(nband);
+  if(offset_opt.size()>1)
+    assert(offset_opt.size()==nband);
+  if(scale_opt.size()>1)
+    assert(scale_opt.size()==nband);
+  Histogram hist;
+  for(int iband=0;iband<nband;++iband){
+    if(verbose_opt[0]>1)
+      std::cout << "scaling for band" << iband << std::endl;
+    offset[iband]=(offset_opt.size()==1)?offset_opt[0]:offset_opt[iband];
+    scale[iband]=(scale_opt.size()==1)?scale_opt[0]:scale_opt[iband];
+    //search for min and maximum
+    if(scale[iband]<=0){
+      //scale<=0 requests auto-scaling: map the observed [min,max] of this
+      //band onto [-1,1]
+      float theMin=trainingPixels[0][0][iband+startBand];
+      float theMax=trainingPixels[0][0][iband+startBand];
+      for(int iclass=0;iclass<nclass;++iclass){
+        for(int isample=0;isample<trainingPixels[iclass].size();++isample){
+          if(theMin>trainingPixels[iclass][isample][iband+startBand])
+            theMin=trainingPixels[iclass][isample][iband+startBand];
+          if(theMax<trainingPixels[iclass][isample][iband+startBand])
+            theMax=trainingPixels[iclass][isample][iband+startBand];
+        }
+      }
+      offset[iband]=theMin+(theMax-theMin)/2.0;
+      scale[iband]=(theMax-theMin)/2.0;
+      if(verbose_opt[0]>1){
+        std::cout << "Extreme image values for band " << iband << ": [" << theMin << "," << theMax << "]" << std::endl;
+        std::cout << "Using offset, scale: " << offset[iband] << ", " << scale[iband] << std::endl;
+        std::cout << "scaled values for band " << iband << ": [" << (theMin-offset[iband])/scale[iband] << "," << (theMax-offset[iband])/scale[iband] << "]" << std::endl;
+      }
+    }
+  }
+
+  //recode vreclass to ordered vector, starting from 0 to nreclass
+  vcode.clear();
+  if(verbose_opt[0]>=1){
+    std::cout << "before recoding: " << std::endl;
+    for(int iclass = 0; iclass < vreclass.size(); iclass++)
+      std::cout << " " << vreclass[iclass];
+    std::cout << std::endl; 
+  }
+  vector<int> vord=vreclass;//ordered vector, starting from 0 to nreclass
+  map<short,int> mreclass;
+  //NOTE(review): iclass still holds its value from the training-map loop
+  //above (== nclass), so the recoded values start at nclass rather than 0 as
+  //the comment claims; relative ordering is preserved, but verify downstream
+  //assumptions.
+  for(int ic=0;ic<vreclass.size();++ic){
+    if(mreclass.find(vreclass[ic])==mreclass.end())
+      mreclass[vreclass[ic]]=iclass++;
+  }
+  for(int ic=0;ic<vreclass.size();++ic)
+    vord[ic]=mreclass[vreclass[ic]];
+  //construct uniqe class codes
+  while(!vreclass.empty()){
+    vcode.push_back(*(vreclass.begin()));
+    //delete all these entries from vreclass
+    vector<int>::iterator vit;
+    while((vit=find(vreclass.begin(),vreclass.end(),vcode.back()))!=vreclass.end())
+      vreclass.erase(vit);
+  }
+  if(verbose_opt[0]>=1){
+    std::cout << "recode values: " << std::endl;
+    for(int icode=0;icode<vcode.size();++icode)
+      std::cout << vcode[icode] << " ";
+    std::cout << std::endl;
+  }
+  vreclass=vord;
+  if(verbose_opt[0]>=1){
+    std::cout << "after recoding: " << std::endl;
+    for(int iclass = 0; iclass < vord.size(); iclass++)
+      std::cout << " " << vord[iclass];
+    std::cout << std::endl; 
+  }
+      
+  vector<int> vuniqueclass=vreclass;
+  //remove duplicate elements from vuniqueclass
+  sort( vuniqueclass.begin(), vuniqueclass.end() );
+  vuniqueclass.erase( unique( vuniqueclass.begin(), vuniqueclass.end() ), vuniqueclass.end() );
+  nreclass=vuniqueclass.size();
+  if(verbose_opt[0]>=1){
+    std::cout << "unique classes: " << std::endl;
+    for(int iclass = 0; iclass < vuniqueclass.size(); iclass++)
+      std::cout << " " << vuniqueclass[iclass];
+    std::cout << std::endl; 
+    std::cout << "number of reclasses: " << nreclass << std::endl;
+  }
+    
+  // if(priors_opt.size()==1){//default: equal priors for each class
+  //   priors.resize(nclass);
+  //   for(int iclass=0;iclass<nclass;++iclass)
+  //     priors[iclass]=1.0/nclass;
+  // }
+  // assert(priors_opt.size()==1||priors_opt.size()==nclass);
+    
+  if(verbose_opt[0]>=1){
+    std::cout << "number of bands: " << nband << std::endl;
+    std::cout << "number of classes: " << nclass << std::endl;
+    // std::cout << "priors:";
+    // for(int iclass=0;iclass<nclass;++iclass)
+    //   std::cout << " " << priors[iclass];
+    // std::cout << std::endl;
+  }
+
+  //Calculate features of trainig set
+  vector< Vector2d<float> > trainingFeatures(nclass);
+  for(int iclass=0;iclass<nclass;++iclass){
+    int nctraining=0;
+    if(verbose_opt[0]>=1)
+      std::cout << "calculating features for class " << iclass << std::endl;
+    //NOTE(review): same always-true 'random' condition as in the balancing
+    //step above — confirm intent.
+    if(random)
+      srand(time(NULL));
+    nctraining=trainingPixels[iclass].size();//bagSize_opt[0] given in % of training size
+    if(verbose_opt[0]>=1)
+      std::cout << "nctraining class " << iclass << ": " << nctraining << std::endl;
+    int index=0;
+      
+    trainingFeatures[iclass].resize(nctraining);
+    for(int isample=0;isample<nctraining;++isample){
+      //scale pixel values according to scale and offset!!!
+      for(int iband=0;iband<nband;++iband){
+        assert(trainingPixels[iclass].size()>isample);
+        assert(trainingPixels[iclass][isample].size()>iband+startBand);
+        assert(offset.size()>iband);
+        assert(scale.size()>iband);
+        float value=trainingPixels[iclass][isample][iband+startBand];
+        trainingFeatures[iclass][isample].push_back((value-offset[iband])/scale[iband]);
+      }
+    }
+    assert(trainingFeatures[iclass].size()==nctraining);
+  }
+    
+  unsigned int ntraining=0;
+  for(int iclass=0;iclass<nclass;++iclass){
+    if(verbose_opt[0]>1)
+      std::cout << "training sample size for class " << vcode[iclass] << ": " << trainingFeatures[iclass].size() << std::endl;
+    ntraining+=trainingFeatures[iclass].size();
+  }
+
+  //boundaries are given as (min,max) with an optional third value appended
+  //below as the initial estimate: index 0=min, 1=max, 2=init
+  assert(ccost_opt.size()>1);//must have boundaries at least (initial value is optional)
+  if(ccost_opt.size()<3)//create initial value
+    ccost_opt.push_back(0.5*(ccost_opt[0]+ccost_opt[1]));
+  assert(gamma_opt.size()>1);//must have boundaries at least (initial value is optional)
+  if(gamma_opt.size()<3)//create initial value
+    gamma_opt.push_back(0);//will be translated to 1.0/nFeatures
+  assert(ccost_opt.size()==3);//min, init, max
+  assert(gamma_opt.size()==3);//min, init, max
+  nlopt::opt optimizer=OptFactory::getOptimizer(algorithm_opt[0],2);
+  if(verbose_opt[0]>1)
+    std::cout << "optimization algorithm: " << optimizer.get_algorithm_name() << "..." << std::endl;
+  //parameter vector layout: [0]=ccost, [1]=gamma (must match objFunction)
+  std::vector<double> lb(2);
+  std::vector<double> init(2);
+  std::vector<double> ub(2);
+  lb[0]=ccost_opt[0];
+  lb[1]=(gamma_opt[0]>0)? gamma_opt[0] : 1.0/trainingFeatures[0][0].size();
+  init[0]=ccost_opt[2];
+  //NOTE(review): gamma_opt[1] here looks like it should be gamma_opt[2] (the
+  //initial value); as written the max boundary is used as the initial gamma
+  //whenever an initial value was supplied — confirm and fix.
+  init[1]=(gamma_opt[2]>0)? gamma_opt[1] : 1.0/trainingFeatures[0][0].size();
+  ub[0]=ccost_opt[1];
+  ub[1]=(gamma_opt[1]>0)? gamma_opt[1] : 1.0/trainingFeatures[0][0].size();
+  optimizer.set_min_objective(objFunction, &trainingFeatures);
+  optimizer.set_lower_bounds(lb);
+  optimizer.set_upper_bounds(ub);
+
+  if(verbose_opt[0]>1)
+    std::cout << "set stopping criteria" << std::endl;
+  //set stopping criteria
+  if(maxit_opt[0])
+    optimizer.set_maxeval(maxit_opt[0]);
+  else
+    optimizer.set_xtol_rel(tolerance_opt[0]);
+  double minf=0;
+  std::vector<double> x=init;
+  optimizer.optimize(x, minf);
+  double ccost=x[0];
+  double gamma=x[1];
+  if(verbose_opt[0])
+    std::cout << "optimized with " << optimizer.get_algorithm_name() << "..." << std::endl;
+  std::cout << " --ccost " << x[0];
+  std::cout << " --gamma " << x[1];
+  std::cout << std::endl;
+}      

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/pkg-grass/pktools.git



More information about the Pkg-grass-devel mailing list