[pktools] 91/375: FeatureSelector.h: introduced epsilon to exclude rat race in sffs

Wed Dec 3 21:54:03 UTC 2014

This is an automated email from the git hooks/post-receive script.

sebastic-guest pushed a commit to branch upstream-master
in repository pktools.

commit cd9c58179294f0215807efe33a3cc47cb0e81205
Author: user <user at osgeolive.(none)>
Date:   Sun Apr 21 22:35:45 2013 +0200

    FeatureSelector.h: introduced epsilon to exclude rat race in sffs
---
 src/algorithms/FeatureSelector.h |   3 +-
 src/apps/pkclassify_svm.cc       |  14 +--
 src/apps/pkfs_svm.cc             | 189 +++++++++++++++++++++++-------
 src/apps/pkopt_svm.cc            | 242 +++++++++++++++++++++++----------------
 4 files changed, 296 insertions(+), 152 deletions(-)

diff --git a/src/algorithms/FeatureSelector.h b/src/algorithms/FeatureSelector.h
index fcd3f15..51f5f6e 100644
--- a/src/algorithms/FeatureSelector.h
+++ b/src/algorithms/FeatureSelector.h
@@ -152,6 +152,7 @@ template<class T> double FeatureSelector::backward(vector< Vector2d<T> >& v, dou
 
 //floating search
 template<class T> double FeatureSelector::floating(vector< Vector2d<T> >& v, double (*getCost)(const vector< Vector2d<T> >&), list<int>& subset, int maxFeatures, short verbose){
+  double epsilon=0.001;
   vector<T> cost;
   int maxLevels=v[0][0].size();
   if(maxFeatures<1)
@@ -183,7 +184,7 @@ template<class T> double FeatureSelector::floating(vector< Vector2d<T> >& v, dou
     while(k>1){
       int x_r;
       double cost_r=removeFeature(v,*getCost,subset,x_r,verbose);
-      if(cost_r>cost[k-1]){
+      if(cost_r>cost[k-1]+epsilon){
 	--k;
 	cost[k]=cost_r;
 	cost.pop_back();
diff --git a/src/apps/pkclassify_svm.cc b/src/apps/pkclassify_svm.cc
index 6f4c36c..b4f360f 100644
--- a/src/apps/pkclassify_svm.cc
+++ b/src/apps/pkclassify_svm.cc
@@ -182,18 +182,16 @@ int main(int argc, char *argv[])
     exit(0);//help was invoked, stop processing
   }
 
+  assert(training_opt.size());
+
   if(verbose_opt[0]>=1){
     if(input_opt.size())
-      std::cout << "image filename: " << input_opt[0] << std::endl;
+      std::cout << "input filename: " << input_opt[0] << std::endl;
     if(mask_opt.size())
       std::cout << "mask filename: " << mask_opt[0] << std::endl;
-    if(training_opt[0].size()){
-      std::cout << "training shape file: " << std::endl;
-      for(int ifile=0;ifile<training_opt.size();++ifile)
-        std::cout << training_opt[ifile] << std::endl;
-    }
-    else
-      cerr << "no training file set!" << std::endl;
+    std::cout << "training shape file: " << std::endl;
+    for(int ifile=0;ifile<training_opt.size();++ifile)
+      std::cout << training_opt[ifile] << std::endl;
     std::cout << "verbose: " << verbose_opt[0] << std::endl;
   }
   unsigned short nbag=(training_opt.size()>1)?training_opt.size():bag_opt[0];
diff --git a/src/apps/pkfs_svm.cc b/src/apps/pkfs_svm.cc
index d4d125c..37f531e 100644
--- a/src/apps/pkfs_svm.cc
+++ b/src/apps/pkfs_svm.cc
@@ -34,6 +34,8 @@ enum SelectorValue  { NA=0, SFFS=1, SFS=2, SBS=3, BFS=4 };
 //global parameters used in cost function getCost
 map<string,short> classValueMap;
 vector<std::string> nameVector;
+vector<unsigned int> nctraining;
+vector<unsigned int> nctest;
 Optionpk<unsigned short> svm_type_opt("svmt", "svmtype", "type of SVM (0: C-SVC, 1: nu-SVC, 2: one-class SVM, 3: epsilon-SVR,	4: nu-SVR)",0);
 Optionpk<unsigned short> kernel_type_opt("kt", "kerneltype", "type of kernel function (0: linear: u'*v, 1: polynomial: (gamma*u'*v + coef0)^degree, 2: radial basis function: exp(-gamma*(u-v)^2), 3: sigmoid: tanh(gamma*u'*v + coef0), 4: precomputed kernel (kernel values in training_set_file)",2);
 Optionpk<unsigned short> kernel_degree_opt("kd", "kd", "degree in kernel function",3);
@@ -57,8 +59,16 @@ double getCost(const vector<Vector2d<float> > &trainingFeatures)
 {
   unsigned short nclass=trainingFeatures.size();
   unsigned int ntraining=0;
-  for(int iclass=0;iclass<nclass;++iclass)
-    ntraining+=trainingFeatures[iclass].size();
+  unsigned int ntest=0;
+  for(int iclass=0;iclass<nclass;++iclass){
+    // ntraining+=trainingFeatures[iclass].size();
+    ntraining+=nctraining[iclass];
+    ntest+=nctest[iclass];
+  }
+  if(ntest)
+    assert(!cv_opt[0]);
+  if(!cv_opt[0])
+    assert(ntest);
   unsigned short nFeatures=trainingFeatures[0][0].size();
 
   struct svm_parameter param;
@@ -89,7 +99,8 @@ double getCost(const vector<Vector2d<float> > &trainingFeatures)
   unsigned long int spaceIndex=0;
   int lIndex=0;
   for(int iclass=0;iclass<nclass;++iclass){
-    for(int isample=0;isample<trainingFeatures[iclass].size();++isample){
+    // for(int isample=0;isample<trainingFeatures[iclass].size();++isample){
+    for(int isample=0;isample<nctraining[iclass];++isample){
       prob.x[lIndex]=&(x_space[spaceIndex]);
       for(int ifeature=0;ifeature<nFeatures;++ifeature){
         x_space[spaceIndex].index=ifeature+1;
@@ -115,6 +126,8 @@ double getCost(const vector<Vector2d<float> > &trainingFeatures)
   if(cv_opt[0]>0){
     //todo: distinct between independent test input and cross validation
   }
+  else{
+  }
   ConfusionMatrix cm;
   //set names in confusion matrix using nameVector
   for(int iname=0;iname<nameVector.size();++iname){
@@ -123,19 +136,46 @@ double getCost(const vector<Vector2d<float> > &trainingFeatures)
     else if(cm.getClassIndex(type2string<short>(classValueMap[nameVector[iname]]))<0)
       cm.pushBackClassName(type2string<short>(classValueMap[nameVector[iname]]));
   }
-
-  double *target = Malloc(double,prob.l);
-  svm_cross_validation(&prob,&param,cv_opt[0],target);
-  assert(param.svm_type != EPSILON_SVR&&param.svm_type != NU_SVR);//only for regression
-  int total_correct=0;
-  for(int i=0;i<prob.l;i++){
-    string refClassName=nameVector[prob.y[i]];
-    string className=nameVector[target[i]];
-    if(classValueMap.size())
-      cm.incrementResult(type2string<short>(classValueMap[refClassName]),type2string<short>(classValueMap[className]),1.0);
-    else
-      cm.incrementResult(cm.getClass(prob.y[i]),cm.getClass(target[i]),1.0);
+  if(cv_opt[0]>0){
+    double *target = Malloc(double,prob.l);
+    svm_cross_validation(&prob,&param,cv_opt[0],target);
+    assert(param.svm_type != EPSILON_SVR&&param.svm_type != NU_SVR);//only for regression
+    int total_correct=0;
+    for(int i=0;i<prob.l;i++){
+      string refClassName=nameVector[prob.y[i]];
+      string className=nameVector[target[i]];
+      if(classValueMap.size())
+	cm.incrementResult(type2string<short>(classValueMap[refClassName]),type2string<short>(classValueMap[className]),1.0);
+      else
+	cm.incrementResult(cm.getClass(prob.y[i]),cm.getClass(target[i]),1.0);
+    }
+    free(target);
   }
+  else{
+    struct svm_node *x_test;
+    x_test = Malloc(struct svm_node,(nFeatures+1));
+    for(int iclass=0;iclass<nclass;++iclass){
+      for(int isample=0;isample<nctest[iclass];++isample){
+	for(int ifeature=0;ifeature<nFeatures;++ifeature){
+	  x_test[ifeature].index=ifeature+1;
+	  x_test[ifeature].value=trainingFeatures[iclass][nctraining[iclass]+isample][ifeature];
+	}
+	x_test[nFeatures].index=-1;
+	double predict_label=0;
+	//todo: make distinction between svm_predict and svm_predict_probability?
+	predict_label = svm_predict(svm,x_test);
+	string refClassName=nameVector[iclass];
+	string className=nameVector[static_cast<short>(predict_label)];
+	if(classValueMap.size())
+	  cm.incrementResult(type2string<short>(classValueMap[refClassName]),type2string<short>(classValueMap[className]),1.0);
+	else
+	  cm.incrementResult(refClassName,className,1.0);
+      }
+    }
+    free(x_test);
+  }
+  if(verbose_opt[0]>1)
+    std::cout << cm << std::endl;
   assert(cm.nReference());
   // if(verbose_opt[0])
   //   std::cout << cm << std::endl;
@@ -149,7 +189,6 @@ double getCost(const vector<Vector2d<float> > &trainingFeatures)
   // not free the memory used by svm_problem if you are still using the
   // svm_model produced by svm_train(). 
   // however, we will re-train the svm later on after the feature selection
-  free(target);
   free(prob.y);
   free(prob.x);
   free(x_space);
@@ -162,6 +201,7 @@ int main(int argc, char *argv[])
   // vector<double> priors;
   
   //--------------------------- command line options ------------------------------------
+  Optionpk<string> input_opt("i", "input", "input image"); 
   Optionpk<string> training_opt("t", "training", "training shape file. A single shape file contains all training features (must be set as: B0, B1, B2,...) for all classes (class numbers identified by label option)."); 
   Optionpk<string> label_opt("\0", "label", "identifier for class label in training shape file.","label"); 
   Optionpk<unsigned short> maxFeatures_opt("n", "nf", "number of features to select (0 to select optimal number, see also ecost option)", 0);
@@ -178,7 +218,8 @@ int main(int argc, char *argv[])
 
   bool doProcess;//stop process when program was invoked with help option (-h --help)
   try{
-    doProcess=training_opt.retrieveOption(argc,argv);
+    doProcess=input_opt.retrieveOption(argc,argv);
+    training_opt.retrieveOption(argc,argv);
     maxFeatures_opt.retrieveOption(argc,argv);
     label_opt.retrieveOption(argc,argv);
     // reclass_opt.retrieveOption(argc,argv);
@@ -218,6 +259,19 @@ int main(int argc, char *argv[])
     exit(0);//help was invoked, stop processing
   }
 
+  assert(training_opt.size());
+  if(input_opt.size())
+    cv_opt[0]=0;
+
+  if(verbose_opt[0]>=1){
+    if(input_opt.size())
+      std::cout << "input filename: " << input_opt[0] << std::endl;
+    std::cout << "training shape file: " << std::endl;
+    for(int ifile=0;ifile<training_opt.size();++ifile)
+      std::cout << training_opt[ifile] << std::endl;
+    std::cout << "verbose: " << verbose_opt[0] << std::endl;
+  }
+
   static std::map<std::string, SelectorValue> selMap;
   //initialize selMap
   selMap["sffs"]=SFFS;
@@ -225,13 +279,8 @@ int main(int argc, char *argv[])
   selMap["sbs"]=SBS;
   selMap["bfs"]=BFS;
 
-  assert(training_opt[0].size());
-  if(verbose_opt[0]>=1)
-    std::cout << "training shape file: " << training_opt[0] << std::endl;
-
   unsigned int totalSamples=0;
-  // int nreclass=0;
-  // vector<int> vcode;//unique class codes in recode string
+  unsigned int totalTestSamples=0;
 
   unsigned short nclass=0;
   int nband=0;
@@ -240,6 +289,7 @@ int main(int argc, char *argv[])
   vector<double> offset;
   vector<double> scale;
   vector< Vector2d<float> > trainingPixels;//[class][sample][band]
+  vector< Vector2d<float> > testPixels;//[class][sample][band]
 
   // if(priors_opt.size()>1){//priors from argument list
   //   priors.resize(priors_opt.size());
@@ -269,16 +319,28 @@ int main(int argc, char *argv[])
   vector<string> fields;
   //organize training data
   trainingPixels.clear();
+  testPixels.clear();
   map<string,Vector2d<float> > trainingMap;
+  map<string,Vector2d<float> > testMap;
   if(verbose_opt[0]>=1)
-    std::cout << "reading imageShape file " << training_opt[0] << std::endl;
+    std::cout << "reading training file " << training_opt[0] << std::endl;
   try{
-    if(band_opt.size())
+    if(band_opt.size()){
       totalSamples=readDataImageShape(training_opt[0],trainingMap,fields,band_opt,label_opt[0],verbose_opt[0]);
-    else
+      if(input_opt.size())
+	totalTestSamples=readDataImageShape(input_opt[0],testMap,fields,band_opt,label_opt[0],verbose_opt[0]);
+    }
+    else{
       totalSamples=readDataImageShape(training_opt[0],trainingMap,fields,start_opt[0],end_opt[0],label_opt[0],verbose_opt[0]);
+      if(input_opt.size())
+	totalTestSamples=readDataImageShape(input_opt[0],testMap,fields,start_opt[0],end_opt[0],label_opt[0],verbose_opt[0]);
+    }
     if(trainingMap.size()<2){
-      string errorstring="Error: could not read at least two classes from training file";
+      string errorstring="Error: could not read at least two classes from training input file";
+      throw(errorstring);
+    }
+    if(input_opt.size()&&testMap.size()<2){
+      string errorstring="Error: could not read at least two classes from test input file";
       throw(errorstring);
     }
   }
@@ -327,7 +389,30 @@ int main(int argc, char *argv[])
   if(classname_opt.size())
     assert(nclass==classname_opt.size());
   nband=trainingPixels[0][0].size()-2;//X and Y//trainingPixels[0][0].size();
-  // assert(reclassMap.size()==nclass);
+
+  mapit=testMap.begin();
+  while(mapit!=testMap.end()){
+    if(classValueMap.size()){
+      //check if name in test is covered by classname_opt (values can not be 0)
+      if(classValueMap[mapit->first]>0){
+	;//ok, no need to print to std::cout 
+      }
+      else{
+	std::cerr << "Error: names in classname option are not complete, please check names in test vector and make sure classvalue is > 0" << std::endl;
+	exit(1);
+      }
+    }    
+    //no need to delete small classes for test sample
+    testPixels.push_back(mapit->second);
+    if(verbose_opt[0]>1)
+      std::cout << mapit->first << ": " << (mapit->second).size() << " samples" << std::endl;
+    ++mapit;
+  }
+  if(input_opt.size()){
+    assert(nclass==testPixels.size());
+    assert(nband=testPixels[0][0].size()-2);//X and Y//testPixels[0][0].size();
+    assert(!cv_opt[0]);
+  }
 
   //do not remove outliers here: could easily be obtained through ogr2ogr -where 'B2<110' output.shp input.shp
   //balance training data
@@ -414,21 +499,27 @@ int main(int argc, char *argv[])
   // }
 
 
-  //Calculate features of trainig set
+  //Calculate features of training (and test) set
+  nctraining.resize(nclass);
+  nctest.resize(nclass);
   vector< Vector2d<float> > trainingFeatures(nclass);
   for(int iclass=0;iclass<nclass;++iclass){
-    int nctraining=0;
     if(verbose_opt[0]>=1)
       std::cout << "calculating features for class " << iclass << std::endl;
-    if(random)
-      srand(time(NULL));
-    nctraining=trainingPixels[iclass].size();//bagSize_opt[0] given in % of training size
+    nctraining[iclass]=trainingPixels[iclass].size();
     if(verbose_opt[0]>=1)
-      std::cout << "nctraining class " << iclass << ": " << nctraining << std::endl;
-    int index=0;
-      
-    trainingFeatures[iclass].resize(nctraining);
-    for(int isample=0;isample<nctraining;++isample){
+      std::cout << "nctraining[" << iclass << "]: " << nctraining[iclass] << std::endl;
+    if(testPixels.size()>iclass){
+      nctest[iclass]=testPixels[iclass].size();
+      if(verbose_opt[0]>=1){
+	std::cout << "nctest[" << iclass << "]: " << nctest[iclass] << std::endl;
+      }
+    }
+    else
+      nctest[iclass]=0;
+    // trainingFeatures[iclass].resize(nctraining);
+    trainingFeatures[iclass].resize(nctraining[iclass]+nctest[iclass]);
+    for(int isample=0;isample<nctraining[iclass];++isample){
       //scale pixel values according to scale and offset!!!
       for(int iband=0;iband<nband;++iband){
         assert(trainingPixels[iclass].size()>isample);
@@ -439,13 +530,22 @@ int main(int argc, char *argv[])
         trainingFeatures[iclass][isample].push_back((value-offset[iband])/scale[iband]);
       }
     }
-    assert(trainingFeatures[iclass].size()==nctraining);
+    // assert(trainingFeatures[iclass].size()==nctraining[iclass]);
+    for(int isample=0;isample<nctest[iclass];++isample){
+      //scale pixel values according to scale and offset!!!
+      for(int iband=0;iband<nband;++iband){
+        assert(testPixels[iclass].size()>isample);
+        assert(testPixels[iclass][isample].size()>iband+startBand);
+        assert(offset.size()>iband);
+        assert(scale.size()>iband);
+        float value=testPixels[iclass][isample][iband+startBand];
+        // testFeatures[iclass][isample].push_back((value-offset[iband])/scale[iband]);
+        trainingFeatures[iclass][nctraining[iclass]+isample].push_back((value-offset[iband])/scale[iband]);
+      }
+    }
+    assert(trainingFeatures[iclass].size()==nctraining[iclass]+nctest[iclass]);
   }
     
-  unsigned int ntraining=0;
-  for(int iclass=0;iclass<nclass;++iclass)
-    ntraining+=trainingFeatures[iclass].size();
-
   int nFeatures=trainingFeatures[0][0].size();
   int maxFeatures=(maxFeatures_opt[0])? maxFeatures_opt[0] : 1;
   double previousCost=-1;
@@ -465,7 +565,8 @@ int main(int argc, char *argv[])
         switch(selMap[selector_opt[0]]){
         case(SFFS):
           subset.clear();//needed to clear in case of floating and brute force search
-          cost=selector.floating(trainingFeatures,&getCost,subset,maxFeatures,verbose_opt[0]);
+	  //test
+          cost=selector.floating(trainingFeatures,&getCost,subset,maxFeatures,2);//verbose_opt[0]);
           break;
         case(SFS):
           cost=selector.forward(trainingFeatures,&getCost,subset,maxFeatures,verbose_opt[0]);
diff --git a/src/apps/pkopt_svm.cc b/src/apps/pkopt_svm.cc
index a6b95c0..121b7a9 100644
--- a/src/apps/pkopt_svm.cc
+++ b/src/apps/pkopt_svm.cc
@@ -38,6 +38,8 @@ double objFunction(const std::vector<double> &x, std::vector<double> &grad, void
 //global parameters used in objective function
 map<string,short> classValueMap;
 vector<std::string> nameVector;
+vector<unsigned int> nctraining;
+vector<unsigned int> nctest;
 Optionpk<unsigned short> svm_type_opt("svmt", "svmtype", "type of SVM (0: C-SVC, 1: nu-SVC, 2: one-class SVM, 3: epsilon-SVR,	4: nu-SVR)",0);
 Optionpk<unsigned short> kernel_type_opt("kt", "kerneltype", "type of kernel function (0: linear: u'*v, 1: polynomial: (gamma*u'*v + coef0)^degree, 2: radial basis function: exp(-gamma*(u-v)^2), 3: sigmoid: tanh(gamma*u'*v + coef0), 4: precomputed kernel (kernel values in training_set_file)",2);
 Optionpk<unsigned short> kernel_degree_opt("kd", "kd", "degree in kernel function",3);
@@ -66,8 +68,16 @@ double objFunction(const std::vector<double> &x, std::vector<double> &grad, void
   //todo: calculate kappa using cross validation
   unsigned short nclass=tf->size();
   unsigned int ntraining=0;
-  for(int iclass=0;iclass<nclass;++iclass)
-    ntraining+=(*tf)[iclass].size();
+  unsigned int ntest=0;
+  for(int iclass=0;iclass<nclass;++iclass){
+    ntraining+=nctraining[iclass];
+    ntest+=nctest[iclass];
+  }
+  if(ntest)
+    assert(!cv_opt[0]);
+  if(!cv_opt[0])
+    assert(ntest);
+    // ntraining+=(*tf)[iclass].size();
   unsigned short nFeatures=(*tf)[0][0].size();
   struct svm_parameter param;
   param.svm_type = svm_type_opt[0];
@@ -96,7 +106,8 @@ double objFunction(const std::vector<double> &x, std::vector<double> &grad, void
   unsigned long int spaceIndex=0;
   int lIndex=0;
   for(int iclass=0;iclass<nclass;++iclass){
-    for(int isample=0;isample<(*tf)[iclass].size();++isample){
+    // for(int isample=0;isample<(*tf)[iclass].size();++isample){
+    for(int isample=0;isample<nctraining[iclass];++isample){
       prob.x[lIndex]=&(x_space[spaceIndex]);
       for(int ifeature=0;ifeature<nFeatures;++ifeature){
         x_space[spaceIndex].index=ifeature+1;
@@ -119,9 +130,6 @@ double objFunction(const std::vector<double> &x, std::vector<double> &grad, void
   if(verbose_opt[0]>2)
     std::cout << "SVM is now trained" << std::endl;
 
-  if(cv_opt[0]>0){
-    //todo: distinct between independent test input and cross validation
-  }
   ConfusionMatrix cm;
   //set names in confusion matrix using nameVector
   for(int iname=0;iname<nameVector.size();++iname){
@@ -130,23 +138,46 @@ double objFunction(const std::vector<double> &x, std::vector<double> &grad, void
     else if(cm.getClassIndex(type2string<short>(classValueMap[nameVector[iname]]))<0)
       cm.pushBackClassName(type2string<short>(classValueMap[nameVector[iname]]));
   }
-
-  double *target = Malloc(double,prob.l);
-  svm_cross_validation(&prob,&param,cv_opt[0],target);
-  assert(param.svm_type != EPSILON_SVR&&param.svm_type != NU_SVR);//only for regression
-  int total_correct=0;
-  for(int i=0;i<prob.l;i++){
-    string refClassName=nameVector[prob.y[i]];
-    string className=nameVector[target[i]];
-    if(classValueMap.size())
-      cm.incrementResult(type2string<short>(classValueMap[refClassName]),type2string<short>(classValueMap[className]),1.0);
-    else
-      cm.incrementResult(cm.getClass(prob.y[i]),cm.getClass(target[i]),1.0);
+  if(cv_opt[0]>0){
+    double *target = Malloc(double,prob.l);
+    svm_cross_validation(&prob,&param,cv_opt[0],target);
+    assert(param.svm_type != EPSILON_SVR&&param.svm_type != NU_SVR);//only for regression
+    for(int i=0;i<prob.l;i++){
+      string refClassName=nameVector[prob.y[i]];
+      string className=nameVector[target[i]];
+      if(classValueMap.size())
+	cm.incrementResult(type2string<short>(classValueMap[refClassName]),type2string<short>(classValueMap[className]),1.0);
+      else
+	cm.incrementResult(cm.getClass(prob.y[i]),cm.getClass(target[i]),1.0);
+    }
+    free(target);
   }
-  if(verbose_opt[0])
+  else{
+    struct svm_node *x_test;
+    x_test = Malloc(struct svm_node,(nFeatures+1));
+    for(int iclass=0;iclass<nclass;++iclass){
+      for(int isample=0;isample<nctest[iclass];++isample){
+	for(int ifeature=0;ifeature<nFeatures;++ifeature){
+	  x_test[ifeature].index=ifeature+1;
+	  x_test[ifeature].value=(*tf)[iclass][nctraining[iclass]+isample][ifeature];
+	}
+	x_test[nFeatures].index=-1;
+	double predict_label=0;
+	//todo: make distinction between svm_predict and svm_predict_probability?
+	predict_label = svm_predict(svm,x_test);
+	string refClassName=nameVector[iclass];
+	string className=nameVector[static_cast<short>(predict_label)];
+	if(classValueMap.size())
+	  cm.incrementResult(type2string<short>(classValueMap[refClassName]),type2string<short>(classValueMap[className]),1.0);
+	else
+	  cm.incrementResult(refClassName,className,1.0);
+      }
+    }
+    free(x_test);
+  }
+  if(verbose_opt[0]>1)
     std::cout << cm << std::endl;
   assert(cm.nReference());
-  free(target);
   free(prob.y);
   free(prob.x);
   free(x_space);
@@ -172,6 +203,7 @@ int main(int argc, char *argv[])
 {
   map<short,int> reclassMap;
   vector<int> vreclass;
+  Optionpk<string> input_opt("i", "input", "input image"); 
   Optionpk<string> training_opt("t", "training", "training shape file. A single shape file contains all training features (must be set as: B0, B1, B2,...) for all classes (class numbers identified by label option)."); 
   Optionpk<string> label_opt("\0", "label", "identifier for class label in training shape file.","label"); 
   // Optionpk<unsigned short> reclass_opt("\0", "rc", "reclass code (e.g. --rc=12 --rc=23 to reclass first two classes to 12 and 23 resp.).", 0);
@@ -190,7 +222,8 @@ int main(int argc, char *argv[])
 
   bool doProcess;//stop process when program was invoked with help option (-h --help)
   try{
-    doProcess=training_opt.retrieveOption(argc,argv);
+    doProcess=input_opt.retrieveOption(argc,argv);
+    training_opt.retrieveOption(argc,argv);
     label_opt.retrieveOption(argc,argv);
     // reclass_opt.retrieveOption(argc,argv);
     balance_opt.retrieveOption(argc,argv);
@@ -230,13 +263,21 @@ int main(int argc, char *argv[])
     exit(0);//help was invoked, stop processing
   }
 
-  assert(training_opt[0].size());
-  if(verbose_opt[0]>=1)
-    std::cout << "training shape file: " << training_opt[0] << std::endl;
+  assert(training_opt.size());
+  if(input_opt.size())
+    cv_opt[0]=0;
+
+  if(verbose_opt[0]>=1){
+    if(input_opt.size())
+      std::cout << "input filename: " << input_opt[0] << std::endl;
+    std::cout << "training shape file: " << std::endl;
+    for(int ifile=0;ifile<training_opt.size();++ifile)
+      std::cout << training_opt[ifile] << std::endl;
+    std::cout << "verbose: " << verbose_opt[0] << std::endl;
+  }
 
   unsigned int totalSamples=0;
-  // int nreclass=0;
-  // vector<int> vcode;//unique class codes in recode string
+  unsigned int totalTestSamples=0;
 
   unsigned short nclass=0;
   int nband=0;
@@ -245,6 +286,7 @@ int main(int argc, char *argv[])
   vector<double> offset;
   vector<double> scale;
   vector< Vector2d<float> > trainingPixels;//[class][sample][band]
+  vector< Vector2d<float> > testPixels;//[class][sample][band]
 
   // if(priors_opt.size()>1){//priors from argument list
   //   priors.resize(priors_opt.size());
@@ -274,16 +316,28 @@ int main(int argc, char *argv[])
   vector<string> fields;
   //organize training data
   trainingPixels.clear();
+  testPixels.clear();
   map<string,Vector2d<float> > trainingMap;
+  map<string,Vector2d<float> > testMap;
   if(verbose_opt[0]>=1)
-    std::cout << "reading imageShape file " << training_opt[0] << std::endl;
+    std::cout << "reading training file " << training_opt[0] << std::endl;
   try{
-    if(band_opt.size())
+    if(band_opt.size()){
       totalSamples=readDataImageShape(training_opt[0],trainingMap,fields,band_opt,label_opt[0],verbose_opt[0]);
-    else
+      if(input_opt.size())
+	totalTestSamples=readDataImageShape(input_opt[0],testMap,fields,band_opt,label_opt[0],verbose_opt[0]);
+    }
+    else{
       totalSamples=readDataImageShape(training_opt[0],trainingMap,fields,start_opt[0],end_opt[0],label_opt[0],verbose_opt[0]);
+      if(input_opt.size())
+	totalTestSamples=readDataImageShape(input_opt[0],testMap,fields,start_opt[0],end_opt[0],label_opt[0],verbose_opt[0]);
+    }
     if(trainingMap.size()<2){
-      string errorstring="Error: could not read at least two classes from training file";
+      string errorstring="Error: could not read at least two classes from training input file";
+      throw(errorstring);
+    }
+    if(input_opt.size()&&testMap.size()<2){
+      string errorstring="Error: could not read at least two classes from test input file";
       throw(errorstring);
     }
   }
@@ -303,7 +357,8 @@ int main(int argc, char *argv[])
 
   if(verbose_opt[0]>1)
     std::cout << "training pixels: " << std::endl;
-  map<string,Vector2d<float> >::iterator mapit=trainingMap.begin();
+  map<string,Vector2d<float> >::iterator mapit;
+  mapit=trainingMap.begin();
   while(mapit!=trainingMap.end()){
     if(classValueMap.size()){
       //check if name in training is covered by classname_opt (values can not be 0)
@@ -334,6 +389,29 @@ int main(int argc, char *argv[])
     assert(nclass==classname_opt.size());
   nband=trainingPixels[0][0].size()-2;//X and Y//trainingPixels[0][0].size();
 
+  mapit=testMap.begin();
+  while(mapit!=testMap.end()){
+    if(classValueMap.size()){
+      //check if name in test is covered by classname_opt (values can not be 0)
+      if(classValueMap[mapit->first]>0){
+	;//ok, no need to print to std::cout 
+      }
+      else{
+	std::cerr << "Error: names in classname option are not complete, please check names in test vector and make sure classvalue is > 0" << std::endl;
+	exit(1);
+      }
+    }    
+    //no need to delete small classes for test sample
+    testPixels.push_back(mapit->second);
+    if(verbose_opt[0]>1)
+      std::cout << mapit->first << ": " << (mapit->second).size() << " samples" << std::endl;
+    ++mapit;
+  }
+  if(input_opt.size()){
+    assert(nclass==testPixels.size());
+    assert(nband=testPixels[0][0].size()-2);//X and Y//testPixels[0][0].size();
+    assert(!cv_opt[0]);
+  }
 
   //do not remove outliers here: could easily be obtained through ogr2ogr -where 'B2<110' output.shp input.shp
   //balance training data
@@ -359,7 +437,8 @@ int main(int argc, char *argv[])
     }
     assert(totalSamples==nclass*balance_opt[0]);
   }
-    
+
+  //no need to balance test sample    
   //set scale and offset
   offset.resize(nband);
   scale.resize(nband);
@@ -394,57 +473,6 @@ int main(int argc, char *argv[])
     }
   }
 
-  //recode vreclass to ordered vector, starting from 0 to nreclass
-  // vcode.clear();
-  // if(verbose_opt[0]>=1){
-  //   std::cout << "before recoding: " << std::endl;
-  //   for(int iclass = 0; iclass < vreclass.size(); iclass++)
-  //     std::cout << " " << vreclass[iclass];
-  //   std::cout << std::endl; 
-  // }
-  // vector<int> vord=vreclass;//ordered vector, starting from 0 to nreclass
-  // map<short,int> mreclass;
-  // for(int ic=0;ic<vreclass.size();++ic){
-  //   if(mreclass.find(vreclass[ic])==mreclass.end())
-  //     mreclass[vreclass[ic]]=iclass++;
-  // }
-  // for(int ic=0;ic<vreclass.size();++ic)
-  //   vord[ic]=mreclass[vreclass[ic]];
-  // //construct uniqe class codes
-  // while(!vreclass.empty()){
-  //   vcode.push_back(*(vreclass.begin()));
-  //   //delete all these entries from vreclass
-  //   vector<int>::iterator vit;
-  //   while((vit=find(vreclass.begin(),vreclass.end(),vcode.back()))!=vreclass.end())
-  //     vreclass.erase(vit);
-  // }
-  // if(verbose_opt[0]>=1){
-  //   std::cout << "recode values: " << std::endl;
-  //   for(int icode=0;icode<vcode.size();++icode)
-  //     std::cout << vcode[icode] << " ";
-  //   std::cout << std::endl;
-  // }
-  // vreclass=vord;
-  // if(verbose_opt[0]>=1){
-  //   std::cout << "after recoding: " << std::endl;
-  //   for(int iclass = 0; iclass < vord.size(); iclass++)
-  //     std::cout << " " << vord[iclass];
-  //   std::cout << std::endl; 
-  // }
-      
-  // vector<int> vuniqueclass=vreclass;
-  // //remove duplicate elements from vuniqueclass
-  // sort( vuniqueclass.begin(), vuniqueclass.end() );
-  // vuniqueclass.erase( unique( vuniqueclass.begin(), vuniqueclass.end() ), vuniqueclass.end() );
-  // nreclass=vuniqueclass.size();
-  // if(verbose_opt[0]>=1){
-  //   std::cout << "unique classes: " << std::endl;
-  //   for(int iclass = 0; iclass < vuniqueclass.size(); iclass++)
-  //     std::cout << " " << vuniqueclass[iclass];
-  //   std::cout << std::endl; 
-  //   std::cout << "number of reclasses: " << nreclass << std::endl;
-  // }
-    
   // if(priors_opt.size()==1){//default: equal priors for each class
   //   priors.resize(nclass);
   //   for(int iclass=0;iclass<nclass;++iclass)
@@ -461,21 +489,27 @@ int main(int argc, char *argv[])
     // std::cout << std::endl;
   }
 
-  //Calculate features of trainig set
+  //Calculate features of training (and test) set
+  nctraining.resize(nclass);
+  nctest.resize(nclass);
   vector< Vector2d<float> > trainingFeatures(nclass);
   for(int iclass=0;iclass<nclass;++iclass){
-    int nctraining=0;
     if(verbose_opt[0]>=1)
       std::cout << "calculating features for class " << iclass << std::endl;
-    if(random)
-      srand(time(NULL));
-    nctraining=trainingPixels[iclass].size();//bagSize_opt[0] given in % of training size
+    nctraining[iclass]=trainingPixels[iclass].size();
     if(verbose_opt[0]>=1)
-      std::cout << "nctraining class " << iclass << ": " << nctraining << std::endl;
-    int index=0;
-      
-    trainingFeatures[iclass].resize(nctraining);
-    for(int isample=0;isample<nctraining;++isample){
+      std::cout << "nctraining[" << iclass << "]: " << nctraining[iclass] << std::endl;
+    if(testPixels.size()>iclass){
+      nctest[iclass]=testPixels[iclass].size();
+      if(verbose_opt[0]>=1){
+	std::cout << "nctest[" << iclass << "]: " << nctest[iclass] << std::endl;
+      }
+    }
+    else
+      nctest[iclass]=0;
+    // trainingFeatures[iclass].resize(nctraining[iclass]);
+    trainingFeatures[iclass].resize(nctraining[iclass]+nctest[iclass]);
+    for(int isample=0;isample<nctraining[iclass];++isample){
       //scale pixel values according to scale and offset!!!
       for(int iband=0;iband<nband;++iband){
         assert(trainingPixels[iclass].size()>isample);
@@ -486,12 +520,21 @@ int main(int argc, char *argv[])
         trainingFeatures[iclass][isample].push_back((value-offset[iband])/scale[iband]);
       }
     }
-    assert(trainingFeatures[iclass].size()==nctraining);
+    // assert(trainingFeatures[iclass].size()==nctraining[iclass]);
+    for(int isample=0;isample<nctest[iclass];++isample){
+      //scale pixel values according to scale and offset!!!
+      for(int iband=0;iband<nband;++iband){
+        assert(testPixels[iclass].size()>isample);
+        assert(testPixels[iclass][isample].size()>iband+startBand);
+        assert(offset.size()>iband);
+        assert(scale.size()>iband);
+        float value=testPixels[iclass][isample][iband+startBand];
+        // testFeatures[iclass][isample].push_back((value-offset[iband])/scale[iband]);
+        trainingFeatures[iclass][nctraining[iclass]+isample].push_back((value-offset[iband])/scale[iband]);
+      }
+    }
+    assert(trainingFeatures[iclass].size()==nctraining[iclass]+nctest[iclass]);
   }
-    
-  unsigned int ntraining=0;
-  for(int iclass=0;iclass<nclass;++iclass)
-    ntraining+=trainingFeatures[iclass].size();
 
   assert(ccost_opt.size()>1);//must have boundaries at least (initial value is optional)
   if(ccost_opt.size()<3)//create initial value
@@ -525,7 +568,8 @@ int main(int argc, char *argv[])
 	x[0]=ccost;
 	x[1]=gamma;
 	std::vector<double> theGrad;
-	double error=objFunction(x,theGrad,&trainingFeatures);
+	double error=0;
+	error=objFunction(x,theGrad,&trainingFeatures);
 	if(error<minError){
 	  minError=error;
 	  minCost=ccost;

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/pkg-grass/pktools.git