[pktools] 35/375: string map in pkclassify_nn.c and pkclassify_svm.cc

Wed Dec 3 21:53:56 UTC 2014

This is an automated email from the git hooks/post-receive script.

sebastic-guest pushed a commit to branch upstream-master
in repository pktools.

commit f1c59c74ae910460ee382eb9406f2a2aad0ba353
Author: Pieter Kempeneers <kempenep at gmail.com>
Date:   Fri Jan 18 17:28:27 2013 +0100

    string map in pkclassify_nn.c and pkclassify_svm.cc
---
 src/algorithms/ConfusionMatrix.h |   1 +
 src/algorithms/myfann_cpp.h      |  11 ++-
 src/apps/pkclassify_nn.cc        |  96 +++++++++++++++++--------
 src/apps/pkclassify_nn.h         | 151 +++++++++++++++++++++++++++++++++++++++
 src/apps/pkclassify_svm.cc       | 138 ++++++++++++++++++++++-------------
 src/imageclasses/ImgReaderOgr.h  | 136 +++++++++++++++++++++++++++++++++++
 6 files changed, 455 insertions(+), 78 deletions(-)

diff --git a/src/algorithms/ConfusionMatrix.h b/src/algorithms/ConfusionMatrix.h
index 33cf04f..b5481fb 100644
--- a/src/algorithms/ConfusionMatrix.h
+++ b/src/algorithms/ConfusionMatrix.h
@@ -33,6 +33,7 @@ public:
   ConfusionMatrix(const vector<string>& classNames);
   ConfusionMatrix(const ConfusionMatrix& cm);
   ConfusionMatrix& operator=(const ConfusionMatrix& cm);
+  short size() const {return m_results.size();};
   void resize(short nclass);
   void setClassNames(const vector<string>& classNames);
   void pushBackClassName(const string& className);
diff --git a/src/algorithms/myfann_cpp.h b/src/algorithms/myfann_cpp.h
index 6aba87f..4901cfa 100644
--- a/src/algorithms/myfann_cpp.h
+++ b/src/algorithms/myfann_cpp.h
@@ -1535,6 +1535,10 @@ public:
                 trainingFeatures[iclass].push_back(testFeatures[iclass].back());
                 testFeatures[iclass].pop_back();
               }
+              if(verbose>1){
+                std::cout << "training size " << iclass << ": " << trainingFeatures[iclass].size() << std::endl;
+                std::cout << "test size " << iclass << ": " << testFeatures[iclass].size() << std::endl;
+              }
               assert(trainingFeatures[iclass].size());
             }
             //create test sample
@@ -1552,8 +1556,13 @@ public:
                 std::cout << "Error: testclass " << testclass << " has no training" << std::endl;
               assert(trainingFeatures[testclass].size());
               ++nsample;
-              if(static_cast<float>(trainingFeatures[testclass].size())/static_cast<float>(testFeatures[testclass].size())<=cv)
+              if(static_cast<float>(trainingFeatures[testclass].size())/static_cast<float>(testFeatures[testclass].size())<=(cv-1)){
+                if(verbose>1){
+                  std::cout << "training size " << testclass << ": " << trainingFeatures[testclass].size() << std::endl;
+                  std::cout << "test size " << testclass << ": " << testFeatures[testclass].size() << std::endl;
+                }
                 testclass=(testclass+1)%nclass;
+              }
             }
             assert(nsample==ntest);
             //training with left out training set
diff --git a/src/apps/pkclassify_nn.cc b/src/apps/pkclassify_nn.cc
index 1c56084..c9f678a 100644
--- a/src/apps/pkclassify_nn.cc
+++ b/src/apps/pkclassify_nn.cc
@@ -100,17 +100,17 @@ int main(int argc, char *argv[])
   Optionpk<bool> todo_opt("\0","todo","",false);
   Optionpk<string> input_opt("i", "input", "input image"); 
   Optionpk<string> training_opt("t", "training", "training shape file. A single shape file contains all training features (must be set as: B0, B1, B2,...) for all classes (class numbers identified by label option). Use multiple training files for bootstrap aggregation (alternative to the bag and bsize options, where a random subset is taken from a single training file)"); 
-  Optionpk<string> label_opt("\0", "label", "identifier for class label in training shape file. (default is label)","label"); 
-  Optionpk<unsigned short> reclass_opt("\0", "rc", "reclass code (e.g. --rc=12 --rc=23 to reclass first two classes to 12 and 23 resp.). Default is 0: do not reclass", 0);
-  Optionpk<unsigned int> balance_opt("\0", "balance", "balance the input data to this number of samples for each class (default 0: do not balance)", 0);
-  Optionpk<int> minSize_opt("m", "min", "if number of training pixels is less then min, do not take this class into account (default is 0: consider all classes", 0);
+  Optionpk<string> label_opt("label", "label", "identifier for class label in training shape file.","label"); 
+  Optionpk<unsigned short> reclass_opt("rc", "rc", "reclass code (e.g. --rc=12 --rc=23 to reclass first two classes to 12 and 23 resp.)");
+  Optionpk<unsigned int> balance_opt("bal", "balance", "balance the input data to this number of samples for each class", 0);
+  Optionpk<int> minSize_opt("m", "min", "if number of training pixels is less then min, do not take this class into account (0: consider all classes)", 0);
   Optionpk<double> start_opt("s", "start", "start band sequence number (set to 0)",0); 
   Optionpk<double> end_opt("e", "end", "end band sequence number (set to 0 for all bands)", 0); 
   Optionpk<short> band_opt("b", "band", "band index (starting from 0, either use band option or use start to end)");
   Optionpk<double> offset_opt("\0", "offset", "offset value for each spectral band input features: refl[band]=(DN[band]-offset[band])/scale[band]", 0.0);
   Optionpk<double> scale_opt("\0", "scale", "scale value for each spectral band input features: refl=(DN[band]-offset[band])/scale[band] (use 0 if scale min and max in each band to -1.0 and 1.0)", 0.0);
   Optionpk<unsigned short> aggreg_opt("a", "aggreg", "how to combine aggregated classifiers, see also rc option (1: sum rule, 2: max rule).",1);
-  Optionpk<double> priors_opt("p", "prior", "prior probabilities for each class (e.g., -p 0.3 -p 0.3 -p 0.2 ), default set to equal priors)", 0.0); 
+  Optionpk<double> priors_opt("p", "prior", "prior probabilities for each class (e.g., -p 0.3 -p 0.3 -p 0.2 )", 0.0); 
   Optionpk<unsigned short> cv_opt("cv", "cv", "n-fold cross validation mode",0);
   Optionpk<unsigned int> nneuron_opt("\0", "nneuron", "number of neurons in hidden layers in neural network (multiple hidden layers are set by defining multiple number of neurons: -n 15 -n 1, default is one hidden layer with 5 neurons)", 5); 
   Optionpk<float> connection_opt("\0", "connection", "connection reate (default: 1.0 for a fully connected network)", 1.0); 
@@ -186,7 +186,8 @@ int main(int argc, char *argv[])
   }
 
   if(verbose_opt[0]>=1){
-    cout << "image filename: " << input_opt[0] << endl;
+    if(input_opt.size())
+      cout << "image filename: " << input_opt[0] << endl;
     if(mask_opt.size())
       cout << "mask filename: " << mask_opt[0] << endl;
     if(training_opt.size()){
@@ -213,9 +214,10 @@ int main(int argc, char *argv[])
 
   vector< vector<double> > offset(nbag);
   vector< vector<double> > scale(nbag);
+  map<string,Vector2d<float> > trainingMap;
   vector< Vector2d<float> > trainingPixels;//[class][sample][band]
 
-  if(reclass_opt.size()>1){
+  if(reclass_opt.size()){
     vreclass.resize(reclass_opt.size());
     for(int iclass=0;iclass<reclass_opt.size();++iclass){
       reclassMap[iclass]=reclass_opt[iclass];
@@ -242,8 +244,10 @@ int main(int argc, char *argv[])
   for(int ibag=0;ibag<nbag;++ibag){
     //organize training data
     if(ibag<training_opt.size()){//if bag contains new training pixels
+      trainingMap.clear();
       trainingPixels.clear();
-      map<int,Vector2d<float> > trainingMap;
+      // map<int,Vector2d<float> > trainingMap;
+      // map<string,Vector2d<float> > trainingMap;
       if(verbose_opt[0]>=1)
         cout << "reading imageShape file " << training_opt[0] << endl;
       try{
@@ -264,20 +268,21 @@ int main(int argc, char *argv[])
         cerr << "error catched" << std::endl;
         exit(1);
       }
-      //delete class 0
-      if(verbose_opt[0]>=1)
-        cout << "erasing class 0 from training set (" << trainingMap[0].size() << " from " << totalSamples << ") samples" << endl;
-      totalSamples-=trainingMap[0].size();
-      trainingMap.erase(0);
+      //delete class 0 ?
+      // if(verbose_opt[0]>=1)
+      //   std::cout << "erasing class 0 from training set (" << trainingMap[0].size() << " from " << totalSamples << ") samples" << std::endl;
+      // totalSamples-=trainingMap[0].size();
+      // trainingMap.erase(0);
       //convert map to vector
       short iclass=0;
-      if(reclass_opt.size()==1){//no reclass option, read classes from shape
+      if(reclass_opt.empty()){//no reclass option, read classes from shape
         reclassMap.clear();
         vreclass.clear();
       }
       if(verbose_opt[0]>1)
-        cout << "training pixels: " << endl;
-      map<int,Vector2d<float> >::iterator mapit=trainingMap.begin();
+        std::cout << "training pixels: " << std::endl;
+      map<string,Vector2d<float> >::iterator mapit=trainingMap.begin();
+      // map<int,Vector2d<float> >::iterator mapit=trainingMap.begin();
       while(mapit!=trainingMap.end()){
 //       for(map<int,Vector2d<float> >::const_iterator mapit=trainingMap.begin();mapit!=trainingMap.end();++mapit){
         //delete small classes
@@ -286,13 +291,14 @@ int main(int argc, char *argv[])
           continue;
           //todo: beware of reclass option: delete this reclass if no samples are left in this classes!!
         }
-        if(reclass_opt.size()==1){//no reclass option, read classes from shape
-          reclassMap[iclass]=(mapit->first);
-          vreclass.push_back(mapit->first);
+        if(reclass_opt.empty()){//no reclass option, read classes from shape
+          // reclassMap[iclass]=(mapit->first);
+          // vreclass.push_back(mapit->first);
+          vreclass.push_back(iclass);
         }
         trainingPixels.push_back(mapit->second);
         if(verbose_opt[0]>1)
-          cout << mapit->first << ": " << (mapit->second).size() << " samples" << endl;
+          std::cout << mapit->first << ": " << (mapit->second).size() << " samples" << std::endl;
         ++iclass;
         ++mapit;
       }
@@ -304,7 +310,8 @@ int main(int argc, char *argv[])
         assert(nclass==trainingPixels.size());
         assert(nband==(training_opt.size())?trainingPixels[0][0].size()-2:trainingPixels[0][0].size());
       }
-      assert(reclassMap.size()==nclass);
+      // assert(reclassMap.size()==nclass);
+      assert(vreclass.size()==nclass);
 
       //do not remove outliers here: could easily be obtained through ogr2ogr -where 'B2<110' output.shp input.shp
       //balance training data
@@ -483,8 +490,9 @@ int main(int argc, char *argv[])
     unsigned int nFeatures=trainingFeatures[0][0].size();
     unsigned int ntraining=0;
     for(int iclass=0;iclass<nclass;++iclass){
-      if(verbose_opt[0]>=1)
-        cout << "training sample size for class " << vcode[iclass] << ": " << trainingFeatures[iclass].size() << endl;
+      //vcode has size nreclass???
+      // if(verbose_opt[0]>=1)
+      //   cout << "training sample size for class " << vcode[iclass] << ": " << trainingFeatures[iclass].size() << endl;
       ntraining+=trainingFeatures[iclass].size();
     }
     const unsigned int num_layers = nneuron_opt.size()+2;
@@ -554,17 +562,48 @@ int main(int argc, char *argv[])
                                             referenceVector,
                                             outputVector,
                                             verbose_opt[0]);
-      ConfusionMatrix cm(nclass);
+      ConfusionMatrix cm;
+      map<string,Vector2d<float> >::iterator mapit=trainingMap.begin();
+      if(reclass_opt.empty()){
+        while(mapit!=trainingMap.end()){
+          cm.pushBackClassName(mapit->first);
+          ++mapit;
+        }
+      }
+      else{
+        if(verbose_opt[0]>1)
+          std::cout << "classes for confusion matrix: " << std::endl;
+        for(int iclass=0;iclass<nreclass;++iclass){
+          ostringstream os;
+          os << vcode[iclass];
+          if(verbose_opt[0]>1)
+            std::cout << os.str() << " ";
+          cm.pushBackClassName(os.str());
+        }
+        if(verbose_opt[0]>1)
+          std::cout << std::endl;
+      }
+      assert(cm.size()==nreclass);
+
       for(int isample=0;isample<referenceVector.size();++isample)
-        cm.incrementResult(cm.getClass(referenceVector[isample]),cm.getClass(outputVector[isample]),1);
+        cm.incrementResult(cm.getClass(vreclass[referenceVector[isample]]),cm.getClass(vreclass[outputVector[isample]]),1);
       assert(cm.nReference());
       std::cout << cm << std::endl;
-      std::cout << "Kappa: " << cm.kappa() << std::endl;
+      cout << "class #samples userAcc prodAcc" << endl;
+      double se95_ua=0;
+      double se95_pa=0;
       double se95_oa=0;
+      double dua=0;
+      double dpa=0;
       double doa=0;
+      for(int iclass=0;iclass<cm.nClasses();++iclass){
+        dua=cm.ua_pct(cm.getClass(iclass),&se95_ua);
+        dpa=cm.pa_pct(cm.getClass(iclass),&se95_pa);
+        cout << cm.getClass(iclass) << " " << cm.nReference(cm.getClass(iclass)) << " " << dua << " (" << se95_ua << ")" << " " << dpa << " (" << se95_pa << ")" << endl;
+      }
+      std::cout << "Kappa: " << cm.kappa() << std::endl;
       doa=cm.oa_pct(&se95_oa);
       std::cout << "Overall Accuracy: " << doa << " (" << se95_oa << ")"  << std::endl;
-      std::cout << "rmse cross-validation: " << rmse << std::endl;
     }
   
     if(verbose_opt[0]>=1)
@@ -602,7 +641,8 @@ int main(int argc, char *argv[])
   }
 
   //--------------------------------- end of training -----------------------------------
-
+  if(input_opt.empty())
+    exit(0);
 
   //-------------------------------- open image file ------------------------------------
   if(input_opt[0].find(".shp")==string::npos){
diff --git a/src/apps/pkclassify_nn.h b/src/apps/pkclassify_nn.h
index 583c982..c253e40 100644
--- a/src/apps/pkclassify_nn.h
+++ b/src/apps/pkclassify_nn.h
@@ -42,6 +42,13 @@ template<typename T> unsigned int readDataImageShape(const string &filename,
                                                      int verbose=false);
 
 template<typename T> unsigned int readDataImageShape(const string &filename,
+                                                     map<string,Vector2d<T> > &mapPixels, //[classNr][pixelNr][bandNr],
+                                                     vector<string>& fields,
+                                                     const vector<short>& bands,
+                                                     const string& label,
+                                                     int verbose=false);
+
+template<typename T> unsigned int readDataImageShape(const string &filename,
                                                      map<int,Vector2d<T> > &mapPixels, //[classNr][pixelNr][bandNr],
                                                      vector<string>& fields,
                                                      const vector<short>& bands,
@@ -117,6 +124,82 @@ template<typename T> unsigned int readDataImageShape(const string &filename,
   return totalSamples;
 }
 
+template<typename T> unsigned int readDataImageShape(const string &filename,
+                                                     map<string,Vector2d<T> > &mapPixels, //[classNr][pixelNr][bandNr],
+                                                     vector<string>& fields,
+                                                     const vector<short>& bands,
+                                                     const string& label,
+                                                     int verbose)
+{
+  mapPixels.clear();
+  int nsample=0;
+  int totalSamples=0;  
+  int nband=0;
+  if(verbose)
+    cout << "reading shape file " << filename  << endl;
+  ImgReaderOgr imgReaderShape;
+  try{
+    imgReaderShape.open(filename);
+    //only retain bands in fields
+    imgReaderShape.getFields(fields);
+    vector<string>::iterator fit=fields.begin();
+    if(verbose>1)
+      cout << "reading fields: ";
+    while(fit!=fields.end()){
+      if(verbose)
+        cout << *fit << " ";
+      // size_t pos=(*fit).find_first_not_of("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_ ");
+      if(((*fit).substr(0,1)=="B")&&((*fit).substr(1).find_first_not_of("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_ ")!=string::npos)){
+        int theBand=atoi((*fit).substr(1).c_str());
+        if(bands.size()){
+          bool validBand=false;
+          for(int iband=0;iband<bands.size();++iband){
+            if(theBand==bands[iband])
+              validBand=true;
+          }
+          if(validBand)
+            ++fit;
+          else
+            fields.erase(fit);
+        }
+        else
+          ++fit;
+      }
+      else
+        fields.erase(fit);
+    }
+    if(verbose)
+      cout << endl;
+    if(verbose){
+      cout << "fields:";
+      for(vector<string>::iterator fit=fields.begin();fit!=fields.end();++fit)
+        cout << " " << *fit;
+      cout << endl;
+    }
+    if(!nband){
+      if(verbose)
+        cout << "reading data" << endl;
+      nband=imgReaderShape.readData(mapPixels,OFTReal,fields,label,0,true,verbose==2);
+
+    }
+    else
+      assert(nband==imgReaderShape.readData(mapPixels,OFTReal,fields,label,0,true,false));
+  }
+  catch(string e){
+    ostringstream estr;
+    estr << e << " " << filename;
+    throw(estr.str());
+  }
+  nsample=imgReaderShape.getFeatureCount();
+  totalSamples+=nsample;
+  if(verbose)
+    cout << ": " << nsample << " samples read with " << nband << " bands" << endl;
+  imgReaderShape.close();
+  if(verbose)
+    cout << "total number of samples read " << totalSamples << endl;
+  return totalSamples;
+}
+
 
 template<typename T> unsigned int readDataImageShape(const string &filename,
                                                      map<int,Vector2d<T> > &mapPixels, //[classNr][pixelNr][bandNr],
@@ -185,4 +268,72 @@ template<typename T> unsigned int readDataImageShape(const string &filename,
     cout << "total number of samples read " << totalSamples << endl;
   return totalSamples;
 }
+
+template<typename T> unsigned int readDataImageShape(const string &filename,
+                                                     map<string,Vector2d<T> > &mapPixels, //[classNr][pixelNr][bandNr],
+                                                     vector<string>& fields,
+                                                     double start,
+                                                     double end,
+                                                     const string& label,
+                                                     int verbose)
+{
+  mapPixels.clear();
+  int nsample=0;
+  int totalSamples=0;  
+  int nband=0;
+  if(verbose)
+    cout << "reading shape file " << filename  << endl;
+  ImgReaderOgr imgReaderShape;
+  try{
+    imgReaderShape.open(filename);
+    //only retain bands in fields
+    imgReaderShape.getFields(fields);
+    vector<string>::iterator fit=fields.begin();
+    if(verbose)
+      cout << "reading fields: ";
+    while(fit!=fields.end()){
+      if(verbose)
+        cout << *fit << " ";
+      // size_t pos=(*fit).find_first_not_of("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_ ");
+      if(((*fit).substr(0,1)=="B")&&((*fit).substr(1).find_first_not_of("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_ ")!=string::npos)){
+        int iband=atoi((*fit).substr(1).c_str());
+        if((start||end)&&(iband<start||iband>end))
+          fields.erase(fit);
+        else
+          ++fit;
+      }
+      else
+        fields.erase(fit);
+    }
+    if(verbose)
+      cout << endl;
+    if(verbose){
+      cout << "fields:";
+      for(vector<string>::iterator fit=fields.begin();fit!=fields.end();++fit)
+        cout << " " << *fit;
+      cout << endl;
+    }
+    if(!nband){
+      if(verbose)
+        cout << "reading data" << endl;
+      nband=imgReaderShape.readData(mapPixels,OFTReal,fields,label,0,true,verbose==2);
+
+    }
+    else
+      assert(nband==imgReaderShape.readData(mapPixels,OFTReal,fields,label,0,true,false));
+  }
+  catch(string e){
+    ostringstream estr;
+    estr << e << " " << filename;
+    throw(estr.str());
+  }
+  nsample=imgReaderShape.getFeatureCount();
+  totalSamples+=nsample;
+  if(verbose)
+    cout << ": " << nsample << " samples read with " << nband << " bands" << endl;
+  imgReaderShape.close();
+  if(verbose)
+    cout << "total number of samples read " << totalSamples << endl;
+  return totalSamples;
+}
 #endif //_PKCLASSIFY_NN_H_
diff --git a/src/apps/pkclassify_svm.cc b/src/apps/pkclassify_svm.cc
index 9377fc6..2e36ce1 100644
--- a/src/apps/pkclassify_svm.cc
+++ b/src/apps/pkclassify_svm.cc
@@ -82,8 +82,9 @@ void reclass(const vector<double>& result, const vector<int>& vreclass, const ve
 
 int main(int argc, char *argv[])
 {
-  map<short,int> reclassMap;
+  // map<short,int> reclassMap;
   vector<int> vreclass;	  //vreclass: map nclass->nreclass
+  vector<int> vuniqueclass;
   vector<double> priors;
   vector<double> priorsReclass;
   
@@ -101,10 +102,10 @@ int main(int argc, char *argv[])
   Optionpk<bool> todo_opt("\0","todo","",false);
   Optionpk<string> input_opt("i", "input", "input image"); 
   Optionpk<string> training_opt("t", "training", "training shape file. A single shape file contains all training features (must be set as: B0, B1, B2,...) for all classes (class numbers identified by label option). Use multiple training files for bootstrap aggregation (alternative to the bag and bsize options, where a random subset is taken from a single training file)"); 
-  Optionpk<string> label_opt("\0", "label", "identifier for class label in training shape file.","label"); 
-  Optionpk<unsigned short> reclass_opt("\0", "rc", "reclass code (e.g. --rc=12 --rc=23 to reclass first two classes to 12 and 23 resp.).", 0);
-  Optionpk<unsigned int> balance_opt("\0", "balance", "balance the input data to this number of samples for each class", 0);
-  Optionpk<int> minSize_opt("m", "min", "if number of training pixels is less then min, do not take this class into account", 0);
+  Optionpk<string> label_opt("label", "label", "identifier for class label in training shape file.","label"); 
+  Optionpk<unsigned short> reclass_opt("rc", "rc", "reclass code (e.g. --rc=12 --rc=23 to reclass first two classes to 12 and 23 resp.)");
+  Optionpk<unsigned int> balance_opt("bal", "balance", "balance the input data to this number of samples for each class", 0);
+  Optionpk<int> minSize_opt("m", "min", "if number of training pixels is less then min, do not take this class into account (0: consider all classes)", 0);
   Optionpk<double> start_opt("s", "start", "start band sequence number (set to 0)",0); 
   Optionpk<double> end_opt("e", "end", "end band sequence number (set to 0 for all bands)", 0); 
   Optionpk<short> band_opt("b", "band", "band index (starting from 0, either use band option or use start to end)");
@@ -112,8 +113,7 @@ int main(int argc, char *argv[])
   Optionpk<double> scale_opt("\0", "scale", "scale value for each spectral band input features: refl=(DN[band]-offset[band])/scale[band] (use 0 if scale min and max in each band to -1.0 and 1.0)", 0.0);
   Optionpk<unsigned short> aggreg_opt("a", "aggreg", "how to combine aggregated classifiers, see also rc option (0: no aggregation, 1: sum rule, 2: max rule).",0);
   Optionpk<double> priors_opt("p", "prior", "prior probabilities for each class (e.g., -p 0.3 -p 0.3 -p 0.2 )", 0.0); 
-
-
+  Optionpk<unsigned short> cv_opt("cv", "cv", "n-fold cross validation mode",0);
   Optionpk<unsigned short> svm_type_opt("svmt", "svmtype", "type of SVM (0: C-SVC, 1: nu-SVC, 2: one-class SVM, 3: epsilon-SVR,	4: nu-SVR)",0);
   Optionpk<unsigned short> kernel_type_opt("kt", "kerneltype", "type of kernel function (0: linear: u'*v, 1: polynomial: (gamma*u'*v + coef0)^degree, 2: radial basis function: exp(-gamma*|u-v|^2), 3: sigmoid: tanh(gamma*u'*v + coef0), 4: precomputed kernel (kernel values in training_set_file)",2);
   Optionpk<unsigned short> kernel_degree_opt("kd", "kd", "degree in kernel function",3);
@@ -127,7 +127,6 @@ int main(int argc, char *argv[])
   Optionpk<bool> shrinking_opt("shrink", "shrink", "whether to use the shrinking heuristics",false);
   Optionpk<bool> prob_est_opt("pe", "probest", "whether to train a SVC or SVR model for probability estimates",false);
   // Optionpk<bool> weight_opt("wi", "wi", "set the parameter C of class i to weight*C, for C-SVC",true);
-  Optionpk<unsigned short> cv_opt("cv", "cv", "n-fold cross validation mode",0);
   Optionpk<unsigned short> comb_opt("c", "comb", "how to combine bootstrap aggregation classifiers (0: sum rule, 1: product rule, 2: max rule). Also used to aggregate classes with rc option.",0); 
   Optionpk<unsigned short> bag_opt("\0", "bag", "Number of bootstrap aggregations", 1);
   Optionpk<int> bagSize_opt("\0", "bsize", "Percentage of features used from available training features for each bootstrap aggregation", 100);
@@ -229,14 +228,10 @@ int main(int argc, char *argv[])
   int nband=0;
   int startBand=2;//first two bands represent X and Y pos
 
-  vector< vector<double> > offset(nbag);
-  vector< vector<double> > scale(nbag);
-  vector< Vector2d<float> > trainingPixels;//[class][sample][band]
-
-  if(reclass_opt.size()>1){
+  if(reclass_opt.size()){
     vreclass.resize(reclass_opt.size());
     for(int iclass=0;iclass<reclass_opt.size();++iclass){
-      reclassMap[iclass]=reclass_opt[iclass];
+      // reclassMap[iclass]=reclass_opt[iclass];
       vreclass[iclass]=reclass_opt[iclass];
     }
   }
@@ -258,15 +253,36 @@ int main(int argc, char *argv[])
     std::sort(band_opt.begin(),band_opt.end());
 
   //----------------------------------- Training -------------------------------
+  vector< vector<double> > offset(nbag);
+  vector< vector<double> > scale(nbag);
+  map<string,Vector2d<float> > trainingMap;
+  vector< Vector2d<float> > trainingPixels;//[class][sample][band]
+
   vector<struct svm_problem> prob(nbag);
   vector<struct svm_node *> x_space(nbag);
+
+  //test
+  // ImgReaderOgr testOgr(training_opt[0]);
+  // OGRDataSource* testSource=testOgr.getDataSource();
+  // OGRLayer  *poLayer=testSource->GetLayer(0);
+  // unsigned long int ifeature=0;
+  // if(poLayer!=NULL){
+  //   OGRFeatureDefn *poFDefn = poLayer->GetLayerDefn();
+  //   OGRFeature *poFeature;
+  //   while( (poFeature = poLayer->GetNextFeature()) != NULL ){
+  //     std::cout << "got feature " << ifeature << std::endl;
+  //     ++ifeature;
+  //   }
+  //   exit(1);
+  // }
+
   // struct svm_node *x_space;
   vector<string> fields;
   for(int ibag=0;ibag<nbag;++ibag){
     //organize training data
     if(ibag<training_opt.size()){//if bag contains new training pixels
+      trainingMap.clear();
       trainingPixels.clear();
-      map<int,Vector2d<float> > trainingMap;
       if(verbose_opt[0]>=1)
         std::cout << "reading imageShape file " << training_opt[0] << std::endl;
       try{
@@ -287,20 +303,21 @@ int main(int argc, char *argv[])
         cerr << "error catched" << std::endl;
         exit(1);
       }
-      //delete class 0
-      if(verbose_opt[0]>=1)
-        std::cout << "erasing class 0 from training set (" << trainingMap[0].size() << " from " << totalSamples << ") samples" << std::endl;
-      totalSamples-=trainingMap[0].size();
-      trainingMap.erase(0);
+      //todo: delete class 0 ?
+      // if(verbose_opt[0]>=1)
+      //   std::cout << "erasing class 0 from training set (" << trainingMap[0].size() << " from " << totalSamples << ") samples" << std::endl;
+      // totalSamples-=trainingMap[0].size();
+      // trainingMap.erase(0);
+
       //convert map to vector
       short iclass=0;
-      if(reclass_opt.size()==1){//no reclass option, read classes from shape
-        reclassMap.clear();
+      if(reclass_opt.empty()){//no reclass option, read classes from shape
+        // reclassMap.clear();
         vreclass.clear();
       }
       if(verbose_opt[0]>1)
         std::cout << "training pixels: " << std::endl;
-      map<int,Vector2d<float> >::iterator mapit=trainingMap.begin();
+      map<string,Vector2d<float> >::iterator mapit=trainingMap.begin();
       while(mapit!=trainingMap.end()){
 //       for(map<int,Vector2d<float> >::const_iterator mapit=trainingMap.begin();mapit!=trainingMap.end();++mapit){
         //delete small classes
@@ -309,9 +326,10 @@ int main(int argc, char *argv[])
           continue;
           //todo: beware of reclass option: delete this reclass if no samples are left in this classes!!
         }
-        if(reclass_opt.size()==1){//no reclass option, read classes from shape
-          reclassMap[iclass]=(mapit->first);
-          vreclass.push_back(mapit->first);
+        if(reclass_opt.empty()){//no reclass option, read classes from shape
+          // reclassMap[iclass]=(mapit->first);
+          // vreclass.push_back(mapit->first);
+          vreclass.push_back(iclass);
         }
         trainingPixels.push_back(mapit->second);
         if(verbose_opt[0]>1)
@@ -327,7 +345,8 @@ int main(int argc, char *argv[])
         assert(nclass==trainingPixels.size());
         assert(nband==trainingPixels[0][0].size()-2);
       }
-      assert(reclassMap.size()==nclass);
+      // assert(reclassMap.size()==nclass);
+      assert(vreclass.size()==nclass);
 
       //do not remove outliers here: could easily be obtained through ogr2ogr -where 'B2<110' output.shp input.shp
       //balance training data
@@ -438,7 +457,7 @@ int main(int argc, char *argv[])
         std::cout << std::endl; 
       }
       
-      vector<int> vuniqueclass=vreclass;
+      vuniqueclass=vreclass;
       //remove duplicate elements from vuniqueclass
       sort( vuniqueclass.begin(), vuniqueclass.end() );
       vuniqueclass.erase( unique( vuniqueclass.begin(), vuniqueclass.end() ), vuniqueclass.end() );
@@ -507,8 +526,9 @@ int main(int argc, char *argv[])
     unsigned int nFeatures=trainingFeatures[0][0].size();
     unsigned int ntraining=0;
     for(int iclass=0;iclass<nclass;++iclass){
-      if(verbose_opt[0]>=1)
-        std::cout << "training sample size for class " << vcode[iclass] << ": " << trainingFeatures[iclass].size() << std::endl;
+      //vcode has size nreclass???
+      // if(verbose_opt[0]>=1)
+      //   std::cout << "training sample size for class " << vcode[iclass] << ": " << trainingFeatures[iclass].size() << std::endl;
       ntraining+=trainingFeatures[iclass].size();
     }
     // vector<struct svm_problem> prob(ibag);
@@ -521,14 +541,10 @@ int main(int argc, char *argv[])
     int lIndex=0;
     for(int iclass=0;iclass<nclass;++iclass){
       for(int isample=0;isample<trainingFeatures[iclass].size();++isample){
-        // //test
-        // std::cout << iclass;
         prob[ibag].x[lIndex]=&(x_space[ibag][spaceIndex]);
         for(int ifeature=0;ifeature<nFeatures;++ifeature){
           x_space[ibag][spaceIndex].index=ifeature+1;
           x_space[ibag][spaceIndex].value=trainingFeatures[iclass][isample][ifeature];
-          // //test
-          // std::cout << " " << x_space[ibag][spaceIndex].index << ":" << x_space[ibag][spaceIndex].value;
           ++spaceIndex;
         }
         x_space[ibag][spaceIndex++].index=-1;
@@ -566,41 +582,65 @@ int main(int argc, char *argv[])
     if(verbose_opt[0]>1)
       std::cout << "SVM is now trained" << std::endl;
     if(cv_opt[0]>0){
-      std::cout << "Confusion matrix" << std::endl;
-      ConfusionMatrix cm(nclass);
-      // for(int iclass=0;iclass<nclass;++iclass)
-      //   cm.pushBackClassName(type2string(iclass));
+      //todo: implement reclassification
+      // ConfusionMatrix cm(nclass);
+      ConfusionMatrix cm;
+      map<string,Vector2d<float> >::iterator mapit=trainingMap.begin();
+      if(reclass_opt.empty()){
+        while(mapit!=trainingMap.end()){
+          cm.pushBackClassName(mapit->first);
+          ++mapit;
+        }
+      }
+      else{
+        if(verbose_opt[0]>1)
+          std::cout << "classes for confusion matrix: " << std::endl;
+        for(int iclass=0;iclass<nreclass;++iclass){
+          ostringstream os;
+          os << vcode[iclass];
+          if(verbose_opt[0]>1)
+            std::cout << os.str() << " ";
+          cm.pushBackClassName(os.str());
+        }
+        if(verbose_opt[0]>1)
+          std::cout << std::endl;
+      }
+      assert(cm.size()==nreclass);
+
       double *target = Malloc(double,prob[ibag].l);
       svm_cross_validation(&prob[ibag],&param[ibag],cv_opt[0],target);
       assert(param[ibag].svm_type != EPSILON_SVR&&param[ibag].svm_type != NU_SVR);//only for regression
-      int total_correct=0;
+
       for(int i=0;i<prob[ibag].l;i++)
-        cm.incrementResult(cm.getClass(prob[ibag].y[i]),cm.getClass(target[i]),1);
+        cm.incrementResult(cm.getClass(vreclass[prob[ibag].y[i]]),cm.getClass(vreclass[target[i]]),1);
       assert(cm.nReference());
       std::cout << cm << std::endl;
-      std::cout << "Kappa: " << cm.kappa() << std::endl;
+      cout << "class #samples userAcc prodAcc" << endl;
+      double se95_ua=0;
+      double se95_pa=0;
       double se95_oa=0;
+      double dua=0;
+      double dpa=0;
       double doa=0;
+      for(int iclass=0;iclass<cm.nClasses();++iclass){
+        dua=cm.ua_pct(cm.getClass(iclass),&se95_ua);
+        dpa=cm.pa_pct(cm.getClass(iclass),&se95_pa);
+        cout << cm.getClass(iclass) << " " << cm.nReference(cm.getClass(iclass)) << " " << dua << " (" << se95_ua << ")" << " " << dpa << " (" << se95_pa << ")" << endl;
+      }
+      std::cout << "Kappa: " << cm.kappa() << std::endl;
       doa=cm.oa_pct(&se95_oa);
       std::cout << "Overall Accuracy: " << doa << " (" << se95_oa << ")"  << std::endl;
       free(target);
     }
-
     // *NOTE* Because svm_model contains pointers to svm_problem, you can
     // not free the memory used by svm_problem if you are still using the
     // svm_model produced by svm_train(). 
-
-    // free(prob.y);
-    // free(prob.x);
-    // free(x_space);
-    // svm_destroy_param(&param);
   }//for ibag
 
   //--------------------------------- end of training -----------------------------------
-  if(!output_opt.size())
+  if(input_opt.empty())
     exit(0);
 
-
   const char* pszMessage;
   void* pProgressArg=NULL;
   GDALProgressFunc pfnProgress=GDALTermProgress;
diff --git a/src/imageclasses/ImgReaderOgr.h b/src/imageclasses/ImgReaderOgr.h
index 5863420..2ead995 100644
--- a/src/imageclasses/ImgReaderOgr.h
+++ b/src/imageclasses/ImgReaderOgr.h
@@ -49,6 +49,7 @@ public:
   template <typename T> int readData(vector<T>& data, const OGRFieldType& fieldType, const string& theField, int layer=0, bool verbose=false);
   template <typename T> int readData(Vector2d<T>& data, const OGRFieldType& fieldType, vector<string>& fields, int layer=0, bool pos=false, bool verbose=false);//default layer 0 and no pos information in data
   template <typename T> int readData(map<int,Vector2d<T> >& data, const OGRFieldType& fieldType, vector<string>& fields, const string& label, int layer=0, bool pos=false, bool verbose=false);//default layer 0 and no pos information in data
+  template <typename T> int readData(map<string,Vector2d<T> >& data, const OGRFieldType& fieldType, vector<string>& fields, const string& label, int layer=0, bool pos=false, bool verbose=false);//default layer 0 and no pos information in data
   void shape2ascii(ostream& theOstream, const string& pointname, int layer=0, bool verbose=false);
   unsigned long int getFeatureCount(int layer=0) const;
   int getFieldCount(int layer=0) const;
@@ -207,6 +208,141 @@ template <typename T> int ImgReaderOgr::readData(map<int,Vector2d<T> >& data, co
   }
 }
 
+//read data from all features in a map, organized by class names
+template <typename T> int ImgReaderOgr::readData(map<string,Vector2d<T> >& data, const OGRFieldType& fieldType, vector<string>& fields, const string& label, int layer, bool pos, bool verbose)
+{
+  if(layer<0)
+    layer=m_datasource->GetLayerCount()-1;
+  assert(m_datasource->GetLayerCount()>layer);
+  OGRLayer  *poLayer;
+  if(verbose)
+    cout << "number of layers: " << m_datasource->GetLayerCount() << endl;
+  poLayer = m_datasource->GetLayer(layer);
+  if(poLayer!=NULL){
+    OGRFeatureDefn *poFDefn = poLayer->GetLayerDefn();
+    if(fields.empty()){
+      fields.resize(poFDefn->GetFieldCount());
+      if(verbose)
+        cout << "resized fields to " << fields.size() << endl;
+    }
+    //start reading features from the layer
+    OGRFeature *poFeature;
+    if(verbose)
+      cout << "reset reading" << endl;
+    poLayer->ResetReading();
+    unsigned long int ifeature=0;
+    int posOffset=(pos)?2:0;
+    if(verbose)
+      cout << "going through features to fill in string map" << endl << flush;
+    string theClass;
+    while( (poFeature = poLayer->GetNextFeature()) != NULL ){
+      vector<T> theFeature;//(fields.size()+posOffset);//x,y+selectedfields
+      if(verbose)
+        cout << "reading feature " << ifeature << endl << flush;
+      OGRGeometry *poGeometry;
+      poGeometry = poFeature->GetGeometryRef();
+      if(verbose){
+        if(poGeometry == NULL)
+          cerr << "no geometry defined" << endl << flush;
+        else if(wkbFlatten(poGeometry->getGeometryType()) != wkbPoint)
+          cerr << "Warning: poGeometry type: " << wkbFlatten(poGeometry->getGeometryType()) << endl << flush;
+      }
+      assert(poGeometry != NULL );
+             // && wkbFlatten(poGeometry->getGeometryType()) == wkbPoint);
+      OGRPoint *poPoint;
+      if(pos){
+        poPoint = (OGRPoint *) poGeometry;
+        if(wkbFlatten(poGeometry->getGeometryType()) == wkbPoint){
+        theFeature.push_back(poPoint->getX());
+        theFeature.push_back(poPoint->getY());
+        }
+        else if(wkbFlatten(poGeometry->getGeometryType()) == wkbPolygon){
+          OGRPolygon * poPolygon = (OGRPolygon *) poGeometry;
+          poPolygon->Centroid(poPoint);
+          theFeature.push_back(poPoint->getX());
+          theFeature.push_back(poPoint->getY());
+        }        
+        else{
+          string errorstring="Error: Centroid for non polygon geometry not supported until OGR 1.8.0, change ImgReaderOgr if version >= 1.8.0 is installed...";
+            throw(errorstring);
+          // poGeometry->Centroid(poPoint);
+          // theFeature.push_back(poPoint->getX());
+          // theFeature.push_back(poPoint->getY());
+        }       
+      }
+      // OGRFeatureDefn *poFDefn = poLayer->GetLayerDefn();//got LayerDefn already...
+      string featurename;
+      for(int iField=0;iField<poFDefn->GetFieldCount();++iField){
+        OGRFieldDefn *poFieldDefn = poFDefn->GetFieldDefn(iField);
+        string fieldname=poFieldDefn->GetNameRef();
+        if(fieldname==label){
+          theClass=poFeature->GetFieldAsString(iField);
+          if(verbose)
+            std::cout << "read feature for " << theClass << std::endl;
+        }
+        else{
+          switch(fieldType){
+          case(OFTReal):
+            if(fields.size()<poFDefn->GetFieldCount()){
+              if(find(fields.begin(),fields.end(),fieldname)!=fields.end())
+                theFeature.push_back(poFeature->GetFieldAsDouble(iField));
+            }
+            else{
+              fields[iField]=fieldname;
+              theFeature.push_back(poFeature->GetFieldAsDouble(iField));
+            }
+            break;
+          case(OFTInteger):
+            if(fields.size()<poFDefn->GetFieldCount()){
+              if(find(fields.begin(),fields.end(),fieldname)!=fields.end())
+                theFeature.push_back(poFeature->GetFieldAsDouble(iField));
+            }
+            else{
+              fields[iField]=fieldname;
+              theFeature.push_back(poFeature->GetFieldAsDouble(iField));
+            }
+            break;
+          default:
+            {
+              string errorstring="field type not supported in ImgReaderOgr::ReadData";
+              throw(errorstring);
+            }
+            break;
+          }
+        }
+      }
+      data[theClass].push_back(theFeature);
+      ++ifeature;
+      ++ifeature;
+    }
+    if(verbose)
+      cout << "number of features read: " << ifeature << endl << flush;
+    typename map<string,Vector2d<T> >::const_iterator mit=data.begin();
+    int nband=0;
+    if(verbose)
+      cout << "read classes: " << flush;
+    while(mit!=data.end()){
+      if(verbose)
+        cout << mit->first << " " << flush;
+      if(!nband)
+        nband=fields.size();
+      if(pos)
+        assert((mit->second)[0].size()==nband+2);
+      else
+        assert((mit->second)[0].size()==nband);
+      ++mit;
+    }
+    if(verbose)
+      cout << endl << flush;
+    return(nband);
+  }
+  else{
+    ostringstream ess;
+    ess << "no layer in " << m_filename;
+    throw(ess.str());
+  }
+}
+
 //read x positions
 template <typename T> int ImgReaderOgr::readXY(vector<T>& xVector, vector<T>& yVector, int layer, bool verbose){
   if(layer<0)

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/pkg-grass/pktools.git