[pktools] 29/375: feature selection with automatic detection of optimal number of features in pkfs_svm and pkfs_nn

Bas Couwenberg sebastic at xs4all.nl
Wed Dec 3 21:53:55 UTC 2014


This is an automated email from the git hooks/post-receive script.

sebastic-guest pushed a commit to branch upstream-master
in repository pktools.

commit 3531dce9caf7f952a345fe9bccf28d5dc432182d
Author: Pieter Kempeneers <kempenep at gmail.com>
Date:   Mon Jan 7 10:31:59 2013 +0100

    feature selection with automatic detection of optimal number of features in pkfs_svm and pkfs_nn
---
 src/algorithms/FeatureSelector.h |  4 ++--
 src/apps/pkfs_svm.cc             | 48 +++++++++++++++-------------------------
 2 files changed, 20 insertions(+), 32 deletions(-)

diff --git a/src/algorithms/FeatureSelector.h b/src/algorithms/FeatureSelector.h
index 53cb7d0..6eebf46 100644
--- a/src/algorithms/FeatureSelector.h
+++ b/src/algorithms/FeatureSelector.h
@@ -177,7 +177,7 @@ template<class T> double FeatureSelector::floating(vector< Vector2d<T> >& v, dou
     else if(verbose){
       for(list<int>::const_iterator lit=subset.begin();lit!=subset.end();++lit)
         cout << *lit << " ";
-      cout << " (" << cost.back() << ")" << endl;
+      cout << " (cost: " << cost.back() << ")" << endl;
     }
 
     while(k>1){
@@ -192,7 +192,7 @@ template<class T> double FeatureSelector::floating(vector< Vector2d<T> >& v, dou
         else if(verbose){
           for(list<int>::const_iterator lit=subset.begin();lit!=subset.end();++lit)
             cout << *lit << " ";
-          cout << " (" << cost.back() << ")" << endl;
+          cout << " (cost: " << cost.back() << ")" << endl;
         }
 	continue;
       }
diff --git a/src/apps/pkfs_svm.cc b/src/apps/pkfs_svm.cc
index adb0421..e7d799f 100644
--- a/src/apps/pkfs_svm.cc
+++ b/src/apps/pkfs_svm.cc
@@ -161,7 +161,7 @@ int main(int argc, char *argv[])
   Optionpk<bool> todo_opt("\0","todo","",false);
   Optionpk<string> training_opt("t", "training", "training shape file. A single shape file contains all training features (must be set as: B0, B1, B2,...) for all classes (class numbers identified by label option). Use multiple training files for bootstrap aggregation (alternative to the bag and bsize options, where a random subset is taken from a single training file)"); 
   Optionpk<string> label_opt("\0", "label", "identifier for class label in training shape file.","label"); 
-  Optionpk<unsigned short> maxFeatures_opt("n", "nf", "number of features to select (0 to select all)", 0);
+  Optionpk<unsigned short> maxFeatures_opt("n", "nf", "number of features to select (0 to select optimal number, see also ecost option)", 0);
   Optionpk<unsigned short> reclass_opt("\0", "rc", "reclass code (e.g. --rc=12 --rc=23 to reclass first two classes to 12 and 23 resp.).", 0);
   Optionpk<unsigned int> balance_opt("\0", "balance", "balance the input data to this number of samples for each class", 0);
   Optionpk<int> minSize_opt("m", "min", "if number of training pixels is less then min, do not take this class into account", 0);
@@ -493,11 +493,10 @@ int main(int argc, char *argv[])
   }
 
   int nFeatures=trainingFeatures[0][0].size();
-  int maxFeatures=maxFeatures_opt[0];
-  double previousCost=(maxFeatures_opt[0])? 1 : 0;
-  double currentCost=1;
-  list<int> subset;//set of selected features (levels) for each class combination
+  int maxFeatures=(maxFeatures_opt[0])? maxFeatures_opt[0] : 1;
+  double previousCost=-1;
   double cost=0;
+  list<int> subset;//set of selected features (levels) for each class combination
   FeatureSelector selector;
   try{
     if(maxFeatures==nFeatures){
@@ -506,11 +505,12 @@ int main(int argc, char *argv[])
         subset.push_back(ifeature);
       cost=getCost(trainingFeatures);
     }
-    else if(!maxFeatures_opt[0]){
-      while(currentCost-previousCost>epsilon_cost_opt[0]){
-        ++maxFeatures;
+    else{
+      while(cost-previousCost>epsilon_cost_opt[0]){
+        previousCost=cost;
         switch(selMap[selector_opt[0]]){
         case(SFFS):
+          subset.clear();//needed to clear in case of floating and brute force search
           cost=selector.floating(trainingFeatures,&getCost,subset,maxFeatures,verbose_opt[0]);
           break;
         case(SFS):
@@ -520,6 +520,7 @@ int main(int argc, char *argv[])
           cost=selector.backward(trainingFeatures,&getCost,subset,maxFeatures,verbose_opt[0]);
           break;
         case(BFS):
+          subset.clear();//needed to clear in case of floating and brute force search
           cost=selector.bruteForce(trainingFeatures,&getCost,subset,maxFeatures,verbose_opt[0]);
           break;
         default:
@@ -527,28 +528,15 @@ int main(int argc, char *argv[])
           exit(1);
           break;
         }
-        previousCost=currentCost;
-        currentCost=cost;
-      }
-    }
-    else{
-      switch(selMap[selector_opt[0]]){
-      case(SFFS):
-        cost=selector.floating(trainingFeatures,&getCost,subset,maxFeatures,verbose_opt[0]);
-        break;
-      case(SFS):
-        cost=selector.forward(trainingFeatures,&getCost,subset,maxFeatures,verbose_opt[0]);
-        break;
-      case(SBS):
-        cost=selector.backward(trainingFeatures,&getCost,subset,maxFeatures,verbose_opt[0]);
-        break;
-      case(BFS):
-        cost=selector.bruteForce(trainingFeatures,&getCost,subset,maxFeatures,verbose_opt[0]);
-        break;
-      default:
-        std::cout << "Error: selector not supported, please use sffs, sfs, sbs or bfs" << std::endl;
-        exit(1);
-        break;
+        if(verbose_opt[0]){
+          std::cout << "cost: " << cost << std::endl;
+          std::cout << "previousCost: " << previousCost << std::endl;
+          std::cout << std::setprecision(12) << "cost-previousCost: " << cost - previousCost << " ( " << epsilon_cost_opt[0] << ")" << std::endl;
+        }
+        if(!maxFeatures_opt[0])
+          ++maxFeatures;
+        else
+          break;
       }
     }
   }
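
Editor's note on the pkfs_svm.cc hunk above: the new control flow starts from a single feature and keeps re-running the chosen search (sffs/sfs/sbs/bfs) with a growing maxFeatures until the cost improvement drops below the epsilon threshold (the ecost option), or runs the search once when -n is given explicitly. The standalone sketch below illustrates only that stopping criterion, under assumed names: evaluateSubset() is a hypothetical toy cost curve, not the pktools getCost()/FeatureSelector API.

// Minimal, self-contained sketch of the epsilon-based stopping rule.
// evaluateSubset() is a hypothetical placeholder for a real cost function
// (e.g. cross-validated classifier accuracy); it is NOT pktools code.
#include <cmath>
#include <iostream>

// Toy diminishing-returns curve: cost saturates as more features are added.
double evaluateSubset(int nSelected) {
  return 1.0 - std::exp(-0.8 * nSelected);
}

int main() {
  const int nFeatures = 10;         // total number of candidate features
  const double epsilonCost = 0.001; // analogous to the ecost option
  int maxFeatures = 1;              // start from a single feature
  double previousCost = -1;         // forces at least one iteration
  double cost = 0;

  // Grow the subset size until the cost improvement falls below epsilon,
  // mirroring the while(cost - previousCost > epsilon) loop in pkfs_svm.cc.
  while (cost - previousCost > epsilonCost && maxFeatures <= nFeatures) {
    previousCost = cost;
    cost = evaluateSubset(maxFeatures); // re-run the search at this size
    std::cout << "maxFeatures=" << maxFeatures
              << " cost=" << cost
              << " improvement=" << cost - previousCost << std::endl;
    ++maxFeatures;
  }
  std::cout << "stopped growing at subset size " << maxFeatures - 1
            << " (improvement fell below epsilon)" << std::endl;
  return 0;
}
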

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/pkg-grass/pktools.git
