[pktools] 29/375: feature selection with automatic detection of optimal number of features in pkfs_svm and pkfs_nn
Bas Couwenberg
sebastic at xs4all.nl
Wed Dec 3 21:53:55 UTC 2014
This is an automated email from the git hooks/post-receive script.
sebastic-guest pushed a commit to branch upstream-master
in repository pktools.
commit 3531dce9caf7f952a345fe9bccf28d5dc432182d
Author: Pieter Kempeneers <kempenep at gmail.com>
Date: Mon Jan 7 10:31:59 2013 +0100
feature selection with automatic detection of optimal number of features in pkfs_svm and pkfs_nn
---
src/algorithms/FeatureSelector.h | 4 ++--
src/apps/pkfs_svm.cc | 48 +++++++++++++++-------------------------
2 files changed, 20 insertions(+), 32 deletions(-)
diff --git a/src/algorithms/FeatureSelector.h b/src/algorithms/FeatureSelector.h
index 53cb7d0..6eebf46 100644
--- a/src/algorithms/FeatureSelector.h
+++ b/src/algorithms/FeatureSelector.h
@@ -177,7 +177,7 @@ template<class T> double FeatureSelector::floating(vector< Vector2d<T> >& v, dou
else if(verbose){
for(list<int>::const_iterator lit=subset.begin();lit!=subset.end();++lit)
cout << *lit << " ";
- cout << " (" << cost.back() << ")" << endl;
+ cout << " (cost: " << cost.back() << ")" << endl;
}
while(k>1){
@@ -192,7 +192,7 @@ template<class T> double FeatureSelector::floating(vector< Vector2d<T> >& v, dou
else if(verbose){
for(list<int>::const_iterator lit=subset.begin();lit!=subset.end();++lit)
cout << *lit << " ";
- cout << " (" << cost.back() << ")" << endl;
+ cout << " (cost: " << cost.back() << ")" << endl;
}
continue;
}
diff --git a/src/apps/pkfs_svm.cc b/src/apps/pkfs_svm.cc
index adb0421..e7d799f 100644
--- a/src/apps/pkfs_svm.cc
+++ b/src/apps/pkfs_svm.cc
@@ -161,7 +161,7 @@ int main(int argc, char *argv[])
Optionpk<bool> todo_opt("\0","todo","",false);
Optionpk<string> training_opt("t", "training", "training shape file. A single shape file contains all training features (must be set as: B0, B1, B2,...) for all classes (class numbers identified by label option). Use multiple training files for bootstrap aggregation (alternative to the bag and bsize options, where a random subset is taken from a single training file)");
Optionpk<string> label_opt("\0", "label", "identifier for class label in training shape file.","label");
- Optionpk<unsigned short> maxFeatures_opt("n", "nf", "number of features to select (0 to select all)", 0);
+ Optionpk<unsigned short> maxFeatures_opt("n", "nf", "number of features to select (0 to select optimal number, see also ecost option)", 0);
Optionpk<unsigned short> reclass_opt("\0", "rc", "reclass code (e.g. --rc=12 --rc=23 to reclass first two classes to 12 and 23 resp.).", 0);
Optionpk<unsigned int> balance_opt("\0", "balance", "balance the input data to this number of samples for each class", 0);
Optionpk<int> minSize_opt("m", "min", "if number of training pixels is less then min, do not take this class into account", 0);
@@ -493,11 +493,10 @@ int main(int argc, char *argv[])
}
int nFeatures=trainingFeatures[0][0].size();
- int maxFeatures=maxFeatures_opt[0];
- double previousCost=(maxFeatures_opt[0])? 1 : 0;
- double currentCost=1;
- list<int> subset;//set of selected features (levels) for each class combination
+ int maxFeatures=(maxFeatures_opt[0])? maxFeatures_opt[0] : 1;
+ double previousCost=-1;
double cost=0;
+ list<int> subset;//set of selected features (levels) for each class combination
FeatureSelector selector;
try{
if(maxFeatures==nFeatures){
@@ -506,11 +505,12 @@ int main(int argc, char *argv[])
subset.push_back(ifeature);
cost=getCost(trainingFeatures);
}
- else if(!maxFeatures_opt[0]){
- while(currentCost-previousCost>epsilon_cost_opt[0]){
- ++maxFeatures;
+ else{
+ while(cost-previousCost>epsilon_cost_opt[0]){
+ previousCost=cost;
switch(selMap[selector_opt[0]]){
case(SFFS):
+ subset.clear();//needed to clear in case of floating and brute force search
cost=selector.floating(trainingFeatures,&getCost,subset,maxFeatures,verbose_opt[0]);
break;
case(SFS):
@@ -520,6 +520,7 @@ int main(int argc, char *argv[])
cost=selector.backward(trainingFeatures,&getCost,subset,maxFeatures,verbose_opt[0]);
break;
case(BFS):
+ subset.clear();//needed to clear in case of floating and brute force search
cost=selector.bruteForce(trainingFeatures,&getCost,subset,maxFeatures,verbose_opt[0]);
break;
default:
@@ -527,28 +528,15 @@ int main(int argc, char *argv[])
exit(1);
break;
}
- previousCost=currentCost;
- currentCost=cost;
- }
- }
- else{
- switch(selMap[selector_opt[0]]){
- case(SFFS):
- cost=selector.floating(trainingFeatures,&getCost,subset,maxFeatures,verbose_opt[0]);
- break;
- case(SFS):
- cost=selector.forward(trainingFeatures,&getCost,subset,maxFeatures,verbose_opt[0]);
- break;
- case(SBS):
- cost=selector.backward(trainingFeatures,&getCost,subset,maxFeatures,verbose_opt[0]);
- break;
- case(BFS):
- cost=selector.bruteForce(trainingFeatures,&getCost,subset,maxFeatures,verbose_opt[0]);
- break;
- default:
- std::cout << "Error: selector not supported, please use sffs, sfs, sbs or bfs" << std::endl;
- exit(1);
- break;
+ if(verbose_opt[0]){
+ std::cout << "cost: " << cost << std::endl;
+ std::cout << "previousCost: " << previousCost << std::endl;
+ std::cout << std::setprecision(12) << "cost-previousCost: " << cost - previousCost << " ( " << epsilon_cost_opt[0] << ")" << std::endl;
+ }
+ if(!maxFeatures_opt[0])
+ ++maxFeatures;
+ else
+ break;
}
}
}
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/pkg-grass/pktools.git
More information about the Pkg-grass-devel mailing list