[pktools] 27/375: added feature selection
Bas Couwenberg
sebastic at xs4all.nl
Wed Dec 3 21:53:55 UTC 2014
This is an automated email from the git hooks/post-receive script.
sebastic-guest pushed a commit to branch upstream-master
in repository pktools.
commit 094c17f13074e221f93e7c4f4e560952c79e7f82
Author: Pieter Kempeneers <kempenep at gmail.com>
Date: Wed Jan 2 18:18:19 2013 +0100
added feature selection
---
ChangeLog | 3 +
src/algorithms/FeatureSelector.h | 777 +++++++++++++++++++++++++++++++++++++++
src/algorithms/Makefile.am | 4 +-
src/algorithms/Makefile.in | 14 +-
src/apps/Makefile.am | 3 +-
src/apps/Makefile.in | 27 +-
src/apps/pkclassify_svm.cc | 5 +-
src/apps/pkfs_svm.cc | 549 +++++++++++++++++++++++++++
8 files changed, 1362 insertions(+), 20 deletions(-)
diff --git a/ChangeLog b/ChangeLog
index 3cfd117..6364779 100755
--- a/ChangeLog
+++ b/ChangeLog
@@ -61,6 +61,9 @@ version 2.4
options min and max are now set with -min (--min) and -max (--max)
- pkclassify_svm
do not output input file if no input data was defined in verbose mode
+ update of header information
+ - pkfs_svm
+ feature selection tool for svm classification (added)
- pkascii2ogr
tool to create simple vector files from coordinates in ASCII file (points or polygon)
- pksensormodel
diff --git a/src/algorithms/FeatureSelector.h b/src/algorithms/FeatureSelector.h
new file mode 100644
index 0000000..503dfac
--- /dev/null
+++ b/src/algorithms/FeatureSelector.h
@@ -0,0 +1,777 @@
+/**********************************************************************
+FeatureSelector.h: select features, typical use: feature selection for classification
+Copyright (C) 2008-2012 Pieter Kempeneers
+
+This file is part of pktools
+
+pktools is free software: you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation, either version 3 of the License, or
+(at your option) any later version.
+
+pktools is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with pktools. If not, see <http://www.gnu.org/licenses/>.
+***********************************************************************/
+#ifndef _FEATURESELECTOR_H_
+#define _FEATURESELECTOR_H_
+
+#include <math.h>
+#include <vector>
+#include <list>
+#include <map>
+#include <algorithm>
+#include <iostream>
+#include <iomanip>
+#include "PosValue.h"
+#include "Vector2d.h"
+#include "gsl/gsl_combination.h"
+
+using namespace std;
+
+//FeatureSelector: wrapper feature-subset selection strategies for
+//classification. Each method takes training samples (a two-class Vector2d
+//pair, a per-class vector of Vector2d, or a class-name-keyed map plus
+//labels), a user-supplied cost criterion, and an in/out list of selected
+//feature (column) indices; the best cost found is returned.
+class FeatureSelector
+{
+ public:
+  FeatureSelector(){};
+  ~FeatureSelector(){};
+  //rank features individually and keep the N best (univariate forward selection)
+  template<class T> double forwardUnivariate(Vector2d<T>& v1, Vector2d<T>& v2, double (*getCost)(const Vector2d<T>&, const Vector2d<T>&, double prior1, double prior2), list<int>& subset, int maxFeatures, double prior1=0.5, double prior2=0.5, bool verbose=false);
+  template<class T> double forwardUnivariate(vector< Vector2d<T> >& v, double (*getCost)(const vector< Vector2d<T> >&), list<int>& subset, int maxFeatures, bool verbose=false);
+  template<class T> double forwardUnivariate(map<string, Vector2d<T> > &v, map<string, vector<T> > &y, double (*getCost)(map<string, Vector2d<T> >&, map<string, vector<T> >&), list<int>& subset, int maxFeatures, bool verbose=false);
+  //greedy forward selection: repeatedly add the feature that maximises the cost
+  template<class T> double forward(Vector2d<T>& v1, Vector2d<T>& v2, double (*getCost)(const Vector2d<T>&, const Vector2d<T>&, double prior1, double prior2), list<int>& subset, int maxFeatures, double prior1=0.5, double prior2=0.5, bool verbose=false);
+  template<class T> double forward(vector< Vector2d<T> >& v, double (*getCost)(const vector< Vector2d<T> >&), list<int>& subset, int maxFeatures, bool verbose=false);
+  template<class T> double forward(map<string, Vector2d<T> > &v, map<string, vector<T> > &y, double (*getCost)(map<string, Vector2d<T> >&, map<string, vector<T> >&), list<int>& subset, int maxFeatures, bool verbose=false);
+  //greedy backward elimination: repeatedly drop the least useful feature
+  template<class T> double backward(vector< Vector2d<T> >& v, double (*getCost)(const vector< Vector2d<T> >&), list<int>& subset, int minFeatures, bool verbose=false);
+  template<class T> double backward(Vector2d<T>& v1, Vector2d<T>& v2, double (*getCost)(const Vector2d<T>&, const Vector2d<T>&, double prior1, double prior2), list<int>& subset, int minFeatures, double prior1=0.5, double prior2=0.5,bool verbose=false);
+  template<class T> double backward(map<string, Vector2d<T> > &v, map<string, vector<T> > &y, double (*getCost)(map<string, Vector2d<T> >&, map<string, vector<T> >&), list<int>& subset, int minFeatures, bool verbose=false);
+  //sequential forward floating selection (add, then conditionally backtrack)
+  template<class T> double floating(vector< Vector2d<T> >& v, double (*getCost)(const vector< Vector2d<T> >&), list<int>& subset, int maxFeatures=0, bool verbose=false);
+  template<class T> double floating(Vector2d<T>& v1, Vector2d<T>& v2, double (*getCost)(const Vector2d<T>&, const Vector2d<T>&, double prior1, double prior2), list<int>& subset, int maxFeatures=0, double prior1=0.5, double prior2=0.5,bool verbose=false);
+  template<class T> double floating(map<string, Vector2d<T> > &v, map<string, vector<T> > &y, double (*getCost)(map<string, Vector2d<T> >&, map<string, vector<T> >&), list<int>& subset, int maxFeatures, bool verbose=false);
+  //exhaustive search over all combinations of maxFeatures features
+  template<class T> double bruteForce(Vector2d<T>& v1, Vector2d<T>& v2, double (*getCost)(const Vector2d<T>&, const Vector2d<T>&, double prior1, double prior2), list<int>& subset, int maxFeatures=0, double prior1=0.5, double prior2=0.5, bool verbose=false);
+
+  private:
+  //single-step helpers used by the strategies above
+  template<class T> double addFeature(vector< Vector2d<T> >& v, double (*getCost)(const vector< Vector2d<T> >&), list<int>& subset, bool verbose=false);
+  template<class T> double addFeature(Vector2d<T>& v1, Vector2d<T>& v2, double (*getCost)(const Vector2d<T>&, const Vector2d<T>&, double prior1, double prior2), list<int>& subset, double prior1=0.5, double prior2=0.5, bool verbose=false);
+  template<class T> double addFeature(map<string, Vector2d<T> > &v, map<string, vector<T> > &y, double (*getCost)(map<string, Vector2d<T> >&, map<string, vector<T> >&), list<int>& subset, bool verbose=false);
+  template<class T> double removeFeature(vector< Vector2d<T> >& v, double (*getCost)(const vector< Vector2d<T> >&), list<int>& subset, int& r, bool verbose=false);
+  template<class T> double removeFeature(Vector2d<T>& v1, Vector2d<T>& v2, double (*getCost)(const Vector2d<T>&, const Vector2d<T>&, double prior1, double prior2), list<int>& subset,int& r, double prior1=0.5, double prior2=0.5, bool verbose=false);
+  template<class T> double removeFeature(map<string, Vector2d<T> > &v, map<string, vector<T> > &y, double (*getCost)(map<string, Vector2d<T> >&, map<string, vector<T> >&), list<int>& subset,int& r, bool verbose=false);
+};
+
+//sequential forward selection Univariate (N single best features)
+//Rank every candidate feature by the cost obtained when it is added alone
+//to the current subset, keep the maxFeatures best-ranked features, then
+//return the cost of the resulting subset (features are dropped from the
+//back if getCost throws). Returns -1 if nothing could be selected.
+template<class T> double FeatureSelector::forwardUnivariate(Vector2d<T>& v1, Vector2d<T>& v2, double (*getCost)(const Vector2d<T>&, const Vector2d<T>&, double prior1, double prior2), list<int>& subset, int maxFeatures, double prior1, double prior2, bool verbose){
+  int maxLevels=v1[0].size();
+  if(!maxFeatures)
+    maxFeatures=maxLevels;
+  int k=subset.size();
+  if(k>=maxFeatures)
+    return -1;
+  vector<PosValue> cost(maxLevels);
+  list<int> tmpset=subset;//temporary set of selected features (levels)
+  Vector2d<T> tmp1(v1.size());
+  Vector2d<T> tmp2(v2.size());
+  for(int ilevel=0;ilevel<maxLevels;++ilevel){
+    if(find(tmpset.begin(),tmpset.end(),ilevel)==tmpset.end()){
+      tmpset.push_back(ilevel);
+      v1.selectCols(tmpset,tmp1);
+      v2.selectCols(tmpset,tmp2);
+      try{
+        PosValue pv;
+        pv.position=ilevel;
+        pv.value=getCost(tmp1,tmp2,prior1,prior2);
+        cost[ilevel]=pv;
+      }
+      catch(...){ //cost could not be computed (e.g. singular matrix): mark unusable
+        PosValue pv;
+        pv.position=ilevel;
+        pv.value=-1;
+        cost[ilevel]=pv;
+      }
+      tmpset.pop_back();
+    }
+  }
+  sort(cost.begin(),cost.end(),Compare_PosValue());//decreasing order
+  int ilevel=0;
+  while((static_cast<int>(subset.size())<maxFeatures)&&(ilevel<maxLevels)){
+    if(cost[ilevel].value>0){
+      subset.push_back(cost[ilevel].position);
+      //only report features that were actually selected (the original also
+      //printed subset.back() for rejected candidates, dereferencing a
+      //possibly empty list)
+      if(verbose)
+        cout << "feature " << subset.back() << " has cost: " << cost[ilevel].value << endl;
+    }
+    ++ilevel;
+  }
+  //re-evaluate the cost of the complete subset, dropping features from the
+  //back until getCost succeeds
+  double maxCost=-1;
+  while(subset.size()){
+    v1.selectCols(subset,tmp1);
+    v2.selectCols(subset,tmp2);
+    try{
+      maxCost=getCost(tmp1,tmp2,prior1,prior2);
+    }
+    catch(...){
+      subset.pop_back();
+      continue;
+    }
+    return maxCost;
+  }
+  return maxCost;//bug fix: original fell off the end (UB) when subset ended up empty
+}
+
+//sequential forward selection Univariate (N single best features), multi-class version
+//Rank every candidate feature by the cost obtained when it is added alone,
+//keep the maxFeatures best, then return the cost of the resulting subset
+//(features are dropped from the back if getCost throws).
+//NOTE: the default for maxFeatures belongs on the in-class declaration;
+//repeating "=0" on this out-of-class template definition (as the original
+//did) is ill-formed and broke compilation.
+template<class T> double FeatureSelector::forwardUnivariate(vector< Vector2d<T> >& v, double (*getCost)(const vector< Vector2d<T> >&), list<int>& subset, int maxFeatures, bool verbose){
+  int maxLevels=v[0][0].size();
+  if(!maxFeatures)
+    maxFeatures=maxLevels;
+  int k=subset.size();
+  if(k>=maxFeatures)
+    return -1;
+  vector<PosValue> cost(maxLevels);
+  list<int> tmpset=subset;//temporary set of selected features (levels)
+  vector< Vector2d<T> > tmp(v.size());
+  for(int ilevel=0;ilevel<maxLevels;++ilevel){
+    if(find(tmpset.begin(),tmpset.end(),ilevel)==tmpset.end()){
+      tmpset.push_back(ilevel);
+      for(int iclass=0;iclass<v.size();++iclass)
+        v[iclass].selectCols(tmpset,tmp[iclass]);
+      try{
+        PosValue pv;
+        pv.position=ilevel;
+        pv.value=getCost(tmp);
+        cost[ilevel]=pv;
+      }
+      catch(...){ //cost could not be computed: mark candidate as unusable
+        PosValue pv;
+        pv.position=ilevel;
+        pv.value=-1;
+        cost[ilevel]=pv;
+      }
+      tmpset.pop_back();
+    }
+  }
+  sort(cost.begin(),cost.end(),Compare_PosValue());//decreasing order
+  int ilevel=0;
+  while((static_cast<int>(subset.size())<maxFeatures)&&(ilevel<maxLevels)){
+    if(cost[ilevel].value>0){
+      subset.push_back(cost[ilevel].position);
+      if(verbose)//only report features that were actually selected
+        cout << "feature " << subset.back() << " has cost: " << cost[ilevel].value << endl;
+    }
+    ++ilevel;
+  }
+  //re-evaluate the cost of the complete subset, dropping features from the
+  //back until getCost succeeds
+  double maxCost=-1;
+  while(subset.size()){
+    for(int iclass=0;iclass<v.size();++iclass)
+      v[iclass].selectCols(subset,tmp[iclass]);
+    try{
+      maxCost=getCost(tmp);
+    }
+    catch(...){
+      subset.pop_back();
+      continue;
+    }
+    return maxCost;
+  }
+  return maxCost;//bug fix: original fell off the end (UB) when subset ended up empty
+}
+
+//sequential forward selection Multivariate (Combination of N best features)
+//Greedily add, one at a time, the feature that maximises getCost for the
+//two-class samples v1/v2, until subset holds maxFeatures features
+//(maxFeatures==0 means: select up to all available features).
+//Returns the cost of the final subset.
+template<class T> double FeatureSelector::forward(Vector2d<T>& v1, Vector2d<T>& v2, double (*getCost)(const Vector2d<T>&, const Vector2d<T>&, double prior1, double prior2), list<int>& subset, int maxFeatures, double prior1, double prior2, bool verbose){
+  //Select feature with the best value (get maximal cost for 1 feature)
+  double maxCost=0;
+  int maxLevels=v1[0].size();
+  if(!maxFeatures)
+    maxFeatures=maxLevels;
+  //NOTE(review): if addFeature fails to grow subset (every candidate makes
+  //getCost throw), this loop does not terminate -- confirm with callers
+  while(subset.size()<maxFeatures){
+    maxCost=addFeature(v1,v2,*getCost,subset,verbose);
+    if(verbose)
+      cout << "added " << subset.back() << ", " << subset.size() << "/" << maxFeatures << " features selected with cost: " << maxCost << endl;
+  }//while
+  return maxCost;
+}
+
+
+//sequential forward selection Multivariate (Combination of N best features)
+//Greedily add the feature that maximises getCost over the per-class sample
+//matrices until subset holds maxFeatures features (maxFeatures==0 selects
+//up to all available features). Returns the cost of the final subset.
+//NOTE: the default for maxFeatures belongs on the in-class declaration;
+//repeating "=0" on this out-of-class template definition (as the original
+//did) is ill-formed and broke compilation.
+template<class T> double FeatureSelector::forward(vector< Vector2d<T> >& v, double (*getCost)(const vector< Vector2d<T> >&), list<int>& subset, int maxFeatures, bool verbose){
+  //Select feature with the best value (get maximal cost for 1 feature)
+  double maxCost=0;
+  int maxLevels=v[0][0].size();
+  if(!maxFeatures)
+    maxFeatures=maxLevels;
+  while(subset.size()<maxFeatures){
+    maxCost=addFeature(v,*getCost,subset,verbose);
+    if(verbose)
+      cout << "added " << subset.back() << ", " << subset.size() << "/" << maxFeatures << " features selected with cost: " << maxCost << endl;
+  }//while
+  return maxCost;
+}
+
+//greedy forward selection on class-name-keyed training data: grow subset
+//one feature at a time until it holds maxFeatures features (maxFeatures==0
+//means: up to all available features). Returns the final subset's cost.
+template<class T> double FeatureSelector::forward(map<string, Vector2d<T> > &v, map<string, vector<T> > &y, double (*getCost)(map<string, Vector2d<T> >&, map<string, vector<T> >&), list<int>& subset, int maxFeatures, bool verbose)
+{
+  //number of available features = column count of the first class' samples
+  int nLevels=(v.begin()->second)[0].size();
+  if(!maxFeatures)
+    maxFeatures=nLevels;
+  double bestCost=0;
+  while(subset.size()<maxFeatures){
+    bestCost=addFeature(v,y,*getCost,subset,verbose);
+    if(verbose)
+      cout << "added " << subset.back() << ", " << subset.size() << "/" << maxFeatures << " features selected with cost: " << bestCost << endl;
+  }
+  return bestCost;
+}
+
+//sequential backward selection
+//Start from the full feature set (when subset is empty) and repeatedly
+//remove the feature whose removal degrades getCost least, until only
+//minFeatures features remain. Returns the cost of the remaining subset.
+template<class T> double FeatureSelector::backward(Vector2d<T>& v1, Vector2d<T>& v2, double (*getCost)(const Vector2d<T>&, const Vector2d<T>&, double prior1, double prior2), list<int>& subset, int minFeatures, double prior1, double prior2, bool verbose){
+  //Select features with least effect on cost when removed (obtain minFeatures eventually)
+  double maxCost=0;
+  int removedFeature;
+  if(subset.empty()){
+    //no initial subset supplied: start from every available feature
+    for(int iFeature=0;iFeature<v1[0].size();++iFeature)
+      subset.push_back(iFeature);
+  }//if
+  if(subset.size()==minFeatures)
+    maxCost=getCost(v1,v2,prior1,prior2);
+  while(subset.size()>minFeatures){
+    maxCost=removeFeature(v1,v2,*getCost,subset,removedFeature,verbose);
+    if(verbose)
+      cout << "removed " << removedFeature << ", " << subset.size() << "/" << minFeatures << " features remain with cost: " << maxCost << endl;
+  }//while
+  return maxCost;
+}
+
+//sequential backward elimination, multi-class version: starting from the
+//full feature set when subset is empty, repeatedly drop the feature whose
+//removal hurts the cost least until minFeatures remain. Returns the cost
+//of the remaining subset.
+template<class T> double FeatureSelector::backward(vector< Vector2d<T> >& v, double (*getCost)(const vector< Vector2d<T> >&), list<int>& subset, int minFeatures, bool verbose){
+  int droppedLevel;
+  double bestCost=0;
+  if(subset.empty()){
+    //no initial subset supplied: start from every available feature
+    for(int iLevel=0;iLevel<v[0][0].size();++iLevel)
+      subset.push_back(iLevel);
+  }
+  if(subset.size()==minFeatures)
+    bestCost=getCost(v);
+  while(subset.size()>minFeatures){
+    bestCost=removeFeature(v,*getCost,subset,droppedLevel,verbose);
+    if(verbose)
+      cout << "removed " << droppedLevel << ", " << subset.size() << "/" << minFeatures << " features remain with cost: " << bestCost << endl;
+  }
+  return bestCost;
+}
+
+//sequential backward selection on class-name-keyed training data
+//Start from all features when subset is empty and drop the least useful
+//feature until minFeatures remain. Returns the cost of the remaining subset.
+template<class T> double FeatureSelector::backward(map<string, Vector2d<T> > &v, map<string, vector<T> > &y, double (*getCost)(map<string, Vector2d<T> >&, map<string, vector<T> >&), list<int>& subset, int minFeatures, bool verbose){
+  //Select features with least effect on cost when removed (obtain minFeatures eventually)
+  double maxCost=0;
+  int removedFeature;
+  if(subset.empty()){
+    //start from every available feature (column count of the first class)
+    for(int iFeature=0;iFeature<(v.begin()->second)[0].size();++iFeature)
+      subset.push_back(iFeature);
+  }//if
+  if(subset.size()==minFeatures)
+    maxCost=getCost(v,y);
+  while(subset.size()>minFeatures){
+    maxCost=removeFeature(v,y,*getCost,subset,removedFeature,verbose);
+    if(verbose)
+      cout << "removed " << removedFeature << ", " << subset.size() << "/" << minFeatures << " features remain with cost: " << maxCost << endl;
+  }//while
+  return maxCost;
+}
+
+//floating search
+//Sequential Forward Floating Selection (SFFS): after every forward step,
+//backtrack (remove features) as long as the reduced subset beats the best
+//cost previously recorded for that size, then resume adding until
+//maxFeatures features are selected. Returns the cost of the final subset,
+//or -1 when the provided subset already holds maxFeatures features.
+template<class T> double FeatureSelector::floating(Vector2d<T>& v1, Vector2d<T>& v2, double (*getCost)(const Vector2d<T>&, const Vector2d<T>&, double prior1, double prior2), list<int>& subset, int maxFeatures, double prior1, double prior2, bool verbose){
+  //cost[k] holds the best cost for a subset of size k; costs are doubles,
+  //so store them as double (the original vector<T> silently truncated the
+  //values whenever T was an integer sample type)
+  vector<double> cost;
+  int maxLevels=v1[0].size();
+  if(maxFeatures<1)
+    maxFeatures=maxLevels;
+  int initialK=subset.size();
+  int k=initialK;
+  int stopK=initialK+1;//never backtrack below the caller-provided subset
+  if(initialK>=maxFeatures)
+    return -1;
+  //NOTE(review): cost is indexed as cost[k-1] below, which assumes the
+  //search starts from an empty subset (initialK==0) -- confirm callers
+  cost.push_back(-1);//init 0 features as cost -1
+  cost.push_back(addFeature(v1,v2,*getCost,subset,verbose));
+  ++k;
+  assert(subset.size()==k);
+  if(verbose)
+    cout << "added " << subset.back() << ", " << subset.size() << "/" << maxFeatures << " features selected with cost: " << cost.back() << endl;
+  while(k<maxFeatures){
+    cost.push_back(addFeature(v1,v2,*getCost,subset,verbose));
+    ++k;
+    assert(subset.size()==k);
+    if(verbose)
+      cout << "added " << subset.back() << ", " << subset.size() << "/" << maxFeatures << " features selected with cost: " << cost.back() << endl;
+    while(k>stopK){
+      int x_r;//feature removed by removeFeature
+      double cost_r=removeFeature(v1,v2,*getCost,subset,x_r,verbose);
+      if(cost_r>cost[k-1]){ //backtracking improved the smaller subset: accept it
+        --k;
+        assert(subset.size()==k);
+        cost[k]=cost_r;
+        cost.pop_back();
+        if(verbose)
+          cout << "removed " << x_r << ", " << subset.size() << "/" << maxFeatures << " features remain with cost: " << cost_r << endl;
+        continue;
+      }
+      else if(cost_r>=0){ //removal did not help: restore the feature and stop backtracking
+        subset.push_back(x_r);
+        break;
+      }
+      else if(verbose)
+        cout << "could not remove any feature" << endl;
+      cost.pop_back();
+    }
+  }
+  return cost.back();
+}
+
+//floating search
+//Sequential Forward Floating Selection (SFFS), multi-class version: add the
+//best feature, then backtrack while removing a feature improves the best
+//cost recorded for the smaller subset size. Returns the final cost, or -1
+//when subset already holds maxFeatures features.
+template<class T> double FeatureSelector::floating(vector< Vector2d<T> >& v, double (*getCost)(const vector< Vector2d<T> >&), list<int>& subset, int maxFeatures, bool verbose){
+  //costs are doubles: store them as double (the original vector<T>
+  //silently truncated the values for integer sample types T)
+  vector<double> cost;
+  int maxLevels=v[0][0].size();
+  if(maxFeatures<1)
+    maxFeatures=maxLevels;
+  int k=subset.size();
+  if(k>=maxFeatures)
+    return -1;
+  cost.push_back(-1);//init 0 features as cost -1
+  cost.push_back(addFeature(v,*getCost,subset,verbose));
+  ++k;
+  if(verbose)
+    cout << "added " << subset.back() << ", " << cost.size()-1 << "/" << maxFeatures << " features selected with cost: " << cost.back() << endl;
+  while(k<maxFeatures){
+    cost.push_back(addFeature(v,*getCost,subset,verbose));
+    ++k;
+    if(verbose)
+      cout << "added " << subset.back() << ", " << cost.size()-1 << "/" << maxFeatures << " features selected with cost: " << cost.back() << endl;
+    while(k>1){
+      int x_r;//feature removed by removeFeature
+      double cost_r=removeFeature(v,*getCost,subset,x_r,verbose);
+      if(cost_r>cost[k-1]){ //removal improved the smaller subset: accept it
+        --k;
+        cost[k]=cost_r;
+        cost.pop_back();
+        if(verbose)
+          cout << "removed " << x_r << ", " << cost.size()-1 << "/" << maxFeatures << " features remain with cost: " << cost_r << endl;
+        continue;
+      }
+      else if(cost_r>=0){ //no improvement: restore the feature and stop backtracking
+        subset.push_back(x_r);
+        break;
+      }
+      else if(verbose)
+        cout << "could not remove any feature" << endl;
+      cost.pop_back();
+    }
+  }
+  return cost.back();
+}
+
+//floating search
+//Sequential Forward Floating Selection (SFFS) on class-name-keyed training
+//data: add the best feature, then backtrack while removing a feature
+//improves the best cost recorded for the smaller subset size. Returns the
+//final cost, or -1 when subset already holds maxFeatures features.
+template<class T> double FeatureSelector::floating(map<string, Vector2d<T> > &v, map<string, vector<T> > &y, double (*getCost)(map<string, Vector2d<T> >&, map<string, vector<T> >&), list<int>& subset, int maxFeatures, bool verbose){
+  //costs are doubles: store them as double (the original vector<T>
+  //silently truncated the values for integer sample types T)
+  vector<double> cost;
+  int maxLevels=(v.begin()->second)[0].size();
+  if(maxFeatures<1)
+    maxFeatures=maxLevels;
+  int k=subset.size();
+  if(k>=maxFeatures)
+    return -1;
+  cost.push_back(-1);//init 0 features as cost -1
+  cost.push_back(addFeature(v,y,*getCost,subset,verbose));
+  ++k;
+  if(verbose)
+    cout << "added " << subset.back() << ", " << cost.size()-1 << "/" << maxFeatures << " features selected with cost: " << cost.back() << endl;
+  while(k<maxFeatures){
+    cost.push_back(addFeature(v,y,*getCost,subset,verbose));
+    ++k;
+    if(verbose)
+      cout << "added " << subset.back() << ", " << cost.size()-1 << "/" << maxFeatures << " features selected with cost: " << cost.back() << endl;
+    while(k>1){
+      int x_r;//feature removed by removeFeature
+      double cost_r=removeFeature(v,y,*getCost,subset,x_r,verbose);
+      if(cost_r>cost[k-1]){ //removal improved the smaller subset: accept it
+        --k;
+        cost[k]=cost_r;
+        cost.pop_back();
+        if(verbose)
+          cout << "removed " << x_r << ", " << cost.size()-1 << "/" << maxFeatures << " features remain with cost: " << cost_r << endl;
+        continue;
+      }
+      else if(cost_r>=0){ //no improvement: restore the feature and stop backtracking
+        subset.push_back(x_r);
+        break;
+      }
+      else if(verbose)
+        cout << "could not remove any feature" << endl;
+      cost.pop_back();
+    }
+  }
+  return cost.back();
+}
+
+//brute force search (search for all possible combinations and select best)
+//Evaluate getCost for every combination of maxFeatures features and store
+//the best combination in subset. Returns the best cost found, or -1 when
+//no combination could be evaluated.
+template<class T> double FeatureSelector::bruteForce(Vector2d<T>& v1, Vector2d<T>& v2, double (*getCost)(const Vector2d<T>&, const Vector2d<T>&, double prior1, double prior2), list<int>& subset, int maxFeatures, double prior1, double prior2, bool verbose){
+  int maxLevels=v1[0].size();
+  if(maxFeatures<1)
+    maxFeatures=maxLevels;
+  int k=subset.size();
+  if(k>=maxFeatures)
+    return -1;
+  //check before allocating: the original returned after gsl_combination_calloc
+  //here and leaked the combination
+  if(k>=maxLevels)
+    return -1;
+  gsl_combination *c=gsl_combination_calloc(v1[0].size(),maxFeatures);
+  list<int> tmpset;//temporary set of selected features (levels)
+  Vector2d<T> tmp1(v1.size());
+  Vector2d<T> tmp2(v2.size());
+  list<int> catchset;//restore set in case of catch all the way to last level (no better cost)
+  double maxCost=-1;//-1 until at least one combination is evaluated successfully
+  double cost;
+  //gsl_combination_calloc already yields the lexicographically first
+  //combination, so the loop starts evaluating immediately; the original
+  //called gsl_combination_next before the loop and never evaluated it
+  do{
+    for(int ifeature=0;ifeature<maxFeatures;++ifeature)
+      tmpset.push_back(c->data[ifeature]);
+    v1.selectCols(tmpset,tmp1);
+    v2.selectCols(tmpset,tmp2);
+    try{
+      cost=getCost(tmp1,tmp2,prior1,prior2);
+    }
+    catch(...){ //singular matrix encountered
+      catchset=tmpset;//this tmpset resulted in failure of getCost
+      if(verbose){
+        cout << "Could not get cost from set: " << endl;
+        gsl_combination_fprintf(stdout,c," %u");
+        printf("\n");
+      }
+      tmpset.clear();
+      continue;
+    }
+    if(maxCost<cost){ //set with better cost is found
+      maxCost=cost;
+      subset=tmpset;
+    }
+    tmpset.clear();
+  }while(gsl_combination_next(c)==GSL_SUCCESS);
+  gsl_combination_free(c);
+  if(maxCost<0) //no combination succeeded: hand back the last failing set
+    subset=catchset;
+  //invariant: subset contains no duplicate elements
+  for(list<int>::iterator lit=subset.begin();lit!=subset.end();++lit){
+    list<int>::iterator lit2=lit;//start searching from next element
+    assert(find(++lit2,subset.end(),*lit)==subset.end());
+  }
+  return maxCost;
+}
+
+//add to subset the single feature (level) that maximises getCost for the
+//two-class samples v1/v2; returns the resulting cost, or -1 (with subset
+//set to the last failing candidate set) when every candidate made getCost throw
+template<class T> double FeatureSelector::addFeature(Vector2d<T>& v1, Vector2d<T>& v2, double (*getCost)(const Vector2d<T>&, const Vector2d<T>&, double prior1, double prior2), list<int>& subset, double prior1, double prior2, bool verbose){
+  //select feature with the best value (get maximal cost for 1 feature)
+  list<int> tmpset=subset;//temporary set of selected features (levels)
+  list<int> catchset;//restore set in case of catch all the way to last level (no better cost)
+  //initialize maxCost with actual cost for current subset (-1 if empty subset)
+  double maxCost=-1;
+//   if(subset.size()){
+//     try{
+//       v1.selectCols(subset,tmp1);
+//       v2.selectCols(subset,tmp2);
+//       maxCost=getCost(tmp1,tmp2);
+//     }
+//     catch(double determinant){
+//       return -1;
+//     }
+//   }
+  double cost;
+  int maxLevels=v1[0].size();
+  if(subset.size()>=maxLevels)
+    return maxCost;
+  for(int ilevel=0;ilevel<maxLevels;++ilevel){
+    Vector2d<T> tmp1(v1.size());
+    Vector2d<T> tmp2(v2.size());
+    if(find(tmpset.begin(),tmpset.end(),ilevel)!=tmpset.end())
+      continue;//feature already in the subset
+    tmpset.push_back(ilevel);
+    v1.selectCols(tmpset,tmp1);
+    v2.selectCols(tmpset,tmp2);
+    try{
+      cost=getCost(tmp1,tmp2,prior1,prior2);
+    }
+    catch(...){
+      catchset=tmpset;//this tmpset resulted in singular matrix
+      if(verbose)
+        cout << "Could not add feature " << tmpset.back() << endl;
+      tmpset.pop_back();
+      continue;
+    }
+    if(maxCost<cost){ //level with better cost is found
+      maxCost=cost;
+      subset=tmpset;
+    }
+    tmpset.pop_back();
+  }
+  if(maxCost<0) //no level added to better maxCost than current subset (catchset)
+    subset=catchset;
+  //test: assert list contains no duplicate elements
+  for(list<int>::iterator lit=subset.begin();lit!=subset.end();++lit){
+    list<int>::iterator lit2=lit;//start searching from next element
+    assert(find(++lit2,subset.end(),*lit)==subset.end());
+  }
+  return maxCost;
+}
+
+//add to subset the single feature that maximises getCost over the
+//per-class sample matrices in v; returns the resulting cost, or -1 (with
+//subset set to the last failing candidate set) when every candidate threw
+template<class T> double FeatureSelector::addFeature(vector< Vector2d<T> >& v, double (*getCost)(const vector< Vector2d<T> >&), list<int>& subset, bool verbose){
+  //select feature with the best value (get maximal cost for 1 feature)
+  list<int> tmpset=subset;//temporary set of selected features (levels)
+  vector< Vector2d<T> > tmp(v.size());
+  list<int> catchset;//restore set in case of catch all the way to last level (no better cost)
+  //initialize maxCost with actual cost for current subset (-1 if empty subset)
+  double maxCost=-1;
+  double cost;
+  int maxLevels=v[0][0].size();
+  if(subset.size()>=maxLevels)
+    return maxCost;
+  for(int ilevel=0;ilevel<maxLevels;++ilevel){
+    if(find(tmpset.begin(),tmpset.end(),ilevel)!=tmpset.end())
+      continue;//feature already in the subset
+    tmpset.push_back(ilevel);
+    for(int iclass=0;iclass<v.size();++iclass){
+//       tmp[iclass].resize(v[iclass].size());
+      v[iclass].selectCols(tmpset,tmp[iclass]);
+    }
+    try{
+      cost=getCost(tmp);
+    }
+    catch(...){
+      catchset=tmpset;//this tmpset resulted in singular matrix
+      if(verbose)
+        cout << "Could not add feature " << tmpset.back() << endl;
+      tmpset.pop_back();
+      continue;
+    }
+    if(maxCost<cost){ //level with better cost is found
+      maxCost=cost;
+      subset=tmpset;
+    }
+    tmpset.pop_back();
+  }
+  if(maxCost<0) //no level added to better maxCost than current subset (catchset)
+    subset=catchset;
+  //test: assert list contains no duplicate elements
+  for(list<int>::iterator lit=subset.begin();lit!=subset.end();++lit){
+    list<int>::iterator lit2=lit;//start searching from next element
+    assert(find(++lit2,subset.end(),*lit)==subset.end());
+  }
+  return maxCost;
+}
+
+//add to subset the single feature that maximises getCost over the
+//class-name-keyed samples; returns the resulting cost, or -1 (with subset
+//set to the last failing candidate set) when every candidate threw
+template<class T> double FeatureSelector::addFeature(map<string, Vector2d<T> > &v, map<string, vector<T> > &y, double (*getCost)(map<string, Vector2d<T> >&, map<string, vector<T> >&), list<int>& subset, bool verbose){
+  //select feature with the best value (get maximal cost for 1 feature)
+  list<int> tmpset=subset;//temporary set of selected features (levels)
+  //tmp must hold Vector2d<T>: the original hard-coded Vector2d<double>,
+  //which did not match getCost's signature and only compiled for T==double
+  map<string, Vector2d<T> > tmp;
+  typename map<string, Vector2d<T> >::const_iterator mit;
+  list<int> catchset;//restore set in case of catch all the way to last level (no better cost)
+  double maxCost=-1;//initialize maxCost with -1 (empty subset)
+  double cost;
+  int maxLevels=(v.begin()->second)[0].size();
+  if(subset.size()>=maxLevels)
+    return maxCost;
+  for(int ilevel=0;ilevel<maxLevels;++ilevel){
+    if(find(tmpset.begin(),tmpset.end(),ilevel)!=tmpset.end())
+      continue;//feature already in the subset
+    tmpset.push_back(ilevel);
+    for(mit=v.begin();mit!=v.end();++mit){
+      string theName=mit->first;
+      v[theName].selectCols(tmpset,tmp[theName]);
+    }
+    try{
+      cost=getCost(tmp,y);
+    }
+    catch(...){
+      catchset=tmpset;//this tmpset resulted in singular matrix
+      if(verbose)
+        cout << "Could not add feature " << tmpset.back() << endl;
+      tmpset.pop_back();
+      continue;
+    }
+    if(maxCost<cost){ //level with better cost is found
+      maxCost=cost;
+      subset=tmpset;
+    }
+    tmpset.pop_back();
+  }
+  if(maxCost<0) //no level added to better maxCost than current subset (catchset)
+    subset=catchset;
+  //invariant: subset contains no duplicate elements
+  for(list<int>::iterator lit=subset.begin();lit!=subset.end();++lit){
+    list<int>::iterator lit2=lit;//start searching from next element
+    assert(find(++lit2,subset.end(),*lit)==subset.end());
+  }
+  return maxCost;
+}
+
+//remove from subset the feature whose removal hurts getCost least (two-class
+//version); the removed feature index is returned via r. Returns the
+//resulting cost, or -1 when subset is empty/oversized or every removal threw.
+template<class T> double FeatureSelector::removeFeature(Vector2d<T>& v1, Vector2d<T>& v2, double (*getCost)(const Vector2d<T>&, const Vector2d<T>&, double prior1, double prior2), list<int>& subset, int& r, double prior1, double prior2, bool verbose){
+  //find the feature that has the least effect on the cost when it is removed from subset
+  list<int> tmpset=subset;//temporary set of selected features (levels)
+  Vector2d<T> tmp1(v1.size());
+  Vector2d<T> tmp2(v2.size());
+  int nFeatures=subset.size();
+  list<int> catchset;//restore set in case of catch all the way to last level (no better cost)
+  //initialize maxCost with actual cost for current subset (-1 if empty subset)
+  double maxCost=-1;
+//   if(subset.size()){
+//     try{
+//       v1.selectCols(subset,tmp1);
+//       v2.selectCols(subset,tmp2);
+//       maxCost=getCost(tmp1,tmp2);
+//     }
+//     catch(double determinant){
+//       return -1;
+//     }
+//   }
+  int last;
+  double cost;
+  int maxLevels=v1[0].size();
+  if(subset.size()>maxLevels||subset.empty()){
+    return maxCost;
+  }
+  list<int>::const_iterator it;
+  for(int i=0;i<nFeatures;++i){
+    //rotate through subset: drop the back element, evaluate, reinsert at the front
+    last=tmpset.back();
+    tmpset.pop_back();
+    v1.selectCols(tmpset,tmp1);
+    v2.selectCols(tmpset,tmp2);
+    try{
+      cost=getCost(tmp1,tmp2,prior1,prior2);
+    }
+    catch(...){
+      catchset=tmpset;//this tmpset resulted in singular matrix
+      if(verbose)
+        cout << "Could not remove feature " << last << endl;
+      tmpset.push_front(last);
+      continue;
+    }
+    if(maxCost<cost){ //level with better cost is found
+      maxCost=cost;
+      subset=tmpset;
+      r=last;
+    }
+    tmpset.push_front(last);
+  }
+  if(maxCost<0){//all levels removed were catched
+    subset=catchset;
+  }
+  //test: assert list contains no duplicate elements
+  for(list<int>::iterator lit=subset.begin();lit!=subset.end();++lit){
+    list<int>::iterator lit2=lit;//start searching from next element
+    assert(find(++lit2,subset.end(),*lit)==subset.end());
+  }
+  return maxCost;
+}
+
+//remove from subset the feature whose removal hurts getCost least
+//(multi-class version); the removed feature index is returned via r.
+//Returns the resulting cost, or -1 when subset is empty/oversized or every
+//removal made getCost throw.
+template<class T> double FeatureSelector::removeFeature(vector< Vector2d<T> >& v, double (*getCost)(const vector< Vector2d<T> >&), list<int>& subset, int& r, bool verbose){
+  //find the feature that has the least effect on the cost when it is removed from subset
+  list<int> tmpset=subset;//temporary set of selected features (levels)
+  vector< Vector2d<T> > tmp(v.size());
+  int nFeatures=subset.size();
+  list<int> catchset;//restore set in case of catch all the way to last level (no better cost)
+  //initialize maxCost with actual cost for current subset (-1 if empty subset)
+  double maxCost=-1;
+  int last;
+  double cost;
+  int maxLevels=v[0][0].size();
+  if(subset.size()>maxLevels||subset.empty()){
+    return maxCost;
+  }
+  list<int>::const_iterator it;
+  for(int i=0;i<nFeatures;++i){
+    //rotate through subset: drop the back element, evaluate, reinsert at the front
+    last=tmpset.back();
+    tmpset.pop_back();
+    for(int iclass=0;iclass<v.size();++iclass){
+//       tmp[iclass].resize(v[iclass].size());
+      v[iclass].selectCols(tmpset,tmp[iclass]);
+    }
+    try{
+      cost=getCost(tmp);
+    }
+    catch(...){
+      catchset=tmpset;//this tmpset resulted in singular matrix
+      if(verbose)
+        cout << "Could not remove feature " << last << endl;
+      tmpset.push_front(last);
+      continue;
+    }
+    if(maxCost<cost){ //level with better cost is found
+      maxCost=cost;
+      subset=tmpset;
+      r=last;
+    }
+    tmpset.push_front(last);
+  }
+  if(maxCost<0){//all levels removed were catched
+    subset=catchset;
+  }
+  //test: assert list contains no duplicate elements
+  for(list<int>::iterator lit=subset.begin();lit!=subset.end();++lit){
+    list<int>::iterator lit2=lit;//start searching from next element
+    assert(find(++lit2,subset.end(),*lit)==subset.end());
+  }
+  return maxCost;
+}
+
+//remove from subset the feature whose removal hurts getCost least
+//(class-name-keyed version); the removed feature index is returned via r.
+//Returns the resulting cost, or -1 when subset is empty/oversized or every
+//removal made getCost throw.
+template<class T> double FeatureSelector::removeFeature(map<string, Vector2d<T> > &v, map<string, vector<T> > &y, double (*getCost)(map<string, Vector2d<T> >&, map<string, vector<T> >&), list<int>& subset, int& r, bool verbose){
+  //find the feature that has the least effect on the cost when it is removed from subset
+  list<int> tmpset=subset;//temporary set of selected features (levels)
+  //tmp must hold Vector2d<T>: the original hard-coded Vector2d<double>,
+  //which did not match getCost's signature and only compiled for T==double
+  map<string, Vector2d<T> > tmp;
+  typename map<string, Vector2d<T> >::const_iterator mit;
+  int nFeatures=subset.size();
+  list<int> catchset;//restore set in case of catch all the way to last level (no better cost)
+  double maxCost=-1;//initialize maxCost with -1 (nothing removed yet)
+  int last;
+  double cost;
+  int maxLevels=(v.begin()->second)[0].size();
+  if(subset.size()>maxLevels||subset.empty()){
+    return maxCost;
+  }
+  for(int i=0;i<nFeatures;++i){
+    //rotate through subset: drop the back element, evaluate, reinsert at the front
+    last=tmpset.back();
+    tmpset.pop_back();
+    for(mit=v.begin();mit!=v.end();++mit){
+      string theName=mit->first;
+      v[theName].selectCols(tmpset,tmp[theName]);
+    }
+    try{
+      cost=getCost(tmp,y);
+    }
+    catch(...){ //singular matrix encountered
+      catchset=tmpset;//this tmpset resulted in singular matrix
+      if(verbose)
+        cout << "Could not remove feature " << last << endl;
+      tmpset.push_front(last);
+      continue;
+    }
+    if(maxCost<cost){ //level with better cost is found
+      maxCost=cost;
+      subset=tmpset;
+      r=last;
+    }
+    tmpset.push_front(last);
+  }
+  if(maxCost<0){//all levels removed were catched
+    subset=catchset;
+  }
+  //invariant: subset contains no duplicate elements
+  for(list<int>::iterator lit=subset.begin();lit!=subset.end();++lit){
+    list<int>::iterator lit2=lit;//start searching from next element
+    assert(find(++lit2,subset.end(),*lit)==subset.end());
+  }
+  return maxCost;
+}
+#endif /* _FEATURESELECTOR_H_ */
diff --git a/src/algorithms/Makefile.am b/src/algorithms/Makefile.am
index ba7c6e3..9f0bafa 100644
--- a/src/algorithms/Makefile.am
+++ b/src/algorithms/Makefile.am
@@ -12,7 +12,7 @@ noinst_LIBRARIES = libalgorithms.a
libalgorithms_adir = $(includedir)/algorithms
# the list of header files that belong to the library (to be installed later)
-libalgorithms_a_HEADERS = Egcs.h Filter2d.h Filter.h Histogram.h ConfusionMatrix.h svm.h
+libalgorithms_a_HEADERS = Egcs.h Filter2d.h Filter.h Histogram.h ConfusionMatrix.h svm.h FeatureSelector.h
if USE_FANN
libalgorithms_a_HEADERS += myfann_cpp.h
@@ -23,5 +23,5 @@ libalgorithms_a_HEADERS += SensorModel.h
endif
# the sources to add to the library and to add to the source distribution
-libalgorithms_a_SOURCES = $(libalgorithms_a_HEADERS) Egcs.cc Filter2d.cc Filter.cc Histogram.cc ConfusionMatrix.cc svm.cpp
+libalgorithms_a_SOURCES = $(libalgorithms_a_HEADERS) Egcs.cc Filter2d.cc Filter.cc Histogram.cc ConfusionMatrix.cc svm.cpp
###############################################################################
diff --git a/src/algorithms/Makefile.in b/src/algorithms/Makefile.in
index af8c667..a0c6645 100644
--- a/src/algorithms/Makefile.in
+++ b/src/algorithms/Makefile.in
@@ -53,9 +53,9 @@ ARFLAGS = cru
libalgorithms_a_AR = $(AR) $(ARFLAGS)
libalgorithms_a_LIBADD =
am__libalgorithms_a_SOURCES_DIST = Egcs.h Filter2d.h Filter.h \
- Histogram.h ConfusionMatrix.h svm.h myfann_cpp.h SensorModel.h \
- Egcs.cc Filter2d.cc Filter.cc Histogram.cc ConfusionMatrix.cc \
- svm.cpp
+ Histogram.h ConfusionMatrix.h svm.h FeatureSelector.h \
+ myfann_cpp.h SensorModel.h Egcs.cc Filter2d.cc Filter.cc \
+ Histogram.cc ConfusionMatrix.cc svm.cpp
am__objects_1 =
am__objects_2 = $(am__objects_1) $(am__objects_1)
am_libalgorithms_a_OBJECTS = $(am__objects_2) Egcs.$(OBJEXT) \
@@ -78,7 +78,8 @@ LINK = $(CCLD) $(AM_CFLAGS) $(CFLAGS) $(AM_LDFLAGS) $(LDFLAGS) -o $@
SOURCES = $(libalgorithms_a_SOURCES)
DIST_SOURCES = $(am__libalgorithms_a_SOURCES_DIST)
am__libalgorithms_a_HEADERS_DIST = Egcs.h Filter2d.h Filter.h \
- Histogram.h ConfusionMatrix.h svm.h myfann_cpp.h SensorModel.h
+ Histogram.h ConfusionMatrix.h svm.h FeatureSelector.h \
+ myfann_cpp.h SensorModel.h
am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`;
am__vpath_adj = case $$p in \
$(srcdir)/*) f=`echo "$$p" | sed "s|^$$srcdirstrip/||"`;; \
@@ -228,10 +229,11 @@ libalgorithms_adir = $(includedir)/algorithms
# the list of header files that belong to the library (to be installed later)
libalgorithms_a_HEADERS = Egcs.h Filter2d.h Filter.h Histogram.h \
- ConfusionMatrix.h svm.h $(am__append_1) $(am__append_2)
+ ConfusionMatrix.h svm.h FeatureSelector.h $(am__append_1) \
+ $(am__append_2)
# the sources to add to the library and to add to the source distribution
-libalgorithms_a_SOURCES = $(libalgorithms_a_HEADERS) Egcs.cc Filter2d.cc Filter.cc Histogram.cc ConfusionMatrix.cc svm.cpp
+libalgorithms_a_SOURCES = $(libalgorithms_a_HEADERS) Egcs.cc Filter2d.cc Filter.cc Histogram.cc ConfusionMatrix.cc svm.cpp
all: all-am
.SUFFIXES:
diff --git a/src/apps/Makefile.am b/src/apps/Makefile.am
index eb3120e..14d8f95 100644
--- a/src/apps/Makefile.am
+++ b/src/apps/Makefile.am
@@ -6,7 +6,7 @@ LDADD = $(GDAL_LDFLAGS) $(top_builddir)/src/algorithms/libalgorithms.a $(top_bui
###############################################################################
# the program to build (the names of the final binaries)
-bin_PROGRAMS = pkinfo pkcrop pkreclass pkgetmask pksetmask pkcreatect pkdumpimg pkdumpogr pksieve pkstat pkstatogr pkegcs pkextract pkfillnodata pkfilter pkdsm2shadow pkmosaic pkndvi pkpolygonize pkascii2img pkdiff pkclassify_svm pkascii2ogr #pkxcorimg pkgeom
+bin_PROGRAMS = pkinfo pkcrop pkreclass pkgetmask pksetmask pkcreatect pkdumpimg pkdumpogr pksieve pkstat pkstatogr pkegcs pkextract pkfillnodata pkfilter pkdsm2shadow pkmosaic pkndvi pkpolygonize pkascii2img pkdiff pkclassify_svm pkfs_svm pkascii2ogr #pkxcorimg pkgeom
if USE_FANN
bin_PROGRAMS += pkclassify_nn
pkclassify_nn_SOURCES = $(top_srcdir)/src/algorithms/myfann_cpp.h pkclassify_nn.h pkclassify_nn.cc
@@ -49,6 +49,7 @@ pkndvi_SOURCES = pkndvi.cc
pkpolygonize_SOURCES = pkpolygonize.cc
pkdiff_SOURCES = pkdiff.cc
pkclassify_svm_SOURCES = $(top_srcdir)/src/algorithms/svm.h $(top_srcdir)/src/algorithms/svm.cpp pkclassify_svm.cc
+pkfs_svm_SOURCES = $(top_srcdir)/src/algorithms/svm.h $(top_srcdir)/src/algorithms/svm.cpp pkfs_svm.cc
pkascii2img_SOURCES = pkascii2img.cc
pkascii2ogr_SOURCES = pkascii2ogr.cc
#pkxcorimg_SOURCES= pkxcorimg.cc
diff --git a/src/apps/Makefile.in b/src/apps/Makefile.in
index 61c9158..184c40a 100644
--- a/src/apps/Makefile.in
+++ b/src/apps/Makefile.in
@@ -39,8 +39,8 @@ bin_PROGRAMS = pkinfo$(EXEEXT) pkcrop$(EXEEXT) pkreclass$(EXEEXT) \
pkextract$(EXEEXT) pkfillnodata$(EXEEXT) pkfilter$(EXEEXT) \
pkdsm2shadow$(EXEEXT) pkmosaic$(EXEEXT) pkndvi$(EXEEXT) \
pkpolygonize$(EXEEXT) pkascii2img$(EXEEXT) pkdiff$(EXEEXT) \
- pkclassify_svm$(EXEEXT) pkascii2ogr$(EXEEXT) $(am__EXEEXT_1) \
- $(am__EXEEXT_2) $(am__EXEEXT_3)
+ pkclassify_svm$(EXEEXT) pkfs_svm$(EXEEXT) pkascii2ogr$(EXEEXT) \
+ $(am__EXEEXT_1) $(am__EXEEXT_2) $(am__EXEEXT_3)
@USE_FANN_TRUE@am__append_1 = pkclassify_nn
@USE_LAS_TRUE@am__append_2 = pklas2img
@USE_NLOPT_TRUE@am__append_3 = pksensormodel
@@ -149,6 +149,12 @@ pkfilter_LDADD = $(LDADD)
pkfilter_DEPENDENCIES = $(am__DEPENDENCIES_1) \
$(top_builddir)/src/algorithms/libalgorithms.a \
$(top_builddir)/src/imageclasses/libimageClasses.a
+am_pkfs_svm_OBJECTS = svm.$(OBJEXT) pkfs_svm.$(OBJEXT)
+pkfs_svm_OBJECTS = $(am_pkfs_svm_OBJECTS)
+pkfs_svm_LDADD = $(LDADD)
+pkfs_svm_DEPENDENCIES = $(am__DEPENDENCIES_1) \
+ $(top_builddir)/src/algorithms/libalgorithms.a \
+ $(top_builddir)/src/imageclasses/libimageClasses.a
am_pkgetmask_OBJECTS = pkgetmask.$(OBJEXT)
pkgetmask_OBJECTS = $(am_pkgetmask_OBJECTS)
pkgetmask_LDADD = $(LDADD)
@@ -236,18 +242,18 @@ SOURCES = $(pkascii2img_SOURCES) $(pkascii2ogr_SOURCES) \
$(pkdsm2shadow_SOURCES) $(pkdumpimg_SOURCES) \
$(pkdumpogr_SOURCES) $(pkegcs_SOURCES) $(pkextract_SOURCES) \
$(pkfillnodata_SOURCES) $(pkfilter_SOURCES) \
- $(pkgetmask_SOURCES) $(pkinfo_SOURCES) $(pklas2img_SOURCES) \
- $(pkmosaic_SOURCES) $(pkndvi_SOURCES) $(pkpolygonize_SOURCES) \
- $(pkreclass_SOURCES) $(pksensormodel_SOURCES) \
- $(pksetmask_SOURCES) $(pksieve_SOURCES) $(pkstat_SOURCES) \
- $(pkstatogr_SOURCES)
+ $(pkfs_svm_SOURCES) $(pkgetmask_SOURCES) $(pkinfo_SOURCES) \
+ $(pklas2img_SOURCES) $(pkmosaic_SOURCES) $(pkndvi_SOURCES) \
+ $(pkpolygonize_SOURCES) $(pkreclass_SOURCES) \
+ $(pksensormodel_SOURCES) $(pksetmask_SOURCES) \
+ $(pksieve_SOURCES) $(pkstat_SOURCES) $(pkstatogr_SOURCES)
DIST_SOURCES = $(pkascii2img_SOURCES) $(pkascii2ogr_SOURCES) \
$(am__pkclassify_nn_SOURCES_DIST) $(pkclassify_svm_SOURCES) \
$(pkcreatect_SOURCES) $(pkcrop_SOURCES) $(pkdiff_SOURCES) \
$(pkdsm2shadow_SOURCES) $(pkdumpimg_SOURCES) \
$(pkdumpogr_SOURCES) $(pkegcs_SOURCES) $(pkextract_SOURCES) \
$(pkfillnodata_SOURCES) $(pkfilter_SOURCES) \
- $(pkgetmask_SOURCES) $(pkinfo_SOURCES) \
+ $(pkfs_svm_SOURCES) $(pkgetmask_SOURCES) $(pkinfo_SOURCES) \
$(am__pklas2img_SOURCES_DIST) $(pkmosaic_SOURCES) \
$(pkndvi_SOURCES) $(pkpolygonize_SOURCES) $(pkreclass_SOURCES) \
$(am__pksensormodel_SOURCES_DIST) $(pksetmask_SOURCES) \
@@ -398,6 +404,7 @@ pkndvi_SOURCES = pkndvi.cc
pkpolygonize_SOURCES = pkpolygonize.cc
pkdiff_SOURCES = pkdiff.cc
pkclassify_svm_SOURCES = $(top_srcdir)/src/algorithms/svm.h $(top_srcdir)/src/algorithms/svm.cpp pkclassify_svm.cc
+pkfs_svm_SOURCES = $(top_srcdir)/src/algorithms/svm.h $(top_srcdir)/src/algorithms/svm.cpp pkfs_svm.cc
pkascii2img_SOURCES = pkascii2img.cc
pkascii2ogr_SOURCES = pkascii2ogr.cc
all: all-am
@@ -513,6 +520,9 @@ pkfillnodata$(EXEEXT): $(pkfillnodata_OBJECTS) $(pkfillnodata_DEPENDENCIES)
pkfilter$(EXEEXT): $(pkfilter_OBJECTS) $(pkfilter_DEPENDENCIES)
@rm -f pkfilter$(EXEEXT)
$(CXXLINK) $(pkfilter_OBJECTS) $(pkfilter_LDADD) $(LIBS)
+pkfs_svm$(EXEEXT): $(pkfs_svm_OBJECTS) $(pkfs_svm_DEPENDENCIES)
+ @rm -f pkfs_svm$(EXEEXT)
+ $(CXXLINK) $(pkfs_svm_OBJECTS) $(pkfs_svm_LDADD) $(LIBS)
pkgetmask$(EXEEXT): $(pkgetmask_OBJECTS) $(pkgetmask_DEPENDENCIES)
@rm -f pkgetmask$(EXEEXT)
$(CXXLINK) $(pkgetmask_OBJECTS) $(pkgetmask_LDADD) $(LIBS)
@@ -570,6 +580,7 @@ distclean-compile:
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/pkextract.Po@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/pkfillnodata.Po@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/pkfilter.Po@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/pkfs_svm.Po@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/pkgetmask.Po@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/pkinfo.Po@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/pklas2img.Po@am__quote@
diff --git a/src/apps/pkclassify_svm.cc b/src/apps/pkclassify_svm.cc
index 43508cb..6d5e911 100644
--- a/src/apps/pkclassify_svm.cc
+++ b/src/apps/pkclassify_svm.cc
@@ -1,5 +1,5 @@
/**********************************************************************
-pkclassify_svm.cc: classify raster image using Artificial Neural Network
+pkclassify_svm.cc: classify raster image using Support Vector Machine
Copyright (C) 2008-2012 Pieter Kempeneers
This file is part of pktools
@@ -149,7 +149,7 @@ int main(int argc, char *argv[])
exit(0);
}
if(help_opt[0]){
- std::cout << "usage: pkclassify_nn -i testimage -o outputimage -t training [OPTIONS]" << std::endl;
+ std::cout << "usage: pkclassify_svm -i testimage -o outputimage -t training [OPTIONS]" << std::endl;
exit(0);
}
@@ -174,7 +174,6 @@ int main(int argc, char *argv[])
unsigned int totalSamples=0;
int nreclass=0;
vector<int> vcode;//unique class codes in recode string
- // vector<FANN::neural_net> net(nbag);//the neural network
vector<struct svm_model*> svm(nbag);
vector<struct svm_parameter> param(nbag);
diff --git a/src/apps/pkfs_svm.cc b/src/apps/pkfs_svm.cc
new file mode 100644
index 0000000..03b56c8
--- /dev/null
+++ b/src/apps/pkfs_svm.cc
@@ -0,0 +1,549 @@
+/**********************************************************************
+pkfs_svm.cc: feature selection for svm classifier
+Copyright (C) 2008-2012 Pieter Kempeneers
+
+This file is part of pktools
+
+pktools is free software: you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation, either version 3 of the License, or
+(at your option) any later version.
+
+pktools is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with pktools. If not, see <http://www.gnu.org/licenses/>.
+***********************************************************************/
+#include <vector>
+#include <map>
+#include <algorithm>
+#include "imageclasses/ImgReaderGdal.h"
+#include "imageclasses/ImgWriterGdal.h"
+#include "imageclasses/ImgReaderOgr.h"
+#include "imageclasses/ImgWriterOgr.h"
+#include "base/Optionpk.h"
+#include "algorithms/ConfusionMatrix.h"
+#include "algorithms/FeatureSelector.h"
+#include "algorithms/svm.h"
+#include "pkclassify_nn.h"
+
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#define Malloc(type,n) (type *)malloc((n)*sizeof(type))
+
+//Cost function used by the feature selector: train an SVM on the given
+//training features ([class][sample][band]) and return the kappa statistic
+//obtained by n-fold cross validation (higher is better). Throws a string
+//if the SVM parameters are rejected by libsvm.
+double getCost(const vector<Vector2d<float> > &trainingFeatures)
+{
+  bool verbose=true;
+  unsigned short nFeatures=trainingFeatures[0][0].size();
+  //todo: set SVM parameters through command line options
+  struct svm_parameter param;
+  //todo: make parameter
+  unsigned int cv=2;//number of cross validation folds
+  param.svm_type = 0;//C-SVC
+  param.kernel_type = 2;//radial basis function
+  param.degree = 3;
+  param.gamma = 1.0/nFeatures;
+  param.coef0 = 0;
+  param.nu = 0.5;
+  param.cache_size = 100.0;
+  param.C = 1;
+  param.eps = 0.001;
+  param.p = 0.1;
+  param.shrinking = 0;
+  param.probability = 0;
+  param.nr_weight = 0;//not used: I use priors and balancing
+  param.weight_label = NULL;
+  param.weight = NULL;
+  struct svm_model* svm;
+  struct svm_problem prob;
+  struct svm_node* x_space;
+  unsigned int ntraining=0;
+  unsigned short nclass=trainingFeatures.size();
+
+  for(int iclass=0;iclass<nclass;++iclass)
+    ntraining+=trainingFeatures[iclass].size();
+
+  prob.l=ntraining;
+  prob.y = Malloc(double,prob.l);
+  prob.x = Malloc(struct svm_node *,prob.l);
+  //one extra node per sample for the index=-1 terminator libsvm requires
+  x_space = Malloc(struct svm_node,(nFeatures+1)*ntraining);
+  unsigned long int spaceIndex=0;
+  int lIndex=0;
+  for(int iclass=0;iclass<nclass;++iclass){
+    for(int isample=0;isample<trainingFeatures[iclass].size();++isample){
+      prob.x[lIndex]=&(x_space[spaceIndex]);
+      for(int ifeature=0;ifeature<nFeatures;++ifeature){
+        x_space[spaceIndex].index=ifeature+1;//libsvm indices are 1-based
+        x_space[spaceIndex].value=trainingFeatures[iclass][isample][ifeature];
+        ++spaceIndex;
+      }
+      x_space[spaceIndex++].index=-1;//sentinel closing this sample
+      prob.y[lIndex]=iclass;
+      ++lIndex;
+    }
+  }
+
+  assert(lIndex==prob.l);
+  if(verbose)
+    std::cout << "checking parameters" << std::endl;
+  //the return value was previously ignored: report invalid parameter
+  //combinations instead of silently training with them
+  const char *errorMsg=svm_check_parameter(&prob,&param);
+  if(errorMsg)
+    throw(string(errorMsg));
+  if(verbose)
+    std::cout << "parameters ok, training" << std::endl;
+  svm=svm_train(&prob,&param);
+  if(verbose)
+    std::cout << "SVM is now trained" << std::endl;
+
+  //accumulate the cross-validation confusion matrix
+  ConfusionMatrix cm(nclass);
+  double *target = Malloc(double,prob.l);
+  svm_cross_validation(&prob,&param,cv,target);
+  assert(param.svm_type != EPSILON_SVR&&param.svm_type != NU_SVR);//only for regression
+  for(int i=0;i<prob.l;i++)
+    cm.incrementResult(cm.getClass(prob.y[i]),cm.getClass(target[i]),1);
+  assert(cm.nReference());
+
+  // *NOTE* Because svm_model contains pointers to svm_problem, you can
+  // not free the memory used by svm_problem if you are still using the
+  // svm_model produced by svm_train().
+  // however, we will re-train the svm later on after the feature selection
+  free(target);
+  free(prob.y);
+  free(prob.x);
+  free(x_space);
+  svm_free_and_destroy_model(&(svm));
+  return(cm.kappa());
+}
+
+int main(int argc, char *argv[])
+{
+  //Feature selection for SVM classification: read training samples from a
+  //shape file, optionally reclass/balance/scale them, then select the best
+  //feature subset with FeatureSelector using getCost() as the criterion.
+  map<short,int> reclassMap;
+  vector<int> vreclass;
+  vector<double> priors;
+
+  //--------------------------- command line options ------------------------------------
+
+  std::string versionString="version ";
+  versionString+=VERSION;
+  versionString+=", Copyright (C) 2008-2012 Pieter Kempeneers.\n\
+ This program comes with ABSOLUTELY NO WARRANTY; for details type use option -h.\n\
+ This is free software, and you are welcome to redistribute it\n\
+ under certain conditions; use option --license for details.";
+  Optionpk<bool> version_opt("\0","version",versionString,false);
+  Optionpk<bool> license_opt("lic","license","show license information",false);
+  Optionpk<bool> help_opt("h","help","shows this help info",false);
+  Optionpk<bool> todo_opt("\0","todo","",false);
+  Optionpk<string> training_opt("t", "training", "training shape file. A single shape file contains all training features (must be set as: B0, B1, B2,...) for all classes (class numbers identified by label option). Use multiple training files for bootstrap aggregation (alternative to the bag and bsize options, where a random subset is taken from a single training file)");
+  Optionpk<string> label_opt("\0", "label", "identifier for class label in training shape file.","label");
+  Optionpk<unsigned short> maxFeatures_opt("n", "nf", "number of features to select (0 to select all)", 0);
+  Optionpk<unsigned short> reclass_opt("\0", "rc", "reclass code (e.g. --rc=12 --rc=23 to reclass first two classes to 12 and 23 resp.).", 0);
+  Optionpk<unsigned int> balance_opt("\0", "balance", "balance the input data to this number of samples for each class", 0);
+  Optionpk<int> minSize_opt("m", "min", "if number of training pixels is less then min, do not take this class into account", 0);
+  Optionpk<double> start_opt("s", "start", "start band sequence number (set to 0)",0);
+  Optionpk<double> end_opt("e", "end", "end band sequence number (set to 0 for all bands)", 0);
+  Optionpk<double> offset_opt("\0", "offset", "offset value for each spectral band input features: refl[band]=(DN[band]-offset[band])/scale[band]", 0.0);
+  Optionpk<double> scale_opt("\0", "scale", "scale value for each spectral band input features: refl=(DN[band]-offset[band])/scale[band] (use 0 if scale min and max in each band to -1.0 and 1.0)", 0.0);
+  Optionpk<unsigned short> aggreg_opt("a", "aggreg", "how to combine aggregated classifiers, see also rc option (0: no aggregation, 1: sum rule, 2: max rule).",0);
+  Optionpk<double> priors_opt("p", "prior", "prior probabilities for each class (e.g., -p 0.3 -p 0.3 -p 0.2 )", 0.0);
+  Optionpk<unsigned short> svm_type_opt("svmt", "svmtype", "type of SVM (0: C-SVC, 1: nu-SVC, 2: one-class SVM, 3: epsilon-SVR, 4: nu-SVR)",0);
+  Optionpk<unsigned short> kernel_type_opt("kt", "kerneltype", "type of kernel function (0: linear: u'*v, 1: polynomial: (gamma*u'*v + coef0)^degree, 2: radial basis function: exp(-gamma*|u-v|^2), 3: sigmoid: tanh(gamma*u'*v + coef0), 4: precomputed kernel (kernel values in training_set_file)",2);
+  Optionpk<unsigned short> kernel_degree_opt("kd", "kd", "degree in kernel function",3);
+  Optionpk<float> gamma_opt("g", "gamma", "gamma in kernel function",0);
+  Optionpk<float> coef0_opt("c0", "coef0", "coef0 in kernel function",0);
+  Optionpk<float> ccost_opt("cc", "ccost", "the parameter C of C-SVC, epsilon-SVR, and nu-SVR",1);
+  Optionpk<float> nu_opt("nu", "nu", "the parameter nu of nu-SVC, one-class SVM, and nu-SVR",0.5);
+  Optionpk<float> epsilon_loss_opt("eloss", "eloss", "the epsilon in loss function of epsilon-SVR",0.1);
+  Optionpk<int> cache_opt("cache", "cache", "cache memory size in MB",100);
+  Optionpk<float> epsilon_tol_opt("etol", "etol", "the tolerance of termination criterion",0.001);
+  Optionpk<bool> shrinking_opt("shrink", "shrink", "whether to use the shrinking heuristics",false);
+  Optionpk<bool> prob_est_opt("pe", "probest", "whether to train a SVC or SVR model for probability estimates",false);
+  // Optionpk<bool> weight_opt("wi", "wi", "set the parameter C of class i to weight*C, for C-SVC",true);
+  Optionpk<unsigned int> cv_opt("cv", "cv", "n-fold cross validation mode",2);
+  Optionpk<short> verbose_opt("v", "verbose", "set to: 0 (results only), 1 (confusion matrix), 2 (debug)",0);
+
+  version_opt.retrieveOption(argc,argv);
+  license_opt.retrieveOption(argc,argv);
+  help_opt.retrieveOption(argc,argv);
+  todo_opt.retrieveOption(argc,argv);
+  training_opt.retrieveOption(argc,argv);
+  maxFeatures_opt.retrieveOption(argc,argv);
+  label_opt.retrieveOption(argc,argv);
+  reclass_opt.retrieveOption(argc,argv);
+  balance_opt.retrieveOption(argc,argv);
+  minSize_opt.retrieveOption(argc,argv);
+  start_opt.retrieveOption(argc,argv);
+  end_opt.retrieveOption(argc,argv);
+  offset_opt.retrieveOption(argc,argv);
+  scale_opt.retrieveOption(argc,argv);
+  aggreg_opt.retrieveOption(argc,argv);
+  priors_opt.retrieveOption(argc,argv);
+  svm_type_opt.retrieveOption(argc,argv);
+  kernel_type_opt.retrieveOption(argc,argv);
+  kernel_degree_opt.retrieveOption(argc,argv);
+  gamma_opt.retrieveOption(argc,argv);
+  coef0_opt.retrieveOption(argc,argv);
+  ccost_opt.retrieveOption(argc,argv);
+  nu_opt.retrieveOption(argc,argv);
+  epsilon_loss_opt.retrieveOption(argc,argv);
+  cache_opt.retrieveOption(argc,argv);
+  epsilon_tol_opt.retrieveOption(argc,argv);
+  shrinking_opt.retrieveOption(argc,argv);
+  prob_est_opt.retrieveOption(argc,argv);
+  cv_opt.retrieveOption(argc,argv);
+  verbose_opt.retrieveOption(argc,argv);
+
+  if(version_opt[0]||todo_opt[0]){
+    std::cout << version_opt.getHelp() << std::endl;
+    std::cout << "todo: " << todo_opt.getHelp() << std::endl;
+    exit(0);
+  }
+  if(license_opt[0]){
+    std::cout << Optionpk<bool>::getGPLv3License() << std::endl;
+    exit(0);
+  }
+  if(help_opt[0]){
+    std::cout << "usage: pkfs_svm -t training [OPTIONS]" << std::endl;
+    exit(0);
+  }
+
+  assert(training_opt[0].size());
+  if(verbose_opt[0]>=1)
+    std::cout << "training shape file: " << training_opt[0] << std::endl;
+
+  unsigned int totalSamples=0;
+  int nreclass=0;
+  vector<int> vcode;//unique class codes in recode string
+
+  unsigned short nclass=0;
+  int nband=0;
+  int startBand=2;//first two bands represent X and Y pos
+
+  vector<double> offset;
+  vector<double> scale;
+  vector< Vector2d<float> > trainingPixels;//[class][sample][band]
+
+  if(reclass_opt.size()>1){//reclass codes given on the command line
+    vreclass.resize(reclass_opt.size());
+    for(int iclass=0;iclass<reclass_opt.size();++iclass){
+      reclassMap[iclass]=reclass_opt[iclass];
+      vreclass[iclass]=reclass_opt[iclass];
+    }
+  }
+  if(priors_opt.size()>1){//priors from argument list
+    priors.resize(priors_opt.size());
+    double normPrior=0;
+    for(int iclass=0;iclass<priors_opt.size();++iclass){
+      priors[iclass]=priors_opt[iclass];
+      normPrior+=priors[iclass];
+    }
+    //normalize
+    for(int iclass=0;iclass<priors_opt.size();++iclass)
+      priors[iclass]/=normPrior;
+  }
+
+  //----------------------------------- Training -------------------------------
+  vector<string> fields;
+  //organize training data
+  trainingPixels.clear();
+  map<int,Vector2d<float> > trainingMap;
+  if(verbose_opt[0]>=1)
+    std::cout << "reading imageShape file " << training_opt[0] << std::endl;
+  try{
+    totalSamples=readDataImageShape(training_opt[0],trainingMap,fields,start_opt[0],end_opt[0],label_opt[0],verbose_opt[0]);
+    if(trainingMap.size()<2){
+      string errorstring="Error: could not read at least two classes from training file";
+      throw(errorstring);
+    }
+  }
+  catch(string error){
+    cerr << error << std::endl;
+    exit(1);
+  }
+  catch(...){
+    cerr << "error catched" << std::endl;
+    exit(1);
+  }
+  //delete class 0
+  if(verbose_opt[0]>=1)
+    std::cout << "erasing class 0 from training set (" << trainingMap[0].size() << " from " << totalSamples << ") samples" << std::endl;
+  totalSamples-=trainingMap[0].size();
+  trainingMap.erase(0);
+  //convert map to vector
+  short iclass=0;
+  if(reclass_opt.size()==1){//no reclass option, read classes from shape
+    reclassMap.clear();
+    vreclass.clear();
+  }
+  if(verbose_opt[0]>1)
+    std::cout << "training pixels: " << std::endl;
+  map<int,Vector2d<float> >::iterator mapit=trainingMap.begin();
+  while(mapit!=trainingMap.end()){
+    //delete small classes
+    if((mapit->second).size()<minSize_opt[0]){
+      //bug fix: post-increment before erasing; erase(mapit) invalidates the
+      //iterator that the loop condition would dereference next
+      trainingMap.erase(mapit++);
+      continue;
+      //todo: beware of reclass option: delete this reclass if no samples are left in this classes!!
+    }
+    if(reclass_opt.size()==1){//no reclass option, read classes from shape
+      reclassMap[iclass]=(mapit->first);
+      vreclass.push_back(mapit->first);
+    }
+    trainingPixels.push_back(mapit->second);
+    if(verbose_opt[0]>1)
+      std::cout << mapit->first << ": " << (mapit->second).size() << " samples" << std::endl;
+    ++iclass;
+    ++mapit;
+  }
+  nclass=trainingPixels.size();
+  nband=trainingPixels[0][0].size()-2;//X and Y
+  assert(reclassMap.size()==nclass);
+
+  //do not remove outliers here: could easily be obtained through ogr2ogr -where 'B2<110' output.shp input.shp
+  //balance training data
+  if(balance_opt[0]>0){
+    //seed the generator (the original guard "if(random)" tested the address
+    //of the C library function random(), which is never null)
+    srand(time(NULL));
+    totalSamples=0;
+    for(int iclass=0;iclass<nclass;++iclass){
+      if(trainingPixels[iclass].size()>balance_opt[0]){
+        //undersample: randomly remove samples until the target size is reached
+        while(trainingPixels[iclass].size()>balance_opt[0]){
+          int index=rand()%trainingPixels[iclass].size();
+          trainingPixels[iclass].erase(trainingPixels[iclass].begin()+index);
+        }
+      }
+      else{
+        //oversample: randomly duplicate existing samples
+        int oldsize=trainingPixels[iclass].size();
+        for(int isample=trainingPixels[iclass].size();isample<balance_opt[0];++isample){
+          int index = rand()%oldsize;
+          trainingPixels[iclass].push_back(trainingPixels[iclass][index]);
+        }
+      }
+      totalSamples+=trainingPixels[iclass].size();
+    }
+    assert(totalSamples==nclass*balance_opt[0]);
+  }
+
+  //set scale and offset
+  offset.resize(nband);
+  scale.resize(nband);
+  if(offset_opt.size()>1)
+    assert(offset_opt.size()==nband);
+  if(scale_opt.size()>1)
+    assert(scale_opt.size()==nband);
+  for(int iband=0;iband<nband;++iband){
+    if(verbose_opt[0]>=1)
+      std::cout << "scaling for band" << iband << std::endl;
+    offset[iband]=(offset_opt.size()==1)?offset_opt[0]:offset_opt[iband];
+    scale[iband]=(scale_opt.size()==1)?scale_opt[0]:scale_opt[iband];
+    //search for min and maximum
+    if(scale[iband]<=0){
+      //no scale given: derive offset/scale from the training extremes so the
+      //scaled values span [-1,1]
+      float theMin=trainingPixels[0][0][iband+startBand];
+      float theMax=trainingPixels[0][0][iband+startBand];
+      for(int iclass=0;iclass<nclass;++iclass){
+        for(int isample=0;isample<trainingPixels[iclass].size();++isample){
+          if(theMin>trainingPixels[iclass][isample][iband+startBand])
+            theMin=trainingPixels[iclass][isample][iband+startBand];
+          if(theMax<trainingPixels[iclass][isample][iband+startBand])
+            theMax=trainingPixels[iclass][isample][iband+startBand];
+        }
+      }
+      offset[iband]=theMin+(theMax-theMin)/2.0;
+      scale[iband]=(theMax-theMin)/2.0;
+      if(verbose_opt[0]>=1){
+        std::cout << "Extreme image values for band " << iband << ": [" << theMin << "," << theMax << "]" << std::endl;
+        std::cout << "Using offset, scale: " << offset[iband] << ", " << scale[iband] << std::endl;
+        std::cout << "scaled values for band " << iband << ": [" << (theMin-offset[iband])/scale[iband] << "," << (theMax-offset[iband])/scale[iband] << "]" << std::endl;
+      }
+    }
+  }
+
+  //recode vreclass to ordered vector, starting from 0 to nreclass
+  vcode.clear();
+  if(verbose_opt[0]>=1){
+    std::cout << "before recoding: " << std::endl;
+    for(int iclass = 0; iclass < vreclass.size(); iclass++)
+      std::cout << " " << vreclass[iclass];
+    std::cout << std::endl;
+  }
+  vector<int> vord=vreclass;//ordered vector, starting from 0 to nreclass
+  map<short,int> mreclass;
+  //NOTE(review): iclass is not reset before this loop, so the recoded labels
+  //continue from its current value (nclass) instead of starting at 0; they
+  //are still consecutive and unique -- confirm this is intended
+  for(int ic=0;ic<vreclass.size();++ic){
+    if(mreclass.find(vreclass[ic])==mreclass.end())
+      mreclass[vreclass[ic]]=iclass++;
+  }
+  for(int ic=0;ic<vreclass.size();++ic)
+    vord[ic]=mreclass[vreclass[ic]];
+  //construct unique class codes
+  while(!vreclass.empty()){
+    vcode.push_back(*(vreclass.begin()));
+    //delete all these entries from vreclass
+    vector<int>::iterator vit;
+    while((vit=find(vreclass.begin(),vreclass.end(),vcode.back()))!=vreclass.end())
+      vreclass.erase(vit);
+  }
+  if(verbose_opt[0]>=1){
+    std::cout << "recode values: " << std::endl;
+    for(int icode=0;icode<vcode.size();++icode)
+      std::cout << vcode[icode] << " ";
+    std::cout << std::endl;
+  }
+  vreclass=vord;
+  if(verbose_opt[0]>=1){
+    std::cout << "after recoding: " << std::endl;
+    for(int iclass = 0; iclass < vord.size(); iclass++)
+      std::cout << " " << vord[iclass];
+    std::cout << std::endl;
+  }
+
+  vector<int> vuniqueclass=vreclass;
+  //remove duplicate elements from vuniqueclass
+  sort( vuniqueclass.begin(), vuniqueclass.end() );
+  vuniqueclass.erase( unique( vuniqueclass.begin(), vuniqueclass.end() ), vuniqueclass.end() );
+  nreclass=vuniqueclass.size();
+  if(verbose_opt[0]>=1){
+    std::cout << "unique classes: " << std::endl;
+    for(int iclass = 0; iclass < vuniqueclass.size(); iclass++)
+      std::cout << " " << vuniqueclass[iclass];
+    std::cout << std::endl;
+    std::cout << "number of reclasses: " << nreclass << std::endl;
+  }
+
+  if(priors_opt.size()==1){//default: equal priors for each class
+    priors.resize(nclass);
+    for(int iclass=0;iclass<nclass;++iclass)
+      priors[iclass]=1.0/nclass;
+  }
+  assert(priors_opt.size()==1||priors_opt.size()==nclass);
+
+  if(verbose_opt[0]>=1){
+    std::cout << "number of bands: " << nband << std::endl;
+    std::cout << "number of classes: " << nclass << std::endl;
+    std::cout << "priors:";
+    for(int iclass=0;iclass<nclass;++iclass)
+      std::cout << " " << priors[iclass];
+    std::cout << std::endl;
+  }
+
+  //Calculate features of training set: scaled band values per class/sample
+  vector< Vector2d<float> > trainingFeatures(nclass);
+  for(int iclass=0;iclass<nclass;++iclass){
+    int nctraining=0;
+    if(verbose_opt[0]>=1)
+      std::cout << "calculating features for class " << iclass << std::endl;
+    srand(time(NULL));//original guard "if(random)" was always true
+    nctraining=trainingPixels[iclass].size();
+    if(verbose_opt[0]>=1)
+      std::cout << "nctraining class " << iclass << ": " << nctraining << std::endl;
+
+    trainingFeatures[iclass].resize(nctraining);
+    for(int isample=0;isample<nctraining;++isample){
+      //scale pixel values according to scale and offset!!!
+      for(int iband=0;iband<nband;++iband){
+        assert(trainingPixels[iclass].size()>isample);
+        assert(trainingPixels[iclass][isample].size()>iband+startBand);
+        assert(offset.size()>iband);
+        assert(scale.size()>iband);
+        float value=trainingPixels[iclass][isample][iband+startBand];
+        trainingFeatures[iclass][isample].push_back((value-offset[iband])/scale[iband]);
+      }
+    }
+    assert(trainingFeatures[iclass].size()==nctraining);
+  }
+
+  unsigned int ntraining=0;
+  for(int iclass=0;iclass<nclass;++iclass){
+    if(verbose_opt[0]>=1)
+      std::cout << "training sample size for class " << vcode[iclass] << ": " << trainingFeatures[iclass].size() << std::endl;
+    ntraining+=trainingFeatures[iclass].size();
+  }
+
+  int nFeatures=trainingFeatures[0][0].size();
+  int maxFeatures=(maxFeatures_opt[0])? maxFeatures_opt[0] : nFeatures;
+  list<int> subset;//set of selected features (levels) for each class combination
+  double cost=0;
+  FeatureSelector selector;
+  if(maxFeatures==nFeatures){
+    //nothing to select: keep all features
+    subset.clear();
+    for(int ifeature=0;ifeature<nFeatures;++ifeature)
+      subset.push_back(ifeature);
+  }
+  else{
+    try{
+      cost=selector.floating(trainingFeatures,&getCost,subset,maxFeatures,(verbose_opt[0]>0) ? true:false);
+    }
+    catch(...){
+      std::cout << "catched feature selection" << std::endl;
+      exit(1);
+    }
+  }
+  if(verbose_opt[0]){
+    cout <<"cost: " << cost << endl;
+    cout << "levels: ";
+    for(list<int>::const_iterator lit=subset.begin();lit!=subset.end();++lit){
+      cout << *lit;
+      if((*(lit))!=subset.back())
+        cout <<",";
+      else
+        cout << endl;
+    }
+  }
+
+  return 0;
+}
+
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/pkg-grass/pktools.git
More information about the Pkg-grass-devel
mailing list