[pktools] 91/375: FeatureSelector.h: introduced epsilon to exclude rat race in sffs
Bas Couwenberg
sebastic at xs4all.nl
Wed Dec 3 21:54:03 UTC 2014
This is an automated email from the git hooks/post-receive script.
sebastic-guest pushed a commit to branch upstream-master
in repository pktools.
commit cd9c58179294f0215807efe33a3cc47cb0e81205
Author: user <user at osgeolive.(none)>
Date: Sun Apr 21 22:35:45 2013 +0200
FeatureSelector.h: introduced epsilon to exclude rat race in sffs
---
src/algorithms/FeatureSelector.h | 3 +-
src/apps/pkclassify_svm.cc | 14 +--
src/apps/pkfs_svm.cc | 189 +++++++++++++++++++++++-------
src/apps/pkopt_svm.cc | 242 +++++++++++++++++++++++----------------
4 files changed, 296 insertions(+), 152 deletions(-)
diff --git a/src/algorithms/FeatureSelector.h b/src/algorithms/FeatureSelector.h
index fcd3f15..51f5f6e 100644
--- a/src/algorithms/FeatureSelector.h
+++ b/src/algorithms/FeatureSelector.h
@@ -152,6 +152,7 @@ template<class T> double FeatureSelector::backward(vector< Vector2d<T> >& v, dou
//floating search
template<class T> double FeatureSelector::floating(vector< Vector2d<T> >& v, double (*getCost)(const vector< Vector2d<T> >&), list<int>& subset, int maxFeatures, short verbose){
+ double epsilon=0.001;
vector<T> cost;
int maxLevels=v[0][0].size();
if(maxFeatures<1)
@@ -183,7 +184,7 @@ template<class T> double FeatureSelector::floating(vector< Vector2d<T> >& v, dou
while(k>1){
int x_r;
double cost_r=removeFeature(v,*getCost,subset,x_r,verbose);
- if(cost_r>cost[k-1]){
+ if(cost_r>cost[k-1]+epsilon){
--k;
cost[k]=cost_r;
cost.pop_back();
diff --git a/src/apps/pkclassify_svm.cc b/src/apps/pkclassify_svm.cc
index 6f4c36c..b4f360f 100644
--- a/src/apps/pkclassify_svm.cc
+++ b/src/apps/pkclassify_svm.cc
@@ -182,18 +182,16 @@ int main(int argc, char *argv[])
exit(0);//help was invoked, stop processing
}
+ assert(training_opt.size());
+
if(verbose_opt[0]>=1){
if(input_opt.size())
- std::cout << "image filename: " << input_opt[0] << std::endl;
+ std::cout << "input filename: " << input_opt[0] << std::endl;
if(mask_opt.size())
std::cout << "mask filename: " << mask_opt[0] << std::endl;
- if(training_opt[0].size()){
- std::cout << "training shape file: " << std::endl;
- for(int ifile=0;ifile<training_opt.size();++ifile)
- std::cout << training_opt[ifile] << std::endl;
- }
- else
- cerr << "no training file set!" << std::endl;
+ std::cout << "training shape file: " << std::endl;
+ for(int ifile=0;ifile<training_opt.size();++ifile)
+ std::cout << training_opt[ifile] << std::endl;
std::cout << "verbose: " << verbose_opt[0] << std::endl;
}
unsigned short nbag=(training_opt.size()>1)?training_opt.size():bag_opt[0];
diff --git a/src/apps/pkfs_svm.cc b/src/apps/pkfs_svm.cc
index d4d125c..37f531e 100644
--- a/src/apps/pkfs_svm.cc
+++ b/src/apps/pkfs_svm.cc
@@ -34,6 +34,8 @@ enum SelectorValue { NA=0, SFFS=1, SFS=2, SBS=3, BFS=4 };
//global parameters used in cost function getCost
map<string,short> classValueMap;
vector<std::string> nameVector;
+vector<unsigned int> nctraining;
+vector<unsigned int> nctest;
Optionpk<unsigned short> svm_type_opt("svmt", "svmtype", "type of SVM (0: C-SVC, 1: nu-SVC, 2: one-class SVM, 3: epsilon-SVR, 4: nu-SVR)",0);
Optionpk<unsigned short> kernel_type_opt("kt", "kerneltype", "type of kernel function (0: linear: u'*v, 1: polynomial: (gamma*u'*v + coef0)^degree, 2: radial basis function: exp(-gamma*(u-v)^2), 3: sigmoid: tanh(gamma*u'*v + coef0), 4: precomputed kernel (kernel values in training_set_file)",2);
Optionpk<unsigned short> kernel_degree_opt("kd", "kd", "degree in kernel function",3);
@@ -57,8 +59,16 @@ double getCost(const vector<Vector2d<float> > &trainingFeatures)
{
unsigned short nclass=trainingFeatures.size();
unsigned int ntraining=0;
- for(int iclass=0;iclass<nclass;++iclass)
- ntraining+=trainingFeatures[iclass].size();
+ unsigned int ntest=0;
+ for(int iclass=0;iclass<nclass;++iclass){
+ // ntraining+=trainingFeatures[iclass].size();
+ ntraining+=nctraining[iclass];
+ ntest+=nctest[iclass];
+ }
+ if(ntest)
+ assert(!cv_opt[0]);
+ if(!cv_opt[0])
+ assert(ntest);
unsigned short nFeatures=trainingFeatures[0][0].size();
struct svm_parameter param;
@@ -89,7 +99,8 @@ double getCost(const vector<Vector2d<float> > &trainingFeatures)
unsigned long int spaceIndex=0;
int lIndex=0;
for(int iclass=0;iclass<nclass;++iclass){
- for(int isample=0;isample<trainingFeatures[iclass].size();++isample){
+ // for(int isample=0;isample<trainingFeatures[iclass].size();++isample){
+ for(int isample=0;isample<nctraining[iclass];++isample){
prob.x[lIndex]=&(x_space[spaceIndex]);
for(int ifeature=0;ifeature<nFeatures;++ifeature){
x_space[spaceIndex].index=ifeature+1;
@@ -115,6 +126,8 @@ double getCost(const vector<Vector2d<float> > &trainingFeatures)
if(cv_opt[0]>0){
//todo: distinct between independent test input and cross validation
}
+ else{
+ }
ConfusionMatrix cm;
//set names in confusion matrix using nameVector
for(int iname=0;iname<nameVector.size();++iname){
@@ -123,19 +136,46 @@ double getCost(const vector<Vector2d<float> > &trainingFeatures)
else if(cm.getClassIndex(type2string<short>(classValueMap[nameVector[iname]]))<0)
cm.pushBackClassName(type2string<short>(classValueMap[nameVector[iname]]));
}
-
- double *target = Malloc(double,prob.l);
- svm_cross_validation(&prob,¶m,cv_opt[0],target);
- assert(param.svm_type != EPSILON_SVR&¶m.svm_type != NU_SVR);//only for regression
- int total_correct=0;
- for(int i=0;i<prob.l;i++){
- string refClassName=nameVector[prob.y[i]];
- string className=nameVector[target[i]];
- if(classValueMap.size())
- cm.incrementResult(type2string<short>(classValueMap[refClassName]),type2string<short>(classValueMap[className]),1.0);
- else
- cm.incrementResult(cm.getClass(prob.y[i]),cm.getClass(target[i]),1.0);
+ if(cv_opt[0]>0){
+ double *target = Malloc(double,prob.l);
+ svm_cross_validation(&prob,¶m,cv_opt[0],target);
+ assert(param.svm_type != EPSILON_SVR&¶m.svm_type != NU_SVR);//only for regression
+ int total_correct=0;
+ for(int i=0;i<prob.l;i++){
+ string refClassName=nameVector[prob.y[i]];
+ string className=nameVector[target[i]];
+ if(classValueMap.size())
+ cm.incrementResult(type2string<short>(classValueMap[refClassName]),type2string<short>(classValueMap[className]),1.0);
+ else
+ cm.incrementResult(cm.getClass(prob.y[i]),cm.getClass(target[i]),1.0);
+ }
+ free(target);
}
+ else{
+ struct svm_node *x_test;
+ x_test = Malloc(struct svm_node,(nFeatures+1));
+ for(int iclass=0;iclass<nclass;++iclass){
+ for(int isample=0;isample<nctest[iclass];++isample){
+ for(int ifeature=0;ifeature<nFeatures;++ifeature){
+ x_test[ifeature].index=ifeature+1;
+ x_test[ifeature].value=trainingFeatures[iclass][nctraining[iclass]+isample][ifeature];
+ }
+ x_test[nFeatures].index=-1;
+ double predict_label=0;
+ //todo: make distinction between svm_predict and svm_predict_probability?
+ predict_label = svm_predict(svm,x_test);
+ string refClassName=nameVector[iclass];
+ string className=nameVector[static_cast<short>(predict_label)];
+ if(classValueMap.size())
+ cm.incrementResult(type2string<short>(classValueMap[refClassName]),type2string<short>(classValueMap[className]),1.0);
+ else
+ cm.incrementResult(refClassName,className,1.0);
+ }
+ }
+ free(x_test);
+ }
+ if(verbose_opt[0]>1)
+ std::cout << cm << std::endl;
assert(cm.nReference());
// if(verbose_opt[0])
// std::cout << cm << std::endl;
@@ -149,7 +189,6 @@ double getCost(const vector<Vector2d<float> > &trainingFeatures)
// not free the memory used by svm_problem if you are still using the
// svm_model produced by svm_train().
// however, we will re-train the svm later on after the feature selection
- free(target);
free(prob.y);
free(prob.x);
free(x_space);
@@ -162,6 +201,7 @@ int main(int argc, char *argv[])
// vector<double> priors;
//--------------------------- command line options ------------------------------------
+ Optionpk<string> input_opt("i", "input", "input image");
Optionpk<string> training_opt("t", "training", "training shape file. A single shape file contains all training features (must be set as: B0, B1, B2,...) for all classes (class numbers identified by label option).");
Optionpk<string> label_opt("\0", "label", "identifier for class label in training shape file.","label");
Optionpk<unsigned short> maxFeatures_opt("n", "nf", "number of features to select (0 to select optimal number, see also ecost option)", 0);
@@ -178,7 +218,8 @@ int main(int argc, char *argv[])
bool doProcess;//stop process when program was invoked with help option (-h --help)
try{
- doProcess=training_opt.retrieveOption(argc,argv);
+ doProcess=input_opt.retrieveOption(argc,argv);
+ training_opt.retrieveOption(argc,argv);
maxFeatures_opt.retrieveOption(argc,argv);
label_opt.retrieveOption(argc,argv);
// reclass_opt.retrieveOption(argc,argv);
@@ -218,6 +259,19 @@ int main(int argc, char *argv[])
exit(0);//help was invoked, stop processing
}
+ assert(training_opt.size());
+ if(input_opt.size())
+ cv_opt[0]=0;
+
+ if(verbose_opt[0]>=1){
+ if(input_opt.size())
+ std::cout << "input filename: " << input_opt[0] << std::endl;
+ std::cout << "training shape file: " << std::endl;
+ for(int ifile=0;ifile<training_opt.size();++ifile)
+ std::cout << training_opt[ifile] << std::endl;
+ std::cout << "verbose: " << verbose_opt[0] << std::endl;
+ }
+
static std::map<std::string, SelectorValue> selMap;
//initialize selMap
selMap["sffs"]=SFFS;
@@ -225,13 +279,8 @@ int main(int argc, char *argv[])
selMap["sbs"]=SBS;
selMap["bfs"]=BFS;
- assert(training_opt[0].size());
- if(verbose_opt[0]>=1)
- std::cout << "training shape file: " << training_opt[0] << std::endl;
-
unsigned int totalSamples=0;
- // int nreclass=0;
- // vector<int> vcode;//unique class codes in recode string
+ unsigned int totalTestSamples=0;
unsigned short nclass=0;
int nband=0;
@@ -240,6 +289,7 @@ int main(int argc, char *argv[])
vector<double> offset;
vector<double> scale;
vector< Vector2d<float> > trainingPixels;//[class][sample][band]
+ vector< Vector2d<float> > testPixels;//[class][sample][band]
// if(priors_opt.size()>1){//priors from argument list
// priors.resize(priors_opt.size());
@@ -269,16 +319,28 @@ int main(int argc, char *argv[])
vector<string> fields;
//organize training data
trainingPixels.clear();
+ testPixels.clear();
map<string,Vector2d<float> > trainingMap;
+ map<string,Vector2d<float> > testMap;
if(verbose_opt[0]>=1)
- std::cout << "reading imageShape file " << training_opt[0] << std::endl;
+ std::cout << "reading training file " << training_opt[0] << std::endl;
try{
- if(band_opt.size())
+ if(band_opt.size()){
totalSamples=readDataImageShape(training_opt[0],trainingMap,fields,band_opt,label_opt[0],verbose_opt[0]);
- else
+ if(input_opt.size())
+ totalTestSamples=readDataImageShape(input_opt[0],testMap,fields,band_opt,label_opt[0],verbose_opt[0]);
+ }
+ else{
totalSamples=readDataImageShape(training_opt[0],trainingMap,fields,start_opt[0],end_opt[0],label_opt[0],verbose_opt[0]);
+ if(input_opt.size())
+ totalTestSamples=readDataImageShape(input_opt[0],testMap,fields,start_opt[0],end_opt[0],label_opt[0],verbose_opt[0]);
+ }
if(trainingMap.size()<2){
- string errorstring="Error: could not read at least two classes from training file";
+ string errorstring="Error: could not read at least two classes from training input file";
+ throw(errorstring);
+ }
+ if(input_opt.size()&&testMap.size()<2){
+ string errorstring="Error: could not read at least two classes from test input file";
throw(errorstring);
}
}
@@ -327,7 +389,30 @@ int main(int argc, char *argv[])
if(classname_opt.size())
assert(nclass==classname_opt.size());
nband=trainingPixels[0][0].size()-2;//X and Y//trainingPixels[0][0].size();
- // assert(reclassMap.size()==nclass);
+
+ mapit=testMap.begin();
+ while(mapit!=testMap.end()){
+ if(classValueMap.size()){
+ //check if name in test is covered by classname_opt (values can not be 0)
+ if(classValueMap[mapit->first]>0){
+ ;//ok, no need to print to std::cout
+ }
+ else{
+ std::cerr << "Error: names in classname option are not complete, please check names in test vector and make sure classvalue is > 0" << std::endl;
+ exit(1);
+ }
+ }
+ //no need to delete small classes for test sample
+ testPixels.push_back(mapit->second);
+ if(verbose_opt[0]>1)
+ std::cout << mapit->first << ": " << (mapit->second).size() << " samples" << std::endl;
+ ++mapit;
+ }
+ if(input_opt.size()){
+ assert(nclass==testPixels.size());
+ assert(nband=testPixels[0][0].size()-2);//X and Y//testPixels[0][0].size();
+ assert(!cv_opt[0]);
+ }
//do not remove outliers here: could easily be obtained through ogr2ogr -where 'B2<110' output.shp input.shp
//balance training data
@@ -414,21 +499,27 @@ int main(int argc, char *argv[])
// }
- //Calculate features of trainig set
+ //Calculate features of training (and test) set
+ nctraining.resize(nclass);
+ nctest.resize(nclass);
vector< Vector2d<float> > trainingFeatures(nclass);
for(int iclass=0;iclass<nclass;++iclass){
- int nctraining=0;
if(verbose_opt[0]>=1)
std::cout << "calculating features for class " << iclass << std::endl;
- if(random)
- srand(time(NULL));
- nctraining=trainingPixels[iclass].size();//bagSize_opt[0] given in % of training size
+ nctraining[iclass]=trainingPixels[iclass].size();
if(verbose_opt[0]>=1)
- std::cout << "nctraining class " << iclass << ": " << nctraining << std::endl;
- int index=0;
-
- trainingFeatures[iclass].resize(nctraining);
- for(int isample=0;isample<nctraining;++isample){
+ std::cout << "nctraining[" << iclass << "]: " << nctraining[iclass] << std::endl;
+ if(testPixels.size()>iclass){
+ nctest[iclass]=testPixels[iclass].size();
+ if(verbose_opt[0]>=1){
+ std::cout << "nctest[" << iclass << "]: " << nctest[iclass] << std::endl;
+ }
+ }
+ else
+ nctest[iclass]=0;
+ // trainingFeatures[iclass].resize(nctraining);
+ trainingFeatures[iclass].resize(nctraining[iclass]+nctest[iclass]);
+ for(int isample=0;isample<nctraining[iclass];++isample){
//scale pixel values according to scale and offset!!!
for(int iband=0;iband<nband;++iband){
assert(trainingPixels[iclass].size()>isample);
@@ -439,13 +530,22 @@ int main(int argc, char *argv[])
trainingFeatures[iclass][isample].push_back((value-offset[iband])/scale[iband]);
}
}
- assert(trainingFeatures[iclass].size()==nctraining);
+ // assert(trainingFeatures[iclass].size()==nctraining[iclass]);
+ for(int isample=0;isample<nctest[iclass];++isample){
+ //scale pixel values according to scale and offset!!!
+ for(int iband=0;iband<nband;++iband){
+ assert(testPixels[iclass].size()>isample);
+ assert(testPixels[iclass][isample].size()>iband+startBand);
+ assert(offset.size()>iband);
+ assert(scale.size()>iband);
+ float value=testPixels[iclass][isample][iband+startBand];
+ // testFeatures[iclass][isample].push_back((value-offset[iband])/scale[iband]);
+ trainingFeatures[iclass][nctraining[iclass]+isample].push_back((value-offset[iband])/scale[iband]);
+ }
+ }
+ assert(trainingFeatures[iclass].size()==nctraining[iclass]+nctest[iclass]);
}
- unsigned int ntraining=0;
- for(int iclass=0;iclass<nclass;++iclass)
- ntraining+=trainingFeatures[iclass].size();
-
int nFeatures=trainingFeatures[0][0].size();
int maxFeatures=(maxFeatures_opt[0])? maxFeatures_opt[0] : 1;
double previousCost=-1;
@@ -465,7 +565,8 @@ int main(int argc, char *argv[])
switch(selMap[selector_opt[0]]){
case(SFFS):
subset.clear();//needed to clear in case of floating and brute force search
- cost=selector.floating(trainingFeatures,&getCost,subset,maxFeatures,verbose_opt[0]);
+ //test
+ cost=selector.floating(trainingFeatures,&getCost,subset,maxFeatures,2);//verbose_opt[0]);
break;
case(SFS):
cost=selector.forward(trainingFeatures,&getCost,subset,maxFeatures,verbose_opt[0]);
diff --git a/src/apps/pkopt_svm.cc b/src/apps/pkopt_svm.cc
index a6b95c0..121b7a9 100644
--- a/src/apps/pkopt_svm.cc
+++ b/src/apps/pkopt_svm.cc
@@ -38,6 +38,8 @@ double objFunction(const std::vector<double> &x, std::vector<double> &grad, void
//global parameters used in objective function
map<string,short> classValueMap;
vector<std::string> nameVector;
+vector<unsigned int> nctraining;
+vector<unsigned int> nctest;
Optionpk<unsigned short> svm_type_opt("svmt", "svmtype", "type of SVM (0: C-SVC, 1: nu-SVC, 2: one-class SVM, 3: epsilon-SVR, 4: nu-SVR)",0);
Optionpk<unsigned short> kernel_type_opt("kt", "kerneltype", "type of kernel function (0: linear: u'*v, 1: polynomial: (gamma*u'*v + coef0)^degree, 2: radial basis function: exp(-gamma*(u-v)^2), 3: sigmoid: tanh(gamma*u'*v + coef0), 4: precomputed kernel (kernel values in training_set_file)",2);
Optionpk<unsigned short> kernel_degree_opt("kd", "kd", "degree in kernel function",3);
@@ -66,8 +68,16 @@ double objFunction(const std::vector<double> &x, std::vector<double> &grad, void
//todo: calculate kappa using cross validation
unsigned short nclass=tf->size();
unsigned int ntraining=0;
- for(int iclass=0;iclass<nclass;++iclass)
- ntraining+=(*tf)[iclass].size();
+ unsigned int ntest=0;
+ for(int iclass=0;iclass<nclass;++iclass){
+ ntraining+=nctraining[iclass];
+ ntest+=nctest[iclass];
+ }
+ if(ntest)
+ assert(!cv_opt[0]);
+ if(!cv_opt[0])
+ assert(ntest);
+ // ntraining+=(*tf)[iclass].size();
unsigned short nFeatures=(*tf)[0][0].size();
struct svm_parameter param;
param.svm_type = svm_type_opt[0];
@@ -96,7 +106,8 @@ double objFunction(const std::vector<double> &x, std::vector<double> &grad, void
unsigned long int spaceIndex=0;
int lIndex=0;
for(int iclass=0;iclass<nclass;++iclass){
- for(int isample=0;isample<(*tf)[iclass].size();++isample){
+ // for(int isample=0;isample<(*tf)[iclass].size();++isample){
+ for(int isample=0;isample<nctraining[iclass];++isample){
prob.x[lIndex]=&(x_space[spaceIndex]);
for(int ifeature=0;ifeature<nFeatures;++ifeature){
x_space[spaceIndex].index=ifeature+1;
@@ -119,9 +130,6 @@ double objFunction(const std::vector<double> &x, std::vector<double> &grad, void
if(verbose_opt[0]>2)
std::cout << "SVM is now trained" << std::endl;
- if(cv_opt[0]>0){
- //todo: distinct between independent test input and cross validation
- }
ConfusionMatrix cm;
//set names in confusion matrix using nameVector
for(int iname=0;iname<nameVector.size();++iname){
@@ -130,23 +138,46 @@ double objFunction(const std::vector<double> &x, std::vector<double> &grad, void
else if(cm.getClassIndex(type2string<short>(classValueMap[nameVector[iname]]))<0)
cm.pushBackClassName(type2string<short>(classValueMap[nameVector[iname]]));
}
-
- double *target = Malloc(double,prob.l);
- svm_cross_validation(&prob,¶m,cv_opt[0],target);
- assert(param.svm_type != EPSILON_SVR&¶m.svm_type != NU_SVR);//only for regression
- int total_correct=0;
- for(int i=0;i<prob.l;i++){
- string refClassName=nameVector[prob.y[i]];
- string className=nameVector[target[i]];
- if(classValueMap.size())
- cm.incrementResult(type2string<short>(classValueMap[refClassName]),type2string<short>(classValueMap[className]),1.0);
- else
- cm.incrementResult(cm.getClass(prob.y[i]),cm.getClass(target[i]),1.0);
+ if(cv_opt[0]>0){
+ double *target = Malloc(double,prob.l);
+ svm_cross_validation(&prob,¶m,cv_opt[0],target);
+ assert(param.svm_type != EPSILON_SVR&¶m.svm_type != NU_SVR);//only for regression
+ for(int i=0;i<prob.l;i++){
+ string refClassName=nameVector[prob.y[i]];
+ string className=nameVector[target[i]];
+ if(classValueMap.size())
+ cm.incrementResult(type2string<short>(classValueMap[refClassName]),type2string<short>(classValueMap[className]),1.0);
+ else
+ cm.incrementResult(cm.getClass(prob.y[i]),cm.getClass(target[i]),1.0);
+ }
+ free(target);
}
- if(verbose_opt[0])
+ else{
+ struct svm_node *x_test;
+ x_test = Malloc(struct svm_node,(nFeatures+1));
+ for(int iclass=0;iclass<nclass;++iclass){
+ for(int isample=0;isample<nctest[iclass];++isample){
+ for(int ifeature=0;ifeature<nFeatures;++ifeature){
+ x_test[ifeature].index=ifeature+1;
+ x_test[ifeature].value=(*tf)[iclass][nctraining[iclass]+isample][ifeature];
+ }
+ x_test[nFeatures].index=-1;
+ double predict_label=0;
+ //todo: make distinction between svm_predict and svm_predict_probability?
+ predict_label = svm_predict(svm,x_test);
+ string refClassName=nameVector[iclass];
+ string className=nameVector[static_cast<short>(predict_label)];
+ if(classValueMap.size())
+ cm.incrementResult(type2string<short>(classValueMap[refClassName]),type2string<short>(classValueMap[className]),1.0);
+ else
+ cm.incrementResult(refClassName,className,1.0);
+ }
+ }
+ free(x_test);
+ }
+ if(verbose_opt[0]>1)
std::cout << cm << std::endl;
assert(cm.nReference());
- free(target);
free(prob.y);
free(prob.x);
free(x_space);
@@ -172,6 +203,7 @@ int main(int argc, char *argv[])
{
map<short,int> reclassMap;
vector<int> vreclass;
+ Optionpk<string> input_opt("i", "input", "input image");
Optionpk<string> training_opt("t", "training", "training shape file. A single shape file contains all training features (must be set as: B0, B1, B2,...) for all classes (class numbers identified by label option).");
Optionpk<string> label_opt("\0", "label", "identifier for class label in training shape file.","label");
// Optionpk<unsigned short> reclass_opt("\0", "rc", "reclass code (e.g. --rc=12 --rc=23 to reclass first two classes to 12 and 23 resp.).", 0);
@@ -190,7 +222,8 @@ int main(int argc, char *argv[])
bool doProcess;//stop process when program was invoked with help option (-h --help)
try{
- doProcess=training_opt.retrieveOption(argc,argv);
+ doProcess=input_opt.retrieveOption(argc,argv);
+ training_opt.retrieveOption(argc,argv);
label_opt.retrieveOption(argc,argv);
// reclass_opt.retrieveOption(argc,argv);
balance_opt.retrieveOption(argc,argv);
@@ -230,13 +263,21 @@ int main(int argc, char *argv[])
exit(0);//help was invoked, stop processing
}
- assert(training_opt[0].size());
- if(verbose_opt[0]>=1)
- std::cout << "training shape file: " << training_opt[0] << std::endl;
+ assert(training_opt.size());
+ if(input_opt.size())
+ cv_opt[0]=0;
+
+ if(verbose_opt[0]>=1){
+ if(input_opt.size())
+ std::cout << "input filename: " << input_opt[0] << std::endl;
+ std::cout << "training shape file: " << std::endl;
+ for(int ifile=0;ifile<training_opt.size();++ifile)
+ std::cout << training_opt[ifile] << std::endl;
+ std::cout << "verbose: " << verbose_opt[0] << std::endl;
+ }
unsigned int totalSamples=0;
- // int nreclass=0;
- // vector<int> vcode;//unique class codes in recode string
+ unsigned int totalTestSamples=0;
unsigned short nclass=0;
int nband=0;
@@ -245,6 +286,7 @@ int main(int argc, char *argv[])
vector<double> offset;
vector<double> scale;
vector< Vector2d<float> > trainingPixels;//[class][sample][band]
+ vector< Vector2d<float> > testPixels;//[class][sample][band]
// if(priors_opt.size()>1){//priors from argument list
// priors.resize(priors_opt.size());
@@ -274,16 +316,28 @@ int main(int argc, char *argv[])
vector<string> fields;
//organize training data
trainingPixels.clear();
+ testPixels.clear();
map<string,Vector2d<float> > trainingMap;
+ map<string,Vector2d<float> > testMap;
if(verbose_opt[0]>=1)
- std::cout << "reading imageShape file " << training_opt[0] << std::endl;
+ std::cout << "reading training file " << training_opt[0] << std::endl;
try{
- if(band_opt.size())
+ if(band_opt.size()){
totalSamples=readDataImageShape(training_opt[0],trainingMap,fields,band_opt,label_opt[0],verbose_opt[0]);
- else
+ if(input_opt.size())
+ totalTestSamples=readDataImageShape(input_opt[0],testMap,fields,band_opt,label_opt[0],verbose_opt[0]);
+ }
+ else{
totalSamples=readDataImageShape(training_opt[0],trainingMap,fields,start_opt[0],end_opt[0],label_opt[0],verbose_opt[0]);
+ if(input_opt.size())
+ totalTestSamples=readDataImageShape(input_opt[0],testMap,fields,start_opt[0],end_opt[0],label_opt[0],verbose_opt[0]);
+ }
if(trainingMap.size()<2){
- string errorstring="Error: could not read at least two classes from training file";
+ string errorstring="Error: could not read at least two classes from training input file";
+ throw(errorstring);
+ }
+ if(input_opt.size()&&testMap.size()<2){
+ string errorstring="Error: could not read at least two classes from test input file";
throw(errorstring);
}
}
@@ -303,7 +357,8 @@ int main(int argc, char *argv[])
if(verbose_opt[0]>1)
std::cout << "training pixels: " << std::endl;
- map<string,Vector2d<float> >::iterator mapit=trainingMap.begin();
+ map<string,Vector2d<float> >::iterator mapit;
+ mapit=trainingMap.begin();
while(mapit!=trainingMap.end()){
if(classValueMap.size()){
//check if name in training is covered by classname_opt (values can not be 0)
@@ -334,6 +389,29 @@ int main(int argc, char *argv[])
assert(nclass==classname_opt.size());
nband=trainingPixels[0][0].size()-2;//X and Y//trainingPixels[0][0].size();
+ mapit=testMap.begin();
+ while(mapit!=testMap.end()){
+ if(classValueMap.size()){
+ //check if name in test is covered by classname_opt (values can not be 0)
+ if(classValueMap[mapit->first]>0){
+ ;//ok, no need to print to std::cout
+ }
+ else{
+ std::cerr << "Error: names in classname option are not complete, please check names in test vector and make sure classvalue is > 0" << std::endl;
+ exit(1);
+ }
+ }
+ //no need to delete small classes for test sample
+ testPixels.push_back(mapit->second);
+ if(verbose_opt[0]>1)
+ std::cout << mapit->first << ": " << (mapit->second).size() << " samples" << std::endl;
+ ++mapit;
+ }
+ if(input_opt.size()){
+ assert(nclass==testPixels.size());
+ assert(nband=testPixels[0][0].size()-2);//X and Y//testPixels[0][0].size();
+ assert(!cv_opt[0]);
+ }
//do not remove outliers here: could easily be obtained through ogr2ogr -where 'B2<110' output.shp input.shp
//balance training data
@@ -359,7 +437,8 @@ int main(int argc, char *argv[])
}
assert(totalSamples==nclass*balance_opt[0]);
}
-
+
+ //no need to balance test sample
//set scale and offset
offset.resize(nband);
scale.resize(nband);
@@ -394,57 +473,6 @@ int main(int argc, char *argv[])
}
}
- //recode vreclass to ordered vector, starting from 0 to nreclass
- // vcode.clear();
- // if(verbose_opt[0]>=1){
- // std::cout << "before recoding: " << std::endl;
- // for(int iclass = 0; iclass < vreclass.size(); iclass++)
- // std::cout << " " << vreclass[iclass];
- // std::cout << std::endl;
- // }
- // vector<int> vord=vreclass;//ordered vector, starting from 0 to nreclass
- // map<short,int> mreclass;
- // for(int ic=0;ic<vreclass.size();++ic){
- // if(mreclass.find(vreclass[ic])==mreclass.end())
- // mreclass[vreclass[ic]]=iclass++;
- // }
- // for(int ic=0;ic<vreclass.size();++ic)
- // vord[ic]=mreclass[vreclass[ic]];
- // //construct uniqe class codes
- // while(!vreclass.empty()){
- // vcode.push_back(*(vreclass.begin()));
- // //delete all these entries from vreclass
- // vector<int>::iterator vit;
- // while((vit=find(vreclass.begin(),vreclass.end(),vcode.back()))!=vreclass.end())
- // vreclass.erase(vit);
- // }
- // if(verbose_opt[0]>=1){
- // std::cout << "recode values: " << std::endl;
- // for(int icode=0;icode<vcode.size();++icode)
- // std::cout << vcode[icode] << " ";
- // std::cout << std::endl;
- // }
- // vreclass=vord;
- // if(verbose_opt[0]>=1){
- // std::cout << "after recoding: " << std::endl;
- // for(int iclass = 0; iclass < vord.size(); iclass++)
- // std::cout << " " << vord[iclass];
- // std::cout << std::endl;
- // }
-
- // vector<int> vuniqueclass=vreclass;
- // //remove duplicate elements from vuniqueclass
- // sort( vuniqueclass.begin(), vuniqueclass.end() );
- // vuniqueclass.erase( unique( vuniqueclass.begin(), vuniqueclass.end() ), vuniqueclass.end() );
- // nreclass=vuniqueclass.size();
- // if(verbose_opt[0]>=1){
- // std::cout << "unique classes: " << std::endl;
- // for(int iclass = 0; iclass < vuniqueclass.size(); iclass++)
- // std::cout << " " << vuniqueclass[iclass];
- // std::cout << std::endl;
- // std::cout << "number of reclasses: " << nreclass << std::endl;
- // }
-
// if(priors_opt.size()==1){//default: equal priors for each class
// priors.resize(nclass);
// for(int iclass=0;iclass<nclass;++iclass)
@@ -461,21 +489,27 @@ int main(int argc, char *argv[])
// std::cout << std::endl;
}
- //Calculate features of trainig set
+ //Calculate features of training (and test) set
+ nctraining.resize(nclass);
+ nctest.resize(nclass);
vector< Vector2d<float> > trainingFeatures(nclass);
for(int iclass=0;iclass<nclass;++iclass){
- int nctraining=0;
if(verbose_opt[0]>=1)
std::cout << "calculating features for class " << iclass << std::endl;
- if(random)
- srand(time(NULL));
- nctraining=trainingPixels[iclass].size();//bagSize_opt[0] given in % of training size
+ nctraining[iclass]=trainingPixels[iclass].size();
if(verbose_opt[0]>=1)
- std::cout << "nctraining class " << iclass << ": " << nctraining << std::endl;
- int index=0;
-
- trainingFeatures[iclass].resize(nctraining);
- for(int isample=0;isample<nctraining;++isample){
+ std::cout << "nctraining[" << iclass << "]: " << nctraining[iclass] << std::endl;
+ if(testPixels.size()>iclass){
+ nctest[iclass]=testPixels[iclass].size();
+ if(verbose_opt[0]>=1){
+ std::cout << "nctest[" << iclass << "]: " << nctest[iclass] << std::endl;
+ }
+ }
+ else
+ nctest[iclass]=0;
+ // trainingFeatures[iclass].resize(nctraining[iclass]);
+ trainingFeatures[iclass].resize(nctraining[iclass]+nctest[iclass]);
+ for(int isample=0;isample<nctraining[iclass];++isample){
//scale pixel values according to scale and offset!!!
for(int iband=0;iband<nband;++iband){
assert(trainingPixels[iclass].size()>isample);
@@ -486,12 +520,21 @@ int main(int argc, char *argv[])
trainingFeatures[iclass][isample].push_back((value-offset[iband])/scale[iband]);
}
}
- assert(trainingFeatures[iclass].size()==nctraining);
+ // assert(trainingFeatures[iclass].size()==nctraining[iclass]);
+ for(int isample=0;isample<nctest[iclass];++isample){
+ //scale pixel values according to scale and offset!!!
+ for(int iband=0;iband<nband;++iband){
+ assert(testPixels[iclass].size()>isample);
+ assert(testPixels[iclass][isample].size()>iband+startBand);
+ assert(offset.size()>iband);
+ assert(scale.size()>iband);
+ float value=testPixels[iclass][isample][iband+startBand];
+ // testFeatures[iclass][isample].push_back((value-offset[iband])/scale[iband]);
+ trainingFeatures[iclass][nctraining[iclass]+isample].push_back((value-offset[iband])/scale[iband]);
+ }
+ }
+ assert(trainingFeatures[iclass].size()==nctraining[iclass]+nctest[iclass]);
}
-
- unsigned int ntraining=0;
- for(int iclass=0;iclass<nclass;++iclass)
- ntraining+=trainingFeatures[iclass].size();
assert(ccost_opt.size()>1);//must have boundaries at least (initial value is optional)
if(ccost_opt.size()<3)//create initial value
@@ -525,7 +568,8 @@ int main(int argc, char *argv[])
x[0]=ccost;
x[1]=gamma;
std::vector<double> theGrad;
- double error=objFunction(x,theGrad,&trainingFeatures);
+ double error=0;
+ error=objFunction(x,theGrad,&trainingFeatures);
if(error<minError){
minError=error;
minCost=ccost;
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/pkg-grass/pktools.git
More information about the Pkg-grass-devel
mailing list