[pktools] 35/375: string map in pkclassify_nn.c and pkclassify_svm.cc
Bas Couwenberg
sebastic at xs4all.nl
Wed Dec 3 21:53:56 UTC 2014
This is an automated email from the git hooks/post-receive script.
sebastic-guest pushed a commit to branch upstream-master
in repository pktools.
commit f1c59c74ae910460ee382eb9406f2a2aad0ba353
Author: Pieter Kempeneers <kempenep at gmail.com>
Date: Fri Jan 18 17:28:27 2013 +0100
string map in pkclassify_nn.c and pkclassify_svm.cc
---
src/algorithms/ConfusionMatrix.h | 1 +
src/algorithms/myfann_cpp.h | 11 ++-
src/apps/pkclassify_nn.cc | 96 +++++++++++++++++--------
src/apps/pkclassify_nn.h | 151 +++++++++++++++++++++++++++++++++++++++
src/apps/pkclassify_svm.cc | 138 ++++++++++++++++++++++-------------
src/imageclasses/ImgReaderOgr.h | 136 +++++++++++++++++++++++++++++++++++
6 files changed, 455 insertions(+), 78 deletions(-)
diff --git a/src/algorithms/ConfusionMatrix.h b/src/algorithms/ConfusionMatrix.h
index 33cf04f..b5481fb 100644
--- a/src/algorithms/ConfusionMatrix.h
+++ b/src/algorithms/ConfusionMatrix.h
@@ -33,6 +33,7 @@ public:
ConfusionMatrix(const vector<string>& classNames);
ConfusionMatrix(const ConfusionMatrix& cm);
ConfusionMatrix& operator=(const ConfusionMatrix& cm);
+ short size() const {return m_results.size();};
void resize(short nclass);
void setClassNames(const vector<string>& classNames);
void pushBackClassName(const string& className);
diff --git a/src/algorithms/myfann_cpp.h b/src/algorithms/myfann_cpp.h
index 6aba87f..4901cfa 100644
--- a/src/algorithms/myfann_cpp.h
+++ b/src/algorithms/myfann_cpp.h
@@ -1535,6 +1535,10 @@ public:
trainingFeatures[iclass].push_back(testFeatures[iclass].back());
testFeatures[iclass].pop_back();
}
+ if(verbose>1){
+ std::cout << "training size " << iclass << ": " << trainingFeatures[iclass].size() << std::endl;
+ std::cout << "test size " << iclass << ": " << testFeatures[iclass].size() << std::endl;
+ }
assert(trainingFeatures[iclass].size());
}
//create test sample
@@ -1552,8 +1556,13 @@ public:
std::cout << "Error: testclass " << testclass << " has no training" << std::endl;
assert(trainingFeatures[testclass].size());
++nsample;
- if(static_cast<float>(trainingFeatures[testclass].size())/static_cast<float>(testFeatures[testclass].size())<=cv)
+ if(static_cast<float>(trainingFeatures[testclass].size())/static_cast<float>(testFeatures[testclass].size())<=(cv-1)){
+ if(verbose>1){
+ std::cout << "training size " << testclass << ": " << trainingFeatures[testclass].size() << std::endl;
+ std::cout << "test size " << testclass << ": " << testFeatures[testclass].size() << std::endl;
+ }
testclass=(testclass+1)%nclass;
+ }
}
assert(nsample==ntest);
//training with left out training set
diff --git a/src/apps/pkclassify_nn.cc b/src/apps/pkclassify_nn.cc
index 1c56084..c9f678a 100644
--- a/src/apps/pkclassify_nn.cc
+++ b/src/apps/pkclassify_nn.cc
@@ -100,17 +100,17 @@ int main(int argc, char *argv[])
Optionpk<bool> todo_opt("\0","todo","",false);
Optionpk<string> input_opt("i", "input", "input image");
Optionpk<string> training_opt("t", "training", "training shape file. A single shape file contains all training features (must be set as: B0, B1, B2,...) for all classes (class numbers identified by label option). Use multiple training files for bootstrap aggregation (alternative to the bag and bsize options, where a random subset is taken from a single training file)");
- Optionpk<string> label_opt("\0", "label", "identifier for class label in training shape file. (default is label)","label");
- Optionpk<unsigned short> reclass_opt("\0", "rc", "reclass code (e.g. --rc=12 --rc=23 to reclass first two classes to 12 and 23 resp.). Default is 0: do not reclass", 0);
- Optionpk<unsigned int> balance_opt("\0", "balance", "balance the input data to this number of samples for each class (default 0: do not balance)", 0);
- Optionpk<int> minSize_opt("m", "min", "if number of training pixels is less then min, do not take this class into account (default is 0: consider all classes", 0);
+ Optionpk<string> label_opt("label", "label", "identifier for class label in training shape file.","label");
+ Optionpk<unsigned short> reclass_opt("rc", "rc", "reclass code (e.g. --rc=12 --rc=23 to reclass first two classes to 12 and 23 resp.)");
+ Optionpk<unsigned int> balance_opt("bal", "balance", "balance the input data to this number of samples for each class", 0);
+ Optionpk<int> minSize_opt("m", "min", "if number of training pixels is less then min, do not take this class into account (0: consider all classes)", 0);
Optionpk<double> start_opt("s", "start", "start band sequence number (set to 0)",0);
Optionpk<double> end_opt("e", "end", "end band sequence number (set to 0 for all bands)", 0);
Optionpk<short> band_opt("b", "band", "band index (starting from 0, either use band option or use start to end)");
Optionpk<double> offset_opt("\0", "offset", "offset value for each spectral band input features: refl[band]=(DN[band]-offset[band])/scale[band]", 0.0);
Optionpk<double> scale_opt("\0", "scale", "scale value for each spectral band input features: refl=(DN[band]-offset[band])/scale[band] (use 0 if scale min and max in each band to -1.0 and 1.0)", 0.0);
Optionpk<unsigned short> aggreg_opt("a", "aggreg", "how to combine aggregated classifiers, see also rc option (1: sum rule, 2: max rule).",1);
- Optionpk<double> priors_opt("p", "prior", "prior probabilities for each class (e.g., -p 0.3 -p 0.3 -p 0.2 ), default set to equal priors)", 0.0);
+ Optionpk<double> priors_opt("p", "prior", "prior probabilities for each class (e.g., -p 0.3 -p 0.3 -p 0.2 )", 0.0);
Optionpk<unsigned short> cv_opt("cv", "cv", "n-fold cross validation mode",0);
Optionpk<unsigned int> nneuron_opt("\0", "nneuron", "number of neurons in hidden layers in neural network (multiple hidden layers are set by defining multiple number of neurons: -n 15 -n 1, default is one hidden layer with 5 neurons)", 5);
Optionpk<float> connection_opt("\0", "connection", "connection reate (default: 1.0 for a fully connected network)", 1.0);
@@ -186,7 +186,8 @@ int main(int argc, char *argv[])
}
if(verbose_opt[0]>=1){
- cout << "image filename: " << input_opt[0] << endl;
+ if(input_opt.size())
+ cout << "image filename: " << input_opt[0] << endl;
if(mask_opt.size())
cout << "mask filename: " << mask_opt[0] << endl;
if(training_opt.size()){
@@ -213,9 +214,10 @@ int main(int argc, char *argv[])
vector< vector<double> > offset(nbag);
vector< vector<double> > scale(nbag);
+ map<string,Vector2d<float> > trainingMap;
vector< Vector2d<float> > trainingPixels;//[class][sample][band]
- if(reclass_opt.size()>1){
+ if(reclass_opt.size()){
vreclass.resize(reclass_opt.size());
for(int iclass=0;iclass<reclass_opt.size();++iclass){
reclassMap[iclass]=reclass_opt[iclass];
@@ -242,8 +244,10 @@ int main(int argc, char *argv[])
for(int ibag=0;ibag<nbag;++ibag){
//organize training data
if(ibag<training_opt.size()){//if bag contains new training pixels
+ trainingMap.clear();
trainingPixels.clear();
- map<int,Vector2d<float> > trainingMap;
+ // map<int,Vector2d<float> > trainingMap;
+ // map<string,Vector2d<float> > trainingMap;
if(verbose_opt[0]>=1)
cout << "reading imageShape file " << training_opt[0] << endl;
try{
@@ -264,20 +268,21 @@ int main(int argc, char *argv[])
cerr << "error catched" << std::endl;
exit(1);
}
- //delete class 0
- if(verbose_opt[0]>=1)
- cout << "erasing class 0 from training set (" << trainingMap[0].size() << " from " << totalSamples << ") samples" << endl;
- totalSamples-=trainingMap[0].size();
- trainingMap.erase(0);
+ //delete class 0 ?
+ // if(verbose_opt[0]>=1)
+ // std::cout << "erasing class 0 from training set (" << trainingMap[0].size() << " from " << totalSamples << ") samples" << std::endl;
+ // totalSamples-=trainingMap[0].size();
+ // trainingMap.erase(0);
//convert map to vector
short iclass=0;
- if(reclass_opt.size()==1){//no reclass option, read classes from shape
+ if(reclass_opt.empty()){//no reclass option, read classes from shape
reclassMap.clear();
vreclass.clear();
}
if(verbose_opt[0]>1)
- cout << "training pixels: " << endl;
- map<int,Vector2d<float> >::iterator mapit=trainingMap.begin();
+ std::cout << "training pixels: " << std::endl;
+ map<string,Vector2d<float> >::iterator mapit=trainingMap.begin();
+ // map<int,Vector2d<float> >::iterator mapit=trainingMap.begin();
while(mapit!=trainingMap.end()){
// for(map<int,Vector2d<float> >::const_iterator mapit=trainingMap.begin();mapit!=trainingMap.end();++mapit){
//delete small classes
@@ -286,13 +291,14 @@ int main(int argc, char *argv[])
continue;
//todo: beware of reclass option: delete this reclass if no samples are left in this classes!!
}
- if(reclass_opt.size()==1){//no reclass option, read classes from shape
- reclassMap[iclass]=(mapit->first);
- vreclass.push_back(mapit->first);
+ if(reclass_opt.empty()){//no reclass option, read classes from shape
+ // reclassMap[iclass]=(mapit->first);
+ // vreclass.push_back(mapit->first);
+ vreclass.push_back(iclass);
}
trainingPixels.push_back(mapit->second);
if(verbose_opt[0]>1)
- cout << mapit->first << ": " << (mapit->second).size() << " samples" << endl;
+ std::cout << mapit->first << ": " << (mapit->second).size() << " samples" << std::endl;
++iclass;
++mapit;
}
@@ -304,7 +310,8 @@ int main(int argc, char *argv[])
assert(nclass==trainingPixels.size());
assert(nband==(training_opt.size())?trainingPixels[0][0].size()-2:trainingPixels[0][0].size());
}
- assert(reclassMap.size()==nclass);
+ // assert(reclassMap.size()==nclass);
+ assert(vreclass.size()==nclass);
//do not remove outliers here: could easily be obtained through ogr2ogr -where 'B2<110' output.shp input.shp
//balance training data
@@ -483,8 +490,9 @@ int main(int argc, char *argv[])
unsigned int nFeatures=trainingFeatures[0][0].size();
unsigned int ntraining=0;
for(int iclass=0;iclass<nclass;++iclass){
- if(verbose_opt[0]>=1)
- cout << "training sample size for class " << vcode[iclass] << ": " << trainingFeatures[iclass].size() << endl;
+ //vcode has size nreclass???
+ // if(verbose_opt[0]>=1)
+ // cout << "training sample size for class " << vcode[iclass] << ": " << trainingFeatures[iclass].size() << endl;
ntraining+=trainingFeatures[iclass].size();
}
const unsigned int num_layers = nneuron_opt.size()+2;
@@ -554,17 +562,48 @@ int main(int argc, char *argv[])
referenceVector,
outputVector,
verbose_opt[0]);
- ConfusionMatrix cm(nclass);
+ ConfusionMatrix cm;
+ map<string,Vector2d<float> >::iterator mapit=trainingMap.begin();
+ if(reclass_opt.empty()){
+ while(mapit!=trainingMap.end()){
+ cm.pushBackClassName(mapit->first);
+ ++mapit;
+ }
+ }
+ else{
+ if(verbose_opt[0]>1)
+ std::cout << "classes for confusion matrix: " << std::endl;
+ for(int iclass=0;iclass<nreclass;++iclass){
+ ostringstream os;
+ os << vcode[iclass];
+ if(verbose_opt[0]>1)
+ std::cout << os.str() << " ";
+ cm.pushBackClassName(os.str());
+ }
+ if(verbose_opt[0]>1)
+ std::cout << std::endl;
+ }
+ assert(cm.size()==nreclass);
+
for(int isample=0;isample<referenceVector.size();++isample)
- cm.incrementResult(cm.getClass(referenceVector[isample]),cm.getClass(outputVector[isample]),1);
+ cm.incrementResult(cm.getClass(vreclass[referenceVector[isample]]),cm.getClass(vreclass[outputVector[isample]]),1);
assert(cm.nReference());
std::cout << cm << std::endl;
- std::cout << "Kappa: " << cm.kappa() << std::endl;
+ cout << "class #samples userAcc prodAcc" << endl;
+ double se95_ua=0;
+ double se95_pa=0;
double se95_oa=0;
+ double dua=0;
+ double dpa=0;
double doa=0;
+ for(int iclass=0;iclass<cm.nClasses();++iclass){
+ dua=cm.ua_pct(cm.getClass(iclass),&se95_ua);
+ dpa=cm.pa_pct(cm.getClass(iclass),&se95_pa);
+ cout << cm.getClass(iclass) << " " << cm.nReference(cm.getClass(iclass)) << " " << dua << " (" << se95_ua << ")" << " " << dpa << " (" << se95_pa << ")" << endl;
+ }
+ std::cout << "Kappa: " << cm.kappa() << std::endl;
doa=cm.oa_pct(&se95_oa);
std::cout << "Overall Accuracy: " << doa << " (" << se95_oa << ")" << std::endl;
- std::cout << "rmse cross-validation: " << rmse << std::endl;
}
if(verbose_opt[0]>=1)
@@ -602,7 +641,8 @@ int main(int argc, char *argv[])
}
//--------------------------------- end of training -----------------------------------
-
+ if(input_opt.empty())
+ exit(0);
//-------------------------------- open image file ------------------------------------
if(input_opt[0].find(".shp")==string::npos){
diff --git a/src/apps/pkclassify_nn.h b/src/apps/pkclassify_nn.h
index 583c982..c253e40 100644
--- a/src/apps/pkclassify_nn.h
+++ b/src/apps/pkclassify_nn.h
@@ -42,6 +42,13 @@ template<typename T> unsigned int readDataImageShape(const string &filename,
int verbose=false);
template<typename T> unsigned int readDataImageShape(const string &filename,
+ map<string,Vector2d<T> > &mapPixels, //[classNr][pixelNr][bandNr],
+ vector<string>& fields,
+ const vector<short>& bands,
+ const string& label,
+ int verbose=false);
+
+template<typename T> unsigned int readDataImageShape(const string &filename,
map<int,Vector2d<T> > &mapPixels, //[classNr][pixelNr][bandNr],
vector<string>& fields,
const vector<short>& bands,
@@ -117,6 +124,82 @@ template<typename T> unsigned int readDataImageShape(const string &filename,
return totalSamples;
}
+template<typename T> unsigned int readDataImageShape(const string &filename,
+ map<string,Vector2d<T> > &mapPixels, //[classNr][pixelNr][bandNr],
+ vector<string>& fields,
+ const vector<short>& bands,
+ const string& label,
+ int verbose)
+{
+ mapPixels.clear();
+ int nsample=0;
+ int totalSamples=0;
+ int nband=0;
+ if(verbose)
+ cout << "reading shape file " << filename << endl;
+ ImgReaderOgr imgReaderShape;
+ try{
+ imgReaderShape.open(filename);
+ //only retain bands in fields
+ imgReaderShape.getFields(fields);
+ vector<string>::iterator fit=fields.begin();
+ if(verbose>1)
+ cout << "reading fields: ";
+ while(fit!=fields.end()){
+ if(verbose)
+ cout << *fit << " ";
+ // size_t pos=(*fit).find_first_not_of("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_ ");
+ if(((*fit).substr(0,1)=="B")&&((*fit).substr(1).find_first_not_of("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_ ")!=string::npos)){
+ int theBand=atoi((*fit).substr(1).c_str());
+ if(bands.size()){
+ bool validBand=false;
+ for(int iband=0;iband<bands.size();++iband){
+ if(theBand==bands[iband])
+ validBand=true;
+ }
+ if(validBand)
+ ++fit;
+ else
+ fields.erase(fit);
+ }
+ else
+ ++fit;
+ }
+ else
+ fields.erase(fit);
+ }
+ if(verbose)
+ cout << endl;
+ if(verbose){
+ cout << "fields:";
+ for(vector<string>::iterator fit=fields.begin();fit!=fields.end();++fit)
+ cout << " " << *fit;
+ cout << endl;
+ }
+ if(!nband){
+ if(verbose)
+ cout << "reading data" << endl;
+ nband=imgReaderShape.readData(mapPixels,OFTReal,fields,label,0,true,verbose==2);
+
+ }
+ else
+ assert(nband==imgReaderShape.readData(mapPixels,OFTReal,fields,label,0,true,false));
+ }
+ catch(string e){
+ ostringstream estr;
+ estr << e << " " << filename;
+ throw(estr.str());
+ }
+ nsample=imgReaderShape.getFeatureCount();
+ totalSamples+=nsample;
+ if(verbose)
+ cout << ": " << nsample << " samples read with " << nband << " bands" << endl;
+ imgReaderShape.close();
+ if(verbose)
+ cout << "total number of samples read " << totalSamples << endl;
+ return totalSamples;
+}
+
template<typename T> unsigned int readDataImageShape(const string &filename,
map<int,Vector2d<T> > &mapPixels, //[classNr][pixelNr][bandNr],
@@ -185,4 +268,72 @@ template<typename T> unsigned int readDataImageShape(const string &filename,
cout << "total number of samples read " << totalSamples << endl;
return totalSamples;
}
+
+template<typename T> unsigned int readDataImageShape(const string &filename,
+ map<string,Vector2d<T> > &mapPixels, //[classNr][pixelNr][bandNr],
+ vector<string>& fields,
+ double start,
+ double end,
+ const string& label,
+ int verbose)
+{
+ mapPixels.clear();
+ int nsample=0;
+ int totalSamples=0;
+ int nband=0;
+ if(verbose)
+ cout << "reading shape file " << filename << endl;
+ ImgReaderOgr imgReaderShape;
+ try{
+ imgReaderShape.open(filename);
+ //only retain bands in fields
+ imgReaderShape.getFields(fields);
+ vector<string>::iterator fit=fields.begin();
+ if(verbose)
+ cout << "reading fields: ";
+ while(fit!=fields.end()){
+ if(verbose)
+ cout << *fit << " ";
+ // size_t pos=(*fit).find_first_not_of("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_ ");
+ if(((*fit).substr(0,1)=="B")&&((*fit).substr(1).find_first_not_of("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_ ")!=string::npos)){
+ int iband=atoi((*fit).substr(1).c_str());
+ if((start||end)&&(iband<start||iband>end))
+ fields.erase(fit);
+ else
+ ++fit;
+ }
+ else
+ fields.erase(fit);
+ }
+ if(verbose)
+ cout << endl;
+ if(verbose){
+ cout << "fields:";
+ for(vector<string>::iterator fit=fields.begin();fit!=fields.end();++fit)
+ cout << " " << *fit;
+ cout << endl;
+ }
+ if(!nband){
+ if(verbose)
+ cout << "reading data" << endl;
+ nband=imgReaderShape.readData(mapPixels,OFTReal,fields,label,0,true,verbose==2);
+
+ }
+ else
+ assert(nband==imgReaderShape.readData(mapPixels,OFTReal,fields,label,0,true,false));
+ }
+ catch(string e){
+ ostringstream estr;
+ estr << e << " " << filename;
+ throw(estr.str());
+ }
+ nsample=imgReaderShape.getFeatureCount();
+ totalSamples+=nsample;
+ if(verbose)
+ cout << ": " << nsample << " samples read with " << nband << " bands" << endl;
+ imgReaderShape.close();
+ if(verbose)
+ cout << "total number of samples read " << totalSamples << endl;
+ return totalSamples;
+}
#endif //_PKCLASSIFY_NN_H_
diff --git a/src/apps/pkclassify_svm.cc b/src/apps/pkclassify_svm.cc
index 9377fc6..2e36ce1 100644
--- a/src/apps/pkclassify_svm.cc
+++ b/src/apps/pkclassify_svm.cc
@@ -82,8 +82,9 @@ void reclass(const vector<double>& result, const vector<int>& vreclass, const ve
int main(int argc, char *argv[])
{
- map<short,int> reclassMap;
+ // map<short,int> reclassMap;
vector<int> vreclass; //vreclass: map nclass->nreclass
+ vector<int> vuniqueclass;
vector<double> priors;
vector<double> priorsReclass;
@@ -101,10 +102,10 @@ int main(int argc, char *argv[])
Optionpk<bool> todo_opt("\0","todo","",false);
Optionpk<string> input_opt("i", "input", "input image");
Optionpk<string> training_opt("t", "training", "training shape file. A single shape file contains all training features (must be set as: B0, B1, B2,...) for all classes (class numbers identified by label option). Use multiple training files for bootstrap aggregation (alternative to the bag and bsize options, where a random subset is taken from a single training file)");
- Optionpk<string> label_opt("\0", "label", "identifier for class label in training shape file.","label");
- Optionpk<unsigned short> reclass_opt("\0", "rc", "reclass code (e.g. --rc=12 --rc=23 to reclass first two classes to 12 and 23 resp.).", 0);
- Optionpk<unsigned int> balance_opt("\0", "balance", "balance the input data to this number of samples for each class", 0);
- Optionpk<int> minSize_opt("m", "min", "if number of training pixels is less then min, do not take this class into account", 0);
+ Optionpk<string> label_opt("label", "label", "identifier for class label in training shape file.","label");
+ Optionpk<unsigned short> reclass_opt("rc", "rc", "reclass code (e.g. --rc=12 --rc=23 to reclass first two classes to 12 and 23 resp.)");
+ Optionpk<unsigned int> balance_opt("bal", "balance", "balance the input data to this number of samples for each class", 0);
+ Optionpk<int> minSize_opt("m", "min", "if number of training pixels is less then min, do not take this class into account (0: consider all classes)", 0);
Optionpk<double> start_opt("s", "start", "start band sequence number (set to 0)",0);
Optionpk<double> end_opt("e", "end", "end band sequence number (set to 0 for all bands)", 0);
Optionpk<short> band_opt("b", "band", "band index (starting from 0, either use band option or use start to end)");
@@ -112,8 +113,7 @@ int main(int argc, char *argv[])
Optionpk<double> scale_opt("\0", "scale", "scale value for each spectral band input features: refl=(DN[band]-offset[band])/scale[band] (use 0 if scale min and max in each band to -1.0 and 1.0)", 0.0);
Optionpk<unsigned short> aggreg_opt("a", "aggreg", "how to combine aggregated classifiers, see also rc option (0: no aggregation, 1: sum rule, 2: max rule).",0);
Optionpk<double> priors_opt("p", "prior", "prior probabilities for each class (e.g., -p 0.3 -p 0.3 -p 0.2 )", 0.0);
-
-
+ Optionpk<unsigned short> cv_opt("cv", "cv", "n-fold cross validation mode",0);
Optionpk<unsigned short> svm_type_opt("svmt", "svmtype", "type of SVM (0: C-SVC, 1: nu-SVC, 2: one-class SVM, 3: epsilon-SVR, 4: nu-SVR)",0);
Optionpk<unsigned short> kernel_type_opt("kt", "kerneltype", "type of kernel function (0: linear: u'*v, 1: polynomial: (gamma*u'*v + coef0)^degree, 2: radial basis function: exp(-gamma*|u-v|^2), 3: sigmoid: tanh(gamma*u'*v + coef0), 4: precomputed kernel (kernel values in training_set_file)",2);
Optionpk<unsigned short> kernel_degree_opt("kd", "kd", "degree in kernel function",3);
@@ -127,7 +127,6 @@ int main(int argc, char *argv[])
Optionpk<bool> shrinking_opt("shrink", "shrink", "whether to use the shrinking heuristics",false);
Optionpk<bool> prob_est_opt("pe", "probest", "whether to train a SVC or SVR model for probability estimates",false);
// Optionpk<bool> weight_opt("wi", "wi", "set the parameter C of class i to weight*C, for C-SVC",true);
- Optionpk<unsigned short> cv_opt("cv", "cv", "n-fold cross validation mode",0);
Optionpk<unsigned short> comb_opt("c", "comb", "how to combine bootstrap aggregation classifiers (0: sum rule, 1: product rule, 2: max rule). Also used to aggregate classes with rc option.",0);
Optionpk<unsigned short> bag_opt("\0", "bag", "Number of bootstrap aggregations", 1);
Optionpk<int> bagSize_opt("\0", "bsize", "Percentage of features used from available training features for each bootstrap aggregation", 100);
@@ -229,14 +228,10 @@ int main(int argc, char *argv[])
int nband=0;
int startBand=2;//first two bands represent X and Y pos
- vector< vector<double> > offset(nbag);
- vector< vector<double> > scale(nbag);
- vector< Vector2d<float> > trainingPixels;//[class][sample][band]
-
- if(reclass_opt.size()>1){
+ if(reclass_opt.size()){
vreclass.resize(reclass_opt.size());
for(int iclass=0;iclass<reclass_opt.size();++iclass){
- reclassMap[iclass]=reclass_opt[iclass];
+ // reclassMap[iclass]=reclass_opt[iclass];
vreclass[iclass]=reclass_opt[iclass];
}
}
@@ -258,15 +253,36 @@ int main(int argc, char *argv[])
std::sort(band_opt.begin(),band_opt.end());
//----------------------------------- Training -------------------------------
+ vector< vector<double> > offset(nbag);
+ vector< vector<double> > scale(nbag);
+ map<string,Vector2d<float> > trainingMap;
+ vector< Vector2d<float> > trainingPixels;//[class][sample][band]
+
vector<struct svm_problem> prob(nbag);
vector<struct svm_node *> x_space(nbag);
+
+ //test
+ // ImgReaderOgr testOgr(training_opt[0]);
+ // OGRDataSource* testSource=testOgr.getDataSource();
+ // OGRLayer *poLayer=testSource->GetLayer(0);
+ // unsigned long int ifeature=0;
+ // if(poLayer!=NULL){
+ // OGRFeatureDefn *poFDefn = poLayer->GetLayerDefn();
+ // OGRFeature *poFeature;
+ // while( (poFeature = poLayer->GetNextFeature()) != NULL ){
+ // std::cout << "got feature " << ifeature << std::endl;
+ // ++ifeature;
+ // }
+ // exit(1);
+ // }
+
// struct svm_node *x_space;
vector<string> fields;
for(int ibag=0;ibag<nbag;++ibag){
//organize training data
if(ibag<training_opt.size()){//if bag contains new training pixels
+ trainingMap.clear();
trainingPixels.clear();
- map<int,Vector2d<float> > trainingMap;
if(verbose_opt[0]>=1)
std::cout << "reading imageShape file " << training_opt[0] << std::endl;
try{
@@ -287,20 +303,21 @@ int main(int argc, char *argv[])
cerr << "error catched" << std::endl;
exit(1);
}
- //delete class 0
- if(verbose_opt[0]>=1)
- std::cout << "erasing class 0 from training set (" << trainingMap[0].size() << " from " << totalSamples << ") samples" << std::endl;
- totalSamples-=trainingMap[0].size();
- trainingMap.erase(0);
+ //todo: delete class 0 ?
+ // if(verbose_opt[0]>=1)
+ // std::cout << "erasing class 0 from training set (" << trainingMap[0].size() << " from " << totalSamples << ") samples" << std::endl;
+ // totalSamples-=trainingMap[0].size();
+ // trainingMap.erase(0);
+
//convert map to vector
short iclass=0;
- if(reclass_opt.size()==1){//no reclass option, read classes from shape
- reclassMap.clear();
+ if(reclass_opt.empty()){//no reclass option, read classes from shape
+ // reclassMap.clear();
vreclass.clear();
}
if(verbose_opt[0]>1)
std::cout << "training pixels: " << std::endl;
- map<int,Vector2d<float> >::iterator mapit=trainingMap.begin();
+ map<string,Vector2d<float> >::iterator mapit=trainingMap.begin();
while(mapit!=trainingMap.end()){
// for(map<int,Vector2d<float> >::const_iterator mapit=trainingMap.begin();mapit!=trainingMap.end();++mapit){
//delete small classes
@@ -309,9 +326,10 @@ int main(int argc, char *argv[])
continue;
//todo: beware of reclass option: delete this reclass if no samples are left in this classes!!
}
- if(reclass_opt.size()==1){//no reclass option, read classes from shape
- reclassMap[iclass]=(mapit->first);
- vreclass.push_back(mapit->first);
+ if(reclass_opt.empty()){//no reclass option, read classes from shape
+ // reclassMap[iclass]=(mapit->first);
+ // vreclass.push_back(mapit->first);
+ vreclass.push_back(iclass);
}
trainingPixels.push_back(mapit->second);
if(verbose_opt[0]>1)
@@ -327,7 +345,8 @@ int main(int argc, char *argv[])
assert(nclass==trainingPixels.size());
assert(nband==trainingPixels[0][0].size()-2);
}
- assert(reclassMap.size()==nclass);
+ // assert(reclassMap.size()==nclass);
+ assert(vreclass.size()==nclass);
//do not remove outliers here: could easily be obtained through ogr2ogr -where 'B2<110' output.shp input.shp
//balance training data
@@ -438,7 +457,7 @@ int main(int argc, char *argv[])
std::cout << std::endl;
}
- vector<int> vuniqueclass=vreclass;
+ vuniqueclass=vreclass;
//remove duplicate elements from vuniqueclass
sort( vuniqueclass.begin(), vuniqueclass.end() );
vuniqueclass.erase( unique( vuniqueclass.begin(), vuniqueclass.end() ), vuniqueclass.end() );
@@ -507,8 +526,9 @@ int main(int argc, char *argv[])
unsigned int nFeatures=trainingFeatures[0][0].size();
unsigned int ntraining=0;
for(int iclass=0;iclass<nclass;++iclass){
- if(verbose_opt[0]>=1)
- std::cout << "training sample size for class " << vcode[iclass] << ": " << trainingFeatures[iclass].size() << std::endl;
+ //vcode has size nreclass???
+ // if(verbose_opt[0]>=1)
+ // std::cout << "training sample size for class " << vcode[iclass] << ": " << trainingFeatures[iclass].size() << std::endl;
ntraining+=trainingFeatures[iclass].size();
}
// vector<struct svm_problem> prob(ibag);
@@ -521,14 +541,10 @@ int main(int argc, char *argv[])
int lIndex=0;
for(int iclass=0;iclass<nclass;++iclass){
for(int isample=0;isample<trainingFeatures[iclass].size();++isample){
- // //test
- // std::cout << iclass;
prob[ibag].x[lIndex]=&(x_space[ibag][spaceIndex]);
for(int ifeature=0;ifeature<nFeatures;++ifeature){
x_space[ibag][spaceIndex].index=ifeature+1;
x_space[ibag][spaceIndex].value=trainingFeatures[iclass][isample][ifeature];
- // //test
- // std::cout << " " << x_space[ibag][spaceIndex].index << ":" << x_space[ibag][spaceIndex].value;
++spaceIndex;
}
x_space[ibag][spaceIndex++].index=-1;
@@ -566,41 +582,65 @@ int main(int argc, char *argv[])
if(verbose_opt[0]>1)
std::cout << "SVM is now trained" << std::endl;
if(cv_opt[0]>0){
- std::cout << "Confusion matrix" << std::endl;
- ConfusionMatrix cm(nclass);
- // for(int iclass=0;iclass<nclass;++iclass)
- // cm.pushBackClassName(type2string(iclass));
+ //todo: implement reclassification
+ // ConfusionMatrix cm(nclass);
+ ConfusionMatrix cm;
+ map<string,Vector2d<float> >::iterator mapit=trainingMap.begin();
+ if(reclass_opt.empty()){
+ while(mapit!=trainingMap.end()){
+ cm.pushBackClassName(mapit->first);
+ ++mapit;
+ }
+ }
+ else{
+ if(verbose_opt[0]>1)
+ std::cout << "classes for confusion matrix: " << std::endl;
+ for(int iclass=0;iclass<nreclass;++iclass){
+ ostringstream os;
+ os << vcode[iclass];
+ if(verbose_opt[0]>1)
+ std::cout << os.str() << " ";
+ cm.pushBackClassName(os.str());
+ }
+ if(verbose_opt[0]>1)
+ std::cout << std::endl;
+ }
+ assert(cm.size()==nreclass);
+
double *target = Malloc(double,prob[ibag].l);
svm_cross_validation(&prob[ibag],¶m[ibag],cv_opt[0],target);
assert(param[ibag].svm_type != EPSILON_SVR&¶m[ibag].svm_type != NU_SVR);//only for regression
- int total_correct=0;
+
for(int i=0;i<prob[ibag].l;i++)
- cm.incrementResult(cm.getClass(prob[ibag].y[i]),cm.getClass(target[i]),1);
+ cm.incrementResult(cm.getClass(vreclass[prob[ibag].y[i]]),cm.getClass(vreclass[target[i]]),1);
assert(cm.nReference());
std::cout << cm << std::endl;
- std::cout << "Kappa: " << cm.kappa() << std::endl;
+ cout << "class #samples userAcc prodAcc" << endl;
+ double se95_ua=0;
+ double se95_pa=0;
double se95_oa=0;
+ double dua=0;
+ double dpa=0;
double doa=0;
+ for(int iclass=0;iclass<cm.nClasses();++iclass){
+ dua=cm.ua_pct(cm.getClass(iclass),&se95_ua);
+ dpa=cm.pa_pct(cm.getClass(iclass),&se95_pa);
+ cout << cm.getClass(iclass) << " " << cm.nReference(cm.getClass(iclass)) << " " << dua << " (" << se95_ua << ")" << " " << dpa << " (" << se95_pa << ")" << endl;
+ }
+ std::cout << "Kappa: " << cm.kappa() << std::endl;
doa=cm.oa_pct(&se95_oa);
std::cout << "Overall Accuracy: " << doa << " (" << se95_oa << ")" << std::endl;
free(target);
}
-
// *NOTE* Because svm_model contains pointers to svm_problem, you can
// not free the memory used by svm_problem if you are still using the
// svm_model produced by svm_train().
-
- // free(prob.y);
- // free(prob.x);
- // free(x_space);
- // svm_destroy_param(¶m);
}//for ibag
//--------------------------------- end of training -----------------------------------
- if(!output_opt.size())
+ if(input_opt.empty())
exit(0);
-
const char* pszMessage;
void* pProgressArg=NULL;
GDALProgressFunc pfnProgress=GDALTermProgress;
diff --git a/src/imageclasses/ImgReaderOgr.h b/src/imageclasses/ImgReaderOgr.h
index 5863420..2ead995 100644
--- a/src/imageclasses/ImgReaderOgr.h
+++ b/src/imageclasses/ImgReaderOgr.h
@@ -49,6 +49,7 @@ public:
template <typename T> int readData(vector<T>& data, const OGRFieldType& fieldType, const string& theField, int layer=0, bool verbose=false);
template <typename T> int readData(Vector2d<T>& data, const OGRFieldType& fieldType, vector<string>& fields, int layer=0, bool pos=false, bool verbose=false);//default layer 0 and no pos information in data
template <typename T> int readData(map<int,Vector2d<T> >& data, const OGRFieldType& fieldType, vector<string>& fields, const string& label, int layer=0, bool pos=false, bool verbose=false);//default layer 0 and no pos information in data
+ template <typename T> int readData(map<string,Vector2d<T> >& data, const OGRFieldType& fieldType, vector<string>& fields, const string& label, int layer=0, bool pos=false, bool verbose=false);//default layer 0 and no pos information in data
void shape2ascii(ostream& theOstream, const string& pointname, int layer=0, bool verbose=false);
unsigned long int getFeatureCount(int layer=0) const;
int getFieldCount(int layer=0) const;
@@ -207,6 +208,141 @@ template <typename T> int ImgReaderOgr::readData(map<int,Vector2d<T> >& data, co
}
}
+//read data from all features in a map, organized by class names
+template <typename T> int ImgReaderOgr::readData(map<string,Vector2d<T> >& data, const OGRFieldType& fieldType, vector<string>& fields, const string& label, int layer, bool pos, bool verbose)
+{
+ if(layer<0)
+ layer=m_datasource->GetLayerCount()-1;
+ assert(m_datasource->GetLayerCount()>layer);
+ OGRLayer *poLayer;
+ if(verbose)
+ cout << "number of layers: " << m_datasource->GetLayerCount() << endl;
+ poLayer = m_datasource->GetLayer(layer);
+ if(poLayer!=NULL){
+ OGRFeatureDefn *poFDefn = poLayer->GetLayerDefn();
+ if(fields.empty()){
+ fields.resize(poFDefn->GetFieldCount());
+ if(verbose)
+ cout << "resized fields to " << fields.size() << endl;
+ }
+ //start reading features from the layer
+ OGRFeature *poFeature;
+ if(verbose)
+ cout << "reset reading" << endl;
+ poLayer->ResetReading();
+ unsigned long int ifeature=0;
+ int posOffset=(pos)?2:0;
+ if(verbose)
+ cout << "going through features to fill in string map" << endl << flush;
+ string theClass;
+ while( (poFeature = poLayer->GetNextFeature()) != NULL ){
+ vector<T> theFeature;//(fields.size()+posOffset);//x,y+selectedfields
+ if(verbose)
+ cout << "reading feature " << ifeature << endl << flush;
+ OGRGeometry *poGeometry;
+ poGeometry = poFeature->GetGeometryRef();
+ if(verbose){
+ if(poGeometry == NULL)
+ cerr << "no geometry defined" << endl << flush;
+ else if(wkbFlatten(poGeometry->getGeometryType()) != wkbPoint)
+ cerr << "Warning: poGeometry type: " << wkbFlatten(poGeometry->getGeometryType()) << endl << flush;
+ }
+ assert(poGeometry != NULL );
+ // && wkbFlatten(poGeometry->getGeometryType()) == wkbPoint);
+ OGRPoint *poPoint;
+ if(pos){
+ poPoint = (OGRPoint *) poGeometry;
+ if(wkbFlatten(poGeometry->getGeometryType()) == wkbPoint){
+ theFeature.push_back(poPoint->getX());
+ theFeature.push_back(poPoint->getY());
+ }
+ else if(wkbFlatten(poGeometry->getGeometryType()) == wkbPolygon){
+ OGRPolygon * poPolygon = (OGRPolygon *) poGeometry;
+ poPolygon->Centroid(poPoint);
+ theFeature.push_back(poPoint->getX());
+ theFeature.push_back(poPoint->getY());
+ }
+ else{
+ string errorstring="Error: Centroid for non polygon geometry not supported until OGR 1.8.0, change ImgReaderOgr if version >= 1.8.0 is installed...";
+ throw(errorstring);
+ // poGeometry->Centroid(poPoint);
+ // theFeature.push_back(poPoint->getX());
+ // theFeature.push_back(poPoint->getY());
+ }
+ }
+ // OGRFeatureDefn *poFDefn = poLayer->GetLayerDefn();//got LayerDefn already...
+ string featurename;
+ for(int iField=0;iField<poFDefn->GetFieldCount();++iField){
+ OGRFieldDefn *poFieldDefn = poFDefn->GetFieldDefn(iField);
+ string fieldname=poFieldDefn->GetNameRef();
+ if(fieldname==label){
+ theClass=poFeature->GetFieldAsString(iField);
+ if(verbose)
+ std::cout << "read feature for " << theClass << std::endl;
+ }
+ else{
+ switch(fieldType){
+ case(OFTReal):
+ if(fields.size()<poFDefn->GetFieldCount()){
+ if(find(fields.begin(),fields.end(),fieldname)!=fields.end())
+ theFeature.push_back(poFeature->GetFieldAsDouble(iField));
+ }
+ else{
+ fields[iField]=fieldname;
+ theFeature.push_back(poFeature->GetFieldAsDouble(iField));
+ }
+ break;
+ case(OFTInteger):
+ if(fields.size()<poFDefn->GetFieldCount()){
+ if(find(fields.begin(),fields.end(),fieldname)!=fields.end())
+ theFeature.push_back(poFeature->GetFieldAsDouble(iField));
+ }
+ else{
+ fields[iField]=fieldname;
+ theFeature.push_back(poFeature->GetFieldAsDouble(iField));
+ }
+ break;
+ default:
+ {
+ string errorstring="field type not supported in ImgReaderOgr::ReadData";
+ throw(errorstring);
+ }
+ break;
+ }
+ }
+ }
+ data[theClass].push_back(theFeature);
+ ++ifeature;
+ ++ifeature;
+ }
+ if(verbose)
+ cout << "number of features read: " << ifeature << endl << flush;
+ typename map<string,Vector2d<T> >::const_iterator mit=data.begin();
+ int nband=0;
+ if(verbose)
+ cout << "read classes: " << flush;
+ while(mit!=data.end()){
+ if(verbose)
+ cout << mit->first << " " << flush;
+ if(!nband)
+ nband=fields.size();
+ if(pos)
+ assert((mit->second)[0].size()==nband+2);
+ else
+ assert((mit->second)[0].size()==nband);
+ ++mit;
+ }
+ if(verbose)
+ cout << endl << flush;
+ return(nband);
+ }
+ else{
+ ostringstream ess;
+ ess << "no layer in " << m_filename;
+ throw(ess.str());
+ }
+}
+
//read x positions
template <typename T> int ImgReaderOgr::readXY(vector<T>& xVector, vector<T>& yVector, int layer, bool verbose){
if(layer<0)
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/pkg-grass/pktools.git
More information about the Pkg-grass-devel
mailing list