[pktools] 46/375: confusion matrix supports bagging for pkclassify_svm.cc, balancing and bagsize for each class. Still to be implemented for pkclassify_nn.cc
Bas Couwenberg
sebastic at xs4all.nl
Wed Dec 3 21:53:57 UTC 2014
This is an automated email from the git hooks/post-receive script.
sebastic-guest pushed a commit to branch upstream-master
in repository pktools.
commit 2b73cf4a846816f60724b2b0ee3c619773c76333
Author: Pieter Kempeneers <kempenep at gmail.com>
Date: Fri Feb 1 18:14:48 2013 +0100
confusion matrix supports bagging for pkclassify_svm.cc, balancing and bagsize for each class. Still to be implemented for pkclassify_nn.cc
---
src/apps/pkascii2img.cc | 1 -
src/apps/pkascii2ogr.cc | 1 -
src/apps/pkclassify_nn.cc | 1 -
src/apps/pkclassify_svm.cc | 114 ++++++++++++++++++++++++---------------------
src/apps/pkcrop.cc | 24 +++++-----
src/apps/pkextract.cc | 2 +-
src/apps/pklas2img.cc | 4 +-
7 files changed, 76 insertions(+), 71 deletions(-)
diff --git a/src/apps/pkascii2img.cc b/src/apps/pkascii2img.cc
index 1b51861..4e3e064 100644
--- a/src/apps/pkascii2img.cc
+++ b/src/apps/pkascii2img.cc
@@ -43,7 +43,6 @@ int main(int argc, char *argv[])
bool doProcess;//stop process when program was invoked with help option (-h --help)
try{
doProcess=input_opt.retrieveOption(argc,argv);
- input_opt.retrieveOption(argc,argv);
output_opt.retrieveOption(argc,argv);
dataType_opt.retrieveOption(argc,argv);
imageType_opt.retrieveOption(argc,argv);
diff --git a/src/apps/pkascii2ogr.cc b/src/apps/pkascii2ogr.cc
index 60791ba..7d90515 100644
--- a/src/apps/pkascii2ogr.cc
+++ b/src/apps/pkascii2ogr.cc
@@ -40,7 +40,6 @@ int main(int argc, char *argv[])
bool doProcess;//stop process when program was invoked with help option (-h --help)
try{
doProcess=input_opt.retrieveOption(argc,argv);
- input_opt.retrieveOption(argc,argv);
output_opt.retrieveOption(argc,argv);
colX_opt.retrieveOption(argc,argv);
colY_opt.retrieveOption(argc,argv);
diff --git a/src/apps/pkclassify_nn.cc b/src/apps/pkclassify_nn.cc
index afcd750..8a2a2bd 100644
--- a/src/apps/pkclassify_nn.cc
+++ b/src/apps/pkclassify_nn.cc
@@ -120,7 +120,6 @@ int main(int argc, char *argv[])
bool doProcess;//stop process when program was invoked with help option (-h --help)
try{
doProcess=input_opt.retrieveOption(argc,argv);
- input_opt.retrieveOption(argc,argv);
training_opt.retrieveOption(argc,argv);
label_opt.retrieveOption(argc,argv);
reclass_opt.retrieveOption(argc,argv);
diff --git a/src/apps/pkclassify_svm.cc b/src/apps/pkclassify_svm.cc
index 8cd1029..913f5f3 100644
--- a/src/apps/pkclassify_svm.cc
+++ b/src/apps/pkclassify_svm.cc
@@ -114,7 +114,7 @@ int main(int argc, char *argv[])
// Optionpk<bool> weight_opt("wi", "wi", "set the parameter C of class i to weight*C, for C-SVC",true);
Optionpk<unsigned short> comb_opt("c", "comb", "how to combine bootstrap aggregation classifiers (0: sum rule, 1: product rule, 2: max rule). Also used to aggregate classes with rc option.",0);
Optionpk<unsigned short> bag_opt("\0", "bag", "Number of bootstrap aggregations", 1);
- Optionpk<int> bagSize_opt("\0", "bsize", "Percentage of features used from available training features for each bootstrap aggregation", 100);
+ Optionpk<int> bagSize_opt("\0", "bsize", "Percentage of features used from available training features for each bootstrap aggregation (one size for all classes, or a different size for each class respectively", 100);
Optionpk<string> classBag_opt("\0", "class", "output for each individual bootstrap aggregation");
Optionpk<string> mask_opt("\0", "mask", "mask image (see also mvalue option");
Optionpk<short> maskValue_opt("\0", "mvalue", "mask value(s) not to consider for classification (use negative values if only these values should be taken into account). Values will be taken over in classification image.", 0);
@@ -195,6 +195,7 @@ int main(int argc, char *argv[])
if(verbose_opt[0]>=1)
std::cout << "number of bootstrap aggregations: " << nbag << std::endl;
+
unsigned int totalSamples=0;
vector<short> vcode;//unique reclass codes (e.g., -rc 1 -rc 1 -rc 2 -rc 2 -> vcode[0]=1,vcode[1]=2)
vector<struct svm_model*> svm(nbag);
@@ -247,6 +248,7 @@ int main(int argc, char *argv[])
std::sort(band_opt.begin(),band_opt.end());
//----------------------------------- Training -------------------------------
+ ConfusionMatrix cm;
vector< vector<double> > offset(nbag);
vector< vector<double> > scale(nbag);
map<string,Vector2d<float> > trainingMap;
@@ -272,6 +274,7 @@ int main(int argc, char *argv[])
// struct svm_node *x_space;
vector<string> fields;
+
for(int ibag=0;ibag<nbag;++ibag){
//organize training data
if(ibag<training_opt.size()){//if bag contains new training pixels
@@ -351,26 +354,27 @@ int main(int argc, char *argv[])
//do not remove outliers here: could easily be obtained through ogr2ogr -where 'B2<110' output.shp input.shp
//balance training data
if(balance_opt[0]>0){
+ while(balance_opt.size()<nclass)
+ balance_opt.push_back(balance_opt.back());
if(random)
srand(time(NULL));
totalSamples=0;
for(short iclass=0;iclass<nclass;++iclass){
- if(trainingPixels[iclass].size()>balance_opt[0]){
- while(trainingPixels[iclass].size()>balance_opt[0]){
+ if(trainingPixels[iclass].size()>balance_opt[iclass]){
+ while(trainingPixels[iclass].size()>balance_opt[iclass]){
int index=rand()%trainingPixels[iclass].size();
trainingPixels[iclass].erase(trainingPixels[iclass].begin()+index);
}
}
else{
int oldsize=trainingPixels[iclass].size();
- for(int isample=trainingPixels[iclass].size();isample<balance_opt[0];++isample){
+ for(int isample=trainingPixels[iclass].size();isample<balance_opt[iclass];++isample){
int index = rand()%oldsize;
trainingPixels[iclass].push_back(trainingPixels[iclass][index]);
}
}
totalSamples+=trainingPixels[iclass].size();
}
- assert(totalSamples==nclass*balance_opt[0]);
}
//set scale and offset
@@ -477,6 +481,7 @@ int main(int argc, char *argv[])
}
assert(priors_opt.size()==1||priors_opt.size()==nclass);
+ //set priors
priorsReclass.resize(nreclass);
for(short iclass=0;iclass<nreclass;++iclass){
priorsReclass[iclass]=0;
@@ -485,6 +490,9 @@ int main(int argc, char *argv[])
priorsReclass[iclass]+=priors[ic];
}
}
+ //set bagsize for each class if not done already via command line
+ while(bagSize_opt.size()<nclass)
+ bagSize_opt.push_back(bagSize_opt.back());
if(verbose_opt[0]>=1){
std::cout << "number of bands: " << nband << std::endl;
@@ -494,6 +502,28 @@ int main(int argc, char *argv[])
std::cout << " " << priors[iclass];
std::cout << std::endl;
}
+ // ConfusionMatrix cm(nclass);
+ map<string,Vector2d<float> >::iterator mapit=trainingMap.begin();
+ if(reclass_opt.empty()){
+ while(mapit!=trainingMap.end()){
+ cm.pushBackClassName(mapit->first);
+ ++mapit;
+ }
+ }
+ else{
+ if(verbose_opt[0]>1)
+ std::cout << "classes for confusion matrix: " << std::endl;
+ for(short iclass=0;iclass<nreclass;++iclass){
+ ostringstream os;
+ os << vcode[iclass];
+ if(verbose_opt[0]>1)
+ std::cout << os.str() << " ";
+ cm.pushBackClassName(os.str());
+ }
+ if(verbose_opt[0]>1)
+ std::cout << std::endl;
+ }
+ assert(cm.size()==nreclass);
}//if(!ibag)
//Calculate features of trainig set
@@ -504,12 +534,12 @@ int main(int argc, char *argv[])
std::cout << "calculating features for class " << iclass << std::endl;
if(random)
srand(time(NULL));
- nctraining=(bagSize_opt[0]<100)? trainingPixels[iclass].size()/100.0*bagSize_opt[0] : trainingPixels[iclass].size();//bagSize_opt[0] given in % of training size
+ nctraining=(bagSize_opt[iclass]<100)? trainingPixels[iclass].size()/100.0*bagSize_opt[iclass] : trainingPixels[iclass].size();//bagSize_opt[0] given in % of training size
if(nctraining<=0)
nctraining=1;
assert(nctraining<=trainingPixels[iclass].size());
int index=0;
- if(bagSize_opt[0]<100)
+ if(bagSize_opt[iclass]<100)
random_shuffle(trainingPixels[iclass].begin(),trainingPixels[iclass].end());
trainingFeatures[iclass].resize(nctraining);
@@ -578,64 +608,40 @@ int main(int argc, char *argv[])
if(verbose_opt[0])
std::cout << "parameters ok, training" << std::endl;
svm[ibag]=svm_train(&prob[ibag],&param[ibag]);
-
- if(verbose_opt[0]>1)
- std::cout << "SVM is now trained" << std::endl;
if(cv_opt[0]>0){
- //todo: implement reclassification
- // ConfusionMatrix cm(nclass);
- ConfusionMatrix cm;
- map<string,Vector2d<float> >::iterator mapit=trainingMap.begin();
- if(reclass_opt.empty()){
- while(mapit!=trainingMap.end()){
- cm.pushBackClassName(mapit->first);
- ++mapit;
- }
- }
- else{
- if(verbose_opt[0]>1)
- std::cout << "classes for confusion matrix: " << std::endl;
- for(short iclass=0;iclass<nreclass;++iclass){
- ostringstream os;
- os << vcode[iclass];
- if(verbose_opt[0]>1)
- std::cout << os.str() << " ";
- cm.pushBackClassName(os.str());
- }
- if(verbose_opt[0]>1)
- std::cout << std::endl;
- }
- assert(cm.size()==nreclass);
-
double *target = Malloc(double,prob[ibag].l);
svm_cross_validation(&prob[ibag],&param[ibag],cv_opt[0],target);
assert(param[ibag].svm_type != EPSILON_SVR&&param[ibag].svm_type != NU_SVR);//only for regression
for(int i=0;i<prob[ibag].l;i++)
- cm.incrementResult(cm.getClass(vreclass[prob[ibag].y[i]]),cm.getClass(vreclass[target[i]]),1);
- assert(cm.nReference());
- std::cout << cm << std::endl;
- cout << "class #samples userAcc prodAcc" << endl;
- double se95_ua=0;
- double se95_pa=0;
- double se95_oa=0;
- double dua=0;
- double dpa=0;
- double doa=0;
- for(short iclass=0;iclass<cm.nClasses();++iclass){
- dua=cm.ua_pct(cm.getClass(iclass),&se95_ua);
- dpa=cm.pa_pct(cm.getClass(iclass),&se95_pa);
- cout << cm.getClass(iclass) << " " << cm.nReference(cm.getClass(iclass)) << " " << dua << " (" << se95_ua << ")" << " " << dpa << " (" << se95_pa << ")" << endl;
- }
- std::cout << "Kappa: " << cm.kappa() << std::endl;
- doa=cm.oa_pct(&se95_oa);
- std::cout << "Overall Accuracy: " << doa << " (" << se95_oa << ")" << std::endl;
+ cm.incrementResult(cm.getClass(vreclass[prob[ibag].y[i]]),cm.getClass(vreclass[target[i]]),1.0/nbag);
free(target);
- }
+ }
+ if(verbose_opt[0]>1)
+ std::cout << "SVM is now trained" << std::endl;
// *NOTE* Because svm_model contains pointers to svm_problem, you can
// not free the memory used by svm_problem if you are still using the
// svm_model produced by svm_train().
}//for ibag
+ if(cv_opt[0]>0){
+ assert(cm.nReference());
+ std::cout << cm << std::endl;
+ cout << "class #samples userAcc prodAcc" << endl;
+ double se95_ua=0;
+ double se95_pa=0;
+ double se95_oa=0;
+ double dua=0;
+ double dpa=0;
+ double doa=0;
+ for(short iclass=0;iclass<cm.nClasses();++iclass){
+ dua=cm.ua_pct(cm.getClass(iclass),&se95_ua);
+ dpa=cm.pa_pct(cm.getClass(iclass),&se95_pa);
+ cout << cm.getClass(iclass) << " " << cm.nReference(cm.getClass(iclass)) << " " << dua << " (" << se95_ua << ")" << " " << dpa << " (" << se95_pa << ")" << endl;
+ }
+ std::cout << "Kappa: " << cm.kappa() << std::endl;
+ doa=cm.oa_pct(&se95_oa);
+ std::cout << "Overall Accuracy: " << doa << " (" << se95_oa << ")" << std::endl;
+ }
//--------------------------------- end of training -----------------------------------
if(input_opt.empty())
diff --git a/src/apps/pkcrop.cc b/src/apps/pkcrop.cc
index 6a4e71b..158ed5f 100644
--- a/src/apps/pkcrop.cc
+++ b/src/apps/pkcrop.cc
@@ -30,8 +30,8 @@ along with pktools. If not, see <http://www.gnu.org/licenses/>.
int main(int argc, char *argv[])
{
- Optionpk<string> input_opt("i", "input", "Input image file(s). If input contains multiple images, a multi-band output is created", "");
- Optionpk<string> output_opt("o", "output", "Output image file", "");
+ Optionpk<string> input_opt("i", "input", "Input image file(s). If input contains multiple images, a multi-band output is created");
+ Optionpk<string> output_opt("o", "output", "Output image file");
Optionpk<string> projection_opt("p", "projection", "projection in EPSG format (leave blank to copy from input file, use EPSG:3035 to use European projection and to force to European grid", "");
Optionpk<string> extent_opt("e", "extent", "get boundary from extent from polygons in vector file", "");
Optionpk<bool> mask_opt("m","mask","mask values out of polygon in extent file to flag option (tip: for better performance, use gdal_rasterize -i -burn 0 -l extent extent.shp output (with output the result of pkcrop)",false);
@@ -56,7 +56,6 @@ int main(int argc, char *argv[])
bool doProcess;//stop process when program was invoked with help option (-h --help)
try{
doProcess=input_opt.retrieveOption(argc,argv);
- input_opt.retrieveOption(argc,argv);
output_opt.retrieveOption(argc,argv);
projection_opt.retrieveOption(argc,argv);
extent_opt.retrieveOption(argc,argv);
@@ -87,6 +86,14 @@ int main(int argc, char *argv[])
std::cout << "short option -h shows basic options only, use long option --help to show all options" << std::endl;
exit(0);//help was invoked, stop processing
}
+ if(input_opt.empty()){
+ std::cerr << "No input file provided (use option -i). Use pkinfo --help for help information" << std::endl;
+ exit(0);//help was invoked, stop processing
+ }
+ if(output_opt.empty()){
+ std::cerr << "No output file provided (use option -i). Use pkinfo --help for help information" << std::endl;
+ exit(0);//help was invoked, stop processing
+ }
RESAMPLE theResample;
if(resample_opt[0]=="near"){
@@ -307,16 +314,11 @@ int main(int argc, char *argv[])
else if(imgReader.isGeoRef())
imgWriter.setProjection(imgReader.getProjection());
if(colorTable_opt.size()){
- if(verbose_opt[0])
- cout << "set colortable " << colorTable_opt[0] << endl;
- assert(imgWriter.getDataType()==GDT_Byte);
- imgWriter.setColorTable(colorTable_opt[0]);
+ if(colorTable_opt[0]!="none")
+ imgWriter.setColorTable(colorTable_opt[0]);
}
- else if(imgReader.getColorTable()!=NULL){
- if(verbose_opt[0])
- cout << "set colortable from input image" << endl;
+ else if (imgReader.getColorTable()!=NULL)//copy colorTable from input image
imgWriter.setColorTable(imgReader.getColorTable());
- }
}
double startCol=uli;
double endCol=lri;
diff --git a/src/apps/pkextract.cc b/src/apps/pkextract.cc
index b18b17c..e6e46ad 100644
--- a/src/apps/pkextract.cc
+++ b/src/apps/pkextract.cc
@@ -46,7 +46,7 @@ int main(int argc, char *argv[])
Optionpk<string> bufferOutput_opt("bu", "bu", "Buffer output shape file", "");
Optionpk<short> geo_opt("g", "geo", "geo coordinates", 1);
Optionpk<short> down_opt("down", "down", "down sampling factor. Can be used to create grid points", 1);
- Optionpk<float> threshold_opt("t", "threshold", "threshold for selecting samples (randomly). Provide probability in percentage (>0) or absolute (<0). Use multiple threshold values (e.g. -t 80 -t 60) is more classes are to be extracted with random selection. Use value 100 to select all pixels for selected class(es)", 100);
+ Optionpk<float> threshold_opt("t", "threshold", "threshold for selecting samples (randomly). Provide probability in percentage (>0) or absolute (<0). Use multiple threshold values (e.g. -t 80 -t 60) if more classes are to be extracted with random selection. Use value 100 to select all pixels for selected class(es)", 100);
Optionpk<double> min_opt("min", "min", "minimum number of samples to select (0)", 0);
Optionpk<short> boundary_opt("bo", "boundary", "boundary for selecting the sample", 1);
Optionpk<short> rbox_opt("rb", "rbox", "rectangular boundary box (total width in m) to draw around the selected pixel. Can not combined with class option. Use multiple rbox options for multiple boundary boxes. Use value 0 for no box)", 0);
diff --git a/src/apps/pklas2img.cc b/src/apps/pklas2img.cc
index afbf4ad..fd330a2 100644
--- a/src/apps/pklas2img.cc
+++ b/src/apps/pklas2img.cc
@@ -450,8 +450,8 @@ int main(int argc,char **argv) {
cout << errorString << endl;
exit(1);
}
- int newdimx=2*(dimx-1)+1;
- int newdimy=2*(dimy-1)+1;//from PE&RS vol 71 pp313-324
+ int newdimx=(dimx==1)? 3: 2*(dimx-1)+1;
+ int newdimy=(dimx==1)? 3: 2*(dimy-1)+1;//from PE&RS vol 71 pp313-324
hThreshold=hThreshold_opt[0]+maxSlope_opt[0]*(newdimx-dimx)*dx_opt[0];
dimx=newdimx;
dimy=newdimy;
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/pkg-grass/pktools.git
More information about the Pkg-grass-devel
mailing list