[pktools] 31/375: added OptFactory.h and pkopt_svm.cc
Bas Couwenberg
sebastic at xs4all.nl
Wed Dec 3 21:53:55 UTC 2014
This is an automated email from the git hooks/post-receive script.
sebastic-guest pushed a commit to branch upstream-master
in repository pktools.
commit 9c4dbcc97e5a732185d8fd71da9d7e23fbbe6383
Author: Pieter Kempeneers <kempenep at gmail.com>
Date: Wed Jan 16 15:56:07 2013 +0100
added OptFactory.h and pkopt_svm.cc
---
src/algorithms/OptFactory.h | 215 ++++++++++++++++++
src/apps/pkopt_svm.cc | 530 ++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 745 insertions(+)
diff --git a/src/algorithms/OptFactory.h b/src/algorithms/OptFactory.h
new file mode 100644
index 0000000..d13ba23
--- /dev/null
+++ b/src/algorithms/OptFactory.h
@@ -0,0 +1,215 @@
+/**********************************************************************
+OptFactory.h: factory class for nlopt::opt (selecting algorithm via string)
+Copyright (C) 2008-2013 Pieter Kempeneers
+
+This file is part of pktools
+
+pktools is free software: you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation, either version 3 of the License, or
+(at your option) any later version.
+
+pktools is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with pktools. If not, see <http://www.gnu.org/licenses/>.
+***********************************************************************/
+#ifndef _OPTFACTORY_H_
+#define _OPTFACTORY_H_
+
+#include <map>
+#include <string>
+#include <nlopt.hpp>
+
+//Factory that creates an nlopt::opt optimizer from the algorithm name given
+//as a string on the command line. Only derivative-free algorithms ([GL]N_*)
+//are supported, since the objective function provides no gradient.
+class OptFactory
+{
+private:
+  //fill the lookup table mapping algorithm names to nlopt enum values
+  static void initMap(std::map<std::string, nlopt::algorithm>& m_algMap){
+    m_algMap["GN_DIRECT"]=nlopt::GN_DIRECT;
+    m_algMap["GN_DIRECT_L"]=nlopt::GN_DIRECT_L;
+    m_algMap["GN_DIRECT_L_RAND"]=nlopt::GN_DIRECT_L_RAND;
+    m_algMap["GN_DIRECT_NOSCAL"]=nlopt::GN_DIRECT_NOSCAL;
+    m_algMap["GN_DIRECT_L_NOSCAL"]=nlopt::GN_DIRECT_L_NOSCAL;
+    m_algMap["GN_DIRECT_L_RAND_NOSCAL"]=nlopt::GN_DIRECT_L_RAND_NOSCAL;
+    m_algMap["GN_ORIG_DIRECT"]=nlopt::GN_ORIG_DIRECT;
+    m_algMap["GN_ORIG_DIRECT_L"]=nlopt::GN_ORIG_DIRECT_L;
+    m_algMap["GD_STOGO"]=nlopt::GD_STOGO;
+    m_algMap["GD_STOGO_RAND"]=nlopt::GD_STOGO_RAND;
+    m_algMap["LD_LBFGS_NOCEDAL"]=nlopt::LD_LBFGS_NOCEDAL;
+    m_algMap["LD_LBFGS"]=nlopt::LD_LBFGS;
+    m_algMap["LN_PRAXIS"]=nlopt::LN_PRAXIS;
+    m_algMap["LD_VAR1"]=nlopt::LD_VAR1;
+    m_algMap["LD_VAR2"]=nlopt::LD_VAR2;
+    m_algMap["LD_TNEWTON"]=nlopt::LD_TNEWTON;
+    m_algMap["LD_TNEWTON_RESTART"]=nlopt::LD_TNEWTON_RESTART;
+    m_algMap["LD_TNEWTON_PRECOND"]=nlopt::LD_TNEWTON_PRECOND;
+    m_algMap["LD_TNEWTON_PRECOND_RESTART"]=nlopt::LD_TNEWTON_PRECOND_RESTART;
+    m_algMap["GN_CRS2_LM"]=nlopt::GN_CRS2_LM;
+    m_algMap["GN_MLSL"]=nlopt::GN_MLSL;
+    m_algMap["GD_MLSL"]=nlopt::GD_MLSL;
+    m_algMap["GN_MLSL_LDS"]=nlopt::GN_MLSL_LDS;
+    m_algMap["GD_MLSL_LDS"]=nlopt::GD_MLSL_LDS;
+    m_algMap["LD_MMA"]=nlopt::LD_MMA;
+    m_algMap["LN_COBYLA"]=nlopt::LN_COBYLA;
+    m_algMap["LN_NEWUOA"]=nlopt::LN_NEWUOA;
+    m_algMap["LN_NEWUOA_BOUND"]=nlopt::LN_NEWUOA_BOUND;
+    m_algMap["LN_NELDERMEAD"]=nlopt::LN_NELDERMEAD;
+    m_algMap["LN_SBPLX"]=nlopt::LN_SBPLX;
+    m_algMap["LN_AUGLAG"]=nlopt::LN_AUGLAG;
+    m_algMap["LD_AUGLAG"]=nlopt::LD_AUGLAG;
+    m_algMap["LN_AUGLAG_EQ"]=nlopt::LN_AUGLAG_EQ;
+    m_algMap["LD_AUGLAG_EQ"]=nlopt::LD_AUGLAG_EQ;
+    m_algMap["LN_BOBYQA"]=nlopt::LN_BOBYQA;
+    m_algMap["GN_ISRES"]=nlopt::GN_ISRES;
+    m_algMap["AUGLAG"]=nlopt::AUGLAG;
+    m_algMap["AUGLAG_EQ"]=nlopt::AUGLAG_EQ;
+    m_algMap["G_MLSL"]=nlopt::G_MLSL;
+    m_algMap["G_MLSL_LDS"]=nlopt::G_MLSL_LDS;
+    m_algMap["LD_SLSQP"]=nlopt::LD_SLSQP;//fixed: trailing space in key removed
+  }
+public:
+  OptFactory(){};
+  ~OptFactory(){};
+  //return an nlopt optimizer for the algorithm given as string
+  //algorithmString: algorithm name as listed in initMap (e.g. "LN_COBYLA")
+  //npar: number of parameters to optimize
+  //throws std::string when the name is unknown or the algorithm requires
+  //derivatives
+  static nlopt::opt getOptimizer(const std::string& algorithmString, unsigned int npar){
+    std::map<std::string, nlopt::algorithm> algMap;
+    initMap(algMap);
+    //fixed: use find() instead of operator[], which silently default-inserted
+    //value 0 (nlopt::GN_DIRECT) for unknown algorithm names
+    std::map<std::string, nlopt::algorithm>::const_iterator mit=algMap.find(algorithmString);
+    if(mit!=algMap.end()){
+      switch(mit->second){
+        //all derivative-free algorithms share the same construction,
+        //so the cases fall through to a single constructor call
+      case(nlopt::GN_DIRECT):
+      case(nlopt::GN_DIRECT_L):
+      case(nlopt::GN_DIRECT_L_RAND):
+      case(nlopt::GN_DIRECT_NOSCAL):
+      case(nlopt::GN_DIRECT_L_NOSCAL):
+      case(nlopt::GN_DIRECT_L_RAND_NOSCAL):
+      case(nlopt::GN_ORIG_DIRECT):
+      case(nlopt::GN_ORIG_DIRECT_L):
+      case(nlopt::LN_PRAXIS):
+      case(nlopt::GN_CRS2_LM):
+      case(nlopt::GN_MLSL):
+      case(nlopt::GN_MLSL_LDS):
+      case(nlopt::LN_COBYLA):
+      case(nlopt::LN_NEWUOA):
+      case(nlopt::LN_NEWUOA_BOUND):
+      case(nlopt::LN_NELDERMEAD):
+      case(nlopt::LN_SBPLX):
+      case(nlopt::LN_AUGLAG):
+      case(nlopt::LN_AUGLAG_EQ):
+      case(nlopt::LN_BOBYQA):
+      case(nlopt::GN_ISRES):
+        return nlopt::opt(mit->second,npar);
+      default:
+        break;//derivative-based algorithm: not supported
+      }
+    }
+    std::string errorString="Error: derivative optimization algorithm ";
+    errorString+= algorithmString;
+    errorString+= " not supported, select derivative-free algorithm ([GL]N_*)";
+    throw(errorString);
+  }
+};
+#endif /* _OPTFACTORY_H_ */
diff --git a/src/apps/pkopt_svm.cc b/src/apps/pkopt_svm.cc
new file mode 100644
index 0000000..517f321
--- /dev/null
+++ b/src/apps/pkopt_svm.cc
@@ -0,0 +1,530 @@
+/**********************************************************************
+pkopt_svm.cc: program to optimize parameters for SVM classification
+Copyright (C) 2008-2013 Pieter Kempeneers
+
+This file is part of pktools
+
+pktools is free software: you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation, either version 3 of the License, or
+(at your option) any later version.
+
+pktools is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with pktools. If not, see <http://www.gnu.org/licenses/>.
+***********************************************************************/
+#include <iostream>
+#include <sstream>
+#include <fstream>
+#include <vector>
+#include <math.h>
+#include <nlopt.hpp>
+#include "base/Optionpk.h"
+#include "base/Optionpk.h"
+#include "algorithms/ConfusionMatrix.h"
+#include "algorithms/FeatureSelector.h"
+#include "algorithms/OptFactory.h"
+#include "algorithms/svm.h"
+#include "pkclassify_nn.h"
+
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#define Malloc(type,n) (type *)malloc((n)*sizeof(type))
+ //declare objective function
+double objFunction(const std::vector<double> &x, std::vector<double> &grad, void *my_func_data);
+
+//global parameters used in objective function
+//(file scope so the nlopt callback can read the command line options; the
+//void* callback data only carries the training features)
+Optionpk<unsigned short> svm_type_opt("svmt", "svmtype", "type of SVM (0: C-SVC, 1: nu-SVC, 2: one-class SVM, 3: epsilon-SVR, 4: nu-SVR)",0);
+Optionpk<unsigned short> kernel_type_opt("kt", "kerneltype", "type of kernel function (0: linear: u'*v, 1: polynomial: (gamma*u'*v + coef0)^degree, 2: radial basis function: exp(-gamma*|u-v|^2), 3: sigmoid: tanh(gamma*u'*v + coef0), 4: precomputed kernel (kernel values in training_set_file)",2);
+Optionpk<unsigned short> kernel_degree_opt("kd", "kd", "degree in kernel function",3);
+Optionpk<float> coef0_opt("c0", "coef0", "coef0 in kernel function",0);
+Optionpk<float> nu_opt("nu", "nu", "the parameter nu of nu-SVC, one-class SVM, and nu-SVR",0.5);
+Optionpk<float> epsilon_loss_opt("eloss", "eloss", "the epsilon in loss function of epsilon-SVR",0.1);
+Optionpk<int> cache_opt("cache", "cache", "cache memory size in MB",100);
+Optionpk<float> epsilon_tol_opt("etol", "etol", "the tolerance of termination criterion",0.001);
+Optionpk<bool> shrinking_opt("shrink", "shrink", "whether to use the shrinking heuristics",false);
+Optionpk<bool> prob_est_opt("pe", "probest", "whether to train a SVC or SVR model for probability estimates",false);
+// Optionpk<bool> weight_opt("wi", "wi", "set the parameter C of class i to weight*C, for C-SVC",true);
+Optionpk<unsigned short> cv_opt("cv", "cv", "n-fold cross validation mode",2);
+Optionpk<short> verbose_opt("v", "verbose", "set to: 0 (results only), 1 (confusion matrix), 2 (debug)",0);
+
+//objective function passed to nlopt: runs an n-fold cross validation of the
+//SVM on the training features and returns 1.0/(overall accuracy), which the
+//optimizer minimizes (i.e. accuracy is maximized)
+//x[0]: cost parameter C, x[1]: gamma in the kernel function; this ordering
+//must match the bounds and initial values set in main
+//grad: must be empty (only derivative-free algorithms are supported)
+//my_func_data: pointer to the training features [class][sample][band]
+double objFunction(const std::vector<double> &x, std::vector<double> &grad, void *my_func_data){
+  assert(grad.empty());
+  vector<Vector2d<float> > *tf=reinterpret_cast<vector<Vector2d<float> >*> (my_func_data);
+  //fixed: x[0] is ccost and x[1] is gamma, consistent with the bounds and
+  //initial values set in main and with the verbose report below (the
+  //original read them in the opposite order)
+  float ccost=x[0];
+  float gamma=x[1];
+  double error=1.0/epsilon_tol_opt[0];//worst-case error, returned when oa is 0
+  double kappa=1.0;
+  double oa=1.0;
+  unsigned short nclass=tf->size();
+  unsigned int ntraining=0;
+  for(int iclass=0;iclass<nclass;++iclass)
+    ntraining+=(*tf)[iclass].size();
+  unsigned short nFeatures=(*tf)[0][0].size();
+  //set the libsvm parameters from the global (command line) options
+  struct svm_parameter param;
+  param.svm_type = svm_type_opt[0];
+  param.kernel_type = kernel_type_opt[0];
+  param.degree = kernel_degree_opt[0];
+  param.gamma = gamma;
+  param.coef0 = coef0_opt[0];
+  param.nu = nu_opt[0];
+  param.cache_size = cache_opt[0];
+  param.C = ccost;
+  param.eps = epsilon_tol_opt[0];
+  param.p = epsilon_loss_opt[0];
+  param.shrinking = (shrinking_opt[0])? 1 : 0;
+  param.probability = (prob_est_opt[0])? 1 : 0;
+  param.nr_weight = 0;//not used: I use priors and balancing
+  param.weight_label = NULL;
+  param.weight = NULL;
+  param.verbose=(verbose_opt[0]>2)? true:false;
+  //copy the training features into the libsvm problem structure
+  struct svm_model* svm;
+  struct svm_problem prob;
+  struct svm_node* x_space;
+  prob.l=ntraining;
+  prob.y = Malloc(double,prob.l);
+  prob.x = Malloc(struct svm_node *,prob.l);
+  x_space = Malloc(struct svm_node,(nFeatures+1)*ntraining);//+1: end-of-sample marker
+  unsigned long int spaceIndex=0;
+  int lIndex=0;
+  for(int iclass=0;iclass<nclass;++iclass){
+    for(int isample=0;isample<(*tf)[iclass].size();++isample){
+      prob.x[lIndex]=&(x_space[spaceIndex]);
+      for(int ifeature=0;ifeature<nFeatures;++ifeature){
+        x_space[spaceIndex].index=ifeature+1;//libsvm feature indices start at 1
+        x_space[spaceIndex].value=(*tf)[iclass][isample][ifeature];
+        ++spaceIndex;
+      }
+      x_space[spaceIndex++].index=-1;//end of sample marker
+      prob.y[lIndex]=iclass;
+      ++lIndex;
+    }
+  }
+
+  assert(lIndex==prob.l);
+  if(verbose_opt[0]>2)
+    std::cout << "checking parameters" << std::endl;
+  //fixed: check the result of svm_check_parameter (NULL when parameters are
+  //valid); the original ignored the returned error string
+  const char* checkError=svm_check_parameter(&prob,&param);
+  if(checkError)
+    std::cerr << "Warning: svm_check_parameter: " << checkError << std::endl;
+  if(verbose_opt[0]>2)
+    std::cout << "parameters ok, training" << std::endl;
+  //NOTE(review): the model trained here is not used by svm_cross_validation
+  //below (cross validation trains its own models); kept only for the verbose
+  //trace -- confirm whether it can be dropped
+  svm=svm_train(&prob,&param);
+  if(verbose_opt[0]>2)
+    std::cout << "SVM is now trained" << std::endl;
+
+  ConfusionMatrix cm(nclass);
+  double *target = Malloc(double,prob.l);
+  svm_cross_validation(&prob,&param,cv_opt[0],target);
+  assert(param.svm_type != EPSILON_SVR&&param.svm_type != NU_SVR);//only for regression
+  for(int i=0;i<prob.l;i++)
+    cm.incrementResult(cm.getClass(prob.y[i]),cm.getClass(target[i]),1);
+  assert(cm.nReference());
+  //release libsvm resources
+  free(target);
+  free(prob.y);
+  free(prob.x);
+  free(x_space);
+  svm_free_and_destroy_model(&(svm));
+  if(verbose_opt[0]>2)
+    std::cout << cm << std::endl;
+  kappa=cm.kappa();
+  oa=cm.oa();
+  if(verbose_opt[0]>1){
+    std::cout << " --ccost " << x[0];
+    std::cout << " --gamma " << x[1];
+    std::cout << std::endl;
+    std::cout << "oa: " << oa << std::endl;
+    std::cout << "kappa: " << kappa << std::endl;
+  }
+  if(oa)
+    error=1.0/oa;//minimizing 1/oa maximizes the overall accuracy
+  return(error);
+}
+
+//main: parse the command line options, read and preprocess the training
+//samples (reclass, balance, scale), then optimize the SVM cost (C) and gamma
+//parameters with a derivative-free NLopt algorithm, maximizing the cross
+//validated overall accuracy; the optimum is reported as --ccost and --gamma
+int main(int argc, char *argv[])
+{
+  map<short,int> reclassMap;
+  vector<int> vreclass;
+  std::string versionString="version ";
+  versionString+=VERSION;
+  versionString+=", Copyright (C) 2008-2012 Pieter Kempeneers.\n\
+ This program comes with ABSOLUTELY NO WARRANTY; for details type use option -h.\n\
+ This is free software, and you are welcome to redistribute it\n\
+ under certain conditions; use option --license for details.";
+  Optionpk<bool> version_opt("\0","version",versionString,false);
+  Optionpk<bool> license_opt("lic","license","show license information",false);
+  Optionpk<bool> todo_opt("\0","todo","",false);
+  Optionpk<bool> help_opt("h","help","shows this help info",false);
+  Optionpk<string> training_opt("t", "training", "training shape file. A single shape file contains all training features (must be set as: B0, B1, B2,...) for all classes (class numbers identified by label option).");
+  Optionpk<string> label_opt("\0", "label", "identifier for class label in training shape file.","label");
+  Optionpk<unsigned short> reclass_opt("\0", "rc", "reclass code (e.g. --rc=12 --rc=23 to reclass first two classes to 12 and 23 resp.).", 0);
+  Optionpk<unsigned int> balance_opt("\0", "balance", "balance the input data to this number of samples for each class", 0);
+  Optionpk<int> minSize_opt("m", "min", "if number of training pixels is less then min, do not take this class into account", 0);
+  Optionpk<double> start_opt("s", "start", "start band sequence number (set to 0)",0);
+  Optionpk<double> end_opt("e", "end", "end band sequence number (set to 0 for all bands)", 0);
+  Optionpk<short> band_opt("b", "band", "band index (starting from 0, either use band option or use start to end)");
+  Optionpk<double> offset_opt("\0", "offset", "offset value for each spectral band input features: refl[band]=(DN[band]-offset[band])/scale[band]", 0.0);
+  Optionpk<double> scale_opt("\0", "scale", "scale value for each spectral band input features: refl=(DN[band]-offset[band])/scale[band] (use 0 if scale min and max in each band to -1.0 and 1.0)", 0.0);
+  Optionpk<float> gamma_opt("g", "gamma", "min max boundaries for gamma in kernel function (optional: initial value)",0);
+  Optionpk<float> ccost_opt("cc", "ccost", "min and max boundaries the parameter C of C-SVC, epsilon-SVR, and nu-SVR (optional: initial value)",1);
+  Optionpk<unsigned int> maxit_opt("maxit","maxit","maximum number of iterations",500);
+  Optionpk<string> algorithm_opt("a", "algorithm", "optimization algorithm (see http://ab-initio.mit.edu/wiki/index.php/NLopt_Algorithms)","LN_COBYLA");
+  Optionpk<double> tolerance_opt("tol","tolerance","relative tolerance for stopping criterion",0.0001);
+
+  version_opt.retrieveOption(argc,argv);
+  license_opt.retrieveOption(argc,argv);
+  help_opt.retrieveOption(argc,argv);
+  todo_opt.retrieveOption(argc,argv);
+  training_opt.retrieveOption(argc,argv);
+  label_opt.retrieveOption(argc,argv);
+  reclass_opt.retrieveOption(argc,argv);
+  balance_opt.retrieveOption(argc,argv);
+  minSize_opt.retrieveOption(argc,argv);
+  start_opt.retrieveOption(argc,argv);
+  end_opt.retrieveOption(argc,argv);
+  band_opt.retrieveOption(argc,argv);
+  offset_opt.retrieveOption(argc,argv);
+  scale_opt.retrieveOption(argc,argv);
+  svm_type_opt.retrieveOption(argc,argv);
+  kernel_type_opt.retrieveOption(argc,argv);
+  kernel_degree_opt.retrieveOption(argc,argv);
+  gamma_opt.retrieveOption(argc,argv);
+  coef0_opt.retrieveOption(argc,argv);
+  ccost_opt.retrieveOption(argc,argv);
+  nu_opt.retrieveOption(argc,argv);
+  epsilon_loss_opt.retrieveOption(argc,argv);
+  cache_opt.retrieveOption(argc,argv);
+  epsilon_tol_opt.retrieveOption(argc,argv);
+  shrinking_opt.retrieveOption(argc,argv);
+  prob_est_opt.retrieveOption(argc,argv);
+  cv_opt.retrieveOption(argc,argv);
+  maxit_opt.retrieveOption(argc,argv);
+  tolerance_opt.retrieveOption(argc,argv);
+  algorithm_opt.retrieveOption(argc,argv);
+  verbose_opt.retrieveOption(argc,argv);
+
+  if(version_opt[0]||todo_opt[0]){
+    std::cout << version_opt.getHelp() << std::endl;
+    std::cout << "todo: " << todo_opt.getHelp() << std::endl;
+    exit(0);
+  }
+  if(license_opt[0]){
+    std::cout << Optionpk<bool>::getGPLv3License() << std::endl;
+    exit(0);
+  }
+  if(help_opt[0]){
+    std::cout << "usage: pkopt_svm -t training [OPTIONS]" << std::endl;
+    exit(0);
+  }
+
+  //a training file is mandatory
+  assert(training_opt.size());//added: check the option is set before indexing
+  assert(training_opt[0].size());
+  if(verbose_opt[0]>=1)
+    std::cout << "training shape file: " << training_opt[0] << std::endl;
+
+  unsigned int totalSamples=0;
+  int nreclass=0;
+  vector<int> vcode;//unique class codes in recode string
+
+  unsigned short nclass=0;
+  int nband=0;
+  int startBand=2;//first two bands represent X and Y pos
+
+  vector<double> offset;
+  vector<double> scale;
+  vector< Vector2d<float> > trainingPixels;//[class][sample][band]
+
+  if(reclass_opt.size()>1){
+    vreclass.resize(reclass_opt.size());
+    for(int iclass=0;iclass<reclass_opt.size();++iclass){
+      reclassMap[iclass]=reclass_opt[iclass];
+      vreclass[iclass]=reclass_opt[iclass];
+    }
+  }
+
+  //sort bands
+  if(band_opt.size())
+    std::sort(band_opt.begin(),band_opt.end());
+  //----------------------------------- Training -------------------------------
+  vector<string> fields;
+  //organize training data
+  trainingPixels.clear();
+  map<int,Vector2d<float> > trainingMap;
+  if(verbose_opt[0]>=1)
+    std::cout << "reading imageShape file " << training_opt[0] << std::endl;
+  try{
+    if(band_opt.size())
+      totalSamples=readDataImageShape(training_opt[0],trainingMap,fields,band_opt,label_opt[0],verbose_opt[0]);
+    else
+      totalSamples=readDataImageShape(training_opt[0],trainingMap,fields,start_opt[0],end_opt[0],label_opt[0],verbose_opt[0]);
+    if(trainingMap.size()<2){
+      string errorstring="Error: could not read at least two classes from training file";
+      throw(errorstring);
+    }
+  }
+  catch(string error){
+    cerr << error << std::endl;
+    exit(1);
+  }
+  catch(...){
+    cerr << "error caught" << std::endl;//fixed typo in message ("catched")
+    exit(1);
+  }
+  //delete class 0
+  if(verbose_opt[0]>=1)
+    std::cout << "erasing class 0 from training set (" << trainingMap[0].size() << " from " << totalSamples << ") samples" << std::endl;
+  totalSamples-=trainingMap[0].size();
+  trainingMap.erase(0);
+  //convert map to vector
+  short iclass=0;
+  if(reclass_opt.size()==1){//no reclass option, read classes from shape
+    reclassMap.clear();
+    vreclass.clear();
+  }
+  if(verbose_opt[0]>1)
+    std::cout << "training pixels: " << std::endl;
+  map<int,Vector2d<float> >::iterator mapit=trainingMap.begin();
+  while(mapit!=trainingMap.end()){
+    //delete small classes
+    if((mapit->second).size()<minSize_opt[0]){
+      //fixed: post-increment before erase keeps the iterator valid (erasing
+      //mapit and then reusing it was undefined behavior)
+      trainingMap.erase(mapit++);
+      continue;
+      //todo: beware of reclass option: delete this reclass if no samples are left in this classes!!
+    }
+    if(reclass_opt.size()==1){//no reclass option, read classes from shape
+      reclassMap[iclass]=(mapit->first);
+      vreclass.push_back(mapit->first);
+    }
+    trainingPixels.push_back(mapit->second);
+    if(verbose_opt[0]>1)
+      std::cout << mapit->first << ": " << (mapit->second).size() << " samples" << std::endl;
+    ++iclass;
+    ++mapit;
+  }
+  nclass=trainingPixels.size();
+  nband=trainingPixels[0][0].size()-2;//X and Y//trainingPixels[0][0].size();
+  assert(reclassMap.size()==nclass);
+
+  //do not remove outliers here: could easily be obtained through ogr2ogr -where 'B2<110' output.shp input.shp
+  //balance training data
+  if(balance_opt[0]>0){
+    //NOTE(review): "random" here is the libc function pointer, so this
+    //condition is always true; presumably a boolean option was intended
+    if(random)
+      srand(time(NULL));
+    totalSamples=0;
+    for(int iclass=0;iclass<nclass;++iclass){
+      if(trainingPixels[iclass].size()>balance_opt[0]){
+        //randomly remove samples until the balance size is reached
+        while(trainingPixels[iclass].size()>balance_opt[0]){
+          int index=rand()%trainingPixels[iclass].size();
+          trainingPixels[iclass].erase(trainingPixels[iclass].begin()+index);
+        }
+      }
+      else{
+        //randomly duplicate samples until the balance size is reached
+        int oldsize=trainingPixels[iclass].size();
+        for(int isample=trainingPixels[iclass].size();isample<balance_opt[0];++isample){
+          int index = rand()%oldsize;
+          trainingPixels[iclass].push_back(trainingPixels[iclass][index]);
+        }
+      }
+      totalSamples+=trainingPixels[iclass].size();
+    }
+    assert(totalSamples==nclass*balance_opt[0]);
+  }
+
+  //set scale and offset
+  offset.resize(nband);
+  scale.resize(nband);
+  if(offset_opt.size()>1)
+    assert(offset_opt.size()==nband);
+  if(scale_opt.size()>1)
+    assert(scale_opt.size()==nband);
+  for(int iband=0;iband<nband;++iband){
+    if(verbose_opt[0]>1)
+      std::cout << "scaling for band" << iband << std::endl;
+    offset[iband]=(offset_opt.size()==1)?offset_opt[0]:offset_opt[iband];
+    scale[iband]=(scale_opt.size()==1)?scale_opt[0]:scale_opt[iband];
+    //search for min and maximum
+    if(scale[iband]<=0){
+      float theMin=trainingPixels[0][0][iband+startBand];
+      float theMax=trainingPixels[0][0][iband+startBand];
+      for(int iclass=0;iclass<nclass;++iclass){
+        for(int isample=0;isample<trainingPixels[iclass].size();++isample){
+          if(theMin>trainingPixels[iclass][isample][iband+startBand])
+            theMin=trainingPixels[iclass][isample][iband+startBand];
+          if(theMax<trainingPixels[iclass][isample][iband+startBand])
+            theMax=trainingPixels[iclass][isample][iband+startBand];
+        }
+      }
+      //center and scale the band to [-1.0,1.0]
+      offset[iband]=theMin+(theMax-theMin)/2.0;
+      scale[iband]=(theMax-theMin)/2.0;
+      if(verbose_opt[0]>1){
+        std::cout << "Extreme image values for band " << iband << ": [" << theMin << "," << theMax << "]" << std::endl;
+        std::cout << "Using offset, scale: " << offset[iband] << ", " << scale[iband] << std::endl;
+        std::cout << "scaled values for band " << iband << ": [" << (theMin-offset[iband])/scale[iband] << "," << (theMax-offset[iband])/scale[iband] << "]" << std::endl;
+      }
+    }
+  }
+
+  //recode vreclass to ordered vector, starting from 0 to nreclass
+  vcode.clear();
+  if(verbose_opt[0]>=1){
+    std::cout << "before recoding: " << std::endl;
+    for(int iclass = 0; iclass < vreclass.size(); iclass++)
+      std::cout << " " << vreclass[iclass];
+    std::cout << std::endl;
+  }
+  vector<int> vord=vreclass;//ordered vector, starting from 0 to nreclass
+  map<short,int> mreclass;
+  //NOTE(review): iclass is not reset to 0 here, so the recoded values start
+  //at nclass rather than 0; harmless in this program (vord is only reported),
+  //but confirm before reusing this recoding elsewhere
+  for(int ic=0;ic<vreclass.size();++ic){
+    if(mreclass.find(vreclass[ic])==mreclass.end())
+      mreclass[vreclass[ic]]=iclass++;
+  }
+  for(int ic=0;ic<vreclass.size();++ic)
+    vord[ic]=mreclass[vreclass[ic]];
+  //construct unique class codes
+  while(!vreclass.empty()){
+    vcode.push_back(*(vreclass.begin()));
+    //delete all these entries from vreclass
+    vector<int>::iterator vit;
+    while((vit=find(vreclass.begin(),vreclass.end(),vcode.back()))!=vreclass.end())
+      vreclass.erase(vit);
+  }
+  if(verbose_opt[0]>=1){
+    std::cout << "recode values: " << std::endl;
+    for(int icode=0;icode<vcode.size();++icode)
+      std::cout << vcode[icode] << " ";
+    std::cout << std::endl;
+  }
+  vreclass=vord;
+  if(verbose_opt[0]>=1){
+    std::cout << "after recoding: " << std::endl;
+    for(int iclass = 0; iclass < vord.size(); iclass++)
+      std::cout << " " << vord[iclass];
+    std::cout << std::endl;
+  }
+
+  vector<int> vuniqueclass=vreclass;
+  //remove duplicate elements from vuniqueclass
+  sort( vuniqueclass.begin(), vuniqueclass.end() );
+  vuniqueclass.erase( unique( vuniqueclass.begin(), vuniqueclass.end() ), vuniqueclass.end() );
+  nreclass=vuniqueclass.size();
+  if(verbose_opt[0]>=1){
+    std::cout << "unique classes: " << std::endl;
+    for(int iclass = 0; iclass < vuniqueclass.size(); iclass++)
+      std::cout << " " << vuniqueclass[iclass];
+    std::cout << std::endl;
+    std::cout << "number of reclasses: " << nreclass << std::endl;
+  }
+
+  if(verbose_opt[0]>=1){
+    std::cout << "number of bands: " << nband << std::endl;
+    std::cout << "number of classes: " << nclass << std::endl;
+  }
+
+  //Calculate features of training set, applying the band scaling
+  vector< Vector2d<float> > trainingFeatures(nclass);
+  for(int iclass=0;iclass<nclass;++iclass){
+    int nctraining=0;
+    if(verbose_opt[0]>=1)
+      std::cout << "calculating features for class " << iclass << std::endl;
+    //NOTE(review): always true ("random" is the libc function), see above
+    if(random)
+      srand(time(NULL));
+    nctraining=trainingPixels[iclass].size();
+    if(verbose_opt[0]>=1)
+      std::cout << "nctraining class " << iclass << ": " << nctraining << std::endl;
+
+    trainingFeatures[iclass].resize(nctraining);
+    for(int isample=0;isample<nctraining;++isample){
+      //scale pixel values according to scale and offset!!!
+      for(int iband=0;iband<nband;++iband){
+        assert(trainingPixels[iclass].size()>isample);
+        assert(trainingPixels[iclass][isample].size()>iband+startBand);
+        assert(offset.size()>iband);
+        assert(scale.size()>iband);
+        float value=trainingPixels[iclass][isample][iband+startBand];
+        trainingFeatures[iclass][isample].push_back((value-offset[iband])/scale[iband]);
+      }
+    }
+    assert(trainingFeatures[iclass].size()==nctraining);
+  }
+
+  unsigned int ntraining=0;
+  for(int iclass=0;iclass<nclass;++iclass){
+    if(verbose_opt[0]>1)
+      std::cout << "training sample size for class " << vcode[iclass] << ": " << trainingFeatures[iclass].size() << std::endl;
+    ntraining+=trainingFeatures[iclass].size();
+  }
+
+  //ccost and gamma options hold: [0] min, [1] max, [2] initial value (optional)
+  assert(ccost_opt.size()>1);//must have boundaries at least (initial value is optional)
+  if(ccost_opt.size()<3)//create initial value
+    ccost_opt.push_back(0.5*(ccost_opt[0]+ccost_opt[1]));
+  assert(gamma_opt.size()>1);//must have boundaries at least (initial value is optional)
+  if(gamma_opt.size()<3)//create initial value
+    gamma_opt.push_back(0);//will be translated to 1.0/nFeatures
+  assert(ccost_opt.size()==3);//min, max, init
+  assert(gamma_opt.size()==3);//min, max, init
+  nlopt::opt optimizer=OptFactory::getOptimizer(algorithm_opt[0],2);
+  if(verbose_opt[0]>1)
+    std::cout << "optimization algorithm: " << optimizer.get_algorithm_name() << "..." << std::endl;
+  //x[0]: ccost, x[1]: gamma -- ordering must match objFunction
+  std::vector<double> lb(2);
+  std::vector<double> init(2);
+  std::vector<double> ub(2);
+  lb[0]=ccost_opt[0];
+  lb[1]=(gamma_opt[0]>0)? gamma_opt[0] : 1.0/trainingFeatures[0][0].size();
+  init[0]=ccost_opt[2];
+  //fixed: use the initial value gamma_opt[2] (the original read the upper
+  //bound gamma_opt[1] as the starting point)
+  init[1]=(gamma_opt[2]>0)? gamma_opt[2] : 1.0/trainingFeatures[0][0].size();
+  ub[0]=ccost_opt[1];
+  ub[1]=(gamma_opt[1]>0)? gamma_opt[1] : 1.0/trainingFeatures[0][0].size();
+  optimizer.set_min_objective(objFunction, &trainingFeatures);
+  optimizer.set_lower_bounds(lb);
+  optimizer.set_upper_bounds(ub);
+
+  if(verbose_opt[0]>1)
+    std::cout << "set stopping criteria" << std::endl;
+  //set stopping criteria
+  if(maxit_opt[0])
+    optimizer.set_maxeval(maxit_opt[0]);
+  else
+    optimizer.set_xtol_rel(tolerance_opt[0]);
+  double minf=0;
+  std::vector<double> x=init;
+  optimizer.optimize(x, minf);
+  if(verbose_opt[0])
+    std::cout << "optimized with " << optimizer.get_algorithm_name() << "..." << std::endl;
+  //report the optimal parameters
+  std::cout << " --ccost " << x[0];
+  std::cout << " --gamma " << x[1];
+  std::cout << std::endl;
+  return(0);
+}
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/pkg-grass/pktools.git
More information about the Pkg-grass-devel
mailing list