SHOGUN
v2.0.0
|
00001 /* 00002 * This program is free software; you can redistribute it and/or modify 00003 * it under the terms of the GNU General Public License as published by 00004 * the Free Software Foundation; either version 3 of the License, or 00005 * (at your option) any later version. 00006 * 00007 * Written (W) 1999-2009 Soeren Sonnenburg 00008 * Written (W) 1999-2008 Gunnar Raetsch 00009 * Copyright (C) 1999-2009 Fraunhofer Institute FIRST and Max-Planck-Society 00010 */ 00011 #include <shogun/ui/GUIClassifier.h> 00012 #include <shogun/ui/SGInterface.h> 00013 00014 #include <shogun/lib/config.h> 00015 #include <shogun/io/SGIO.h> 00016 00017 #include <shogun/features/SparseFeatures.h> 00018 #include <shogun/features/RealFileFeatures.h> 00019 #include <shogun/labels/Labels.h> 00020 00021 #include <shogun/kernel/AUCKernel.h> 00022 00023 #include <shogun/multiclass/KNN.h> 00024 #include <shogun/clustering/KMeans.h> 00025 #include <shogun/clustering/Hierarchical.h> 00026 #include <shogun/classifier/PluginEstimate.h> 00027 00028 #include <shogun/classifier/LDA.h> 00029 #include <shogun/classifier/LPM.h> 00030 #include <shogun/classifier/LPBoost.h> 00031 #include <shogun/classifier/Perceptron.h> 00032 00033 #include <shogun/machine/LinearMachine.h> 00034 00035 #ifdef USE_SVMLIGHT 00036 #include <shogun/classifier/svm/SVMLight.h> 00037 #include <shogun/classifier/svm/SVMLightOneClass.h> 00038 #include <shogun/regression/svr/SVRLight.h> 00039 #endif //USE_SVMLIGHT 00040 00041 #include <shogun/classifier/mkl/MKLClassification.h> 00042 #include <shogun/regression/svr/MKLRegression.h> 00043 #include <shogun/classifier/mkl/MKLOneClass.h> 00044 #include <shogun/classifier/mkl/MKLMulticlass.h> 00045 #include <shogun/classifier/svm/LibSVM.h> 00046 #include <shogun/multiclass/LaRank.h> 00047 #include <shogun/classifier/svm/GPBTSVM.h> 00048 #include <shogun/classifier/svm/LibSVMOneClass.h> 00049 #include <shogun/multiclass/MulticlassLibSVM.h> 00050 00051 #include <shogun/regression/svr/LibSVR.h> 00052 #include <shogun/regression/KernelRidgeRegression.h> 00053 00054 #include <shogun/classifier/svm/LibLinear.h> 00055 #include <shogun/classifier/svm/MPDSVM.h> 00056 #include <shogun/classifier/svm/GNPPSVM.h> 00057 #include <shogun/multiclass/GMNPSVM.h> 00058 #include <shogun/multiclass/ScatterSVM.h> 00059 00060 #include <shogun/classifier/svm/SVMLin.h> 00061 #include <shogun/classifier/svm/SubGradientSVM.h> 00062 #include <shogun/classifier/SubGradientLPM.h> 00063 #include <shogun/classifier/svm/SVMOcas.h> 00064 #include <shogun/classifier/svm/SVMSGD.h> 00065 #include <shogun/classifier/svm/WDSVMOcas.h> 00066 00067 #include <shogun/io/SerializableAsciiFile.h> 00068 00069 using namespace shogun; 00070 00071 CGUIClassifier::CGUIClassifier(CSGInterface* ui_) 00072 : CSGObject(), ui(ui_) 00073 { 00074 constraint_generator=NULL; 00075 classifier=NULL; 00076 max_train_time=0; 00077 00078 // Perceptron parameters 00079 perceptron_learnrate=0.1; 00080 perceptron_maxiter=1000; 00081 00082 // SVM parameters 00083 svm_qpsize=41; 00084 svm_bufsize=3000; 00085 svm_max_qpsize=1000; 00086 mkl_norm=1; 00087 ent_lambda=0; 00088 mkl_block_norm=4; 00089 svm_C1=1; 00090 svm_C2=1; 00091 C_mkl=0; 00092 mkl_use_interleaved=true; 00093 svm_weight_epsilon=1e-5; 00094 svm_epsilon=1e-5; 00095 svm_tube_epsilon=1e-2; 00096 svm_nu=0.5; 00097 svm_use_shrinking = true ; 00098 00099 svm_use_bias = true; 00100 svm_use_batch_computation = true ; 00101 svm_use_linadd = true ; 00102 svm_do_auc_maximization = false ; 00103 00104 // KRR parameters 00105 krr_tau=1; 00106 00107 solver_type=ST_AUTO; 00108 } 00109 00110 CGUIClassifier::~CGUIClassifier() 00111 { 00112 SG_UNREF(classifier); 00113 SG_UNREF(constraint_generator); 00114 } 00115 00116 bool CGUIClassifier::new_classifier(char* name, int32_t d, int32_t from_d) 00117 { 00118 if (strcmp(name,"LIBSVM_ONECLASS")==0) 00119 { 00120 SG_UNREF(classifier); 00121 classifier = new CLibSVMOneClass(); 00122 SG_INFO("created SVMlibsvm object for oneclass\n"); 00123 } 00124 else if (strcmp(name,"LIBSVM_MULTICLASS")==0) 00125 { 00126 SG_UNREF(classifier); 00127 classifier = new CMulticlassLibSVM(); 00128 SG_INFO("created SVMlibsvm object for multiclass\n"); 00129 } 00130 else if (strcmp(name,"LIBSVM_NUMULTICLASS")==0) 00131 { 00132 SG_UNREF(classifier); 00133 classifier= new CMulticlassLibSVM(LIBSVM_NU_SVC); 00134 SG_INFO("created SVMlibsvm object for multiclass\n") ; 00135 } 00136 #ifdef USE_SVMLIGHT 00137 else if (strcmp(name,"SCATTERSVM_NO_BIAS_SVMLIGHT")==0) 00138 { 00139 SG_UNREF(classifier); 00140 classifier= new CScatterSVM(NO_BIAS_SVMLIGHT); 00141 SG_INFO("created ScatterSVM NO BIAS SVMLIGHT object\n") ; 00142 } 00143 #endif //USE_SVMLIGHT 00144 else if (strcmp(name,"SCATTERSVM_NO_BIAS_LIBSVM")==0) 00145 { 00146 SG_UNREF(classifier); 00147 classifier= new CScatterSVM(NO_BIAS_LIBSVM); 00148 SG_INFO("created ScatterSVM NO BIAS LIBSVM object\n") ; 00149 } 00150 else if (strcmp(name,"SCATTERSVM_TESTRULE1")==0) 00151 { 00152 SG_UNREF(classifier); 00153 classifier= new CScatterSVM(TEST_RULE1); 00154 SG_INFO("created ScatterSVM TESTRULE1 object\n") ; 00155 } 00156 else if (strcmp(name,"SCATTERSVM_TESTRULE2")==0) 00157 { 00158 SG_UNREF(classifier); 00159 classifier= new CScatterSVM(TEST_RULE2); 00160 SG_INFO("created ScatterSVM TESTRULE2 object\n") ; 00161 } 00162 else if (strcmp(name,"LIBSVM_NU")==0) 00163 { 00164 SG_UNREF(classifier); 00165 classifier= new CLibSVM(LIBSVM_NU_SVC); 00166 SG_INFO("created SVMlibsvm object\n") ; 00167 } 00168 else if (strcmp(name,"LIBSVM")==0) 00169 { 00170 SG_UNREF(classifier); 00171 classifier= new CLibSVM(); 00172 SG_INFO("created SVMlibsvm object\n") ; 00173 } 00174 else if (strcmp(name,"LARANK")==0) 00175 { 00176 SG_UNREF(classifier); 00177 classifier= new CLaRank(); 00178 SG_INFO("created LaRank object\n") ; 00179 } 00180 #ifdef USE_SVMLIGHT 00181 else if ((strcmp(name,"LIGHT")==0) || (strcmp(name,"SVMLIGHT")==0)) 00182 { 00183 SG_UNREF(classifier); 00184 classifier= new CSVMLight(); 00185 SG_INFO("created SVMLight object\n") ; 00186 } 00187 else if (strcmp(name,"SVMLIGHT_ONECLASS")==0) 00188 { 00189 SG_UNREF(classifier); 00190 classifier= new CSVMLightOneClass(); 00191 SG_INFO("created SVMLightOneClass object\n") ; 00192 } 00193 else if (strcmp(name,"SVRLIGHT")==0) 00194 { 00195 SG_UNREF(classifier); 00196 classifier= new CSVRLight(); 00197 SG_INFO("created SVRLight object\n") ; 00198 } 00199 #endif //USE_SVMLIGHT 00200 else if (strcmp(name,"GPBTSVM")==0) 00201 { 00202 SG_UNREF(classifier); 00203 classifier= new CGPBTSVM(); 00204 SG_INFO("created GPBT-SVM object\n") ; 00205 } 00206 else if (strcmp(name,"MPDSVM")==0) 00207 { 00208 SG_UNREF(classifier); 00209 classifier= new CMPDSVM(); 00210 SG_INFO("created MPD-SVM object\n") ; 00211 } 00212 else if (strcmp(name,"GNPPSVM")==0) 00213 { 00214 SG_UNREF(classifier); 00215 classifier= new CGNPPSVM(); 00216 SG_INFO("created GNPP-SVM object\n") ; 00217 } 00218 else if (strcmp(name,"GMNPSVM")==0) 00219 { 00220 SG_UNREF(classifier); 00221 classifier= new CGMNPSVM(); 00222 SG_INFO("created GMNP-SVM object\n") ; 00223 } 00224 else if (strcmp(name,"LIBSVR")==0) 00225 { 00226 SG_UNREF(classifier); 00227 classifier= new CLibSVR(); 00228 SG_INFO("created SVRlibsvm object\n") ; 00229 } 00230 #ifdef HAVE_LAPACK 00231 else if (strcmp(name, "KERNELRIDGEREGRESSION")==0) 00232 { 00233 SG_UNREF(classifier); 00234 classifier=new CKernelRidgeRegression(krr_tau, ui->ui_kernel->get_kernel(), 00235 ui->ui_labels->get_train_labels()); 00236 SG_INFO("created KernelRidgeRegression object %p\n", classifier); 00237 } 00238 #endif //HAVE_LAPACK 00239 else if (strcmp(name,"PERCEPTRON")==0) 00240 { 00241 SG_UNREF(classifier); 00242 classifier= new CPerceptron(); 00243 SG_INFO("created Perceptron object\n") ; 00244 } 00245 #ifdef HAVE_LAPACK 00246 else if (strncmp(name,"LIBLINEAR",9)==0) 00247 { 00248 LIBLINEAR_SOLVER_TYPE st=L2R_LR; 00249 00250 if (strcmp(name,"LIBLINEAR_L2R_LR")==0) 00251 { 00252 st=L2R_LR; 00253 SG_INFO("created LibLinear l2 regularized logistic regression object\n") ; 00254 } 00255 else if (strcmp(name,"LIBLINEAR_L2R_L2LOSS_SVC_DUAL")==0) 00256 { 00257 st=L2R_L2LOSS_SVC_DUAL; 00258 SG_INFO("created LibLinear l2 regularized l2 loss SVM dual object\n") ; 00259 } 00260 else if (strcmp(name,"LIBLINEAR_L2R_L2LOSS_SVC")==0) 00261 { 00262 st=L2R_L2LOSS_SVC; 00263 SG_INFO("created LibLinear l2 regularized l2 loss SVM primal object\n") ; 00264 } 00265 else if (strcmp(name,"LIBLINEAR_L1R_L2LOSS_SVC")==0) 00266 { 00267 st=L1R_L2LOSS_SVC; 00268 SG_INFO("created LibLinear l1 regularized l2 loss SVM primal object\n") ; 00269 } 00270 else if (strcmp(name,"LIBLINEAR_L2R_L1LOSS_SVC_DUAL")==0) 00271 { 00272 st=L2R_L1LOSS_SVC_DUAL; 00273 SG_INFO("created LibLinear l2 regularized l1 loss dual SVM object\n") ; 00274 } 00275 else 00276 SG_ERROR("unknown liblinear type\n"); 00277 00278 SG_UNREF(classifier); 00279 classifier= new CLibLinear(st); 00280 ((CLibLinear*) classifier)->set_C(svm_C1, svm_C2); 00281 ((CLibLinear*) classifier)->set_epsilon(svm_epsilon); 00282 ((CLibLinear*) classifier)->set_bias_enabled(svm_use_bias); 00283 } 00284 else if (strcmp(name,"LDA")==0) 00285 { 00286 SG_UNREF(classifier); 00287 classifier= new CLDA(); 00288 SG_INFO("created LDA object\n") ; 00289 } 00290 #endif //HAVE_LAPACK 00291 #ifdef USE_CPLEX 00292 else if (strcmp(name,"LPM")==0) 00293 { 00294 SG_UNREF(classifier); 00295 classifier= new CLPM(); 00296 ((CLPM*) classifier)->set_C(svm_C1, svm_C2); 00297 ((CLPM*) classifier)->set_epsilon(svm_epsilon); 00298 ((CLPM*) classifier)->set_bias_enabled(svm_use_bias); 00299 ((CLPM*) classifier)->set_max_train_time(max_train_time); 00300 SG_INFO("created LPM object\n") ; 00301 } 00302 else if (strcmp(name,"LPBOOST")==0) 00303 { 00304 SG_UNREF(classifier); 00305 classifier= new CLPBoost(); 00306 ((CLPBoost*) classifier)->set_C(svm_C1, svm_C2); 00307 ((CLPBoost*) classifier)->set_epsilon(svm_epsilon); 00308 ((CLPBoost*) classifier)->set_bias_enabled(svm_use_bias); 00309 ((CLPBoost*) classifier)->set_max_train_time(max_train_time); 00310 SG_INFO("created LPBoost object\n") ; 00311 } 00312 else if (strcmp(name,"SUBGRADIENTLPM")==0) 00313 { 00314 SG_UNREF(classifier); 00315 classifier= new CSubGradientLPM(); 00316 00317 ((CSubGradientLPM*) classifier)->set_bias_enabled(svm_use_bias); 00318 ((CSubGradientLPM*) classifier)->set_qpsize(svm_qpsize); 00319 ((CSubGradientLPM*) classifier)->set_qpsize_max(svm_max_qpsize); 00320 ((CSubGradientLPM*) classifier)->set_C(svm_C1, svm_C2); 00321 ((CSubGradientLPM*) classifier)->set_epsilon(svm_epsilon); 00322 ((CSubGradientLPM*) classifier)->set_max_train_time(max_train_time); 00323 SG_INFO("created Subgradient LPM object\n") ; 00324 } 00325 #endif //USE_CPLEX 00326 else if (strncmp(name,"KNN", strlen("KNN"))==0) 00327 { 00328 SG_UNREF(classifier); 00329 classifier= new CKNN(); 00330 SG_INFO("created KNN object\n") ; 00331 } 00332 else if (strncmp(name,"KMEANS", strlen("KMEANS"))==0) 00333 { 00334 SG_UNREF(classifier); 00335 classifier= new CKMeans(); 00336 SG_INFO("created KMeans object\n") ; 00337 } 00338 else if (strncmp(name,"HIERARCHICAL", strlen("HIERARCHICAL"))==0) 00339 { 00340 SG_UNREF(classifier); 00341 classifier= new CHierarchical(); 00342 SG_INFO("created Hierarchical clustering object\n") ; 00343 } 00344 else if (strcmp(name,"SVMLIN")==0) 00345 { 00346 SG_UNREF(classifier); 00347 classifier= new CSVMLin(); 00348 ((CSVMLin*) classifier)->set_C(svm_C1, svm_C2); 00349 ((CSVMLin*) classifier)->set_epsilon(svm_epsilon); 00350 ((CSVMLin*) classifier)->set_bias_enabled(svm_use_bias); 00351 SG_INFO("created SVMLin object\n") ; 00352 } 00353 else if (strcmp(name,"SUBGRADIENTSVM")==0) 00354 { 00355 SG_UNREF(classifier); 00356 classifier= new CSubGradientSVM(); 00357 00358 ((CSubGradientSVM*) classifier)->set_bias_enabled(svm_use_bias); 00359 ((CSubGradientSVM*) classifier)->set_qpsize(svm_qpsize); 00360 ((CSubGradientSVM*) classifier)->set_qpsize_max(svm_max_qpsize); 00361 ((CSubGradientSVM*) classifier)->set_C(svm_C1, svm_C2); 00362 ((CSubGradientSVM*) classifier)->set_epsilon(svm_epsilon); 00363 ((CSubGradientSVM*) classifier)->set_max_train_time(max_train_time); 00364 SG_INFO("created Subgradient SVM object\n") ; 00365 } 00366 else if (strncmp(name,"WDSVMOCAS", strlen("WDSVMOCAS"))==0) 00367 { 00368 SG_UNREF(classifier); 00369 classifier= new CWDSVMOcas(SVM_OCAS); 00370 00371 ((CWDSVMOcas*) classifier)->set_bias_enabled(svm_use_bias); 00372 ((CWDSVMOcas*) classifier)->set_degree(d, from_d); 00373 ((CWDSVMOcas*) classifier)->set_C(svm_C1, svm_C2); 00374 ((CWDSVMOcas*) classifier)->set_epsilon(svm_epsilon); 00375 ((CWDSVMOcas*) classifier)->set_bufsize(svm_bufsize); 00376 SG_INFO("created Weighted Degree Kernel SVM Ocas(OCAS) object of order %d (from order:%d)\n", d, from_d) ; 00377 } 00378 else if (strcmp(name,"SVMOCAS")==0) 00379 { 00380 SG_UNREF(classifier); 00381 classifier= new CSVMOcas(SVM_OCAS); 00382 00383 ((CSVMOcas*) classifier)->set_C(svm_C1, svm_C2); 00384 ((CSVMOcas*) classifier)->set_epsilon(svm_epsilon); 00385 ((CSVMOcas*) classifier)->set_bufsize(svm_bufsize); 00386 ((CSVMOcas*) classifier)->set_bias_enabled(svm_use_bias); 00387 SG_INFO("created SVM Ocas(OCAS) object\n") ; 00388 } 00389 else if (strcmp(name,"SVMSGD")==0) 00390 { 00391 SG_UNREF(classifier); 00392 classifier= new CSVMSGD(svm_C1); 00393 ((CSVMSGD*) classifier)->set_bias_enabled(svm_use_bias); 00394 SG_INFO("created SVM SGD object\n") ; 00395 } 00396 else if (strcmp(name,"SVMBMRM")==0 || (strcmp(name,"SVMPERF")==0)) 00397 { 00398 SG_UNREF(classifier); 00399 classifier= new CSVMOcas(SVM_BMRM); 00400 00401 ((CSVMOcas*) classifier)->set_C(svm_C1, svm_C2); 00402 ((CSVMOcas*) classifier)->set_epsilon(svm_epsilon); 00403 ((CSVMOcas*) classifier)->set_bufsize(svm_bufsize); 00404 ((CSVMOcas*) classifier)->set_bias_enabled(svm_use_bias); 00405 SG_INFO("created SVM Ocas(BMRM/PERF) object\n") ; 00406 } 00407 else if (strcmp(name,"MKL_CLASSIFICATION")==0) 00408 { 00409 SG_UNREF(classifier); 00410 classifier= new CMKLClassification(); 00411 } 00412 else if (strcmp(name,"MKL_ONECLASS")==0) 00413 { 00414 SG_UNREF(classifier); 00415 classifier= new CMKLOneClass(); 00416 } 00417 else if (strcmp(name,"MKL_MULTICLASS")==0) 00418 { 00419 SG_UNREF(classifier); 00420 classifier= new CMKLMulticlass(); 00421 } 00422 else if (strcmp(name,"MKL_REGRESSION")==0) 00423 { 00424 SG_UNREF(classifier); 00425 classifier= new CMKLRegression(); 00426 } 00427 else 00428 { 00429 SG_ERROR("Unknown classifier %s.\n", name); 00430 return false; 00431 } 00432 SG_REF(classifier); 00433 00434 return (classifier!=NULL); 00435 } 00436 00437 bool CGUIClassifier::train_mkl_multiclass() 00438 { 00439 CMKLMulticlass* mkl= (CMKLMulticlass*) classifier; 00440 if (!mkl) 00441 SG_ERROR("No MKL available.\n"); 00442 00443 CLabels* trainlabels=ui->ui_labels->get_train_labels(); 00444 if (!trainlabels) 00445 SG_ERROR("No trainlabels available.\n"); 00446 00447 CKernel* kernel=ui->ui_kernel->get_kernel(); 00448 if (!kernel) 00449 SG_ERROR("No kernel available.\n"); 00450 00451 bool success=ui->ui_kernel->init_kernel("TRAIN"); 00452 00453 if (!success || !ui->ui_kernel->is_initialized() || !kernel->has_features()) 00454 SG_ERROR("Kernel not initialized / no train features available.\n"); 00455 00456 int32_t num_vec=kernel->get_num_vec_lhs(); 00457 if (trainlabels->get_num_labels() != num_vec) 00458 SG_ERROR("Number of train labels (%d) and training vectors (%d) differs!\n", trainlabels->get_num_labels(), num_vec); 00459 00460 SG_INFO("Starting MC-MKL training on %ld vectors using C1=%lf C2=%lf epsilon=%lf\n", num_vec, svm_C1, svm_C2, svm_epsilon); 00461 00462 mkl->set_mkl_epsilon(svm_weight_epsilon); 00463 mkl->set_mkl_norm(mkl_norm); 00464 //mkl->set_max_num_mkliters(-1); 00465 mkl->set_solver_type(solver_type); 00466 mkl->set_bias_enabled(svm_use_bias); 00467 mkl->set_epsilon(svm_epsilon); 00468 mkl->set_max_train_time(max_train_time); 00469 mkl->set_tube_epsilon(svm_tube_epsilon); 00470 mkl->set_nu(svm_nu); 00471 mkl->set_C(svm_C1); 00472 mkl->set_qpsize(svm_qpsize); 00473 mkl->set_shrinking_enabled(svm_use_shrinking); 00474 mkl->set_linadd_enabled(svm_use_linadd); 00475 mkl->set_batch_computation_enabled(svm_use_batch_computation); 00476 00477 ((CKernelMulticlassMachine*) mkl)->set_labels(trainlabels); 00478 ((CKernelMulticlassMachine*) mkl)->set_kernel(kernel); 00479 00480 return mkl->train(); 00481 } 00482 00483 bool CGUIClassifier::train_mkl() 00484 { 00485 CMKL* mkl= (CMKL*) classifier; 00486 if (!mkl) 00487 SG_ERROR("No SVM available.\n"); 00488 00489 bool oneclass=(mkl->get_classifier_type()==CT_LIBSVMONECLASS); 00490 CLabels* trainlabels=NULL; 00491 if(!oneclass) 00492 trainlabels=ui->ui_labels->get_train_labels(); 00493 else 00494 SG_INFO("Training one class mkl.\n"); 00495 if (!trainlabels && !oneclass) 00496 SG_ERROR("No trainlabels available.\n"); 00497 00498 CKernel* kernel=ui->ui_kernel->get_kernel(); 00499 if (!kernel) 00500 SG_ERROR("No kernel available.\n"); 00501 00502 bool success=ui->ui_kernel->init_kernel("TRAIN"); 00503 if (!success || !ui->ui_kernel->is_initialized() || !kernel->has_features()) 00504 SG_ERROR("Kernel not initialized.\n"); 00505 00506 int32_t num_vec=kernel->get_num_vec_lhs(); 00507 if (!oneclass && trainlabels->get_num_labels() != num_vec) 00508 SG_ERROR("Number of train labels (%d) and training vectors (%d) differs!\n", trainlabels->get_num_labels(), num_vec); 00509 00510 SG_INFO("Starting SVM training on %ld vectors using C1=%lf C2=%lf epsilon=%lf\n", num_vec, svm_C1, svm_C2, svm_epsilon); 00511 00512 if (constraint_generator) 00513 mkl->set_constraint_generator(constraint_generator); 00514 mkl->set_solver_type(solver_type); 00515 mkl->set_bias_enabled(svm_use_bias); 00516 mkl->set_epsilon(svm_epsilon); 00517 mkl->set_max_train_time(max_train_time); 00518 mkl->set_tube_epsilon(svm_tube_epsilon); 00519 mkl->set_nu(svm_nu); 00520 mkl->set_C(svm_C1, svm_C2); 00521 mkl->set_qpsize(svm_qpsize); 00522 mkl->set_shrinking_enabled(svm_use_shrinking); 00523 mkl->set_linadd_enabled(svm_use_linadd); 00524 mkl->set_batch_computation_enabled(svm_use_batch_computation); 00525 mkl->set_mkl_epsilon(svm_weight_epsilon); 00526 mkl->set_mkl_norm(mkl_norm); 00527 mkl->set_elasticnet_lambda(ent_lambda); 00528 mkl->set_mkl_block_norm(mkl_block_norm); 00529 mkl->set_C_mkl(C_mkl); 00530 mkl->set_interleaved_optimization_enabled(mkl_use_interleaved); 00531 00532 if (svm_do_auc_maximization) 00533 { 00534 CAUCKernel* auc_kernel = new CAUCKernel(10, kernel); 00535 CLabels* auc_labels= auc_kernel->setup_auc_maximization(trainlabels); 00536 ((CKernelMachine*) mkl)->set_labels(auc_labels); 00537 ((CKernelMachine*) mkl)->set_kernel(auc_kernel); 00538 SG_UNREF(auc_labels); 00539 } 00540 else 00541 { 00542 if(!oneclass) 00543 ((CKernelMachine*) mkl)->set_labels(trainlabels); 00544 ((CKernelMachine*) mkl)->set_kernel(kernel); 00545 } 00546 00547 bool result=mkl->train(); 00548 00549 return result; 00550 } 00551 00552 bool CGUIClassifier::train_svm() 00553 { 00554 EMachineType type = classifier->get_classifier_type(); 00555 00556 if (!classifier) 00557 SG_ERROR("No SVM available.\n"); 00558 00559 bool oneclass=(type==CT_LIBSVMONECLASS); 00560 CLabels* trainlabels=NULL; 00561 if(!oneclass) 00562 trainlabels=ui->ui_labels->get_train_labels(); 00563 else 00564 SG_INFO("Training one class svm.\n"); 00565 if (!trainlabels && !oneclass) 00566 SG_ERROR("No trainlabels available.\n"); 00567 00568 CKernel* kernel=ui->ui_kernel->get_kernel(); 00569 if (!kernel) 00570 SG_ERROR("No kernel available.\n"); 00571 00572 bool success=ui->ui_kernel->init_kernel("TRAIN"); 00573 00574 if (!success || !ui->ui_kernel->is_initialized() || !kernel->has_features()) 00575 SG_ERROR("Kernel not initialized / no train features available.\n"); 00576 00577 int32_t num_vec=kernel->get_num_vec_lhs(); 00578 if (!oneclass && trainlabels->get_num_labels() != num_vec) 00579 SG_ERROR("Number of train labels (%d) and training vectors (%d) differs!\n", trainlabels->get_num_labels(), num_vec); 00580 00581 SG_INFO("Starting SVM training on %ld vectors using C1=%lf C2=%lf epsilon=%lf\n", num_vec, svm_C1, svm_C2, svm_epsilon); 00582 00583 if (type==CT_LARANK || type==CT_GMNPSVM || type==CT_LIBSVMMULTICLASS) 00584 { 00585 CMulticlassSVM* svm = (CMulticlassSVM*)classifier; 00586 svm->set_solver_type(solver_type); 00587 svm->set_bias_enabled(svm_use_bias); 00588 svm->set_epsilon(svm_epsilon); 00589 svm->set_max_train_time(max_train_time); 00590 svm->set_tube_epsilon(svm_tube_epsilon); 00591 svm->set_nu(svm_nu); 00592 svm->set_C(svm_C1); 00593 svm->set_qpsize(svm_qpsize); 00594 svm->set_shrinking_enabled(svm_use_shrinking); 00595 svm->set_linadd_enabled(svm_use_linadd); 00596 svm->set_batch_computation_enabled(svm_use_batch_computation); 00597 } 00598 else 00599 { 00600 CSVM* svm = (CSVM*)classifier; 00601 svm->set_solver_type(solver_type); 00602 svm->set_bias_enabled(svm_use_bias); 00603 svm->set_epsilon(svm_epsilon); 00604 svm->set_max_train_time(max_train_time); 00605 svm->set_tube_epsilon(svm_tube_epsilon); 00606 svm->set_nu(svm_nu); 00607 svm->set_C(svm_C1, svm_C2); 00608 svm->set_qpsize(svm_qpsize); 00609 svm->set_shrinking_enabled(svm_use_shrinking); 00610 svm->set_linadd_enabled(svm_use_linadd); 00611 svm->set_batch_computation_enabled(svm_use_batch_computation); 00612 } 00613 00614 if (type==CT_MKLMULTICLASS) 00615 { 00616 ((CMKLMulticlass *)classifier)->set_mkl_epsilon(svm_weight_epsilon); 00617 } 00618 00619 if (svm_do_auc_maximization) 00620 { 00621 CAUCKernel* auc_kernel = new CAUCKernel(10, kernel); 00622 CLabels* auc_labels = auc_kernel->setup_auc_maximization(trainlabels); 00623 ((CKernelMachine*)classifier)->set_labels(auc_labels); 00624 ((CKernelMachine*)classifier)->set_kernel(auc_kernel); 00625 SG_UNREF(auc_labels); 00626 } 00627 else 00628 { 00629 if (type==CT_LARANK || type==CT_GMNPSVM || type==CT_LIBSVMMULTICLASS) 00630 { 00631 ((CKernelMulticlassMachine*)classifier)->set_labels(trainlabels); 00632 ((CKernelMulticlassMachine*)classifier)->set_kernel(kernel); 00633 } 00634 else 00635 { 00636 if(!oneclass) 00637 ((CKernelMachine*)classifier)->set_labels(trainlabels); 00638 00639 ((CKernelMachine*)classifier)->set_kernel(kernel); 00640 } 00641 } 00642 00643 bool result = classifier->train(); 00644 00645 return result; 00646 } 00647 00648 bool CGUIClassifier::train_clustering(int32_t k, int32_t max_iter) 00649 { 00650 bool result=false; 00651 CDistance* distance=ui->ui_distance->get_distance(); 00652 00653 if (!distance) 00654 SG_ERROR("No distance available\n"); 00655 00656 if (!ui->ui_distance->init_distance("TRAIN")) 00657 SG_ERROR("Initializing distance with train features failed.\n"); 00658 00659 ((CDistanceMachine*) classifier)->set_distance(distance); 00660 00661 EMachineType type=classifier->get_classifier_type(); 00662 switch (type) 00663 { 00664 case CT_KMEANS: 00665 { 00666 ((CKMeans*) classifier)->set_k(k); 00667 ((CKMeans*) classifier)->set_max_iter(max_iter); 00668 result=((CKMeans*) classifier)->train(); 00669 break; 00670 } 00671 case CT_HIERARCHICAL: 00672 { 00673 ((CHierarchical*) classifier)->set_merges(k); 00674 result=((CHierarchical*) classifier)->train(); 00675 break; 00676 } 00677 default: 00678 SG_ERROR("Unknown clustering type %d\n", type); 00679 } 00680 00681 return result; 00682 } 00683 00684 bool CGUIClassifier::train_knn(int32_t k) 00685 { 00686 CLabels* trainlabels=ui->ui_labels->get_train_labels(); 00687 CDistance* distance=ui->ui_distance->get_distance(); 00688 00689 bool result=false; 00690 00691 if (trainlabels) 00692 { 00693 if (distance) 00694 { 00695 if (!ui->ui_distance->init_distance("TRAIN")) 00696 SG_ERROR("Initializing distance with train features failed.\n"); 00697 ((CKNN*) classifier)->set_labels(trainlabels); 00698 ((CKNN*) classifier)->set_distance(distance); 00699 ((CKNN*) classifier)->set_k(k); 00700 result=((CKNN*) classifier)->train(); 00701 } 00702 else 00703 SG_ERROR("No distance available.\n"); 00704 } 00705 else 00706 SG_ERROR("No labels available\n"); 00707 00708 return result; 00709 } 00710 00711 bool CGUIClassifier::train_krr() 00712 { 00713 #ifdef HAVE_LAPACK 00714 CKernelRidgeRegression* krr= (CKernelRidgeRegression*) classifier; 00715 if (!krr) 00716 SG_ERROR("No SVM available.\n"); 00717 00718 CLabels* trainlabels=NULL; 00719 trainlabels=ui->ui_labels->get_train_labels(); 00720 if (!trainlabels) 00721 SG_ERROR("No trainlabels available.\n"); 00722 00723 CKernel* kernel=ui->ui_kernel->get_kernel(); 00724 if (!kernel) 00725 SG_ERROR("No kernel available.\n"); 00726 00727 bool success=ui->ui_kernel->init_kernel("TRAIN"); 00728 00729 if (!success || !ui->ui_kernel->is_initialized() || !kernel->has_features()) 00730 SG_ERROR("Kernel not initialized / no train features available.\n"); 00731 00732 int32_t num_vec=kernel->get_num_vec_lhs(); 00733 if (trainlabels->get_num_labels() != num_vec) 00734 SG_ERROR("Number of train labels (%d) and training vectors (%d) differs!\n", trainlabels->get_num_labels(), num_vec); 00735 00736 00737 // Set training labels and kernel 00738 krr->set_labels(trainlabels); 00739 krr->set_kernel(kernel); 00740 00741 bool result=krr->train(); 00742 return result; 00743 #else 00744 return false; 00745 #endif 00746 } 00747 00748 bool CGUIClassifier::train_linear(float64_t gamma) 00749 { 00750 ASSERT(classifier); 00751 EMachineType ctype = classifier->get_classifier_type(); 00752 CFeatures* trainfeatures=ui->ui_features->get_train_features(); 00753 CLabels* trainlabels=ui->ui_labels->get_train_labels(); 00754 bool result=false; 00755 00756 if (!trainfeatures) 00757 SG_ERROR("No trainfeatures available.\n"); 00758 00759 if (!trainfeatures->has_property(FP_DOT)) 00760 SG_ERROR("Trainfeatures not based on DotFeatures.\n"); 00761 00762 if (!trainlabels) 00763 SG_ERROR("No labels available\n"); 00764 00765 if (ctype==CT_PERCEPTRON) 00766 { 00767 ((CPerceptron*) classifier)->set_learn_rate(perceptron_learnrate); 00768 ((CPerceptron*) classifier)->set_max_iter(perceptron_maxiter); 00769 } 00770 00771 #ifdef HAVE_LAPACK 00772 if (ctype==CT_LDA) 00773 { 00774 if (trainfeatures->get_feature_type()!=F_DREAL || 00775 trainfeatures->get_feature_class()!=C_DENSE) 00776 SG_ERROR("LDA requires train features of class SIMPLE type REAL.\n"); 00777 ((CLDA*) classifier)->set_gamma(gamma); 00778 } 00779 #endif 00780 00781 if (ctype==CT_SVMOCAS) 00782 ((CSVMOcas*) classifier)->set_C(svm_C1, svm_C2); 00783 #ifdef HAVE_LAPACK 00784 else if (ctype==CT_LIBLINEAR) 00785 ((CLibLinear*) classifier)->set_C(svm_C1, svm_C2); 00786 #endif 00787 else if (ctype==CT_SVMLIN) 00788 ((CSVMLin*) classifier)->set_C(svm_C1, svm_C2); 00789 else if (ctype==CT_SVMSGD) 00790 ((CSVMSGD*) classifier)->set_C(svm_C1, svm_C2); 00791 else if (ctype==CT_SUBGRADIENTSVM) 00792 ((CSubGradientSVM*) classifier)->set_C(svm_C1, svm_C2); 00793 00794 else if (ctype==CT_LPM || ctype==CT_LPBOOST) 00795 { 00796 if (trainfeatures->get_feature_class()!=C_SPARSE || 00797 trainfeatures->get_feature_type()!=F_DREAL) 00798 SG_ERROR("LPM and LPBOOST require trainfeatures of class SPARSE type REAL.\n"); 00799 } 00800 00801 ((CLinearMachine*) classifier)->set_labels(trainlabels); 00802 ((CLinearMachine*) classifier)->set_features((CDenseFeatures<float64_t>*) trainfeatures); 00803 result=((CLinearMachine*) classifier)->train(); 00804 00805 return result; 00806 } 00807 00808 bool CGUIClassifier::train_wdocas() 00809 { 00810 CFeatures* trainfeatures=ui->ui_features->get_train_features(); 00811 CLabels* trainlabels=ui->ui_labels->get_train_labels(); 00812 00813 bool result=false; 00814 00815 if (!trainfeatures) 00816 SG_ERROR("No trainfeatures available.\n"); 00817 00818 if (trainfeatures->get_feature_class()!=C_STRING || 00819 trainfeatures->get_feature_type()!=F_BYTE ) 00820 SG_ERROR("Trainfeatures are not of class STRING type BYTE.\n"); 00821 00822 if (!trainlabels) 00823 SG_ERROR("No labels available.\n"); 00824 00825 ((CWDSVMOcas*) classifier)->set_labels(trainlabels); 00826 ((CWDSVMOcas*) classifier)->set_features((CStringFeatures<uint8_t>*) trainfeatures); 00827 result=((CWDSVMOcas*) classifier)->train(); 00828 00829 return result; 00830 } 00831 00832 bool CGUIClassifier::load(char* filename, char* type) 00833 { 00834 bool result=false; 00835 00836 if (new_classifier(type)) 00837 { 00838 FILE* model_file=fopen(filename, "r"); 00839 CSerializableAsciiFile* ascii_file = new CSerializableAsciiFile(model_file,'r'); 00840 00841 if (ascii_file) 00842 { 00843 if (classifier && classifier->load_serializable(ascii_file)) 00844 { 00845 SG_DEBUG("file successfully read.\n"); 00846 result=true; 00847 } 00848 else 00849 SG_ERROR("SVM/Classifier creation/loading failed on file %s.\n", filename); 00850 00851 delete ascii_file; 00852 } 00853 else 00854 SG_ERROR("Opening file %s failed.\n", filename); 00855 00856 return result; 00857 } 00858 else 00859 SG_ERROR("Type %s of SVM/Classifier unknown.\n", type); 00860 00861 return false; 00862 } 00863 00864 bool CGUIClassifier::save(char* param) 00865 { 00866 bool result=false; 00867 param=SGIO::skip_spaces(param); 00868 00869 if (classifier) 00870 { 00871 FILE* file=fopen(param, "w"); 00872 CSerializableAsciiFile* ascii_file = new CSerializableAsciiFile(file,'w'); 00873 00874 if ((!ascii_file) || (!classifier->save_serializable(ascii_file))) 00875 printf("writing to file %s failed!\n", param); 00876 else 00877 { 00878 printf("successfully written classifier into \"%s\" !\n", param); 00879 result=true; 00880 } 00881 00882 if (ascii_file) 00883 delete ascii_file; 00884 } 00885 else 00886 SG_ERROR("create classifier first\n"); 00887 00888 return result; 00889 } 00890 00891 bool CGUIClassifier::set_perceptron_parameters( 00892 float64_t learnrate, int32_t maxiter) 00893 { 00894 if (learnrate<=0) 00895 perceptron_learnrate=0.01; 00896 else 00897 perceptron_learnrate=learnrate; 00898 00899 if (maxiter<=0) 00900 perceptron_maxiter=1000; 00901 else 00902 perceptron_maxiter=maxiter; 00903 SG_INFO("Setting to perceptron parameters (learnrate %f and maxiter: %d\n", perceptron_learnrate, perceptron_maxiter); 00904 00905 return true; 00906 } 00907 00908 bool CGUIClassifier::set_svm_epsilon(float64_t epsilon) 00909 { 00910 if (epsilon<0) 00911 svm_epsilon=1e-4; 00912 else 00913 svm_epsilon=epsilon; 00914 SG_INFO("Set to svm_epsilon=%f.\n", svm_epsilon); 00915 00916 return true; 00917 } 00918 00919 bool CGUIClassifier::set_max_train_time(float64_t max) 00920 { 00921 if (max>0) 00922 { 00923 max_train_time=max; 00924 SG_INFO("Set to max_train_time=%f.\n", max_train_time); 00925 } 00926 else 00927 SG_INFO("Disabling max_train_time.\n"); 00928 00929 return true; 00930 } 00931 00932 bool CGUIClassifier::set_svr_tube_epsilon(float64_t tube_epsilon) 00933 { 00934 if (!classifier) 00935 SG_ERROR("No regression method allocated\n"); 00936 00937 if (classifier->get_classifier_type() != CT_LIBSVR && 00938 classifier->get_classifier_type() != CT_SVRLIGHT && 00939 classifier->get_classifier_type() != CT_MKLREGRESSION ) 00940 { 00941 SG_ERROR("Underlying method not capable of SV-regression\n"); 00942 } 00943 00944 if (tube_epsilon<0) 00945 svm_tube_epsilon=1e-2; 00946 svm_tube_epsilon=tube_epsilon; 00947 00948 ((CSVM*) classifier)->set_tube_epsilon(svm_tube_epsilon); 00949 SG_INFO("Set to svr_tube_epsilon=%f.\n", svm_tube_epsilon); 00950 00951 return true; 00952 } 00953 00954 bool CGUIClassifier::set_svm_nu(float64_t nu) 00955 { 00956 if (nu<0 || nu>1) 00957 nu=0.5; 00958 00959 svm_nu=nu; 00960 SG_INFO("Set to nu=%f.\n", svm_nu); 00961 00962 return true; 00963 } 00964 00965 bool CGUIClassifier::set_svm_mkl_parameters( 00966 float64_t weight_epsilon, float64_t C, float64_t norm) 00967 { 00968 if (weight_epsilon<0) 00969 weight_epsilon=1e-4; 00970 if (C<0) 00971 C=0; 00972 if (norm<0) 00973 SG_ERROR("MKL norm >= 0\n"); 00974 00975 svm_weight_epsilon=weight_epsilon; 00976 C_mkl=C; 00977 mkl_norm=norm; 00978 00979 SG_INFO("Set to weight_epsilon=%f.\n", svm_weight_epsilon); 00980 SG_INFO("Set to C_mkl=%f.\n", C_mkl); 00981 SG_INFO("Set to mkl_norm=%f.\n", mkl_norm); 00982 00983 return true; 00984 } 00985 00986 bool CGUIClassifier::set_elasticnet_lambda(float64_t lambda) 00987 { 00988 if (lambda<0 || lambda>1) 00989 SG_ERROR("0 <= ent_lambda <= 1\n"); 00990 00991 ent_lambda = lambda; 00992 return true; 00993 } 00994 00995 bool CGUIClassifier::set_mkl_block_norm(float64_t mkl_bnorm) 00996 { 00997 if (mkl_bnorm<1) 00998 SG_ERROR("1 <= mkl_block_norm <= inf\n"); 00999 01000 mkl_block_norm=mkl_bnorm; 01001 return true; 01002 } 01003 01004 01005 bool CGUIClassifier::set_svm_C(float64_t C1, float64_t C2) 01006 { 01007 if (C1<0) 01008 svm_C1=1.0; 01009 else 01010 svm_C1=C1; 01011 01012 if (C2<0) 01013 svm_C2=svm_C1; 01014 else 01015 svm_C2=C2; 01016 01017 SG_INFO("Set to C1=%f C2=%f.\n", svm_C1, svm_C2); 01018 01019 return true; 01020 } 01021 01022 bool CGUIClassifier::set_svm_qpsize(int32_t qpsize) 01023 { 01024 if (qpsize<2) 01025 svm_qpsize=41; 01026 else 01027 svm_qpsize=qpsize; 01028 SG_INFO("Set qpsize to svm_qpsize=%d.\n", svm_qpsize); 01029 01030 return true; 01031 } 01032 01033 bool CGUIClassifier::set_svm_max_qpsize(int32_t max_qpsize) 01034 { 01035 if (max_qpsize<50) 01036 svm_max_qpsize=50; 01037 else 01038 svm_max_qpsize=max_qpsize; 01039 SG_INFO("Set max qpsize to svm_max_qpsize=%d.\n", svm_max_qpsize); 01040 01041 return true; 01042 } 01043 01044 bool CGUIClassifier::set_svm_bufsize(int32_t bufsize) 01045 { 01046 if (svm_bufsize<0) 01047 svm_bufsize=3000; 01048 else 01049 svm_bufsize=bufsize; 01050 SG_INFO("Set bufsize to svm_bufsize=%d.\n", svm_bufsize); 01051 01052 return true ; 01053 } 01054 01055 bool CGUIClassifier::set_svm_shrinking_enabled(bool enabled) 01056 { 01057 svm_use_shrinking=enabled; 01058 if (svm_use_shrinking) 01059 SG_INFO("Enabling shrinking optimization.\n"); 01060 else 01061 SG_INFO("Disabling shrinking optimization.\n"); 01062 01063 return true; 01064 } 01065 01066 bool CGUIClassifier::set_svm_batch_computation_enabled(bool enabled) 01067 { 01068 svm_use_batch_computation=enabled; 01069 if (svm_use_batch_computation) 01070 SG_INFO("Enabling batch computation.\n"); 01071 else 01072 SG_INFO("Disabling batch computation.\n"); 01073 01074 return true; 01075 } 01076 01077 bool CGUIClassifier::set_svm_linadd_enabled(bool enabled) 01078 { 01079 svm_use_linadd=enabled; 01080 if (svm_use_linadd) 01081 SG_INFO("Enabling LINADD optimization.\n"); 01082 else 01083 SG_INFO("Disabling LINADD optimization.\n"); 01084 01085 return true; 01086 } 01087 01088 bool CGUIClassifier::set_svm_bias_enabled(bool enabled) 01089 { 01090 svm_use_bias=enabled; 01091 if (svm_use_bias) 01092 SG_INFO("Enabling svm bias.\n"); 01093 else 01094 SG_INFO("Disabling svm bias.\n"); 01095 01096 return true; 01097 } 01098 01099 bool CGUIClassifier::set_mkl_interleaved_enabled(bool enabled) 01100 { 01101 mkl_use_interleaved=enabled; 01102 if (mkl_use_interleaved) 01103 SG_INFO("Enabling mkl interleaved optimization.\n"); 01104 else 01105 SG_INFO("Disabling mkl interleaved optimization.\n"); 01106 01107 return true; 01108 } 01109 01110 bool CGUIClassifier::set_do_auc_maximization(bool do_auc) 01111 { 01112 svm_do_auc_maximization=do_auc; 01113 01114 if (svm_do_auc_maximization) 01115 SG_INFO("Enabling AUC maximization.\n"); 01116 else 01117 SG_INFO("Disabling AUC maximization.\n"); 01118 01119 return true; 01120 } 01121 01122 01123 CLabels* CGUIClassifier::classify() 01124 { 01125 ASSERT(classifier); 01126 01127 switch (classifier->get_classifier_type()) 01128 { 01129 case CT_LIGHT: 01130 case CT_LIGHTONECLASS: 01131 case CT_LIBSVM: 01132 case CT_SCATTERSVM: 01133 case CT_MPD: 01134 case CT_GPBT: 01135 case CT_CPLEXSVM: 01136 case CT_GMNPSVM: 01137 case CT_GNPPSVM: 01138 case CT_LIBSVR: 01139 case CT_LIBSVMMULTICLASS: 01140 case CT_LIBSVMONECLASS: 01141 case CT_SVRLIGHT: 01142 case CT_MKLCLASSIFICATION: 01143 case CT_MKLMULTICLASS: 01144 case CT_MKLREGRESSION: 01145 case CT_MKLONECLASS: 01146 case CT_KERNELRIDGEREGRESSION: 01147 return classify_kernelmachine(); 01148 case CT_KNN: 01149 return classify_distancemachine(); 01150 case CT_PERCEPTRON: 01151 case CT_LDA: 01152 return classify_linear(); 01153 case CT_SVMLIN: 01154 case CT_SVMPERF: 01155 case CT_SUBGRADIENTSVM: 01156 case CT_SVMOCAS: 01157 case CT_SVMSGD: 01158 case CT_LPM: 01159 case CT_LPBOOST: 01160 case CT_SUBGRADIENTLPM: 01161 case CT_LIBLINEAR: 01162 return classify_linear(); 01163 case CT_WDSVMOCAS: 01164 return classify_byte_linear(); 01165 default: 01166 SG_ERROR("unknown classifier type\n"); 01167 break; 01168 }; 01169 01170 return NULL; 01171 } 01172 01173 CLabels* CGUIClassifier::classify_kernelmachine() 01174 { 01175 CFeatures* trainfeatures=ui->ui_features->get_train_features(); 01176 CFeatures* testfeatures=ui->ui_features->get_test_features(); 01177 01178 if (!classifier) 01179 SG_ERROR("No kernelmachine available.\n"); 01180 01181 bool success=true; 01182 01183 if (ui->ui_kernel->get_kernel()->get_kernel_type()!=K_CUSTOM) 01184 { 01185 if (ui->ui_kernel->get_kernel()->get_kernel_type()==K_COMBINED 01186 && ( !trainfeatures || !testfeatures )) 01187 { 01188 SG_DEBUG("skipping initialisation of combined kernel " 01189 "as train/test features are unavailable\n"); 01190 } 01191 else 01192 { 01193 if (!trainfeatures) 01194 SG_ERROR("No training features available.\n"); 01195 if (!testfeatures) 01196 SG_ERROR("No test features available.\n"); 01197 01198 success=ui->ui_kernel->init_kernel("TEST"); 01199 } 01200 } 01201 01202 if (!success || !ui->ui_kernel->is_initialized()) 01203 SG_ERROR("Kernel not initialized.\n"); 01204 01205 EMachineType type = classifier->get_classifier_type(); 01206 if (type==CT_LARANK || type==CT_GMNPSVM || type==CT_LIBSVMMULTICLASS || 01207 type==CT_MKLMULTICLASS) 01208 { 01209 CKernelMulticlassMachine* kmcm = (CKernelMulticlassMachine*) classifier; 01210 kmcm->set_kernel(ui->ui_kernel->get_kernel()); 01211 } 01212 else 01213 { 01214 CKernelMachine* km=(CKernelMachine*) classifier; 01215 km->set_kernel(ui->ui_kernel->get_kernel()); 01216 km->set_batch_computation_enabled(svm_use_batch_computation); 01217 } 01218 01219 SG_INFO("Starting kernel machine testing.\n"); 01220 return classifier->apply(); 01221 } 01222 01223 bool CGUIClassifier::get_trained_classifier( 01224 float64_t* &weights, int32_t &rows, int32_t &cols, float64_t*& bias, 01225 int32_t& brows, int32_t& bcols, 01226 int32_t idx) // which SVM for Multiclass 01227 { 01228 ASSERT(classifier); 01229 01230 switch (classifier->get_classifier_type()) 01231 { 01232 case CT_SCATTERSVM: 01233 case CT_GNPPSVM: 01234 case CT_LIBSVMMULTICLASS: 01235 case CT_LIGHT: 01236 case CT_LIGHTONECLASS: 01237 case CT_LIBSVM: 01238 case CT_MPD: 01239 case CT_GPBT: 01240 case CT_CPLEXSVM: 01241 case CT_GMNPSVM: 01242 case CT_LIBSVR: 01243 case CT_LIBSVMONECLASS: 01244 case CT_SVRLIGHT: 01245 case CT_MKLCLASSIFICATION: 01246 case CT_MKLREGRESSION: 01247 case CT_MKLONECLASS: 01248 case CT_MKLMULTICLASS: 01249 case CT_KERNELRIDGEREGRESSION: 01250 return get_svm(weights, rows, cols, bias, brows, bcols, idx); 01251 break; 01252 case CT_PERCEPTRON: 01253 case CT_LDA: 01254 case CT_LPM: 01255 case CT_LPBOOST: 01256 case CT_SUBGRADIENTLPM: 01257 case CT_SVMOCAS: 01258 case CT_SVMSGD: 01259 case CT_SVMLIN: 01260 case CT_SVMPERF: 01261 case CT_SUBGRADIENTSVM: 01262 case CT_LIBLINEAR: 01263 return get_linear(weights, rows, cols, bias, brows, bcols); 01264 break; 01265 case CT_KMEANS: 01266 case CT_HIERARCHICAL: 01267 return get_clustering(weights, rows, cols, bias, brows, bcols); 01268 break; 01269 case CT_KNN: 01270 SG_ERROR("not implemented"); 01271 break; 01272 default: 01273 SG_ERROR("unknown classifier type\n"); 01274 break; 01275 }; 01276 return false; 01277 } 01278 01279 01280 int32_t CGUIClassifier::get_num_svms() 01281 { 01282 ASSERT(classifier); 01283 return ((CMulticlassSVM*) classifier)->get_num_machines(); 01284 } 01285 01286 bool CGUIClassifier::get_svm( 01287 float64_t* &weights, int32_t& rows, int32_t& cols, float64_t*& bias, 01288 int32_t& brows, int32_t& bcols, int32_t idx) 01289 { 01290 CSVM* svm=(CSVM*) classifier; 01291 01292 if (idx>-1) // should be MulticlassSVM 01293 svm=((CMulticlassSVM*) svm)->get_svm(idx); 01294 01295 if (svm) 01296 { 01297 brows=1; 01298 bcols=1; 01299 bias=SG_MALLOC(float64_t, 1); 01300 *bias=svm->get_bias(); 01301 01302 rows=svm->get_num_support_vectors(); 01303 cols=2; 01304 weights=SG_MALLOC(float64_t, rows*cols); 01305 01306 for (int32_t i=0; i<rows; i++) 01307 { 01308 weights[i]=svm->get_alpha(i); 01309 weights[i+rows]=svm->get_support_vector(i); 01310 } 01311 01312 return true; 01313 } 01314 01315 return false; 01316 } 01317 01318 bool CGUIClassifier::get_clustering( 01319 float64_t* ¢ers, int32_t& rows, int32_t& cols, float64_t*& radi, 01320 int32_t& brows, int32_t& bcols) 01321 { 01322 if (!classifier) 01323 return false; 01324 01325 switch (classifier->get_classifier_type()) 01326 { 01327 case CT_KMEANS: 01328 { 01329 CKMeans* clustering=(CKMeans*) classifier; 01330 01331 bcols=1; 01332 SGVector<float64_t> r=clustering->get_radiuses(); 01333 brows=r.vlen; 01334 radi=SG_MALLOC(float64_t, brows); 01335 memcpy(radi, r.vector, sizeof(float64_t)*brows); 01336 01337 cols=1; 01338 SGMatrix<float64_t> c=clustering->get_cluster_centers(); 01339 rows=c.num_rows; 01340 cols=c.num_cols; 01341 centers=SG_MALLOC(float64_t, rows*cols); 01342 memcpy(centers, c.matrix, sizeof(float64_t)*rows*cols); 01343 break; 01344 } 01345 01346 case CT_HIERARCHICAL: 01347 { 01348 CHierarchical* clustering=(CHierarchical*) classifier; 01349 01350 // radi == merge_distances, centers == pairs 01351 bcols=1; 01352 SGVector<float64_t> r=clustering->get_merge_distances(); 01353 brows=r.vlen; 01354 radi=SG_MALLOC(float64_t, brows); 01355 memcpy(radi, r.vector, sizeof(float64_t)*brows); 01356 01357 SGMatrix<int32_t> p=clustering->get_cluster_pairs(); 01358 rows=p.num_rows; 01359 cols=p.num_cols; 01360 centers=SG_MALLOC(float64_t, rows*cols); 01361 for (int32_t i=0; i<rows*cols; i++) 01362 centers[i]=(float64_t) p.matrix[i]; 01363 01364 break; 01365 } 01366 01367 default: 01368 SG_ERROR("internal error - unknown clustering type\n"); 01369 } 01370 01371 return true; 01372 } 01373 01374 bool CGUIClassifier::get_linear( 01375 float64_t* &weights, int32_t& rows, int32_t& cols, float64_t*& bias, 01376 int32_t& brows, int32_t& bcols) 01377 { 01378 CLinearMachine* linear=(CLinearMachine*) classifier; 01379 01380 if (!linear) 01381 return false; 01382 01383 bias=SG_MALLOC(float64_t, 1); 01384 *bias=linear->get_bias(); 01385 brows=1; 01386 bcols=1; 01387 01388 SGVector<float64_t> w=linear->get_w(); 01389 cols=1; 01390 rows=w.vlen; 01391 01392 weights= SG_MALLOC(float64_t, w.vlen); 01393 memcpy(weights, w.vector, sizeof(float64_t)*w.vlen); 01394 01395 return true; 01396 } 01397 01398 CLabels* CGUIClassifier::classify_distancemachine() 01399 { 01400 CFeatures* trainfeatures=ui->ui_features->get_train_features(); 01401 CFeatures* testfeatures=ui->ui_features->get_test_features(); 01402 01403 if (!classifier) 01404 { 01405 SG_ERROR("no kernelmachine available\n") ; 01406 return NULL; 01407 } 01408 if (!trainfeatures) 01409 { 01410 SG_ERROR("no training features available\n") ; 01411 return NULL; 01412 } 01413 01414 if (!testfeatures) 01415 { 01416 SG_ERROR("no test features available\n") ; 01417 return NULL; 01418 } 01419 01420 bool success=ui->ui_distance->init_distance("TEST"); 01421 01422 if (!success || !ui->ui_distance->is_initialized()) 01423 { 01424 SG_ERROR("distance not initialized\n") ; 01425 return NULL; 01426 } 01427 01428 ((CDistanceMachine*) classifier)->set_distance( 01429 ui->ui_distance->get_distance()); 01430 SG_INFO("starting distance machine testing\n") ; 01431 return classifier->apply(); 01432 } 01433 01434 01435 CLabels* CGUIClassifier::classify_linear() 01436 { 01437 CFeatures* testfeatures=ui->ui_features->get_test_features(); 01438 01439 if (!classifier) 01440 { 01441 SG_ERROR("no classifier available\n") ; 01442 return NULL; 01443 } 01444 if (!testfeatures) 01445 { 01446 SG_ERROR("no test features available\n") ; 01447 return NULL; 01448 } 01449 if (!(testfeatures->has_property(FP_DOT))) 01450 { 01451 SG_ERROR("testfeatures not based on DotFeatures\n") ; 01452 return NULL; 01453 } 01454 01455 ((CLinearMachine*) classifier)->set_features((CDotFeatures*) testfeatures); 01456 SG_INFO("starting linear classifier testing\n") ; 01457 return classifier->apply(); 01458 } 01459 01460 CLabels* CGUIClassifier::classify_byte_linear() 01461 { 01462 CFeatures* testfeatures=ui->ui_features->get_test_features(); 01463 01464 if (!classifier) 01465 { 01466 SG_ERROR("no svm available\n") ; 01467 return NULL; 01468 } 01469 if (!testfeatures) 01470 { 01471 SG_ERROR("no test features available\n") ; 01472 return NULL; 01473 } 01474 if (testfeatures->get_feature_class() != C_STRING || 01475 testfeatures->get_feature_type() != F_BYTE ) 01476 { 01477 SG_ERROR("testfeatures not of class STRING type BYTE\n") ; 01478 return NULL; 01479 } 01480 01481 ((CWDSVMOcas*) classifier)->set_features((CStringFeatures<uint8_t>*) testfeatures); 01482 SG_INFO("starting linear classifier testing\n") ; 01483 return classifier->apply(); 01484 } 01485 01486 bool CGUIClassifier::classify_example(int32_t idx, float64_t &result) 01487 { 01488 CFeatures* trainfeatures=ui->ui_features->get_train_features(); 01489 CFeatures* testfeatures=ui->ui_features->get_test_features(); 01490 01491 if (!classifier) 01492 { 01493 SG_ERROR("no svm available\n") ; 01494 return false; 01495 } 01496 01497 if (!ui->ui_kernel->is_initialized()) 01498 { 01499 SG_ERROR("kernel not initialized\n") ; 01500 return false; 01501 } 01502 01503 if (!ui->ui_kernel->get_kernel() || 01504 !ui->ui_kernel->get_kernel()->get_kernel_type()==K_CUSTOM) 01505 { 01506 if (!trainfeatures) 01507 { 01508 SG_ERROR("no training features available\n") ; 01509 return false; 01510 } 01511 01512 if (!testfeatures) 01513 { 01514 SG_ERROR("no test features available\n") ; 01515 return false; 01516 } 01517 } 01518 01519 ((CKernelMachine*) classifier)->set_kernel( 01520 ui->ui_kernel->get_kernel()); 01521 01522 result=((CKernelMachine*)classifier)->apply_one(idx); 01523 return true ; 01524 } 01525 01526 01527 bool CGUIClassifier::set_krr_tau(float64_t tau) 01528 { 01529 #ifdef HAVE_LAPACK 01530 krr_tau=tau; 01531 ((CKernelRidgeRegression*) classifier)->set_tau(krr_tau); 01532 SG_INFO("Set to krr_tau=%f.\n", krr_tau); 01533 01534 return true; 01535 #else 01536 return false; 01537 #endif 01538 } 01539 01540 bool CGUIClassifier::set_solver(char* solver) 01541 { 01542 ESolverType s=ST_AUTO; 01543 01544 if (strncmp(solver,"NEWTON", 6)==0) 01545 { 01546 SG_INFO("Using NEWTON solver.\n"); 01547 s=ST_NEWTON; 01548 } 01549 else if (strncmp(solver,"DIRECT", 6)==0) 01550 { 01551 SG_INFO("Using DIRECT solver\n"); 01552 s=ST_DIRECT; 01553 } 01554 else if (strncmp(solver,"BLOCK_NORM", 9)==0) 01555 { 01556 SG_INFO("Using BLOCK_NORM solver\n"); 01557 s=ST_BLOCK_NORM; 01558 } 01559 else if (strncmp(solver,"ELASTICNET", 10)==0) 01560 { 01561 SG_INFO("Using ELASTICNET solver\n"); 01562 s=ST_ELASTICNET; 01563 } 01564 else if (strncmp(solver,"AUTO", 4)==0) 01565 { 01566 SG_INFO("Automagically determining solver.\n"); 01567 s=ST_AUTO; 01568 } 01569 #ifdef USE_CPLEX 01570 else if (strncmp(solver, "CPLEX", 5)==0) 01571 { 01572 SG_INFO("USING CPLEX METHOD selected\n"); 01573 s=ST_CPLEX; 01574 } 01575 #endif 01576 #ifdef USE_GLPK 01577 else if (strncmp(solver,"GLPK", 4)==0) 01578 { 01579 SG_INFO("Using GLPK solver\n"); 01580 s=ST_GLPK; 01581 } 01582 #endif 01583 else 01584 SG_ERROR("Unknown solver type, %s (not compiled in?)\n", solver); 01585 01586 01587 solver_type=s; 01588 return true; 01589 } 01590 01591 bool CGUIClassifier::set_constraint_generator(char* name) 01592 { 01593 if (strcmp(name,"LIBSVM_ONECLASS")==0) 01594 { 01595 SG_UNREF(constraint_generator); 01596 constraint_generator = new CLibSVMOneClass(); 01597 SG_INFO("created SVMlibsvm object for oneclass\n"); 01598 } 01599 else if (strcmp(name,"LIBSVM_NU")==0) 01600 { 01601 SG_UNREF(constraint_generator); 01602 constraint_generator= new CLibSVM(LIBSVM_NU_SVC); 01603 SG_INFO("created SVMlibsvm object\n") ; 01604 } 01605 else if (strcmp(name,"LIBSVM")==0) 01606 { 01607 SG_UNREF(constraint_generator); 01608 constraint_generator= new CLibSVM(); 01609 SG_INFO("created SVMlibsvm object\n") ; 01610 } 01611 #ifdef USE_SVMLIGHT 01612 else if ((strcmp(name,"LIGHT")==0) || (strcmp(name,"SVMLIGHT")==0)) 01613 { 01614 SG_UNREF(constraint_generator); 01615 constraint_generator= new CSVMLight(); 01616 SG_INFO("created SVMLight object\n") ; 01617 } 01618 else if (strcmp(name,"SVMLIGHT_ONECLASS")==0) 01619 { 01620 SG_UNREF(constraint_generator); 01621 constraint_generator= new CSVMLightOneClass(); 01622 SG_INFO("created SVMLightOneClass object\n") ; 01623 } 01624 else if (strcmp(name,"SVRLIGHT")==0) 01625 { 01626 SG_UNREF(constraint_generator); 01627 constraint_generator= new CSVRLight(); 01628 SG_INFO("created SVRLight object\n") ; 01629 } 01630 #endif //USE_SVMLIGHT 01631 else if (strcmp(name,"GPBTSVM")==0) 01632 { 01633 SG_UNREF(constraint_generator); 01634 constraint_generator= new CGPBTSVM(); 01635 SG_INFO("created GPBT-SVM object\n") ; 01636 } 01637 else if (strcmp(name,"MPDSVM")==0) 01638 { 01639 SG_UNREF(constraint_generator); 01640 constraint_generator= new CMPDSVM(); 01641 SG_INFO("created MPD-SVM object\n") ; 01642 } 01643 else if (strcmp(name,"GNPPSVM")==0) 01644 { 01645 SG_UNREF(constraint_generator); 01646 constraint_generator= new CGNPPSVM(); 01647 SG_INFO("created GNPP-SVM object\n") ; 01648 } 01649 else if (strcmp(name,"LIBSVR")==0) 01650 { 01651 SG_UNREF(constraint_generator); 01652 constraint_generator= new CLibSVR(); 01653 SG_INFO("created SVRlibsvm object\n") ; 01654 } 01655 else 01656 { 01657 SG_ERROR("Unknown SV-classifier %s.\n", name); 01658 return false; 01659 } 01660 SG_REF(constraint_generator); 01661 01662 return (constraint_generator!=NULL); 01663 }