SHOGUN
v2.0.0
/***********************************************************************/
/*                                                                     */
/*   SVMLight.cpp                                                      */
/*                                                                     */
/*   Author: Thorsten Joachims                                         */
/*   Date: 19.07.99                                                    */
/*                                                                     */
/*   Copyright (c) 1999  Universitaet Dortmund - All rights reserved   */
/*                                                                     */
/*   This software is available for non-commercial use only. It must   */
/*   not be modified and distributed without prior permission of the   */
/*   author. The author is not responsible for implications from the   */
/*   use of this software.                                             */
/*                                                                     */
/*   THIS INCLUDES THE FOLLOWING ADDITIONS                             */
/*   Generic Kernel Interfacing: Soeren Sonnenburg                     */
/*   Parallelizations: Soeren Sonnenburg                               */
/*   Multiple Kernel Learning: Gunnar Raetsch, Soeren Sonnenburg,      */
/*                             Alexander Zien, Marius Kloft,           */
/*                             Chen Guohua                             */
/*   Linadd Speedup: Gunnar Raetsch, Soeren Sonnenburg                 */
/*                                                                     */
/***********************************************************************/
#include <shogun/lib/config.h>

#ifdef USE_SVMLIGHT

#include <shogun/io/SGIO.h>
#include <shogun/lib/Signal.h>
#include <shogun/mathematics/Math.h>
#include <shogun/lib/Time.h>
#include <shogun/mathematics/lapack.h>

#include <shogun/classifier/svm/SVMLight.h>
#include <shogun/lib/external/pr_loqo.h>

#include <shogun/kernel/Kernel.h>
#include <shogun/machine/KernelMachine.h>
#include <shogun/kernel/CombinedKernel.h>

#include <unistd.h>

#include <shogun/base/Parallel.h>
#include <shogun/labels/BinaryLabels.h>

#ifdef HAVE_PTHREAD
#include <pthread.h>
#endif

using namespace shogun;

#ifndef DOXYGEN_SHOULD_SKIP_THIS
struct S_THREAD_PARAM_REACTIVATE_LINADD
{
    CKernel* kernel;
    float64_t* lin;
    float64_t* last_lin;
    int32_t* active;
    int32_t* docs;
    int32_t start;
    int32_t end;
};

struct S_THREAD_PARAM
{
    float64_t* lin;
    float64_t* W;
    int32_t start, end;
    int32_t* active2dnum;
    int32_t* docs;
    CKernel* kernel;
};

struct S_THREAD_PARAM_REACTIVATE_VANILLA
{
    CKernel* kernel;
    float64_t* lin;
    float64_t* aicache;
    float64_t* a;
    float64_t* a_old;
    int32_t* changed2dnum;
    int32_t* inactive2dnum;
    int32_t* label;
    int32_t start;
    int32_t end;
};

struct S_THREAD_PARAM_KERNEL
{
    float64_t* Kval;
    int32_t *KI, *KJ;
    int32_t start, end;
    CSVMLight* svmlight;
};

#endif // DOXYGEN_SHOULD_SKIP_THIS

void* CSVMLight::update_linear_component_linadd_helper(void* p)
{
    S_THREAD_PARAM* params = (S_THREAD_PARAM*) p;

    int32_t jj=0, j=0;

    for (jj=params->start;(jj<params->end) && (j=params->active2dnum[jj])>=0;jj++)
        params->lin[j]+=params->kernel->compute_optimized(params->docs[j]);

    return NULL;
}

void* CSVMLight::compute_kernel_helper(void* p)
{
    S_THREAD_PARAM_KERNEL* params = (S_THREAD_PARAM_KERNEL*) p;

    int32_t jj=0;
    for (jj=params->start;jj<params->end;jj++)
        params->Kval[jj]=params->svmlight->compute_kernel(params->KI[jj], params->KJ[jj]);

    return NULL;
}
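/* The two helpers above follow SVMlight's standard threading pattern: an
 * index range is split into equal [start,end) chunks, one worker thread per
 * chunk, and the main thread processes the remainder before joining. A
 * minimal standalone sketch of that pattern (illustrative only, not part of
 * this file's API; it assumes 1 <= nthreads <= 16 and is guarded out so it
 * is never compiled):
 */
#if 0
#include <pthread.h>

struct ChunkParam { const double* in; double* out; int start; int end; };

static void* chunk_worker(void* p)
{
    ChunkParam* c = (ChunkParam*) p;
    for (int i = c->start; i < c->end; i++) /* process one chunk */
        c->out[i] = 2.0 * c->in[i];
    return NULL;
}

static void parallel_apply(const double* in, double* out, int n, int nthreads)
{
    pthread_t threads[16];
    ChunkParam params[16];
    int step = n / nthreads;

    for (int t = 0; t < nthreads - 1; t++) /* launch nthreads-1 workers */
    {
        params[t].in = in;
        params[t].out = out;
        params[t].start = t * step;
        params[t].end = (t + 1) * step;
        pthread_create(&threads[t], NULL, chunk_worker, (void*) &params[t]);
    }

    for (int i = (nthreads - 1) * step; i < n; i++) /* main thread does the tail */
        out[i] = 2.0 * in[i];

    for (int t = 0; t < nthreads - 1; t++)
        pthread_join(threads[t], NULL);
}
#endif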
CSVMLight::CSVMLight()
: CSVM()
{
    init();
    set_kernel(NULL);
}

CSVMLight::CSVMLight(float64_t C, CKernel* k, CLabels* lab)
: CSVM(C, k, lab)
{
    init();
}

void CSVMLight::init()
{
    //optimizer settings
    primal=NULL;
    dual=NULL;
    init_margin=0.15;
    init_iter=500;
    precision_violations=0;
    model_b=0;
    verbosity=1;
    opt_precision=DEF_PRECISION;

    // svm variables
    W=NULL;
    model=SG_MALLOC(MODEL, 1);
    learn_parm=SG_MALLOC(LEARN_PARM, 1);
    model->supvec=NULL;
    model->alpha=NULL;
    model->index=NULL;

    // MKL stuff
    mymaxdiff=1;
    mkl_converged=false;
}

CSVMLight::~CSVMLight()
{
    SG_FREE(model->supvec);
    SG_FREE(model->alpha);
    SG_FREE(model->index);
    SG_FREE(model);
    SG_FREE(learn_parm);

    // MKL stuff
    SG_FREE(W);

    // optimizer variables
    SG_FREE(dual);
    SG_FREE(primal);
}

bool CSVMLight::train_machine(CFeatures* data)
{
    //certain setup params
    mkl_converged=false;
    verbosity=1;
    init_margin=0.15;
    init_iter=500;
    precision_violations=0;
    opt_precision=DEF_PRECISION;

    strcpy(learn_parm->predfile, "");
    learn_parm->biased_hyperplane= get_bias_enabled() ? 1 : 0;
    learn_parm->sharedslack=0;
    learn_parm->remove_inconsistent=0;
    learn_parm->skip_final_opt_check=0;
    learn_parm->svm_maxqpsize=get_qpsize();
    learn_parm->svm_newvarsinqp=learn_parm->svm_maxqpsize-1;
    learn_parm->maxiter=100000;
    learn_parm->svm_iter_to_shrink=100;
    learn_parm->svm_c=C1;
    learn_parm->transduction_posratio=0.33;
    learn_parm->svm_costratio=C2/C1;
    learn_parm->svm_costratio_unlab=1.0;
    learn_parm->svm_unlabbound=1E-5;
    learn_parm->epsilon_crit=epsilon; // GU: better decrease it ... ??
    learn_parm->epsilon_a=1E-15;
    learn_parm->compute_loo=0;
    learn_parm->rho=1.0;
    learn_parm->xa_depth=0;

    if (!kernel)
        SG_ERROR("SVM_light can not proceed without kernel!\n");

    if (data)
    {
        if (m_labels->get_num_labels() != data->get_num_vectors())
        {
            SG_ERROR("%s::train_machine(): Number of training vectors (%d) does"
                    " not match number of labels (%d)\n", get_name(),
                    data->get_num_vectors(), m_labels->get_num_labels());
        }
        kernel->init(data, data);
    }

    if (!kernel->has_features())
        SG_ERROR("SVM_light can not proceed without initialized kernel!\n");

    ASSERT(m_labels && m_labels->get_num_labels());
    ASSERT(m_labels->get_label_type() == LT_BINARY);
    ASSERT(kernel->get_num_vec_lhs()==m_labels->get_num_labels());

    // in case of LINADD enabled kernels cleanup!
    if (kernel->has_property(KP_LINADD) && get_linadd_enabled())
        kernel->clear_normal();

    // output some info
    SG_DEBUG("threads = %i\n", parallel->get_num_threads());
    SG_DEBUG("qpsize = %i\n", learn_parm->svm_maxqpsize);
    SG_DEBUG("epsilon = %1.1e\n", learn_parm->epsilon_crit);
    SG_DEBUG("kernel->has_property(KP_LINADD) = %i\n", kernel->has_property(KP_LINADD));
    SG_DEBUG("kernel->has_property(KP_KERNCOMBINATION) = %i\n", kernel->has_property(KP_KERNCOMBINATION));
    SG_DEBUG("kernel->has_property(KP_BATCHEVALUATION) = %i\n", kernel->has_property(KP_BATCHEVALUATION));
    SG_DEBUG("kernel->get_optimization_type() = %s\n", kernel->get_optimization_type()==FASTBUTMEMHUNGRY ? "FASTBUTMEMHUNGRY" : "SLOWBUTMEMEFFICIENT");
    SG_DEBUG("get_solver_type() = %i\n", get_solver_type());
    SG_DEBUG("get_linadd_enabled() = %i\n", get_linadd_enabled());
    SG_DEBUG("get_batch_computation_enabled() = %i\n", get_batch_computation_enabled());
    SG_DEBUG("kernel->get_num_subkernels() = %i\n", kernel->get_num_subkernels());

    use_kernel_cache = !((kernel->get_kernel_type() == K_CUSTOM) ||
            (get_linadd_enabled() && kernel->has_property(KP_LINADD)));

    SG_DEBUG("use_kernel_cache = %i\n", use_kernel_cache);

    if (kernel->get_kernel_type() == K_COMBINED)
    {
        CKernel* kn = ((CCombinedKernel*)kernel)->get_first_kernel();

        while (kn)
        {
            // allocate kernel cache but clean up beforehand
            kn->resize_kernel_cache(kn->get_cache_size());
            SG_UNREF(kn);
            kn = ((CCombinedKernel*) kernel)->get_next_kernel();
        }
    }

    kernel->resize_kernel_cache(kernel->get_cache_size());

    // train the svm
    svm_learn();

    // brain damaged svm light work around
    create_new_model(model->sv_num-1);
    set_bias(-model->b);
    for (int32_t i=0; i<model->sv_num-1; i++)
    {
        set_alpha(i, model->alpha[i+1]);
        set_support_vector(i, model->supvec[i+1]);
    }

    // in case of LINADD enabled kernels cleanup!
    if (kernel->has_property(KP_LINADD) && get_linadd_enabled())
    {
        kernel->clear_normal();
        kernel->delete_optimization();
    }

    if (use_kernel_cache)
        kernel->kernel_cache_cleanup();

    return true;
}

int32_t CSVMLight::get_runtime()
{
    clock_t start;
    start = clock();
    return((int32_t)((float64_t)start*100.0/(float64_t)CLOCKS_PER_SEC));
}
void CSVMLight::svm_learn()
{
    int32_t *inconsistent, i;
    int32_t misclassified,upsupvecnum;
    float64_t maxdiff, *lin, *c, *a;
    int32_t iterations;
    int32_t trainpos=0, trainneg=0;
    ASSERT(m_labels);
    SGVector<int32_t> lab=((CBinaryLabels*) m_labels)->get_int_labels();
    int32_t totdoc=lab.vlen;
    ASSERT(lab.vector && lab.vlen);
    int32_t* label=SGVector<int32_t>::clone_vector(lab.vector, lab.vlen);

    int32_t* docs=SG_MALLOC(int32_t, totdoc);
    SG_FREE(W);
    W=NULL;
    count = 0;

    if (kernel->has_property(KP_KERNCOMBINATION) && callback)
    {
        W = SG_MALLOC(float64_t, totdoc*kernel->get_num_subkernels());
        for (i=0; i<totdoc*kernel->get_num_subkernels(); i++)
            W[i]=0;
    }

    for (i=0; i<totdoc; i++)
        docs[i]=i;

    float64_t *xi_fullset; /* buffer for storing xi on full sample in loo */
    float64_t *a_fullset;  /* buffer for storing alpha on full sample in loo */
    TIMING timing_profile;
    SHRINK_STATE shrink_state;

    timing_profile.time_kernel=0;
    timing_profile.time_opti=0;
    timing_profile.time_shrink=0;
    timing_profile.time_update=0;
    timing_profile.time_model=0;
    timing_profile.time_check=0;
    timing_profile.time_select=0;

    /* make sure -n value is reasonable */
    if((learn_parm->svm_newvarsinqp < 2)
            || (learn_parm->svm_newvarsinqp > learn_parm->svm_maxqpsize)) {
        learn_parm->svm_newvarsinqp=learn_parm->svm_maxqpsize;
    }

    init_shrink_state(&shrink_state,totdoc,(int32_t)MAXSHRINK);

    inconsistent = SG_MALLOC(int32_t, totdoc);
    c = SG_MALLOC(float64_t, totdoc);
    a = SG_MALLOC(float64_t, totdoc);
    a_fullset = SG_MALLOC(float64_t, totdoc);
    xi_fullset = SG_MALLOC(float64_t, totdoc);
    lin = SG_MALLOC(float64_t, totdoc);
    if (m_linear_term.vlen>0)
        learn_parm->eps=get_linear_term_array();
    else
    {
        learn_parm->eps=SG_MALLOC(float64_t, totdoc); /* equivalent regression epsilon for classification */
        SGVector<float64_t>::fill_vector(learn_parm->eps, totdoc, -1.0);
    }

    learn_parm->svm_cost = SG_MALLOC(float64_t, totdoc);

    SG_FREE(model->supvec);
    SG_FREE(model->alpha);
    SG_FREE(model->index);
    model->supvec = SG_MALLOC(int32_t, totdoc+2);
    model->alpha = SG_MALLOC(float64_t, totdoc+2);
    model->index = SG_MALLOC(int32_t, totdoc+2);

    model->at_upper_bound=0;
    model->b=0;
    model->supvec[0]=0; /* element 0 reserved and empty for now */
    model->alpha[0]=0;
    model->totdoc=totdoc;

    model->kernel=kernel;

    model->sv_num=1;
    model->loo_error=-1;
    model->loo_recall=-1;
    model->loo_precision=-1;
    model->xa_error=-1;
    model->xa_recall=-1;
    model->xa_precision=-1;

    for (i=0;i<totdoc;i++) { /* various inits */
        inconsistent[i]=0;
        c[i]=0;
        a[i]=0;
        lin[i]=0;

        if(label[i] > 0) {
            learn_parm->svm_cost[i]=learn_parm->svm_c*learn_parm->svm_costratio*
                fabs((float64_t)label[i]);
            label[i]=1;
            trainpos++;
        }
        else if(label[i] < 0) {
            learn_parm->svm_cost[i]=learn_parm->svm_c*fabs((float64_t)label[i]);
            label[i]=-1;
            trainneg++;
        }
        else {
            learn_parm->svm_cost[i]=0;
        }
    }

    /* compute starting state for initial alpha values */
    SG_DEBUG("alpha:%d num_sv:%d\n", m_alpha.vector, get_num_support_vectors());

    if(m_alpha.vector && get_num_support_vectors()) {
        if(verbosity>=1) {
            SG_INFO("Computing starting state...");
        }

        float64_t* alpha = SG_MALLOC(float64_t, totdoc);

        for (i=0; i<totdoc; i++)
            alpha[i]=0;

        for (i=0; i<get_num_support_vectors(); i++)
            alpha[get_support_vector(i)]=get_alpha(i);

        int32_t* index = SG_MALLOC(int32_t, totdoc);
        int32_t* index2dnum = SG_MALLOC(int32_t, totdoc+11);
        float64_t* aicache = SG_MALLOC(float64_t, totdoc);
        for (i=0;i<totdoc;i++) { /* create full index and clip alphas */
            index[i]=1;
            alpha[i]=fabs(alpha[i]);
            if(alpha[i]<0) alpha[i]=0;
            if(alpha[i]>learn_parm->svm_cost[i]) alpha[i]=learn_parm->svm_cost[i];
        }

        if (use_kernel_cache)
        {
            if (callback &&
                    (!((CCombinedKernel*)kernel)->get_append_subkernel_weights())
               )
            {
                CCombinedKernel* k = (CCombinedKernel*) kernel;
                CKernel* kn = k->get_first_kernel();

                while (kn)
                {
                    for (i=0;i<totdoc;i++) // fill kernel cache with unbounded SV
                        if((alpha[i]>0) && (alpha[i]<learn_parm->svm_cost[i])
                                && (kn->kernel_cache_space_available()))
                            kn->cache_kernel_row(i);

                    for (i=0;i<totdoc;i++) // fill rest of kernel cache with bounded SV
                        if((alpha[i]==learn_parm->svm_cost[i])
                                && (kn->kernel_cache_space_available()))
                            kn->cache_kernel_row(i);

                    SG_UNREF(kn);
                    kn = k->get_next_kernel();
                }
            }
            else
            {
                for (i=0;i<totdoc;i++) /* fill kernel cache with unbounded SV */
                    if((alpha[i]>0) && (alpha[i]<learn_parm->svm_cost[i])
                            && (kernel->kernel_cache_space_available()))
                        kernel->cache_kernel_row(i);

                for (i=0;i<totdoc;i++) /* fill rest of kernel cache with bounded SV */
                    if((alpha[i]==learn_parm->svm_cost[i])
                            && (kernel->kernel_cache_space_available()))
                        kernel->cache_kernel_row(i);
            }
        }
        compute_index(index,totdoc,index2dnum);
        update_linear_component(docs,label,index2dnum,alpha,a,index2dnum,totdoc,
                lin,aicache,NULL);
        calculate_svm_model(docs,label,lin,alpha,a,c,
                index2dnum,index2dnum);
        for (i=0;i<totdoc;i++) { /* copy initial alphas */
            a[i]=alpha[i];
        }

        SG_FREE(index);
        SG_FREE(index2dnum);
        SG_FREE(aicache);
        SG_FREE(alpha);

        if(verbosity>=1)
            SG_DONE();
    }
    SG_DEBUG("%d totdoc %d pos %d neg\n", totdoc, trainpos, trainneg);
    SG_DEBUG("Optimizing...\n");

    /* train the svm */
    iterations=optimize_to_convergence(docs,label,totdoc,
            &shrink_state,inconsistent,a,lin,
            c,&timing_profile,
            &maxdiff,(int32_t)-1,
            (int32_t)1);

    if(verbosity>=1) {
        if(verbosity==1)
        {
            SG_DONE();
            SG_DEBUG("(%ld iterations)", iterations);
        }

        misclassified=0;
        for (i=0;(i<totdoc);i++) { /* get final statistic */
            if((lin[i]-model->b)*(float64_t)label[i] <= 0.0)
                misclassified++;
        }

        SG_INFO("Optimization finished (%ld misclassified, maxdiff=%.8f).\n",
                misclassified,maxdiff);

        SG_INFO("obj = %.16f, rho = %.16f\n",get_objective(),model->b);
        if (maxdiff>epsilon)
            SG_WARNING("maximum violation (%f) exceeds svm_epsilon (%f) due to numerical difficulties\n", maxdiff, epsilon);

        upsupvecnum=0;
        for (i=1;i<model->sv_num;i++)
        {
            if(fabs(model->alpha[i]) >=
                    (learn_parm->svm_cost[model->supvec[i]]-
                     learn_parm->epsilon_a))
                upsupvecnum++;
        }
        SG_INFO("Number of SV: %ld (including %ld at upper bound)\n",
                model->sv_num-1,upsupvecnum);
    }

    shrink_state_cleanup(&shrink_state);
    SG_FREE(label);
    SG_FREE(inconsistent);
    SG_FREE(c);
    SG_FREE(a);
    SG_FREE(a_fullset);
    SG_FREE(xi_fullset);
    SG_FREE(lin);
    SG_FREE(learn_parm->eps);
    SG_FREE(learn_parm->svm_cost);
    SG_FREE(docs);
}
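/* optimize_to_convergence() below implements SVMlight's decomposition loop
 * (Joachims, "Making Large-Scale SVM Learning Practical", 1999). Roughly,
 * per iteration:
 *   1. select a working set of at most svm_maxqpsize variables by steepest
 *      feasible descent (or pseudo-randomly every 101st iteration),
 *   2. solve the small QP restricted to that set (optimize_svm),
 *   3. update the gradient cache lin[] and the model (b, support vectors),
 *   4. check the KT conditions and periodically shrink away variables that
 *      have been at a bound and optimal for a long time.
 * The loop ends once the largest KT violation drops below epsilon_crit.
 */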
int32_t CSVMLight::optimize_to_convergence(int32_t* docs, int32_t* label, int32_t totdoc,
        SHRINK_STATE *shrink_state,
        int32_t *inconsistent,
        float64_t *a, float64_t *lin, float64_t *c,
        TIMING *timing_profile, float64_t *maxdiff,
        int32_t heldout, int32_t retrain)
    /* docs: Training vectors (x-part) */
    /* label: Training labels/value (y-part, zero if test example for
       transduction) */
    /* totdoc: Number of examples in docs/label */
    /* learn_parm: Learning parameters */
    /* kernel_parm: Kernel parameters */
    /* kernel_cache: Initialized/partly filled Cache, if using a kernel.
       NULL if linear. */
    /* shrink_state: State of active variables */
    /* inconsistent: examples thrown out as inconsistent */
    /* a: alphas */
    /* lin: linear component of gradient */
    /* c: right hand side of inequalities (margin) */
    /* maxdiff: returns maximum violation of KT-conditions */
    /* heldout: marks held-out example for leave-one-out (or -1) */
    /* retrain: selects training mode (1=regular / 2=holdout) */
{
    int32_t *chosen,*key,i,j,jj,*last_suboptimal_at,noshrink;
    int32_t inconsistentnum,choosenum,already_chosen=0,iteration;
    int32_t misclassified,supvecnum=0,*active2dnum,inactivenum;
    int32_t *working2dnum,*selexam;
    int32_t activenum;
    float64_t criterion, eq;
    float64_t *a_old;
    int32_t t0=0,t1=0,t2=0,t3=0,t4=0,t5=0,t6=0; /* timing */
    float64_t epsilon_crit_org;
    float64_t bestmaxdiff;
    float64_t worstmaxdiff;
    int32_t bestmaxdiffiter,terminate;
    bool reactivated=false;

    float64_t *selcrit; /* buffer for sorting */
    float64_t *aicache; /* buffer to keep one row of hessian */
    QP qp; /* buffer for one quadratic program */

    epsilon_crit_org=learn_parm->epsilon_crit; /* save org */
    if(kernel->has_property(KP_LINADD) && get_linadd_enabled()) {
        learn_parm->epsilon_crit=2.0;
        /* caching makes no sense for linear kernel */
    }
    learn_parm->epsilon_shrink=2;
    (*maxdiff)=1;

    SG_DEBUG("totdoc:%d\n",totdoc);
    chosen = SG_MALLOC(int32_t, totdoc);
    last_suboptimal_at = SG_MALLOC(int32_t, totdoc);
    key = SG_MALLOC(int32_t, totdoc+11);
    selcrit = SG_MALLOC(float64_t, totdoc);
    selexam = SG_MALLOC(int32_t, totdoc);
    a_old = SG_MALLOC(float64_t, totdoc);
    aicache = SG_MALLOC(float64_t, totdoc);
    working2dnum = SG_MALLOC(int32_t, totdoc+11);
    active2dnum = SG_MALLOC(int32_t, totdoc+11);
    qp.opt_ce = SG_MALLOC(float64_t, learn_parm->svm_maxqpsize);
    qp.opt_ce0 = SG_MALLOC(float64_t, 1);
    qp.opt_g = SG_MALLOC(float64_t, learn_parm->svm_maxqpsize*learn_parm->svm_maxqpsize);
    qp.opt_g0 = SG_MALLOC(float64_t, learn_parm->svm_maxqpsize);
    qp.opt_xinit = SG_MALLOC(float64_t, learn_parm->svm_maxqpsize);
    qp.opt_low = SG_MALLOC(float64_t, learn_parm->svm_maxqpsize);
    qp.opt_up = SG_MALLOC(float64_t, learn_parm->svm_maxqpsize);

    choosenum=0;
    inconsistentnum=0;
    if(!retrain) retrain=1;
    iteration=1;
    bestmaxdiffiter=1;
    bestmaxdiff=999999999;
    worstmaxdiff=1e-10;
    terminate=0;

    kernel->set_time(iteration); /* for lru cache */

    for (i=0;i<totdoc;i++) { /* various inits */
        chosen[i]=0;
        a_old[i]=a[i];
        last_suboptimal_at[i]=1;
        if(inconsistent[i])
            inconsistentnum++;
    }
    activenum=compute_index(shrink_state->active,totdoc,active2dnum);
    inactivenum=totdoc-activenum;
    clear_index(working2dnum);

    /* repeat this loop until we have convergence */
    CTime start_time;
    mkl_converged=false;

#ifdef CYGWIN
    for (;((iteration<100 || (!mkl_converged && callback) ) || (retrain && (!terminate))); iteration++){
#else
    CSignal::clear_cancel();
    for (;((!CSignal::cancel_computations()) && ((iteration<3 || (!mkl_converged && callback) ) || (retrain && (!terminate)))); iteration++){
#endif

        if(use_kernel_cache)
            kernel->set_time(iteration); /* for lru cache */

        if(verbosity>=2) t0=get_runtime();
        if(verbosity>=3) {
            SG_DEBUG("\nSelecting working set... ");
        }

        if(learn_parm->svm_newvarsinqp>learn_parm->svm_maxqpsize)
            learn_parm->svm_newvarsinqp=learn_parm->svm_maxqpsize;

        i=0;
        for (jj=0;(j=working2dnum[jj])>=0;jj++) { /* clear working set */
            if((chosen[j]>=(learn_parm->svm_maxqpsize/
                            CMath::min(learn_parm->svm_maxqpsize,
                                learn_parm->svm_newvarsinqp)))
                    || (inconsistent[j])
                    || (j == heldout)) {
                chosen[j]=0;
                choosenum--;
            }
            else {
                chosen[j]++;
                working2dnum[i++]=j;
            }
        }
        working2dnum[i]=-1;

        if(retrain == 2) {
            choosenum=0;
            for (jj=0;(j=working2dnum[jj])>=0;jj++) { /* fully clear working set */
                chosen[j]=0;
            }
            clear_index(working2dnum);
            for (i=0;i<totdoc;i++) { /* set inconsistent examples to zero (-i 1) */
                if((inconsistent[i] || (heldout==i)) && (a[i] != 0.0)) {
                    chosen[i]=99999;
                    choosenum++;
                    a[i]=0;
                }
            }
            if(learn_parm->biased_hyperplane) {
                eq=0;
                for (i=0;i<totdoc;i++) { /* make sure we fulfill equality constraint */
                    eq+=a[i]*label[i];
                }
                for (i=0;(i<totdoc) && (fabs(eq) > learn_parm->epsilon_a);i++) {
                    if((eq*label[i] > 0) && (a[i] > 0)) {
                        chosen[i]=88888;
                        choosenum++;
                        if((eq*label[i]) > a[i]) {
                            eq-=(a[i]*label[i]);
                            a[i]=0;
                        }
                        else {
                            a[i]-=(eq*label[i]);
                            eq=0;
                        }
                    }
                }
            }
            compute_index(chosen,totdoc,working2dnum);
        }
        else
        { /* select working set according to steepest gradient */
            if(iteration % 101)
            {
                already_chosen=0;
                if(CMath::min(learn_parm->svm_newvarsinqp, learn_parm->svm_maxqpsize-choosenum)>=4 &&
                        (!(kernel->has_property(KP_LINADD) && get_linadd_enabled())))
                {
                    /* select part of the working set from cache */
                    already_chosen=select_next_qp_subproblem_grad(
                            label,a,lin,c,totdoc,
                            (int32_t)(CMath::min(learn_parm->svm_maxqpsize-choosenum,
                                    learn_parm->svm_newvarsinqp)/2),
                            inconsistent,active2dnum,
                            working2dnum,selcrit,selexam,1,
                            key,chosen);
                    choosenum+=already_chosen;
                }
                choosenum+=select_next_qp_subproblem_grad(
                        label,a,lin,c,totdoc,
                        CMath::min(learn_parm->svm_maxqpsize-choosenum,
                            learn_parm->svm_newvarsinqp-already_chosen),
                        inconsistent,active2dnum,
                        working2dnum,selcrit,selexam,0,key,
                        chosen);
            }
            else { /* once in a while, select a somewhat random working set
                      to break out of infinite loops caused by numerical
                      inaccuracies in the core qp-solver */
                choosenum+=select_next_qp_subproblem_rand(
                        label,a,lin,c,totdoc,
                        CMath::min(learn_parm->svm_maxqpsize-choosenum,
                            learn_parm->svm_newvarsinqp),
                        inconsistent,active2dnum,
                        working2dnum,selcrit,selexam,key,
                        chosen,iteration);
            }
        }

        if(verbosity>=2) {
            SG_INFO(" %ld vectors chosen\n",choosenum);
        }

        if(verbosity>=2) t1=get_runtime();

        if (use_kernel_cache)
        {
            // in case of MKL w/o linadd cache each kernel independently
            // else if linadd is disabled cache single kernel
            if ( callback &&
                    (!((CCombinedKernel*) kernel)->get_append_subkernel_weights())
               )
            {
                CCombinedKernel* k = (CCombinedKernel*) kernel;
                CKernel* kn = k->get_first_kernel();

                while (kn)
                {
                    kn->cache_multiple_kernel_rows(working2dnum, choosenum);
                    SG_UNREF(kn);
                    kn = k->get_next_kernel();
                }
            }
            else
                kernel->cache_multiple_kernel_rows(working2dnum, choosenum);
        }

        if(verbosity>=2) t2=get_runtime();

        if(retrain != 2) {
            optimize_svm(docs,label,inconsistent,0.0,chosen,active2dnum,
                    totdoc,working2dnum,choosenum,a,lin,c,
                    aicache,&qp,&epsilon_crit_org);
        }

        if(verbosity>=2) t3=get_runtime();
        update_linear_component(docs,label,active2dnum,a,a_old,working2dnum,totdoc,
                lin,aicache,c);

        if(verbosity>=2) t4=get_runtime();
        supvecnum=calculate_svm_model(docs,label,lin,a,a_old,c,working2dnum,active2dnum);

        if(verbosity>=2) t5=get_runtime();

        for (jj=0;(i=working2dnum[jj])>=0;jj++) {
            a_old[i]=a[i];
        }

        retrain=check_optimality(label,a,lin,c,totdoc,
                maxdiff,epsilon_crit_org,&misclassified,
                inconsistent,active2dnum,last_suboptimal_at,
                iteration);

        if(verbosity>=2) {
            t6=get_runtime();
            timing_profile->time_select+=t1-t0;
            timing_profile->time_kernel+=t2-t1;
            timing_profile->time_opti+=t3-t2;
            timing_profile->time_update+=t4-t3;
            timing_profile->time_model+=t5-t4;
            timing_profile->time_check+=t6-t5;
        }

        /* checking whether optimizer got stuck */
        if((*maxdiff) < bestmaxdiff) {
            bestmaxdiff=(*maxdiff);
            bestmaxdiffiter=iteration;
        }
        if(iteration > (bestmaxdiffiter+learn_parm->maxiter)) {
            /* no progress for a long time? */
            terminate=1;
            retrain=0;
            SG_WARNING("Relaxing KT-Conditions due to slow progress! Terminating!\n");
            SG_DEBUG("(iteration:%d, bestmaxdiffiter:%d, learn_parm->maxiter:%d)\n", iteration, bestmaxdiffiter, learn_parm->maxiter);
        }

        noshrink= (get_shrinking_enabled()) ? 0 : 1;

        if ((!callback) && (!retrain) && (inactivenum>0) &&
                ((!learn_parm->skip_final_opt_check) || (kernel->has_property(KP_LINADD) && get_linadd_enabled())))
        {
            t1=get_runtime();
            SG_DEBUG("reactivating inactive examples\n");

            reactivate_inactive_examples(label,a,shrink_state,lin,c,totdoc,
                    iteration,inconsistent,
                    docs,aicache,
                    maxdiff);
            reactivated=true;
            SG_DEBUG("done reactivating inactive examples (maxdiff:%8f eps_crit:%8f orig_eps:%8f)\n", *maxdiff, learn_parm->epsilon_crit, epsilon_crit_org);
            /* Update to new active variables. */
            activenum=compute_index(shrink_state->active,totdoc,active2dnum);
            inactivenum=totdoc-activenum;
            /* reset watchdog */
            bestmaxdiff=(*maxdiff);
            bestmaxdiffiter=iteration;

            /* termination criterion */
            noshrink=1;
            retrain=0;
            if((*maxdiff) > learn_parm->epsilon_crit)
            {
                SG_INFO("restarting optimization as we are - due to shrinking - deviating too much (maxdiff(%f) > eps(%f))\n", *maxdiff, learn_parm->epsilon_crit);
                retrain=1;
            }
            timing_profile->time_shrink+=get_runtime()-t1;
            if (((verbosity>=1) && (!(kernel->has_property(KP_LINADD) && get_linadd_enabled())))
                    || (verbosity>=2)) {
                SG_DONE();
                SG_DEBUG("Number of inactive variables = %ld\n", inactivenum);
            }
        }

        if((!retrain) && (learn_parm->epsilon_crit>(*maxdiff)))
            learn_parm->epsilon_crit=(*maxdiff);
        if((!retrain) && (learn_parm->epsilon_crit>epsilon_crit_org)) {
            learn_parm->epsilon_crit/=4.0;
            retrain=1;
            noshrink=1;
        }
        if(learn_parm->epsilon_crit<epsilon_crit_org)
            learn_parm->epsilon_crit=epsilon_crit_org;

        if(verbosity>=2) {
            SG_INFO(" => (%ld SV (incl. %ld SV at u-bound), max violation=%.5f)\n",
                    supvecnum,model->at_upper_bound,(*maxdiff));
        }
        mymaxdiff=*maxdiff;

        //don't shrink w/ mkl
        if (((iteration % 10) == 0) && (!noshrink) && !callback)
        {
            activenum=shrink_problem(shrink_state,active2dnum,last_suboptimal_at,iteration,totdoc,
                    CMath::max((int32_t)(activenum/10),
                        CMath::max((int32_t)(totdoc/500),(int32_t) 100)),
                    a,inconsistent, c, lin, label);

            inactivenum=totdoc-activenum;

            if (use_kernel_cache && (supvecnum>kernel->get_max_elems_cache())
                    && ((kernel->get_activenum_cache()-activenum)>CMath::max((int32_t)(activenum/10),(int32_t) 500))) {

                kernel->kernel_cache_shrink(totdoc, CMath::min((int32_t) (kernel->get_activenum_cache()-activenum),
                            (int32_t) (kernel->get_activenum_cache()-supvecnum)),
                        shrink_state->active);
            }
        }

        if (bestmaxdiff>worstmaxdiff)
            worstmaxdiff=bestmaxdiff;

        SG_ABS_PROGRESS(bestmaxdiff, -CMath::log10(bestmaxdiff), -CMath::log10(worstmaxdiff), -CMath::log10(epsilon), 6);

        /* Terminate loop */
        if (m_max_train_time > 0 && start_time.cur_time_diff() > m_max_train_time) {
            terminate = 1;
            retrain = 0;
        }

    } /* end of loop */

    SG_DEBUG("inactive:%d\n", inactivenum);

    if (inactivenum && !reactivated && !callback)
    {
        SG_DEBUG("reactivating inactive examples\n");
        reactivate_inactive_examples(label,a,shrink_state,lin,c,totdoc,
                iteration,inconsistent,
                docs,aicache,
                maxdiff);
        SG_DEBUG("done reactivating inactive examples\n");
        /* Update to new active variables. */
        activenum=compute_index(shrink_state->active,totdoc,active2dnum);
        inactivenum=totdoc-activenum;
        /* reset watchdog */
        bestmaxdiff=(*maxdiff);
        bestmaxdiffiter=iteration;
    }

    //use this for our purposes!
    criterion=compute_objective_function(a,lin,c,learn_parm->eps,label,totdoc);
    CSVM::set_objective(criterion);

    SG_FREE(chosen);
    SG_FREE(last_suboptimal_at);
    SG_FREE(key);
    SG_FREE(selcrit);
    SG_FREE(selexam);
    SG_FREE(a_old);
    SG_FREE(aicache);
    SG_FREE(working2dnum);
    SG_FREE(active2dnum);
    SG_FREE(qp.opt_ce);
    SG_FREE(qp.opt_ce0);
    SG_FREE(qp.opt_g);
    SG_FREE(qp.opt_g0);
    SG_FREE(qp.opt_xinit);
    SG_FREE(qp.opt_low);
    SG_FREE(qp.opt_up);

    learn_parm->epsilon_crit=epsilon_crit_org; /* restore org */

    return(iteration);
}
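/* compute_objective_function() below evaluates the dual objective in
 * SVMlight's sign convention. With eps[i] = -1 and c[i] = 0 (the values set
 * up for plain classification in svm_learn) and
 * lin[i] = sum_j a[j]*y[j]*K(i,j), the expression
 *   sum_i (eps[i] - y[i]*c[i])*a[i] + 0.5*a[i]*y[i]*lin[i]
 * reduces to
 *   -sum_i a[i] + 0.5*sum_{i,j} a[i]*a[j]*y[i]*y[j]*K(i,j),
 * i.e. the negative of the usual dual W(a); minimizing it maximizes W(a).
 */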
float64_t CSVMLight::compute_objective_function(
        float64_t *a, float64_t *lin, float64_t *c, float64_t* eps, int32_t *label,
        int32_t totdoc)
    /* Return value of objective function. */
    /* Works only relative to the active variables! */
{
    /* calculate value of objective function */
    float64_t criterion=0;

    for (int32_t i=0;i<totdoc;i++)
        criterion=criterion+(eps[i]-(float64_t)label[i]*c[i])*a[i]+0.5*a[i]*label[i]*lin[i];

    return(criterion);
}

void CSVMLight::clear_index(int32_t *index)
    /* initializes and empties index */
{
    index[0]=-1;
}

void CSVMLight::add_to_index(int32_t *index, int32_t elem)
    /* appends elem to the -1-terminated index */
{
    register int32_t i;
    for (i=0;index[i] != -1;i++);
    index[i]=elem;
    index[i+1]=-1;
}

int32_t CSVMLight::compute_index(
        int32_t *binfeature, int32_t range, int32_t *index)
    /* create an inverted index of binfeature */
{
    register int32_t i,ii;

    ii=0;
    for (i=0;i<range;i++) {
        if(binfeature[i]) {
            index[ii]=i;
            ii++;
        }
    }
    for (i=0;i<4;i++) {
        index[ii+i]=-1;
    }
    return(ii);
}

void CSVMLight::optimize_svm(
        int32_t* docs, int32_t* label, int32_t *exclude_from_eq_const,
        float64_t eq_target, int32_t *chosen, int32_t *active2dnum, int32_t totdoc,
        int32_t *working2dnum, int32_t varnum, float64_t *a, float64_t *lin,
        float64_t *c, float64_t *aicache, QP *qp, float64_t *epsilon_crit_target)
    /* Do optimization on the working set. */
{
    int32_t i;
    float64_t *a_v;

    //compute_matrices_for_optimization_parallel(docs,label,
    //        exclude_from_eq_const,eq_target,chosen,
    //        active2dnum,working2dnum,a,lin,c,
    //        varnum,totdoc,aicache,qp);

    compute_matrices_for_optimization(docs,label,
            exclude_from_eq_const,eq_target,chosen,
            active2dnum,working2dnum,a,lin,c,
            varnum,totdoc,aicache,qp);

    if(verbosity>=3) {
        SG_DEBUG("Running optimizer...");
    }
    /* call the qp-subsolver */
    a_v=optimize_qp(qp,epsilon_crit_target,
            learn_parm->svm_maxqpsize,
            &(model->b),                /* in case the optimizer gives us */
            learn_parm->svm_maxqpsize); /* the threshold for free. otherwise */
                                        /* b is calculated in calculate_model. */
    if(verbosity>=3) {
        SG_DONE();
    }

    for (i=0;i<varnum;i++)
        a[working2dnum[i]]=a_v[i];
}

void CSVMLight::compute_matrices_for_optimization_parallel(
        int32_t* docs, int32_t* label, int32_t *exclude_from_eq_const,
        float64_t eq_target, int32_t *chosen, int32_t *active2dnum, int32_t *key,
        float64_t *a, float64_t *lin, float64_t *c, int32_t varnum, int32_t totdoc,
        float64_t *aicache, QP *qp)
{
    if (parallel->get_num_threads()<=1)
    {
        compute_matrices_for_optimization(docs, label, exclude_from_eq_const, eq_target,
                chosen, active2dnum, key, a, lin, c,
                varnum, totdoc, aicache, qp);
    }
#ifdef HAVE_PTHREAD
    else
    {
        register int32_t ki,kj,i,j;
        register float64_t kernel_temp;

        qp->opt_n=varnum;
        qp->opt_ce0[0]=-eq_target; /* compute the constant for equality constraint */
        for (j=1;j<model->sv_num;j++) { /* start at 1 */
            if((!chosen[model->supvec[j]])
                    && (!exclude_from_eq_const[(model->supvec[j])])) {
                qp->opt_ce0[0]+=model->alpha[j];
            }
        }
        if(learn_parm->biased_hyperplane)
            qp->opt_m=1;
        else
            qp->opt_m=0; /* eq-constraint will be ignored */

        /* init linear part of objective function */
        for (i=0;i<varnum;i++) {
            qp->opt_g0[i]=lin[key[i]];
        }

        ASSERT(parallel->get_num_threads()>1);
        int32_t *KI=SG_MALLOC(int32_t, varnum*varnum);
        int32_t *KJ=SG_MALLOC(int32_t, varnum*varnum);
        int32_t Knum=0;
        float64_t *Kval = SG_MALLOC(float64_t, varnum*(varnum+1)/2);
        for (i=0;i<varnum;i++) {
            ki=key[i];
            KI[Knum]=docs[ki];
            KJ[Knum]=docs[ki];
            Knum++;
            for (j=i+1;j<varnum;j++)
            {
                kj=key[j];
                KI[Knum]=docs[ki];
                KJ[Knum]=docs[kj];
                Knum++;
            }
        }
        ASSERT(Knum<=varnum*(varnum+1)/2);

        pthread_t* threads = SG_MALLOC(pthread_t, parallel->get_num_threads()-1);
        S_THREAD_PARAM_KERNEL* params = SG_MALLOC(S_THREAD_PARAM_KERNEL, parallel->get_num_threads()-1);
        int32_t step= Knum/parallel->get_num_threads();
        //SG_DEBUG("\nkernel-step size: %i\n", step);
        for (int32_t t=0; t<parallel->get_num_threads()-1; t++)
        {
            params[t].svmlight = this;
            params[t].start = t*step;
            params[t].end = (t+1)*step;
            params[t].KI=KI;
            params[t].KJ=KJ;
            params[t].Kval=Kval;
            pthread_create(&threads[t], NULL, CSVMLight::compute_kernel_helper, (void*)&params[t]);
        }
        for (i=params[parallel->get_num_threads()-2].end; i<Knum; i++)
            Kval[i]=compute_kernel(KI[i],KJ[i]);

        for (int32_t t=0; t<parallel->get_num_threads()-1; t++)
            pthread_join(threads[t], NULL);

        SG_FREE(params);
        SG_FREE(threads);

        Knum=0;
        for (i=0;i<varnum;i++) {
            ki=key[i];

            /* Compute the matrix for equality constraints */
            qp->opt_ce[i]=label[ki];
            qp->opt_low[i]=0;
            qp->opt_up[i]=learn_parm->svm_cost[ki];

            kernel_temp=Kval[Knum];
            Knum++;
            /* compute linear part of objective function */
            qp->opt_g0[i]-=(kernel_temp*a[ki]*(float64_t)label[ki]);
            /* compute quadratic part of objective function */
            qp->opt_g[varnum*i+i]=kernel_temp;

            for (j=i+1;j<varnum;j++) {
                kj=key[j];
                kernel_temp=Kval[Knum];
                Knum++;
                /* compute linear part of objective function */
                qp->opt_g0[i]-=(kernel_temp*a[kj]*(float64_t)label[kj]);
                qp->opt_g0[j]-=(kernel_temp*a[ki]*(float64_t)label[ki]);
                /* compute quadratic part of objective function */
                qp->opt_g[varnum*i+j]=(float64_t)label[ki]*(float64_t)label[kj]*kernel_temp;
                qp->opt_g[varnum*j+i]=qp->opt_g[varnum*i+j]; //(float64_t)label[ki]*(float64_t)label[kj]*kernel_temp;
            }

            if(verbosity>=3) {
                if(i % 20 == 0) {
                    SG_DEBUG("%ld..",i);
                }
            }
        }

        SG_FREE(KI);
        SG_FREE(KJ);
        SG_FREE(Kval);

        for (i=0;i<varnum;i++) {
            /* assure starting at feasible point */
            qp->opt_xinit[i]=a[key[i]];
            /* set linear part of objective function */
            qp->opt_g0[i]=(learn_parm->eps[key[i]]-(float64_t)label[key[i]]*c[key[i]])+qp->opt_g0[i]*(float64_t)label[key[i]];
        }

        if(verbosity>=3) {
            SG_DONE();
        }
    }
#endif
}

void CSVMLight::compute_matrices_for_optimization(
        int32_t* docs, int32_t* label, int32_t *exclude_from_eq_const,
        float64_t eq_target, int32_t *chosen, int32_t *active2dnum, int32_t *key,
        float64_t *a, float64_t *lin, float64_t *c, int32_t varnum, int32_t totdoc,
        float64_t *aicache, QP *qp)
{
    register int32_t ki,kj,i,j;
    register float64_t kernel_temp;

    qp->opt_n=varnum;
    qp->opt_ce0[0]=-eq_target; /* compute the constant for equality constraint */
    for (j=1;j<model->sv_num;j++) { /* start at 1 */
        if((!chosen[model->supvec[j]])
                && (!exclude_from_eq_const[(model->supvec[j])])) {
            qp->opt_ce0[0]+=model->alpha[j];
        }
    }
    if(learn_parm->biased_hyperplane)
        qp->opt_m=1;
    else
        qp->opt_m=0; /* eq-constraint will be ignored */

    /* init linear part of objective function */
    for (i=0;i<varnum;i++) {
        qp->opt_g0[i]=lin[key[i]];
    }

    for (i=0;i<varnum;i++) {
        ki=key[i];

        /* Compute the matrix for equality constraints */
        qp->opt_ce[i]=label[ki];
        qp->opt_low[i]=0;
        qp->opt_up[i]=learn_parm->svm_cost[ki];

        kernel_temp=compute_kernel(docs[ki], docs[ki]);
        /* compute linear part of objective function */
        qp->opt_g0[i]-=(kernel_temp*a[ki]*(float64_t)label[ki]);
        /* compute quadratic part of objective function */
        qp->opt_g[varnum*i+i]=kernel_temp;

        for (j=i+1;j<varnum;j++) {
            kj=key[j];
            kernel_temp=compute_kernel(docs[ki], docs[kj]);

            /* compute linear part of objective function */
            qp->opt_g0[i]-=(kernel_temp*a[kj]*(float64_t)label[kj]);
            qp->opt_g0[j]-=(kernel_temp*a[ki]*(float64_t)label[ki]);
            /* compute quadratic part of objective function */
            qp->opt_g[varnum*i+j]=(float64_t)label[ki]*(float64_t)label[kj]*kernel_temp;
            qp->opt_g[varnum*j+i]=qp->opt_g[varnum*i+j]; //(float64_t)label[ki]*(float64_t)label[kj]*kernel_temp;
        }

        if(verbosity>=3) {
            if(i % 20 == 0) {
                SG_DEBUG("%ld..",i);
            }
        }
    }

    for (i=0;i<varnum;i++) {
        /* assure starting at feasible point */
        qp->opt_xinit[i]=a[key[i]];
        /* set linear part of objective function */
        qp->opt_g0[i]=(learn_parm->eps[key[i]]-(float64_t)label[key[i]]*c[key[i]]) + qp->opt_g0[i]*(float64_t)label[key[i]];
    }

    if(verbosity>=3) {
        SG_DONE();
    }
}
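/* The two routines above fill the QP buffer in the format expected by the
 * pr_loqo-based subsolver: for the varnum working-set variables x (the
 * alphas being re-optimized) they assemble
 *   minimize    g0'x + 0.5 x'G x
 *   subject to  ce'x + ce0 = 0     (only if biased_hyperplane, opt_m=1)
 *               low <= x <= up     (0 <= a_i <= C_i)
 * with G[i][j] = y_i*y_j*K(i,j) and ce[i] = y_i, while g0 folds in the
 * contribution of the fixed (non-working) variables through lin[].
 */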
int32_t CSVMLight::calculate_svm_model(
        int32_t* docs, int32_t *label, float64_t *lin, float64_t *a,
        float64_t *a_old, float64_t *c, int32_t *working2dnum, int32_t *active2dnum)
    /* Compute decision function based on current values */
    /* of alpha. */
{
    int32_t i,ii,pos,b_calculated=0,first_low,first_high;
    float64_t ex_c,b_temp,b_low,b_high;

    if(verbosity>=3) {
        SG_DEBUG("Calculating model...");
    }

    if(!learn_parm->biased_hyperplane) {
        model->b=0;
        b_calculated=1;
    }

    for (ii=0;(i=working2dnum[ii])>=0;ii++) {
        if((a_old[i]>0) && (a[i]==0)) { /* remove from model */
            pos=model->index[i];
            model->index[i]=-1;
            (model->sv_num)--;
            model->supvec[pos]=model->supvec[model->sv_num];
            model->alpha[pos]=model->alpha[model->sv_num];
            model->index[model->supvec[pos]]=pos;
        }
        else if((a_old[i]==0) && (a[i]>0)) { /* add to model */
            model->supvec[model->sv_num]=docs[i];
            model->alpha[model->sv_num]=a[i]*(float64_t)label[i];
            model->index[i]=model->sv_num;
            (model->sv_num)++;
        }
        else if(a_old[i]==a[i]) { /* nothing to do */
        }
        else { /* just update alpha */
            model->alpha[model->index[i]]=a[i]*(float64_t)label[i];
        }

        ex_c=learn_parm->svm_cost[i]-learn_parm->epsilon_a;
        if((a_old[i]>=ex_c) && (a[i]<ex_c)) {
            (model->at_upper_bound)--;
        }
        else if((a_old[i]<ex_c) && (a[i]>=ex_c)) {
            (model->at_upper_bound)++;
        }

        if((!b_calculated)
                && (a[i]>learn_parm->epsilon_a) && (a[i]<ex_c)) { /* calculate b */
            model->b=((float64_t)label[i]*learn_parm->eps[i]-c[i]+lin[i]);
            b_calculated=1;
        }
    }

    /* No alpha in the working set not at bounds, so b was not
       calculated in the usual way. The following handles this special
       case. */
    if(learn_parm->biased_hyperplane
            && (!b_calculated)
            && (model->sv_num-1 == model->at_upper_bound)) {
        first_low=1;
        first_high=1;
        b_low=0;
        b_high=0;
        for (ii=0;(i=active2dnum[ii])>=0;ii++) {
            ex_c=learn_parm->svm_cost[i]-learn_parm->epsilon_a;
            if(a[i]<ex_c) {
                if(label[i]>0) {
                    b_temp=-(learn_parm->eps[i]-c[i]+lin[i]);
                    if((b_temp>b_low) || (first_low)) {
                        b_low=b_temp;
                        first_low=0;
                    }
                }
                else {
                    b_temp=-(-learn_parm->eps[i]-c[i]+lin[i]);
                    if((b_temp<b_high) || (first_high)) {
                        b_high=b_temp;
                        first_high=0;
                    }
                }
            }
            else {
                if(label[i]<0) {
                    b_temp=-(-learn_parm->eps[i]-c[i]+lin[i]);
                    if((b_temp>b_low) || (first_low)) {
                        b_low=b_temp;
                        first_low=0;
                    }
                }
                else {
                    b_temp=-(learn_parm->eps[i]-c[i]+lin[i]);
                    if((b_temp<b_high) || (first_high)) {
                        b_high=b_temp;
                        first_high=0;
                    }
                }
            }
        }
        if(first_high) {
            model->b=-b_low;
        }
        else if(first_low) {
            model->b=-b_high;
        }
        else {
            model->b=-(b_high+b_low)/2.0; /* select b as the middle of range */
        }
    }

    if(verbosity>=3) {
        SG_DONE();
    }

    return(model->sv_num-1); /* have to subtract one, since element 0 is empty */
}

int32_t CSVMLight::check_optimality(
        int32_t* label, float64_t *a, float64_t *lin, float64_t *c, int32_t totdoc,
        float64_t *maxdiff, float64_t epsilon_crit_org, int32_t *misclassified,
        int32_t *inconsistent, int32_t *active2dnum, int32_t *last_suboptimal_at,
        int32_t iteration)
    /* Check KT-conditions */
{
    int32_t i,ii,retrain;
    float64_t dist,ex_c,target;

    if (kernel->has_property(KP_LINADD) && get_linadd_enabled())
        learn_parm->epsilon_shrink=-learn_parm->epsilon_crit+epsilon_crit_org;
    else
        learn_parm->epsilon_shrink=learn_parm->epsilon_shrink*0.7+(*maxdiff)*0.3;
    retrain=0;
    (*maxdiff)=0;
    (*misclassified)=0;
    for (ii=0;(i=active2dnum[ii])>=0;ii++) {
        if((!inconsistent[i]) && label[i]) {
            dist=(lin[i]-model->b)*(float64_t)label[i]; /* 'distance' from hyperplane */
            target=-(learn_parm->eps[i]-(float64_t)label[i]*c[i]);
            ex_c=learn_parm->svm_cost[i]-learn_parm->epsilon_a;
            if(dist <= 0) {
                (*misclassified)++; /* does not work due to deactivation of var */
            }
            if((a[i]>learn_parm->epsilon_a) && (dist > target)) {
                if((dist-target)>(*maxdiff)) /* largest violation */
                    (*maxdiff)=dist-target;
            }
            else if((a[i]<ex_c) && (dist < target)) {
                if((target-dist)>(*maxdiff)) /* largest violation */
                    (*maxdiff)=target-dist;
            }
            /* Count how long a variable was at lower/upper bound (and optimal). */
            /* Variables which were at the bound and optimal for a long */
            /* time are unlikely to become support vectors. In case our */
            /* cache is filled up, those variables are excluded to save */
            /* kernel evaluations. (See chapter 'Shrinking'). */
            if((a[i]>(learn_parm->epsilon_a))
                    && (a[i]<ex_c)) {
                last_suboptimal_at[i]=iteration; /* not at bound */
            }
            else if((a[i]<=(learn_parm->epsilon_a))
                    && (dist < (target+learn_parm->epsilon_shrink))) {
                last_suboptimal_at[i]=iteration; /* not likely optimal */
            }
            else if((a[i]>=ex_c)
                    && (dist > (target-learn_parm->epsilon_shrink))) {
                last_suboptimal_at[i]=iteration; /* not likely optimal */
            }
        }
    }

    /* termination criterion */
    if((!retrain) && ((*maxdiff) > learn_parm->epsilon_crit)) {
        retrain=1;
    }
    return(retrain);
}
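/* check_optimality() above encodes the KT conditions per variable: with
 * dist = y_i*(lin[i]-b) and target = -(eps[i]-y_i*c[i]) (so target = 1 for
 * plain classification), optimality requires
 *   a_i = 0       =>  dist >= target
 *   0 < a_i < C_i =>  dist == target
 *   a_i = C_i     =>  dist <= target
 * each up to epsilon_crit; the largest violation is returned in *maxdiff and
 * also drives the shrinking heuristic via last_suboptimal_at[].
 */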
void CSVMLight::update_linear_component(
        int32_t* docs, int32_t* label, int32_t *active2dnum, float64_t *a,
        float64_t *a_old, int32_t *working2dnum, int32_t totdoc, float64_t *lin,
        float64_t *aicache, float64_t* c)
    /* keep track of the linear component */
    /* lin of the gradient etc. by updating */
    /* based on the change of the variables */
    /* in the current working set */
{
    register int32_t i=0,ii=0,j=0,jj=0;

    if (kernel->has_property(KP_LINADD) && get_linadd_enabled())
    {
        if (callback)
        {
            update_linear_component_mkl_linadd(docs, label, active2dnum, a,
                    a_old, working2dnum, totdoc, lin, aicache);
        }
        else
        {
            kernel->clear_normal();

            int32_t num_working=0;
            for (ii=0;(i=working2dnum[ii])>=0;ii++) {
                if(a[i] != a_old[i]) {
                    kernel->add_to_normal(docs[i], (a[i]-a_old[i])*(float64_t)label[i]);
                    num_working++;
                }
            }

            if (num_working>0)
            {
                if (parallel->get_num_threads() < 2)
                {
                    for (jj=0;(j=active2dnum[jj])>=0;jj++) {
                        lin[j]+=kernel->compute_optimized(docs[j]);
                    }
                }
#ifdef HAVE_PTHREAD
                else
                {
                    int32_t num_elem = 0;
                    for (jj=0;(j=active2dnum[jj])>=0;jj++) num_elem++;

                    pthread_t* threads = SG_MALLOC(pthread_t, parallel->get_num_threads()-1);
                    S_THREAD_PARAM* params = SG_MALLOC(S_THREAD_PARAM, parallel->get_num_threads()-1);
                    int32_t start = 0;
                    int32_t step = num_elem/parallel->get_num_threads();
                    int32_t end = step;

                    for (int32_t t=0; t<parallel->get_num_threads()-1; t++)
                    {
                        params[t].kernel = kernel;
                        params[t].lin = lin;
                        params[t].docs = docs;
                        params[t].active2dnum=active2dnum;
                        params[t].start = start;
                        params[t].end = end;
                        start=end;
                        end+=step;
                        pthread_create(&threads[t], NULL, update_linear_component_linadd_helper, (void*)&params[t]);
                    }

                    for (jj=params[parallel->get_num_threads()-2].end;(j=active2dnum[jj])>=0;jj++) {
                        lin[j]+=kernel->compute_optimized(docs[j]);
                    }
                    void* ret;
                    for (int32_t t=0; t<parallel->get_num_threads()-1; t++)
                        pthread_join(threads[t], &ret);

                    SG_FREE(params);
                    SG_FREE(threads);
                }
#endif
            }
        }
    }
    else
    {
        if (callback)
        {
            update_linear_component_mkl(docs, label, active2dnum,
                    a, a_old, working2dnum, totdoc, lin, aicache);
        }
        else {
            for (jj=0;(i=working2dnum[jj])>=0;jj++) {
                if(a[i] != a_old[i]) {
                    kernel->get_kernel_row(i,active2dnum,aicache);
                    for (ii=0;(j=active2dnum[ii])>=0;ii++)
                        lin[j]+=(a[i]-a_old[i])*aicache[j]*(float64_t)label[i];
                }
            }
        }
    }
}

void CSVMLight::update_linear_component_mkl(
        int32_t* docs, int32_t* label, int32_t *active2dnum, float64_t *a,
        float64_t *a_old, int32_t *working2dnum, int32_t totdoc, float64_t *lin,
        float64_t *aicache)
{
    //int inner_iters=0;
    int32_t num = kernel->get_num_vec_rhs();
    int32_t num_weights = -1;
    int32_t num_kernels = kernel->get_num_subkernels();
    const float64_t* old_beta = kernel->get_subkernel_weights(num_weights);
    ASSERT(num_weights==num_kernels);

    if ((kernel->get_kernel_type()==K_COMBINED) &&
            (!((CCombinedKernel*)kernel)->get_append_subkernel_weights())) // for combined kernel
    {
        CCombinedKernel* k = (CCombinedKernel*) kernel;
        CKernel* kn = k->get_first_kernel();
        int32_t n = 0, i, j;

        while (kn!=NULL)
        {
            for (i=0;i<num;i++)
            {
                if(a[i] != a_old[i])
                {
                    kn->get_kernel_row(i,NULL,aicache, true);
                    for (j=0;j<num;j++)
                        W[j*num_kernels+n]+=(a[i]-a_old[i])*aicache[j]*(float64_t)label[i];
                }
            }

            SG_UNREF(kn);
            kn = k->get_next_kernel();
            n++;
        }
    }
    else // hope the kernel is fast ...
    {
        float64_t* w_backup = SG_MALLOC(float64_t, num_kernels);
        float64_t* w1 = SG_MALLOC(float64_t, num_kernels);

        // backup and set to zero
        for (int32_t i=0; i<num_kernels; i++)
        {
            w_backup[i] = old_beta[i];
            w1[i]=0.0;
        }
        for (int32_t n=0; n<num_kernels; n++)
        {
            w1[n]=1.0;
            kernel->set_subkernel_weights(SGVector<float64_t>(w1, num_weights));

            for (int32_t i=0;i<num;i++)
            {
                if(a[i] != a_old[i])
                {
                    for (int32_t j=0;j<num;j++)
                        W[j*num_kernels+n]+=(a[i]-a_old[i])*compute_kernel(i,j)*(float64_t)label[i];
                }
            }
            w1[n]=0.0;
        }

        // restore old weights
        kernel->set_subkernel_weights(SGVector<float64_t>(w_backup,num_weights));

        SG_FREE(w_backup);
        SG_FREE(w1);
    }

    call_mkl_callback(a, label, lin);
}

void CSVMLight::update_linear_component_mkl_linadd(
        int32_t* docs, int32_t* label, int32_t *active2dnum, float64_t *a,
        float64_t *a_old, int32_t *working2dnum, int32_t totdoc, float64_t *lin,
        float64_t *aicache)
{
    //int inner_iters=0;

    // kernel with LP_LINADD property is assumed to have
    // compute_by_subkernel functions
    int32_t num = kernel->get_num_vec_rhs();
    int32_t num_weights = -1;
    int32_t num_kernels = kernel->get_num_subkernels();
    const float64_t* old_beta = kernel->get_subkernel_weights(num_weights);
    ASSERT(num_weights==num_kernels);

    float64_t* w_backup = SG_MALLOC(float64_t, num_kernels);
    float64_t* w1 = SG_MALLOC(float64_t, num_kernels);

    // backup and set to one
    for (int32_t i=0; i<num_kernels; i++)
    {
        w_backup[i] = old_beta[i];
        w1[i]=1.0;
    }
    // set the kernel weights
    kernel->set_subkernel_weights(SGVector<float64_t>(w1, num_weights));

    // create normal update (with changed alphas only)
    kernel->clear_normal();
    for (int32_t ii=0, i=0;(i=working2dnum[ii])>=0;ii++) {
        if(a[i] != a_old[i]) {
            kernel->add_to_normal(docs[i], (a[i]-a_old[i])*(float64_t)label[i]);
        }
    }

    if (parallel->get_num_threads() < 2)
    {
        // determine contributions of different kernels
        for (int32_t i=0; i<num; i++)
            kernel->compute_by_subkernel(i,&W[i*num_kernels]);
    }
#ifdef HAVE_PTHREAD
    else
    {
        pthread_t* threads = SG_MALLOC(pthread_t, parallel->get_num_threads()-1);
        S_THREAD_PARAM* params = SG_MALLOC(S_THREAD_PARAM, parallel->get_num_threads()-1);
        int32_t step= num/parallel->get_num_threads();

        for (int32_t t=0; t<parallel->get_num_threads()-1; t++)
        {
            params[t].kernel = kernel;
            params[t].W = W;
            params[t].start = t*step;
            params[t].end = (t+1)*step;
            pthread_create(&threads[t], NULL, CSVMLight::update_linear_component_mkl_linadd_helper, (void*)&params[t]);
        }

        for (int32_t i=params[parallel->get_num_threads()-2].end; i<num; i++)
            kernel->compute_by_subkernel(i,&W[i*num_kernels]);

        for (int32_t t=0; t<parallel->get_num_threads()-1; t++)
            pthread_join(threads[t], NULL);

        SG_FREE(params);
        SG_FREE(threads);
    }
#endif

    // restore old weights
    kernel->set_subkernel_weights(SGVector<float64_t>(w_backup,num_weights));

    call_mkl_callback(a, label, lin);
}

void* CSVMLight::update_linear_component_mkl_linadd_helper(void* p)
{
    S_THREAD_PARAM* params = (S_THREAD_PARAM*) p;

    int32_t num_kernels=params->kernel->get_num_subkernels();

    // determine contributions of different kernels
    for (int32_t i=params->start; i<params->end; i++)
        params->kernel->compute_by_subkernel(i,&(params->W[i*num_kernels]));

    return NULL;
}

void CSVMLight::call_mkl_callback(float64_t* a, int32_t* label, float64_t* lin)
{
    int32_t num = kernel->get_num_vec_rhs();
    int32_t num_kernels = kernel->get_num_subkernels();

    float64_t suma=0;
    float64_t* sumw=SG_MALLOC(float64_t, num_kernels);
#ifdef HAVE_LAPACK
    int nk = (int) num_kernels; /* calling external lib */
    double* alphay = SG_MALLOC(double, num);

    for (int32_t i=0; i<num; i++)
    {
        alphay[i]=a[i]*label[i];
        suma+=a[i];
    }

    for (int32_t i=0; i<num_kernels; i++)
        sumw[i]=0;

    cblas_dgemv(CblasColMajor, CblasNoTrans, num_kernels, (int) num, 0.5, (double*) W,
            num_kernels, alphay, 1, 1.0, (double*) sumw, 1);

    SG_FREE(alphay);
#else
    for (int32_t i=0; i<num; i++)
        suma += a[i];

    for (int32_t d=0; d<num_kernels; d++)
    {
        sumw[d]=0;
        for (int32_t i=0; i<num; i++)
            sumw[d] += a[i]*(0.5*label[i]*W[i*num_kernels+d]);
    }
#endif

    if (callback)
        mkl_converged=callback(mkl, sumw, suma);

    const float64_t* new_beta = kernel->get_subkernel_weights(num_kernels);

    // update lin
#ifdef HAVE_LAPACK
    cblas_dgemv(CblasColMajor, CblasTrans, nk, (int) num, 1.0, (double*) W,
            nk, (double*) new_beta, 1, 0.0, (double*) lin, 1);
#else
    for (int32_t i=0; i<num; i++)
        lin[i]=0;
    for (int32_t d=0; d<num_kernels; d++)
        if (new_beta[d]!=0)
            for (int32_t i=0; i<num; i++)
                lin[i] += new_beta[d]*W[i*num_kernels+d];
#endif

    SG_FREE(sumw);
}
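/* Layout note for the MKL path above: W is a num_kernels x num matrix
 * stored column-major, one column of per-subkernel scores per example, so
 * W[i*num_kernels+d] is the contribution of subkernel d to example i. The
 * first dgemv computes sumw = 0.5*W*(a.*y) (one weighted sum per subkernel,
 * handed to the MKL callback); the second computes lin = W'*beta, i.e. the
 * gradient cache under the new subkernel weights beta. The #else branches
 * spell out the same two products without LAPACK.
 */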
*working2dnum, float64_t *selcrit, int32_t *select, 01749 int32_t cache_only, int32_t *key, int32_t *chosen) 01750 /* Use the feasible direction approach to select the next 01751 qp-subproblem (see chapter 'Selecting a good working set'). If 01752 'cache_only' is true, then the variables are selected only among 01753 those for which the kernel evaluations are cached. */ 01754 { 01755 int32_t choosenum,i,j,k,activedoc,inum,valid; 01756 float64_t s; 01757 01758 for (inum=0;working2dnum[inum]>=0;inum++); /* find end of index */ 01759 choosenum=0; 01760 activedoc=0; 01761 for (i=0;(j=active2dnum[i])>=0;i++) { 01762 s=-label[j]; 01763 if(cache_only) 01764 { 01765 if (use_kernel_cache) 01766 valid=(kernel->kernel_cache_check(j)); 01767 else 01768 valid = 1 ; 01769 } 01770 else 01771 valid=1; 01772 if(valid 01773 && (!((a[j]<=(0+learn_parm->epsilon_a)) && (s<0))) 01774 && (!((a[j]>=(learn_parm->svm_cost[j]-learn_parm->epsilon_a)) 01775 && (s>0))) 01776 && (!chosen[j]) 01777 && (label[j]) 01778 && (!inconsistent[j])) 01779 { 01780 selcrit[activedoc]=(float64_t)label[j]*(learn_parm->eps[j]-(float64_t)label[j]*c[j]+(float64_t)label[j]*lin[j]); 01781 key[activedoc]=j; 01782 activedoc++; 01783 } 01784 } 01785 select_top_n(selcrit,activedoc,select,(int32_t)(qp_size/2)); 01786 for (k=0;(choosenum<(qp_size/2)) && (k<(qp_size/2)) && (k<activedoc);k++) { 01787 i=key[select[k]]; 01788 chosen[i]=1; 01789 working2dnum[inum+choosenum]=i; 01790 choosenum+=1; 01791 if (use_kernel_cache) 01792 kernel->kernel_cache_touch(i); 01793 /* make sure it does not get kicked */ 01794 /* out of cache */ 01795 } 01796 01797 activedoc=0; 01798 for (i=0;(j=active2dnum[i])>=0;i++) { 01799 s=label[j]; 01800 if(cache_only) 01801 { 01802 if (use_kernel_cache) 01803 valid=(kernel->kernel_cache_check(j)); 01804 else 01805 valid = 1 ; 01806 } 01807 else 01808 valid=1; 01809 if(valid 01810 && (!((a[j]<=(0+learn_parm->epsilon_a)) && (s<0))) 01811 && (!((a[j]>=(learn_parm->svm_cost[j]-learn_parm->epsilon_a)) 01812 && (s>0))) 01813 && (!chosen[j]) 01814 && (label[j]) 01815 && (!inconsistent[j])) 01816 { 01817 selcrit[activedoc]=-(float64_t)label[j]*(learn_parm->eps[j]-(float64_t)label[j]*c[j]+(float64_t)label[j]*lin[j]); 01818 /* selcrit[activedoc]=-(float64_t)(label[j]*(-1.0+(float64_t)label[j]*lin[j])); */ 01819 key[activedoc]=j; 01820 activedoc++; 01821 } 01822 } 01823 select_top_n(selcrit,activedoc,select,(int32_t)(qp_size/2)); 01824 for (k=0;(choosenum<qp_size) && (k<(qp_size/2)) && (k<activedoc);k++) { 01825 i=key[select[k]]; 01826 chosen[i]=1; 01827 working2dnum[inum+choosenum]=i; 01828 choosenum+=1; 01829 if (use_kernel_cache) 01830 kernel->kernel_cache_touch(i); /* make sure it does not get kicked */ 01831 /* out of cache */ 01832 } 01833 working2dnum[inum+choosenum]=-1; /* complete index */ 01834 return(choosenum); 01835 } 01836 01837 int32_t CSVMLight::select_next_qp_subproblem_rand( 01838 int32_t* label, float64_t *a, float64_t *lin, float64_t *c, int32_t totdoc, 01839 int32_t qp_size, int32_t *inconsistent, int32_t *active2dnum, 01840 int32_t *working2dnum, float64_t *selcrit, int32_t *select, int32_t *key, 01841 int32_t *chosen, int32_t iteration) 01842 /* Use the feasible direction approach to select the next 01843 qp-subproblem (see section 'Selecting a good working set'). Chooses 01844 a feasible direction at (pseudo) random to help jump over numerical 01845 problem. 
int32_t CSVMLight::select_next_qp_subproblem_rand(
	int32_t* label, float64_t *a, float64_t *lin, float64_t *c, int32_t totdoc,
	int32_t qp_size, int32_t *inconsistent, int32_t *active2dnum,
	int32_t *working2dnum, float64_t *selcrit, int32_t *select, int32_t *key,
	int32_t *chosen, int32_t iteration)
	/* Use the feasible direction approach to select the next
	   qp-subproblem (see section 'Selecting a good working set'). Chooses
	   a feasible direction at (pseudo) random to help jump over numerical
	   problems. */
{
	int32_t choosenum,i,j,k,activedoc,inum;
	float64_t s;

	for (inum=0;working2dnum[inum]>=0;inum++); /* find end of index */
	choosenum=0;
	activedoc=0;
	for (i=0;(j=active2dnum[i])>=0;i++) {
		s=-label[j];
		if((!((a[j]<=(0+learn_parm->epsilon_a)) && (s<0)))
		   && (!((a[j]>=(learn_parm->svm_cost[j]-learn_parm->epsilon_a))
				 && (s>0)))
		   && (!inconsistent[j])
		   && (label[j])
		   && (!chosen[j])) {
			selcrit[activedoc]=(j+iteration) % totdoc;
			key[activedoc]=j;
			activedoc++;
		}
	}
	select_top_n(selcrit,activedoc,select,(int32_t)(qp_size/2));
	for (k=0;(choosenum<(qp_size/2)) && (k<(qp_size/2)) && (k<activedoc);k++) {
		i=key[select[k]];
		chosen[i]=1;
		working2dnum[inum+choosenum]=i;
		choosenum+=1;
		if (use_kernel_cache)
			kernel->kernel_cache_touch(i); /* make sure it does not get kicked */
			                               /* out of cache */
	}

	activedoc=0;
	for (i=0;(j=active2dnum[i])>=0;i++) {
		s=label[j];
		if((!((a[j]<=(0+learn_parm->epsilon_a)) && (s<0)))
		   && (!((a[j]>=(learn_parm->svm_cost[j]-learn_parm->epsilon_a))
				 && (s>0)))
		   && (!inconsistent[j])
		   && (label[j])
		   && (!chosen[j])) {
			selcrit[activedoc]=(j+iteration) % totdoc;
			key[activedoc]=j;
			activedoc++;
		}
	}
	select_top_n(selcrit,activedoc,select,(int32_t)(qp_size/2));
	for (k=0;(choosenum<qp_size) && (k<(qp_size/2)) && (k<activedoc);k++) {
		i=key[select[k]];
		chosen[i]=1;
		working2dnum[inum+choosenum]=i;
		choosenum+=1;
		if (use_kernel_cache)
			kernel->kernel_cache_touch(i); /* make sure it does not get kicked */
			                               /* out of cache */
	}
	working2dnum[inum+choosenum]=-1; /* complete index */
	return(choosenum);
}


void CSVMLight::select_top_n(
	float64_t *selcrit, int32_t range, int32_t* select, int32_t n)
{
	register int32_t i,j;

	for (i=0;(i<n) && (i<range);i++) { /* Initialize with the first n elements */
		for (j=i;j>=0;j--) {
			if((j>0) && (selcrit[select[j-1]]<selcrit[i])){
				select[j]=select[j-1];
			}
			else {
				select[j]=i;
				j=-1;
			}
		}
	}
	if(n>0) {
		for (i=n;i<range;i++) {
			if(selcrit[i]>selcrit[select[n-1]]) {
				for (j=n-1;j>=0;j--) {
					if((j>0) && (selcrit[select[j-1]]<selcrit[i])) {
						select[j]=select[j-1];
					}
					else {
						select[j]=i;
						j=-1;
					}
				}
			}
		}
	}
}


/******************************** Shrinking *********************************/

void CSVMLight::init_shrink_state(
	SHRINK_STATE *shrink_state, int32_t totdoc, int32_t maxhistory)
{
	int32_t i;

	shrink_state->deactnum=0;
	shrink_state->active = SG_MALLOC(int32_t, totdoc);
	shrink_state->inactive_since = SG_MALLOC(int32_t, totdoc);
	shrink_state->a_history = SG_MALLOC(float64_t*, maxhistory);
	shrink_state->maxhistory=maxhistory;
	shrink_state->last_lin = SG_MALLOC(float64_t, totdoc);
	shrink_state->last_a = SG_MALLOC(float64_t, totdoc);

	for (i=0;i<totdoc;i++) {
		shrink_state->active[i]=1;
		shrink_state->inactive_since[i]=0;
		shrink_state->last_a[i]=0;
		shrink_state->last_lin[i]=0;
	}
}
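/* Note on the shrink state: a_history holds one alpha snapshot per shrink
 * event (filled by shrink_problem() in the non-linadd case), so that
 * reactivate_inactive_examples() can later replay the alpha changes and
 * bring 'lin' up to date for the variables that were shrunk away; last_a
 * and last_lin serve the same purpose for the linadd shortcut. */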
void CSVMLight::shrink_state_cleanup(SHRINK_STATE *shrink_state)
{
	SG_FREE(shrink_state->active);
	SG_FREE(shrink_state->inactive_since);
	if(shrink_state->deactnum > 0)
		SG_FREE((shrink_state->a_history[shrink_state->deactnum-1]));
	SG_FREE((shrink_state->a_history));
	SG_FREE((shrink_state->last_a));
	SG_FREE((shrink_state->last_lin));
}

int32_t CSVMLight::shrink_problem(
	SHRINK_STATE *shrink_state, int32_t *active2dnum,
	int32_t *last_suboptimal_at, int32_t iteration, int32_t totdoc,
	int32_t minshrink, float64_t *a, int32_t *inconsistent, float64_t* c,
	float64_t* lin, int* label)
	/* Shrink some variables away. Do the shrinking only if at least
	   minshrink variables can be removed. */
{
	int32_t i,ii,change,activenum,lastiter;
	float64_t *a_old=NULL;

	activenum=0;
	change=0;
	for (ii=0;active2dnum[ii]>=0;ii++) {
		i=active2dnum[ii];
		activenum++;
		lastiter=last_suboptimal_at[i];

		if(((iteration-lastiter) > learn_parm->svm_iter_to_shrink)
		   || (inconsistent[i])) {
			change++;
		}
	}
	if((change>=minshrink) /* shrink only if sufficiently many candidates */
	   && (shrink_state->deactnum<shrink_state->maxhistory)) { /* and enough memory */
		/* Shrink problem by removing those variables which are */
		/* optimal at a bound for a minimum number of iterations */
		if(verbosity>=2) {
			SG_INFO(" Shrinking...");
		}

		if (!(kernel->has_property(KP_LINADD) && get_linadd_enabled())) { /* non-linear case: save alphas */
			a_old=SG_MALLOC(float64_t, totdoc);
			shrink_state->a_history[shrink_state->deactnum]=a_old;
			for (i=0;i<totdoc;i++) {
				a_old[i]=a[i];
			}
		}
		for (ii=0;active2dnum[ii]>=0;ii++) {
			i=active2dnum[ii];
			lastiter=last_suboptimal_at[i];
			if(((iteration-lastiter) > learn_parm->svm_iter_to_shrink)
			   || (inconsistent[i])) {
				shrink_state->active[i]=0;
				shrink_state->inactive_since[i]=shrink_state->deactnum;
			}
		}
		activenum=compute_index(shrink_state->active,totdoc,active2dnum);
		shrink_state->deactnum++;
		if(kernel->has_property(KP_LINADD) && get_linadd_enabled())
			shrink_state->deactnum=0;

		if(verbosity>=2) {
			SG_DONE();
			SG_DEBUG("Number of inactive variables = %d\n", totdoc-activenum);
		}
	}
	return(activenum);
}

void* CSVMLight::reactivate_inactive_examples_linadd_helper(void* p)
{
	S_THREAD_PARAM_REACTIVATE_LINADD* params = (S_THREAD_PARAM_REACTIVATE_LINADD*) p;

	CKernel* k = params->kernel;
	float64_t* lin = params->lin;
	float64_t* last_lin = params->last_lin;
	int32_t* active = params->active;
	int32_t* docs = params->docs;
	int32_t start = params->start;
	int32_t end = params->end;

	for (int32_t i=start;i<end;i++)
	{
		if (!active[i])
			lin[i] = last_lin[i]+k->compute_optimized(docs[i]);

		last_lin[i]=lin[i];
	}

	return NULL;
}
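/* The vanilla helper below computes partial updates of 'lin' for a chunk of
 * changed variables. When run multi-threaded, params->lin points at a
 * thread-local scratch buffer (see reactivate_inactive_examples()), and the
 * caller accumulates the per-thread buffers after joining. */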
void* CSVMLight::reactivate_inactive_examples_vanilla_helper(void* p)
{
	S_THREAD_PARAM_REACTIVATE_VANILLA* params = (S_THREAD_PARAM_REACTIVATE_VANILLA*) p;
	ASSERT(params);
	ASSERT(params->kernel);
	ASSERT(params->lin);
	ASSERT(params->aicache);
	ASSERT(params->a);
	ASSERT(params->a_old);
	ASSERT(params->changed2dnum);
	ASSERT(params->inactive2dnum);
	ASSERT(params->label);

	CKernel* k = params->kernel;
	float64_t* lin = params->lin;
	float64_t* aicache = params->aicache;
	float64_t* a = params->a;
	float64_t* a_old = params->a_old;
	int32_t* changed2dnum = params->changed2dnum;
	int32_t* inactive2dnum = params->inactive2dnum;
	int32_t* label = params->label;
	int32_t start = params->start;
	int32_t end = params->end;

	for (int32_t ii=start;ii<end;ii++)
	{
		int32_t i=changed2dnum[ii];
		int32_t j=0;
		ASSERT(i>=0);

		k->get_kernel_row(i,inactive2dnum,aicache);
		for (int32_t jj=0;(j=inactive2dnum[jj])>=0;jj++)
			lin[j]+=(a[i]-a_old[i])*aicache[j]*(float64_t)label[i];
	}
	return NULL;
}

void CSVMLight::reactivate_inactive_examples(
	int32_t* label, float64_t *a, SHRINK_STATE *shrink_state, float64_t *lin,
	float64_t *c, int32_t totdoc, int32_t iteration, int32_t *inconsistent,
	int32_t* docs, float64_t *aicache, float64_t *maxdiff)
	/* Make all variables active again which had been removed by
	   shrinking. */
	/* Computes lin for those variables from scratch. */
{
	register int32_t i,j,ii,jj,t,*changed2dnum,*inactive2dnum;
	int32_t *changed,*inactive;
	register float64_t *a_old,dist;
	float64_t ex_c,target;

	if (kernel->has_property(KP_LINADD) && get_linadd_enabled()) { /* special linear case */
		a_old=shrink_state->last_a;

		if (!use_batch_computation || !kernel->has_property(KP_BATCHEVALUATION))
		{
			SG_DEBUG(" clear normal - linadd\n");
			kernel->clear_normal();

			int32_t num_modified=0;
			for (i=0;i<totdoc;i++) {
				if(a[i] != a_old[i]) {
					kernel->add_to_normal(docs[i], ((a[i]-a_old[i])*(float64_t)label[i]));
					a_old[i]=a[i];
					num_modified++;
				}
			}

			if (num_modified>0)
			{
				int32_t num_threads=parallel->get_num_threads();
				ASSERT(num_threads>0);
				if (num_threads < 2)
				{
					S_THREAD_PARAM_REACTIVATE_LINADD params;
					params.kernel=kernel;
					params.lin=lin;
					params.last_lin=shrink_state->last_lin;
					params.docs=docs;
					params.active=shrink_state->active;
					params.start=0;
					params.end=totdoc;
					reactivate_inactive_examples_linadd_helper((void*) &params);
				}
#ifdef HAVE_PTHREAD
				else
				{
					pthread_t* threads = SG_MALLOC(pthread_t, num_threads-1);
					S_THREAD_PARAM_REACTIVATE_LINADD* params = SG_MALLOC(S_THREAD_PARAM_REACTIVATE_LINADD, num_threads);
					int32_t step= totdoc/num_threads;

					for (t=0; t<num_threads-1; t++)
					{
						params[t].kernel=kernel;
						params[t].lin=lin;
						params[t].last_lin=shrink_state->last_lin;
						params[t].docs=docs;
						params[t].active=shrink_state->active;
						params[t].start = t*step;
						params[t].end = (t+1)*step;
						pthread_create(&threads[t], NULL, CSVMLight::reactivate_inactive_examples_linadd_helper, (void*)&params[t]);
					}

					params[t].kernel=kernel;
					params[t].lin=lin;
					params[t].last_lin=shrink_state->last_lin;
					params[t].docs=docs;
					params[t].active=shrink_state->active;
					params[t].start = t*step;
					params[t].end = totdoc;
					reactivate_inactive_examples_linadd_helper((void*) &params[t]);

					for (t=0; t<num_threads-1; t++)
						pthread_join(threads[t], NULL);

					SG_FREE(threads);
					SG_FREE(params);
				}
#endif
			}
		}
		else
		{
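			/* Batch-evaluation path: collect the alphas that changed since
			 * the last reactivation as a weight vector over their indices,
			 * then let the kernel evaluate the rows of all inactive examples
			 * in a single compute_batch() call. */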
			float64_t *alphas = SG_MALLOC(float64_t, totdoc);
			int32_t *idx = SG_MALLOC(int32_t, totdoc);
			int32_t num_suppvec=0;

			for (i=0; i<totdoc; i++)
			{
				if(a[i] != a_old[i])
				{
					alphas[num_suppvec] = (a[i]-a_old[i])*(float64_t)label[i];
					idx[num_suppvec] = i;
					a_old[i] = a[i];
					num_suppvec++;
				}
			}

			if (num_suppvec>0)
			{
				int32_t num_inactive=0;
				int32_t* inactive_idx=SG_MALLOC(int32_t, totdoc); // in fact we only need a subset

				j=0;
				for (i=0;i<totdoc;i++)
				{
					if(!shrink_state->active[i])
					{
						inactive_idx[j++] = i;
						num_inactive++;
					}
				}

				if (num_inactive>0)
				{
					float64_t* dest = SG_MALLOC(float64_t, num_inactive);
					memset(dest, 0, sizeof(float64_t)*num_inactive);

					kernel->compute_batch(num_inactive, inactive_idx, dest, num_suppvec, idx, alphas);

					j=0;
					for (i=0;i<totdoc;i++) {
						if(!shrink_state->active[i]) {
							lin[i] = shrink_state->last_lin[i] + dest[j++];
						}
						shrink_state->last_lin[i]=lin[i];
					}

					SG_FREE(dest);
				}
				else
				{
					for (i=0;i<totdoc;i++)
						shrink_state->last_lin[i]=lin[i];
				}
				SG_FREE(inactive_idx);
			}
			SG_FREE(alphas);
			SG_FREE(idx);
		}

		kernel->delete_optimization();
	}
	else
	{
		changed=SG_MALLOC(int32_t, totdoc);
		changed2dnum=SG_MALLOC(int32_t, totdoc+11);
		inactive=SG_MALLOC(int32_t, totdoc);
		inactive2dnum=SG_MALLOC(int32_t, totdoc+11);
		for (t=shrink_state->deactnum-1;(t>=0) && shrink_state->a_history[t];t--)
		{
			if(verbosity>=2) {
				SG_INFO("%d..",t);
			}
			a_old=shrink_state->a_history[t];
			for (i=0;i<totdoc;i++) {
				inactive[i]=((!shrink_state->active[i])
							 && (shrink_state->inactive_since[i] == t));
				changed[i]= (a[i] != a_old[i]);
			}
			compute_index(inactive,totdoc,inactive2dnum);
			compute_index(changed,totdoc,changed2dnum);
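			/* For every shrink snapshot t (newest first), update lin[j] of
			 * the examples deactivated at t using the kernel rows of all
			 * variables whose alpha changed since that snapshot. */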
			//TODO: PUT THIS BACK IN!!!!!!!! int32_t num_threads=parallel->get_num_threads();
			int32_t num_threads=1;
			ASSERT(num_threads>0);

			if (num_threads < 2)
			{
				for (ii=0;(i=changed2dnum[ii])>=0;ii++) {
					kernel->get_kernel_row(i,inactive2dnum,aicache);
					for (jj=0;(j=inactive2dnum[jj])>=0;jj++)
						lin[j]+=(a[i]-a_old[i])*aicache[j]*(float64_t)label[i];
				}
			}
#ifdef HAVE_PTHREAD
			else
			{
				//find number of the changed ones
				int32_t num_changed=0;
				for (ii=0;changed2dnum[ii]>=0;ii++)
					num_changed++;

				if (num_changed>0)
				{
					pthread_t* threads= SG_MALLOC(pthread_t, num_threads-1);
					S_THREAD_PARAM_REACTIVATE_VANILLA* params = SG_MALLOC(S_THREAD_PARAM_REACTIVATE_VANILLA, num_threads);
					int32_t step= num_changed/num_threads;

					// alloc num_threads many tmp buffers
					float64_t* tmp_lin=SG_MALLOC(float64_t, totdoc*num_threads);
					memset(tmp_lin, 0, sizeof(float64_t)*((size_t) totdoc)*num_threads);
					float64_t* tmp_aicache=SG_MALLOC(float64_t, totdoc*num_threads);
					memset(tmp_aicache, 0, sizeof(float64_t)*((size_t) totdoc)*num_threads);

					int32_t thr;
					for (thr=0; thr<num_threads-1; thr++)
					{
						params[thr].kernel=kernel;
						params[thr].lin=&tmp_lin[thr*totdoc];
						params[thr].aicache=&tmp_aicache[thr*totdoc];
						params[thr].a=a;
						params[thr].a_old=a_old;
						params[thr].changed2dnum=changed2dnum;
						params[thr].inactive2dnum=inactive2dnum;
						params[thr].label=label;
						params[thr].start = thr*step;
						params[thr].end = (thr+1)*step;
						pthread_create(&threads[thr], NULL, CSVMLight::reactivate_inactive_examples_vanilla_helper, (void*)&params[thr]);
					}

					params[thr].kernel=kernel;
					params[thr].lin=&tmp_lin[thr*totdoc];
					params[thr].aicache=&tmp_aicache[thr*totdoc];
					params[thr].a=a;
					params[thr].a_old=a_old;
					params[thr].changed2dnum=changed2dnum;
					params[thr].inactive2dnum=inactive2dnum;
					params[thr].label=label;
					params[thr].start = thr*step;
					params[thr].end = num_changed;
					reactivate_inactive_examples_vanilla_helper((void*) &params[thr]);

					for (jj=0;(j=inactive2dnum[jj])>=0;jj++)
						lin[j]+=tmp_lin[totdoc*thr+j];

					for (thr=0; thr<num_threads-1; thr++)
					{
						pthread_join(threads[thr], NULL);

						//add up results
						for (jj=0;(j=inactive2dnum[jj])>=0;jj++)
							lin[j]+=tmp_lin[totdoc*thr+j];
					}

					SG_FREE(tmp_lin);
					SG_FREE(tmp_aicache);
					SG_FREE(threads);
					SG_FREE(params);
				}
			}
#endif
		}
		SG_FREE(changed);
		SG_FREE(changed2dnum);
		SG_FREE(inactive);
		SG_FREE(inactive2dnum);
	}
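	/* Finally, re-check the KT conditions for all examples: record the
	 * largest violation in *maxdiff and mark a variable active again unless
	 * it sits at a bound and satisfies its constraint with a margin of
	 * epsilon_shrink. */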
	(*maxdiff)=0;
	for (i=0;i<totdoc;i++) {
		shrink_state->inactive_since[i]=shrink_state->deactnum-1;
		if(!inconsistent[i]) {
			dist=(lin[i]-model->b)*(float64_t)label[i];
			target=-(learn_parm->eps[i]-(float64_t)label[i]*c[i]);
			ex_c=learn_parm->svm_cost[i]-learn_parm->epsilon_a;
			if((a[i]>learn_parm->epsilon_a) && (dist > target)) {
				if((dist-target)>(*maxdiff)) /* largest violation */
					(*maxdiff)=dist-target;
			}
			else if((a[i]<ex_c) && (dist < target)) {
				if((target-dist)>(*maxdiff)) /* largest violation */
					(*maxdiff)=target-dist;
			}
			if((a[i]>(0+learn_parm->epsilon_a))
			   && (a[i]<ex_c)) {
				shrink_state->active[i]=1; /* not at bound */
			}
			else if((a[i]<=(0+learn_parm->epsilon_a))
					&& (dist < (target+learn_parm->epsilon_shrink))) {
				shrink_state->active[i]=1;
			}
			else if((a[i]>=ex_c)
					&& (dist > (target-learn_parm->epsilon_shrink))) {
				shrink_state->active[i]=1;
			}
			else if(learn_parm->sharedslack) { /* make all active when sharedslack */
				shrink_state->active[i]=1;
			}
		}
	}

	if (!(kernel->has_property(KP_LINADD) && get_linadd_enabled())) { /* update history for non-linear */
		for (i=0;i<totdoc;i++) {
			(shrink_state->a_history[shrink_state->deactnum-1])[i]=a[i];
		}
		for (t=shrink_state->deactnum-2;(t>=0) && shrink_state->a_history[t];t--) {
			SG_FREE(shrink_state->a_history[t]);
			shrink_state->a_history[t]=0;
		}
	}
}


/* start the optimizer and return the optimal values */
float64_t* CSVMLight::optimize_qp(
	QP *qp, float64_t *epsilon_crit, int32_t nx, float64_t *threshold,
	int32_t& svm_maxqpsize)
{
	register int32_t i, j, result;
	float64_t margin, obj_before, obj_after;
	float64_t sigdig, dist, epsilon_loqo;
	int32_t iter;

	if(!primal) { /* allocate memory at first call */
		primal=SG_MALLOC(float64_t, nx*3);
		dual=SG_MALLOC(float64_t, nx*2+1);
	}

	obj_before=0; /* calculate objective before optimization */
	for(i=0;i<qp->opt_n;i++) {
		obj_before+=(qp->opt_g0[i]*qp->opt_xinit[i]);
		obj_before+=(0.5*qp->opt_xinit[i]*qp->opt_xinit[i]*qp->opt_g[i*qp->opt_n+i]);
		for(j=0;j<i;j++) {
			obj_before+=(qp->opt_xinit[j]*qp->opt_xinit[i]*qp->opt_g[j*qp->opt_n+i]);
		}
	}

	result=STILL_RUNNING;
	qp->opt_ce0[0]*=(-1.0);
	/* Run pr_loqo. If a run fails, try again with parameters which lead */
	/* to a slower, but more robust setting. */
	for(margin=init_margin,iter=init_iter;
		(margin<=0.9999999) && (result!=OPTIMAL_SOLUTION);) {

		opt_precision=CMath::max(opt_precision, DEF_PRECISION);
		sigdig=-log10(opt_precision);

		result=pr_loqo((int32_t)qp->opt_n,(int32_t)qp->opt_m,
				(float64_t *)qp->opt_g0,(float64_t *)qp->opt_g,
				(float64_t *)qp->opt_ce,(float64_t *)qp->opt_ce0,
				(float64_t *)qp->opt_low,(float64_t *)qp->opt_up,
				(float64_t *)primal,(float64_t *)dual,
				(int32_t)(verbosity-2),
				(float64_t)sigdig,(int32_t)iter,
				(float64_t)margin,(float64_t)(qp->opt_up[0])/4.0,(int32_t)0);

		if(CMath::is_nan(dual[0])) { /* check for choldc problem */
			if(verbosity>=2) {
				SG_SDEBUG("Restarting PR_LOQO with more conservative parameters.\n");
			}
			if(init_margin<0.80) { /* become more conservative in general */
				init_margin=(4.0*margin+1.0)/5.0;
			}
			margin=(margin+1.0)/2.0;
			(opt_precision)*=10.0; /* reduce precision */
			if(verbosity>=2) {
				SG_SDEBUG("Reducing precision of PR_LOQO.\n");
			}
		}
		else if(result!=OPTIMAL_SOLUTION) {
			iter+=2000;
			init_iter+=10;
			(opt_precision)*=10.0; /* reduce precision */
			if(verbosity>=2) {
				SG_SDEBUG("Reducing precision of PR_LOQO due to (%d).\n",result);
			}
		}
	}

	if(qp->opt_m) /* Thanks to Alex Smola for this hint */
		model_b=dual[0];
	else
		model_b=0;
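	/* The KT check below recomputes the gradient 'dist' for each variable;
	 * the two inner loops appear to exploit the symmetry of opt_g, reading
	 * entries below the diagonal as opt_g[j*n+i] and entries on or above it
	 * as opt_g[i*n+j]. */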
	/* Check the precision of the alphas. If results of current optimization */
	/* violate KT-Conditions, relax the epsilon on the bounds on alphas. */
	epsilon_loqo=1E-10;
	for(i=0;i<qp->opt_n;i++) {
		dist=-model_b*qp->opt_ce[i];
		dist+=(qp->opt_g0[i]+1.0);
		for(j=0;j<i;j++) {
			dist+=(primal[j]*qp->opt_g[j*qp->opt_n+i]);
		}
		for(j=i;j<qp->opt_n;j++) {
			dist+=(primal[j]*qp->opt_g[i*qp->opt_n+j]);
		}
		/* SG_SDEBUG("LOQO: a[%d]=%f, dist=%f, b=%f\n",i,primal[i],dist,dual[0]); */
		if((primal[i]<(qp->opt_up[i]-epsilon_loqo)) && (dist < (1.0-(*epsilon_crit)))) {
			epsilon_loqo=(qp->opt_up[i]-primal[i])*2.0;
		}
		else if((primal[i]>(0+epsilon_loqo)) && (dist > (1.0+(*epsilon_crit)))) {
			epsilon_loqo=primal[i]*2.0;
		}
	}

	for(i=0;i<qp->opt_n;i++) { /* clip alphas to bounds */
		if(primal[i]<=(0+epsilon_loqo)) {
			primal[i]=0;
		}
		else if(primal[i]>=(qp->opt_up[i]-epsilon_loqo)) {
			primal[i]=qp->opt_up[i];
		}
	}

	obj_after=0; /* calculate objective after optimization */
	for(i=0;i<qp->opt_n;i++) {
		obj_after+=(qp->opt_g0[i]*primal[i]);
		obj_after+=(0.5*primal[i]*primal[i]*qp->opt_g[i*qp->opt_n+i]);
		for(j=0;j<i;j++) {
			obj_after+=(primal[j]*primal[i]*qp->opt_g[j*qp->opt_n+i]);
		}
	}

	/* if optimizer returned NAN values, reset and retry with smaller */
	/* working set. */
	if(CMath::is_nan(obj_after) || CMath::is_nan(model_b)) {
		for(i=0;i<qp->opt_n;i++) {
			primal[i]=qp->opt_xinit[i];
		}
		model_b=0;
		if(svm_maxqpsize>2) {
			svm_maxqpsize--; /* decrease size of qp-subproblems */
		}
	}

	if(obj_after >= obj_before) { /* check whether there was progress */
		(opt_precision)/=100.0;
		precision_violations++;
		if(verbosity>=2) {
			SG_SDEBUG("Increasing Precision of PR_LOQO.\n");
		}
	}

	// TODO: add parameter for this (e.g. for MTL experiments)
	if(precision_violations > 5000) {
		(*epsilon_crit)*=10.0;
		precision_violations=0;
		SG_SINFO("Relaxing epsilon on KT-Conditions.\n");
	}

	(*threshold)=model_b;

	if(result!=OPTIMAL_SOLUTION) {
		SG_SERROR("PR_LOQO did not converge.\n");
		return(qp->opt_xinit);
	}
	else {
		return(primal);
	}
}


#endif //USE_SVMLIGHT