SHOGUN v2.0.0
/*
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 3 of the License, or
 * (at your option) any later version.
 *
 * Written (W) 2004-2009 Soeren Sonnenburg, Gunnar Raetsch
 *                       Alexander Zien, Marius Kloft, Chen Guohua
 * Copyright (C) 2009 Fraunhofer Institute FIRST and Max-Planck-Society
 * Copyright (C) 2010 Ryota Tomioka (University of Tokyo)
 */

#include <list>
#include <shogun/lib/Signal.h>
#include <shogun/classifier/mkl/MKL.h>
#include <shogun/classifier/svm/LibSVM.h>
#include <shogun/kernel/CombinedKernel.h>

using namespace shogun;

CMKL::CMKL(CSVM* s) : CSVM(), svm(NULL), C_mkl(0), mkl_norm(1), ent_lambda(0),
    mkl_block_norm(1), beta_local(NULL), mkl_iterations(0), mkl_epsilon(1e-5),
    interleaved_optimization(true), w_gap(1.0), rho(0)
{
    set_constraint_generator(s);
#ifdef USE_CPLEX
    lp_cplex = NULL;
    env = NULL;
#endif

#ifdef USE_GLPK
    lp_glpk = NULL;
#endif

    SG_DEBUG("creating MKL object %p\n", this);
    lp_initialized = false;
}

CMKL::~CMKL()
{
    // -- Delete beta_local for ElasticnetMKL
    SG_FREE(beta_local);

    SG_DEBUG("deleting MKL object %p\n", this);
    if (svm)
        svm->set_callback_function(NULL, NULL);
    SG_UNREF(svm);
}

void CMKL::init_solver()
{
#ifdef USE_CPLEX
    cleanup_cplex();

    if (get_solver_type()==ST_CPLEX)
        init_cplex();
#endif

#ifdef USE_GLPK
    cleanup_glpk();

    if (get_solver_type()==ST_GLPK)
        init_glpk();
#endif
}

#ifdef USE_CPLEX
bool CMKL::init_cplex()
{
    while (env==NULL)
    {
        SG_INFO("trying to initialize CPLEX\n");

        int status = 0; // calling external lib
        env = CPXopenCPLEX(&status);

        if (env==NULL)
        {
            char errmsg[1024];
            SG_WARNING("Could not open CPLEX environment.\n");
            CPXgeterrorstring(env, status, errmsg);
            SG_WARNING("%s", errmsg);
            SG_WARNING("retrying in 60 seconds\n");
            sleep(60);
        }
        else
        {
            // select dual simplex based optimization
            status = CPXsetintparam(env, CPX_PARAM_LPMETHOD, CPX_ALG_DUAL);
            if (status)
            {
                SG_ERROR("Failure to select dual lp optimization, error %d.\n", status);
            }
            else
            {
                status = CPXsetintparam(env, CPX_PARAM_DATACHECK, CPX_ON);
                if (status)
                {
                    SG_ERROR("Failure to turn on data checking, error %d.\n", status);
                }
                else
                {
                    lp_cplex = CPXcreateprob(env, &status, "light");

                    if (lp_cplex==NULL)
                        SG_ERROR("Failed to create LP.\n");
                    else
                        CPXchgobjsen(env, lp_cplex, CPX_MIN); /* problem is minimization */
                }
            }
        }
    }

    return (lp_cplex != NULL) && (env != NULL);
}
bool CMKL::cleanup_cplex()
{
    bool result=false;

    if (lp_cplex)
    {
        int32_t status = CPXfreeprob(env, &lp_cplex);
        lp_cplex = NULL;
        lp_initialized = false;

        if (status)
            SG_WARNING("CPXfreeprob failed, error code %d.\n", status);
        else
            result = true;
    }

    if (env)
    {
        int32_t status = CPXcloseCPLEX(&env);
        env = NULL;

        if (status)
        {
            char errmsg[1024];
            SG_WARNING("Could not close CPLEX environment.\n");
            CPXgeterrorstring(env, status, errmsg);
            SG_WARNING("%s", errmsg);
        }
        else
            result = true;
    }
    return result;
}
#endif

#ifdef USE_GLPK
bool CMKL::init_glpk()
{
    lp_glpk = lpx_create_prob();
    lpx_set_obj_dir(lp_glpk, LPX_MIN);
    lpx_set_int_parm(lp_glpk, LPX_K_DUAL, GLP_ON);
    lpx_set_int_parm(lp_glpk, LPX_K_PRESOL, GLP_ON);

    glp_term_out(GLP_OFF);
    return (lp_glpk != NULL);
}

bool CMKL::cleanup_glpk()
{
    lp_initialized = false;
    if (lp_glpk)
        lpx_delete_prob(lp_glpk);
    lp_glpk = NULL;
    return true;
}

bool CMKL::check_lpx_status(LPX *lp)
{
    int status = lpx_get_status(lp);

    if (status==LPX_INFEAS)
    {
        SG_PRINT("solution is infeasible!\n");
        return false;
    }
    else if (status==LPX_NOFEAS)
    {
        SG_PRINT("problem has no feasible solution!\n");
        return false;
    }
    return true;
}
#endif // USE_GLPK

bool CMKL::train_machine(CFeatures* data)
{
    ASSERT(kernel);
    ASSERT(m_labels && m_labels->get_num_labels());

    if (data)
    {
        if (m_labels->get_num_labels() != data->get_num_vectors())
        {
            SG_ERROR("%s::train_machine(): Number of training vectors (%d) does"
                    " not match number of labels (%d)\n", get_name(),
                    data->get_num_vectors(), m_labels->get_num_labels());
        }
        kernel->init(data, data);
    }

    init_training();
    if (!svm)
        SG_ERROR("No constraint generator (SVM) set\n");

    int32_t num_label=0;
    if (m_labels)
        num_label = m_labels->get_num_labels();

    SG_INFO("%d trainlabels (%ld)\n", num_label, m_labels);
    if (mkl_epsilon<=0)
        mkl_epsilon = 1e-2;

    SG_INFO("mkl_epsilon = %1.1e\n", mkl_epsilon);
    SG_INFO("C_mkl = %1.1e\n", C_mkl);
    SG_INFO("mkl_norm = %1.3e\n", mkl_norm);
    SG_INFO("solver = %d\n", get_solver_type());
    SG_INFO("ent_lambda = %f\n", ent_lambda);
    SG_INFO("mkl_block_norm = %f\n", mkl_block_norm);

    int32_t num_weights = -1;
    int32_t num_kernels = kernel->get_num_subkernels();
    SG_INFO("num_kernels = %d\n", num_kernels);
    const float64_t* beta_const = kernel->get_subkernel_weights(num_weights);
    float64_t* beta = SGVector<float64_t>::clone_vector(beta_const, num_weights);
    ASSERT(num_weights==num_kernels);

    if (get_solver_type()==ST_BLOCK_NORM &&
            mkl_block_norm>=1 &&
            mkl_block_norm<=2)
    {
        mkl_norm = mkl_block_norm/(2-mkl_block_norm);
        SG_WARNING("Switching to ST_DIRECT method with mkl_norm=%g\n", mkl_norm);
        set_solver_type(ST_DIRECT);
    }

    if (get_solver_type()==ST_ELASTICNET)
    {
        // -- Initialize subkernel weights for Elasticnet MKL
        SGVector<float64_t>::scale_vector(1/SGVector<float64_t>::qnorm(beta, num_kernels, 1.0), beta, num_kernels);

        SG_FREE(beta_local);
        beta_local = SGVector<float64_t>::clone_vector(beta, num_kernels);

        elasticnet_transform(beta, ent_lambda, num_kernels);
    }
    else
    {
        SGVector<float64_t>::scale_vector(1/SGVector<float64_t>::qnorm(beta, num_kernels, mkl_norm),
                beta, num_kernels); // q-norm = 1
    }
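    // At this point beta is feasible for the chosen solver: for elastic-net
    // MKL it is normalized to unit 1-norm and mapped through
    // elasticnet_transform(), otherwise it is rescaled so that
    // ||beta||_q = (sum_p beta_p^q)^(1/q) = 1. For example, with
    // num_kernels=4 and mkl_norm=1 a uniform start is beta = (0.25, 0.25,
    // 0.25, 0.25).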
    kernel->set_subkernel_weights(SGVector<float64_t>(beta, num_kernels, false));
    SG_FREE(beta);

    svm->set_bias_enabled(get_bias_enabled());
    svm->set_epsilon(get_epsilon());
    svm->set_max_train_time(get_max_train_time());
    svm->set_nu(get_nu());
    svm->set_C(get_C1(), get_C2());
    svm->set_qpsize(get_qpsize());
    svm->set_shrinking_enabled(get_shrinking_enabled());
    svm->set_linadd_enabled(get_linadd_enabled());
    svm->set_batch_computation_enabled(get_batch_computation_enabled());
    svm->set_labels(m_labels);
    svm->set_kernel(kernel);

#ifdef USE_CPLEX
    cleanup_cplex();

    if (get_solver_type()==ST_CPLEX)
        init_cplex();
#endif

#ifdef USE_GLPK
    if (get_solver_type()==ST_GLPK)
        init_glpk();
#endif

    mkl_iterations = 0;
    CSignal::clear_cancel();

    training_time_clock.start();

    if (interleaved_optimization)
    {
        if (svm->get_classifier_type() != CT_LIGHT && svm->get_classifier_type() != CT_SVRLIGHT)
        {
            SG_ERROR("Interleaved MKL optimization is currently "
                    "only supported with SVMlight\n");
        }

        // Note that there is currently no safe way to properly resolve this
        // situation: the mkl object hands itself as a reference to the svm,
        // which in turn increases the reference count of mkl and, when done,
        // decreases the count. Thus we have to protect the mkl object from
        // deletion by ref()'ing it before we set the callback function, and
        // we should also unref() it afterwards. However, when the reference
        // count was zero before, this unref() might actually try to destroy
        // this (crash ahead), but if we don't actually unref() the object we
        // might leak memory... So as a workaround we only unref when the
        // reference count was >1 before.
#ifdef USE_REFERENCE_COUNTING
        int32_t refs = this->ref();
#endif
        svm->set_callback_function(this, perform_mkl_step_helper);
        svm->train();
        SG_DONE();
        svm->set_callback_function(NULL, NULL);
#ifdef USE_REFERENCE_COUNTING
        if (refs>1)
            this->unref();
#endif
    }
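    // The non-interleaved path below is a plain wrapper loop: it alternates
    // between (1) training the underlying SVM for fixed subkernel weights and
    // (2) updating the weights from the resulting alphas. With
    // sumw[p] = 0.5*alpha'*K_p*alpha (see compute_sum_beta()) and suma the
    // alpha-dependent constant from compute_sum_alpha(), perform_mkl_step()
    // improves the objective -suma + sum_p beta_p*sumw[p], and the loop stops
    // once converged() holds (w_gap below mkl_epsilon) or on cancellation.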
    else
    {
        float64_t* sumw = SG_MALLOC(float64_t, num_kernels);

        while (true)
        {
            svm->train();

            float64_t suma = compute_sum_alpha();
            compute_sum_beta(sumw);

            if ((training_time_clock.cur_time_diff()>get_max_train_time()) && (get_max_train_time()>0))
            {
                SG_SWARNING("MKL Algorithm terminates PREMATURELY due to current training time exceeding get_max_train_time()=%f. It may not have converged yet!\n", get_max_train_time());
                break;
            }

            mkl_iterations++;
            if (perform_mkl_step(sumw, suma) || CSignal::cancel_computations())
                break;
        }

        SG_FREE(sumw);
    }
#ifdef USE_CPLEX
    cleanup_cplex();
#endif
#ifdef USE_GLPK
    cleanup_glpk();
#endif

    int32_t nsv = svm->get_num_support_vectors();
    create_new_model(nsv);

    set_bias(svm->get_bias());
    for (int32_t i=0; i<nsv; i++)
    {
        set_alpha(i, svm->get_alpha(i));
        set_support_vector(i, svm->get_support_vector(i));
    }
    return true;
}

void CMKL::set_mkl_norm(float64_t norm)
{
    if (norm<1)
        SG_ERROR("Norm must be >= 1, e.g., 1-norm is the standard MKL; norms>1 nonsparse MKL\n");

    mkl_norm = norm;
}

void CMKL::set_elasticnet_lambda(float64_t lambda)
{
    if (lambda>1 || lambda<0)
        SG_ERROR("0<=lambda<=1\n");

    if (lambda==0)
        lambda = 1e-6;
    else if (lambda==1.0)
        lambda = 1.0-1e-6;

    ent_lambda = lambda;
}

void CMKL::set_mkl_block_norm(float64_t q)
{
    if (q<1)
        SG_ERROR("1<=q<=inf\n");

    mkl_block_norm = q;
}

bool CMKL::perform_mkl_step(
        const float64_t* sumw, float64_t suma)
{
    if ((training_time_clock.cur_time_diff()>get_max_train_time()) && (get_max_train_time()>0))
    {
        SG_SWARNING("MKL Algorithm terminates PREMATURELY due to current training time exceeding get_max_train_time()=%f. It may not have converged yet!\n", get_max_train_time());
        return true;
    }

    int32_t num_kernels = kernel->get_num_subkernels();
    int32_t nweights = 0;
    const float64_t* old_beta = kernel->get_subkernel_weights(nweights);
    ASSERT(nweights==num_kernels);
    float64_t* beta = SG_MALLOC(float64_t, num_kernels);

    int32_t inner_iters = 0;
    float64_t mkl_objective = 0;

    mkl_objective = -suma;
    for (int32_t i=0; i<num_kernels; i++)
    {
        beta[i] = old_beta[i];
        mkl_objective += old_beta[i]*sumw[i];
    }

    w_gap = CMath::abs(1-rho/mkl_objective);

    if ((w_gap >= mkl_epsilon) ||
            (get_solver_type()==ST_AUTO || get_solver_type()==ST_NEWTON || get_solver_type()==ST_DIRECT) ||
            get_solver_type()==ST_ELASTICNET || get_solver_type()==ST_BLOCK_NORM)
    {
        if (get_solver_type()==ST_AUTO || get_solver_type()==ST_DIRECT)
        {
            rho = compute_optimal_betas_directly(beta, old_beta, num_kernels, sumw, suma, mkl_objective);
        }
        else if (get_solver_type()==ST_BLOCK_NORM)
        {
            rho = compute_optimal_betas_block_norm(beta, old_beta, num_kernels, sumw, suma, mkl_objective);
        }
        else if (get_solver_type()==ST_ELASTICNET)
        {
            // -- Direct update of subkernel weights for ElasticnetMKL
            // Note that ElasticnetMKL solves a different optimization
            // problem from the rest of the solver types
            rho = compute_optimal_betas_elasticnet(beta, old_beta, num_kernels, sumw, suma, mkl_objective);
        }
        else if (get_solver_type()==ST_NEWTON)
            rho = compute_optimal_betas_newton(beta, old_beta, num_kernels, sumw, suma, mkl_objective);
#ifdef USE_CPLEX
        else if (get_solver_type()==ST_CPLEX)
            rho = compute_optimal_betas_via_cplex(beta, old_beta, num_kernels, sumw, suma, inner_iters);
#endif
#ifdef USE_GLPK
        else if (get_solver_type()==ST_GLPK)
            rho = compute_optimal_betas_via_glpk(beta, old_beta, num_kernels, sumw, suma, inner_iters);
#endif
        else
            SG_ERROR("Solver type not supported (not compiled in?)\n");

        w_gap = CMath::abs(1-rho/mkl_objective);
    }

    kernel->set_subkernel_weights(SGVector<float64_t>(beta, num_kernels, false));
    SG_FREE(beta);

    return converged();
}
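// The elastic-net update below is, in essence, a closed-form step: each
// tentative weight is set to beta_p = sqrt(sumw_p)*old_beta_p and the vector
// rescaled to sum to one; a small ridge R (epsRegul times the root of the
// squared deviation from the previous beta_local) is then added to every
// component before renormalizing, which keeps the update away from the
// boundary of the simplex, and finally elasticnet_transform() maps the
// weights back to the elastic-net parametrization.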
float64_t CMKL::compute_optimal_betas_elasticnet(
    float64_t* beta, const float64_t* old_beta, const int32_t num_kernels,
    const float64_t* sumw, const float64_t suma,
    const float64_t mkl_objective)
{
    const float64_t epsRegul = 0.01; // fraction of root mean squared deviation
    float64_t obj;
    float64_t Z;
    float64_t preR;
    int32_t p;
    int32_t nofKernelsGood;

    // --- optimal beta
    nofKernelsGood = num_kernels;

    Z = 0;
    for (p=0; p<num_kernels; ++p)
    {
        if (sumw[p] >= 0.0 && old_beta[p] >= 0.0)
        {
            beta[p] = CMath::sqrt(sumw[p]*old_beta[p]*old_beta[p]);
            Z += beta[p];
        }
        else
        {
            beta[p] = 0.0;
            --nofKernelsGood;
        }
        ASSERT(beta[p] >= 0);
    }

    // --- normalize
    SGVector<float64_t>::scale_vector(1.0/Z, beta, num_kernels);

    // --- regularize & renormalize

    preR = 0.0;
    for (p=0; p<num_kernels; ++p)
        preR += CMath::pow(beta_local[p] - beta[p], 2.0);
    const float64_t R = CMath::sqrt(preR) * epsRegul;
    if (!(R >= 0))
    {
        SG_PRINT("MKL-direct: p = %.3f\n", 1.0);
        SG_PRINT("MKL-direct: nofKernelsGood = %d\n", nofKernelsGood);
        SG_PRINT("MKL-direct: Z = %e\n", Z);
        SG_PRINT("MKL-direct: eps = %e\n", epsRegul);
        for (p=0; p<num_kernels; ++p)
        {
            const float64_t t = CMath::pow(beta_local[p] - beta[p], 2.0);
            SG_PRINT("MKL-direct: t[%3d] = %e ( diff = %e = %e - %e )\n", p, t, beta_local[p]-beta[p], beta_local[p], beta[p]);
        }
        SG_PRINT("MKL-direct: preR = %e\n", preR);
        SG_PRINT("MKL-direct: preR/p = %e\n", preR);
        SG_PRINT("MKL-direct: sqrt(preR/p) = %e\n", CMath::sqrt(preR));
        SG_PRINT("MKL-direct: R = %e\n", R);
        SG_ERROR("Assertion R >= 0 failed!\n");
    }

    Z = 0.0;
    for (p=0; p<num_kernels; ++p)
    {
        beta[p] += R;
        Z += beta[p];
        ASSERT(beta[p] >= 0);
    }
    Z = CMath::pow(Z, -1.0);
    ASSERT(Z >= 0);
    for (p=0; p<num_kernels; ++p)
    {
        beta[p] *= Z;
        ASSERT(beta[p] >= 0.0);
        if (beta[p] > 1.0)
            beta[p] = 1.0;
    }

    // --- copy beta into beta_local
    for (p=0; p<num_kernels; ++p)
        beta_local[p] = beta[p];

    // --- elastic-net transform
    elasticnet_transform(beta, ent_lambda, num_kernels);

    // --- objective
    obj = -suma;
    for (p=0; p<num_kernels; ++p)
    {
        //obj += sumw[p] * old_beta[p]*old_beta[p] / beta[p];
        obj += sumw[p] * beta[p];
    }
    return obj;
}
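// elasticnet_dual() below evaluates the elastic-net MKL dual as a function of
// the single scalar del: on return *ff holds the dual value, *gg its first
// and *hh its second derivative with respect to del. Only the "active"
// kernels, those whose norm nm[i] exceeds sqrt(2*del*(1-lambda)), contribute
// to the sums, which is what the index list I collects.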
void CMKL::elasticnet_dual(float64_t *ff, float64_t *gg, float64_t *hh,
        const float64_t &del, const float64_t* nm, int32_t len,
        const float64_t &lambda)
{
    std::list<int32_t> I;
    float64_t gam = 1.0-lambda;
    for (int32_t i=0; i<len; i++)
    {
        if (nm[i]>=CMath::sqrt(2*del*gam))
            I.push_back(i);
    }
    int32_t M = I.size();

    *ff = del;
    *gg = -(M*gam+lambda);
    *hh = 0;
    for (std::list<int32_t>::iterator it=I.begin(); it!=I.end(); it++)
    {
        float64_t nmit = nm[*it];

        *ff += del*gam*CMath::pow(nmit/CMath::sqrt(2*del*gam)-1, 2)/lambda;
        *gg += CMath::sqrt(gam/(2*del))*nmit;
        *hh += -0.5*CMath::sqrt(gam/(2*CMath::pow(del, 3)))*nmit;
    }
}

// assumes that all constraints are satisfied
float64_t CMKL::compute_elasticnet_dual_objective()
{
    int32_t n = get_num_support_vectors();
    int32_t num_kernels = kernel->get_num_subkernels();
    float64_t mkl_obj = 0;

    if (m_labels && kernel && kernel->get_kernel_type()==K_COMBINED)
    {
        // Compute Elastic-net dual
        float64_t* nm = SG_MALLOC(float64_t, num_kernels);
        float64_t del = 0;
        CKernel* kn = ((CCombinedKernel*) kernel)->get_first_kernel();

        int32_t k = 0;
        while (kn)
        {
            float64_t sum = 0;
            for (int32_t i=0; i<n; i++)
            {
                int32_t ii = get_support_vector(i);

                for (int32_t j=0; j<n; j++)
                {
                    int32_t jj = get_support_vector(j);
                    sum += get_alpha(i)*get_alpha(j)*kn->kernel(ii, jj);
                }
            }
            nm[k] = CMath::pow(sum, 0.5);
            del = CMath::max(del, nm[k]);

            // SG_PRINT("nm[%d]=%f\n", k, nm[k]);
            k++;

            SG_UNREF(kn);
            kn = ((CCombinedKernel*) kernel)->get_next_kernel();
        }
        // initial delta
        del = del/CMath::sqrt(2*(1-ent_lambda));

        // Newton's method to optimize delta
        k = 0;
        float64_t ff, gg, hh;
        elasticnet_dual(&ff, &gg, &hh, del, nm, num_kernels, ent_lambda);
        while (CMath::abs(gg)>1e-8 && k<100)
        {
            float64_t ff_old = ff;
            float64_t gg_old = gg;
            float64_t del_old = del;

            // SG_PRINT("[%d] fval=%f gg=%f del=%f\n", k, ff, gg, del);

            float64_t step = 1.0;
            do
            {
                del = del_old*CMath::exp(-step*gg/(hh*del+gg));
                elasticnet_dual(&ff, &gg, &hh, del, nm, num_kernels, ent_lambda);
                step /= 2;
            } while (ff>ff_old+1e-4*gg_old*(del-del_old));

            k++;
        }
        mkl_obj = -ff;

        SG_FREE(nm);

        mkl_obj += compute_sum_alpha();
    }
    else
        SG_ERROR("cannot compute objective, labels or kernel not set\n");

    return -mkl_obj;
}

float64_t CMKL::compute_optimal_betas_block_norm(
    float64_t* beta, const float64_t* old_beta, const int32_t num_kernels,
    const float64_t* sumw, const float64_t suma,
    const float64_t mkl_objective)
{
    float64_t obj;
    float64_t Z = 0;
    int32_t p;

    // --- optimal beta
    for (p=0; p<num_kernels; ++p)
    {
        ASSERT(sumw[p]>=0);

        beta[p] = CMath::pow(sumw[p], -(2.0-mkl_block_norm)/(2.0-2.0*mkl_block_norm));
        Z += CMath::pow(sumw[p], -(mkl_block_norm)/(2.0-2.0*mkl_block_norm));

        ASSERT(beta[p] >= 0);
    }

    ASSERT(Z>=0);

    Z = 1.0/CMath::pow(Z, (2.0-mkl_block_norm)/mkl_block_norm);

    for (p=0; p<num_kernels; ++p)
        beta[p] *= Z;

    // --- objective
    obj = -suma;
    for (p=0; p<num_kernels; ++p)
        obj += sumw[p] * beta[p];

    return obj;
}
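// compute_optimal_betas_directly() implements the closed-form p-norm update:
// each beta_p is proportional to (sumw_p*old_beta_p^2/p)^(1/(p+1)) with
// p = mkl_norm, followed by normalization to ||beta||_p = 1. As in the
// elastic-net case, a small regularizer R derived from the deviation to
// old_beta is added and the vector is renormalized, which damps abrupt
// changes in the weights between iterations.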
float64_t CMKL::compute_optimal_betas_directly(
    float64_t* beta, const float64_t* old_beta, const int32_t num_kernels,
    const float64_t* sumw, const float64_t suma,
    const float64_t mkl_objective)
{
    const float64_t epsRegul = 0.01; // fraction of root mean squared deviation
    float64_t obj;
    float64_t Z;
    float64_t preR;
    int32_t p;
    int32_t nofKernelsGood;

    // --- optimal beta
    nofKernelsGood = num_kernels;
    for (p=0; p<num_kernels; ++p)
    {
        //SG_PRINT("MKL-direct: sumw[%3d] = %e ( oldbeta = %e )\n", p, sumw[p], old_beta[p]);
        if (sumw[p] >= 0.0 && old_beta[p] >= 0.0)
        {
            beta[p] = sumw[p] * old_beta[p]*old_beta[p] / mkl_norm;
            beta[p] = CMath::pow(beta[p], 1.0 / (mkl_norm+1.0));
        }
        else
        {
            beta[p] = 0.0;
            --nofKernelsGood;
        }
        ASSERT(beta[p] >= 0);
    }

    // --- normalize
    Z = 0.0;
    for (p=0; p<num_kernels; ++p)
        Z += CMath::pow(beta[p], mkl_norm);

    Z = CMath::pow(Z, -1.0/mkl_norm);
    ASSERT(Z >= 0);
    for (p=0; p<num_kernels; ++p)
        beta[p] *= Z;

    // --- regularize & renormalize
    preR = 0.0;
    for (p=0; p<num_kernels; ++p)
        preR += CMath::sq(old_beta[p] - beta[p]);

    const float64_t R = CMath::sqrt(preR / mkl_norm) * epsRegul;
    if (!(R >= 0))
    {
        SG_PRINT("MKL-direct: p = %.3f\n", mkl_norm);
        SG_PRINT("MKL-direct: nofKernelsGood = %d\n", nofKernelsGood);
        SG_PRINT("MKL-direct: Z = %e\n", Z);
        SG_PRINT("MKL-direct: eps = %e\n", epsRegul);
        for (p=0; p<num_kernels; ++p)
        {
            const float64_t t = CMath::pow(old_beta[p] - beta[p], 2.0);
            SG_PRINT("MKL-direct: t[%3d] = %e ( diff = %e = %e - %e )\n", p, t, old_beta[p]-beta[p], old_beta[p], beta[p]);
        }
        SG_PRINT("MKL-direct: preR = %e\n", preR);
        SG_PRINT("MKL-direct: preR/p = %e\n", preR/mkl_norm);
        SG_PRINT("MKL-direct: sqrt(preR/p) = %e\n", CMath::sqrt(preR/mkl_norm));
        SG_PRINT("MKL-direct: R = %e\n", R);
        SG_ERROR("Assertion R >= 0 failed!\n");
    }

    Z = 0.0;
    for (p=0; p<num_kernels; ++p)
    {
        beta[p] += R;
        Z += CMath::pow(beta[p], mkl_norm);
        ASSERT(beta[p] >= 0);
    }
    Z = CMath::pow(Z, -1.0/mkl_norm);
    ASSERT(Z >= 0);
    for (p=0; p<num_kernels; ++p)
    {
        beta[p] *= Z;
        ASSERT(beta[p] >= 0.0);
        if (beta[p] > 1.0)
            beta[p] = 1.0;
    }

    // --- objective
    obj = -suma;
    for (p=0; p<num_kernels; ++p)
        obj += sumw[p] * beta[p];

    return obj;
}
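// The Newton solver below handles the same p-norm objective for mkl_norm > 1.
// gamma acts as an estimate of the Lagrange multiplier for the constraint
// ||beta||_p = 1, computed from the current sumw via the conjugate exponent
// r = p/(p-1); the subsequent Newton iterations then minimize
// sum_p sumw_p*old_beta_p^2/beta_p over the p-norm sphere (at beta = old_beta
// this coincides with the value sum_p beta_p*sumw_p computed here).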
float64_t CMKL::compute_optimal_betas_newton(float64_t* beta,
        const float64_t* old_beta, int32_t num_kernels,
        const float64_t* sumw, float64_t suma,
        float64_t mkl_objective)
{
    SG_DEBUG("MKL via NEWTON\n");

    if (mkl_norm==1)
        SG_ERROR("MKL via NEWTON works only for norms>1\n");

    const double epsBeta = 1e-32;
    const double epsGamma = 1e-12;
    const double epsWsq = 1e-12;
    const double epsNewt = 0.0001;
    const double epsStep = 1e-9;
    const int nofNewtonSteps = 3;
    const double hessRidge = 1e-6;
    const int inLogSpace = 0;

    const float64_t r = mkl_norm / (mkl_norm - 1.0);
    float64_t* newtDir = SG_MALLOC(float64_t, num_kernels);
    float64_t* newtBeta = SG_MALLOC(float64_t, num_kernels);
    //float64_t newtStep;
    float64_t stepSize;
    float64_t Z;
    float64_t obj;
    float64_t gamma;
    int32_t p;
    int i;

    // --- check beta
    Z = 0.0;
    for (p=0; p<num_kernels; ++p)
    {
        beta[p] = old_beta[p];
        if (!(beta[p] >= epsBeta))
            beta[p] = epsBeta;

        ASSERT(0.0 <= beta[p] && beta[p] <= 1.0);
        Z += CMath::pow(beta[p], mkl_norm);
    }

    Z = CMath::pow(Z, -1.0/mkl_norm);
    if (!(fabs(Z-1.0) <= epsGamma))
    {
        SG_WARNING("old_beta not normalized (diff=%e); forcing normalization. ", Z-1.0);
        for (p=0; p<num_kernels; ++p)
        {
            beta[p] *= Z;
            if (beta[p] > 1.0)
                beta[p] = 1.0;
            ASSERT(0.0 <= beta[p] && beta[p] <= 1.0);
        }
    }

    // --- compute gamma
    gamma = 0.0;
    for (p=0; p<num_kernels; ++p)
    {
        if (!(sumw[p] >= 0))
        {
            if (!(sumw[p] >= -epsWsq))
                SG_WARNING("sumw[%d] = %e; treated as 0. ", p, sumw[p]);
            // should better recompute sumw[] !!!
        }
        else
        {
            ASSERT(sumw[p] >= 0);
            //gamma += CMath::pow(sumw[p] * beta[p]*beta[p], r);
            gamma += CMath::pow(sumw[p] * beta[p]*beta[p] / mkl_norm, r);
        }
    }
    gamma = CMath::pow(gamma, 1.0/r) / mkl_norm;
    ASSERT(gamma > -1e-9);
    if (!(gamma > epsGamma))
    {
        SG_WARNING("bad gamma: %e; set to %e. ", gamma, epsGamma);
        // should better recompute sumw[] !!!
        gamma = epsGamma;
    }
    ASSERT(gamma >= epsGamma);
    //gamma = -gamma;

    // --- compute objective
    obj = 0.0;
    for (p=0; p<num_kernels; ++p)
    {
        obj += beta[p] * sumw[p];
        //obj += gamma/mkl_norm * CMath::pow(beta[p], mkl_norm);
    }
    if (!(obj >= 0.0))
        SG_WARNING("negative objective: %e. ", obj);
    //SG_PRINT("OBJ = %e. \n", obj);
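    // Each Newton step below uses only the diagonal of the Hessian: per
    // component the direction is (up to clipping at zero) gradient over
    // curvature, t1/t2, optionally computed in log-space. A halving line
    // search then projects the trial point back onto the p-norm sphere and
    // accepts it once the minimization objective drops by at least
    // epsNewt*stepSize*obj, an Armijo-style sufficient-decrease test.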
    // === perform Newton steps
    for (i=0; i<nofNewtonSteps; ++i)
    {
        // --- compute Newton direction (Hessian is diagonal)
        const float64_t gqq1 = mkl_norm * (mkl_norm-1.0) * gamma;
        // newtStep = 0.0;
        for (p=0; p<num_kernels; ++p)
        {
            ASSERT(0.0 <= beta[p] && beta[p] <= 1.0);
            //const float halfw2p = (sumw[p] >= 0.0) ? sumw[p] : 0.0;
            //const float64_t t1 = halfw2p*beta[p] - mkl_norm*gamma*CMath::pow(beta[p],mkl_norm);
            //const float64_t t2 = 2.0*halfw2p + gqq1*CMath::pow(beta[p],mkl_norm-1.0);
            const float halfw2p = (sumw[p] >= 0.0) ? (sumw[p]*old_beta[p]*old_beta[p]) : 0.0;
            const float64_t t0 = halfw2p*beta[p] - mkl_norm*gamma*CMath::pow(beta[p], mkl_norm+2.0);
            const float64_t t1 = (t0 < 0) ? 0.0 : t0;
            const float64_t t2 = 2.0*halfw2p + gqq1*CMath::pow(beta[p], mkl_norm+1.0);
            if (inLogSpace)
                newtDir[p] = t1 / (t1 + t2*beta[p] + hessRidge);
            else
                newtDir[p] = (t1 == 0.0) ? 0.0 : (t1 / t2);
            // newtStep += newtDir[p] * grad[p];
            ASSERT(newtDir[p] == newtDir[p]);
            //SG_PRINT("newtDir[%d] = %6.3f = %e / %e \n", p, newtDir[p], t1, t2);
        }
        //CMath::display_vector(newtDir, num_kernels, "newton direction ");
        //SG_PRINT("Newton step size = %e\n", Z);

        // --- line search
        stepSize = 1.0;
        while (stepSize >= epsStep)
        {
            // --- perform Newton step
            Z = 0.0;
            while (Z == 0.0)
            {
                for (p=0; p<num_kernels; ++p)
                {
                    if (inLogSpace)
                        newtBeta[p] = beta[p] * CMath::exp(+stepSize * newtDir[p]);
                    else
                        newtBeta[p] = beta[p] + stepSize * newtDir[p];
                    if (!(newtBeta[p] >= epsBeta))
                        newtBeta[p] = epsBeta;
                    Z += CMath::pow(newtBeta[p], mkl_norm);
                }
                ASSERT(0.0 <= Z);
                Z = CMath::pow(Z, -1.0/mkl_norm);
                if (Z == 0.0)
                    stepSize /= 2.0;
            }

            // --- normalize new beta (wrt p-norm)
            ASSERT(0.0 < Z);
            ASSERT(Z < CMath::INFTY);
            for (p=0; p<num_kernels; ++p)
            {
                newtBeta[p] *= Z;
                if (newtBeta[p] > 1.0)
                {
                    //SG_WARNING("beta[%d] = %e; set to 1. ", p, beta[p]);
                    newtBeta[p] = 1.0;
                }
                ASSERT(0.0 <= newtBeta[p] && newtBeta[p] <= 1.0);
            }

            // --- objective increased?
            float64_t newtObj;
            newtObj = 0.0;
            for (p=0; p<num_kernels; ++p)
                newtObj += sumw[p] * old_beta[p]*old_beta[p] / newtBeta[p];
            //SG_PRINT("step = %.8f: obj = %e -> %e. \n", stepSize, obj, newtObj);
            if (newtObj < obj - epsNewt*stepSize*obj)
            {
                for (p=0; p<num_kernels; ++p)
                    beta[p] = newtBeta[p];
                obj = newtObj;
                break;
            }
            stepSize /= 2.0;
        }

        if (stepSize < epsStep)
            break;
    }
    SG_FREE(newtDir);
    SG_FREE(newtBeta);

    // === return new objective
    obj = -suma;
    for (p=0; p<num_kernels; ++p)
        obj += beta[p] * sumw[p];
    return obj;
}
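// For the CPLEX solvers the weight update is posed as a (quadratically
// constrained) linear program in the 2*num_kernels+1 columns
// [beta_1..beta_K, s_1..s_K, theta]: theta is the epigraph variable being
// minimized, the s_q are smoothness slacks that only enter when C_mkl != 0,
// and every MKL iteration appends one cutting plane, for q-norms of the form
// -sum_p sumw_p*beta_p - theta <= -suma (for the 1-norm the suma term is
// folded into the coefficients, since sum_p beta_p = 1). This is, in essence,
// the semi-infinite-programming view: the constraint set grows until w_gap
// falls below mkl_epsilon.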
float64_t CMKL::compute_optimal_betas_via_cplex(float64_t* new_beta, const float64_t* old_beta, int32_t num_kernels,
        const float64_t* sumw, float64_t suma, int32_t& inner_iters)
{
    SG_DEBUG("MKL via CPLEX\n");

#ifdef USE_CPLEX
    ASSERT(new_beta);
    ASSERT(old_beta);

    int32_t NUMCOLS = 2*num_kernels + 1;
    double* x = SG_MALLOC(double, NUMCOLS);

    if (!lp_initialized)
    {
        SG_INFO("creating LP\n");

        double obj[NUMCOLS]; /* calling external lib */
        double lb[NUMCOLS];  /* calling external lib */
        double ub[NUMCOLS];  /* calling external lib */

        for (int32_t i=0; i<2*num_kernels; i++)
        {
            obj[i]=0;
            lb[i]=0;
            ub[i]=1;
        }

        for (int32_t i=num_kernels; i<2*num_kernels; i++)
            obj[i] = C_mkl;

        obj[2*num_kernels]=1;
        lb[2*num_kernels]=-CPX_INFBOUND;
        ub[2*num_kernels]=CPX_INFBOUND;

        int status = CPXnewcols(env, lp_cplex, NUMCOLS, obj, lb, ub, NULL, NULL);
        if (status)
        {
            char errmsg[1024];
            CPXgeterrorstring(env, status, errmsg);
            SG_ERROR("%s", errmsg);
        }

        // add constraint sum(w)=1;
        SG_INFO("adding the first row\n");
        int initial_rmatbeg[1];                /* calling external lib */
        int initial_rmatind[num_kernels+1];    /* calling external lib */
        double initial_rmatval[num_kernels+1]; /* calling ext lib */
        double initial_rhs[1];                 /* calling external lib */
        char initial_sense[1];

        // 1-norm MKL
        if (mkl_norm==1)
        {
            initial_rmatbeg[0] = 0;
            initial_rhs[0] = 1;     // rhs=1;
            initial_sense[0] = 'E'; // equality

            // sparse matrix
            for (int32_t i=0; i<num_kernels; i++)
            {
                initial_rmatind[i]=i; // index of non-zero element
                initial_rmatval[i]=1; // value of non-zero element
            }
            initial_rmatind[num_kernels]=2*num_kernels; // theta column,
            initial_rmatval[num_kernels]=0;             // with coefficient 0

            status = CPXaddrows(env, lp_cplex, 0, 1, num_kernels+1,
                    initial_rhs, initial_sense, initial_rmatbeg,
                    initial_rmatind, initial_rmatval, NULL, NULL);
        }
        else // 2- and q-norm MKL
        {
            initial_rmatbeg[0] = 0;
            initial_rhs[0] = 0;     // rhs=0;
            initial_sense[0] = 'L'; // <= (inequality)

            initial_rmatind[0]=2*num_kernels;
            initial_rmatval[0]=0;

            status = CPXaddrows(env, lp_cplex, 0, 1, 1,
                    initial_rhs, initial_sense, initial_rmatbeg,
                    initial_rmatind, initial_rmatval, NULL, NULL);

            if (mkl_norm==2)
            {
                for (int32_t i=0; i<num_kernels; i++)
                {
                    initial_rmatind[i]=i;
                    initial_rmatval[i]=1;
                }
                initial_rmatind[num_kernels]=2*num_kernels;
                initial_rmatval[num_kernels]=0;

                status = CPXaddqconstr(env, lp_cplex, 0, num_kernels+1, 1.0, 'L', NULL, NULL,
                        initial_rmatind, initial_rmatind, initial_rmatval, NULL);
            }
        }

        if (status)
            SG_ERROR("Failed to add the first row.\n");

        lp_initialized = true;

        if (C_mkl!=0.0)
        {
            for (int32_t q=0; q<num_kernels-1; q++)
            {
                // add constraint w[i]-w[i+1]<s[i];
                // add constraint w[i+1]-w[i]<s[i];
                int rmatbeg[1];    /* calling external lib */
                int rmatind[3];    /* calling external lib */
                double rmatval[3]; /* calling external lib */
                double rhs[1];     /* calling external lib */
                char sense[1];

                rmatbeg[0] = 0;
                rhs[0]=0;     // rhs=0;
                sense[0]='L'; // <=
                rmatind[0]=q;
                rmatval[0]=1;
                rmatind[1]=q+1;
                rmatval[1]=-1;
                rmatind[2]=num_kernels+q;
                rmatval[2]=-1;
                status = CPXaddrows(env, lp_cplex, 0, 1, 3,
                        rhs, sense, rmatbeg,
                        rmatind, rmatval, NULL, NULL);
                if (status)
                    SG_ERROR("Failed to add a smoothness row (1).\n");

                rmatbeg[0] = 0;
                rhs[0]=0;     // rhs=0;
                sense[0]='L'; // <=
                rmatind[0]=q;
                rmatval[0]=-1;
                rmatind[1]=q+1;
                rmatval[1]=1;
                rmatind[2]=num_kernels+q;
                rmatval[2]=-1;
                status = CPXaddrows(env, lp_cplex, 0, 1, 3,
                        rhs, sense, rmatbeg,
                        rmatind, rmatval, NULL, NULL);
                if (status)
                    SG_ERROR("Failed to add a smoothness row (2).\n");
            }
        }
    }

    { // add the new row
        //SG_INFO("add the new row\n");

        int rmatbeg[1];
        int rmatind[num_kernels+1];
        double rmatval[num_kernels+1];
        double rhs[1];
        char sense[1];

        rmatbeg[0] = 0;
        if (mkl_norm==1)
            rhs[0]=0;
        else
            rhs[0]=-suma;

        sense[0]='L';

        for (int32_t i=0; i<num_kernels; i++)
        {
            rmatind[i]=i;
            if (mkl_norm==1)
                rmatval[i]=-(sumw[i]-suma);
            else
                rmatval[i]=-sumw[i];
        }
        rmatind[num_kernels]=2*num_kernels;
        rmatval[num_kernels]=-1;

        int32_t status = CPXaddrows(env, lp_cplex, 0, 1, num_kernels+1,
                rhs, sense, rmatbeg,
                rmatind, rmatval, NULL, NULL);
        if (status)
            SG_ERROR("Failed to add the new row.\n");
    }
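    // Solve the restricted master problem. For mkl_norm==1 this is a pure LP
    // (dual simplex); for mkl_norm==2 a single quadratic constraint
    // ||beta||_2^2 <= 1 was added above and the barrier optimizer is used;
    // for general q-norms the constraint ||beta||_q^2 <= 1 is handled by
    // sequential quadratic approximation: set_qnorm_constraints() re-expands
    // the constraint around the current beta and CPXbaropt() is re-run until
    // the relative objective change drops below 0.1*mkl_epsilon.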
    inner_iters=0;
    int status;
    {
        if (mkl_norm==1) // optimize 1-norm MKL
            status = CPXlpopt(env, lp_cplex);
        else if (mkl_norm==2) // optimize 2-norm MKL
            status = CPXbaropt(env, lp_cplex);
        else // q-norm MKL
        {
            float64_t* beta = SG_MALLOC(float64_t, 2*num_kernels+1);
            float64_t objval_old = 1e-8; // some value to cause the loop to not terminate yet
            for (int32_t i=0; i<num_kernels; i++)
                beta[i] = old_beta[i];
            for (int32_t i=num_kernels; i<2*num_kernels+1; i++)
                beta[i] = 0;

            while (true)
            {
                //int rows=CPXgetnumrows(env, lp_cplex);
                //int cols=CPXgetnumcols(env, lp_cplex);
                //SG_PRINT("rows:%d, cols:%d (kernel:%d)\n", rows, cols, num_kernels);
                CMath::scale_vector(1/CMath::qnorm(beta, num_kernels, mkl_norm), beta, num_kernels);

                set_qnorm_constraints(beta, num_kernels);

                status = CPXbaropt(env, lp_cplex);
                if (status)
                    SG_ERROR("Failed to optimize Problem.\n");

                int solstat=0;
                double objval=0;
                status = CPXsolution(env, lp_cplex, &solstat, &objval,
                        (double*) beta, NULL, NULL, NULL);

                if (status)
                {
                    CMath::display_vector(beta, num_kernels, "beta");
                    SG_ERROR("Failed to obtain solution.\n");
                }

                CMath::scale_vector(1/CMath::qnorm(beta, num_kernels, mkl_norm), beta, num_kernels);

                //SG_PRINT("[%d] %f (%f)\n", inner_iters, objval, objval_old);
                if ((1-CMath::abs(objval/objval_old) < 0.1*mkl_epsilon)) // && (inner_iters>2))
                    break;

                objval_old = objval;

                inner_iters++;
            }
            SG_FREE(beta);
        }

        if (status)
            SG_ERROR("Failed to optimize Problem.\n");

        // obtain solution
        int32_t cur_numrows = (int32_t) CPXgetnumrows(env, lp_cplex);
        int32_t cur_numcols = (int32_t) CPXgetnumcols(env, lp_cplex);
        int32_t num_rows = cur_numrows;
        ASSERT(cur_numcols<=2*num_kernels+1);

        float64_t* slack = SG_MALLOC(float64_t, cur_numrows);
        float64_t* pi = SG_MALLOC(float64_t, cur_numrows);

        /* calling external lib */
        int solstat=0;
        double objval=0;

        if (mkl_norm==1)
        {
            status = CPXsolution(env, lp_cplex, &solstat, &objval,
                    (double*) x, (double*) pi, (double*) slack, NULL);
        }
        else
        {
            status = CPXsolution(env, lp_cplex, &solstat, &objval,
                    (double*) x, NULL, (double*) slack, NULL);
        }

        int32_t solution_ok = (!status);
        if (status)
            SG_ERROR("Failed to obtain solution.\n");

        int32_t num_active_rows=0;
        if (solution_ok)
        {
            /* 1-norm MKL */
            float64_t max_slack = -CMath::INFTY;
            int32_t max_idx = -1;
            int32_t start_row = 1;
            if (C_mkl!=0.0)
                start_row += 2*(num_kernels-1);

            for (int32_t i=start_row; i<cur_numrows; i++) // skip first
            {
                if (mkl_norm==1)
                {
                    if (pi[i]!=0)
                        num_active_rows++;
                    else
                    {
                        if (slack[i]>max_slack)
                        {
                            max_slack = slack[i];
                            max_idx = i;
                        }
                    }
                }
                else // 2-norm or general q-norm
                {
                    if (CMath::abs(slack[i])<1e-6)
                        num_active_rows++;
                    else
                    {
                        if (slack[i]>max_slack)
                        {
                            max_slack = slack[i];
                            max_idx = i;
                        }
                    }
                }
            }

            // have at most max(100,num_active_rows*2) rows, if not, remove one
            if ((num_rows-start_row>CMath::max(100, 2*num_active_rows)) && (max_idx!=-1))
            {
                //SG_INFO("-%i(%i,%i)", max_idx, start_row, num_rows);
                status = CPXdelrows(env, lp_cplex, max_idx, max_idx);
                if (status)
                    SG_ERROR("Failed to remove an old row.\n");
            }

            //CMath::display_vector(x, num_kernels, "beta");

            rho = -x[2*num_kernels];
            SG_FREE(pi);
            SG_FREE(slack);
        }
        else
        {
            /* then something is wrong and we rather
               stop sooner than later */
            rho = 1;
        }
    }
    for (int32_t i=0; i<num_kernels; i++)
        new_beta[i] = x[i];

    SG_FREE(x);
#else
    SG_ERROR("Cplex not enabled at compile time\n");
#endif
    return rho;
}
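// The GLPK variant below handles only the 1-norm case but mirrors the CPLEX
// LP: the same column layout (beta, smoothness slacks, theta; note GLPK rows
// and columns are 1-based), the fixed row sum(beta)=1, one new cutting plane
// per MKL iteration, and the same housekeeping heuristic that deletes the
// most-slack row once more than max(100, 2*num_active_rows) cutting planes
// have accumulated.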
float64_t CMKL::compute_optimal_betas_via_glpk(float64_t* beta, const float64_t* old_beta,
        int num_kernels, const float64_t* sumw, float64_t suma, int32_t& inner_iters)
{
    SG_DEBUG("MKL via GLPK\n");

    if (mkl_norm!=1)
        SG_ERROR("MKL via GLPK works only for norm=1\n");

    float64_t obj=1.0;
#ifdef USE_GLPK
    int32_t NUMCOLS = 2*num_kernels + 1;
    if (!lp_initialized)
    {
        SG_INFO("creating LP\n");

        // set obj function, note glpk indexing is 1-based
        lpx_add_cols(lp_glpk, NUMCOLS);
        for (int i=1; i<=2*num_kernels; i++)
        {
            lpx_set_obj_coef(lp_glpk, i, 0);
            lpx_set_col_bnds(lp_glpk, i, LPX_DB, 0, 1);
        }
        for (int i=num_kernels+1; i<=2*num_kernels; i++)
        {
            lpx_set_obj_coef(lp_glpk, i, C_mkl);
        }
        lpx_set_obj_coef(lp_glpk, NUMCOLS, 1);
        lpx_set_col_bnds(lp_glpk, NUMCOLS, LPX_FR, 0, 0); // unbounded

        // add first row: sum(w)=1
        int row_index = lpx_add_rows(lp_glpk, 1);
        int* ind = SG_MALLOC(int, num_kernels+2);
        float64_t* val = SG_MALLOC(float64_t, num_kernels+2);
        for (int i=1; i<=num_kernels; i++)
        {
            ind[i] = i;
            val[i] = 1;
        }
        ind[num_kernels+1] = NUMCOLS;
        val[num_kernels+1] = 0;
        lpx_set_mat_row(lp_glpk, row_index, num_kernels, ind, val);
        lpx_set_row_bnds(lp_glpk, row_index, LPX_FX, 1, 1);
        SG_FREE(val);
        SG_FREE(ind);

        lp_initialized = true;

        if (C_mkl!=0.0)
        {
            for (int32_t q=1; q<num_kernels; q++)
            {
                int mat_ind[4];
                float64_t mat_val[4];
                int mat_row_index = lpx_add_rows(lp_glpk, 2);
                mat_ind[1] = q;
                mat_val[1] = 1;
                mat_ind[2] = q+1;
                mat_val[2] = -1;
                mat_ind[3] = num_kernels+q;
                mat_val[3] = -1;
                lpx_set_mat_row(lp_glpk, mat_row_index, 3, mat_ind, mat_val);
                lpx_set_row_bnds(lp_glpk, mat_row_index, LPX_UP, 0, 0);
                mat_val[1] = -1;
                mat_val[2] = 1;
                lpx_set_mat_row(lp_glpk, mat_row_index+1, 3, mat_ind, mat_val);
                lpx_set_row_bnds(lp_glpk, mat_row_index+1, LPX_UP, 0, 0);
            }
        }
    }

    int* ind = SG_MALLOC(int, num_kernels+2);
    float64_t* val = SG_MALLOC(float64_t, num_kernels+2);
    int row_index = lpx_add_rows(lp_glpk, 1);
    for (int32_t i=1; i<=num_kernels; i++)
    {
        ind[i] = i;
        val[i] = -(sumw[i-1]-suma);
    }
    ind[num_kernels+1] = 2*num_kernels+1;
    val[num_kernels+1] = -1;
    lpx_set_mat_row(lp_glpk, row_index, num_kernels+1, ind, val);
    lpx_set_row_bnds(lp_glpk, row_index, LPX_UP, 0, 0);
    SG_FREE(ind);
    SG_FREE(val);

    // optimize
    lpx_simplex(lp_glpk);
    bool res = check_lpx_status(lp_glpk);
    if (!res)
        SG_ERROR("Failed to optimize Problem.\n");

    int32_t cur_numrows = lpx_get_num_rows(lp_glpk);
    int32_t cur_numcols = lpx_get_num_cols(lp_glpk);
    int32_t num_rows = cur_numrows;
    ASSERT(cur_numcols<=2*num_kernels+1);

    float64_t* col_primal = SG_MALLOC(float64_t, cur_numcols);
    float64_t* row_primal = SG_MALLOC(float64_t, cur_numrows);
    float64_t* row_dual = SG_MALLOC(float64_t, cur_numrows);

    for (int i=0; i<cur_numrows; i++)
    {
        row_primal[i] = lpx_get_row_prim(lp_glpk, i+1);
        row_dual[i] = lpx_get_row_dual(lp_glpk, i+1);
    }
    for (int i=0; i<cur_numcols; i++)
        col_primal[i] = lpx_get_col_prim(lp_glpk, i+1);

    obj = -col_primal[2*num_kernels];

    for (int i=0; i<num_kernels; i++)
        beta[i] = col_primal[i];

    int32_t num_active_rows=0;
    if (res)
    {
        float64_t max_slack = CMath::INFTY;
        int32_t max_idx = -1;
        int32_t start_row = 1;
        if (C_mkl!=0.0)
            start_row += 2*(num_kernels-1);

        for (int32_t i=start_row; i<cur_numrows; i++)
        {
            if (row_dual[i]!=0)
                num_active_rows++;
            else
            {
                if (row_primal[i]<max_slack)
                {
                    max_slack = row_primal[i];
                    max_idx = i;
                }
            }
        }

        if ((num_rows-start_row>CMath::max(100, 2*num_active_rows)) && max_idx!=-1)
        {
            int del_rows[2];
            del_rows[1] = max_idx+1;
            lpx_del_rows(lp_glpk, 1, del_rows);
        }
    }

    SG_FREE(row_dual);
    SG_FREE(row_primal);
    SG_FREE(col_primal);
#else
    SG_ERROR("Glpk not enabled at compile time\n");
#endif

    return obj;
}
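// compute_sum_beta() fills sumw[n] = 0.5 * sum_{i,j} alpha_i*alpha_j*K_n(x_i,x_j)
// for every subkernel K_n. It does so by temporarily setting the subkernel
// weights to the n-th unit vector, so that the combined kernel evaluates just
// K_n over the support vectors, and restores the original weights afterwards.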
void CMKL::compute_sum_beta(float64_t* sumw)
{
    ASSERT(sumw);
    ASSERT(svm);

    int32_t nsv = svm->get_num_support_vectors();
    int32_t num_kernels = kernel->get_num_subkernels();
    SGVector<float64_t> beta = SGVector<float64_t>(num_kernels);
    int32_t nweights=0;
    const float64_t* old_beta = kernel->get_subkernel_weights(nweights);
    ASSERT(nweights==num_kernels);
    ASSERT(old_beta);

    for (int32_t i=0; i<num_kernels; i++)
    {
        beta.vector[i]=0;
        sumw[i]=0;
    }

    for (int32_t n=0; n<num_kernels; n++)
    {
        beta.vector[n]=1.0;
        /* this only copies the value of the first entry of this array
         * so it may be freed safely afterwards. */
        kernel->set_subkernel_weights(beta);

        for (int32_t i=0; i<nsv; i++)
        {
            int32_t ii = svm->get_support_vector(i);

            for (int32_t j=0; j<nsv; j++)
            {
                int32_t jj = svm->get_support_vector(j);
                sumw[n] += 0.5*svm->get_alpha(i)*svm->get_alpha(j)*kernel->kernel(ii, jj);
            }
        }
        beta[n]=0.0;
    }

    mkl_iterations++;
    kernel->set_subkernel_weights(SGVector<float64_t>((float64_t*) old_beta, num_kernels, false));
}

// assumes that all constraints are satisfied
float64_t CMKL::compute_mkl_dual_objective()
{
    if (get_solver_type()==ST_ELASTICNET)
    {
        // -- Compute ElasticnetMKL dual objective
        return compute_elasticnet_dual_objective();
    }

    int32_t n = get_num_support_vectors();
    float64_t mkl_obj = 0;

    if (m_labels && kernel && kernel->get_kernel_type()==K_COMBINED)
    {
        CKernel* kn = ((CCombinedKernel*) kernel)->get_first_kernel();
        while (kn)
        {
            float64_t sum = 0;
            for (int32_t i=0; i<n; i++)
            {
                int32_t ii = get_support_vector(i);

                for (int32_t j=0; j<n; j++)
                {
                    int32_t jj = get_support_vector(j);
                    sum += get_alpha(i)*get_alpha(j)*kn->kernel(ii, jj);
                }
            }

            if (mkl_norm==1.0)
                mkl_obj = CMath::max(mkl_obj, sum);
            else
                mkl_obj += CMath::pow(sum, mkl_norm/(mkl_norm-1));

            SG_UNREF(kn);
            kn = ((CCombinedKernel*) kernel)->get_next_kernel();
        }

        if (mkl_norm==1.0)
            mkl_obj = -0.5*mkl_obj;
        else
            mkl_obj = -0.5*CMath::pow(mkl_obj, (mkl_norm-1)/mkl_norm);

        mkl_obj += compute_sum_alpha();
    }
    else
        SG_ERROR("cannot compute objective, labels or kernel not set\n");

    return -mkl_obj;
}
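// set_qnorm_constraints() keeps the master problem tractable by replacing the
// nonlinear constraint f(beta) = sum_p beta_p^q <= 1 with its second-order
// Taylor expansion around the current point beta0: with g_p = q*beta0_p^(q-1)
// and h_p = 0.5*q*(q-1)*beta0_p^(q-2), the quadratic constraint installed via
// CPXaddqconstr() reads
//   sum_p h_p*x_p^2 + (g_p - 2*beta0_p*h_p)*x_p
//       <= 1 - f(beta0) + sum_p (g_p*beta0_p - h_p*beta0_p^2),
// which is exactly what the const_term/lin_term/hess_beta arrays assemble
// below. Any previously installed quadratic constraint is deleted first.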
#ifdef USE_CPLEX
void CMKL::set_qnorm_constraints(float64_t* beta, int32_t num_kernels)
{
    ASSERT(num_kernels>0);

    float64_t* grad_beta = SG_MALLOC(float64_t, num_kernels);
    float64_t* hess_beta = SG_MALLOC(float64_t, num_kernels+1);
    float64_t* lin_term = SG_MALLOC(float64_t, num_kernels+1);
    int* ind = SG_MALLOC(int, num_kernels+1);

    //CMath::display_vector(beta, num_kernels, "beta");
    double const_term = 1-CMath::qsq(beta, num_kernels, mkl_norm);

    //SG_PRINT("const=%f\n", const_term);
    ASSERT(CMath::fequal(const_term, 0.0));

    for (int32_t i=0; i<num_kernels; i++)
    {
        grad_beta[i] = mkl_norm * pow(beta[i], mkl_norm-1);
        hess_beta[i] = 0.5*mkl_norm*(mkl_norm-1) * pow(beta[i], mkl_norm-2);
        lin_term[i] = grad_beta[i] - 2*beta[i]*hess_beta[i];
        const_term += grad_beta[i]*beta[i] - CMath::sq(beta[i])*hess_beta[i];
        ind[i] = i;
    }
    ind[num_kernels] = 2*num_kernels;
    hess_beta[num_kernels] = 0;
    lin_term[num_kernels] = 0;

    int status=0;
    int num = CPXgetnumqconstrs(env, lp_cplex);

    if (num>0)
    {
        status = CPXdelqconstrs(env, lp_cplex, 0, 0);
        ASSERT(!status);
    }

    status = CPXaddqconstr(env, lp_cplex, num_kernels+1, num_kernels+1, const_term, 'L', ind, lin_term,
            ind, ind, hess_beta, NULL);
    ASSERT(!status);

    //CPXwriteprob(env, lp_cplex, "prob.lp", NULL);
    //CPXqpwrite(env, lp_cplex, "prob.qp");

    SG_FREE(grad_beta);
    SG_FREE(hess_beta);
    SG_FREE(lin_term);
    SG_FREE(ind);
}
#endif // USE_CPLEX