SHOGUN
v2.0.0
|
00001 /* 00002 * This program is free software; you can redistribute it and/or modify 00003 * it under the terms of the GNU General Public License as published by 00004 * the Free Software Foundation; either version 3 of the License, or 00005 * (at your option) any later version. 00006 * 00007 * Written (W) 2007-2009 Soeren Sonnenburg 00008 * Written (W) 2007-2008 Vojtech Franc 00009 * Copyright (C) 2007-2009 Fraunhofer Institute FIRST and Max-Planck-Society 00010 */ 00011 00012 #include <shogun/lib/config.h> 00013 #include <shogun/mathematics/Math.h> 00014 #include <shogun/lib/Signal.h> 00015 #include <shogun/lib/Time.h> 00016 #include <shogun/machine/LinearMachine.h> 00017 #include <shogun/classifier/svm/SubGradientSVM.h> 00018 #include <shogun/classifier/svm/QPBSVMLib.h> 00019 #include <shogun/features/DotFeatures.h> 00020 #include <shogun/labels/Labels.h> 00021 #include <shogun/labels/BinaryLabels.h> 00022 00023 #undef DEBUG_SUBGRADIENTSVM 00024 00025 using namespace shogun; 00026 00027 CSubGradientSVM::CSubGradientSVM() 00028 : CLinearMachine(), C1(1), C2(1), epsilon(1e-5), qpsize(42), 00029 qpsize_max(2000), use_bias(false), delta_active(0), delta_bound(0) 00030 { 00031 } 00032 00033 CSubGradientSVM::CSubGradientSVM( 00034 float64_t C, CDotFeatures* traindat, CLabels* trainlab) 00035 : CLinearMachine(), C1(C), C2(C), epsilon(1e-5), qpsize(42), 00036 qpsize_max(2000), use_bias(false), delta_active(0), delta_bound(0) 00037 { 00038 set_features(traindat); 00039 set_labels(trainlab); 00040 } 00041 00042 00043 CSubGradientSVM::~CSubGradientSVM() 00044 { 00045 } 00046 00047 /* 00048 int32_t CSubGradientSVM::find_active(int32_t num_feat, int32_t num_vec, int32_t& num_active, int32_t& num_bound) 00049 { 00050 int32_t delta_active=0; 00051 num_active=0; 00052 num_bound=0; 00053 00054 for (int32_t i=0; i<num_vec; i++) 00055 { 00056 active[i]=0; 00057 00058 //within margin/wrong side 00059 if (proj[i] < 1-work_epsilon) 00060 { 00061 idx_active[num_active++]=i; 00062 active[i]=1; 00063 } 00064 00065 //on margin 00066 if (CMath::abs(proj[i]-1) <= work_epsilon) 00067 { 00068 idx_bound[num_bound++]=i; 00069 active[i]=2; 00070 } 00071 00072 if (active[i]!=old_active[i]) 00073 delta_active++; 00074 } 00075 00076 return delta_active; 00077 } 00078 */ 00079 00080 int32_t CSubGradientSVM::find_active( 00081 int32_t num_feat, int32_t num_vec, int32_t& num_active, int32_t& num_bound) 00082 { 00083 delta_bound=0; 00084 delta_active=0; 00085 num_active=0; 00086 num_bound=0; 00087 00088 for (int32_t i=0; i<num_vec; i++) 00089 { 00090 active[i]=0; 00091 00092 //within margin/wrong side 00093 if (proj[i] < 1-autoselected_epsilon) 00094 { 00095 idx_active[num_active++]=i; 00096 active[i]=1; 00097 } 00098 00099 //on margin 00100 if (CMath::abs(proj[i]-1) <= autoselected_epsilon) 00101 { 00102 idx_bound[num_bound++]=i; 00103 active[i]=2; 00104 } 00105 00106 if (active[i]!=old_active[i]) 00107 delta_active++; 00108 00109 if (active[i]==2 && old_active[i]==2) 00110 delta_bound++; 00111 } 00112 00113 00114 if (delta_active==0 && work_epsilon<=epsilon) //we converged 00115 return 0; 00116 else if (delta_active==0) //lets decrease work_epsilon 00117 { 00118 work_epsilon=CMath::min(work_epsilon/2, autoselected_epsilon); 00119 work_epsilon=CMath::max(work_epsilon, epsilon); 00120 num_bound=qpsize; 00121 } 00122 00123 delta_bound=0; 00124 delta_active=0; 00125 num_active=0; 00126 num_bound=0; 00127 00128 for (int32_t i=0; i<num_vec; i++) 00129 { 00130 tmp_proj[i]=CMath::abs(proj[i]-1); 00131 tmp_proj_idx[i]=i; 00132 } 00133 00134 CMath::qsort_index(tmp_proj, tmp_proj_idx, num_vec); 00135 00136 autoselected_epsilon=tmp_proj[CMath::min(qpsize,num_vec-1)]; 00137 00138 #ifdef DEBUG_SUBGRADIENTSVM 00139 //SG_PRINT("autoseleps: %15.15f\n", autoselected_epsilon); 00140 #endif 00141 00142 if (autoselected_epsilon>work_epsilon) 00143 autoselected_epsilon=work_epsilon; 00144 00145 if (autoselected_epsilon<epsilon) 00146 { 00147 autoselected_epsilon=epsilon; 00148 00149 int32_t i=0; 00150 while (i < num_vec && tmp_proj[i] <= autoselected_epsilon) 00151 i++; 00152 00153 //SG_PRINT("lower bound on epsilon requires %d variables in qp\n", i); 00154 00155 if (i>=qpsize_max && autoselected_epsilon>epsilon) //qpsize limit 00156 { 00157 SG_INFO("qpsize limit (%d) reached\n", qpsize_max); 00158 int32_t num_in_qp=i; 00159 while (--i>=0 && num_in_qp>=qpsize_max) 00160 { 00161 if (tmp_proj[i] < autoselected_epsilon) 00162 { 00163 autoselected_epsilon=tmp_proj[i]; 00164 num_in_qp--; 00165 } 00166 } 00167 00168 //SG_PRINT("new qpsize will be %d, autoeps:%15.15f\n", num_in_qp, autoselected_epsilon); 00169 } 00170 } 00171 00172 for (int32_t i=0; i<num_vec; i++) 00173 { 00174 active[i]=0; 00175 00176 //within margin/wrong side 00177 if (proj[i] < 1-autoselected_epsilon) 00178 { 00179 idx_active[num_active++]=i; 00180 active[i]=1; 00181 } 00182 00183 //on margin 00184 if (CMath::abs(proj[i]-1) <= autoselected_epsilon) 00185 { 00186 idx_bound[num_bound++]=i; 00187 active[i]=2; 00188 } 00189 00190 if (active[i]!=old_active[i]) 00191 delta_active++; 00192 00193 if (active[i]==2 && old_active[i]==2) 00194 delta_bound++; 00195 } 00196 00197 //SG_PRINT("delta_bound: %d of %d (%02.2f)\n", delta_bound, num_bound, 100.0*delta_bound/num_bound); 00198 return delta_active; 00199 } 00200 00201 00202 void CSubGradientSVM::update_active(int32_t num_feat, int32_t num_vec) 00203 { 00204 for (int32_t i=0; i<num_vec; i++) 00205 { 00206 if (active[i]==1 && old_active[i]!=1) 00207 { 00208 features->add_to_dense_vec(C1*((CBinaryLabels*) m_labels)->get_label(i), i, sum_CXy_active, num_feat); 00209 if (use_bias) 00210 sum_Cy_active+=C1*((CBinaryLabels*) m_labels)->get_label(i); 00211 } 00212 else if (old_active[i]==1 && active[i]!=1) 00213 { 00214 features->add_to_dense_vec(-C1*((CBinaryLabels*) m_labels)->get_label(i), i, sum_CXy_active, num_feat); 00215 if (use_bias) 00216 sum_Cy_active-=C1*((CBinaryLabels*) m_labels)->get_label(i); 00217 } 00218 } 00219 00220 CMath::swap(active,old_active); 00221 } 00222 00223 float64_t CSubGradientSVM::line_search(int32_t num_feat, int32_t num_vec) 00224 { 00225 float64_t sum_B = 0; 00226 float64_t A_zero = 0.5*SGVector<float64_t>::dot(grad_w, grad_w, num_feat); 00227 float64_t B_zero = -SGVector<float64_t>::dot(w.vector, grad_w, num_feat); 00228 00229 int32_t num_hinge=0; 00230 00231 for (int32_t i=0; i<num_vec; i++) 00232 { 00233 float64_t p=((CBinaryLabels*) m_labels)->get_label(i)*(features->dense_dot(i, grad_w, num_feat)+grad_b); 00234 grad_proj[i]=p; 00235 if (p!=0) 00236 { 00237 hinge_point[num_hinge]=(proj[i]-1)/p; 00238 hinge_idx[num_hinge]=i; 00239 num_hinge++; 00240 00241 if (p<0) 00242 sum_B+=p; 00243 } 00244 } 00245 sum_B*=C1; 00246 00247 CMath::qsort_index(hinge_point, hinge_idx, num_hinge); 00248 00249 00250 float64_t alpha = hinge_point[0]; 00251 float64_t grad_val = 2*A_zero*alpha + B_zero + sum_B; 00252 00253 //CMath::display_vector(grad_w, num_feat, "grad_w"); 00254 //CMath::display_vector(grad_proj, num_vec, "grad_proj"); 00255 //CMath::display_vector(hinge_point, num_vec, "hinge_point"); 00256 //SG_PRINT("A_zero=%f\n", A_zero); 00257 //SG_PRINT("B_zero=%f\n", B_zero); 00258 //SG_PRINT("sum_B=%f\n", sum_B); 00259 //SG_PRINT("alpha=%f\n", alpha); 00260 //SG_PRINT("grad_val=%f\n", grad_val); 00261 00262 float64_t old_grad_val = grad_val; 00263 float64_t old_alpha = alpha; 00264 00265 for (int32_t i=1; i < num_hinge && grad_val < 0; i++) 00266 { 00267 alpha = hinge_point[i]; 00268 grad_val = 2*A_zero*alpha + B_zero + sum_B; 00269 00270 if (grad_val > 0) 00271 { 00272 ASSERT(old_grad_val-grad_val != 0); 00273 float64_t gamma = -grad_val/(old_grad_val-grad_val); 00274 alpha = old_alpha*gamma + (1-gamma)*alpha; 00275 } 00276 else 00277 { 00278 old_grad_val = grad_val; 00279 old_alpha = alpha; 00280 00281 sum_B = sum_B + CMath::abs(C1*grad_proj[hinge_idx[i]]); 00282 grad_val = 2*A_zero*alpha + B_zero + sum_B; 00283 } 00284 } 00285 00286 return alpha; 00287 } 00288 00289 float64_t CSubGradientSVM::compute_min_subgradient( 00290 int32_t num_feat, int32_t num_vec, int32_t num_active, int32_t num_bound) 00291 { 00292 float64_t dir_deriv=0; 00293 00294 if (num_bound > 0) 00295 { 00296 00297 CTime t2; 00298 SGVector<float64_t>::add(v, 1.0, w.vector, -1.0, sum_CXy_active, num_feat); 00299 00300 if (num_bound>=qpsize_max && num_it_noimprovement!=10) // if qp gets to large, lets just choose a random beta 00301 { 00302 //SG_PRINT("qpsize too large (%d>=%d) choosing random subgradient/beta\n", num_bound, qpsize_max); 00303 for (int32_t i=0; i<num_bound; i++) 00304 beta[i]=CMath::random(0.0,1.0); 00305 } 00306 else 00307 { 00308 memset(beta, 0, sizeof(float64_t)*num_bound); 00309 00310 float64_t bias_const=0; 00311 00312 if (use_bias) 00313 bias_const=1; 00314 00315 for (int32_t i=0; i<num_bound; i++) 00316 { 00317 for (int32_t j=i; j<num_bound; j++) 00318 { 00319 Z[i*num_bound+j]= 2.0*C1*C1*((CBinaryLabels*) m_labels)->get_label(idx_bound[i])*((CBinaryLabels*) m_labels)->get_label(idx_bound[j])* 00320 (features->dot(idx_bound[i], features, idx_bound[j]) + bias_const); 00321 00322 Z[j*num_bound+i]=Z[i*num_bound+j]; 00323 00324 } 00325 00326 Zv[i]=-2.0*C1*((CBinaryLabels*) m_labels)->get_label(idx_bound[i])* 00327 (features->dense_dot(idx_bound[i], v, num_feat)-sum_Cy_active); 00328 } 00329 00330 //CMath::display_matrix(Z, num_bound, num_bound, "Z"); 00331 //CMath::display_vector(Zv, num_bound, "Zv"); 00332 t2.stop(); 00333 #ifdef DEBUG_SUBGRADIENTSVM 00334 t2.time_diff_sec(true); 00335 #endif 00336 00337 CTime t; 00338 CQPBSVMLib solver(Z,num_bound, Zv,num_bound, 1.0); 00339 //solver.set_solver(QPB_SOLVER_GRADDESC); 00340 //solver.set_solver(QPB_SOLVER_GS); 00341 #ifdef USE_CPLEX 00342 solver.set_solver(QPB_SOLVER_CPLEX); 00343 #else 00344 solver.set_solver(QPB_SOLVER_SCAS); 00345 #endif 00346 00347 solver.solve_qp(beta, num_bound); 00348 00349 t.stop(); 00350 #ifdef DEBUG_SUBGRADIENTSVM 00351 tim+=t.time_diff_sec(true); 00352 #else 00353 tim+=t.time_diff_sec(false); 00354 #endif 00355 00356 //CMath::display_vector(beta, num_bound, "beta gs"); 00357 //solver.set_solver(QPB_SOLVER_CPLEX); 00358 //solver.solve_qp(beta, num_bound); 00359 //CMath::display_vector(beta, num_bound, "beta cplex"); 00360 00361 //CMath::display_vector(grad_w, num_feat, "grad_w"); 00362 //SG_PRINT("grad_b:%f\n", grad_b); 00363 } 00364 00365 SGVector<float64_t>::add(grad_w, 1.0, w.vector, -1.0, sum_CXy_active, num_feat); 00366 grad_b = -sum_Cy_active; 00367 00368 for (int32_t i=0; i<num_bound; i++) 00369 { 00370 features->add_to_dense_vec(-C1*beta[i]*((CBinaryLabels*) m_labels)->get_label(idx_bound[i]), idx_bound[i], grad_w, num_feat); 00371 if (use_bias) 00372 grad_b -= C1 * ((CBinaryLabels*) m_labels)->get_label(idx_bound[i])*beta[i]; 00373 } 00374 00375 dir_deriv = SGVector<float64_t>::dot(grad_w, v, num_feat) - grad_b*sum_Cy_active; 00376 for (int32_t i=0; i<num_bound; i++) 00377 { 00378 float64_t val= C1*((CBinaryLabels*) m_labels)->get_label(idx_bound[i])*(features->dense_dot(idx_bound[i], grad_w, num_feat)+grad_b); 00379 dir_deriv += CMath::max(0.0, val); 00380 } 00381 } 00382 else 00383 { 00384 SGVector<float64_t>::add(grad_w, 1.0, w.vector, -1.0, sum_CXy_active, num_feat); 00385 grad_b = -sum_Cy_active; 00386 00387 dir_deriv = SGVector<float64_t>::dot(grad_w, grad_w, num_feat)+ grad_b*grad_b; 00388 } 00389 00390 return dir_deriv; 00391 } 00392 00393 float64_t CSubGradientSVM::compute_objective(int32_t num_feat, int32_t num_vec) 00394 { 00395 float64_t result= 0.5 * SGVector<float64_t>::dot(w.vector,w.vector, num_feat); 00396 00397 for (int32_t i=0; i<num_vec; i++) 00398 { 00399 if (proj[i]<1.0) 00400 result += C1 * (1.0-proj[i]); 00401 } 00402 00403 return result; 00404 } 00405 00406 void CSubGradientSVM::compute_projection(int32_t num_feat, int32_t num_vec) 00407 { 00408 for (int32_t i=0; i<num_vec; i++) 00409 proj[i]=((CBinaryLabels*) m_labels)->get_label(i)*(features->dense_dot(i, w.vector, num_feat)+bias); 00410 } 00411 00412 void CSubGradientSVM::update_projection(float64_t alpha, int32_t num_vec) 00413 { 00414 SGVector<float64_t>::vec1_plus_scalar_times_vec2(proj,-alpha, grad_proj, num_vec); 00415 } 00416 00417 void CSubGradientSVM::init(int32_t num_vec, int32_t num_feat) 00418 { 00419 // alloc normal and bias inited with 0 00420 w=SGVector<float64_t>(num_feat); 00421 w.zero(); 00422 //CMath::random_vector(w, num_feat, -1.0, 1.0); 00423 bias=0; 00424 num_it_noimprovement=0; 00425 grad_b=0; 00426 qpsize_limit=5000; 00427 00428 grad_w=SG_MALLOC(float64_t, num_feat); 00429 memset(grad_w,0,sizeof(float64_t)*num_feat); 00430 00431 sum_CXy_active=SG_MALLOC(float64_t, num_feat); 00432 memset(sum_CXy_active,0,sizeof(float64_t)*num_feat); 00433 00434 v=SG_MALLOC(float64_t, num_feat); 00435 memset(v,0,sizeof(float64_t)*num_feat); 00436 00437 old_v=SG_MALLOC(float64_t, num_feat); 00438 memset(old_v,0,sizeof(float64_t)*num_feat); 00439 00440 sum_Cy_active=0; 00441 00442 proj= SG_MALLOC(float64_t, num_vec); 00443 memset(proj,0,sizeof(float64_t)*num_vec); 00444 00445 tmp_proj=SG_MALLOC(float64_t, num_vec); 00446 memset(proj,0,sizeof(float64_t)*num_vec); 00447 00448 tmp_proj_idx= SG_MALLOC(int32_t, num_vec); 00449 memset(tmp_proj_idx,0,sizeof(int32_t)*num_vec); 00450 00451 grad_proj= SG_MALLOC(float64_t, num_vec); 00452 memset(grad_proj,0,sizeof(float64_t)*num_vec); 00453 00454 hinge_point= SG_MALLOC(float64_t, num_vec); 00455 memset(hinge_point,0,sizeof(float64_t)*num_vec); 00456 00457 hinge_idx= SG_MALLOC(int32_t, num_vec); 00458 memset(hinge_idx,0,sizeof(int32_t)*num_vec); 00459 00460 active=SG_MALLOC(uint8_t, num_vec); 00461 memset(active,0,sizeof(uint8_t)*num_vec); 00462 00463 old_active=SG_MALLOC(uint8_t, num_vec); 00464 memset(old_active,0,sizeof(uint8_t)*num_vec); 00465 00466 idx_bound=SG_MALLOC(int32_t, num_vec); 00467 memset(idx_bound,0,sizeof(int32_t)*num_vec); 00468 00469 idx_active=SG_MALLOC(int32_t, num_vec); 00470 memset(idx_active,0,sizeof(int32_t)*num_vec); 00471 00472 Z=SG_MALLOC(float64_t, qpsize_limit*qpsize_limit); 00473 memset(Z,0,sizeof(float64_t)*qpsize_limit*qpsize_limit); 00474 00475 Zv=SG_MALLOC(float64_t, qpsize_limit); 00476 memset(Zv,0,sizeof(float64_t)*qpsize_limit); 00477 00478 beta=SG_MALLOC(float64_t, qpsize_limit); 00479 memset(beta,0,sizeof(float64_t)*qpsize_limit); 00480 00481 old_Z=SG_MALLOC(float64_t, qpsize_limit*qpsize_limit); 00482 memset(old_Z,0,sizeof(float64_t)*qpsize_limit*qpsize_limit); 00483 00484 old_Zv=SG_MALLOC(float64_t, qpsize_limit); 00485 memset(old_Zv,0,sizeof(float64_t)*qpsize_limit); 00486 00487 old_beta=SG_MALLOC(float64_t, qpsize_limit); 00488 memset(old_beta,0,sizeof(float64_t)*qpsize_limit); 00489 00490 } 00491 00492 void CSubGradientSVM::cleanup() 00493 { 00494 SG_FREE(hinge_idx); 00495 SG_FREE(hinge_point); 00496 SG_FREE(grad_proj); 00497 SG_FREE(proj); 00498 SG_FREE(tmp_proj); 00499 SG_FREE(tmp_proj_idx); 00500 SG_FREE(active); 00501 SG_FREE(old_active); 00502 SG_FREE(idx_bound); 00503 SG_FREE(idx_active); 00504 SG_FREE(sum_CXy_active); 00505 SG_FREE(grad_w); 00506 SG_FREE(v); 00507 SG_FREE(Z); 00508 SG_FREE(Zv); 00509 SG_FREE(beta); 00510 SG_FREE(old_v); 00511 SG_FREE(old_Z); 00512 SG_FREE(old_Zv); 00513 SG_FREE(old_beta); 00514 00515 hinge_idx=NULL; 00516 proj=NULL; 00517 active=NULL; 00518 old_active=NULL; 00519 idx_bound=NULL; 00520 idx_active=NULL; 00521 sum_CXy_active=NULL; 00522 grad_w=NULL; 00523 v=NULL; 00524 Z=NULL; 00525 Zv=NULL; 00526 beta=NULL; 00527 } 00528 00529 bool CSubGradientSVM::train_machine(CFeatures* data) 00530 { 00531 tim=0; 00532 SG_INFO("C=%f epsilon=%f\n", C1, epsilon); 00533 ASSERT(m_labels); 00534 00535 if (data) 00536 { 00537 if (!data->has_property(FP_DOT)) 00538 SG_ERROR("Specified features are not of type CDotFeatures\n"); 00539 set_features((CDotFeatures*) data); 00540 } 00541 ASSERT(get_features()); 00542 00543 int32_t num_iterations=0; 00544 int32_t num_train_labels=m_labels->get_num_labels(); 00545 int32_t num_feat=features->get_dim_feature_space(); 00546 int32_t num_vec=features->get_num_vectors(); 00547 00548 ASSERT(num_vec==num_train_labels); 00549 00550 init(num_vec, num_feat); 00551 00552 int32_t num_active=0; 00553 int32_t num_bound=0; 00554 float64_t alpha=0; 00555 float64_t dir_deriv=0; 00556 float64_t obj=0; 00557 delta_active=num_vec; 00558 last_it_noimprovement=-1; 00559 00560 work_epsilon=0.99; 00561 autoselected_epsilon=work_epsilon; 00562 00563 compute_projection(num_feat, num_vec); 00564 00565 CTime time; 00566 #ifdef DEBUG_SUBGRADIENTSVM 00567 float64_t loop_time=0; 00568 #endif 00569 while (!(CSignal::cancel_computations())) 00570 { 00571 CTime t; 00572 delta_active=find_active(num_feat, num_vec, num_active, num_bound); 00573 00574 update_active(num_feat, num_vec); 00575 00576 #ifdef DEBUG_SUBGRADIENTSVM 00577 SG_PRINT("==================================================\niteration: %d ", num_iterations); 00578 obj=compute_objective(num_feat, num_vec); 00579 SG_PRINT("objective:%.10f alpha: %.10f dir_deriv: %f num_bound: %d num_active: %d work_eps: %10.10f eps: %10.10f auto_eps: %10.10f time:%f\n", 00580 obj, alpha, dir_deriv, num_bound, num_active, work_epsilon, epsilon, autoselected_epsilon, loop_time); 00581 #else 00582 SG_ABS_PROGRESS(work_epsilon, -CMath::log10(work_epsilon), -CMath::log10(0.99999999), -CMath::log10(epsilon), 6); 00583 #endif 00584 //CMath::display_vector(w, w_dim, "w"); 00585 //SG_PRINT("bias: %f\n", bias); 00586 //CMath::display_vector(proj, num_vec, "proj"); 00587 //CMath::display_vector(idx_active, num_active, "idx_active"); 00588 //SG_PRINT("num_active: %d\n", num_active); 00589 //CMath::display_vector(idx_bound, num_bound, "idx_bound"); 00590 //SG_PRINT("num_bound: %d\n", num_bound); 00591 //CMath::display_vector(sum_CXy_active, num_feat, "sum_CXy_active"); 00592 //SG_PRINT("sum_Cy_active: %f\n", sum_Cy_active); 00593 //CMath::display_vector(grad_w, num_feat, "grad_w"); 00594 //SG_PRINT("grad_b:%f\n", grad_b); 00595 00596 dir_deriv=compute_min_subgradient(num_feat, num_vec, num_active, num_bound); 00597 00598 alpha=line_search(num_feat, num_vec); 00599 00600 if (num_it_noimprovement==10 || num_bound<qpsize_max) 00601 { 00602 float64_t norm_grad=SGVector<float64_t>::dot(grad_w, grad_w, num_feat) + 00603 grad_b*grad_b; 00604 00605 #ifdef DEBUG_SUBGRADIENTSVM 00606 SG_PRINT("CHECKING OPTIMALITY CONDITIONS: " 00607 "work_epsilon: %10.10f delta_active:%d alpha: %10.10f norm_grad: %10.10f a*norm_grad:%10.16f\n", 00608 work_epsilon, delta_active, alpha, norm_grad, CMath::abs(alpha*norm_grad)); 00609 #else 00610 SG_ABS_PROGRESS(work_epsilon, -CMath::log10(work_epsilon), -CMath::log10(0.99999999), -CMath::log10(epsilon), 6); 00611 #endif 00612 00613 if (work_epsilon<=epsilon && delta_active==0 && CMath::abs(alpha*norm_grad)<1e-6) 00614 break; 00615 else 00616 num_it_noimprovement=0; 00617 } 00618 00619 if ((dir_deriv<0 || alpha==0) && (work_epsilon<=epsilon && delta_active==0)) 00620 { 00621 if (last_it_noimprovement==num_iterations-1) 00622 { 00623 SG_PRINT("no improvement...\n"); 00624 num_it_noimprovement++; 00625 } 00626 else 00627 num_it_noimprovement=0; 00628 00629 last_it_noimprovement=num_iterations; 00630 } 00631 00632 SGVector<float64_t>::vec1_plus_scalar_times_vec2(w.vector, -alpha, grad_w, num_feat); 00633 bias-=alpha*grad_b; 00634 00635 update_projection(alpha, num_vec); 00636 //compute_projection(num_feat, num_vec); 00637 //CMath::display_vector(w.vector, w_dim, "w"); 00638 //SG_PRINT("bias: %f\n", bias); 00639 //CMath::display_vector(proj, num_vec, "proj"); 00640 00641 t.stop(); 00642 #ifdef DEBUG_SUBGRADIENTSVM 00643 loop_time=t.time_diff_sec(); 00644 #endif 00645 num_iterations++; 00646 00647 if (get_max_train_time()>0 && time.cur_time_diff()>get_max_train_time()) 00648 break; 00649 } 00650 SG_DONE(); 00651 00652 SG_INFO("converged after %d iterations\n", num_iterations); 00653 00654 obj=compute_objective(num_feat, num_vec); 00655 SG_INFO("objective: %f alpha: %f dir_deriv: %f num_bound: %d num_active: %d\n", 00656 obj, alpha, dir_deriv, num_bound, num_active); 00657 00658 #ifdef DEBUG_SUBGRADIENTSVM 00659 CMath::display_vector(w.vector, w.vlen, "w"); 00660 SG_PRINT("bias: %f\n", bias); 00661 #endif 00662 SG_INFO("solver time:%f s\n", tim); 00663 00664 cleanup(); 00665 00666 return true; 00667 }