SHOGUN
v2.0.0
|
00001 /* 00002 * This program is free software; you can redistribute it and/or modify 00003 * it under the terms of the GNU General Public License as published by 00004 * the Free Software Foundation; either version 3 of the License, or 00005 * (at your option) any later version. 00006 * 00007 * libbmrm.h: Implementation of the BMRM solver for SO training 00008 * 00009 * Copyright (C) 2012 Michal Uricar, uricamic@cmp.felk.cvut.cz 00010 * 00011 * Implementation of the BMRM solver 00012 *--------------------------------------------------------------------- */ 00013 00014 #include <stdlib.h> 00015 #include <string.h> 00016 #include <math.h> 00017 00018 #include <shogun/structure/libbmrm.h> 00019 #include <shogun/lib/external/libqp.h> 00020 #include <shogun/lib/Time.h> 00021 00022 namespace shogun 00023 { 00024 static const uint32_t QPSolverMaxIter=0xFFFFFFFF; 00025 static const float64_t epsilon=0.0; 00026 00027 static float64_t *H; 00028 static uint32_t BufSize; 00029 00030 void add_cutting_plane( 00031 bmrm_ll** tail, 00032 bool* map, 00033 float64_t* A, 00034 uint32_t free_idx, 00035 float64_t* cp_data, 00036 uint32_t dim) 00037 { 00038 ASSERT(map[free_idx]); 00039 00040 LIBBMRM_MEMCPY(A+free_idx*dim, cp_data, dim*sizeof(float64_t)); 00041 map[free_idx]=false; 00042 00043 bmrm_ll *cp=(bmrm_ll*)LIBBMRM_CALLOC(1, sizeof(bmrm_ll)); 00044 00045 if (cp==NULL) 00046 { 00047 SG_SERROR("Out of memory.\n"); 00048 return; 00049 } 00050 00051 cp->address=A+(free_idx*dim); 00052 cp->prev=*tail; 00053 cp->next=NULL; 00054 cp->idx=free_idx; 00055 (*tail)->next=cp; 00056 *tail=cp; 00057 } 00058 00059 void remove_cutting_plane( 00060 bmrm_ll** head, 00061 bmrm_ll** tail, 00062 bool* map, 00063 float64_t* icp) 00064 { 00065 bmrm_ll *cp_list_ptr=*head; 00066 00067 while(cp_list_ptr->address != icp) 00068 { 00069 cp_list_ptr=cp_list_ptr->next; 00070 } 00071 00072 if (cp_list_ptr==*head) 00073 { 00074 *head=(*head)->next; 00075 cp_list_ptr->next->prev=NULL; 00076 } 00077 else if (cp_list_ptr==*tail) 00078 { 00079 *tail=(*tail)->prev; 00080 cp_list_ptr->prev->next=NULL; 00081 } 00082 else 00083 { 00084 cp_list_ptr->prev->next=cp_list_ptr->next; 00085 cp_list_ptr->next->prev=cp_list_ptr->prev; 00086 } 00087 00088 map[cp_list_ptr->idx]=true; 00089 LIBBMRM_FREE(cp_list_ptr); 00090 } 00091 00092 /*---------------------------------------------------------------------- 00093 Returns pointer at i-th column of Hessian matrix. 00094 ----------------------------------------------------------------------*/ 00095 static const float64_t *get_col( uint32_t i) 00096 { 00097 return( &H[ BufSize*i ] ); 00098 } 00099 00100 bmrm_return_value_T svm_bmrm_solver( 00101 CStructuredModel* model, 00102 float64_t* W, 00103 float64_t TolRel, 00104 float64_t TolAbs, 00105 float64_t _lambda, 00106 uint32_t _BufSize, 00107 bool cleanICP, 00108 uint32_t cleanAfter, 00109 float64_t K, 00110 uint32_t Tmax, 00111 bool verbose) 00112 { 00113 bmrm_return_value_T bmrm; 00114 libqp_state_T qp_exitflag={0, 0, 0, 0}; 00115 float64_t *b, *beta, *diag_H, *prevW; 00116 float64_t R, *subgrad, *A, QPSolverTolRel, C=1.0, wdist=0.0; 00117 floatmax_t rsum, sq_norm_W, sq_norm_Wdiff=0.0; 00118 uint32_t *I, *ICPcounter, *ACPs, cntICP=0, cntACP=0; 00119 uint8_t S=1; 00120 uint32_t nDim=model->get_dim(); 00121 float64_t **ICPs; 00122 00123 CTime ttime; 00124 float64_t tstart, tstop; 00125 00126 uint32_t nCP_new=0; 00127 00128 bmrm_ll *CPList_head, *CPList_tail, *cp_ptr, *cp_ptr2, *cp_list=NULL; 00129 float64_t *A_1=NULL, *A_2=NULL, *H_buff; 00130 bool *map=NULL; 00131 00132 00133 tstart=ttime.cur_time_diff(false); 00134 00135 BufSize=_BufSize; 00136 QPSolverTolRel=1e-9; 00137 00138 H=NULL; 00139 b=NULL; 00140 beta=NULL; 00141 A=NULL; 00142 subgrad=NULL; 00143 diag_H=NULL; 00144 I=NULL; 00145 ICPcounter=NULL; 00146 ICPs=NULL; 00147 ACPs=NULL; 00148 H_buff=NULL; 00149 prevW=NULL; 00150 00151 H= (float64_t*) LIBBMRM_CALLOC(BufSize*BufSize, sizeof(float64_t)); 00152 00153 if (H==NULL) 00154 { 00155 bmrm.exitflag=-2; 00156 goto cleanup; 00157 } 00158 00159 A= (float64_t*) LIBBMRM_CALLOC(nDim*BufSize, sizeof(float64_t)); 00160 00161 if (A==NULL) 00162 { 00163 bmrm.exitflag=-2; 00164 goto cleanup; 00165 } 00166 00167 b= (float64_t*) LIBBMRM_CALLOC(BufSize, sizeof(float64_t)); 00168 00169 if (b==NULL) 00170 { 00171 bmrm.exitflag=-2; 00172 goto cleanup; 00173 } 00174 00175 beta= (float64_t*) LIBBMRM_CALLOC(BufSize, sizeof(float64_t)); 00176 00177 if (beta==NULL) 00178 { 00179 bmrm.exitflag=-2; 00180 goto cleanup; 00181 } 00182 00183 subgrad= (float64_t*) LIBBMRM_CALLOC(nDim, sizeof(float64_t)); 00184 00185 if (subgrad==NULL) 00186 { 00187 bmrm.exitflag=-2; 00188 goto cleanup; 00189 } 00190 00191 diag_H= (float64_t*) LIBBMRM_CALLOC(BufSize, sizeof(float64_t)); 00192 00193 if (diag_H==NULL) 00194 { 00195 bmrm.exitflag=-2; 00196 goto cleanup; 00197 } 00198 00199 I= (uint32_t*) LIBBMRM_CALLOC(BufSize, sizeof(uint32_t)); 00200 00201 if (I==NULL) 00202 { 00203 bmrm.exitflag=-2; 00204 goto cleanup; 00205 } 00206 00207 ICPcounter= (uint32_t*) LIBBMRM_CALLOC(BufSize, sizeof(uint32_t)); 00208 00209 if (ICPcounter==NULL) 00210 { 00211 bmrm.exitflag=-2; 00212 goto cleanup; 00213 } 00214 00215 ICPs= (float64_t**) LIBBMRM_CALLOC(BufSize, sizeof(float64_t*)); 00216 00217 if (ICPs==NULL) 00218 { 00219 bmrm.exitflag=-2; 00220 goto cleanup; 00221 } 00222 00223 ACPs= (uint32_t*) LIBBMRM_CALLOC(BufSize, sizeof(uint32_t)); 00224 00225 if (ACPs==NULL) 00226 { 00227 bmrm.exitflag=-2; 00228 goto cleanup; 00229 } 00230 00231 map= (bool*) LIBBMRM_CALLOC(BufSize, sizeof(bool)); 00232 00233 if (map==NULL) 00234 { 00235 bmrm.exitflag=-2; 00236 goto cleanup; 00237 } 00238 00239 memset( (bool*) map, true, BufSize); 00240 00241 cp_list= (bmrm_ll*) LIBBMRM_CALLOC(1, sizeof(bmrm_ll)); 00242 00243 if (cp_list==NULL) 00244 { 00245 bmrm.exitflag=-2; 00246 goto cleanup; 00247 } 00248 00249 /* Temporary buffers for ICP removal */ 00250 H_buff= (float64_t*) LIBBMRM_CALLOC(BufSize*BufSize, sizeof(float64_t)); 00251 00252 if (H_buff==NULL) 00253 { 00254 bmrm.exitflag=-2; 00255 goto cleanup; 00256 } 00257 00258 prevW= (float64_t*) LIBBMRM_CALLOC(nDim, sizeof(float64_t)); 00259 00260 if (prevW==NULL) 00261 { 00262 bmrm.exitflag=-2; 00263 goto cleanup; 00264 } 00265 00266 bmrm.hist_Fp = SGVector< float64_t >(BufSize); 00267 bmrm.hist_Fd = SGVector< float64_t >(BufSize); 00268 bmrm.hist_wdist = SGVector< float64_t >(BufSize); 00269 00270 /* Iinitial solution */ 00271 R=model->risk(subgrad, W); 00272 00273 bmrm.nCP=0; 00274 bmrm.nIter=0; 00275 bmrm.exitflag=0; 00276 00277 b[0]=-R; 00278 00279 /* Cutting plane auxiliary double linked list */ 00280 00281 LIBBMRM_MEMCPY(A, subgrad, nDim*sizeof(float64_t)); 00282 map[0]=false; 00283 cp_list->address=&A[0]; 00284 cp_list->idx=0; 00285 cp_list->prev=NULL; 00286 cp_list->next=NULL; 00287 CPList_head=cp_list; 00288 CPList_tail=cp_list; 00289 00290 /* Compute initial value of Fp, Fd, assuming that W is zero vector */ 00291 00292 sq_norm_W=0; 00293 bmrm.Fp=R+0.5*_lambda*sq_norm_W; 00294 bmrm.Fd=-LIBBMRM_PLUS_INF; 00295 00296 tstop=ttime.cur_time_diff(false); 00297 00298 /* Verbose output */ 00299 00300 if (verbose) 00301 SG_SPRINT("%4d: tim=%.3lf, Fp=%lf, Fd=%lf, R=%lf\n", 00302 bmrm.nIter, tstop-tstart, bmrm.Fp, bmrm.Fd, R); 00303 00304 /* store Fp, Fd and wdist history */ 00305 bmrm.hist_Fp[0]=bmrm.Fp; 00306 bmrm.hist_Fd[0]=bmrm.Fd; 00307 bmrm.hist_wdist[0]=0.0; 00308 00309 /* main loop */ 00310 00311 while (bmrm.exitflag==0) 00312 { 00313 tstart=ttime.cur_time_diff(false); 00314 bmrm.nIter++; 00315 00316 /* Update H */ 00317 00318 if (bmrm.nCP>0) 00319 { 00320 A_2=get_cutting_plane(CPList_tail); 00321 cp_ptr=CPList_head; 00322 00323 for (uint32_t i=0; i<bmrm.nCP; ++i) 00324 { 00325 A_1=get_cutting_plane(cp_ptr); 00326 cp_ptr=cp_ptr->next; 00327 rsum=0.0; 00328 00329 for (uint32_t j=0; j<nDim; ++j) 00330 { 00331 rsum+=A_1[j]*A_2[j]; 00332 } 00333 00334 H[LIBBMRM_INDEX(i, bmrm.nCP, BufSize)]=rsum/_lambda; 00335 } 00336 00337 for (uint32_t i=0; i<bmrm.nCP; ++i) 00338 { 00339 H[LIBBMRM_INDEX(bmrm.nCP, i, BufSize)]= 00340 H[LIBBMRM_INDEX(i, bmrm.nCP, BufSize)]; 00341 } 00342 } 00343 00344 rsum=0.0; 00345 A_2=get_cutting_plane(CPList_tail); 00346 00347 for (uint32_t i=0; i<nDim; ++i) 00348 rsum+=A_2[i]*A_2[i]; 00349 00350 H[LIBBMRM_INDEX(bmrm.nCP, bmrm.nCP, BufSize)]=rsum/_lambda; 00351 00352 diag_H[bmrm.nCP]=H[LIBBMRM_INDEX(bmrm.nCP, bmrm.nCP, BufSize)]; 00353 I[bmrm.nCP]=1; 00354 00355 bmrm.nCP++; 00356 beta[bmrm.nCP]=0.0; // [beta; 0] 00357 00358 /* call QP solver */ 00359 qp_exitflag=libqp_splx_solver(&get_col, diag_H, b, &C, I, &S, beta, 00360 bmrm.nCP, QPSolverMaxIter, 0.0, QPSolverTolRel, -LIBBMRM_PLUS_INF, 0); 00361 00362 bmrm.qp_exitflag=qp_exitflag.exitflag; 00363 00364 /* Update ICPcounter (add one to unused and reset used) 00365 * + compute number of active CPs */ 00366 bmrm.nzA=0; 00367 00368 for (uint32_t aaa=0; aaa<bmrm.nCP; ++aaa) 00369 { 00370 if (beta[aaa]>epsilon) 00371 { 00372 ++bmrm.nzA; 00373 ICPcounter[aaa]=0; 00374 } 00375 else 00376 { 00377 ICPcounter[aaa]+=1; 00378 } 00379 } 00380 00381 /* W update */ 00382 for (uint32_t i=0; i<nDim; ++i) 00383 { 00384 rsum=0.0; 00385 cp_ptr=CPList_head; 00386 00387 for (uint32_t j=0; j<bmrm.nCP; ++j) 00388 { 00389 A_1=get_cutting_plane(cp_ptr); 00390 cp_ptr=cp_ptr->next; 00391 rsum+=A_1[i]*beta[j]; 00392 } 00393 00394 W[i]=-rsum/_lambda; 00395 } 00396 00397 /* risk and subgradient computation */ 00398 R = model->risk(subgrad, W); 00399 b[bmrm.nCP]=-R; 00400 add_cutting_plane(&CPList_tail, map, A, 00401 find_free_idx(map, BufSize), subgrad, nDim); 00402 00403 sq_norm_W=0.0; 00404 sq_norm_Wdiff=0.0; 00405 00406 for (uint32_t j=0; j<nDim; ++j) 00407 { 00408 b[bmrm.nCP]+=subgrad[j]*W[j]; 00409 sq_norm_W+=W[j]*W[j]; 00410 sq_norm_Wdiff+=(W[j]-prevW[j])*(W[j]-prevW[j]); 00411 } 00412 00413 bmrm.Fp=R+0.5*_lambda*sq_norm_W; 00414 bmrm.Fd=-qp_exitflag.QP; 00415 wdist=::sqrt(sq_norm_Wdiff); 00416 00417 /* Stopping conditions */ 00418 00419 if (bmrm.Fp - bmrm.Fd <= TolRel*LIBBMRM_ABS(bmrm.Fp)) 00420 bmrm.exitflag=1; 00421 00422 if (bmrm.Fp - bmrm.Fd <= TolAbs) 00423 bmrm.exitflag=2; 00424 00425 if (bmrm.nCP >= BufSize) 00426 bmrm.exitflag=-1; 00427 00428 tstop=ttime.cur_time_diff(false); 00429 00430 /* Verbose output */ 00431 00432 if (verbose) 00433 SG_SPRINT("%4d: tim=%.3lf, Fp=%lf, Fd=%lf, (Fp-Fd)=%lf, (Fp-Fd)/Fp=%lf, R=%lf, nCP=%d, nzA=%d, QPexitflag=%d\n", 00434 bmrm.nIter, tstop-tstart, bmrm.Fp, bmrm.Fd, bmrm.Fp-bmrm.Fd, 00435 (bmrm.Fp-bmrm.Fd)/bmrm.Fp, R, bmrm.nCP, bmrm.nzA, qp_exitflag.exitflag); 00436 00437 /* Keep Fp, Fd and w_dist history */ 00438 bmrm.hist_Fp[bmrm.nIter]=bmrm.Fp; 00439 bmrm.hist_Fd[bmrm.nIter]=bmrm.Fd; 00440 bmrm.hist_wdist[bmrm.nIter]=wdist; 00441 00442 /* Check size of Buffer */ 00443 00444 if (bmrm.nCP>=BufSize) 00445 { 00446 bmrm.exitflag=-2; 00447 SG_SERROR("Buffer exceeded.\n"); 00448 } 00449 00450 /* keep W (for wdist history track) */ 00451 LIBBMRM_MEMCPY(prevW, W, nDim*sizeof(float64_t)); 00452 00453 /* Inactive Cutting Planes (ICP) removal */ 00454 if (cleanICP) 00455 { 00456 /* find ICP */ 00457 cntICP=0; 00458 cntACP=0; 00459 cp_ptr=CPList_head; 00460 uint32_t tmp_idx=0; 00461 00462 while (cp_ptr != CPList_tail) 00463 { 00464 if (ICPcounter[tmp_idx++]>=cleanAfter) 00465 { 00466 ICPs[cntICP++]=cp_ptr->address; 00467 } 00468 else 00469 { 00470 ACPs[cntACP++]=tmp_idx-1; 00471 } 00472 00473 cp_ptr=cp_ptr->next; 00474 } 00475 00476 /* do ICP removal */ 00477 if (cntICP > 0) 00478 { 00479 nCP_new=bmrm.nCP-cntICP; 00480 00481 for (uint32_t i=0; i<cntICP; ++i) 00482 { 00483 tmp_idx=0; 00484 cp_ptr=CPList_head; 00485 00486 while(cp_ptr->address != ICPs[i]) 00487 { 00488 cp_ptr=cp_ptr->next; 00489 tmp_idx++; 00490 } 00491 00492 remove_cutting_plane(&CPList_head, &CPList_tail, map, ICPs[i]); 00493 00494 LIBBMRM_MEMMOVE(b+tmp_idx, b+tmp_idx+1, 00495 (bmrm.nCP-tmp_idx)*sizeof(float64_t)); 00496 LIBBMRM_MEMMOVE(beta+tmp_idx, beta+tmp_idx+1, 00497 (bmrm.nCP-tmp_idx)*sizeof(float64_t)); 00498 LIBBMRM_MEMMOVE(diag_H+tmp_idx, diag_H+tmp_idx+1, 00499 (bmrm.nCP-tmp_idx)*sizeof(float64_t)); 00500 LIBBMRM_MEMMOVE(I+tmp_idx, I+tmp_idx+1, 00501 (bmrm.nCP-tmp_idx)*sizeof(uint32_t)); 00502 LIBBMRM_MEMMOVE(ICPcounter+tmp_idx, ICPcounter+tmp_idx+1, 00503 (bmrm.nCP-tmp_idx)*sizeof(uint32_t)); 00504 } 00505 00506 /* H */ 00507 for (uint32_t i=0; i < nCP_new; ++i) 00508 { 00509 for (uint32_t j=0; j < nCP_new; ++j) 00510 { 00511 H_buff[LIBBMRM_INDEX(i, j, BufSize)]= 00512 H[LIBBMRM_INDEX(ACPs[i], ACPs[j], BufSize)]; 00513 } 00514 } 00515 00516 for (uint32_t i=0; i<nCP_new; ++i) 00517 for (uint32_t j=0; j<nCP_new; ++j) 00518 H[LIBBMRM_INDEX(i, j, BufSize)]= 00519 H_buff[LIBBMRM_INDEX(i, j, BufSize)]; 00520 00521 bmrm.nCP=nCP_new; 00522 } 00523 } 00524 } /* end of main loop */ 00525 00526 bmrm.hist_Fp.resize_vector(bmrm.nIter); 00527 bmrm.hist_Fd.resize_vector(bmrm.nIter); 00528 bmrm.hist_wdist.resize_vector(bmrm.nIter); 00529 00530 cp_ptr=CPList_head; 00531 00532 while(cp_ptr!=NULL) 00533 { 00534 cp_ptr2=cp_ptr; 00535 cp_ptr=cp_ptr->next; 00536 LIBBMRM_FREE(cp_ptr2); 00537 cp_ptr2=NULL; 00538 } 00539 00540 cp_list=NULL; 00541 00542 cleanup: 00543 00544 LIBBMRM_FREE(H); 00545 LIBBMRM_FREE(b); 00546 LIBBMRM_FREE(beta); 00547 LIBBMRM_FREE(A); 00548 LIBBMRM_FREE(subgrad); 00549 LIBBMRM_FREE(diag_H); 00550 LIBBMRM_FREE(I); 00551 LIBBMRM_FREE(ICPcounter); 00552 LIBBMRM_FREE(ICPs); 00553 LIBBMRM_FREE(ACPs); 00554 LIBBMRM_FREE(H_buff); 00555 LIBBMRM_FREE(map); 00556 LIBBMRM_FREE(prevW); 00557 00558 if (cp_list) 00559 LIBBMRM_FREE(cp_list); 00560 00561 return(bmrm); 00562 } 00563 }