SHOGUN
v2.0.0
|
00001 /* 00002 * Copyright (c) 2007-2009 The LIBLINEAR Project. 00003 * All rights reserved. 00004 * 00005 * Redistribution and use in source and binary forms, with or without 00006 * modification, are permitted provided that the following conditions 00007 * are met: 00008 * 00009 * 1. Redistributions of source code must retain the above copyright 00010 * notice, this list of conditions and the following disclaimer. 00011 * 00012 * 2. Redistributions in binary form must reproduce the above copyright 00013 * notice, this list of conditions and the following disclaimer in the 00014 * documentation and/or other materials provided with the distribution. 00015 * 00016 * 3. Neither name of copyright holders nor the names of its contributors 00017 * may be used to endorse or promote products derived from this software 00018 * without specific prior written permission. 00019 * 00020 * 00021 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 00022 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 00023 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 00024 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR 00025 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 00026 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 00027 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 00028 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 00029 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 00030 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 00031 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 00032 */ 00033 #include <shogun/lib/config.h> 00034 #ifndef DOXYGEN_SHOULD_SKIP_THIS 00035 #ifdef HAVE_LAPACK 00036 #include <math.h> 00037 #include <stdio.h> 00038 #include <stdlib.h> 00039 #include <string.h> 00040 #include <stdarg.h> 00041 00042 #include <shogun/mathematics/Math.h> 00043 #include <shogun/optimization/liblinear/shogun_liblinear.h> 00044 #include <shogun/optimization/liblinear/tron.h> 00045 #include <shogun/lib/Time.h> 00046 #include <shogun/lib/Signal.h> 00047 00048 using namespace shogun; 00049 00050 l2r_lr_fun::l2r_lr_fun(const problem *p, float64_t* Cs) 00051 { 00052 int l=p->l; 00053 00054 this->m_prob = p; 00055 00056 z = SG_MALLOC(double, l); 00057 D = SG_MALLOC(double, l); 00058 C = Cs; 00059 } 00060 00061 l2r_lr_fun::~l2r_lr_fun() 00062 { 00063 SG_FREE(z); 00064 SG_FREE(D); 00065 } 00066 00067 00068 double l2r_lr_fun::fun(double *w) 00069 { 00070 int i; 00071 double f=0; 00072 float64_t *y=m_prob->y; 00073 int l=m_prob->l; 00074 int32_t n=m_prob->n; 00075 00076 Xv(w, z); 00077 for(i=0;i<l;i++) 00078 { 00079 double yz = y[i]*z[i]; 00080 if (yz >= 0) 00081 f += C[i]*log(1 + exp(-yz)); 00082 else 00083 f += C[i]*(-yz+log(1 + exp(yz))); 00084 } 00085 f += 0.5 *SGVector<float64_t>::dot(w,w,n); 00086 00087 return(f); 00088 } 00089 00090 void l2r_lr_fun::grad(double *w, double *g) 00091 { 00092 int i; 00093 float64_t *y=m_prob->y; 00094 int l=m_prob->l; 00095 int w_size=get_nr_variable(); 00096 00097 for(i=0;i<l;i++) 00098 { 00099 z[i] = 1/(1 + exp(-y[i]*z[i])); 00100 D[i] = z[i]*(1-z[i]); 00101 z[i] = C[i]*(z[i]-1)*y[i]; 00102 } 00103 XTv(z, g); 00104 00105 for(i=0;i<w_size;i++) 00106 g[i] = w[i] + g[i]; 00107 } 00108 00109 int l2r_lr_fun::get_nr_variable() 00110 { 00111 return m_prob->n; 00112 } 00113 00114 void l2r_lr_fun::Hv(double *s, double *Hs) 00115 { 00116 int i; 00117 int l=m_prob->l; 00118 int w_size=get_nr_variable(); 00119 double *wa = SG_MALLOC(double, l); 00120 00121 Xv(s, wa); 00122 for(i=0;i<l;i++) 00123 wa[i] = C[i]*D[i]*wa[i]; 00124 00125 XTv(wa, Hs); 00126 for(i=0;i<w_size;i++) 00127 Hs[i] = s[i] + Hs[i]; 00128 SG_FREE(wa); 00129 } 00130 00131 void l2r_lr_fun::Xv(double *v, double *res_Xv) 00132 { 00133 int32_t l=m_prob->l; 00134 int32_t n=m_prob->n; 00135 float64_t bias=0; 00136 00137 if (m_prob->use_bias) 00138 { 00139 n--; 00140 bias=v[n]; 00141 } 00142 00143 m_prob->x->dense_dot_range(res_Xv, 0, l, NULL, v, n, bias); 00144 } 00145 00146 void l2r_lr_fun::XTv(double *v, double *res_XTv) 00147 { 00148 int l=m_prob->l; 00149 int32_t n=m_prob->n; 00150 00151 memset(res_XTv, 0, sizeof(double)*m_prob->n); 00152 00153 if (m_prob->use_bias) 00154 n--; 00155 00156 for (int32_t i=0;i<l;i++) 00157 { 00158 m_prob->x->add_to_dense_vec(v[i], i, res_XTv, n); 00159 00160 if (m_prob->use_bias) 00161 res_XTv[n]+=v[i]; 00162 } 00163 } 00164 00165 l2r_l2_svc_fun::l2r_l2_svc_fun(const problem *p, double* Cs) 00166 { 00167 int l=p->l; 00168 00169 this->m_prob = p; 00170 00171 z = SG_MALLOC(double, l); 00172 D = SG_MALLOC(double, l); 00173 I = SG_MALLOC(int, l); 00174 C=Cs; 00175 00176 } 00177 00178 l2r_l2_svc_fun::~l2r_l2_svc_fun() 00179 { 00180 SG_FREE(z); 00181 SG_FREE(D); 00182 SG_FREE(I); 00183 } 00184 00185 double l2r_l2_svc_fun::fun(double *w) 00186 { 00187 int i; 00188 double f=0; 00189 float64_t *y=m_prob->y; 00190 int l=m_prob->l; 00191 int w_size=get_nr_variable(); 00192 00193 Xv(w, z); 00194 for(i=0;i<l;i++) 00195 { 00196 z[i] = y[i]*z[i]; 00197 double d = 1-z[i]; 00198 if (d > 0) 00199 f += C[i]*d*d; 00200 } 00201 f += 0.5*SGVector<float64_t>::dot(w, w, w_size); 00202 00203 return(f); 00204 } 00205 00206 void l2r_l2_svc_fun::grad(double *w, double *g) 00207 { 00208 int i; 00209 float64_t *y=m_prob->y; 00210 int l=m_prob->l; 00211 int w_size=get_nr_variable(); 00212 00213 sizeI = 0; 00214 for (i=0;i<l;i++) 00215 if (z[i] < 1) 00216 { 00217 z[sizeI] = C[i]*y[i]*(z[i]-1); 00218 I[sizeI] = i; 00219 sizeI++; 00220 } 00221 subXTv(z, g); 00222 00223 for(i=0;i<w_size;i++) 00224 g[i] = w[i] + 2*g[i]; 00225 } 00226 00227 int l2r_l2_svc_fun::get_nr_variable() 00228 { 00229 return m_prob->n; 00230 } 00231 00232 void l2r_l2_svc_fun::Hv(double *s, double *Hs) 00233 { 00234 int i; 00235 int l=m_prob->l; 00236 int w_size=get_nr_variable(); 00237 double *wa = SG_MALLOC(double, l); 00238 00239 subXv(s, wa); 00240 for(i=0;i<sizeI;i++) 00241 wa[i] = C[I[i]]*wa[i]; 00242 00243 subXTv(wa, Hs); 00244 for(i=0;i<w_size;i++) 00245 Hs[i] = s[i] + 2*Hs[i]; 00246 SG_FREE(wa); 00247 } 00248 00249 void l2r_l2_svc_fun::Xv(double *v, double *res_Xv) 00250 { 00251 int32_t l=m_prob->l; 00252 int32_t n=m_prob->n; 00253 float64_t bias=0; 00254 00255 if (m_prob->use_bias) 00256 { 00257 n--; 00258 bias=v[n]; 00259 } 00260 00261 m_prob->x->dense_dot_range(res_Xv, 0, l, NULL, v, n, bias); 00262 } 00263 00264 void l2r_l2_svc_fun::subXv(double *v, double *res_Xv) 00265 { 00266 int32_t n=m_prob->n; 00267 float64_t bias=0; 00268 00269 if (m_prob->use_bias) 00270 { 00271 n--; 00272 bias=v[n]; 00273 } 00274 00275 m_prob->x->dense_dot_range_subset(I, sizeI, res_Xv, NULL, v, n, bias); 00276 00277 /*for (int32_t i=0;i<sizeI;i++) 00278 { 00279 res_Xv[i]=m_prob->x->dense_dot(I[i], v, n); 00280 00281 if (m_prob->use_bias) 00282 res_Xv[i]+=bias; 00283 }*/ 00284 } 00285 00286 void l2r_l2_svc_fun::subXTv(double *v, double *XTv) 00287 { 00288 int32_t n=m_prob->n; 00289 00290 if (m_prob->use_bias) 00291 n--; 00292 00293 memset(XTv, 0, sizeof(float64_t)*m_prob->n); 00294 for (int32_t i=0;i<sizeI;i++) 00295 { 00296 m_prob->x->add_to_dense_vec(v[i], I[i], XTv, n); 00297 00298 if (m_prob->use_bias) 00299 XTv[n]+=v[i]; 00300 } 00301 } 00302 00303 l2r_l2_svr_fun::l2r_l2_svr_fun(const problem *prob, double *Cs, double p): 00304 l2r_l2_svc_fun(prob, Cs) 00305 { 00306 m_p = p; 00307 } 00308 00309 double l2r_l2_svr_fun::fun(double *w) 00310 { 00311 int i; 00312 double f=0; 00313 double *y=m_prob->y; 00314 int l=m_prob->l; 00315 int w_size=get_nr_variable(); 00316 double d; 00317 00318 Xv(w, z); 00319 00320 for(i=0;i<w_size;i++) 00321 f += w[i]*w[i]; 00322 f /= 2; 00323 for(i=0;i<l;i++) 00324 { 00325 d = z[i] - y[i]; 00326 if(d < -m_p) 00327 f += C[i]*(d+m_p)*(d+m_p); 00328 else if(d > m_p) 00329 f += C[i]*(d-m_p)*(d-m_p); 00330 } 00331 00332 return(f); 00333 } 00334 00335 void l2r_l2_svr_fun::grad(double *w, double *g) 00336 { 00337 int i; 00338 double *y=m_prob->y; 00339 int l=m_prob->l; 00340 int w_size=get_nr_variable(); 00341 double d; 00342 00343 sizeI = 0; 00344 for(i=0;i<l;i++) 00345 { 00346 d = z[i] - y[i]; 00347 00348 // generate index set I 00349 if(d < -m_p) 00350 { 00351 z[sizeI] = C[i]*(d+m_p); 00352 I[sizeI] = i; 00353 sizeI++; 00354 } 00355 else if(d > m_p) 00356 { 00357 z[sizeI] = C[i]*(d-m_p); 00358 I[sizeI] = i; 00359 sizeI++; 00360 } 00361 00362 } 00363 subXTv(z, g); 00364 00365 for(i=0;i<w_size;i++) 00366 g[i] = w[i] + 2*g[i]; 00367 } 00368 00369 00370 // A coordinate descent algorithm for 00371 // multi-class support vector machines by Crammer and Singer 00372 // 00373 // min_{\alpha} 0.5 \sum_m ||w_m(\alpha)||^2 + \sum_i \sum_m e^m_i alpha^m_i 00374 // s.t. \alpha^m_i <= C^m_i \forall m,i , \sum_m \alpha^m_i=0 \forall i 00375 // 00376 // where e^m_i = 0 if y_i = m, 00377 // e^m_i = 1 if y_i != m, 00378 // C^m_i = C if m = y_i, 00379 // C^m_i = 0 if m != y_i, 00380 // and w_m(\alpha) = \sum_i \alpha^m_i x_i 00381 // 00382 // Given: 00383 // x, y, C 00384 // eps is the stopping tolerance 00385 // 00386 // solution will be put in w 00387 00388 #define GETI(i) (prob->y[i]) 00389 // To support weights for instances, use GETI(i) (i) 00390 00391 Solver_MCSVM_CS::Solver_MCSVM_CS(const problem *p, int n_class, 00392 double *weighted_C, double *w0_reg, 00393 double epsilon, int max_it, double max_time, 00394 mcsvm_state* given_state) 00395 { 00396 this->w_size = p->n; 00397 this->l = p->l; 00398 this->nr_class = n_class; 00399 this->eps = epsilon; 00400 this->max_iter = max_it; 00401 this->prob = p; 00402 this->C = weighted_C; 00403 this->w0 = w0_reg; 00404 this->max_train_time = max_time; 00405 this->state = given_state; 00406 } 00407 00408 Solver_MCSVM_CS::~Solver_MCSVM_CS() 00409 { 00410 } 00411 00412 int compare_double(const void *a, const void *b) 00413 { 00414 if(*(double *)a > *(double *)b) 00415 return -1; 00416 if(*(double *)a < *(double *)b) 00417 return 1; 00418 return 0; 00419 } 00420 00421 void Solver_MCSVM_CS::solve_sub_problem(double A_i, int yi, double C_yi, int active_i, double *alpha_new) 00422 { 00423 int r; 00424 double *D=SGVector<float64_t>::clone_vector(state->B, active_i); 00425 00426 if(yi < active_i) 00427 D[yi] += A_i*C_yi; 00428 qsort(D, active_i, sizeof(double), compare_double); 00429 00430 double beta = D[0] - A_i*C_yi; 00431 for(r=1;r<active_i && beta<r*D[r];r++) 00432 beta += D[r]; 00433 00434 beta /= r; 00435 for(r=0;r<active_i;r++) 00436 { 00437 if(r == yi) 00438 alpha_new[r] = CMath::min(C_yi, (beta-state->B[r])/A_i); 00439 else 00440 alpha_new[r] = CMath::min((double)0, (beta - state->B[r])/A_i); 00441 } 00442 SG_FREE(D); 00443 } 00444 00445 bool Solver_MCSVM_CS::be_shrunk(int i, int m, int yi, double alpha_i, double minG) 00446 { 00447 double bound = 0; 00448 if(m == yi) 00449 bound = C[int32_t(GETI(i))]; 00450 if(alpha_i == bound && state->G[m] < minG) 00451 return true; 00452 return false; 00453 } 00454 00455 void Solver_MCSVM_CS::solve() 00456 { 00457 int i, m, s, k; 00458 int iter = 0; 00459 double *w,*B,*G,*alpha,*alpha_new,*QD,*d_val; 00460 int *index,*d_ind,*alpha_index,*y_index,*active_size_i; 00461 00462 if (!state->allocated) 00463 { 00464 state->w = SG_CALLOC(double, nr_class*w_size); 00465 state->B = SG_CALLOC(double, nr_class); 00466 state->G = SG_CALLOC(double, nr_class); 00467 state->alpha = SG_CALLOC(double, l*nr_class); 00468 state->alpha_new = SG_CALLOC(double, nr_class); 00469 state->index = SG_CALLOC(int, l); 00470 state->QD = SG_CALLOC(double, l); 00471 state->d_ind = SG_CALLOC(int, nr_class); 00472 state->d_val = SG_CALLOC(double, nr_class); 00473 state->alpha_index = SG_CALLOC(int, nr_class*l); 00474 state->y_index = SG_CALLOC(int, l); 00475 state->active_size_i = SG_CALLOC(int, l); 00476 state->allocated = true; 00477 } 00478 w = state->w; 00479 B = state->B; 00480 G = state->G; 00481 alpha = state->alpha; 00482 alpha_new = state->alpha_new; 00483 index = state->index; 00484 QD = state->QD; 00485 d_ind = state->d_ind; 00486 d_val = state->d_val; 00487 alpha_index = state->alpha_index; 00488 y_index = state->y_index; 00489 active_size_i = state->active_size_i; 00490 00491 double* tx = SG_MALLOC(double, w_size); 00492 int dim = prob->x->get_dim_feature_space(); 00493 00494 int active_size = l; 00495 double eps_shrink = CMath::max(10.0*eps, 1.0); // stopping tolerance for shrinking 00496 bool start_from_all = true; 00497 CTime start_time; 00498 // initial 00499 if (!state->inited) 00500 { 00501 for(i=0;i<l;i++) 00502 { 00503 for(m=0;m<nr_class;m++) 00504 alpha_index[i*nr_class+m] = m; 00505 00506 QD[i] = prob->x->dot(i, prob->x,i); 00507 if (prob->use_bias) 00508 QD[i] += 1.0; 00509 00510 active_size_i[i] = nr_class; 00511 y_index[i] = prob->y[i]; 00512 index[i] = i; 00513 } 00514 state->inited = true; 00515 } 00516 00517 while(iter < max_iter && !CSignal::cancel_computations()) 00518 { 00519 double stopping = -CMath::INFTY; 00520 for(i=0;i<active_size;i++) 00521 { 00522 int j = i+rand()%(active_size-i); 00523 CMath::swap(index[i], index[j]); 00524 } 00525 for(s=0;s<active_size;s++) 00526 { 00527 i = index[s]; 00528 double Ai = QD[i]; 00529 double *alpha_i = &alpha[i*nr_class]; 00530 int *alpha_index_i = &alpha_index[i*nr_class]; 00531 00532 if(Ai > 0) 00533 { 00534 for(m=0;m<active_size_i[i];m++) 00535 G[m] = 1; 00536 if(y_index[i] < active_size_i[i]) 00537 G[y_index[i]] = 0; 00538 00539 memset(tx,0,dim*sizeof(double)); 00540 prob->x->add_to_dense_vec(1.0,i,tx,dim); 00541 for (k=0; k<dim; k++) 00542 { 00543 if (tx[k]==0.0) 00544 continue; 00545 00546 double* w_i = &w[k*nr_class]; 00547 for (m=0; m<active_size_i[i]; m++) 00548 G[m] += w_i[alpha_index_i[m]]*tx[k]; 00549 } 00550 00551 // experimental 00552 // *** 00553 if (prob->use_bias) 00554 { 00555 double *w_i = &w[(w_size-1)*nr_class]; 00556 for(m=0; m<active_size_i[i]; m++) 00557 G[m] += w_i[alpha_index_i[m]]; 00558 } 00559 if (w0) 00560 { 00561 for (k=0; k<dim; k++) 00562 { 00563 double *w0_i = &w0[k*nr_class]; 00564 for(m=0; m<active_size_i[i]; m++) 00565 G[m] += w0_i[alpha_index_i[m]]; 00566 } 00567 } 00568 // *** 00569 00570 double minG = CMath::INFTY; 00571 double maxG = -CMath::INFTY; 00572 for(m=0;m<active_size_i[i];m++) 00573 { 00574 if(alpha_i[alpha_index_i[m]] < 0 && G[m] < minG) 00575 minG = G[m]; 00576 if(G[m] > maxG) 00577 maxG = G[m]; 00578 } 00579 if(y_index[i] < active_size_i[i]) 00580 if(alpha_i[int32_t(prob->y[i])] < C[int32_t(GETI(i))] && G[y_index[i]] < minG) 00581 minG = G[y_index[i]]; 00582 00583 for(m=0;m<active_size_i[i];m++) 00584 { 00585 if(be_shrunk(i, m, y_index[i], alpha_i[alpha_index_i[m]], minG)) 00586 { 00587 active_size_i[i]--; 00588 while(active_size_i[i]>m) 00589 { 00590 if(!be_shrunk(i, active_size_i[i], y_index[i], 00591 alpha_i[alpha_index_i[active_size_i[i]]], minG)) 00592 { 00593 CMath::swap(alpha_index_i[m], alpha_index_i[active_size_i[i]]); 00594 CMath::swap(G[m], G[active_size_i[i]]); 00595 if(y_index[i] == active_size_i[i]) 00596 y_index[i] = m; 00597 else if(y_index[i] == m) 00598 y_index[i] = active_size_i[i]; 00599 break; 00600 } 00601 active_size_i[i]--; 00602 } 00603 } 00604 } 00605 00606 if(active_size_i[i] <= 1) 00607 { 00608 active_size--; 00609 CMath::swap(index[s], index[active_size]); 00610 s--; 00611 continue; 00612 } 00613 00614 if(maxG-minG <= 1e-12) 00615 continue; 00616 else 00617 stopping = CMath::CMath::max(maxG - minG, stopping); 00618 00619 for(m=0;m<active_size_i[i];m++) 00620 B[m] = G[m] - Ai*alpha_i[alpha_index_i[m]] ; 00621 00622 solve_sub_problem(Ai, y_index[i], C[int32_t(GETI(i))], active_size_i[i], alpha_new); 00623 int nz_d = 0; 00624 for(m=0;m<active_size_i[i];m++) 00625 { 00626 double d = alpha_new[m] - alpha_i[alpha_index_i[m]]; 00627 alpha_i[alpha_index_i[m]] = alpha_new[m]; 00628 if(fabs(d) >= 1e-12) 00629 { 00630 d_ind[nz_d] = alpha_index_i[m]; 00631 d_val[nz_d] = d; 00632 nz_d++; 00633 } 00634 } 00635 00636 memset(tx,0,dim*sizeof(double)); 00637 prob->x->add_to_dense_vec(1.0,i,tx,dim); 00638 for (k=0; k<dim; k++) 00639 { 00640 if (tx[k]==0.0) 00641 continue; 00642 00643 double* w_i = &w[k*nr_class]; 00644 for (m=0; m<nz_d; m++) 00645 w_i[d_ind[m]] += d_val[m]*tx[k]; 00646 } 00647 // experimental 00648 // *** 00649 if (prob->use_bias) 00650 { 00651 double *w_i = &w[(w_size-1)*nr_class]; 00652 for(m=0;m<nz_d;m++) 00653 w_i[d_ind[m]] += d_val[m]; 00654 } 00655 // *** 00656 } 00657 } 00658 00659 iter++; 00660 /* 00661 if(iter % 10 == 0) 00662 { 00663 SG_SINFO("."); 00664 } 00665 */ 00666 00667 if(stopping < eps_shrink) 00668 { 00669 if(stopping < eps && start_from_all == true) 00670 break; 00671 else 00672 { 00673 active_size = l; 00674 for(i=0;i<l;i++) 00675 active_size_i[i] = nr_class; 00676 //SG_SINFO("*"); 00677 eps_shrink = CMath::max(eps_shrink/2, eps); 00678 start_from_all = true; 00679 } 00680 } 00681 else 00682 start_from_all = false; 00683 00684 if (max_train_time!=0.0 && max_train_time < start_time.cur_time_diff()) 00685 break; 00686 } 00687 00688 SG_SINFO("\noptimization finished, #iter = %d\n",iter); 00689 if (iter >= max_iter) 00690 SG_SINFO("Warning: reaching max number of iterations\n"); 00691 00692 SG_FREE(tx); 00693 } 00694 00695 // 00696 // Interface functions 00697 // 00698 00699 void destroy_model(struct model *model_) 00700 { 00701 SG_FREE(model_->w); 00702 SG_FREE(model_->label); 00703 SG_FREE(model_); 00704 } 00705 00706 void destroy_param(parameter* param) 00707 { 00708 SG_FREE(param->weight_label); 00709 SG_FREE(param->weight); 00710 } 00711 #endif //HAVE_LAPACK 00712 #endif // DOXYGEN_SHOULD_SKIP_THIS