SHOGUN
v2.0.0
|
00001 /* Copyright 2006 Vikas Sindhwani (vikass@cs.uchicago.edu) 00002 SVM-lin: Fast SVM Solvers for Supervised and Semi-supervised Learning 00003 00004 This file is part of SVM-lin. 00005 00006 SVM-lin is free software; you can redistribute it and/or modify 00007 it under the terms of the GNU General Public License as published by 00008 the Free Software Foundation; either version 2 of the License, or 00009 (at your option) any later version. 00010 00011 SVM-lin is distributed in the hope that it will be useful, 00012 but WITHOUT ANY WARRANTY; without even the implied warranty of 00013 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00014 GNU General Public License for more details. 00015 00016 You should have received a copy of the GNU General Public License 00017 along with SVM-lin (see gpl.txt); if not, write to the Free Software 00018 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA 00019 */ 00020 00021 #include <stdio.h> 00022 #include <stdlib.h> 00023 #include <ctype.h> 00024 #include <algorithm> 00025 00026 #include <shogun/io/SGIO.h> 00027 #include <shogun/mathematics/Math.h> 00028 #include <shogun/features/SparseFeatures.h> 00029 #include <shogun/lib/external/ssl.h> 00030 00031 namespace shogun 00032 { 00033 void ssl_train(struct data *Data, 00034 struct options *Options, 00035 struct vector_double *Weights, 00036 struct vector_double *Outputs) 00037 { 00038 // initialize 00039 initialize(Weights,Data->n,0.0); 00040 initialize(Outputs,Data->m,0.0); 00041 vector_int *Subset = SG_MALLOC(vector_int, 1); 00042 initialize(Subset,Data->m); 00043 // call the right algorithm 00044 int32_t optimality = 0; 00045 switch(Options->algo) 00046 { 00047 case -1: 00048 SG_SINFO("Regularized Least Squares Regression (CGLS)\n"); 00049 optimality=CGLS(Data,Options,Subset,Weights,Outputs); 00050 break; 00051 case RLS: 00052 SG_SINFO("Regularized Least Squares Classification (CGLS)\n"); 00053 optimality=CGLS(Data,Options,Subset,Weights,Outputs); 00054 break; 00055 case SVM: 00056 SG_SINFO("Modified Finite Newton L2-SVM (L2-SVM-MFN)\n"); 00057 optimality=L2_SVM_MFN(Data,Options,Weights,Outputs,0); 00058 break; 00059 case TSVM: 00060 SG_SINFO("Transductive L2-SVM (TSVM)\n"); 00061 optimality=TSVM_MFN(Data,Options,Weights,Outputs); 00062 break; 00063 case DA_SVM: 00064 SG_SINFO("Deterministic Annealing Semi-supervised L2-SVM (DAS3VM)\n"); 00065 optimality=DA_S3VM(Data,Options,Weights,Outputs); 00066 break; 00067 default: 00068 SG_SERROR("Algorithm unspecified\n"); 00069 } 00070 00071 if (!optimality) 00072 SG_SWARNING("SSL-Algorithm terminated without reaching optimum.\n"); 00073 00074 SG_FREE(Subset->vec); 00075 SG_FREE(Subset); 00076 return; 00077 } 00078 00079 int32_t CGLS( 00080 const struct data *Data, const struct options *Options, 00081 const struct vector_int *Subset, struct vector_double *Weights, 00082 struct vector_double *Outputs) 00083 { 00084 SG_SDEBUG("CGLS starting..."); 00085 00086 /* Disassemble the structures */ 00087 int32_t active = Subset->d; 00088 int32_t *J = Subset->vec; 00089 CDotFeatures* features=Data->features; 00090 float64_t *Y = Data->Y; 00091 float64_t *C = Data->C; 00092 int32_t n = Data->n; 00093 float64_t lambda = Options->lambda; 00094 int32_t cgitermax = Options->cgitermax; 00095 float64_t epsilon = Options->epsilon; 00096 float64_t *beta = Weights->vec; 00097 float64_t *o = Outputs->vec; 00098 // initialize z 00099 float64_t *z = SG_MALLOC(float64_t, active); 00100 float64_t *q = SG_MALLOC(float64_t, active); 00101 int32_t ii=0; 00102 for (int32_t i = active ; i-- ;){ 00103 ii=J[i]; 00104 z[i] = C[ii]*(Y[ii] - o[ii]); 00105 } 00106 float64_t *r = SG_MALLOC(float64_t, n); 00107 for (int32_t i = n ; i-- ;) 00108 r[i] = 0.0; 00109 for (register int32_t j=0; j < active; j++) 00110 { 00111 features->add_to_dense_vec(z[j], J[j], r, n-1); 00112 r[n-1]+=Options->bias*z[j]; //bias (modelled as last dim) 00113 } 00114 float64_t *p = SG_MALLOC(float64_t, n); 00115 float64_t omega1 = 0.0; 00116 for (int32_t i = n ; i-- ;) 00117 { 00118 r[i] -= lambda*beta[i]; 00119 p[i] = r[i]; 00120 omega1 += r[i]*r[i]; 00121 } 00122 float64_t omega_p = omega1; 00123 float64_t omega_q = 0.0; 00124 float64_t inv_omega2 = 1/omega1; 00125 float64_t scale = 0.0; 00126 float64_t omega_z=0.0; 00127 float64_t gamma = 0.0; 00128 int32_t cgiter = 0; 00129 int32_t optimality = 0; 00130 float64_t epsilon2 = epsilon*epsilon; 00131 // iterate 00132 while(cgiter < cgitermax) 00133 { 00134 cgiter++; 00135 omega_q=0.0; 00136 float64_t t=0.0; 00137 register int32_t i,j; 00138 // #pragma omp parallel for private(i,j) 00139 for (i=0; i < active; i++) 00140 { 00141 ii=J[i]; 00142 t=features->dense_dot(ii, p, n-1); 00143 t+=Options->bias*p[n-1]; //bias (modelled as last dim) 00144 q[i]=t; 00145 omega_q += C[ii]*t*t; 00146 } 00147 gamma = omega1/(lambda*omega_p + omega_q); 00148 inv_omega2 = 1/omega1; 00149 for (i = n ; i-- ;) 00150 { 00151 r[i] = 0.0; 00152 beta[i] += gamma*p[i]; 00153 } 00154 omega_z=0.0; 00155 for (i = active ; i-- ;) 00156 { 00157 ii=J[i]; 00158 o[ii] += gamma*q[i]; 00159 z[i] -= gamma*C[ii]*q[i]; 00160 omega_z+=z[i]*z[i]; 00161 } 00162 for (j=0; j < active; j++) 00163 { 00164 t=z[j]; 00165 00166 features->add_to_dense_vec(t, J[j], r, n-1); 00167 r[n-1]+=Options->bias*t; //bias (modelled as last dim) 00168 } 00169 omega1 = 0.0; 00170 for (i = n ; i-- ;) 00171 { 00172 r[i] -= lambda*beta[i]; 00173 omega1 += r[i]*r[i]; 00174 } 00175 if(omega1 < epsilon2*omega_z) 00176 { 00177 optimality=1; 00178 break; 00179 } 00180 omega_p=0.0; 00181 scale=omega1*inv_omega2; 00182 for(i = n ; i-- ;) 00183 { 00184 p[i] = r[i] + p[i]*scale; 00185 omega_p += p[i]*p[i]; 00186 } 00187 } 00188 SG_SDEBUG("...Done."); 00189 SG_SINFO("CGLS converged in %d iteration(s)", cgiter); 00190 00191 SG_FREE(z); 00192 SG_FREE(q); 00193 SG_FREE(r); 00194 SG_FREE(p); 00195 return optimality; 00196 } 00197 00198 int32_t L2_SVM_MFN( 00199 const struct data *Data, struct options *Options, 00200 struct vector_double *Weights, struct vector_double *Outputs, 00201 int32_t ini) 00202 { 00203 /* Disassemble the structures */ 00204 CDotFeatures* features=Data->features; 00205 float64_t *Y = Data->Y; 00206 float64_t *C = Data->C; 00207 int32_t n = Data->n; 00208 int32_t m = Data->m; 00209 float64_t lambda = Options->lambda; 00210 float64_t epsilon; 00211 float64_t *w = Weights->vec; 00212 float64_t *o = Outputs->vec; 00213 float64_t F_old = 0.0; 00214 float64_t F = 0.0; 00215 float64_t diff=0.0; 00216 vector_int *ActiveSubset = SG_MALLOC(vector_int, 1); 00217 ActiveSubset->vec = SG_MALLOC(int32_t, m); 00218 ActiveSubset->d = m; 00219 // initialize 00220 if(ini==0) { 00221 epsilon=BIG_EPSILON; 00222 Options->cgitermax=SMALL_CGITERMAX; 00223 Options->epsilon=BIG_EPSILON; 00224 } 00225 else {epsilon = Options->epsilon;} 00226 for (int32_t i=0;i<n;i++) F+=w[i]*w[i]; 00227 F=0.5*lambda*F; 00228 int32_t active=0; 00229 int32_t inactive=m-1; // l-1 00230 for (int32_t i=0; i<m ; i++) 00231 { 00232 diff=1-Y[i]*o[i]; 00233 if(diff>0) 00234 { 00235 ActiveSubset->vec[active]=i; 00236 active++; 00237 F+=0.5*C[i]*diff*diff; 00238 } 00239 else 00240 { 00241 ActiveSubset->vec[inactive]=i; 00242 inactive--; 00243 } 00244 } 00245 ActiveSubset->d=active; 00246 int32_t iter=0; 00247 int32_t opt=0; 00248 int32_t opt2=0; 00249 vector_double *Weights_bar = SG_MALLOC(vector_double, 1); 00250 vector_double *Outputs_bar = SG_MALLOC(vector_double, 1); 00251 float64_t *w_bar = SG_MALLOC(float64_t, n); 00252 float64_t *o_bar = SG_MALLOC(float64_t, m); 00253 Weights_bar->vec=w_bar; 00254 Outputs_bar->vec=o_bar; 00255 Weights_bar->d=n; 00256 Outputs_bar->d=m; 00257 float64_t delta=0.0; 00258 float64_t t=0.0; 00259 int32_t ii = 0; 00260 while(iter<MFNITERMAX) 00261 { 00262 iter++; 00263 SG_SDEBUG("L2_SVM_MFN Iteration# %d (%d active examples, objective_value = %f)\n", iter, active, F); 00264 for (int32_t i=n; i-- ;) 00265 w_bar[i]=w[i]; 00266 for (int32_t i=m; i-- ;) 00267 o_bar[i]=o[i]; 00268 00269 opt=CGLS(Data,Options,ActiveSubset,Weights_bar,Outputs_bar); 00270 for(register int32_t i=active; i < m; i++) 00271 { 00272 ii=ActiveSubset->vec[i]; 00273 00274 t=features->dense_dot(ii, w_bar, n-1); 00275 t+=Options->bias*w_bar[n-1]; //bias (modelled as last dim) 00276 00277 o_bar[ii]=t; 00278 } 00279 if(ini==0) {Options->cgitermax=CGITERMAX; ini=1;}; 00280 opt2=1; 00281 for (int32_t i=0;i<m;i++) 00282 { 00283 ii=ActiveSubset->vec[i]; 00284 if(i<active) 00285 opt2=(opt2 && (Y[ii]*o_bar[ii]<=1+epsilon)); 00286 else 00287 opt2=(opt2 && (Y[ii]*o_bar[ii]>=1-epsilon)); 00288 if(opt2==0) break; 00289 } 00290 if(opt && opt2) // l 00291 { 00292 if(epsilon==BIG_EPSILON) 00293 { 00294 epsilon=EPSILON; 00295 Options->epsilon=EPSILON; 00296 SG_SDEBUG("epsilon = %f case converged (speedup heuristic 2). Continuing with epsilon=%f", BIG_EPSILON , EPSILON); 00297 continue; 00298 } 00299 else 00300 { 00301 for (int32_t i=n; i-- ;) 00302 w[i]=w_bar[i]; 00303 for (int32_t i=m; i-- ;) 00304 o[i]=o_bar[i]; 00305 SG_FREE(ActiveSubset->vec); 00306 SG_FREE(ActiveSubset); 00307 SG_FREE(o_bar); 00308 SG_FREE(w_bar); 00309 SG_FREE(Weights_bar); 00310 SG_FREE(Outputs_bar); 00311 SG_SINFO("L2_SVM_MFN converged (optimality) in %d", iter); 00312 return 1; 00313 } 00314 } 00315 delta=line_search(w,w_bar,lambda,o,o_bar,Y,C,n,m); 00316 SG_SDEBUG("LINE_SEARCH delta = %f\n", delta); 00317 F_old=F; 00318 F=0.0; 00319 for (int32_t i=n; i-- ;) { 00320 w[i]+=delta*(w_bar[i]-w[i]); 00321 F+=w[i]*w[i]; 00322 } 00323 F=0.5*lambda*F; 00324 active=0; 00325 inactive=m-1; 00326 for (int32_t i=0; i<m ; i++) 00327 { 00328 o[i]+=delta*(o_bar[i]-o[i]); 00329 diff=1-Y[i]*o[i]; 00330 if(diff>0) 00331 { 00332 ActiveSubset->vec[active]=i; 00333 active++; 00334 F+=0.5*C[i]*diff*diff; 00335 } 00336 else 00337 { 00338 ActiveSubset->vec[inactive]=i; 00339 inactive--; 00340 } 00341 } 00342 ActiveSubset->d=active; 00343 if(CMath::abs(F-F_old)<RELATIVE_STOP_EPS*CMath::abs(F_old)) 00344 { 00345 SG_SINFO("L2_SVM_MFN converged (rel. criterion) in %d iterations", iter); 00346 return 2; 00347 } 00348 } 00349 SG_FREE(ActiveSubset->vec); 00350 SG_FREE(ActiveSubset); 00351 SG_FREE(o_bar); 00352 SG_FREE(w_bar); 00353 SG_FREE(Weights_bar); 00354 SG_FREE(Outputs_bar); 00355 SG_SINFO("L2_SVM_MFN converged (max iter exceeded) in %d iterations", iter); 00356 return 0; 00357 } 00358 00359 float64_t line_search(float64_t *w, 00360 float64_t *w_bar, 00361 float64_t lambda, 00362 float64_t *o, 00363 float64_t *o_bar, 00364 float64_t *Y, 00365 float64_t *C, 00366 int32_t d, /* data dimensionality -- 'n' */ 00367 int32_t l) /* number of examples */ 00368 { 00369 float64_t omegaL = 0.0; 00370 float64_t omegaR = 0.0; 00371 float64_t diff=0.0; 00372 for(int32_t i=d; i--; ) 00373 { 00374 diff=w_bar[i]-w[i]; 00375 omegaL+=w[i]*diff; 00376 omegaR+=w_bar[i]*diff; 00377 } 00378 omegaL=lambda*omegaL; 00379 omegaR=lambda*omegaR; 00380 float64_t L=0.0; 00381 float64_t R=0.0; 00382 int32_t ii=0; 00383 for (int32_t i=0;i<l;i++) 00384 { 00385 if(Y[i]*o[i]<1) 00386 { 00387 diff=C[i]*(o_bar[i]-o[i]); 00388 L+=(o[i]-Y[i])*diff; 00389 R+=(o_bar[i]-Y[i])*diff; 00390 } 00391 } 00392 L+=omegaL; 00393 R+=omegaR; 00394 Delta* deltas=SG_MALLOC(Delta, l); 00395 int32_t p=0; 00396 for(int32_t i=0;i<l;i++) 00397 { 00398 diff=Y[i]*(o_bar[i]-o[i]); 00399 00400 if(Y[i]*o[i]<1) 00401 { 00402 if(diff>0) 00403 { 00404 deltas[p].delta=(1-Y[i]*o[i])/diff; 00405 deltas[p].index=i; 00406 deltas[p].s=-1; 00407 p++; 00408 } 00409 } 00410 else 00411 { 00412 if(diff<0) 00413 { 00414 deltas[p].delta=(1-Y[i]*o[i])/diff; 00415 deltas[p].index=i; 00416 deltas[p].s=1; 00417 p++; 00418 } 00419 } 00420 } 00421 std::sort(deltas,deltas+p); 00422 float64_t delta_prime=0.0; 00423 for (int32_t i=0;i<p;i++) 00424 { 00425 delta_prime = L + deltas[i].delta*(R-L); 00426 if(delta_prime>=0) 00427 break; 00428 ii=deltas[i].index; 00429 diff=(deltas[i].s)*C[ii]*(o_bar[ii]-o[ii]); 00430 L+=diff*(o[ii]-Y[ii]); 00431 R+=diff*(o_bar[ii]-Y[ii]); 00432 } 00433 SG_FREE(deltas); 00434 return (-L/(R-L)); 00435 } 00436 00437 int32_t TSVM_MFN( 00438 const struct data *Data, struct options *Options, 00439 struct vector_double *Weights, struct vector_double *Outputs) 00440 { 00441 /* Setup labeled-only examples and train L2_SVM_MFN */ 00442 struct data *Data_Labeled = SG_MALLOC(data, 1); 00443 struct vector_double *Outputs_Labeled = SG_MALLOC(vector_double, 1); 00444 initialize(Outputs_Labeled,Data->l,0.0); 00445 SG_SDEBUG("Initializing weights, unknown labels"); 00446 GetLabeledData(Data_Labeled,Data); /* gets labeled data and sets C=1/l */ 00447 L2_SVM_MFN(Data_Labeled, Options, Weights,Outputs_Labeled,0); 00449 /* Use this weight vector to classify R*u unlabeled examples as 00450 positive*/ 00451 int32_t p=0,q=0; 00452 float64_t t=0.0; 00453 int32_t *JU = SG_MALLOC(int32_t, Data->u); 00454 float64_t *ou = SG_MALLOC(float64_t, Data->u); 00455 float64_t lambda_0 = TSVM_LAMBDA_SMALL; 00456 for (int32_t i=0;i<Data->m;i++) 00457 { 00458 if(Data->Y[i]==0.0) 00459 { 00460 t=Data->features->dense_dot(i, Weights->vec, Data->n-1); 00461 t+=Options->bias*Weights->vec[Data->n-1]; //bias (modelled as last dim) 00462 00463 Outputs->vec[i]=t; 00464 Data->C[i]=lambda_0*1.0/Data->u; 00465 JU[q]=i; 00466 ou[q]=t; 00467 q++; 00468 } 00469 else 00470 { 00471 Outputs->vec[i]=Outputs_Labeled->vec[p]; 00472 Data->C[i]=1.0/Data->l; 00473 p++; 00474 } 00475 } 00476 std::nth_element(ou,ou+int32_t((1-Options->R)*Data->u-1),ou+Data->u); 00477 float64_t thresh=*(ou+int32_t((1-Options->R)*Data->u)-1); 00478 SG_FREE(ou); 00479 for (int32_t i=0;i<Data->u;i++) 00480 { 00481 if(Outputs->vec[JU[i]]>thresh) 00482 Data->Y[JU[i]]=1.0; 00483 else 00484 Data->Y[JU[i]]=-1.0; 00485 } 00486 for (int32_t i=0;i<Data->n;i++) 00487 Weights->vec[i]=0.0; 00488 for (int32_t i=0;i<Data->m;i++) 00489 Outputs->vec[i]=0.0; 00490 L2_SVM_MFN(Data,Options,Weights,Outputs,0); 00491 int32_t num_switches=0; 00492 int32_t s=0; 00493 int32_t last_round=0; 00494 while(lambda_0 <= Options->lambda_u) 00495 { 00496 int32_t iter2=0; 00497 while(1){ 00498 s=switch_labels(Data->Y,Outputs->vec,JU,Data->u,Options->S); 00499 if(s==0) break; 00500 iter2++; 00501 SG_SDEBUG("****** lambda_0 = %f iteration = %d ************************************\n", lambda_0, iter2); 00502 SG_SDEBUG("Optimizing unknown labels. switched %d labels.\n"); 00503 num_switches+=s; 00504 SG_SDEBUG("Optimizing weights\n"); 00505 L2_SVM_MFN(Data,Options,Weights,Outputs,1); 00506 } 00507 if(last_round==1) break; 00508 lambda_0=TSVM_ANNEALING_RATE*lambda_0; 00509 if(lambda_0 >= Options->lambda_u) {lambda_0 = Options->lambda_u; last_round=1;} 00510 for (int32_t i=0;i<Data->u;i++) 00511 Data->C[JU[i]]=lambda_0*1.0/Data->u; 00512 SG_SDEBUG("****** lambda0 increased to %f%% of lambda_u = %f ************************\n", lambda_0*100/Options->lambda_u, Options->lambda_u); 00513 SG_SDEBUG("Optimizing weights\n"); 00514 L2_SVM_MFN(Data,Options,Weights,Outputs,1); 00515 } 00516 SG_SDEBUG("Total Number of Switches = %d\n", num_switches); 00517 /* reset labels */ 00518 for (int32_t i=0;i<Data->u;i++) Data->Y[JU[i]] = 0.0; 00519 float64_t F = transductive_cost(norm_square(Weights),Data->Y,Outputs->vec,Outputs->d,Options->lambda,Options->lambda_u); 00520 SG_SDEBUG("Objective Value = %f\n",F); 00521 delete [] JU; 00522 return num_switches; 00523 } 00524 00525 int32_t switch_labels(float64_t* Y, float64_t* o, int32_t* JU, int32_t u, int32_t S) 00526 { 00527 int32_t npos=0; 00528 int32_t nneg=0; 00529 for (int32_t i=0;i<u;i++) 00530 { 00531 if((Y[JU[i]]>0) && (o[JU[i]]<1.0)) npos++; 00532 if((Y[JU[i]]<0) && (-o[JU[i]]<1.0)) nneg++; 00533 } 00534 Delta* positive=SG_MALLOC(Delta, npos); 00535 Delta* negative=SG_MALLOC(Delta, nneg); 00536 int32_t p=0; 00537 int32_t n=0; 00538 int32_t ii=0; 00539 for (int32_t i=0;i<u;i++) 00540 { 00541 ii=JU[i]; 00542 if((Y[ii]>0.0) && (o[ii]<1.0)) { 00543 positive[p].delta=o[ii]; 00544 positive[p].index=ii; 00545 positive[p].s=0; 00546 p++;}; 00547 if((Y[ii]<0.0) && (-o[ii]<1.0)) 00548 { 00549 negative[n].delta=-o[ii]; 00550 negative[n].index=ii; 00551 negative[n].s=0; 00552 n++;}; 00553 } 00554 std::sort(positive,positive+npos); 00555 std::sort(negative,negative+nneg); 00556 int32_t s=-1; 00557 while(1) 00558 { 00559 s++; 00560 if((s>=S) || (positive[s].delta>=-negative[s].delta) || (s>=npos) || (s>=nneg)) 00561 break; 00562 Y[positive[s].index]=-1.0; 00563 Y[negative[s].index]= 1.0; 00564 } 00565 SG_FREE(positive); 00566 SG_FREE(negative); 00567 return s; 00568 } 00569 00570 int32_t DA_S3VM( 00571 struct data *Data, struct options *Options, struct vector_double *Weights, 00572 struct vector_double *Outputs) 00573 { 00574 float64_t T = DA_INIT_TEMP*Options->lambda_u; 00575 int32_t iter1 = 0, iter2 =0; 00576 float64_t *p = SG_MALLOC(float64_t, Data->u); 00577 float64_t *q = SG_MALLOC(float64_t, Data->u); 00578 float64_t *g = SG_MALLOC(float64_t, Data->u); 00579 float64_t F,F_min; 00580 float64_t *w_min = SG_MALLOC(float64_t, Data->n); 00581 float64_t *o_min = SG_MALLOC(float64_t, Data->m); 00582 float64_t *w = Weights->vec; 00583 float64_t *o = Outputs->vec; 00584 float64_t kl_divergence = 1.0; 00585 /*initialize */ 00586 SG_SDEBUG("Initializing weights, p"); 00587 for (int32_t i=0;i<Data->u; i++) 00588 p[i] = Options->R; 00589 /* record which examples are unlabeled */ 00590 int32_t *JU = SG_MALLOC(int32_t, Data->u); 00591 int32_t j=0; 00592 for(int32_t i=0;i<Data->m;i++) 00593 { 00594 if(Data->Y[i]==0.0) 00595 {JU[j]=i;j++;} 00596 } 00597 float64_t H = entropy(p,Data->u); 00598 optimize_w(Data,p,Options,Weights,Outputs,0); 00599 F = transductive_cost(norm_square(Weights),Data->Y,Outputs->vec,Outputs->d,Options->lambda,Options->lambda_u); 00600 F_min = F; 00601 for (int32_t i=0;i<Weights->d;i++) 00602 w_min[i]=w[i]; 00603 for (int32_t i=0;i<Outputs->d;i++) 00604 o_min[i]=o[i]; 00605 while((iter1 < DA_OUTER_ITERMAX) && (H > Options->epsilon)) 00606 { 00607 iter1++; 00608 iter2=0; 00609 kl_divergence=1.0; 00610 while((iter2 < DA_INNER_ITERMAX) && (kl_divergence > Options->epsilon)) 00611 { 00612 iter2++; 00613 for (int32_t i=0;i<Data->u;i++) 00614 { 00615 q[i]=p[i]; 00616 g[i] = Options->lambda_u*((o[JU[i]] > 1 ? 0 : (1 - o[JU[i]])*(1 - o[JU[i]])) - (o[JU[i]]< -1 ? 0 : (1 + o[JU[i]])*(1 + o[JU[i]]))); 00617 } 00618 SG_SDEBUG("Optimizing p.\n"); 00619 optimize_p(g,Data->u,T,Options->R,p); 00620 kl_divergence=KL(p,q,Data->u); 00621 SG_SDEBUG("Optimizing weights\n"); 00622 optimize_w(Data,p,Options,Weights,Outputs,1); 00623 F = transductive_cost(norm_square(Weights),Data->Y,Outputs->vec,Outputs->d,Options->lambda,Options->lambda_u); 00624 if(F < F_min) 00625 { 00626 F_min = F; 00627 for (int32_t i=0;i<Weights->d;i++) 00628 w_min[i]=w[i]; 00629 for (int32_t i=0;i<Outputs->d;i++) 00630 o_min[i]=o[i]; 00631 } 00632 SG_SDEBUG("***** outer_iter = %d T = %g inner_iter = %d kl = %g cost = %g *****\n",iter1,T,iter2,kl_divergence,F); 00633 } 00634 H = entropy(p,Data->u); 00635 SG_SDEBUG("***** Finished outer_iter = %d T = %g Entropy = %g ***\n", iter1,T,H); 00636 T = T/DA_ANNEALING_RATE; 00637 } 00638 for (int32_t i=0;i<Weights->d;i++) 00639 w[i]=w_min[i]; 00640 for (int32_t i=0;i<Outputs->d;i++) 00641 o[i]=o_min[i]; 00642 /* may want to reset the original Y */ 00643 SG_FREE(p); 00644 SG_FREE(q); 00645 SG_FREE(g); 00646 SG_FREE(JU); 00647 SG_FREE(w_min); 00648 SG_FREE(o_min); 00649 SG_SINFO("(min) Objective Value = %f", F_min); 00650 return 1; 00651 } 00652 00653 int32_t optimize_w( 00654 const struct data *Data, const float64_t *p, struct options *Options, 00655 struct vector_double *Weights, struct vector_double *Outputs, int32_t ini) 00656 { 00657 int32_t i,j; 00658 CDotFeatures* features=Data->features; 00659 int32_t n = Data->n; 00660 int32_t m = Data->m; 00661 int32_t u = Data->u; 00662 float64_t lambda = Options->lambda; 00663 float64_t epsilon; 00664 float64_t *w = Weights->vec; 00665 float64_t *o = SG_MALLOC(float64_t, m+u); 00666 float64_t *Y = SG_MALLOC(float64_t, m+u); 00667 float64_t *C = SG_MALLOC(float64_t, m+u); 00668 int32_t *labeled_indices = SG_MALLOC(int32_t, m); 00669 float64_t F_old = 0.0; 00670 float64_t F = 0.0; 00671 float64_t diff=0.0; 00672 float64_t lambda_u_by_u = Options->lambda_u/u; 00673 vector_int *ActiveSubset = SG_MALLOC(vector_int, 1); 00674 ActiveSubset->vec = SG_MALLOC(int32_t, m); 00675 ActiveSubset->d = m; 00676 // initialize 00677 if(ini==0) 00678 { 00679 epsilon=BIG_EPSILON; 00680 Options->cgitermax=SMALL_CGITERMAX; 00681 Options->epsilon=BIG_EPSILON;} 00682 else {epsilon = Options->epsilon;} 00683 00684 for(i=0;i<n;i++) F+=w[i]*w[i]; 00685 F=lambda*F; 00686 int32_t active=0; 00687 int32_t inactive=m-1; // l-1 00688 float64_t temp1; 00689 float64_t temp2; 00690 00691 j = 0; 00692 for(i=0; i<m ; i++) 00693 { 00694 o[i]=Outputs->vec[i]; 00695 if(Data->Y[i]==0.0) 00696 { 00697 labeled_indices[i]=0; 00698 o[m+j]=o[i]; 00699 Y[i]=1; 00700 Y[m+j]=-1; 00701 C[i]=lambda_u_by_u*p[j]; 00702 C[m+j]=lambda_u_by_u*(1-p[j]); 00703 ActiveSubset->vec[active]=i; 00704 active++; 00705 diff = 1 - CMath::abs(o[i]); 00706 if(diff>0) 00707 { 00708 Data->Y[i] = 2*p[j]-1; 00709 Data->C[i] = lambda_u_by_u; 00710 temp1 = (1 - o[i]); 00711 temp2 = (1 + o[i]); 00712 F+=lambda_u_by_u*(p[j]*temp1*temp1 + (1-p[j])*temp2*temp2); 00713 } 00714 else 00715 { 00716 if(o[i]>0) 00717 { 00718 Data->Y[i] = -1.0; 00719 Data->C[i] = C[m+j]; 00720 } 00721 else 00722 { 00723 Data->Y[i] = 1.0; 00724 Data->C[i] = C[i]; 00725 } 00726 temp1 = (1-Data->Y[i]*o[i]); 00727 F+= Data->C[i]*temp1*temp1; 00728 } 00729 j++; 00730 } 00731 else 00732 { 00733 labeled_indices[i]=1; 00734 Y[i]=Data->Y[i]; 00735 C[i]=1.0/Data->l; 00736 Data->C[i]=1.0/Data->l; 00737 diff=1-Data->Y[i]*o[i]; 00738 if(diff>0) 00739 { 00740 ActiveSubset->vec[active]=i; 00741 active++; 00742 F+=Data->C[i]*diff*diff; 00743 } 00744 else 00745 { 00746 ActiveSubset->vec[inactive]=i; 00747 inactive--; 00748 } 00749 } 00750 } 00751 F=0.5*F; 00752 ActiveSubset->d=active; 00753 int32_t iter=0; 00754 int32_t opt=0; 00755 int32_t opt2=0; 00756 vector_double *Weights_bar = SG_MALLOC(vector_double, 1); 00757 vector_double *Outputs_bar = SG_MALLOC(vector_double, 1); 00758 float64_t *w_bar = SG_MALLOC(float64_t, n); 00759 float64_t *o_bar = SG_MALLOC(float64_t, m+u); 00760 Weights_bar->vec=w_bar; 00761 Outputs_bar->vec=o_bar; 00762 Weights_bar->d=n; 00763 Outputs_bar->d=m; /* read only the top m ; bottom u will be copies */ 00764 float64_t delta=0.0; 00765 float64_t t=0.0; 00766 int32_t ii = 0; 00767 while(iter<MFNITERMAX) 00768 { 00769 iter++; 00770 SG_SDEBUG("L2_SVM_MFN Iteration# %d (%d active examples, objective_value = %f)", iter, active, F); 00771 for(i=n; i-- ;) 00772 w_bar[i]=w[i]; 00773 00774 for(i=m+u; i-- ;) 00775 o_bar[i]=o[i]; 00776 opt=CGLS(Data,Options,ActiveSubset,Weights_bar,Outputs_bar); 00777 for(i=active; i < m; i++) 00778 { 00779 ii=ActiveSubset->vec[i]; 00780 t=features->dense_dot(ii, w_bar, n-1); 00781 t+=Options->bias*w_bar[n-1]; //bias (modelled as last dim) 00782 00783 o_bar[ii]=t; 00784 } 00785 // make o_bar consistent in the bottom half 00786 j=0; 00787 for(i=0; i<m;i++) 00788 { 00789 if(labeled_indices[i]==0) 00790 {o_bar[m+j]=o_bar[i]; j++;}; 00791 } 00792 if(ini==0) {Options->cgitermax=CGITERMAX; ini=1;}; 00793 opt2=1; 00794 for(i=0; i < m ;i++) 00795 { 00796 ii=ActiveSubset->vec[i]; 00797 if(i<active) 00798 { 00799 if(labeled_indices[ii]==1) 00800 opt2=(opt2 && (Data->Y[ii]*o_bar[ii]<=1+epsilon)); 00801 else 00802 { 00803 if(CMath::abs(o[ii])<1) 00804 opt2=(opt2 && (CMath::abs(o_bar[ii])<=1+epsilon)); 00805 else 00806 opt2=(opt2 && (CMath::abs(o_bar[ii])>=1-epsilon)); 00807 } 00808 } 00809 else 00810 opt2=(opt2 && (Data->Y[ii]*o_bar[ii]>=1-epsilon)); 00811 if(opt2==0) break; 00812 } 00813 if(opt && opt2) // l 00814 { 00815 if(epsilon==BIG_EPSILON) 00816 { 00817 epsilon=EPSILON; 00818 Options->epsilon=EPSILON; 00819 SG_SDEBUG( "epsilon = %f case converged (speedup heuristic 2). Continuing with epsilon=%f\n", BIG_EPSILON, EPSILON); 00820 continue; 00821 } 00822 else 00823 { 00824 for(i=n; i-- ;) 00825 w[i]=w_bar[i]; 00826 for(i=m; i-- ;) 00827 Outputs->vec[i]=o_bar[i]; 00828 for(i=m; i-- ;) 00829 { 00830 if(labeled_indices[i]==0) 00831 Data->Y[i]=0.0; 00832 } 00833 SG_FREE(ActiveSubset->vec); 00834 SG_FREE(ActiveSubset); 00835 SG_FREE(o_bar); 00836 SG_FREE(w_bar); 00837 SG_FREE(o); 00838 SG_FREE(Weights_bar); 00839 SG_FREE(Outputs_bar); 00840 SG_FREE(Y); 00841 SG_FREE(C); 00842 SG_FREE(labeled_indices); 00843 SG_SINFO("L2_SVM_MFN converged in %d iteration(s)", iter); 00844 return 1; 00845 } 00846 } 00847 00848 delta=line_search(w,w_bar,lambda,o,o_bar,Y,C,n,m+u); 00849 SG_SDEBUG("LINE_SEARCH delta = %f", delta); 00850 F_old=F; 00851 F=0.0; 00852 for(i=0;i<n;i++) {w[i]+=delta*(w_bar[i]-w[i]); F+=w[i]*w[i];} 00853 F=lambda*F; 00854 j=0; 00855 active=0; 00856 inactive=m-1; 00857 for(i=0; i<m ; i++) 00858 { 00859 o[i]+=delta*(o_bar[i]-o[i]); 00860 if(labeled_indices[i]==0) 00861 { 00862 o[m+j]=o[i]; 00863 ActiveSubset->vec[active]=i; 00864 active++; 00865 diff = 1 - CMath::abs(o[i]); 00866 if(diff>0) 00867 { 00868 Data->Y[i] = 2*p[j]-1; 00869 Data->C[i] = lambda_u_by_u; 00870 temp1 = (1 - o[i]); 00871 temp2 = (1 + o[i]); 00872 F+=lambda_u_by_u*(p[j]*temp1*temp1 + (1-p[j])*temp2*temp2); 00873 } 00874 else 00875 { 00876 if(o[i]>0) 00877 { 00878 Data->Y[i] = -1; 00879 Data->C[i] = C[m+j]; 00880 } 00881 else 00882 { 00883 Data->Y[i] = +1; 00884 Data->C[i] = C[i]; 00885 } 00886 temp1=(1-Data->Y[i]*o[i]); 00887 F+= Data->C[i]*temp1*temp1; 00888 } 00889 j++; 00890 } 00891 else 00892 { 00893 diff=1-Data->Y[i]*o[i]; 00894 if(diff>0) 00895 { 00896 ActiveSubset->vec[active]=i; 00897 active++; 00898 F+=Data->C[i]*diff*diff; 00899 } 00900 else 00901 { 00902 ActiveSubset->vec[inactive]=i; 00903 inactive--; 00904 } 00905 } 00906 } 00907 F=0.5*F; 00908 ActiveSubset->d=active; 00909 if(CMath::abs(F-F_old)<EPSILON) 00910 break; 00911 } 00912 for(i=m; i-- ;) 00913 { 00914 Outputs->vec[i]=o[i]; 00915 if(labeled_indices[i]==0) 00916 Data->Y[i]=0.0; 00917 } 00918 SG_FREE(ActiveSubset->vec); 00919 SG_FREE(labeled_indices); 00920 SG_FREE(ActiveSubset); 00921 SG_FREE(o_bar); 00922 SG_FREE(w_bar); 00923 SG_FREE(o); 00924 SG_FREE(Weights_bar); 00925 SG_FREE(Outputs_bar); 00926 SG_FREE(Y); 00927 SG_FREE(C); 00928 SG_SINFO("L2_SVM_MFN converged in %d iterations", iter); 00929 return 0; 00930 } 00931 00932 void optimize_p( 00933 const float64_t* g, int32_t u, float64_t T, float64_t r, float64_t* p) 00934 { 00935 int32_t iter=0; 00936 float64_t epsilon=1e-10; 00937 int32_t maxiter=500; 00938 float64_t nu_minus=g[0]; 00939 float64_t nu_plus=g[0]; 00940 for (int32_t i=0;i<u;i++) 00941 { 00942 if(g[i]<nu_minus) nu_minus=g[i]; 00943 if(g[i]>nu_plus) nu_plus=g[i]; 00944 }; 00945 00946 float64_t b=T*log((1-r)/r); 00947 nu_minus-=b; 00948 nu_plus-=b; 00949 float64_t nu=(nu_plus+nu_minus)/2; 00950 float64_t Bnu=0.0; 00951 float64_t BnuPrime=0.0; 00952 float64_t s=0.0; 00953 float64_t tmp=0.0; 00954 for (int32_t i=0;i<u;i++) 00955 { 00956 s=exp((g[i]-nu)/T); 00957 if(!(CMath::is_infinity(s))) 00958 { 00959 tmp=1.0/(1.0+s); 00960 Bnu+=tmp; 00961 BnuPrime+=s*tmp*tmp; 00962 } 00963 } 00964 Bnu=Bnu/u; 00965 Bnu-=r; 00966 BnuPrime=BnuPrime/(T*u); 00967 float64_t nuHat=0.0; 00968 while((CMath::abs(Bnu)>epsilon) && (iter < maxiter)) 00969 { 00970 iter++; 00971 if(CMath::abs(BnuPrime)>0.0) 00972 nuHat=nu-Bnu/BnuPrime; 00973 if((CMath::abs(BnuPrime) > 0.0) | (nuHat>nu_plus) | (nuHat < nu_minus)) 00974 nu=(nu_minus+nu_plus)/2.0; 00975 else 00976 nu=nuHat; 00977 Bnu=0.0; 00978 BnuPrime=0.0; 00979 for(int32_t i=0;i<u;i++) 00980 { 00981 s=exp((g[i]-nu)/T); 00982 if(!(CMath::is_infinity(s))) 00983 { 00984 tmp=1.0/(1.0+s); 00985 Bnu+=tmp; 00986 BnuPrime+=s*tmp*tmp; 00987 } 00988 } 00989 Bnu=Bnu/u; 00990 Bnu-=r; 00991 BnuPrime=BnuPrime/(T*u); 00992 if(Bnu<0) 00993 nu_minus=nu; 00994 else 00995 nu_plus=nu; 00996 if(CMath::abs(nu_minus-nu_plus)<epsilon) 00997 break; 00998 } 00999 if(CMath::abs(Bnu)>epsilon) 01000 SG_SWARNING("Warning (Root): root not found to required precision\n"); 01001 01002 for (int32_t i=0;i<u;i++) 01003 { 01004 s=exp((g[i]-nu)/T); 01005 if(CMath::is_infinity(s)) p[i]=0.0; 01006 else p[i]=1.0/(1.0+s); 01007 } 01008 SG_SINFO(" root (nu) = %f B(nu) = %f", nu, Bnu); 01009 } 01010 01011 float64_t transductive_cost( 01012 float64_t normWeights, float64_t *Y, float64_t *Outputs, int32_t m, 01013 float64_t lambda, float64_t lambda_u) 01014 { 01015 float64_t F1=0.0,F2=0.0, o=0.0, y=0.0; 01016 int32_t u=0,l=0; 01017 for (int32_t i=0;i<m;i++) 01018 { 01019 o=Outputs[i]; 01020 y=Y[i]; 01021 if(y==0.0) 01022 {F1 += CMath::abs(o) > 1 ? 0 : (1 - CMath::abs(o))*(1 - CMath::abs(o)); u++;} 01023 else 01024 {F2 += y*o > 1 ? 0 : (1-y*o)*(1-y*o); l++;} 01025 } 01026 float64_t F; 01027 F = 0.5*(lambda*normWeights + lambda_u*F1/u + F2/l); 01028 return F; 01029 } 01030 01031 float64_t entropy(const float64_t *p, int32_t u) 01032 { 01033 float64_t h=0.0; 01034 float64_t q=0.0; 01035 for (int32_t i=0;i<u;i++) 01036 { 01037 q=p[i]; 01038 if(q>0 && q<1) 01039 h+= -(q*CMath::log2(q) + (1-q)*CMath::log2(1-q)); 01040 } 01041 return h/u; 01042 } 01043 01044 float64_t KL(const float64_t *p, const float64_t *q, int32_t u) 01045 { 01046 float64_t h=0.0; 01047 float64_t p1=0.0; 01048 float64_t q1=0.0; 01049 float64_t g=0.0; 01050 for (int32_t i=0;i<u;i++) 01051 { 01052 p1=p[i]; 01053 q1=q[i]; 01054 if(p1>1-1e-8) p1-=1e-8; 01055 if(p1<1-1e-8) p1+=1e-8; 01056 if(q1>1-1e-8) q1-=1e-8; 01057 if(q1<1-1e-8) q1+=1e-8; 01058 g= (p1*CMath::log2(p1/q1) + (1-p1)*CMath::log2((1-p1)/(1-q1))); 01059 if(CMath::abs(g)<1e-12 || CMath::is_nan(g)) g=0.0; 01060 h+=g; 01061 } 01062 return h/u; 01063 } 01064 01065 /********************** UTILITIES ********************/ 01066 float64_t norm_square(const vector_double *A) 01067 { 01068 float64_t x=0.0, t=0.0; 01069 for(int32_t i=0;i<A->d;i++) 01070 { 01071 t=A->vec[i]; 01072 x+=t*t; 01073 } 01074 return x; 01075 } 01076 01077 void initialize(struct vector_double *A, int32_t k, float64_t a) 01078 { 01079 float64_t *vec = SG_MALLOC(float64_t, k); 01080 for (int32_t i=0;i<k;i++) 01081 vec[i]=a; 01082 A->vec = vec; 01083 A->d = k; 01084 return; 01085 } 01086 01087 void initialize(struct vector_int *A, int32_t k) 01088 { 01089 int32_t *vec = SG_MALLOC(int32_t, k); 01090 for(int32_t i=0;i<k;i++) 01091 vec[i]=i; 01092 A->vec = vec; 01093 A->d = k; 01094 return; 01095 } 01096 01097 void GetLabeledData(struct data *D, const struct data *Data) 01098 { 01099 /*FIXME 01100 int32_t *J = SG_MALLOC(int, Data->l); 01101 D->C = SG_MALLOC(float64_t, Data->l); 01102 D->Y = SG_MALLOC(float64_t, Data->l); 01103 int32_t nz=0; 01104 int32_t k=0; 01105 int32_t rowptrs_=Data->l; 01106 for(int32_t i=0;i<Data->m;i++) 01107 { 01108 if(Data->Y[i]!=0.0) 01109 { 01110 J[k]=i; 01111 D->Y[k]=Data->Y[i]; 01112 D->C[k]=1.0/Data->l; 01113 nz+=(Data->rowptr[i+1] - Data->rowptr[i]); 01114 k++; 01115 } 01116 } 01117 D->val = SG_MALLOC(float64_t, nz); 01118 D->colind = SG_MALLOC(int32_t, nz); 01119 D->rowptr = new int32_trowptrs_+1]; 01120 nz=0; 01121 for(int32_t i=0;i<Data->l;i++) 01122 { 01123 D->rowptr[i]=nz; 01124 for(int32_t j=Data->rowptr[J[i]];j<Data->rowptr[J[i]+1];j++) 01125 { 01126 D->val[nz] = Data->val[j]; 01127 D->colind[nz] = Data->colind[j]; 01128 nz++; 01129 } 01130 } 01131 D->rowptr[rowptrs_]=nz; 01132 D->nz=nz; 01133 D->l=Data->l; 01134 D->m=Data->l; 01135 D->n=Data->n; 01136 D->u=0; 01137 SG_FREE(J);*/ 01138 } 01139 }