SHOGUN v2.0.0
LaRank.cpp
// -*- C++ -*-
// Main functions of the LaRank algorithm for solving Multiclass SVM
// Copyright (C) 2008- Antoine Bordes
// Shogun specific adjustments (w) 2009 Soeren Sonnenburg

// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111, USA
//
/***********************************************************************
 *
 *  LUSH Lisp Universal Shell
 *  Copyright (C) 2002 Leon Bottou, Yann Le Cun, AT&T Corp, NECI.
 *  Includes parts of TL3:
 *  Copyright (C) 1987-1999 Leon Bottou and Neuristique.
 *  Includes selected parts of SN3.2:
 *  Copyright (C) 1991-2001 AT&T Corp.
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program; if not, write to the Free Software
 *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111, USA
 *
 ***********************************************************************/

/***********************************************************************
 * $Id: kcache.c,v 1.9 2007/01/25 22:42:09 leonb Exp $
 **********************************************************************/

#include <vector>
#include <algorithm>

#include <shogun/io/SGIO.h>
#include <shogun/lib/Signal.h>
#include <shogun/lib/Time.h>
#include <shogun/mathematics/Math.h>
#include <shogun/multiclass/LaRank.h>
#include <shogun/multiclass/MulticlassOneVsRestStrategy.h>
#include <shogun/kernel/Kernel.h>
#include <shogun/labels/MulticlassLabels.h>

using namespace shogun;

namespace shogun
{
	static larank_kcache_t* larank_kcache_create (CKernel* kernelfunc)
	{
		larank_kcache_t *self;
		self = SG_CALLOC (larank_kcache_t, 1);
		self->l = 0;
		self->maxrowlen = 0;
		self->func = kernelfunc;
		self->prevbuddy = self;
		self->nextbuddy = self;
		self->cursize = sizeof (larank_kcache_t);
		self->maxsize = 256 * 1024 * 1024;
		self->qprev = SG_MALLOC(int32_t, 1);
		self->qnext = SG_MALLOC(int32_t, 1);
		self->rnext = self->qnext + 1;
		self->rprev = self->qprev + 1;
		self->rprev[-1] = -1;
		self->rnext[-1] = -1;
		return self;
	}

	static void xtruncate (larank_kcache_t * self, int32_t k, int32_t nlen)
	{
		int32_t olen = self->rsize[k];
		if (nlen < olen)
		{
			float32_t *ndata;
			float32_t *odata = self->rdata[k];
			if (nlen > 0)
			{
				ndata = SG_MALLOC(float32_t, nlen);
				memcpy (ndata, odata, nlen * sizeof (float32_t));
			}
			else
			{
				ndata = 0;
				self->rnext[self->rprev[k]] = self->rnext[k];
				self->rprev[self->rnext[k]] = self->rprev[k];
				self->rnext[k] = self->rprev[k] = k;
			}
			SG_FREE (odata);
			self->rdata[k] = ndata;
			self->rsize[k] = nlen;
			self->cursize += (int64_t) (nlen - olen) * sizeof (float32_t);
		}
	}

	static void xpurge (larank_kcache_t * self)
	{
		if (self->cursize > self->maxsize)
		{
			int32_t k = self->rprev[-1];
			while (self->cursize > self->maxsize && k != self->rnext[-1])
			{
				int32_t pk = self->rprev[k];
				xtruncate (self, k, 0);
				k = pk;
			}
		}
	}

	static void larank_kcache_set_maximum_size (larank_kcache_t * self, int64_t entries)
	{
		ASSERT (self);
		ASSERT (entries > 0);
		self->maxsize = entries;
		xpurge (self);
	}

	static void larank_kcache_destroy (larank_kcache_t * self)
	{
		if (self)
		{
			int32_t i;
			larank_kcache_t *nb = self->nextbuddy;
			larank_kcache_t *pb = self->prevbuddy;
			pb->nextbuddy = nb;
			nb->prevbuddy = pb;
			/* delete */
			if (self->i2r)
				SG_FREE (self->i2r);
			if (self->r2i)
				SG_FREE (self->r2i);
			if (self->rdata)
				for (i = 0; i < self->l; i++)
					if (self->rdata[i])
						SG_FREE (self->rdata[i]);
			if (self->rdata)
				SG_FREE (self->rdata);
			if (self->rsize)
				SG_FREE (self->rsize);
			if (self->rdiag)
				SG_FREE (self->rdiag);
			if (self->qnext)
				SG_FREE (self->qnext);
			if (self->qprev)
				SG_FREE (self->qprev);
			memset (self, 0, sizeof (larank_kcache_t));
			SG_FREE (self);
		}
	}
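	// The cache maps example indices to cache rows through the permutation
	// arrays i2r/r2i and keeps rows chained in a doubly linked LRU list via
	// rnext/rprev (slot -1 acts as the list head). xminsize grows all
	// per-row arrays geometrically (doubling from a floor of 256 entries),
	// so repeated growth stays amortized O(1) per element.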
	static void xminsize (larank_kcache_t * self, int32_t n)
	{
		int32_t ol = self->l;
		if (n > ol)
		{
			int32_t i;
			int32_t nl = CMath::max (256, ol);
			while (nl < n)
				nl = nl + nl;
			self->i2r = SG_REALLOC (int32_t, self->i2r, nl);
			self->r2i = SG_REALLOC (int32_t, self->r2i, nl);
			self->rsize = SG_REALLOC (int32_t, self->rsize, nl);
			self->qnext = SG_REALLOC (int32_t, self->qnext, (1 + nl));
			self->qprev = SG_REALLOC (int32_t, self->qprev, (1 + nl));
			self->rdiag = SG_REALLOC (float32_t, self->rdiag, nl);
			self->rdata = SG_REALLOC (float32_t*, self->rdata, nl);
			self->rnext = self->qnext + 1;
			self->rprev = self->qprev + 1;
			for (i = ol; i < nl; i++)
			{
				self->i2r[i] = i;
				self->r2i[i] = i;
				self->rsize[i] = -1;
				self->rnext[i] = i;
				self->rprev[i] = i;
				self->rdata[i] = 0;
			}
			self->l = nl;
		}
	}

	static int32_t* larank_kcache_r2i (larank_kcache_t * self, int32_t n)
	{
		xminsize (self, n);
		return self->r2i;
	}

	static void xextend (larank_kcache_t * self, int32_t k, int32_t nlen)
	{
		int32_t olen = self->rsize[k];
		if (nlen > olen)
		{
			float32_t *ndata = SG_MALLOC(float32_t, nlen);
			if (olen > 0)
			{
				float32_t *odata = self->rdata[k];
				memcpy (ndata, odata, olen * sizeof (float32_t));
				SG_FREE (odata);
			}
			self->rdata[k] = ndata;
			self->rsize[k] = nlen;
			self->cursize += (int64_t) (nlen - olen) * sizeof (float32_t);
			self->maxrowlen = CMath::max (self->maxrowlen, nlen);
		}
	}

	static void xswap (larank_kcache_t * self, int32_t i1, int32_t i2, int32_t r1, int32_t r2)
	{
		/* swap row data */
		if (r1 < self->maxrowlen || r2 < self->maxrowlen)
		{
			int32_t mrl = 0;
			int32_t k = self->rnext[-1];
			while (k >= 0)
			{
				int32_t nk = self->rnext[k];
				int32_t n = self->rsize[k];
				int32_t rr = self->i2r[k];
				float32_t *d = self->rdata[k];
				if (r1 < n)
				{
					if (r2 < n)
					{
						float32_t t1 = d[r1];
						float32_t t2 = d[r2];
						d[r1] = t2;
						d[r2] = t1;
					}
					else if (rr == r2)
					{
						d[r1] = self->rdiag[k];
					}
					else
					{
						int32_t arsize = self->rsize[i2];
						if (rr < arsize && rr != r1)
							d[r1] = self->rdata[i2][rr];
						else
							xtruncate (self, k, r1);
					}
				}
				else if (r2 < n)
				{
					if (rr == r1)
					{
						d[r2] = self->rdiag[k];
					}
					else
					{
						int32_t arsize = self->rsize[i1];
						if (rr < arsize && rr != r2)
							d[r2] = self->rdata[i1][rr];
						else
							xtruncate (self, k, r2);
					}
				}
				mrl = CMath::max (mrl, self->rsize[k]);
				k = nk;
			}
			self->maxrowlen = mrl;
		}
		/* swap r2i and i2r */
		self->r2i[r1] = i2;
		self->r2i[r2] = i1;
		self->i2r[i1] = r2;
		self->i2r[i2] = r1;
	}

	static void larank_kcache_swap_rr (larank_kcache_t * self, int32_t r1, int32_t r2)
	{
		xminsize (self, 1 + CMath::max(r1, r2));
		xswap (self, self->r2i[r1], self->r2i[r2], r1, r2);
	}

	static void larank_kcache_swap_ri (larank_kcache_t * self, int32_t r1, int32_t i2)
	{
		xminsize (self, 1 + CMath::max (r1, i2));
		xswap (self, self->r2i[r1], i2, r1, self->i2r[i2]);
	}
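	// A single kernel entry K(i,j) is looked up by walking the circular
	// chain of "buddy" caches (one per output class, all sharing the same
	// kernel function): if any buddy already holds the value in row i or
	// row j, it is returned from the cache; only on a complete miss is the
	// kernel actually evaluated.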
	static float64_t xquery (larank_kcache_t * self, int32_t i, int32_t j)
	{
		/* search buddies */
		larank_kcache_t *cache = self->nextbuddy;
		do
		{
			int32_t l = cache->l;
			if (i < l && j < l)
			{
				int32_t s = cache->rsize[i];
				int32_t p = cache->i2r[j];
				if (p < s)
					return cache->rdata[i][p];
				if (i == j && s >= 0)
					return cache->rdiag[i];
				p = cache->i2r[i];
				s = cache->rsize[j];
				if (p < s)
					return cache->rdata[j][p];
			}
			cache = cache->nextbuddy;
		}
		while (cache != self);
		/* compute */
		return self->func->kernel(i, j);
	}

	static float64_t larank_kcache_query (larank_kcache_t * self, int32_t i, int32_t j)
	{
		ASSERT (self);
		ASSERT (i >= 0);
		ASSERT (j >= 0);
		return xquery (self, i, j);
	}

	static void larank_kcache_set_buddy (larank_kcache_t * self, larank_kcache_t * buddy)
	{
		larank_kcache_t *p = self;
		larank_kcache_t *selflast = self->prevbuddy;
		larank_kcache_t *buddylast = buddy->prevbuddy;
		/* check functions are identical */
		ASSERT (self->func == buddy->func);
		/* make sure we are not already buddies */
		do
		{
			if (p == buddy)
				return;
			p = p->nextbuddy;
		}
		while (p != self);
		/* link */
		selflast->nextbuddy = buddy;
		buddy->prevbuddy = selflast;
		buddylast->nextbuddy = self;
		self->prevbuddy = buddylast;
	}

	static float32_t* larank_kcache_query_row (larank_kcache_t * self, int32_t i, int32_t len)
	{
		ASSERT (i >= 0);
		if (i < self->l && len <= self->rsize[i])
		{
			self->rnext[self->rprev[i]] = self->rnext[i];
			self->rprev[self->rnext[i]] = self->rprev[i];
		}
		else
		{
			int32_t olen, p;
			float32_t *d;
			if (i >= self->l || len >= self->l)
				xminsize (self, CMath::max (1 + i, len));
			olen = self->rsize[i];
			if (olen < len)
			{
				if (olen < 0)
				{
					self->rdiag[i] = self->func->kernel(i, i);
					olen = self->rsize[i] = 0;
				}
				xextend (self, i, len);
				d = self->rdata[i];
				self->rsize[i] = olen;
				for (p = olen; p < len; p++)
					d[p] = larank_kcache_query (self, self->r2i[p], i);
				self->rsize[i] = len;
			}
			self->rnext[self->rprev[i]] = self->rnext[i];
			self->rprev[self->rnext[i]] = self->rprev[i];
			xpurge (self);
		}
		self->rprev[i] = -1;
		self->rnext[i] = self->rnext[-1];
		self->rnext[self->rprev[i]] = i;
		self->rprev[self->rnext[i]] = i;
		return self->rdata[i];
	}

}

// Initializing an output class (basically creating a kernel cache for it)
void LaRankOutput::initialize (CKernel* kfunc, int64_t cache)
{
	kernel = larank_kcache_create (kfunc);
	larank_kcache_set_maximum_size (kernel, cache * 1024 * 1024);
	beta = SG_MALLOC(float32_t, 1);
	g = SG_MALLOC(float32_t, 1);
	*beta=0;
	*g=0;
	l = 0;
}

// Destroying an output class (basically destroying the kernel cache)
void LaRankOutput::destroy ()
{
	larank_kcache_destroy (kernel);
	kernel=NULL;
	SG_FREE(beta);
	SG_FREE(g);
	beta=NULL;
	g=NULL;
}

// !Important! Computing the score of a given input vector for the actual output
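// The score is the kernel expansion over this output's support vectors,
//   s(x) = sum_{r=0}^{l-1} beta[r] * K(x_r, x),
// evaluated below as a dot product against a cached kernel row.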
float64_t LaRankOutput::computeScore (int32_t x_id)
{
	if (l == 0)
		return 0;
	else
	{
		float32_t *row = larank_kcache_query_row (kernel, x_id, l);
		return SGVector<float32_t>::dot (beta, row, l);
	}
}

// !Important! Computing the gradient of a given input vector for the actual output
float64_t LaRankOutput::computeGradient (int32_t xi_id, int32_t yi, int32_t ythis)
{
	return (yi == ythis ? 1 : 0) - computeScore (xi_id);
}

// Updating the solution in the actual output
void LaRankOutput::update (int32_t x_id, float64_t lambda, float64_t gp)
{
	int32_t *r2i = larank_kcache_r2i (kernel, l);
	int64_t xr = l + 1;
	for (int32_t r = 0; r < l; r++)
		if (r2i[r] == x_id)
		{
			xr = r;
			break;
		}

	// updates the cache order and the beta coefficient
	if (xr < l)
	{
		beta[xr]+=lambda;
	}
	else
	{
		larank_kcache_swap_ri (kernel, l, x_id);
		SGVector<float32_t>::resize(g, l, l+1);
		SGVector<float32_t>::resize(beta, l, l+1);
		g[l]=gp;
		beta[l]=lambda;
		l++;
	}

	// update stored gradients
	float32_t *row = larank_kcache_query_row (kernel, x_id, l);
	for (int32_t r = 0; r < l; r++)
	{
		float64_t oldg = g[r];
		g[r]=oldg - lambda * row[r];
	}
}

// Linking the cache of this output to the cache of another "buddy" output
// so that if a requested value is not found in this cache, you can ask your buddy if it has it.
void LaRankOutput::set_kernel_buddy (larank_kcache_t * bud)
{
	larank_kcache_set_buddy (bud, kernel);
}

// Removing useless support vectors (for which beta=0)
int32_t LaRankOutput::cleanup ()
{
	int32_t count = 0;
	std::vector < int32_t >idx;
	for (int32_t x = 0; x < l; x++)
	{
		if ((beta[x] < FLT_EPSILON) && (beta[x] > -FLT_EPSILON))
		{
			idx.push_back (x);
			count++;
		}
	}
	int32_t new_l = l - count;
	for (int32_t xx = 0; xx < count; xx++)
	{
		int32_t i = idx[xx] - xx;
		for (int32_t r = i; r < (l - 1); r++)
		{
			larank_kcache_swap_rr (kernel, r, int64_t(r) + 1);
			beta[r]=beta[r + 1];
			g[r]=g[r + 1];
		}
	}
	SGVector<float32_t>::resize(beta, l, new_l+1);
	SGVector<float32_t>::resize(g, l, new_l+1);
	beta[new_l]=0;
	g[new_l]=0;
	l = new_l;
	return count;
}

// --- Below are information or "get" functions --- //

float64_t LaRankOutput::getW2 ()
{
	float64_t sum = 0;
	int32_t *r2i = larank_kcache_r2i (kernel, l + 1);
	for (int32_t r = 0; r < l; r++)
	{
		float32_t *row_r = larank_kcache_query_row (kernel, r2i[r], l);
		sum += beta[r] * SGVector<float32_t>::dot (beta, row_r, l);
	}
	return sum;
}

float64_t LaRankOutput::getKii (int32_t x_id)
{
	return larank_kcache_query (kernel, x_id, x_id);
}

float64_t LaRankOutput::getBeta (int32_t x_id)
{
	int32_t *r2i = larank_kcache_r2i (kernel, l);
	int32_t xr = -1;
	for (int32_t r = 0; r < l; r++)
		if (r2i[r] == x_id)
		{
			xr = r;
			break;
		}
	return (xr < 0 ? 0 : beta[xr]);
}
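// The lookups below, like getBeta above, map an example id to its cache
// row with a linear scan of r2i, so each call costs O(l) in the number of
// support vectors of this output.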
float64_t LaRankOutput::getGradient (int32_t x_id)
{
	int32_t *r2i = larank_kcache_r2i (kernel, l);
	int32_t xr = -1;
	for (int32_t r = 0; r < l; r++)
		if (r2i[r] == x_id)
		{
			xr = r;
			break;
		}
	return (xr < 0 ? 0 : g[xr]);
}

bool LaRankOutput::isSupportVector (int32_t x_id) const
{
	int32_t *r2i = larank_kcache_r2i (kernel, l);
	int32_t xr = -1;
	for (int32_t r = 0; r < l; r++)
		if (r2i[r] == x_id)
		{
			xr = r;
			break;
		}
	return (xr >= 0);
}

int32_t LaRankOutput::getSV (float32_t* &sv) const
{
	sv=SG_MALLOC(float32_t, l);
	int32_t *r2i = larank_kcache_r2i (kernel, l);
	for (int32_t r = 0; r < l; r++)
		sv[r]=r2i[r];
	return l;
}

CLaRank::CLaRank (): CMulticlassSVM(new CMulticlassOneVsRestStrategy()),
	nb_seen_examples (0), nb_removed (0),
	n_pro (0), n_rep (0), n_opt (0),
	w_pro (1), w_rep (1), w_opt (1), y0 (0), dual (0),
	batch_mode(true), step(0)
{
}

CLaRank::CLaRank (float64_t C, CKernel* k, CLabels* lab):
	CMulticlassSVM(new CMulticlassOneVsRestStrategy(), C, k, lab),
	nb_seen_examples (0), nb_removed (0),
	n_pro (0), n_rep (0), n_opt (0),
	w_pro (1), w_rep (1), w_opt (1), y0 (0), dual (0),
	batch_mode(true), step(0)
{
}

CLaRank::~CLaRank ()
{
	destroy();
}
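// Batch training: sweep over all examples, calling add() for one LaRank
// online step each, and repeat until the duality gap drops below C (in
// online mode a single sweep is performed). The learned per-class
// expansions are then copied into one CSVM per class.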
bool CLaRank::train_machine(CFeatures* data)
{
	tau = 0.0001;

	ASSERT(m_kernel);
	ASSERT(m_labels && m_labels->get_num_labels());
	ASSERT(m_labels->get_label_type() == LT_MULTICLASS);

	CSignal::clear_cancel();

	if (data)
	{
		if (data->get_num_vectors() != m_labels->get_num_labels())
		{
			SG_ERROR("Number of vectors (%d) does not match number of labels (%d)\n",
					data->get_num_vectors(), m_labels->get_num_labels());
		}
		m_kernel->init(data, data);
	}

	ASSERT(m_kernel->get_num_vec_lhs() && m_kernel->get_num_vec_rhs());

	nb_train=m_labels->get_num_labels();
	cache = m_kernel->get_cache_size();

	int32_t n_it = 1;
	float64_t gap = DBL_MAX;

	SG_INFO("Training on %d examples\n", nb_train);
	while (gap > get_C() && (!CSignal::cancel_computations())) // stopping criteria
	{
		float64_t tr_err = 0;
		int32_t ind = step;
		for (int32_t i = 0; i < nb_train; i++)
		{
			int32_t y=((CMulticlassLabels*) m_labels)->get_label(i);
			if (add (i, y) != y) // call the add function
				tr_err++;

			if (ind && i / ind)
			{
				SG_DEBUG("Done: %d %% Train error (online): %f%%\n",
						(int32_t) (((float64_t) i) / nb_train * 100), (tr_err / ((float64_t) i + 1)) * 100);
				ind += step;
			}
		}

		SG_DEBUG("End of iteration %d\n", n_it++);
		SG_DEBUG("Train error (online): %f%%\n", (tr_err / nb_train) * 100);
		gap = computeGap ();
		SG_ABS_PROGRESS(gap, -CMath::log10(gap), -CMath::log10(DBL_MAX), -CMath::log10(get_C()), 6);

		if (!batch_mode) // skip stopping criteria if online mode
			gap = 0;
	}
	SG_DONE();

	int32_t num_classes = outputs.size();
	create_multiclass_svm(num_classes);
	SG_DEBUG("%d classes\n", num_classes);

	// Used for saving a model file
	int32_t i=0;
	for (outputhash_t::const_iterator it = outputs.begin (); it != outputs.end (); ++it)
	{
		const LaRankOutput* o=&(it->second);

		larank_kcache_t* k=o->getKernel();
		int32_t l=o->get_l();
		float32_t* beta=o->getBetas();
		int32_t *r2i = larank_kcache_r2i (k, l);

		ASSERT(l>0);
		SG_DEBUG("svm[%d] has %d sv, b=%f\n", i, l, 0.0);

		CSVM* svm=new CSVM(l);

		for (int32_t j=0; j<l; j++)
		{
			svm->set_alpha(j, beta[j]);
			svm->set_support_vector(j, r2i[j]);
		}

		svm->set_bias(0);
		set_svm(i, svm);
		i++;
	}
	destroy();

	return true;
}

// LEARNING FUNCTION: add new patterns and run optimization steps selected with adaptive schedule
int32_t CLaRank::add (int32_t x_id, int32_t yi)
{
	++nb_seen_examples;
	// create a new output object if this one has never been seen before
	if (!getOutput (yi))
	{
		outputs.insert (std::make_pair (yi, LaRankOutput ()));
		LaRankOutput *cur = getOutput (yi);
		cur->initialize (m_kernel, cache);
		if (outputs.size () == 1)
			y0 = outputs.begin ()->first;
		// link the cache of this new output to a buddy
		if (outputs.size () > 1)
		{
			LaRankOutput *out0 = getOutput (y0);
			cur->set_kernel_buddy (out0->getKernel ());
		}
	}

	LaRankPattern tpattern (x_id, yi);
	LaRankPattern & pattern = (patterns.isPattern (x_id)) ? patterns.getPattern (x_id) : tpattern;

	// ProcessNew with the "fresh" pattern
	float64_t time1 = CTime::get_curtime();
	process_return_t pro_ret = process (pattern, processNew);
	float64_t dual_increase = pro_ret.dual_increase;
	float64_t duration = (CTime::get_curtime() - time1);
	float64_t coeff = dual_increase / (0.00001 + duration);
	dual += dual_increase;
	n_pro++;
	w_pro = 0.05 * coeff + (1 - 0.05) * w_pro;

	// ProcessOld & Optimize until ready for a new processnew
	// (Adaptive schedule here)
	for (;;)
	{
		float64_t w_sum = w_pro + w_rep + w_opt;
		float64_t prop_min = w_sum / 20;
		if (w_pro < prop_min)
			w_pro = prop_min;
		if (w_rep < prop_min)
			w_rep = prop_min;
		if (w_opt < prop_min)
			w_opt = prop_min;
		w_sum = w_pro + w_rep + w_opt;
		float64_t r = CMath::random(0.0, w_sum);
		if (r <= w_pro)
		{
			break;
		}
		else if ((r > w_pro) && (r <= w_pro + w_rep)) // ProcessOld here
		{
			float64_t ltime1 = CTime::get_curtime ();
			float64_t ldual_increase = reprocess ();
			float64_t lduration = (CTime::get_curtime () - ltime1);
			float64_t lcoeff = ldual_increase / (0.00001 + lduration);
			dual += ldual_increase;
			n_rep++;
			w_rep = 0.05 * lcoeff + (1 - 0.05) * w_rep;
		}
		else // Optimize here
		{
			float64_t ltime1 = CTime::get_curtime ();
			float64_t ldual_increase = optimize ();
			float64_t lduration = (CTime::get_curtime () - ltime1);
			float64_t lcoeff = ldual_increase / (0.00001 + lduration);
			dual += ldual_increase;
			n_opt++;
			w_opt = 0.05 * lcoeff + (1 - 0.05) * w_opt;
		}
	}
	if (nb_seen_examples % 100 == 0) // Cleanup useless Support Vectors/Patterns sometimes
		nb_removed += cleanup ();
	return pro_ret.ypred;
}

// PREDICTION FUNCTION: main function in la_rank_classify
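// It returns the class whose output scores the example highest, i.e.
// argmax_y s_y(x) over all outputs seen so far.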
int32_t CLaRank::predict (int32_t x_id)
{
	int32_t res = -1;
	float64_t score_max = -DBL_MAX;
	for (outputhash_t::iterator it = outputs.begin (); it != outputs.end ();++it)
	{
		float64_t score = it->second.computeScore (x_id);
		if (score > score_max)
		{
			score_max = score;
			res = it->first;
		}
	}
	return res;
}

void CLaRank::destroy ()
{
	for (outputhash_t::iterator it = outputs.begin (); it != outputs.end ();++it)
		it->second.destroy ();
	outputs.clear();
}

// Compute Duality gap (costly but used in stopping criteria in batch mode)
float64_t CLaRank::computeGap ()
{
	float64_t sum_sl = 0;
	float64_t sum_bi = 0;
	for (uint32_t i = 0; i < patterns.maxcount (); ++i)
	{
		const LaRankPattern & p = patterns[i];
		if (!p.exists ())
			continue;
		LaRankOutput *out = getOutput (p.y);
		if (!out)
			continue;
		sum_bi += out->getBeta (p.x_id);
		float64_t gi = out->computeGradient (p.x_id, p.y, p.y);
		float64_t gmin = DBL_MAX;
		for (outputhash_t::iterator it = outputs.begin (); it != outputs.end (); ++it)
		{
			if (it->first != p.y && it->second.isSupportVector (p.x_id))
			{
				float64_t g =
					it->second.computeGradient (p.x_id, p.y, it->first);
				if (g < gmin)
					gmin = g;
			}
		}
		sum_sl += CMath::max (0.0, gi - gmin);
	}
	return CMath::max (0.0, computeW2 () + get_C() * sum_sl - sum_bi);
}

// Number of classes so far
uint32_t CLaRank::getNumOutputs () const
{
	return outputs.size ();
}

// Number of Support Vectors
int32_t CLaRank::getNSV ()
{
	int32_t res = 0;
	for (outputhash_t::const_iterator it = outputs.begin (); it != outputs.end (); ++it)
	{
		float32_t* sv=NULL;
		res += it->second.getSV (sv);
		SG_FREE(sv);
	}
	return res;
}

// Norm of the parameters vector
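// (in feature space: ||w||^2 = sum_y sum_i beta_i^y * s_y(x_i), accumulated
// below from each output's stored beta coefficients and cached kernel rows)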
float64_t CLaRank::computeW2 ()
{
	float64_t res = 0;
	for (uint32_t i = 0; i < patterns.maxcount (); ++i)
	{
		const LaRankPattern & p = patterns[i];
		if (!p.exists ())
			continue;
		for (outputhash_t::iterator it = outputs.begin (); it != outputs.end (); ++it)
			if (it->second.getBeta (p.x_id))
				res += it->second.getBeta (p.x_id) * it->second.computeScore (p.x_id);
	}
	return res;
}

// Compute Dual objective value
float64_t CLaRank::getDual ()
{
	float64_t res = 0;
	for (uint32_t i = 0; i < patterns.maxcount (); ++i)
	{
		const LaRankPattern & p = patterns[i];
		if (!p.exists ())
			continue;
		LaRankOutput *out = getOutput (p.y);
		if (!out)
			continue;
		res += out->getBeta (p.x_id);
	}
	return res - computeW2 () / 2;
}

LaRankOutput *CLaRank::getOutput (int32_t index)
{
	outputhash_t::iterator it = outputs.find (index);
	return it == outputs.end ()? NULL : &it->second;
}

// IMPORTANT Main SMO optimization step
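// For pattern x_i, the per-class gradients g_y = [y == y_i] - s_y(x_i) are
// sorted by decreasing value; yp is the first class whose beta may still
// increase (beta < C for the true class, beta < 0 otherwise) and ym the
// last one whose beta may decrease. The step size
//   lambda = (g_p - g_m) / (2 * K(x_i, x_i))
// is clipped to the box constraints, both outputs are updated in opposite
// directions, and the dual increase
//   lambda * ((g_p - g_m) - lambda * K(x_i, x_i))
// is reported back to the adaptive schedule in add().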
CLaRank::process_return_t CLaRank::process (const LaRankPattern & pattern, process_type ptype)
{
	process_return_t pro_ret = process_return_t (0, 0);

	/*
	 ** compute gradient and sort
	 */
	std::vector < outputgradient_t > outputgradients;
	outputgradients.reserve (getNumOutputs ());

	std::vector < outputgradient_t > outputscores;
	outputscores.reserve (getNumOutputs ());

	for (outputhash_t::iterator it = outputs.begin (); it != outputs.end (); ++it)
	{
		if (ptype != processOptimize
				|| it->second.isSupportVector (pattern.x_id))
		{
			float64_t g =
				it->second.computeGradient (pattern.x_id, pattern.y, it->first);
			outputgradients.push_back (outputgradient_t (it->first, g));
			if (it->first == pattern.y)
				outputscores.push_back (outputgradient_t (it->first, (1 - g)));
			else
				outputscores.push_back (outputgradient_t (it->first, -g));
		}
	}

	std::sort (outputgradients.begin (), outputgradients.end ());

	/*
	 ** determine the prediction
	 */
	std::sort (outputscores.begin (), outputscores.end ());
	pro_ret.ypred = outputscores[0].output;

	/*
	 ** Find yp (1st part of the pair)
	 */
	outputgradient_t ygp;
	LaRankOutput *outp = NULL;
	uint32_t p;
	for (p = 0; p < outputgradients.size (); ++p)
	{
		outputgradient_t & current = outputgradients[p];
		LaRankOutput *output = getOutput (current.output);
		bool support = ptype == processOptimize || output->isSupportVector (pattern.x_id);
		bool goodclass = current.output == pattern.y;
		if ((!support && goodclass) ||
				(support && output->getBeta (pattern.x_id) < (goodclass ? get_C() : 0)))
		{
			ygp = current;
			outp = output;
			break;
		}
	}
	if (p == outputgradients.size ())
		return pro_ret;

	/*
	 ** Find ym (2nd part of the pair)
	 */
	outputgradient_t ygm;
	LaRankOutput *outm = NULL;
	int32_t m;
	for (m = outputgradients.size () - 1; m >= 0; --m)
	{
		outputgradient_t & current = outputgradients[m];
		LaRankOutput *output = getOutput (current.output);
		bool support = ptype == processOptimize || output->isSupportVector (pattern.x_id);
		bool goodclass = current.output == pattern.y;
		if (!goodclass || (support && output->getBeta (pattern.x_id) > 0))
		{
			ygm = current;
			outm = output;
			break;
		}
	}
	if (m < 0)
		return pro_ret;

	/*
	 ** Throw or Insert pattern
	 */
	if ((ygp.gradient - ygm.gradient) < tau)
		return pro_ret;
	if (ptype == processNew)
		patterns.insert (pattern);

	/*
	 ** compute lambda and clip it
	 */
	float64_t kii = outp->getKii (pattern.x_id);
	float64_t lambda = (ygp.gradient - ygm.gradient) / (2 * kii);
	if (ptype == processOptimize || outp->isSupportVector (pattern.x_id))
	{
		float64_t beta = outp->getBeta (pattern.x_id);
		if (ygp.output == pattern.y)
			lambda = CMath::min (lambda, get_C() - beta);
		else
			lambda = CMath::min (lambda, fabs (beta));
	}
	else
		lambda = CMath::min (lambda, get_C());

	/*
	 ** update the solution
	 */
	outp->update (pattern.x_id, lambda, ygp.gradient);
	outm->update (pattern.x_id, -lambda, ygm.gradient);

	pro_ret.dual_increase = lambda * ((ygp.gradient - ygm.gradient) - lambda * kii);
	return pro_ret;
}

// ProcessOld
float64_t CLaRank::reprocess ()
{
	if (patterns.size ())
	{
		for (int32_t n = 0; n < 10; ++n)
		{
			process_return_t pro_ret = process (patterns.sample (), processOld);
			if (pro_ret.dual_increase)
				return pro_ret.dual_increase;
		}
	}
	return 0;
}

// Optimize
float64_t CLaRank::optimize ()
{
	float64_t dual_increase = 0;
	if (patterns.size ())
	{
		for (int32_t n = 0; n < 10; ++n)
		{
			process_return_t pro_ret =
				process (patterns.sample(), processOptimize);
			dual_increase += pro_ret.dual_increase;
		}
	}
	return dual_increase;
}

// remove patterns and return the number of patterns that were removed
uint32_t CLaRank::cleanup ()
{
	/*
	for (outputhash_t::iterator it = outputs.begin (); it != outputs.end (); ++it)
		it->second.cleanup ();

	uint32_t res = 0;
	for (uint32_t i = 0; i < patterns.size (); ++i)
	{
		LaRankPattern & p = patterns[i];
		if (p.exists () && !outputs[p.y].isSupportVector (p.x_id))
		{
			patterns.remove (i);
			++res;
		}
	}
	return res;
	*/
	return 0;
}
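// ---------------------------------------------------------------------
// Example: driving this class through Shogun's public API. This is a
// minimal sketch, not part of the original file; the feature, kernel and
// label classes and the exact constructor signatures are assumptions
// based on the Shogun 2.0-era headers included above.
//
//   #include <shogun/base/init.h>
//   #include <shogun/features/DenseFeatures.h>
//   #include <shogun/kernel/GaussianKernel.h>
//   #include <shogun/labels/MulticlassLabels.h>
//   #include <shogun/multiclass/LaRank.h>
//
//   using namespace shogun;
//
//   init_shogun_with_defaults();
//   SGMatrix<float64_t> mat(dim, num_vec);   // training data, one column per example
//   SGVector<float64_t> lab(num_vec);        // class ids 0 .. K-1
//   CDenseFeatures<float64_t>* feats = new CDenseFeatures<float64_t>(mat);
//   CMulticlassLabels* labels = new CMulticlassLabels(lab);
//   CGaussianKernel* kernel = new CGaussianKernel(feats, feats, 2.0);
//   CLaRank* machine = new CLaRank(1.0, kernel, labels);  // C, kernel, labels
//   machine->train();                        // batch mode by default
//   CLabels* out = machine->apply(feats);    // predictions on (here) the train set
//   SG_UNREF(out);
//   SG_UNREF(machine);
//   exit_shogun();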