SHOGUN
v2.0.0
|
00001 /* 00002 * Copyright (c) 2000-2009 Chih-Chung Chang and Chih-Jen Lin 00003 * All rights reserved. 00004 * 00005 * Redistribution and use in source and binary forms, with or without 00006 * modification, are permitted provided that the following conditions 00007 * are met: 00008 * 00009 * 1. Redistributions of source code must retain the above copyright 00010 * notice, this list of conditions and the following disclaimer. 00011 * 00012 * 2. Redistributions in binary form must reproduce the above copyright 00013 * notice, this list of conditions and the following disclaimer in the 00014 * documentation and/or other materials provided with the distribution. 00015 * 00016 * 3. Neither name of copyright holders nor the names of its contributors 00017 * may be used to endorse or promote products derived from this software 00018 * without specific prior written permission. 00019 * 00020 * 00021 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 00022 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 00023 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 00024 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR 00025 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 00026 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 00027 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 00028 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 00029 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 00030 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 00031 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 00032 * 00033 * Shogun specific adjustments (w) 2006-2009 Soeren Sonnenburg 00034 */ 00035 00036 #ifndef DOXYGEN_SHOULD_SKIP_THIS 00037 00038 #include <shogun/lib/memory.h> 00039 #include <shogun/lib/external/shogun_libsvm.h> 00040 #include <shogun/kernel/Kernel.h> 00041 #include <shogun/io/SGIO.h> 00042 #include <shogun/lib/Time.h> 00043 #include <shogun/lib/Signal.h> 00044 #include <shogun/lib/common.h> 00045 #include <shogun/mathematics/Math.h> 00046 00047 #include <stdio.h> 00048 #include <stdlib.h> 00049 #include <ctype.h> 00050 #include <float.h> 00051 #include <string.h> 00052 #include <stdarg.h> 00053 00054 #ifdef HAVE_PTHREAD 00055 #include <pthread.h> 00056 #endif 00057 00058 namespace shogun 00059 { 00060 00061 typedef KERNELCACHE_ELEM Qfloat; 00062 typedef float64_t schar; 00063 00064 template <class S, class T> inline void clone(T*& dst, S* src, int32_t n) 00065 { 00066 dst = SG_MALLOC(T, n); 00067 memcpy((void *)dst,(void *)src,sizeof(T)*n); 00068 } 00069 #define INF HUGE_VAL 00070 #define TAU 1e-12 00071 00072 class QMatrix; 00073 class SVC_QMC; 00074 00075 // 00076 // Kernel Cache 00077 // 00078 // l is the number of total data items 00079 // size is the cache size limit in bytes 00080 // 00081 class Cache 00082 { 00083 public: 00084 Cache(int32_t l, int64_t size); 00085 ~Cache(); 00086 00087 // request data [0,len) 00088 // return some position p where [p,len) need to be filled 00089 // (p >= len if nothing needs to be filled) 00090 int32_t get_data(const int32_t index, Qfloat **data, int32_t len); 00091 void swap_index(int32_t i, int32_t j); // future_option 00092 00093 private: 00094 int32_t l; 00095 int64_t size; 00096 struct head_t 00097 { 00098 head_t *prev, *next; // a circular list 00099 Qfloat *data; 00100 int32_t len; // data[0,len) is cached in this entry 00101 }; 00102 00103 head_t *head; 00104 head_t lru_head; 00105 void lru_delete(head_t *h); 00106 void lru_insert(head_t *h); 00107 }; 00108 00109 Cache::Cache(int32_t l_, int64_t size_):l(l_),size(size_) 00110 { 00111 head = (head_t *)calloc(l,sizeof(head_t)); // initialized to 0 00112 size /= sizeof(Qfloat); 00113 size -= l * sizeof(head_t) / sizeof(Qfloat); 00114 size = CMath::max(size, (int64_t) 2*l); // cache must be large enough for two columns 00115 lru_head.next = lru_head.prev = &lru_head; 00116 } 00117 00118 Cache::~Cache() 00119 { 00120 for(head_t *h = lru_head.next; h != &lru_head; h=h->next) 00121 SG_FREE(h->data); 00122 SG_FREE(head); 00123 } 00124 00125 void Cache::lru_delete(head_t *h) 00126 { 00127 // delete from current location 00128 h->prev->next = h->next; 00129 h->next->prev = h->prev; 00130 } 00131 00132 void Cache::lru_insert(head_t *h) 00133 { 00134 // insert to last position 00135 h->next = &lru_head; 00136 h->prev = lru_head.prev; 00137 h->prev->next = h; 00138 h->next->prev = h; 00139 } 00140 00141 int32_t Cache::get_data(const int32_t index, Qfloat **data, int32_t len) 00142 { 00143 head_t *h = &head[index]; 00144 if(h->len) lru_delete(h); 00145 int32_t more = len - h->len; 00146 00147 if(more > 0) 00148 { 00149 // free old space 00150 while(size < more) 00151 { 00152 head_t *old = lru_head.next; 00153 lru_delete(old); 00154 SG_FREE(old->data); 00155 size += old->len; 00156 old->data = 0; 00157 old->len = 0; 00158 } 00159 00160 // allocate new space 00161 h->data = SG_REALLOC(Qfloat, h->data, len); 00162 size -= more; 00163 CMath::swap(h->len,len); 00164 } 00165 00166 lru_insert(h); 00167 *data = h->data; 00168 return len; 00169 } 00170 00171 void Cache::swap_index(int32_t i, int32_t j) 00172 { 00173 if(i==j) return; 00174 00175 if(head[i].len) lru_delete(&head[i]); 00176 if(head[j].len) lru_delete(&head[j]); 00177 CMath::swap(head[i].data,head[j].data); 00178 CMath::swap(head[i].len,head[j].len); 00179 if(head[i].len) lru_insert(&head[i]); 00180 if(head[j].len) lru_insert(&head[j]); 00181 00182 if(i>j) CMath::swap(i,j); 00183 for(head_t *h = lru_head.next; h!=&lru_head; h=h->next) 00184 { 00185 if(h->len > i) 00186 { 00187 if(h->len > j) 00188 CMath::swap(h->data[i],h->data[j]); 00189 else 00190 { 00191 // give up 00192 lru_delete(h); 00193 SG_FREE(h->data); 00194 size += h->len; 00195 h->data = 0; 00196 h->len = 0; 00197 } 00198 } 00199 } 00200 } 00201 00202 // 00203 // Kernel evaluation 00204 // 00205 // the static method k_function is for doing single kernel evaluation 00206 // the constructor of Kernel prepares to calculate the l*l kernel matrix 00207 // the member function get_Q is for getting one column from the Q Matrix 00208 // 00209 class QMatrix { 00210 public: 00211 virtual Qfloat *get_Q(int32_t column, int32_t len) const = 0; 00212 virtual Qfloat *get_QD() const = 0; 00213 virtual void swap_index(int32_t i, int32_t j) const = 0; 00214 virtual ~QMatrix() {} 00215 00216 float64_t max_train_time; 00217 }; 00218 00219 class LibSVMKernel; 00220 00221 // helper struct for threaded processing 00222 struct Q_THREAD_PARAM 00223 { 00224 int32_t i; 00225 int32_t start; 00226 int32_t end; 00227 Qfloat* data; 00228 float64_t* y; 00229 const LibSVMKernel* q; 00230 }; 00231 00232 extern Parallel* sg_parallel; 00233 00234 class LibSVMKernel: public QMatrix { 00235 public: 00236 LibSVMKernel(int32_t l, svm_node * const * x, const svm_parameter& param); 00237 virtual ~LibSVMKernel(); 00238 00239 virtual Qfloat *get_Q(int32_t column, int32_t len) const = 0; 00240 virtual Qfloat *get_QD() const = 0; 00241 virtual void swap_index(int32_t i, int32_t j) const // no so const... 00242 { 00243 CMath::swap(x[i],x[j]); 00244 if(x_square) CMath::swap(x_square[i],x_square[j]); 00245 } 00246 00247 static void* compute_Q_parallel_helper(void* p) 00248 { 00249 Q_THREAD_PARAM* params= (Q_THREAD_PARAM*) p; 00250 int32_t i=params->i; 00251 int32_t start=params->start; 00252 int32_t end=params->end; 00253 float64_t* y=params->y; 00254 Qfloat* data=params->data; 00255 const LibSVMKernel* q=params->q; 00256 00257 if (y) // two class 00258 { 00259 for(int32_t j=start;j<end;j++) 00260 data[j] = (Qfloat) y[i]*y[j]*q->kernel_function(i,j); 00261 } 00262 else // one class, eps svr 00263 { 00264 for(int32_t j=start;j<end;j++) 00265 data[j] = (Qfloat) q->kernel_function(i,j); 00266 } 00267 00268 return NULL; 00269 } 00270 00271 void compute_Q_parallel(Qfloat* data, float64_t* lab, int32_t i, int32_t start, int32_t len) const 00272 { 00273 int32_t num_threads=sg_parallel->get_num_threads(); 00274 if (num_threads < 2) 00275 { 00276 Q_THREAD_PARAM params; 00277 params.i=i; 00278 params.start=start; 00279 params.end=len; 00280 params.y=lab; 00281 params.data=data; 00282 params.q=this; 00283 compute_Q_parallel_helper((void*) ¶ms); 00284 } 00285 else 00286 { 00287 #ifdef HAVE_PTHREAD 00288 int32_t total_num=(len-start); 00289 pthread_t* threads = SG_MALLOC(pthread_t, num_threads-1); 00290 Q_THREAD_PARAM* params = SG_MALLOC(Q_THREAD_PARAM, num_threads); 00291 int32_t step= total_num/num_threads; 00292 00293 int32_t t; 00294 00295 num_threads--; 00296 for (t=0; t<num_threads; t++) 00297 { 00298 params[t].i=i; 00299 params[t].start=t*step; 00300 params[t].end=(t+1)*step; 00301 params[t].y=lab; 00302 params[t].data=data; 00303 params[t].q=this; 00304 00305 int code=pthread_create(&threads[t], NULL, 00306 compute_Q_parallel_helper, (void*)¶ms[t]); 00307 00308 if (code != 0) 00309 { 00310 SG_SWARNING("Thread creation failed (thread %d of %d) " 00311 "with error:'%s'\n",t, num_threads, strerror(code)); 00312 num_threads=t; 00313 break; 00314 } 00315 } 00316 00317 params[t].i=i; 00318 params[t].start=t*step; 00319 params[t].end=len; 00320 params[t].y=lab; 00321 params[t].data=data; 00322 params[t].q=this; 00323 compute_Q_parallel_helper(¶ms[t]); 00324 00325 for (t=0; t<num_threads; t++) 00326 { 00327 if (pthread_join(threads[t], NULL) != 0) 00328 SG_SWARNING("pthread_join of thread %d/%d failed\n", t, num_threads); 00329 } 00330 00331 SG_FREE(params); 00332 SG_FREE(threads); 00333 #endif /* HAVE_PTHREAD */ 00334 } 00335 } 00336 00337 inline float64_t kernel_function(int32_t i, int32_t j) const 00338 { 00339 return kernel->kernel(x[i]->index,x[j]->index); 00340 } 00341 00342 private: 00343 CKernel* kernel; 00344 const svm_node **x; 00345 float64_t *x_square; 00346 00347 // svm_parameter 00348 const int32_t kernel_type; 00349 const int32_t degree; 00350 const float64_t gamma; 00351 const float64_t coef0; 00352 }; 00353 00354 LibSVMKernel::LibSVMKernel(int32_t l, svm_node * const * x_, const svm_parameter& param) 00355 : kernel_type(param.kernel_type), degree(param.degree), 00356 gamma(param.gamma), coef0(param.coef0) 00357 { 00358 clone(x,x_,l); 00359 x_square = 0; 00360 kernel=param.kernel; 00361 max_train_time=param.max_train_time; 00362 } 00363 00364 LibSVMKernel::~LibSVMKernel() 00365 { 00366 SG_FREE(x); 00367 SG_FREE(x_square); 00368 } 00369 00370 // Generalized SMO+SVMlight algorithm 00371 // Solves: 00372 // 00373 // min 0.5(\alpha^T Q \alpha) + b^T \alpha 00374 // 00375 // y^T \alpha = \delta 00376 // y_i = +1 or -1 00377 // 0 <= alpha_i <= Cp for y_i = 1 00378 // 0 <= alpha_i <= Cn for y_i = -1 00379 // 00380 // Given: 00381 // 00382 // Q, p, y, Cp, Cn, and an initial feasible point \alpha 00383 // l is the size of vectors and matrices 00384 // eps is the stopping tolerance 00385 // 00386 // solution will be put in \alpha, objective value will be put in obj 00387 // 00388 class Solver { 00389 public: 00390 Solver() {}; 00391 virtual ~Solver() {}; 00392 00393 struct SolutionInfo { 00394 float64_t obj; 00395 float64_t rho; 00396 float64_t upper_bound_p; 00397 float64_t upper_bound_n; 00398 float64_t r; // for Solver_NU 00399 }; 00400 00401 void Solve( 00402 int32_t l, const QMatrix& Q, const float64_t *p_, const schar *y_, 00403 float64_t *alpha_, float64_t Cp, float64_t Cn, float64_t eps, 00404 SolutionInfo* si, int32_t shrinking, bool use_bias); 00405 00406 protected: 00407 int32_t active_size; 00408 schar *y; 00409 float64_t *G; // gradient of objective function 00410 enum { LOWER_BOUND, UPPER_BOUND, FREE }; 00411 char *alpha_status; // LOWER_BOUND, UPPER_BOUND, FREE 00412 float64_t *alpha; 00413 const QMatrix *Q; 00414 const Qfloat *QD; 00415 float64_t eps; 00416 float64_t Cp,Cn; 00417 float64_t *p; 00418 int32_t *active_set; 00419 float64_t *G_bar; // gradient, if we treat free variables as 0 00420 int32_t l; 00421 bool unshrink; // XXX 00422 00423 float64_t get_C(int32_t i) 00424 { 00425 return (y[i] > 0)? Cp : Cn; 00426 } 00427 void update_alpha_status(int32_t i) 00428 { 00429 if(alpha[i] >= get_C(i)) 00430 alpha_status[i] = UPPER_BOUND; 00431 else if(alpha[i] <= 0) 00432 alpha_status[i] = LOWER_BOUND; 00433 else alpha_status[i] = FREE; 00434 } 00435 bool is_upper_bound(int32_t i) { return alpha_status[i] == UPPER_BOUND; } 00436 bool is_lower_bound(int32_t i) { return alpha_status[i] == LOWER_BOUND; } 00437 bool is_free(int32_t i) { return alpha_status[i] == FREE; } 00438 void swap_index(int32_t i, int32_t j); 00439 void reconstruct_gradient(); 00440 virtual int32_t select_working_set(int32_t &i, int32_t &j, float64_t &gap); 00441 virtual float64_t calculate_rho(); 00442 virtual void do_shrinking(); 00443 private: 00444 bool be_shrunk(int32_t i, float64_t Gmax1, float64_t Gmax2); 00445 }; 00446 00447 void Solver::swap_index(int32_t i, int32_t j) 00448 { 00449 Q->swap_index(i,j); 00450 CMath::swap(y[i],y[j]); 00451 CMath::swap(G[i],G[j]); 00452 CMath::swap(alpha_status[i],alpha_status[j]); 00453 CMath::swap(alpha[i],alpha[j]); 00454 CMath::swap(p[i],p[j]); 00455 CMath::swap(active_set[i],active_set[j]); 00456 CMath::swap(G_bar[i],G_bar[j]); 00457 } 00458 00459 void Solver::reconstruct_gradient() 00460 { 00461 // reconstruct inactive elements of G from G_bar and free variables 00462 00463 if(active_size == l) return; 00464 00465 int32_t i,j; 00466 int32_t nr_free = 0; 00467 00468 for(j=active_size;j<l;j++) 00469 G[j] = G_bar[j] + p[j]; 00470 00471 for(j=0;j<active_size;j++) 00472 if(is_free(j)) 00473 nr_free++; 00474 00475 if (nr_free*l > 2*active_size*(l-active_size)) 00476 { 00477 for(i=active_size;i<l;i++) 00478 { 00479 const Qfloat *Q_i = Q->get_Q(i,active_size); 00480 for(j=0;j<active_size;j++) 00481 if(is_free(j)) 00482 G[i] += alpha[j] * Q_i[j]; 00483 } 00484 } 00485 else 00486 { 00487 for(i=0;i<active_size;i++) 00488 if(is_free(i)) 00489 { 00490 const Qfloat *Q_i = Q->get_Q(i,l); 00491 float64_t alpha_i = alpha[i]; 00492 for(j=active_size;j<l;j++) 00493 G[j] += alpha_i * Q_i[j]; 00494 } 00495 } 00496 } 00497 00498 void Solver::Solve( 00499 int32_t p_l, const QMatrix& p_Q, const float64_t *p_p, 00500 const schar *p_y, float64_t *p_alpha, float64_t p_Cp, float64_t p_Cn, 00501 float64_t p_eps, SolutionInfo* p_si, int32_t shrinking, bool use_bias) 00502 { 00503 this->l = p_l; 00504 this->Q = &p_Q; 00505 QD=Q->get_QD(); 00506 clone(p, p_p,l); 00507 clone(y, p_y,l); 00508 clone(alpha,p_alpha,l); 00509 this->Cp = p_Cp; 00510 this->Cn = p_Cn; 00511 this->eps = p_eps; 00512 unshrink = false; 00513 00514 // initialize alpha_status 00515 { 00516 alpha_status = SG_MALLOC(char, l); 00517 for(int32_t i=0;i<l;i++) 00518 update_alpha_status(i); 00519 } 00520 00521 // initialize active set (for shrinking) 00522 { 00523 active_set = SG_MALLOC(int32_t, l); 00524 for(int32_t i=0;i<l;i++) 00525 active_set[i] = i; 00526 active_size = l; 00527 } 00528 00529 // initialize gradient 00530 CSignal::clear_cancel(); 00531 CTime start_time; 00532 { 00533 G = SG_MALLOC(float64_t, l); 00534 G_bar = SG_MALLOC(float64_t, l); 00535 int32_t i; 00536 for(i=0;i<l;i++) 00537 { 00538 G[i] = p_p[i]; 00539 G_bar[i] = 0; 00540 } 00541 SG_SINFO("Computing gradient for initial set of non-zero alphas\n"); 00542 //CMath::display_vector(alpha, l, "alphas"); 00543 for(i=0;i<l && !CSignal::cancel_computations(); i++) 00544 { 00545 if(!is_lower_bound(i)) 00546 { 00547 const Qfloat *Q_i = Q->get_Q(i,l); 00548 float64_t alpha_i = alpha[i]; 00549 int32_t j; 00550 for(j=0;j<l;j++) 00551 G[j] += alpha_i*Q_i[j]; 00552 if(is_upper_bound(i)) 00553 for(j=0;j<l;j++) 00554 G_bar[j] += get_C(i) * Q_i[j]; 00555 } 00556 SG_SPROGRESS(i, 0, l); 00557 } 00558 SG_SDONE(); 00559 } 00560 00561 // optimization step 00562 00563 int32_t iter = 0; 00564 int32_t counter = CMath::min(l,1000)+1; 00565 00566 while (!CSignal::cancel_computations()) 00567 { 00568 if (Q->max_train_time > 0 && start_time.cur_time_diff() > Q->max_train_time) 00569 break; 00570 00571 // show progress and do shrinking 00572 00573 if(--counter == 0) 00574 { 00575 counter = CMath::min(l,1000); 00576 if(shrinking) do_shrinking(); 00577 //SG_SINFO("."); 00578 } 00579 00580 int32_t i,j; 00581 float64_t gap; 00582 if(select_working_set(i,j, gap)!=0) 00583 { 00584 // reconstruct the whole gradient 00585 reconstruct_gradient(); 00586 // reset active set size and check 00587 active_size = l; 00588 //SG_SINFO("*"); 00589 if(select_working_set(i,j, gap)!=0) 00590 break; 00591 else 00592 counter = 1; // do shrinking next iteration 00593 } 00594 00595 SG_SABS_PROGRESS(gap, -CMath::log10(gap), -CMath::log10(1), -CMath::log10(eps), 6); 00596 00597 ++iter; 00598 00599 // update alpha[i] and alpha[j], handle bounds carefully 00600 00601 const Qfloat *Q_i = Q->get_Q(i,active_size); 00602 const Qfloat *Q_j = Q->get_Q(j,active_size); 00603 00604 float64_t C_i = get_C(i); 00605 float64_t C_j = get_C(j); 00606 00607 float64_t old_alpha_i = alpha[i]; 00608 float64_t old_alpha_j = alpha[j]; 00609 00610 if (!use_bias) 00611 { 00612 double pi=G[i]-Q_i[i]*alpha[i]-Q_i[j]*alpha[j]; 00613 double pj=G[j]-Q_i[j]*alpha[i]-Q_j[j]*alpha[j]; 00614 double det=Q_i[i]*Q_j[j]-Q_i[j]*Q_i[j]; 00615 double alpha_i=-(Q_j[j]*pi-Q_i[j]*pj)/det; 00616 alpha_i=CMath::min(C_i,CMath::max(0.0,alpha_i)); 00617 double alpha_j=-(-Q_i[j]*pi+Q_i[i]*pj)/det; 00618 alpha_j=CMath::min(C_j,CMath::max(0.0,alpha_j)); 00619 00620 if (alpha_i==0 || alpha_i == C_i) 00621 alpha_j=CMath::min(C_j,CMath::max(0.0,-(pj+Q_i[j]*alpha_i)/Q_j[j])); 00622 if (alpha_j==0 || alpha_j == C_j) 00623 alpha_i=CMath::min(C_i,CMath::max(0.0,-(pi+Q_i[j]*alpha_j)/Q_i[i])); 00624 00625 alpha[i]=alpha_i; alpha[j]=alpha_j; 00626 } 00627 else 00628 { 00629 if(y[i]!=y[j]) 00630 { 00631 float64_t quad_coef = Q_i[i]+Q_j[j]+2*Q_i[j]; 00632 if (quad_coef <= 0) 00633 quad_coef = TAU; 00634 float64_t delta = (-G[i]-G[j])/quad_coef; 00635 float64_t diff = alpha[i] - alpha[j]; 00636 alpha[i] += delta; 00637 alpha[j] += delta; 00638 00639 if(diff > 0) 00640 { 00641 if(alpha[j] < 0) 00642 { 00643 alpha[j] = 0; 00644 alpha[i] = diff; 00645 } 00646 } 00647 else 00648 { 00649 if(alpha[i] < 0) 00650 { 00651 alpha[i] = 0; 00652 alpha[j] = -diff; 00653 } 00654 } 00655 if(diff > C_i - C_j) 00656 { 00657 if(alpha[i] > C_i) 00658 { 00659 alpha[i] = C_i; 00660 alpha[j] = C_i - diff; 00661 } 00662 } 00663 else 00664 { 00665 if(alpha[j] > C_j) 00666 { 00667 alpha[j] = C_j; 00668 alpha[i] = C_j + diff; 00669 } 00670 } 00671 } 00672 else 00673 { 00674 float64_t quad_coef = Q_i[i]+Q_j[j]-2*Q_i[j]; 00675 if (quad_coef <= 0) 00676 quad_coef = TAU; 00677 float64_t delta = (G[i]-G[j])/quad_coef; 00678 float64_t sum = alpha[i] + alpha[j]; 00679 alpha[i] -= delta; 00680 alpha[j] += delta; 00681 00682 if(sum > C_i) 00683 { 00684 if(alpha[i] > C_i) 00685 { 00686 alpha[i] = C_i; 00687 alpha[j] = sum - C_i; 00688 } 00689 } 00690 else 00691 { 00692 if(alpha[j] < 0) 00693 { 00694 alpha[j] = 0; 00695 alpha[i] = sum; 00696 } 00697 } 00698 if(sum > C_j) 00699 { 00700 if(alpha[j] > C_j) 00701 { 00702 alpha[j] = C_j; 00703 alpha[i] = sum - C_j; 00704 } 00705 } 00706 else 00707 { 00708 if(alpha[i] < 0) 00709 { 00710 alpha[i] = 0; 00711 alpha[j] = sum; 00712 } 00713 } 00714 } 00715 } 00716 00717 // update G 00718 00719 float64_t delta_alpha_i = alpha[i] - old_alpha_i; 00720 float64_t delta_alpha_j = alpha[j] - old_alpha_j; 00721 00722 for(int32_t k=0;k<active_size;k++) 00723 { 00724 G[k] += Q_i[k]*delta_alpha_i + Q_j[k]*delta_alpha_j; 00725 } 00726 00727 // update alpha_status and G_bar 00728 00729 { 00730 bool ui = is_upper_bound(i); 00731 bool uj = is_upper_bound(j); 00732 update_alpha_status(i); 00733 update_alpha_status(j); 00734 int32_t k; 00735 if(ui != is_upper_bound(i)) 00736 { 00737 Q_i = Q->get_Q(i,l); 00738 if(ui) 00739 for(k=0;k<l;k++) 00740 G_bar[k] -= C_i * Q_i[k]; 00741 else 00742 for(k=0;k<l;k++) 00743 G_bar[k] += C_i * Q_i[k]; 00744 } 00745 00746 if(uj != is_upper_bound(j)) 00747 { 00748 Q_j = Q->get_Q(j,l); 00749 if(uj) 00750 for(k=0;k<l;k++) 00751 G_bar[k] -= C_j * Q_j[k]; 00752 else 00753 for(k=0;k<l;k++) 00754 G_bar[k] += C_j * Q_j[k]; 00755 } 00756 } 00757 00758 #ifdef MCSVM_DEBUG 00759 // calculate objective value 00760 { 00761 float64_t v = 0; 00762 for(i=0;i<l;i++) 00763 v += alpha[i] * (G[i] + p[i]); 00764 00765 p_si->obj = v/2; 00766 00767 float64_t primal=0; 00768 //float64_t gap=100000; 00769 SG_SPRINT("dual obj=%f primal obf=%f gap=%f\n", v/2, primal, gap); 00770 } 00771 #endif 00772 } 00773 00774 // calculate rho 00775 00776 if (!use_bias) 00777 p_si->rho = 0; 00778 else 00779 p_si->rho = calculate_rho(); 00780 00781 // calculate objective value 00782 { 00783 float64_t v = 0; 00784 int32_t i; 00785 for(i=0;i<l;i++) 00786 v += alpha[i] * (G[i] + p[i]); 00787 00788 p_si->obj = v/2; 00789 } 00790 00791 // put back the solution 00792 { 00793 for(int32_t i=0;i<l;i++) 00794 p_alpha[active_set[i]] = alpha[i]; 00795 } 00796 00797 p_si->upper_bound_p = Cp; 00798 p_si->upper_bound_n = Cn; 00799 00800 SG_SINFO("\noptimization finished, #iter = %d\n",iter); 00801 00802 SG_FREE(p); 00803 SG_FREE(y); 00804 SG_FREE(alpha); 00805 SG_FREE(alpha_status); 00806 SG_FREE(active_set); 00807 SG_FREE(G); 00808 SG_FREE(G_bar); 00809 } 00810 00811 // return 1 if already optimal, return 0 otherwise 00812 int32_t Solver::select_working_set( 00813 int32_t &out_i, int32_t &out_j, float64_t &gap) 00814 { 00815 // return i,j such that 00816 // i: maximizes -y_i * grad(f)_i, i in I_up(\alpha) 00817 // j: minimizes the decrease of obj value 00818 // (if quadratic coefficient <= 0, replace it with tau) 00819 // -y_j*grad(f)_j < -y_i*grad(f)_i, j in I_low(\alpha) 00820 00821 float64_t Gmax = -INF; 00822 float64_t Gmax2 = -INF; 00823 int32_t Gmax_idx = -1; 00824 int32_t Gmin_idx = -1; 00825 float64_t obj_diff_min = INF; 00826 00827 for(int32_t t=0;t<active_size;t++) 00828 if(y[t]==+1) 00829 { 00830 if(!is_upper_bound(t)) 00831 if(-G[t] >= Gmax) 00832 { 00833 Gmax = -G[t]; 00834 Gmax_idx = t; 00835 } 00836 } 00837 else 00838 { 00839 if(!is_lower_bound(t)) 00840 if(G[t] >= Gmax) 00841 { 00842 Gmax = G[t]; 00843 Gmax_idx = t; 00844 } 00845 } 00846 00847 int32_t i = Gmax_idx; 00848 const Qfloat *Q_i = NULL; 00849 if(i != -1) // NULL Q_i not accessed: Gmax=-INF if i=-1 00850 Q_i = Q->get_Q(i,active_size); 00851 00852 for(int32_t j=0;j<active_size;j++) 00853 { 00854 if(y[j]==+1) 00855 { 00856 if (!is_lower_bound(j)) 00857 { 00858 float64_t grad_diff=Gmax+G[j]; 00859 if (G[j] >= Gmax2) 00860 Gmax2 = G[j]; 00861 if (grad_diff > 0) 00862 { 00863 float64_t obj_diff; 00864 float64_t quad_coef=Q_i[i]+QD[j]-2.0*y[i]*Q_i[j]; 00865 if (quad_coef > 0) 00866 obj_diff = -(grad_diff*grad_diff)/quad_coef; 00867 else 00868 obj_diff = -(grad_diff*grad_diff)/TAU; 00869 00870 if (obj_diff <= obj_diff_min) 00871 { 00872 Gmin_idx=j; 00873 obj_diff_min = obj_diff; 00874 } 00875 } 00876 } 00877 } 00878 else 00879 { 00880 if (!is_upper_bound(j)) 00881 { 00882 float64_t grad_diff= Gmax-G[j]; 00883 if (-G[j] >= Gmax2) 00884 Gmax2 = -G[j]; 00885 if (grad_diff > 0) 00886 { 00887 float64_t obj_diff; 00888 float64_t quad_coef=Q_i[i]+QD[j]+2.0*y[i]*Q_i[j]; 00889 if (quad_coef > 0) 00890 obj_diff = -(grad_diff*grad_diff)/quad_coef; 00891 else 00892 obj_diff = -(grad_diff*grad_diff)/TAU; 00893 00894 if (obj_diff <= obj_diff_min) 00895 { 00896 Gmin_idx=j; 00897 obj_diff_min = obj_diff; 00898 } 00899 } 00900 } 00901 } 00902 } 00903 00904 gap=Gmax+Gmax2; 00905 if(gap < eps) 00906 return 1; 00907 00908 out_i = Gmax_idx; 00909 out_j = Gmin_idx; 00910 return 0; 00911 } 00912 00913 bool Solver::be_shrunk(int32_t i, float64_t Gmax1, float64_t Gmax2) 00914 { 00915 if(is_upper_bound(i)) 00916 { 00917 if(y[i]==+1) 00918 return(-G[i] > Gmax1); 00919 else 00920 return(-G[i] > Gmax2); 00921 } 00922 else if(is_lower_bound(i)) 00923 { 00924 if(y[i]==+1) 00925 return(G[i] > Gmax2); 00926 else 00927 return(G[i] > Gmax1); 00928 } 00929 else 00930 return(false); 00931 } 00932 00933 00934 void Solver::do_shrinking() 00935 { 00936 int32_t i; 00937 float64_t Gmax1 = -INF; // max { -y_i * grad(f)_i | i in I_up(\alpha) } 00938 float64_t Gmax2 = -INF; // max { y_i * grad(f)_i | i in I_low(\alpha) } 00939 00940 // find maximal violating pair first 00941 for(i=0;i<active_size;i++) 00942 { 00943 if(y[i]==+1) 00944 { 00945 if(!is_upper_bound(i)) 00946 { 00947 if(-G[i] >= Gmax1) 00948 Gmax1 = -G[i]; 00949 } 00950 if(!is_lower_bound(i)) 00951 { 00952 if(G[i] >= Gmax2) 00953 Gmax2 = G[i]; 00954 } 00955 } 00956 else 00957 { 00958 if(!is_upper_bound(i)) 00959 { 00960 if(-G[i] >= Gmax2) 00961 Gmax2 = -G[i]; 00962 } 00963 if(!is_lower_bound(i)) 00964 { 00965 if(G[i] >= Gmax1) 00966 Gmax1 = G[i]; 00967 } 00968 } 00969 } 00970 00971 if(unshrink == false && Gmax1 + Gmax2 <= eps*10) 00972 { 00973 unshrink = true; 00974 reconstruct_gradient(); 00975 active_size = l; 00976 } 00977 00978 for(i=0;i<active_size;i++) 00979 if (be_shrunk(i, Gmax1, Gmax2)) 00980 { 00981 active_size--; 00982 while (active_size > i) 00983 { 00984 if (!be_shrunk(active_size, Gmax1, Gmax2)) 00985 { 00986 swap_index(i,active_size); 00987 break; 00988 } 00989 active_size--; 00990 } 00991 } 00992 } 00993 00994 float64_t Solver::calculate_rho() 00995 { 00996 float64_t r; 00997 int32_t nr_free = 0; 00998 float64_t ub = INF, lb = -INF, sum_free = 0; 00999 for(int32_t i=0;i<active_size;i++) 01000 { 01001 float64_t yG = y[i]*G[i]; 01002 01003 if(is_upper_bound(i)) 01004 { 01005 if(y[i]==-1) 01006 ub = CMath::min(ub,yG); 01007 else 01008 lb = CMath::max(lb,yG); 01009 } 01010 else if(is_lower_bound(i)) 01011 { 01012 if(y[i]==+1) 01013 ub = CMath::min(ub,yG); 01014 else 01015 lb = CMath::max(lb,yG); 01016 } 01017 else 01018 { 01019 ++nr_free; 01020 sum_free += yG; 01021 } 01022 } 01023 01024 if(nr_free>0) 01025 r = sum_free/nr_free; 01026 else 01027 r = (ub+lb)/2; 01028 01029 return r; 01030 } 01031 01032 01033 // 01034 //Solve with individually weighted examples 01035 // 01036 class WeightedSolver : public Solver 01037 { 01038 01039 public: 01040 01041 WeightedSolver(float64_t* cost_vec) 01042 { 01043 01044 this->Cs = cost_vec; 01045 01046 } 01047 01048 virtual float64_t get_C(int32_t i) 01049 { 01050 01051 return Cs[i]; 01052 } 01053 01054 protected: 01055 01056 float64_t* Cs; 01057 01058 }; 01059 01060 01061 // 01062 // Solver for nu-svm classification and regression 01063 // 01064 // additional constraint: e^T \alpha = constant 01065 // 01066 class Solver_NU : public Solver 01067 { 01068 public: 01069 Solver_NU() {} 01070 void Solve( 01071 int32_t p_l, const QMatrix& p_Q, const float64_t *p_p, 01072 const schar *p_y, float64_t* p_alpha, float64_t p_Cp, float64_t p_Cn, 01073 float64_t p_eps, SolutionInfo* p_si, int32_t shrinking, bool use_bias) 01074 { 01075 this->si = p_si; 01076 Solver::Solve(p_l,p_Q,p_p,p_y,p_alpha,p_Cp,p_Cn,p_eps,p_si, 01077 shrinking,use_bias); 01078 } 01079 private: 01080 SolutionInfo *si; 01081 int32_t select_working_set(int32_t &i, int32_t &j, float64_t &gap); 01082 float64_t calculate_rho(); 01083 bool be_shrunk( 01084 int32_t i, float64_t Gmax1, float64_t Gmax2, float64_t Gmax3, 01085 float64_t Gmax4); 01086 void do_shrinking(); 01087 }; 01088 01089 // return 1 if already optimal, return 0 otherwise 01090 int32_t Solver_NU::select_working_set( 01091 int32_t &out_i, int32_t &out_j, float64_t &gap) 01092 { 01093 // return i,j such that y_i = y_j and 01094 // i: maximizes -y_i * grad(f)_i, i in I_up(\alpha) 01095 // j: minimizes the decrease of obj value 01096 // (if quadratic coefficient <= 0, replace it with tau) 01097 // -y_j*grad(f)_j < -y_i*grad(f)_i, j in I_low(\alpha) 01098 01099 float64_t Gmaxp = -INF; 01100 float64_t Gmaxp2 = -INF; 01101 int32_t Gmaxp_idx = -1; 01102 01103 float64_t Gmaxn = -INF; 01104 float64_t Gmaxn2 = -INF; 01105 int32_t Gmaxn_idx = -1; 01106 01107 int32_t Gmin_idx = -1; 01108 float64_t obj_diff_min = INF; 01109 01110 for(int32_t t=0;t<active_size;t++) 01111 if(y[t]==+1) 01112 { 01113 if(!is_upper_bound(t)) 01114 if(-G[t] >= Gmaxp) 01115 { 01116 Gmaxp = -G[t]; 01117 Gmaxp_idx = t; 01118 } 01119 } 01120 else 01121 { 01122 if(!is_lower_bound(t)) 01123 if(G[t] >= Gmaxn) 01124 { 01125 Gmaxn = G[t]; 01126 Gmaxn_idx = t; 01127 } 01128 } 01129 01130 int32_t ip = Gmaxp_idx; 01131 int32_t in = Gmaxn_idx; 01132 const Qfloat *Q_ip = NULL; 01133 const Qfloat *Q_in = NULL; 01134 if(ip != -1) // NULL Q_ip not accessed: Gmaxp=-INF if ip=-1 01135 Q_ip = Q->get_Q(ip,active_size); 01136 if(in != -1) 01137 Q_in = Q->get_Q(in,active_size); 01138 01139 for(int32_t j=0;j<active_size;j++) 01140 { 01141 if(y[j]==+1) 01142 { 01143 if (!is_lower_bound(j)) 01144 { 01145 float64_t grad_diff=Gmaxp+G[j]; 01146 if (G[j] >= Gmaxp2) 01147 Gmaxp2 = G[j]; 01148 if (grad_diff > 0) 01149 { 01150 float64_t obj_diff; 01151 float64_t quad_coef = Q_ip[ip]+QD[j]-2*Q_ip[j]; 01152 if (quad_coef > 0) 01153 obj_diff = -(grad_diff*grad_diff)/quad_coef; 01154 else 01155 obj_diff = -(grad_diff*grad_diff)/TAU; 01156 01157 if (obj_diff <= obj_diff_min) 01158 { 01159 Gmin_idx=j; 01160 obj_diff_min = obj_diff; 01161 } 01162 } 01163 } 01164 } 01165 else 01166 { 01167 if (!is_upper_bound(j)) 01168 { 01169 float64_t grad_diff=Gmaxn-G[j]; 01170 if (-G[j] >= Gmaxn2) 01171 Gmaxn2 = -G[j]; 01172 if (grad_diff > 0) 01173 { 01174 float64_t obj_diff; 01175 float64_t quad_coef = Q_in[in]+QD[j]-2*Q_in[j]; 01176 if (quad_coef > 0) 01177 obj_diff = -(grad_diff*grad_diff)/quad_coef; 01178 else 01179 obj_diff = -(grad_diff*grad_diff)/TAU; 01180 01181 if (obj_diff <= obj_diff_min) 01182 { 01183 Gmin_idx=j; 01184 obj_diff_min = obj_diff; 01185 } 01186 } 01187 } 01188 } 01189 } 01190 01191 gap=CMath::max(Gmaxp+Gmaxp2,Gmaxn+Gmaxn2); 01192 if(gap < eps) 01193 return 1; 01194 01195 if (y[Gmin_idx] == +1) 01196 out_i = Gmaxp_idx; 01197 else 01198 out_i = Gmaxn_idx; 01199 out_j = Gmin_idx; 01200 01201 return 0; 01202 } 01203 01204 bool Solver_NU::be_shrunk( 01205 int32_t i, float64_t Gmax1, float64_t Gmax2, float64_t Gmax3, 01206 float64_t Gmax4) 01207 { 01208 if(is_upper_bound(i)) 01209 { 01210 if(y[i]==+1) 01211 return(-G[i] > Gmax1); 01212 else 01213 return(-G[i] > Gmax4); 01214 } 01215 else if(is_lower_bound(i)) 01216 { 01217 if(y[i]==+1) 01218 return(G[i] > Gmax2); 01219 else 01220 return(G[i] > Gmax3); 01221 } 01222 else 01223 return(false); 01224 } 01225 01226 void Solver_NU::do_shrinking() 01227 { 01228 float64_t Gmax1 = -INF; // max { -y_i * grad(f)_i | y_i = +1, i in I_up(\alpha) } 01229 float64_t Gmax2 = -INF; // max { y_i * grad(f)_i | y_i = +1, i in I_low(\alpha) } 01230 float64_t Gmax3 = -INF; // max { -y_i * grad(f)_i | y_i = -1, i in I_up(\alpha) } 01231 float64_t Gmax4 = -INF; // max { y_i * grad(f)_i | y_i = -1, i in I_low(\alpha) } 01232 01233 // find maximal violating pair first 01234 int32_t i; 01235 for(i=0;i<active_size;i++) 01236 { 01237 if(!is_upper_bound(i)) 01238 { 01239 if(y[i]==+1) 01240 { 01241 if(-G[i] > Gmax1) Gmax1 = -G[i]; 01242 } 01243 else if(-G[i] > Gmax4) Gmax4 = -G[i]; 01244 } 01245 if(!is_lower_bound(i)) 01246 { 01247 if(y[i]==+1) 01248 { 01249 if(G[i] > Gmax2) Gmax2 = G[i]; 01250 } 01251 else if(G[i] > Gmax3) Gmax3 = G[i]; 01252 } 01253 } 01254 01255 if(unshrink == false && CMath::max(Gmax1+Gmax2,Gmax3+Gmax4) <= eps*10) 01256 { 01257 unshrink = true; 01258 reconstruct_gradient(); 01259 active_size = l; 01260 } 01261 01262 for(i=0;i<active_size;i++) 01263 if (be_shrunk(i, Gmax1, Gmax2, Gmax3, Gmax4)) 01264 { 01265 active_size--; 01266 while (active_size > i) 01267 { 01268 if (!be_shrunk(active_size, Gmax1, Gmax2, Gmax3, Gmax4)) 01269 { 01270 swap_index(i,active_size); 01271 break; 01272 } 01273 active_size--; 01274 } 01275 } 01276 } 01277 01278 float64_t Solver_NU::calculate_rho() 01279 { 01280 int32_t nr_free1 = 0,nr_free2 = 0; 01281 float64_t ub1 = INF, ub2 = INF; 01282 float64_t lb1 = -INF, lb2 = -INF; 01283 float64_t sum_free1 = 0, sum_free2 = 0; 01284 01285 for(int32_t i=0; i<active_size; i++) 01286 { 01287 if(y[i]==+1) 01288 { 01289 if(is_upper_bound(i)) 01290 lb1 = CMath::max(lb1,G[i]); 01291 else if(is_lower_bound(i)) 01292 ub1 = CMath::min(ub1,G[i]); 01293 else 01294 { 01295 ++nr_free1; 01296 sum_free1 += G[i]; 01297 } 01298 } 01299 else 01300 { 01301 if(is_upper_bound(i)) 01302 lb2 = CMath::max(lb2,G[i]); 01303 else if(is_lower_bound(i)) 01304 ub2 = CMath::min(ub2,G[i]); 01305 else 01306 { 01307 ++nr_free2; 01308 sum_free2 += G[i]; 01309 } 01310 } 01311 } 01312 01313 float64_t r1,r2; 01314 if(nr_free1 > 0) 01315 r1 = sum_free1/nr_free1; 01316 else 01317 r1 = (ub1+lb1)/2; 01318 01319 if(nr_free2 > 0) 01320 r2 = sum_free2/nr_free2; 01321 else 01322 r2 = (ub2+lb2)/2; 01323 01324 si->r = (r1+r2)/2; 01325 return (r1-r2)/2; 01326 } 01327 01328 class SVC_QMC: public LibSVMKernel 01329 { 01330 public: 01331 SVC_QMC(const svm_problem& prob, const svm_parameter& param, const schar *y_, int32_t n_class, float64_t fac) 01332 :LibSVMKernel(prob.l, prob.x, param) 01333 { 01334 nr_class=n_class; 01335 factor=fac; 01336 clone(y,y_,prob.l); 01337 cache = new Cache(prob.l,(int64_t)(param.cache_size*(1l<<20))); 01338 QD = SG_MALLOC(Qfloat, prob.l); 01339 for(int32_t i=0;i<prob.l;i++) 01340 { 01341 QD[i]= factor*(nr_class-1)*kernel_function(i,i); 01342 } 01343 } 01344 01345 Qfloat *get_Q(int32_t i, int32_t len) const 01346 { 01347 Qfloat *data; 01348 int32_t start; 01349 if((start = cache->get_data(i,&data,len)) < len) 01350 { 01351 compute_Q_parallel(data, NULL, i, start, len); 01352 01353 for(int32_t j=start;j<len;j++) 01354 { 01355 if (y[i]==y[j]) 01356 data[j] *= (factor*(nr_class-1)); 01357 else 01358 data[j] *= (-factor); 01359 } 01360 } 01361 return data; 01362 } 01363 01364 inline Qfloat get_orig_Qij(Qfloat Q, int32_t i, int32_t j) 01365 { 01366 if (y[i]==y[j]) 01367 return Q/(nr_class-1); 01368 else 01369 return -Q; 01370 } 01371 01372 Qfloat *get_QD() const 01373 { 01374 return QD; 01375 } 01376 01377 void swap_index(int32_t i, int32_t j) const 01378 { 01379 cache->swap_index(i,j); 01380 LibSVMKernel::swap_index(i,j); 01381 CMath::swap(y[i],y[j]); 01382 CMath::swap(QD[i],QD[j]); 01383 } 01384 01385 ~SVC_QMC() 01386 { 01387 SG_FREE(y); 01388 delete cache; 01389 SG_FREE(QD); 01390 } 01391 private: 01392 float64_t factor; 01393 float64_t nr_class; 01394 schar *y; 01395 Cache *cache; 01396 Qfloat *QD; 01397 }; 01398 01399 // 01400 // Solver for nu-svm classification and regression 01401 // 01402 // additional constraint: e^T \alpha = constant 01403 // 01404 class Solver_NUMC : public Solver 01405 { 01406 public: 01407 Solver_NUMC(int32_t n_class, float64_t svm_nu) 01408 { 01409 nr_class=n_class; 01410 nu=svm_nu; 01411 } 01412 01413 void Solve( 01414 int32_t p_l, const QMatrix& p_Q, const float64_t *p_p, 01415 const schar *p_y, float64_t* p_alpha, float64_t p_Cp, float64_t p_Cn, 01416 float64_t p_eps, SolutionInfo* p_si, int32_t shrinking, bool use_bias) 01417 { 01418 this->si = p_si; 01419 Solver::Solve(p_l,p_Q,p_p,p_y,p_alpha,p_Cp,p_Cn,p_eps,p_si,shrinking, use_bias); 01420 } 01421 float64_t compute_primal(const schar* p_y, float64_t* p_alpha, float64_t* biases,float64_t* normwcw); 01422 01423 private: 01424 SolutionInfo *si; 01425 int32_t select_working_set(int32_t &i, int32_t &j, float64_t &gap); 01426 float64_t calculate_rho(); 01427 bool be_shrunk( 01428 int32_t i, float64_t Gmax1, float64_t Gmax2, float64_t Gmax3, 01429 float64_t Gmax4); 01430 void do_shrinking(); 01431 01432 private: 01433 int32_t nr_class; 01434 float64_t nu; 01435 }; 01436 01437 float64_t Solver_NUMC::compute_primal(const schar* p_y, float64_t* p_alpha, float64_t* biases, float64_t* normwcw) 01438 { 01439 clone(y, p_y,l); 01440 clone(alpha,p_alpha,l); 01441 01442 alpha_status = SG_MALLOC(char, l); 01443 for(int32_t i=0;i<l;i++) 01444 update_alpha_status(i); 01445 01446 float64_t* class_count = SG_MALLOC(float64_t, nr_class); 01447 float64_t* outputs = SG_MALLOC(float64_t, l); 01448 01449 for (int32_t i=0; i<nr_class; i++) 01450 { 01451 class_count[i]=0; 01452 biases[i+1]=0; 01453 } 01454 01455 for (int32_t i=0; i<active_size; i++) 01456 { 01457 update_alpha_status(i); 01458 if(!is_upper_bound(i) && !is_lower_bound(i)) 01459 class_count[(int32_t) y[i]]++; 01460 } 01461 01462 //CMath::display_vector(class_count, nr_class, "class_count"); 01463 01464 float64_t mu=((float64_t) nr_class)/(nu*l); 01465 //SG_SPRINT("nr_class=%d, l=%d, active_size=%d, nu=%f, mu=%f\n", nr_class, l, active_size, nu, mu); 01466 01467 float64_t rho=0; 01468 float64_t quad=0; 01469 float64_t* zero_counts = SG_MALLOC(float64_t, nr_class); 01470 float64_t normwc_const = 0; 01471 01472 for (int32_t i=0; i<nr_class; i++) 01473 { 01474 zero_counts[i]=-INF; 01475 normwcw[i]=0; 01476 } 01477 01478 for (int32_t i=0; i<active_size; i++) 01479 { 01480 float64_t sum_free=0; 01481 float64_t sum_atbound=0; 01482 float64_t sum_zero_count=0; 01483 01484 Qfloat* Q_i = Q->get_Q(i,active_size); 01485 outputs[i]=0; 01486 01487 for (int j=0; j<active_size; j++) 01488 { 01489 quad+= alpha[i]*alpha[j]*Q_i[j]; 01490 float64_t tmp= alpha[j]*Q_i[j]/mu; 01491 01492 if(!is_upper_bound(i) && !is_lower_bound(i)) 01493 sum_free+=tmp; 01494 else 01495 sum_atbound+=tmp; 01496 01497 if (class_count[(int32_t) y[i]] == 0 && y[j]==y[i]) 01498 sum_zero_count+= tmp; 01499 01500 SVC_QMC* QMC=(SVC_QMC*) Q; 01501 float64_t norm_tmp=alpha[i]*alpha[j]*QMC->get_orig_Qij(Q_i[j], i, j); 01502 if (y[i]==y[j]) 01503 normwcw[(int32_t) y[i]]+=norm_tmp; 01504 01505 normwcw[(int32_t) y[i]]-=2.0/nr_class*norm_tmp; 01506 normwc_const+=norm_tmp; 01507 } 01508 01509 if (class_count[(int32_t) y[i]] == 0) 01510 { 01511 if (zero_counts[(int32_t) y[i]]<sum_zero_count) 01512 zero_counts[(int32_t) y[i]]=sum_zero_count; 01513 } 01514 01515 biases[(int32_t) y[i]+1]-=sum_free; 01516 if (class_count[(int32_t) y[i]] != 0.0) 01517 rho+=sum_free/class_count[(int32_t) y[i]]; 01518 outputs[i]+=sum_free+sum_atbound; 01519 } 01520 01521 for (int32_t i=0; i<nr_class; i++) 01522 { 01523 if (class_count[i] == 0.0) 01524 rho+=zero_counts[i]; 01525 01526 normwcw[i]+=normwc_const/CMath::sq(nr_class); 01527 normwcw[i]=CMath::sqrt(normwcw[i]); 01528 } 01529 01530 SGVector<float64_t>::display_vector(normwcw, nr_class, "normwcw"); 01531 01532 rho/=nr_class; 01533 01534 SG_SPRINT("rho=%f\n", rho); 01535 01536 float64_t sumb=0; 01537 for (int32_t i=0; i<nr_class; i++) 01538 { 01539 if (class_count[i] != 0.0) 01540 biases[i+1]=biases[i+1]/class_count[i]+rho; 01541 else 01542 biases[i+1]+=rho-zero_counts[i]; 01543 01544 SG_SPRINT("biases=%f\n", biases[i+1]); 01545 01546 sumb+=biases[i+1]; 01547 } 01548 SG_SPRINT("sumb=%f\n", sumb); 01549 01550 SG_FREE(zero_counts); 01551 01552 for (int32_t i=0; i<l; i++) 01553 outputs[i]+=biases[(int32_t) y[i]+1]; 01554 01555 biases[0]=rho; 01556 01557 //CMath::display_vector(outputs, l, "outputs"); 01558 01559 01560 float64_t xi=0; 01561 for (int32_t i=0; i<active_size; i++) 01562 { 01563 if (is_lower_bound(i)) 01564 continue; 01565 xi+=rho-outputs[i]; 01566 } 01567 01568 //SG_SPRINT("xi=%f\n", xi); 01569 01570 //SG_SPRINT("quad=%f Cp=%f xi*mu=%f\n", quad, nr_class*rho, xi*mu); 01571 01572 float64_t primal=0.5*quad- nr_class*rho+xi*mu; 01573 01574 //SG_SPRINT("primal=%10.10f\n", primal); 01575 01576 SG_FREE(y); 01577 SG_FREE(alpha); 01578 SG_FREE(alpha_status); 01579 01580 return primal; 01581 } 01582 01583 01584 // return 1 if already optimal, return 0 otherwise 01585 int32_t Solver_NUMC::select_working_set( 01586 int32_t &out_i, int32_t &out_j, float64_t &gap) 01587 { 01588 // return i,j such that y_i = y_j and 01589 // i: maximizes -y_i * grad(f)_i, i in I_up(\alpha) 01590 // j: minimizes the decrease of obj value 01591 // (if quadratic coefficient <= 0, replace it with tau) 01592 // -y_j*grad(f)_j < -y_i*grad(f)_i, j in I_low(\alpha) 01593 01594 int32_t retval=0; 01595 float64_t best_gap=0; 01596 int32_t best_out_i=-1; 01597 int32_t best_out_j=-1; 01598 01599 float64_t* Gmaxp = SG_MALLOC(float64_t, nr_class); 01600 float64_t* Gmaxp2 = SG_MALLOC(float64_t, nr_class); 01601 int32_t* Gmaxp_idx = SG_MALLOC(int32_t, nr_class); 01602 01603 int32_t* Gmin_idx = SG_MALLOC(int32_t, nr_class); 01604 float64_t* obj_diff_min = SG_MALLOC(float64_t, nr_class); 01605 01606 for (int32_t i=0; i<nr_class; i++) 01607 { 01608 Gmaxp[i]=-INF; 01609 Gmaxp2[i]=-INF; 01610 Gmaxp_idx[i]=-1; 01611 Gmin_idx[i]=-1; 01612 obj_diff_min[i]=INF; 01613 } 01614 01615 for(int32_t t=0;t<active_size;t++) 01616 { 01617 int32_t cidx=y[t]; 01618 if(!is_upper_bound(t)) 01619 { 01620 if(-G[t] >= Gmaxp[cidx]) 01621 { 01622 Gmaxp[cidx] = -G[t]; 01623 Gmaxp_idx[cidx] = t; 01624 } 01625 } 01626 } 01627 01628 for(int32_t j=0;j<active_size;j++) 01629 { 01630 int32_t cidx=y[j]; 01631 int32_t ip = Gmaxp_idx[cidx]; 01632 const Qfloat *Q_ip = NULL; 01633 if(ip != -1) // NULL Q_ip not accessed: Gmaxp=-INF if ip=-1 01634 Q_ip = Q->get_Q(ip,active_size); 01635 01636 if (!is_lower_bound(j)) 01637 { 01638 float64_t grad_diff=Gmaxp[cidx]+G[j]; 01639 if (G[j] >= Gmaxp2[cidx]) 01640 Gmaxp2[cidx] = G[j]; 01641 if (grad_diff > 0) 01642 { 01643 float64_t obj_diff; 01644 float64_t quad_coef = Q_ip[ip]+QD[j]-2*Q_ip[j]; 01645 if (quad_coef > 0) 01646 obj_diff = -(grad_diff*grad_diff)/quad_coef; 01647 else 01648 obj_diff = -(grad_diff*grad_diff)/TAU; 01649 01650 if (obj_diff <= obj_diff_min[cidx]) 01651 { 01652 Gmin_idx[cidx]=j; 01653 obj_diff_min[cidx] = obj_diff; 01654 } 01655 } 01656 } 01657 01658 gap=Gmaxp[cidx]+Gmaxp2[cidx]; 01659 if (gap>=best_gap && Gmin_idx[cidx]>=0 && 01660 Gmaxp_idx[cidx]>=0 && Gmin_idx[cidx]<active_size) 01661 { 01662 out_i = Gmaxp_idx[cidx]; 01663 out_j = Gmin_idx[cidx]; 01664 01665 best_gap=gap; 01666 best_out_i=out_i; 01667 best_out_j=out_j; 01668 } 01669 } 01670 01671 gap=best_gap; 01672 out_i=best_out_i; 01673 out_j=best_out_j; 01674 01675 SG_SDEBUG("i=%d j=%d best_gap=%f y_i=%f y_j=%f\n", out_i, out_j, gap, y[out_i], y[out_j]); 01676 01677 01678 if(gap < eps) 01679 retval=1; 01680 01681 SG_FREE(Gmaxp); 01682 SG_FREE(Gmaxp2); 01683 SG_FREE(Gmaxp_idx); 01684 SG_FREE(Gmin_idx); 01685 SG_FREE(obj_diff_min); 01686 01687 return retval; 01688 } 01689 01690 bool Solver_NUMC::be_shrunk( 01691 int32_t i, float64_t Gmax1, float64_t Gmax2, float64_t Gmax3, 01692 float64_t Gmax4) 01693 { 01694 return false; 01695 } 01696 01697 void Solver_NUMC::do_shrinking() 01698 { 01699 } 01700 01701 float64_t Solver_NUMC::calculate_rho() 01702 { 01703 return 0; 01704 } 01705 01706 01707 // 01708 // Q matrices for various formulations 01709 // 01710 class SVC_Q: public LibSVMKernel 01711 { 01712 public: 01713 SVC_Q(const svm_problem& prob, const svm_parameter& param, const schar *y_) 01714 :LibSVMKernel(prob.l, prob.x, param) 01715 { 01716 clone(y,y_,prob.l); 01717 cache = new Cache(prob.l,(int64_t)(param.cache_size*(1l<<20))); 01718 QD = SG_MALLOC(Qfloat, prob.l); 01719 for(int32_t i=0;i<prob.l;i++) 01720 QD[i]= (Qfloat)kernel_function(i,i); 01721 } 01722 01723 Qfloat *get_Q(int32_t i, int32_t len) const 01724 { 01725 Qfloat *data; 01726 int32_t start; 01727 if((start = cache->get_data(i,&data,len)) < len) 01728 compute_Q_parallel(data, y, i, start, len); 01729 01730 return data; 01731 } 01732 01733 Qfloat *get_QD() const 01734 { 01735 return QD; 01736 } 01737 01738 void swap_index(int32_t i, int32_t j) const 01739 { 01740 cache->swap_index(i,j); 01741 LibSVMKernel::swap_index(i,j); 01742 CMath::swap(y[i],y[j]); 01743 CMath::swap(QD[i],QD[j]); 01744 } 01745 01746 ~SVC_Q() 01747 { 01748 SG_FREE(y); 01749 delete cache; 01750 SG_FREE(QD); 01751 } 01752 private: 01753 schar *y; 01754 Cache *cache; 01755 Qfloat *QD; 01756 }; 01757 01758 01759 class ONE_CLASS_Q: public LibSVMKernel 01760 { 01761 public: 01762 ONE_CLASS_Q(const svm_problem& prob, const svm_parameter& param) 01763 :LibSVMKernel(prob.l, prob.x, param) 01764 { 01765 cache = new Cache(prob.l,(int64_t)(param.cache_size*(1l<<20))); 01766 QD = SG_MALLOC(Qfloat, prob.l); 01767 for(int32_t i=0;i<prob.l;i++) 01768 QD[i]= (Qfloat)kernel_function(i,i); 01769 } 01770 01771 Qfloat *get_Q(int32_t i, int32_t len) const 01772 { 01773 Qfloat *data; 01774 int32_t start; 01775 if((start = cache->get_data(i,&data,len)) < len) 01776 compute_Q_parallel(data, NULL, i, start, len); 01777 01778 return data; 01779 } 01780 01781 Qfloat *get_QD() const 01782 { 01783 return QD; 01784 } 01785 01786 void swap_index(int32_t i, int32_t j) const 01787 { 01788 cache->swap_index(i,j); 01789 LibSVMKernel::swap_index(i,j); 01790 CMath::swap(QD[i],QD[j]); 01791 } 01792 01793 ~ONE_CLASS_Q() 01794 { 01795 delete cache; 01796 SG_FREE(QD); 01797 } 01798 private: 01799 Cache *cache; 01800 Qfloat *QD; 01801 }; 01802 01803 class SVR_Q: public LibSVMKernel 01804 { 01805 public: 01806 SVR_Q(const svm_problem& prob, const svm_parameter& param) 01807 :LibSVMKernel(prob.l, prob.x, param) 01808 { 01809 l = prob.l; 01810 cache = new Cache(l,(int64_t)(param.cache_size*(1l<<20))); 01811 QD = SG_MALLOC(Qfloat, 2*l); 01812 sign = SG_MALLOC(schar, 2*l); 01813 index = SG_MALLOC(int32_t, 2*l); 01814 for(int32_t k=0;k<l;k++) 01815 { 01816 sign[k] = 1; 01817 sign[k+l] = -1; 01818 index[k] = k; 01819 index[k+l] = k; 01820 QD[k]= (Qfloat)kernel_function(k,k); 01821 QD[k+l]=QD[k]; 01822 } 01823 buffer[0] = SG_MALLOC(Qfloat, 2*l); 01824 buffer[1] = SG_MALLOC(Qfloat, 2*l); 01825 next_buffer = 0; 01826 } 01827 01828 void swap_index(int32_t i, int32_t j) const 01829 { 01830 CMath::swap(sign[i],sign[j]); 01831 CMath::swap(index[i],index[j]); 01832 CMath::swap(QD[i],QD[j]); 01833 } 01834 01835 Qfloat *get_Q(int32_t i, int32_t len) const 01836 { 01837 Qfloat *data; 01838 int32_t real_i = index[i]; 01839 if(cache->get_data(real_i,&data,l) < l) 01840 compute_Q_parallel(data, NULL, real_i, 0, l); 01841 01842 // reorder and copy 01843 Qfloat *buf = buffer[next_buffer]; 01844 next_buffer = 1 - next_buffer; 01845 schar si = sign[i]; 01846 for(int32_t j=0;j<len;j++) 01847 buf[j] = si * sign[j] * data[index[j]]; 01848 return buf; 01849 } 01850 01851 Qfloat *get_QD() const 01852 { 01853 return QD; 01854 } 01855 01856 ~SVR_Q() 01857 { 01858 delete cache; 01859 SG_FREE(sign); 01860 SG_FREE(index); 01861 SG_FREE(buffer[0]); 01862 SG_FREE(buffer[1]); 01863 SG_FREE(QD); 01864 } 01865 01866 private: 01867 int32_t l; 01868 Cache *cache; 01869 schar *sign; 01870 int32_t *index; 01871 mutable int32_t next_buffer; 01872 Qfloat *buffer[2]; 01873 Qfloat *QD; 01874 }; 01875 01876 // 01877 // construct and solve various formulations 01878 // 01879 static void solve_c_svc( 01880 const svm_problem *prob, const svm_parameter* param, 01881 float64_t *alpha, Solver::SolutionInfo* si, float64_t Cp, float64_t Cn) 01882 { 01883 int32_t l = prob->l; 01884 schar *y = SG_MALLOC(schar, l); 01885 01886 int32_t i; 01887 01888 for(i=0;i<l;i++) 01889 { 01890 alpha[i] = 0; 01891 if(prob->y[i] > 0) y[i] = +1; else y[i]=-1; 01892 } 01893 01894 Solver s; 01895 s.Solve(l, SVC_Q(*prob,*param,y), prob->pv, y, 01896 alpha, Cp, Cn, param->eps, si, param->shrinking, param->use_bias); 01897 01898 float64_t sum_alpha=0; 01899 for(i=0;i<l;i++) 01900 sum_alpha += alpha[i]; 01901 01902 if (Cp==Cn) 01903 SG_SINFO("nu = %f\n", sum_alpha/(param->C*prob->l)); 01904 01905 for(i=0;i<l;i++) 01906 alpha[i] *= y[i]; 01907 01908 SG_FREE(y); 01909 } 01910 01911 01912 //two weighted datasets 01913 void solve_c_svc_weighted( 01914 const svm_problem *prob, const svm_parameter* param, 01915 float64_t *alpha, Solver::SolutionInfo* si, float64_t Cp, float64_t Cn) 01916 { 01917 int l = prob->l; 01918 float64_t *minus_ones = SG_MALLOC(float64_t, l); 01919 schar *y = SG_MALLOC(schar, l); 01920 01921 int i; 01922 01923 for(i=0;i<l;i++) 01924 { 01925 alpha[i] = 0; 01926 minus_ones[i] = -1; 01927 if(prob->y[i] > 0) y[i] = +1; else y[i]=-1; 01928 } 01929 01930 WeightedSolver s = WeightedSolver(prob->C); 01931 s.Solve(l, SVC_Q(*prob,*param,y), minus_ones, y, 01932 alpha, Cp, Cn, param->eps, si, param->shrinking, param->use_bias); 01933 01934 float64_t sum_alpha=0; 01935 for(i=0;i<l;i++) 01936 sum_alpha += alpha[i]; 01937 01938 //if (Cp==Cn) 01939 // SG_SINFO("nu = %f\n", sum_alpha/(prob->C*prob->l)); 01940 01941 for(i=0;i<l;i++) 01942 alpha[i] *= y[i]; 01943 01944 SG_FREE(minus_ones); 01945 SG_FREE(y); 01946 } 01947 01948 static void solve_nu_svc( 01949 const svm_problem *prob, const svm_parameter *param, 01950 float64_t *alpha, Solver::SolutionInfo* si) 01951 { 01952 int32_t i; 01953 int32_t l = prob->l; 01954 float64_t nu = param->nu; 01955 01956 schar *y = SG_MALLOC(schar, l); 01957 01958 for(i=0;i<l;i++) 01959 if(prob->y[i]>0) 01960 y[i] = +1; 01961 else 01962 y[i] = -1; 01963 01964 float64_t sum_pos = nu*l/2; 01965 float64_t sum_neg = nu*l/2; 01966 01967 for(i=0;i<l;i++) 01968 if(y[i] == +1) 01969 { 01970 alpha[i] = CMath::min(1.0,sum_pos); 01971 sum_pos -= alpha[i]; 01972 } 01973 else 01974 { 01975 alpha[i] = CMath::min(1.0,sum_neg); 01976 sum_neg -= alpha[i]; 01977 } 01978 01979 float64_t *zeros = SG_MALLOC(float64_t, l); 01980 01981 for(i=0;i<l;i++) 01982 zeros[i] = 0; 01983 01984 Solver_NU s; 01985 s.Solve(l, SVC_Q(*prob,*param,y), zeros, y, 01986 alpha, 1.0, 1.0, param->eps, si, param->shrinking, param->use_bias); 01987 float64_t r = si->r; 01988 01989 SG_SINFO("C = %f\n",1/r); 01990 01991 for(i=0;i<l;i++) 01992 alpha[i] *= y[i]/r; 01993 01994 si->rho /= r; 01995 si->obj /= (r*r); 01996 si->upper_bound_p = 1/r; 01997 si->upper_bound_n = 1/r; 01998 01999 SG_FREE(y); 02000 SG_FREE(zeros); 02001 } 02002 02003 static void solve_nu_multiclass_svc(const svm_problem *prob, 02004 const svm_parameter *param, Solver::SolutionInfo* si, svm_model* model) 02005 { 02006 int32_t l = prob->l; 02007 float64_t nu = param->nu; 02008 02009 float64_t *alpha = SG_MALLOC(float64_t, prob->l); 02010 schar *y = SG_MALLOC(schar, l); 02011 02012 for(int32_t i=0;i<l;i++) 02013 { 02014 alpha[i] = 0; 02015 y[i]=prob->y[i]; 02016 } 02017 02018 int32_t nr_class=param->nr_class; 02019 float64_t* sum_class = SG_MALLOC(float64_t, nr_class); 02020 02021 for (int32_t j=0; j<nr_class; j++) 02022 sum_class[j] = nu*l/nr_class; 02023 02024 for(int32_t i=0;i<l;i++) 02025 { 02026 alpha[i] = CMath::min(1.0,sum_class[int32_t(y[i])]); 02027 sum_class[int32_t(y[i])] -= alpha[i]; 02028 } 02029 SG_FREE(sum_class); 02030 02031 02032 float64_t *zeros = SG_MALLOC(float64_t, l); 02033 02034 for (int32_t i=0;i<l;i++) 02035 zeros[i] = 0; 02036 02037 Solver_NUMC s(nr_class, nu); 02038 SVC_QMC Q(*prob,*param,y, nr_class, ((float64_t) nr_class)/CMath::sq(nu*l)); 02039 02040 s.Solve(l, Q, zeros, y, 02041 alpha, 1.0, 1.0, param->eps, si, param->shrinking, param->use_bias); 02042 02043 02044 int32_t* class_sv_count=SG_MALLOC(int32_t, nr_class); 02045 02046 for (int32_t i=0; i<nr_class; i++) 02047 class_sv_count[i]=0; 02048 02049 for (int32_t i=0; i<l; i++) 02050 { 02051 if (CMath::abs(alpha[i]) > 0) 02052 class_sv_count[(int32_t) y[i]]++; 02053 } 02054 02055 model->l=l; 02056 // rho[0]= rho in mcsvm paper, rho[1]...rho[nr_class] is bias in mcsvm paper 02057 model->rho = SG_MALLOC(float64_t, nr_class+1); 02058 model->nr_class = nr_class; 02059 model->label = NULL; 02060 model->SV = SG_MALLOC(svm_node*,nr_class); 02061 model->nSV = SG_MALLOC(int32_t, nr_class); 02062 model->sv_coef = SG_MALLOC(float64_t *,nr_class); 02063 model->normwcw = SG_MALLOC(float64_t,nr_class); 02064 02065 for (int32_t i=0; i<nr_class+1; i++) 02066 model->rho[i]=0; 02067 02068 model->objective = si->obj; 02069 02070 if (param->use_bias) 02071 { 02072 SG_SDEBUG("Computing biases and primal objective\n"); 02073 float64_t primal = s.compute_primal(y, alpha, model->rho, model->normwcw); 02074 SG_SINFO("Primal = %10.10f\n", primal); 02075 } 02076 02077 for (int32_t i=0; i<nr_class; i++) 02078 { 02079 model->nSV[i]=class_sv_count[i]; 02080 model->SV[i] = SG_MALLOC(svm_node,class_sv_count[i]); 02081 model->sv_coef[i] = SG_MALLOC(float64_t,class_sv_count[i]); 02082 class_sv_count[i]=0; 02083 } 02084 02085 for (int32_t i=0;i<prob->l;i++) 02086 { 02087 if(fabs(alpha[i]) > 0) 02088 { 02089 model->SV[(int32_t) y[i]][class_sv_count[(int32_t) y[i]]].index = prob->x[i]->index; 02090 model->sv_coef[(int32_t) y[i]][class_sv_count[(int32_t) y[i]]] = alpha[i]; 02091 class_sv_count[(int32_t) y[i]]++; 02092 } 02093 } 02094 02095 SG_FREE(y); 02096 SG_FREE(zeros); 02097 SG_FREE(alpha); 02098 } 02099 02100 static void solve_one_class( 02101 const svm_problem *prob, const svm_parameter *param, 02102 float64_t *alpha, Solver::SolutionInfo* si) 02103 { 02104 int32_t l = prob->l; 02105 float64_t *zeros = SG_MALLOC(float64_t, l); 02106 schar *ones = SG_MALLOC(schar, l); 02107 int32_t i; 02108 02109 int32_t n = (int32_t)(param->nu*prob->l); // # of alpha's at upper bound 02110 02111 for(i=0;i<n;i++) 02112 alpha[i] = 1; 02113 if(n<prob->l) 02114 alpha[n] = param->nu * prob->l - n; 02115 for(i=n+1;i<l;i++) 02116 alpha[i] = 0; 02117 02118 for(i=0;i<l;i++) 02119 { 02120 zeros[i] = 0; 02121 ones[i] = 1; 02122 } 02123 02124 Solver s; 02125 s.Solve(l, ONE_CLASS_Q(*prob,*param), zeros, ones, 02126 alpha, 1.0, 1.0, param->eps, si, param->shrinking, param->use_bias); 02127 02128 SG_FREE(zeros); 02129 SG_FREE(ones); 02130 } 02131 02132 static void solve_epsilon_svr( 02133 const svm_problem *prob, const svm_parameter *param, 02134 float64_t *alpha, Solver::SolutionInfo* si) 02135 { 02136 int32_t l = prob->l; 02137 float64_t *alpha2 = SG_MALLOC(float64_t, 2*l); 02138 float64_t *linear_term = SG_MALLOC(float64_t, 2*l); 02139 schar *y = SG_MALLOC(schar, 2*l); 02140 int32_t i; 02141 02142 for(i=0;i<l;i++) 02143 { 02144 alpha2[i] = 0; 02145 linear_term[i] = param->p - prob->y[i]; 02146 y[i] = 1; 02147 02148 alpha2[i+l] = 0; 02149 linear_term[i+l] = param->p + prob->y[i]; 02150 y[i+l] = -1; 02151 } 02152 02153 Solver s; 02154 s.Solve(2*l, SVR_Q(*prob,*param), linear_term, y, 02155 alpha2, param->C, param->C, param->eps, si, param->shrinking, param->use_bias); 02156 02157 float64_t sum_alpha = 0; 02158 for(i=0;i<l;i++) 02159 { 02160 alpha[i] = alpha2[i] - alpha2[i+l]; 02161 sum_alpha += fabs(alpha[i]); 02162 } 02163 SG_SINFO("nu = %f\n",sum_alpha/(param->C*l)); 02164 02165 SG_FREE(alpha2); 02166 SG_FREE(linear_term); 02167 SG_FREE(y); 02168 } 02169 02170 static void solve_nu_svr( 02171 const svm_problem *prob, const svm_parameter *param, 02172 float64_t *alpha, Solver::SolutionInfo* si) 02173 { 02174 int32_t l = prob->l; 02175 float64_t C = param->C; 02176 float64_t *alpha2 = SG_MALLOC(float64_t, 2*l); 02177 float64_t *linear_term = SG_MALLOC(float64_t, 2*l); 02178 schar *y = SG_MALLOC(schar, 2*l); 02179 int32_t i; 02180 02181 float64_t sum = C * param->nu * l / 2; 02182 for(i=0;i<l;i++) 02183 { 02184 alpha2[i] = alpha2[i+l] = CMath::min(sum,C); 02185 sum -= alpha2[i]; 02186 02187 linear_term[i] = - prob->y[i]; 02188 y[i] = 1; 02189 02190 linear_term[i+l] = prob->y[i]; 02191 y[i+l] = -1; 02192 } 02193 02194 Solver_NU s; 02195 s.Solve(2*l, SVR_Q(*prob,*param), linear_term, y, 02196 alpha2, C, C, param->eps, si, param->shrinking, param->use_bias); 02197 02198 SG_SINFO("epsilon = %f\n",-si->r); 02199 02200 for(i=0;i<l;i++) 02201 alpha[i] = alpha2[i] - alpha2[i+l]; 02202 02203 SG_FREE(alpha2); 02204 SG_FREE(linear_term); 02205 SG_FREE(y); 02206 } 02207 02208 // 02209 // decision_function 02210 // 02211 struct decision_function 02212 { 02213 float64_t *alpha; 02214 float64_t rho; 02215 float64_t objective; 02216 }; 02217 02218 decision_function svm_train_one( 02219 const svm_problem *prob, const svm_parameter *param, 02220 float64_t Cp, float64_t Cn) 02221 { 02222 float64_t *alpha = SG_MALLOC(float64_t, prob->l); 02223 Solver::SolutionInfo si; 02224 switch(param->svm_type) 02225 { 02226 case C_SVC: 02227 solve_c_svc(prob,param,alpha,&si,Cp,Cn); 02228 break; 02229 case NU_SVC: 02230 solve_nu_svc(prob,param,alpha,&si); 02231 break; 02232 case ONE_CLASS: 02233 solve_one_class(prob,param,alpha,&si); 02234 break; 02235 case EPSILON_SVR: 02236 solve_epsilon_svr(prob,param,alpha,&si); 02237 break; 02238 case NU_SVR: 02239 solve_nu_svr(prob,param,alpha,&si); 02240 break; 02241 } 02242 02243 SG_SINFO("obj = %.16f, rho = %.16f\n",si.obj,si.rho); 02244 02245 // output SVs 02246 if (param->svm_type != ONE_CLASS) 02247 { 02248 int32_t nSV = 0; 02249 int32_t nBSV = 0; 02250 for(int32_t i=0;i<prob->l;i++) 02251 { 02252 if(fabs(alpha[i]) > 0) 02253 { 02254 ++nSV; 02255 if(prob->y[i] > 0) 02256 { 02257 if(fabs(alpha[i]) >= si.upper_bound_p) 02258 ++nBSV; 02259 } 02260 else 02261 { 02262 if(fabs(alpha[i]) >= si.upper_bound_n) 02263 ++nBSV; 02264 } 02265 } 02266 } 02267 SG_SINFO("nSV = %d, nBSV = %d\n",nSV,nBSV); 02268 } 02269 02270 decision_function f; 02271 f.alpha = alpha; 02272 f.rho = si.rho; 02273 f.objective=si.obj; 02274 return f; 02275 } 02276 02277 // 02278 // label: label name, start: begin of each class, count: #data of classes, perm: indices to the original data 02279 // perm, length l, must be allocated before calling this subroutine 02280 void svm_group_classes( 02281 const svm_problem *prob, int32_t *nr_class_ret, int32_t **label_ret, 02282 int32_t **start_ret, int32_t **count_ret, int32_t *perm) 02283 { 02284 int32_t l = prob->l; 02285 int32_t max_nr_class = 16; 02286 int32_t nr_class = 0; 02287 int32_t *label = SG_MALLOC(int32_t, max_nr_class); 02288 int32_t *count = SG_MALLOC(int32_t, max_nr_class); 02289 int32_t *data_label = SG_MALLOC(int32_t, l); 02290 int32_t i; 02291 02292 for(i=0;i<l;i++) 02293 { 02294 int32_t this_label=(int32_t) prob->y[i]; 02295 int32_t j; 02296 for(j=0;j<nr_class;j++) 02297 { 02298 if(this_label == label[j]) 02299 { 02300 ++count[j]; 02301 break; 02302 } 02303 } 02304 data_label[i] = j; 02305 if(j == nr_class) 02306 { 02307 if(nr_class == max_nr_class) 02308 { 02309 max_nr_class *= 2; 02310 label=SG_REALLOC(int32_t, label,max_nr_class); 02311 count=SG_REALLOC(int32_t, count,max_nr_class); 02312 } 02313 label[nr_class] = this_label; 02314 count[nr_class] = 1; 02315 ++nr_class; 02316 } 02317 } 02318 02319 int32_t *start = SG_MALLOC(int32_t, nr_class); 02320 start[0] = 0; 02321 for(i=1;i<nr_class;i++) 02322 start[i] = start[i-1]+count[i-1]; 02323 for(i=0;i<l;i++) 02324 { 02325 perm[start[data_label[i]]] = i; 02326 ++start[data_label[i]]; 02327 } 02328 start[0] = 0; 02329 for(i=1;i<nr_class;i++) 02330 start[i] = start[i-1]+count[i-1]; 02331 02332 *nr_class_ret = nr_class; 02333 *label_ret = label; 02334 *start_ret = start; 02335 *count_ret = count; 02336 SG_FREE(data_label); 02337 } 02338 02339 // 02340 // Interface functions 02341 // 02342 svm_model *svm_train(const svm_problem *prob, const svm_parameter *param) 02343 { 02344 svm_model *model = SG_MALLOC(svm_model,1); 02345 model->param = *param; 02346 model->free_sv = 0; // XXX 02347 02348 if(param->svm_type == ONE_CLASS || 02349 param->svm_type == EPSILON_SVR || 02350 param->svm_type == NU_SVR) 02351 { 02352 SG_SINFO("training one class svm or doing epsilon sv regression\n"); 02353 02354 // regression or one-class-svm 02355 model->nr_class = 2; 02356 model->label = NULL; 02357 model->nSV = NULL; 02358 model->sv_coef = SG_MALLOC(float64_t *,1); 02359 decision_function f = svm_train_one(prob,param,0,0); 02360 model->rho = SG_MALLOC(float64_t, 1); 02361 model->rho[0] = f.rho; 02362 model->objective = f.objective; 02363 02364 int32_t nSV = 0; 02365 int32_t i; 02366 for(i=0;i<prob->l;i++) 02367 if(fabs(f.alpha[i]) > 0) ++nSV; 02368 model->l = nSV; 02369 model->SV = SG_MALLOC(svm_node *,nSV); 02370 model->sv_coef[0] = SG_MALLOC(float64_t, nSV); 02371 int32_t j = 0; 02372 for(i=0;i<prob->l;i++) 02373 if(fabs(f.alpha[i]) > 0) 02374 { 02375 model->SV[j] = prob->x[i]; 02376 model->sv_coef[0][j] = f.alpha[i]; 02377 ++j; 02378 } 02379 02380 SG_FREE(f.alpha); 02381 } 02382 else if(param->svm_type == NU_MULTICLASS_SVC) 02383 { 02384 Solver::SolutionInfo si; 02385 solve_nu_multiclass_svc(prob,param,&si,model); 02386 SG_SINFO("obj = %.16f, rho = %.16f\n",si.obj,si.rho); 02387 } 02388 else 02389 { 02390 // classification 02391 int32_t l = prob->l; 02392 int32_t nr_class; 02393 int32_t *label = NULL; 02394 int32_t *start = NULL; 02395 int32_t *count = NULL; 02396 int32_t *perm = SG_MALLOC(int32_t, l); 02397 02398 // group training data of the same class 02399 svm_group_classes(prob,&nr_class,&label,&start,&count,perm); 02400 svm_node **x = SG_MALLOC(svm_node *,l); 02401 float64_t *C = SG_MALLOC(float64_t,l); 02402 float64_t *pv = SG_MALLOC(float64_t,l); 02403 02404 02405 int32_t i; 02406 for(i=0;i<l;i++) { 02407 x[i] = prob->x[perm[i]]; 02408 C[i] = prob->C[perm[i]]; 02409 02410 if (prob->pv) 02411 { 02412 pv[i] = prob->pv[perm[i]]; 02413 } 02414 else 02415 { 02416 //no custom linear term is set 02417 pv[i] = -1.0; 02418 } 02419 02420 } 02421 02422 02423 // calculate weighted C 02424 float64_t *weighted_C = SG_MALLOC(float64_t, nr_class); 02425 for(i=0;i<nr_class;i++) 02426 weighted_C[i] = param->C; 02427 for(i=0;i<param->nr_weight;i++) 02428 { 02429 int32_t j; 02430 for(j=0;j<nr_class;j++) 02431 if(param->weight_label[i] == label[j]) 02432 break; 02433 if(j == nr_class) 02434 SG_SWARNING("warning: class label %d specified in weight is not found\n", param->weight_label[i]); 02435 else 02436 weighted_C[j] *= param->weight[i]; 02437 } 02438 02439 // train k*(k-1)/2 models 02440 02441 bool *nonzero = SG_MALLOC(bool,l); 02442 for(i=0;i<l;i++) 02443 nonzero[i] = false; 02444 decision_function *f = SG_MALLOC(decision_function,nr_class*(nr_class-1)/2); 02445 02446 int32_t p = 0; 02447 for(i=0;i<nr_class;i++) 02448 for(int32_t j=i+1;j<nr_class;j++) 02449 { 02450 svm_problem sub_prob; 02451 int32_t si = start[i], sj = start[j]; 02452 int32_t ci = count[i], cj = count[j]; 02453 sub_prob.l = ci+cj; 02454 sub_prob.x = SG_MALLOC(svm_node *,sub_prob.l); 02455 sub_prob.y = SG_MALLOC(float64_t,sub_prob.l+1); //dirty hack to surpress valgrind err 02456 sub_prob.C = SG_MALLOC(float64_t,sub_prob.l+1); 02457 sub_prob.pv = SG_MALLOC(float64_t,sub_prob.l+1); 02458 02459 int32_t k; 02460 for(k=0;k<ci;k++) 02461 { 02462 sub_prob.x[k] = x[si+k]; 02463 sub_prob.y[k] = +1; 02464 sub_prob.C[k] = C[si+k]; 02465 sub_prob.pv[k] = pv[si+k]; 02466 02467 } 02468 for(k=0;k<cj;k++) 02469 { 02470 sub_prob.x[ci+k] = x[sj+k]; 02471 sub_prob.y[ci+k] = -1; 02472 sub_prob.C[ci+k] = C[sj+k]; 02473 sub_prob.pv[ci+k] = pv[sj+k]; 02474 } 02475 sub_prob.y[sub_prob.l]=-1; //dirty hack to surpress valgrind err 02476 sub_prob.C[sub_prob.l]=-1; 02477 sub_prob.pv[sub_prob.l]=-1; 02478 02479 f[p] = svm_train_one(&sub_prob,param,weighted_C[i],weighted_C[j]); 02480 for(k=0;k<ci;k++) 02481 if(!nonzero[si+k] && fabs(f[p].alpha[k]) > 0) 02482 nonzero[si+k] = true; 02483 for(k=0;k<cj;k++) 02484 if(!nonzero[sj+k] && fabs(f[p].alpha[ci+k]) > 0) 02485 nonzero[sj+k] = true; 02486 SG_FREE(sub_prob.x); 02487 SG_FREE(sub_prob.y); 02488 SG_FREE(sub_prob.C); 02489 SG_FREE(sub_prob.pv); 02490 ++p; 02491 } 02492 02493 // build output 02494 02495 model->objective = f[0].objective; 02496 model->nr_class = nr_class; 02497 02498 model->label = SG_MALLOC(int32_t, nr_class); 02499 for(i=0;i<nr_class;i++) 02500 model->label[i] = label[i]; 02501 02502 model->rho = SG_MALLOC(float64_t, nr_class*(nr_class-1)/2); 02503 for(i=0;i<nr_class*(nr_class-1)/2;i++) 02504 model->rho[i] = f[i].rho; 02505 02506 int32_t total_sv = 0; 02507 int32_t *nz_count = SG_MALLOC(int32_t, nr_class); 02508 model->nSV = SG_MALLOC(int32_t, nr_class); 02509 for(i=0;i<nr_class;i++) 02510 { 02511 int32_t nSV = 0; 02512 for(int32_t j=0;j<count[i];j++) 02513 if(nonzero[start[i]+j]) 02514 { 02515 ++nSV; 02516 ++total_sv; 02517 } 02518 model->nSV[i] = nSV; 02519 nz_count[i] = nSV; 02520 } 02521 02522 SG_SINFO("Total nSV = %d\n",total_sv); 02523 02524 model->l = total_sv; 02525 model->SV = SG_MALLOC(svm_node *,total_sv); 02526 p = 0; 02527 for(i=0;i<l;i++) 02528 if(nonzero[i]) model->SV[p++] = x[i]; 02529 02530 int32_t *nz_start = SG_MALLOC(int32_t, nr_class); 02531 nz_start[0] = 0; 02532 for(i=1;i<nr_class;i++) 02533 nz_start[i] = nz_start[i-1]+nz_count[i-1]; 02534 02535 model->sv_coef = SG_MALLOC(float64_t *,nr_class-1); 02536 for(i=0;i<nr_class-1;i++) 02537 model->sv_coef[i] = SG_MALLOC(float64_t, total_sv); 02538 02539 p = 0; 02540 for(i=0;i<nr_class;i++) 02541 for(int32_t j=i+1;j<nr_class;j++) 02542 { 02543 // classifier (i,j): coefficients with 02544 // i are in sv_coef[j-1][nz_start[i]...], 02545 // j are in sv_coef[i][nz_start[j]...] 02546 02547 int32_t si = start[i]; 02548 int32_t sj = start[j]; 02549 int32_t ci = count[i]; 02550 int32_t cj = count[j]; 02551 02552 int32_t q = nz_start[i]; 02553 int32_t k; 02554 for(k=0;k<ci;k++) 02555 if(nonzero[si+k]) 02556 model->sv_coef[j-1][q++] = f[p].alpha[k]; 02557 q = nz_start[j]; 02558 for(k=0;k<cj;k++) 02559 if(nonzero[sj+k]) 02560 model->sv_coef[i][q++] = f[p].alpha[ci+k]; 02561 ++p; 02562 } 02563 02564 SG_FREE(label); 02565 SG_FREE(count); 02566 SG_FREE(perm); 02567 SG_FREE(start); 02568 SG_FREE(x); 02569 SG_FREE(C); 02570 SG_FREE(pv); 02571 SG_FREE(weighted_C); 02572 SG_FREE(nonzero); 02573 for(i=0;i<nr_class*(nr_class-1)/2;i++) 02574 SG_FREE(f[i].alpha); 02575 SG_FREE(f); 02576 SG_FREE(nz_count); 02577 SG_FREE(nz_start); 02578 } 02579 return model; 02580 } 02581 02582 void svm_destroy_model(svm_model* model) 02583 { 02584 if(model->free_sv && model->l > 0) 02585 SG_FREE((void *)(model->SV[0])); 02586 for(int32_t i=0;i<model->nr_class-1;i++) 02587 SG_FREE(model->sv_coef[i]); 02588 SG_FREE(model->SV); 02589 SG_FREE(model->sv_coef); 02590 SG_FREE(model->rho); 02591 SG_FREE(model->label); 02592 SG_FREE(model->nSV); 02593 SG_FREE(model); 02594 } 02595 02596 void svm_destroy_param(svm_parameter* param) 02597 { 02598 SG_FREE(param->weight_label); 02599 SG_FREE(param->weight); 02600 } 02601 02602 const char *svm_check_parameter( 02603 const svm_problem *prob, const svm_parameter *param) 02604 { 02605 // svm_type 02606 02607 int32_t svm_type = param->svm_type; 02608 if(svm_type != C_SVC && 02609 svm_type != NU_SVC && 02610 svm_type != ONE_CLASS && 02611 svm_type != EPSILON_SVR && 02612 svm_type != NU_SVR && 02613 svm_type != NU_MULTICLASS_SVC) 02614 return "unknown svm type"; 02615 02616 // kernel_type, degree 02617 02618 int32_t kernel_type = param->kernel_type; 02619 if(kernel_type != LINEAR && 02620 kernel_type != POLY && 02621 kernel_type != RBF && 02622 kernel_type != SIGMOID && 02623 kernel_type != PRECOMPUTED) 02624 return "unknown kernel type"; 02625 02626 if(param->degree < 0) 02627 return "degree of polynomial kernel < 0"; 02628 02629 // cache_size,eps,C,nu,p,shrinking 02630 02631 if(param->cache_size <= 0) 02632 return "cache_size <= 0"; 02633 02634 if(param->eps <= 0) 02635 return "eps <= 0"; 02636 02637 if(svm_type == C_SVC || 02638 svm_type == EPSILON_SVR || 02639 svm_type == NU_SVR) 02640 if(param->C <= 0) 02641 return "C <= 0"; 02642 02643 if(svm_type == NU_SVC || 02644 svm_type == ONE_CLASS || 02645 svm_type == NU_SVR) 02646 if(param->nu <= 0 || param->nu > 1) 02647 return "nu <= 0 or nu > 1"; 02648 02649 if(svm_type == EPSILON_SVR) 02650 if(param->p < 0) 02651 return "p < 0"; 02652 02653 if(param->shrinking != 0 && 02654 param->shrinking != 1) 02655 return "shrinking != 0 and shrinking != 1"; 02656 02657 02658 // check whether nu-svc is feasible 02659 02660 if(svm_type == NU_SVC) 02661 { 02662 int32_t l = prob->l; 02663 int32_t max_nr_class = 16; 02664 int32_t nr_class = 0; 02665 int32_t *label = SG_MALLOC(int32_t, max_nr_class); 02666 int32_t *count = SG_MALLOC(int32_t, max_nr_class); 02667 02668 int32_t i; 02669 for(i=0;i<l;i++) 02670 { 02671 int32_t this_label = (int32_t) prob->y[i]; 02672 int32_t j; 02673 for(j=0;j<nr_class;j++) 02674 if(this_label == label[j]) 02675 { 02676 ++count[j]; 02677 break; 02678 } 02679 if(j == nr_class) 02680 { 02681 if(nr_class == max_nr_class) 02682 { 02683 max_nr_class *= 2; 02684 label=SG_REALLOC(int32_t, label, max_nr_class); 02685 count=SG_REALLOC(int32_t, count, max_nr_class); 02686 } 02687 label[nr_class] = this_label; 02688 count[nr_class] = 1; 02689 ++nr_class; 02690 } 02691 } 02692 02693 for(i=0;i<nr_class;i++) 02694 { 02695 int32_t n1 = count[i]; 02696 for(int32_t j=i+1;j<nr_class;j++) 02697 { 02698 int32_t n2 = count[j]; 02699 if(param->nu*(n1+n2)/2 > CMath::min(n1,n2)) 02700 { 02701 SG_FREE(label); 02702 SG_FREE(count); 02703 return "specified nu is infeasible"; 02704 } 02705 } 02706 } 02707 SG_FREE(label); 02708 SG_FREE(count); 02709 } 02710 02711 return NULL; 02712 } 02713 } 02714 #endif // DOXYGEN_SHOULD_SKIP_THIS