SHOGUN
v2.0.0
|
00001 /* 00002 * EXCEPT FOR THE KERNEL CACHING FUNCTIONS WHICH ARE (W) THORSTEN JOACHIMS 00003 * COPYRIGHT (C) 1999 UNIVERSITAET DORTMUND - ALL RIGHTS RESERVED 00004 * 00005 * this program is free software; you can redistribute it and/or modify 00006 * it under the terms of the GNU General Public License as published by 00007 * the Free Software Foundation; either version 3 of the License, or 00008 * (at your option) any later version. 00009 * 00010 * Written (W) 1999-2009 Soeren Sonnenburg 00011 * Written (W) 1999-2008 Gunnar Raetsch 00012 * Copyright (C) 1999-2009 Fraunhofer Institute FIRST and Max-Planck-Society 00013 */ 00014 00015 #include <shogun/lib/config.h> 00016 #include <shogun/lib/common.h> 00017 #include <shogun/io/SGIO.h> 00018 #include <shogun/io/File.h> 00019 #include <shogun/lib/Time.h> 00020 #include <shogun/lib/Signal.h> 00021 00022 #include <shogun/base/Parallel.h> 00023 00024 #include <shogun/kernel/Kernel.h> 00025 #include <shogun/kernel/normalizer/IdentityKernelNormalizer.h> 00026 #include <shogun/features/Features.h> 00027 #include <shogun/base/Parameter.h> 00028 00029 #include <shogun/classifier/svm/SVM.h> 00030 00031 #include <string.h> 00032 #include <unistd.h> 00033 #include <math.h> 00034 00035 #ifdef HAVE_PTHREAD 00036 #include <pthread.h> 00037 #endif 00038 00039 using namespace shogun; 00040 00041 CKernel::CKernel() : CSGObject() 00042 { 00043 init(); 00044 register_params(); 00045 } 00046 00047 CKernel::CKernel(int32_t size) : CSGObject() 00048 { 00049 init(); 00050 00051 if (size<10) 00052 size=10; 00053 00054 cache_size=size; 00055 register_params(); 00056 } 00057 00058 00059 CKernel::CKernel(CFeatures* p_lhs, CFeatures* p_rhs, int32_t size) : CSGObject() 00060 { 00061 init(); 00062 00063 if (size<10) 00064 size=10; 00065 00066 cache_size=size; 00067 00068 set_normalizer(new CIdentityKernelNormalizer()); 00069 init(p_lhs, p_rhs); 00070 register_params(); 00071 } 00072 00073 CKernel::~CKernel() 00074 { 00075 if (get_is_initialized()) 00076 SG_ERROR("Kernel still initialized on destruction.\n"); 00077 00078 remove_lhs_and_rhs(); 00079 SG_UNREF(normalizer); 00080 00081 SG_INFO("Kernel deleted (%p).\n", this); 00082 } 00083 00084 #ifdef USE_SVMLIGHT 00085 void CKernel::resize_kernel_cache(KERNELCACHE_IDX size, bool regression_hack) 00086 { 00087 if (size<10) 00088 size=10; 00089 00090 kernel_cache_cleanup(); 00091 cache_size=size; 00092 00093 if (has_features() && get_num_vec_lhs()) 00094 kernel_cache_init(cache_size, regression_hack); 00095 } 00096 #endif //USE_SVMLIGHT 00097 00098 bool CKernel::init(CFeatures* l, CFeatures* r) 00099 { 00100 /* make sure that features are not deleted if same ones are used */ 00101 SG_REF(l); 00102 SG_REF(r); 00103 00104 //make sure features were indeed supplied 00105 ASSERT(l); 00106 ASSERT(r); 00107 00108 //make sure features are compatible 00109 ASSERT(l->get_feature_class()==r->get_feature_class()); 00110 ASSERT(l->get_feature_type()==r->get_feature_type()); 00111 00112 //remove references to previous features 00113 remove_lhs_and_rhs(); 00114 00115 //increase reference counts 00116 SG_REF(l); 00117 if (l==r) 00118 lhs_equals_rhs=true; 00119 else // l!=r 00120 SG_REF(r); 00121 00122 lhs=l; 00123 rhs=r; 00124 00125 ASSERT(!num_lhs || num_lhs==l->get_num_vectors()); 00126 ASSERT(!num_rhs || num_rhs==l->get_num_vectors()); 00127 00128 num_lhs=l->get_num_vectors(); 00129 num_rhs=r->get_num_vectors(); 00130 00131 /* unref "safety" refs from beginning */ 00132 SG_UNREF(r); 00133 SG_UNREF(l); 00134 00135 return true; 00136 } 00137 00138 bool CKernel::set_normalizer(CKernelNormalizer* n) 00139 { 00140 SG_REF(n); 00141 if (lhs && rhs) 00142 n->init(this); 00143 00144 SG_UNREF(normalizer); 00145 normalizer=n; 00146 00147 return (normalizer!=NULL); 00148 } 00149 00150 CKernelNormalizer* CKernel::get_normalizer() 00151 { 00152 SG_REF(normalizer) 00153 return normalizer; 00154 } 00155 00156 bool CKernel::init_normalizer() 00157 { 00158 return normalizer->init(this); 00159 } 00160 00161 void CKernel::cleanup() 00162 { 00163 remove_lhs_and_rhs(); 00164 } 00165 00166 #ifdef USE_SVMLIGHT 00167 /****************************** Cache handling *******************************/ 00168 00169 void CKernel::kernel_cache_init(int32_t buffsize, bool regression_hack) 00170 { 00171 int32_t totdoc=get_num_vec_lhs(); 00172 if (totdoc<=0) 00173 { 00174 SG_ERROR("kernel has zero rows: num_lhs=%d num_rhs=%d\n", 00175 get_num_vec_lhs(), get_num_vec_rhs()); 00176 } 00177 uint64_t buffer_size=0; 00178 int32_t i; 00179 00180 //in regression the additional constraints are made by doubling the training data 00181 if (regression_hack) 00182 totdoc*=2; 00183 00184 buffer_size=((uint64_t) buffsize)*1024*1024/sizeof(KERNELCACHE_ELEM); 00185 if (buffer_size>((uint64_t) totdoc)*totdoc) 00186 buffer_size=((uint64_t) totdoc)*totdoc; 00187 00188 SG_INFO( "using a kernel cache of size %lld MB (%lld bytes) for %s Kernel\n", buffer_size*sizeof(KERNELCACHE_ELEM)/1024/1024, buffer_size*sizeof(KERNELCACHE_ELEM), get_name()); 00189 00190 //make sure it fits in the *signed* KERNELCACHE_IDX type 00191 ASSERT(buffer_size < (((uint64_t) 1) << (sizeof(KERNELCACHE_IDX)*8-1))); 00192 00193 kernel_cache.index = SG_MALLOC(int32_t, totdoc); 00194 kernel_cache.occu = SG_MALLOC(int32_t, totdoc); 00195 kernel_cache.lru = SG_MALLOC(int32_t, totdoc); 00196 kernel_cache.invindex = SG_MALLOC(int32_t, totdoc); 00197 kernel_cache.active2totdoc = SG_MALLOC(int32_t, totdoc); 00198 kernel_cache.totdoc2active = SG_MALLOC(int32_t, totdoc); 00199 kernel_cache.buffer = SG_MALLOC(KERNELCACHE_ELEM, buffer_size); 00200 kernel_cache.buffsize=buffer_size; 00201 kernel_cache.max_elems=(int32_t) (kernel_cache.buffsize/totdoc); 00202 00203 if(kernel_cache.max_elems>totdoc) { 00204 kernel_cache.max_elems=totdoc; 00205 } 00206 00207 kernel_cache.elems=0; // initialize cache 00208 for(i=0;i<totdoc;i++) { 00209 kernel_cache.index[i]=-1; 00210 kernel_cache.lru[i]=0; 00211 } 00212 for(i=0;i<totdoc;i++) { 00213 kernel_cache.occu[i]=0; 00214 kernel_cache.invindex[i]=-1; 00215 } 00216 00217 kernel_cache.activenum=totdoc;; 00218 for(i=0;i<totdoc;i++) { 00219 kernel_cache.active2totdoc[i]=i; 00220 kernel_cache.totdoc2active[i]=i; 00221 } 00222 00223 kernel_cache.time=0; 00224 } 00225 00226 void CKernel::get_kernel_row( 00227 int32_t docnum, int32_t *active2dnum, float64_t *buffer, bool full_line) 00228 { 00229 int32_t i,j; 00230 KERNELCACHE_IDX start; 00231 00232 int32_t num_vectors = get_num_vec_lhs(); 00233 if (docnum>=num_vectors) 00234 docnum=2*num_vectors-1-docnum; 00235 00236 /* is cached? */ 00237 if(kernel_cache.index[docnum] != -1) 00238 { 00239 kernel_cache.lru[kernel_cache.index[docnum]]=kernel_cache.time; /* lru */ 00240 start=((KERNELCACHE_IDX) kernel_cache.activenum)*kernel_cache.index[docnum]; 00241 00242 if (full_line) 00243 { 00244 for(j=0;j<get_num_vec_lhs();j++) 00245 { 00246 if(kernel_cache.totdoc2active[j] >= 0) 00247 buffer[j]=kernel_cache.buffer[start+kernel_cache.totdoc2active[j]]; 00248 else 00249 buffer[j]=(float64_t) kernel(docnum, j); 00250 } 00251 } 00252 else 00253 { 00254 for(i=0;(j=active2dnum[i])>=0;i++) 00255 { 00256 if(kernel_cache.totdoc2active[j] >= 0) 00257 buffer[j]=kernel_cache.buffer[start+kernel_cache.totdoc2active[j]]; 00258 else 00259 { 00260 int32_t k=j; 00261 if (k>=num_vectors) 00262 k=2*num_vectors-1-k; 00263 buffer[j]=(float64_t) kernel(docnum, k); 00264 } 00265 } 00266 } 00267 } 00268 else 00269 { 00270 if (full_line) 00271 { 00272 for(j=0;j<get_num_vec_lhs();j++) 00273 buffer[j]=(KERNELCACHE_ELEM) kernel(docnum, j); 00274 } 00275 else 00276 { 00277 for(i=0;(j=active2dnum[i])>=0;i++) 00278 { 00279 int32_t k=j; 00280 if (k>=num_vectors) 00281 k=2*num_vectors-1-k; 00282 buffer[j]=(KERNELCACHE_ELEM) kernel(docnum, k); 00283 } 00284 } 00285 } 00286 } 00287 00288 00289 // Fills cache for the row m 00290 void CKernel::cache_kernel_row(int32_t m) 00291 { 00292 register int32_t j,k,l; 00293 register KERNELCACHE_ELEM *cache; 00294 00295 int32_t num_vectors = get_num_vec_lhs(); 00296 00297 if (m>=num_vectors) 00298 m=2*num_vectors-1-m; 00299 00300 if(!kernel_cache_check(m)) // not cached yet 00301 { 00302 cache = kernel_cache_clean_and_malloc(m); 00303 if(cache) { 00304 l=kernel_cache.totdoc2active[m]; 00305 00306 for(j=0;j<kernel_cache.activenum;j++) // fill cache 00307 { 00308 k=kernel_cache.active2totdoc[j]; 00309 00310 if((kernel_cache.index[k] != -1) && (l != -1) && (k != m)) { 00311 cache[j]=kernel_cache.buffer[((KERNELCACHE_IDX) kernel_cache.activenum) 00312 *kernel_cache.index[k]+l]; 00313 } 00314 else 00315 { 00316 if (k>=num_vectors) 00317 k=2*num_vectors-1-k; 00318 00319 cache[j]=kernel(m, k); 00320 } 00321 } 00322 } 00323 else 00324 perror("Error: Kernel cache full! => increase cache size"); 00325 } 00326 } 00327 00328 00329 void* CKernel::cache_multiple_kernel_row_helper(void* p) 00330 { 00331 int32_t j,k,l; 00332 S_KTHREAD_PARAM* params = (S_KTHREAD_PARAM*) p; 00333 00334 for (int32_t i=params->start; i<params->end; i++) 00335 { 00336 KERNELCACHE_ELEM* cache=params->cache[i]; 00337 int32_t m = params->uncached_rows[i]; 00338 l=params->kernel_cache->totdoc2active[m]; 00339 00340 for(j=0;j<params->kernel_cache->activenum;j++) // fill cache 00341 { 00342 k=params->kernel_cache->active2totdoc[j]; 00343 00344 if((params->kernel_cache->index[k] != -1) && (l != -1) && (!params->needs_computation[k])) { 00345 cache[j]=params->kernel_cache->buffer[((KERNELCACHE_IDX) params->kernel_cache->activenum) 00346 *params->kernel_cache->index[k]+l]; 00347 } 00348 else 00349 { 00350 if (k>=params->num_vectors) 00351 k=2*params->num_vectors-1-k; 00352 00353 cache[j]=params->kernel->kernel(m, k); 00354 } 00355 } 00356 00357 //now line m is cached 00358 params->needs_computation[m]=0; 00359 } 00360 return NULL; 00361 } 00362 00363 // Fills cache for the rows in key 00364 void CKernel::cache_multiple_kernel_rows(int32_t* rows, int32_t num_rows) 00365 { 00366 #ifdef HAVE_PTHREAD 00367 int32_t nthreads=parallel->get_num_threads(); 00368 00369 if (nthreads<2) 00370 { 00371 #endif 00372 for(int32_t i=0;i<num_rows;i++) 00373 cache_kernel_row(rows[i]); 00374 #ifdef HAVE_PTHREAD 00375 } 00376 else 00377 { 00378 // fill up kernel cache 00379 int32_t* uncached_rows = SG_MALLOC(int32_t, num_rows); 00380 KERNELCACHE_ELEM** cache = SG_MALLOC(KERNELCACHE_ELEM*, num_rows); 00381 pthread_t* threads = SG_MALLOC(pthread_t, nthreads-1); 00382 S_KTHREAD_PARAM* params = SG_MALLOC(S_KTHREAD_PARAM, nthreads-1); 00383 int32_t num_threads=nthreads-1; 00384 int32_t num_vec=get_num_vec_lhs(); 00385 ASSERT(num_vec>0); 00386 uint8_t* needs_computation=SG_CALLOC(uint8_t, num_vec); 00387 00388 int32_t step=0; 00389 int32_t num=0; 00390 int32_t end=0; 00391 00392 // allocate cachelines if necessary 00393 for (int32_t i=0; i<num_rows; i++) 00394 { 00395 int32_t idx=rows[i]; 00396 if (idx>=num_vec) 00397 idx=2*num_vec-1-idx; 00398 00399 if (kernel_cache_check(idx)) 00400 continue; 00401 00402 needs_computation[idx]=1; 00403 uncached_rows[num]=idx; 00404 cache[num]= kernel_cache_clean_and_malloc(idx); 00405 00406 if (!cache[num]) 00407 SG_ERROR("Kernel cache full! => increase cache size\n"); 00408 00409 num++; 00410 } 00411 00412 if (num>0) 00413 { 00414 step= num/nthreads; 00415 00416 if (step<1) 00417 { 00418 num_threads=num-1; 00419 step=1; 00420 } 00421 00422 for (int32_t t=0; t<num_threads; t++) 00423 { 00424 params[t].kernel = this; 00425 params[t].kernel_cache = &kernel_cache; 00426 params[t].cache = cache; 00427 params[t].uncached_rows = uncached_rows; 00428 params[t].needs_computation = needs_computation; 00429 params[t].num_uncached = num; 00430 params[t].start = t*step; 00431 params[t].end = (t+1)*step; 00432 params[t].num_vectors = get_num_vec_lhs(); 00433 end=params[t].end; 00434 00435 int code=pthread_create(&threads[t], NULL, 00436 CKernel::cache_multiple_kernel_row_helper, (void*)¶ms[t]); 00437 00438 if (code != 0) 00439 { 00440 SG_WARNING("Thread creation failed (thread %d of %d) " 00441 "with error:'%s'\n",t, num_threads, strerror(code)); 00442 num_threads=t; 00443 end=t*step; 00444 break; 00445 } 00446 } 00447 } 00448 else 00449 num_threads=-1; 00450 00451 00452 S_KTHREAD_PARAM last_param; 00453 last_param.kernel = this; 00454 last_param.kernel_cache = &kernel_cache; 00455 last_param.cache = cache; 00456 last_param.uncached_rows = uncached_rows; 00457 last_param.needs_computation = needs_computation; 00458 last_param.start = end; 00459 last_param.num_uncached = num; 00460 last_param.end = num; 00461 last_param.num_vectors = get_num_vec_lhs(); 00462 00463 cache_multiple_kernel_row_helper(&last_param); 00464 00465 00466 for (int32_t t=0; t<num_threads; t++) 00467 { 00468 if (pthread_join(threads[t], NULL) != 0) 00469 SG_WARNING("pthread_join of thread %d/%d failed\n", t, num_threads); 00470 } 00471 00472 SG_FREE(needs_computation); 00473 SG_FREE(params); 00474 SG_FREE(threads); 00475 SG_FREE(cache); 00476 SG_FREE(uncached_rows); 00477 } 00478 #endif 00479 } 00480 00481 // remove numshrink columns in the cache 00482 // which correspond to examples marked 00483 void CKernel::kernel_cache_shrink( 00484 int32_t totdoc, int32_t numshrink, int32_t *after) 00485 { 00486 register int32_t i,j,jj,scount; // 0 in after. 00487 KERNELCACHE_IDX from=0,to=0; 00488 int32_t *keep; 00489 00490 keep=SG_MALLOC(int32_t, totdoc); 00491 for(j=0;j<totdoc;j++) { 00492 keep[j]=1; 00493 } 00494 scount=0; 00495 for(jj=0;(jj<kernel_cache.activenum) && (scount<numshrink);jj++) { 00496 j=kernel_cache.active2totdoc[jj]; 00497 if(!after[j]) { 00498 scount++; 00499 keep[j]=0; 00500 } 00501 } 00502 00503 for(i=0;i<kernel_cache.max_elems;i++) { 00504 for(jj=0;jj<kernel_cache.activenum;jj++) { 00505 j=kernel_cache.active2totdoc[jj]; 00506 if(!keep[j]) { 00507 from++; 00508 } 00509 else { 00510 kernel_cache.buffer[to]=kernel_cache.buffer[from]; 00511 to++; 00512 from++; 00513 } 00514 } 00515 } 00516 00517 kernel_cache.activenum=0; 00518 for(j=0;j<totdoc;j++) { 00519 if((keep[j]) && (kernel_cache.totdoc2active[j] != -1)) { 00520 kernel_cache.active2totdoc[kernel_cache.activenum]=j; 00521 kernel_cache.totdoc2active[j]=kernel_cache.activenum; 00522 kernel_cache.activenum++; 00523 } 00524 else { 00525 kernel_cache.totdoc2active[j]=-1; 00526 } 00527 } 00528 00529 kernel_cache.max_elems= 00530 (int32_t)(kernel_cache.buffsize/kernel_cache.activenum); 00531 if(kernel_cache.max_elems>totdoc) { 00532 kernel_cache.max_elems=totdoc; 00533 } 00534 00535 SG_FREE(keep); 00536 00537 } 00538 00539 void CKernel::kernel_cache_reset_lru() 00540 { 00541 int32_t maxlru=0,k; 00542 00543 for(k=0;k<kernel_cache.max_elems;k++) { 00544 if(maxlru < kernel_cache.lru[k]) 00545 maxlru=kernel_cache.lru[k]; 00546 } 00547 for(k=0;k<kernel_cache.max_elems;k++) { 00548 kernel_cache.lru[k]-=maxlru; 00549 } 00550 } 00551 00552 void CKernel::kernel_cache_cleanup() 00553 { 00554 SG_FREE(kernel_cache.index); 00555 SG_FREE(kernel_cache.occu); 00556 SG_FREE(kernel_cache.lru); 00557 SG_FREE(kernel_cache.invindex); 00558 SG_FREE(kernel_cache.active2totdoc); 00559 SG_FREE(kernel_cache.totdoc2active); 00560 SG_FREE(kernel_cache.buffer); 00561 memset(&kernel_cache, 0x0, sizeof(KERNEL_CACHE)); 00562 } 00563 00564 int32_t CKernel::kernel_cache_malloc() 00565 { 00566 int32_t i; 00567 00568 if(kernel_cache_space_available()) { 00569 for(i=0;i<kernel_cache.max_elems;i++) { 00570 if(!kernel_cache.occu[i]) { 00571 kernel_cache.occu[i]=1; 00572 kernel_cache.elems++; 00573 return(i); 00574 } 00575 } 00576 } 00577 return(-1); 00578 } 00579 00580 void CKernel::kernel_cache_free(int32_t cacheidx) 00581 { 00582 kernel_cache.occu[cacheidx]=0; 00583 kernel_cache.elems--; 00584 } 00585 00586 // remove least recently used cache 00587 // element 00588 int32_t CKernel::kernel_cache_free_lru() 00589 { 00590 register int32_t k,least_elem=-1,least_time; 00591 00592 least_time=kernel_cache.time+1; 00593 for(k=0;k<kernel_cache.max_elems;k++) { 00594 if(kernel_cache.invindex[k] != -1) { 00595 if(kernel_cache.lru[k]<least_time) { 00596 least_time=kernel_cache.lru[k]; 00597 least_elem=k; 00598 } 00599 } 00600 } 00601 00602 if(least_elem != -1) { 00603 kernel_cache_free(least_elem); 00604 kernel_cache.index[kernel_cache.invindex[least_elem]]=-1; 00605 kernel_cache.invindex[least_elem]=-1; 00606 return(1); 00607 } 00608 return(0); 00609 } 00610 00611 // Get a free cache entry. In case cache is full, the lru 00612 // element is removed. 00613 KERNELCACHE_ELEM* CKernel::kernel_cache_clean_and_malloc(int32_t cacheidx) 00614 { 00615 int32_t result; 00616 if((result = kernel_cache_malloc()) == -1) { 00617 if(kernel_cache_free_lru()) { 00618 result = kernel_cache_malloc(); 00619 } 00620 } 00621 kernel_cache.index[cacheidx]=result; 00622 if(result == -1) { 00623 return(0); 00624 } 00625 kernel_cache.invindex[result]=cacheidx; 00626 kernel_cache.lru[kernel_cache.index[cacheidx]]=kernel_cache.time; // lru 00627 return &kernel_cache.buffer[((KERNELCACHE_IDX) kernel_cache.activenum)*kernel_cache.index[cacheidx]]; 00628 } 00629 #endif //USE_SVMLIGHT 00630 00631 void CKernel::load(CFile* loader) 00632 { 00633 SG_SET_LOCALE_C; 00634 SG_RESET_LOCALE; 00635 } 00636 00637 void CKernel::save(CFile* writer) 00638 { 00639 SGMatrix<float64_t> k_matrix=get_kernel_matrix<float64_t>(); 00640 SG_SET_LOCALE_C; 00641 writer->set_matrix(k_matrix.matrix, k_matrix.num_rows, k_matrix.num_cols); 00642 SG_RESET_LOCALE; 00643 } 00644 00645 void CKernel::remove_lhs_and_rhs() 00646 { 00647 if (rhs!=lhs) 00648 SG_UNREF(rhs); 00649 rhs = NULL; 00650 num_rhs=0; 00651 00652 SG_UNREF(lhs); 00653 lhs = NULL; 00654 num_lhs=0; 00655 lhs_equals_rhs=false; 00656 00657 #ifdef USE_SVMLIGHT 00658 cache_reset(); 00659 #endif //USE_SVMLIGHT 00660 } 00661 00662 void CKernel::remove_lhs() 00663 { 00664 if (rhs==lhs) 00665 rhs=NULL; 00666 SG_UNREF(lhs); 00667 lhs = NULL; 00668 num_lhs=0; 00669 lhs_equals_rhs=false; 00670 #ifdef USE_SVMLIGHT 00671 cache_reset(); 00672 #endif //USE_SVMLIGHT 00673 } 00674 00676 void CKernel::remove_rhs() 00677 { 00678 if (rhs!=lhs) 00679 SG_UNREF(rhs); 00680 rhs = NULL; 00681 num_rhs=0; 00682 lhs_equals_rhs=false; 00683 00684 #ifdef USE_SVMLIGHT 00685 cache_reset(); 00686 #endif //USE_SVMLIGHT 00687 } 00688 00689 #define ENUM_CASE(n) case n: SG_INFO(#n " "); break; 00690 00691 void CKernel::list_kernel() 00692 { 00693 SG_INFO( "%p - \"%s\" weight=%1.2f OPT:%s", this, get_name(), 00694 get_combined_kernel_weight(), 00695 get_optimization_type()==FASTBUTMEMHUNGRY ? "FASTBUTMEMHUNGRY" : 00696 "SLOWBUTMEMEFFICIENT"); 00697 00698 switch (get_kernel_type()) 00699 { 00700 ENUM_CASE(K_UNKNOWN) 00701 ENUM_CASE(K_LINEAR) 00702 ENUM_CASE(K_POLY) 00703 ENUM_CASE(K_GAUSSIAN) 00704 ENUM_CASE(K_GAUSSIANSHIFT) 00705 ENUM_CASE(K_GAUSSIANMATCH) 00706 ENUM_CASE(K_HISTOGRAM) 00707 ENUM_CASE(K_SALZBERG) 00708 ENUM_CASE(K_LOCALITYIMPROVED) 00709 ENUM_CASE(K_SIMPLELOCALITYIMPROVED) 00710 ENUM_CASE(K_FIXEDDEGREE) 00711 ENUM_CASE(K_WEIGHTEDDEGREE) 00712 ENUM_CASE(K_WEIGHTEDDEGREEPOS) 00713 ENUM_CASE(K_WEIGHTEDDEGREERBF) 00714 ENUM_CASE(K_WEIGHTEDCOMMWORDSTRING) 00715 ENUM_CASE(K_POLYMATCH) 00716 ENUM_CASE(K_ALIGNMENT) 00717 ENUM_CASE(K_COMMWORDSTRING) 00718 ENUM_CASE(K_COMMULONGSTRING) 00719 ENUM_CASE(K_SPECTRUMRBF) 00720 ENUM_CASE(K_COMBINED) 00721 ENUM_CASE(K_AUC) 00722 ENUM_CASE(K_CUSTOM) 00723 ENUM_CASE(K_SIGMOID) 00724 ENUM_CASE(K_CHI2) 00725 ENUM_CASE(K_DIAG) 00726 ENUM_CASE(K_CONST) 00727 ENUM_CASE(K_DISTANCE) 00728 ENUM_CASE(K_LOCALALIGNMENT) 00729 ENUM_CASE(K_PYRAMIDCHI2) 00730 ENUM_CASE(K_OLIGO) 00731 ENUM_CASE(K_MATCHWORD) 00732 ENUM_CASE(K_TPPK) 00733 ENUM_CASE(K_REGULATORYMODULES) 00734 ENUM_CASE(K_SPARSESPATIALSAMPLE) 00735 ENUM_CASE(K_HISTOGRAMINTERSECTION) 00736 ENUM_CASE(K_WAVELET) 00737 ENUM_CASE(K_WAVE) 00738 ENUM_CASE(K_CAUCHY) 00739 ENUM_CASE(K_TSTUDENT) 00740 ENUM_CASE(K_MULTIQUADRIC) 00741 ENUM_CASE(K_EXPONENTIAL) 00742 ENUM_CASE(K_RATIONAL_QUADRATIC) 00743 ENUM_CASE(K_POWER) 00744 ENUM_CASE(K_SPHERICAL) 00745 ENUM_CASE(K_LOG) 00746 ENUM_CASE(K_SPLINE) 00747 ENUM_CASE(K_ANOVA) 00748 ENUM_CASE(K_CIRCULAR) 00749 ENUM_CASE(K_INVERSEMULTIQUADRIC) 00750 ENUM_CASE(K_SPECTRUMMISMATCHRBF) 00751 ENUM_CASE(K_DISTANTSEGMENTS) 00752 ENUM_CASE(K_BESSEL) 00753 ENUM_CASE(K_JENSENSHANNON) 00754 ENUM_CASE(K_DIRECTOR) 00755 ENUM_CASE(K_PRODUCT) 00756 ENUM_CASE(K_LINEARARD) 00757 ENUM_CASE(K_GAUSSIANARD) 00758 } 00759 00760 switch (get_feature_class()) 00761 { 00762 ENUM_CASE(C_UNKNOWN) 00763 ENUM_CASE(C_DENSE) 00764 ENUM_CASE(C_SPARSE) 00765 ENUM_CASE(C_STRING) 00766 ENUM_CASE(C_STREAMING_DENSE) 00767 ENUM_CASE(C_STREAMING_SPARSE) 00768 ENUM_CASE(C_STREAMING_STRING) 00769 ENUM_CASE(C_STREAMING_VW) 00770 ENUM_CASE(C_COMBINED) 00771 ENUM_CASE(C_COMBINED_DOT) 00772 ENUM_CASE(C_WD) 00773 ENUM_CASE(C_SPEC) 00774 ENUM_CASE(C_WEIGHTEDSPEC) 00775 ENUM_CASE(C_POLY) 00776 ENUM_CASE(C_BINNED_DOT) 00777 ENUM_CASE(C_DIRECTOR_DOT) 00778 ENUM_CASE(C_LATENT) 00779 ENUM_CASE(C_ANY) 00780 } 00781 00782 switch (get_feature_type()) 00783 { 00784 ENUM_CASE(F_UNKNOWN) 00785 ENUM_CASE(F_BOOL) 00786 ENUM_CASE(F_CHAR) 00787 ENUM_CASE(F_BYTE) 00788 ENUM_CASE(F_SHORT) 00789 ENUM_CASE(F_WORD) 00790 ENUM_CASE(F_INT) 00791 ENUM_CASE(F_UINT) 00792 ENUM_CASE(F_LONG) 00793 ENUM_CASE(F_ULONG) 00794 ENUM_CASE(F_SHORTREAL) 00795 ENUM_CASE(F_DREAL) 00796 ENUM_CASE(F_LONGREAL) 00797 ENUM_CASE(F_ANY) 00798 } 00799 SG_INFO( "\n"); 00800 } 00801 #undef ENUM_CASE 00802 00803 bool CKernel::init_optimization( 00804 int32_t count, int32_t *IDX, float64_t * weights) 00805 { 00806 SG_ERROR( "kernel does not support linadd optimization\n"); 00807 return false ; 00808 } 00809 00810 bool CKernel::delete_optimization() 00811 { 00812 SG_ERROR( "kernel does not support linadd optimization\n"); 00813 return false; 00814 } 00815 00816 float64_t CKernel::compute_optimized(int32_t vector_idx) 00817 { 00818 SG_ERROR( "kernel does not support linadd optimization\n"); 00819 return 0; 00820 } 00821 00822 void CKernel::compute_batch( 00823 int32_t num_vec, int32_t* vec_idx, float64_t* target, int32_t num_suppvec, 00824 int32_t* IDX, float64_t* weights, float64_t factor) 00825 { 00826 SG_ERROR( "kernel does not support batch computation\n"); 00827 } 00828 00829 void CKernel::add_to_normal(int32_t vector_idx, float64_t weight) 00830 { 00831 SG_ERROR( "kernel does not support linadd optimization, add_to_normal not implemented\n"); 00832 } 00833 00834 void CKernel::clear_normal() 00835 { 00836 SG_ERROR( "kernel does not support linadd optimization, clear_normal not implemented\n"); 00837 } 00838 00839 int32_t CKernel::get_num_subkernels() 00840 { 00841 return 1; 00842 } 00843 00844 void CKernel::compute_by_subkernel( 00845 int32_t vector_idx, float64_t * subkernel_contrib) 00846 { 00847 SG_ERROR( "kernel compute_by_subkernel not implemented\n"); 00848 } 00849 00850 const float64_t* CKernel::get_subkernel_weights(int32_t &num_weights) 00851 { 00852 num_weights=1 ; 00853 return &combined_kernel_weight ; 00854 } 00855 00856 void CKernel::set_subkernel_weights(const SGVector<float64_t> weights) 00857 { 00858 ASSERT(weights.vector); 00859 if (weights.vlen!=1) 00860 SG_ERROR( "number of subkernel weights should be one ...\n"); 00861 00862 combined_kernel_weight = weights.vector[0] ; 00863 } 00864 00865 bool CKernel::init_optimization_svm(CSVM * svm) 00866 { 00867 int32_t num_suppvec=svm->get_num_support_vectors(); 00868 int32_t* sv_idx=SG_MALLOC(int32_t, num_suppvec); 00869 float64_t* sv_weight=SG_MALLOC(float64_t, num_suppvec); 00870 00871 for (int32_t i=0; i<num_suppvec; i++) 00872 { 00873 sv_idx[i] = svm->get_support_vector(i); 00874 sv_weight[i] = svm->get_alpha(i); 00875 } 00876 bool ret = init_optimization(num_suppvec, sv_idx, sv_weight); 00877 00878 SG_FREE(sv_idx); 00879 SG_FREE(sv_weight); 00880 return ret; 00881 } 00882 00883 void CKernel::load_serializable_post() throw (ShogunException) 00884 { 00885 CSGObject::load_serializable_post(); 00886 if (lhs_equals_rhs) 00887 rhs=lhs; 00888 } 00889 00890 void CKernel::save_serializable_pre() throw (ShogunException) 00891 { 00892 CSGObject::save_serializable_pre(); 00893 00894 if (lhs_equals_rhs) 00895 rhs=NULL; 00896 } 00897 00898 void CKernel::save_serializable_post() throw (ShogunException) 00899 { 00900 CSGObject::save_serializable_post(); 00901 00902 if (lhs_equals_rhs) 00903 rhs=lhs; 00904 } 00905 00906 void CKernel::register_params() { 00907 SG_ADD(&cache_size, "cache_size", 00908 "Cache size in MB.", MS_NOT_AVAILABLE); 00909 SG_ADD((CSGObject**) &lhs, "lhs", 00910 "Feature vectors to occur on left hand side.", MS_NOT_AVAILABLE); 00911 SG_ADD((CSGObject**) &rhs, "rhs", 00912 "Feature vectors to occur on right hand side.", MS_NOT_AVAILABLE); 00913 SG_ADD(&lhs_equals_rhs, "lhs_equals_rhs", 00914 "If features on lhs are the same as on rhs.", MS_NOT_AVAILABLE); 00915 SG_ADD(&num_lhs, "num_lhs", "Number of feature vectors on left hand side.", 00916 MS_NOT_AVAILABLE); 00917 SG_ADD(&num_rhs, "num_rhs", "Number of feature vectors on right hand side.", 00918 MS_NOT_AVAILABLE); 00919 SG_ADD(&combined_kernel_weight, "combined_kernel_weight", 00920 "Combined kernel weight.", MS_AVAILABLE); 00921 SG_ADD(&optimization_initialized, "optimization_initialized", 00922 "Optimization is initialized.", MS_NOT_AVAILABLE); 00923 SG_ADD((machine_int_t*) &opt_type, "opt_type", 00924 "Optimization type.", MS_NOT_AVAILABLE); 00925 SG_ADD(&properties, "properties", "Kernel properties.", MS_NOT_AVAILABLE); 00926 SG_ADD((CSGObject**) &normalizer, "normalizer", "Normalize the kernel.", 00927 MS_AVAILABLE); 00928 } 00929 00930 00931 void CKernel::init() 00932 { 00933 cache_size=10; 00934 kernel_matrix=NULL; 00935 lhs=NULL; 00936 rhs=NULL; 00937 num_lhs=0; 00938 num_rhs=0; 00939 combined_kernel_weight=1; 00940 optimization_initialized=false; 00941 opt_type=FASTBUTMEMHUNGRY; 00942 properties=KP_NONE; 00943 normalizer=NULL; 00944 00945 #ifdef USE_SVMLIGHT 00946 memset(&kernel_cache, 0x0, sizeof(KERNEL_CACHE)); 00947 #endif //USE_SVMLIGHT 00948 00949 set_normalizer(new CIdentityKernelNormalizer()); 00950 } 00951 00952 SGMatrix<float64_t> CKernel::get_parameter_gradient(TParameter* param, 00953 CSGObject* obj, index_t index) 00954 { 00955 return SGMatrix<float64_t>(); 00956 }