SHOGUN  v2.0.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines
Kernel.cpp
Go to the documentation of this file.
00001 /*
00002  * EXCEPT FOR THE KERNEL CACHING FUNCTIONS WHICH ARE (W) THORSTEN JOACHIMS
00003  * COPYRIGHT (C) 1999  UNIVERSITAET DORTMUND - ALL RIGHTS RESERVED
00004  *
00005  * this program is free software; you can redistribute it and/or modify
00006  * it under the terms of the GNU General Public License as published by
00007  * the Free Software Foundation; either version 3 of the License, or
00008  * (at your option) any later version.
00009  *
00010  * Written (W) 1999-2009 Soeren Sonnenburg
00011  * Written (W) 1999-2008 Gunnar Raetsch
00012  * Copyright (C) 1999-2009 Fraunhofer Institute FIRST and Max-Planck-Society
00013  */
00014 
00015 #include <shogun/lib/config.h>
00016 #include <shogun/lib/common.h>
00017 #include <shogun/io/SGIO.h>
00018 #include <shogun/io/File.h>
00019 #include <shogun/lib/Time.h>
00020 #include <shogun/lib/Signal.h>
00021 
00022 #include <shogun/base/Parallel.h>
00023 
00024 #include <shogun/kernel/Kernel.h>
00025 #include <shogun/kernel/normalizer/IdentityKernelNormalizer.h>
00026 #include <shogun/features/Features.h>
00027 #include <shogun/base/Parameter.h>
00028 
00029 #include <shogun/classifier/svm/SVM.h>
00030 
00031 #include <string.h>
00032 #include <unistd.h>
00033 #include <math.h>
00034 
00035 #ifdef HAVE_PTHREAD
00036 #include <pthread.h>
00037 #endif
00038 
00039 using namespace shogun;
00040 
00041 CKernel::CKernel() : CSGObject()
00042 {
00043     init();
00044     register_params();
00045 }
00046 
00047 CKernel::CKernel(int32_t size) : CSGObject()
00048 {
00049     init();
00050 
00051     if (size<10)
00052         size=10;
00053 
00054     cache_size=size;
00055     register_params();
00056 }
00057 
00058 
00059 CKernel::CKernel(CFeatures* p_lhs, CFeatures* p_rhs, int32_t size) : CSGObject()
00060 {
00061     init();
00062 
00063     if (size<10)
00064         size=10;
00065 
00066     cache_size=size;
00067 
00068     set_normalizer(new CIdentityKernelNormalizer());
00069     init(p_lhs, p_rhs);
00070     register_params();
00071 }
00072 
00073 CKernel::~CKernel()
00074 {
00075     if (get_is_initialized())
00076         SG_ERROR("Kernel still initialized on destruction.\n");
00077 
00078     remove_lhs_and_rhs();
00079     SG_UNREF(normalizer);
00080 
00081     SG_INFO("Kernel deleted (%p).\n", this);
00082 }
00083 
00084 #ifdef USE_SVMLIGHT
00085 void CKernel::resize_kernel_cache(KERNELCACHE_IDX size, bool regression_hack)
00086 {
00087     if (size<10)
00088         size=10;
00089 
00090     kernel_cache_cleanup();
00091     cache_size=size;
00092 
00093     if (has_features() && get_num_vec_lhs())
00094         kernel_cache_init(cache_size, regression_hack);
00095 }
00096 #endif //USE_SVMLIGHT
00097 
00098 bool CKernel::init(CFeatures* l, CFeatures* r)
00099 {
00100     /* make sure that features are not deleted if same ones are used */
00101     SG_REF(l);
00102     SG_REF(r);
00103 
00104     //make sure features were indeed supplied
00105     ASSERT(l);
00106     ASSERT(r);
00107 
00108     //make sure features are compatible
00109     ASSERT(l->get_feature_class()==r->get_feature_class());
00110     ASSERT(l->get_feature_type()==r->get_feature_type());
00111 
00112     //remove references to previous features
00113     remove_lhs_and_rhs();
00114 
00115     //increase reference counts
00116     SG_REF(l);
00117     if (l==r)
00118         lhs_equals_rhs=true;
00119     else // l!=r
00120         SG_REF(r);
00121 
00122     lhs=l;
00123     rhs=r;
00124 
00125     ASSERT(!num_lhs || num_lhs==l->get_num_vectors());
00126     ASSERT(!num_rhs || num_rhs==l->get_num_vectors());
00127 
00128     num_lhs=l->get_num_vectors();
00129     num_rhs=r->get_num_vectors();
00130 
00131     /* unref "safety" refs from beginning */
00132     SG_UNREF(r);
00133     SG_UNREF(l);
00134 
00135     return true;
00136 }
00137 
00138 bool CKernel::set_normalizer(CKernelNormalizer* n)
00139 {
00140     SG_REF(n);
00141     if (lhs && rhs)
00142         n->init(this);
00143 
00144     SG_UNREF(normalizer);
00145     normalizer=n;
00146 
00147     return (normalizer!=NULL);
00148 }
00149 
00150 CKernelNormalizer* CKernel::get_normalizer()
00151 {
00152     SG_REF(normalizer)
00153     return normalizer;
00154 }
00155 
00156 bool CKernel::init_normalizer()
00157 {
00158     return normalizer->init(this);
00159 }
00160 
00161 void CKernel::cleanup()
00162 {
00163     remove_lhs_and_rhs();
00164 }
00165 
00166 #ifdef USE_SVMLIGHT
00167 /****************************** Cache handling *******************************/
00168 
00169 void CKernel::kernel_cache_init(int32_t buffsize, bool regression_hack)
00170 {
00171     int32_t totdoc=get_num_vec_lhs();
00172     if (totdoc<=0)
00173     {
00174         SG_ERROR("kernel has zero rows: num_lhs=%d num_rhs=%d\n",
00175                 get_num_vec_lhs(), get_num_vec_rhs());
00176     }
00177     uint64_t buffer_size=0;
00178     int32_t i;
00179 
00180     //in regression the additional constraints are made by doubling the training data
00181     if (regression_hack)
00182         totdoc*=2;
00183 
00184     buffer_size=((uint64_t) buffsize)*1024*1024/sizeof(KERNELCACHE_ELEM);
00185     if (buffer_size>((uint64_t) totdoc)*totdoc)
00186         buffer_size=((uint64_t) totdoc)*totdoc;
00187 
00188     SG_INFO( "using a kernel cache of size %lld MB (%lld bytes) for %s Kernel\n", buffer_size*sizeof(KERNELCACHE_ELEM)/1024/1024, buffer_size*sizeof(KERNELCACHE_ELEM), get_name());
00189 
00190     //make sure it fits in the *signed* KERNELCACHE_IDX type
00191     ASSERT(buffer_size < (((uint64_t) 1) << (sizeof(KERNELCACHE_IDX)*8-1)));
00192 
00193     kernel_cache.index = SG_MALLOC(int32_t, totdoc);
00194     kernel_cache.occu = SG_MALLOC(int32_t, totdoc);
00195     kernel_cache.lru = SG_MALLOC(int32_t, totdoc);
00196     kernel_cache.invindex = SG_MALLOC(int32_t, totdoc);
00197     kernel_cache.active2totdoc = SG_MALLOC(int32_t, totdoc);
00198     kernel_cache.totdoc2active = SG_MALLOC(int32_t, totdoc);
00199     kernel_cache.buffer = SG_MALLOC(KERNELCACHE_ELEM, buffer_size);
00200     kernel_cache.buffsize=buffer_size;
00201     kernel_cache.max_elems=(int32_t) (kernel_cache.buffsize/totdoc);
00202 
00203     if(kernel_cache.max_elems>totdoc) {
00204         kernel_cache.max_elems=totdoc;
00205     }
00206 
00207     kernel_cache.elems=0;   // initialize cache
00208     for(i=0;i<totdoc;i++) {
00209         kernel_cache.index[i]=-1;
00210         kernel_cache.lru[i]=0;
00211     }
00212     for(i=0;i<totdoc;i++) {
00213         kernel_cache.occu[i]=0;
00214         kernel_cache.invindex[i]=-1;
00215     }
00216 
00217     kernel_cache.activenum=totdoc;;
00218     for(i=0;i<totdoc;i++) {
00219         kernel_cache.active2totdoc[i]=i;
00220         kernel_cache.totdoc2active[i]=i;
00221     }
00222 
00223     kernel_cache.time=0;
00224 }
00225 
00226 void CKernel::get_kernel_row(
00227     int32_t docnum, int32_t *active2dnum, float64_t *buffer, bool full_line)
00228 {
00229     int32_t i,j;
00230     KERNELCACHE_IDX start;
00231 
00232     int32_t num_vectors = get_num_vec_lhs();
00233     if (docnum>=num_vectors)
00234         docnum=2*num_vectors-1-docnum;
00235 
00236     /* is cached? */
00237     if(kernel_cache.index[docnum] != -1)
00238     {
00239         kernel_cache.lru[kernel_cache.index[docnum]]=kernel_cache.time; /* lru */
00240         start=((KERNELCACHE_IDX) kernel_cache.activenum)*kernel_cache.index[docnum];
00241 
00242         if (full_line)
00243         {
00244             for(j=0;j<get_num_vec_lhs();j++)
00245             {
00246                 if(kernel_cache.totdoc2active[j] >= 0)
00247                     buffer[j]=kernel_cache.buffer[start+kernel_cache.totdoc2active[j]];
00248                 else
00249                     buffer[j]=(float64_t) kernel(docnum, j);
00250             }
00251         }
00252         else
00253         {
00254             for(i=0;(j=active2dnum[i])>=0;i++)
00255             {
00256                 if(kernel_cache.totdoc2active[j] >= 0)
00257                     buffer[j]=kernel_cache.buffer[start+kernel_cache.totdoc2active[j]];
00258                 else
00259                 {
00260                     int32_t k=j;
00261                     if (k>=num_vectors)
00262                         k=2*num_vectors-1-k;
00263                     buffer[j]=(float64_t) kernel(docnum, k);
00264                 }
00265             }
00266         }
00267     }
00268     else
00269     {
00270         if (full_line)
00271         {
00272             for(j=0;j<get_num_vec_lhs();j++)
00273                 buffer[j]=(KERNELCACHE_ELEM) kernel(docnum, j);
00274         }
00275         else
00276         {
00277             for(i=0;(j=active2dnum[i])>=0;i++)
00278             {
00279                 int32_t k=j;
00280                 if (k>=num_vectors)
00281                     k=2*num_vectors-1-k;
00282                 buffer[j]=(KERNELCACHE_ELEM) kernel(docnum, k);
00283             }
00284         }
00285     }
00286 }
00287 
00288 
00289 // Fills cache for the row m
00290 void CKernel::cache_kernel_row(int32_t m)
00291 {
00292     register int32_t j,k,l;
00293     register KERNELCACHE_ELEM *cache;
00294 
00295     int32_t num_vectors = get_num_vec_lhs();
00296 
00297     if (m>=num_vectors)
00298         m=2*num_vectors-1-m;
00299 
00300     if(!kernel_cache_check(m))   // not cached yet
00301     {
00302         cache = kernel_cache_clean_and_malloc(m);
00303         if(cache) {
00304             l=kernel_cache.totdoc2active[m];
00305 
00306             for(j=0;j<kernel_cache.activenum;j++)  // fill cache
00307             {
00308                 k=kernel_cache.active2totdoc[j];
00309 
00310                 if((kernel_cache.index[k] != -1) && (l != -1) && (k != m)) {
00311                     cache[j]=kernel_cache.buffer[((KERNELCACHE_IDX) kernel_cache.activenum)
00312                         *kernel_cache.index[k]+l];
00313                 }
00314                 else
00315                 {
00316                     if (k>=num_vectors)
00317                         k=2*num_vectors-1-k;
00318 
00319                     cache[j]=kernel(m, k);
00320                 }
00321             }
00322         }
00323         else
00324             perror("Error: Kernel cache full! => increase cache size");
00325     }
00326 }
00327 
00328 
00329 void* CKernel::cache_multiple_kernel_row_helper(void* p)
00330 {
00331     int32_t j,k,l;
00332     S_KTHREAD_PARAM* params = (S_KTHREAD_PARAM*) p;
00333 
00334     for (int32_t i=params->start; i<params->end; i++)
00335     {
00336         KERNELCACHE_ELEM* cache=params->cache[i];
00337         int32_t m = params->uncached_rows[i];
00338         l=params->kernel_cache->totdoc2active[m];
00339 
00340         for(j=0;j<params->kernel_cache->activenum;j++)  // fill cache
00341         {
00342             k=params->kernel_cache->active2totdoc[j];
00343 
00344             if((params->kernel_cache->index[k] != -1) && (l != -1) && (!params->needs_computation[k])) {
00345                 cache[j]=params->kernel_cache->buffer[((KERNELCACHE_IDX) params->kernel_cache->activenum)
00346                     *params->kernel_cache->index[k]+l];
00347             }
00348             else
00349                 {
00350                     if (k>=params->num_vectors)
00351                         k=2*params->num_vectors-1-k;
00352 
00353                     cache[j]=params->kernel->kernel(m, k);
00354                 }
00355         }
00356 
00357         //now line m is cached
00358         params->needs_computation[m]=0;
00359     }
00360     return NULL;
00361 }
00362 
00363 // Fills cache for the rows in key
00364 void CKernel::cache_multiple_kernel_rows(int32_t* rows, int32_t num_rows)
00365 {
00366 #ifdef HAVE_PTHREAD
00367     int32_t nthreads=parallel->get_num_threads();
00368 
00369     if (nthreads<2)
00370     {
00371 #endif
00372         for(int32_t i=0;i<num_rows;i++)
00373             cache_kernel_row(rows[i]);
00374 #ifdef HAVE_PTHREAD
00375     }
00376     else
00377     {
00378         // fill up kernel cache
00379         int32_t* uncached_rows = SG_MALLOC(int32_t, num_rows);
00380         KERNELCACHE_ELEM** cache = SG_MALLOC(KERNELCACHE_ELEM*, num_rows);
00381         pthread_t* threads = SG_MALLOC(pthread_t, nthreads-1);
00382         S_KTHREAD_PARAM* params = SG_MALLOC(S_KTHREAD_PARAM, nthreads-1);
00383         int32_t num_threads=nthreads-1;
00384         int32_t num_vec=get_num_vec_lhs();
00385         ASSERT(num_vec>0);
00386         uint8_t* needs_computation=SG_CALLOC(uint8_t, num_vec);
00387 
00388         int32_t step=0;
00389         int32_t num=0;
00390         int32_t end=0;
00391 
00392         // allocate cachelines if necessary
00393         for (int32_t i=0; i<num_rows; i++)
00394         {
00395             int32_t idx=rows[i];
00396             if (idx>=num_vec)
00397                 idx=2*num_vec-1-idx;
00398 
00399             if (kernel_cache_check(idx))
00400                 continue;
00401 
00402             needs_computation[idx]=1;
00403             uncached_rows[num]=idx;
00404             cache[num]= kernel_cache_clean_and_malloc(idx);
00405 
00406             if (!cache[num])
00407                 SG_ERROR("Kernel cache full! => increase cache size\n");
00408 
00409             num++;
00410         }
00411 
00412         if (num>0)
00413         {
00414             step= num/nthreads;
00415 
00416             if (step<1)
00417             {
00418                 num_threads=num-1;
00419                 step=1;
00420             }
00421 
00422             for (int32_t t=0; t<num_threads; t++)
00423             {
00424                 params[t].kernel = this;
00425                 params[t].kernel_cache = &kernel_cache;
00426                 params[t].cache = cache;
00427                 params[t].uncached_rows = uncached_rows;
00428                 params[t].needs_computation = needs_computation;
00429                 params[t].num_uncached = num;
00430                 params[t].start = t*step;
00431                 params[t].end = (t+1)*step;
00432                 params[t].num_vectors = get_num_vec_lhs();
00433                 end=params[t].end;
00434 
00435                 int code=pthread_create(&threads[t], NULL,
00436                         CKernel::cache_multiple_kernel_row_helper, (void*)&params[t]);
00437 
00438                 if (code != 0)
00439                 {
00440                     SG_WARNING("Thread creation failed (thread %d of %d) "
00441                             "with error:'%s'\n",t, num_threads, strerror(code));
00442                     num_threads=t;
00443                     end=t*step;
00444                     break;
00445                 }
00446             }
00447         }
00448         else
00449             num_threads=-1;
00450 
00451 
00452         S_KTHREAD_PARAM last_param;
00453         last_param.kernel = this;
00454         last_param.kernel_cache = &kernel_cache;
00455         last_param.cache = cache;
00456         last_param.uncached_rows = uncached_rows;
00457         last_param.needs_computation = needs_computation;
00458         last_param.start = end;
00459         last_param.num_uncached = num;
00460         last_param.end = num;
00461         last_param.num_vectors = get_num_vec_lhs();
00462 
00463         cache_multiple_kernel_row_helper(&last_param);
00464 
00465 
00466         for (int32_t t=0; t<num_threads; t++)
00467         {
00468             if (pthread_join(threads[t], NULL) != 0)
00469                 SG_WARNING("pthread_join of thread %d/%d failed\n", t, num_threads);
00470         }
00471 
00472         SG_FREE(needs_computation);
00473         SG_FREE(params);
00474         SG_FREE(threads);
00475         SG_FREE(cache);
00476         SG_FREE(uncached_rows);
00477     }
00478 #endif
00479 }
00480 
00481 // remove numshrink columns in the cache
00482 // which correspond to examples marked
00483 void CKernel::kernel_cache_shrink(
00484     int32_t totdoc, int32_t numshrink, int32_t *after)
00485 {
00486     register int32_t i,j,jj,scount;     // 0 in after.
00487     KERNELCACHE_IDX from=0,to=0;
00488     int32_t *keep;
00489 
00490     keep=SG_MALLOC(int32_t, totdoc);
00491     for(j=0;j<totdoc;j++) {
00492         keep[j]=1;
00493     }
00494     scount=0;
00495     for(jj=0;(jj<kernel_cache.activenum) && (scount<numshrink);jj++) {
00496         j=kernel_cache.active2totdoc[jj];
00497         if(!after[j]) {
00498             scount++;
00499             keep[j]=0;
00500         }
00501     }
00502 
00503     for(i=0;i<kernel_cache.max_elems;i++) {
00504         for(jj=0;jj<kernel_cache.activenum;jj++) {
00505             j=kernel_cache.active2totdoc[jj];
00506             if(!keep[j]) {
00507                 from++;
00508             }
00509             else {
00510                 kernel_cache.buffer[to]=kernel_cache.buffer[from];
00511                 to++;
00512                 from++;
00513             }
00514         }
00515     }
00516 
00517     kernel_cache.activenum=0;
00518     for(j=0;j<totdoc;j++) {
00519         if((keep[j]) && (kernel_cache.totdoc2active[j] != -1)) {
00520             kernel_cache.active2totdoc[kernel_cache.activenum]=j;
00521             kernel_cache.totdoc2active[j]=kernel_cache.activenum;
00522             kernel_cache.activenum++;
00523         }
00524         else {
00525             kernel_cache.totdoc2active[j]=-1;
00526         }
00527     }
00528 
00529     kernel_cache.max_elems=
00530         (int32_t)(kernel_cache.buffsize/kernel_cache.activenum);
00531     if(kernel_cache.max_elems>totdoc) {
00532         kernel_cache.max_elems=totdoc;
00533     }
00534 
00535     SG_FREE(keep);
00536 
00537 }
00538 
00539 void CKernel::kernel_cache_reset_lru()
00540 {
00541     int32_t maxlru=0,k;
00542 
00543     for(k=0;k<kernel_cache.max_elems;k++) {
00544         if(maxlru < kernel_cache.lru[k])
00545             maxlru=kernel_cache.lru[k];
00546     }
00547     for(k=0;k<kernel_cache.max_elems;k++) {
00548         kernel_cache.lru[k]-=maxlru;
00549     }
00550 }
00551 
00552 void CKernel::kernel_cache_cleanup()
00553 {
00554     SG_FREE(kernel_cache.index);
00555     SG_FREE(kernel_cache.occu);
00556     SG_FREE(kernel_cache.lru);
00557     SG_FREE(kernel_cache.invindex);
00558     SG_FREE(kernel_cache.active2totdoc);
00559     SG_FREE(kernel_cache.totdoc2active);
00560     SG_FREE(kernel_cache.buffer);
00561     memset(&kernel_cache, 0x0, sizeof(KERNEL_CACHE));
00562 }
00563 
00564 int32_t CKernel::kernel_cache_malloc()
00565 {
00566   int32_t i;
00567 
00568   if(kernel_cache_space_available()) {
00569     for(i=0;i<kernel_cache.max_elems;i++) {
00570       if(!kernel_cache.occu[i]) {
00571     kernel_cache.occu[i]=1;
00572     kernel_cache.elems++;
00573     return(i);
00574       }
00575     }
00576   }
00577   return(-1);
00578 }
00579 
00580 void CKernel::kernel_cache_free(int32_t cacheidx)
00581 {
00582     kernel_cache.occu[cacheidx]=0;
00583     kernel_cache.elems--;
00584 }
00585 
00586 // remove least recently used cache
00587 // element
00588 int32_t CKernel::kernel_cache_free_lru()
00589 {
00590   register int32_t k,least_elem=-1,least_time;
00591 
00592   least_time=kernel_cache.time+1;
00593   for(k=0;k<kernel_cache.max_elems;k++) {
00594     if(kernel_cache.invindex[k] != -1) {
00595       if(kernel_cache.lru[k]<least_time) {
00596     least_time=kernel_cache.lru[k];
00597     least_elem=k;
00598       }
00599     }
00600   }
00601 
00602   if(least_elem != -1) {
00603     kernel_cache_free(least_elem);
00604     kernel_cache.index[kernel_cache.invindex[least_elem]]=-1;
00605     kernel_cache.invindex[least_elem]=-1;
00606     return(1);
00607   }
00608   return(0);
00609 }
00610 
00611 // Get a free cache entry. In case cache is full, the lru
00612 // element is removed.
00613 KERNELCACHE_ELEM* CKernel::kernel_cache_clean_and_malloc(int32_t cacheidx)
00614 {
00615     int32_t result;
00616     if((result = kernel_cache_malloc()) == -1) {
00617         if(kernel_cache_free_lru()) {
00618             result = kernel_cache_malloc();
00619         }
00620     }
00621     kernel_cache.index[cacheidx]=result;
00622     if(result == -1) {
00623         return(0);
00624     }
00625     kernel_cache.invindex[result]=cacheidx;
00626     kernel_cache.lru[kernel_cache.index[cacheidx]]=kernel_cache.time; // lru
00627     return &kernel_cache.buffer[((KERNELCACHE_IDX) kernel_cache.activenum)*kernel_cache.index[cacheidx]];
00628 }
00629 #endif //USE_SVMLIGHT
00630 
00631 void CKernel::load(CFile* loader)
00632 {
00633     SG_SET_LOCALE_C;
00634     SG_RESET_LOCALE;
00635 }
00636 
00637 void CKernel::save(CFile* writer)
00638 {
00639     SGMatrix<float64_t> k_matrix=get_kernel_matrix<float64_t>();
00640     SG_SET_LOCALE_C;
00641     writer->set_matrix(k_matrix.matrix, k_matrix.num_rows, k_matrix.num_cols);
00642     SG_RESET_LOCALE;
00643 }
00644 
00645 void CKernel::remove_lhs_and_rhs()
00646 {
00647     if (rhs!=lhs)
00648         SG_UNREF(rhs);
00649     rhs = NULL;
00650     num_rhs=0;
00651 
00652     SG_UNREF(lhs);
00653     lhs = NULL;
00654     num_lhs=0;
00655     lhs_equals_rhs=false;
00656 
00657 #ifdef USE_SVMLIGHT
00658     cache_reset();
00659 #endif //USE_SVMLIGHT
00660 }
00661 
00662 void CKernel::remove_lhs()
00663 {
00664     if (rhs==lhs)
00665         rhs=NULL;
00666     SG_UNREF(lhs);
00667     lhs = NULL;
00668     num_lhs=0;
00669     lhs_equals_rhs=false;
00670 #ifdef USE_SVMLIGHT
00671     cache_reset();
00672 #endif //USE_SVMLIGHT
00673 }
00674 
00676 void CKernel::remove_rhs()
00677 {
00678     if (rhs!=lhs)
00679         SG_UNREF(rhs);
00680     rhs = NULL;
00681     num_rhs=0;
00682     lhs_equals_rhs=false;
00683 
00684 #ifdef USE_SVMLIGHT
00685     cache_reset();
00686 #endif //USE_SVMLIGHT
00687 }
00688 
00689 #define ENUM_CASE(n) case n: SG_INFO(#n " "); break;
00690 
00691 void CKernel::list_kernel()
00692 {
00693     SG_INFO( "%p - \"%s\" weight=%1.2f OPT:%s", this, get_name(),
00694             get_combined_kernel_weight(),
00695             get_optimization_type()==FASTBUTMEMHUNGRY ? "FASTBUTMEMHUNGRY" :
00696             "SLOWBUTMEMEFFICIENT");
00697 
00698     switch (get_kernel_type())
00699     {
00700         ENUM_CASE(K_UNKNOWN)
00701         ENUM_CASE(K_LINEAR)
00702         ENUM_CASE(K_POLY)
00703         ENUM_CASE(K_GAUSSIAN)
00704         ENUM_CASE(K_GAUSSIANSHIFT)
00705         ENUM_CASE(K_GAUSSIANMATCH)
00706         ENUM_CASE(K_HISTOGRAM)
00707         ENUM_CASE(K_SALZBERG)
00708         ENUM_CASE(K_LOCALITYIMPROVED)
00709         ENUM_CASE(K_SIMPLELOCALITYIMPROVED)
00710         ENUM_CASE(K_FIXEDDEGREE)
00711         ENUM_CASE(K_WEIGHTEDDEGREE)
00712         ENUM_CASE(K_WEIGHTEDDEGREEPOS)
00713         ENUM_CASE(K_WEIGHTEDDEGREERBF)
00714         ENUM_CASE(K_WEIGHTEDCOMMWORDSTRING)
00715         ENUM_CASE(K_POLYMATCH)
00716         ENUM_CASE(K_ALIGNMENT)
00717         ENUM_CASE(K_COMMWORDSTRING)
00718         ENUM_CASE(K_COMMULONGSTRING)
00719         ENUM_CASE(K_SPECTRUMRBF)
00720         ENUM_CASE(K_COMBINED)
00721         ENUM_CASE(K_AUC)
00722         ENUM_CASE(K_CUSTOM)
00723         ENUM_CASE(K_SIGMOID)
00724         ENUM_CASE(K_CHI2)
00725         ENUM_CASE(K_DIAG)
00726         ENUM_CASE(K_CONST)
00727         ENUM_CASE(K_DISTANCE)
00728         ENUM_CASE(K_LOCALALIGNMENT)
00729         ENUM_CASE(K_PYRAMIDCHI2)
00730         ENUM_CASE(K_OLIGO)
00731         ENUM_CASE(K_MATCHWORD)
00732         ENUM_CASE(K_TPPK)
00733         ENUM_CASE(K_REGULATORYMODULES)
00734         ENUM_CASE(K_SPARSESPATIALSAMPLE)
00735         ENUM_CASE(K_HISTOGRAMINTERSECTION)
00736         ENUM_CASE(K_WAVELET)
00737         ENUM_CASE(K_WAVE)
00738         ENUM_CASE(K_CAUCHY)
00739         ENUM_CASE(K_TSTUDENT)
00740         ENUM_CASE(K_MULTIQUADRIC)
00741         ENUM_CASE(K_EXPONENTIAL)
00742         ENUM_CASE(K_RATIONAL_QUADRATIC)
00743         ENUM_CASE(K_POWER)
00744         ENUM_CASE(K_SPHERICAL)
00745         ENUM_CASE(K_LOG)
00746         ENUM_CASE(K_SPLINE)
00747         ENUM_CASE(K_ANOVA)
00748         ENUM_CASE(K_CIRCULAR)
00749         ENUM_CASE(K_INVERSEMULTIQUADRIC)
00750         ENUM_CASE(K_SPECTRUMMISMATCHRBF)
00751         ENUM_CASE(K_DISTANTSEGMENTS)
00752         ENUM_CASE(K_BESSEL)
00753         ENUM_CASE(K_JENSENSHANNON)
00754         ENUM_CASE(K_DIRECTOR)
00755         ENUM_CASE(K_PRODUCT)
00756         ENUM_CASE(K_LINEARARD)
00757         ENUM_CASE(K_GAUSSIANARD)
00758     }
00759 
00760     switch (get_feature_class())
00761     {
00762         ENUM_CASE(C_UNKNOWN)
00763         ENUM_CASE(C_DENSE)
00764         ENUM_CASE(C_SPARSE)
00765         ENUM_CASE(C_STRING)
00766         ENUM_CASE(C_STREAMING_DENSE)
00767         ENUM_CASE(C_STREAMING_SPARSE)
00768         ENUM_CASE(C_STREAMING_STRING)
00769         ENUM_CASE(C_STREAMING_VW)
00770         ENUM_CASE(C_COMBINED)
00771         ENUM_CASE(C_COMBINED_DOT)
00772         ENUM_CASE(C_WD)
00773         ENUM_CASE(C_SPEC)
00774         ENUM_CASE(C_WEIGHTEDSPEC)
00775         ENUM_CASE(C_POLY)
00776         ENUM_CASE(C_BINNED_DOT)
00777         ENUM_CASE(C_DIRECTOR_DOT)
00778         ENUM_CASE(C_LATENT)
00779         ENUM_CASE(C_ANY)
00780     }
00781 
00782     switch (get_feature_type())
00783     {
00784         ENUM_CASE(F_UNKNOWN)
00785         ENUM_CASE(F_BOOL)
00786         ENUM_CASE(F_CHAR)
00787         ENUM_CASE(F_BYTE)
00788         ENUM_CASE(F_SHORT)
00789         ENUM_CASE(F_WORD)
00790         ENUM_CASE(F_INT)
00791         ENUM_CASE(F_UINT)
00792         ENUM_CASE(F_LONG)
00793         ENUM_CASE(F_ULONG)
00794         ENUM_CASE(F_SHORTREAL)
00795         ENUM_CASE(F_DREAL)
00796         ENUM_CASE(F_LONGREAL)
00797         ENUM_CASE(F_ANY)
00798     }
00799     SG_INFO( "\n");
00800 }
00801 #undef ENUM_CASE
00802 
00803 bool CKernel::init_optimization(
00804     int32_t count, int32_t *IDX, float64_t * weights)
00805 {
00806    SG_ERROR( "kernel does not support linadd optimization\n");
00807     return false ;
00808 }
00809 
00810 bool CKernel::delete_optimization()
00811 {
00812    SG_ERROR( "kernel does not support linadd optimization\n");
00813     return false;
00814 }
00815 
00816 float64_t CKernel::compute_optimized(int32_t vector_idx)
00817 {
00818    SG_ERROR( "kernel does not support linadd optimization\n");
00819     return 0;
00820 }
00821 
00822 void CKernel::compute_batch(
00823     int32_t num_vec, int32_t* vec_idx, float64_t* target, int32_t num_suppvec,
00824     int32_t* IDX, float64_t* weights, float64_t factor)
00825 {
00826    SG_ERROR( "kernel does not support batch computation\n");
00827 }
00828 
00829 void CKernel::add_to_normal(int32_t vector_idx, float64_t weight)
00830 {
00831    SG_ERROR( "kernel does not support linadd optimization, add_to_normal not implemented\n");
00832 }
00833 
00834 void CKernel::clear_normal()
00835 {
00836    SG_ERROR( "kernel does not support linadd optimization, clear_normal not implemented\n");
00837 }
00838 
00839 int32_t CKernel::get_num_subkernels()
00840 {
00841     return 1;
00842 }
00843 
00844 void CKernel::compute_by_subkernel(
00845     int32_t vector_idx, float64_t * subkernel_contrib)
00846 {
00847    SG_ERROR( "kernel compute_by_subkernel not implemented\n");
00848 }
00849 
00850 const float64_t* CKernel::get_subkernel_weights(int32_t &num_weights)
00851 {
00852     num_weights=1 ;
00853     return &combined_kernel_weight ;
00854 }
00855 
00856 void CKernel::set_subkernel_weights(const SGVector<float64_t> weights)
00857 {
00858     ASSERT(weights.vector);
00859     if (weights.vlen!=1)
00860       SG_ERROR( "number of subkernel weights should be one ...\n");
00861 
00862     combined_kernel_weight = weights.vector[0] ;
00863 }
00864 
00865 bool CKernel::init_optimization_svm(CSVM * svm)
00866 {
00867     int32_t num_suppvec=svm->get_num_support_vectors();
00868     int32_t* sv_idx=SG_MALLOC(int32_t, num_suppvec);
00869     float64_t* sv_weight=SG_MALLOC(float64_t, num_suppvec);
00870 
00871     for (int32_t i=0; i<num_suppvec; i++)
00872     {
00873         sv_idx[i]    = svm->get_support_vector(i);
00874         sv_weight[i] = svm->get_alpha(i);
00875     }
00876     bool ret = init_optimization(num_suppvec, sv_idx, sv_weight);
00877 
00878     SG_FREE(sv_idx);
00879     SG_FREE(sv_weight);
00880     return ret;
00881 }
00882 
00883 void CKernel::load_serializable_post() throw (ShogunException)
00884 {
00885     CSGObject::load_serializable_post();
00886     if (lhs_equals_rhs)
00887         rhs=lhs;
00888 }
00889 
00890 void CKernel::save_serializable_pre() throw (ShogunException)
00891 {
00892     CSGObject::save_serializable_pre();
00893 
00894     if (lhs_equals_rhs)
00895         rhs=NULL;
00896 }
00897 
00898 void CKernel::save_serializable_post() throw (ShogunException)
00899 {
00900     CSGObject::save_serializable_post();
00901 
00902     if (lhs_equals_rhs)
00903         rhs=lhs;
00904 }
00905 
00906 void CKernel::register_params()   {
00907     SG_ADD(&cache_size, "cache_size",
00908         "Cache size in MB.", MS_NOT_AVAILABLE);
00909     SG_ADD((CSGObject**) &lhs, "lhs",
00910       "Feature vectors to occur on left hand side.", MS_NOT_AVAILABLE);
00911     SG_ADD((CSGObject**) &rhs, "rhs",
00912       "Feature vectors to occur on right hand side.", MS_NOT_AVAILABLE);
00913     SG_ADD(&lhs_equals_rhs, "lhs_equals_rhs",
00914         "If features on lhs are the same as on rhs.", MS_NOT_AVAILABLE);
00915     SG_ADD(&num_lhs, "num_lhs", "Number of feature vectors on left hand side.",
00916         MS_NOT_AVAILABLE);
00917     SG_ADD(&num_rhs, "num_rhs", "Number of feature vectors on right hand side.",
00918         MS_NOT_AVAILABLE);
00919     SG_ADD(&combined_kernel_weight, "combined_kernel_weight",
00920             "Combined kernel weight.", MS_AVAILABLE);
00921     SG_ADD(&optimization_initialized, "optimization_initialized",
00922           "Optimization is initialized.", MS_NOT_AVAILABLE);
00923     SG_ADD((machine_int_t*) &opt_type, "opt_type",
00924           "Optimization type.", MS_NOT_AVAILABLE);
00925     SG_ADD(&properties, "properties", "Kernel properties.", MS_NOT_AVAILABLE);
00926     SG_ADD((CSGObject**) &normalizer, "normalizer", "Normalize the kernel.",
00927         MS_AVAILABLE);
00928 }
00929 
00930 
00931 void CKernel::init()
00932 {
00933     cache_size=10;
00934     kernel_matrix=NULL;
00935     lhs=NULL;
00936     rhs=NULL;
00937     num_lhs=0;
00938     num_rhs=0;
00939     combined_kernel_weight=1;
00940     optimization_initialized=false;
00941     opt_type=FASTBUTMEMHUNGRY;
00942     properties=KP_NONE;
00943     normalizer=NULL;
00944 
00945 #ifdef USE_SVMLIGHT
00946     memset(&kernel_cache, 0x0, sizeof(KERNEL_CACHE));
00947 #endif //USE_SVMLIGHT
00948 
00949     set_normalizer(new CIdentityKernelNormalizer());
00950 }
00951 
00952 SGMatrix<float64_t> CKernel::get_parameter_gradient(TParameter* param,
00953         CSGObject* obj, index_t index)
00954 {
00955     return SGMatrix<float64_t>();
00956 }
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines

SHOGUN Machine Learning Toolbox - Documentation