SHOGUN  v2.0.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines
Kernel.h
Go to the documentation of this file.
00001 /*
00002  * EXCEPT FOR THE KERNEL CACHING FUNCTIONS WHICH ARE (W) THORSTEN JOACHIMS
00003  * COPYRIGHT (C) 1999  UNIVERSITAET DORTMUND - ALL RIGHTS RESERVED
00004  *
00005  * this program is free software; you can redistribute it and/or modify
00006  * it under the terms of the GNU General Public License as published by
00007  * the Free Software Foundation; either version 3 of the License, or
00008  * (at your option) any later version.
00009  *
00010  * Written (W) 1999-2009 Soeren Sonnenburg
00011  * Written (W) 1999-2008 Gunnar Raetsch
00012  * Copyright (C) 1999-2009 Fraunhofer Institute FIRST and Max-Planck-Society
00013  */
00014 
00015 #ifndef _KERNEL_H___
00016 #define _KERNEL_H___
00017 
00018 #include <shogun/lib/common.h>
00019 #include <shogun/lib/Signal.h>
00020 #include <shogun/io/SGIO.h>
00021 #include <shogun/io/File.h>
00022 #include <shogun/mathematics/Math.h>
00023 #include <shogun/features/FeatureTypes.h>
00024 #include <shogun/base/SGObject.h>
00025 #include <shogun/features/Features.h>
00026 #include <shogun/kernel/normalizer/KernelNormalizer.h>
00027 
00028 namespace shogun
00029 {
00030     class CFile;
00031     class CFeatures;
00032     class CKernelNormalizer;
00033 
00034 #ifdef USE_SHORTREAL_KERNELCACHE
00035 
00036     typedef float32_t KERNELCACHE_ELEM;
00037 #else
00038 
00039     typedef float64_t KERNELCACHE_ELEM;
00040 #endif
00041 
00043 typedef int64_t KERNELCACHE_IDX;
00044 
00045 
00047 enum EOptimizationType
00048 {
00049     FASTBUTMEMHUNGRY,
00050     SLOWBUTMEMEFFICIENT
00051 };
00052 
00054 enum EKernelType
00055 {
00056     K_UNKNOWN = 0,
00057     K_LINEAR = 10,
00058     K_POLY = 20,
00059     K_GAUSSIAN = 30,
00060     K_GAUSSIANSHIFT = 32,
00061     K_GAUSSIANMATCH = 33,
00062     K_HISTOGRAM = 40,
00063     K_SALZBERG = 41,
00064     K_LOCALITYIMPROVED = 50,
00065     K_SIMPLELOCALITYIMPROVED = 60,
00066     K_FIXEDDEGREE = 70,
00067     K_WEIGHTEDDEGREE =    80,
00068     K_WEIGHTEDDEGREEPOS = 81,
00069     K_WEIGHTEDDEGREERBF = 82,
00070     K_WEIGHTEDCOMMWORDSTRING = 90,
00071     K_POLYMATCH = 100,
00072     K_ALIGNMENT = 110,
00073     K_COMMWORDSTRING = 120,
00074     K_COMMULONGSTRING = 121,
00075     K_SPECTRUMRBF = 122,
00076     K_SPECTRUMMISMATCHRBF = 123,
00077     K_COMBINED = 140,
00078     K_AUC = 150,
00079     K_CUSTOM = 160,
00080     K_SIGMOID = 170,
00081     K_CHI2 = 180,
00082     K_DIAG = 190,
00083     K_CONST = 200,
00084     K_DISTANCE = 220,
00085     K_LOCALALIGNMENT = 230,
00086     K_PYRAMIDCHI2 = 240,
00087     K_OLIGO = 250,
00088     K_MATCHWORD = 260,
00089     K_TPPK = 270,
00090     K_REGULATORYMODULES = 280,
00091     K_SPARSESPATIALSAMPLE = 290,
00092     K_HISTOGRAMINTERSECTION = 300,
00093     K_WAVELET = 310,
00094     K_WAVE = 320,
00095     K_CAUCHY = 330,
00096     K_TSTUDENT = 340,
00097     K_RATIONAL_QUADRATIC = 350,
00098     K_MULTIQUADRIC = 360,
00099     K_EXPONENTIAL = 370,
00100     K_SPHERICAL = 380,
00101     K_SPLINE = 390,
00102     K_ANOVA = 400,
00103     K_POWER = 410,
00104     K_LOG = 420,
00105     K_CIRCULAR = 430,
00106     K_INVERSEMULTIQUADRIC = 440,
00107     K_DISTANTSEGMENTS = 450,
00108     K_BESSEL = 460,
00109     K_JENSENSHANNON = 470,
00110     K_DIRECTOR = 480,
00111     K_PRODUCT = 490,
00112     K_LINEARARD = 500,
00113     K_GAUSSIANARD = 510
00114 };
00115 
00117 enum EKernelProperty
00118 {
00119     KP_NONE = 0,
00120     KP_LINADD = 1,  // Kernels that can be optimized via doing normal updates w + dw
00121     KP_KERNCOMBINATION = 2, // Kernels that are infact a linear combination of subkernels K=\sum_i b_i*K_i
00122     KP_BATCHEVALUATION = 4  // Kernels that can on the fly generate normals in linadd and more quickly/memory efficient process batches instead of single examples
00123 };
00124 
00125 #ifndef DOXYGEN_SHOULD_SKIP_THIS
00126 
00127 template <class T> struct K_THREAD_PARAM
00128 {
00130     CKernel* kernel;
00132     int32_t start;
00134     int32_t end;
00136     int32_t total_start;
00138     int32_t total_end;
00140     int32_t m;
00142     int32_t n;
00144     T* result;
00146     bool symmetric;
00148     bool verbose;
00149 };
00150 #endif
00151 
00152 class CSVM;
00153 
00179 class CKernel : public CSGObject
00180 {
00181     friend class CVarianceKernelNormalizer;
00182     friend class CSqrtDiagKernelNormalizer;
00183     friend class CAvgDiagKernelNormalizer;
00184     friend class CRidgeKernelNormalizer;
00185     friend class CFirstElementKernelNormalizer;
00186     friend class CMultitaskKernelNormalizer;
00187     friend class CMultitaskKernelMklNormalizer;
00188     friend class CMultitaskKernelMaskNormalizer;
00189     friend class CMultitaskKernelMaskPairNormalizer;
00190     friend class CTanimotoKernelNormalizer;
00191     friend class CDiceKernelNormalizer;
00192     friend class CZeroMeanCenterKernelNormalizer;
00193 
00194     public:
00195 
00199         CKernel();
00200 
00201 
00206         CKernel(int32_t size);
00207 
00214         CKernel(CFeatures* l, CFeatures* r, int32_t size);
00215 
00216         virtual ~CKernel();
00217 
00225         inline float64_t kernel(int32_t idx_a, int32_t idx_b)
00226         {
00227             REQUIRE(idx_a>=0 && idx_b>=0 && idx_a<num_lhs && idx_b<num_rhs,
00228                 "Index out of Range: idx_a=%d/%d idx_b=%d/%d\n",
00229                 idx_a,num_lhs, idx_b,num_rhs);
00230 
00231             return normalizer->normalize(compute(idx_a, idx_b), idx_a, idx_b);
00232         }
00233 
00238         SGMatrix<float64_t> get_kernel_matrix()
00239         {
00240             return get_kernel_matrix<float64_t>();
00241         }
00242 
00248         virtual SGVector<float64_t> get_kernel_col(int32_t j)
00249         {
00250 
00251             SGVector<float64_t> col = SGVector<float64_t>(num_rhs);
00252 
00253             for (int32_t i=0; i!=num_rhs; i++)
00254                 col[i] = kernel(i,j);
00255 
00256             return col;
00257         }
00258 
00259 
00265         virtual SGVector<float64_t> get_kernel_row(int32_t i)
00266         {
00267             SGVector<float64_t> row = SGVector<float64_t>(num_lhs);
00268 
00269             for (int32_t j=0; j!=num_lhs; j++)
00270                 row[j] = kernel(i,j);
00271 
00272             return row;
00273         }
00274 
00279         template <class T>
00280         SGMatrix<T> get_kernel_matrix()
00281         {
00282             T* result = NULL;
00283 
00284             REQUIRE(has_features(), "no features assigned to kernel\n");
00285 
00286             int32_t m=get_num_vec_lhs();
00287             int32_t n=get_num_vec_rhs();
00288 
00289             int64_t total_num = int64_t(m)*n;
00290 
00291             // if lhs == rhs and sizes match assume k(i,j)=k(j,i)
00292             bool symmetric= (lhs && lhs==rhs && m==n);
00293 
00294             SG_DEBUG( "returning kernel matrix of size %dx%d\n", m, n);
00295 
00296             result=SG_MALLOC(T, total_num);
00297 
00298             int32_t num_threads=parallel->get_num_threads();
00299             if (num_threads < 2)
00300             {
00301                 K_THREAD_PARAM<T> params;
00302                 params.kernel=this;
00303                 params.result=result;
00304                 params.start=0;
00305                 params.end=m;
00306                 params.total_start=0;
00307                 params.total_end=total_num;
00308                 params.n=n;
00309                 params.m=m;
00310                 params.symmetric=symmetric;
00311                 params.verbose=true;
00312                 get_kernel_matrix_helper<T>((void*) &params);
00313             }
00314             else
00315             {
00316                 pthread_t* threads = SG_MALLOC(pthread_t, num_threads-1);
00317                 K_THREAD_PARAM<T>* params = SG_MALLOC(K_THREAD_PARAM<T>, num_threads);
00318                 int64_t step= total_num/num_threads;
00319 
00320                 int32_t t;
00321 
00322                 num_threads--;
00323                 for (t=0; t<num_threads; t++)
00324                 {
00325                     params[t].kernel = this;
00326                     params[t].result = result;
00327                     params[t].start = compute_row_start(t*step, n, symmetric);
00328                     params[t].end = compute_row_start((t+1)*step, n, symmetric);
00329                     params[t].total_start=t*step;
00330                     params[t].total_end=(t+1)*step;
00331                     params[t].n=n;
00332                     params[t].m=m;
00333                     params[t].symmetric=symmetric;
00334                     params[t].verbose=false;
00335 
00336                     int code=pthread_create(&threads[t], NULL,
00337                             CKernel::get_kernel_matrix_helper<T>, (void*)&params[t]);
00338 
00339                     if (code != 0)
00340                     {
00341                         SG_WARNING("Thread creation failed (thread %d of %d) "
00342                                 "with error:'%s'\n",t, num_threads, strerror(code));
00343                         num_threads=t;
00344                         break;
00345                     }
00346                 }
00347 
00348                 params[t].kernel = this;
00349                 params[t].result = result;
00350                 params[t].start = compute_row_start(t*step, n, symmetric);
00351                 params[t].end = m;
00352                 params[t].total_start=t*step;
00353                 params[t].total_end=total_num;
00354                 params[t].n=n;
00355                 params[t].m=m;
00356                 params[t].symmetric=symmetric;
00357                 params[t].verbose=true;
00358                 get_kernel_matrix_helper<T>(&params[t]);
00359 
00360                 for (t=0; t<num_threads; t++)
00361                 {
00362                     if (pthread_join(threads[t], NULL) != 0)
00363                         SG_WARNING("pthread_join of thread %d/%d failed\n", t, num_threads);
00364                 }
00365 
00366                 SG_FREE(params);
00367                 SG_FREE(threads);
00368             }
00369 
00370             SG_DONE();
00371 
00372             return SGMatrix<T>(result,m,n,true);
00373         }
00374 
00375 
00386         virtual bool init(CFeatures* lhs, CFeatures* rhs);
00387 
00392         virtual bool set_normalizer(CKernelNormalizer* normalizer);
00393 
00398         virtual CKernelNormalizer* get_normalizer();
00399 
00403         virtual bool init_normalizer();
00404 
00411         virtual void cleanup();
00412 
00417         void load(CFile* loader);
00418 
00423         void save(CFile* writer);
00424 
00429         inline CFeatures* get_lhs() { SG_REF(lhs); return lhs; }
00430 
00435         inline CFeatures* get_rhs() { SG_REF(rhs); return rhs; }
00436 
00441         virtual int32_t get_num_vec_lhs()
00442         {
00443             return num_lhs;
00444         }
00445 
00450         virtual int32_t get_num_vec_rhs()
00451         {
00452             return num_rhs;
00453         }
00454 
00459         virtual bool has_features()
00460         {
00461             return lhs && rhs;
00462         }
00463 
00468         inline bool get_lhs_equals_rhs()
00469         {
00470             return lhs_equals_rhs;
00471         }
00472 
00474         virtual void remove_lhs_and_rhs();
00475 
00477         virtual void remove_lhs();
00478 
00480         virtual void remove_rhs();
00481 
00489         virtual EKernelType get_kernel_type()=0 ;
00490 
00497         virtual EFeatureType get_feature_type()=0;
00498 
00505         virtual EFeatureClass get_feature_class()=0;
00506 
00511         inline void set_cache_size(int32_t size)
00512         {
00513             cache_size = size;
00514 #ifdef USE_SVMLIGHT
00515             cache_reset();
00516 #endif //USE_SVMLIGHT
00517         }
00518 
00523         inline int32_t get_cache_size() { return cache_size; }
00524 
00525 #ifdef USE_SVMLIGHT
00526 
00527         inline void cache_reset() { resize_kernel_cache(cache_size); }
00528 
00533         inline int32_t get_max_elems_cache() { return kernel_cache.max_elems; }
00534 
00539         inline int32_t get_activenum_cache() { return kernel_cache.activenum; }
00540 
00548         void get_kernel_row(
00549             int32_t docnum, int32_t *active2dnum, float64_t *buffer,
00550             bool full_line=false);
00551 
00556         void cache_kernel_row(int32_t x);
00557 
00563         void cache_multiple_kernel_rows(int32_t* key, int32_t varnum);
00564 
00566         void kernel_cache_reset_lru();
00567 
00574         void kernel_cache_shrink(
00575             int32_t totdoc, int32_t num_shrink, int32_t *after);
00576 
00582         void resize_kernel_cache(KERNELCACHE_IDX size,
00583             bool regression_hack=false);
00584 
00589         inline void set_time(int32_t t)
00590         {
00591             kernel_cache.time=t;
00592         }
00593 
00599         inline int32_t kernel_cache_touch(int32_t cacheidx)
00600         {
00601             if(kernel_cache.index[cacheidx] != -1)
00602             {
00603                 kernel_cache.lru[kernel_cache.index[cacheidx]]=kernel_cache.time;
00604                 return(1);
00605             }
00606             return(0);
00607         }
00608 
00614         inline int32_t kernel_cache_check(int32_t cacheidx)
00615         {
00616             return(kernel_cache.index[cacheidx] >= 0);
00617         }
00618 
00623         inline int32_t kernel_cache_space_available()
00624         {
00625             return(kernel_cache.elems < kernel_cache.max_elems);
00626         }
00627 
00633         void kernel_cache_init(int32_t size, bool regression_hack=false);
00634 
00636         void kernel_cache_cleanup();
00637 
00638 #endif //USE_SVMLIGHT
00639 
00641         void list_kernel();
00642 
00648         inline bool has_property(EKernelProperty p) { return (properties & p) != 0; }
00649 
00653         virtual void clear_normal();
00654 
00660         virtual void add_to_normal(int32_t vector_idx, float64_t weight);
00661 
00666         inline EOptimizationType get_optimization_type() { return opt_type; }
00667 
00672         virtual inline void set_optimization_type(EOptimizationType t) { opt_type=t;}
00673 
00678         inline bool get_is_initialized() { return optimization_initialized; }
00679 
00687         virtual bool init_optimization(
00688             int32_t count, int32_t *IDX, float64_t *weights);
00689 
00694         virtual bool delete_optimization();
00695 
00701         bool init_optimization_svm(CSVM * svm) ;
00702 
00708         virtual float64_t compute_optimized(int32_t vector_idx);
00709 
00718         virtual void compute_batch(
00719             int32_t num_vec, int32_t* vec_idx, float64_t* target,
00720             int32_t num_suppvec, int32_t* IDX, float64_t* alphas,
00721             float64_t factor=1.0);
00722 
00727         inline float64_t get_combined_kernel_weight() { return combined_kernel_weight; }
00728 
00733         inline void set_combined_kernel_weight(float64_t nw) { combined_kernel_weight=nw; }
00734 
00739         virtual int32_t get_num_subkernels();
00740 
00746         virtual void compute_by_subkernel(
00747             int32_t vector_idx, float64_t * subkernel_contrib);
00748 
00754         virtual const float64_t* get_subkernel_weights(int32_t& num_weights);
00755 
00760         virtual void set_subkernel_weights(SGVector<float64_t> weights);
00761 
00770         virtual SGMatrix<float64_t> get_parameter_gradient(TParameter* param,
00771                 CSGObject* obj, index_t index = -1);
00772     protected:
00777         inline void set_property(EKernelProperty p)
00778         {
00779             properties |= p;
00780         }
00781 
00786         inline void unset_property(EKernelProperty p)
00787         {
00788             properties &= (properties | p) ^ p;
00789         }
00790 
00795         inline void set_is_initialized(bool p_init) { optimization_initialized=p_init; }
00796 
00807         virtual float64_t compute(int32_t x, int32_t y)=0;
00808 
00815         int32_t compute_row_start(int64_t offs, int32_t n, bool symmetric)
00816         {
00817             int32_t i_start;
00818 
00819             if (symmetric)
00820                 i_start=(int32_t) CMath::floor(n-CMath::sqrt(CMath::sq((float64_t) n)-offs));
00821             else
00822                 i_start=(int32_t) (offs/int64_t(n));
00823 
00824             return i_start;
00825         }
00826 
00831         template <class T>
00832         static void* get_kernel_matrix_helper(void* p)
00833         {
00834             K_THREAD_PARAM<T>* params= (K_THREAD_PARAM<T>*) p;
00835             int32_t i_start=params->start;
00836             int32_t i_end=params->end;
00837             CKernel* k=params->kernel;
00838             T* result=params->result;
00839             bool symmetric=params->symmetric;
00840             int32_t n=params->n;
00841             int32_t m=params->m;
00842             bool verbose=params->verbose;
00843             int64_t total_start=params->total_start;
00844             int64_t total_end=params->total_end;
00845             int64_t total=total_start;
00846 
00847             for (int32_t i=i_start; i<i_end; i++)
00848             {
00849                 int32_t j_start=0;
00850 
00851                 if (symmetric)
00852                     j_start=i;
00853 
00854                 for (int32_t j=j_start; j<n; j++)
00855                 {
00856                     float64_t v=k->kernel(i,j);
00857                     result[i+j*m]=v;
00858 
00859                     if (symmetric && i!=j)
00860                         result[j+i*m]=v;
00861 
00862                     if (verbose)
00863                     {
00864                         total++;
00865 
00866                         if (symmetric && i!=j)
00867                             total++;
00868 
00869                         if (total%100 == 0)
00870                             k->SG_PROGRESS(total, total_start, total_end);
00871 
00872                         if (CSignal::cancel_computations())
00873                             break;
00874                     }
00875                 }
00876 
00877             }
00878 
00879             return NULL;
00880         }
00881 
00890         virtual void load_serializable_post() throw (ShogunException);
00891 
00900         virtual void save_serializable_pre() throw (ShogunException);
00901 
00910         virtual void save_serializable_post() throw (ShogunException);
00915         virtual void register_params();
00916 
00917     private:
00920         void init();
00921 
00922 
00923 #ifdef USE_SVMLIGHT
00924 #ifndef DOXYGEN_SHOULD_SKIP_THIS
00925 
00926         struct KERNEL_CACHE {
00928             int32_t   *index;
00930             int32_t   *invindex;
00932             int32_t   *active2totdoc;
00934             int32_t   *totdoc2active;
00936             int32_t   *lru;
00938             int32_t   *occu;
00940             int32_t   elems;
00942             int32_t   max_elems;
00944             int32_t   time;
00946             int32_t   activenum;
00947 
00949             KERNELCACHE_ELEM  *buffer;
00951             KERNELCACHE_IDX   buffsize;
00952         };
00953 
00955         struct S_KTHREAD_PARAM
00956         {
00958             CKernel* kernel;
00960             KERNEL_CACHE* kernel_cache;
00962             KERNELCACHE_ELEM** cache;
00964             int32_t* uncached_rows;
00966             int32_t num_uncached;
00968             uint8_t* needs_computation;
00970             int32_t start;
00972             int32_t end;
00974             int32_t num_vectors;
00975         };
00976 #endif // DOXYGEN_SHOULD_SKIP_THIS
00977 
00979         static void* cache_multiple_kernel_row_helper(void* p);
00980 
00982         void   kernel_cache_free(int32_t cacheidx);
00983         int32_t   kernel_cache_malloc();
00984         int32_t   kernel_cache_free_lru();
00985         KERNELCACHE_ELEM *kernel_cache_clean_and_malloc(int32_t cacheidx);
00986 #endif //USE_SVMLIGHT
00987 
00988 
00989     protected:
00991         int32_t cache_size;
00992 
00993 #ifdef USE_SVMLIGHT
00994 
00995         KERNEL_CACHE kernel_cache;
00996 #endif //USE_SVMLIGHT
00997 
01000         KERNELCACHE_ELEM* kernel_matrix;
01001 
01003         CFeatures* lhs;
01005         CFeatures* rhs;
01006 
01008         bool lhs_equals_rhs;
01009 
01011         int32_t num_lhs;
01013         int32_t num_rhs;
01014 
01016         float64_t combined_kernel_weight;
01017 
01019         bool optimization_initialized;
01023         EOptimizationType opt_type;
01024 
01026         uint64_t  properties;
01027 
01030         CKernelNormalizer* normalizer;
01031 };
01032 
01033 }
01034 #endif /* _KERNEL_H__ */
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines

SHOGUN Machine Learning Toolbox - Documentation