// SHOGUN v2.0.0
00001 /* 00002 * This program is free software; you can redistribute it and/or modify 00003 * it under the terms of the GNU General Public License as published by 00004 * the Free Software Foundation; either version 3 of the License, or 00005 * (at your option) any later version. 00006 * 00007 * Written (W) 1999-2009 Soeren Sonnenburg 00008 * Written (W) 1999-2008 Gunnar Raetsch 00009 * Copyright (C) 1999-2009 Fraunhofer Institute FIRST and Max-Planck-Society 00010 */ 00011 00012 #ifndef _WEIGHTEDDEGREESTRINGKERNEL_H___ 00013 #define _WEIGHTEDDEGREESTRINGKERNEL_H___ 00014 00015 #include <shogun/lib/common.h> 00016 #include <shogun/lib/Trie.h> 00017 #include <shogun/kernel/string/StringKernel.h> 00018 #include <shogun/transfer/multitask/MultitaskKernelMklNormalizer.h> 00019 #include <shogun/features/StringFeatures.h> 00020 00021 namespace shogun 00022 { 00023 00025 enum EWDKernType 00026 { 00027 E_WD=0, 00028 E_EXTERNAL=1, 00029 00030 E_BLOCK_CONST=2, 00031 E_BLOCK_LINEAR=3, 00032 E_BLOCK_SQPOLY=4, 00033 E_BLOCK_CUBICPOLY=5, 00034 E_BLOCK_EXP=6, 00035 E_BLOCK_LOG=7, 00036 }; 00037 00038 00053 class CWeightedDegreeStringKernel: public CStringKernel<char> 00054 { 00055 public: 00056 00060 CWeightedDegreeStringKernel(); 00061 00062 00068 CWeightedDegreeStringKernel(int32_t degree, EWDKernType type=E_WD); 00069 00074 CWeightedDegreeStringKernel(SGVector<float64_t> weights); 00075 00082 CWeightedDegreeStringKernel( 00083 CStringFeatures<char>* l, CStringFeatures<char>* r, int32_t degree); 00084 00085 virtual ~CWeightedDegreeStringKernel(); 00086 00093 virtual bool init(CFeatures* l, CFeatures* r); 00094 00096 virtual void cleanup(); 00097 00105 EWDKernType get_type() const 00106 { 00107 return type; 00108 } 00109 00114 virtual EKernelType get_kernel_type() { return K_WEIGHTEDDEGREE; } 00115 00120 virtual const char* get_name() const { 00121 return "WeightedDegreeStringKernel"; 00122 } 00123 00131 inline virtual bool init_optimization( 00132 int32_t count, int32_t *IDX, 
float64_t* alphas) 00133 { 00134 return init_optimization(count, IDX, alphas, -1); 00135 } 00136 00147 virtual bool init_optimization( 00148 int32_t count, int32_t *IDX, float64_t* alphas, int32_t tree_num); 00149 00154 virtual bool delete_optimization(); 00155 00161 virtual float64_t compute_optimized(int32_t idx) 00162 { 00163 if (get_is_initialized()) 00164 return compute_by_tree(idx); 00165 00166 SG_ERROR( "CWeightedDegreeStringKernel optimization not initialized\n"); 00167 return 0; 00168 } 00169 00174 static void* compute_batch_helper(void* p); 00175 00186 virtual void compute_batch( 00187 int32_t num_vec, int32_t* vec_idx, float64_t* target, 00188 int32_t num_suppvec, int32_t* IDX, float64_t* alphas, 00189 float64_t factor=1.0); 00190 00194 inline virtual void clear_normal() 00195 { 00196 if (get_is_initialized()) 00197 { 00198 00199 if (normalizer && normalizer->get_normalizer_type()==N_MULTITASK) 00200 SG_ERROR("not implemented"); 00201 00202 tries->delete_trees(max_mismatch==0); 00203 set_is_initialized(false); 00204 } 00205 } 00206 00212 inline virtual void add_to_normal(int32_t idx, float64_t weight) 00213 { 00214 00215 if (normalizer && normalizer->get_normalizer_type()==N_MULTITASK) 00216 SG_ERROR("not implemented"); 00217 00218 if (max_mismatch==0) 00219 add_example_to_tree(idx, weight); 00220 else 00221 add_example_to_tree_mismatch(idx, weight); 00222 00223 set_is_initialized(true); 00224 } 00225 00230 inline virtual int32_t get_num_subkernels() 00231 { 00232 if (normalizer && normalizer->get_normalizer_type()==N_MULTITASK) 00233 return ((CMultitaskKernelMklNormalizer*)normalizer)->get_num_betas(); 00234 if (position_weights!=NULL) 00235 return (int32_t) ceil(1.0*seq_length/mkl_stepsize) ; 00236 if (length==0) 00237 return (int32_t) ceil(1.0*get_degree()/mkl_stepsize); 00238 return (int32_t) ceil(1.0*get_degree()*length/mkl_stepsize) ; 00239 } 00240 00246 inline void compute_by_subkernel( 00247 int32_t idx, float64_t * subkernel_contrib) 00248 { 
00249 00250 if (get_is_initialized()) 00251 { 00252 00253 if (normalizer && normalizer->get_normalizer_type()==N_MULTITASK) 00254 SG_ERROR("not implemented"); 00255 00256 compute_by_tree(idx, subkernel_contrib); 00257 return ; 00258 } 00259 00260 SG_ERROR( "CWeightedDegreeStringKernel optimization not initialized\n"); 00261 } 00262 00268 inline const float64_t* get_subkernel_weights(int32_t& num_weights) 00269 { 00270 00271 num_weights = get_num_subkernels(); 00272 00273 SG_FREE(weights_buffer); 00274 weights_buffer = SG_MALLOC(float64_t, num_weights); 00275 00276 if (normalizer && normalizer->get_normalizer_type()==N_MULTITASK) 00277 for (int32_t i=0; i<num_weights; i++) 00278 weights_buffer[i] = ((CMultitaskKernelMklNormalizer*)normalizer)->get_beta(i); 00279 else if (position_weights!=NULL) 00280 for (int32_t i=0; i<num_weights; i++) 00281 weights_buffer[i] = position_weights[i*mkl_stepsize]; 00282 else 00283 for (int32_t i=0; i<num_weights; i++) 00284 weights_buffer[i] = weights[i*mkl_stepsize]; 00285 00286 return weights_buffer; 00287 } 00288 00293 virtual void set_subkernel_weights(SGVector<float64_t> w) 00294 { 00295 float64_t* weights2=w.vector; 00296 int32_t num_weights2=w.vlen; 00297 int32_t num_weights = get_num_subkernels(); 00298 if (num_weights!=num_weights2) 00299 SG_ERROR( "number of weights do not match\n"); 00300 00301 00302 if (normalizer && normalizer->get_normalizer_type()==N_MULTITASK) 00303 for (int32_t i=0; i<num_weights; i++) 00304 ((CMultitaskKernelMklNormalizer*)normalizer)->set_beta(i, weights2[i]); 00305 else if (position_weights!=NULL) 00306 { 00307 for (int32_t i=0; i<num_weights; i++) 00308 { 00309 for (int32_t j=0; j<mkl_stepsize; j++) 00310 { 00311 if (i*mkl_stepsize+j<seq_length) 00312 position_weights[i*mkl_stepsize+j] = weights2[i]; 00313 } 00314 } 00315 } 00316 else if (length==0) 00317 { 00318 for (int32_t i=0; i<num_weights; i++) 00319 { 00320 for (int32_t j=0; j<mkl_stepsize; j++) 00321 { 00322 if 
(i*mkl_stepsize+j<get_degree()) 00323 weights[i*mkl_stepsize+j] = weights2[i]; 00324 } 00325 } 00326 } 00327 else 00328 { 00329 for (int32_t i=0; i<num_weights; i++) 00330 { 00331 for (int32_t j=0; j<mkl_stepsize; j++) 00332 { 00333 if (i*mkl_stepsize+j<get_degree()*length) 00334 weights[i*mkl_stepsize+j] = weights2[i]; 00335 } 00336 } 00337 } 00338 } 00339 00344 virtual bool set_normalizer(CKernelNormalizer* normalizer_) { 00345 00346 if (normalizer_ && strcmp(normalizer_->get_name(),"MultitaskKernelTreeNormalizer")==0) { 00347 unset_property(KP_LINADD); 00348 unset_property(KP_BATCHEVALUATION); 00349 } 00350 else 00351 { 00352 set_property(KP_LINADD); 00353 set_property(KP_BATCHEVALUATION); 00354 } 00355 00356 00357 return CStringKernel<char>::set_normalizer(normalizer_); 00358 00359 } 00360 00361 // other kernel tree operations 00367 float64_t *compute_abs_weights(int32_t & len); 00368 00375 void compute_by_tree(int32_t idx, float64_t *LevelContrib); 00376 00381 bool is_tree_initialized() { return tree_initialized; } 00382 00388 inline float64_t *get_degree_weights(int32_t& d, int32_t& len) 00389 { 00390 d=degree; 00391 len=length; 00392 return weights; 00393 } 00394 00400 inline float64_t *get_weights(int32_t& num_weights) 00401 { 00402 00403 if (normalizer && normalizer->get_normalizer_type()==N_MULTITASK) 00404 SG_ERROR("not implemented"); 00405 00406 if (position_weights!=NULL) 00407 { 00408 num_weights = seq_length ; 00409 return position_weights ; 00410 } 00411 if (length==0) 00412 num_weights = degree ; 00413 else 00414 num_weights = degree*length ; 00415 return weights; 00416 } 00417 00423 inline float64_t *get_position_weights(int32_t& len) 00424 { 00425 len=seq_length; 00426 return position_weights; 00427 } 00428 00434 bool set_wd_weights_by_type(EWDKernType type); 00435 00440 inline void set_wd_weights(SGVector<float64_t> new_weights) 00441 { 00442 SGMatrix<float64_t> matrix = SGMatrix<float64_t>(new_weights.vector,new_weights.vlen,0); 00443 
set_weights(matrix); 00444 matrix.matrix = NULL; 00445 } 00446 00451 bool set_weights(SGMatrix<float64_t> new_weights); 00452 00459 bool set_position_weights(float64_t* pws, int32_t len); 00460 00465 bool init_block_weights(); 00466 00471 bool init_block_weights_from_wd(); 00472 00477 bool init_block_weights_from_wd_external(); 00478 00483 bool init_block_weights_const(); 00484 00489 bool init_block_weights_linear(); 00490 00495 bool init_block_weights_sqpoly(); 00496 00501 bool init_block_weights_cubicpoly(); 00502 00507 bool init_block_weights_exp(); 00508 00513 bool init_block_weights_log(); 00514 00519 bool delete_position_weights() 00520 { 00521 SG_FREE(position_weights); 00522 position_weights=NULL; 00523 return true; 00524 } 00525 00531 bool set_max_mismatch(int32_t max); 00532 00537 inline int32_t get_max_mismatch() const { return max_mismatch; } 00538 00544 inline bool set_degree(int32_t deg) { degree=deg; return true; } 00545 00550 inline int32_t get_degree() const { return degree; } 00551 00557 inline bool set_use_block_computation(bool block) 00558 { 00559 block_computation=block; 00560 return true; 00561 } 00562 00567 inline bool get_use_block_computation() { return block_computation; } 00568 00574 inline bool set_mkl_stepsize(int32_t step) 00575 { 00576 if (step<1) 00577 SG_ERROR("Stepsize must be a positive integer\n"); 00578 mkl_stepsize=step; 00579 return true; 00580 } 00581 00586 inline int32_t get_mkl_stepsize() { return mkl_stepsize; } 00587 00593 inline bool set_which_degree(int32_t which) 00594 { 00595 which_degree=which; 00596 return true; 00597 } 00598 00603 inline int32_t get_which_degree() { return which_degree; } 00604 00605 protected: 00607 void create_empty_tries(); 00608 00614 void add_example_to_tree(int32_t idx, float64_t weight); 00615 00622 void add_example_to_single_tree( 00623 int32_t idx, float64_t weight, int32_t tree_num); 00624 00630 void add_example_to_tree_mismatch(int32_t idx, float64_t weight); 00631 00638 void 
add_example_to_single_tree_mismatch( 00639 int32_t idx, float64_t weight, int32_t tree_num); 00640 00646 float64_t compute_by_tree(int32_t idx); 00647 00656 float64_t compute(int32_t idx_a, int32_t idx_b); 00657 00666 float64_t compute_with_mismatch( 00667 char* avec, int32_t alen, char* bvec, int32_t blen); 00668 00677 float64_t compute_without_mismatch( 00678 char* avec, int32_t alen, char* bvec, int32_t blen); 00679 00688 float64_t compute_without_mismatch_matrix( 00689 char* avec, int32_t alen, char* bvec, int32_t blen); 00690 00699 float64_t compute_using_block(char* avec, int32_t alen, 00700 char* bvec, int32_t blen); 00701 00703 virtual void remove_lhs(); 00704 00705 private: 00708 void init(); 00709 00710 protected: 00714 float64_t* weights; 00716 int32_t weights_degree; 00718 int32_t weights_length; 00719 00720 00722 float64_t* position_weights; 00724 int32_t position_weights_len; 00726 float64_t* weights_buffer; 00728 int32_t mkl_stepsize; 00730 int32_t degree; 00732 int32_t length; 00733 00735 int32_t max_mismatch; 00737 int32_t seq_length; 00738 00740 bool initialized; 00741 00743 bool block_computation; 00744 00746 float64_t* block_weights; 00748 EWDKernType type; 00750 int32_t which_degree; 00751 00753 CTrie<DNATrie>* tries; 00754 00756 bool tree_initialized; 00757 00759 CAlphabet* alphabet; 00760 }; 00761 00762 } 00763 00764 #endif /* _WEIGHTEDDEGREESTRINGKERNEL_H__ */