SHOGUN
v2.0.0
|
00001 /* 00002 * This program is free software; you can redistribute it and/or modify 00003 * it under the terms of the GNU General Public License as published by 00004 * the Free Software Foundation; either version 3 of the License, or 00005 * (at your option) any later version. 00006 * 00007 * Written (W) 2007-2008 Vojtech Franc 00008 * Written (W) 2007-2009 Soeren Sonnenburg 00009 * Copyright (C) 2007-2009 Fraunhofer Institute FIRST and Max-Planck-Society 00010 */ 00011 00012 #ifndef _WDSVMOCAS_H___ 00013 #define _WDSVMOCAS_H___ 00014 00015 #include <shogun/lib/common.h> 00016 #include <shogun/machine/Machine.h> 00017 #include <shogun/classifier/svm/SVMOcas.h> 00018 #include <shogun/features/StringFeatures.h> 00019 #include <shogun/labels/Labels.h> 00020 00021 namespace shogun 00022 { 00023 template <class ST> class CStringFeatures; 00024 00026 class CWDSVMOcas : public CMachine 00027 { 00028 public: 00030 MACHINE_PROBLEM_TYPE(PT_BINARY); 00031 00033 CWDSVMOcas(); 00034 00039 CWDSVMOcas(E_SVM_TYPE type); 00040 00049 CWDSVMOcas( 00050 float64_t C, int32_t d, int32_t from_d, 00051 CStringFeatures<uint8_t>* traindat, CLabels* trainlab); 00052 virtual ~CWDSVMOcas(); 00053 00058 virtual inline EMachineType get_classifier_type() { return CT_WDSVMOCAS; } 00059 00066 inline void set_C(float64_t c_neg, float64_t c_pos) { C1=c_neg; C2=c_pos; } 00067 00072 inline float64_t get_C1() { return C1; } 00073 00078 inline float64_t get_C2() { return C2; } 00079 00084 inline void set_epsilon(float64_t eps) { epsilon=eps; } 00085 00090 inline float64_t get_epsilon() { return epsilon; } 00091 00096 inline void set_features(CStringFeatures<uint8_t>* feat) 00097 { 00098 SG_UNREF(features); 00099 SG_REF(feat); 00100 features=feat; 00101 } 00102 00107 inline CStringFeatures<uint8_t>* get_features() 00108 { 00109 SG_REF(features); 00110 return features; 00111 } 00112 00117 inline void set_bias_enabled(bool enable_bias) { use_bias=enable_bias; } 00118 00123 inline bool get_bias_enabled() { return use_bias; } 00124 00129 inline void set_bufsize(int32_t sz) { bufsize=sz; } 00130 00135 inline int32_t get_bufsize() { return bufsize; } 00136 00142 inline void set_degree(int32_t d, int32_t from_d) 00143 { 00144 degree=d; 00145 from_degree=from_d; 00146 } 00147 00152 inline int32_t get_degree() { return degree; } 00153 00160 virtual CBinaryLabels* apply_binary(CFeatures* data=NULL); 00161 00168 virtual CRegressionLabels* apply_regression(CFeatures* data=NULL); 00169 00175 inline virtual float64_t apply_one(int32_t num) 00176 { 00177 ASSERT(features); 00178 if (!wd_weights) 00179 set_wd_weights(); 00180 00181 int32_t len=0; 00182 float64_t sum=0; 00183 bool free_vec; 00184 uint8_t* vec=features->get_feature_vector(num, len, free_vec); 00185 //SG_INFO("len %d, string_length %d\n", len, string_length); 00186 ASSERT(len==string_length); 00187 00188 for (int32_t j=0; j<string_length; j++) 00189 { 00190 int32_t offs=w_dim_single_char*j; 00191 int32_t val=0; 00192 for (int32_t k=0; (j+k<string_length) && (k<degree); k++) 00193 { 00194 val=val*alphabet_size + vec[j+k]; 00195 sum+=wd_weights[k] * w[offs+val]; 00196 offs+=w_offsets[k]; 00197 } 00198 } 00199 features->free_feature_vector(vec, num, free_vec); 00200 return sum/normalization_const; 00201 } 00202 00204 inline void set_normalization_const() 00205 { 00206 ASSERT(features); 00207 normalization_const=0; 00208 for (int32_t i=0; i<degree; i++) 00209 normalization_const+=(string_length-i)*wd_weights[i]*wd_weights[i]; 00210 00211 normalization_const=CMath::sqrt(normalization_const); 00212 SG_DEBUG("normalization_const:%f\n", normalization_const); 00213 } 00214 00219 inline float64_t get_normalization_const() { return normalization_const; } 00220 00221 00222 protected: 00223 00228 SGVector<float64_t> apply_get_outputs(CFeatures* data); 00229 00234 int32_t set_wd_weights(); 00235 00244 static void compute_W( 00245 float64_t *sq_norm_W, float64_t *dp_WoldW, float64_t *alpha, 00246 uint32_t nSel, void* ptr ); 00247 00254 static float64_t update_W(float64_t t, void* ptr ); 00255 00261 static void* add_new_cut_helper(void* ptr); 00262 00271 static int add_new_cut( 00272 float64_t *new_col_H, uint32_t *new_cut, uint32_t cut_length, 00273 uint32_t nSel, void* ptr ); 00274 00280 static void* compute_output_helper(void* ptr); 00281 00287 static int compute_output( float64_t *output, void* ptr ); 00288 00295 static int sort( float64_t* vals, float64_t* data, uint32_t size); 00296 00298 static inline void print(ocas_return_value_T value) 00299 { 00300 return; 00301 } 00302 00303 00305 inline virtual const char* get_name() const { return "WDSVMOcas"; } 00306 00307 protected: 00316 virtual bool train_machine(CFeatures* data=NULL); 00317 00318 protected: 00320 CStringFeatures<uint8_t>* features; 00322 bool use_bias; 00324 int32_t bufsize; 00326 float64_t C1; 00328 float64_t C2; 00330 float64_t epsilon; 00332 E_SVM_TYPE method; 00333 00335 int32_t degree; 00337 int32_t from_degree; 00339 float32_t* wd_weights; 00341 int32_t num_vec; 00343 int32_t string_length; 00345 int32_t alphabet_size; 00346 00348 float64_t normalization_const; 00349 00351 float64_t bias; 00353 float64_t old_bias; 00355 int32_t* w_offsets; 00357 int32_t w_dim; 00359 int32_t w_dim_single_char; 00361 float32_t* w; 00363 float32_t* old_w; 00365 float64_t* lab; 00366 00368 float32_t** cuts; 00370 float64_t* cp_bias; 00371 }; 00372 } 00373 #endif