// SHOGUN v2.0.0
00001 /* 00002 * This program is free software; you can redistribute it and/or modify 00003 * it under the terms of the GNU General Public License as published by 00004 * the Free Software Foundation; either version 3 of the License, or 00005 * (at your option) any later version. 00006 * 00007 * Written (W) 1999-2009 Soeren Sonnenburg 00008 * Written (W) 1999-2008 Gunnar Raetsch 00009 * Copyright (C) 1999-2009 Fraunhofer Institute FIRST and Max-Planck-Society 00010 */ 00011 00012 #ifndef _SPECTRUMRBFKERNEL_H___ 00013 #define _SPECTRUMRBFKERNEL_H___ 00014 00015 #include <shogun/lib/common.h> 00016 #include <shogun/lib/Trie.h> 00017 #include <shogun/kernel/string/StringKernel.h> 00018 #include <shogun/features/StringFeatures.h> 00019 00020 00021 #include <shogun/lib/DynamicArray.h> 00022 00023 #include <vector> // profile 00024 #include <string> // profile 00025 00026 namespace shogun 00027 { 00028 00030 class CSpectrumRBFKernel: public CStringKernel<char> 00031 { 00032 public: 00034 CSpectrumRBFKernel(); 00035 00042 CSpectrumRBFKernel(int32_t size, float64_t* AA_matrix, int32_t degree, float64_t width); 00043 00053 CSpectrumRBFKernel( 00054 CStringFeatures<char>* l, CStringFeatures<char>* r, int32_t size, float64_t* AA_matrix, int32_t degree, float64_t width); 00055 00057 virtual ~CSpectrumRBFKernel(); 00058 00065 virtual bool init(CFeatures* l, CFeatures* r); 00066 00068 virtual void cleanup(); 00069 00074 int32_t get_degree() const 00075 { 00076 return degree; 00077 } 00078 00083 virtual EKernelType get_kernel_type() { return K_SPECTRUMRBF; } 00084 00089 virtual const char* get_name() const { return "SpectrumRBFKernel"; } 00090 00096 inline bool set_degree(int32_t deg) { degree=deg; return true; } 00097 00102 inline int32_t get_degree() { return degree; } 00103 00107 bool set_AA_matrix(float64_t* AA_matrix_); 00108 00109 protected: 00110 00117 float64_t AA_helper(const char* path, const int degree, const char* joint_seq, unsigned int index); 00118 00120 void 
read_profiles_and_sequences(); 00121 00130 float64_t compute(int32_t idx_a, int32_t idx_b); 00131 00133 virtual void remove_lhs(); 00135 virtual void register_param(); 00137 void register_alphabet(); 00138 00139 00140 protected: 00142 CAlphabet* alphabet; 00144 int32_t degree; 00146 int32_t max_mismatch; 00148 float64_t* AA_matrix ; 00150 int32_t AA_matrix_length; 00152 float64_t width; 00153 00154 //int32_t* aa_to_index; // profile 00155 00156 //double background[20]; // profile 00158 std::vector< std::vector<float64_t> > profiles; //profile 00160 std::vector<std::string> sequence_labels; // profile 00162 SGString<char>* sequences; // profile 00164 CStringFeatures<char>* string_features; 00166 int32_t nof_sequences; 00168 int32_t max_sequence_length; 00169 00171 bool initialized; 00173 CDynamicArray<float64_t> kernel_matrix; // 2d 00175 int32_t target_letter_0; 00176 00177 private: 00178 void init(); 00179 }; 00180 00181 } 00182 00183 00184 00185 #endif /* _SPECTRUMMISMATCHRBFKERNEL_H__ */