SHOGUN
v2.0.0
|
00001 /* 00002 * This program is free software; you can redistribute it and/or modify 00003 * it under the terms of the GNU General Public License as published by 00004 * the Free Software Foundation; either version 3 of the License, or 00005 * (at your option) any later version. 00006 * 00007 * Written (W) 1999-2008 Gunnar Raetsch 00008 * Copyright (C) 1999-2009 Fraunhofer Institute FIRST and Max-Planck-Society 00009 */ 00010 00011 #include <shogun/lib/common.h> 00012 #include <shogun/io/SGIO.h> 00013 #include <shogun/kernel/string/SimpleLocalityImprovedStringKernel.h> 00014 #include <shogun/features/Features.h> 00015 #include <shogun/features/StringFeatures.h> 00016 00017 using namespace shogun; 00018 00019 CSimpleLocalityImprovedStringKernel::CSimpleLocalityImprovedStringKernel() 00020 : CStringKernel<char>() 00021 { 00022 SG_UNSTABLE("SimpleLocalityImprovedStringKernel"); 00023 init(); 00024 } 00025 00026 CSimpleLocalityImprovedStringKernel::CSimpleLocalityImprovedStringKernel( 00027 int32_t size, int32_t l, int32_t id, int32_t od) 00028 : CStringKernel<char>(size) 00029 { 00030 SG_UNSTABLE("SimpleLocalityImprovedStringKernel"); 00031 init(); 00032 00033 length=l; 00034 inner_degree=id; 00035 outer_degree=od; 00036 } 00037 00038 CSimpleLocalityImprovedStringKernel::CSimpleLocalityImprovedStringKernel( 00039 CStringFeatures<char>* l, CStringFeatures<char>* r, 00040 int32_t len, int32_t id, int32_t od) 00041 : CStringKernel<char>() 00042 { 00043 SG_UNSTABLE("SimpleLocalityImprovedStringKernel"); 00044 init(); 00045 00046 length=len; 00047 inner_degree=id; 00048 outer_degree=od; 00049 00050 init(l, r); 00051 } 00052 00053 CSimpleLocalityImprovedStringKernel::~CSimpleLocalityImprovedStringKernel() 00054 { 00055 cleanup(); 00056 } 00057 00058 bool CSimpleLocalityImprovedStringKernel::init(CFeatures* l, CFeatures* r) 00059 { 00060 bool result = CStringKernel<char>::init(l,r); 00061 00062 if (!result) 00063 return false; 00064 const int32_t num_features = ((CStringFeatures<char>*) l)->get_max_vector_length(); 00065 const int32_t PYRAL = 2 * length - 1; // total window length 00066 const int32_t pyra_len = num_features-PYRAL+1; 00067 const int32_t pyra_len2 = (int32_t) pyra_len/2; 00068 00069 SG_FREE(pyramid_weights); 00070 00071 pyramid_weights = SG_MALLOC(float64_t, pyra_len); 00072 num_pyramid_weights=pyra_len; 00073 00074 SG_INFO("initializing pyramid weights: size=%ld length=%i\n", 00075 num_features, length); 00076 00077 float64_t PYRAL_pot; 00078 int32_t DEGREE1_1 = (inner_degree & 0x1)==0; 00079 int32_t DEGREE1_1n = (inner_degree & ~0x1)!=0; 00080 int32_t DEGREE1_2 = (inner_degree & 0x2)!=0; 00081 int32_t DEGREE1_3 = (inner_degree & ~0x3)!=0; 00082 int32_t DEGREE1_4 = (inner_degree & 0x4)!=0; 00083 { 00084 float64_t PYRAL_ = PYRAL; 00085 PYRAL_pot = DEGREE1_1 ? 1.0 : PYRAL_; 00086 if (DEGREE1_1n) 00087 { 00088 PYRAL_ *= PYRAL_; 00089 if (DEGREE1_2) 00090 PYRAL_pot *= PYRAL_; 00091 if (DEGREE1_3) 00092 { 00093 PYRAL_ *= PYRAL_; 00094 if (DEGREE1_4) 00095 PYRAL_pot *= PYRAL_; 00096 } 00097 } 00098 } 00099 00100 { 00101 int32_t j; 00102 for (j = 0; j < pyra_len; j++) 00103 pyramid_weights[j] = 4*((float64_t)((j < pyra_len2)? j+1 : pyra_len-j))/((float64_t)pyra_len); 00104 for (j = 0; j < pyra_len; j++) 00105 pyramid_weights[j] /= PYRAL_pot; 00106 } 00107 00108 return init_normalizer(); 00109 } 00110 00111 void CSimpleLocalityImprovedStringKernel::cleanup() 00112 { 00113 SG_FREE(pyramid_weights); 00114 pyramid_weights = NULL; 00115 num_pyramid_weights = 0; 00116 00117 CKernel::cleanup(); 00118 } 00119 00120 float64_t CSimpleLocalityImprovedStringKernel::dot_pyr (const char* const x1, 00121 const char* const x2, const int32_t NOF_NTS, const int32_t NTWIDTH, 00122 const int32_t DEGREE1, const int32_t DEGREE2, float64_t *pyra) 00123 { 00124 const int32_t PYRAL = 2*NTWIDTH-1; // total window length 00125 int32_t pyra_len, pyra_len2; 00126 float64_t pot, PYRAL_pot; 00127 float64_t sum; 00128 int32_t DEGREE1_1 = (DEGREE1 & 0x1)==0; 00129 int32_t DEGREE1_1n = (DEGREE1 & ~0x1)!=0; 00130 int32_t DEGREE1_2 = (DEGREE1 & 0x2)!=0; 00131 int32_t DEGREE1_3 = (DEGREE1 & ~0x3)!=0; 00132 int32_t DEGREE1_4 = (DEGREE1 & 0x4)!=0; 00133 { 00134 float64_t PYRAL_ = PYRAL; 00135 PYRAL_pot = DEGREE1_1 ? 1.0 : PYRAL_; 00136 if (DEGREE1_1n) 00137 { 00138 PYRAL_ *= PYRAL_; 00139 if (DEGREE1_2) PYRAL_pot *= PYRAL_; 00140 if (DEGREE1_3) 00141 { 00142 PYRAL_ *= PYRAL_; 00143 if (DEGREE1_4) PYRAL_pot *= PYRAL_; 00144 } 00145 } 00146 } 00147 00148 ASSERT((DEGREE1 & ~0x7) == 0); 00149 ASSERT((DEGREE2 & ~0x7) == 0); 00150 00151 pyra_len = NOF_NTS-PYRAL+1; 00152 pyra_len2 = (int32_t) pyra_len/2; 00153 { 00154 int32_t j; 00155 for (j = 0; j < pyra_len; j++) 00156 pyra[j] = 4*((float64_t)((j < pyra_len2) ? j+1 : pyra_len-j))/((float64_t)pyra_len); 00157 for (j = 0; j < pyra_len; j++) 00158 pyra[j] /= PYRAL_pot; 00159 } 00160 00161 register int32_t conv; 00162 register int32_t i; 00163 register int32_t j; 00164 00165 sum = 0.0; 00166 conv = 0; 00167 for (j = 0; j < PYRAL; j++) 00168 conv += (x1[j] == x2[j]) ? 1 : 0; 00169 00170 for (i = 0; i < NOF_NTS-PYRAL+1; i++) 00171 { 00172 register float64_t pot2; 00173 if (i>0) 00174 conv += ((x1[i+PYRAL-1] == x2[i+PYRAL-1]) ? 1 : 0 ) - 00175 ((x1[i-1] == x2[i-1]) ? 1 : 0); 00176 { /* potencing of conv -- float64_t is faster*/ 00177 register float64_t conv2 = conv; 00178 pot2 = (DEGREE1_1) ? 1.0 : conv2; 00179 if (DEGREE1_1n) 00180 { 00181 conv2 *= conv2; 00182 if (DEGREE1_2) 00183 pot2 *= conv2; 00184 if (DEGREE1_3 && DEGREE1_4) 00185 pot2 *= conv2*conv2; 00186 } 00187 } 00188 sum += pot2*pyra[i]; 00189 } 00190 00191 pot = ((DEGREE2 & 0x1) == 0) ? 1.0 : sum; 00192 if ((DEGREE2 & ~0x1) != 0) 00193 { 00194 sum *= sum; 00195 if ((DEGREE2 & 0x2) != 0) 00196 pot *= sum; 00197 if ((DEGREE2 & ~0x3) != 0) 00198 { 00199 sum *= sum; 00200 if ((DEGREE2 & 0x4) != 0) 00201 pot *= sum; 00202 } 00203 } 00204 return pot; 00205 } 00206 00207 float64_t CSimpleLocalityImprovedStringKernel::compute( 00208 int32_t idx_a, int32_t idx_b) 00209 { 00210 int32_t alen, blen; 00211 bool free_avec, free_bvec; 00212 00213 char* avec = ((CStringFeatures<char>*) lhs)->get_feature_vector(idx_a, alen, free_avec); 00214 char* bvec = ((CStringFeatures<char>*) rhs)->get_feature_vector(idx_b, blen, free_bvec); 00215 00216 // can only deal with strings of same length 00217 ASSERT(alen==blen); 00218 00219 float64_t dpt; 00220 00221 dpt = dot_pyr(avec, bvec, alen, length, inner_degree, outer_degree, pyramid_weights); 00222 dpt = dpt / pow((float64_t) alen, (float64_t) outer_degree); 00223 00224 ((CStringFeatures<char>*) lhs)->free_feature_vector(avec, idx_a, free_avec); 00225 ((CStringFeatures<char>*) rhs)->free_feature_vector(bvec, idx_b, free_bvec); 00226 return (float64_t) dpt; 00227 } 00228 00229 void CSimpleLocalityImprovedStringKernel::init() 00230 { 00231 length = 3; 00232 inner_degree = 3; 00233 outer_degree = 1; 00234 pyramid_weights=NULL; 00235 num_pyramid_weights=0; 00236 00237 SG_ADD(&length, "length", "Window Length.", MS_AVAILABLE); 00238 SG_ADD(&inner_degree, "inner_degree", "Inner degree.", MS_AVAILABLE); 00239 SG_ADD(&outer_degree, "outer_degree", "Outer degree.", MS_AVAILABLE); 00240 00241 m_parameters->add_vector(&pyramid_weights, &num_pyramid_weights, 00242 "pyramid_weights", "Pyramid weights."); 00243 }