SHOGUN
v2.0.0
|
00001 /* 00002 * This program is free software; you can redistribute it and/or modify 00003 * it under the terms of the GNU General Public License as published by 00004 * the Free Software Foundation; either version 3 of the License, or 00005 * (at your option) any later version. 00006 * 00007 * Written (W) 1999-2009 Soeren Sonnenburg 00008 * Written (W) 1999-2008 Gunnar Raetsch 00009 * Copyright (C) 1999-2009 Fraunhofer Institute FIRST and Max-Planck-Society 00010 */ 00011 #include <shogun/base/SGObject.h> 00012 #include <shogun/lib/common.h> 00013 #include <shogun/mathematics/Math.h> 00014 #include <shogun/mathematics/lapack.h> 00015 #include <shogun/io/SGIO.h> 00016 00017 #include <stdio.h> 00018 #include <stdlib.h> 00019 #include <math.h> 00020 00021 using namespace shogun; 00022 00023 #ifdef USE_LOGCACHE 00024 #ifdef USE_HMMDEBUG 00025 #define MAX_LOG_TABLE_SIZE 10*1024*1024 00026 #define LOG_TABLE_PRECISION 1e-6 00027 #else //USE_HMMDEBUG 00028 #define MAX_LOG_TABLE_SIZE 123*1024*1024 00029 #define LOG_TABLE_PRECISION 1e-15 00030 #endif //USE_HMMDEBUG 00031 int32_t CMath::LOGACCURACY = 0; // 100000 steps per integer 00032 #endif // USE_LOGCACHE 00033 00034 int32_t CMath::LOGRANGE = 0; // range for logtable: log(1+exp(x)) -25 <= x <= 0 00035 00036 const float64_t CMath::INFTY = -log(0.0); // infinity 00037 const float64_t CMath::ALMOST_INFTY = +1e+20; //a large number 00038 const float64_t CMath::ALMOST_NEG_INFTY = -1000; 00039 const float64_t CMath::PI=M_PI; 00040 const float64_t CMath::MACHINE_EPSILON=5E-16; 00041 const float64_t CMath::MAX_REAL_NUMBER=1E300; 00042 const float64_t CMath::MIN_REAL_NUMBER=1E-300; 00043 00044 #ifdef USE_LOGCACHE 00045 float64_t* CMath::logtable = NULL; 00046 #endif 00047 char* CMath::rand_state = NULL; 00048 uint32_t CMath::seed = 0; 00049 00050 CMath::CMath() 00051 : CSGObject() 00052 { 00053 CMath::rand_state=SG_MALLOC(char, RNG_SEED_SIZE); 00054 init_random(); 00055 00056 #ifdef USE_LOGCACHE 00057 LOGRANGE=CMath::determine_logrange(); 00058 LOGACCURACY=CMath::determine_logaccuracy(LOGRANGE); 00059 CMath::logtable=SG_MALLOC(float64_t, LOGRANGE*LOGACCURACY); 00060 init_log_table(); 00061 #else 00062 int32_t i=0; 00063 while ((float64_t)log(1+((float64_t)exp(-float64_t(i))))) 00064 i++; 00065 00066 LOGRANGE=i; 00067 #endif 00068 } 00069 00070 CMath::~CMath() 00071 { 00072 SG_FREE(CMath::rand_state); 00073 CMath::rand_state=NULL; 00074 #ifdef USE_LOGCACHE 00075 SG_FREE(CMath::logtable); 00076 CMath::logtable=NULL; 00077 #endif 00078 } 00079 00080 #ifdef USE_LOGCACHE 00081 int32_t CMath::determine_logrange() 00082 { 00083 int32_t i; 00084 float64_t acc=0; 00085 for (i=0; i<50; i++) 00086 { 00087 acc=((float64_t)log(1+((float64_t)exp(-float64_t(i))))); 00088 if (acc<=(float64_t)LOG_TABLE_PRECISION) 00089 break; 00090 } 00091 00092 SG_SINFO( "determined range for x in table log(1+exp(-x)) is:%d (error:%G)\n",i,acc); 00093 return i; 00094 } 00095 00096 int32_t CMath::determine_logaccuracy(int32_t range) 00097 { 00098 range=MAX_LOG_TABLE_SIZE/range/((int)sizeof(float64_t)); 00099 SG_SINFO( "determined accuracy for x in table log(1+exp(-x)) is:%d (error:%G)\n",range,1.0/(double) range); 00100 return range; 00101 } 00102 00103 //init log table of form log(1+exp(x)) 00104 void CMath::init_log_table() 00105 { 00106 for (int32_t i=0; i< LOGACCURACY*LOGRANGE; i++) 00107 logtable[i]=log(1+exp(float64_t(-i)/float64_t(LOGACCURACY))); 00108 } 00109 #endif 00110 00111 void CMath::sort(int32_t *a, int32_t cols, int32_t sort_col) 00112 { 00113 int32_t changed=1; 00114 if (a[0]==-1) return ; 00115 while (changed) 00116 { 00117 changed=0; int32_t i=0 ; 00118 while ((a[(i+1)*cols]!=-1) && (a[(i+1)*cols+1]!=-1)) // to be sure 00119 { 00120 if (a[i*cols+sort_col]>a[(i+1)*cols+sort_col]) 00121 { 00122 for (int32_t j=0; j<cols; j++) 00123 CMath::swap(a[i*cols+j],a[(i+1)*cols+j]) ; 00124 changed=1 ; 00125 } ; 00126 i++ ; 00127 } ; 00128 } ; 00129 } 00130 00131 void CMath::sort(float64_t *a, int32_t* idx, int32_t N) 00132 { 00133 int32_t changed=1; 00134 while (changed) 00135 { 00136 changed=0; 00137 for (int32_t i=0; i<N-1; i++) 00138 { 00139 if (a[i]>a[i+1]) 00140 { 00141 swap(a[i],a[i+1]) ; 00142 swap(idx[i],idx[i+1]) ; 00143 changed=1 ; 00144 } ; 00145 } ; 00146 } ; 00147 00148 } 00149 00150 float64_t CMath::Align( 00151 char* seq1, char* seq2, int32_t l1, int32_t l2, float64_t gapCost) 00152 { 00153 float64_t actCost=0 ; 00154 int32_t i1, i2 ; 00155 float64_t* const gapCosts1 = SG_MALLOC(float64_t, l1 ); 00156 float64_t* const gapCosts2 = SG_MALLOC(float64_t, l2 ); 00157 float64_t* costs2_0 = SG_MALLOC(float64_t, l2 + 1 ); 00158 float64_t* costs2_1 = SG_MALLOC(float64_t, l2 + 1 ); 00159 00160 // initialize borders 00161 for( i1 = 0; i1 < l1; ++i1 ) { 00162 gapCosts1[ i1 ] = gapCost * i1; 00163 } 00164 costs2_1[ 0 ] = 0; 00165 for( i2 = 0; i2 < l2; ++i2 ) { 00166 gapCosts2[ i2 ] = gapCost * i2; 00167 costs2_1[ i2+1 ] = costs2_1[ i2 ] + gapCosts2[ i2 ]; 00168 } 00169 // compute alignment 00170 for( i1 = 0; i1 < l1; ++i1 ) { 00171 swap( costs2_0, costs2_1 ); 00172 actCost = costs2_0[ 0 ] + gapCosts1[ i1 ]; 00173 costs2_1[ 0 ] = actCost; 00174 for( i2 = 0; i2 < l2; ++i2 ) { 00175 const float64_t actMatch = costs2_0[ i2 ] + ( seq1[i1] == seq2[i2] ); 00176 const float64_t actGap1 = costs2_0[ i2+1 ] + gapCosts1[ i1 ]; 00177 const float64_t actGap2 = actCost + gapCosts2[ i2 ]; 00178 const float64_t actGap = min( actGap1, actGap2 ); 00179 actCost = min( actMatch, actGap ); 00180 costs2_1[ i2+1 ] = actCost; 00181 } 00182 } 00183 00184 SG_FREE(gapCosts1); 00185 SG_FREE(gapCosts2); 00186 SG_FREE(costs2_0); 00187 SG_FREE(costs2_1); 00188 00189 // return the final cost 00190 return actCost; 00191 }