SHOGUN
v2.0.0
|
00001 /* 00002 * This program is free software; you can redistribute it and/or modify 00003 * it under the terms of the GNU General Public License as published by 00004 * the Free Software Foundation; either version 3 of the License, or 00005 * (at your option) any later version. 00006 * 00007 * Written (W) 2011 Sergey Lisitsyn 00008 * Copyright (C) 2011 Berlin Institute of Technology and Max-Planck-Society 00009 */ 00010 00011 #include <shogun/converter/LaplacianEigenmaps.h> 00012 #include <shogun/converter/EmbeddingConverter.h> 00013 #ifdef HAVE_LAPACK 00014 #include <shogun/mathematics/arpack.h> 00015 #include <shogun/mathematics/lapack.h> 00016 #include <shogun/lib/FibonacciHeap.h> 00017 #include <shogun/mathematics/Math.h> 00018 #include <shogun/io/SGIO.h> 00019 #include <shogun/distance/Distance.h> 00020 #include <shogun/lib/Signal.h> 00021 00022 using namespace shogun; 00023 00024 CLaplacianEigenmaps::CLaplacianEigenmaps() : 00025 CEmbeddingConverter() 00026 { 00027 m_k = 3; 00028 m_tau = 1.0; 00029 00030 init(); 00031 } 00032 00033 void CLaplacianEigenmaps::init() 00034 { 00035 SG_ADD(&m_k, "k", "number of neighbors", MS_AVAILABLE); 00036 SG_ADD(&m_tau, "tau", "heat distribution coefficient", MS_AVAILABLE); 00037 } 00038 00039 CLaplacianEigenmaps::~CLaplacianEigenmaps() 00040 { 00041 } 00042 00043 void CLaplacianEigenmaps::set_k(int32_t k) 00044 { 00045 ASSERT(k>0); 00046 m_k = k; 00047 } 00048 00049 int32_t CLaplacianEigenmaps::get_k() const 00050 { 00051 return m_k; 00052 } 00053 00054 void CLaplacianEigenmaps::set_tau(float64_t tau) 00055 { 00056 m_tau = tau; 00057 } 00058 00059 float64_t CLaplacianEigenmaps::get_tau() const 00060 { 00061 return m_tau; 00062 } 00063 00064 const char* CLaplacianEigenmaps::get_name() const 00065 { 00066 return "LaplacianEigenmaps"; 00067 }; 00068 00069 CFeatures* CLaplacianEigenmaps::apply(CFeatures* features) 00070 { 00071 // shorthand for simplefeatures 00072 SG_REF(features); 00073 00074 // get dimensionality and number of vectors of data 00075 int32_t N = features->get_num_vectors(); 00076 ASSERT(m_k<N); 00077 ASSERT(m_target_dim<N); 00078 00079 // compute distance matrix 00080 ASSERT(m_distance); 00081 m_distance->init(features,features); 00082 CDenseFeatures<float64_t>* embedding = embed_distance(m_distance,features); 00083 m_distance->remove_lhs_and_rhs(); 00084 SG_UNREF(features); 00085 return (CFeatures*)embedding; 00086 } 00087 00088 CDenseFeatures<float64_t>* CLaplacianEigenmaps::embed_distance(CDistance* distance, CFeatures* features) 00089 { 00090 int32_t i,j; 00091 SGMatrix<float64_t> W_sgmatrix = distance->get_distance_matrix(); 00092 ASSERT(W_sgmatrix.num_rows==W_sgmatrix.num_cols); 00093 int32_t N = W_sgmatrix.num_rows; 00094 // shorthand 00095 float64_t* W_matrix = W_sgmatrix.matrix; 00096 00097 // init heap to use 00098 CFibonacciHeap* heap = new CFibonacciHeap(N); 00099 float64_t tmp; 00100 // for each object 00101 for (i=0; i<N; i++) 00102 { 00103 // fill heap 00104 for (j=0; j<N; j++) 00105 heap->insert(j,W_matrix[i*N+j]); 00106 00107 // rearrange heap with extracting ith object itself 00108 heap->extract_min(tmp); 00109 00110 // extract nearest neighbors, takes ~O(k*log n), and change sign for them 00111 for (j=0; j<m_k; j++) 00112 W_matrix[i*N+heap->extract_min(tmp)] *= -1.0; 00113 00114 // remove all 'positive' distances and change 'negative' ones to positive 00115 for (j=0; j<N; j++) 00116 { 00117 if (W_matrix[i*N+j]>0.0) 00118 W_matrix[i*N+j] = 0.0; 00119 else 00120 W_matrix[i*N+j] *= -1.0; 00121 } 00122 00123 // clear heap to reuse 00124 heap->clear(); 00125 } 00126 delete heap; 00127 // make distance matrix symmetric with mutual kNN relation 00128 for (i=0; i<N; i++) 00129 { 00130 // check only upper triangle 00131 for (j=i; j<N; j++) 00132 { 00133 // make kNN relation symmetric 00134 if (W_matrix[i*N+j]!=0.0 || W_matrix[j*N+i]==0.0) 00135 { 00136 W_matrix[j*N+i] = W_matrix[i*N+j]; 00137 } 00138 if (W_matrix[j*N+i]!=0.0 || W_matrix[i*N+j]==0.0) 00139 { 00140 W_matrix[i*N+j] = W_matrix[j*N+i]; 00141 } 00142 00143 if (W_matrix[i*N+j] != 0.0) 00144 { 00145 // compute heat, exp(-d^2/tau) 00146 tmp = CMath::exp(-CMath::sq(W_matrix[i*N+j])/m_tau); 00147 W_matrix[i*N+j] = tmp; 00148 W_matrix[j*N+i] = tmp; 00149 } 00150 } 00151 } 00152 00153 // compute D 00154 CDenseFeatures<float64_t>* embedding = construct_embedding(features,W_sgmatrix); 00155 00156 return embedding; 00157 } 00158 00159 CDenseFeatures<float64_t>* CLaplacianEigenmaps::construct_embedding(CFeatures* features, 00160 SGMatrix<float64_t> W_matrix) 00161 { 00162 int32_t i,j; 00163 int32_t N = W_matrix.num_cols; 00164 00165 float64_t* D_diag_vector = SG_CALLOC(float64_t, N); 00166 for (i=0; i<N; i++) 00167 { 00168 for (j=0; j<N; j++) 00169 D_diag_vector[i] += W_matrix[i*N+j]; 00170 } 00171 00172 // W = -W 00173 for (i=0; i<N*N; i++) 00174 if (W_matrix[i]>0.0) 00175 W_matrix[i] *= -1.0; 00176 // W = W + D 00177 for (i=0; i<N; i++) 00178 W_matrix[i*N+i] += D_diag_vector[i]; 00179 00180 #ifdef HAVE_ARPACK 00181 // using ARPACK DS{E,A}UPD 00182 int eigenproblem_status = 0; 00183 float64_t* eigenvalues_vector = SG_MALLOC(float64_t,m_target_dim+1); 00184 arpack_dsxupd(W_matrix.matrix,D_diag_vector,true,N,m_target_dim+1,"LA",true,3,false,false,-1e-9,0.0, 00185 eigenvalues_vector,W_matrix.matrix,eigenproblem_status); 00186 00187 if (eigenproblem_status!=0) 00188 SG_ERROR("DSXUPD failed with code %d\n",eigenproblem_status); 00189 00190 SG_FREE(eigenvalues_vector); 00191 #else /* HAVE_ARPACK */ 00192 // using LAPACK DSYGVX 00193 // requires 2x memory because of dense rhs matrix usage 00194 int eigenproblem_status = 0; 00195 float64_t* eigenvalues_vector = SG_MALLOC(float64_t,N); 00196 float64_t* rhs = SG_CALLOC(float64_t,N*N); 00197 // fill rhs with diag (for safety reasons zeros will be replaced with 1e-3) 00198 for (i=0; i<N; i++) 00199 rhs[i*N+i] = D_diag_vector[i]; 00200 00201 wrap_dsygvx(1,'V','U',N,W_matrix.matrix,N,rhs,N,1,m_target_dim+2,eigenvalues_vector,W_matrix.matrix,&eigenproblem_status); 00202 00203 if (eigenproblem_status) 00204 SG_ERROR("DSYGVX failed with code: %d.\n",eigenproblem_status); 00205 00206 SG_FREE(rhs); 00207 SG_FREE(eigenvalues_vector); 00208 00209 #endif /* HAVE_ARPACK */ 00210 SG_FREE(D_diag_vector); 00211 00212 SGMatrix<float64_t> new_features = SGMatrix<float64_t>(m_target_dim,N); 00213 // fill features according to used solver 00214 for (i=0; i<m_target_dim; i++) 00215 { 00216 for (j=0; j<N; j++) 00217 { 00218 #ifdef HAVE_ARPACK 00219 new_features[j*m_target_dim+i] = W_matrix[j*(m_target_dim+1)+i+1]; 00220 #else 00221 new_features[j*m_target_dim+i] = W_matrix[(i+1)*N+j]; 00222 #endif 00223 } 00224 } 00225 return new CDenseFeatures<float64_t>(new_features); 00226 } 00227 00228 #endif /* HAVE_LAPACK */