SHOGUN
v2.0.0
|
00001 /* 00002 * This program is free software; you can redistribute it and/or modify 00003 * it under the terms of the GNU General Public License as published by 00004 * the Free Software Foundation; either version 3 of the License, or 00005 * (at your option) any later version. 00006 * 00007 * Written (W) 2011 Sergey Lisitsyn 00008 * Copyright (C) 2011 Berlin Institute of Technology and Max-Planck-Society 00009 */ 00010 00011 #include <shogun/converter/KernelLocallyLinearEmbedding.h> 00012 #ifdef HAVE_LAPACK 00013 #include <shogun/mathematics/lapack.h> 00014 #include <shogun/lib/FibonacciHeap.h> 00015 #include <shogun/lib/common.h> 00016 #include <shogun/base/DynArray.h> 00017 #include <shogun/mathematics/Math.h> 00018 #include <shogun/io/SGIO.h> 00019 #include <shogun/lib/Time.h> 00020 #include <shogun/lib/Signal.h> 00021 00022 #ifdef HAVE_PTHREAD 00023 #include <pthread.h> 00024 #endif 00025 00026 using namespace shogun; 00027 00028 #ifndef DOXYGEN_SHOULD_SKIP_THIS 00029 struct LK_RECONSTRUCTION_THREAD_PARAM 00030 { 00032 int32_t idx_start; 00034 int32_t idx_stop; 00036 int32_t idx_step; 00038 int32_t m_k; 00040 int32_t N; 00042 const int32_t* neighborhood_matrix; 00044 float64_t* local_gram_matrix; 00046 const float64_t* kernel_matrix; 00048 float64_t* id_vector; 00050 float64_t reconstruction_shift; 00052 float64_t* W_matrix; 00053 }; 00054 00055 class KLLE_COVERTREE_POINT 00056 { 00057 public: 00058 00059 KLLE_COVERTREE_POINT(int32_t index, const SGMatrix<float64_t>& dmatrix) 00060 { 00061 this->point_index = index; 00062 this->kernel_matrix = dmatrix; 00063 this->kii = dmatrix[index*dmatrix.num_rows+index]; 00064 } 00065 00066 inline double distance(const KLLE_COVERTREE_POINT& p) const 00067 { 00068 int32_t N = kernel_matrix.num_rows; 00069 return kii+kernel_matrix[p.point_index*N+p.point_index]-2.0*kernel_matrix[point_index*N+p.point_index]; 00070 } 00071 00072 inline bool operator==(const KLLE_COVERTREE_POINT& p) const 00073 { 00074 return (p.point_index==this->point_index); 00075 } 00076 00077 int32_t point_index; 00078 float64_t kii; 00079 SGMatrix<float64_t> kernel_matrix; 00080 }; 00081 00082 #endif /* DOXYGEN_SHOULD_SKIP_THIS */ 00083 00084 CKernelLocallyLinearEmbedding::CKernelLocallyLinearEmbedding() : 00085 CLocallyLinearEmbedding() 00086 { 00087 } 00088 00089 CKernelLocallyLinearEmbedding::CKernelLocallyLinearEmbedding(CKernel* kernel) : 00090 CLocallyLinearEmbedding() 00091 { 00092 set_kernel(kernel); 00093 } 00094 00095 const char* CKernelLocallyLinearEmbedding::get_name() const 00096 { 00097 return "KernelLocallyLinearEmbedding"; 00098 }; 00099 00100 CKernelLocallyLinearEmbedding::~CKernelLocallyLinearEmbedding() 00101 { 00102 } 00103 00104 CFeatures* CKernelLocallyLinearEmbedding::apply(CFeatures* features) 00105 { 00106 ASSERT(features); 00107 SG_REF(features); 00108 00109 // get dimensionality and number of vectors of data 00110 int32_t N = features->get_num_vectors(); 00111 if (m_k>=N) 00112 SG_ERROR("Number of neighbors (%d) should be less than number of objects (%d).\n", 00113 m_k, N); 00114 00115 // compute kernel matrix 00116 ASSERT(m_kernel); 00117 m_kernel->init(features,features); 00118 CDenseFeatures<float64_t>* embedding = embed_kernel(m_kernel); 00119 m_kernel->cleanup(); 00120 SG_UNREF(features); 00121 return (CFeatures*)embedding; 00122 } 00123 00124 CDenseFeatures<float64_t>* CKernelLocallyLinearEmbedding::embed_kernel(CKernel* kernel) 00125 { 00126 CTime* time = new CTime(); 00127 00128 time->start(); 00129 SGMatrix<float64_t> kernel_matrix = kernel->get_kernel_matrix(); 00130 SG_DEBUG("Kernel matrix computation took %fs\n",time->cur_time_diff()); 00131 00132 time->start(); 00133 SGMatrix<int32_t> neighborhood_matrix = get_neighborhood_matrix(kernel_matrix,m_k); 00134 SG_DEBUG("Neighbors finding took %fs\n",time->cur_time_diff()); 00135 00136 time->start(); 00137 SGMatrix<float64_t> M_matrix = construct_weight_matrix(kernel_matrix,neighborhood_matrix); 00138 SG_DEBUG("Weights computation took %fs\n",time->cur_time_diff()); 00139 00140 time->start(); 00141 SGMatrix<float64_t> nullspace = construct_embedding(M_matrix,m_target_dim); 00142 SG_DEBUG("Embedding construction took %fs\n",time->cur_time_diff()); 00143 00144 delete time; 00145 00146 return new CDenseFeatures<float64_t>(nullspace); 00147 } 00148 00149 SGMatrix<float64_t> CKernelLocallyLinearEmbedding::construct_weight_matrix(SGMatrix<float64_t> kernel_matrix, 00150 SGMatrix<int32_t> neighborhood_matrix) 00151 { 00152 int32_t N = kernel_matrix.num_cols; 00153 // loop variables 00154 #ifdef HAVE_PTHREAD 00155 int32_t t; 00156 int32_t num_threads = parallel->get_num_threads(); 00157 ASSERT(num_threads>0); 00158 // allocate threads 00159 pthread_t* threads = SG_MALLOC(pthread_t, num_threads); 00160 LK_RECONSTRUCTION_THREAD_PARAM* parameters = SG_MALLOC(LK_RECONSTRUCTION_THREAD_PARAM, num_threads); 00161 pthread_attr_t attr; 00162 pthread_attr_init(&attr); 00163 pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE); 00164 #else 00165 int32_t num_threads = 1; 00166 #endif 00167 float64_t* W_matrix = SG_CALLOC(float64_t, N*N); 00168 // init matrices and norm factor to be used 00169 float64_t* local_gram_matrix = SG_MALLOC(float64_t, m_k*m_k*num_threads); 00170 float64_t* id_vector = SG_MALLOC(float64_t, m_k*num_threads); 00171 00172 #ifdef HAVE_PTHREAD 00173 for (t=0; t<num_threads; t++) 00174 { 00175 parameters[t].idx_start = t; 00176 parameters[t].idx_step = num_threads; 00177 parameters[t].idx_stop = N; 00178 parameters[t].m_k = m_k; 00179 parameters[t].N = N; 00180 parameters[t].neighborhood_matrix = neighborhood_matrix.matrix; 00181 parameters[t].kernel_matrix = kernel_matrix.matrix; 00182 parameters[t].local_gram_matrix = local_gram_matrix+(m_k*m_k)*t; 00183 parameters[t].id_vector = id_vector+m_k*t; 00184 parameters[t].W_matrix = W_matrix; 00185 parameters[t].reconstruction_shift = m_reconstruction_shift; 00186 pthread_create(&threads[t], &attr, run_linearreconstruction_thread, (void*)¶meters[t]); 00187 } 00188 for (t=0; t<num_threads; t++) 00189 pthread_join(threads[t], NULL); 00190 pthread_attr_destroy(&attr); 00191 SG_FREE(parameters); 00192 SG_FREE(threads); 00193 #else 00194 LK_RECONSTRUCTION_THREAD_PARAM single_thread_param; 00195 single_thread_param.idx_start = 0; 00196 single_thread_param.idx_step = 1; 00197 single_thread_param.idx_stop = N; 00198 single_thread_param.m_k = m_k; 00199 single_thread_param.N = N; 00200 single_thread_param.neighborhood_matrix = neighborhood_matrix.matrix; 00201 single_thread_param.local_gram_matrix = local_gram_matrix; 00202 single_thread_param.kernel_matrix = kernel_matrix.matrix; 00203 single_thread_param.id_vector = id_vector; 00204 single_thread_param.W_matrix = W_matrix; 00205 run_linearreconstruction_thread((void*)&single_thread_param); 00206 #endif 00207 00208 // clean 00209 SG_FREE(id_vector); 00210 SG_FREE(local_gram_matrix); 00211 00212 return SGMatrix<float64_t>(W_matrix,N,N); 00213 } 00214 00215 void* CKernelLocallyLinearEmbedding::run_linearreconstruction_thread(void* p) 00216 { 00217 LK_RECONSTRUCTION_THREAD_PARAM* parameters = (LK_RECONSTRUCTION_THREAD_PARAM*)p; 00218 int32_t idx_start = parameters->idx_start; 00219 int32_t idx_step = parameters->idx_step; 00220 int32_t idx_stop = parameters->idx_stop; 00221 int32_t m_k = parameters->m_k; 00222 int32_t N = parameters->N; 00223 const int32_t* neighborhood_matrix = parameters->neighborhood_matrix; 00224 float64_t* local_gram_matrix = parameters->local_gram_matrix; 00225 const float64_t* kernel_matrix = parameters->kernel_matrix; 00226 float64_t* id_vector = parameters->id_vector; 00227 float64_t* W_matrix = parameters->W_matrix; 00228 float64_t reconstruction_shift = parameters->reconstruction_shift; 00229 00230 int32_t i,j,k; 00231 float64_t norming,trace; 00232 00233 for (i=idx_start; i<idx_stop; i+=idx_step) 00234 { 00235 for (j=0; j<m_k; j++) 00236 { 00237 for (k=0; k<m_k; k++) 00238 local_gram_matrix[j*m_k+k] = 00239 kernel_matrix[i*N+i] - 00240 kernel_matrix[i*N+neighborhood_matrix[i*m_k+j]] - 00241 kernel_matrix[i*N+neighborhood_matrix[i*m_k+k]] + 00242 kernel_matrix[neighborhood_matrix[i*m_k+j]*N+neighborhood_matrix[i*m_k+k]]; 00243 } 00244 00245 for (j=0; j<m_k; j++) 00246 id_vector[j] = 1.0; 00247 00248 // compute tr(C) 00249 trace = 0.0; 00250 for (j=0; j<m_k; j++) 00251 trace += local_gram_matrix[j*m_k+j]; 00252 00253 // regularize gram matrix 00254 for (j=0; j<m_k; j++) 00255 local_gram_matrix[j*m_k+j] += reconstruction_shift*trace; 00256 00257 clapack_dposv(CblasColMajor,CblasLower,m_k,1,local_gram_matrix,m_k,id_vector,m_k); 00258 00259 // normalize weights 00260 norming=0.0; 00261 for (j=0; j<m_k; j++) 00262 norming += id_vector[j]; 00263 00264 cblas_dscal(m_k,1.0/norming,id_vector,1); 00265 00266 memset(local_gram_matrix,0,sizeof(float64_t)*m_k*m_k); 00267 cblas_dger(CblasColMajor,m_k,m_k,1.0,id_vector,1,id_vector,1,local_gram_matrix,m_k); 00268 00269 // put weights into W matrix 00270 W_matrix[N*i+i] += 1.0; 00271 for (j=0; j<m_k; j++) 00272 { 00273 W_matrix[N*i+neighborhood_matrix[i*m_k+j]] -= id_vector[j]; 00274 W_matrix[N*neighborhood_matrix[i*m_k+j]+i] -= id_vector[j]; 00275 } 00276 for (j=0; j<m_k; j++) 00277 { 00278 for (k=0; k<m_k; k++) 00279 W_matrix[N*neighborhood_matrix[i*m_k+j]+neighborhood_matrix[i*m_k+k]]+=local_gram_matrix[j*m_k+k]; 00280 } 00281 } 00282 return NULL; 00283 } 00284 00285 SGMatrix<int32_t> CKernelLocallyLinearEmbedding::get_neighborhood_matrix(SGMatrix<float64_t> kernel_matrix, int32_t k) 00286 { 00287 int32_t i; 00288 int32_t N = kernel_matrix.num_cols; 00289 00290 int32_t* neighborhood_matrix = SG_MALLOC(int32_t, N*k); 00291 00292 float64_t max_dist=0.0; 00293 for (i=0; i<N; i++) 00294 max_dist = CMath::max(max_dist,kernel_matrix[i*N+i]); 00295 00296 std::vector<KLLE_COVERTREE_POINT> vectors; 00297 vectors.reserve(N); 00298 for (i=0; i<N; i++) 00299 vectors.push_back(KLLE_COVERTREE_POINT(i,kernel_matrix)); 00300 00301 CoverTree<KLLE_COVERTREE_POINT>* coverTree = new CoverTree<KLLE_COVERTREE_POINT>(2.0*max_dist,vectors); 00302 00303 for (i=0; i<N; i++) 00304 { 00305 std::vector<KLLE_COVERTREE_POINT> neighbors = 00306 coverTree->kNearestNeighbors(vectors[i],k+1); 00307 00308 ASSERT(neighbors.size()>=unsigned(k+1)); 00309 00310 for (std::size_t m=1; m<unsigned(k+1); m++) 00311 neighborhood_matrix[i*k+m-1] = neighbors[m].point_index; 00312 } 00313 00314 delete coverTree; 00315 00316 return SGMatrix<int32_t>(neighborhood_matrix,k,N); 00317 } 00318 00319 #endif /* HAVE_LAPACK */