SHOGUN
v2.0.0
|
00001 /* 00002 * This program is free software; you can redistribute it and/or modify 00003 * it under the terms of the GNU General Public License as published by 00004 * the Free Software Foundation; either version 3 of the License, or 00005 * (at your option) any later version. 00006 * 00007 * Written (W) 2011 Sergey Lisitsyn 00008 * Copyright (C) 2011 Berlin Institute of Technology and Max-Planck-Society 00009 */ 00010 00011 #include <shogun/converter/Isomap.h> 00012 #ifdef HAVE_LAPACK 00013 #include <shogun/lib/common.h> 00014 #include <shogun/lib/FibonacciHeap.h> 00015 #include <shogun/mathematics/Math.h> 00016 #include <shogun/io/SGIO.h> 00017 #include <shogun/base/Parallel.h> 00018 #include <shogun/lib/Signal.h> 00019 #include <shogun/lib/CoverTree.h> 00020 00021 #ifdef HAVE_PTHREAD 00022 #include <pthread.h> 00023 #endif 00024 00025 using namespace shogun; 00026 00027 #ifndef DOXYGEN_SHOULD_SKIP_THIS 00028 /* struct storing thread params 00029 */ 00030 struct DIJKSTRA_THREAD_PARAM 00031 { 00033 CFibonacciHeap* heap; 00036 const float64_t* edges_matrix; 00039 const int32_t* edges_idx_matrix; 00041 float64_t* shortest_D; 00043 int32_t i_start; 00045 int32_t i_stop; 00047 int32_t i_step; 00049 int32_t m_k; 00051 bool* s; 00053 bool* f; 00054 }; 00055 00056 class ISOMAP_COVERTREE_POINT 00057 { 00058 public: 00059 00060 ISOMAP_COVERTREE_POINT(int32_t index, const SGMatrix<float64_t>& dmatrix) 00061 { 00062 point_index = index; 00063 distance_matrix = dmatrix; 00064 } 00065 00066 inline double distance(const ISOMAP_COVERTREE_POINT& p) const 00067 { 00068 return distance_matrix[point_index*distance_matrix.num_rows+p.point_index]; 00069 } 00070 00071 inline bool operator==(const ISOMAP_COVERTREE_POINT& p) const 00072 { 00073 return (p.point_index==point_index); 00074 } 00075 00076 int32_t point_index; 00077 SGMatrix<float64_t> distance_matrix; 00078 }; 00079 00080 #endif /* DOXYGEN_SHOULD_SKIP_THIS */ 00081 00082 CIsomap::CIsomap() : CMultidimensionalScaling() 00083 { 00084 m_k = 3; 00085 00086 init(); 00087 } 00088 00089 void CIsomap::init() 00090 { 00091 SG_ADD(&m_k, "k", "number of neighbors", MS_AVAILABLE); 00092 } 00093 00094 CIsomap::~CIsomap() 00095 { 00096 } 00097 00098 void CIsomap::set_k(int32_t k) 00099 { 00100 ASSERT(k>0); 00101 m_k = k; 00102 } 00103 00104 int32_t CIsomap::get_k() const 00105 { 00106 return m_k; 00107 } 00108 00109 const char* CIsomap::get_name() const 00110 { 00111 return "Isomap"; 00112 } 00113 00114 SGMatrix<float64_t> CIsomap::process_distance_matrix(SGMatrix<float64_t> distance_matrix) 00115 { 00116 return isomap_distance(distance_matrix); 00117 } 00118 00119 SGMatrix<float64_t> CIsomap::isomap_distance(SGMatrix<float64_t> D_matrix) 00120 { 00121 int32_t N,i; 00122 N = D_matrix.num_cols; 00123 if (D_matrix.num_cols!=D_matrix.num_rows) 00124 { 00125 SG_ERROR("Given distance matrix is not square.\n"); 00126 } 00127 if (m_k>=N) 00128 { 00129 SG_ERROR("K parameter should be less than number of given vectors (k=%d, N=%d)\n", m_k, N); 00130 } 00131 00132 // cut by k-nearest neighbors 00133 int32_t* edges_idx_matrix = SG_MALLOC(int32_t, N*m_k); 00134 float64_t* edges_matrix = SG_MALLOC(float64_t, N*m_k); 00135 00136 float64_t max_dist = SGVector<float64_t>::max(D_matrix.matrix,N*N); 00137 CoverTree<ISOMAP_COVERTREE_POINT>* coverTree = new CoverTree<ISOMAP_COVERTREE_POINT>(max_dist); 00138 00139 for (i=0; i<N; i++) 00140 coverTree->insert(ISOMAP_COVERTREE_POINT(i,D_matrix)); 00141 00142 for (i=0; i<N; i++) 00143 { 00144 ISOMAP_COVERTREE_POINT origin(i,D_matrix); 00145 std::vector<ISOMAP_COVERTREE_POINT> neighbors = 00146 coverTree->kNearestNeighbors(origin,m_k+1); 00147 for (std::size_t m=1; m<neighbors.size(); m++) 00148 { 00149 edges_matrix[i*m_k+m-1] = origin.distance(neighbors[m]); 00150 edges_idx_matrix[i*m_k+m-1] = neighbors[m].point_index; 00151 } 00152 } 00153 00154 delete coverTree; 00155 00156 #ifdef HAVE_PTHREAD 00157 int32_t t; 00158 // Parallel Dijkstra with Fibonacci Heap 00159 int32_t num_threads = parallel->get_num_threads(); 00160 ASSERT(num_threads>0); 00161 // allocate threads and thread parameters 00162 pthread_t* threads = SG_MALLOC(pthread_t, num_threads); 00163 DIJKSTRA_THREAD_PARAM* parameters = SG_MALLOC(DIJKSTRA_THREAD_PARAM, num_threads); 00164 // allocate heaps 00165 CFibonacciHeap** heaps = SG_MALLOC(CFibonacciHeap*, num_threads); 00166 for (t=0; t<num_threads; t++) 00167 heaps[t] = new CFibonacciHeap(N); 00168 00169 #else 00170 int32_t num_threads = 1; 00171 #endif 00172 00173 // allocate (s)olution 00174 bool* s = SG_MALLOC(bool,N*num_threads); 00175 // allocate (f)rontier 00176 bool* f = SG_MALLOC(bool,N*num_threads); 00177 // init matrix to store shortest distances 00178 float64_t* shortest_D = D_matrix.matrix; 00179 00180 #ifdef HAVE_PTHREAD 00181 00182 pthread_attr_t attr; 00183 pthread_attr_init(&attr); 00184 pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE); 00185 for (t=0; t<num_threads; t++) 00186 { 00187 parameters[t].i_start = t; 00188 parameters[t].i_stop = N; 00189 parameters[t].i_step = num_threads; 00190 parameters[t].heap = heaps[t]; 00191 parameters[t].edges_matrix = edges_matrix; 00192 parameters[t].edges_idx_matrix = edges_idx_matrix; 00193 parameters[t].s = s+t*N; 00194 parameters[t].f = f+t*N; 00195 parameters[t].m_k = m_k; 00196 parameters[t].shortest_D = shortest_D; 00197 pthread_create(&threads[t], &attr, CIsomap::run_dijkstra_thread, (void*)¶meters[t]); 00198 } 00199 for (t=0; t<num_threads; t++) 00200 pthread_join(threads[t], NULL); 00201 pthread_attr_destroy(&attr); 00202 for (t=0; t<num_threads; t++) 00203 delete heaps[t]; 00204 SG_FREE(heaps); 00205 SG_FREE(parameters); 00206 SG_FREE(threads); 00207 #else 00208 DIJKSTRA_THREAD_PARAM single_thread_param; 00209 single_thread_param.i_start = 0; 00210 single_thread_param.i_stop = N; 00211 single_thread_param.i_step = 1; 00212 single_thread_param.m_k = m_k; 00213 single_thread_param.heap = new CFibonacciHeap(N); 00214 single_thread_param.edges_matrix = edges_matrix; 00215 single_thread_param.edges_idx_matrix = edges_idx_matrix; 00216 single_thread_param.s = s; 00217 single_thread_param.f = f; 00218 single_thread_param.shortest_D = shortest_D; 00219 run_dijkstra_thread((void*)&single_thread_param); 00220 delete single_thread_param.heap; 00221 #endif 00222 // cleanup 00223 SG_FREE(edges_matrix); 00224 SG_FREE(edges_idx_matrix); 00225 SG_FREE(s); 00226 SG_FREE(f); 00227 00228 return D_matrix; 00229 } 00230 00231 void* CIsomap::run_dijkstra_thread(void *p) 00232 { 00233 // get parameters from p 00234 DIJKSTRA_THREAD_PARAM* parameters = (DIJKSTRA_THREAD_PARAM*)p; 00235 CFibonacciHeap* heap = parameters->heap; 00236 int32_t i_start = parameters->i_start; 00237 int32_t i_stop = parameters->i_stop; 00238 int32_t i_step = parameters->i_step; 00239 bool* s = parameters->s; 00240 bool* f = parameters->f; 00241 const float64_t* edges_matrix = parameters->edges_matrix; 00242 const int32_t* edges_idx_matrix = parameters->edges_idx_matrix; 00243 float64_t* shortest_D = parameters->shortest_D; 00244 int32_t m_k = parameters->m_k; 00245 int32_t k,j,i,min_item,w; 00246 int32_t N = i_stop; 00247 00248 // temporary vars 00249 float64_t dist,tmp; 00250 00251 // main loop 00252 for (k=i_start; k<i_stop; k+=i_step) 00253 { 00254 // fill s and f with false, fill shortest_D with infinity 00255 for (j=0; j<N; j++) 00256 { 00257 shortest_D[k*N+j] = CMath::ALMOST_INFTY; 00258 s[j] = false; 00259 f[j] = false; 00260 } 00261 // set distance from k to k as zero 00262 shortest_D[k*N+k] = 0.0; 00263 00264 // insert kth object to heap with zero distance and set f[k] true 00265 heap->insert(k,0.0); 00266 f[k] = true; 00267 00268 // while heap is not empty 00269 while (heap->get_num_nodes()>0) 00270 { 00271 // extract min and set (s)olution state as true and (f)rontier as false 00272 min_item = heap->extract_min(tmp); 00273 s[min_item] = true; 00274 f[min_item] = false; 00275 00276 // for-each edge (min_item->w) 00277 for (i=0; i<m_k; i++) 00278 { 00279 // get w idx 00280 w = edges_idx_matrix[min_item*m_k+i]; 00281 // if w is not in solution yet 00282 if (s[w] == false) 00283 { 00284 // get distance from k to i through min_item 00285 dist = shortest_D[k*N+min_item] + edges_matrix[min_item*m_k+i]; 00286 // if distance can be relaxed 00287 if (dist < shortest_D[k*N+w]) 00288 { 00289 // relax distance 00290 shortest_D[k*N+w] = dist; 00291 // if w is in (f)rontier 00292 if (f[w]) 00293 { 00294 // decrease distance in heap 00295 heap->decrease_key(w, dist); 00296 } 00297 else 00298 { 00299 // insert w to heap and set (f)rontier as true 00300 heap->insert(w, dist); 00301 f[w] = true; 00302 } 00303 } 00304 } 00305 } 00306 } 00307 // clear heap to re-use 00308 heap->clear(); 00309 } 00310 return NULL; 00311 } 00312 00313 #endif /* HAVE_LAPACK */