SHOGUN
v2.0.0
|
00001 /* 00002 * This program is free software; you can redistribute it and/or modify 00003 * it under the terms of the GNU General Public License as published by 00004 * the Free Software Foundation; either version 3 of the License, or 00005 * (at your option) any later version. 00006 * 00007 * Written (W) 2011 Sergey Lisitsyn 00008 * Copyright (C) 2011 Berlin Institute of Technology and Max-Planck-Society 00009 */ 00010 00011 #include <shogun/converter/DiffusionMaps.h> 00012 #include <shogun/converter/EmbeddingConverter.h> 00013 #include <shogun/lib/config.h> 00014 #ifdef HAVE_LAPACK 00015 #include <shogun/mathematics/arpack.h> 00016 #include <shogun/mathematics/lapack.h> 00017 #include <shogun/mathematics/Math.h> 00018 #include <shogun/kernel/GaussianKernel.h> 00019 #include <shogun/io/SGIO.h> 00020 #include <shogun/kernel/Kernel.h> 00021 #include <shogun/lib/Signal.h> 00022 #include <shogun/lib/Time.h> 00023 00024 using namespace shogun; 00025 00026 CDiffusionMaps::CDiffusionMaps() : 00027 CEmbeddingConverter() 00028 { 00029 m_t = 10; 00030 set_kernel(new CGaussianKernel(10,1.0)); 00031 00032 init(); 00033 } 00034 00035 void CDiffusionMaps::init() 00036 { 00037 SG_ADD(&m_t, "t", "number of steps", MS_AVAILABLE); 00038 } 00039 00040 CDiffusionMaps::~CDiffusionMaps() 00041 { 00042 } 00043 00044 void CDiffusionMaps::set_t(int32_t t) 00045 { 00046 m_t = t; 00047 } 00048 00049 int32_t CDiffusionMaps::get_t() const 00050 { 00051 return m_t; 00052 } 00053 00054 const char* CDiffusionMaps::get_name() const 00055 { 00056 return "DiffusionMaps"; 00057 }; 00058 00059 CFeatures* CDiffusionMaps::apply(CFeatures* features) 00060 { 00061 ASSERT(features); 00062 // shorthand for simplefeatures 00063 SG_REF(features); 00064 // compute distance matrix 00065 ASSERT(m_kernel); 00066 m_kernel->init(features,features); 00067 CDenseFeatures<float64_t>* embedding = embed_kernel(m_kernel); 00068 m_kernel->cleanup(); 00069 SG_UNREF(features); 00070 return (CFeatures*)embedding; 00071 } 00072 00073 CDenseFeatures<float64_t>* CDiffusionMaps::embed_kernel(CKernel* kernel) 00074 { 00075 #ifdef HAVE_ARPACK 00076 bool use_arpack = true; 00077 #else 00078 bool use_arpack = false; 00079 #endif 00080 int32_t i,j; 00081 SGMatrix<float64_t> kernel_matrix = kernel->get_kernel_matrix(); 00082 ASSERT(kernel_matrix.num_rows==kernel_matrix.num_cols); 00083 int32_t N = kernel_matrix.num_rows; 00084 00085 float64_t* p_vector = SG_CALLOC(float64_t, N); 00086 for (i=0; i<N; i++) 00087 { 00088 for (j=0; j<N; j++) 00089 { 00090 p_vector[i] += kernel_matrix.matrix[j*N+i]; 00091 } 00092 } 00093 //CMath::display_matrix(kernel_matrix.matrix,N,N,"K"); 00094 for (i=0; i<N; i++) 00095 { 00096 for (j=0; j<N; j++) 00097 { 00098 kernel_matrix.matrix[i*N+j] /= CMath::pow(p_vector[i]*p_vector[j], m_t); 00099 } 00100 } 00101 //CMath::display_matrix(kernel_matrix.matrix,N,N,"K"); 00102 00103 for (i=0; i<N; i++) 00104 { 00105 p_vector[i] = 0.0; 00106 for (j=0; j<N; j++) 00107 { 00108 p_vector[i] += kernel_matrix.matrix[j*N+i]; 00109 } 00110 p_vector[i] = CMath::sqrt(p_vector[i]); 00111 } 00112 00113 for (i=0; i<N; i++) 00114 { 00115 for (j=0; j<N; j++) 00116 { 00117 kernel_matrix.matrix[i*N+j] /= p_vector[i]*p_vector[j]; 00118 } 00119 } 00120 00121 SG_FREE(p_vector); 00122 float64_t* s_values = SG_MALLOC(float64_t, N); 00123 00124 int32_t info = 0; 00125 SGMatrix<float64_t> new_feature_matrix = SGMatrix<float64_t>(m_target_dim,N); 00126 00127 if (use_arpack) 00128 { 00129 #ifdef HAVE_ARPACK 00130 arpack_dsxupd(kernel_matrix.matrix,NULL,false,N,m_target_dim,"LA",false,1,false,true,0.0,0.0,s_values,kernel_matrix.matrix,info); 00131 #endif /* HAVE_ARPACK */ 00132 for (i=0; i<m_target_dim; i++) 00133 { 00134 for (j=0; j<N; j++) 00135 new_feature_matrix[j*m_target_dim+i] = kernel_matrix[j*m_target_dim+i]; 00136 } 00137 } 00138 else 00139 { 00140 SG_WARNING("LAPACK does not provide efficient routines to construct embedding (this may take time). Consider installing ARPACK."); 00141 wrap_dgesvd('O','N',N,N,kernel_matrix.matrix,N,s_values,NULL,1,NULL,1,&info); 00142 for (i=0; i<m_target_dim; i++) 00143 { 00144 for (j=0; j<N; j++) 00145 new_feature_matrix[j*m_target_dim+i] = 00146 kernel_matrix[(m_target_dim-i-1)*N+j]; 00147 } 00148 } 00149 if (info) 00150 SG_ERROR("Eigenproblem solving failed with %d code", info); 00151 00152 SG_FREE(s_values); 00153 00154 return new CDenseFeatures<float64_t>(new_feature_matrix); 00155 } 00156 #endif /* HAVE_LAPACK */