SHOGUN
v2.0.0
|
00001 /* 00002 * This program is free software; you can redistribute it and/or modify 00003 * it under the terms of the GNU General Public License as published by 00004 * the Free Software Foundation; either version 3 of the License, or 00005 * (at your option) any later version. 00006 * 00007 * Copyright (C) 2012 Jacob Walker 00008 * 00009 * Code adapted from Gaussian Process Machine Learning Toolbox 00010 * http://www.gaussianprocess.org/gpml/code/matlab/doc/ 00011 * 00012 */ 00013 #include <shogun/lib/config.h> 00014 #ifdef HAVE_EIGEN3 00015 #ifdef HAVE_LAPACK 00016 00017 #include <shogun/regression/gp/ExactInferenceMethod.h> 00018 #include <shogun/regression/gp/GaussianLikelihood.h> 00019 #include <shogun/mathematics/lapack.h> 00020 #include <shogun/mathematics/Math.h> 00021 #include <shogun/labels/RegressionLabels.h> 00022 #include <shogun/kernel/GaussianKernel.h> 00023 #include <shogun/features/CombinedFeatures.h> 00024 00025 using namespace shogun; 00026 00027 CExactInferenceMethod::CExactInferenceMethod() : CInferenceMethod() 00028 { 00029 update_all(); 00030 update_parameter_hash(); 00031 } 00032 00033 CExactInferenceMethod::CExactInferenceMethod(CKernel* kern, CFeatures* feat, 00034 CMeanFunction* m, CLabels* lab, CLikelihoodModel* mod) : 00035 CInferenceMethod(kern, feat, m, lab, mod) 00036 { 00037 update_all(); 00038 } 00039 00040 CExactInferenceMethod::~CExactInferenceMethod() 00041 { 00042 } 00043 00044 void CExactInferenceMethod::update_all() 00045 { 00046 if (m_labels) 00047 m_label_vector = 00048 ((CRegressionLabels*) m_labels)->get_labels().clone(); 00049 00050 if (m_features && m_features->has_property(FP_DOT) && m_features->get_num_vectors()) 00051 m_feature_matrix = 00052 ((CDotFeatures*)m_features)->get_computed_dot_feature_matrix(); 00053 00054 else if (m_features && m_features->get_feature_class() == C_COMBINED) 00055 { 00056 CDotFeatures* feat = 00057 (CDotFeatures*)((CCombinedFeatures*)m_features)-> 00058 get_first_feature_obj(); 00059 00060 if (feat->get_num_vectors()) 00061 m_feature_matrix = feat->get_computed_dot_feature_matrix(); 00062 00063 SG_UNREF(feat); 00064 } 00065 00066 update_data_means(); 00067 00068 if (m_kernel) 00069 update_train_kernel(); 00070 00071 if (m_ktrtr.num_cols*m_ktrtr.num_rows) 00072 { 00073 update_chol(); 00074 update_alpha(); 00075 } 00076 } 00077 00078 void CExactInferenceMethod::check_members() 00079 { 00080 if (!m_labels) 00081 SG_ERROR("No labels set\n"); 00082 00083 if (m_labels->get_label_type() != LT_REGRESSION) 00084 SG_ERROR("Expected RegressionLabels\n"); 00085 00086 if (!m_features) 00087 SG_ERROR("No features set!\n"); 00088 00089 if (m_labels->get_num_labels() != m_features->get_num_vectors()) 00090 SG_ERROR("Number of training vectors does not match number of labels\n"); 00091 00092 if(m_features->get_feature_class() == C_COMBINED) 00093 { 00094 CDotFeatures* feat = 00095 (CDotFeatures*)((CCombinedFeatures*)m_features)-> 00096 get_first_feature_obj(); 00097 00098 if (!feat->has_property(FP_DOT)) 00099 SG_ERROR("Specified features are not of type CFeatures\n"); 00100 00101 if (feat->get_feature_class() != C_DENSE) 00102 SG_ERROR("Expected Simple Features\n"); 00103 00104 if (feat->get_feature_type() != F_DREAL) 00105 SG_ERROR("Expected Real Features\n"); 00106 00107 SG_UNREF(feat); 00108 } 00109 00110 else 00111 { 00112 if (!m_features->has_property(FP_DOT)) 00113 SG_ERROR("Specified features are not of type CFeatures\n"); 00114 00115 if (m_features->get_feature_class() != C_DENSE) 00116 SG_ERROR("Expected Simple Features\n"); 00117 00118 if (m_features->get_feature_type() != F_DREAL) 00119 SG_ERROR("Expected Real Features\n"); 00120 } 00121 00122 if (!m_kernel) 00123 SG_ERROR( "No kernel assigned!\n"); 00124 00125 if (!m_mean) 00126 SG_ERROR( "No mean function assigned!\n"); 00127 00128 if (m_model->get_model_type() != LT_GAUSSIAN) 00129 { 00130 SG_ERROR("Exact Inference Method can only use " \ 00131 "Gaussian Likelihood Function.\n"); 00132 } 00133 } 00134 00135 CMap<TParameter*, SGVector<float64_t> > CExactInferenceMethod:: 00136 get_marginal_likelihood_derivatives(CMap<TParameter*, 00137 CSGObject*>& para_dict) 00138 { 00139 check_members(); 00140 00141 if(update_parameter_hash()) 00142 update_all(); 00143 00144 //Get the sigma variable from the likelihood model 00145 float64_t m_sigma = 00146 dynamic_cast<CGaussianLikelihood*>(m_model)->get_sigma(); 00147 00148 //Placeholder Matrix 00149 SGMatrix<float64_t> temp1(m_ktrtr.num_rows, m_ktrtr.num_cols); 00150 00151 //Placeholder Matrix 00152 SGMatrix<float64_t> temp2(m_alpha.vlen, m_alpha.vlen); 00153 00154 //Derivative Matrix 00155 SGMatrix<float64_t> Q(m_L.num_rows, m_L.num_cols); 00156 00157 //Vector used to fill diagonal of Matrix. 00158 SGVector<float64_t> diagonal(temp1.num_rows); 00159 SGVector<float64_t> diagonal2(temp2.num_rows); 00160 00161 diagonal.fill_vector(diagonal.vector, temp1.num_rows, 1.0); 00162 diagonal2.fill_vector(diagonal2.vector, temp2.num_rows, 0.0); 00163 00164 temp1.create_diagonal_matrix(temp1.matrix, diagonal.vector, temp1.num_rows); 00165 Q.create_diagonal_matrix(Q.matrix, diagonal.vector, temp2.num_rows); 00166 temp2.create_diagonal_matrix(temp2.matrix, diagonal2.vector, temp2.num_rows); 00167 00168 memcpy(temp1.matrix, m_L.matrix, 00169 m_L.num_cols*m_L.num_rows*sizeof(float64_t)); 00170 00171 //Solve (L) Q = Identity for Q. 00172 clapack_dpotrs(CblasColMajor, CblasUpper, 00173 temp1.num_rows, Q.num_cols, temp1.matrix, temp1.num_cols, 00174 Q.matrix, Q.num_cols); 00175 00176 //Calculate alpha*alpha' 00177 cblas_dger(CblasColMajor, m_alpha.vlen, m_alpha.vlen, 00178 1.0, m_alpha.vector, 1, m_alpha.vector, 1, 00179 temp2.matrix, m_alpha.vlen); 00180 00181 temp1.create_diagonal_matrix(temp1.matrix, diagonal.vector, temp1.num_rows); 00182 00183 //Subtracct alpha*alpha' from Q. 00184 cblas_dsymm(CblasColMajor, CblasLeft, CblasUpper, 00185 temp1.num_rows, temp1.num_cols, 1.0/(m_sigma*m_sigma), 00186 Q.matrix, temp1.num_cols, 00187 temp1.matrix, temp1.num_cols, -1.0, 00188 temp2.matrix, temp2.num_cols); 00189 00190 memcpy(Q.matrix, temp2.matrix, 00191 temp2.num_cols*temp2.num_rows*sizeof(float64_t)); 00192 00193 float64_t sum = 0; 00194 00195 m_kernel->build_parameter_dictionary(para_dict); 00196 m_mean->build_parameter_dictionary(para_dict); 00197 00198 //This will be the vector we return 00199 CMap<TParameter*, SGVector<float64_t> > gradient( 00200 3+para_dict.get_num_elements(), 00201 3+para_dict.get_num_elements()); 00202 00203 for (index_t i = 0; i < para_dict.get_num_elements(); i++) 00204 { 00205 shogun::CMapNode<TParameter*, CSGObject*>* node = 00206 para_dict.get_node_ptr(i); 00207 00208 TParameter* param = node->key; 00209 CSGObject* obj = node->data; 00210 00211 index_t length = 1; 00212 00213 if ((param->m_datatype.m_ctype== CT_VECTOR || 00214 param->m_datatype.m_ctype == CT_SGVECTOR) && 00215 param->m_datatype.m_length_y != NULL) 00216 length = *(param->m_datatype.m_length_y); 00217 00218 SGVector<float64_t> variables(length); 00219 00220 bool deriv_found = false; 00221 00222 for (index_t g = 0; g < length; g++) 00223 { 00224 00225 SGMatrix<float64_t> deriv; 00226 SGVector<float64_t> mean_derivatives; 00227 00228 if (param->m_datatype.m_ctype == CT_VECTOR || 00229 param->m_datatype.m_ctype == CT_SGVECTOR) 00230 { 00231 deriv = m_kernel->get_parameter_gradient(param, obj, g); 00232 mean_derivatives = m_mean->get_parameter_derivative( 00233 param, obj, m_feature_matrix, g); 00234 } 00235 00236 else 00237 { 00238 mean_derivatives = m_mean->get_parameter_derivative( 00239 param, obj, m_feature_matrix); 00240 00241 deriv = m_kernel->get_parameter_gradient(param, obj); 00242 } 00243 00244 sum = 0; 00245 00246 00247 if (deriv.num_cols*deriv.num_rows > 0) 00248 { 00249 for (index_t k = 0; k < Q.num_rows; k++) 00250 { 00251 for (index_t j = 0; j < Q.num_cols; j++) 00252 sum += Q(k,j)*deriv(k,j)*m_scale*m_scale; 00253 } 00254 00255 sum /= 2.0; 00256 variables[g] = sum; 00257 deriv_found = true; 00258 } 00259 00260 else if (mean_derivatives.vlen > 0) 00261 { 00262 sum = mean_derivatives.dot(mean_derivatives.vector, 00263 m_alpha.vector, m_alpha.vlen); 00264 variables[g] = sum; 00265 deriv_found = true; 00266 } 00267 00268 00269 } 00270 00271 if (deriv_found) 00272 gradient.add(param, variables); 00273 00274 } 00275 00276 TParameter* param; 00277 index_t index = get_modsel_param_index("scale"); 00278 param = m_model_selection_parameters->get_parameter(index); 00279 00280 sum = 0; 00281 00282 for (index_t i = 0; i < Q.num_rows; i++) 00283 { 00284 for (index_t j = 0; j < Q.num_cols; j++) 00285 sum += Q(i,j)*m_ktrtr(i,j)*m_scale*2.0; 00286 } 00287 00288 sum /= 2.0; 00289 00290 SGVector<float64_t> scale(1); 00291 00292 scale[0] = sum; 00293 00294 gradient.add(param, scale); 00295 para_dict.add(param, this); 00296 00297 index = m_model->get_modsel_param_index("sigma"); 00298 param = m_model->m_model_selection_parameters->get_parameter(index); 00299 00300 sum = m_sigma*Q.trace(Q.matrix, Q.num_rows, Q.num_cols); 00301 00302 SGVector<float64_t> sigma(1); 00303 00304 sigma[0] = sum; 00305 gradient.add(param, sigma); 00306 para_dict.add(param, m_model); 00307 00308 return gradient; 00309 00310 } 00311 00312 SGVector<float64_t> CExactInferenceMethod::get_diagonal_vector() 00313 { 00314 if(update_parameter_hash()) 00315 update_all(); 00316 00317 check_members(); 00318 00319 float64_t m_sigma = 00320 dynamic_cast<CGaussianLikelihood*>(m_model)->get_sigma(); 00321 00322 SGVector<float64_t> result = 00323 SGVector<float64_t>(m_features->get_num_vectors()); 00324 00325 result.fill_vector(result.vector, m_features->get_num_vectors(), 00326 1.0/m_sigma); 00327 00328 return result; 00329 } 00330 00331 float64_t CExactInferenceMethod::get_negative_marginal_likelihood() 00332 { 00333 if(update_parameter_hash()) 00334 update_all(); 00335 00336 float64_t result; 00337 00338 result = m_label_vector.dot(m_label_vector.vector, m_alpha.vector, 00339 m_label_vector.vlen)/2.0; 00340 00341 float64_t m_sigma = 00342 dynamic_cast<CGaussianLikelihood*>(m_model)->get_sigma(); 00343 00344 for (int i = 0; i < m_L.num_rows; i++) 00345 result += CMath::log(m_L(i,i)); 00346 00347 result += m_L.num_rows * CMath::log(2*CMath::PI*m_sigma*m_sigma)/2.0; 00348 00349 return result; 00350 } 00351 00352 SGVector<float64_t> CExactInferenceMethod::get_alpha() 00353 { 00354 if(update_parameter_hash()) 00355 update_all(); 00356 00357 SGVector<float64_t> result(m_alpha); 00358 return result; 00359 } 00360 00361 SGMatrix<float64_t> CExactInferenceMethod::get_cholesky() 00362 { 00363 if(update_parameter_hash()) 00364 update_all(); 00365 00366 SGMatrix<float64_t> result(m_L); 00367 return result; 00368 } 00369 00370 void CExactInferenceMethod::update_train_kernel() 00371 { 00372 m_kernel->cleanup(); 00373 00374 m_kernel->init(m_features, m_features); 00375 00376 //K(X, X) 00377 SGMatrix<float64_t> kernel_matrix = m_kernel->get_kernel_matrix(); 00378 00379 m_ktrtr=kernel_matrix.clone(); 00380 } 00381 00382 00383 void CExactInferenceMethod::update_chol() 00384 { 00385 check_members(); 00386 00387 float64_t m_sigma = 00388 dynamic_cast<CGaussianLikelihood*>(m_model)->get_sigma(); 00389 00390 //Placeholder Matrices 00391 SGMatrix<float64_t> temp1(m_ktrtr.num_rows, 00392 m_ktrtr.num_cols); 00393 00394 m_kern_with_noise = SGMatrix<float64_t>(m_ktrtr.num_rows, 00395 m_ktrtr.num_cols); 00396 00397 SGMatrix<float64_t> temp2(m_ktrtr.num_rows, 00398 m_ktrtr.num_cols); 00399 00400 //Vector to fill matrix diagonals 00401 SGVector<float64_t> diagonal(temp1.num_rows); 00402 diagonal.fill_vector(diagonal.vector, temp1.num_rows, 1.0); 00403 00404 temp1.create_diagonal_matrix(temp1.matrix, diagonal.vector, temp1.num_rows); 00405 temp2.create_diagonal_matrix(temp2.matrix, diagonal.vector, temp2.num_rows); 00406 00407 //Calculate first (K(X, X)+sigma*I) 00408 cblas_dsymm(CblasColMajor, CblasLeft, CblasUpper, 00409 m_ktrtr.num_rows, temp2.num_cols, (m_scale*m_scale)/(m_sigma*m_sigma), 00410 m_ktrtr.matrix, m_ktrtr.num_cols, 00411 temp2.matrix, temp2.num_cols, 1.0, 00412 temp1.matrix, temp1.num_cols); 00413 00414 memcpy(m_kern_with_noise.matrix, temp1.matrix, 00415 temp1.num_cols*temp1.num_rows*sizeof(float64_t)); 00416 00417 //Get Lower triangle cholesky decomposition of K(X, X)+sigma*I) 00418 clapack_dpotrf(CblasColMajor, CblasUpper, 00419 temp1.num_rows, temp1.matrix, temp1.num_cols); 00420 00421 m_L = SGMatrix<float64_t>(temp1.num_rows, temp1.num_cols); 00422 00423 /* lapack for some reason wants only to calculate the upper triangle 00424 * and leave the lower triangle with junk data. Finishing the job 00425 * by filling the lower triangle with zero entries. 00426 */ 00427 for (int i = 0; i < temp1.num_rows; i++) 00428 { 00429 for (int j = 0; j < temp1.num_cols; j++) 00430 { 00431 if (i > j) 00432 temp1(i,j) = 0; 00433 } 00434 } 00435 00436 memcpy(m_L.matrix, temp1.matrix, 00437 temp1.num_cols*temp1.num_rows*sizeof(float64_t)); 00438 } 00439 00440 void CExactInferenceMethod::update_alpha() 00441 { 00442 float64_t m_sigma = 00443 dynamic_cast<CGaussianLikelihood*>(m_model)->get_sigma(); 00444 00445 //Placeholder Matrices 00446 SGMatrix<float64_t> temp1(m_L.num_rows, 00447 m_L.num_cols); 00448 00449 for (int i = 0; i < m_label_vector.vlen; i++) 00450 m_label_vector[i] = m_label_vector[i] - m_data_means[i]; 00451 00452 m_alpha = SGVector<float64_t>(m_label_vector.vlen); 00453 00454 memcpy(temp1.matrix, m_L.matrix, 00455 m_L.num_cols*m_L.num_rows*sizeof(float64_t)); 00456 00457 memcpy(m_alpha.vector, m_label_vector.vector, 00458 m_label_vector.vlen*sizeof(float64_t)); 00459 00460 //Solve (K(X, X)+sigma*I) alpha = labels for alpha. 00461 clapack_dposv(CblasColMajor, CblasLower, 00462 m_kern_with_noise.num_cols, 1, m_kern_with_noise.matrix, 00463 m_kern_with_noise.num_cols, m_alpha.vector, 00464 m_kern_with_noise.num_cols); 00465 00466 for (int i = 0; i < m_alpha.vlen; i++) 00467 m_alpha[i] = m_alpha[i]/(m_sigma*m_sigma); 00468 00469 } 00470 #endif 00471 #endif // HAVE_LAPACK