SHOGUN v2.0.0
/*
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 3 of the License, or
 * (at your option) any later version.
 *
 * Copyright (C) 2012 Jacob Walker
 *
 * Code adapted from the Gaussian Process Machine Learning Toolbox
 * http://www.gaussianprocess.org/gpml/code/matlab/doc/
 * This code specifically adapted from infLaplace.m
 */

#include <shogun/lib/config.h>

#ifdef HAVE_LAPACK
#ifdef HAVE_EIGEN3

#include <shogun/regression/gp/LaplacianInferenceMethod.h>
#include <shogun/regression/gp/GaussianLikelihood.h>
#include <shogun/regression/gp/StudentsTLikelihood.h>
#include <shogun/mathematics/lapack.h>
#include <shogun/mathematics/Math.h>
#include <shogun/labels/RegressionLabels.h>
#include <shogun/kernel/GaussianKernel.h>
#include <shogun/features/CombinedFeatures.h>
#include <shogun/mathematics/eigen3.h>
#include <shogun/lib/external/brent.h>

using namespace shogun;
using namespace Eigen;

namespace shogun
{

/* Wrapper class used for the Brent minimizer. */
class Psi_line : public func_base
{
public:
    Eigen::Map<Eigen::VectorXd>* alpha;
    Eigen::VectorXd* dalpha;
    Eigen::Map<Eigen::MatrixXd>* K;
    float64_t* l1;
    SGVector<float64_t>* dl1;
    Eigen::Map<Eigen::VectorXd>* dl2;
    SGVector<float64_t>* mW;
    SGVector<float64_t>* f;
    SGVector<float64_t>* m;
    float64_t scale;
    CLikelihoodModel* lik;
    CRegressionLabels* lab;

    Eigen::VectorXd start_alpha;

    virtual double operator() (double x)
    {
        Eigen::Map<Eigen::VectorXd> eigen_f(f->vector, f->vlen);
        Eigen::Map<Eigen::VectorXd> eigen_m(m->vector, m->vlen);

        // Step along the search direction and recompute the latent function
        *alpha = start_alpha + x*(*dalpha);
        eigen_f = (*K)*(*alpha)*scale*scale + eigen_m;

        for (index_t i = 0; i < eigen_f.rows(); i++)
            (*f)[i] = eigen_f[i];

        (*dl1) = lik->get_log_probability_derivative_f(lab, *f, 1);
        (*mW) = lik->get_log_probability_derivative_f(lab, *f, 2);

        float64_t result = (alpha->dot(eigen_f - eigen_m))/2.0;

        for (index_t i = 0; i < mW->vlen; i++)
            (*mW)[i] = -(*mW)[i];

        result -= lik->get_log_probability_f(lab, *f);

        return result;
    }
};

}
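
/* The Psi_line functor above evaluates the Laplace objective along a search
 * direction: for a step size x it sets alpha = start_alpha + x*dalpha,
 * recomputes the latent function f = K*alpha*scale^2 + m, and returns
 *
 *   Psi(alpha) = alpha'*(f - m)/2 - log p(y|f),
 *
 * refreshing dlp (first derivative of log p(y|f)) and W (negated second
 * derivative) as side effects so the caller's Newton iteration can reuse
 * them after the line search. */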

CLaplacianInferenceMethod::CLaplacianInferenceMethod() : CInferenceMethod()
{
    init();
    update_all();
    update_parameter_hash();
}

CLaplacianInferenceMethod::CLaplacianInferenceMethod(CKernel* kern,
    CFeatures* feat, CMeanFunction* m, CLabels* lab, CLikelihoodModel* mod) :
    CInferenceMethod(kern, feat, m, lab, mod)
{
    init();
    update_all();
}

void CLaplacianInferenceMethod::init()
{
    m_latent_features = NULL;
    m_max_itr = 30;
    m_opt_tolerance = 1e-6;
    m_tolerance = 1e-8;
    m_max = 5;
}

CLaplacianInferenceMethod::~CLaplacianInferenceMethod()
{
}

void CLaplacianInferenceMethod::update_all()
{
    if (m_labels)
        m_label_vector =
            ((CRegressionLabels*) m_labels)->get_labels().clone();

    if (m_features && m_features->has_property(FP_DOT) &&
        m_features->get_num_vectors())
    {
        m_feature_matrix =
            ((CDotFeatures*)m_features)->get_computed_dot_feature_matrix();
    }
    else if (m_features && m_features->get_feature_class() == C_COMBINED)
    {
        CDotFeatures* feat =
            (CDotFeatures*)((CCombinedFeatures*)m_features)->
                get_first_feature_obj();

        if (feat->get_num_vectors())
            m_feature_matrix = feat->get_computed_dot_feature_matrix();

        SG_UNREF(feat);
    }

    update_data_means();

    if (m_kernel)
        update_train_kernel();

    if (m_ktrtr.num_cols*m_ktrtr.num_rows)
    {
        update_alpha();
        update_chol();
    }
}

void CLaplacianInferenceMethod::check_members()
{
    if (!m_labels)
        SG_ERROR("No labels set\n");

    if (m_labels->get_label_type() != LT_REGRESSION)
        SG_ERROR("Expected RegressionLabels\n");

    if (!m_features)
        SG_ERROR("No features set!\n");

    if (m_labels->get_num_labels() != m_features->get_num_vectors())
        SG_ERROR("Number of training vectors does not match number of labels\n");

    if (m_features->get_feature_class() == C_COMBINED)
    {
        CDotFeatures* feat =
            (CDotFeatures*)((CCombinedFeatures*)m_features)->
                get_first_feature_obj();

        if (!feat->has_property(FP_DOT))
            SG_ERROR("Specified features are not of type CDotFeatures\n");

        if (feat->get_feature_class() != C_DENSE)
            SG_ERROR("Expected Simple Features\n");

        if (feat->get_feature_type() != F_DREAL)
            SG_ERROR("Expected Real Features\n");

        SG_UNREF(feat);
    }
    else
    {
        if (!m_features->has_property(FP_DOT))
            SG_ERROR("Specified features are not of type CDotFeatures\n");

        if (m_features->get_feature_class() != C_DENSE)
            SG_ERROR("Expected Simple Features\n");

        if (m_features->get_feature_type() != F_DREAL)
            SG_ERROR("Expected Real Features\n");
    }

    if (!m_kernel)
        SG_ERROR("No kernel assigned!\n");

    if (!m_mean)
        SG_ERROR("No mean function assigned!\n");
}
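
/* Computes the gradient of the negative log marginal likelihood with respect
 * to the kernel, mean and likelihood hyperparameters, following infLaplace.m.
 * Each gradient entry combines an explicit term with an implicit term that
 * accounts for the dependence of the posterior mode on the hyperparameters
 * (carried by dfhat = g .* d3lp below). */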
CMap<TParameter*, SGVector<float64_t> > CLaplacianInferenceMethod::
get_marginal_likelihood_derivatives(CMap<TParameter*, CSGObject*>& para_dict)
{
    check_members();

    if (update_parameter_hash())
        update_all();

    MatrixXd Z(m_L.num_rows, m_L.num_cols);

    Map<VectorXd> eigen_dlp(dlp.vector, dlp.vlen);

    for (index_t i = 0; i < m_L.num_rows; i++)
    {
        for (index_t j = 0; j < m_L.num_cols; j++)
            Z(i,j) = m_L(i,j);
    }

    MatrixXd sW_temp(sW.vlen, m_ktrtr.num_cols);
    VectorXd sum(1);
    sum[0] = 0;

    for (index_t i = 0; i < sW.vlen; i++)
    {
        for (index_t j = 0; j < m_ktrtr.num_cols; j++)
            sW_temp(i,j) = sW[i];
    }

    VectorXd g;

    Map<VectorXd> eigen_W(W.vector, W.vlen);

    Map<MatrixXd> eigen_temp_kernel(temp_kernel.matrix,
        temp_kernel.num_rows, temp_kernel.num_cols);

    if (eigen_W.minCoeff() < 0)
    {
        Z = -Z;

        // A = I + K*diag(W)*scale^2 (cf. logdetA in infLaplace.m)
        MatrixXd A = MatrixXd::Identity(m_ktrtr.num_rows, m_ktrtr.num_cols);

        MatrixXd temp_diagonal(sW.vlen, sW.vlen);
        temp_diagonal.setZero(sW.vlen, sW.vlen);

        for (index_t s = 0; s < temp_diagonal.rows(); s++)
            temp_diagonal(s,s) = W[s];

        A = A + eigen_temp_kernel*m_scale*m_scale*temp_diagonal;

        FullPivLU<MatrixXd> lu(A);

        MatrixXd temp_matrix =
            lu.inverse().cwiseProduct(eigen_temp_kernel*m_scale*m_scale);

        VectorXd temp_sum(temp_matrix.rows());
        temp_sum.setZero();

        for (index_t i = 0; i < temp_matrix.rows(); i++)
        {
            for (index_t j = 0; j < temp_matrix.cols(); j++)
                temp_sum[i] += temp_matrix(j,i);
        }

        g = temp_sum/2.0;
    }
    else
    {
        MatrixXd C = Z.transpose().colPivHouseholderQr().solve(
            sW_temp.cwiseProduct(eigen_temp_kernel*m_scale*m_scale));

        MatrixXd temp_diagonal(sW.vlen, sW.vlen);
        temp_diagonal.setZero(sW.vlen, sW.vlen);

        for (index_t s = 0; s < sW.vlen; s++)
            temp_diagonal(s,s) = sW[s];

        MatrixXd temp = Z.transpose();

        Z = Z.transpose().colPivHouseholderQr().solve(temp_diagonal);
        Z = temp.transpose().colPivHouseholderQr().solve(Z);

        for (index_t s = 0; s < Z.rows(); s++)
        {
            for (index_t r = 0; r < Z.cols(); r++)
                Z(s,r) *= sW[s];
        }

        VectorXd temp_sum(C.rows());
        temp_sum.setZero(C.rows());

        for (index_t i = 0; i < C.rows(); i++)
        {
            for (index_t j = 0; j < C.cols(); j++)
                temp_sum[i] += C(j,i)*C(j,i);
        }

        g = (eigen_temp_kernel.diagonal()*m_scale*m_scale-temp_sum)/2.0;
    }

    Map<VectorXd> eigen_d3lp(d3lp.vector, d3lp.vlen);

    VectorXd dfhat = g.cwiseProduct(eigen_d3lp);

    m_kernel->build_parameter_dictionary(para_dict);
    m_mean->build_parameter_dictionary(para_dict);
    m_model->build_parameter_dictionary(para_dict);

    CMap<TParameter*, SGVector<float64_t> > gradient(
        3+para_dict.get_num_elements(),
        3+para_dict.get_num_elements());

    for (index_t i = 0; i < para_dict.get_num_elements(); i++)
    {
        shogun::CMapNode<TParameter*, CSGObject*>* node =
            para_dict.get_node_ptr(i);

        TParameter* param = node->key;
        CSGObject* obj = node->data;

        index_t length = 1;

        if ((param->m_datatype.m_ctype == CT_VECTOR ||
            param->m_datatype.m_ctype == CT_SGVECTOR) &&
            param->m_datatype.m_length_y != NULL)
            length = *(param->m_datatype.m_length_y);

        SGVector<float64_t> variables(length);

        bool deriv_found = false;

        Map<VectorXd> eigen_temp_alpha(temp_alpha.vector, temp_alpha.vlen);

        for (index_t h = 0; h < length; h++)
        {
            SGMatrix<float64_t> deriv;
            SGVector<float64_t> mean_derivatives;
            VectorXd mean_dev_temp;
            SGVector<float64_t> lik_first_deriv;
            SGVector<float64_t> lik_second_deriv;

            if (param->m_datatype.m_ctype == CT_VECTOR ||
                param->m_datatype.m_ctype == CT_SGVECTOR)
            {
                deriv = m_kernel->get_parameter_gradient(param, obj);

                lik_first_deriv = m_model->get_first_derivative(
                    (CRegressionLabels*)m_labels, param, obj, function);

                lik_second_deriv = m_model->get_second_derivative(
                    (CRegressionLabels*)m_labels, param, obj, function);

                mean_derivatives = m_mean->get_parameter_derivative(
                    param, obj, m_feature_matrix, h);

                mean_dev_temp.resize(mean_derivatives.vlen);

                for (index_t d = 0; d < mean_derivatives.vlen; d++)
                    mean_dev_temp[d] = mean_derivatives[d];
            }
            else
            {
                mean_derivatives = m_mean->get_parameter_derivative(
                    param, obj, m_feature_matrix);

                mean_dev_temp.resize(mean_derivatives.vlen);

                for (index_t d = 0; d < mean_derivatives.vlen; d++)
                    mean_dev_temp[d] = mean_derivatives[d];

                deriv = m_kernel->get_parameter_gradient(param, obj);

                lik_first_deriv = m_model->get_first_derivative(
                    (CRegressionLabels*)m_labels, param, obj, function);

                lik_second_deriv = m_model->get_second_derivative(
                    (CRegressionLabels*)m_labels, param, obj, function);
            }

            if (deriv.num_cols*deriv.num_rows > 0)
            {
                MatrixXd dK(deriv.num_rows, deriv.num_cols);

                for (index_t d = 0; d < deriv.num_rows; d++)
                {
                    for (index_t s = 0; s < deriv.num_cols; s++)
                        dK(d,s) = deriv(d,s);
                }

                sum[0] = (Z.cwiseProduct(dK)).sum()/2.0;

                sum = sum - eigen_temp_alpha.transpose()*dK*eigen_temp_alpha/2.0;

                VectorXd b = dK*eigen_dlp;

                sum = sum -
                    dfhat.transpose()*(b-eigen_temp_kernel*(Z*b)*m_scale*m_scale);

                variables[h] = sum[0];

                deriv_found = true;
            }
            else if (mean_derivatives.vlen > 0)
            {
                sum = -eigen_temp_alpha.transpose()*mean_dev_temp;
                sum = sum - dfhat.transpose()*(mean_dev_temp-eigen_temp_kernel*
                    (Z*mean_dev_temp)*m_scale*m_scale);
                variables[h] = sum[0];
                deriv_found = true;
            }
            else if (lik_first_deriv[0]+lik_second_deriv[0] != CMath::INFTY)
            {
                Map<VectorXd> eigen_fd(lik_first_deriv.vector, lik_first_deriv.vlen);
                Map<VectorXd> eigen_sd(lik_second_deriv.vector, lik_second_deriv.vlen);

                sum[0] = -g.dot(eigen_sd);
                sum[0] = sum[0] - eigen_fd.sum();
                variables[h] = sum[0];
                deriv_found = true;
            }
        }

        if (deriv_found)
            gradient.add(param, variables);
    }

    TParameter* param;
    index_t index = get_modsel_param_index("scale");
    param = m_model_selection_parameters->get_parameter(index);

    MatrixXd dK(m_ktrtr.num_rows, m_ktrtr.num_cols);

    for (index_t d = 0; d < m_ktrtr.num_rows; d++)
    {
        for (index_t s = 0; s < m_ktrtr.num_cols; s++)
            dK(d,s) = m_ktrtr(d,s)*m_scale*2.0;
    }

    Map<VectorXd> eigen_temp_alpha(temp_alpha.vector, temp_alpha.vlen);

    sum[0] = (Z.cwiseProduct(dK)).sum()/2.0;

    sum = sum - eigen_temp_alpha.transpose()*dK*eigen_temp_alpha/2.0;

    VectorXd b = dK*eigen_dlp;

    sum = sum - dfhat.transpose()*(b-eigen_temp_kernel*(Z*b)*m_scale*m_scale);

    SGVector<float64_t> scale(1);
    scale[0] = sum[0];

    gradient.add(param, scale);
    para_dict.add(param, this);

    return gradient;
}
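
/* Returns sW = sqrt(W), the diagonal vector used to form B = I + sW*K*sW
 * in the Laplace approximation (zero when W has negative entries, see
 * update_alpha()). */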
SGVector<float64_t> CLaplacianInferenceMethod::get_diagonal_vector()
{
    SGVector<float64_t> result(sW.vlen);

    for (index_t i = 0; i < sW.vlen; i++)
        result[i] = sW[i];

    return result;
}

float64_t CLaplacianInferenceMethod::get_negative_marginal_likelihood()
{
    if (update_parameter_hash())
        update_all();

    Map<VectorXd> eigen_temp_alpha(temp_alpha.vector, temp_alpha.vlen);

    Map<MatrixXd> eigen_temp_kernel(temp_kernel.matrix,
        temp_kernel.num_rows, temp_kernel.num_cols);

    Map<VectorXd> eigen_function(function.vector, function.vlen);

    Map<VectorXd> eigen_W(W.vector, W.vlen);

    Map<VectorXd> eigen_m_means(m_means.vector, m_means.vlen);

    if (eigen_W.minCoeff() < 0)
    {
        MatrixXd A = MatrixXd::Identity(m_ktrtr.num_rows, m_ktrtr.num_cols);

        MatrixXd temp_diagonal(sW.vlen, sW.vlen);
        temp_diagonal.setZero(sW.vlen, sW.vlen);

        for (index_t s = 0; s < sW.vlen; s++)
            temp_diagonal(s,s) = sW[s];

        A = A + eigen_temp_kernel*m_scale*m_scale*temp_diagonal;

        FullPivLU<MatrixXd> lu(A);

        float64_t result =
            (eigen_temp_alpha.transpose()*(eigen_function-eigen_m_means))[0]/2.0 -
            lp + log(lu.determinant())/2.0;

        return result;
    }
    else
    {
        Map<VectorXd> eigen_sW(sW.vector, sW.vlen);

        LLT<MatrixXd> L(
            (eigen_sW*eigen_sW.transpose()).cwiseProduct(eigen_temp_kernel*m_scale*m_scale) +
            MatrixXd::Identity(m_ktrtr.num_rows, m_ktrtr.num_cols));

        MatrixXd chol = L.matrixL();

        float64_t sum = 0;

        for (index_t i = 0; i < m_L.num_rows; i++)
            sum += log(m_L(i,i));

        float64_t result = eigen_temp_alpha.dot(eigen_function-eigen_m_means)/2.0 -
            lp + sum;

        return result;
    }
}
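
/* For positive W the approximate negative log marginal likelihood computed
 * above is
 *
 *   -log Z ~= alpha'*(f - m)/2 - log p(y|f) + sum_i log(L_ii),
 *
 * where L is the Cholesky factor of B = I + sW*K*sW cached in m_L by
 * update_chol(); when W has negative entries the log-determinant term is
 * taken from an LU factorization of A instead. */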
SGVector<float64_t> CLaplacianInferenceMethod::get_alpha()
{
    if (update_parameter_hash())
        update_all();

    SGVector<float64_t> result(m_alpha);
    return result;
}

SGMatrix<float64_t> CLaplacianInferenceMethod::get_cholesky()
{
    if (update_parameter_hash())
        update_all();

    SGMatrix<float64_t> result(m_L);
    return result;
}

void CLaplacianInferenceMethod::update_train_kernel()
{
    m_kernel->cleanup();
    m_kernel->init(m_features, m_features);

    // K(X, X)
    SGMatrix<float64_t> kernel_matrix = m_kernel->get_kernel_matrix();

    m_ktrtr = kernel_matrix.clone();

    temp_kernel = SGMatrix<float64_t>(kernel_matrix.num_rows, kernel_matrix.num_cols);

    for (index_t i = 0; i < kernel_matrix.num_rows; i++)
    {
        for (index_t j = 0; j < kernel_matrix.num_cols; j++)
            temp_kernel(i,j) = kernel_matrix(i,j);
    }
}
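
/* update_chol() caches the factorization reused by the getters above: for
 * positive W it stores the upper Cholesky factor of
 * B = I + sW*K*sW*scale^2, and for likelihoods that can produce negative W
 * (e.g. Student's t) it stores -(K*scale^2 + diag(1/W))^-1 instead. */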
void CLaplacianInferenceMethod::update_chol()
{
    check_members();

    Map<VectorXd> eigen_W(W.vector, W.vlen);

    Map<MatrixXd> eigen_temp_kernel(temp_kernel.matrix,
        temp_kernel.num_rows, temp_kernel.num_cols);

    if (eigen_W.minCoeff() < 0)
    {
        MatrixXd temp_diagonal(sW.vlen, sW.vlen);
        temp_diagonal.setZero(sW.vlen, sW.vlen);

        for (index_t s = 0; s < temp_diagonal.rows(); s++)
            temp_diagonal(s,s) = 1.0/W[s];

        MatrixXd A = eigen_temp_kernel*m_scale*m_scale+temp_diagonal;

        FullPivLU<MatrixXd> lu(A);

        MatrixXd chol = -lu.inverse();

        m_L = SGMatrix<float64_t>(chol.rows(), chol.cols());

        for (index_t i = 0; i < chol.rows(); i++)
        {
            for (index_t j = 0; j < chol.cols(); j++)
                m_L(i,j) = chol(i,j);
        }
    }
    else
    {
        Map<VectorXd> eigen_sW(sW.vector, sW.vlen);

        LLT<MatrixXd> L(
            (eigen_sW*eigen_sW.transpose()).cwiseProduct(eigen_temp_kernel*m_scale*m_scale) +
            MatrixXd::Identity(m_ktrtr.num_rows, m_ktrtr.num_cols));

        MatrixXd chol = L.matrixU();

        m_L = SGMatrix<float64_t>(chol.rows(), chol.cols());

        for (index_t i = 0; i < chol.rows(); i++)
        {
            for (index_t j = 0; j < chol.cols(); j++)
                m_L(i,j) = chol(i,j);
        }
    }
}
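
/* update_alpha() finds the mode of the posterior p(f|y) with a damped Newton
 * iteration: each step solves for a Newton direction dalpha using the
 * Cholesky factor of B = I + sW*K*sW, then performs a Brent line search
 * (via Psi_line) along that direction, stopping once Psi no longer decreases
 * by more than m_tolerance or m_max_itr iterations are reached. */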
void CLaplacianInferenceMethod::update_alpha()
{
    float64_t Psi_Old = CMath::INFTY;
    float64_t Psi_New;
    float64_t Psi_Def;

    SGVector<float64_t> temp_mean = m_mean->get_mean_vector(m_feature_matrix);

    m_means = SGVector<float64_t>(temp_mean.vlen);
    temp_kernel = SGMatrix<float64_t>(m_ktrtr.num_rows, m_ktrtr.num_cols);
    temp_alpha = SGVector<float64_t>(m_labels->get_num_labels());
    VectorXd first_derivative;

    for (index_t i = 0; i < temp_mean.vlen; i++)
        m_means[i] = temp_mean[i];

    for (index_t i = 0; i < m_alpha.vlen; i++)
        temp_alpha[i] = m_alpha[i];

    for (index_t i = 0; i < m_ktrtr.num_rows; i++)
    {
        for (index_t j = 0; j < m_ktrtr.num_cols; j++)
            temp_kernel(i,j) = m_ktrtr(i,j);
    }

    Map<MatrixXd> eigen_temp_kernel(temp_kernel.matrix,
        temp_kernel.num_rows, temp_kernel.num_cols);

    VectorXd eigen_function;

    Map<VectorXd> eigen_m_means(m_means.vector, m_means.vlen);

    if (m_alpha.vlen != m_labels->get_num_labels())
    {
        temp_alpha = SGVector<float64_t>(m_labels->get_num_labels());

        for (index_t i = 0; i < temp_alpha.vlen; i++)
            temp_alpha[i] = 0.0;

        Map<VectorXd> eigen_temp_alpha2(temp_alpha.vector, temp_alpha.vlen);

        eigen_function = eigen_temp_kernel*eigen_temp_alpha2*m_scale*m_scale+eigen_m_means;

        function = SGVector<float64_t>(eigen_function.rows());

        for (index_t i = 0; i < eigen_function.rows(); i++)
            function[i] = eigen_function[i];

        W = m_model->get_log_probability_derivative_f(
            (CRegressionLabels*)m_labels, function, 2);

        for (index_t i = 0; i < W.vlen; i++)
            W[i] = -W[i];

        Psi_New = -m_model->get_log_probability_f(
            (CRegressionLabels*)m_labels, function);
    }
    else
    {
        Map<VectorXd> eigen_temp_alpha2(temp_alpha.vector, temp_alpha.vlen);

        eigen_function = eigen_temp_kernel*m_scale*m_scale*eigen_temp_alpha2+eigen_m_means;

        function = SGVector<float64_t>(eigen_function.rows());

        for (index_t i = 0; i < eigen_function.rows(); i++)
            function[i] = eigen_function[i];

        W = m_model->get_log_probability_derivative_f(
            (CRegressionLabels*)m_labels, function, 2);

        for (index_t i = 0; i < W.vlen; i++)
            W[i] = -W[i];

        Psi_New = (eigen_temp_alpha2.transpose()*(eigen_function-eigen_m_means))[0]/2.0;

        Psi_New -= m_model->get_log_probability_f(
            (CRegressionLabels*)m_labels, function);

        Psi_Def = -m_model->get_log_probability_f(
            (CRegressionLabels*)m_labels, m_means);

        // If the zero vector is a better starting point than the previous
        // alpha, restart from alpha = 0, i.e. f = m.
        if (Psi_Def < Psi_New)
        {
            eigen_temp_alpha2.setZero();

            for (index_t i = 0; i < function.vlen; i++)
                function[i] = m_means[i];

            W = m_model->get_log_probability_derivative_f(
                (CRegressionLabels*)m_labels, function, 2);

            for (index_t i = 0; i < W.vlen; i++)
                W[i] = -W[i];

            Psi_New = -m_model->get_log_probability_f(
                (CRegressionLabels*)m_labels, function);
        }
    }

    Map<VectorXd> eigen_temp_alpha(temp_alpha.vector, temp_alpha.vlen);

    index_t itr = 0;

    dlp = m_model->get_log_probability_derivative_f(
        (CRegressionLabels*)m_labels, function, 1);

    d2lp = m_model->get_log_probability_derivative_f(
        (CRegressionLabels*)m_labels, function, 2);

    d3lp = m_model->get_log_probability_derivative_f(
        (CRegressionLabels*)m_labels, function, 3);

    Map<VectorXd> eigen_d2lp(d2lp.vector, d2lp.vlen);

    sW = W.clone();

    while (Psi_Old - Psi_New > m_tolerance && itr < m_max_itr)
    {
        Map<VectorXd> eigen_W(W.vector, W.vlen);

        Psi_Old = Psi_New;
        itr++;

        if (eigen_W.minCoeff() < 0)
        {
            // Suggested by Vanhatalo et al.,
            // "Gaussian Process Regression with Student's t Likelihood", NIPS 2009.
            // Quoted from infLaplace.m
            float64_t df = m_model->get_degrees_freedom();

            for (index_t i = 0; i < eigen_W.rows(); i++)
                eigen_W[i] += 2.0/df*dlp[i]*dlp[i];
        }

        for (index_t i = 0; i < eigen_W.rows(); i++)
            W[i] = eigen_W[i];

        for (index_t i = 0; i < W.vlen; i++)
            sW[i] = CMath::sqrt(float64_t(W[i]));

        Map<VectorXd> eigen_sW(sW.vector, sW.vlen);

        LLT<MatrixXd> L((eigen_sW*eigen_sW.transpose()).cwiseProduct(
            eigen_temp_kernel*m_scale*m_scale) +
            MatrixXd::Identity(m_ktrtr.num_rows, m_ktrtr.num_cols));

        MatrixXd chol = L.matrixL();
        MatrixXd temp = L.matrixL();

        Map<VectorXd> temp_eigen_dlp(dlp.vector, dlp.vlen);

        // Use the latest latent function (updated in place by the previous
        // line search) rather than the pre-loop copy when forming the Newton
        // right-hand side b = W.*(f - m) + dlp.
        Map<VectorXd> eigen_f_cur(function.vector, function.vlen);

        VectorXd b = eigen_W.cwiseProduct(eigen_f_cur - eigen_m_means) + temp_eigen_dlp;

        chol = chol.colPivHouseholderQr().solve(eigen_sW.cwiseProduct(
            eigen_temp_kernel*b*m_scale*m_scale));

        chol = temp.transpose().colPivHouseholderQr().solve(chol);

        VectorXd dalpha = b - eigen_sW.cwiseProduct(chol) - eigen_temp_alpha;

        Psi_line func;

        func.lab = (CRegressionLabels*)m_labels;
        func.K = &eigen_temp_kernel;
        func.scale = m_scale;
        func.alpha = &eigen_temp_alpha;
        func.dalpha = &dalpha;
        func.l1 = &lp;
        func.dl1 = &dlp;
        func.dl2 = &eigen_d2lp;
        func.f = &function;
        func.lik = m_model;
        func.m = &m_means;
        func.mW = &W;
        func.start_alpha = eigen_temp_alpha;

        local_min(0, m_max, m_opt_tolerance, func, Psi_New);
    }
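
    // The mode has been found (or the iteration budget spent); recompute all
    // cached derivatives of log p(y|f) at the final f and refresh sW.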
    for (index_t i = 0; i < m_alpha.vlen; i++)
        temp_alpha[i] = eigen_temp_alpha[i];

    Map<VectorXd> eigen_dlp(dlp.vector, dlp.vlen);

    eigen_function = eigen_temp_kernel*m_scale*m_scale*eigen_temp_alpha+eigen_m_means;

    function = SGVector<float64_t>(eigen_function.rows());

    for (index_t i = 0; i < eigen_function.rows(); i++)
        function[i] = eigen_function[i];

    lp = m_model->get_log_probability_f((CRegressionLabels*)m_labels, function);

    dlp = m_model->get_log_probability_derivative_f(
        (CRegressionLabels*)m_labels, function, 1);

    d2lp = m_model->get_log_probability_derivative_f(
        (CRegressionLabels*)m_labels, function, 2);

    d3lp = m_model->get_log_probability_derivative_f(
        (CRegressionLabels*)m_labels, function, 3);

    W = m_model->get_log_probability_derivative_f(
        (CRegressionLabels*)m_labels, function, 2);

    for (index_t i = 0; i < W.vlen; i++)
        W[i] = -W[i];

    for (index_t i = 0; i < sW.vlen; i++)
        sW[i] = 0.0;

    Map<VectorXd> eigen_W2(W.vector, W.vlen);

    if (eigen_W2.minCoeff() > 0)
    {
        for (index_t i = 0; i < sW.vlen; i++)
            sW[i] = CMath::sqrt(float64_t(W[i]));
    }

    m_alpha = SGVector<float64_t>(eigen_temp_alpha.rows());

    for (index_t i = 0; i < m_alpha.vlen; i++)
        m_alpha[i] = eigen_temp_alpha[i];
}

#endif // HAVE_EIGEN3
#endif // HAVE_LAPACK
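
A minimal usage sketch (not part of the original file): it builds a tiny 1-D regression problem and queries the quantities this class caches. It assumes the standard Shogun 2.x API as suggested by the includes above; in particular the header paths for CDenseFeatures and CZeroMean, the CGaussianKernel(cache, width) constructor, and init_shogun_with_defaults() are assumptions and may differ in other releases.

// Hypothetical example, assuming Shogun 2.x class and header names.
#include <shogun/base/init.h>
#include <shogun/features/DenseFeatures.h>
#include <shogun/labels/RegressionLabels.h>
#include <shogun/kernel/GaussianKernel.h>
#include <shogun/regression/gp/ZeroMean.h>
#include <shogun/regression/gp/GaussianLikelihood.h>
#include <shogun/regression/gp/LaplacianInferenceMethod.h>

using namespace shogun;

int main()
{
    init_shogun_with_defaults();

    // Tiny 1-D training set: 5 examples, 1 feature each.
    SGMatrix<float64_t> X(1, 5);
    SGVector<float64_t> y(5);
    for (index_t i = 0; i < 5; i++)
    {
        X(0, i) = i;
        y[i] = CMath::sin(float64_t(i));
    }

    CDenseFeatures<float64_t>* feat = new CDenseFeatures<float64_t>(X);
    CRegressionLabels* lab = new CRegressionLabels(y);
    CGaussianKernel* kernel = new CGaussianKernel(10, 2.0);
    CZeroMean* mean = new CZeroMean();
    CGaussianLikelihood* lik = new CGaussianLikelihood();

    CLaplacianInferenceMethod* inf = new CLaplacianInferenceMethod(
        kernel, feat, mean, lab, lik);
    SG_REF(inf);

    // Negative log marginal likelihood plus the alpha vector and Cholesky
    // factor cached by update_alpha()/update_chol().
    SG_SPRINT("-log ML: %f\n", inf->get_negative_marginal_likelihood());
    SGVector<float64_t> alpha = inf->get_alpha();
    SGMatrix<float64_t> L = inf->get_cholesky();

    SG_UNREF(inf);
    exit_shogun();
    return 0;
}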