SHOGUN
v2.0.0
|
00001 /* 00002 * This program is free software; you can redistribute it and/or modify 00003 * it under the terms of the GNU General Public License as published by 00004 * the Free Software Foundation; either version 3 of the License, or 00005 * (at your option) any later version. 00006 * 00007 * Written (W) 2012 Chiyuan Zhang 00008 * Copyright (C) 2012 Chiyuan Zhang 00009 */ 00010 00011 #include <shogun/lib/config.h> 00012 00013 #ifdef HAVE_LAPACK 00014 00015 #include <vector> 00016 #include <limits> 00017 #include <algorithm> 00018 00019 #include <shogun/features/DenseFeatures.h> 00020 #include <shogun/mathematics/Math.h> 00021 #include <shogun/mathematics/lapack.h> 00022 #include <shogun/regression/LeastAngleRegression.h> 00023 #include <shogun/labels/RegressionLabels.h> 00024 00025 using namespace shogun; 00026 using namespace std; 00027 00028 static vector<float64_t> make_vector(int32_t size, float64_t val) 00029 { 00030 vector<float64_t> result(size); 00031 fill(result.begin(), result.end(), val); 00032 return result; 00033 } 00034 00035 static void plane_rot(float64_t x0, float64_t x1, 00036 float64_t &y0, float64_t &y1, SGMatrix<float64_t> &G) 00037 { 00038 G.zero(); 00039 if (x1 == 0) 00040 { 00041 G(0, 0) = G(1, 1) = 1; 00042 y0 = x0; 00043 y1 = x1; 00044 } 00045 else 00046 { 00047 float64_t r = CMath::sqrt(x0*x0+x1*x1); 00048 float64_t sx0 = x0 / r; 00049 float64_t sx1 = x1 / r; 00050 00051 G(0,0) = sx0; 00052 G(1,0) = -sx1; 00053 G(0,1) = sx1; 00054 G(1,1) = sx0; 00055 00056 y0 = r; 00057 y1 = 0; 00058 } 00059 } 00060 00061 static void find_max_abs(const vector<float64_t> &vec, const vector<bool> &ignore, 00062 int32_t &imax, float64_t& vmax) 00063 { 00064 imax = -1; 00065 vmax = -1; 00066 for (uint32_t i=0; i < vec.size(); ++i) 00067 { 00068 if (ignore[i]) 00069 continue; 00070 if (CMath::abs(vec[i]) > vmax) 00071 { 00072 vmax = CMath::abs(vec[i]); 00073 imax = i; 00074 } 00075 } 00076 } 00077 00078 CLeastAngleRegression::CLeastAngleRegression(bool lasso) : 00079 CLinearMachine(), m_lasso(lasso), 00080 m_max_nonz(0), m_max_l1_norm(0) 00081 { 00082 SG_ADD(&m_max_nonz, "max_nonz", "Max number of non-zero variables", MS_AVAILABLE); 00083 SG_ADD(&m_max_l1_norm, "max_l1_norm", "Max l1-norm of estimator", MS_AVAILABLE); 00084 } 00085 00086 CLeastAngleRegression::~CLeastAngleRegression() 00087 { 00088 00089 } 00090 00091 bool CLeastAngleRegression::train_machine(CFeatures* data) 00092 { 00093 if (!m_labels) 00094 SG_ERROR("No labels set\n"); 00095 if (m_labels->get_label_type() != LT_REGRESSION) 00096 SG_ERROR("Expected RegressionLabels\n"); 00097 00098 if (!data) 00099 data=features; 00100 00101 if (!data) 00102 SG_ERROR("No features set\n"); 00103 00104 if (m_labels->get_num_labels() != data->get_num_vectors()) 00105 SG_ERROR("Number of training vectors does not match number of labels\n"); 00106 00107 if (data->get_feature_class() != C_DENSE) 00108 SG_ERROR("Expected Simple Features\n"); 00109 00110 if (data->get_feature_type() != F_DREAL) 00111 SG_ERROR("Expected Real Features\n"); 00112 00113 CDenseFeatures<float64_t>* feats=(CDenseFeatures<float64_t>*) data; 00114 int32_t n_fea = feats->get_num_features(); 00115 int32_t n_vec = feats->get_num_vectors(); 00116 00117 bool lasso_cond = false; 00118 bool stop_cond = false; 00119 00120 // init facilities 00121 m_beta_idx.clear(); 00122 m_beta_path.clear(); 00123 m_num_active = 0; 00124 m_active_set.clear(); 00125 m_is_active.resize(n_fea); 00126 fill(m_is_active.begin(), m_is_active.end(), false); 00127 00128 SGVector<float64_t> y = ((CRegressionLabels*) m_labels)->get_labels(); 00129 SGMatrix<float64_t> Xorig = feats->get_feature_matrix(); 00130 00131 // transpose(X) is more convenient to work with since we care 00132 // about features here. After transpose, each row will be a data 00133 // point while each column corresponds to a feature 00134 SGMatrix<float64_t> X(n_vec, n_fea, true); 00135 for (int32_t i=0; i < n_vec; ++i) 00136 { 00137 for (int32_t j=0; j < n_fea; ++j) 00138 X(i,j) = Xorig(j,i); 00139 } 00140 00141 // beta is the estimator 00142 vector<float64_t> beta = make_vector(n_fea, 0); 00143 00144 vector<float64_t> Xy = make_vector(n_fea, 0); 00145 // Xy = X' * y 00146 cblas_dgemv(CblasColMajor, CblasTrans, n_vec, n_fea, 1, X.matrix, n_vec, 00147 y.vector, 1, 0, &Xy[0], 1); 00148 00149 // mu is the prediction 00150 vector<float64_t> mu = make_vector(n_vec, 0); 00151 00152 // correlation 00153 vector<float64_t> corr = make_vector(n_fea, 0); 00154 // sign of correlation 00155 vector<float64_t> corr_sign(n_fea); 00156 00157 // Cholesky factorization R'R = X'X, R is upper triangular 00158 SGMatrix<float64_t> R; 00159 00160 float64_t max_corr = 1; 00161 int32_t i_max_corr = 1; 00162 00163 // first entry: all coefficients are zero 00164 m_beta_path.push_back(beta); 00165 m_beta_idx.push_back(0); 00166 00167 //======================================== 00168 // main loop 00169 //======================================== 00170 int32_t nloop=0; 00171 while (m_num_active < n_fea && max_corr > CMath::MACHINE_EPSILON && !stop_cond) 00172 { 00173 // corr = X' * (y-mu) = - X'*mu + Xy 00174 copy(Xy.begin(), Xy.end(), corr.begin()); 00175 cblas_dgemv(CblasColMajor, CblasTrans, n_vec, n_fea, -1, 00176 X.matrix, n_vec, &mu[0], 1, 1, &corr[0], 1); 00177 00178 // corr_sign = sign(corr) 00179 for (uint32_t i=0; i < corr.size(); ++i) 00180 corr_sign[i] = CMath::sign(corr[i]); 00181 00182 // find max absolute correlation in inactive set 00183 find_max_abs(corr, m_is_active, i_max_corr, max_corr); 00184 00185 if (!lasso_cond) 00186 { 00187 // update Cholesky factorization matrix 00188 R=cholesky_insert(X, R, i_max_corr); 00189 activate_variable(i_max_corr); 00190 } 00191 00192 // corr_sign_a = corr_sign[m_active_set] 00193 vector<float64_t> corr_sign_a(m_num_active); 00194 for (int32_t i=0; i < m_num_active; ++i) 00195 corr_sign_a[i] = corr_sign[m_active_set[i]]; 00196 00197 // GA1 = R\(R'\corr_sign_a) 00198 vector<float64_t> GA1(corr_sign_a); 00199 cblas_dtrsm(CblasColMajor, CblasLeft, CblasUpper, CblasTrans, CblasNonUnit, 00200 m_num_active, 1, 1, R.matrix, m_num_active, &GA1[0], m_num_active); 00201 cblas_dtrsm(CblasColMajor, CblasLeft, CblasUpper, CblasNoTrans, CblasNonUnit, 00202 m_num_active, 1, 1, R.matrix, m_num_active, &GA1[0], m_num_active); 00203 00204 // AA = 1/sqrt(GA1' * corr_sign_a) 00205 float64_t AA = cblas_ddot(m_num_active, &GA1[0], 1, &corr_sign_a[0], 1); 00206 AA = 1/CMath::sqrt(AA); 00207 00208 // wA = AA*GA1 00209 vector<float64_t> wA(GA1); 00210 for (int32_t i=0; i < m_num_active; ++i) 00211 wA[i] *= AA; 00212 00213 // equiangular direction (unit vector) 00214 vector<float64_t> u = make_vector(n_vec, 0); 00215 // u = X[:,m_active_set] * wA 00216 for (int32_t i=0; i < m_num_active; ++i) 00217 { 00218 // u += wA[i] * X[:,m_active_set[i]] 00219 cblas_daxpy(n_vec, wA[i], 00220 X.get_column_vector(m_active_set[i]), 1, &u[0], 1); 00221 } 00222 00223 // step size 00224 float64_t gamma = max_corr / AA; 00225 if (m_num_active < n_fea) 00226 { 00227 for (int32_t i=0; i < n_fea; ++i) 00228 { 00229 if (m_is_active[i]) 00230 continue; 00231 00232 // correlation between X[:,i] and u 00233 float64_t dir_corr = cblas_ddot(n_vec, 00234 X.get_column_vector(i), 1, &u[0], 1); 00235 00236 float64_t tmp1 = (max_corr-corr[i])/(AA-dir_corr); 00237 float64_t tmp2 = (max_corr+corr[i])/(AA+dir_corr); 00238 if (tmp1 > CMath::MACHINE_EPSILON && tmp1 < gamma) 00239 gamma = tmp1; 00240 if (tmp2 > CMath::MACHINE_EPSILON && tmp2 < gamma) 00241 gamma = tmp2; 00242 } 00243 } 00244 00245 int32_t i_kick=-1; 00246 int32_t i_change=i_max_corr; 00247 if (m_lasso) 00248 { 00249 // lasso modification to further refine gamma 00250 lasso_cond = false; 00251 float64_t lasso_bound = numeric_limits<float64_t>::max(); 00252 00253 for (int32_t i=0; i < m_num_active; ++i) 00254 { 00255 float64_t tmp = -beta[m_active_set[i]] / wA[i]; 00256 if (tmp > CMath::MACHINE_EPSILON && tmp < lasso_bound) 00257 { 00258 lasso_bound = tmp; 00259 i_kick = i; 00260 } 00261 } 00262 00263 if (lasso_bound < gamma) 00264 { 00265 gamma = lasso_bound; 00266 lasso_cond = true; 00267 i_change = m_active_set[i_kick]; 00268 } 00269 } 00270 00271 // update prediction: mu = mu + gamma * u 00272 cblas_daxpy(n_vec, gamma, &u[0], 1, &mu[0], 1); 00273 00274 // update estimator 00275 for (int32_t i=0; i < m_num_active; ++i) 00276 beta[m_active_set[i]] += gamma * wA[i]; 00277 00278 // early stopping on max l1-norm 00279 if (m_max_l1_norm > 0) 00280 { 00281 float64_t l1 = SGVector<float64_t>::onenorm(&beta[0], n_fea); 00282 if (l1 > m_max_l1_norm) 00283 { 00284 // stopping with interpolated beta 00285 stop_cond = true; 00286 lasso_cond = false; 00287 float64_t l1_prev = SGVector<float64_t>::onenorm(&m_beta_path[nloop][0], n_fea); 00288 float64_t s = (m_max_l1_norm-l1_prev)/(l1-l1_prev); 00289 00290 // beta = beta_prev + s*(beta-beta_prev) 00291 // = (1-s)*beta_prev + s*beta 00292 cblas_dscal(n_fea, s, &beta[0], 1); 00293 cblas_daxpy(n_fea, 1-s, &m_beta_path[nloop][0], 1, &beta[0], 1); 00294 } 00295 } 00296 00297 // if lasso cond, drop the variable from active set 00298 if (lasso_cond) 00299 { 00300 beta[i_change] = 0; // ensure it be zero 00301 00302 // update Cholesky factorization 00303 R=cholesky_delete(R, i_kick); 00304 deactivate_variable(i_kick); 00305 } 00306 00307 nloop++; 00308 m_beta_path.push_back(beta); 00309 if (size_t(m_num_active) >= m_beta_idx.size()) 00310 m_beta_idx.push_back(nloop); 00311 else 00312 m_beta_idx[m_num_active] = nloop; 00313 00314 // early stopping with max number of non-zero variables 00315 if (m_max_nonz > 0 && m_num_active >= m_max_nonz) 00316 stop_cond = true; 00317 00318 } // main loop 00319 00320 // assign default estimator 00321 w.vlen = n_fea; 00322 switch_w(m_beta_idx.size()-1); 00323 00324 return true; 00325 } 00326 00327 SGMatrix<float64_t> CLeastAngleRegression::cholesky_insert( 00328 SGMatrix<float64_t> X, SGMatrix<float64_t> R, int32_t i_max_corr) 00329 { 00330 // diag_k = X[:,i_max_corr]' * X[:,i_max_corr] 00331 float64_t diag_k = cblas_ddot(X.num_rows, X.get_column_vector(i_max_corr), 1, 00332 X.get_column_vector(i_max_corr), 1); 00333 00334 if (m_num_active == 0) 00335 { // R isn't allocated yet 00336 SGMatrix<float64_t> nR(1,1); 00337 nR(0,0) = CMath::sqrt(diag_k); 00338 return nR; 00339 } 00340 else 00341 { 00342 00343 // col_k is the k-th column of (X'X) 00344 vector<float64_t> col_k(m_num_active); 00345 for (int32_t i=0; i < m_num_active; ++i) 00346 { 00347 // col_k[i] = X[:,i_max_corr]' * X[:,m_active_set[i]] 00348 col_k[i] = cblas_ddot(X.num_rows, X.get_column_vector(i_max_corr), 1, 00349 X.get_column_vector(m_active_set[i]), 1); 00350 } 00351 00352 // R' * R_k = (X' * X)_k = col_k, solving to get R_k 00353 vector<float64_t> R_k(col_k); 00354 cblas_dtrsm(CblasColMajor, CblasLeft, CblasUpper, CblasTrans, CblasNonUnit, m_num_active, 1, 00355 1, R.matrix, m_num_active, &R_k[0], m_num_active); 00356 00357 float64_t R_kk = CMath::sqrt(diag_k - 00358 cblas_ddot(m_num_active, &R_k[0], 1, &R_k[0], 1)); 00359 00360 // new_R = [R R_k; zeros(...) R_kk] 00361 SGMatrix<float64_t> nR(m_num_active+1, m_num_active+1); 00362 for (int32_t i=0; i < m_num_active; ++i) 00363 for (int32_t j=0; j < m_num_active; ++j) 00364 nR(i,j) = R(i,j); 00365 for (int32_t i=0; i < m_num_active; ++i) 00366 nR(i, m_num_active) = R_k[i]; 00367 for (int32_t i=0; i < m_num_active; ++i) 00368 nR(m_num_active, i) = 0; 00369 nR(m_num_active, m_num_active) = R_kk; 00370 00371 return nR; 00372 } 00373 00374 } 00375 00376 SGMatrix<float64_t> CLeastAngleRegression::cholesky_delete(SGMatrix<float64_t> R, int32_t i_kick) 00377 { 00378 if (i_kick != m_num_active-1) 00379 { 00380 // remove i_kick-th column 00381 for (int32_t j=i_kick; j < m_num_active-1; ++j) 00382 for (int32_t i=0; i < m_num_active; ++i) 00383 R(i,j) = R(i,j+1); 00384 00385 SGMatrix<float64_t> G(2,2); 00386 for (int32_t i=i_kick; i < m_num_active-1; ++i) 00387 { 00388 plane_rot(R(i,i),R(i+1,i), R(i,i), R(i+1,i), G); 00389 if (i < m_num_active-2) 00390 { 00391 for (int32_t k=i+1; k < m_num_active-1; ++k) 00392 { 00393 // R[i:i+1, k] = G*R[i:i+1, k] 00394 float64_t Rik = R(i,k), Ri1k = R(i+1,k); 00395 R(i,k) = G(0,0)*Rik + G(0,1)*Ri1k; 00396 R(i+1,k) = G(1,0)*Rik+G(1,1)*Ri1k; 00397 } 00398 } 00399 } 00400 } 00401 00402 SGMatrix<float64_t> nR(m_num_active-1, m_num_active-1); 00403 for (int32_t i=0; i < m_num_active-1; ++i) 00404 for (int32_t j=0; j < m_num_active-1; ++j) 00405 nR(i,j) = R(i,j); 00406 00407 return nR; 00408 } 00409 00410 #endif // HAVE_LAPACK