SHOGUN v2.0.0
/*
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 3 of the License, or
 * (at your option) any later version.
 *
 * Written (W) 2012 Sergey Lisitsyn
 * Copyright (C) 2010-2012 Jun Liu, Jieping Ye
 */

#include <shogun/lib/slep/slep_mc_tree_lr.h>
#ifdef HAVE_EIGEN3
#include <shogun/lib/slep/tree/general_altra.h>
#include <shogun/lib/slep/tree/altra.h>
#include <shogun/lib/slep/q1/eppMatrix.h>
#include <shogun/mathematics/Math.h>
#include <shogun/mathematics/eigen3.h>
#include <shogun/lib/Signal.h>
#include <shogun/lib/Time.h>
#include <iostream>

using namespace shogun;
using namespace Eigen;
using namespace std;

namespace shogun
{

slep_result_t slep_mc_tree_lr(
		CDotFeatures* features,
		CMulticlassLabels* labels,
		float64_t z,
		const slep_options& options)
{
	int i,j;
	// obtain problem parameters
	int n_feats = features->get_dim_feature_space();
	int n_vecs = features->get_num_vectors();
	int n_classes = labels->get_num_classes();

	// labels vector containing values in range (0 .. n_classes)
	SGVector<float64_t> labels_vector = labels->get_labels();

	// initialize matrices and vectors to be used
	// weight vector
	MatrixXd w = MatrixXd::Zero(n_feats, n_classes);
	// intercepts (biases)
	VectorXd c = VectorXd::Zero(n_classes);

	if (options.last_result)
	{
		SGMatrix<float64_t> last_w = options.last_result->w;
		SGVector<float64_t> last_c = options.last_result->c;
		for (i=0; i<n_classes; i++)
		{
			c[i] = last_c[i];
			for (j=0; j<n_feats; j++)
				w(j,i) = last_w(j,i);
		}
	}
	// iterative process matrices and vectors
	MatrixXd wp = w, wwp = MatrixXd::Zero(n_feats, n_classes);
	VectorXd cp = c, ccp = VectorXd::Zero(n_classes);
	// search point weight vector
	MatrixXd search_w = MatrixXd::Zero(n_feats, n_classes);
	// search point intercepts
	VectorXd search_c = VectorXd::Zero(n_classes);
	// dot products
	MatrixXd Aw = MatrixXd::Zero(n_vecs, n_classes);
	for (j=0; j<n_classes; j++)
		features->dense_dot_range(Aw.col(j).data(), 0, n_vecs, NULL, w.col(j).data(), n_feats, 0.0);
	MatrixXd As = MatrixXd::Zero(n_vecs, n_classes);
	MatrixXd Awp = MatrixXd::Zero(n_vecs, n_classes);
	// gradients
	MatrixXd g = MatrixXd::Zero(n_feats, n_classes);
	VectorXd gc = VectorXd::Zero(n_classes);
	// projection
	MatrixXd v = MatrixXd::Zero(n_feats, n_classes);

	// Lipschitz continuous gradient parameter for line search
	double L = 1.0/(n_vecs*n_classes);
	// coefficients for search point computation
	double alphap = 0, alpha = 1;

	// lambda regularization parameter
	double lambda = z;
	// objective values
	double objective = 0.0;
	double objective_p = 0.0;

	int iter = 0;
	bool done = false;
	CTime time;
	internal::set_is_malloc_allowed(false);
	while ((!done) && (iter<options.max_iter) && (!CSignal::cancel_computations()))
	{
		double beta = (alphap-1)/alpha;
		// compute search points
		search_w = w + beta*wwp;
		search_c = c + beta*ccp;
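
		// Note: the two lines above are the momentum (search point) step of an
		// accelerated proximal gradient scheme (FISTA/Nesterov-style): the search
		// point is the current iterate plus beta*(current - previous), with
		// beta = (alpha_{k-1} - 1)/alpha_k and alpha following the recurrence
		// alpha_{k+1} = (1 + sqrt(4*alpha_k^2 + 1))/2 further below.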

		// update dot products with search point
		As = Aw + beta*(Aw-Awp);

		// compute objective and gradient at search point
		double fun_s = 0;
		g.setZero();
		gc.setZero();
		// for each vector
		for (i=0; i<n_vecs; i++)
		{
			// class of current vector
			int vec_class = labels_vector[i];
			// for each class
			for (j=0; j<n_classes; j++)
			{
				// compute logistic loss
				double aa = ((vec_class == j) ? -1.0 : 1.0)*(As(i,j) + search_c(j));
				double bb = aa > 0.0 ? aa : 0.0;
				// avoid underflow via log-sum-exp trick
				fun_s += CMath::log(CMath::exp(-bb) + CMath::exp(aa-bb)) + bb;
				double prob = 1.0/(1+CMath::exp(aa));
				double b = ((vec_class == j) ? -1.0 : 1.0)*(1-prob);
				// update gradient of intercepts
				gc[j] += b;
				// update gradient of weight vectors
				features->add_to_dense_vec(b, i, g.col(j).data(), n_feats);
			}
		}
		//fun_s /= (n_vecs*n_classes);

		wp = w;
		Awp = Aw;
		cp = c;

		int inner_iter = 0;
		double fun_x = 0;
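
		// Note: the loop below performs a backtracking search on the Lipschitz
		// estimate L. Each trial takes a gradient step from the search point,
		// applies the tree-structured proximal/projection step
		// (altra_mt/general_altra_mt) with threshold lambda/L, and accepts the
		// result once the quadratic upper bound
		//   fun_x <= fun_s + <g, w - search_w> + <gc, c - search_c>
		//            + (L/2)*(||w - search_w||^2 + ||c - search_c||^2)
		// holds (the l_sum <= r_sum*L test); otherwise L is raised to
		// max(2*L, l_sum/r_sum) and the step is retried.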

		// line search process
		while (inner_iter<5000)
		{
			// compute line search point
			v = search_w - g/L;
			c = search_c - gc/L;

			// compute projection of gradient
			if (options.general)
				general_altra_mt(w.data(),v.data(),n_classes,n_feats,options.G,options.ind_t,options.n_nodes,lambda/L);
			else
				altra_mt(w.data(),v.data(),n_classes,n_feats,options.ind_t,options.n_nodes,lambda/L);
			v = w - search_w;

			// update dot products
			for (j=0; j<n_classes; j++)
				features->dense_dot_range(Aw.col(j).data(), 0, n_vecs, NULL, w.col(j).data(), n_feats, 0.0);

			// compute objective at search point
			fun_x = 0;
			for (i=0; i<n_vecs; i++)
			{
				int vec_class = labels_vector[i];
				for (j=0; j<n_classes; j++)
				{
					double aa = ((vec_class == j) ? -1.0 : 1.0)*(Aw(i,j) + c(j));
					double bb = aa > 0.0 ? aa : 0.0;
					fun_x += CMath::log(CMath::exp(-bb) + CMath::exp(aa-bb)) + bb;
				}
			}
			//fun_x /= (n_vecs*n_classes);

			// check for termination of line search
			double r_sum = (v.squaredNorm() + (c-search_c).squaredNorm())/2;
			double l_sum = fun_x - fun_s - v.cwiseProduct(g).sum() - (c-search_c).dot(gc);

			// stop if projected gradient is less than 1e-20
			if (r_sum <= 1e-20)
			{
				SG_SINFO("Gradient step makes little improvement (%f)\n",r_sum);
				done = true;
				break;
			}

			if (l_sum <= r_sum*L)
				break;
			else
				L = CMath::max(2*L, l_sum/r_sum);

			inner_iter++;
		}

		// update alpha coefficients
		alphap = alpha;
		alpha = (1+CMath::sqrt(4*alpha*alpha+1))/2;

		// update wwp and ccp
		wwp = w - wp;
		ccp = c - cp;

		// update objectives
		objective_p = objective;
		objective = fun_x;

		// compute tree norm
		double tree_norm = 0.0;
		if (options.general)
		{
			for (i=0; i<n_classes; i++)
				tree_norm += general_treeNorm(w.col(i).data(),n_classes,n_feats,options.G,options.ind_t,options.n_nodes);
		}
		else
		{
			for (i=0; i<n_classes; i++)
				tree_norm += treeNorm(w.col(i).data(),n_classes,n_feats,options.ind_t,options.n_nodes);
		}

		// regularize objective with tree norm
		objective += lambda*tree_norm;

		//cout << "Objective = " << objective << endl;

		// check for termination of whole process
		if ((CMath::abs(objective - objective_p) < options.tolerance*CMath::abs(objective_p)) && (iter>2))
		{
			SG_SINFO("Objective changes less than tolerance\n");
			done = true;
		}

		iter++;
	}
	SG_SINFO("%d iterations passed, objective = %f\n",iter,objective);
	internal::set_is_malloc_allowed(true);

	// output computed weight vectors and intercepts
	SGMatrix<float64_t> r_w(n_feats,n_classes);
	for (j=0; j<n_classes; j++)
	{
		for (i=0; i<n_feats; i++)
			r_w(i,j) = w(i,j);
	}
	//r_w.display_matrix();
	SGVector<float64_t> r_c(n_classes);
	for (j=0; j<n_classes; j++)
		r_c[j] = c[j];
	return slep_result_t(r_w, r_c);
};
};
#endif
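
A minimal calling sketch follows. It is not part of the original file and only assumes the surrounding Shogun 2.x API: the options fields it sets (general, ind_t, n_nodes, max_iter, tolerance) are exactly the ones read by the solver above, while slep_options::default_options(), the feature/label constructors, and the ind_t layout are assumptions to be checked against slep_options.h and altra.h.

#include <shogun/features/DenseFeatures.h>
#include <shogun/labels/MulticlassLabels.h>
#include <shogun/lib/slep/slep_mc_tree_lr.h>
#include <shogun/lib/slep/slep_options.h>

using namespace shogun;

// Hypothetical driver: data is n_feats x n_vecs, lab holds values in 0..n_classes-1,
// ind_t/n_nodes describe the node tree in the SLEP altra layout (assumption).
slep_result_t solve_tree_lr_example(SGMatrix<float64_t> data, SGVector<float64_t> lab,
		double* ind_t, int n_nodes, float64_t z)
{
	CDenseFeatures<float64_t>* features = new CDenseFeatures<float64_t>(data);
	CMulticlassLabels* labels = new CMulticlassLabels(lab);

	slep_options options = slep_options::default_options(); // assumed helper in slep_options.h
	options.general = false;   // plain (non-general) tree encoding, no custom G
	options.ind_t = ind_t;     // node ranges and weights of the tree
	options.n_nodes = n_nodes;
	options.max_iter = 1000;
	options.tolerance = 1e-4;

	// z is the tree-norm regularization strength (lambda in the solver above)
	slep_result_t result = slep_mc_tree_lr(features, labels, z, options);

	SG_UNREF(features);
	SG_UNREF(labels);
	return result;
}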