SHOGUN
v2.0.0
|
00001 /* 00002 * This program is free software; you can redistribute it and/or modify 00003 * it under the terms of the GNU General Public License as published by 00004 * the Free Software Foundation; either version 3 of the License, or 00005 * (at your option) any later version. 00006 * 00007 * Written (W) 2012 Sergey Lisitsyn 00008 * Copyright (C) 2012 Jiayu Zhou and Jieping Ye 00009 */ 00010 00011 #include <shogun/lib/malsar/malsar_clustered.h> 00012 #ifdef HAVE_EIGEN3 00013 #include <shogun/mathematics/Math.h> 00014 #include <shogun/mathematics/eigen3.h> 00015 #include <iostream> 00016 #include <shogun/lib/external/libqp.h> 00017 00018 using namespace Eigen; 00019 using namespace std; 00020 00021 namespace shogun 00022 { 00023 00024 static double* H_diag_matrix; 00025 static int H_diag_matrix_ld; 00026 00027 static const double* get_col(uint32_t j) 00028 { 00029 return H_diag_matrix + j*H_diag_matrix_ld; 00030 } 00031 00032 malsar_result_t malsar_clustered( 00033 CDotFeatures* features, 00034 double* y, 00035 double rho1, 00036 double rho2, 00037 const malsar_options& options) 00038 { 00039 int task; 00040 int n_feats = features->get_dim_feature_space(); 00041 SG_SDEBUG("n feats = %d\n", n_feats); 00042 int n_vecs = features->get_num_vectors(); 00043 SG_SDEBUG("n vecs = %d\n", n_vecs); 00044 int n_tasks = options.n_tasks; 00045 SG_SDEBUG("n tasks = %d\n", n_tasks); 00046 00047 H_diag_matrix = SG_CALLOC(double, n_tasks*n_tasks); 00048 for (int i=0; i<n_tasks; i++) 00049 H_diag_matrix[i*n_tasks+i] = 2.0; 00050 H_diag_matrix_ld = n_tasks; 00051 00052 int iter = 0; 00053 00054 // initialize weight vector and bias for each task 00055 MatrixXd Ws = MatrixXd::Zero(n_feats, n_tasks); 00056 VectorXd Cs = VectorXd::Zero(n_tasks); 00057 MatrixXd Ms = MatrixXd::Identity(n_tasks, n_tasks)*options.n_clusters/n_tasks; 00058 MatrixXd IM = Ms; 00059 MatrixXd IMsqinv = Ms; 00060 MatrixXd invEtaMWt = Ms; 00061 00062 MatrixXd Wz=Ws, Wzp=Ws, Wz_old=Ws, delta_Wzp=Ws, gWs=Ws; 00063 VectorXd Cz=Cs, Czp=Cs, Cz_old=Cs, delta_Czp=Cs, gCs=Cs; 00064 MatrixXd Mz=Ms, Mzp=Ms, Mz_old=Ms, delta_Mzp=Ms, gMs=Ms; 00065 MatrixXd Mzp_Pz; 00066 00067 double eta = rho2/rho1; 00068 double c = rho1*eta*(1+eta); 00069 00070 double t=1, t_old=0; 00071 double gamma=1, gamma_inc=2; 00072 double obj=0.0, obj_old=0.0; 00073 00074 double* diag_H = SG_MALLOC(double, n_tasks); 00075 double* f = SG_MALLOC(double, n_tasks); 00076 double* a = SG_MALLOC(double, n_tasks); 00077 double* lb = SG_MALLOC(double, n_tasks); 00078 double* ub = SG_MALLOC(double, n_tasks); 00079 double* x = SG_CALLOC(double, n_tasks); 00080 00081 internal::set_is_malloc_allowed(false); 00082 bool done = false; 00083 while (!done && iter <= options.max_iter) 00084 { 00085 double alpha = double(t_old - 1)/t; 00086 SG_SDEBUG("alpha=%f\n",alpha); 00087 00088 // compute search point 00089 Ws = (1+alpha)*Wz - alpha*Wz_old; 00090 Cs = (1+alpha)*Cz - alpha*Cz_old; 00091 Ms = (1+alpha)*Mz - alpha*Mz_old; 00092 00093 // zero gradient 00094 gWs.setZero(); 00095 gCs.setZero(); 00096 internal::set_is_malloc_allowed(true); 00097 SG_SDEBUG("Computing gradient\n"); 00098 IM = (eta*MatrixXd::Identity(n_tasks,n_tasks)+Ms); 00099 // cout << "M" << endl << Ms << endl; 00100 // cout << "IM" << endl << IM << endl; 00101 IMsqinv = (IM*IM).inverse(); 00102 invEtaMWt = IM.inverse()*Ws.transpose(); 00103 //cout << "invEtaMWt" << endl << invEtaMWt << endl; 00104 gMs.noalias() = -c*(Ws.transpose()*Ws)*IMsqinv; 00105 gWs.noalias() += 2*c*invEtaMWt.transpose(); 00106 internal::set_is_malloc_allowed(false); 00107 00108 // compute gradient and objective at search point 00109 double Fs = 0; 00110 for (task=0; task<n_tasks; task++) 00111 { 00112 SGVector<index_t> task_idx = options.tasks_indices[task]; 00113 int n_vecs_task = task_idx.vlen; 00114 for (int i=0; i<n_vecs_task; i++) 00115 { 00116 double aa = -y[task_idx[i]]*(features->dense_dot(task_idx[i], Ws.col(task).data(), n_feats)+Cs[task]); 00117 double bb = CMath::max(aa,0.0); 00118 00119 // avoid underflow when computing exponential loss 00120 Fs += (CMath::log(CMath::exp(-bb) + CMath::exp(aa-bb)) + bb)/n_vecs_task; 00121 double b = -y[task_idx[i]]*(1 - 1/(1+CMath::exp(aa)))/n_vecs_task; 00122 gCs[task] += b; 00123 features->add_to_dense_vec(b, task_idx[i], gWs.col(task).data(), n_feats); 00124 } 00125 } 00126 //cout << "gWs" << endl << gWs << endl; 00127 //cout << "gCs" << endl << gCs << endl; 00128 SG_SDEBUG("Computed gradient\n"); 00129 00130 // add regularizer 00131 Fs += c*(Ws*invEtaMWt).trace(); 00132 SG_SDEBUG("Fs = %f \n", Fs); 00133 00134 double Fzp = 0.0; 00135 00136 int inner_iter = 0; 00137 // line search, Armijo-Goldstein scheme 00138 while (inner_iter <= 1000) 00139 { 00140 Wzp = Ws - gWs/gamma; 00141 Czp = Cs - gCs/gamma; 00142 // compute singular projection of Ms - gMs/gamma with k 00143 internal::set_is_malloc_allowed(true); 00144 EigenSolver<MatrixXd> eigensolver(Ms-gMs/gamma); 00145 00146 // solve problem 00147 // min sum_i (s_i - s*_i)^2 s.t. sum_i s_i = k, 0<=s_i<=1 00148 for (int i=0; i<n_tasks; i++) 00149 { 00150 diag_H[i] = 2.0; 00151 f[i] = -2*eigensolver.eigenvalues()[i].real(); 00152 SG_SDEBUG("%dth eigenvalue %f\n",i,eigensolver.eigenvalues()[i].real()); 00153 a[i] = 1.0; 00154 lb[i] = 0.0; 00155 ub[i] = 1.0; 00156 x[i] = double(options.n_clusters)/n_tasks; 00157 } 00158 double b = options.n_clusters;//eigensolver.eigenvalues().sum().real(); 00159 SG_SDEBUG("b = %f\n", b); 00160 SG_SDEBUG("Calling libqp\n"); 00161 libqp_state_T problem_state = libqp_gsmo_solver(&get_col,diag_H,f,a,b,lb,ub,x,n_tasks,1000,1e-6,NULL); 00162 SG_SDEBUG("Libqp objective = %f\n",problem_state.QP); 00163 SG_SDEBUG("Exit code = %d\n",problem_state.exitflag); 00164 SG_SDEBUG("%d iteration passed\n",problem_state.nIter); 00165 SG_SDEBUG("Solution is \n [ "); 00166 for (int i=0; i<n_tasks; i++) 00167 SG_SDEBUG("%f ", x[i]); 00168 SG_SDEBUG("]\n"); 00169 Map<VectorXd> Mzp_DiagSigz(x,n_tasks); 00170 Mzp_Pz = eigensolver.eigenvectors().real(); 00171 Mzp = Mzp_Pz*Mzp_DiagSigz.asDiagonal()*Mzp_Pz.transpose(); 00172 internal::set_is_malloc_allowed(false); 00173 // walk in direction of antigradient 00174 for (int i=0; i<n_tasks; i++) 00175 Mzp_DiagSigz[i] += eta; 00176 internal::set_is_malloc_allowed(true); 00177 invEtaMWt = (Mzp_Pz* 00178 (Mzp_DiagSigz.cwiseInverse().asDiagonal())* 00179 Mzp_Pz.transpose())* 00180 Wzp.transpose(); 00181 internal::set_is_malloc_allowed(false); 00182 // compute objective at line search point 00183 Fzp = 0.0; 00184 for (task=0; task<n_tasks; task++) 00185 { 00186 SGVector<index_t> task_idx = options.tasks_indices[task]; 00187 int n_vecs_task = task_idx.vlen; 00188 for (int i=0; i<n_vecs_task; i++) 00189 { 00190 double aa = -y[task_idx[i]]*(features->dense_dot(task_idx[i], Wzp.col(task).data(), n_feats)+Cs[task]); 00191 double bb = CMath::max(aa,0.0); 00192 00193 Fzp += (CMath::log(CMath::exp(-bb) + CMath::exp(aa-bb)) + bb)/n_vecs_task; 00194 } 00195 } 00196 Fzp += c*(Wzp*invEtaMWt).trace(); 00197 00198 // compute delta between line search point and search point 00199 delta_Wzp = Wzp - Ws; 00200 delta_Czp = Czp - Cs; 00201 delta_Mzp = Mzp - Ms; 00202 00203 // norms of delta 00204 double nrm_delta_Wzp = delta_Wzp.squaredNorm(); 00205 double nrm_delta_Czp = delta_Czp.squaredNorm(); 00206 double nrm_delta_Mzp = delta_Mzp.squaredNorm(); 00207 00208 double r_sum = (nrm_delta_Wzp + nrm_delta_Czp + nrm_delta_Mzp)/3; 00209 00210 double Fzp_gamma = 0.0; 00211 if (n_feats > n_tasks) 00212 { 00213 Fzp_gamma = Fs + (delta_Wzp.transpose()*gWs).trace() + 00214 (delta_Czp.transpose()*gCs).trace() + 00215 (delta_Mzp.transpose()*gMs).trace() + 00216 (gamma/2)*nrm_delta_Wzp + 00217 (gamma/2)*nrm_delta_Czp + 00218 (gamma/2)*nrm_delta_Mzp; 00219 } 00220 else 00221 { 00222 Fzp_gamma = Fs + (gWs.transpose()*delta_Wzp).trace() + 00223 (gCs.transpose()*delta_Czp).trace() + 00224 (gMs.transpose()*delta_Mzp).trace() + 00225 (gamma/2)*nrm_delta_Wzp + 00226 (gamma/2)*nrm_delta_Czp + 00227 (gamma/2)*nrm_delta_Mzp; 00228 } 00229 00230 // break if delta is getting too small 00231 if (r_sum <= 1e-20) 00232 { 00233 done = true; 00234 break; 00235 } 00236 00237 // break if objective at line searc point is smaller than Fzp_gamma 00238 if (Fzp <= Fzp_gamma) 00239 break; 00240 else 00241 gamma *= gamma_inc; 00242 00243 inner_iter++; 00244 } 00245 00246 Wz_old = Wz; 00247 Cz_old = Cz; 00248 Mz_old = Mz; 00249 Wz = Wzp; 00250 Cz = Czp; 00251 Mz = Mzp; 00252 00253 // compute objective value 00254 obj_old = obj; 00255 obj = Fzp; 00256 00257 // check if process should be terminated 00258 switch (options.termination) 00259 { 00260 case 0: 00261 if (iter>=2) 00262 { 00263 if ( CMath::abs(obj-obj_old) <= options.tolerance ) 00264 done = true; 00265 } 00266 break; 00267 case 1: 00268 if (iter>=2) 00269 { 00270 if ( CMath::abs(obj-obj_old) <= options.tolerance*CMath::abs(obj_old)) 00271 done = true; 00272 } 00273 break; 00274 case 2: 00275 if (CMath::abs(obj) <= options.tolerance) 00276 done = true; 00277 break; 00278 case 3: 00279 if (iter>=options.max_iter) 00280 done = true; 00281 break; 00282 } 00283 00284 iter++; 00285 t_old = t; 00286 t = 0.5 * (1 + CMath::sqrt(1.0 + 4*t*t)); 00287 } 00288 internal::set_is_malloc_allowed(true); 00289 SG_SDEBUG("%d iteration passed, objective = %f\n",iter,obj); 00290 00291 SG_FREE(H_diag_matrix); 00292 SG_FREE(diag_H); 00293 SG_FREE(f); 00294 SG_FREE(a); 00295 SG_FREE(lb); 00296 SG_FREE(ub); 00297 SG_FREE(x); 00298 00299 SGMatrix<float64_t> tasks_w(n_feats, n_tasks); 00300 for (int i=0; i<n_feats; i++) 00301 { 00302 for (task=0; task<n_tasks; task++) 00303 tasks_w(i,task) = Wzp(i,task); 00304 } 00305 //tasks_w.display_matrix(); 00306 SGVector<float64_t> tasks_c(n_tasks); 00307 for (int i=0; i<n_tasks; i++) tasks_c[i] = Czp[i]; 00308 return malsar_result_t(tasks_w, tasks_c); 00309 }; 00310 }; 00311 #endif