SHOGUN
v2.0.0
|
00001 /* 00002 * This program is free software; you can redistribute it and/or modify 00003 * it under the terms of the GNU General Public License as published by 00004 * the Free Software Foundation; either version 3 of the License, or 00005 * (at your option) any later version. 00006 * 00007 * Written (W) 2012 Sergey Lisitsyn 00008 * Copyright (C) 2012 Jiayu Zhou and Jieping Ye 00009 */ 00010 00011 #include <shogun/lib/malsar/malsar_low_rank.h> 00012 #ifdef HAVE_EIGEN3 00013 #include <shogun/mathematics/eigen3.h> 00014 #include <shogun/mathematics/Math.h> 00015 #include <iostream> 00016 00017 using namespace Eigen; 00018 00019 namespace shogun 00020 { 00021 00022 malsar_result_t malsar_low_rank( 00023 CDotFeatures* features, 00024 double* y, 00025 double rho, 00026 const malsar_options& options) 00027 { 00028 int task; 00029 int n_feats = features->get_dim_feature_space(); 00030 SG_SDEBUG("n feats = %d\n", n_feats); 00031 int n_vecs = features->get_num_vectors(); 00032 SG_SDEBUG("n vecs = %d\n", n_vecs); 00033 int n_tasks = options.n_tasks; 00034 SG_SDEBUG("n tasks = %d\n", n_tasks); 00035 00036 int iter = 0; 00037 00038 // initialize weight vector and bias for each task 00039 MatrixXd Ws = MatrixXd::Zero(n_feats, n_tasks); 00040 VectorXd Cs = VectorXd::Zero(n_tasks); 00041 MatrixXd Wz=Ws, Wzp=Ws, Wz_old=Ws, delta_Wzp=Ws, gWs=Ws; 00042 VectorXd Cz=Cs, Czp=Cs, Cz_old=Cs, delta_Czp=Cs, gCs=Cs; 00043 00044 double t=1, t_old=0; 00045 double gamma=1, gamma_inc=2; 00046 double obj=0.0, obj_old=0.0; 00047 00048 double rho_L2 = 0.0; 00049 00050 internal::set_is_malloc_allowed(false); 00051 bool done = false; 00052 while (!done && iter <= options.max_iter) 00053 { 00054 double alpha = double(t_old - 1)/t; 00055 00056 // compute search point 00057 Ws = (1+alpha)*Wz - alpha*Wz_old; 00058 Cs = (1+alpha)*Cz - alpha*Cz_old; 00059 00060 // zero gradient 00061 gWs.setZero(); 00062 gCs.setZero(); 00063 00064 // compute gradient and objective at search point 00065 double Fs = 0; 00066 for (task=0; task<n_tasks; task++) 00067 { 00068 SGVector<index_t> task_idx = options.tasks_indices[task]; 00069 int n_task_vecs = task_idx.vlen; 00070 for (int i=0; i<n_task_vecs; i++) 00071 { 00072 double aa = -y[task_idx[i]]*(features->dense_dot(task_idx[i], Ws.col(task).data(), n_feats)+Cs[task]); 00073 double bb = CMath::max(aa,0.0); 00074 00075 // avoid underflow when computing exponential loss 00076 Fs += (CMath::log(CMath::exp(-bb) + CMath::exp(aa-bb)) + bb)/n_task_vecs; 00077 double b = -y[task_idx[i]]*(1 - 1/(1+CMath::exp(aa)))/n_task_vecs; 00078 00079 gCs[task] += b; 00080 features->add_to_dense_vec(b, task_idx[i], gWs.col(task).data(), n_feats); 00081 } 00082 } 00083 gWs.noalias() += 2*rho_L2*Ws; 00084 //SG_SDEBUG("gWs=%f\n",gWs.squaredNorm()); 00085 00086 // add regularizer 00087 Fs += rho_L2*Ws.squaredNorm(); 00088 00089 double Fzp = 0.0; 00090 00091 int inner_iter = 0; 00092 // line search, Armijo-Goldstein scheme 00093 while (inner_iter <= 1000) 00094 { 00095 // compute trace projection of Ws - gWs/gamma with 2*rho/gamma 00096 internal::set_is_malloc_allowed(true); 00097 Wzp.setZero(); 00098 JacobiSVD<MatrixXd> svd((Ws - gWs/gamma).transpose(),ComputeThinU | ComputeThinV); 00099 for (int i=0; i<svd.singularValues().size(); i++) 00100 { 00101 if (svd.singularValues()[i] > rho/gamma) 00102 Wzp += (svd.matrixU().col(i)* 00103 svd.singularValues()[i]* 00104 svd.matrixV().col(i).transpose()).transpose(); 00105 } 00106 internal::set_is_malloc_allowed(false); 00107 // walk in direction of antigradient 00108 Czp = Cs - gCs/gamma; 00109 00110 // compute objective at line search point 00111 Fzp = 0.0; 00112 for (task=0; task<n_tasks; task++) 00113 { 00114 SGVector<index_t> task_idx = options.tasks_indices[task]; 00115 int n_task_vecs = task_idx.vlen; 00116 for (int i=0; i<n_task_vecs; i++) 00117 { 00118 double aa = -y[task_idx[i]]*(features->dense_dot(task_idx[i], Wzp.col(task).data(), n_feats)+Czp[task]); 00119 double bb = CMath::max(aa,0.0); 00120 00121 Fzp += (CMath::log(CMath::exp(-bb) + CMath::exp(aa-bb)) + bb)/n_task_vecs; 00122 } 00123 } 00124 Fzp += rho_L2*Wzp.squaredNorm(); 00125 00126 // compute delta between line search point and search point 00127 delta_Wzp = Wzp - Ws; 00128 delta_Czp = Czp - Cs; 00129 00130 // norms of delta 00131 double nrm_delta_Wzp = delta_Wzp.squaredNorm(); 00132 double nrm_delta_Czp = delta_Czp.squaredNorm(); 00133 00134 double r_sum = (nrm_delta_Wzp + nrm_delta_Czp)/2; 00135 00136 double Fzp_gamma = Fs + (delta_Wzp.transpose()*gWs).trace() + 00137 (delta_Czp.transpose()*gCs).trace() + 00138 (gamma/2)*nrm_delta_Wzp + 00139 (gamma/2)*nrm_delta_Czp; 00140 00141 // break if delta is getting too small 00142 if (r_sum <= 1e-20) 00143 { 00144 done = true; 00145 break; 00146 } 00147 00148 // break if objective at line search point is smaller than Fzp_gamma 00149 if (Fzp <= Fzp_gamma) 00150 break; 00151 else 00152 gamma *= gamma_inc; 00153 } 00154 00155 Wz_old = Wz; 00156 Cz_old = Cz; 00157 Wz = Wzp; 00158 Cz = Czp; 00159 00160 // compute objective value 00161 obj_old = obj; 00162 obj = Fzp; 00163 internal::set_is_malloc_allowed(true); 00164 JacobiSVD<MatrixXd> svd(Wzp, EigenvaluesOnly); 00165 obj += rho*svd.singularValues().sum(); 00166 internal::set_is_malloc_allowed(false); 00167 00168 00169 // check if process should be terminated 00170 switch (options.termination) 00171 { 00172 case 0: 00173 if (iter>=2) 00174 { 00175 if ( CMath::abs(obj-obj_old) <= options.tolerance ) 00176 done = true; 00177 } 00178 break; 00179 case 1: 00180 if (iter>=2) 00181 { 00182 if ( CMath::abs(obj-obj_old) <= options.tolerance*CMath::abs(obj_old)) 00183 done = true; 00184 } 00185 break; 00186 case 2: 00187 if (CMath::abs(obj) <= options.tolerance) 00188 done = true; 00189 break; 00190 case 3: 00191 if (iter>=options.max_iter) 00192 done = true; 00193 break; 00194 } 00195 00196 iter++; 00197 t_old = t; 00198 t = 0.5 * (1 + CMath::sqrt(1.0 + 4*t*t)); 00199 } 00200 internal::set_is_malloc_allowed(true); 00201 SG_SDEBUG("%d iteration passed, objective = %f\n",iter,obj); 00202 00203 SGMatrix<float64_t> tasks_w(n_feats, n_tasks); 00204 for (int i=0; i<n_feats; i++) 00205 { 00206 for (task=0; task<n_tasks; task++) 00207 tasks_w(i,task) = Wzp(i,task); 00208 } 00209 SGVector<float64_t> tasks_c(n_tasks); 00210 for (int i=0; i<n_tasks; i++) tasks_c[i] = Czp[i]; 00211 return malsar_result_t(tasks_w, tasks_c); 00212 }; 00213 }; 00214 #endif