SHOGUN
v2.0.0
|
00001 /* 00002 * This program is free software; you can redistribute it and/or modify 00003 * it under the terms of the GNU General Public License as published by 00004 * the Free Software Foundation; either version 3 of the License, or 00005 * (at your option) any later version. 00006 * 00007 * Written (W) 2012 Sergey Lisitsyn 00008 * Copyright (C) 2012 Jiayu Zhou and Jieping Ye 00009 */ 00010 00011 #include <shogun/lib/malsar/malsar_joint_feature_learning.h> 00012 #ifdef HAVE_EIGEN3 00013 #include <shogun/lib/Signal.h> 00014 #include <shogun/mathematics/Math.h> 00015 #include <shogun/mathematics/eigen3.h> 00016 #include <iostream> 00017 00018 using namespace Eigen; 00019 using namespace std; 00020 00021 namespace shogun 00022 { 00023 00024 malsar_result_t malsar_joint_feature_learning( 00025 CDotFeatures* features, 00026 double* y, 00027 double rho1, 00028 double rho2, 00029 const malsar_options& options) 00030 { 00031 int task; 00032 int n_feats = features->get_dim_feature_space(); 00033 SG_SDEBUG("n feats = %d\n", n_feats); 00034 int n_vecs = features->get_num_vectors(); 00035 SG_SDEBUG("n vecs = %d\n", n_vecs); 00036 int n_tasks = options.n_tasks; 00037 SG_SDEBUG("n tasks = %d\n", n_tasks); 00038 00039 int iter = 0; 00040 00041 // initialize weight vector and bias for each task 00042 MatrixXd Ws = MatrixXd::Zero(n_feats, n_tasks); 00043 VectorXd Cs = VectorXd::Zero(n_tasks); 00044 MatrixXd Wz=Ws, Wzp=Ws, Wz_old=Ws, delta_Wzp=Ws, gWs=Ws; 00045 VectorXd Cz=Cs, Czp=Cs, Cz_old=Cs, delta_Czp=Cs, gCs=Cs; 00046 00047 double t=1, t_old=0; 00048 double gamma=1, gamma_inc=2; 00049 double obj=0.0, obj_old=0.0; 00050 00051 internal::set_is_malloc_allowed(false); 00052 bool done = false; 00053 while (!done && iter <= options.max_iter && !CSignal::cancel_computations()) 00054 { 00055 double alpha = double(t_old - 1)/t; 00056 00057 // compute search point 00058 Ws = (1+alpha)*Wz - alpha*Wz_old; 00059 Cs = (1+alpha)*Cz - alpha*Cz_old; 00060 00061 // zero gradient 00062 gWs.setZero(); 00063 gCs.setZero(); 00064 00065 // compute gradient and objective at search point 00066 double Fs = 0; 00067 for (task=0; task<n_tasks; task++) 00068 { 00069 SGVector<index_t> task_idx = options.tasks_indices[task]; 00070 int n_task_vecs = task_idx.vlen; 00071 for (int i=0; i<n_task_vecs; i++) 00072 { 00073 double aa = -y[task_idx[i]]*(features->dense_dot(task_idx[i], Ws.col(task).data(), n_feats)+Cs[task]); 00074 double bb = CMath::max(aa,0.0); 00075 00076 // avoid underflow when computing exponential loss 00077 Fs += (CMath::log(CMath::exp(-bb) + CMath::exp(aa-bb)) + bb)/n_task_vecs; 00078 double b = -y[task_idx[i]]*(1 - 1/(1+CMath::exp(aa)))/n_task_vecs; 00079 00080 gCs[task] += b; 00081 features->add_to_dense_vec(b, task_idx[i], gWs.col(task).data(), n_feats); 00082 } 00083 } 00084 gWs.noalias() += 2*rho2*Ws; 00085 00086 // add regularizer 00087 Fs += Ws.squaredNorm(); 00088 00089 //cout << "gWs" << endl << gWs << endl; 00090 //cout << "gCs" << endl << gCs << endl; 00091 //SG_SPRINT("Fs = %f\n",Fs); 00092 00093 double Fzp = 0.0; 00094 00095 int inner_iter = 0; 00096 // line search, Armijo-Goldstein scheme 00097 while (inner_iter <= 1000) 00098 { 00099 // compute lasso projection of Ws - gWs/gamma 00100 for (int i=0; i<n_feats; i++) 00101 { 00102 Wzp.row(i).noalias() = Ws.row(i) - gWs.row(i)/gamma; 00103 double norm = Wzp.row(i).lpNorm<2>(); 00104 if (norm == 0.0) 00105 Wzp.row(i).setZero(); 00106 else 00107 { 00108 double threshold = norm - rho1/gamma; 00109 if (threshold < 0.0) 00110 Wzp.row(i).setZero(); 00111 else 00112 Wzp.row(i) *= threshold/norm; 00113 } 00114 } 00115 // walk in direction of antigradient 00116 Czp = Cs - gCs/gamma; 00117 00118 // compute objective at line search point 00119 Fzp = 0.0; 00120 for (task=0; task<n_tasks; task++) 00121 { 00122 SGVector<index_t> task_idx = options.tasks_indices[task]; 00123 int n_task_vecs = task_idx.vlen; 00124 for (int i=0; i<n_task_vecs; i++) 00125 { 00126 double aa = -y[task_idx[i]]*(features->dense_dot(task_idx[i], Wzp.col(task).data(), n_feats)+Czp[task]); 00127 double bb = CMath::max(aa,0.0); 00128 00129 Fzp += (CMath::log(CMath::exp(-bb) + CMath::exp(aa-bb)) + bb)/n_task_vecs; 00130 } 00131 } 00132 Fzp += rho2*Wzp.squaredNorm(); 00133 00134 // compute delta between line search point and search point 00135 delta_Wzp = Wzp - Ws; 00136 delta_Czp = Czp - Cs; 00137 00138 // norms of delta 00139 double nrm_delta_Wzp = delta_Wzp.squaredNorm(); 00140 double nrm_delta_Czp = delta_Czp.squaredNorm(); 00141 00142 double r_sum = (nrm_delta_Wzp + nrm_delta_Czp)/2; 00143 00144 double Fzp_gamma = Fs + (delta_Wzp.transpose()*gWs).trace() + 00145 (delta_Czp.transpose()*gCs).trace() + 00146 (gamma/2)*nrm_delta_Wzp + 00147 (gamma/2)*nrm_delta_Czp; 00148 00149 // break if delta is getting too small 00150 if (r_sum <= 1e-20) 00151 { 00152 SG_SDEBUG("Line search point is too close to search point\n"); 00153 done = true; 00154 break; 00155 } 00156 00157 // break if objective at line searc point is smaller than Fzp_gamma 00158 if (Fzp <= Fzp_gamma) 00159 break; 00160 else 00161 gamma *= gamma_inc; 00162 00163 inner_iter++; 00164 } 00165 00166 Wz_old = Wz; 00167 Cz_old = Cz; 00168 Wz = Wzp; 00169 Cz = Czp; 00170 00171 // compute objective value 00172 obj_old = obj; 00173 obj = Fzp; 00174 for (int i=0; i<n_feats; i++) 00175 obj += rho1*(Wz.row(i).lpNorm<2>()); 00176 //for (task=0; task<n_tasks; task++) 00177 // obj += rho1*(Wz.col(task).norm()); 00178 SG_SDEBUG("Obj = %f\n",obj); 00179 //SG_SABS_PROGRESS(obj,0.0); 00180 // check if process should be terminated 00181 switch (options.termination) 00182 { 00183 case 0: 00184 if (iter>=2) 00185 { 00186 if ( CMath::abs(obj-obj_old) <= options.tolerance ) 00187 { 00188 SG_SDEBUG("Objective changes less than tolerance\n"); 00189 done = true; 00190 } 00191 } 00192 break; 00193 case 1: 00194 if (iter>=2) 00195 { 00196 if ( CMath::abs(obj-obj_old) <= options.tolerance*CMath::abs(obj_old)) 00197 done = true; 00198 } 00199 break; 00200 case 2: 00201 if (CMath::abs(obj) <= options.tolerance) 00202 done = true; 00203 break; 00204 case 3: 00205 if (iter>=options.max_iter) 00206 done = true; 00207 break; 00208 } 00209 00210 iter++; 00211 t_old = t; 00212 t = 0.5 * (1 + CMath::sqrt(1.0 + 4*t*t)); 00213 } 00214 internal::set_is_malloc_allowed(true); 00215 SG_SDONE(); 00216 SG_SDEBUG("%d iteration passed, objective = %f\n",iter,obj); 00217 00218 SGMatrix<float64_t> tasks_w(n_feats, n_tasks); 00219 for (int i=0; i<n_feats; i++) 00220 { 00221 for (task=0; task<n_tasks; task++) 00222 tasks_w(i,task) = Wzp(i,task); 00223 } 00224 SGVector<float64_t> tasks_c(n_tasks); 00225 for (int i=0; i<n_tasks; i++) tasks_c[i] = Czp[i]; 00226 return malsar_result_t(tasks_w, tasks_c); 00227 }; 00228 }; 00229 #endif