/*
   SVM with Quasi-Newton stochastic gradient
   Copyright (C) 2009- Antoine Bordes

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111, USA

   Shogun adjustments (w) 2011 Siddharth Kherada
*/

#include <shogun/classifier/svm/SGDQN.h>
#include <shogun/base/Parameter.h>
#include <shogun/lib/Signal.h>
#include <shogun/mathematics/Math.h>
#include <shogun/loss/HingeLoss.h>
#include <shogun/labels/BinaryLabels.h>

using namespace shogun;

CSGDQN::CSGDQN()
: CLinearMachine()
{
	init();
}

CSGDQN::CSGDQN(float64_t C)
: CLinearMachine()
{
	init();

	C1=C;
	C2=C;
}

CSGDQN::CSGDQN(float64_t C, CDotFeatures* traindat, CLabels* trainlab)
: CLinearMachine()
{
	init();
	C1=C;
	C2=C;

	set_features(traindat);
	set_labels(trainlab);
}

CSGDQN::~CSGDQN()
{
	SG_UNREF(loss);
}

void CSGDQN::set_loss_function(CLossFunction* loss_func)
{
	if (loss)
		SG_UNREF(loss);
	loss=loss_func;
	SG_REF(loss);
}

void CSGDQN::compute_ratio(float64_t* W, float64_t* W_1, float64_t* B, float64_t* dst, int32_t dim, float64_t lambda, float64_t loss_val)
{
	for (int32_t i=0; i<dim; i++)
	{
		float64_t diffw=W_1[i]-W[i];
		if (diffw)
			B[i]+=diffw/(lambda*diffw + loss_val*dst[i]);
		else
			B[i]+=1/lambda;
	}
}

void CSGDQN::combine_and_clip(float64_t* Bc, float64_t* B, int32_t dim, float64_t c1, float64_t c2, float64_t v1, float64_t v2)
{
	for (int32_t i=0; i<dim; i++)
	{
		if (B[i])
		{
			Bc[i]=Bc[i]*c1 + B[i]*c2;
			Bc[i]=CMath::min(CMath::max(Bc[i],v1),v2);
		}
	}
}
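/* Descriptive note on the two helpers above: SGD-QN (Bordes, Bottou and
 * Gallinari, "SGD-QN: Careful Quasi-Newton Stochastic Gradient Descent",
 * JMLR 2009) maintains a per-coordinate scaling vector Bc that plays the
 * role of a diagonal inverse-Hessian estimate. compute_ratio() forms a
 * secant-style estimate from two consecutive weight vectors,
 * B[i] += diffw / (lambda*diffw + loss_val*dst[i]), and falls back to
 * 1/lambda for coordinates that did not move. combine_and_clip() then
 * averages this estimate into Bc with weights c1 and c2 and clips every
 * entry to [v1, v2] (called below with [1/(100*lambda), 100/lambda]) so the
 * effective per-coordinate learning rates stay bounded.
 */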
bool CSGDQN::train(CFeatures* data)
{
	ASSERT(m_labels);
	ASSERT(m_labels->get_label_type() == LT_BINARY);

	if (data)
	{
		if (!data->has_property(FP_DOT))
			SG_ERROR("Specified features are not of type CDotFeatures\n");
		set_features((CDotFeatures*) data);
	}

	ASSERT(features);

	int32_t num_train_labels=m_labels->get_num_labels();
	int32_t num_vec=features->get_num_vectors();

	ASSERT(num_vec==num_train_labels);
	ASSERT(num_vec>0);

	w=SGVector<float64_t>(features->get_dim_feature_space());
	w.zero();

	float64_t lambda=1.0/(C1*num_vec);

	// Shift t in order to have a
	// reasonable initial learning rate.
	// This assumes |x| \approx 1.
	float64_t maxw = 1.0 / sqrt(lambda);
	float64_t typw = sqrt(maxw);
	float64_t eta0 = typw / CMath::max(1.0, -loss->first_derivative(-typw,1));
	t = 1 / (eta0 * lambda);

	SG_INFO("lambda=%f, epochs=%d, eta0=%f\n", lambda, epochs, eta0);

	float64_t* Bc=SG_MALLOC(float64_t, w.vlen);
	SGVector<float64_t>::fill_vector(Bc, w.vlen, 1/lambda);

	float64_t* result=SG_MALLOC(float64_t, w.vlen);
	float64_t* B=SG_MALLOC(float64_t, w.vlen);

	// estimate the weight-decay skip from the feature sparsity
	calibrate();

	SG_INFO("Training on %d vectors\n", num_vec);
	CSignal::clear_cancel();

	ELossType loss_type = loss->get_loss_type();
	bool is_log_loss = false;
	if ((loss_type == L_LOGLOSS) || (loss_type == L_LOGLOSSMARGIN))
		is_log_loss = true;

	for (int32_t e=0; e<epochs && (!CSignal::cancel_computations()); e++)
	{
		count = skip;
		bool updateB=false;
		for (int32_t i=0; i<num_vec; i++)
		{
			SGVector<float64_t> v = features->get_computed_dot_feature_vector(i);
			ASSERT(w.vlen==v.vlen);
			float64_t eta = 1.0/t;
			float64_t y = ((CBinaryLabels*) m_labels)->get_label(i);
			float64_t z = y * features->dense_dot(i, w.vector, w.vlen);
			if (updateB==true)
			{
				if (z < 1 || is_log_loss)
				{
					SGVector<float64_t> w_1=w.clone();
					float64_t loss_1=-loss->first_derivative(z,1);
					SGVector<float64_t>::vector_multiply(result, Bc, v.vector, w.vlen);
					SGVector<float64_t>::add(w.vector, eta*loss_1*y, result, 1.0, w.vector, w.vlen);
					float64_t z2 = y * features->dense_dot(i, w.vector, w.vlen);
					float64_t diffloss = -loss->first_derivative(z2,1) - loss_1;
					if (diffloss)
					{
						compute_ratio(w.vector, w_1.vector, B, v.vector, w.vlen, lambda, y*diffloss);
						if (t>skip)
							combine_and_clip(Bc, B, w.vlen, (t-skip)/(t+skip), 2*skip/(t+skip), 1/(100*lambda), 100/lambda);
						else
							combine_and_clip(Bc, B, w.vlen, t/(t+skip), skip/(t+skip), 1/(100*lambda), 100/lambda);
					}
				}
				updateB=false;
			}
			else
			{
				if (--count<=0)
				{
					SGVector<float64_t>::vector_multiply(result, Bc, w.vector, w.vlen);
					SGVector<float64_t>::add(w.vector, -skip*lambda*eta, result, 1.0, w.vector, w.vlen);
					count = skip;
					updateB=true;
				}

				if (z < 1 || is_log_loss)
				{
					SGVector<float64_t>::vector_multiply(result, Bc, v.vector, w.vlen);
					SGVector<float64_t>::add(w.vector, eta*-loss->first_derivative(z,1)*y, result, 1.0, w.vector, w.vlen);
				}
			}
			t++;
		}
	}
	SG_FREE(result);
	SG_FREE(B);
	SG_FREE(Bc); // Bc is allocated with SG_MALLOC above; free it to avoid a leak

	return true;
}
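/* Training-loop note: the l2 regularization is not applied on every example.
 * The loop accumulates it and applies the weight decay (-skip*lambda*eta,
 * scaled per coordinate by Bc) only once every `skip` iterations; the
 * iteration right after such a decay step (updateB==true) additionally
 * refreshes the curvature estimate B by comparing the weights before and
 * after its gradient step. calibrate() below sets `skip` from the average
 * number of non-zero features per vector, so sparser data triggers the
 * decay, and the extra dot products it requires, less often.
 */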
void CSGDQN::calibrate()
{
	ASSERT(features);
	int32_t num_vec=features->get_num_vectors();
	int32_t c_dim=features->get_dim_feature_space();

	ASSERT(num_vec>0);
	ASSERT(c_dim>0);

	SG_INFO("Estimating sparsity num_vec=%d num_feat=%d.\n", num_vec, c_dim);

	int32_t n = 0;
	float64_t r = 0;

	for (int32_t j=0; j<num_vec; j++, n++)
		r += features->get_nnz_features_for_vector(j);

	// compute weight decay skip
	skip = (int32_t) ((16 * n * c_dim) / r);
}

void CSGDQN::init()
{
	t=0;
	C1=1;
	C2=1;
	epochs=5;
	skip=1000;
	count=1000;

	loss=new CHingeLoss();
	SG_REF(loss);

	m_parameters->add(&C1, "C1", "Cost constant 1.");
	m_parameters->add(&C2, "C2", "Cost constant 2.");
	m_parameters->add(&epochs, "epochs", "Number of training epochs.");
	m_parameters->add(&skip, "skip", "Number of iterations between regularization updates.");
	m_parameters->add(&count, "count", "Iterations left until the next regularization update.");
}
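/* Usage sketch (not part of this source file): a minimal example of driving
 * CSGDQN from the Shogun 2.0 C++ API, assuming the standard headers and the
 * helpers init_shogun_with_defaults()/exit_shogun(), CDenseFeatures and
 * CBinaryLabels are available as in that release.
 *
 *   #include <shogun/base/init.h>
 *   #include <shogun/features/DenseFeatures.h>
 *   #include <shogun/labels/BinaryLabels.h>
 *   #include <shogun/classifier/svm/SGDQN.h>
 *
 *   using namespace shogun;
 *
 *   int main()
 *   {
 *       init_shogun_with_defaults();
 *
 *       // two 2-dimensional training points, one per class (column-major)
 *       SGMatrix<float64_t> mat(2, 2);
 *       mat(0,0)=-1; mat(1,0)=-1;   // first vector
 *       mat(0,1)= 1; mat(1,1)= 1;   // second vector
 *       SGVector<float64_t> lab(2);
 *       lab[0]=-1; lab[1]=+1;
 *
 *       CDenseFeatures<float64_t>* feats=new CDenseFeatures<float64_t>(mat);
 *       CBinaryLabels* labels=new CBinaryLabels(lab);
 *
 *       CSGDQN* svm=new CSGDQN(1.0, feats, labels);  // C=1.0
 *       svm->train();
 *
 *       CLabels* out=svm->apply(feats);
 *       SG_UNREF(out);
 *       SG_UNREF(svm);
 *
 *       exit_shogun();
 *       return 0;
 *   }
 */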