SHOGUN
v2.0.0
|
00001 /* 00002 * This program is free software; you can redistribute it and/or modify 00003 * it under the terms of the GNU General Public License as published by 00004 * the Free Software Foundation; either version 3 of the License, or 00005 * (at your option) any later version. 00006 * 00007 * Copyright (C) 2012 Sergey Lisitsyn 00008 */ 00009 00010 #include <shogun/classifier/FeatureBlockLogisticRegression.h> 00011 #include <shogun/lib/slep/slep_solver.h> 00012 #include <shogun/lib/slep/slep_options.h> 00013 00014 #include <shogun/lib/IndexBlockGroup.h> 00015 #include <shogun/lib/IndexBlockTree.h> 00016 00017 namespace shogun 00018 { 00019 00020 CFeatureBlockLogisticRegression::CFeatureBlockLogisticRegression() : 00021 CLinearMachine(), 00022 m_feature_relation(NULL), m_z(0.0) 00023 { 00024 register_parameters(); 00025 } 00026 00027 CFeatureBlockLogisticRegression::CFeatureBlockLogisticRegression( 00028 float64_t z, CDotFeatures* train_features, 00029 CBinaryLabels* train_labels, CIndexBlockRelation* feature_relation) : 00030 CLinearMachine(), 00031 m_feature_relation(NULL) 00032 { 00033 set_feature_relation(feature_relation); 00034 set_z(z); 00035 set_q(2.0); 00036 set_features(train_features); 00037 set_labels(train_labels); 00038 set_termination(0); 00039 set_regularization(0); 00040 set_tolerance(1e-3); 00041 set_max_iter(1000); 00042 register_parameters(); 00043 } 00044 00045 CFeatureBlockLogisticRegression::~CFeatureBlockLogisticRegression() 00046 { 00047 SG_UNREF(m_feature_relation); 00048 } 00049 00050 void CFeatureBlockLogisticRegression::register_parameters() 00051 { 00052 SG_ADD((CSGObject**)&m_feature_relation, "feature_relation", "feature relation", MS_NOT_AVAILABLE); 00053 SG_ADD(&m_z, "z", "regularization coefficient", MS_AVAILABLE); 00054 SG_ADD(&m_q, "q", "q of L1/Lq", MS_AVAILABLE); 00055 SG_ADD(&m_termination, "termination", "termination", MS_NOT_AVAILABLE); 00056 SG_ADD(&m_regularization, "regularization", "regularization", MS_NOT_AVAILABLE); 00057 SG_ADD(&m_tolerance, "tolerance", "tolerance", MS_NOT_AVAILABLE); 00058 SG_ADD(&m_max_iter, "max_iter", "maximum number of iterations", MS_NOT_AVAILABLE); 00059 } 00060 00061 CIndexBlockRelation* CFeatureBlockLogisticRegression::get_feature_relation() const 00062 { 00063 SG_REF(m_feature_relation); 00064 return m_feature_relation; 00065 } 00066 00067 void CFeatureBlockLogisticRegression::set_feature_relation(CIndexBlockRelation* feature_relation) 00068 { 00069 SG_UNREF(m_feature_relation); 00070 SG_REF(feature_relation); 00071 m_feature_relation = feature_relation; 00072 } 00073 00074 int32_t CFeatureBlockLogisticRegression::get_max_iter() const 00075 { 00076 return m_max_iter; 00077 } 00078 00079 int32_t CFeatureBlockLogisticRegression::get_regularization() const 00080 { 00081 return m_regularization; 00082 } 00083 00084 int32_t CFeatureBlockLogisticRegression::get_termination() const 00085 { 00086 return m_termination; 00087 } 00088 00089 float64_t CFeatureBlockLogisticRegression::get_tolerance() const 00090 { 00091 return m_tolerance; 00092 } 00093 00094 float64_t CFeatureBlockLogisticRegression::get_z() const 00095 { 00096 return m_z; 00097 } 00098 00099 float64_t CFeatureBlockLogisticRegression::get_q() const 00100 { 00101 return m_q; 00102 } 00103 00104 void CFeatureBlockLogisticRegression::set_max_iter(int32_t max_iter) 00105 { 00106 ASSERT(max_iter>=0); 00107 m_max_iter = max_iter; 00108 } 00109 00110 void CFeatureBlockLogisticRegression::set_regularization(int32_t regularization) 00111 { 00112 ASSERT(regularization==0 || regularization==1); 00113 m_regularization = regularization; 00114 } 00115 00116 void CFeatureBlockLogisticRegression::set_termination(int32_t termination) 00117 { 00118 ASSERT(termination>=0 && termination<=4); 00119 m_termination = termination; 00120 } 00121 00122 void CFeatureBlockLogisticRegression::set_tolerance(float64_t tolerance) 00123 { 00124 ASSERT(tolerance>0.0); 00125 m_tolerance = tolerance; 00126 } 00127 00128 void CFeatureBlockLogisticRegression::set_z(float64_t z) 00129 { 00130 m_z = z; 00131 } 00132 00133 void CFeatureBlockLogisticRegression::set_q(float64_t q) 00134 { 00135 m_q = q; 00136 } 00137 00138 bool CFeatureBlockLogisticRegression::train_machine(CFeatures* data) 00139 { 00140 if (data && (CDotFeatures*)data) 00141 set_features((CDotFeatures*)data); 00142 00143 ASSERT(features); 00144 ASSERT(m_labels); 00145 00146 int32_t n_vecs = m_labels->get_num_labels(); 00147 SGVector<float64_t> y(n_vecs); 00148 for (int32_t i=0; i<n_vecs; i++) 00149 y[i] = ((CBinaryLabels*)m_labels)->get_label(i); 00150 00151 slep_options options = slep_options::default_options(); 00152 options.q = m_q; 00153 options.regularization = m_regularization; 00154 options.termination = m_termination; 00155 options.tolerance = m_tolerance; 00156 options.max_iter = m_max_iter; 00157 options.loss = LOGISTIC; 00158 00159 EIndexBlockRelationType relation_type = m_feature_relation->get_relation_type(); 00160 switch (relation_type) 00161 { 00162 case GROUP: 00163 { 00164 CIndexBlockGroup* feature_group = (CIndexBlockGroup*)m_feature_relation; 00165 SGVector<index_t> ind = feature_group->get_SLEP_ind(); 00166 options.ind = ind.vector; 00167 options.n_feature_blocks = ind.vlen-1; 00168 if (ind[ind.vlen-1] > features->get_num_vectors()) 00169 SG_ERROR("Group of features covers more vectors than available\n"); 00170 00171 options.gWeight = SG_MALLOC(double, options.n_feature_blocks); 00172 for (int32_t i=0; i<options.n_feature_blocks; i++) 00173 options.gWeight[i] = 1.0; 00174 options.mode = FEATURE_GROUP; 00175 options.loss = LOGISTIC; 00176 options.n_nodes = 0; 00177 slep_result_t result = slep_solver(features, y.vector, m_z, options); 00178 00179 SG_FREE(options.gWeight); 00180 int32_t n_feats = features->get_dim_feature_space(); 00181 SGVector<float64_t> new_w(n_feats); 00182 for (int i=0; i<n_feats; i++) 00183 new_w[i] = result.w[i]; 00184 set_bias(result.c[0]); 00185 00186 w = new_w; 00187 } 00188 break; 00189 case TREE: 00190 { 00191 CIndexBlockTree* feature_tree = (CIndexBlockTree*)m_feature_relation; 00192 00193 SGVector<float64_t> ind_t = feature_tree->get_SLEP_ind_t(); 00194 SGVector<float64_t> G; 00195 if (feature_tree->is_general()) 00196 { 00197 G = feature_tree->get_SLEP_G(); 00198 options.general = true; 00199 } 00200 options.ind_t = ind_t.vector; 00201 options.G = G.vector; 00202 options.n_nodes = ind_t.vlen/3; 00203 options.n_feature_blocks = ind_t.vlen/3; 00204 options.mode = FEATURE_TREE; 00205 options.loss = LOGISTIC; 00206 00207 slep_result_t result = slep_solver(features, y.vector, m_z, options); 00208 00209 int32_t n_feats = features->get_dim_feature_space(); 00210 SGVector<float64_t> new_w(n_feats); 00211 for (int i=0; i<n_feats; i++) 00212 new_w[i] = result.w[i]; 00213 00214 set_bias(result.c[0]); 00215 00216 w = new_w; 00217 } 00218 break; 00219 default: 00220 SG_ERROR("Not supported feature relation type\n"); 00221 } 00222 00223 return true; 00224 } 00225 00226 float64_t CFeatureBlockLogisticRegression::apply_one(int32_t vec_idx) 00227 { 00228 return CMath::exp(-(features->dense_dot(vec_idx, w.vector, w.vlen) + bias)); 00229 } 00230 00231 SGVector<float64_t> CFeatureBlockLogisticRegression::apply_get_outputs(CFeatures* data) 00232 { 00233 if (data) 00234 { 00235 if (!data->has_property(FP_DOT)) 00236 SG_ERROR("Specified features are not of type CDotFeatures\n"); 00237 00238 set_features((CDotFeatures*) data); 00239 } 00240 00241 if (!features) 00242 return SGVector<float64_t>(); 00243 00244 int32_t num=features->get_num_vectors(); 00245 ASSERT(num>0); 00246 ASSERT(w.vlen==features->get_dim_feature_space()); 00247 00248 float64_t* out=SG_MALLOC(float64_t, num); 00249 features->dense_dot_range(out, 0, num, NULL, w.vector, w.vlen, bias); 00250 for (int32_t i=0; i<num; i++) 00251 out[i] = 2.0/(1.0+CMath::exp(-out[i])) - 1.0; 00252 return SGVector<float64_t>(out,num); 00253 } 00254 00255 }