SHOGUN
v2.0.0
|
00001 /* 00002 * This program is free software; you can redistribute it and/or modify 00003 * it under the terms of the GNU General Public License as published by 00004 * the Free Software Foundation; either version 3 of the License, or 00005 * (at your option) any later version. 00006 * 00007 * Written (W) 1999-2009 Soeren Sonnenburg 00008 * Written (W) 1999-2008 Gunnar Raetsch 00009 * Written (W) 2011-2012 Heiko Strathmann 00010 * Copyright (C) 1999-2009 Fraunhofer Institute FIRST and Max-Planck-Society 00011 */ 00012 00013 #include <shogun/features/Features.h> 00014 #include <shogun/preprocessor/Preprocessor.h> 00015 #include <shogun/io/SGIO.h> 00016 #include <shogun/base/Parameter.h> 00017 00018 #include <string.h> 00019 00020 using namespace shogun; 00021 00022 CFeatures::CFeatures(int32_t size) 00023 : CSGObject() 00024 { 00025 init(); 00026 cache_size = size; 00027 } 00028 00029 CFeatures::CFeatures(const CFeatures& orig) 00030 : CSGObject(orig) 00031 { 00032 init(); 00033 00034 preproc = orig.preproc; 00035 num_preproc = orig.num_preproc; 00036 00037 preprocessed=SG_MALLOC(bool, orig.num_preproc); 00038 memcpy(preprocessed, orig.preprocessed, sizeof(bool)*orig.num_preproc); 00039 } 00040 00041 CFeatures::CFeatures(CFile* loader) 00042 : CSGObject() 00043 { 00044 init(); 00045 00046 load(loader); 00047 SG_INFO("Feature object loaded (%p)\n",this) ; 00048 } 00049 00050 CFeatures::~CFeatures() 00051 { 00052 clean_preprocessors(); 00053 SG_UNREF(m_subset_stack); 00054 } 00055 00056 void CFeatures::init() 00057 { 00058 SG_ADD(&properties, "properties", "Feature properties", MS_NOT_AVAILABLE); 00059 SG_ADD(&cache_size, "cache_size", "Size of cache in MB", MS_NOT_AVAILABLE); 00060 00061 /* TODO, use SGVector for arrays to be able to use SG_ADD macro */ 00062 m_parameters->add_vector((CSGObject***) &preproc, &num_preproc, "preproc", 00063 "List of preprocessors"); 00064 m_parameters->add_vector(&preprocessed, &num_preproc, "preprocessed", 00065 "Feature[i] is already preprocessed"); 00066 00067 SG_ADD((CSGObject**)&m_subset_stack, "subset_stack", "Stack of subsets", 00068 MS_NOT_AVAILABLE); 00069 00070 m_subset_stack=new CSubsetStack(); 00071 SG_REF(m_subset_stack); 00072 00073 properties = FP_NONE; 00074 cache_size = 0; 00075 preproc = NULL; 00076 num_preproc = 0; 00077 preprocessed = NULL; 00078 } 00079 00081 int32_t CFeatures::add_preprocessor(CPreprocessor* p) 00082 { 00083 SG_INFO( "%d preprocs currently, new preproc list is\n", num_preproc); 00084 ASSERT(p); 00085 00086 bool* preprocd=SG_MALLOC(bool, num_preproc+1); 00087 CPreprocessor** pps=SG_MALLOC(CPreprocessor*, num_preproc+1); 00088 for (int32_t i=0; i<num_preproc; i++) 00089 { 00090 pps[i]=preproc[i]; 00091 preprocd[i]=preprocessed[i]; 00092 } 00093 SG_FREE(preproc); 00094 SG_FREE(preprocessed); 00095 preproc=pps; 00096 preprocessed=preprocd; 00097 preproc[num_preproc]=p; 00098 preprocessed[num_preproc]=false; 00099 00100 num_preproc++; 00101 00102 for (int32_t i=0; i<num_preproc; i++) 00103 SG_INFO( "preproc[%d]=%s %ld\n",i, preproc[i]->get_name(), preproc[i]) ; 00104 00105 SG_REF(p); 00106 00107 return num_preproc; 00108 } 00109 00111 CPreprocessor* CFeatures::get_preprocessor(int32_t num) const 00112 { 00113 if (num<num_preproc) 00114 { 00115 SG_REF(preproc[num]); 00116 return preproc[num]; 00117 } 00118 else 00119 return NULL; 00120 } 00121 00123 int32_t CFeatures::get_num_preprocessed() const 00124 { 00125 int32_t num=0; 00126 00127 for (int32_t i=0; i<num_preproc; i++) 00128 { 00129 if (preprocessed[i]) 00130 num++; 00131 } 00132 00133 return num; 00134 } 00135 00137 void CFeatures::clean_preprocessors() 00138 { 00139 while (del_preprocessor(0)); 00140 } 00141 00143 CPreprocessor* CFeatures::del_preprocessor(int32_t num) 00144 { 00145 CPreprocessor** pps=NULL; 00146 bool* preprocd=NULL; 00147 CPreprocessor* removed_preproc=NULL; 00148 00149 if (num_preproc>0 && num<num_preproc) 00150 { 00151 removed_preproc=preproc[num]; 00152 00153 if (num_preproc>1) 00154 { 00155 pps= SG_MALLOC(CPreprocessor*, num_preproc-1); 00156 preprocd= SG_MALLOC(bool, num_preproc-1); 00157 00158 if (pps && preprocd) 00159 { 00160 int32_t j=0; 00161 for (int32_t i=0; i<num_preproc; i++) 00162 { 00163 if (i!=num) 00164 { 00165 pps[j]=preproc[i]; 00166 preprocd[j]=preprocessed[i]; 00167 j++; 00168 } 00169 } 00170 } 00171 } 00172 00173 SG_FREE(preproc); 00174 preproc=pps; 00175 SG_FREE(preprocessed); 00176 preprocessed=preprocd; 00177 00178 num_preproc--; 00179 00180 for (int32_t i=0; i<num_preproc; i++) 00181 SG_INFO( "preproc[%d]=%s\n",i, preproc[i]->get_name()) ; 00182 } 00183 00184 SG_UNREF(removed_preproc); 00185 return removed_preproc; 00186 } 00187 00188 void CFeatures::set_preprocessed(int32_t num) 00189 { 00190 preprocessed[num]=true; 00191 } 00192 00193 bool CFeatures::is_preprocessed(int32_t num) const 00194 { 00195 return preprocessed[num]; 00196 } 00197 00198 int32_t CFeatures::get_num_preprocessors() const 00199 { 00200 return num_preproc; 00201 } 00202 00203 int32_t CFeatures::get_cache_size() const 00204 { 00205 return cache_size; 00206 } 00207 00208 bool CFeatures::reshape(int32_t num_features, int32_t num_vectors) 00209 { 00210 SG_NOTIMPLEMENTED; 00211 return false; 00212 } 00213 00214 void CFeatures::list_feature_obj() const 00215 { 00216 SG_INFO( "%p - ", this); 00217 switch (get_feature_class()) 00218 { 00219 case C_UNKNOWN: 00220 SG_INFO( "C_UNKNOWN "); 00221 break; 00222 case C_DENSE: 00223 SG_INFO( "C_DENSE "); 00224 break; 00225 case C_SPARSE: 00226 SG_INFO( "C_SPARSE "); 00227 break; 00228 case C_STRING: 00229 SG_INFO( "C_STRING "); 00230 break; 00231 case C_COMBINED: 00232 SG_INFO( "C_COMBINED "); 00233 break; 00234 case C_COMBINED_DOT: 00235 SG_INFO( "C_COMBINED_DOT "); 00236 break; 00237 case C_WD: 00238 SG_INFO( "C_WD "); 00239 break; 00240 case C_SPEC: 00241 SG_INFO( "C_SPEC "); 00242 break; 00243 case C_WEIGHTEDSPEC: 00244 SG_INFO( "C_WEIGHTEDSPEC "); 00245 break; 00246 case C_STREAMING_DENSE: 00247 SG_INFO( "C_STREAMING_DENSE "); 00248 break; 00249 case C_STREAMING_SPARSE: 00250 SG_INFO( "C_STREAMING_SPARSE "); 00251 break; 00252 case C_STREAMING_STRING: 00253 SG_INFO( "C_STREAMING_STRING "); 00254 break; 00255 case C_STREAMING_VW: 00256 SG_INFO( "C_STREAMING_VW "); 00257 break; 00258 case C_ANY: 00259 SG_INFO( "C_ANY "); 00260 break; 00261 default: 00262 SG_ERROR( "ERROR UNKNOWN FEATURE CLASS"); 00263 } 00264 00265 switch (get_feature_type()) 00266 { 00267 case F_UNKNOWN: 00268 SG_INFO( "F_UNKNOWN \n"); 00269 break; 00270 case F_CHAR: 00271 SG_INFO( "F_CHAR \n"); 00272 break; 00273 case F_BYTE: 00274 SG_INFO( "F_BYTE \n"); 00275 break; 00276 case F_SHORT: 00277 SG_INFO( "F_SHORT \n"); 00278 break; 00279 case F_WORD: 00280 SG_INFO( "F_WORD \n"); 00281 break; 00282 case F_INT: 00283 SG_INFO( "F_INT \n"); 00284 break; 00285 case F_UINT: 00286 SG_INFO( "F_UINT \n"); 00287 break; 00288 case F_LONG: 00289 SG_INFO( "F_LONG \n"); 00290 break; 00291 case F_ULONG: 00292 SG_INFO( "F_ULONG \n"); 00293 break; 00294 case F_SHORTREAL: 00295 SG_INFO( "F_SHORTEAL \n"); 00296 break; 00297 case F_DREAL: 00298 SG_INFO( "F_DREAL \n"); 00299 break; 00300 case F_LONGREAL: 00301 SG_INFO( "F_LONGREAL \n"); 00302 break; 00303 case F_ANY: 00304 SG_INFO( "F_ANY \n"); 00305 break; 00306 default: 00307 SG_ERROR( "ERROR UNKNOWN FEATURE TYPE\n"); 00308 } 00309 } 00310 00311 00312 void CFeatures::load(CFile* loader) 00313 { 00314 SG_SET_LOCALE_C; 00315 SG_NOTIMPLEMENTED; 00316 SG_RESET_LOCALE; 00317 } 00318 00319 void CFeatures::save(CFile* writer) 00320 { 00321 SG_SET_LOCALE_C; 00322 SG_NOTIMPLEMENTED; 00323 SG_RESET_LOCALE; 00324 } 00325 00326 bool CFeatures::check_feature_compatibility(CFeatures* f) const 00327 { 00328 bool result=false; 00329 00330 if (f) 00331 result= ( (this->get_feature_class() == f->get_feature_class()) && 00332 (this->get_feature_type() == f->get_feature_type())); 00333 return result; 00334 } 00335 00336 bool CFeatures::has_property(EFeatureProperty p) const 00337 { 00338 return (properties & p) != 0; 00339 } 00340 00341 void CFeatures::set_property(EFeatureProperty p) 00342 { 00343 properties |= p; 00344 } 00345 00346 void CFeatures::unset_property(EFeatureProperty p) 00347 { 00348 properties &= (properties | p) ^ p; 00349 } 00350 00351 void CFeatures::add_subset(SGVector<index_t> subset) 00352 { 00353 m_subset_stack->add_subset(subset); 00354 subset_changed_post(); 00355 } 00356 00357 void CFeatures::remove_subset() 00358 { 00359 m_subset_stack->remove_subset(); 00360 subset_changed_post(); 00361 } 00362 00363 void CFeatures::remove_all_subsets() 00364 { 00365 m_subset_stack->remove_all_subsets(); 00366 subset_changed_post(); 00367 } 00368 00369 CSubsetStack* CFeatures::get_subset_stack() 00370 { 00371 return m_subset_stack; 00372 } 00373 00374 CFeatures* CFeatures::copy_subset(SGVector<index_t> indices) 00375 { 00376 SG_ERROR("%s::copy_subset(): copy_subset and therefore model storage of " 00377 "CMachine (required for cross-validation and model-selection is " 00378 "not yet implemented yet. Ask developers!\n", get_name()); 00379 return NULL; 00380 }