SHOGUN
v2.0.0
|
00001 /* 00002 * This program is free software; you can redistribute it and/or modify 00003 * it under the terms of the GNU General Public License as published by 00004 * the Free Software Foundation; either version 3 of the License, or 00005 * (at your option) any later version. 00006 * 00007 * Written (W) 2010 Soeren Sonnenburg 00008 * Copyright (C) 2010 Berlin Institute of Technology 00009 */ 00010 #ifndef __BINARY_FILE_H__ 00011 #define __BINARY_FILE_H__ 00012 00013 #include <shogun/lib/config.h> 00014 #include <shogun/lib/common.h> 00015 #include <shogun/base/SGObject.h> 00016 #include <shogun/io/SGIO.h> 00017 #include <shogun/io/SimpleFile.h> 00018 #include <shogun/io/File.h> 00019 00020 namespace shogun 00021 { 00028 class CBinaryFile: public CFile 00029 { 00030 public: 00032 CBinaryFile(); 00033 00039 CBinaryFile(FILE* f, const char* name=NULL); 00040 00047 CBinaryFile(const char* fname, char rw='r', const char* name=NULL); 00048 00050 virtual ~CBinaryFile(); 00051 00059 virtual void get_vector(uint8_t*& vector, int32_t& len); 00060 virtual void get_vector(char*& vector, int32_t& len); 00061 virtual void get_vector(int32_t*& vector, int32_t& len); 00062 virtual void get_vector(float64_t*& vector, int32_t& len); 00063 virtual void get_vector(float32_t*& vector, int32_t& len); 00064 virtual void get_vector(int16_t*& vector, int32_t& len); 00065 virtual void get_vector(uint16_t*& vector, int32_t& len); 00067 00076 virtual void get_matrix( 00077 uint8_t*& matrix, int32_t& num_feat, int32_t& num_vec); 00078 virtual void get_int8_matrix( 00079 int8_t*& matrix, int32_t& num_feat, int32_t& num_vec); 00080 virtual void get_matrix( 00081 char*& matrix, int32_t& num_feat, int32_t& num_vec); 00082 virtual void get_matrix( 00083 int32_t*& matrix, int32_t& num_feat, int32_t& num_vec); 00084 virtual void get_uint_matrix( 00085 uint32_t*& matrix, int32_t& num_feat, int32_t& num_vec); 00086 virtual void get_long_matrix( 00087 int64_t*& matrix, int32_t& num_feat, int32_t& num_vec); 00088 virtual void get_ulong_matrix( 00089 uint64_t*& matrix, int32_t& num_feat, int32_t& num_vec); 00090 virtual void get_matrix( 00091 float32_t*& matrix, int32_t& num_feat, int32_t& num_vec); 00092 virtual void get_matrix( 00093 float64_t*& matrix, int32_t& num_feat, int32_t& num_vec); 00094 virtual void get_longreal_matrix( 00095 floatmax_t*& matrix, int32_t& num_feat, int32_t& num_vec); 00096 virtual void get_matrix( 00097 int16_t*& matrix, int32_t& num_feat, int32_t& num_vec); 00098 virtual void get_matrix( 00099 uint16_t*& matrix, int32_t& num_feat, int32_t& num_vec); 00101 00110 virtual void get_ndarray( 00111 uint8_t*& array, int32_t*& dims, int32_t& num_dims); 00112 virtual void get_ndarray( 00113 char*& array, int32_t*& dims, int32_t& num_dims); 00114 virtual void get_ndarray( 00115 int32_t*& array, int32_t*& dims, int32_t& num_dims); 00116 virtual void get_ndarray( 00117 float32_t*& array, int32_t*& dims, int32_t& num_dims); 00118 virtual void get_ndarray( 00119 float64_t*& array, int32_t*& dims, int32_t& num_dims); 00120 virtual void get_ndarray( 00121 int16_t*& array, int32_t*& dims, int32_t& num_dims); 00122 virtual void get_ndarray( 00123 uint16_t*& array, int32_t*& dims, int32_t& num_dims); 00125 00134 virtual void get_sparse_matrix( 00135 SGSparseVector<bool>*& matrix, int32_t& num_feat, int32_t& num_vec); 00136 virtual void get_sparse_matrix( 00137 SGSparseVector<uint8_t>*& matrix, int32_t& num_feat, int32_t& num_vec); 00138 virtual void get_int8_sparsematrix( 00139 SGSparseVector<int8_t>*& matrix, int32_t& num_feat, int32_t& num_vec); 00140 virtual void get_sparse_matrix( 00141 SGSparseVector<char>*& matrix, int32_t& num_feat, int32_t& num_vec); 00142 virtual void get_sparse_matrix( 00143 SGSparseVector<int32_t>*& matrix, int32_t& num_feat, int32_t& num_vec); 00144 virtual void get_uint_sparsematrix( 00145 SGSparseVector<uint32_t>*& matrix, int32_t& num_feat, int32_t& num_vec); 00146 virtual void get_long_sparsematrix( 00147 SGSparseVector<int64_t>*& matrix, int32_t& num_feat, int32_t& num_vec); 00148 virtual void get_ulong_sparsematrix( 00149 SGSparseVector<uint64_t>*& matrix, int32_t& num_feat, int32_t& num_vec); 00150 virtual void get_sparse_matrix( 00151 SGSparseVector<int16_t>*& matrix, int32_t& num_feat, int32_t& num_vec); 00152 virtual void get_sparse_matrix( 00153 SGSparseVector<uint16_t>*& matrix, int32_t& num_feat, int32_t& num_vec); 00154 virtual void get_sparse_matrix( 00155 SGSparseVector<float32_t>*& matrix, int32_t& num_feat, int32_t& num_vec); 00156 virtual void get_sparse_matrix( 00157 SGSparseVector<float64_t>*& matrix, int32_t& num_feat, int32_t& num_vec); 00158 virtual void get_longreal_sparsematrix( 00159 SGSparseVector<floatmax_t>*& matrix, int32_t& num_feat, int32_t& num_vec); 00161 00162 00171 virtual void get_string_list( 00172 SGString<uint8_t>*& strings, int32_t& num_str, 00173 int32_t& max_string_len); 00174 virtual void get_int8_string_list( 00175 SGString<int8_t>*& strings, int32_t& num_str, 00176 int32_t& max_string_len); 00177 virtual void get_string_list( 00178 SGString<char>*& strings, int32_t& num_str, 00179 int32_t& max_string_len); 00180 virtual void get_string_list( 00181 SGString<int32_t>*& strings, int32_t& num_str, 00182 int32_t& max_string_len); 00183 virtual void get_uint_string_list( 00184 SGString<uint32_t>*& strings, int32_t& num_str, 00185 int32_t& max_string_len); 00186 virtual void get_string_list( 00187 SGString<int16_t>*& strings, int32_t& num_str, 00188 int32_t& max_string_len); 00189 virtual void get_string_list( 00190 SGString<uint16_t>*& strings, int32_t& num_str, 00191 int32_t& max_string_len); 00192 virtual void get_long_string_list( 00193 SGString<int64_t>*& strings, int32_t& num_str, 00194 int32_t& max_string_len); 00195 virtual void get_ulong_string_list( 00196 SGString<uint64_t>*& strings, int32_t& num_str, 00197 int32_t& max_string_len); 00198 virtual void get_string_list( 00199 SGString<float32_t>*& strings, int32_t& num_str, 00200 int32_t& max_string_len); 00201 virtual void get_string_list( 00202 SGString<float64_t>*& strings, int32_t& num_str, 00203 int32_t& max_string_len); 00204 virtual void get_longreal_string_list( 00205 SGString<floatmax_t>*& strings, int32_t& num_str, 00206 int32_t& max_string_len); 00208 00216 virtual void set_vector(const uint8_t* vector, int32_t len); 00217 virtual void set_vector(const char* vector, int32_t len); 00218 virtual void set_vector(const int32_t* vector, int32_t len); 00219 virtual void set_vector( const float32_t* vector, int32_t len); 00220 virtual void set_vector(const float64_t* vector, int32_t len); 00221 virtual void set_vector(const int16_t* vector, int32_t len); 00222 virtual void set_vector(const uint16_t* vector, int32_t len); 00224 00225 00233 virtual void set_matrix( 00234 const uint8_t* matrix, int32_t num_feat, int32_t num_vec); 00235 virtual void set_int8_matrix( 00236 const int8_t* matrix, int32_t num_feat, int32_t num_vec); 00237 virtual void set_matrix( 00238 const char* matrix, int32_t num_feat, int32_t num_vec); 00239 virtual void set_matrix( 00240 const int32_t* matrix, int32_t num_feat, int32_t num_vec); 00241 virtual void set_uint_matrix( 00242 const uint32_t* matrix, int32_t num_feat, int32_t num_vec); 00243 virtual void set_long_matrix( 00244 const int64_t* matrix, int32_t num_feat, int32_t num_vec); 00245 virtual void set_ulong_matrix( 00246 const uint64_t* matrix, int32_t num_feat, int32_t num_vec); 00247 virtual void set_matrix( 00248 const float32_t* matrix, int32_t num_feat, int32_t num_vec); 00249 virtual void set_matrix( 00250 const float64_t* matrix, int32_t num_feat, int32_t num_vec); 00251 virtual void set_longreal_matrix( 00252 const floatmax_t* matrix, int32_t num_feat, int32_t num_vec); 00253 virtual void set_matrix( 00254 const int16_t* matrix, int32_t num_feat, int32_t num_vec); 00255 virtual void set_matrix( 00256 const uint16_t* matrix, int32_t num_feat, int32_t num_vec); 00258 00266 virtual void set_ndarray( 00267 const uint8_t* array, int32_t* dims, int32_t num_dims); 00268 virtual void set_ndarray( 00269 const char* array, int32_t* dims, int32_t num_dims); 00270 virtual void set_ndarray( 00271 const int32_t* array, int32_t* dims, int32_t num_dims); 00272 virtual void set_ndarray( 00273 const float32_t* array, int32_t* dims, int32_t num_dims); 00274 virtual void set_ndarray( 00275 const float64_t* array, int32_t* dims, int32_t num_dims); 00276 virtual void set_ndarray( 00277 const int16_t* array, int32_t* dims, int32_t num_dims); 00278 virtual void set_ndarray( 00279 const uint16_t* array, int32_t* dims, int32_t num_dims); 00281 00289 virtual void set_sparse_matrix( 00290 const SGSparseVector<bool>* matrix, int32_t num_feat, int32_t num_vec); 00291 virtual void set_sparse_matrix( 00292 const SGSparseVector<uint8_t>* matrix, int32_t num_feat, int32_t num_vec); 00293 virtual void set_int8_sparsematrix( 00294 const SGSparseVector<int8_t>* matrix, int32_t num_feat, int32_t num_vec); 00295 virtual void set_sparse_matrix( 00296 const SGSparseVector<char>* matrix, int32_t num_feat, int32_t num_vec); 00297 virtual void set_sparse_matrix( 00298 const SGSparseVector<int32_t>* matrix, int32_t num_feat, int32_t num_vec); 00299 virtual void set_uint_sparsematrix( 00300 const SGSparseVector<uint32_t>* matrix, int32_t num_feat, int32_t num_vec); 00301 virtual void set_long_sparsematrix( 00302 const SGSparseVector<int64_t>* matrix, int32_t num_feat, int32_t num_vec); 00303 virtual void set_ulong_sparsematrix( 00304 const SGSparseVector<uint64_t>* matrix, int32_t num_feat, int32_t num_vec); 00305 virtual void set_sparse_matrix( 00306 const SGSparseVector<int16_t>* matrix, int32_t num_feat, int32_t num_vec); 00307 virtual void set_sparse_matrix( 00308 const SGSparseVector<uint16_t>* matrix, int32_t num_feat, int32_t num_vec); 00309 virtual void set_sparse_matrix( 00310 const SGSparseVector<float32_t>* matrix, int32_t num_feat, int32_t num_vec); 00311 virtual void set_sparse_matrix( 00312 const SGSparseVector<float64_t>* matrix, int32_t num_feat, int32_t num_vec); 00313 virtual void set_longreal_sparsematrix( 00314 const SGSparseVector<floatmax_t>* matrix, int32_t num_feat, int32_t num_vec); 00316 00317 00326 virtual void set_string_list( 00327 const SGString<uint8_t>* strings, int32_t num_str); 00328 virtual void set_int8_string_list( 00329 const SGString<int8_t>* strings, int32_t num_str); 00330 virtual void set_string_list( 00331 const SGString<char>* strings, int32_t num_str); 00332 virtual void set_string_list( 00333 const SGString<int32_t>* strings, int32_t num_str); 00334 virtual void set_uint_string_list( 00335 const SGString<uint32_t>* strings, int32_t num_str); 00336 virtual void set_string_list( 00337 const SGString<int16_t>* strings, int32_t num_str); 00338 virtual void set_string_list( 00339 const SGString<uint16_t>* strings, int32_t num_str); 00340 virtual void set_long_string_list( 00341 const SGString<int64_t>* strings, int32_t num_str); 00342 virtual void set_ulong_string_list( 00343 const SGString<uint64_t>* strings, int32_t num_str); 00344 virtual void set_string_list( 00345 const SGString<float32_t>* strings, int32_t num_str); 00346 virtual void set_string_list( 00347 const SGString<float64_t>* strings, int32_t num_str); 00348 virtual void set_longreal_string_list( 00349 const SGString<floatmax_t>* strings, int32_t num_str); 00351 00353 inline virtual const char* get_name() const { return "BinaryFile"; } 00354 00355 protected: 00360 void read_header(TSGDataType* dest); 00361 00366 void write_header(const TSGDataType* datatype); 00367 00373 int32_t parse_first_header(TSGDataType& type); 00374 00380 int32_t parse_next_header(TSGDataType& type); 00381 00382 private: 00389 template <class DT> DT* load_data(DT* target, int64_t& num) 00390 { 00391 CSimpleFile<DT> f(filename, file); 00392 return f.load(target, num); 00393 } 00394 00401 template <class DT> bool save_data(DT* src, int64_t num) 00402 { 00403 CSimpleFile<DT> f(filename, file); 00404 return f.save(src, num); 00405 } 00406 }; 00407 } 00408 #endif //__BINARY_FILE_H__