// SHOGUN v2.0.0
00001 /* 00002 * This program is free software; you can redistribute it and/or modify 00003 * it under the terms of the GNU General Public License as published by 00004 * the Free Software Foundation; either version 3 of the License, or 00005 * (at your option) any later version. 00006 * 00007 * Parts of this code are copyright (c) 2009 Yahoo! Inc. 00008 * All rights reserved. The copyrights embodied in the content of 00009 * this file are licensed under the BSD (revised) open source license. 00010 * 00011 * Written (W) 2010 Soeren Sonnenburg 00012 * Copyright (C) 2010 Berlin Institute of Technology 00013 */ 00014 #ifndef __ASCII_FILE_H__ 00015 #define __ASCII_FILE_H__ 00016 00017 #include <shogun/lib/config.h> 00018 #include <shogun/base/DynArray.h> 00019 #include <shogun/lib/common.h> 00020 #include <shogun/io/File.h> 00021 #include <shogun/io/SGIO.h> 00022 #include <shogun/io/IOBuffer.h> 00023 00024 namespace shogun 00025 { 00035 class CAsciiFile: public CFile 00036 { 00037 public: 00039 CAsciiFile(); 00040 00046 CAsciiFile(FILE* f, const char* name=NULL); 00047 00054 CAsciiFile(const char* fname, char rw='r', const char* name=NULL); 00055 00057 virtual ~CAsciiFile(); 00058 00066 virtual void get_vector(uint8_t*& vector, int32_t& len); 00067 virtual void get_vector(char*& vector, int32_t& len); 00068 virtual void get_vector(int32_t*& vector, int32_t& len); 00069 virtual void get_vector(float64_t*& vector, int32_t& len); 00070 virtual void get_vector(float32_t*& vector, int32_t& len); 00071 virtual void get_vector(int16_t*& vector, int32_t& len); 00072 virtual void get_vector(uint16_t*& vector, int32_t& len); 00074 00083 virtual void get_matrix( 00084 uint8_t*& matrix, int32_t& num_feat, int32_t& num_vec); 00085 virtual void get_int8_matrix( 00086 int8_t*& matrix, int32_t& num_feat, int32_t& num_vec); 00087 virtual void get_matrix( 00088 char*& matrix, int32_t& num_feat, int32_t& num_vec); 00089 virtual void get_matrix( 00090 int32_t*& matrix, int32_t& num_feat, 
int32_t& num_vec); 00091 virtual void get_uint_matrix( 00092 uint32_t*& matrix, int32_t& num_feat, int32_t& num_vec); 00093 virtual void get_long_matrix( 00094 int64_t*& matrix, int32_t& num_feat, int32_t& num_vec); 00095 virtual void get_ulong_matrix( 00096 uint64_t*& matrix, int32_t& num_feat, int32_t& num_vec); 00097 virtual void get_matrix( 00098 float32_t*& matrix, int32_t& num_feat, int32_t& num_vec); 00099 virtual void get_matrix( 00100 float64_t*& matrix, int32_t& num_feat, int32_t& num_vec); 00101 virtual void get_longreal_matrix( 00102 floatmax_t*& matrix, int32_t& num_feat, int32_t& num_vec); 00103 virtual void get_matrix( 00104 int16_t*& matrix, int32_t& num_feat, int32_t& num_vec); 00105 virtual void get_matrix( 00106 uint16_t*& matrix, int32_t& num_feat, int32_t& num_vec); 00108 00117 virtual void get_ndarray( 00118 uint8_t*& array, int32_t*& dims, int32_t& num_dims); 00119 virtual void get_int8_ndarray( 00120 int8_t*& array, int32_t*& dims, int32_t& num_dims); 00121 virtual void get_ndarray( 00122 char*& array, int32_t*& dims, int32_t& num_dims); 00123 virtual void get_ndarray( 00124 int32_t*& array, int32_t*& dims, int32_t& num_dims); 00125 virtual void get_uint_ndarray( 00126 uint32_t*& array, int32_t*& dims, int32_t& num_dims); 00127 virtual void get_long_ndarray( 00128 int64_t*& array, int32_t*& dims, int32_t& num_dims); 00129 virtual void get_ulong_ndarray( 00130 uint64_t*& array, int32_t*& dims, int32_t& num_dims); 00131 virtual void get_ndarray( 00132 float32_t*& array, int32_t*& dims, int32_t& num_dims); 00133 virtual void get_ndarray( 00134 float64_t*& array, int32_t*& dims, int32_t& num_dims); 00135 virtual void get_longreal_ndarray( 00136 floatmax_t*& array, int32_t*& dims, int32_t& num_dims); 00137 virtual void get_ndarray( 00138 int16_t*& array, int32_t*& dims, int32_t& num_dims); 00139 virtual void get_ndarray( 00140 uint16_t*& array, int32_t*& dims, int32_t& num_dims); 00142 00151 virtual void get_sparse_matrix( 00152 
SGSparseVector<bool>*& matrix, int32_t& num_feat, int32_t& num_vec); 00153 virtual void get_sparse_matrix( 00154 SGSparseVector<uint8_t>*& matrix, int32_t& num_feat, int32_t& num_vec); 00155 virtual void get_int8_sparsematrix( 00156 SGSparseVector<int8_t>*& matrix, int32_t& num_feat, int32_t& num_vec); 00157 virtual void get_sparse_matrix( 00158 SGSparseVector<char>*& matrix, int32_t& num_feat, int32_t& num_vec); 00159 virtual void get_sparse_matrix( 00160 SGSparseVector<int32_t>*& matrix, int32_t& num_feat, int32_t& num_vec); 00161 virtual void get_uint_sparsematrix( 00162 SGSparseVector<uint32_t>*& matrix, int32_t& num_feat, int32_t& num_vec); 00163 virtual void get_long_sparsematrix( 00164 SGSparseVector<int64_t>*& matrix, int32_t& num_feat, int32_t& num_vec); 00165 virtual void get_ulong_sparsematrix( 00166 SGSparseVector<uint64_t>*& matrix, int32_t& num_feat, int32_t& num_vec); 00167 virtual void get_sparse_matrix( 00168 SGSparseVector<int16_t>*& matrix, int32_t& num_feat, int32_t& num_vec); 00169 virtual void get_sparse_matrix( 00170 SGSparseVector<uint16_t>*& matrix, int32_t& num_feat, int32_t& num_vec); 00171 virtual void get_sparse_matrix( 00172 SGSparseVector<float32_t>*& matrix, int32_t& num_feat, int32_t& num_vec); 00173 virtual void get_sparse_matrix( 00174 SGSparseVector<float64_t>*& matrix, int32_t& num_feat, int32_t& num_vec); 00175 virtual void get_longreal_sparsematrix( 00176 SGSparseVector<floatmax_t>*& matrix, int32_t& num_feat, int32_t& num_vec); 00178 00179 00188 virtual void get_string_list( 00189 SGString<uint8_t>*& strings, int32_t& num_str, 00190 int32_t& max_string_len); 00191 virtual void get_int8_string_list( 00192 SGString<int8_t>*& strings, int32_t& num_str, 00193 int32_t& max_string_len); 00194 virtual void get_string_list( 00195 SGString<char>*& strings, int32_t& num_str, 00196 int32_t& max_string_len); 00197 virtual void get_string_list( 00198 SGString<int32_t>*& strings, int32_t& num_str, 00199 int32_t& max_string_len); 00200 
virtual void get_uint_string_list( 00201 SGString<uint32_t>*& strings, int32_t& num_str, 00202 int32_t& max_string_len); 00203 virtual void get_string_list( 00204 SGString<int16_t>*& strings, int32_t& num_str, 00205 int32_t& max_string_len); 00206 virtual void get_string_list( 00207 SGString<uint16_t>*& strings, int32_t& num_str, 00208 int32_t& max_string_len); 00209 virtual void get_long_string_list( 00210 SGString<int64_t>*& strings, int32_t& num_str, 00211 int32_t& max_string_len); 00212 virtual void get_ulong_string_list( 00213 SGString<uint64_t>*& strings, int32_t& num_str, 00214 int32_t& max_string_len); 00215 virtual void get_string_list( 00216 SGString<float32_t>*& strings, int32_t& num_str, 00217 int32_t& max_string_len); 00218 virtual void get_string_list( 00219 SGString<float64_t>*& strings, int32_t& num_str, 00220 int32_t& max_string_len); 00221 virtual void get_longreal_string_list( 00222 SGString<floatmax_t>*& strings, int32_t& num_str, 00223 int32_t& max_string_len); 00225 00233 virtual void set_vector(const uint8_t* vector, int32_t len); 00234 virtual void set_vector(const char* vector, int32_t len); 00235 virtual void set_vector(const int32_t* vector, int32_t len); 00236 virtual void set_vector( const float32_t* vector, int32_t len); 00237 virtual void set_vector(const float64_t* vector, int32_t len); 00238 virtual void set_vector(const int16_t* vector, int32_t len); 00239 virtual void set_vector(const uint16_t* vector, int32_t len); 00241 00242 00250 virtual void set_matrix( 00251 const uint8_t* matrix, int32_t num_feat, int32_t num_vec); 00252 virtual void set_int8_matrix( 00253 const int8_t* matrix, int32_t num_feat, int32_t num_vec); 00254 virtual void set_matrix( 00255 const char* matrix, int32_t num_feat, int32_t num_vec); 00256 virtual void set_matrix( 00257 const int32_t* matrix, int32_t num_feat, int32_t num_vec); 00258 virtual void set_uint_matrix( 00259 const uint32_t* matrix, int32_t num_feat, int32_t num_vec); 00260 virtual void 
set_long_matrix( 00261 const int64_t* matrix, int32_t num_feat, int32_t num_vec); 00262 virtual void set_ulong_matrix( 00263 const uint64_t* matrix, int32_t num_feat, int32_t num_vec); 00264 virtual void set_matrix( 00265 const float32_t* matrix, int32_t num_feat, int32_t num_vec); 00266 virtual void set_matrix( 00267 const float64_t* matrix, int32_t num_feat, int32_t num_vec); 00268 virtual void set_longreal_matrix( 00269 const floatmax_t* matrix, int32_t num_feat, int32_t num_vec); 00270 virtual void set_matrix( 00271 const int16_t* matrix, int32_t num_feat, int32_t num_vec); 00272 virtual void set_matrix( 00273 const uint16_t* matrix, int32_t num_feat, int32_t num_vec); 00275 00283 virtual void set_ndarray( 00284 const uint8_t* array, int32_t* dims, int32_t num_dims); 00285 virtual void set_int8_ndarray( 00286 const int8_t* array, int32_t* dims, int32_t num_dims); 00287 virtual void set_ndarray( 00288 const char* array, int32_t* dims, int32_t num_dims); 00289 virtual void set_ndarray( 00290 const int32_t* array, int32_t* dims, int32_t num_dims); 00291 virtual void set_uint_ndarray( 00292 const uint32_t* array, int32_t* dims, int32_t num_dims); 00293 virtual void set_long_ndarray( 00294 const int64_t* array, int32_t* dims, int32_t num_dims); 00295 virtual void set_ulong_ndarray( 00296 const uint64_t* array, int32_t* dims, int32_t num_dims); 00297 virtual void set_ndarray( 00298 const float32_t* array, int32_t* dims, int32_t num_dims); 00299 virtual void set_ndarray( 00300 const float64_t* array, int32_t* dims, int32_t num_dims); 00301 virtual void set_longreal_ndarray( 00302 const floatmax_t* array, int32_t* dims, int32_t num_dims); 00303 virtual void set_ndarray( 00304 const int16_t* array, int32_t* dims, int32_t num_dims); 00305 virtual void set_ndarray( 00306 const uint16_t* array, int32_t* dims, int32_t num_dims); 00308 00316 virtual void set_sparse_matrix( 00317 const SGSparseVector<bool>* matrix, int32_t num_feat, int32_t num_vec); 00318 virtual void 
set_sparse_matrix( 00319 const SGSparseVector<uint8_t>* matrix, int32_t num_feat, int32_t num_vec); 00320 virtual void set_int8_sparsematrix( 00321 const SGSparseVector<int8_t>* matrix, int32_t num_feat, int32_t num_vec); 00322 virtual void set_sparse_matrix( 00323 const SGSparseVector<char>* matrix, int32_t num_feat, int32_t num_vec); 00324 virtual void set_sparse_matrix( 00325 const SGSparseVector<int32_t>* matrix, int32_t num_feat, int32_t num_vec); 00326 virtual void set_uint_sparsematrix( 00327 const SGSparseVector<uint32_t>* matrix, int32_t num_feat, int32_t num_vec); 00328 virtual void set_long_sparsematrix( 00329 const SGSparseVector<int64_t>* matrix, int32_t num_feat, int32_t num_vec); 00330 virtual void set_ulong_sparsematrix( 00331 const SGSparseVector<uint64_t>* matrix, int32_t num_feat, int32_t num_vec); 00332 virtual void set_sparse_matrix( 00333 const SGSparseVector<int16_t>* matrix, int32_t num_feat, int32_t num_vec); 00334 virtual void set_sparse_matrix( 00335 const SGSparseVector<uint16_t>* matrix, int32_t num_feat, int32_t num_vec); 00336 virtual void set_sparse_matrix( 00337 const SGSparseVector<float32_t>* matrix, int32_t num_feat, int32_t num_vec); 00338 virtual void set_sparse_matrix( 00339 const SGSparseVector<float64_t>* matrix, int32_t num_feat, int32_t num_vec); 00340 virtual void set_longreal_sparsematrix( 00341 const SGSparseVector<floatmax_t>* matrix, int32_t num_feat, int32_t num_vec); 00343 00344 00353 virtual void set_string_list( 00354 const SGString<uint8_t>* strings, int32_t num_str); 00355 virtual void set_int8_string_list( 00356 const SGString<int8_t>* strings, int32_t num_str); 00357 virtual void set_string_list( 00358 const SGString<char>* strings, int32_t num_str); 00359 virtual void set_string_list( 00360 const SGString<int32_t>* strings, int32_t num_str); 00361 virtual void set_uint_string_list( 00362 const SGString<uint32_t>* strings, int32_t num_str); 00363 virtual void set_string_list( 00364 const SGString<int16_t>* 
strings, int32_t num_str); 00365 virtual void set_string_list( 00366 const SGString<uint16_t>* strings, int32_t num_str); 00367 virtual void set_long_string_list( 00368 const SGString<int64_t>* strings, int32_t num_str); 00369 virtual void set_ulong_string_list( 00370 const SGString<uint64_t>* strings, int32_t num_str); 00371 virtual void set_string_list( 00372 const SGString<float32_t>* strings, int32_t num_str); 00373 virtual void set_string_list( 00374 const SGString<float64_t>* strings, int32_t num_str); 00375 virtual void set_longreal_string_list( 00376 const SGString<floatmax_t>* strings, int32_t num_str); 00378 00380 inline virtual const char* get_name() const { return "AsciiFile"; } 00381 00397 static ssize_t getdelim(char **lineptr, size_t *n, char delimiter, FILE* stream); 00398 00409 static ssize_t getline(char **lineptr, size_t *n, FILE *stream); 00410 00419 static void tokenize(char delim, substring s, v_array<substring> &ret); 00420 00421 private: 00428 template <class T> void append_item(DynArray<T>* items, char* ptr_data, char* ptr_item); 00429 00430 protected: 00431 00433 CIOBuffer buf; 00434 }; 00435 } 00436 #endif //__ASCII_FILE_H__