SHOGUN
v2.0.0
|
00001 /* 00002 * This program is free software; you can redistribute it and/or modify 00003 * it under the terms of the GNU General Public License as published by 00004 * the Free Software Foundation; either version 3 of the License, or 00005 * (at your option) any later version. 00006 * 00007 * Written (W) 1999-2010 Soeren Sonnenburg 00008 * Copyright (C) 1999-2009 Fraunhofer Institute FIRST and Max-Planck-Society 00009 * Copyright (C) 2010 Berlin Institute of Technology 00010 */ 00011 00012 #include <stdio.h> 00013 #include <stdlib.h> 00014 #include <string.h> 00015 00016 #include <shogun/io/File.h> 00017 00018 #include <shogun/features/StringFeatures.h> 00019 #include <shogun/features/SparseFeatures.h> 00020 00021 using namespace shogun; 00022 00023 CFile::CFile() : CSGObject() 00024 { 00025 file=NULL; 00026 filename=NULL; 00027 variable_name=NULL; 00028 } 00029 00030 CFile::CFile(FILE* f, const char* name) : CSGObject() 00031 { 00032 file=f; 00033 filename=NULL; 00034 variable_name=NULL; 00035 00036 if (name) 00037 set_variable_name(name); 00038 } 00039 00040 CFile::CFile(const char* fname, char rw, const char* name) : CSGObject() 00041 { 00042 variable_name=NULL; 00043 task=rw; 00044 filename=strdup(fname); 00045 char mode[2]; 00046 mode[0]=rw; 00047 mode[1]='\0'; 00048 00049 if (rw=='r' || rw == 'w') 00050 { 00051 if (filename) 00052 { 00053 if (!(file=fopen((const char*) filename, (const char*) mode))) 00054 SG_ERROR("Error opening file '%s'\n", filename); 00055 } 00056 } 00057 else 00058 SG_ERROR("unknown mode '%c'\n", mode[0]); 00059 00060 if (name) 00061 set_variable_name(name); 00062 } 00063 00064 void CFile::get_vector(bool*& vector, int32_t& len) 00065 { 00066 int32_t* int_vector; 00067 get_vector(int_vector, len); 00068 00069 ASSERT(len>0); 00070 vector= SG_MALLOC(bool, len); 00071 00072 for (int32_t i=0; i<len; i++) 00073 vector[i]= (int_vector[i]!=0); 00074 00075 SG_FREE(int_vector); 00076 } 00077 00078 void CFile::set_vector(const bool* vector, int32_t len) 00079 { 00080 int32_t* int_vector = SG_MALLOC(int32_t, len); 00081 for (int32_t i=0;i<len;i++) 00082 { 00083 if (vector[i]) 00084 int_vector[i]=1; 00085 else 00086 int_vector[i]=0; 00087 } 00088 set_vector(int_vector,len); 00089 SG_FREE(int_vector); 00090 } 00091 00092 void CFile::get_matrix(bool*& matrix, int32_t& num_feat, int32_t& num_vec) 00093 { 00094 uint8_t * byte_matrix; 00095 get_matrix(byte_matrix,num_feat,num_vec); 00096 00097 ASSERT(num_feat > 0 && num_vec > 0) 00098 matrix = SG_MALLOC(bool, num_feat*num_vec); 00099 00100 for(int32_t i = 0;i < num_vec;i++) 00101 { 00102 for(int32_t j = 0;j < num_feat;j++) 00103 matrix[i*num_feat+j] = byte_matrix[i*num_feat+j] != 0 ? 1 : 0; 00104 } 00105 00106 SG_FREE(byte_matrix); 00107 } 00108 00109 void CFile::set_matrix(const bool* matrix, int32_t num_feat, int32_t num_vec) 00110 { 00111 uint8_t * byte_matrix = SG_MALLOC(uint8_t, num_feat*num_vec); 00112 for(int32_t i = 0;i < num_vec;i++) 00113 { 00114 for(int32_t j = 0;j < num_feat;j++) 00115 byte_matrix[i*num_feat+j] = matrix[i*num_feat+j] != 0 ? 1 : 0; 00116 } 00117 00118 set_matrix(byte_matrix,num_feat,num_vec); 00119 00120 SG_FREE(byte_matrix); 00121 } 00122 00123 void CFile::get_string_list( 00124 SGString<bool>*& strings, int32_t& num_str, 00125 int32_t& max_string_len) 00126 { 00127 SGString<int8_t>* strs; 00128 get_int8_string_list(strs, num_str, max_string_len); 00129 00130 ASSERT(num_str>0 && max_string_len>0); 00131 strings=SG_MALLOC(SGString<bool>, num_str); 00132 00133 for(int32_t i = 0;i < num_str;i++) 00134 { 00135 strings[i].slen = strs[i].slen; 00136 strings[i].string = SG_MALLOC(bool, strs[i].slen); 00137 for(int32_t j = 0;j < strs[i].slen;j++) 00138 strings[i].string[j] = strs[i].string[j] != 0 ? 1 : 0; 00139 } 00140 00141 for(int32_t i = 0;i < num_str;i++) 00142 SG_FREE(strs[i].string); 00143 SG_FREE(strs); 00144 } 00145 00146 void CFile::set_string_list(const SGString<bool>* strings, int32_t num_str) 00147 { 00148 SGString<int8_t> * strs = SG_MALLOC(SGString<int8_t>, num_str); 00149 00150 for(int32_t i = 0;i < num_str;i++) 00151 { 00152 strs[i].slen = strings[i].slen; 00153 strs[i].string = SG_MALLOC(int8_t, strings[i].slen); 00154 for(int32_t j = 0;j < strings[i].slen;j++) 00155 strs[i].string[j] = strings[i].string[j] != 0 ? 1 : 0; 00156 } 00157 00158 set_int8_string_list(strs,num_str); 00159 00160 for(int32_t i = 0;i < num_str;i++) 00161 SG_FREE(strs[i].string); 00162 SG_FREE(strs); 00163 } 00164 00165 CFile::~CFile() 00166 { 00167 close(); 00168 } 00169 00170 void CFile::set_variable_name(const char* name) 00171 { 00172 SG_FREE(variable_name); 00173 variable_name=strdup(name); 00174 } 00175 00176 char* CFile::get_variable_name() 00177 { 00178 return strdup(variable_name); 00179 } 00180 00181 char* CFile::read_whole_file(char* fname, size_t& len) 00182 { 00183 FILE* tmpf=fopen(fname, "r"); 00184 ASSERT(tmpf); 00185 fseek(tmpf,0,SEEK_END); 00186 len=ftell(tmpf); 00187 ASSERT(len>0); 00188 rewind(tmpf); 00189 char* result = SG_MALLOC(char, len); 00190 size_t total=fread(result,1,len,tmpf); 00191 ASSERT(total==len); 00192 fclose(tmpf); 00193 return result; 00194 }