SHOGUN
v2.0.0
|
00001 /* 00002 * This program is free software; you can redistribute it and/or modify 00003 * it under the terms of the GNU General Public License as published by 00004 * the Free Software Foundation; either version 3 of the License, or 00005 * (at your option) any later version. 00006 * 00007 * Written (W) 1999-2009 Soeren Sonnenburg 00008 * Copyright (C) 1999-2009 Fraunhofer Institute FIRST and Max-Planck-Society 00009 */ 00010 00011 #include <shogun/features/RealFileFeatures.h> 00012 #include <shogun/features/Features.h> 00013 #include <shogun/io/SGIO.h> 00014 00015 #include <stdio.h> 00016 #include <string.h> 00017 00018 using namespace shogun; 00019 00020 CRealFileFeatures::CRealFileFeatures() 00021 { 00022 SG_UNSTABLE("CRealFileFeatures::CRealFileFeatures()", "\n"); 00023 00024 working_file=NULL; 00025 working_filename=strdup(""); 00026 intlen=0; 00027 doublelen=0; 00028 endian=0; 00029 fourcc=0; 00030 preprocd=0; 00031 labels=NULL; 00032 status=false; 00033 } 00034 00035 CRealFileFeatures::CRealFileFeatures(int32_t size, char* fname) 00036 : CDenseFeatures<float64_t>(size) 00037 { 00038 working_file=fopen(fname, "r"); 00039 working_filename=strdup(fname); 00040 ASSERT(working_file); 00041 intlen=0; 00042 doublelen=0; 00043 endian=0; 00044 fourcc=0; 00045 preprocd=0; 00046 labels=NULL; 00047 status=load_base_data(); 00048 } 00049 00050 CRealFileFeatures::CRealFileFeatures(int32_t size, FILE* file) 00051 : CDenseFeatures<float64_t>(size), working_file(file), working_filename(NULL) 00052 { 00053 ASSERT(working_file); 00054 intlen=0; 00055 doublelen=0; 00056 endian=0; 00057 fourcc=0; 00058 preprocd=0; 00059 labels=NULL; 00060 status=load_base_data(); 00061 } 00062 00063 CRealFileFeatures::~CRealFileFeatures() 00064 { 00065 SG_FREE(working_filename); 00066 SG_FREE(labels); 00067 } 00068 00069 CRealFileFeatures::CRealFileFeatures(const CRealFileFeatures & orig) 00070 : CDenseFeatures<float64_t>(orig), working_file(orig.working_file), status(orig.status) 00071 { 00072 if (orig.working_filename) 00073 working_filename=strdup(orig.working_filename); 00074 if (orig.labels && get_num_vectors()) 00075 { 00076 labels=SG_MALLOC(int32_t, get_num_vectors()); 00077 memcpy(labels, orig.labels, sizeof(int32_t)*get_num_vectors()); 00078 } 00079 } 00080 00081 float64_t* CRealFileFeatures::compute_feature_vector( 00082 int32_t num, int32_t &len, float64_t* target) 00083 { 00084 ASSERT(num<num_vectors); 00085 len=num_features; 00086 float64_t* featurevector=target; 00087 if (!featurevector) 00088 featurevector=SG_MALLOC(float64_t, num_features); 00089 ASSERT(working_file); 00090 fseek(working_file, filepos+num_features*doublelen*num, SEEK_SET); 00091 ASSERT(fread(featurevector, doublelen, num_features, working_file)==(size_t) num_features); 00092 return featurevector; 00093 } 00094 00095 float64_t* CRealFileFeatures::load_feature_matrix() 00096 { 00097 ASSERT(working_file); 00098 fseek(working_file, filepos, SEEK_SET); 00099 free_feature_matrix(); 00100 00101 SG_INFO( "allocating feature matrix of size %.2fM\n", sizeof(double)*num_features*num_vectors/1024.0/1024.0); 00102 free_feature_matrix(); 00103 feature_matrix=SGMatrix<float64_t>(num_features,num_vectors); 00104 00105 SG_INFO( "loading... be patient.\n"); 00106 00107 for (int32_t i=0; i<(int32_t) num_vectors; i++) 00108 { 00109 if (!(i % (num_vectors/10+1))) 00110 SG_PRINT( "%02d%%.", (int) (100.0*i/num_vectors)); 00111 else if (!(i % (num_vectors/200+1))) 00112 SG_PRINT( "."); 00113 00114 ASSERT(fread(&feature_matrix.matrix[num_features*i], doublelen, num_features, working_file)==(size_t) num_features); 00115 } 00116 SG_DONE(); 00117 00118 return feature_matrix.matrix; 00119 } 00120 00121 int32_t CRealFileFeatures::get_label(int32_t idx) 00122 { 00123 ASSERT(idx<num_vectors); 00124 if (labels) 00125 return labels[idx]; 00126 return 0; 00127 } 00128 00129 bool CRealFileFeatures::load_base_data() 00130 { 00131 ASSERT(working_file); 00132 uint32_t num_vec=0; 00133 uint32_t num_feat=0; 00134 00135 ASSERT(fread(&intlen, sizeof(uint8_t), 1, working_file)==1); 00136 ASSERT(fread(&doublelen, sizeof(uint8_t), 1, working_file)==1); 00137 ASSERT(fread(&endian, (uint32_t) intlen, 1, working_file)== 1); 00138 ASSERT(fread(&fourcc, (uint32_t) intlen, 1, working_file)==1); 00139 ASSERT(fread(&num_vec, (uint32_t) intlen, 1, working_file)==1); 00140 ASSERT(fread(&num_feat, (uint32_t) intlen, 1, working_file)==1); 00141 ASSERT(fread(&preprocd, (uint32_t) intlen, 1, working_file)==1); 00142 SG_INFO( "detected: intsize=%d, doublesize=%d, num_vec=%d, num_feat=%d, preprocd=%d\n", intlen, doublelen, num_vec, num_feat, preprocd); 00143 filepos=ftell(working_file); 00144 set_num_vectors(num_vec); 00145 set_num_features(num_feat); 00146 fseek(working_file, filepos+num_features*num_vectors*doublelen, SEEK_SET); 00147 SG_FREE(labels); 00148 labels=SG_MALLOC(int, num_vec); 00149 ASSERT(fread(labels, intlen, num_vec, working_file) == num_vec); 00150 return true; 00151 }