// SHOGUN v2.0.0
00001 /* 00002 * This program is free software; you can redistribute it and/or modify 00003 * it under the terms of the GNU General Public License as published by 00004 * the Free Software Foundation; either version 3 of the License, or 00005 * (at your option) any later version. 00006 * 00007 * Written (W) 1999-2010 Soeren Sonnenburg 00008 * Written (W) 1999-2008 Gunnar Raetsch 00009 * Written (W) 2011-2012 Heiko Strathmann 00010 * Copyright (C) 1999-2009 Fraunhofer Institute FIRST and Max-Planck-Society 00011 * Copyright (C) 2010 Berlin Institute of Technology 00012 */ 00013 00014 #ifndef _SPARSEFEATURES__H__ 00015 #define _SPARSEFEATURES__H__ 00016 00017 #include <shogun/lib/common.h> 00018 #include <shogun/lib/DataType.h> 00019 #include <shogun/lib/SGSparseMatrix.h> 00020 #include <shogun/lib/Cache.h> 00021 #include <shogun/io/File.h> 00022 00023 #include <shogun/labels/RegressionLabels.h> 00024 #include <shogun/features/Features.h> 00025 #include <shogun/features/DotFeatures.h> 00026 #include <shogun/features/DenseFeatures.h> 00027 00028 namespace shogun 00029 { 00030 00031 class CFile; 00032 class CRegressionLabels; 00033 class CFeatures; 00034 class CDotFeatures; 00035 template <class ST> class CDenseFeatures; 00036 template <class ST> class SGSparseMatrix; 00037 00056 template <class ST> class CSparseFeatures : public CDotFeatures 00057 { 00058 public: 00063 CSparseFeatures(int32_t size=0); 00064 00073 CSparseFeatures(SGSparseVector<ST>* src, 00074 int32_t num_feat, int32_t num_vec,bool copy=false); 00075 00081 CSparseFeatures(SGSparseMatrix<ST> sparse); 00082 00088 CSparseFeatures(SGMatrix<ST> dense); 00089 00091 CSparseFeatures(const CSparseFeatures & orig); 00092 00097 CSparseFeatures(CFile* loader); 00098 00100 virtual ~CSparseFeatures(); 00101 00106 void free_sparse_feature_matrix(); 00107 00112 void free_sparse_features(); 00113 00118 virtual CFeatures* duplicate() const; 00119 00129 ST get_feature(int32_t num, int32_t index); 00130 00139 ST* 
get_full_feature_vector(int32_t num, int32_t& len); 00140 00146 SGVector<ST> get_full_feature_vector(int32_t num); 00147 00153 virtual int32_t get_nnz_features_for_vector(int32_t num); 00154 00164 SGSparseVector<ST> get_sparse_feature_vector(int32_t num); 00165 00176 static ST sparse_dot(ST alpha, SGSparseVectorEntry<ST>* avec, int32_t alen, 00177 SGSparseVectorEntry<ST>* bvec, int32_t blen); 00178 00191 ST dense_dot(ST alpha, int32_t num, ST* vec, int32_t dim, ST b); 00192 00204 void add_to_dense_vec(float64_t alpha, int32_t num, 00205 float64_t* vec, int32_t dim, bool abs_val=false); 00206 00213 void free_sparse_feature_vector(int32_t num); 00214 00224 SGSparseVector<ST>* get_sparse_feature_matrix(int32_t &num_feat, int32_t &num_vec); 00225 00233 SGSparseMatrix<ST> get_sparse_feature_matrix(); 00234 00241 CSparseFeatures<ST>* get_transposed(); 00242 00254 SGSparseVector<ST>* get_transposed(int32_t &num_feat, int32_t &num_vec); 00255 00263 void set_sparse_feature_matrix(SGSparseMatrix<ST> sm); 00264 00271 SGMatrix<ST> get_full_feature_matrix(); 00272 00282 virtual bool set_full_feature_matrix(SGMatrix<ST> full); 00283 00291 virtual bool apply_preprocessor(bool force_preprocessing=false); 00292 00297 virtual int32_t get_size() const; 00298 00306 bool obtain_from_simple(CDenseFeatures<ST>* sf); 00307 00312 virtual int32_t get_num_vectors() const; 00313 00318 int32_t get_num_features(); 00319 00331 int32_t set_num_features(int32_t num); 00332 00337 virtual EFeatureClass get_feature_class() const; 00338 00343 virtual EFeatureType get_feature_type() const; 00344 00351 void free_feature_vector(int32_t num); 00352 00357 int64_t get_num_nonzero_entries(); 00358 00366 float64_t* compute_squared(float64_t* sq); 00367 00382 float64_t compute_squared_norm(CSparseFeatures<float64_t>* lhs, 00383 float64_t* sq_lhs, int32_t idx_a, 00384 CSparseFeatures<float64_t>* rhs, float64_t* sq_rhs, 00385 int32_t idx_b); 00386 00393 void load(CFile* loader); 00394 00401 void save(CFile* 
writer); 00402 00412 CRegressionLabels* load_svmlight_file(char* fname, bool do_sort_features=true); 00413 00419 void sort_features(); 00420 00429 bool write_svmlight_file(char* fname, CRegressionLabels* label); 00430 00438 virtual int32_t get_dim_feature_space() const; 00439 00449 virtual float64_t dot(int32_t vec_idx1, CDotFeatures* df, int32_t vec_idx2); 00450 00459 virtual float64_t dense_dot(int32_t vec_idx1, float64_t* vec2, int32_t vec2_len); 00460 00461 #ifndef DOXYGEN_SHOULD_SKIP_THIS 00462 00463 struct sparse_feature_iterator 00464 { 00466 SGSparseVector<ST> sv; 00467 00469 int32_t vector_index; 00470 00472 int32_t index; 00473 00475 void print_info() 00476 { 00477 SG_SPRINT("sv=%p, vidx=%d, num_feat_entries=%d, index=%d\n", 00478 sv.features, vector_index, sv.num_feat_entries, index); 00479 } 00480 }; 00481 #endif 00482 00494 virtual void* get_feature_iterator(int32_t vector_index); 00495 00506 virtual bool get_next_feature(int32_t& index, float64_t& value, void* iterator); 00507 00513 virtual void free_feature_iterator(void* iterator); 00514 00521 virtual CFeatures* copy_subset(SGVector<index_t> indices); 00522 00524 inline virtual const char* get_name() const { return "SparseFeatures"; } 00525 00526 protected: 00537 virtual SGSparseVectorEntry<ST>* compute_sparse_feature_vector(int32_t num, 00538 int32_t& len, SGSparseVectorEntry<ST>* target=NULL); 00539 00540 private: 00541 void init(); 00542 00543 protected: 00544 00546 int32_t num_vectors; 00547 00549 int32_t num_features; 00550 00552 SGSparseVector<ST>* sparse_feature_matrix; 00553 00555 CCache< SGSparseVectorEntry<ST> >* feature_cache; 00556 }; 00557 } 00558 #endif /* _SPARSEFEATURES__H__ */