SHOGUN  v2.0.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines
SparseFeatures.h
Go to the documentation of this file.
00001 /*
00002  * This program is free software; you can redistribute it and/or modify
00003  * it under the terms of the GNU General Public License as published by
00004  * the Free Software Foundation; either version 3 of the License, or
00005  * (at your option) any later version.
00006  *
00007  * Written (W) 1999-2010 Soeren Sonnenburg
00008  * Written (W) 1999-2008 Gunnar Raetsch
00009  * Written (W) 2011-2012 Heiko Strathmann
00010  * Copyright (C) 1999-2009 Fraunhofer Institute FIRST and Max-Planck-Society
00011  * Copyright (C) 2010 Berlin Institute of Technology
00012  */
00013 
00014 #ifndef _SPARSEFEATURES__H__
00015 #define _SPARSEFEATURES__H__
00016 
00017 #include <shogun/lib/common.h>
00018 #include <shogun/lib/DataType.h>
00019 #include <shogun/lib/SGSparseMatrix.h>
00020 #include <shogun/lib/Cache.h>
00021 #include <shogun/io/File.h>
00022 
00023 #include <shogun/labels/RegressionLabels.h>
00024 #include <shogun/features/Features.h>
00025 #include <shogun/features/DotFeatures.h>
00026 #include <shogun/features/DenseFeatures.h>
00027 
00028 namespace shogun
00029 {
00030 
/* Forward declarations of the types named in the CSparseFeatures interface
 * below (as pointer, by-value or template arguments).
 * NOTE(review): headers with matching names (File.h, RegressionLabels.h,
 * Features.h, DotFeatures.h, DenseFeatures.h, SGSparseMatrix.h) are already
 * included above, so these are presumably redundant - confirm before removing. */
00031 class CFile;
00032 class CRegressionLabels;
00033 class CFeatures;
00034 class CDotFeatures;
00035 template <class ST> class CDenseFeatures;
00036 template <class ST> class SGSparseMatrix;
00037 
/**
 * Class template CSparseFeatures: a CDotFeatures subclass parameterised on the
 * element type ST.
 *
 * State declared at the bottom of the class: an array of SGSparseVector<ST>
 * (sparse_feature_matrix), the counters num_vectors / num_features, and a
 * CCache of SGSparseVectorEntry<ST> (feature_cache).
 *
 * Only get_name() and sparse_feature_iterator::print_info() have bodies in
 * this header; every other member is declared here and defined elsewhere.
 * The per-member notes therefore describe the signatures, and anything said
 * about behaviour is marked as an assumption to be confirmed against the
 * implementation.
 */
00056 template <class ST> class CSparseFeatures : public CDotFeatures
00057 {
00058     public:
              /** Constructor taking a single int32_t with default 0, so it also
               * serves as the default constructor. It is not declared explicit,
               * hence int32_t converts implicitly to CSparseFeatures.
               * NOTE(review): `size` is presumably the feature cache size - confirm.
               *
               * @param size integer size argument (default 0)
               */
00063         CSparseFeatures(int32_t size=0);
00064 
              /** Constructor from a raw array of sparse vectors.
               * NOTE(review): presumably `copy` selects between copying `src` and
               * referencing it - confirm who owns `src` afterwards.
               *
               * @param src pointer to SGSparseVector<ST> elements
               * @param num_feat number of features
               * @param num_vec number of vectors
               * @param copy boolean flag, default false
               */
00073         CSparseFeatures(SGSparseVector<ST>* src,
00074                 int32_t num_feat, int32_t num_vec,bool copy=false);
00075 
              /** Constructor from an SGSparseMatrix<ST> passed by value.
               *
               * @param sparse sparse matrix argument
               */
00081         CSparseFeatures(SGSparseMatrix<ST> sparse);
00082 
              /** Constructor from a dense SGMatrix<ST> passed by value.
               * NOTE(review): presumably converts the dense matrix to sparse
               * storage - confirm.
               *
               * @param dense dense matrix argument
               */
00088         CSparseFeatures(SGMatrix<ST> dense);
00089 
              /** Copy constructor. */
00091         CSparseFeatures(const CSparseFeatures & orig);
00092 
              /** Constructor taking a CFile pointer.
               * NOTE(review): presumably reads the features from `loader` - confirm.
               *
               * @param loader file object
               */
00097         CSparseFeatures(CFile* loader);
00098 
              /** Virtual destructor. */
00100         virtual ~CSparseFeatures();
00101 
              /** NOTE(review): presumably releases sparse_feature_matrix only;
               * how this differs from free_sparse_features() is not visible
               * here - confirm. */
00106         void free_sparse_feature_matrix();
00107 
              /** NOTE(review): presumably releases the sparse matrix together with
               * related state such as the cache - confirm. */
00112         void free_sparse_features();
00113 
              /** Virtual const member returning a CFeatures pointer.
               * NOTE(review): presumably a newly allocated copy of this object -
               * confirm ownership of the returned pointer.
               *
               * @return pointer to a CFeatures object
               */
00118         virtual CFeatures* duplicate() const;
00119 
              /** Returns a single value of type ST for a (num, index) pair.
               * NOTE(review): presumably `num` is the vector index and `index` the
               * feature dimension within it - confirm.
               *
               * @param num first index
               * @param index second index
               * @return value of type ST
               */
00129         ST get_feature(int32_t num, int32_t index);
00130 
              /** Returns a raw ST array for vector `num`; `len` is an output
               * reference.
               * NOTE(review): presumably a freshly allocated dense copy that the
               * caller must free - confirm.
               *
               * @param num vector index
               * @param len reference that receives a length
               * @return pointer to ST values
               */
00139         ST* get_full_feature_vector(int32_t num, int32_t& len);
00140 
              /** Overload of get_full_feature_vector() returning an SGVector<ST>.
               *
               * @param num vector index
               * @return vector of ST values
               */
00146         SGVector<ST> get_full_feature_vector(int32_t num);
00147 
              /** Virtual; returns an int32_t count for vector `num`.
               * NOTE(review): by its name, the number of non-zero entries of that
               * vector - confirm.
               *
               * @param num vector index
               * @return count as int32_t
               */
00153         virtual int32_t get_nnz_features_for_vector(int32_t num);
00154 
              /** Returns an SGSparseVector<ST> for vector `num`.
               * NOTE(review): free_sparse_feature_vector(num) below is presumably
               * the matching release call - confirm.
               *
               * @param num vector index
               * @return sparse vector
               */
00164         SGSparseVector<ST> get_sparse_feature_vector(int32_t num);
00165 
              /** Static helper operating on two arrays of sparse entries.
               * NOTE(review): presumably computes alpha times the dot product of
               * the two sparse vectors - confirm, including whether the entries
               * must be sorted by index.
               *
               * @param alpha scalar of type ST
               * @param avec first entry array
               * @param alen number of entries in avec
               * @param bvec second entry array
               * @param blen number of entries in bvec
               * @return value of type ST
               */
00176         static ST sparse_dot(ST alpha, SGSparseVectorEntry<ST>* avec, int32_t alen,
00177                 SGSparseVectorEntry<ST>* bvec, int32_t blen);
00178 
              /** Five-argument, ST-typed overload of dense_dot (a three-argument
               * float64_t overload is declared further down).
               * NOTE(review): presumably alpha * <vector num, vec> + b - confirm.
               *
               * @param alpha scalar of type ST
               * @param num vector index
               * @param vec dense array of ST
               * @param dim length associated with vec
               * @param b scalar of type ST
               * @return value of type ST
               */
00191         ST dense_dot(ST alpha, int32_t num, ST* vec, int32_t dim, ST b);
00192 
              /** Takes a float64_t array `vec` regardless of ST.
               * NOTE(review): presumably adds alpha times vector `num` into `vec`,
               * using absolute values when abs_val is true - confirm.
               *
               * @param alpha scalar factor
               * @param num vector index
               * @param vec dense float64_t array (non-const)
               * @param dim length associated with vec
               * @param abs_val boolean flag, default false
               */
00204         void add_to_dense_vec(float64_t alpha, int32_t num,
00205                 float64_t* vec, int32_t dim, bool abs_val=false);
00206 
              /** NOTE(review): presumably releases what
               * get_sparse_feature_vector(num) handed out - confirm.
               *
               * @param num vector index
               */
00213         void free_sparse_feature_vector(int32_t num);
00214 
              /** Returns a raw SGSparseVector<ST> pointer; both parameters are
               * output references.
               * NOTE(review): presumably exposes the internal
               * sparse_feature_matrix without copying - confirm.
               *
               * @param num_feat reference that receives a feature count
               * @param num_vec reference that receives a vector count
               * @return pointer to SGSparseVector<ST> elements
               */
00224         SGSparseVector<ST>* get_sparse_feature_matrix(int32_t &num_feat, int32_t &num_vec);
00225 
              /** Overload returning the matrix as an SGSparseMatrix<ST> value.
               *
               * @return sparse matrix
               */
00233         SGSparseMatrix<ST> get_sparse_feature_matrix();
00234 
              /** Returns a pointer to a CSparseFeatures<ST>.
               * NOTE(review): presumably a new object holding the transposed
               * matrix - confirm ownership of the returned pointer.
               *
               * @return pointer to CSparseFeatures<ST>
               */
00241         CSparseFeatures<ST>* get_transposed();
00242 
              /** Overload returning a raw SGSparseVector<ST> array; both parameters
               * are output references.
               * NOTE(review): presumably the caller owns the returned array -
               * confirm.
               *
               * @param num_feat reference that receives a feature count
               * @param num_vec reference that receives a vector count
               * @return pointer to SGSparseVector<ST> elements
               */
00254         SGSparseVector<ST>* get_transposed(int32_t &num_feat, int32_t &num_vec);
00255 
              /** Takes an SGSparseMatrix<ST> by value.
               * NOTE(review): presumably replaces the currently stored matrix -
               * confirm.
               *
               * @param sm sparse matrix argument
               */
00263         void set_sparse_feature_matrix(SGSparseMatrix<ST> sm);
00264 
              /** Returns a dense SGMatrix<ST>.
               *
               * @return dense matrix
               */
00271         SGMatrix<ST> get_full_feature_matrix();
00272 
              /** Virtual; takes a dense SGMatrix<ST> by value and returns bool.
               * NOTE(review): presumably stores `full` in sparse form and reports
               * success - confirm.
               *
               * @param full dense matrix argument
               * @return boolean result
               */
00282         virtual bool set_full_feature_matrix(SGMatrix<ST> full);
00283 
              /** Virtual; returns bool.
               *
               * @param force_preprocessing boolean flag, default false
               * @return boolean result
               */
00291         virtual bool apply_preprocessor(bool force_preprocessing=false);
00292 
              /** Virtual const getter returning int32_t.
               * NOTE(review): presumably the size of one element (sizeof(ST)) -
               * confirm.
               *
               * @return size as int32_t
               */
00297         virtual int32_t get_size() const;
00298 
              /** Takes a CDenseFeatures<ST> pointer and returns bool.
               * NOTE(review): presumably fills this object from the dense
               * features - confirm.
               *
               * @param sf dense features object
               * @return boolean result
               */
00306         bool obtain_from_simple(CDenseFeatures<ST>* sf);
00307 
              /** Virtual const getter.
               * NOTE(review): presumably returns num_vectors - confirm.
               *
               * @return vector count as int32_t
               */
00312         virtual int32_t  get_num_vectors() const;
00313 
              /** Non-virtual, non-const getter (unlike get_num_vectors()).
               * NOTE(review): presumably returns num_features - confirm.
               *
               * @return feature count as int32_t
               */
00318         int32_t  get_num_features();
00319 
              /** Setter that also returns an int32_t.
               * NOTE(review): the meaning of the return value (previous count?)
               * is not visible here - confirm.
               *
               * @param num new feature count
               * @return int32_t value
               */
00331         int32_t set_num_features(int32_t num);
00332 
              /** Virtual const getter returning an EFeatureClass value.
               *
               * @return feature class enum value
               */
00337         virtual EFeatureClass get_feature_class() const;
00338 
              /** Virtual const getter returning an EFeatureType value.
               * NOTE(review): presumably depends on ST - confirm.
               *
               * @return feature type enum value
               */
00343         virtual EFeatureType get_feature_type() const;
00344 
              /** NOTE(review): relation to free_sparse_feature_vector() is not
               * visible here - confirm.
               *
               * @param num vector index
               */
00351         void free_feature_vector(int32_t num);
00352 
              /** Returns a 64-bit count.
               * NOTE(review): by its name, the total number of non-zero entries
               * over all vectors - confirm.
               *
               * @return count as int64_t
               */
00357         int64_t get_num_nonzero_entries();
00358 
              /** Takes and returns a float64_t pointer.
               * NOTE(review): presumably fills `sq` with per-vector squared norms
               * and returns the same pointer - confirm required length of `sq`.
               *
               * @param sq float64_t array
               * @return float64_t pointer
               */
00366         float64_t* compute_squared(float64_t* sq);
00367 
              /** Operates on CSparseFeatures<float64_t> operands regardless of ST.
               * NOTE(review): presumably the squared distance between vector idx_a
               * of lhs and vector idx_b of rhs, using the precomputed squares in
               * sq_lhs / sq_rhs - confirm.
               *
               * @param lhs left-hand features
               * @param sq_lhs float64_t array belonging to lhs
               * @param idx_a index into lhs
               * @param rhs right-hand features
               * @param sq_rhs float64_t array belonging to rhs
               * @param idx_b index into rhs
               * @return float64_t result
               */
00382         float64_t compute_squared_norm(CSparseFeatures<float64_t>* lhs,
00383                 float64_t* sq_lhs, int32_t idx_a,
00384                 CSparseFeatures<float64_t>* rhs, float64_t* sq_rhs,
00385                 int32_t idx_b);
00386 
              /** NOTE(review): presumably reads the features from `loader` -
               * confirm.
               *
               * @param loader file object
               */
00393         void load(CFile* loader);
00394 
              /** NOTE(review): presumably writes the features to `writer` -
               * confirm.
               *
               * @param writer file object
               */
00401         void save(CFile* writer);
00402 
              /** Takes a file name and returns a CRegressionLabels pointer.
               * NOTE(review): presumably parses an SVMlight-format file into this
               * object and returns its labels - confirm ownership of the result.
               *
               * @param fname file name (non-const char*)
               * @param do_sort_features boolean flag, default true
               * @return pointer to CRegressionLabels
               */
00412         CRegressionLabels* load_svmlight_file(char* fname, bool do_sort_features=true);
00413 
              /** NOTE(review): presumably sorts the entries of each sparse vector
               * by feature index - confirm. */
00419         void sort_features();
00420 
              /** Takes a file name and labels and returns bool.
               * NOTE(review): presumably writes features and labels in SVMlight
               * format and reports success - confirm.
               *
               * @param fname file name (non-const char*)
               * @param label labels object
               * @return boolean result
               */
00429         bool write_svmlight_file(char* fname, CRegressionLabels* label);
00430 
              /** Virtual const getter returning int32_t.
               *
               * @return dimensionality as int32_t
               */
00438         virtual int32_t get_dim_feature_space() const;
00439 
              /** Virtual; returns float64_t for a vector of this object and a
               * vector of another CDotFeatures object.
               * NOTE(review): what kinds of `df` are accepted is not visible
               * here - confirm.
               *
               * @param vec_idx1 index of the vector in this object
               * @param df other features object
               * @param vec_idx2 index of the vector in df
               * @return float64_t result
               */
00449         virtual float64_t dot(int32_t vec_idx1, CDotFeatures* df, int32_t vec_idx2);
00450 
              /** Virtual three-argument float64_t overload of dense_dot (the
               * five-argument ST overload is declared further up).
               *
               * @param vec_idx1 index of the vector in this object
               * @param vec2 dense float64_t array
               * @param vec2_len length associated with vec2
               * @return float64_t result
               */
00459         virtual float64_t dense_dot(int32_t vec_idx1, float64_t* vec2, int32_t vec2_len);
00460 
              /* The struct below is compiled only when DOXYGEN_SHOULD_SKIP_THIS is
               * not defined. */
00461         #ifndef DOXYGEN_SHOULD_SKIP_THIS
00462 
              /** Plain aggregate holding iteration state.
               * NOTE(review): presumably what get_feature_iterator() returns behind
               * its void* - confirm. */
00463         struct sparse_feature_iterator
00464         {
                  /* NOTE(review): presumably the sparse vector being iterated. */
00466             SGSparseVector<ST> sv;
00467 
                  /* NOTE(review): presumably the index of that vector. */
00469             int32_t vector_index;
00470 
                  /* NOTE(review): presumably the current position within sv. */
00472             int32_t index;
00473 
                  /** Prints sv.features (as a pointer), vector_index,
                   * sv.num_feat_entries and index via SG_SPRINT. */
00475             void print_info()
00476             {
00477                 SG_SPRINT("sv=%p, vidx=%d, num_feat_entries=%d, index=%d\n",
00478                         sv.features, vector_index, sv.num_feat_entries, index);
00479             }
00480         };
00481         #endif
00482 
              /** Virtual; returns an opaque void* for a vector index.
               * NOTE(review): presumably must be released with
               * free_feature_iterator() - confirm.
               *
               * @param vector_index vector index
               * @return opaque iterator pointer
               */
00494         virtual void* get_feature_iterator(int32_t vector_index);
00495 
              /** Virtual; `index` and `value` are output references, `value` being
               * float64_t regardless of ST.
               * NOTE(review): presumably returns false once the vector is
               * exhausted - confirm.
               *
               * @param index reference that receives an index
               * @param value reference that receives a value
               * @param iterator opaque iterator pointer
               * @return boolean result
               */
00506         virtual bool get_next_feature(int32_t& index, float64_t& value, void* iterator);
00507 
              /** Virtual; takes the opaque iterator pointer.
               *
               * @param iterator opaque iterator pointer
               */
00513         virtual void free_feature_iterator(void* iterator);
00514 
              /** Virtual; takes a vector of indices by value and returns a
               * CFeatures pointer.
               * NOTE(review): presumably a new object containing only the listed
               * vectors - confirm ownership of the result.
               *
               * @param indices index vector
               * @return pointer to a CFeatures object
               */
00521         virtual CFeatures* copy_subset(SGVector<index_t> indices);
00522 
              /** @return the string literal "SparseFeatures" */
00524         inline virtual const char* get_name() const { return "SparseFeatures"; }
00525 
00526     protected:
              /** Protected virtual hook returning an array of sparse entries;
               * `len` is an output reference.
               * NOTE(review): presumably computes vector `num` on demand, writing
               * into `target` when one is supplied - confirm.
               *
               * @param num vector index
               * @param len reference that receives a length
               * @param target optional entry buffer, default NULL
               * @return pointer to SGSparseVectorEntry<ST> elements
               */
00537         virtual SGSparseVectorEntry<ST>* compute_sparse_feature_vector(int32_t num,
00538             int32_t& len, SGSparseVectorEntry<ST>* target=NULL);
00539 
00540     private:
              /* Private helper.
               * NOTE(review): presumably shared member initialisation called by
               * the constructors - confirm. */
00541         void init();
00542 
00543     protected:
00544 
              /* Vector count. */
00546         int32_t num_vectors;
00547 
              /* Feature count. */
00549         int32_t num_features;
00550 
              /* Raw array of sparse vectors.
               * NOTE(review): presumably num_vectors elements long - confirm. */
00552         SGSparseVector<ST>* sparse_feature_matrix;
00553 
              /* Pointer to a cache of sparse vector entries. */
00555         CCache< SGSparseVectorEntry<ST> >* feature_cache;
00556 };
00557 }
00558 #endif /* _SPARSEFEATURES__H__ */
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines

SHOGUN Machine Learning Toolbox - Documentation