SHOGUN
v2.0.0
|
00001 #include <shogun/features/DenseFeatures.h> 00002 #include <shogun/preprocessor/DensePreprocessor.h> 00003 #include <shogun/io/SGIO.h> 00004 #include <shogun/base/Parameter.h> 00005 #include <shogun/mathematics/Math.h> 00006 00007 #include <string.h> 00008 00009 namespace shogun { 00010 00011 template<class ST> CDenseFeatures<ST>::CDenseFeatures(int32_t size) : CDotFeatures(size) 00012 { 00013 init(); 00014 } 00015 00016 template<class ST> CDenseFeatures<ST>::CDenseFeatures(const CDenseFeatures & orig) : 00017 CDotFeatures(orig) 00018 { 00019 init(); 00020 set_feature_matrix(orig.feature_matrix); 00021 initialize_cache(); 00022 00023 if (orig.m_subset_stack != NULL) 00024 { 00025 SG_UNREF(m_subset_stack); 00026 m_subset_stack=new CSubsetStack(*orig.m_subset_stack); 00027 SG_REF(m_subset_stack); 00028 } 00029 } 00030 00031 template<class ST> CDenseFeatures<ST>::CDenseFeatures(SGMatrix<ST> matrix) : 00032 CDotFeatures() 00033 { 00034 init(); 00035 set_feature_matrix(matrix); 00036 } 00037 00038 template<class ST> CDenseFeatures<ST>::CDenseFeatures(ST* src, int32_t num_feat, int32_t num_vec) : 00039 CDotFeatures() 00040 { 00041 init(); 00042 set_feature_matrix(SGMatrix<ST>(src, num_feat, num_vec)); 00043 } 00044 template<class ST> CDenseFeatures<ST>::CDenseFeatures(CFile* loader) : 00045 CDotFeatures(loader) 00046 { 00047 init(); 00048 load(loader); 00049 } 00050 00051 template<class ST> CFeatures* CDenseFeatures<ST>::duplicate() const 00052 { 00053 return new CDenseFeatures<ST>(*this); 00054 } 00055 00056 template<class ST> CDenseFeatures<ST>::~CDenseFeatures() 00057 { 00058 free_features(); 00059 } 00060 00061 template<class ST> void CDenseFeatures<ST>::free_features() 00062 { 00063 m_subset_stack->remove_all_subsets(); 00064 free_feature_matrix(); 00065 SG_UNREF(feature_cache); 00066 } 00067 00068 template<class ST> void CDenseFeatures<ST>::free_feature_matrix() 00069 { 00070 m_subset_stack->remove_all_subsets(); 00071 feature_matrix=SGMatrix<ST>(); 00072 num_vectors = 0; 00073 num_features = 0; 00074 } 00075 00076 template<class ST> ST* CDenseFeatures<ST>::get_feature_vector(int32_t num, int32_t& len, bool& dofree) 00077 { 00078 /* index conversion for subset, only for array access */ 00079 int32_t real_num=m_subset_stack->subset_idx_conversion(num); 00080 00081 len = num_features; 00082 00083 if (feature_matrix.matrix) 00084 { 00085 dofree = false; 00086 return &feature_matrix.matrix[real_num * int64_t(num_features)]; 00087 } 00088 00089 ST* feat = NULL; 00090 dofree = false; 00091 00092 if (feature_cache) 00093 { 00094 feat = feature_cache->lock_entry(real_num); 00095 00096 if (feat) 00097 return feat; 00098 else 00099 feat = feature_cache->set_entry(real_num); 00100 } 00101 00102 if (!feat) 00103 dofree = true; 00104 feat = compute_feature_vector(num, len, feat); 00105 00106 if (get_num_preprocessors()) 00107 { 00108 int32_t tmp_len = len; 00109 ST* tmp_feat_before = feat; 00110 ST* tmp_feat_after = NULL; 00111 00112 for (int32_t i = 0; i < get_num_preprocessors(); i++) 00113 { 00114 CDensePreprocessor<ST>* p = 00115 (CDensePreprocessor<ST>*) get_preprocessor(i); 00116 // temporary hack 00117 SGVector<ST> applied = p->apply_to_feature_vector( 00118 SGVector<ST>(tmp_feat_before, tmp_len)); 00119 tmp_feat_after = applied.vector; 00120 SG_UNREF(p); 00121 00122 if (i != 0) // delete feature vector, except for the the first one, i.e., feat 00123 SG_FREE(tmp_feat_before); 00124 tmp_feat_before = tmp_feat_after; 00125 } 00126 00127 memcpy(feat, tmp_feat_after, sizeof(ST) * tmp_len); 00128 SG_FREE(tmp_feat_after); 00129 00130 len = tmp_len; 00131 } 00132 return feat; 00133 } 00134 00135 template<class ST> void CDenseFeatures<ST>::set_feature_vector(SGVector<ST> vector, int32_t num) 00136 { 00137 /* index conversion for subset, only for array access */ 00138 int32_t real_num=m_subset_stack->subset_idx_conversion(num); 00139 00140 if (num>=get_num_vectors()) 00141 { 00142 SG_ERROR("Index out of bounds (number of vectors %d, you " 00143 "requested %d)\n", get_num_vectors(), num); 00144 } 00145 00146 if (!feature_matrix.matrix) 00147 SG_ERROR("Requires a in-memory feature matrix\n"); 00148 00149 if (vector.vlen != num_features) 00150 SG_ERROR( 00151 "Vector not of length %d (has %d)\n", num_features, vector.vlen); 00152 00153 memcpy(&feature_matrix.matrix[real_num * int64_t(num_features)], vector.vector, 00154 int64_t(num_features) * sizeof(ST)); 00155 } 00156 00157 template<class ST> SGVector<ST> CDenseFeatures<ST>::get_feature_vector(int32_t num) 00158 { 00159 /* index conversion for subset, only for array access */ 00160 int32_t real_num=m_subset_stack->subset_idx_conversion(num); 00161 00162 if (num >= get_num_vectors()) 00163 { 00164 SG_ERROR("Index out of bounds (number of vectors %d, you " 00165 "requested %d)\n", get_num_vectors(), real_num); 00166 } 00167 00168 int32_t vlen; 00169 bool do_free; 00170 ST* vector= get_feature_vector(num, vlen, do_free); 00171 return SGVector<ST>(vector, vlen, do_free); 00172 } 00173 00174 template<class ST> void CDenseFeatures<ST>::free_feature_vector(ST* feat_vec, int32_t num, bool dofree) 00175 { 00176 if (feature_cache) 00177 feature_cache->unlock_entry(m_subset_stack->subset_idx_conversion(num)); 00178 00179 if (dofree) 00180 SG_FREE(feat_vec); 00181 } 00182 00183 template<class ST> void CDenseFeatures<ST>::free_feature_vector(SGVector<ST> vec, int32_t num) 00184 { 00185 free_feature_vector(vec.vector, num, false); 00186 vec=SGVector<ST>(); 00187 } 00188 00189 template<class ST> void CDenseFeatures<ST>::vector_subset(int32_t* idx, int32_t idx_len) 00190 { 00191 if (m_subset_stack->has_subsets()) 00192 SG_ERROR("A subset is set, cannot call vector_subset\n"); 00193 00194 ASSERT(feature_matrix.matrix); 00195 ASSERT(idx_len<=num_vectors); 00196 00197 int32_t num_vec = num_vectors; 00198 num_vectors = idx_len; 00199 00200 int32_t old_ii = -1; 00201 00202 for (int32_t i = 0; i < idx_len; i++) 00203 { 00204 int32_t ii = idx[i]; 00205 ASSERT(old_ii<ii); 00206 00207 if (ii < 0 || ii >= num_vec) 00208 SG_ERROR( "Index out of range: should be 0<%d<%d\n", ii, num_vec); 00209 00210 if (i == ii) 00211 continue; 00212 00213 memcpy(&feature_matrix.matrix[int64_t(num_features) * i], 00214 &feature_matrix.matrix[int64_t(num_features) * ii], 00215 num_features * sizeof(ST)); 00216 old_ii = ii; 00217 } 00218 } 00219 00220 template<class ST> void CDenseFeatures<ST>::feature_subset(int32_t* idx, int32_t idx_len) 00221 { 00222 if (m_subset_stack->has_subsets()) 00223 SG_ERROR("A subset is set, cannot call feature_subset\n"); 00224 00225 ASSERT(feature_matrix.matrix); 00226 ASSERT(idx_len<=num_features); 00227 int32_t num_feat = num_features; 00228 num_features = idx_len; 00229 00230 for (int32_t i = 0; i < num_vectors; i++) 00231 { 00232 ST* src = &feature_matrix.matrix[int64_t(num_feat) * i]; 00233 ST* dst = &feature_matrix.matrix[int64_t(num_features) * i]; 00234 00235 int32_t old_jj = -1; 00236 for (int32_t j = 0; j < idx_len; j++) 00237 { 00238 int32_t jj = idx[j]; 00239 ASSERT(old_jj<jj); 00240 if (jj < 0 || jj >= num_feat) 00241 SG_ERROR( 00242 "Index out of range: should be 0<%d<%d\n", jj, num_feat); 00243 00244 dst[j] = src[jj]; 00245 old_jj = jj; 00246 } 00247 } 00248 } 00249 00250 template<class ST> SGMatrix<ST> CDenseFeatures<ST>::get_feature_matrix() 00251 { 00252 if (!m_subset_stack->has_subsets()) 00253 return feature_matrix; 00254 00255 SGMatrix<ST> submatrix(num_features, get_num_vectors()); 00256 00257 /* copy a subset vector wise */ 00258 for (int32_t i=0; i<submatrix.num_cols; ++i) 00259 { 00260 int32_t real_i = m_subset_stack->subset_idx_conversion(i); 00261 memcpy(&submatrix.matrix[i*int64_t(num_features)], 00262 &feature_matrix.matrix[real_i * int64_t(num_features)], 00263 num_features * sizeof(ST)); 00264 } 00265 00266 return submatrix; 00267 } 00268 00269 template<class ST> SGMatrix<ST> CDenseFeatures<ST>::steal_feature_matrix() 00270 { 00271 SGMatrix<ST> st_feature_matrix=feature_matrix; 00272 m_subset_stack->remove_all_subsets(); 00273 SG_UNREF(feature_cache); 00274 clean_preprocessors(); 00275 free_feature_matrix(); 00276 return st_feature_matrix; 00277 } 00278 00279 template<class ST> void CDenseFeatures<ST>::set_feature_matrix(SGMatrix<ST> matrix) 00280 { 00281 m_subset_stack->remove_all_subsets(); 00282 free_feature_matrix(); 00283 feature_matrix = matrix; 00284 num_features = matrix.num_rows; 00285 num_vectors = matrix.num_cols; 00286 } 00287 00288 template<class ST> ST* CDenseFeatures<ST>::get_feature_matrix(int32_t &num_feat, int32_t &num_vec) 00289 { 00290 num_feat = num_features; 00291 num_vec = num_vectors; 00292 return feature_matrix.matrix; 00293 } 00294 00295 template<class ST> CDenseFeatures<ST>* CDenseFeatures<ST>::get_transposed() 00296 { 00297 int32_t num_feat; 00298 int32_t num_vec; 00299 ST* fm = get_transposed(num_feat, num_vec); 00300 00301 return new CDenseFeatures<ST>(fm, num_feat, num_vec); 00302 } 00303 00304 template<class ST> ST* CDenseFeatures<ST>::get_transposed(int32_t &num_feat, int32_t &num_vec) 00305 { 00306 num_feat = get_num_vectors(); 00307 num_vec = num_features; 00308 00309 int32_t old_num_vec=get_num_vectors(); 00310 00311 ST* fm = SG_MALLOC(ST, int64_t(num_feat) * num_vec); 00312 00313 for (int32_t i=0; i<old_num_vec; i++) 00314 { 00315 SGVector<ST> vec=get_feature_vector(i); 00316 00317 for (int32_t j=0; j<vec.vlen; j++) 00318 fm[j*int64_t(old_num_vec)+i]=vec.vector[j]; 00319 00320 free_feature_vector(vec, i); 00321 } 00322 00323 return fm; 00324 } 00325 00326 template<class ST> void CDenseFeatures<ST>::copy_feature_matrix(SGMatrix<ST> src) 00327 { 00328 if (m_subset_stack->has_subsets()) 00329 SG_ERROR("A subset is set, cannot call copy_feature_matrix\n"); 00330 00331 free_feature_matrix(); 00332 feature_matrix = src.clone(); 00333 num_features = src.num_rows; 00334 num_vectors = src.num_cols; 00335 initialize_cache(); 00336 } 00337 00338 template<class ST> void CDenseFeatures<ST>::obtain_from_dot(CDotFeatures* df) 00339 { 00340 m_subset_stack->remove_all_subsets(); 00341 00342 int32_t num_feat = df->get_dim_feature_space(); 00343 int32_t num_vec = df->get_num_vectors(); 00344 00345 ASSERT(num_feat>0 && num_vec>0); 00346 00347 free_feature_matrix(); 00348 feature_matrix = SGMatrix<ST>(num_feat, num_vec); 00349 00350 for (int32_t i = 0; i < num_vec; i++) 00351 { 00352 SGVector<float64_t> v = df->get_computed_dot_feature_vector(i); 00353 ASSERT(num_feat==v.vlen); 00354 00355 for (int32_t j = 0; j < num_feat; j++) 00356 feature_matrix.matrix[i * int64_t(num_feat) + j] = (ST) v.vector[j]; 00357 } 00358 num_features = num_feat; 00359 num_vectors = num_vec; 00360 } 00361 00362 template<class ST> bool CDenseFeatures<ST>::apply_preprocessor(bool force_preprocessing) 00363 { 00364 if (m_subset_stack->has_subsets()) 00365 SG_ERROR("A subset is set, cannot call apply_preproc\n"); 00366 00367 SG_DEBUG( "force: %d\n", force_preprocessing); 00368 00369 if (feature_matrix.matrix && get_num_preprocessors()) 00370 { 00371 for (int32_t i = 0; i < get_num_preprocessors(); i++) 00372 { 00373 if ((!is_preprocessed(i) || force_preprocessing)) 00374 { 00375 set_preprocessed(i); 00376 CDensePreprocessor<ST>* p = 00377 (CDensePreprocessor<ST>*) get_preprocessor(i); 00378 SG_INFO( "preprocessing using preproc %s\n", p->get_name()); 00379 00380 if (p->apply_to_feature_matrix(this).matrix == NULL) 00381 { 00382 SG_UNREF(p); 00383 return false; 00384 } 00385 SG_UNREF(p); 00386 00387 } 00388 } 00389 00390 return true; 00391 } 00392 else 00393 { 00394 if (!feature_matrix.matrix) 00395 SG_ERROR( "no feature matrix\n"); 00396 00397 if (!get_num_preprocessors()) 00398 SG_ERROR( "no preprocessors available\n"); 00399 00400 return false; 00401 } 00402 } 00403 00404 template<class ST> int32_t CDenseFeatures<ST>::get_size() const { return sizeof(ST); } 00405 00406 template<class ST> int32_t CDenseFeatures<ST>::get_num_vectors() const 00407 { 00408 return m_subset_stack->has_subsets() ? m_subset_stack->get_size() : num_vectors; 00409 } 00410 00411 template<class ST> int32_t CDenseFeatures<ST>::get_num_features() { return num_features; } 00412 00413 template<class ST> void CDenseFeatures<ST>::set_num_features(int32_t num) 00414 { 00415 num_features = num; 00416 initialize_cache(); 00417 } 00418 00419 template<class ST> void CDenseFeatures<ST>::set_num_vectors(int32_t num) 00420 { 00421 if (m_subset_stack->has_subsets()) 00422 SG_ERROR("A subset is set, cannot call set_num_vectors\n"); 00423 00424 num_vectors = num; 00425 initialize_cache(); 00426 } 00427 00428 template<class ST> void CDenseFeatures<ST>::initialize_cache() 00429 { 00430 if (m_subset_stack->has_subsets()) 00431 SG_ERROR("A subset is set, cannot call initialize_cache\n"); 00432 00433 if (num_features && num_vectors) 00434 { 00435 SG_UNREF(feature_cache); 00436 feature_cache = new CCache<ST>(get_cache_size(), num_features, 00437 num_vectors); 00438 SG_REF(feature_cache); 00439 } 00440 } 00441 00442 template<class ST> EFeatureClass CDenseFeatures<ST>::get_feature_class() const { return C_DENSE; } 00443 00444 template<class ST> bool CDenseFeatures<ST>::reshape(int32_t p_num_features, int32_t p_num_vectors) 00445 { 00446 if (m_subset_stack->has_subsets()) 00447 SG_ERROR("A subset is set, cannot call reshape\n"); 00448 00449 if (p_num_features * p_num_vectors 00450 == this->num_features * this->num_vectors) 00451 { 00452 num_features = p_num_features; 00453 num_vectors = p_num_vectors; 00454 return true; 00455 } else 00456 return false; 00457 } 00458 00459 template<class ST> int32_t CDenseFeatures<ST>::get_dim_feature_space() const { return num_features; } 00460 00461 template<class ST> float64_t CDenseFeatures<ST>::dot(int32_t vec_idx1, CDotFeatures* df, 00462 int32_t vec_idx2) 00463 { 00464 ASSERT(df); 00465 ASSERT(df->get_feature_type() == get_feature_type()); 00466 ASSERT(df->get_feature_class() == get_feature_class()); 00467 CDenseFeatures<ST>* sf = (CDenseFeatures<ST>*) df; 00468 00469 int32_t len1, len2; 00470 bool free1, free2; 00471 00472 ST* vec1 = get_feature_vector(vec_idx1, len1, free1); 00473 ST* vec2 = sf->get_feature_vector(vec_idx2, len2, free2); 00474 00475 float64_t result = SGVector<ST>::dot(vec1, vec2, len1); 00476 00477 free_feature_vector(vec1, vec_idx1, free1); 00478 sf->free_feature_vector(vec2, vec_idx2, free2); 00479 00480 return result; 00481 } 00482 00483 template<class ST> void CDenseFeatures<ST>::add_to_dense_vec(float64_t alpha, int32_t vec_idx1, 00484 float64_t* vec2, int32_t vec2_len, bool abs_val) 00485 { 00486 ASSERT(vec2_len == num_features); 00487 00488 int32_t vlen; 00489 bool vfree; 00490 ST* vec1 = get_feature_vector(vec_idx1, vlen, vfree); 00491 00492 ASSERT(vlen == num_features); 00493 00494 if (abs_val) 00495 { 00496 for (int32_t i = 0; i < num_features; i++) 00497 vec2[i] += alpha * CMath::abs(vec1[i]); 00498 } 00499 else 00500 { 00501 for (int32_t i = 0; i < num_features; i++) 00502 vec2[i] += alpha * vec1[i]; 00503 } 00504 00505 free_feature_vector(vec1, vec_idx1, vfree); 00506 } 00507 00508 template<class ST> int32_t CDenseFeatures<ST>::get_nnz_features_for_vector(int32_t num) 00509 { 00510 return num_features; 00511 } 00512 00513 template<class ST> void* CDenseFeatures<ST>::get_feature_iterator(int32_t vector_index) 00514 { 00515 if (vector_index>=get_num_vectors()) 00516 { 00517 SG_ERROR("Index out of bounds (number of vectors %d, you " 00518 "requested %d)\n", get_num_vectors(), vector_index); 00519 } 00520 00521 dense_feature_iterator* iterator = SG_MALLOC(dense_feature_iterator, 1); 00522 iterator->vec = get_feature_vector(vector_index, iterator->vlen, 00523 iterator->vfree); 00524 iterator->vidx = vector_index; 00525 iterator->index = 0; 00526 return iterator; 00527 } 00528 00529 template<class ST> bool CDenseFeatures<ST>::get_next_feature(int32_t& index, float64_t& value, 00530 void* iterator) 00531 { 00532 dense_feature_iterator* it = (dense_feature_iterator*) iterator; 00533 if (!it || it->index >= it->vlen) 00534 return false; 00535 00536 index = it->index++; 00537 value = (float64_t) it->vec[index]; 00538 00539 return true; 00540 } 00541 00542 template<class ST> void CDenseFeatures<ST>::free_feature_iterator(void* iterator) 00543 { 00544 if (!iterator) 00545 return; 00546 00547 dense_feature_iterator* it = (dense_feature_iterator*) iterator; 00548 free_feature_vector(it->vec, it->vidx, it->vfree); 00549 SG_FREE(it); 00550 } 00551 00552 template<class ST> CFeatures* CDenseFeatures<ST>::copy_subset(SGVector<index_t> indices) 00553 { 00554 SGMatrix<ST> feature_matrix_copy(num_features, indices.vlen); 00555 00556 for (index_t i=0; i<indices.vlen; ++i) 00557 { 00558 index_t real_idx=m_subset_stack->subset_idx_conversion(indices.vector[i]); 00559 memcpy(&feature_matrix_copy.matrix[i*num_features], 00560 &feature_matrix.matrix[real_idx*num_features], 00561 num_features*sizeof(ST)); 00562 } 00563 00564 CFeatures* result=new CDenseFeatures(feature_matrix_copy); 00565 SG_REF(result); 00566 return result; 00567 } 00568 00569 template<class ST> ST* CDenseFeatures<ST>::compute_feature_vector(int32_t num, int32_t& len, 00570 ST* target) 00571 { 00572 SG_NOTIMPLEMENTED; 00573 len = 0; 00574 return NULL; 00575 } 00576 00577 template<class ST> void CDenseFeatures<ST>::init() 00578 { 00579 num_vectors = 0; 00580 num_features = 0; 00581 00582 feature_matrix = SGMatrix<ST>(); 00583 feature_cache = NULL; 00584 00585 set_generic<ST>(); 00586 00587 /* not store number of vectors in subset */ 00588 SG_ADD(&num_vectors, "num_vectors", "Number of vectors.", MS_NOT_AVAILABLE); 00589 SG_ADD(&num_features, "num_features", "Number of features.", MS_NOT_AVAILABLE); 00590 SG_ADD(&feature_matrix, "feature_matrix", 00591 "Matrix of feature vectors / 1 vector per column.", MS_NOT_AVAILABLE); 00592 } 00593 00594 #define GET_FEATURE_TYPE(f_type, sg_type) \ 00595 template<> EFeatureType CDenseFeatures<sg_type>::get_feature_type() const \ 00596 { \ 00597 return f_type; \ 00598 } 00599 00600 GET_FEATURE_TYPE(F_BOOL, bool) 00601 GET_FEATURE_TYPE(F_CHAR, char) 00602 GET_FEATURE_TYPE(F_BYTE, uint8_t) 00603 GET_FEATURE_TYPE(F_BYTE, int8_t) 00604 GET_FEATURE_TYPE(F_SHORT, int16_t) 00605 GET_FEATURE_TYPE(F_WORD, uint16_t) 00606 GET_FEATURE_TYPE(F_INT, int32_t) 00607 GET_FEATURE_TYPE(F_UINT, uint32_t) 00608 GET_FEATURE_TYPE(F_LONG, int64_t) 00609 GET_FEATURE_TYPE(F_ULONG, uint64_t) 00610 GET_FEATURE_TYPE(F_SHORTREAL, float32_t) 00611 GET_FEATURE_TYPE(F_DREAL, float64_t) 00612 GET_FEATURE_TYPE(F_LONGREAL, floatmax_t) 00613 #undef GET_FEATURE_TYPE 00614 00615 template<> float64_t CDenseFeatures<bool>::dense_dot(int32_t vec_idx1, 00616 float64_t* vec2, int32_t vec2_len) 00617 { 00618 ASSERT(vec2_len == num_features); 00619 00620 int32_t vlen; 00621 bool vfree; 00622 bool* vec1 = get_feature_vector(vec_idx1, vlen, vfree); 00623 00624 ASSERT(vlen == num_features); 00625 float64_t result = 0; 00626 00627 for (int32_t i = 0; i < num_features; i++) 00628 result += vec1[i] ? vec2[i] : 0; 00629 00630 free_feature_vector(vec1, vec_idx1, vfree); 00631 00632 return result; 00633 } 00634 00635 template<> float64_t CDenseFeatures<char>::dense_dot(int32_t vec_idx1, 00636 float64_t* vec2, int32_t vec2_len) 00637 { 00638 ASSERT(vec2_len == num_features); 00639 00640 int32_t vlen; 00641 bool vfree; 00642 char* vec1 = get_feature_vector(vec_idx1, vlen, vfree); 00643 00644 ASSERT(vlen == num_features); 00645 float64_t result = 0; 00646 00647 for (int32_t i = 0; i < num_features; i++) 00648 result += vec1[i] * vec2[i]; 00649 00650 free_feature_vector(vec1, vec_idx1, vfree); 00651 00652 return result; 00653 } 00654 00655 template<> float64_t CDenseFeatures<int8_t>::dense_dot(int32_t vec_idx1, 00656 float64_t* vec2, int32_t vec2_len) 00657 { 00658 ASSERT(vec2_len == num_features); 00659 00660 int32_t vlen; 00661 bool vfree; 00662 int8_t* vec1 = get_feature_vector(vec_idx1, vlen, vfree); 00663 00664 ASSERT(vlen == num_features); 00665 float64_t result = 0; 00666 00667 for (int32_t i = 0; i < num_features; i++) 00668 result += vec1[i] * vec2[i]; 00669 00670 free_feature_vector(vec1, vec_idx1, vfree); 00671 00672 return result; 00673 } 00674 00675 template<> float64_t CDenseFeatures<uint8_t>::dense_dot( 00676 int32_t vec_idx1, float64_t* vec2, int32_t vec2_len) 00677 { 00678 ASSERT(vec2_len == num_features); 00679 00680 int32_t vlen; 00681 bool vfree; 00682 uint8_t* vec1 = get_feature_vector(vec_idx1, vlen, vfree); 00683 00684 ASSERT(vlen == num_features); 00685 float64_t result = 0; 00686 00687 for (int32_t i = 0; i < num_features; i++) 00688 result += vec1[i] * vec2[i]; 00689 00690 free_feature_vector(vec1, vec_idx1, vfree); 00691 00692 return result; 00693 } 00694 00695 template<> float64_t CDenseFeatures<int16_t>::dense_dot( 00696 int32_t vec_idx1, float64_t* vec2, int32_t vec2_len) 00697 { 00698 ASSERT(vec2_len == num_features); 00699 00700 int32_t vlen; 00701 bool vfree; 00702 int16_t* vec1 = get_feature_vector(vec_idx1, vlen, vfree); 00703 00704 ASSERT(vlen == num_features); 00705 float64_t result = 0; 00706 00707 for (int32_t i = 0; i < num_features; i++) 00708 result += vec1[i] * vec2[i]; 00709 00710 free_feature_vector(vec1, vec_idx1, vfree); 00711 00712 return result; 00713 } 00714 00715 template<> float64_t CDenseFeatures<uint16_t>::dense_dot( 00716 int32_t vec_idx1, float64_t* vec2, int32_t vec2_len) 00717 { 00718 ASSERT(vec2_len == num_features); 00719 00720 int32_t vlen; 00721 bool vfree; 00722 uint16_t* vec1 = get_feature_vector(vec_idx1, vlen, vfree); 00723 00724 ASSERT(vlen == num_features); 00725 float64_t result = 0; 00726 00727 for (int32_t i = 0; i < num_features; i++) 00728 result += vec1[i] * vec2[i]; 00729 00730 free_feature_vector(vec1, vec_idx1, vfree); 00731 00732 return result; 00733 } 00734 00735 template<> float64_t CDenseFeatures<int32_t>::dense_dot( 00736 int32_t vec_idx1, float64_t* vec2, int32_t vec2_len) 00737 { 00738 ASSERT(vec2_len == num_features); 00739 00740 int32_t vlen; 00741 bool vfree; 00742 int32_t* vec1 = get_feature_vector(vec_idx1, vlen, vfree); 00743 00744 ASSERT(vlen == num_features); 00745 float64_t result = 0; 00746 00747 for (int32_t i = 0; i < num_features; i++) 00748 result += vec1[i] * vec2[i]; 00749 00750 free_feature_vector(vec1, vec_idx1, vfree); 00751 00752 return result; 00753 } 00754 00755 template<> float64_t CDenseFeatures<uint32_t>::dense_dot( 00756 int32_t vec_idx1, float64_t* vec2, int32_t vec2_len) 00757 { 00758 ASSERT(vec2_len == num_features); 00759 00760 int32_t vlen; 00761 bool vfree; 00762 uint32_t* vec1 = get_feature_vector(vec_idx1, vlen, vfree); 00763 00764 ASSERT(vlen == num_features); 00765 float64_t result = 0; 00766 00767 for (int32_t i = 0; i < num_features; i++) 00768 result += vec1[i] * vec2[i]; 00769 00770 free_feature_vector(vec1, vec_idx1, vfree); 00771 00772 return result; 00773 } 00774 00775 template<> float64_t CDenseFeatures<int64_t>::dense_dot( 00776 int32_t vec_idx1, float64_t* vec2, int32_t vec2_len) 00777 { 00778 ASSERT(vec2_len == num_features); 00779 00780 int32_t vlen; 00781 bool vfree; 00782 int64_t* vec1 = get_feature_vector(vec_idx1, vlen, vfree); 00783 00784 ASSERT(vlen == num_features); 00785 float64_t result = 0; 00786 00787 for (int32_t i = 0; i < num_features; i++) 00788 result += vec1[i] * vec2[i]; 00789 00790 free_feature_vector(vec1, vec_idx1, vfree); 00791 00792 return result; 00793 } 00794 00795 template<> float64_t CDenseFeatures<uint64_t>::dense_dot( 00796 int32_t vec_idx1, float64_t* vec2, int32_t vec2_len) 00797 { 00798 ASSERT(vec2_len == num_features); 00799 00800 int32_t vlen; 00801 bool vfree; 00802 uint64_t* vec1 = get_feature_vector(vec_idx1, vlen, vfree); 00803 00804 ASSERT(vlen == num_features); 00805 float64_t result = 0; 00806 00807 for (int32_t i = 0; i < num_features; i++) 00808 result += vec1[i] * vec2[i]; 00809 00810 free_feature_vector(vec1, vec_idx1, vfree); 00811 00812 return result; 00813 } 00814 00815 template<> float64_t CDenseFeatures<float32_t>::dense_dot( 00816 int32_t vec_idx1, float64_t* vec2, int32_t vec2_len) 00817 { 00818 ASSERT(vec2_len == num_features); 00819 00820 int32_t vlen; 00821 bool vfree; 00822 float32_t* vec1 = get_feature_vector(vec_idx1, vlen, vfree); 00823 00824 ASSERT(vlen == num_features); 00825 float64_t result = 0; 00826 00827 for (int32_t i = 0; i < num_features; i++) 00828 result += vec1[i] * vec2[i]; 00829 00830 free_feature_vector(vec1, vec_idx1, vfree); 00831 00832 return result; 00833 } 00834 00835 template<> float64_t CDenseFeatures<float64_t>::dense_dot( 00836 int32_t vec_idx1, float64_t* vec2, int32_t vec2_len) 00837 { 00838 ASSERT(vec2_len == num_features); 00839 00840 int32_t vlen; 00841 bool vfree; 00842 float64_t* vec1 = get_feature_vector(vec_idx1, vlen, vfree); 00843 00844 ASSERT(vlen == num_features); 00845 float64_t result = SGVector<float64_t>::dot(vec1, vec2, num_features); 00846 00847 free_feature_vector(vec1, vec_idx1, vfree); 00848 00849 return result; 00850 } 00851 00852 template<> float64_t CDenseFeatures<floatmax_t>::dense_dot( 00853 int32_t vec_idx1, float64_t* vec2, int32_t vec2_len) 00854 { 00855 ASSERT(vec2_len == num_features); 00856 00857 int32_t vlen; 00858 bool vfree; 00859 floatmax_t* vec1 = get_feature_vector(vec_idx1, vlen, vfree); 00860 00861 ASSERT(vlen == num_features); 00862 float64_t result = 0; 00863 00864 for (int32_t i = 0; i < num_features; i++) 00865 result += vec1[i] * vec2[i]; 00866 00867 free_feature_vector(vec1, vec_idx1, vfree); 00868 00869 return result; 00870 } 00871 00872 template<class ST> bool CDenseFeatures<ST>::is_equal(CDenseFeatures* rhs) 00873 { 00874 if ( num_features != rhs->num_features || num_vectors != rhs->num_vectors ) 00875 return false; 00876 00877 ST* vec1; 00878 ST* vec2; 00879 int32_t v1len, v2len; 00880 bool v1free, v2free, stop = false; 00881 00882 for (int32_t i = 0; i < num_vectors; i++) 00883 { 00884 vec1 = get_feature_vector(i, v1len, v1free); 00885 vec2 = rhs->get_feature_vector(i, v2len, v2free); 00886 00887 if (v1len!=v2len) 00888 stop = true; 00889 00890 for (int32_t j=0; j<v1len; j++) 00891 { 00892 if (vec1[j]!=vec2[j]) 00893 stop = true; 00894 } 00895 00896 free_feature_vector(vec1, i, v1free); 00897 free_feature_vector(vec2, i, v2free); 00898 00899 if (stop) 00900 return false; 00901 } 00902 00903 return true; 00904 } 00905 00906 template<class ST> CFeatures* CDenseFeatures<ST>::create_merged_copy( 00907 CFeatures* other) 00908 { 00909 SG_DEBUG("entering %s::create_merged_copy()\n", get_name()); 00910 if (get_feature_type()!=other->get_feature_type() || 00911 get_feature_class()!=other->get_feature_class() || 00912 strcmp(get_name(), other->get_name())) 00913 { 00914 SG_ERROR("%s::create_merged_copy(): Features are of different type!\n", 00915 get_name()); 00916 } 00917 00918 CDenseFeatures<ST>* casted=dynamic_cast<CDenseFeatures<ST>* >(other); 00919 00920 if (!casted) 00921 { 00922 SG_ERROR("%s::create_merged_copy(): Could not cast object of %s to " 00923 "same type as %s\n",get_name(), other->get_name(), get_name()); 00924 } 00925 00926 if (num_features!=casted->num_features) 00927 { 00928 SG_ERROR("%s::create_merged_copy(): Provided feature object has " 00929 "different dimension than this one\n"); 00930 } 00931 00932 /* create new feature matrix and copy both instances data into it */ 00933 SGMatrix<ST> data(num_features, num_vectors+casted->get_num_vectors()); 00934 00935 /* copy data of this instance */ 00936 SG_DEBUG("copying matrix of this instance\n"); 00937 memcpy(data.matrix, feature_matrix.matrix, 00938 num_features*num_vectors*sizeof(ST)); 00939 00940 /* copy data of provided instance */ 00941 SG_DEBUG("copying matrix of provided instance\n"); 00942 memcpy(&data.matrix[num_vectors*num_features], 00943 casted->feature_matrix.matrix, 00944 casted->num_features*casted->num_vectors*sizeof(ST)); 00945 00946 /* create new instance and return */ 00947 CDenseFeatures<ST>* result=new CDenseFeatures<ST>(data); 00948 00949 SG_DEBUG("leaving %s::create_merged_copy()\n", get_name()); 00950 return result; 00951 } 00952 00953 #define LOAD(f_load, sg_type) \ 00954 template<> void CDenseFeatures<sg_type>::load(CFile* loader) \ 00955 { \ 00956 SG_SET_LOCALE_C; \ 00957 ASSERT(loader); \ 00958 sg_type* matrix; \ 00959 int32_t num_feat; \ 00960 int32_t num_vec; \ 00961 loader->f_load(matrix, num_feat, num_vec); \ 00962 set_feature_matrix(SGMatrix<sg_type>(matrix, num_feat, num_vec)); \ 00963 SG_RESET_LOCALE; \ 00964 } 00965 00966 LOAD(get_matrix, bool) 00967 LOAD(get_matrix, char) 00968 LOAD(get_int8_matrix, int8_t) 00969 LOAD(get_matrix, uint8_t) 00970 LOAD(get_matrix, int16_t) 00971 LOAD(get_matrix, uint16_t) 00972 LOAD(get_matrix, int32_t) 00973 LOAD(get_uint_matrix, uint32_t) 00974 LOAD(get_long_matrix, int64_t) 00975 LOAD(get_ulong_matrix, uint64_t) 00976 LOAD(get_matrix, float32_t) 00977 LOAD(get_matrix, float64_t) 00978 LOAD(get_longreal_matrix, floatmax_t) 00979 #undef LOAD 00980 00981 #define SAVE(f_write, sg_type) \ 00982 template<> void CDenseFeatures<sg_type>::save(CFile* writer) \ 00983 { \ 00984 SG_SET_LOCALE_C; \ 00985 ASSERT(writer); \ 00986 writer->f_write(feature_matrix.matrix, feature_matrix.num_rows, \ 00987 feature_matrix.num_cols); \ 00988 SG_RESET_LOCALE; \ 00989 } 00990 00991 SAVE(set_matrix, bool) 00992 SAVE(set_matrix, char) 00993 SAVE(set_int8_matrix, int8_t) 00994 SAVE(set_matrix, uint8_t) 00995 SAVE(set_matrix, int16_t) 00996 SAVE(set_matrix, uint16_t) 00997 SAVE(set_matrix, int32_t) 00998 SAVE(set_uint_matrix, uint32_t) 00999 SAVE(set_long_matrix, int64_t) 01000 SAVE(set_ulong_matrix, uint64_t) 01001 SAVE(set_matrix, float32_t) 01002 SAVE(set_matrix, float64_t) 01003 SAVE(set_longreal_matrix, floatmax_t) 01004 #undef SAVE 01005 01006 template class CDenseFeatures<bool>; 01007 template class CDenseFeatures<char>; 01008 template class CDenseFeatures<int8_t>; 01009 template class CDenseFeatures<uint8_t>; 01010 template class CDenseFeatures<int16_t>; 01011 template class CDenseFeatures<uint16_t>; 01012 template class CDenseFeatures<int32_t>; 01013 template class CDenseFeatures<uint32_t>; 01014 template class CDenseFeatures<int64_t>; 01015 template class CDenseFeatures<uint64_t>; 01016 template class CDenseFeatures<float32_t>; 01017 template class CDenseFeatures<float64_t>; 01018 template class CDenseFeatures<floatmax_t>; 01019 }