SHOGUN
v2.0.0
|
00001 /* 00002 * This program is free software; you can redistribute it and/or modify 00003 * it under the terms of the GNU General Public License as published by 00004 * the Free Software Foundation; either version 2 of the License, or 00005 * (at your option) any later version. 00006 * 00007 * Written (W) 2010 Christian Widmer 00008 * Copyright (C) 2010 Max-Planck-Society 00009 */ 00010 00011 #ifndef _MULTITASKKERNELTREENORMALIZER_H___ 00012 #define _MULTITASKKERNELTREENORMALIZER_H___ 00013 00014 #include <shogun/transfer/multitask/MultitaskKernelMklNormalizer.h> 00015 #include <shogun/kernel/Kernel.h> 00016 #include <algorithm> 00017 #include <map> 00018 #include <set> 00019 #include <deque> 00020 #include <vector> 00021 00022 namespace shogun 00023 { 00024 00029 class CNode: public CSGObject 00030 { 00031 public: 00034 CNode() 00035 { 00036 parent = NULL; 00037 beta = 1.0; 00038 node_id = 0; 00039 } 00040 00044 std::set<CNode*> get_path_root() 00045 { 00046 std::set<CNode*> nodes_on_path = std::set<CNode*>(); 00047 CNode *node = this; 00048 while (node != NULL) { 00049 nodes_on_path.insert(node); 00050 node = node->parent; 00051 } 00052 return nodes_on_path; 00053 } 00054 00058 std::vector<int32_t> get_task_ids_below() 00059 { 00060 00061 std::vector<int32_t> task_ids; 00062 std::deque<CNode*> grey_nodes; 00063 grey_nodes.push_back(this); 00064 00065 while(grey_nodes.size() > 0) 00066 { 00067 00068 CNode *current_node = grey_nodes.front(); 00069 grey_nodes.pop_front(); 00070 00071 for(int32_t i = 0; i!=int32_t(current_node->children.size()); i++){ 00072 grey_nodes.push_back(current_node->children[i]); 00073 } 00074 00075 if(current_node->is_leaf()){ 00076 task_ids.push_back(current_node->getNode_id()); 00077 } 00078 } 00079 00080 return task_ids; 00081 } 00082 00086 void add_child(CNode *node) 00087 { 00088 node->parent = this; 00089 this->children.push_back(node); 00090 } 00091 00093 inline virtual const char *get_name() const 00094 { 00095 return "CNode"; 00096 } 00097 00099 bool is_leaf() 00100 { 00101 return children.empty(); 00102 00103 } 00104 00106 int32_t getNode_id() const 00107 { 00108 return node_id; 00109 } 00110 00112 void setNode_id(int32_t node_idx) 00113 { 00114 this->node_id = node_idx; 00115 } 00116 00118 float64_t beta; 00119 00120 protected: 00121 00123 CNode* parent; 00124 00126 std::vector<CNode*> children; 00127 00129 int32_t node_id; 00130 00131 }; 00132 00133 00138 class CTaxonomy : public CSGObject 00139 { 00140 00141 public: 00142 00145 CTaxonomy() : CSGObject() 00146 { 00147 root = new CNode(); 00148 nodes.push_back(root); 00149 00150 name2id = std::map<std::string, int32_t>(); 00151 name2id["root"] = 0; 00152 } 00153 00158 CNode* get_node(int32_t task_id) { 00159 return nodes[task_id]; 00160 } 00161 00165 void set_root_beta(float64_t beta) 00166 { 00167 nodes[0]->beta = beta; 00168 } 00169 00175 CNode* add_node(std::string parent_name, std::string child_name, float64_t beta) 00176 { 00177 if (child_name=="") SG_ERROR("child_name empty"); 00178 if (parent_name=="") SG_ERROR("parent_name empty"); 00179 00180 00181 CNode* child_node = new CNode(); 00182 00183 child_node->beta = beta; 00184 00185 nodes.push_back(child_node); 00186 int32_t id = nodes.size()-1; 00187 00188 name2id[child_name] = id; 00189 00190 child_node->setNode_id(id); 00191 00192 00193 //create edge 00194 CNode* parent = nodes[name2id[parent_name]]; 00195 00196 parent->add_child(child_node); 00197 00198 return child_node; 00199 } 00200 00205 int32_t get_id(std::string name) { 00206 return name2id[name]; 00207 } 00208 00214 std::set<CNode*> intersect_root_path(CNode* node_lhs, CNode* node_rhs) 00215 { 00216 00217 std::set<CNode*> root_path_lhs = node_lhs->get_path_root(); 00218 std::set<CNode*> root_path_rhs = node_rhs->get_path_root(); 00219 00220 std::set<CNode*> intersection; 00221 00222 std::set_intersection(root_path_lhs.begin(), root_path_lhs.end(), 00223 root_path_rhs.begin(), root_path_rhs.end(), 00224 std::inserter(intersection, intersection.end())); 00225 00226 return intersection; 00227 00228 } 00229 00235 float64_t compute_node_similarity(int32_t task_lhs, int32_t task_rhs) 00236 { 00237 00238 CNode* node_lhs = get_node(task_lhs); 00239 CNode* node_rhs = get_node(task_rhs); 00240 00241 // compute intersection of paths to root 00242 std::set<CNode*> intersection = intersect_root_path(node_lhs, node_rhs); 00243 00244 // sum up weights 00245 float64_t gamma = 0; 00246 for (std::set<CNode*>::const_iterator p = intersection.begin(); p != intersection.end(); ++p) { 00247 00248 gamma += (*p)->beta; 00249 } 00250 00251 return gamma; 00252 00253 } 00254 00258 void update_task_histogram(std::vector<int32_t> task_vector_lhs) { 00259 00260 //empty map 00261 task_histogram.clear(); 00262 00263 00264 //fill map with zeros 00265 for (std::vector<int32_t>::const_iterator it=task_vector_lhs.begin(); it!=task_vector_lhs.end(); it++) 00266 { 00267 task_histogram[*it] = 0.0; 00268 } 00269 00270 //fill map 00271 for (std::vector<int32_t>::const_iterator it=task_vector_lhs.begin(); it!=task_vector_lhs.end(); it++) 00272 { 00273 task_histogram[*it] += 1.0; 00274 } 00275 00276 //compute fractions 00277 for (std::map<int32_t, float64_t>::const_iterator it=task_histogram.begin(); it!=task_histogram.end(); it++) 00278 { 00279 task_histogram[it->first] = task_histogram[it->first] / float64_t(task_vector_lhs.size()); 00280 } 00281 00282 } 00283 00285 int32_t get_num_nodes() 00286 { 00287 return (int32_t)(nodes.size()); 00288 } 00289 00291 int32_t get_num_leaves() 00292 { 00293 int32_t num_leaves = 0; 00294 00295 for (int32_t i=0; i!=get_num_nodes(); i++) 00296 { 00297 if (get_node(i)->is_leaf()==true) 00298 { 00299 num_leaves++; 00300 } 00301 } 00302 00303 return num_leaves; 00304 } 00305 00307 float64_t get_node_weight(int32_t idx) 00308 { 00309 CNode* node = get_node(idx); 00310 return node->beta; 00311 } 00312 00317 void set_node_weight(int32_t idx, float64_t weight) 00318 { 00319 CNode* node = get_node(idx); 00320 node->beta = weight; 00321 } 00322 00324 inline virtual const char* get_name() const 00325 { 00326 return "CTaxonomy"; 00327 } 00328 00330 std::map<std::string, int32_t> get_name2id() { 00331 return name2id; 00332 } 00333 00339 int32_t get_id_by_name(std::string name) 00340 { 00341 return name2id[name]; 00342 } 00343 00344 00345 protected: 00346 00348 CNode* root; 00350 std::map<std::string, int32_t> name2id; 00352 std::vector<CNode*> nodes; 00354 std::map<int32_t, float64_t> task_histogram; 00355 00356 }; 00357 00361 class CMultitaskKernelTreeNormalizer: public CMultitaskKernelMklNormalizer 00362 { 00363 00364 public: 00365 00368 CMultitaskKernelTreeNormalizer() : CMultitaskKernelMklNormalizer() 00369 { 00370 } 00371 00378 CMultitaskKernelTreeNormalizer(std::vector<std::string> task_lhs, 00379 std::vector<std::string> task_rhs, 00380 CTaxonomy tax) : CMultitaskKernelMklNormalizer() 00381 { 00382 00383 taxonomy = tax; 00384 set_task_vector_lhs(task_lhs); 00385 set_task_vector_rhs(task_rhs); 00386 00387 num_nodes = taxonomy.get_num_nodes(); 00388 00389 dependency_matrix = std::vector<float64_t>(num_nodes * num_nodes); 00390 00391 update_cache(); 00392 } 00393 00394 00396 virtual ~CMultitaskKernelTreeNormalizer() 00397 { 00398 } 00399 00400 00402 void update_cache() 00403 { 00404 00405 00406 for (int32_t i=0; i!=num_nodes; i++) 00407 { 00408 for (int32_t j=0; j!=num_nodes; j++) 00409 { 00410 00411 float64_t similarity = taxonomy.compute_node_similarity(i, j); 00412 set_node_similarity(i,j,similarity); 00413 00414 } 00415 00416 } 00417 } 00418 00419 00420 00426 inline virtual float64_t normalize(float64_t value, int32_t idx_lhs, int32_t idx_rhs) 00427 { 00428 //lookup tasks 00429 int32_t task_idx_lhs = task_vector_lhs[idx_lhs]; 00430 int32_t task_idx_rhs = task_vector_rhs[idx_rhs]; 00431 00432 //lookup similarity 00433 float64_t task_similarity = get_node_similarity(task_idx_lhs, task_idx_rhs); 00434 //float64_t task_similarity = taxonomy.compute_node_similarity(task_idx_lhs, task_idx_rhs); 00435 00436 //take task similarity into account 00437 float64_t similarity = (value/scale) * task_similarity; 00438 00439 00440 return similarity; 00441 } 00442 00447 inline virtual float64_t normalize_lhs(float64_t value, int32_t idx_lhs) 00448 { 00449 SG_ERROR("normalize_lhs not implemented"); 00450 return 0; 00451 } 00452 00457 inline virtual float64_t normalize_rhs(float64_t value, int32_t idx_rhs) 00458 { 00459 SG_ERROR("normalize_rhs not implemented"); 00460 return 0; 00461 } 00462 00463 00465 void set_task_vector_lhs(std::vector<std::string> vec) 00466 { 00467 00468 task_vector_lhs.clear(); 00469 00470 for (int32_t i = 0; i != (int32_t)(vec.size()); ++i) 00471 { 00472 task_vector_lhs.push_back(taxonomy.get_id(vec[i])); 00473 } 00474 00475 //update task histogram 00476 taxonomy.update_task_histogram(task_vector_lhs); 00477 00478 } 00479 00481 void set_task_vector_rhs(std::vector<std::string> vec) 00482 { 00483 00484 task_vector_rhs.clear(); 00485 00486 for (int32_t i = 0; i != (int32_t)(vec.size()); ++i) 00487 { 00488 task_vector_rhs.push_back(taxonomy.get_id(vec[i])); 00489 } 00490 00491 } 00492 00494 void set_task_vector(std::vector<std::string> vec) 00495 { 00496 set_task_vector_lhs(vec); 00497 set_task_vector_rhs(vec); 00498 } 00499 00501 int32_t get_num_betas() 00502 { 00503 00504 return taxonomy.get_num_nodes(); 00505 00506 } 00507 00511 float64_t get_beta(int32_t idx) 00512 { 00513 00514 return taxonomy.get_node_weight(idx); 00515 00516 } 00517 00521 void set_beta(int32_t idx, float64_t weight) 00522 { 00523 00524 taxonomy.set_node_weight(idx, weight); 00525 00526 update_cache(); 00527 00528 } 00529 00530 00536 float64_t get_node_similarity(int32_t node_lhs, int32_t node_rhs) 00537 { 00538 00539 ASSERT(node_lhs < num_nodes && node_lhs >= 0); 00540 ASSERT(node_rhs < num_nodes && node_rhs >= 0); 00541 00542 return dependency_matrix[node_lhs * num_nodes + node_rhs]; 00543 00544 } 00545 00551 void set_node_similarity(int32_t node_lhs, int32_t node_rhs, 00552 float64_t similarity) 00553 { 00554 00555 ASSERT(node_lhs < num_nodes && node_lhs >= 0); 00556 ASSERT(node_rhs < num_nodes && node_rhs >= 0); 00557 00558 dependency_matrix[node_lhs * num_nodes + node_rhs] = similarity; 00559 00560 } 00561 00563 inline virtual const char* get_name() const 00564 { 00565 return "MultitaskKernelTreeNormalizer"; 00566 } 00567 00571 CMultitaskKernelTreeNormalizer* KernelNormalizerToMultitaskKernelTreeNormalizer(CKernelNormalizer* n) 00572 { 00573 return dynamic_cast<CMultitaskKernelTreeNormalizer*>(n); 00574 } 00575 00576 protected: 00578 CTaxonomy taxonomy; 00579 00581 int32_t num_nodes; 00582 00584 std::vector<int32_t> task_vector_lhs; 00585 00587 std::vector<int32_t> task_vector_rhs; 00588 00590 std::vector<float64_t> dependency_matrix; 00591 }; 00592 } 00593 #endif