SHOGUN
v2.0.0
|
00001 /* 00002 * This program is free software; you can redistribute it and/or modify 00003 * it under the terms of the GNU General Public License as published by 00004 * the Free Software Foundation; either version 2 of the License, or 00005 * (at your option) any later version. 00006 * 00007 * Written (W) 2010 Christian Widmer 00008 * Copyright (C) 2010 Max-Planck-Society 00009 */ 00010 00011 #ifndef _MULTITASKKERNELPLIFNORMALIZER_H___ 00012 #define _MULTITASKKERNELPLIFNORMALIZER_H___ 00013 00014 #include <shogun/transfer/multitask/MultitaskKernelMklNormalizer.h> 00015 #include <shogun/kernel/Kernel.h> 00016 #include <algorithm> 00017 #include <vector> 00018 00019 00020 namespace shogun 00021 { 00025 class CMultitaskKernelPlifNormalizer: public CMultitaskKernelMklNormalizer 00026 { 00027 00028 public: 00030 CMultitaskKernelPlifNormalizer() : CMultitaskKernelMklNormalizer() 00031 { 00032 num_tasks = 0; 00033 num_tasksqr = 0; 00034 num_betas = 0; 00035 } 00036 00039 CMultitaskKernelPlifNormalizer(std::vector<float64_t> support_, std::vector<int32_t> task_vector) 00040 : CMultitaskKernelMklNormalizer() 00041 { 00042 00043 num_betas = static_cast<int>(support_.size()); 00044 00045 support = support_; 00046 00047 // init support points values with constant function 00048 betas = std::vector<float64_t>(num_betas); 00049 for (int i=0; i!=num_betas; i++) 00050 { 00051 betas[i] = 1; 00052 } 00053 00054 num_tasks = get_num_unique_tasks(task_vector); 00055 num_tasksqr = num_tasks * num_tasks; 00056 00057 // set both sides equally 00058 set_task_vector(task_vector); 00059 00060 // init distance matrix 00061 distance_matrix = std::vector<float64_t>(num_tasksqr); 00062 00063 // init similarity matrix 00064 similarity_matrix = std::vector<float64_t>(num_tasksqr); 00065 00066 } 00067 00068 00074 inline virtual float64_t normalize(float64_t value, int32_t idx_lhs, 00075 int32_t idx_rhs) 00076 { 00077 00078 //lookup tasks 00079 int32_t task_idx_lhs = task_vector_lhs[idx_lhs]; 00080 int32_t task_idx_rhs = task_vector_rhs[idx_rhs]; 00081 00082 //lookup similarity 00083 float64_t task_similarity = get_task_similarity(task_idx_lhs, 00084 task_idx_rhs); 00085 00086 //take task similarity into account 00087 float64_t similarity = (value/scale) * task_similarity; 00088 00089 00090 return similarity; 00091 00092 } 00093 00099 int32_t get_num_unique_tasks(std::vector<int32_t> vec) { 00100 00101 //sort 00102 std::sort(vec.begin(), vec.end()); 00103 00104 //reorder tasks with unique prefix 00105 std::vector<int32_t>::iterator endLocation = std::unique(vec.begin(), vec.end()); 00106 00107 //count unique tasks 00108 int32_t num_vec = std::distance(vec.begin(), endLocation); 00109 00110 return num_vec; 00111 00112 } 00113 00114 00116 virtual ~CMultitaskKernelPlifNormalizer() 00117 { 00118 } 00119 00120 00122 void update_cache() 00123 { 00124 00125 00126 for (int32_t i=0; i!=num_tasks; i++) 00127 { 00128 for (int32_t j=0; j!=num_tasks; j++) 00129 { 00130 00131 float64_t similarity = compute_task_similarity(i, j); 00132 set_task_similarity(i,j,similarity); 00133 00134 } 00135 00136 } 00137 } 00138 00139 00141 float64_t compute_task_similarity(int32_t task_a, int32_t task_b) 00142 { 00143 00144 float64_t distance = get_task_distance(task_a, task_b); 00145 float64_t similarity = -1; 00146 00147 int32_t upper_bound_idx = -1; 00148 00149 00150 // determine interval 00151 for (int i=1; i!=num_betas; i++) 00152 { 00153 if (distance <= support[i]) 00154 { 00155 upper_bound_idx = i; 00156 break; 00157 } 00158 } 00159 00160 // perform interpolation (constant for beyond upper bound) 00161 if (upper_bound_idx == -1) 00162 { 00163 00164 similarity = betas[num_betas-1]; 00165 00166 } else { 00167 00168 int32_t lower_bound_idx = upper_bound_idx - 1; 00169 float64_t interval_size = support[upper_bound_idx] - support[lower_bound_idx]; 00170 00171 float64_t factor_lower = 1 - (distance - support[lower_bound_idx]) / interval_size; 00172 float64_t factor_upper = 1 - factor_lower; 00173 00174 similarity = factor_lower*betas[lower_bound_idx] + factor_upper*betas[upper_bound_idx]; 00175 00176 } 00177 00178 return similarity; 00179 00180 } 00181 00182 00183 public: 00184 00186 virtual std::vector<int32_t> get_task_vector_lhs() const 00187 { 00188 return task_vector_lhs; 00189 } 00190 00192 virtual void set_task_vector_lhs(std::vector<int32_t> vec) 00193 { 00194 task_vector_lhs = vec; 00195 } 00196 00198 virtual std::vector<int32_t> get_task_vector_rhs() const 00199 { 00200 return task_vector_rhs; 00201 } 00202 00204 virtual void set_task_vector_rhs(std::vector<int32_t> vec) 00205 { 00206 task_vector_rhs = vec; 00207 } 00208 00210 virtual void set_task_vector(std::vector<int32_t> vec) 00211 { 00212 task_vector_lhs = vec; 00213 task_vector_rhs = vec; 00214 } 00215 00221 float64_t get_task_distance(int32_t task_lhs, int32_t task_rhs) 00222 { 00223 00224 ASSERT(task_lhs < num_tasks && task_lhs >= 0); 00225 ASSERT(task_rhs < num_tasks && task_rhs >= 0); 00226 00227 return distance_matrix[task_lhs * num_tasks + task_rhs]; 00228 00229 } 00230 00236 void set_task_distance(int32_t task_lhs, int32_t task_rhs, 00237 float64_t distance) 00238 { 00239 00240 ASSERT(task_lhs < num_tasks && task_lhs >= 0); 00241 ASSERT(task_rhs < num_tasks && task_rhs >= 0); 00242 00243 distance_matrix[task_lhs * num_tasks + task_rhs] = distance; 00244 00245 } 00246 00252 float64_t get_task_similarity(int32_t task_lhs, int32_t task_rhs) 00253 { 00254 00255 ASSERT(task_lhs < num_tasks && task_lhs >= 0); 00256 ASSERT(task_rhs < num_tasks && task_rhs >= 0); 00257 00258 return similarity_matrix[task_lhs * num_tasks + task_rhs]; 00259 00260 } 00261 00267 void set_task_similarity(int32_t task_lhs, int32_t task_rhs, 00268 float64_t similarity) 00269 { 00270 00271 ASSERT(task_lhs < num_tasks && task_lhs >= 0); 00272 ASSERT(task_rhs < num_tasks && task_rhs >= 0); 00273 00274 similarity_matrix[task_lhs * num_tasks + task_rhs] = similarity; 00275 00276 } 00277 00281 float64_t get_beta(int32_t idx) 00282 { 00283 00284 return betas[idx]; 00285 00286 } 00287 00292 void set_beta(int32_t idx, float64_t weight) 00293 { 00294 00295 betas[idx] = weight; 00296 00297 update_cache(); 00298 00299 } 00300 00304 int32_t get_num_betas() 00305 { 00306 00307 return num_betas; 00308 00309 } 00310 00311 00313 inline virtual const char* get_name() const 00314 { 00315 return "MultitaskKernelPlifNormalizer"; 00316 } 00317 00321 CMultitaskKernelPlifNormalizer* KernelNormalizerToMultitaskKernelPlifNormalizer(CKernelNormalizer* n) 00322 { 00323 return dynamic_cast<shogun::CMultitaskKernelPlifNormalizer*>(n); 00324 } 00325 00326 protected: 00329 virtual void register_params() 00330 { 00331 SG_ADD(&num_tasks, "num_tasks", "the number of tasks", MS_NOT_AVAILABLE); 00332 SG_ADD(&num_betas, "num_betas", "the number of weights", MS_NOT_AVAILABLE); 00333 m_parameters->add_vector((SGString<float64_t>**)&distance_matrix, &num_tasksqr, "distance_matrix", "distance between tasks"); 00334 m_parameters->add_vector((SGString<float64_t>**)&similarity_matrix, &num_tasksqr, "similarity_matrix", "similarity between tasks"); 00335 m_parameters->add_vector((SGString<float64_t>**)&betas, &num_betas, "num_betas", "weights"); 00336 m_parameters->add_vector((SGString<float64_t>**)&support, &num_betas, "support", "support points"); 00337 } 00338 00340 int32_t num_tasks; 00342 int32_t num_tasksqr; 00343 00345 std::vector<int32_t> task_vector_lhs; 00346 00348 std::vector<int32_t> task_vector_rhs; 00349 00351 std::vector<float64_t> distance_matrix; 00352 00354 std::vector<float64_t> similarity_matrix; 00355 00357 int32_t num_betas; 00358 00360 std::vector<float64_t> betas; 00361 00363 std::vector<float64_t> support; 00364 00365 }; 00366 } 00367 #endif