SHOGUN
v2.0.0
|
00001 /* 00002 * This program is free software; you can redistribute it and/or modify 00003 * it under the terms of the GNU General Public License as published by 00004 * the Free Software Foundation; either version 3 of the License, or 00005 * (at your option) any later version. 00006 * 00007 * Written (W) 2012 Heiko Strathmann 00008 */ 00009 00010 #include <shogun/statistics/LinearTimeMMD.h> 00011 #include <shogun/features/Features.h> 00012 #include <shogun/mathematics/Statistics.h> 00013 #include <shogun/features/CombinedFeatures.h> 00014 #include <shogun/kernel/CombinedKernel.h> 00015 00016 #include <shogun/lib/external/libqp.h> 00017 00018 using namespace shogun; 00019 00020 CLinearTimeMMD::CLinearTimeMMD() : 00021 CKernelTwoSampleTestStatistic() 00022 { 00023 init(); 00024 } 00025 00026 CLinearTimeMMD::CLinearTimeMMD(CKernel* kernel, CFeatures* p_and_q, 00027 index_t q_start) : 00028 CKernelTwoSampleTestStatistic(kernel, p_and_q, q_start) 00029 { 00030 init(); 00031 00032 if (p_and_q && q_start!=p_and_q->get_num_vectors()/2) 00033 { 00034 SG_ERROR("CLinearTimeMMD: Only features with equal number of vectors " 00035 "are currently possible\n"); 00036 } 00037 } 00038 00039 CLinearTimeMMD::CLinearTimeMMD(CKernel* kernel, CFeatures* p, CFeatures* q) : 00040 CKernelTwoSampleTestStatistic(kernel, p, q) 00041 { 00042 init(); 00043 00044 if (p->get_num_vectors()!=q->get_num_vectors()) 00045 { 00046 SG_ERROR("CLinearTimeMMD: Only features with equal number of vectors " 00047 "are currently possible\n"); 00048 } 00049 } 00050 00051 CLinearTimeMMD::~CLinearTimeMMD() 00052 { 00053 00054 } 00055 00056 void CLinearTimeMMD::init() 00057 { 00058 SG_ADD(&m_opt_max_iterations, "opt_max_iterations", "Maximum number of " 00059 "iterations for qp solver", MS_NOT_AVAILABLE); 00060 SG_ADD(&m_opt_epsilon, "opt_epsilon", "Stopping criterion for qp solver", 00061 MS_NOT_AVAILABLE); 00062 SG_ADD(&m_opt_low_cut, "opt_low_cut", "Low cut value for optimization " 00063 "kernel weights", MS_NOT_AVAILABLE); 00064 SG_ADD(&m_opt_regularization_eps, "opt_regularization_eps", "Regularization" 00065 " value that is added to diagonal of Q matrix", MS_NOT_AVAILABLE); 00066 00067 m_opt_max_iterations=10000; 00068 m_opt_epsilon=10E-15; 00069 m_opt_low_cut=10E-7; 00070 m_opt_regularization_eps=0; 00071 } 00072 00073 float64_t CLinearTimeMMD::compute_statistic() 00074 { 00075 SG_DEBUG("entering CLinearTimeMMD::compute_statistic()\n"); 00076 00077 REQUIRE(m_p_and_q, "%s::compute_statistic: features needed!\n", get_name()); 00078 00079 REQUIRE(m_kernel, "%s::compute_statistic: kernel needed!\n", get_name()); 00080 00081 /* m is number of samples from each distribution, m_2 is half of it 00082 * using names from JLMR paper (see class documentation) */ 00083 index_t m=m_q_start; 00084 index_t m_2=m/2; 00085 00086 SG_DEBUG("m_q_start=%d\n", m_q_start); 00087 00088 /* compute traces of kernel matrices for linear MMD */ 00089 m_kernel->init(m_p_and_q, m_p_and_q); 00090 00091 float64_t pp=0; 00092 float64_t qq=0; 00093 float64_t pq=0; 00094 float64_t qp=0; 00095 00096 /* compute traces */ 00097 for (index_t i=0; i<m_2; ++i) 00098 { 00099 pp+=m_kernel->kernel(i, m_2+i); 00100 qq+=m_kernel->kernel(m+i, m+m_2+i); 00101 pq+=m_kernel->kernel(i, m+m_2+i); 00102 qp+=m_kernel->kernel(m_2+i, m+i); 00103 } 00104 00105 SG_DEBUG("returning: 1/%d*(%f+%f-%f-%f)\n", m_2, pp, qq, pq, qp); 00106 00107 /* mean of sum all traces is linear time mmd */ 00108 SG_DEBUG("leaving CLinearTimeMMD::compute_statistic()\n"); 00109 return 1.0/m_2*(pp+qq-pq-qp); 00110 } 00111 00112 float64_t CLinearTimeMMD::compute_p_value(float64_t statistic) 00113 { 00114 float64_t result=0; 00115 00116 switch (m_null_approximation_method) 00117 { 00118 case MMD1_GAUSSIAN: 00119 { 00120 /* compute variance and use to estimate Gaussian distribution */ 00121 float64_t std_dev=CMath::sqrt(compute_variance_estimate()); 00122 result=1.0-CStatistics::normal_cdf(statistic, std_dev); 00123 } 00124 break; 00125 00126 default: 00127 /* bootstrapping is handled here */ 00128 result=CKernelTwoSampleTestStatistic::compute_p_value(statistic); 00129 break; 00130 } 00131 00132 return result; 00133 } 00134 00135 float64_t CLinearTimeMMD::compute_threshold(float64_t alpha) 00136 { 00137 float64_t result=0; 00138 00139 switch (m_null_approximation_method) 00140 { 00141 case MMD1_GAUSSIAN: 00142 { 00143 /* compute variance and use to estimate Gaussian distribution */ 00144 float64_t std_dev=CMath::sqrt(compute_variance_estimate()); 00145 result=1.0-CStatistics::inverse_normal_cdf(1-alpha, 0, std_dev); 00146 } 00147 break; 00148 00149 default: 00150 /* bootstrapping is handled here */ 00151 result=CKernelTwoSampleTestStatistic::compute_threshold(alpha); 00152 break; 00153 } 00154 00155 return result; 00156 } 00157 00158 float64_t CLinearTimeMMD::compute_variance_estimate() 00159 { 00160 REQUIRE(m_p_and_q, "%s::compute_variance_estimate: features needed!\n", 00161 get_name()); 00162 00163 REQUIRE(m_kernel, "%s::compute_variance_estimate: kernel needed!\n", 00164 get_name()); 00165 00166 if (m_p_and_q->get_num_vectors()<1000) 00167 { 00168 SG_WARNING("%s::compute_variance_estimate: The number of samples" 00169 " should be very large (at least 1000) in order to get a" 00170 " good Gaussian approximation using MMD1_GAUSSIAN.\n", 00171 get_name()); 00172 } 00173 00174 /* this corresponds to computing the statistic itself, however, the 00175 * difference is that all terms (of the traces) have to be stored */ 00176 index_t m=m_q_start; 00177 index_t m_2=m/2; 00178 00179 m_kernel->init(m_p_and_q, m_p_and_q); 00180 00181 /* allocate memory for traces */ 00182 SGVector<float64_t> traces(m_2); 00183 00184 /* sum up diagonals of all kernel matrices */ 00185 for (index_t i=0; i<m_2; ++i) 00186 { 00187 /* init for code beauty :) */ 00188 traces[i]=0; 00189 00190 /* p and p */ 00191 traces[i]+=m_kernel->kernel(i, m_2+i); 00192 00193 /* q and q */ 00194 traces[i]+=m_kernel->kernel(m+i, m+m_2+i); 00195 00196 /* p and q */ 00197 traces[i]-=m_kernel->kernel(i, m+m_2+i); 00198 00199 /* q and p */ 00200 traces[i]-=m_kernel->kernel(m_2+i, m+i); 00201 } 00202 00203 /* return linear time variance estimate */ 00204 return CStatistics::variance(traces)/m_2; 00205 } 00206 00207 #ifdef HAVE_LAPACK 00208 void CLinearTimeMMD::optimize_kernel_weights() 00209 { 00212 if (m_kernel->get_kernel_type()!=K_COMBINED) 00213 { 00214 SG_ERROR("CLinearTimeMMD::optimize_kernel_weights(): Only possible " 00215 "with a combined kernel!\n"); 00216 } 00217 00218 if (m_p_and_q->get_feature_class()!=C_COMBINED) 00219 { 00220 SG_ERROR("CLinearTimeMMD::optimize_kernel_weights(): Only possible " 00221 "with combined features!\n"); 00222 } 00223 00224 /* think about casting and types of different kernels/features here */ 00225 CCombinedFeatures* combined_p_and_q= 00226 dynamic_cast<CCombinedFeatures*>(m_p_and_q); 00227 CCombinedKernel* combined_kernel=dynamic_cast<CCombinedKernel*>(m_kernel); 00228 ASSERT(combined_p_and_q); 00229 ASSERT(combined_kernel); 00230 00231 if (combined_kernel->get_num_subkernels()!= 00232 combined_p_and_q->get_num_feature_obj()) 00233 { 00234 SG_ERROR("CLinearTimeMMD::optimize_kernel_weights(): Only possible " 00235 "when number of sub-kernels (%d) equal number of sub-features " 00236 "(%d)\n", combined_kernel->get_num_subkernels(), 00237 combined_p_and_q->get_num_feature_obj()); 00238 } 00239 00240 /* init kernel with features */ 00241 m_kernel->init(m_p_and_q, m_p_and_q); 00242 00243 /* number of kernels and data */ 00244 index_t num_kernels=combined_kernel->get_num_subkernels(); 00245 index_t m2=m_q_start/2; 00246 00247 /* matrix with all h entries for all kernels and data */ 00248 SGMatrix<float64_t> hs(m2, num_kernels); 00249 00250 /* mmds are needed and are means of columns of hs */ 00251 SGVector<float64_t> mmds(num_kernels); 00252 00253 float64_t pp; 00254 float64_t qq; 00255 float64_t pq; 00256 float64_t qp; 00257 /* compute all h entries */ 00258 for (index_t i=0; i<num_kernels; ++i) 00259 { 00260 CKernel* current=combined_kernel->get_kernel(i); 00261 mmds[i]=0; 00262 for (index_t j=0; j<m2; ++j) 00263 { 00264 pp=current->kernel(j, m2+j); 00265 qq=current->kernel(m_q_start+j, m_q_start+m2+j); 00266 pq=current->kernel(j, m_q_start+m2+j); 00267 qp=current->kernel(m2+j, m_q_start+j); 00268 hs(j, i)=pp+qq-pq-qp; 00269 mmds[i]+=hs(j, i); 00270 } 00271 00272 /* mmd is simply mean. This is the unbiased linear time estimate */ 00273 mmds[i]/=m2; 00274 00275 SG_UNREF(current); 00276 } 00277 00278 /* compute covariance matrix of h vector, in place is safe now since h 00279 * is not needed anymore */ 00280 m_Q=CStatistics::covariance_matrix(hs, true); 00281 00282 /* evtl regularize to avoid numerical problems (ratio of MMD and std-dev 00283 * blows up when variance is small */ 00284 if (m_opt_regularization_eps) 00285 { 00286 SG_DEBUG("regularizing matrix Q by adding %f to diagonal\n", 00287 m_opt_regularization_eps); 00288 for (index_t i=0; i<num_kernels; ++i) 00289 m_Q(i,i)+=m_opt_regularization_eps; 00290 } 00291 00292 if (sg_io->get_loglevel()==MSG_DEBUG) 00293 { 00294 m_Q.display_matrix("(evtl. regularized) Q"); 00295 mmds.display_vector("mmds"); 00296 } 00297 00298 /* compute sum of mmds to generate feasible point for convex program */ 00299 float64_t sum_mmds=0; 00300 for (index_t i=0; i<mmds.vlen; ++i) 00301 sum_mmds+=mmds[i]; 00302 00303 /* QP: 0.5*x'*Q*x + f'*x 00304 * subject to 00305 * mmds'*x = b 00306 * LB[i] <= x[i] <= UB[i] for all i=1..n */ 00307 SGVector<float64_t> Q_diag(num_kernels); 00308 SGVector<float64_t> f(num_kernels); 00309 SGVector<float64_t> lb(num_kernels); 00310 SGVector<float64_t> ub(num_kernels); 00311 SGVector<float64_t> x(num_kernels); 00312 00313 /* init everything, there are two cases possible: i) at least one mmd is 00314 * is positive, ii) all mmds are negative */ 00315 bool one_pos; 00316 for (index_t i=0; i<mmds.vlen; ++i) 00317 { 00318 if (mmds[i]>0) 00319 { 00320 SG_DEBUG("found at least one positive MMD\n"); 00321 one_pos=true; 00322 break; 00323 } 00324 one_pos=false; 00325 } 00326 00327 if (!one_pos) 00328 { 00329 SG_WARNING("All mmd estimates are negative. This is techical possible," 00330 " although extremely rare. Current problem might bad\n"); 00331 00332 /* if no element is positive, Q has to be replaced by -Q */ 00333 for (index_t i=0; i<num_kernels*num_kernels; ++i) 00334 m_Q.matrix[i]=-m_Q.matrix[i]; 00335 } 00336 00337 /* init vectors */ 00338 for (index_t i=0; i<num_kernels; ++i) 00339 { 00340 Q_diag[i]=m_Q(i,i); 00341 f[i]=0; 00342 lb[i]=0; 00343 ub[i]=CMath::INFTY; 00344 00345 /* initial point has to be feasible, i.e. mmds'*x = b */ 00346 x[i]=1.0/sum_mmds; 00347 } 00348 00349 /* start libqp solver with desired parameters */ 00350 SG_DEBUG("starting libqp optimization\n"); 00351 libqp_state_T qp_exitflag=libqp_gsmo_solver(&get_Q_col, Q_diag.vector, 00352 f.vector, mmds.vector, 00353 one_pos ? 1 : -1, 00354 lb.vector, ub.vector, 00355 x.vector, num_kernels, m_opt_max_iterations, 00356 m_opt_regularization_eps, &print_state); 00357 00358 SG_DEBUG("libqp returns: nIts=%d, exit_flag: %d\n", qp_exitflag.nIter, 00359 qp_exitflag.exitflag); 00360 00361 /* set really small entries to zero and sum up for normalization */ 00362 float64_t sum_weights=0; 00363 for (index_t i=0; i<x.vlen; ++i) 00364 { 00365 if (x[i]<m_opt_low_cut) 00366 { 00367 SG_DEBUG("lowcut: weight[%i]=%f<%f; setting to zero\n", i, x[i], 00368 m_opt_low_cut); 00369 x[i]=0; 00370 } 00371 00372 sum_weights+=x[i]; 00373 } 00374 00375 /* normalize (allowed since problem is scale invariant) */ 00376 for (index_t i=0; i<x.vlen; ++i) 00377 x[i]/=sum_weights; 00378 00379 /* set weights to kernel */ 00380 m_kernel->set_subkernel_weights(x); 00381 } 00382 00383 SGMatrix<float64_t> CLinearTimeMMD::m_Q=SGMatrix<float64_t>(); 00384 00385 const float64_t* CLinearTimeMMD::get_Q_col(uint32_t i) 00386 { 00387 return &m_Q[m_Q.num_rows*i]; 00388 } 00389 00390 void CLinearTimeMMD::print_state(libqp_state_T state) 00391 { 00392 SG_SDEBUG("libqp state: primal=%f\n", state.QP); 00393 } 00394 00395 #endif //HAVE_LAPACK 00396