SHOGUN
v2.0.0
|
00001 /* 00002 * This program is free software; you can redistribute it and/or modify 00003 * it under the terms of the GNU General Public License as published by 00004 * the Free Software Foundation; either version 3 of the License, or 00005 * (at your option) any later version. 00006 * 00007 * Written (W) 2011-2012 Heiko Strathmann 00008 * Copyright (C) 2011 Berlin Institute of Technology and Max-Planck-Society 00009 * 00010 * ALGLIB Copyright 1984, 1987, 1995, 2000 by Stephen L. Moshier under GPL2+ 00011 * http://www.alglib.net/ 00012 * See header file for which functions are taken from ALGLIB (with adjustments 00013 * for shogun) 00014 */ 00015 00016 #include <shogun/mathematics/Statistics.h> 00017 #include <shogun/mathematics/Math.h> 00018 #include <shogun/lib/SGMatrix.h> 00019 #include <shogun/lib/SGVector.h> 00020 00021 #ifdef HAVE_LAPACK 00022 #include <shogun/mathematics/lapack.h> 00023 #endif //HAVE_LAPACK 00024 using namespace shogun; 00025 00026 float64_t CStatistics::mean(SGVector<float64_t> values) 00027 { 00028 ASSERT(values.vlen>0); 00029 ASSERT(values.vector); 00030 00031 float64_t sum=0; 00032 for (index_t i=0; i<values.vlen; ++i) 00033 sum+=values.vector[i]; 00034 00035 return sum/values.vlen; 00036 } 00037 00038 float64_t CStatistics::median(SGVector<float64_t> values, bool modify, 00039 bool in_place) 00040 { 00041 float64_t result; 00042 if (modify) 00043 { 00044 /* use QuickSelect method 00045 * This Quickselect routine is based on the algorithm described in 00046 * "Numerical recipes in C", Second Edition, 00047 * Cambridge University Press, 1992, Section 8.5, ISBN 0-521-43108-5 00048 * This code by Nicolas Devillard - 1998. Public domain. 00049 * Adapted to SHOGUN by Heiko Strathmann 00050 */ 00051 int32_t low; 00052 int32_t high; 00053 int32_t median; 00054 int32_t middle; 00055 int32_t l; 00056 int32_t h; 00057 00058 low=0; 00059 high=values.vlen-1; 00060 median=(low+high)/2; 00061 00062 while (true) 00063 { 00064 if (high<=low) 00065 { 00066 result=values[median]; 00067 break; 00068 } 00069 00070 if (high==low+1) 00071 { 00072 if (values[low]>values[high]) 00073 CMath::CMath::swap(values[low], values[high]); 00074 result=values[median]; 00075 break; 00076 } 00077 00078 middle=(low+high)/2; 00079 if (values[middle]>values[high]) 00080 CMath::swap(values[middle], values[high]); 00081 if (values[low]>values[high]) 00082 CMath::swap(values[low], values[high]); 00083 if (values[middle]>values[low]) 00084 CMath::swap(values[middle], values[low]); 00085 00086 CMath::swap(values[middle], values[low+1]); 00087 00088 l=low+1; 00089 h=high; 00090 for (;;) 00091 { 00092 do 00093 l++; 00094 while (values[low]>values[l]); 00095 do 00096 h--; 00097 while (values[h]>values[low]); 00098 if (h<l) 00099 break; 00100 CMath::swap(values[l], values[h]); 00101 } 00102 00103 CMath::swap(values[low], values[h]); 00104 if (h<=median) 00105 low=l; 00106 if (h>=median) 00107 high=h-1; 00108 } 00109 00110 } 00111 else 00112 { 00113 if (in_place) 00114 { 00115 /* use Torben method 00116 * The following code is public domain. 00117 * Algorithm by Torben Mogensen, implementation by N. Devillard. 00118 * This code in public domain. 00119 * Adapted to SHOGUN by Heiko Strathmann 00120 */ 00121 int32_t i; 00122 int32_t less; 00123 int32_t greater; 00124 int32_t equal; 00125 float64_t min; 00126 float64_t max; 00127 float64_t guess; 00128 float64_t maxltguess; 00129 float64_t mingtguess; 00130 min=max=values[0]; 00131 for (i=1; i<values.vlen; i++) 00132 { 00133 if (values[i]<min) 00134 min=values[i]; 00135 if (values[i]>max) 00136 max=values[i]; 00137 } 00138 while (1) 00139 { 00140 guess=(min+max)/2; 00141 less=0; 00142 greater=0; 00143 equal=0; 00144 maxltguess=min; 00145 mingtguess=max; 00146 for (i=0; i<values.vlen; i++) 00147 { 00148 if (values[i]<guess) 00149 { 00150 less++; 00151 if (values[i]>maxltguess) 00152 maxltguess=values[i]; 00153 } 00154 else if (values[i]>guess) 00155 { 00156 greater++; 00157 if (values[i]<mingtguess) 00158 mingtguess=values[i]; 00159 } 00160 else 00161 equal++; 00162 } 00163 if (less<=(values.vlen+1)/2&&greater<=(values.vlen+1)/2) 00164 break; 00165 else if (less>greater) 00166 max=maxltguess; 00167 else 00168 min=mingtguess; 00169 } 00170 00171 if (less>=(values.vlen+1)/2) 00172 result=maxltguess; 00173 else if (less+equal>=(values.vlen+1)/2) 00174 result=guess; 00175 else 00176 result=mingtguess; 00177 } 00178 else 00179 { 00180 /* copy vector and do recursive call which modifies copy */ 00181 SGVector<float64_t> copy(values.vlen); 00182 memcpy(copy.vector, values.vector, sizeof(float64_t)*values.vlen); 00183 result=median(copy, true); 00184 } 00185 } 00186 00187 return result; 00188 } 00189 00190 float64_t CStatistics::matrix_median(SGMatrix<float64_t> values, 00191 bool modify, bool in_place) 00192 { 00193 /* create a vector that uses the matrix data, dont do reference counting */ 00194 SGVector<float64_t> as_vector(values.matrix, 00195 values.num_rows*values.num_cols, false); 00196 00197 /* return vector median method */ 00198 return median(as_vector, modify, in_place); 00199 } 00200 00201 00202 float64_t CStatistics::variance(SGVector<float64_t> values) 00203 { 00204 ASSERT(values.vlen>1); 00205 ASSERT(values.vector); 00206 00207 float64_t mean=CStatistics::mean(values); 00208 00209 float64_t sum_squared_diff=0; 00210 for (index_t i=0; i<values.vlen; ++i) 00211 sum_squared_diff+=CMath::pow(values.vector[i]-mean, 2); 00212 00213 return sum_squared_diff/(values.vlen-1); 00214 } 00215 00216 SGVector<float64_t> CStatistics::matrix_mean(SGMatrix<float64_t> values, 00217 bool col_wise) 00218 { 00219 ASSERT(values.num_rows>0); 00220 ASSERT(values.num_cols>0); 00221 ASSERT(values.matrix); 00222 00223 SGVector<float64_t> result; 00224 00225 if (col_wise) 00226 { 00227 result=SGVector<float64_t>(values.num_cols); 00228 for (index_t j=0; j<values.num_cols; ++j) 00229 { 00230 result[j]=0; 00231 for (index_t i=0; i<values.num_rows; ++i) 00232 result[j]+=values(i,j); 00233 00234 result[j]/=values.num_rows; 00235 } 00236 } 00237 else 00238 { 00239 result=SGVector<float64_t>(values.num_rows); 00240 for (index_t j=0; j<values.num_rows; ++j) 00241 { 00242 result[j]=0; 00243 for (index_t i=0; i<values.num_cols; ++i) 00244 result[j]+=values(j,i); 00245 00246 result[j]/=values.num_cols; 00247 } 00248 } 00249 00250 return result; 00251 } 00252 00253 SGVector<float64_t> CStatistics::matrix_variance(SGMatrix<float64_t> values, 00254 bool col_wise) 00255 { 00256 ASSERT(values.num_rows>0); 00257 ASSERT(values.num_cols>0); 00258 ASSERT(values.matrix); 00259 00260 /* first compute mean */ 00261 SGVector<float64_t> mean=CStatistics::matrix_mean(values, col_wise); 00262 00263 SGVector<float64_t> result; 00264 00265 if (col_wise) 00266 { 00267 result=SGVector<float64_t>(values.num_cols); 00268 for (index_t j=0; j<values.num_cols; ++j) 00269 { 00270 result[j]=0; 00271 for (index_t i=0; i<values.num_rows; ++i) 00272 result[j]+=CMath::pow(values(i,j)-mean[j], 2); 00273 00274 result[j]/=(values.num_rows-1); 00275 } 00276 } 00277 else 00278 { 00279 result=SGVector<float64_t>(values.num_rows); 00280 for (index_t j=0; j<values.num_rows; ++j) 00281 { 00282 result[j]=0; 00283 for (index_t i=0; i<values.num_cols; ++i) 00284 result[j]+=CMath::pow(values(j,i)-mean[j], 2); 00285 00286 result[j]/=(values.num_cols-1); 00287 } 00288 } 00289 00290 return result; 00291 } 00292 00293 float64_t CStatistics::std_deviation(SGVector<float64_t> values) 00294 { 00295 return CMath::sqrt(variance(values)); 00296 } 00297 00298 SGVector<float64_t> CStatistics::matrix_std_deviation( 00299 SGMatrix<float64_t> values, bool col_wise) 00300 { 00301 SGVector<float64_t> var=CStatistics::matrix_variance(values, col_wise); 00302 for (index_t i=0; i<var.vlen; ++i) 00303 var[i]=CMath::sqrt(var[i]); 00304 00305 return var; 00306 } 00307 00308 #ifdef HAVE_LAPACK 00309 SGMatrix<float64_t> CStatistics::covariance_matrix( 00310 SGMatrix<float64_t> observations, bool in_place) 00311 { 00312 SGMatrix<float64_t> centered= 00313 in_place ? 00314 observations : 00315 SGMatrix<float64_t>(observations.num_rows, 00316 observations.num_cols); 00317 00318 if (!in_place) 00319 { 00320 memcpy(centered.matrix, observations.matrix, 00321 sizeof(float64_t)*observations.num_rows*observations.num_cols); 00322 } 00323 centered.remove_column_mean(); 00324 00325 /* compute 1/(m-1) * X' * X */ 00326 SGMatrix<float64_t> cov=SGMatrix<float64_t>::matrix_multiply(centered, 00327 centered, true, false, 1.0/(observations.num_rows-1)); 00328 00329 return cov; 00330 } 00331 #endif //HAVE_LAPACK 00332 00333 float64_t CStatistics::confidence_intervals_mean(SGVector<float64_t> values, 00334 float64_t alpha, float64_t& conf_int_low, float64_t& conf_int_up) 00335 { 00336 ASSERT(values.vlen>1); 00337 ASSERT(values.vector); 00338 00339 /* using one sided student t distribution evaluation */ 00340 alpha=alpha/2; 00341 00342 /* degrees of freedom */ 00343 int32_t deg=values.vlen-1; 00344 00345 /* compute absolute value of t-value */ 00346 float64_t t=CMath::abs(inverse_student_t(deg, alpha)); 00347 00348 /* values for calculating confidence interval */ 00349 float64_t std_dev=CStatistics::std_deviation(values); 00350 float64_t mean=CStatistics::mean(values); 00351 00352 /* compute confidence interval */ 00353 float64_t interval=t*std_dev/CMath::sqrt((float64_t)values.vlen); 00354 conf_int_low=mean-interval; 00355 conf_int_up=mean+interval; 00356 00357 return mean; 00358 } 00359 00360 float64_t CStatistics::inverse_student_t(int32_t k, float64_t p) 00361 { 00362 float64_t t; 00363 float64_t rk; 00364 float64_t z; 00365 int32_t rflg; 00366 float64_t result; 00367 00368 if (!(k>0 && greater(p, 0)) && less(p, 1)) 00369 { 00370 SG_SERROR("CStatistics::inverse_student_t_distribution(): " 00371 "Domain error\n"); 00372 } 00373 rk=k; 00374 if (greater(p, 0.25) && less(p, 0.75)) 00375 { 00376 if (equal(p, 0.5)) 00377 { 00378 result=0; 00379 return result; 00380 } 00381 z=1.0-2.0*p; 00382 z=inverse_incomplete_beta(0.5, 0.5*rk, CMath::abs(z)); 00383 t=CMath::sqrt(rk*z/(1.0-z)); 00384 if (less(p, 0.5)) 00385 { 00386 t=-t; 00387 } 00388 result=t; 00389 return result; 00390 } 00391 rflg=-1; 00392 if (greater_equal(p, 0.5)) 00393 { 00394 p=1.0-p; 00395 rflg=1; 00396 } 00397 z=inverse_incomplete_beta(0.5*rk, 0.5, 2.0*p); 00398 if (less(CMath::MAX_REAL_NUMBER*z, rk)) 00399 { 00400 result=rflg*CMath::MAX_REAL_NUMBER; 00401 return result; 00402 } 00403 t=CMath::sqrt(rk/z-rk); 00404 result=rflg*t; 00405 return result; 00406 } 00407 00408 float64_t CStatistics::inverse_incomplete_beta(float64_t a, float64_t b, 00409 float64_t y) 00410 { 00411 float64_t aaa; 00412 float64_t bbb; 00413 float64_t y0; 00414 float64_t d; 00415 float64_t yyy; 00416 float64_t x; 00417 float64_t x0; 00418 float64_t x1; 00419 float64_t lgm; 00420 float64_t yp; 00421 float64_t di; 00422 float64_t dithresh; 00423 float64_t yl; 00424 float64_t yh; 00425 float64_t xt; 00426 int32_t i; 00427 int32_t rflg; 00428 int32_t dir; 00429 int32_t nflg; 00430 int32_t mainlooppos; 00431 int32_t ihalve; 00432 int32_t ihalvecycle; 00433 int32_t newt; 00434 int32_t newtcycle; 00435 int32_t breaknewtcycle; 00436 int32_t breakihalvecycle; 00437 float64_t result; 00438 00439 i=0; 00440 if (!(greater_equal(y, 0) && less_equal(y, 1))) 00441 { 00442 SG_SERROR("CStatistics::inverse_incomplete_beta(): " 00443 "Domain error\n"); 00444 } 00445 00446 /* 00447 * special cases 00448 */ 00449 if (equal(y, 0)) 00450 { 00451 result=0; 00452 return result; 00453 } 00454 if (equal(y, 1.0)) 00455 { 00456 result=1; 00457 return result; 00458 } 00459 00460 /* 00461 * these initializations are not really necessary, 00462 * but without them compiler complains about 'possibly uninitialized variables'. 00463 */ 00464 dithresh=0; 00465 rflg=0; 00466 aaa=0; 00467 bbb=0; 00468 y0=0; 00469 x=0; 00470 yyy=0; 00471 lgm=0; 00472 dir=0; 00473 di=0; 00474 00475 /* 00476 * normal initializations 00477 */ 00478 x0=0.0; 00479 yl=0.0; 00480 x1=1.0; 00481 yh=1.0; 00482 nflg=0; 00483 mainlooppos=0; 00484 ihalve=1; 00485 ihalvecycle=2; 00486 newt=3; 00487 newtcycle=4; 00488 breaknewtcycle=5; 00489 breakihalvecycle=6; 00490 00491 /* 00492 * main loop 00493 */ 00494 for (;;) 00495 { 00496 00497 /* 00498 * start 00499 */ 00500 if (mainlooppos==0) 00501 { 00502 if (less_equal(a, 1.0) || less_equal(b, 1.0)) 00503 { 00504 dithresh=1.0e-6; 00505 rflg=0; 00506 aaa=a; 00507 bbb=b; 00508 y0=y; 00509 x=aaa/(aaa+bbb); 00510 yyy=incomplete_beta(aaa, bbb, x); 00511 mainlooppos=ihalve; 00512 continue; 00513 } 00514 else 00515 { 00516 dithresh=1.0e-4; 00517 } 00518 yp=-inverse_normal_cdf(y); 00519 if (greater(y, 0.5)) 00520 { 00521 rflg=1; 00522 aaa=b; 00523 bbb=a; 00524 y0=1.0-y; 00525 yp=-yp; 00526 } 00527 else 00528 { 00529 rflg=0; 00530 aaa=a; 00531 bbb=b; 00532 y0=y; 00533 } 00534 lgm=(yp*yp-3.0)/6.0; 00535 x=2.0/(1.0/(2.0*aaa-1.0)+1.0/(2.0*bbb-1.0)); 00536 d=yp*CMath::sqrt(x+lgm)/x 00537 -(1.0/(2.0*bbb-1.0)-1.0/(2.0*aaa-1.0)) 00538 *(lgm+5.0/6.0-2.0/(3.0*x)); 00539 d=2.0*d; 00540 if (less(d, CMath::log(CMath::MIN_REAL_NUMBER))) 00541 { 00542 x=0; 00543 break; 00544 } 00545 x=aaa/(aaa+bbb*CMath::exp(d)); 00546 yyy=incomplete_beta(aaa, bbb, x); 00547 yp=(yyy-y0)/y0; 00548 if (less(CMath::abs(yp), 0.2)) 00549 { 00550 mainlooppos=newt; 00551 continue; 00552 } 00553 mainlooppos=ihalve; 00554 continue; 00555 } 00556 00557 /* 00558 * ihalve 00559 */ 00560 if (mainlooppos==ihalve) 00561 { 00562 dir=0; 00563 di=0.5; 00564 i=0; 00565 mainlooppos=ihalvecycle; 00566 continue; 00567 } 00568 00569 /* 00570 * ihalvecycle 00571 */ 00572 if (mainlooppos==ihalvecycle) 00573 { 00574 if (i<=99) 00575 { 00576 if (i!=0) 00577 { 00578 x=x0+di*(x1-x0); 00579 if (equal(x, 1.0)) 00580 { 00581 x=1.0-CMath::MACHINE_EPSILON; 00582 } 00583 if (equal(x, 0.0)) 00584 { 00585 di=0.5; 00586 x=x0+di*(x1-x0); 00587 if (equal(x, 0.0)) 00588 { 00589 break; 00590 } 00591 } 00592 yyy=incomplete_beta(aaa, bbb, x); 00593 yp=(x1-x0)/(x1+x0); 00594 if (less(CMath::abs(yp), dithresh)) 00595 { 00596 mainlooppos=newt; 00597 continue; 00598 } 00599 yp=(yyy-y0)/y0; 00600 if (less(CMath::abs(yp), dithresh)) 00601 { 00602 mainlooppos=newt; 00603 continue; 00604 } 00605 } 00606 if (less(yyy, y0)) 00607 { 00608 x0=x; 00609 yl=yyy; 00610 if (dir<0) 00611 { 00612 dir=0; 00613 di=0.5; 00614 } 00615 else 00616 { 00617 if (dir>3) 00618 { 00619 di=1.0-(1.0-di)*(1.0-di); 00620 } 00621 else 00622 { 00623 if (dir>1) 00624 { 00625 di=0.5*di+0.5; 00626 } 00627 else 00628 { 00629 di=(y0-yyy)/(yh-yl); 00630 } 00631 } 00632 } 00633 dir=dir+1; 00634 if (greater(x0, 0.75)) 00635 { 00636 if (rflg==1) 00637 { 00638 rflg=0; 00639 aaa=a; 00640 bbb=b; 00641 y0=y; 00642 } 00643 else 00644 { 00645 rflg=1; 00646 aaa=b; 00647 bbb=a; 00648 y0=1.0-y; 00649 } 00650 x=1.0-x; 00651 yyy=incomplete_beta(aaa, bbb, x); 00652 x0=0.0; 00653 yl=0.0; 00654 x1=1.0; 00655 yh=1.0; 00656 mainlooppos=ihalve; 00657 continue; 00658 } 00659 } 00660 else 00661 { 00662 x1=x; 00663 if (rflg==1 && less(x1, CMath::MACHINE_EPSILON)) 00664 { 00665 x=0.0; 00666 break; 00667 } 00668 yh=yyy; 00669 if (dir>0) 00670 { 00671 dir=0; 00672 di=0.5; 00673 } 00674 else 00675 { 00676 if (dir<-3) 00677 { 00678 di=di*di; 00679 } 00680 else 00681 { 00682 if (dir<-1) 00683 { 00684 di=0.5*di; 00685 } 00686 else 00687 { 00688 di=(yyy-y0)/(yh-yl); 00689 } 00690 } 00691 } 00692 dir=dir-1; 00693 } 00694 i=i+1; 00695 mainlooppos=ihalvecycle; 00696 continue; 00697 } 00698 else 00699 { 00700 mainlooppos=breakihalvecycle; 00701 continue; 00702 } 00703 } 00704 00705 /* 00706 * breakihalvecycle 00707 */ 00708 if (mainlooppos==breakihalvecycle) 00709 { 00710 if (greater_equal(x0, 1.0)) 00711 { 00712 x=1.0-CMath::MACHINE_EPSILON; 00713 break; 00714 } 00715 if (less_equal(x, 0.0)) 00716 { 00717 x=0.0; 00718 break; 00719 } 00720 mainlooppos=newt; 00721 continue; 00722 } 00723 00724 /* 00725 * newt 00726 */ 00727 if (mainlooppos==newt) 00728 { 00729 if (nflg!=0) 00730 { 00731 break; 00732 } 00733 nflg=1; 00734 lgm=lgamma(aaa+bbb)-lgamma(aaa)-lgamma(bbb); 00735 i=0; 00736 mainlooppos=newtcycle; 00737 continue; 00738 } 00739 00740 /* 00741 * newtcycle 00742 */ 00743 if (mainlooppos==newtcycle) 00744 { 00745 if (i<=7) 00746 { 00747 if (i!=0) 00748 { 00749 yyy=incomplete_beta(aaa, bbb, x); 00750 } 00751 if (less(yyy, yl)) 00752 { 00753 x=x0; 00754 yyy=yl; 00755 } 00756 else 00757 { 00758 if (greater(yyy, yh)) 00759 { 00760 x=x1; 00761 yyy=yh; 00762 } 00763 else 00764 { 00765 if (less(yyy, y0)) 00766 { 00767 x0=x; 00768 yl=yyy; 00769 } 00770 else 00771 { 00772 x1=x; 00773 yh=yyy; 00774 } 00775 } 00776 } 00777 if (equal(x, 1.0) || equal(x, 0.0)) 00778 { 00779 mainlooppos=breaknewtcycle; 00780 continue; 00781 } 00782 d=(aaa-1.0)*CMath::log(x)+(bbb-1.0)*CMath::log(1.0-x)+lgm; 00783 if (less(d, CMath::log(CMath::MIN_REAL_NUMBER))) 00784 { 00785 break; 00786 } 00787 if (greater(d, CMath::log(CMath::MAX_REAL_NUMBER))) 00788 { 00789 mainlooppos=breaknewtcycle; 00790 continue; 00791 } 00792 d=CMath::exp(d); 00793 d=(yyy-y0)/d; 00794 xt=x-d; 00795 if (less_equal(xt, x0)) 00796 { 00797 yyy=(x-x0)/(x1-x0); 00798 xt=x0+0.5*yyy*(x-x0); 00799 if (less_equal(xt, 0.0)) 00800 { 00801 mainlooppos=breaknewtcycle; 00802 continue; 00803 } 00804 } 00805 if (greater_equal(xt, x1)) 00806 { 00807 yyy=(x1-x)/(x1-x0); 00808 xt=x1-0.5*yyy*(x1-x); 00809 if (greater_equal(xt, 1.0)) 00810 { 00811 mainlooppos=breaknewtcycle; 00812 continue; 00813 } 00814 } 00815 x=xt; 00816 if (less(CMath::abs(d/x), 128.0*CMath::MACHINE_EPSILON)) 00817 { 00818 break; 00819 } 00820 i=i+1; 00821 mainlooppos=newtcycle; 00822 continue; 00823 } 00824 else 00825 { 00826 mainlooppos=breaknewtcycle; 00827 continue; 00828 } 00829 } 00830 00831 /* 00832 * breaknewtcycle 00833 */ 00834 if (mainlooppos==breaknewtcycle) 00835 { 00836 dithresh=256.0*CMath::MACHINE_EPSILON; 00837 mainlooppos=ihalve; 00838 continue; 00839 } 00840 } 00841 00842 /* 00843 * done 00844 */ 00845 if (rflg!=0) 00846 { 00847 if (less_equal(x, CMath::MACHINE_EPSILON)) 00848 { 00849 x=1.0-CMath::MACHINE_EPSILON; 00850 } 00851 else 00852 { 00853 x=1.0-x; 00854 } 00855 } 00856 result=x; 00857 return result; 00858 } 00859 00860 float64_t CStatistics::incomplete_beta(float64_t a, float64_t b, float64_t x) 00861 { 00862 float64_t t; 00863 float64_t xc; 00864 float64_t w; 00865 float64_t y; 00866 int32_t flag; 00867 float64_t big; 00868 float64_t biginv; 00869 float64_t maxgam; 00870 float64_t minlog; 00871 float64_t maxlog; 00872 float64_t result; 00873 00874 big=4.503599627370496e15; 00875 biginv=2.22044604925031308085e-16; 00876 maxgam=171.624376956302725; 00877 minlog=CMath::log(CMath::MIN_REAL_NUMBER); 00878 maxlog=CMath::log(CMath::MAX_REAL_NUMBER); 00879 00880 if (!(greater(a, 0) && greater(b, 0))) 00881 { 00882 SG_SERROR("CStatistics::incomplete_beta(): " 00883 "Domain error\n"); 00884 } 00885 00886 if (!(greater_equal(x, 0) && less_equal(x, 1))) 00887 { 00888 SG_SERROR("CStatistics::incomplete_beta(): " 00889 "Domain error\n"); 00890 } 00891 00892 if (equal(x, 0)) 00893 { 00894 result=0; 00895 return result; 00896 } 00897 if (equal(x, 1)) 00898 { 00899 result=1; 00900 return result; 00901 } 00902 flag=0; 00903 if (less_equal(b*x, 1.0) && less_equal(x, 0.95)) 00904 { 00905 result=ibetaf_incompletebetaps(a, b, x, maxgam); 00906 return result; 00907 } 00908 w=1.0-x; 00909 if (greater(x, a/(a+b))) 00910 { 00911 flag=1; 00912 t=a; 00913 a=b; 00914 b=t; 00915 xc=x; 00916 x=w; 00917 } 00918 else 00919 { 00920 xc=w; 00921 } 00922 if ((flag==1 && less_equal(b*x, 1.0)) && less_equal(x, 0.95)) 00923 { 00924 t=ibetaf_incompletebetaps(a, b, x, maxgam); 00925 if (less_equal(t, CMath::MACHINE_EPSILON)) 00926 { 00927 result=1.0-CMath::MACHINE_EPSILON; 00928 } 00929 else 00930 { 00931 result=1.0-t; 00932 } 00933 return result; 00934 } 00935 y=x*(a+b-2.0)-(a-1.0); 00936 if (less(y, 0.0)) 00937 { 00938 w=ibetaf_incompletebetafe(a, b, x, big, biginv); 00939 } 00940 else 00941 { 00942 w=ibetaf_incompletebetafe2(a, b, x, big, biginv)/xc; 00943 } 00944 y=a*CMath::log(x); 00945 t=b*CMath::log(xc); 00946 if ((less(a+b, maxgam) && less(CMath::abs(y), maxlog)) 00947 && less(CMath::abs(t), maxlog)) 00948 { 00949 t=CMath::pow(xc, b); 00950 t=t*CMath::pow(x, a); 00951 t=t/a; 00952 t=t*w; 00953 t=t*(tgamma(a+b)/(tgamma(a)*tgamma(b))); 00954 if (flag==1) 00955 { 00956 if (less_equal(t, CMath::MACHINE_EPSILON)) 00957 { 00958 result=1.0-CMath::MACHINE_EPSILON; 00959 } 00960 else 00961 { 00962 result=1.0-t; 00963 } 00964 } 00965 else 00966 { 00967 result=t; 00968 } 00969 return result; 00970 } 00971 y=y+t+lgamma(a+b)-lgamma(a)-lgamma(b); 00972 y=y+CMath::log(w/a); 00973 if (less(y, minlog)) 00974 { 00975 t=0.0; 00976 } 00977 else 00978 { 00979 t=CMath::exp(y); 00980 } 00981 if (flag==1) 00982 { 00983 if (less_equal(t, CMath::MACHINE_EPSILON)) 00984 { 00985 t=1.0-CMath::MACHINE_EPSILON; 00986 } 00987 else 00988 { 00989 t=1.0-t; 00990 } 00991 } 00992 result=t; 00993 return result; 00994 } 00995 00996 float64_t CStatistics::inverse_normal_cdf(float64_t y0, float64_t mean, 00997 float64_t std_dev) 00998 { 00999 return inverse_normal_cdf(y0)*std_dev+mean; 01000 } 01001 01002 float64_t CStatistics::inverse_normal_cdf(float64_t y0) 01003 { 01004 float64_t expm2; 01005 float64_t s2pi; 01006 float64_t x; 01007 float64_t y; 01008 float64_t z; 01009 float64_t y2; 01010 float64_t x0; 01011 float64_t x1; 01012 int32_t code; 01013 float64_t p0; 01014 float64_t q0; 01015 float64_t p1; 01016 float64_t q1; 01017 float64_t p2; 01018 float64_t q2; 01019 float64_t result; 01020 01021 expm2=0.13533528323661269189; 01022 s2pi=2.50662827463100050242; 01023 if (less_equal(y0, 0)) 01024 { 01025 result=-CMath::MAX_REAL_NUMBER; 01026 return result; 01027 } 01028 if (greater_equal(y0, 1)) 01029 { 01030 result=CMath::MAX_REAL_NUMBER; 01031 return result; 01032 } 01033 code=1; 01034 y=y0; 01035 if (greater(y, 1.0-expm2)) 01036 { 01037 y=1.0-y; 01038 code=0; 01039 } 01040 if (greater(y, expm2)) 01041 { 01042 y=y-0.5; 01043 y2=y*y; 01044 p0=-59.9633501014107895267; 01045 p0=98.0010754185999661536+y2*p0; 01046 p0=-56.6762857469070293439+y2*p0; 01047 p0=13.9312609387279679503+y2*p0; 01048 p0=-1.23916583867381258016+y2*p0; 01049 q0=1; 01050 q0=1.95448858338141759834+y2*q0; 01051 q0=4.67627912898881538453+y2*q0; 01052 q0=86.3602421390890590575+y2*q0; 01053 q0=-225.462687854119370527+y2*q0; 01054 q0=200.260212380060660359+y2*q0; 01055 q0=-82.0372256168333339912+y2*q0; 01056 q0=15.9056225126211695515+y2*q0; 01057 q0=-1.18331621121330003142+y2*q0; 01058 x=y+y*y2*p0/q0; 01059 x=x*s2pi; 01060 result=x; 01061 return result; 01062 } 01063 x=CMath::sqrt(-2.0*CMath::log(y)); 01064 x0=x-CMath::log(x)/x; 01065 z=1.0/x; 01066 if (less(x, 8.0)) 01067 { 01068 p1=4.05544892305962419923; 01069 p1=31.5251094599893866154+z*p1; 01070 p1=57.1628192246421288162+z*p1; 01071 p1=44.0805073893200834700+z*p1; 01072 p1=14.6849561928858024014+z*p1; 01073 p1=2.18663306850790267539+z*p1; 01074 p1=-1.40256079171354495875*0.1+z*p1; 01075 p1=-3.50424626827848203418*0.01+z*p1; 01076 p1=-8.57456785154685413611*0.0001+z*p1; 01077 q1=1; 01078 q1=15.7799883256466749731+z*q1; 01079 q1=45.3907635128879210584+z*q1; 01080 q1=41.3172038254672030440+z*q1; 01081 q1=15.0425385692907503408+z*q1; 01082 q1=2.50464946208309415979+z*q1; 01083 q1=-1.42182922854787788574*0.1+z*q1; 01084 q1=-3.80806407691578277194*0.01+z*q1; 01085 q1=-9.33259480895457427372*0.0001+z*q1; 01086 x1=z*p1/q1; 01087 } 01088 else 01089 { 01090 p2=3.23774891776946035970; 01091 p2=6.91522889068984211695+z*p2; 01092 p2=3.93881025292474443415+z*p2; 01093 p2=1.33303460815807542389+z*p2; 01094 p2=2.01485389549179081538*0.1+z*p2; 01095 p2=1.23716634817820021358*0.01+z*p2; 01096 p2=3.01581553508235416007*0.0001+z*p2; 01097 p2=2.65806974686737550832*0.000001+z*p2; 01098 p2=6.23974539184983293730*0.000000001+z*p2; 01099 q2=1; 01100 q2=6.02427039364742014255+z*q2; 01101 q2=3.67983563856160859403+z*q2; 01102 q2=1.37702099489081330271+z*q2; 01103 q2=2.16236993594496635890*0.1+z*q2; 01104 q2=1.34204006088543189037*0.01+z*q2; 01105 q2=3.28014464682127739104*0.0001+z*q2; 01106 q2=2.89247864745380683936*0.000001+z*q2; 01107 q2=6.79019408009981274425*0.000000001+z*q2; 01108 x1=z*p2/q2; 01109 } 01110 x=x0-x1; 01111 if (code!=0) 01112 { 01113 x=-x; 01114 } 01115 result=x; 01116 return result; 01117 } 01118 01119 float64_t CStatistics::ibetaf_incompletebetaps(float64_t a, float64_t b, 01120 float64_t x, float64_t maxgam) 01121 { 01122 float64_t s; 01123 float64_t t; 01124 float64_t u; 01125 float64_t v; 01126 float64_t n; 01127 float64_t t1; 01128 float64_t z; 01129 float64_t ai; 01130 float64_t result; 01131 01132 ai=1.0/a; 01133 u=(1.0-b)*x; 01134 v=u/(a+1.0); 01135 t1=v; 01136 t=u; 01137 n=2.0; 01138 s=0.0; 01139 z=CMath::MACHINE_EPSILON*ai; 01140 while (greater(CMath::abs(v), z)) 01141 { 01142 u=(n-b)*x/n; 01143 t=t*u; 01144 v=t/(a+n); 01145 s=s+v; 01146 n=n+1.0; 01147 } 01148 s=s+t1; 01149 s=s+ai; 01150 u=a*CMath::log(x); 01151 if (less(a+b, maxgam) 01152 && less(CMath::abs(u), CMath::log(CMath::MAX_REAL_NUMBER))) 01153 { 01154 t=tgamma(a+b)/(tgamma(a)*tgamma(b)); 01155 s=s*t*CMath::pow(x, a); 01156 } 01157 else 01158 { 01159 t=lgamma(a+b)-lgamma(a)-lgamma(b)+u+CMath::log(s); 01160 if (less(t, CMath::log(CMath::MIN_REAL_NUMBER))) 01161 { 01162 s=0.0; 01163 } 01164 else 01165 { 01166 s=CMath::exp(t); 01167 } 01168 } 01169 result=s; 01170 return result; 01171 } 01172 01173 float64_t CStatistics::ibetaf_incompletebetafe(float64_t a, float64_t b, 01174 float64_t x, float64_t big, float64_t biginv) 01175 { 01176 float64_t xk; 01177 float64_t pk; 01178 float64_t pkm1; 01179 float64_t pkm2; 01180 float64_t qk; 01181 float64_t qkm1; 01182 float64_t qkm2; 01183 float64_t k1; 01184 float64_t k2; 01185 float64_t k3; 01186 float64_t k4; 01187 float64_t k5; 01188 float64_t k6; 01189 float64_t k7; 01190 float64_t k8; 01191 float64_t r; 01192 float64_t t; 01193 float64_t ans; 01194 float64_t thresh; 01195 int32_t n; 01196 float64_t result; 01197 01198 k1=a; 01199 k2=a+b; 01200 k3=a; 01201 k4=a+1.0; 01202 k5=1.0; 01203 k6=b-1.0; 01204 k7=k4; 01205 k8=a+2.0; 01206 pkm2=0.0; 01207 qkm2=1.0; 01208 pkm1=1.0; 01209 qkm1=1.0; 01210 ans=1.0; 01211 r=1.0; 01212 n=0; 01213 thresh=3.0*CMath::MACHINE_EPSILON; 01214 do 01215 { 01216 xk=-x*k1*k2/(k3*k4); 01217 pk=pkm1+pkm2*xk; 01218 qk=qkm1+qkm2*xk; 01219 pkm2=pkm1; 01220 pkm1=pk; 01221 qkm2=qkm1; 01222 qkm1=qk; 01223 xk=x*k5*k6/(k7*k8); 01224 pk=pkm1+pkm2*xk; 01225 qk=qkm1+qkm2*xk; 01226 pkm2=pkm1; 01227 pkm1=pk; 01228 qkm2=qkm1; 01229 qkm1=qk; 01230 if (not_equal(qk, 0)) 01231 { 01232 r=pk/qk; 01233 } 01234 if (not_equal(r, 0)) 01235 { 01236 t=CMath::abs((ans-r)/r); 01237 ans=r; 01238 } 01239 else 01240 { 01241 t=1.0; 01242 } 01243 if (less(t, thresh)) 01244 { 01245 break; 01246 } 01247 k1=k1+1.0; 01248 k2=k2+1.0; 01249 k3=k3+2.0; 01250 k4=k4+2.0; 01251 k5=k5+1.0; 01252 k6=k6-1.0; 01253 k7=k7+2.0; 01254 k8=k8+2.0; 01255 if (greater(CMath::abs(qk)+CMath::abs(pk), big)) 01256 { 01257 pkm2=pkm2*biginv; 01258 pkm1=pkm1*biginv; 01259 qkm2=qkm2*biginv; 01260 qkm1=qkm1*biginv; 01261 } 01262 if (less(CMath::abs(qk), biginv) || less(CMath::abs(pk), biginv)) 01263 { 01264 pkm2=pkm2*big; 01265 pkm1=pkm1*big; 01266 qkm2=qkm2*big; 01267 qkm1=qkm1*big; 01268 } 01269 n=n+1; 01270 } 01271 while (n!=300); 01272 result=ans; 01273 return result; 01274 } 01275 01276 float64_t CStatistics::ibetaf_incompletebetafe2(float64_t a, float64_t b, 01277 float64_t x, float64_t big, float64_t biginv) 01278 { 01279 float64_t xk; 01280 float64_t pk; 01281 float64_t pkm1; 01282 float64_t pkm2; 01283 float64_t qk; 01284 float64_t qkm1; 01285 float64_t qkm2; 01286 float64_t k1; 01287 float64_t k2; 01288 float64_t k3; 01289 float64_t k4; 01290 float64_t k5; 01291 float64_t k6; 01292 float64_t k7; 01293 float64_t k8; 01294 float64_t r; 01295 float64_t t; 01296 float64_t ans; 01297 float64_t z; 01298 float64_t thresh; 01299 int32_t n; 01300 float64_t result; 01301 01302 k1=a; 01303 k2=b-1.0; 01304 k3=a; 01305 k4=a+1.0; 01306 k5=1.0; 01307 k6=a+b; 01308 k7=a+1.0; 01309 k8=a+2.0; 01310 pkm2=0.0; 01311 qkm2=1.0; 01312 pkm1=1.0; 01313 qkm1=1.0; 01314 z=x/(1.0-x); 01315 ans=1.0; 01316 r=1.0; 01317 n=0; 01318 thresh=3.0*CMath::MACHINE_EPSILON; 01319 do 01320 { 01321 xk=-z*k1*k2/(k3*k4); 01322 pk=pkm1+pkm2*xk; 01323 qk=qkm1+qkm2*xk; 01324 pkm2=pkm1; 01325 pkm1=pk; 01326 qkm2=qkm1; 01327 qkm1=qk; 01328 xk=z*k5*k6/(k7*k8); 01329 pk=pkm1+pkm2*xk; 01330 qk=qkm1+qkm2*xk; 01331 pkm2=pkm1; 01332 pkm1=pk; 01333 qkm2=qkm1; 01334 qkm1=qk; 01335 if (not_equal(qk, 0)) 01336 { 01337 r=pk/qk; 01338 } 01339 if (not_equal(r, 0)) 01340 { 01341 t=CMath::abs((ans-r)/r); 01342 ans=r; 01343 } 01344 else 01345 { 01346 t=1.0; 01347 } 01348 if (less(t, thresh)) 01349 { 01350 break; 01351 } 01352 k1=k1+1.0; 01353 k2=k2-1.0; 01354 k3=k3+2.0; 01355 k4=k4+2.0; 01356 k5=k5+1.0; 01357 k6=k6+1.0; 01358 k7=k7+2.0; 01359 k8=k8+2.0; 01360 if (greater(CMath::abs(qk)+CMath::abs(pk), big)) 01361 { 01362 pkm2=pkm2*biginv; 01363 pkm1=pkm1*biginv; 01364 qkm2=qkm2*biginv; 01365 qkm1=qkm1*biginv; 01366 } 01367 if (less(CMath::abs(qk), biginv) || less(CMath::abs(pk), biginv)) 01368 { 01369 pkm2=pkm2*big; 01370 pkm1=pkm1*big; 01371 qkm2=qkm2*big; 01372 qkm1=qkm1*big; 01373 } 01374 n=n+1; 01375 } 01376 while (n!=300); 01377 result=ans; 01378 return result; 01379 } 01380 01381 float64_t CStatistics::incomplete_gamma(float64_t a, float64_t x) 01382 { 01383 float64_t igammaepsilon; 01384 float64_t ans; 01385 float64_t ax; 01386 float64_t c; 01387 float64_t r; 01388 float64_t result; 01389 01390 igammaepsilon=0.000000000000001; 01391 if (less_equal(x, 0) || less_equal(a, 0)) 01392 { 01393 result=0; 01394 return result; 01395 } 01396 if (greater(x, 1) && greater(x, a)) 01397 { 01398 result=1-incomplete_gamma_completed(a, x); 01399 return result; 01400 } 01401 ax=a*CMath::log(x)-x-lgamma(a); 01402 if (less(ax, -709.78271289338399)) 01403 { 01404 result=0; 01405 return result; 01406 } 01407 ax=CMath::exp(ax); 01408 r=a; 01409 c=1; 01410 ans=1; 01411 do 01412 { 01413 r=r+1; 01414 c=c*x/r; 01415 ans=ans+c; 01416 } 01417 while (greater(c/ans, igammaepsilon)); 01418 result=ans*ax/a; 01419 return result; 01420 } 01421 01422 float64_t CStatistics::incomplete_gamma_completed(float64_t a, float64_t x) 01423 { 01424 float64_t igammaepsilon; 01425 float64_t igammabignumber; 01426 float64_t igammabignumberinv; 01427 float64_t ans; 01428 float64_t ax; 01429 float64_t c; 01430 float64_t yc; 01431 float64_t r; 01432 float64_t t; 01433 float64_t y; 01434 float64_t z; 01435 float64_t pk; 01436 float64_t pkm1; 01437 float64_t pkm2; 01438 float64_t qk; 01439 float64_t qkm1; 01440 float64_t qkm2; 01441 float64_t result; 01442 01443 igammaepsilon=0.000000000000001; 01444 igammabignumber=4503599627370496.0; 01445 igammabignumberinv=2.22044604925031308085*0.0000000000000001; 01446 if (less_equal(x, 0) || less_equal(a, 0)) 01447 { 01448 result=1; 01449 return result; 01450 } 01451 if (less(x, 1) || less(x, a)) 01452 { 01453 result=1-incomplete_gamma(a, x); 01454 return result; 01455 } 01456 ax=a*CMath::log(x)-x-lgamma(a); 01457 if (less(ax, -709.78271289338399)) 01458 { 01459 result=0; 01460 return result; 01461 } 01462 ax=CMath::exp(ax); 01463 y=1-a; 01464 z=x+y+1; 01465 c=0; 01466 pkm2=1; 01467 qkm2=x; 01468 pkm1=x+1; 01469 qkm1=z*x; 01470 ans=pkm1/qkm1; 01471 do 01472 { 01473 c=c+1; 01474 y=y+1; 01475 z=z+2; 01476 yc=y*c; 01477 pk=pkm1*z-pkm2*yc; 01478 qk=qkm1*z-qkm2*yc; 01479 if (not_equal(qk, 0)) 01480 { 01481 r=pk/qk; 01482 t=CMath::abs((ans-r)/r); 01483 ans=r; 01484 } 01485 else 01486 { 01487 t=1; 01488 } 01489 pkm2=pkm1; 01490 pkm1=pk; 01491 qkm2=qkm1; 01492 qkm1=qk; 01493 if (greater(CMath::abs(pk), igammabignumber)) 01494 { 01495 pkm2=pkm2*igammabignumberinv; 01496 pkm1=pkm1*igammabignumberinv; 01497 qkm2=qkm2*igammabignumberinv; 01498 qkm1=qkm1*igammabignumberinv; 01499 } 01500 } 01501 while (greater(t, igammaepsilon)); 01502 result=ans*ax; 01503 return result; 01504 } 01505 01506 float64_t CStatistics::gamma_cdf(float64_t x, float64_t a, float64_t b) 01507 { 01508 /* definition of wikipedia: incomplete gamma devised by true gamma */ 01509 return incomplete_gamma(a, x/b); 01510 } 01511 01512 float64_t CStatistics::inverse_gamma_cdf(float64_t p, float64_t a, 01513 float64_t b) 01514 { 01515 /* inverse of gamma(a,b) CDF is 01516 * inverse_incomplete_gamma_completed(a, 1. - p) * b */ 01517 return inverse_incomplete_gamma_completed(a, 1-p)*b; 01518 } 01519 01520 float64_t CStatistics::inverse_incomplete_gamma_completed(float64_t a, 01521 float64_t y0) 01522 { 01523 float64_t igammaepsilon; 01524 float64_t iinvgammabignumber; 01525 float64_t x0; 01526 float64_t x1; 01527 float64_t x; 01528 float64_t yl; 01529 float64_t yh; 01530 float64_t y; 01531 float64_t d; 01532 float64_t lgm; 01533 float64_t dithresh; 01534 int32_t i; 01535 int32_t dir; 01536 float64_t result; 01537 01538 igammaepsilon=0.000000000000001; 01539 iinvgammabignumber=4503599627370496.0; 01540 x0=iinvgammabignumber; 01541 yl=0; 01542 x1=0; 01543 yh=1; 01544 dithresh=5*igammaepsilon; 01545 d=1/(9*a); 01546 y=1-d-inverse_normal_cdf(y0)*CMath::sqrt(d); 01547 x=a*y*y*y; 01548 lgm=lgamma(a); 01549 i=0; 01550 while (i<10) 01551 { 01552 if (greater(x, x0) || less(x, x1)) 01553 { 01554 d=0.0625; 01555 break; 01556 } 01557 y=incomplete_gamma_completed(a, x); 01558 if (less(y, yl) || greater(y, yh)) 01559 { 01560 d=0.0625; 01561 break; 01562 } 01563 if (less(y, y0)) 01564 { 01565 x0=x; 01566 yl=y; 01567 } 01568 else 01569 { 01570 x1=x; 01571 yh=y; 01572 } 01573 d=(a-1)*CMath::log(x)-x-lgm; 01574 if (less(d, -709.78271289338399)) 01575 { 01576 d=0.0625; 01577 break; 01578 } 01579 d=-CMath::exp(d); 01580 d=(y-y0)/d; 01581 if (less(CMath::abs(d/x), igammaepsilon)) 01582 { 01583 result=x; 01584 return result; 01585 } 01586 x=x-d; 01587 i=i+1; 01588 } 01589 if (equal(x0, iinvgammabignumber)) 01590 { 01591 if (less_equal(x, 0)) 01592 { 01593 x=1; 01594 } 01595 while (equal(x0, iinvgammabignumber)) 01596 { 01597 x=(1+d)*x; 01598 y=incomplete_gamma_completed(a, x); 01599 if (less(y, y0)) 01600 { 01601 x0=x; 01602 yl=y; 01603 break; 01604 } 01605 d=d+d; 01606 } 01607 } 01608 d=0.5; 01609 dir=0; 01610 i=0; 01611 while (i<400) 01612 { 01613 x=x1+d*(x0-x1); 01614 y=incomplete_gamma_completed(a, x); 01615 lgm=(x0-x1)/(x1+x0); 01616 if (less(CMath::abs(lgm), dithresh)) 01617 { 01618 break; 01619 } 01620 lgm=(y-y0)/y0; 01621 if (less(CMath::abs(lgm), dithresh)) 01622 { 01623 break; 01624 } 01625 if (less_equal(x, 0.0)) 01626 { 01627 break; 01628 } 01629 if (greater_equal(y, y0)) 01630 { 01631 x1=x; 01632 yh=y; 01633 if (dir<0) 01634 { 01635 dir=0; 01636 d=0.5; 01637 } 01638 else 01639 { 01640 if (dir>1) 01641 { 01642 d=0.5*d+0.5; 01643 } 01644 else 01645 { 01646 d=(y0-yl)/(yh-yl); 01647 } 01648 } 01649 dir=dir+1; 01650 } 01651 else 01652 { 01653 x0=x; 01654 yl=y; 01655 if (dir>0) 01656 { 01657 dir=0; 01658 d=0.5; 01659 } 01660 else 01661 { 01662 if (dir<-1) 01663 { 01664 d=0.5*d; 01665 } 01666 else 01667 { 01668 d=(y0-yl)/(yh-yl); 01669 } 01670 } 01671 dir=dir-1; 01672 } 01673 i=i+1; 01674 } 01675 result=x; 01676 return result; 01677 } 01678 01679 float64_t CStatistics::normal_cdf(float64_t x, float64_t std_dev) 01680 { 01681 return 0.5*(error_function(x/std_dev/1.41421356237309504880)+1); 01682 } 01683 01684 float64_t CStatistics::error_function(float64_t x) 01685 { 01686 float64_t xsq; 01687 float64_t s; 01688 float64_t p; 01689 float64_t q; 01690 float64_t result; 01691 01692 s=CMath::sign(x); 01693 x=CMath::abs(x); 01694 if (less(x, 0.5)) 01695 { 01696 xsq=x*x; 01697 p=0.007547728033418631287834; 01698 p=0.288805137207594084924010+xsq*p; 01699 p=14.3383842191748205576712+xsq*p; 01700 p=38.0140318123903008244444+xsq*p; 01701 p=3017.82788536507577809226+xsq*p; 01702 p=7404.07142710151470082064+xsq*p; 01703 p=80437.3630960840172832162+xsq*p; 01704 q=0.0; 01705 q=1.00000000000000000000000+xsq*q; 01706 q=38.0190713951939403753468+xsq*q; 01707 q=658.070155459240506326937+xsq*q; 01708 q=6379.60017324428279487120+xsq*q; 01709 q=34216.5257924628539769006+xsq*q; 01710 q=80437.3630960840172826266+xsq*q; 01711 result=s*1.1283791670955125738961589031*x*p/q; 01712 return result; 01713 } 01714 if (greater_equal(x, 10)) 01715 { 01716 result=s; 01717 return result; 01718 } 01719 result=s*(1-error_function_complement(x)); 01720 return result; 01721 } 01722 01723 float64_t CStatistics::error_function_complement(float64_t x) 01724 { 01725 float64_t p; 01726 float64_t q; 01727 float64_t result; 01728 01729 if (less(x, 0)) 01730 { 01731 result=2-error_function_complement(-x); 01732 return result; 01733 } 01734 if (less(x, 0.5)) 01735 { 01736 result=1.0-error_function(x); 01737 return result; 01738 } 01739 if (greater_equal(x, 10)) 01740 { 01741 result=0; 01742 return result; 01743 } 01744 p=0.0; 01745 p=0.5641877825507397413087057563+x*p; 01746 p=9.675807882987265400604202961+x*p; 01747 p=77.08161730368428609781633646+x*p; 01748 p=368.5196154710010637133875746+x*p; 01749 p=1143.262070703886173606073338+x*p; 01750 p=2320.439590251635247384768711+x*p; 01751 p=2898.0293292167655611275846+x*p; 01752 p=1826.3348842295112592168999+x*p; 01753 q=1.0; 01754 q=17.14980943627607849376131193+x*q; 01755 q=137.1255960500622202878443578+x*q; 01756 q=661.7361207107653469211984771+x*q; 01757 q=2094.384367789539593790281779+x*q; 01758 q=4429.612803883682726711528526+x*q; 01759 q=6089.5424232724435504633068+x*q; 01760 q=4958.82756472114071495438422+x*q; 01761 q=1826.3348842295112595576438+x*q; 01762 result=CMath::exp(-x*x)*p/q; 01763 return result; 01764 } 01765 01766 SGVector<float64_t> CStatistics::fishers_exact_test_for_multiple_2x3_tables( 01767 SGMatrix<float64_t> tables) 01768 { 01769 SGMatrix<float64_t> table(NULL, 2, 3, false); 01770 int32_t len=tables.num_cols/3; 01771 01772 SGVector<float64_t> v(len); 01773 for (int32_t i=0; i<len; i++) 01774 { 01775 table.matrix=&tables.matrix[2*3*i]; 01776 v.vector[i]=fishers_exact_test_for_2x3_table(table); 01777 } 01778 return v; 01779 } 01780 01781 float64_t CStatistics::fishers_exact_test_for_2x3_table( 01782 SGMatrix<float64_t> table) 01783 { 01784 ASSERT(table.num_rows==2); 01785 ASSERT(table.num_cols==3); 01786 01787 int32_t m_len=3+2; 01788 float64_t* m=SG_MALLOC(float64_t, 3+2); 01789 m[0]=table.matrix[0]+table.matrix[2]+table.matrix[4]; 01790 m[1]=table.matrix[1]+table.matrix[3]+table.matrix[5]; 01791 m[2]=table.matrix[0]+table.matrix[1]; 01792 m[3]=table.matrix[2]+table.matrix[3]; 01793 m[4]=table.matrix[4]+table.matrix[5]; 01794 01795 float64_t n=SGVector<float64_t>::sum(m, m_len)/2.0; 01796 int32_t x_len=2*3*CMath::sq(SGVector<float64_t>::max(m, m_len)); 01797 float64_t* x=SG_MALLOC(float64_t, x_len); 01798 SGVector<float64_t>::fill_vector(x, x_len, 0.0); 01799 01800 float64_t log_nom=0.0; 01801 for (int32_t i=0; i<3+2; i++) 01802 log_nom+=lgamma(m[i]+1); 01803 log_nom-=lgamma(n+1.0); 01804 01805 float64_t log_denomf=0; 01806 floatmax_t log_denom=0; 01807 01808 for (int32_t i=0; i<3*2; i++) 01809 { 01810 log_denom+=lgammal((floatmax_t)table.matrix[i]+1); 01811 log_denomf+=lgammal((floatmax_t)table.matrix[i]+1); 01812 } 01813 01814 floatmax_t prob_table_log=log_nom-log_denom; 01815 01816 int32_t dim1=CMath::min(m[0], m[2]); 01817 01818 //traverse all possible tables with given m 01819 int32_t counter=0; 01820 for (int32_t k=0; k<=dim1; k++) 01821 { 01822 for (int32_t l=CMath::max(0.0, m[0]-m[4]-k); 01823 l<=CMath::min(m[0]-k, m[3]); l++) 01824 { 01825 x[0+0*2+counter*2*3]=k; 01826 x[0+1*2+counter*2*3]=l; 01827 x[0+2*2+counter*2*3]=m[0]-x[0+0*2+counter*2*3]-x[0+1*2+counter*2*3]; 01828 x[1+0*2+counter*2*3]=m[2]-x[0+0*2+counter*2*3]; 01829 x[1+1*2+counter*2*3]=m[3]-x[0+1*2+counter*2*3]; 01830 x[1+2*2+counter*2*3]=m[4]-x[0+2*2+counter*2*3]; 01831 01832 counter++; 01833 } 01834 } 01835 01836 //#define DEBUG_FISHER_TABLE 01837 #ifdef DEBUG_FISHER_TABLE 01838 SG_SPRINT("counter=%d\n", counter); 01839 SG_SPRINT("dim1=%d\n", dim1); 01840 SG_SPRINT("l=%g...%g\n", CMath::max(0.0,m[0]-m[4]-0), CMath::min(m[0]-0, m[3])); 01841 SG_SPRINT("n=%g\n", n); 01842 SG_SPRINT("prob_table_log=%.18Lg\n", prob_table_log); 01843 SG_SPRINT("log_denomf=%.18g\n", log_denomf); 01844 SG_SPRINT("log_denom=%.18Lg\n", log_denom); 01845 SG_SPRINT("log_nom=%.18g\n", log_nom); 01846 display_vector(m, m_len, "marginals"); 01847 display_vector(x, 2*3*counter, "x"); 01848 #endif // DEBUG_FISHER_TABLE 01849 01850 floatmax_t* log_denom_vec=SG_MALLOC(floatmax_t, counter); 01851 SGVector<floatmax_t>::fill_vector(log_denom_vec, counter, (floatmax_t)0.0); 01852 01853 for (int32_t k=0; k<counter; k++) 01854 { 01855 for (int32_t j=0; j<3; j++) 01856 { 01857 for (int32_t i=0; i<2; i++) 01858 log_denom_vec[k]+=lgammal(x[i+j*2+k*2*3]+1.0); 01859 } 01860 } 01861 01862 for (int32_t i=0; i<counter; i++) 01863 log_denom_vec[i]=log_nom-log_denom_vec[i]; 01864 01865 #ifdef DEBUG_FISHER_TABLE 01866 display_vector(log_denom_vec, counter, "log_denom_vec"); 01867 #endif // DEBUG_FISHER_TABLE 01868 01869 float64_t nonrand_p=-CMath::INFTY; 01870 for (int32_t i=0; i<counter; i++) 01871 { 01872 if (log_denom_vec[i]<=prob_table_log) 01873 nonrand_p=CMath::logarithmic_sum(nonrand_p, log_denom_vec[i]); 01874 } 01875 01876 #ifdef DEBUG_FISHER_TABLE 01877 SG_SPRINT("nonrand_p=%.18g\n", nonrand_p); 01878 SG_SPRINT("exp_nonrand_p=%.18g\n", CMath::exp(nonrand_p)); 01879 #endif // DEBUG_FISHER_TABLE 01880 nonrand_p=CMath::exp(nonrand_p); 01881 01882 SG_FREE(log_denom_vec); 01883 SG_FREE(x); 01884 SG_FREE(m); 01885 01886 return nonrand_p; 01887 } 01888 01889 float64_t CStatistics::mutual_info(float64_t* p1, float64_t* p2, int32_t len) 01890 { 01891 double e=0; 01892 01893 for (int32_t i=0; i<len; i++) 01894 for (int32_t j=0; j<len; j++) 01895 e+=exp(p2[j*len+i])*(p2[j*len+i]-p1[i]-p1[j]); 01896 01897 return (float64_t)e; 01898 } 01899 01900 float64_t CStatistics::relative_entropy(float64_t* p, float64_t* q, int32_t len) 01901 { 01902 double e=0; 01903 01904 for (int32_t i=0; i<len; i++) 01905 e+=exp(p[i])*(p[i]-q[i]); 01906 01907 return (float64_t)e; 01908 } 01909 01910 float64_t CStatistics::entropy(float64_t* p, int32_t len) 01911 { 01912 double e=0; 01913 01914 for (int32_t i=0; i<len; i++) 01915 e-=exp(p[i])*p[i]; 01916 01917 return (float64_t)e; 01918 } 01919 01920 SGVector<int32_t> CStatistics::sample_indices(int32_t sample_size, int32_t N) 01921 { 01922 REQUIRE(sample_size<N, 01923 "sample size should be less than number of indices\n"); 01924 int32_t* idxs=SG_MALLOC(int32_t,N); 01925 int32_t i, rnd; 01926 int32_t* permuted_idxs=SG_MALLOC(int32_t,sample_size); 01927 01928 // reservoir sampling 01929 for (i=0; i<N; i++) 01930 idxs[i]=i; 01931 for (i=0; i<sample_size; i++) 01932 permuted_idxs[i]=idxs[i]; 01933 for (i=sample_size; i<N; i++) 01934 { 01935 rnd=CMath::random(1, i); 01936 if (rnd<sample_size) 01937 permuted_idxs[rnd]=idxs[i]; 01938 } 01939 SG_FREE(idxs); 01940 01941 CMath::qsort(permuted_idxs, sample_size); 01942 return SGVector<int32_t>(permuted_idxs, sample_size); 01943 } 01944 01945 float64_t CStatistics::dlgamma(float64_t x) 01946 { 01947 x = x+6.0; 01948 float64_t df = 1./(x*x); 01949 df = (((df/240-0.003968253986254)*df+1/120.0)*df-1/120.0)*df; 01950 df = df+log(x)-0.5/x-1.0/(x-1.0)-1.0/(x-2.0)-1.0/ 01951 (x-3.0)-1.0/(x-4.0)-1.0/(x-5.0)-1.0/(x-6.0); 01952 return df; 01953 } 01954