1 /* 2 * Licensed to the Apache Software Foundation (ASF) under one or more 3 * contributor license agreements. See the NOTICE file distributed with 4 * this work for additional information regarding copyright ownership. 5 * The ASF licenses this file to You under the Apache License, Version 2.0 6 * (the "License"); you may not use this file except in compliance with 7 * the License. You may obtain a copy of the License at 8 * 9 * http://www.apache.org/licenses/LICENSE-2.0 10 * 11 * Unless required by applicable law or agreed to in writing, software 12 * distributed under the License is distributed on an "AS IS" BASIS, 13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 * See the License for the specific language governing permissions and 15 * limitations under the License. 16 */ 17 package org.apache.commons.math.stat; 18 19 import org.apache.commons.math.MathRuntimeException; 20 import org.apache.commons.math.stat.descriptive.UnivariateStatistic; 21 import org.apache.commons.math.stat.descriptive.moment.GeometricMean; 22 import org.apache.commons.math.stat.descriptive.moment.Mean; 23 import org.apache.commons.math.stat.descriptive.moment.Variance; 24 import org.apache.commons.math.stat.descriptive.rank.Max; 25 import org.apache.commons.math.stat.descriptive.rank.Min; 26 import org.apache.commons.math.stat.descriptive.rank.Percentile; 27 import org.apache.commons.math.stat.descriptive.summary.Product; 28 import org.apache.commons.math.stat.descriptive.summary.Sum; 29 import org.apache.commons.math.stat.descriptive.summary.SumOfLogs; 30 import org.apache.commons.math.stat.descriptive.summary.SumOfSquares; 31 32 /** 33 * StatUtils provides static methods for computing statistics based on data 34 * stored in double[] arrays. 35 * 36 * @version $Revision: 772119 $ $Date: 2009-05-06 05:43:28 -0400 (Wed, 06 May 2009) $ 37 */ 38 public final class StatUtils { 39 40 /** sum */ 41 private static final UnivariateStatistic sum = new Sum(); 42 43 /** sumSq */ 44 private static final UnivariateStatistic sumSq = new SumOfSquares(); 45 46 /** prod */ 47 private static final UnivariateStatistic prod = new Product(); 48 49 /** sumLog */ 50 private static final UnivariateStatistic sumLog = new SumOfLogs(); 51 52 /** min */ 53 private static final UnivariateStatistic min = new Min(); 54 55 /** max */ 56 private static final UnivariateStatistic max = new Max(); 57 58 /** mean */ 59 private static final UnivariateStatistic mean = new Mean(); 60 61 /** variance */ 62 private static final Variance variance = new Variance(); 63 64 /** percentile */ 65 private static final Percentile percentile = new Percentile(); 66 67 /** geometric mean */ 68 private static final GeometricMean geometricMean = new GeometricMean(); 69 70 /** 71 * Private Constructor 72 */ 73 private StatUtils() { 74 } 75 76 /** 77 * Returns the sum of the values in the input array, or 78 * <code>Double.NaN</code> if the array is empty. 79 * <p> 80 * Throws <code>IllegalArgumentException</code> if the input array 81 * is null.</p> 82 * 83 * @param values array of values to sum 84 * @return the sum of the values or <code>Double.NaN</code> if the array 85 * is empty 86 * @throws IllegalArgumentException if the array is null 87 */ 88 public static double sum(final double[] values) { 89 return sum.evaluate(values); 90 } 91 92 /** 93 * Returns the sum of the entries in the specified portion of 94 * the input array, or <code>Double.NaN</code> if the designated subarray 95 * is empty. 96 * <p> 97 * Throws <code>IllegalArgumentException</code> if the array is null.</p> 98 * 99 * @param values the input array 100 * @param begin index of the first array element to include 101 * @param length the number of elements to include 102 * @return the sum of the values or Double.NaN if length = 0 103 * @throws IllegalArgumentException if the array is null or the array index 104 * parameters are not valid 105 */ 106 public static double sum(final double[] values, final int begin, 107 final int length) { 108 return sum.evaluate(values, begin, length); 109 } 110 111 /** 112 * Returns the sum of the squares of the entries in the input array, or 113 * <code>Double.NaN</code> if the array is empty. 114 * <p> 115 * Throws <code>IllegalArgumentException</code> if the array is null.</p> 116 * 117 * @param values input array 118 * @return the sum of the squared values or <code>Double.NaN</code> if the 119 * array is empty 120 * @throws IllegalArgumentException if the array is null 121 */ 122 public static double sumSq(final double[] values) { 123 return sumSq.evaluate(values); 124 } 125 126 /** 127 * Returns the sum of the squares of the entries in the specified portion of 128 * the input array, or <code>Double.NaN</code> if the designated subarray 129 * is empty. 130 * <p> 131 * Throws <code>IllegalArgumentException</code> if the array is null.</p> 132 * 133 * @param values the input array 134 * @param begin index of the first array element to include 135 * @param length the number of elements to include 136 * @return the sum of the squares of the values or Double.NaN if length = 0 137 * @throws IllegalArgumentException if the array is null or the array index 138 * parameters are not valid 139 */ 140 public static double sumSq(final double[] values, final int begin, 141 final int length) { 142 return sumSq.evaluate(values, begin, length); 143 } 144 145 /** 146 * Returns the product of the entries in the input array, or 147 * <code>Double.NaN</code> if the array is empty. 148 * <p> 149 * Throws <code>IllegalArgumentException</code> if the array is null.</p> 150 * 151 * @param values the input array 152 * @return the product of the values or Double.NaN if the array is empty 153 * @throws IllegalArgumentException if the array is null 154 */ 155 public static double product(final double[] values) { 156 return prod.evaluate(values); 157 } 158 159 /** 160 * Returns the product of the entries in the specified portion of 161 * the input array, or <code>Double.NaN</code> if the designated subarray 162 * is empty. 163 * <p> 164 * Throws <code>IllegalArgumentException</code> if the array is null.</p> 165 * 166 * @param values the input array 167 * @param begin index of the first array element to include 168 * @param length the number of elements to include 169 * @return the product of the values or Double.NaN if length = 0 170 * @throws IllegalArgumentException if the array is null or the array index 171 * parameters are not valid 172 */ 173 public static double product(final double[] values, final int begin, 174 final int length) { 175 return prod.evaluate(values, begin, length); 176 } 177 178 /** 179 * Returns the sum of the natural logs of the entries in the input array, or 180 * <code>Double.NaN</code> if the array is empty. 181 * <p> 182 * Throws <code>IllegalArgumentException</code> if the array is null.</p> 183 * <p> 184 * See {@link org.apache.commons.math.stat.descriptive.summary.SumOfLogs}. 185 * </p> 186 * 187 * @param values the input array 188 * @return the sum of the natural logs of the values or Double.NaN if 189 * the array is empty 190 * @throws IllegalArgumentException if the array is null 191 */ 192 public static double sumLog(final double[] values) { 193 return sumLog.evaluate(values); 194 } 195 196 /** 197 * Returns the sum of the natural logs of the entries in the specified portion of 198 * the input array, or <code>Double.NaN</code> if the designated subarray 199 * is empty. 200 * <p> 201 * Throws <code>IllegalArgumentException</code> if the array is null.</p> 202 * <p> 203 * See {@link org.apache.commons.math.stat.descriptive.summary.SumOfLogs}. 204 * </p> 205 * 206 * @param values the input array 207 * @param begin index of the first array element to include 208 * @param length the number of elements to include 209 * @return the sum of the natural logs of the values or Double.NaN if 210 * length = 0 211 * @throws IllegalArgumentException if the array is null or the array index 212 * parameters are not valid 213 */ 214 public static double sumLog(final double[] values, final int begin, 215 final int length) { 216 return sumLog.evaluate(values, begin, length); 217 } 218 219 /** 220 * Returns the arithmetic mean of the entries in the input array, or 221 * <code>Double.NaN</code> if the array is empty. 222 * <p> 223 * Throws <code>IllegalArgumentException</code> if the array is null.</p> 224 * <p> 225 * See {@link org.apache.commons.math.stat.descriptive.moment.Mean} for 226 * details on the computing algorithm.</p> 227 * 228 * @param values the input array 229 * @return the mean of the values or Double.NaN if the array is empty 230 * @throws IllegalArgumentException if the array is null 231 */ 232 public static double mean(final double[] values) { 233 return mean.evaluate(values); 234 } 235 236 /** 237 * Returns the arithmetic mean of the entries in the specified portion of 238 * the input array, or <code>Double.NaN</code> if the designated subarray 239 * is empty. 240 * <p> 241 * Throws <code>IllegalArgumentException</code> if the array is null.</p> 242 * <p> 243 * See {@link org.apache.commons.math.stat.descriptive.moment.Mean} for 244 * details on the computing algorithm.</p> 245 * 246 * @param values the input array 247 * @param begin index of the first array element to include 248 * @param length the number of elements to include 249 * @return the mean of the values or Double.NaN if length = 0 250 * @throws IllegalArgumentException if the array is null or the array index 251 * parameters are not valid 252 */ 253 public static double mean(final double[] values, final int begin, 254 final int length) { 255 return mean.evaluate(values, begin, length); 256 } 257 258 /** 259 * Returns the geometric mean of the entries in the input array, or 260 * <code>Double.NaN</code> if the array is empty. 261 * <p> 262 * Throws <code>IllegalArgumentException</code> if the array is null.</p> 263 * <p> 264 * See {@link org.apache.commons.math.stat.descriptive.moment.GeometricMean} 265 * for details on the computing algorithm.</p> 266 * 267 * @param values the input array 268 * @return the geometric mean of the values or Double.NaN if the array is empty 269 * @throws IllegalArgumentException if the array is null 270 */ 271 public static double geometricMean(final double[] values) { 272 return geometricMean.evaluate(values); 273 } 274 275 /** 276 * Returns the geometric mean of the entries in the specified portion of 277 * the input array, or <code>Double.NaN</code> if the designated subarray 278 * is empty. 279 * <p> 280 * Throws <code>IllegalArgumentException</code> if the array is null.</p> 281 * <p> 282 * See {@link org.apache.commons.math.stat.descriptive.moment.GeometricMean} 283 * for details on the computing algorithm.</p> 284 * 285 * @param values the input array 286 * @param begin index of the first array element to include 287 * @param length the number of elements to include 288 * @return the geometric mean of the values or Double.NaN if length = 0 289 * @throws IllegalArgumentException if the array is null or the array index 290 * parameters are not valid 291 */ 292 public static double geometricMean(final double[] values, final int begin, 293 final int length) { 294 return geometricMean.evaluate(values, begin, length); 295 } 296 297 298 /** 299 * Returns the variance of the entries in the input array, or 300 * <code>Double.NaN</code> if the array is empty. 301 * <p> 302 * See {@link org.apache.commons.math.stat.descriptive.moment.Variance} for 303 * details on the computing algorithm.</p> 304 * <p> 305 * Returns 0 for a single-value (i.e. length = 1) sample.</p> 306 * <p> 307 * Throws <code>IllegalArgumentException</code> if the array is null.</p> 308 * 309 * @param values the input array 310 * @return the variance of the values or Double.NaN if the array is empty 311 * @throws IllegalArgumentException if the array is null 312 */ 313 public static double variance(final double[] values) { 314 return variance.evaluate(values); 315 } 316 317 /** 318 * Returns the variance of the entries in the specified portion of 319 * the input array, or <code>Double.NaN</code> if the designated subarray 320 * is empty. 321 * <p> 322 * See {@link org.apache.commons.math.stat.descriptive.moment.Variance} for 323 * details on the computing algorithm.</p> 324 * <p> 325 * Returns 0 for a single-value (i.e. length = 1) sample.</p> 326 * <p> 327 * Throws <code>IllegalArgumentException</code> if the array is null or the 328 * array index parameters are not valid.</p> 329 * 330 * @param values the input array 331 * @param begin index of the first array element to include 332 * @param length the number of elements to include 333 * @return the variance of the values or Double.NaN if length = 0 334 * @throws IllegalArgumentException if the array is null or the array index 335 * parameters are not valid 336 */ 337 public static double variance(final double[] values, final int begin, 338 final int length) { 339 return variance.evaluate(values, begin, length); 340 } 341 342 /** 343 * Returns the variance of the entries in the specified portion of 344 * the input array, using the precomputed mean value. Returns 345 * <code>Double.NaN</code> if the designated subarray is empty. 346 * <p> 347 * See {@link org.apache.commons.math.stat.descriptive.moment.Variance} for 348 * details on the computing algorithm.</p> 349 * <p> 350 * The formula used assumes that the supplied mean value is the arithmetic 351 * mean of the sample data, not a known population parameter. This method 352 * is supplied only to save computation when the mean has already been 353 * computed.</p> 354 * <p> 355 * Returns 0 for a single-value (i.e. length = 1) sample.</p> 356 * <p> 357 * Throws <code>IllegalArgumentException</code> if the array is null or the 358 * array index parameters are not valid.</p> 359 * 360 * @param values the input array 361 * @param mean the precomputed mean value 362 * @param begin index of the first array element to include 363 * @param length the number of elements to include 364 * @return the variance of the values or Double.NaN if length = 0 365 * @throws IllegalArgumentException if the array is null or the array index 366 * parameters are not valid 367 */ 368 public static double variance(final double[] values, final double mean, 369 final int begin, final int length) { 370 return variance.evaluate(values, mean, begin, length); 371 } 372 373 /** 374 * Returns the variance of the entries in the input array, using the 375 * precomputed mean value. Returns <code>Double.NaN</code> if the array 376 * is empty. 377 * <p> 378 * See {@link org.apache.commons.math.stat.descriptive.moment.Variance} for 379 * details on the computing algorithm.</p> 380 * <p> 381 * The formula used assumes that the supplied mean value is the arithmetic 382 * mean of the sample data, not a known population parameter. This method 383 * is supplied only to save computation when the mean has already been 384 * computed.</p> 385 * <p> 386 * Returns 0 for a single-value (i.e. length = 1) sample.</p> 387 * <p> 388 * Throws <code>IllegalArgumentException</code> if the array is null.</p> 389 * 390 * @param values the input array 391 * @param mean the precomputed mean value 392 * @return the variance of the values or Double.NaN if the array is empty 393 * @throws IllegalArgumentException if the array is null 394 */ 395 public static double variance(final double[] values, final double mean) { 396 return variance.evaluate(values, mean); 397 } 398 399 /** 400 * Returns the maximum of the entries in the input array, or 401 * <code>Double.NaN</code> if the array is empty. 402 * <p> 403 * Throws <code>IllegalArgumentException</code> if the array is null.</p> 404 * <p> 405 * <ul> 406 * <li>The result is <code>NaN</code> iff all values are <code>NaN</code> 407 * (i.e. <code>NaN</code> values have no impact on the value of the statistic).</li> 408 * <li>If any of the values equals <code>Double.POSITIVE_INFINITY</code>, 409 * the result is <code>Double.POSITIVE_INFINITY.</code></li> 410 * </ul></p> 411 * 412 * @param values the input array 413 * @return the maximum of the values or Double.NaN if the array is empty 414 * @throws IllegalArgumentException if the array is null 415 */ 416 public static double max(final double[] values) { 417 return max.evaluate(values); 418 } 419 420 /** 421 * Returns the maximum of the entries in the specified portion of 422 * the input array, or <code>Double.NaN</code> if the designated subarray 423 * is empty. 424 * <p> 425 * Throws <code>IllegalArgumentException</code> if the array is null or 426 * the array index parameters are not valid.</p> 427 * <p> 428 * <ul> 429 * <li>The result is <code>NaN</code> iff all values are <code>NaN</code> 430 * (i.e. <code>NaN</code> values have no impact on the value of the statistic).</li> 431 * <li>If any of the values equals <code>Double.POSITIVE_INFINITY</code>, 432 * the result is <code>Double.POSITIVE_INFINITY.</code></li> 433 * </ul></p> 434 * 435 * @param values the input array 436 * @param begin index of the first array element to include 437 * @param length the number of elements to include 438 * @return the maximum of the values or Double.NaN if length = 0 439 * @throws IllegalArgumentException if the array is null or the array index 440 * parameters are not valid 441 */ 442 public static double max(final double[] values, final int begin, 443 final int length) { 444 return max.evaluate(values, begin, length); 445 } 446 447 /** 448 * Returns the minimum of the entries in the input array, or 449 * <code>Double.NaN</code> if the array is empty. 450 * <p> 451 * Throws <code>IllegalArgumentException</code> if the array is null.</p> 452 * <p> 453 * <ul> 454 * <li>The result is <code>NaN</code> iff all values are <code>NaN</code> 455 * (i.e. <code>NaN</code> values have no impact on the value of the statistic).</li> 456 * <li>If any of the values equals <code>Double.NEGATIVE_INFINITY</code>, 457 * the result is <code>Double.NEGATIVE_INFINITY.</code></li> 458 * </ul> </p> 459 * 460 * @param values the input array 461 * @return the minimum of the values or Double.NaN if the array is empty 462 * @throws IllegalArgumentException if the array is null 463 */ 464 public static double min(final double[] values) { 465 return min.evaluate(values); 466 } 467 468 /** 469 * Returns the minimum of the entries in the specified portion of 470 * the input array, or <code>Double.NaN</code> if the designated subarray 471 * is empty. 472 * <p> 473 * Throws <code>IllegalArgumentException</code> if the array is null or 474 * the array index parameters are not valid.</p> 475 * <p> 476 * <ul> 477 * <li>The result is <code>NaN</code> iff all values are <code>NaN</code> 478 * (i.e. <code>NaN</code> values have no impact on the value of the statistic).</li> 479 * <li>If any of the values equals <code>Double.NEGATIVE_INFINITY</code>, 480 * the result is <code>Double.NEGATIVE_INFINITY.</code></li> 481 * </ul></p> 482 * 483 * @param values the input array 484 * @param begin index of the first array element to include 485 * @param length the number of elements to include 486 * @return the minimum of the values or Double.NaN if length = 0 487 * @throws IllegalArgumentException if the array is null or the array index 488 * parameters are not valid 489 */ 490 public static double min(final double[] values, final int begin, 491 final int length) { 492 return min.evaluate(values, begin, length); 493 } 494 495 /** 496 * Returns an estimate of the <code>p</code>th percentile of the values 497 * in the <code>values</code> array. 498 * <p> 499 * <ul> 500 * <li>Returns <code>Double.NaN</code> if <code>values</code> has length 501 * <code>0</code></li></p> 502 * <li>Returns (for any value of <code>p</code>) <code>values[0]</code> 503 * if <code>values</code> has length <code>1</code></li> 504 * <li>Throws <code>IllegalArgumentException</code> if <code>values</code> 505 * is null or p is not a valid quantile value (p must be greater than 0 506 * and less than or equal to 100)</li> 507 * </ul></p> 508 * <p> 509 * See {@link org.apache.commons.math.stat.descriptive.rank.Percentile} for 510 * a description of the percentile estimation algorithm used.</p> 511 * 512 * @param values input array of values 513 * @param p the percentile value to compute 514 * @return the percentile value or Double.NaN if the array is empty 515 * @throws IllegalArgumentException if <code>values</code> is null 516 * or p is invalid 517 */ 518 public static double percentile(final double[] values, final double p) { 519 return percentile.evaluate(values,p); 520 } 521 522 /** 523 * Returns an estimate of the <code>p</code>th percentile of the values 524 * in the <code>values</code> array, starting with the element in (0-based) 525 * position <code>begin</code> in the array and including <code>length</code> 526 * values. 527 * <p> 528 * <ul> 529 * <li>Returns <code>Double.NaN</code> if <code>length = 0</code></li> 530 * <li>Returns (for any value of <code>p</code>) <code>values[begin]</code> 531 * if <code>length = 1 </code></li> 532 * <li>Throws <code>IllegalArgumentException</code> if <code>values</code> 533 * is null , <code>begin</code> or <code>length</code> is invalid, or 534 * <code>p</code> is not a valid quantile value (p must be greater than 0 535 * and less than or equal to 100)</li> 536 * </ul></p> 537 * <p> 538 * See {@link org.apache.commons.math.stat.descriptive.rank.Percentile} for 539 * a description of the percentile estimation algorithm used.</p> 540 * 541 * @param values array of input values 542 * @param p the percentile to compute 543 * @param begin the first (0-based) element to include in the computation 544 * @param length the number of array elements to include 545 * @return the percentile value 546 * @throws IllegalArgumentException if the parameters are not valid or the 547 * input array is null 548 */ 549 public static double percentile(final double[] values, final int begin, 550 final int length, final double p) { 551 return percentile.evaluate(values, begin, length, p); 552 } 553 554 /** 555 * Returns the sum of the (signed) differences between corresponding elements of the 556 * input arrays -- i.e., sum(sample1[i] - sample2[i]). 557 * 558 * @param sample1 the first array 559 * @param sample2 the second array 560 * @return sum of paired differences 561 * @throws IllegalArgumentException if the arrays do not have the same 562 * (positive) length 563 */ 564 public static double sumDifference(final double[] sample1, final double[] sample2) 565 throws IllegalArgumentException { 566 int n = sample1.length; 567 if ((n != sample2.length) || (n < 1)) { 568 throw MathRuntimeException.createIllegalArgumentException( 569 "input arrays must have the same positive length ({0} and {1})", 570 n, sample2.length); 571 } 572 double result = 0; 573 for (int i = 0; i < n; i++) { 574 result += sample1[i] - sample2[i]; 575 } 576 return result; 577 } 578 579 /** 580 * Returns the mean of the (signed) differences between corresponding elements of the 581 * input arrays -- i.e., sum(sample1[i] - sample2[i]) / sample1.length. 582 * 583 * @param sample1 the first array 584 * @param sample2 the second array 585 * @return mean of paired differences 586 * @throws IllegalArgumentException if the arrays do not have the same 587 * (positive) length 588 */ 589 public static double meanDifference(final double[] sample1, final double[] sample2) 590 throws IllegalArgumentException { 591 return sumDifference(sample1, sample2) / sample1.length; 592 } 593 594 /** 595 * Returns the variance of the (signed) differences between corresponding elements of the 596 * input arrays -- i.e., var(sample1[i] - sample2[i]). 597 * 598 * @param sample1 the first array 599 * @param sample2 the second array 600 * @param meanDifference the mean difference between corresponding entries 601 * @see #meanDifference(double[],double[]) 602 * @return variance of paired differences 603 * @throws IllegalArgumentException if the arrays do not have the same 604 * length or their common length is less than 2. 605 */ 606 public static double varianceDifference(final double[] sample1, final double[] sample2, 607 double meanDifference) throws IllegalArgumentException { 608 double sum1 = 0d; 609 double sum2 = 0d; 610 double diff = 0d; 611 int n = sample1.length; 612 if (n < 2 || n != sample2.length) { 613 throw MathRuntimeException.createIllegalArgumentException( 614 "input arrays must have the same length and at least two elements ({0} and {1})", 615 n, sample2.length); 616 } 617 for (int i = 0; i < n; i++) { 618 diff = sample1[i] - sample2[i]; 619 sum1 += (diff - meanDifference) *(diff - meanDifference); 620 sum2 += diff - meanDifference; 621 } 622 return (sum1 - (sum2 * sum2 / n)) / (n - 1); 623 } 624 625 }