001 /* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 */ 017 package org.apache.commons.math.stat.descriptive; 018 019 import java.io.Serializable; 020 021 import org.apache.commons.math.MathRuntimeException; 022 import org.apache.commons.math.stat.descriptive.moment.GeometricMean; 023 import org.apache.commons.math.stat.descriptive.moment.Mean; 024 import org.apache.commons.math.stat.descriptive.moment.SecondMoment; 025 import org.apache.commons.math.stat.descriptive.moment.Variance; 026 import org.apache.commons.math.stat.descriptive.rank.Max; 027 import org.apache.commons.math.stat.descriptive.rank.Min; 028 import org.apache.commons.math.stat.descriptive.summary.Sum; 029 import org.apache.commons.math.stat.descriptive.summary.SumOfLogs; 030 import org.apache.commons.math.stat.descriptive.summary.SumOfSquares; 031 import org.apache.commons.math.util.MathUtils; 032 033 /** 034 * <p> 035 * Computes summary statistics for a stream of data values added using the 036 * {@link #addValue(double) addValue} method. The data values are not stored in 037 * memory, so this class can be used to compute statistics for very large data 038 * streams. 039 * </p> 040 * <p> 041 * The {@link StorelessUnivariateStatistic} instances used to maintain summary 042 * state and compute statistics are configurable via setters. For example, the 043 * default implementation for the variance can be overridden by calling 044 * {@link #setVarianceImpl(StorelessUnivariateStatistic)}. Actual parameters to 045 * these methods must implement the {@link StorelessUnivariateStatistic} 046 * interface and configuration must be completed before <code>addValue</code> 047 * is called. No configuration is necessary to use the default, commons-math 048 * provided implementations. 049 * </p> 050 * <p> 051 * Note: This class is not thread-safe. Use 052 * {@link SynchronizedSummaryStatistics} if concurrent access from multiple 053 * threads is required. 054 * </p> 055 * @version $Revision: 791728 $ $Date: 2009-07-07 03:17:50 -0400 (Tue, 07 Jul 2009) $ 056 */ 057 public class SummaryStatistics implements StatisticalSummary, Serializable { 058 059 /** Serialization UID */ 060 private static final long serialVersionUID = -2021321786743555871L; 061 062 /** 063 * Construct a SummaryStatistics instance 064 */ 065 public SummaryStatistics() { 066 } 067 068 /** 069 * A copy constructor. Creates a deep-copy of the {@code original}. 070 * 071 * @param original the {@code SummaryStatistics} instance to copy 072 */ 073 public SummaryStatistics(SummaryStatistics original) { 074 copy(original, this); 075 } 076 077 /** count of values that have been added */ 078 protected long n = 0; 079 080 /** SecondMoment is used to compute the mean and variance */ 081 protected SecondMoment secondMoment = new SecondMoment(); 082 083 /** sum of values that have been added */ 084 protected Sum sum = new Sum(); 085 086 /** sum of the square of each value that has been added */ 087 protected SumOfSquares sumsq = new SumOfSquares(); 088 089 /** min of values that have been added */ 090 protected Min min = new Min(); 091 092 /** max of values that have been added */ 093 protected Max max = new Max(); 094 095 /** sumLog of values that have been added */ 096 protected SumOfLogs sumLog = new SumOfLogs(); 097 098 /** geoMean of values that have been added */ 099 protected GeometricMean geoMean = new GeometricMean(sumLog); 100 101 /** mean of values that have been added */ 102 protected Mean mean = new Mean(); 103 104 /** variance of values that have been added */ 105 protected Variance variance = new Variance(); 106 107 /** Sum statistic implementation - can be reset by setter. */ 108 private StorelessUnivariateStatistic sumImpl = sum; 109 110 /** Sum of squares statistic implementation - can be reset by setter. */ 111 private StorelessUnivariateStatistic sumsqImpl = sumsq; 112 113 /** Minimum statistic implementation - can be reset by setter. */ 114 private StorelessUnivariateStatistic minImpl = min; 115 116 /** Maximum statistic implementation - can be reset by setter. */ 117 private StorelessUnivariateStatistic maxImpl = max; 118 119 /** Sum of log statistic implementation - can be reset by setter. */ 120 private StorelessUnivariateStatistic sumLogImpl = sumLog; 121 122 /** Geometric mean statistic implementation - can be reset by setter. */ 123 private StorelessUnivariateStatistic geoMeanImpl = geoMean; 124 125 /** Mean statistic implementation - can be reset by setter. */ 126 private StorelessUnivariateStatistic meanImpl = mean; 127 128 /** Variance statistic implementation - can be reset by setter. */ 129 private StorelessUnivariateStatistic varianceImpl = variance; 130 131 /** 132 * Return a {@link StatisticalSummaryValues} instance reporting current 133 * statistics. 134 * @return Current values of statistics 135 */ 136 public StatisticalSummary getSummary() { 137 return new StatisticalSummaryValues(getMean(), getVariance(), getN(), 138 getMax(), getMin(), getSum()); 139 } 140 141 /** 142 * Add a value to the data 143 * @param value the value to add 144 */ 145 public void addValue(double value) { 146 sumImpl.increment(value); 147 sumsqImpl.increment(value); 148 minImpl.increment(value); 149 maxImpl.increment(value); 150 sumLogImpl.increment(value); 151 secondMoment.increment(value); 152 // If mean, variance or geomean have been overridden, 153 // need to increment these 154 if (!(meanImpl instanceof Mean)) { 155 meanImpl.increment(value); 156 } 157 if (!(varianceImpl instanceof Variance)) { 158 varianceImpl.increment(value); 159 } 160 if (!(geoMeanImpl instanceof GeometricMean)) { 161 geoMeanImpl.increment(value); 162 } 163 n++; 164 } 165 166 /** 167 * Returns the number of available values 168 * @return The number of available values 169 */ 170 public long getN() { 171 return n; 172 } 173 174 /** 175 * Returns the sum of the values that have been added 176 * @return The sum or <code>Double.NaN</code> if no values have been added 177 */ 178 public double getSum() { 179 return sumImpl.getResult(); 180 } 181 182 /** 183 * Returns the sum of the squares of the values that have been added. 184 * <p> 185 * Double.NaN is returned if no values have been added. 186 * </p> 187 * @return The sum of squares 188 */ 189 public double getSumsq() { 190 return sumsqImpl.getResult(); 191 } 192 193 /** 194 * Returns the mean of the values that have been added. 195 * <p> 196 * Double.NaN is returned if no values have been added. 197 * </p> 198 * @return the mean 199 */ 200 public double getMean() { 201 if (mean == meanImpl) { 202 return new Mean(secondMoment).getResult(); 203 } else { 204 return meanImpl.getResult(); 205 } 206 } 207 208 /** 209 * Returns the standard deviation of the values that have been added. 210 * <p> 211 * Double.NaN is returned if no values have been added. 212 * </p> 213 * @return the standard deviation 214 */ 215 public double getStandardDeviation() { 216 double stdDev = Double.NaN; 217 if (getN() > 0) { 218 if (getN() > 1) { 219 stdDev = Math.sqrt(getVariance()); 220 } else { 221 stdDev = 0.0; 222 } 223 } 224 return (stdDev); 225 } 226 227 /** 228 * Returns the variance of the values that have been added. 229 * <p> 230 * Double.NaN is returned if no values have been added. 231 * </p> 232 * @return the variance 233 */ 234 public double getVariance() { 235 if (varianceImpl == variance) { 236 return new Variance(secondMoment).getResult(); 237 } else { 238 return varianceImpl.getResult(); 239 } 240 } 241 242 /** 243 * Returns the maximum of the values that have been added. 244 * <p> 245 * Double.NaN is returned if no values have been added. 246 * </p> 247 * @return the maximum 248 */ 249 public double getMax() { 250 return maxImpl.getResult(); 251 } 252 253 /** 254 * Returns the minimum of the values that have been added. 255 * <p> 256 * Double.NaN is returned if no values have been added. 257 * </p> 258 * @return the minimum 259 */ 260 public double getMin() { 261 return minImpl.getResult(); 262 } 263 264 /** 265 * Returns the geometric mean of the values that have been added. 266 * <p> 267 * Double.NaN is returned if no values have been added. 268 * </p> 269 * @return the geometric mean 270 */ 271 public double getGeometricMean() { 272 return geoMeanImpl.getResult(); 273 } 274 275 /** 276 * Returns the sum of the logs of the values that have been added. 277 * <p> 278 * Double.NaN is returned if no values have been added. 279 * </p> 280 * @return the sum of logs 281 * @since 1.2 282 */ 283 public double getSumOfLogs() { 284 return sumLogImpl.getResult(); 285 } 286 287 /** 288 * Returns a statistic related to the Second Central Moment. Specifically, 289 * what is returned is the sum of squared deviations from the sample mean 290 * among the values that have been added. 291 * <p> 292 * Returns <code>Double.NaN</code> if no data values have been added and 293 * returns <code>0</code> if there is just one value in the data set.</p> 294 * <p> 295 * @return second central moment statistic 296 * @since 2.0 297 */ 298 public double getSecondMoment() { 299 return secondMoment.getResult(); 300 } 301 302 /** 303 * Generates a text report displaying summary statistics from values that 304 * have been added. 305 * @return String with line feeds displaying statistics 306 * @since 1.2 307 */ 308 @Override 309 public String toString() { 310 StringBuffer outBuffer = new StringBuffer(); 311 String endl = "\n"; 312 outBuffer.append("SummaryStatistics:").append(endl); 313 outBuffer.append("n: ").append(getN()).append(endl); 314 outBuffer.append("min: ").append(getMin()).append(endl); 315 outBuffer.append("max: ").append(getMax()).append(endl); 316 outBuffer.append("mean: ").append(getMean()).append(endl); 317 outBuffer.append("geometric mean: ").append(getGeometricMean()) 318 .append(endl); 319 outBuffer.append("variance: ").append(getVariance()).append(endl); 320 outBuffer.append("sum of squares: ").append(getSumsq()).append(endl); 321 outBuffer.append("standard deviation: ").append(getStandardDeviation()) 322 .append(endl); 323 outBuffer.append("sum of logs: ").append(getSumOfLogs()).append(endl); 324 return outBuffer.toString(); 325 } 326 327 /** 328 * Resets all statistics and storage 329 */ 330 public void clear() { 331 this.n = 0; 332 minImpl.clear(); 333 maxImpl.clear(); 334 sumImpl.clear(); 335 sumLogImpl.clear(); 336 sumsqImpl.clear(); 337 geoMeanImpl.clear(); 338 secondMoment.clear(); 339 if (meanImpl != mean) { 340 meanImpl.clear(); 341 } 342 if (varianceImpl != variance) { 343 varianceImpl.clear(); 344 } 345 } 346 347 /** 348 * Returns true iff <code>object</code> is a 349 * <code>SummaryStatistics</code> instance and all statistics have the 350 * same values as this. 351 * @param object the object to test equality against. 352 * @return true if object equals this 353 */ 354 @Override 355 public boolean equals(Object object) { 356 if (object == this) { 357 return true; 358 } 359 if (object instanceof SummaryStatistics == false) { 360 return false; 361 } 362 SummaryStatistics stat = (SummaryStatistics)object; 363 return (MathUtils.equals(stat.getGeometricMean(), this.getGeometricMean()) && 364 MathUtils.equals(stat.getMax(), this.getMax()) && 365 MathUtils.equals(stat.getMean(), this.getMean()) && 366 MathUtils.equals(stat.getMin(), this.getMin()) && 367 MathUtils.equals(stat.getN(), this.getN()) && 368 MathUtils.equals(stat.getSum(), this.getSum()) && 369 MathUtils.equals(stat.getSumsq(), this.getSumsq()) && 370 MathUtils.equals(stat.getVariance(), 371 this.getVariance())); 372 } 373 374 /** 375 * Returns hash code based on values of statistics 376 * @return hash code 377 */ 378 @Override 379 public int hashCode() { 380 int result = 31 + MathUtils.hash(getGeometricMean()); 381 result = result * 31 + MathUtils.hash(getGeometricMean()); 382 result = result * 31 + MathUtils.hash(getMax()); 383 result = result * 31 + MathUtils.hash(getMean()); 384 result = result * 31 + MathUtils.hash(getMin()); 385 result = result * 31 + MathUtils.hash(getN()); 386 result = result * 31 + MathUtils.hash(getSum()); 387 result = result * 31 + MathUtils.hash(getSumsq()); 388 result = result * 31 + MathUtils.hash(getVariance()); 389 return result; 390 } 391 392 // Getters and setters for statistics implementations 393 /** 394 * Returns the currently configured Sum implementation 395 * @return the StorelessUnivariateStatistic implementing the sum 396 * @since 1.2 397 */ 398 public StorelessUnivariateStatistic getSumImpl() { 399 return sumImpl; 400 } 401 402 /** 403 * <p> 404 * Sets the implementation for the Sum. 405 * </p> 406 * <p> 407 * This method must be activated before any data has been added - i.e., 408 * before {@link #addValue(double) addValue} has been used to add data; 409 * otherwise an IllegalStateException will be thrown. 410 * </p> 411 * @param sumImpl the StorelessUnivariateStatistic instance to use for 412 * computing the Sum 413 * @throws IllegalStateException if data has already been added (i.e if n > 414 * 0) 415 * @since 1.2 416 */ 417 public void setSumImpl(StorelessUnivariateStatistic sumImpl) { 418 checkEmpty(); 419 this.sumImpl = sumImpl; 420 } 421 422 /** 423 * Returns the currently configured sum of squares implementation 424 * @return the StorelessUnivariateStatistic implementing the sum of squares 425 * @since 1.2 426 */ 427 public StorelessUnivariateStatistic getSumsqImpl() { 428 return sumsqImpl; 429 } 430 431 /** 432 * <p> 433 * Sets the implementation for the sum of squares. 434 * </p> 435 * <p> 436 * This method must be activated before any data has been added - i.e., 437 * before {@link #addValue(double) addValue} has been used to add data; 438 * otherwise an IllegalStateException will be thrown. 439 * </p> 440 * @param sumsqImpl the StorelessUnivariateStatistic instance to use for 441 * computing the sum of squares 442 * @throws IllegalStateException if data has already been added (i.e if n > 443 * 0) 444 * @since 1.2 445 */ 446 public void setSumsqImpl(StorelessUnivariateStatistic sumsqImpl) { 447 checkEmpty(); 448 this.sumsqImpl = sumsqImpl; 449 } 450 451 /** 452 * Returns the currently configured minimum implementation 453 * @return the StorelessUnivariateStatistic implementing the minimum 454 * @since 1.2 455 */ 456 public StorelessUnivariateStatistic getMinImpl() { 457 return minImpl; 458 } 459 460 /** 461 * <p> 462 * Sets the implementation for the minimum. 463 * </p> 464 * <p> 465 * This method must be activated before any data has been added - i.e., 466 * before {@link #addValue(double) addValue} has been used to add data; 467 * otherwise an IllegalStateException will be thrown. 468 * </p> 469 * @param minImpl the StorelessUnivariateStatistic instance to use for 470 * computing the minimum 471 * @throws IllegalStateException if data has already been added (i.e if n > 472 * 0) 473 * @since 1.2 474 */ 475 public void setMinImpl(StorelessUnivariateStatistic minImpl) { 476 checkEmpty(); 477 this.minImpl = minImpl; 478 } 479 480 /** 481 * Returns the currently configured maximum implementation 482 * @return the StorelessUnivariateStatistic implementing the maximum 483 * @since 1.2 484 */ 485 public StorelessUnivariateStatistic getMaxImpl() { 486 return maxImpl; 487 } 488 489 /** 490 * <p> 491 * Sets the implementation for the maximum. 492 * </p> 493 * <p> 494 * This method must be activated before any data has been added - i.e., 495 * before {@link #addValue(double) addValue} has been used to add data; 496 * otherwise an IllegalStateException will be thrown. 497 * </p> 498 * @param maxImpl the StorelessUnivariateStatistic instance to use for 499 * computing the maximum 500 * @throws IllegalStateException if data has already been added (i.e if n > 501 * 0) 502 * @since 1.2 503 */ 504 public void setMaxImpl(StorelessUnivariateStatistic maxImpl) { 505 checkEmpty(); 506 this.maxImpl = maxImpl; 507 } 508 509 /** 510 * Returns the currently configured sum of logs implementation 511 * @return the StorelessUnivariateStatistic implementing the log sum 512 * @since 1.2 513 */ 514 public StorelessUnivariateStatistic getSumLogImpl() { 515 return sumLogImpl; 516 } 517 518 /** 519 * <p> 520 * Sets the implementation for the sum of logs. 521 * </p> 522 * <p> 523 * This method must be activated before any data has been added - i.e., 524 * before {@link #addValue(double) addValue} has been used to add data; 525 * otherwise an IllegalStateException will be thrown. 526 * </p> 527 * @param sumLogImpl the StorelessUnivariateStatistic instance to use for 528 * computing the log sum 529 * @throws IllegalStateException if data has already been added (i.e if n > 530 * 0) 531 * @since 1.2 532 */ 533 public void setSumLogImpl(StorelessUnivariateStatistic sumLogImpl) { 534 checkEmpty(); 535 this.sumLogImpl = sumLogImpl; 536 geoMean.setSumLogImpl(sumLogImpl); 537 } 538 539 /** 540 * Returns the currently configured geometric mean implementation 541 * @return the StorelessUnivariateStatistic implementing the geometric mean 542 * @since 1.2 543 */ 544 public StorelessUnivariateStatistic getGeoMeanImpl() { 545 return geoMeanImpl; 546 } 547 548 /** 549 * <p> 550 * Sets the implementation for the geometric mean. 551 * </p> 552 * <p> 553 * This method must be activated before any data has been added - i.e., 554 * before {@link #addValue(double) addValue} has been used to add data; 555 * otherwise an IllegalStateException will be thrown. 556 * </p> 557 * @param geoMeanImpl the StorelessUnivariateStatistic instance to use for 558 * computing the geometric mean 559 * @throws IllegalStateException if data has already been added (i.e if n > 560 * 0) 561 * @since 1.2 562 */ 563 public void setGeoMeanImpl(StorelessUnivariateStatistic geoMeanImpl) { 564 checkEmpty(); 565 this.geoMeanImpl = geoMeanImpl; 566 } 567 568 /** 569 * Returns the currently configured mean implementation 570 * @return the StorelessUnivariateStatistic implementing the mean 571 * @since 1.2 572 */ 573 public StorelessUnivariateStatistic getMeanImpl() { 574 return meanImpl; 575 } 576 577 /** 578 * <p> 579 * Sets the implementation for the mean. 580 * </p> 581 * <p> 582 * This method must be activated before any data has been added - i.e., 583 * before {@link #addValue(double) addValue} has been used to add data; 584 * otherwise an IllegalStateException will be thrown. 585 * </p> 586 * @param meanImpl the StorelessUnivariateStatistic instance to use for 587 * computing the mean 588 * @throws IllegalStateException if data has already been added (i.e if n > 589 * 0) 590 * @since 1.2 591 */ 592 public void setMeanImpl(StorelessUnivariateStatistic meanImpl) { 593 checkEmpty(); 594 this.meanImpl = meanImpl; 595 } 596 597 /** 598 * Returns the currently configured variance implementation 599 * @return the StorelessUnivariateStatistic implementing the variance 600 * @since 1.2 601 */ 602 public StorelessUnivariateStatistic getVarianceImpl() { 603 return varianceImpl; 604 } 605 606 /** 607 * <p> 608 * Sets the implementation for the variance. 609 * </p> 610 * <p> 611 * This method must be activated before any data has been added - i.e., 612 * before {@link #addValue(double) addValue} has been used to add data; 613 * otherwise an IllegalStateException will be thrown. 614 * </p> 615 * @param varianceImpl the StorelessUnivariateStatistic instance to use for 616 * computing the variance 617 * @throws IllegalStateException if data has already been added (i.e if n > 618 * 0) 619 * @since 1.2 620 */ 621 public void setVarianceImpl(StorelessUnivariateStatistic varianceImpl) { 622 checkEmpty(); 623 this.varianceImpl = varianceImpl; 624 } 625 626 /** 627 * Throws IllegalStateException if n > 0. 628 */ 629 private void checkEmpty() { 630 if (n > 0) { 631 throw MathRuntimeException.createIllegalStateException( 632 "{0} values have been added before statistic is configured", 633 n); 634 } 635 } 636 637 /** 638 * Returns a copy of this SummaryStatistics instance with the same internal state. 639 * 640 * @return a copy of this 641 */ 642 public SummaryStatistics copy() { 643 SummaryStatistics result = new SummaryStatistics(); 644 copy(this, result); 645 return result; 646 } 647 648 /** 649 * Copies source to dest. 650 * <p>Neither source nor dest can be null.</p> 651 * 652 * @param source SummaryStatistics to copy 653 * @param dest SummaryStatistics to copy to 654 * @throws NullPointerException if either source or dest is null 655 */ 656 public static void copy(SummaryStatistics source, SummaryStatistics dest) { 657 dest.maxImpl = source.maxImpl.copy(); 658 dest.meanImpl = source.meanImpl.copy(); 659 dest.minImpl = source.minImpl.copy(); 660 dest.sumImpl = source.sumImpl.copy(); 661 dest.varianceImpl = source.varianceImpl.copy(); 662 dest.sumLogImpl = source.sumLogImpl.copy(); 663 dest.sumsqImpl = source.sumsqImpl.copy(); 664 if (source.getGeoMeanImpl() instanceof GeometricMean) { 665 // Keep geoMeanImpl, sumLogImpl in synch 666 dest.geoMeanImpl = new GeometricMean((SumOfLogs) dest.sumLogImpl); 667 } else { 668 dest.geoMeanImpl = source.geoMeanImpl.copy(); 669 } 670 SecondMoment.copy(source.secondMoment, dest.secondMoment); 671 dest.n = source.n; 672 673 // Make sure that if stat == statImpl in source, same 674 // holds in dest; otherwise copy stat 675 if (source.geoMean == source.geoMeanImpl) { 676 dest.geoMean = (GeometricMean) dest.geoMeanImpl; 677 } else { 678 GeometricMean.copy(source.geoMean, dest.geoMean); 679 } 680 if (source.max == source.maxImpl) { 681 dest.max = (Max) dest.maxImpl; 682 } else { 683 Max.copy(source.max, dest.max); 684 } 685 if (source.mean == source.meanImpl) { 686 dest.mean = (Mean) dest.meanImpl; 687 } else { 688 Mean.copy(source.mean, dest.mean); 689 } 690 if (source.min == source.minImpl) { 691 dest.min = (Min) dest.minImpl; 692 } else { 693 Min.copy(source.min, dest.min); 694 } 695 if (source.sum == source.sumImpl) { 696 dest.sum = (Sum) dest.sumImpl; 697 } else { 698 Sum.copy(source.sum, dest.sum); 699 } 700 if (source.variance == source.varianceImpl) { 701 dest.variance = (Variance) dest.varianceImpl; 702 } else { 703 Variance.copy(source.variance, dest.variance); 704 } 705 if (source.sumLog == source.sumLogImpl) { 706 dest.sumLog = (SumOfLogs) dest.sumLogImpl; 707 } else { 708 SumOfLogs.copy(source.sumLog, dest.sumLog); 709 } 710 if (source.sumsq == source.sumsqImpl) { 711 dest.sumsq = (SumOfSquares) dest.sumsqImpl; 712 } else { 713 SumOfSquares.copy(source.sumsq, dest.sumsq); 714 } 715 } 716 }