View Javadoc

1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *      http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  package org.apache.commons.math.stat.descriptive;
18  
19  import java.io.Serializable;
20  
21  import org.apache.commons.math.MathRuntimeException;
22  import org.apache.commons.math.stat.descriptive.moment.GeometricMean;
23  import org.apache.commons.math.stat.descriptive.moment.Mean;
24  import org.apache.commons.math.stat.descriptive.moment.SecondMoment;
25  import org.apache.commons.math.stat.descriptive.moment.Variance;
26  import org.apache.commons.math.stat.descriptive.rank.Max;
27  import org.apache.commons.math.stat.descriptive.rank.Min;
28  import org.apache.commons.math.stat.descriptive.summary.Sum;
29  import org.apache.commons.math.stat.descriptive.summary.SumOfLogs;
30  import org.apache.commons.math.stat.descriptive.summary.SumOfSquares;
31  import org.apache.commons.math.util.MathUtils;
32  
33  /**
34   * <p>
35   * Computes summary statistics for a stream of data values added using the
36   * {@link #addValue(double) addValue} method. The data values are not stored in
37   * memory, so this class can be used to compute statistics for very large data
38   * streams.
39   * </p>
40   * <p>
41   * The {@link StorelessUnivariateStatistic} instances used to maintain summary
42   * state and compute statistics are configurable via setters. For example, the
43   * default implementation for the variance can be overridden by calling
44   * {@link #setVarianceImpl(StorelessUnivariateStatistic)}. Actual parameters to
45   * these methods must implement the {@link StorelessUnivariateStatistic}
46   * interface and configuration must be completed before <code>addValue</code>
47   * is called. No configuration is necessary to use the default, commons-math
48   * provided implementations.
49   * </p>
50   * <p>
51   * Note: This class is not thread-safe. Use
52   * {@link SynchronizedSummaryStatistics} if concurrent access from multiple
53   * threads is required.
54   * </p>
55   * @version $Revision: 791728 $ $Date: 2009-07-07 03:17:50 -0400 (Tue, 07 Jul 2009) $
56   */
57  public class SummaryStatistics implements StatisticalSummary, Serializable {
58  
59      /** Serialization UID */
60      private static final long serialVersionUID = -2021321786743555871L;
61  
62      /**
63       * Construct a SummaryStatistics instance
64       */
65      public SummaryStatistics() {
66      }
67  
68      /**
69       * A copy constructor. Creates a deep-copy of the {@code original}.
70       * 
71       * @param original the {@code SummaryStatistics} instance to copy
72       */
73      public SummaryStatistics(SummaryStatistics original) {
74          copy(original, this);
75      }
76  
77      /** count of values that have been added */
78      protected long n = 0;
79  
80      /** SecondMoment is used to compute the mean and variance */
81      protected SecondMoment secondMoment = new SecondMoment();
82  
83      /** sum of values that have been added */
84      protected Sum sum = new Sum();
85  
86      /** sum of the square of each value that has been added */
87      protected SumOfSquares sumsq = new SumOfSquares();
88  
89      /** min of values that have been added */
90      protected Min min = new Min();
91  
92      /** max of values that have been added */
93      protected Max max = new Max();
94  
95      /** sumLog of values that have been added */
96      protected SumOfLogs sumLog = new SumOfLogs();
97  
98      /** geoMean of values that have been added */
99      protected GeometricMean geoMean = new GeometricMean(sumLog);
100 
101     /** mean of values that have been added */
102     protected Mean mean = new Mean();
103 
104     /** variance of values that have been added */
105     protected Variance variance = new Variance();
106 
107     /** Sum statistic implementation - can be reset by setter. */
108     private StorelessUnivariateStatistic sumImpl = sum;
109 
110     /** Sum of squares statistic implementation - can be reset by setter. */
111     private StorelessUnivariateStatistic sumsqImpl = sumsq;
112 
113     /** Minimum statistic implementation - can be reset by setter. */
114     private StorelessUnivariateStatistic minImpl = min;
115 
116     /** Maximum statistic implementation - can be reset by setter. */
117     private StorelessUnivariateStatistic maxImpl = max;
118 
119     /** Sum of log statistic implementation - can be reset by setter. */
120     private StorelessUnivariateStatistic sumLogImpl = sumLog;
121 
122     /** Geometric mean statistic implementation - can be reset by setter. */
123     private StorelessUnivariateStatistic geoMeanImpl = geoMean;
124 
125     /** Mean statistic implementation - can be reset by setter. */
126     private StorelessUnivariateStatistic meanImpl = mean;
127 
128     /** Variance statistic implementation - can be reset by setter. */
129     private StorelessUnivariateStatistic varianceImpl = variance;
130 
131     /**
132      * Return a {@link StatisticalSummaryValues} instance reporting current
133      * statistics.
134      * @return Current values of statistics
135      */
136     public StatisticalSummary getSummary() {
137         return new StatisticalSummaryValues(getMean(), getVariance(), getN(), 
138                 getMax(), getMin(), getSum());
139     }
140 
141     /**
142      * Add a value to the data
143      * @param value the value to add
144      */
145     public void addValue(double value) {
146         sumImpl.increment(value);
147         sumsqImpl.increment(value);
148         minImpl.increment(value);
149         maxImpl.increment(value);
150         sumLogImpl.increment(value);
151         secondMoment.increment(value);
152         // If mean, variance or geomean have been overridden,
153         // need to increment these
154         if (!(meanImpl instanceof Mean)) {
155             meanImpl.increment(value);
156         }
157         if (!(varianceImpl instanceof Variance)) {
158             varianceImpl.increment(value);
159         }
160         if (!(geoMeanImpl instanceof GeometricMean)) {
161             geoMeanImpl.increment(value);
162         }
163         n++;
164     }
165 
166     /**
167      * Returns the number of available values
168      * @return The number of available values
169      */
170     public long getN() {
171         return n;
172     }
173 
174     /**
175      * Returns the sum of the values that have been added
176      * @return The sum or <code>Double.NaN</code> if no values have been added
177      */
178     public double getSum() {
179         return sumImpl.getResult();
180     }
181 
182     /**
183      * Returns the sum of the squares of the values that have been added.
184      * <p>
185      * Double.NaN is returned if no values have been added.
186      * </p>
187      * @return The sum of squares
188      */
189     public double getSumsq() {
190         return sumsqImpl.getResult();
191     }
192 
193     /**
194      * Returns the mean of the values that have been added.
195      * <p>
196      * Double.NaN is returned if no values have been added.
197      * </p>
198      * @return the mean
199      */
200     public double getMean() {
201         if (mean == meanImpl) {
202             return new Mean(secondMoment).getResult();
203         } else {
204             return meanImpl.getResult();
205         }
206     }
207 
208     /**
209      * Returns the standard deviation of the values that have been added.
210      * <p>
211      * Double.NaN is returned if no values have been added.
212      * </p>
213      * @return the standard deviation
214      */
215     public double getStandardDeviation() {
216         double stdDev = Double.NaN;
217         if (getN() > 0) {
218             if (getN() > 1) {
219                 stdDev = Math.sqrt(getVariance());
220             } else {
221                 stdDev = 0.0;
222             }
223         }
224         return (stdDev);
225     }
226 
227     /**
228      * Returns the variance of the values that have been added.
229      * <p>
230      * Double.NaN is returned if no values have been added.
231      * </p>
232      * @return the variance
233      */
234     public double getVariance() {
235         if (varianceImpl == variance) {
236             return new Variance(secondMoment).getResult();
237         } else {
238             return varianceImpl.getResult();
239         }
240     }
241 
242     /**
243      * Returns the maximum of the values that have been added.
244      * <p>
245      * Double.NaN is returned if no values have been added.
246      * </p>
247      * @return the maximum
248      */
249     public double getMax() {
250         return maxImpl.getResult();
251     }
252 
253     /**
254      * Returns the minimum of the values that have been added.
255      * <p>
256      * Double.NaN is returned if no values have been added.
257      * </p>
258      * @return the minimum
259      */
260     public double getMin() {
261         return minImpl.getResult();
262     }
263 
264     /**
265      * Returns the geometric mean of the values that have been added.
266      * <p>
267      * Double.NaN is returned if no values have been added.
268      * </p>
269      * @return the geometric mean
270      */
271     public double getGeometricMean() {
272         return geoMeanImpl.getResult();
273     }
274 
275     /**
276      * Returns the sum of the logs of the values that have been added.
277      * <p>
278      * Double.NaN is returned if no values have been added.
279      * </p>
280      * @return the sum of logs
281      * @since 1.2
282      */
283     public double getSumOfLogs() {
284         return sumLogImpl.getResult();
285     }
286     
287     /**
288      * Returns a statistic related to the Second Central Moment.  Specifically,
289      * what is returned is the sum of squared deviations from the sample mean
290      * among the values that have been added.
291      * <p>
292      * Returns <code>Double.NaN</code> if no data values have been added and
293      * returns <code>0</code> if there is just one value in the data set.</p>
294      * <p>
295      * @return second central moment statistic
296      * @since 2.0
297      */
298     public double getSecondMoment() {
299         return secondMoment.getResult();
300     }
301 
302     /**
303      * Generates a text report displaying summary statistics from values that
304      * have been added.
305      * @return String with line feeds displaying statistics
306      * @since 1.2
307      */
308     @Override
309     public String toString() {
310         StringBuffer outBuffer = new StringBuffer();
311         String endl = "\n";
312         outBuffer.append("SummaryStatistics:").append(endl);
313         outBuffer.append("n: ").append(getN()).append(endl);
314         outBuffer.append("min: ").append(getMin()).append(endl);
315         outBuffer.append("max: ").append(getMax()).append(endl);
316         outBuffer.append("mean: ").append(getMean()).append(endl);
317         outBuffer.append("geometric mean: ").append(getGeometricMean())
318             .append(endl);
319         outBuffer.append("variance: ").append(getVariance()).append(endl);
320         outBuffer.append("sum of squares: ").append(getSumsq()).append(endl);
321         outBuffer.append("standard deviation: ").append(getStandardDeviation())
322             .append(endl);
323         outBuffer.append("sum of logs: ").append(getSumOfLogs()).append(endl);
324         return outBuffer.toString();
325     }
326 
327     /**
328      * Resets all statistics and storage
329      */
330     public void clear() {
331         this.n = 0;
332         minImpl.clear();
333         maxImpl.clear();
334         sumImpl.clear();
335         sumLogImpl.clear();
336         sumsqImpl.clear();
337         geoMeanImpl.clear();
338         secondMoment.clear();
339         if (meanImpl != mean) {
340             meanImpl.clear();
341         }
342         if (varianceImpl != variance) {
343             varianceImpl.clear();
344         }
345     }
346 
347     /**
348      * Returns true iff <code>object</code> is a
349      * <code>SummaryStatistics</code> instance and all statistics have the
350      * same values as this.
351      * @param object the object to test equality against.
352      * @return true if object equals this
353      */
354     @Override
355     public boolean equals(Object object) {
356         if (object == this) {
357             return true;
358         }
359         if (object instanceof SummaryStatistics == false) {
360             return false;
361         }
362         SummaryStatistics stat = (SummaryStatistics)object;
363         return (MathUtils.equals(stat.getGeometricMean(), this.getGeometricMean()) &&
364                 MathUtils.equals(stat.getMax(), this.getMax()) &&
365                 MathUtils.equals(stat.getMean(), this.getMean()) &&
366                 MathUtils.equals(stat.getMin(), this.getMin()) &&
367                 MathUtils.equals(stat.getN(), this.getN()) &&
368                 MathUtils.equals(stat.getSum(), this.getSum()) &&
369                 MathUtils.equals(stat.getSumsq(), this.getSumsq()) &&
370                 MathUtils.equals(stat.getVariance(),
371             this.getVariance()));
372     }
373 
374     /**
375      * Returns hash code based on values of statistics
376      * @return hash code
377      */
378     @Override
379     public int hashCode() {
380         int result = 31 + MathUtils.hash(getGeometricMean());
381         result = result * 31 + MathUtils.hash(getGeometricMean());
382         result = result * 31 + MathUtils.hash(getMax());
383         result = result * 31 + MathUtils.hash(getMean());
384         result = result * 31 + MathUtils.hash(getMin());
385         result = result * 31 + MathUtils.hash(getN());
386         result = result * 31 + MathUtils.hash(getSum());
387         result = result * 31 + MathUtils.hash(getSumsq());
388         result = result * 31 + MathUtils.hash(getVariance());
389         return result;
390     }
391 
392     // Getters and setters for statistics implementations
393     /**
394      * Returns the currently configured Sum implementation
395      * @return the StorelessUnivariateStatistic implementing the sum
396      * @since 1.2
397      */
398     public StorelessUnivariateStatistic getSumImpl() {
399         return sumImpl;
400     }
401 
402     /**
403      * <p>
404      * Sets the implementation for the Sum.
405      * </p>
406      * <p>
407      * This method must be activated before any data has been added - i.e.,
408      * before {@link #addValue(double) addValue} has been used to add data;
409      * otherwise an IllegalStateException will be thrown.
410      * </p>
411      * @param sumImpl the StorelessUnivariateStatistic instance to use for
412      *        computing the Sum
413      * @throws IllegalStateException if data has already been added (i.e if n >
414      *         0)
415      * @since 1.2
416      */
417     public void setSumImpl(StorelessUnivariateStatistic sumImpl) {
418         checkEmpty();
419         this.sumImpl = sumImpl;
420     }
421 
422     /**
423      * Returns the currently configured sum of squares implementation
424      * @return the StorelessUnivariateStatistic implementing the sum of squares
425      * @since 1.2
426      */
427     public StorelessUnivariateStatistic getSumsqImpl() {
428         return sumsqImpl;
429     }
430 
431     /**
432      * <p>
433      * Sets the implementation for the sum of squares.
434      * </p>
435      * <p>
436      * This method must be activated before any data has been added - i.e.,
437      * before {@link #addValue(double) addValue} has been used to add data;
438      * otherwise an IllegalStateException will be thrown.
439      * </p>
440      * @param sumsqImpl the StorelessUnivariateStatistic instance to use for
441      *        computing the sum of squares
442      * @throws IllegalStateException if data has already been added (i.e if n >
443      *         0)
444      * @since 1.2
445      */
446     public void setSumsqImpl(StorelessUnivariateStatistic sumsqImpl) {
447         checkEmpty();
448         this.sumsqImpl = sumsqImpl;
449     }
450 
451     /**
452      * Returns the currently configured minimum implementation
453      * @return the StorelessUnivariateStatistic implementing the minimum
454      * @since 1.2
455      */
456     public StorelessUnivariateStatistic getMinImpl() {
457         return minImpl;
458     }
459 
460     /**
461      * <p>
462      * Sets the implementation for the minimum.
463      * </p>
464      * <p>
465      * This method must be activated before any data has been added - i.e.,
466      * before {@link #addValue(double) addValue} has been used to add data;
467      * otherwise an IllegalStateException will be thrown.
468      * </p>
469      * @param minImpl the StorelessUnivariateStatistic instance to use for
470      *        computing the minimum
471      * @throws IllegalStateException if data has already been added (i.e if n >
472      *         0)
473      * @since 1.2
474      */
475     public void setMinImpl(StorelessUnivariateStatistic minImpl) {
476         checkEmpty();
477         this.minImpl = minImpl;
478     }
479 
480     /**
481      * Returns the currently configured maximum implementation
482      * @return the StorelessUnivariateStatistic implementing the maximum
483      * @since 1.2
484      */
485     public StorelessUnivariateStatistic getMaxImpl() {
486         return maxImpl;
487     }
488 
489     /**
490      * <p>
491      * Sets the implementation for the maximum.
492      * </p>
493      * <p>
494      * This method must be activated before any data has been added - i.e.,
495      * before {@link #addValue(double) addValue} has been used to add data;
496      * otherwise an IllegalStateException will be thrown.
497      * </p>
498      * @param maxImpl the StorelessUnivariateStatistic instance to use for
499      *        computing the maximum
500      * @throws IllegalStateException if data has already been added (i.e if n >
501      *         0)
502      * @since 1.2
503      */
504     public void setMaxImpl(StorelessUnivariateStatistic maxImpl) {
505         checkEmpty();
506         this.maxImpl = maxImpl;
507     }
508 
509     /**
510      * Returns the currently configured sum of logs implementation
511      * @return the StorelessUnivariateStatistic implementing the log sum
512      * @since 1.2
513      */
514     public StorelessUnivariateStatistic getSumLogImpl() {
515         return sumLogImpl;
516     }
517 
518     /**
519      * <p>
520      * Sets the implementation for the sum of logs.
521      * </p>
522      * <p>
523      * This method must be activated before any data has been added - i.e.,
524      * before {@link #addValue(double) addValue} has been used to add data;
525      * otherwise an IllegalStateException will be thrown.
526      * </p>
527      * @param sumLogImpl the StorelessUnivariateStatistic instance to use for
528      *        computing the log sum
529      * @throws IllegalStateException if data has already been added (i.e if n >
530      *         0)
531      * @since 1.2
532      */
533     public void setSumLogImpl(StorelessUnivariateStatistic sumLogImpl) {
534         checkEmpty();
535         this.sumLogImpl = sumLogImpl;
536         geoMean.setSumLogImpl(sumLogImpl);
537     }
538 
539     /**
540      * Returns the currently configured geometric mean implementation
541      * @return the StorelessUnivariateStatistic implementing the geometric mean
542      * @since 1.2
543      */
544     public StorelessUnivariateStatistic getGeoMeanImpl() {
545         return geoMeanImpl;
546     }
547 
548     /**
549      * <p>
550      * Sets the implementation for the geometric mean.
551      * </p>
552      * <p>
553      * This method must be activated before any data has been added - i.e.,
554      * before {@link #addValue(double) addValue} has been used to add data;
555      * otherwise an IllegalStateException will be thrown.
556      * </p>
557      * @param geoMeanImpl the StorelessUnivariateStatistic instance to use for
558      *        computing the geometric mean
559      * @throws IllegalStateException if data has already been added (i.e if n >
560      *         0)
561      * @since 1.2
562      */
563     public void setGeoMeanImpl(StorelessUnivariateStatistic geoMeanImpl) {
564         checkEmpty();
565         this.geoMeanImpl = geoMeanImpl;
566     }
567 
568     /**
569      * Returns the currently configured mean implementation
570      * @return the StorelessUnivariateStatistic implementing the mean
571      * @since 1.2
572      */
573     public StorelessUnivariateStatistic getMeanImpl() {
574         return meanImpl;
575     }
576 
577     /**
578      * <p>
579      * Sets the implementation for the mean.
580      * </p>
581      * <p>
582      * This method must be activated before any data has been added - i.e.,
583      * before {@link #addValue(double) addValue} has been used to add data;
584      * otherwise an IllegalStateException will be thrown.
585      * </p>
586      * @param meanImpl the StorelessUnivariateStatistic instance to use for
587      *        computing the mean
588      * @throws IllegalStateException if data has already been added (i.e if n >
589      *         0)
590      * @since 1.2
591      */
592     public void setMeanImpl(StorelessUnivariateStatistic meanImpl) {
593         checkEmpty();
594         this.meanImpl = meanImpl;
595     }
596 
597     /**
598      * Returns the currently configured variance implementation
599      * @return the StorelessUnivariateStatistic implementing the variance
600      * @since 1.2
601      */
602     public StorelessUnivariateStatistic getVarianceImpl() {
603         return varianceImpl;
604     }
605 
606     /**
607      * <p>
608      * Sets the implementation for the variance.
609      * </p>
610      * <p>
611      * This method must be activated before any data has been added - i.e.,
612      * before {@link #addValue(double) addValue} has been used to add data;
613      * otherwise an IllegalStateException will be thrown.
614      * </p>
615      * @param varianceImpl the StorelessUnivariateStatistic instance to use for
616      *        computing the variance
617      * @throws IllegalStateException if data has already been added (i.e if n >
618      *         0)
619      * @since 1.2
620      */
621     public void setVarianceImpl(StorelessUnivariateStatistic varianceImpl) {
622         checkEmpty();
623         this.varianceImpl = varianceImpl;
624     }
625 
626     /**
627      * Throws IllegalStateException if n > 0.
628      */
629     private void checkEmpty() {
630         if (n > 0) {
631             throw MathRuntimeException.createIllegalStateException(
632                     "{0} values have been added before statistic is configured",
633                     n);
634         }
635     }
636     
637     /**
638      * Returns a copy of this SummaryStatistics instance with the same internal state.
639      * 
640      * @return a copy of this
641      */
642     public SummaryStatistics copy() {
643         SummaryStatistics result = new SummaryStatistics();
644         copy(this, result);
645         return result; 
646     }
647      
648     /**
649      * Copies source to dest.
650      * <p>Neither source nor dest can be null.</p>
651      * 
652      * @param source SummaryStatistics to copy
653      * @param dest SummaryStatistics to copy to
654      * @throws NullPointerException if either source or dest is null
655      */
656     public static void copy(SummaryStatistics source, SummaryStatistics dest) {
657         dest.maxImpl = source.maxImpl.copy();
658         dest.meanImpl = source.meanImpl.copy();
659         dest.minImpl = source.minImpl.copy();
660         dest.sumImpl = source.sumImpl.copy();
661         dest.varianceImpl = source.varianceImpl.copy();
662         dest.sumLogImpl = source.sumLogImpl.copy();
663         dest.sumsqImpl = source.sumsqImpl.copy();
664         if (source.getGeoMeanImpl() instanceof GeometricMean) {
665             // Keep geoMeanImpl, sumLogImpl in synch
666             dest.geoMeanImpl = new GeometricMean((SumOfLogs) dest.sumLogImpl);
667         } else {
668             dest.geoMeanImpl = source.geoMeanImpl.copy();
669         }
670         SecondMoment.copy(source.secondMoment, dest.secondMoment);
671         dest.n = source.n;
672         
673         // Make sure that if stat == statImpl in source, same
674         // holds in dest; otherwise copy stat
675         if (source.geoMean == source.geoMeanImpl) {
676             dest.geoMean = (GeometricMean) dest.geoMeanImpl;
677         } else {
678             GeometricMean.copy(source.geoMean, dest.geoMean);
679         } 
680         if (source.max == source.maxImpl) {
681             dest.max = (Max) dest.maxImpl;
682         } else {
683             Max.copy(source.max, dest.max);
684         } 
685         if (source.mean == source.meanImpl) {
686             dest.mean = (Mean) dest.meanImpl;
687         } else {
688             Mean.copy(source.mean, dest.mean);
689         } 
690         if (source.min == source.minImpl) {
691             dest.min = (Min) dest.minImpl;
692         } else {
693             Min.copy(source.min, dest.min);
694         } 
695         if (source.sum == source.sumImpl) {
696             dest.sum = (Sum) dest.sumImpl;
697         } else {
698             Sum.copy(source.sum, dest.sum);
699         } 
700         if (source.variance == source.varianceImpl) {
701             dest.variance = (Variance) dest.varianceImpl;
702         } else {
703             Variance.copy(source.variance, dest.variance);
704         } 
705         if (source.sumLog == source.sumLogImpl) {
706             dest.sumLog = (SumOfLogs) dest.sumLogImpl;
707         } else {
708             SumOfLogs.copy(source.sumLog, dest.sumLog);
709         } 
710         if (source.sumsq == source.sumsqImpl) {
711             dest.sumsq = (SumOfSquares) dest.sumsqImpl;
712         } else {
713             SumOfSquares.copy(source.sumsq, dest.sumsq);
714         } 
715     }
716 }