001    /*
002     * Licensed to the Apache Software Foundation (ASF) under one or more
003     * contributor license agreements.  See the NOTICE file distributed with
004     * this work for additional information regarding copyright ownership.
005     * The ASF licenses this file to You under the Apache License, Version 2.0
006     * (the "License"); you may not use this file except in compliance with
007     * the License.  You may obtain a copy of the License at
008     *
009     *      http://www.apache.org/licenses/LICENSE-2.0
010     *
011     * Unless required by applicable law or agreed to in writing, software
012     * distributed under the License is distributed on an "AS IS" BASIS,
013     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014     * See the License for the specific language governing permissions and
015     * limitations under the License.
016     */
017    package org.apache.commons.math.stat.descriptive;
018    
019    import java.io.Serializable;
020    
021    import org.apache.commons.math.MathRuntimeException;
022    import org.apache.commons.math.stat.descriptive.moment.GeometricMean;
023    import org.apache.commons.math.stat.descriptive.moment.Mean;
024    import org.apache.commons.math.stat.descriptive.moment.SecondMoment;
025    import org.apache.commons.math.stat.descriptive.moment.Variance;
026    import org.apache.commons.math.stat.descriptive.rank.Max;
027    import org.apache.commons.math.stat.descriptive.rank.Min;
028    import org.apache.commons.math.stat.descriptive.summary.Sum;
029    import org.apache.commons.math.stat.descriptive.summary.SumOfLogs;
030    import org.apache.commons.math.stat.descriptive.summary.SumOfSquares;
031    import org.apache.commons.math.util.MathUtils;
032    
033    /**
034     * <p>
035     * Computes summary statistics for a stream of data values added using the
036     * {@link #addValue(double) addValue} method. The data values are not stored in
037     * memory, so this class can be used to compute statistics for very large data
038     * streams.
039     * </p>
040     * <p>
041     * The {@link StorelessUnivariateStatistic} instances used to maintain summary
042     * state and compute statistics are configurable via setters. For example, the
043     * default implementation for the variance can be overridden by calling
044     * {@link #setVarianceImpl(StorelessUnivariateStatistic)}. Actual parameters to
045     * these methods must implement the {@link StorelessUnivariateStatistic}
046     * interface and configuration must be completed before <code>addValue</code>
047     * is called. No configuration is necessary to use the default, commons-math
048     * provided implementations.
049     * </p>
050     * <p>
051     * Note: This class is not thread-safe. Use
052     * {@link SynchronizedSummaryStatistics} if concurrent access from multiple
053     * threads is required.
054     * </p>
055     * @version $Revision: 791728 $ $Date: 2009-07-07 03:17:50 -0400 (Tue, 07 Jul 2009) $
056     */
057    public class SummaryStatistics implements StatisticalSummary, Serializable {
058    
059        /** Serialization UID */
060        private static final long serialVersionUID = -2021321786743555871L;
061    
062        /**
063         * Construct a SummaryStatistics instance
064         */
065        public SummaryStatistics() {
066        }
067    
068        /**
069         * A copy constructor. Creates a deep-copy of the {@code original}.
070         * 
071         * @param original the {@code SummaryStatistics} instance to copy
072         */
073        public SummaryStatistics(SummaryStatistics original) {
074            copy(original, this);
075        }
076    
077        /** count of values that have been added */
078        protected long n = 0;
079    
080        /** SecondMoment is used to compute the mean and variance */
081        protected SecondMoment secondMoment = new SecondMoment();
082    
083        /** sum of values that have been added */
084        protected Sum sum = new Sum();
085    
086        /** sum of the square of each value that has been added */
087        protected SumOfSquares sumsq = new SumOfSquares();
088    
089        /** min of values that have been added */
090        protected Min min = new Min();
091    
092        /** max of values that have been added */
093        protected Max max = new Max();
094    
095        /** sumLog of values that have been added */
096        protected SumOfLogs sumLog = new SumOfLogs();
097    
098        /** geoMean of values that have been added */
099        protected GeometricMean geoMean = new GeometricMean(sumLog);
100    
101        /** mean of values that have been added */
102        protected Mean mean = new Mean();
103    
104        /** variance of values that have been added */
105        protected Variance variance = new Variance();
106    
107        /** Sum statistic implementation - can be reset by setter. */
108        private StorelessUnivariateStatistic sumImpl = sum;
109    
110        /** Sum of squares statistic implementation - can be reset by setter. */
111        private StorelessUnivariateStatistic sumsqImpl = sumsq;
112    
113        /** Minimum statistic implementation - can be reset by setter. */
114        private StorelessUnivariateStatistic minImpl = min;
115    
116        /** Maximum statistic implementation - can be reset by setter. */
117        private StorelessUnivariateStatistic maxImpl = max;
118    
119        /** Sum of log statistic implementation - can be reset by setter. */
120        private StorelessUnivariateStatistic sumLogImpl = sumLog;
121    
122        /** Geometric mean statistic implementation - can be reset by setter. */
123        private StorelessUnivariateStatistic geoMeanImpl = geoMean;
124    
125        /** Mean statistic implementation - can be reset by setter. */
126        private StorelessUnivariateStatistic meanImpl = mean;
127    
128        /** Variance statistic implementation - can be reset by setter. */
129        private StorelessUnivariateStatistic varianceImpl = variance;
130    
131        /**
132         * Return a {@link StatisticalSummaryValues} instance reporting current
133         * statistics.
134         * @return Current values of statistics
135         */
136        public StatisticalSummary getSummary() {
137            return new StatisticalSummaryValues(getMean(), getVariance(), getN(), 
138                    getMax(), getMin(), getSum());
139        }
140    
141        /**
142         * Add a value to the data
143         * @param value the value to add
144         */
145        public void addValue(double value) {
146            sumImpl.increment(value);
147            sumsqImpl.increment(value);
148            minImpl.increment(value);
149            maxImpl.increment(value);
150            sumLogImpl.increment(value);
151            secondMoment.increment(value);
152            // If mean, variance or geomean have been overridden,
153            // need to increment these
154            if (!(meanImpl instanceof Mean)) {
155                meanImpl.increment(value);
156            }
157            if (!(varianceImpl instanceof Variance)) {
158                varianceImpl.increment(value);
159            }
160            if (!(geoMeanImpl instanceof GeometricMean)) {
161                geoMeanImpl.increment(value);
162            }
163            n++;
164        }
165    
166        /**
167         * Returns the number of available values
168         * @return The number of available values
169         */
170        public long getN() {
171            return n;
172        }
173    
174        /**
175         * Returns the sum of the values that have been added
176         * @return The sum or <code>Double.NaN</code> if no values have been added
177         */
178        public double getSum() {
179            return sumImpl.getResult();
180        }
181    
182        /**
183         * Returns the sum of the squares of the values that have been added.
184         * <p>
185         * Double.NaN is returned if no values have been added.
186         * </p>
187         * @return The sum of squares
188         */
189        public double getSumsq() {
190            return sumsqImpl.getResult();
191        }
192    
193        /**
194         * Returns the mean of the values that have been added.
195         * <p>
196         * Double.NaN is returned if no values have been added.
197         * </p>
198         * @return the mean
199         */
200        public double getMean() {
201            if (mean == meanImpl) {
202                return new Mean(secondMoment).getResult();
203            } else {
204                return meanImpl.getResult();
205            }
206        }
207    
208        /**
209         * Returns the standard deviation of the values that have been added.
210         * <p>
211         * Double.NaN is returned if no values have been added.
212         * </p>
213         * @return the standard deviation
214         */
215        public double getStandardDeviation() {
216            double stdDev = Double.NaN;
217            if (getN() > 0) {
218                if (getN() > 1) {
219                    stdDev = Math.sqrt(getVariance());
220                } else {
221                    stdDev = 0.0;
222                }
223            }
224            return (stdDev);
225        }
226    
227        /**
228         * Returns the variance of the values that have been added.
229         * <p>
230         * Double.NaN is returned if no values have been added.
231         * </p>
232         * @return the variance
233         */
234        public double getVariance() {
235            if (varianceImpl == variance) {
236                return new Variance(secondMoment).getResult();
237            } else {
238                return varianceImpl.getResult();
239            }
240        }
241    
242        /**
243         * Returns the maximum of the values that have been added.
244         * <p>
245         * Double.NaN is returned if no values have been added.
246         * </p>
247         * @return the maximum
248         */
249        public double getMax() {
250            return maxImpl.getResult();
251        }
252    
253        /**
254         * Returns the minimum of the values that have been added.
255         * <p>
256         * Double.NaN is returned if no values have been added.
257         * </p>
258         * @return the minimum
259         */
260        public double getMin() {
261            return minImpl.getResult();
262        }
263    
264        /**
265         * Returns the geometric mean of the values that have been added.
266         * <p>
267         * Double.NaN is returned if no values have been added.
268         * </p>
269         * @return the geometric mean
270         */
271        public double getGeometricMean() {
272            return geoMeanImpl.getResult();
273        }
274    
275        /**
276         * Returns the sum of the logs of the values that have been added.
277         * <p>
278         * Double.NaN is returned if no values have been added.
279         * </p>
280         * @return the sum of logs
281         * @since 1.2
282         */
283        public double getSumOfLogs() {
284            return sumLogImpl.getResult();
285        }
286        
287        /**
288         * Returns a statistic related to the Second Central Moment.  Specifically,
289         * what is returned is the sum of squared deviations from the sample mean
290         * among the values that have been added.
291         * <p>
292         * Returns <code>Double.NaN</code> if no data values have been added and
293         * returns <code>0</code> if there is just one value in the data set.</p>
294         * <p>
295         * @return second central moment statistic
296         * @since 2.0
297         */
298        public double getSecondMoment() {
299            return secondMoment.getResult();
300        }
301    
302        /**
303         * Generates a text report displaying summary statistics from values that
304         * have been added.
305         * @return String with line feeds displaying statistics
306         * @since 1.2
307         */
308        @Override
309        public String toString() {
310            StringBuffer outBuffer = new StringBuffer();
311            String endl = "\n";
312            outBuffer.append("SummaryStatistics:").append(endl);
313            outBuffer.append("n: ").append(getN()).append(endl);
314            outBuffer.append("min: ").append(getMin()).append(endl);
315            outBuffer.append("max: ").append(getMax()).append(endl);
316            outBuffer.append("mean: ").append(getMean()).append(endl);
317            outBuffer.append("geometric mean: ").append(getGeometricMean())
318                .append(endl);
319            outBuffer.append("variance: ").append(getVariance()).append(endl);
320            outBuffer.append("sum of squares: ").append(getSumsq()).append(endl);
321            outBuffer.append("standard deviation: ").append(getStandardDeviation())
322                .append(endl);
323            outBuffer.append("sum of logs: ").append(getSumOfLogs()).append(endl);
324            return outBuffer.toString();
325        }
326    
327        /**
328         * Resets all statistics and storage
329         */
330        public void clear() {
331            this.n = 0;
332            minImpl.clear();
333            maxImpl.clear();
334            sumImpl.clear();
335            sumLogImpl.clear();
336            sumsqImpl.clear();
337            geoMeanImpl.clear();
338            secondMoment.clear();
339            if (meanImpl != mean) {
340                meanImpl.clear();
341            }
342            if (varianceImpl != variance) {
343                varianceImpl.clear();
344            }
345        }
346    
347        /**
348         * Returns true iff <code>object</code> is a
349         * <code>SummaryStatistics</code> instance and all statistics have the
350         * same values as this.
351         * @param object the object to test equality against.
352         * @return true if object equals this
353         */
354        @Override
355        public boolean equals(Object object) {
356            if (object == this) {
357                return true;
358            }
359            if (object instanceof SummaryStatistics == false) {
360                return false;
361            }
362            SummaryStatistics stat = (SummaryStatistics)object;
363            return (MathUtils.equals(stat.getGeometricMean(), this.getGeometricMean()) &&
364                    MathUtils.equals(stat.getMax(), this.getMax()) &&
365                    MathUtils.equals(stat.getMean(), this.getMean()) &&
366                    MathUtils.equals(stat.getMin(), this.getMin()) &&
367                    MathUtils.equals(stat.getN(), this.getN()) &&
368                    MathUtils.equals(stat.getSum(), this.getSum()) &&
369                    MathUtils.equals(stat.getSumsq(), this.getSumsq()) &&
370                    MathUtils.equals(stat.getVariance(),
371                this.getVariance()));
372        }
373    
374        /**
375         * Returns hash code based on values of statistics
376         * @return hash code
377         */
378        @Override
379        public int hashCode() {
380            int result = 31 + MathUtils.hash(getGeometricMean());
381            result = result * 31 + MathUtils.hash(getGeometricMean());
382            result = result * 31 + MathUtils.hash(getMax());
383            result = result * 31 + MathUtils.hash(getMean());
384            result = result * 31 + MathUtils.hash(getMin());
385            result = result * 31 + MathUtils.hash(getN());
386            result = result * 31 + MathUtils.hash(getSum());
387            result = result * 31 + MathUtils.hash(getSumsq());
388            result = result * 31 + MathUtils.hash(getVariance());
389            return result;
390        }
391    
392        // Getters and setters for statistics implementations
393        /**
394         * Returns the currently configured Sum implementation
395         * @return the StorelessUnivariateStatistic implementing the sum
396         * @since 1.2
397         */
398        public StorelessUnivariateStatistic getSumImpl() {
399            return sumImpl;
400        }
401    
402        /**
403         * <p>
404         * Sets the implementation for the Sum.
405         * </p>
406         * <p>
407         * This method must be activated before any data has been added - i.e.,
408         * before {@link #addValue(double) addValue} has been used to add data;
409         * otherwise an IllegalStateException will be thrown.
410         * </p>
411         * @param sumImpl the StorelessUnivariateStatistic instance to use for
412         *        computing the Sum
413         * @throws IllegalStateException if data has already been added (i.e if n >
414         *         0)
415         * @since 1.2
416         */
417        public void setSumImpl(StorelessUnivariateStatistic sumImpl) {
418            checkEmpty();
419            this.sumImpl = sumImpl;
420        }
421    
422        /**
423         * Returns the currently configured sum of squares implementation
424         * @return the StorelessUnivariateStatistic implementing the sum of squares
425         * @since 1.2
426         */
427        public StorelessUnivariateStatistic getSumsqImpl() {
428            return sumsqImpl;
429        }
430    
431        /**
432         * <p>
433         * Sets the implementation for the sum of squares.
434         * </p>
435         * <p>
436         * This method must be activated before any data has been added - i.e.,
437         * before {@link #addValue(double) addValue} has been used to add data;
438         * otherwise an IllegalStateException will be thrown.
439         * </p>
440         * @param sumsqImpl the StorelessUnivariateStatistic instance to use for
441         *        computing the sum of squares
442         * @throws IllegalStateException if data has already been added (i.e if n >
443         *         0)
444         * @since 1.2
445         */
446        public void setSumsqImpl(StorelessUnivariateStatistic sumsqImpl) {
447            checkEmpty();
448            this.sumsqImpl = sumsqImpl;
449        }
450    
451        /**
452         * Returns the currently configured minimum implementation
453         * @return the StorelessUnivariateStatistic implementing the minimum
454         * @since 1.2
455         */
456        public StorelessUnivariateStatistic getMinImpl() {
457            return minImpl;
458        }
459    
460        /**
461         * <p>
462         * Sets the implementation for the minimum.
463         * </p>
464         * <p>
465         * This method must be activated before any data has been added - i.e.,
466         * before {@link #addValue(double) addValue} has been used to add data;
467         * otherwise an IllegalStateException will be thrown.
468         * </p>
469         * @param minImpl the StorelessUnivariateStatistic instance to use for
470         *        computing the minimum
471         * @throws IllegalStateException if data has already been added (i.e if n >
472         *         0)
473         * @since 1.2
474         */
475        public void setMinImpl(StorelessUnivariateStatistic minImpl) {
476            checkEmpty();
477            this.minImpl = minImpl;
478        }
479    
480        /**
481         * Returns the currently configured maximum implementation
482         * @return the StorelessUnivariateStatistic implementing the maximum
483         * @since 1.2
484         */
485        public StorelessUnivariateStatistic getMaxImpl() {
486            return maxImpl;
487        }
488    
489        /**
490         * <p>
491         * Sets the implementation for the maximum.
492         * </p>
493         * <p>
494         * This method must be activated before any data has been added - i.e.,
495         * before {@link #addValue(double) addValue} has been used to add data;
496         * otherwise an IllegalStateException will be thrown.
497         * </p>
498         * @param maxImpl the StorelessUnivariateStatistic instance to use for
499         *        computing the maximum
500         * @throws IllegalStateException if data has already been added (i.e if n >
501         *         0)
502         * @since 1.2
503         */
504        public void setMaxImpl(StorelessUnivariateStatistic maxImpl) {
505            checkEmpty();
506            this.maxImpl = maxImpl;
507        }
508    
509        /**
510         * Returns the currently configured sum of logs implementation
511         * @return the StorelessUnivariateStatistic implementing the log sum
512         * @since 1.2
513         */
514        public StorelessUnivariateStatistic getSumLogImpl() {
515            return sumLogImpl;
516        }
517    
518        /**
519         * <p>
520         * Sets the implementation for the sum of logs.
521         * </p>
522         * <p>
523         * This method must be activated before any data has been added - i.e.,
524         * before {@link #addValue(double) addValue} has been used to add data;
525         * otherwise an IllegalStateException will be thrown.
526         * </p>
527         * @param sumLogImpl the StorelessUnivariateStatistic instance to use for
528         *        computing the log sum
529         * @throws IllegalStateException if data has already been added (i.e if n >
530         *         0)
531         * @since 1.2
532         */
533        public void setSumLogImpl(StorelessUnivariateStatistic sumLogImpl) {
534            checkEmpty();
535            this.sumLogImpl = sumLogImpl;
536            geoMean.setSumLogImpl(sumLogImpl);
537        }
538    
539        /**
540         * Returns the currently configured geometric mean implementation
541         * @return the StorelessUnivariateStatistic implementing the geometric mean
542         * @since 1.2
543         */
544        public StorelessUnivariateStatistic getGeoMeanImpl() {
545            return geoMeanImpl;
546        }
547    
548        /**
549         * <p>
550         * Sets the implementation for the geometric mean.
551         * </p>
552         * <p>
553         * This method must be activated before any data has been added - i.e.,
554         * before {@link #addValue(double) addValue} has been used to add data;
555         * otherwise an IllegalStateException will be thrown.
556         * </p>
557         * @param geoMeanImpl the StorelessUnivariateStatistic instance to use for
558         *        computing the geometric mean
559         * @throws IllegalStateException if data has already been added (i.e if n >
560         *         0)
561         * @since 1.2
562         */
563        public void setGeoMeanImpl(StorelessUnivariateStatistic geoMeanImpl) {
564            checkEmpty();
565            this.geoMeanImpl = geoMeanImpl;
566        }
567    
568        /**
569         * Returns the currently configured mean implementation
570         * @return the StorelessUnivariateStatistic implementing the mean
571         * @since 1.2
572         */
573        public StorelessUnivariateStatistic getMeanImpl() {
574            return meanImpl;
575        }
576    
577        /**
578         * <p>
579         * Sets the implementation for the mean.
580         * </p>
581         * <p>
582         * This method must be activated before any data has been added - i.e.,
583         * before {@link #addValue(double) addValue} has been used to add data;
584         * otherwise an IllegalStateException will be thrown.
585         * </p>
586         * @param meanImpl the StorelessUnivariateStatistic instance to use for
587         *        computing the mean
588         * @throws IllegalStateException if data has already been added (i.e if n >
589         *         0)
590         * @since 1.2
591         */
592        public void setMeanImpl(StorelessUnivariateStatistic meanImpl) {
593            checkEmpty();
594            this.meanImpl = meanImpl;
595        }
596    
597        /**
598         * Returns the currently configured variance implementation
599         * @return the StorelessUnivariateStatistic implementing the variance
600         * @since 1.2
601         */
602        public StorelessUnivariateStatistic getVarianceImpl() {
603            return varianceImpl;
604        }
605    
606        /**
607         * <p>
608         * Sets the implementation for the variance.
609         * </p>
610         * <p>
611         * This method must be activated before any data has been added - i.e.,
612         * before {@link #addValue(double) addValue} has been used to add data;
613         * otherwise an IllegalStateException will be thrown.
614         * </p>
615         * @param varianceImpl the StorelessUnivariateStatistic instance to use for
616         *        computing the variance
617         * @throws IllegalStateException if data has already been added (i.e if n >
618         *         0)
619         * @since 1.2
620         */
621        public void setVarianceImpl(StorelessUnivariateStatistic varianceImpl) {
622            checkEmpty();
623            this.varianceImpl = varianceImpl;
624        }
625    
626        /**
627         * Throws IllegalStateException if n > 0.
628         */
629        private void checkEmpty() {
630            if (n > 0) {
631                throw MathRuntimeException.createIllegalStateException(
632                        "{0} values have been added before statistic is configured",
633                        n);
634            }
635        }
636        
637        /**
638         * Returns a copy of this SummaryStatistics instance with the same internal state.
639         * 
640         * @return a copy of this
641         */
642        public SummaryStatistics copy() {
643            SummaryStatistics result = new SummaryStatistics();
644            copy(this, result);
645            return result; 
646        }
647         
648        /**
649         * Copies source to dest.
650         * <p>Neither source nor dest can be null.</p>
651         * 
652         * @param source SummaryStatistics to copy
653         * @param dest SummaryStatistics to copy to
654         * @throws NullPointerException if either source or dest is null
655         */
656        public static void copy(SummaryStatistics source, SummaryStatistics dest) {
657            dest.maxImpl = source.maxImpl.copy();
658            dest.meanImpl = source.meanImpl.copy();
659            dest.minImpl = source.minImpl.copy();
660            dest.sumImpl = source.sumImpl.copy();
661            dest.varianceImpl = source.varianceImpl.copy();
662            dest.sumLogImpl = source.sumLogImpl.copy();
663            dest.sumsqImpl = source.sumsqImpl.copy();
664            if (source.getGeoMeanImpl() instanceof GeometricMean) {
665                // Keep geoMeanImpl, sumLogImpl in synch
666                dest.geoMeanImpl = new GeometricMean((SumOfLogs) dest.sumLogImpl);
667            } else {
668                dest.geoMeanImpl = source.geoMeanImpl.copy();
669            }
670            SecondMoment.copy(source.secondMoment, dest.secondMoment);
671            dest.n = source.n;
672            
673            // Make sure that if stat == statImpl in source, same
674            // holds in dest; otherwise copy stat
675            if (source.geoMean == source.geoMeanImpl) {
676                dest.geoMean = (GeometricMean) dest.geoMeanImpl;
677            } else {
678                GeometricMean.copy(source.geoMean, dest.geoMean);
679            } 
680            if (source.max == source.maxImpl) {
681                dest.max = (Max) dest.maxImpl;
682            } else {
683                Max.copy(source.max, dest.max);
684            } 
685            if (source.mean == source.meanImpl) {
686                dest.mean = (Mean) dest.meanImpl;
687            } else {
688                Mean.copy(source.mean, dest.mean);
689            } 
690            if (source.min == source.minImpl) {
691                dest.min = (Min) dest.minImpl;
692            } else {
693                Min.copy(source.min, dest.min);
694            } 
695            if (source.sum == source.sumImpl) {
696                dest.sum = (Sum) dest.sumImpl;
697            } else {
698                Sum.copy(source.sum, dest.sum);
699            } 
700            if (source.variance == source.varianceImpl) {
701                dest.variance = (Variance) dest.varianceImpl;
702            } else {
703                Variance.copy(source.variance, dest.variance);
704            } 
705            if (source.sumLog == source.sumLogImpl) {
706                dest.sumLog = (SumOfLogs) dest.sumLogImpl;
707            } else {
708                SumOfLogs.copy(source.sumLog, dest.sumLog);
709            } 
710            if (source.sumsq == source.sumsqImpl) {
711                dest.sumsq = (SumOfSquares) dest.sumsqImpl;
712            } else {
713                SumOfSquares.copy(source.sumsq, dest.sumsq);
714            } 
715        }
716    }