View Javadoc

1   /*
2    * Copyright 2003-2004 The Apache Software Foundation.
3    *
4    * Licensed under the Apache License, Version 2.0 (the "License");
5    * you may not use this file except in compliance with the License.
6    * You may obtain a copy of the License at
7    *
8    *      http://www.apache.org/licenses/LICENSE-2.0
9    *
10   * Unless required by applicable law or agreed to in writing, software
11   * distributed under the License is distributed on an "AS IS" BASIS,
12   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13   * See the License for the specific language governing permissions and
14   * limitations under the License.
15   */
16  package org.apache.commons.math.stat.descriptive;
17  
18  import java.io.Serializable;
19  import java.util.Arrays;
20  
21  import org.apache.commons.discovery.tools.DiscoverClass;
22  import org.apache.commons.math.stat.descriptive.moment.GeometricMean;
23  import org.apache.commons.math.stat.descriptive.moment.Kurtosis;
24  import org.apache.commons.math.stat.descriptive.moment.Mean;
25  import org.apache.commons.math.stat.descriptive.moment.Skewness;
26  import org.apache.commons.math.stat.descriptive.moment.Variance;
27  import org.apache.commons.math.stat.descriptive.rank.Max;
28  import org.apache.commons.math.stat.descriptive.rank.Min;
29  import org.apache.commons.math.stat.descriptive.rank.Percentile;
30  import org.apache.commons.math.stat.descriptive.summary.Sum;
31  import org.apache.commons.math.stat.descriptive.summary.SumOfSquares;
32  
33  
34  /**
35   * Abstract factory class for univariate statistical summaries.
36   *
37   * @version $Revision: 348519 $ $Date: 2005-11-23 12:12:18 -0700 (Wed, 23 Nov 2005) $
38   */
39  public abstract class DescriptiveStatistics implements StatisticalSummary, Serializable {
40      
41      /** Serialization UID */
42      private static final long serialVersionUID = 5188298269533339922L;
43      
44      /**
45       * Create an instance of a <code>DescriptiveStatistics</code>
46       * @param cls the type of <code>DescriptiveStatistics</code> object to
47       *        create. 
48       * @return a new factory. 
49       * @throws InstantiationException is thrown if the object can not be
50       *            created.
51       * @throws IllegalAccessException is thrown if the type's default
52       *            constructor is not accessible.
53       */
54      public static DescriptiveStatistics newInstance(Class cls) throws InstantiationException, IllegalAccessException {
55          return (DescriptiveStatistics)cls.newInstance();
56      }
57      
58      /**
59       * Create an instance of a <code>DescriptiveStatistics</code>
60       * @return a new factory. 
61       */
62      public static DescriptiveStatistics newInstance() {
63          DescriptiveStatistics factory = null;
64          try {
65              DiscoverClass dc = new DiscoverClass();
66              factory = (DescriptiveStatistics) dc.newInstance(
67                  DescriptiveStatistics.class,
68                  "org.apache.commons.math.stat.descriptive.DescriptiveStatisticsImpl");
69          } catch(Throwable t) {
70              return new DescriptiveStatisticsImpl();
71          }
72          return factory;
73      }
74      
75      /**
76       * This constant signals that a Univariate implementation
77       * takes into account the contributions of an infinite number of
78       * elements.  In other words, if getWindow returns this
79       * constant, there is, in effect, no "window".
80       */
81      public static final int INFINITE_WINDOW = -1;
82  
83      /**
84       * Adds the value to the set of numbers
85       * @param v the value to be added 
86       */
87      public abstract void addValue(double v);
88  
89      /** 
90       * Returns the <a href="http://www.xycoon.com/arithmetic_mean.htm">
91       * arithmetic mean </a> of the available values 
92       * @return The mean or Double.NaN if no values have been added.
93       */
94      public double getMean() {
95          return apply(new Mean());
96      }
97  
98      /** 
99       * Returns the <a href="http://www.xycoon.com/geometric_mean.htm">
100      * geometric mean </a> of the available values
101      * @return The geometricMean, Double.NaN if no values have been added, 
102      * or if the productof the available values is less than or equal to 0.
103      */
104     public double getGeometricMean() {
105         return apply(new GeometricMean());
106     }
107 
108     /** 
109      * Returns the variance of the available values.
110      * @return The variance, Double.NaN if no values have been added 
111      * or 0.0 for a single value set.  
112      */
113     public double getVariance() {
114         return apply(new Variance());
115     }
116 
117     /** 
118      * Returns the standard deviation of the available values.
119      * @return The standard deviation, Double.NaN if no values have been added 
120      * or 0.0 for a single value set. 
121      */
122     public double getStandardDeviation() {
123         double stdDev = Double.NaN;
124         if (getN() > 0) {
125             if (getN() > 1) {
126                 stdDev = Math.sqrt(getVariance());
127             } else {
128                 stdDev = 0.0;
129             }
130         }
131         return (stdDev);
132     }
133 
134     /**
135      * Returns the skewness of the available values. Skewness is a 
136      * measure of the assymetry of a given distribution.
137      * @return The skewness, Double.NaN if no values have been added 
138      * or 0.0 for a value set &lt;=2. 
139      */
140     public double getSkewness() {
141         return apply(new Skewness());
142     }
143 
144     /**
145      * Returns the Kurtosis of the available values. Kurtosis is a 
146      * measure of the "peakedness" of a distribution
147      * @return The kurtosis, Double.NaN if no values have been added, or 0.0 
148      * for a value set &lt;=3. 
149      */
150     public double getKurtosis() {
151         return apply(new Kurtosis());
152     }
153 
154     /** 
155      * Returns the maximum of the available values
156      * @return The max or Double.NaN if no values have been added.
157      */
158     public double getMax() {
159         return apply(new Max());
160     }
161 
162     /** 
163     * Returns the minimum of the available values
164     * @return The min or Double.NaN if no values have been added.
165     */
166     public double getMin() {
167         return apply(new Min());
168     }
169 
170     /** 
171      * Returns the number of available values
172      * @return The number of available values
173      */
174     public abstract long getN();
175 
176     /**
177      * Returns the sum of the values that have been added to Univariate.
178      * @return The sum or Double.NaN if no values have been added
179      */
180     public double getSum() {
181         return apply(new Sum());
182     }
183 
184     /**
185      * Returns the sum of the squares of the available values.
186      * @return The sum of the squares or Double.NaN if no 
187      * values have been added.
188      */
189     public double getSumsq() {
190         return apply(new SumOfSquares());
191     }
192 
193     /** 
194      * Resets all statistics and storage
195      */
196     public abstract void clear();
197 
198     /**
199      * Univariate has the ability to return only measures for the
200      * last N elements added to the set of values.
201      * @return The current window size or -1 if its Infinite.
202      */
203 
204     public abstract int getWindowSize();
205 
206     /**
207      * WindowSize controls the number of values which contribute 
208      * to the values returned by Univariate.  For example, if 
209      * windowSize is set to 3 and the values {1,2,3,4,5} 
210      * have been added <strong> in that order</strong> 
211      * then the <i>available values</i> are {3,4,5} and all
212      * reported statistics will be based on these values
213      * @param windowSize sets the size of the window.
214      */
215     public abstract void setWindowSize(int windowSize);
216     
217     /**
218      * Returns the current set of values in an array of double primitives.  
219      * The order of addition is preserved.  The returned array is a fresh
220      * copy of the underlying data -- i.e., it is not a reference to the
221      * stored data.
222      * 
223      * @return returns the current set of numbers in the order in which they 
224      *         were added to this set
225      */
226     public abstract double[] getValues();
227 
228     /**
229      * Returns the current set of values in an array of double primitives,  
230      * sorted in ascending order.  The returned array is a fresh
231      * copy of the underlying data -- i.e., it is not a reference to the
232      * stored data.
233      * @return returns the current set of 
234      * numbers sorted in ascending order        
235      */
236     public double[] getSortedValues() {
237         double[] sort = getValues();
238         Arrays.sort(sort);
239         return sort;
240     }
241 
242     /**
243      * Returns the element at the specified index
244      * @param index The Index of the element
245      * @return return the element at the specified index
246      */
247     public abstract double getElement(int index);
248 
249     /**
250      * Returns an estimate for the pth percentile of the stored values. 
251      * <p>
252      * The implementation provided here follows the first estimation procedure presented
253      * <a href="http://www.itl.nist.gov/div898/handbook/prc/section2/prc252.htm">here.</a>
254      * <p>
255      * <strong>Preconditions</strong>:<ul>
256      * <li><code>0 &lt; p &lt; 100</code> (otherwise an 
257      * <code>IllegalArgumentException</code> is thrown)</li>
258      * <li>at least one value must be stored (returns <code>Double.NaN
259      *     </code> otherwise)</li>
260      * </ul>
261      * 
262      * @param p the requested percentile (scaled from 0 - 100)
263      * @return An estimate for the pth percentile of the stored data 
264      * values
265      */
266     public double getPercentile(double p) {
267         return apply(new Percentile(p));
268     }
269     
270     /**
271      * Generates a text report displaying univariate statistics from values
272      * that have been added.  Each statistic is displayed on a separate
273      * line.
274      * 
275      * @return String with line feeds displaying statistics
276      */
277     public String toString() {
278         StringBuffer outBuffer = new StringBuffer();
279         outBuffer.append("DescriptiveStatistics:\n");
280         outBuffer.append("n: " + getN() + "\n");
281         outBuffer.append("min: " + getMin() + "\n");
282         outBuffer.append("max: " + getMax() + "\n");
283         outBuffer.append("mean: " + getMean() + "\n");
284         outBuffer.append("std dev: " + getStandardDeviation() + "\n");
285         outBuffer.append("median: " + getPercentile(50) + "\n");
286         outBuffer.append("skewness: " + getSkewness() + "\n");
287         outBuffer.append("kurtosis: " + getKurtosis() + "\n");
288         return outBuffer.toString();
289     }
290     
291     /**
292      * Apply the given statistic to the data associated with this set of statistics.
293      * @param stat the statistic to apply
294      * @return the computed value of the statistic.
295      */
296     public abstract double apply(UnivariateStatistic stat);
297 
298 }