View Javadoc

1   /*
2    * Copyright 2003-2004 The Apache Software Foundation.
3    *
4    * Licensed under the Apache License, Version 2.0 (the "License");
5    * you may not use this file except in compliance with the License.
6    * You may obtain a copy of the License at
7    *
8    *      http://www.apache.org/licenses/LICENSE-2.0
9    *
10   * Unless required by applicable law or agreed to in writing, software
11   * distributed under the License is distributed on an "AS IS" BASIS,
12   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13   * See the License for the specific language governing permissions and
14   * limitations under the License.
15   */
16  package org.apache.commons.math.stat.descriptive.moment;
17  
18  import java.io.Serializable;
19  
20  import org.apache.commons.math.stat.descriptive.AbstractStorelessUnivariateStatistic;
21  
22  /**
23   * Computes the variance of the available values.   By default, the unbiased
24   * "sample variance" definitional formula is used: 
25   * <p>
26   * variance = sum((x_i - mean)^2) / (n - 1)
27   * <p>
28   * where mean is the {@link Mean} and <code>n</code> is the number
29   * of sample observations.  
30   * <p>
31   * The definitional formula does not have good numerical properties, so
32   * this implementation uses updating formulas based on West's algorithm
33   * as described in <a href="http://doi.acm.org/10.1145/359146.359152">
34   * Chan, T. F. and J. G. Lewis 1979, <i>Communications of the ACM</i>,
35   * vol. 22 no. 9, pp. 526-531.</a>.
36   * <p>
37   * The "population variance"  ( sum((x_i - mean)^2) / n ) can also
38   * be computed using this statistic.  The <code>isBiasCorrected</code>
39   * property determines whether the "population" or "sample" value is
40   * returned by the <code>evaluate</code> and <code>getResult</code> methods.
41   * To compute population variances, set this property to <code>false</code>.
42   * <p>
43   * <strong>Note that this implementation is not synchronized.</strong> If 
44   * multiple threads access an instance of this class concurrently, and at least
45   * one of the threads invokes the <code>increment()</code> or 
46   * <code>clear()</code> method, it must be synchronized externally.
47   * 
48   * @version $Revision: 348519 $ $Date: 2005-11-23 12:12:18 -0700 (Wed, 23 Nov 2005) $
49   */
50  public class Variance extends AbstractStorelessUnivariateStatistic implements Serializable {
51  
52      /** Serializable version identifier */
53      private static final long serialVersionUID = -9111962718267217978L;  
54        
55      /** SecondMoment is used in incremental calculation of Variance*/
56      protected SecondMoment moment = null;
57  
58      /**
59       * Boolean test to determine if this Variance should also increment
60       * the second moment, this evaluates to false when this Variance is
61       * constructed with an external SecondMoment as a parameter.
62       */
63      protected boolean incMoment = true;
64      
65      /**
66       * Determines whether or not bias correction is applied when computing the
67       * value of the statisic.  True means that bias is corrected.  See 
68       * {@link Variance} for details on the formula.
69       */
70      private boolean isBiasCorrected = true;
71  
72      /**
73       * Constructs a Variance with default (true) <code>isBiasCorrected</code>
74       * property.
75       */
76      public Variance() {
77          moment = new SecondMoment();
78      }
79  
80      /**
81       * Constructs a Variance based on an external second moment.
82       * 
83       * @param m2 the SecondMoment (Thrid or Fourth moments work
84       * here as well.)
85       */
86      public Variance(final SecondMoment m2) {
87          incMoment = false;
88          this.moment = m2;
89      }
90      
91      /**
92       * Constructs a Variance with the specified <code>isBiasCorrected</code>
93       * property
94       * 
95       * @param isBiasCorrected  setting for bias correction - true means
96       * bias will be corrected and is equivalent to using the argumentless
97       * constructor
98       */
99      public Variance(boolean isBiasCorrected) {
100         moment = new SecondMoment();
101         this.isBiasCorrected = isBiasCorrected;
102     }
103     
104     /**
105      * Constructs a Variance with the specified <code>isBiasCorrected</code>
106      * property and the supplied external second moment.
107      * 
108      * @param isBiasCorrected  setting for bias correction - true means
109      * bias will be corrected
110      * @param m2 the SecondMoment (Thrid or Fourth moments work
111      * here as well.)
112      */
113     public Variance(boolean isBiasCorrected, SecondMoment m2) {
114         incMoment = false;
115         this.moment = m2;
116         this.isBiasCorrected = isBiasCorrected;      
117     }
118    
119     /**
120      * @see org.apache.commons.math.stat.descriptive.StorelessUnivariateStatistic#increment(double)
121      */
122     public void increment(final double d) {
123         if (incMoment) {
124             moment.increment(d);
125         }
126     }
127 
128     /**
129      * @see org.apache.commons.math.stat.descriptive.StorelessUnivariateStatistic#getResult()
130      */
131     public double getResult() {
132             if (moment.n == 0) {
133                 return Double.NaN;
134             } else if (moment.n == 1) {
135                 return 0d;
136             } else {
137                 if (isBiasCorrected) {
138                     return moment.m2 / ((double) moment.n - 1d);
139                 } else {
140                     return moment.m2 / ((double) moment.n);
141                 }
142             }
143     }
144 
145     /**
146      * @see org.apache.commons.math.stat.descriptive.StorelessUnivariateStatistic#getN()
147      */
148     public long getN() {
149         return moment.getN();
150     }
151     
152     /**
153      * @see org.apache.commons.math.stat.descriptive.StorelessUnivariateStatistic#clear()
154      */
155     public void clear() {
156         if (incMoment) {
157             moment.clear();
158         }
159     }
160     
161     /**
162      * Returns the variance of the entries in the input array, or 
163      * <code>Double.NaN</code> if the array is empty.
164      * <p>
165      * See {@link Variance} for details on the computing algorithm.
166      * <p>
167      * Returns 0 for a single-value (i.e. length = 1) sample.
168      * <p>
169      * Throws <code>IllegalArgumentException</code> if the array is null.
170      * <p>
171      * Does not change the internal state of the statistic.
172      * 
173      * @param values the input array
174      * @return the variance of the values or Double.NaN if length = 0
175      * @throws IllegalArgumentException if the array is null
176      */
177     public double evaluate(final double[] values) {
178         if (values == null) {
179             throw new IllegalArgumentException("input values array is null");
180         }
181         return evaluate(values, 0, values.length);
182     }
183 
184     /**
185      * Returns the variance of the entries in the specified portion of
186      * the input array, or <code>Double.NaN</code> if the designated subarray
187      * is empty.
188      * <p>
189      * See {@link Variance} for details on the computing algorithm.
190      * <p>
191      * Returns 0 for a single-value (i.e. length = 1) sample.
192      * <p>
193      * Does not change the internal state of the statistic.
194      * <p>
195      * Throws <code>IllegalArgumentException</code> if the array is null.
196      * 
197      * @param values the input array
198      * @param begin index of the first array element to include
199      * @param length the number of elements to include
200      * @return the variance of the values or Double.NaN if length = 0
201      * @throws IllegalArgumentException if the array is null or the array index
202      *  parameters are not valid
203      */
204     public double evaluate(final double[] values, final int begin, final int length) {
205 
206         double var = Double.NaN;
207 
208         if (test(values, begin, length)) {
209             clear();
210             if (length == 1) {
211                 var = 0.0;
212             } else if (length > 1) {
213                 Mean mean = new Mean();
214                 double m = mean.evaluate(values, begin, length);
215                 var = evaluate(values, m, begin, length);
216             }
217         }
218         return var;
219     }
220     
221     /**
222      * Returns the variance of the entries in the specified portion of
223      * the input array, using the precomputed mean value.  Returns 
224      * <code>Double.NaN</code> if the designated subarray is empty.
225      * <p>
226      * See {@link Variance} for details on the computing algorithm.
227      * <p>
228      * The formula used assumes that the supplied mean value is the arithmetic
229      * mean of the sample data, not a known population parameter.  This method
230      * is supplied only to save computation when the mean has already been
231      * computed.
232      * <p>
233      * Returns 0 for a single-value (i.e. length = 1) sample.
234      * <p>
235      * Throws <code>IllegalArgumentException</code> if the array is null.
236      * <p>
237      * Does not change the internal state of the statistic.
238      * 
239      * @param values the input array
240      * @param mean the precomputed mean value
241      * @param begin index of the first array element to include
242      * @param length the number of elements to include
243      * @return the variance of the values or Double.NaN if length = 0
244      * @throws IllegalArgumentException if the array is null or the array index
245      *  parameters are not valid
246      */
247     public double evaluate(final double[] values, final double mean, 
248             final int begin, final int length) {
249         
250         double var = Double.NaN;
251 
252         if (test(values, begin, length)) {
253             if (length == 1) {
254                 var = 0.0;
255             } else if (length > 1) {
256                 double accum = 0.0;
257                 double accum2 = 0.0;
258                 for (int i = begin; i < begin + length; i++) {
259                     accum += Math.pow((values[i] - mean), 2.0);
260                     accum2 += (values[i] - mean);
261                 }
262                 if (isBiasCorrected) {
263                     var = (accum - (Math.pow(accum2, 2) / ((double) length))) /
264                     (double) (length - 1);
265                 } else {
266                     var = (accum - (Math.pow(accum2, 2) / ((double) length))) /
267                     (double) length;
268                 }
269             }
270         }
271         return var;
272     }
273     
274     /**
275      * Returns the variance of the entries in the input array, using the
276      * precomputed mean value.  Returns <code>Double.NaN</code> if the array
277      * is empty.
278      * <p>
279      * See {@link Variance} for details on the computing algorithm.
280      * <p>
281      * If <code>isBiasCorrected</code> is <code>true</code> the formula used
282      * assumes that the supplied mean value is the arithmetic mean of the
283      * sample data, not a known population parameter.  If the mean is a known
284      * population parameter, or if the "population" version of the variance is
285      * desired, set <code>isBiasCorrected</code> to <code>false</code> before
286      * invoking this method.
287      * <p>
288      * Returns 0 for a single-value (i.e. length = 1) sample.
289      * <p>
290      * Throws <code>IllegalArgumentException</code> if the array is null.
291      * <p>
292      * Does not change the internal state of the statistic.
293      * 
294      * @param values the input array
295      * @param mean the precomputed mean value
296      * @return the variance of the values or Double.NaN if the array is empty
297      * @throws IllegalArgumentException if the array is null
298      */
299     public double evaluate(final double[] values, final double mean) {
300         return evaluate(values, mean, 0, values.length);
301     }
302 
303     /**
304      * @return Returns the isBiasCorrected.
305      */
306     public boolean isBiasCorrected() {
307         return isBiasCorrected;
308     }
309 
310     /**
311      * @param isBiasCorrected The isBiasCorrected to set.
312      */
313     public void setBiasCorrected(boolean isBiasCorrected) {
314         this.isBiasCorrected = isBiasCorrected;
315     }
316 
317 }