View Javadoc

1   /*
2    * Copyright 2004 The Apache Software Foundation.
3    *
4    * Licensed under the Apache License, Version 2.0 (the "License");
5    * you may not use this file except in compliance with the License.
6    * You may obtain a copy of the License at
7    *
8    *      http://www.apache.org/licenses/LICENSE-2.0
9    *
10   * Unless required by applicable law or agreed to in writing, software
11   * distributed under the License is distributed on an "AS IS" BASIS,
12   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13   * See the License for the specific language governing permissions and
14   * limitations under the License.
15   */
16  package org.apache.commons.math.stat.inference;
17  
18  import org.apache.commons.math.MathException;
19  import org.apache.commons.math.distribution.DistributionFactory;
20  import org.apache.commons.math.distribution.ChiSquaredDistribution;
21  
22  /**
23   * Implements Chi-Square test statistics defined in the {@link ChiSquareTest} interface.
24   *
25   * @version $Revision: 155427 $ $Date: 2005-02-26 06:11:52 -0700 (Sat, 26 Feb 2005) $
26   */
27  public class ChiSquareTestImpl implements ChiSquareTest {
28      
29      /** Cached DistributionFactory used to create ChiSquaredDistribution instances */
30      private DistributionFactory distributionFactory = null;
31    
32      /**
33       * Construct a ChiSquareTestImpl 
34       */
35      public ChiSquareTestImpl() {
36          super();
37      }
38  
39       /**
40       * @param observed array of observed frequency counts
41       * @param expected array of expected frequency counts
42       * @return chi-square test statistic
43       * @throws IllegalArgumentException if preconditions are not met
44       * or length is less than 2
45       */
46      public double chiSquare(double[] expected, long[] observed)
47          throws IllegalArgumentException {
48          double sumSq = 0.0d;
49          double dev = 0.0d;
50          if ((expected.length < 2) || (expected.length != observed.length)) {
51              throw new IllegalArgumentException(
52                      "observed, expected array lengths incorrect");
53          }
54          if (!isPositive(expected) || !isNonNegative(observed)) {
55              throw new IllegalArgumentException(
56                  "observed counts must be non-negative and expected counts must be postive");
57          }
58          for (int i = 0; i < observed.length; i++) {
59              dev = ((double) observed[i] - expected[i]);
60              sumSq += dev * dev / expected[i];
61          }
62          return sumSq;
63      }
64  
65      /**
66       * @param observed array of observed frequency counts
67       * @param expected array of exptected frequency counts
68       * @return p-value
69       * @throws IllegalArgumentException if preconditions are not met
70       * @throws MathException if an error occurs computing the p-value
71       */
72      public double chiSquareTest(double[] expected, long[] observed)
73          throws IllegalArgumentException, MathException {
74          ChiSquaredDistribution chiSquaredDistribution =
75              getDistributionFactory().createChiSquareDistribution(
76                      (double) expected.length - 1);
77          return 1 - chiSquaredDistribution.cumulativeProbability(
78                  chiSquare(expected, observed));
79      }
80  
81      /**
82       * @param observed array of observed frequency counts
83       * @param expected array of exptected frequency counts
84       * @param alpha significance level of the test
85       * @return true iff null hypothesis can be rejected with confidence
86       * 1 - alpha
87       * @throws IllegalArgumentException if preconditions are not met
88       * @throws MathException if an error occurs performing the test
89       */
90      public boolean chiSquareTest(double[] expected, long[] observed, 
91              double alpha) throws IllegalArgumentException, MathException {
92          if ((alpha <= 0) || (alpha > 0.5)) {
93              throw new IllegalArgumentException(
94                      "bad significance level: " + alpha);
95          }
96          return (chiSquareTest(expected, observed) < alpha);
97      }
98      
99      /**
100      * @param counts array representation of 2-way table
101      * @return chi-square test statistic
102      * @throws IllegalArgumentException if preconditions are not met
103      */
104     public double chiSquare(long[][] counts) throws IllegalArgumentException {
105         
106         checkArray(counts);
107         int nRows = counts.length;
108         int nCols = counts[0].length;
109         
110         // compute row, column and total sums
111         double[] rowSum = new double[nRows];
112         double[] colSum = new double[nCols];
113         double total = 0.0d;
114         for (int row = 0; row < nRows; row++) {
115             for (int col = 0; col < nCols; col++) {
116                 rowSum[row] += (double) counts[row][col];
117                 colSum[col] += (double) counts[row][col];
118                 total += (double) counts[row][col];
119             }
120         }
121         
122         // compute expected counts and chi-square
123         double sumSq = 0.0d;
124         double expected = 0.0d;
125         for (int row = 0; row < nRows; row++) {
126             for (int col = 0; col < nCols; col++) {
127                 expected = (rowSum[row] * colSum[col]) / total;
128                 sumSq += (((double) counts[row][col] - expected) * 
129                         ((double) counts[row][col] - expected)) / expected; 
130             }
131         } 
132         return sumSq;
133     }
134 
135     /**
136      * @param counts array representation of 2-way table
137      * @return p-value
138      * @throws IllegalArgumentException if preconditions are not met
139      * @throws MathException if an error occurs computing the p-value
140      */
141     public double chiSquareTest(long[][] counts)
142     throws IllegalArgumentException, MathException {
143         checkArray(counts);
144         double df = ((double) counts.length -1) * ((double) counts[0].length - 1);
145         ChiSquaredDistribution chiSquaredDistribution =
146             getDistributionFactory().createChiSquareDistribution(df);
147         return 1 - chiSquaredDistribution.cumulativeProbability(chiSquare(counts));
148     }
149 
150     /**
151      * @param counts array representation of 2-way table
152      * @param alpha significance level of the test
153      * @return true iff null hypothesis can be rejected with confidence
154      * 1 - alpha
155      * @throws IllegalArgumentException if preconditions are not met
156      * @throws MathException if an error occurs performing the test
157      */
158     public boolean chiSquareTest(long[][] counts, double alpha)
159     throws IllegalArgumentException, MathException {
160         if ((alpha <= 0) || (alpha > 0.5)) {
161             throw new IllegalArgumentException("bad significance level: " + alpha);
162         }
163         return (chiSquareTest(counts) < alpha);
164     }
165     
166     /**
167      * Checks to make sure that the input long[][] array is rectangular,
168      * has at least 2 rows and 2 columns, and has all non-negative entries,
169      * throwing IllegalArgumentException if any of these checks fail.
170      * 
171      * @param in input 2-way table to check
172      * @throws IllegalArgumentException if the array is not valid
173      */
174     private void checkArray(long[][] in) throws IllegalArgumentException {
175         
176         if (in.length < 2) {
177             throw new IllegalArgumentException("Input table must have at least two rows");
178         }
179         
180         if (in[0].length < 2) {
181             throw new IllegalArgumentException("Input table must have at least two columns");
182         }    
183         
184         if (!isRectangular(in)) {
185             throw new IllegalArgumentException("Input table must be rectangular");
186         }
187         
188         if (!isNonNegative(in)) {
189             throw new IllegalArgumentException("All entries in input 2-way table must be non-negative");
190         }
191         
192     }
193     
194     //---------------------  Protected methods ---------------------------------
195     /**
196      * Gets a DistributionFactory to use in creating ChiSquaredDistribution instances.
197      * 
198      * @return a DistributionFactory
199      */
200     protected DistributionFactory getDistributionFactory() {
201         if (distributionFactory == null) {
202             distributionFactory = DistributionFactory.newInstance();
203         }
204         return distributionFactory;
205     }
206     
207     //---------------------  Private array methods -- should find a utility home for these
208     
209     /**
210      * Returns true iff input array is rectangular.
211      * 
212      * @param in array to be tested
213      * @return true if the array is rectangular
214      * @throws NullPointerException if input array is null
215      * @throws ArrayIndexOutOfBoundsException if input array is empty
216      */
217     private boolean isRectangular(long[][] in) {
218         for (int i = 1; i < in.length; i++) {
219             if (in[i].length != in[0].length) {
220                 return false;
221             }
222         }  
223         return true;
224     }
225     
226     /**
227      * Returns true iff all entries of the input array are > 0.
228      * Returns true if the array is non-null, but empty
229      * 
230      * @param in array to be tested
231      * @return true if all entries of the array are positive
232      * @throws NullPointerException if input array is null
233      */
234     private boolean isPositive(double[] in) {
235         for (int i = 0; i < in.length; i ++) {
236             if (in[i] <= 0) {
237                 return false;
238             }
239         }
240         return true;
241     }
242     
243     /**
244      * Returns true iff all entries of the input array are >= 0.
245      * Returns true if the array is non-null, but empty
246      * 
247      * @param in array to be tested
248      * @return true if all entries of the array are non-negative
249      * @throws NullPointerException if input array is null
250      */
251     private boolean isNonNegative(long[] in) {
252         for (int i = 0; i < in.length; i ++) {
253             if (in[i] < 0) {
254                 return false;
255             }
256         }
257         return true;
258     }
259     
260     /**
261      * Returns true iff all entries of (all subarrays of) the input array are >= 0.
262      * Returns true if the array is non-null, but empty
263      * 
264      * @param in array to be tested
265      * @return true if all entries of the array are non-negative
266      * @throws NullPointerException if input array is null
267      */
268     private boolean isNonNegative(long[][] in) {
269         for (int i = 0; i < in.length; i ++) {
270             for (int j = 0; j < in[i].length; j++) {
271                 if (in[i][j] < 0) {
272                     return false;
273                 }
274             }
275         }
276         return true;
277     }
278     
279 }