View Javadoc

1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *      http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  package org.apache.commons.math.stat.inference;
18  
19  import org.apache.commons.math.MathException;
20  
21  /**
22   * An interface for Chi-Square tests.
23   * <p>This interface handles only known distributions. If the distribution is
24   * unknown and should be provided by a sample, then the {@link UnknownDistributionChiSquareTest
25   * UnknownDistributionChiSquareTest} extended interface should be used instead.</p>
26   * @version $Revision: 670469 $ $Date: 2008-06-23 04:01:38 -0400 (Mon, 23 Jun 2008) $ 
27   */
28  public interface ChiSquareTest {
29       
30       /**
31       * Computes the <a href="http://www.itl.nist.gov/div898/handbook/eda/section3/eda35f.htm">
32       * Chi-Square statistic</a> comparing <code>observed</code> and <code>expected</code>
33       * frequency counts.
34       * <p>
35       * This statistic can be used to perform a Chi-Square test evaluating the null hypothesis that
36       *  the observed counts follow the expected distribution.</p>
37       * <p>
38       * <strong>Preconditions</strong>: <ul>
39       * <li>Expected counts must all be positive.
40       * </li>
41       * <li>Observed counts must all be &gt;= 0.
42       * </li>
43       * <li>The observed and expected arrays must have the same length and
44       * their common length must be at least 2.
45       * </li></ul></p><p>
46       * If any of the preconditions are not met, an
47       * <code>IllegalArgumentException</code> is thrown.</p>
48       *
49       * @param expected array of expected frequency counts
50       * @param observed array of observed frequency counts
51       * @return chiSquare statistic
52       * @throws IllegalArgumentException if preconditions are not met
53       */
54      double chiSquare(double[] expected, long[] observed) 
55          throws IllegalArgumentException;
56      
57      /**
58       * Returns the <i>observed significance level</i>, or <a href=
59       * "http://www.cas.lancs.ac.uk/glossary_v1.1/hyptest.html#pvalue">
60       * p-value</a>, associated with a
61       * <a href="http://www.itl.nist.gov/div898/handbook/eda/section3/eda35f.htm">
62       * Chi-square goodness of fit test</a> comparing the <code>observed</code>
63       * frequency counts to those in the <code>expected</code> array.
64       * <p>
65       * The number returned is the smallest significance level at which one can reject
66       * the null hypothesis that the observed counts conform to the frequency distribution
67       * described by the expected counts.</p>
68       * <p>
69       * <strong>Preconditions</strong>: <ul>
70       * <li>Expected counts must all be positive.
71       * </li>
72       * <li>Observed counts must all be &gt;= 0.
73       * </li>
74       * <li>The observed and expected arrays must have the same length and
75       * their common length must be at least 2.
76       * </li></ul></p><p>
77       * If any of the preconditions are not met, an
78       * <code>IllegalArgumentException</code> is thrown.</p>
79       *
80       * @param expected array of expected frequency counts
81       * @param observed array of observed frequency counts
82       * @return p-value
83       * @throws IllegalArgumentException if preconditions are not met
84       * @throws MathException if an error occurs computing the p-value
85       */
86      double chiSquareTest(double[] expected, long[] observed) 
87          throws IllegalArgumentException, MathException;
88      
89      /**
90       * Performs a <a href="http://www.itl.nist.gov/div898/handbook/eda/section3/eda35f.htm">
91       * Chi-square goodness of fit test</a> evaluating the null hypothesis that the observed counts
92       * conform to the frequency distribution described by the expected counts, with
93       * significance level <code>alpha</code>.  Returns true iff the null hypothesis can be rejected
94       * with 100 * (1 - alpha) percent confidence.
95       * <p>
96       * <strong>Example:</strong><br>
97       * To test the hypothesis that <code>observed</code> follows
98       * <code>expected</code> at the 99% level, use </p><p>
99       * <code>chiSquareTest(expected, observed, 0.01) </code></p>
100      * <p>
101      * <strong>Preconditions</strong>: <ul>
102      * <li>Expected counts must all be positive.
103      * </li>
104      * <li>Observed counts must all be &gt;= 0.
105      * </li>
106      * <li>The observed and expected arrays must have the same length and
107      * their common length must be at least 2.</li>
108      * <li> <code> 0 &lt; alpha &lt; 0.5 </code>
109      * </li></ul></p><p>
110      * If any of the preconditions are not met, an
111      * <code>IllegalArgumentException</code> is thrown.</p>
112      *
113      * @param expected array of expected frequency counts
114      * @param observed array of observed frequency counts
115      * @param alpha significance level of the test
116      * @return true iff null hypothesis can be rejected with confidence
117      * 1 - alpha
118      * @throws IllegalArgumentException if preconditions are not met
119      * @throws MathException if an error occurs performing the test
120      */
121     boolean chiSquareTest(double[] expected, long[] observed, double alpha) 
122         throws IllegalArgumentException, MathException;
123     
124     /**
125      *  Computes the Chi-Square statistic associated with a
126      * <a href="http://www.itl.nist.gov/div898/handbook/prc/section4/prc45.htm">
127      *  chi-square test of independence</a> based on the input <code>counts</code>
128      *  array, viewed as a two-way table.
129      * <p>
130      * The rows of the 2-way table are
131      * <code>counts[0], ... , counts[counts.length - 1] </code></p>
132      * <p>
133      * <strong>Preconditions</strong>: <ul>
134      * <li>All counts must be &gt;= 0.
135      * </li>
136      * <li>The counts array must be rectangular (i.e. all counts[i] subarrays
137      *  must have the same length).
138      * </li>
139      * <li>The 2-way table represented by <code>counts</code> must have at
140      *  least 2 columns and at least 2 rows.
141      * </li>
142      * </ul></p><p>
143      * If any of the preconditions are not met, an
144      * <code>IllegalArgumentException</code> is thrown.</p>
145      *
146      * @param counts array representation of 2-way table
147      * @return chiSquare statistic
148      * @throws IllegalArgumentException if preconditions are not met
149      */
150     double chiSquare(long[][] counts) 
151     throws IllegalArgumentException;
152     
153     /**
154      * Returns the <i>observed significance level</i>, or <a href=
155      * "http://www.cas.lancs.ac.uk/glossary_v1.1/hyptest.html#pvalue">
156      * p-value</a>, associated with a
157      * <a href="http://www.itl.nist.gov/div898/handbook/prc/section4/prc45.htm">
158      * chi-square test of independence</a> based on the input <code>counts</code>
159      * array, viewed as a two-way table.
160      * <p>
161      * The rows of the 2-way table are
162      * <code>counts[0], ... , counts[counts.length - 1] </code></p>
163      * <p>
164      * <strong>Preconditions</strong>: <ul>
165      * <li>All counts must be &gt;= 0.
166      * </li>
167      * <li>The counts array must be rectangular (i.e. all counts[i] subarrays must have the same length).
168      * </li>
169      * <li>The 2-way table represented by <code>counts</code> must have at least 2 columns and
170      *        at least 2 rows.
171      * </li>
172      * </ul></p><p>
173      * If any of the preconditions are not met, an
174      * <code>IllegalArgumentException</code> is thrown.</p>
175      *
176      * @param counts array representation of 2-way table
177      * @return p-value
178      * @throws IllegalArgumentException if preconditions are not met
179      * @throws MathException if an error occurs computing the p-value
180      */
181     double chiSquareTest(long[][] counts) 
182     throws IllegalArgumentException, MathException;
183     
184     /**
185      * Performs a <a href="http://www.itl.nist.gov/div898/handbook/prc/section4/prc45.htm">
186      * chi-square test of independence</a> evaluating the null hypothesis that the classifications
187      * represented by the counts in the columns of the input 2-way table are independent of the rows,
188      * with significance level <code>alpha</code>.  Returns true iff the null hypothesis can be rejected
189      * with 100 * (1 - alpha) percent confidence.
190      * <p>
191      * The rows of the 2-way table are
192      * <code>counts[0], ... , counts[counts.length - 1] </code></p>
193      * <p>
194      * <strong>Example:</strong><br>
195      * To test the null hypothesis that the counts in
196      * <code>counts[0], ... , counts[counts.length - 1] </code>
197      *  all correspond to the same underlying probability distribution at the 99% level, use </p><p>
198      * <code>chiSquareTest(counts, 0.01) </code></p>
199      * <p>
200      * <strong>Preconditions</strong>: <ul>
201      * <li>All counts must be &gt;= 0.
202      * </li>
203      * <li>The counts array must be rectangular (i.e. all counts[i] subarrays must have the same length).
204      * </li>
205      * <li>The 2-way table represented by <code>counts</code> must have at least 2 columns and
206      *        at least 2 rows.
207      * </li>
208      * </ul></p><p>
209      * If any of the preconditions are not met, an
210      * <code>IllegalArgumentException</code> is thrown.</p>
211      *
212      * @param counts array representation of 2-way table
213      * @param alpha significance level of the test
214      * @return true iff null hypothesis can be rejected with confidence
215      * 1 - alpha
216      * @throws IllegalArgumentException if preconditions are not met
217      * @throws MathException if an error occurs performing the test
218      */
219     boolean chiSquareTest(long[][] counts, double alpha) 
220     throws IllegalArgumentException, MathException;
221 
222 }