1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16 package org.apache.commons.math.stat.inference;
17
18 import org.apache.commons.math.MathException;
19 import org.apache.commons.math.distribution.DistributionFactory;
20 import org.apache.commons.math.distribution.ChiSquaredDistribution;
21
22 /**
23 * Implements Chi-Square test statistics defined in the {@link ChiSquareTest} interface.
24 *
25 * @version $Revision: 155427 $ $Date: 2005-02-26 06:11:52 -0700 (Sat, 26 Feb 2005) $
26 */
27 public class ChiSquareTestImpl implements ChiSquareTest {
28
29 /** Cached DistributionFactory used to create ChiSquaredDistribution instances */
30 private DistributionFactory distributionFactory = null;
31
32 /**
33 * Construct a ChiSquareTestImpl
34 */
35 public ChiSquareTestImpl() {
36 super();
37 }
38
39 /**
40 * @param observed array of observed frequency counts
41 * @param expected array of expected frequency counts
42 * @return chi-square test statistic
43 * @throws IllegalArgumentException if preconditions are not met
44 * or length is less than 2
45 */
46 public double chiSquare(double[] expected, long[] observed)
47 throws IllegalArgumentException {
48 double sumSq = 0.0d;
49 double dev = 0.0d;
50 if ((expected.length < 2) || (expected.length != observed.length)) {
51 throw new IllegalArgumentException(
52 "observed, expected array lengths incorrect");
53 }
54 if (!isPositive(expected) || !isNonNegative(observed)) {
55 throw new IllegalArgumentException(
56 "observed counts must be non-negative and expected counts must be postive");
57 }
58 for (int i = 0; i < observed.length; i++) {
59 dev = ((double) observed[i] - expected[i]);
60 sumSq += dev * dev / expected[i];
61 }
62 return sumSq;
63 }
64
65 /**
66 * @param observed array of observed frequency counts
67 * @param expected array of exptected frequency counts
68 * @return p-value
69 * @throws IllegalArgumentException if preconditions are not met
70 * @throws MathException if an error occurs computing the p-value
71 */
72 public double chiSquareTest(double[] expected, long[] observed)
73 throws IllegalArgumentException, MathException {
74 ChiSquaredDistribution chiSquaredDistribution =
75 getDistributionFactory().createChiSquareDistribution(
76 (double) expected.length - 1);
77 return 1 - chiSquaredDistribution.cumulativeProbability(
78 chiSquare(expected, observed));
79 }
80
81 /**
82 * @param observed array of observed frequency counts
83 * @param expected array of exptected frequency counts
84 * @param alpha significance level of the test
85 * @return true iff null hypothesis can be rejected with confidence
86 * 1 - alpha
87 * @throws IllegalArgumentException if preconditions are not met
88 * @throws MathException if an error occurs performing the test
89 */
90 public boolean chiSquareTest(double[] expected, long[] observed,
91 double alpha) throws IllegalArgumentException, MathException {
92 if ((alpha <= 0) || (alpha > 0.5)) {
93 throw new IllegalArgumentException(
94 "bad significance level: " + alpha);
95 }
96 return (chiSquareTest(expected, observed) < alpha);
97 }
98
99 /**
100 * @param counts array representation of 2-way table
101 * @return chi-square test statistic
102 * @throws IllegalArgumentException if preconditions are not met
103 */
104 public double chiSquare(long[][] counts) throws IllegalArgumentException {
105
106 checkArray(counts);
107 int nRows = counts.length;
108 int nCols = counts[0].length;
109
110
111 double[] rowSum = new double[nRows];
112 double[] colSum = new double[nCols];
113 double total = 0.0d;
114 for (int row = 0; row < nRows; row++) {
115 for (int col = 0; col < nCols; col++) {
116 rowSum[row] += (double) counts[row][col];
117 colSum[col] += (double) counts[row][col];
118 total += (double) counts[row][col];
119 }
120 }
121
122
123 double sumSq = 0.0d;
124 double expected = 0.0d;
125 for (int row = 0; row < nRows; row++) {
126 for (int col = 0; col < nCols; col++) {
127 expected = (rowSum[row] * colSum[col]) / total;
128 sumSq += (((double) counts[row][col] - expected) *
129 ((double) counts[row][col] - expected)) / expected;
130 }
131 }
132 return sumSq;
133 }
134
135 /**
136 * @param counts array representation of 2-way table
137 * @return p-value
138 * @throws IllegalArgumentException if preconditions are not met
139 * @throws MathException if an error occurs computing the p-value
140 */
141 public double chiSquareTest(long[][] counts)
142 throws IllegalArgumentException, MathException {
143 checkArray(counts);
144 double df = ((double) counts.length -1) * ((double) counts[0].length - 1);
145 ChiSquaredDistribution chiSquaredDistribution =
146 getDistributionFactory().createChiSquareDistribution(df);
147 return 1 - chiSquaredDistribution.cumulativeProbability(chiSquare(counts));
148 }
149
150 /**
151 * @param counts array representation of 2-way table
152 * @param alpha significance level of the test
153 * @return true iff null hypothesis can be rejected with confidence
154 * 1 - alpha
155 * @throws IllegalArgumentException if preconditions are not met
156 * @throws MathException if an error occurs performing the test
157 */
158 public boolean chiSquareTest(long[][] counts, double alpha)
159 throws IllegalArgumentException, MathException {
160 if ((alpha <= 0) || (alpha > 0.5)) {
161 throw new IllegalArgumentException("bad significance level: " + alpha);
162 }
163 return (chiSquareTest(counts) < alpha);
164 }
165
166 /**
167 * Checks to make sure that the input long[][] array is rectangular,
168 * has at least 2 rows and 2 columns, and has all non-negative entries,
169 * throwing IllegalArgumentException if any of these checks fail.
170 *
171 * @param in input 2-way table to check
172 * @throws IllegalArgumentException if the array is not valid
173 */
174 private void checkArray(long[][] in) throws IllegalArgumentException {
175
176 if (in.length < 2) {
177 throw new IllegalArgumentException("Input table must have at least two rows");
178 }
179
180 if (in[0].length < 2) {
181 throw new IllegalArgumentException("Input table must have at least two columns");
182 }
183
184 if (!isRectangular(in)) {
185 throw new IllegalArgumentException("Input table must be rectangular");
186 }
187
188 if (!isNonNegative(in)) {
189 throw new IllegalArgumentException("All entries in input 2-way table must be non-negative");
190 }
191
192 }
193
194
195 /**
196 * Gets a DistributionFactory to use in creating ChiSquaredDistribution instances.
197 *
198 * @return a DistributionFactory
199 */
200 protected DistributionFactory getDistributionFactory() {
201 if (distributionFactory == null) {
202 distributionFactory = DistributionFactory.newInstance();
203 }
204 return distributionFactory;
205 }
206
207
208
209 /**
210 * Returns true iff input array is rectangular.
211 *
212 * @param in array to be tested
213 * @return true if the array is rectangular
214 * @throws NullPointerException if input array is null
215 * @throws ArrayIndexOutOfBoundsException if input array is empty
216 */
217 private boolean isRectangular(long[][] in) {
218 for (int i = 1; i < in.length; i++) {
219 if (in[i].length != in[0].length) {
220 return false;
221 }
222 }
223 return true;
224 }
225
226 /**
227 * Returns true iff all entries of the input array are > 0.
228 * Returns true if the array is non-null, but empty
229 *
230 * @param in array to be tested
231 * @return true if all entries of the array are positive
232 * @throws NullPointerException if input array is null
233 */
234 private boolean isPositive(double[] in) {
235 for (int i = 0; i < in.length; i ++) {
236 if (in[i] <= 0) {
237 return false;
238 }
239 }
240 return true;
241 }
242
243 /**
244 * Returns true iff all entries of the input array are >= 0.
245 * Returns true if the array is non-null, but empty
246 *
247 * @param in array to be tested
248 * @return true if all entries of the array are non-negative
249 * @throws NullPointerException if input array is null
250 */
251 private boolean isNonNegative(long[] in) {
252 for (int i = 0; i < in.length; i ++) {
253 if (in[i] < 0) {
254 return false;
255 }
256 }
257 return true;
258 }
259
260 /**
261 * Returns true iff all entries of (all subarrays of) the input array are >= 0.
262 * Returns true if the array is non-null, but empty
263 *
264 * @param in array to be tested
265 * @return true if all entries of the array are non-negative
266 * @throws NullPointerException if input array is null
267 */
268 private boolean isNonNegative(long[][] in) {
269 for (int i = 0; i < in.length; i ++) {
270 for (int j = 0; j < in[i].length; j++) {
271 if (in[i][j] < 0) {
272 return false;
273 }
274 }
275 }
276 return true;
277 }
278
279 }