View Javadoc

1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *      http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  
18  package org.apache.commons.math.distribution;
19  
20  import java.io.Serializable;
21  
22  import org.apache.commons.math.MathRuntimeException;
23  import org.apache.commons.math.util.MathUtils;
24  
25  /**
26   * The default implementation of {@link HypergeometricDistribution}.
27   *
28   * @version $Revision: 772119 $ $Date: 2009-05-06 05:43:28 -0400 (Wed, 06 May 2009) $
29   */
30  public class HypergeometricDistributionImpl extends AbstractIntegerDistribution
31      implements HypergeometricDistribution, Serializable 
32  {
33  
34      /** Serializable version identifier */
35      private static final long serialVersionUID = -436928820673516179L;
36  
37      /** The number of successes in the population. */
38      private int numberOfSuccesses;
39      
40      /** The population size. */
41      private int populationSize;
42      
43      /** The sample size. */
44      private int sampleSize;
45      
46      /**
47       * Construct a new hypergeometric distribution with the given the population
48       * size, the number of successes in the population, and the sample size.
49       * @param populationSize the population size.
50       * @param numberOfSuccesses number of successes in the population.
51       * @param sampleSize the sample size.
52       */
53      public HypergeometricDistributionImpl(int populationSize,
54          int numberOfSuccesses, int sampleSize) {
55          super();
56          if (numberOfSuccesses > populationSize) {
57              throw MathRuntimeException.createIllegalArgumentException(
58                  "number of successes ({0}) must be less than or equal to population size ({1})",
59                  numberOfSuccesses, populationSize);
60          }
61          if (sampleSize > populationSize) {
62              throw MathRuntimeException.createIllegalArgumentException(
63                    "sample size ({0}) must be less than or equal to population size ({1})",
64                    sampleSize, populationSize);
65          }
66          setPopulationSize(populationSize);
67          setSampleSize(sampleSize);
68          setNumberOfSuccesses(numberOfSuccesses);
69      }
70  
71      /**
72       * For this distribution, X, this method returns P(X ≤ x).
73       * @param x the value at which the PDF is evaluated.
74       * @return PDF for this distribution. 
75       */
76      @Override
77      public double cumulativeProbability(int x) {
78          double ret;
79          
80          int n = getPopulationSize();
81          int m = getNumberOfSuccesses();
82          int k = getSampleSize();
83  
84          int[] domain = getDomain(n, m, k);
85          if (x < domain[0]) {
86              ret = 0.0;
87          } else if(x >= domain[1]) {
88              ret = 1.0;
89          } else {
90              ret = innerCumulativeProbability(domain[0], x, 1, n, m, k);
91          }
92          
93          return ret;
94      }
95  
96      /**
97       * Return the domain for the given hypergeometric distribution parameters.
98       * @param n the population size.
99       * @param m number of successes in the population.
100      * @param k the sample size.
101      * @return a two element array containing the lower and upper bounds of the
102      *         hypergeometric distribution.  
103      */
104     private int[] getDomain(int n, int m, int k){
105         return new int[]{
106             getLowerDomain(n, m, k),
107             getUpperDomain(m, k)
108         };
109     }
110     
111     /**
112      * Access the domain value lower bound, based on <code>p</code>, used to
113      * bracket a PDF root.
114      * 
115      * @param p the desired probability for the critical value
116      * @return domain value lower bound, i.e.
117      *         P(X &lt; <i>lower bound</i>) &lt; <code>p</code> 
118      */
119     @Override
120     protected int getDomainLowerBound(double p) {
121         return getLowerDomain(getPopulationSize(), getNumberOfSuccesses(),
122             getSampleSize());
123     }
124     
125     /**
126      * Access the domain value upper bound, based on <code>p</code>, used to
127      * bracket a PDF root.
128      * 
129      * @param p the desired probability for the critical value
130      * @return domain value upper bound, i.e.
131      *         P(X &lt; <i>upper bound</i>) &gt; <code>p</code> 
132      */
133     @Override
134     protected int getDomainUpperBound(double p) {
135         return getUpperDomain(getSampleSize(), getNumberOfSuccesses());
136     }
137 
138     /**
139      * Return the lowest domain value for the given hypergeometric distribution
140      * parameters.
141      * @param n the population size.
142      * @param m number of successes in the population.
143      * @param k the sample size.
144      * @return the lowest domain value of the hypergeometric distribution.  
145      */
146     private int getLowerDomain(int n, int m, int k) {
147         return Math.max(0, m - (n - k));
148     }
149 
150     /**
151      * Access the number of successes.
152      * @return the number of successes.
153      */
154     public int getNumberOfSuccesses() {
155         return numberOfSuccesses;
156     }
157 
158     /**
159      * Access the population size.
160      * @return the population size.
161      */
162     public int getPopulationSize() {
163         return populationSize;
164     }
165 
166     /**
167      * Access the sample size.
168      * @return the sample size.
169      */
170     public int getSampleSize() {
171         return sampleSize;
172     }
173 
174     /**
175      * Return the highest domain value for the given hypergeometric distribution
176      * parameters.
177      * @param m number of successes in the population.
178      * @param k the sample size.
179      * @return the highest domain value of the hypergeometric distribution.  
180      */
181     private int getUpperDomain(int m, int k){
182         return Math.min(k, m);
183     }
184 
185     /**
186      * For this distribution, X, this method returns P(X = x).
187      * 
188      * @param x the value at which the PMF is evaluated.
189      * @return PMF for this distribution. 
190      */
191     public double probability(int x) {
192         double ret;
193         
194         int n = getPopulationSize();
195         int m = getNumberOfSuccesses();
196         int k = getSampleSize();
197 
198         int[] domain = getDomain(n, m, k);
199         if(x < domain[0] || x > domain[1]){
200             ret = 0.0;
201         } else {
202             ret = probability(n, m, k, x);
203         }
204         
205         return ret;
206     }
207     
208     /**
209      * For the distribution, X, defined by the given hypergeometric distribution
210      * parameters, this method returns P(X = x).
211      * 
212      * @param n the population size.
213      * @param m number of successes in the population.
214      * @param k the sample size.
215      * @param x the value at which the PMF is evaluated.
216      * @return PMF for the distribution. 
217      */
218     private double probability(int n, int m, int k, int x) {
219         return Math.exp(MathUtils.binomialCoefficientLog(m, x) +
220             MathUtils.binomialCoefficientLog(n - m, k - x) -
221             MathUtils.binomialCoefficientLog(n, k));
222     }
223 
224     /**
225      * Modify the number of successes.
226      * @param num the new number of successes.
227      * @throws IllegalArgumentException if <code>num</code> is negative.
228      */
229     public void setNumberOfSuccesses(int num) {
230         if(num < 0){
231             throw MathRuntimeException.createIllegalArgumentException(
232                   "number of successes must be non-negative ({0})",
233                   num);
234         }
235         numberOfSuccesses = num;
236     }
237 
238     /**
239      * Modify the population size.
240      * @param size the new population size.
241      * @throws IllegalArgumentException if <code>size</code> is not positive.
242      */
243     public void setPopulationSize(int size) {
244         if(size <= 0){
245             throw MathRuntimeException.createIllegalArgumentException(
246                   "population size must be positive ({0})",
247                   size);
248         }
249         populationSize = size;
250     }
251     
252     /**
253      * Modify the sample size.
254      * @param size the new sample size.
255      * @throws IllegalArgumentException if <code>size</code> is negative.
256      */
257     public void setSampleSize(int size) {
258         if (size < 0) {
259             throw MathRuntimeException.createIllegalArgumentException(
260                   "sample size must be positive ({0})",
261                   size);
262         }    
263         sampleSize = size;
264     }
265 
266     /**
267      * For this distribution, X, this method returns P(X &ge; x).
268      * @param x the value at which the CDF is evaluated.
269      * @return upper tail CDF for this distribution.
270      * @since 1.1
271      */
272     public double upperCumulativeProbability(int x) {
273         double ret;
274         
275         int n = getPopulationSize();
276         int m = getNumberOfSuccesses();
277         int k = getSampleSize();
278 
279         int[] domain = getDomain(n, m, k);
280         if (x < domain[0]) {
281             ret = 1.0;
282         } else if(x > domain[1]) {
283             ret = 0.0;
284         } else {
285             ret = innerCumulativeProbability(domain[1], x, -1, n, m, k);
286         }
287         
288         return ret;
289     }
290     
291     /**
292      * For this distribution, X, this method returns P(x0 &le; X &le; x1).  This
293      * probability is computed by summing the point probabilities for the values
294      * x0, x0 + 1, x0 + 2, ..., x1, in the order directed by dx. 
295      * @param x0 the inclusive, lower bound
296      * @param x1 the inclusive, upper bound
297      * @param dx the direction of summation. 1 indicates summing from x0 to x1.
298      *           0 indicates summing from x1 to x0.
299      * @param n the population size.
300      * @param m number of successes in the population.
301      * @param k the sample size.
302      * @return P(x0 &le; X &le; x1). 
303      */
304     private double innerCumulativeProbability(
305         int x0, int x1, int dx, int n, int m, int k)
306     {
307         double ret = probability(n, m, k, x0);
308         while (x0 != x1) {
309             x0 += dx;
310             ret += probability(n, m, k, x0);
311         }
312         return ret;
313     }
314 }