View Javadoc

1   /*
2    * Copyright 2003-2004 The Apache Software Foundation.
3    *
4    * Licensed under the Apache License, Version 2.0 (the "License");
5    * you may not use this file except in compliance with the License.
6    * You may obtain a copy of the License at
7    *
8    *      http://www.apache.org/licenses/LICENSE-2.0
9    *
10   * Unless required by applicable law or agreed to in writing, software
11   * distributed under the License is distributed on an "AS IS" BASIS,
12   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13   * See the License for the specific language governing permissions and
14   * limitations under the License.
15   */
16  
17  package org.apache.commons.math.random;
18  
19  import java.io.IOException;
20  import java.io.File;
21  import java.net.URL;
22  import java.util.List;
23  
24  import org.apache.commons.math.stat.descriptive.StatisticalSummary;
25  
26  /**
27   * Represents an <a href="http://random.mat.sbg.ac.at/~ste/dipl/node11.html">
28   * empirical probability distribution</a> -- a probability distribution derived
29   * from observed data without making any assumptions about the functional form
30   * of the population distribution that the data come from.<p>
31   * Implementations of this interface maintain data structures, called
32   * <i>distribution digests</i>, that describe empirical distributions and
33   * support the following operations: <ul>
34   * <li>loading the distribution from a file of observed data values</li>
35   * <li>dividing the input data into "bin ranges" and reporting bin frequency
36   *     counts (data for histogram)</li>
37   * <li>reporting univariate statistics describing the full set of data values
38   *     as well as the observations within each bin</li>
39   * <li>generating random values from the distribution</li>
40   * </ul>
41   * Applications can use <code>EmpiricalDistribution</code> implementations to
42   * build grouped frequency histograms representing the input data or to
43   * generate random values "like" those in the input file -- i.e., the values
44   * generated will follow the distribution of the values in the file.
45   * 
46   * @version $Revision: 155427 $ $Date: 2005-02-26 06:11:52 -0700 (Sat, 26 Feb 2005) $
47   */
48  public interface EmpiricalDistribution {
49  
50      /**
51       * Computes the empirical distribution from the provided
52       * array of numbers.
53       * 
54       * @param dataArray the array of observed data values
55       */
56      void load(double[] dataArray);
57  
58      /**
59       * Computes the empirical distribution from the input file.
60       * 
61       * @param file the input file of observed data values
62       * @throws IOException if an IO error occurs
63       */
64      void load(File file) throws IOException;
65  
66      /**
67       * Computes the empirical distribution using data read from a URL.
68       * 
69       * @param url url of the input file
70       * @throws IOException if an IO error occurs
71       */
72      void load(URL url) throws IOException;
73  
74      /**
75       * Generates a random value from this distribution.
76       * <strong>Preconditions:</strong><ul>
77       * <li>the distribution must be loaded before invoking this method</li></ul>
78       * @return the random value.
79       * 
80       * @throws IllegalStateException if the distribution has not been loaded (see {@link #isLoaded()})
81       */
82      double getNextValue() throws IllegalStateException;
83  
84  
85      /**
86       * Returns a 
87       * {@link org.apache.commons.math.stat.descriptive.StatisticalSummary} 
88       * describing this distribution.
89       * <strong>Preconditions:</strong><ul>
90       * <li>the distribution must be loaded before invoking this method</li>
91       * </ul>
92       * 
93       * @return the sample statistics
94       * @throws IllegalStateException if the distribution has not been loaded (see {@link #isLoaded()})
95       */
96      StatisticalSummary getSampleStats() throws IllegalStateException;
97  
98      /**
99       * Reports whether or not the distribution has been loaded.
100      * 
101      * @return <code>true</code> if the distribution has been loaded, <code>false</code> otherwise
102      */
103     boolean isLoaded();
104 
105      /**
106      * Returns the number of bins.
107      * 
108      * @return the number of bins
109      */
110     int getBinCount();
111 
112     /**
113      * Returns a list of 
114      * {@link org.apache.commons.math.stat.descriptive.SummaryStatistics}
115      * containing statistics describing the values in each of the bins.  The
116      * List is indexed on the bin number.
117      * 
118      * @return List of <code>SummaryStatistics</code>, one entry per bin
119      */
120     List getBinStats();
121 
122     /**
123      * Returns the array of upper bounds for the bins.  Bins are: <br>
124      * [min,upperBounds[0]],(upperBounds[0],upperBounds[1]],...,
125      *  (upperBounds[binCount-2],upperBounds[binCount-1]].
126      * NOTE(review): assumes one bound per bin, the last presumably equal to max -- confirm with implementations.
127      * @return array of bin upper bounds
128      */
129     double[] getUpperBounds();
130 
131 }