1 /* 2 * Licensed to the Apache Software Foundation (ASF) under one or more 3 * contributor license agreements. See the NOTICE file distributed with 4 * this work for additional information regarding copyright ownership. 5 * The ASF licenses this file to You under the Apache License, Version 2.0 6 * (the "License"); you may not use this file except in compliance with 7 * the License. You may obtain a copy of the License at 8 * 9 * http://www.apache.org/licenses/LICENSE-2.0 10 * 11 * Unless required by applicable law or agreed to in writing, software 12 * distributed under the License is distributed on an "AS IS" BASIS, 13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 * See the License for the specific language governing permissions and 15 * limitations under the License. 16 */ 17 18 package org.apache.commons.math.random; 19 20 import java.io.IOException; 21 import java.io.File; 22 import java.net.URL; 23 import java.util.List; 24 25 import org.apache.commons.math.stat.descriptive.StatisticalSummary; 26 import org.apache.commons.math.stat.descriptive.SummaryStatistics; 27 28 /** 29 * Represents an <a href="http://random.mat.sbg.ac.at/~ste/dipl/node11.html"> 30 * empirical probability distribution</a> -- a probability distribution derived 31 * from observed data without making any assumptions about the functional form 32 * of the population distribution that the data come from.<p> 33 * Implementations of this interface maintain data structures, called 34 * <i>distribution digests</i>, that describe empirical distributions and 35 * support the following operations: <ul> 36 * <li>loading the distribution from a file of observed data values</li> 37 * <li>dividing the input data into "bin ranges" and reporting bin frequency 38 * counts (data for histogram)</li> 39 * <li>reporting univariate statistics describing the full set of data values 40 * as well as the observations within each bin</li> 41 * <li>generating random values from the distribution</li> 42 * </ul> 43 * Applications can use <code>EmpiricalDistribution</code> implementations to 44 * build grouped frequency histograms representing the input data or to 45 * generate random values "like" those in the input file -- i.e., the values 46 * generated will follow the distribution of the values in the file.</p> 47 * 48 * @version $Revision: 670469 $ $Date: 2008-06-23 04:01:38 -0400 (Mon, 23 Jun 2008) $ 49 */ 50 public interface EmpiricalDistribution { 51 52 /** 53 * Computes the empirical distribution from the provided 54 * array of numbers. 55 * 56 * @param dataArray the data array 57 */ 58 void load(double[] dataArray); 59 60 /** 61 * Computes the empirical distribution from the input file. 62 * 63 * @param file the input file 64 * @throws IOException if an IO error occurs 65 */ 66 void load(File file) throws IOException; 67 68 /** 69 * Computes the empirical distribution using data read from a URL. 70 * 71 * @param url url of the input file 72 * @throws IOException if an IO error occurs 73 */ 74 void load(URL url) throws IOException; 75 76 /** 77 * Generates a random value from this distribution. 78 * <strong>Preconditions:</strong><ul> 79 * <li>the distribution must be loaded before invoking this method</li></ul> 80 * @return the random value. 81 * 82 * @throws IllegalStateException if the distribution has not been loaded 83 */ 84 double getNextValue() throws IllegalStateException; 85 86 87 /** 88 * Returns a 89 * {@link org.apache.commons.math.stat.descriptive.StatisticalSummary} 90 * describing this distribution. 91 * <strong>Preconditions:</strong><ul> 92 * <li>the distribution must be loaded before invoking this method</li> 93 * </ul> 94 * 95 * @return the sample statistics 96 * @throws IllegalStateException if the distribution has not been loaded 97 */ 98 StatisticalSummary getSampleStats() throws IllegalStateException; 99 100 /** 101 * Property indicating whether or not the distribution has been loaded. 102 * 103 * @return true if the distribution has been loaded 104 */ 105 boolean isLoaded(); 106 107 /** 108 * Returns the number of bins. 109 * 110 * @return the number of bins 111 */ 112 int getBinCount(); 113 114 /** 115 * Returns a list of 116 * {@link org.apache.commons.math.stat.descriptive.SummaryStatistics} 117 * containing statistics describing the values in each of the bins. The 118 * List is indexed on the bin number. 119 * 120 * @return List of bin statistics 121 */ 122 List<SummaryStatistics> getBinStats(); 123 124 /** 125 * Returns the array of upper bounds for the bins. Bins are: <br/> 126 * [min,upperBounds[0]],(upperBounds[0],upperBounds[1]],..., 127 * (upperBounds[binCount-1],max]. 128 * 129 * @return array of bin upper bounds 130 */ 131 double[] getUpperBounds(); 132 133 }