View Javadoc

1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *      http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  package org.apache.commons.math.stat.descriptive.rank;
18  
19  import java.io.Serializable;
20  import java.util.Arrays;
21  
22  import org.apache.commons.math.MathRuntimeException;
23  import org.apache.commons.math.stat.descriptive.AbstractUnivariateStatistic;
24  
25  /**
26   * Provides percentile computation.
27   * <p>
28   * There are several commonly used methods for estimating percentiles (a.k.a. 
29   * quantiles) based on sample data.  For large samples, the different methods 
30   * agree closely, but when sample sizes are small, different methods will give
31   * significantly different results.  The algorithm implemented here works as follows:
32   * <ol>
33   * <li>Let <code>n</code> be the length of the (sorted) array and 
34   * <code>0 < p <= 100</code> be the desired percentile.</li>
35   * <li>If <code> n = 1 </code> return the unique array element (regardless of 
36   * the value of <code>p</code>); otherwise </li>
37   * <li>Compute the estimated percentile position  
38   * <code> pos = p * (n + 1) / 100</code> and the difference, <code>d</code>
39   * between <code>pos</code> and <code>floor(pos)</code> (i.e. the fractional
40   * part of <code>pos</code>).  If <code>pos >= n</code> return the largest
41   * element in the array; otherwise</li>
42   * <li>Let <code>lower</code> be the element in position 
43   * <code>floor(pos)</code> in the array and let <code>upper</code> be the
44   * next element in the array.  Return <code>lower + d * (upper - lower)</code>
45   * </li>
46   * </ol></p>
47   * <p>
48   * To compute percentiles, the data must be (totally) ordered.  Input arrays
49   * are copied and then sorted using  {@link java.util.Arrays#sort(double[])}.
50   * The ordering used by <code>Arrays.sort(double[])</code> is the one determined
51   * by {@link java.lang.Double#compareTo(Double)}.  This ordering makes 
52   * <code>Double.NaN</code> larger than any other value (including 
53   * <code>Double.POSITIVE_INFINITY</code>).  Therefore, for example, the median
54   * (50th percentile) of  
55   * <code>{0, 1, 2, 3, 4, Double.NaN}</code> evaluates to <code>2.5.</code></p>
56   * <p>
57   * Since percentile estimation usually involves interpolation between array 
58   * elements, arrays containing  <code>NaN</code> or infinite values will often
59   * result in <code>NaN<code> or infinite values returned.</p>
60   * <p>
61   * <strong>Note that this implementation is not synchronized.</strong> If 
62   * multiple threads access an instance of this class concurrently, and at least
63   * one of the threads invokes the <code>increment()</code> or 
64   * <code>clear()</code> method, it must be synchronized externally.</p>
65   * 
66   * @version $Revision: 772119 $ $Date: 2009-05-06 05:43:28 -0400 (Wed, 06 May 2009) $
67   */
68  public class Percentile extends AbstractUnivariateStatistic implements Serializable {
69  
70      /** Serializable version identifier */
71      private static final long serialVersionUID = -8091216485095130416L; 
72         
73      /** Determines what percentile is computed when evaluate() is activated 
74       * with no quantile argument */
75      private double quantile = 0.0;
76  
77      /**
78       * Constructs a Percentile with a default quantile
79       * value of 50.0.
80       */
81      public Percentile() {
82          this(50.0);
83      }
84  
85      /**
86       * Constructs a Percentile with the specific quantile value.
87       * @param p the quantile
88       * @throws IllegalArgumentException  if p is not greater than 0 and less
89       * than or equal to 100
90       */
91      public Percentile(final double p) {
92          setQuantile(p);
93      }
94  
95      /**
96       * Copy constructor, creates a new {@code Percentile} identical
97       * to the {@code original}
98       * 
99       * @param original the {@code Percentile} instance to copy
100      */
101     public Percentile(Percentile original) {
102         copy(original, this);
103     }        
104     
105     /**
106      * Returns an estimate of the <code>p</code>th percentile of the values
107      * in the <code>values</code> array.
108      * <p>
109      * Calls to this method do not modify the internal <code>quantile</code>
110      * state of this statistic.</p>
111      * <p>
112      * <ul>
113      * <li>Returns <code>Double.NaN</code> if <code>values</code> has length 
114      * <code>0</code></li>
115      * <li>Returns (for any value of <code>p</code>) <code>values[0]</code>
116      *  if <code>values</code> has length <code>1</code></li>
117      * <li>Throws <code>IllegalArgumentException</code> if <code>values</code>
118      * is null or p is not a valid quantile value (p must be greater than 0
119      * and less than or equal to 100) </li>
120      * </ul></p>
121      * <p>
122      * See {@link Percentile} for a description of the percentile estimation
123      * algorithm used.</p>
124      * 
125      * @param values input array of values
126      * @param p the percentile value to compute
127      * @return the percentile value or Double.NaN if the array is empty
128      * @throws IllegalArgumentException if <code>values</code> is null 
129      *     or p is invalid
130      */
131     public double evaluate(final double[] values, final double p) {
132         test(values, 0, 0);
133         return evaluate(values, 0, values.length, p);
134     }
135 
136     /**
137      * Returns an estimate of the <code>quantile</code>th percentile of the
138      * designated values in the <code>values</code> array.  The quantile
139      * estimated is determined by the <code>quantile</code> property.
140      * <p>
141      * <ul>
142      * <li>Returns <code>Double.NaN</code> if <code>length = 0</code></li>
143      * <li>Returns (for any value of <code>quantile</code>) 
144      * <code>values[begin]</code> if <code>length = 1 </code></li>
145      * <li>Throws <code>IllegalArgumentException</code> if <code>values</code>
146      * is null,  or <code>start</code> or <code>length</code> 
147      * is invalid</li>
148      * </ul></p>
149      * <p>
150      * See {@link Percentile} for a description of the percentile estimation
151      * algorithm used.</p>
152      * 
153      * @param values the input array
154      * @param start index of the first array element to include
155      * @param length the number of elements to include
156      * @return the percentile value
157      * @throws IllegalArgumentException if the parameters are not valid
158      * 
159      */
160     @Override
161     public double evaluate( final double[] values, final int start, final int length) {
162         return evaluate(values, start, length, quantile);
163     }
164 
165      /**
166      * Returns an estimate of the <code>p</code>th percentile of the values
167      * in the <code>values</code> array, starting with the element in (0-based)
168      * position <code>begin</code> in the array and including <code>length</code>
169      * values.
170      * <p>
171      * Calls to this method do not modify the internal <code>quantile</code>
172      * state of this statistic.</p>
173      * <p>
174      * <ul>
175      * <li>Returns <code>Double.NaN</code> if <code>length = 0</code></li>
176      * <li>Returns (for any value of <code>p</code>) <code>values[begin]</code>
177      *  if <code>length = 1 </code></li>
178      * <li>Throws <code>IllegalArgumentException</code> if <code>values</code>
179      *  is null , <code>begin</code> or <code>length</code> is invalid, or 
180      * <code>p</code> is not a valid quantile value (p must be greater than 0
181      * and less than or equal to 100)</li>
182      * </ul></p>
183      * <p>
184      * See {@link Percentile} for a description of the percentile estimation
185      * algorithm used.</p>
186      * 
187      * @param values array of input values
188      * @param p  the percentile to compute
189      * @param begin  the first (0-based) element to include in the computation
190      * @param length  the number of array elements to include
191      * @return  the percentile value
192      * @throws IllegalArgumentException if the parameters are not valid or the
193      * input array is null
194      */
195     public double evaluate(final double[] values, final int begin, 
196             final int length, final double p) {
197 
198         test(values, begin, length);
199 
200         if ((p > 100) || (p <= 0)) {
201             throw MathRuntimeException.createIllegalArgumentException(
202                   "out of bounds quantile value: {0}, must be in (0, 100]", p);
203         }
204         if (length == 0) {
205             return Double.NaN;
206         }
207         if (length == 1) {
208             return values[begin]; // always return single value for n = 1
209         }
210         double n = length;
211         double pos = p * (n + 1) / 100;
212         double fpos = Math.floor(pos);
213         int intPos = (int) fpos;
214         double dif = pos - fpos;
215         double[] sorted = new double[length];
216         System.arraycopy(values, begin, sorted, 0, length);
217         Arrays.sort(sorted);
218 
219         if (pos < 1) {
220             return sorted[0];
221         }
222         if (pos >= n) {
223             return sorted[length - 1];
224         }
225         double lower = sorted[intPos - 1];
226         double upper = sorted[intPos];
227         return lower + dif * (upper - lower);
228     }
229 
230     /**
231      * Returns the value of the quantile field (determines what percentile is
232      * computed when evaluate() is called with no quantile argument).
233      * 
234      * @return quantile
235      */
236     public double getQuantile() {
237         return quantile;
238     }
239 
240     /**
241      * Sets the value of the quantile field (determines what percentile is 
242      * computed when evaluate() is called with no quantile argument).
243      * 
244      * @param p a value between 0 < p <= 100 
245      * @throws IllegalArgumentException  if p is not greater than 0 and less
246      * than or equal to 100
247      */
248     public void setQuantile(final double p) {
249         if (p <= 0 || p > 100) {
250             throw MathRuntimeException.createIllegalArgumentException(
251                   "out of bounds quantile value: {0}, must be in (0, 100]", p);
252         }
253         quantile = p;
254     }
255     
256     /**
257      * {@inheritDoc}
258      */
259     @Override
260     public Percentile copy() {
261         Percentile result = new Percentile();
262         copy(this, result);
263         return result;
264     }
265     
266     /**
267      * Copies source to dest.
268      * <p>Neither source nor dest can be null.</p>
269      * 
270      * @param source Percentile to copy
271      * @param dest Percentile to copy to
272      * @throws NullPointerException if either source or dest is null
273      */
274     public static void copy(Percentile source, Percentile dest) {
275         dest.quantile = source.quantile;
276     }
277 
278 }