001    /*
002     * Licensed to the Apache Software Foundation (ASF) under one or more
003     * contributor license agreements.  See the NOTICE file distributed with
004     * this work for additional information regarding copyright ownership.
005     * The ASF licenses this file to You under the Apache License, Version 2.0
006     * (the "License"); you may not use this file except in compliance with
007     * the License.  You may obtain a copy of the License at
008     *
009     *      http://www.apache.org/licenses/LICENSE-2.0
010     *
011     * Unless required by applicable law or agreed to in writing, software
012     * distributed under the License is distributed on an "AS IS" BASIS,
013     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014     * See the License for the specific language governing permissions and
015     * limitations under the License.
016     */
017    package org.apache.commons.math.stat.descriptive.rank;
018    
019    import java.io.Serializable;
020    import java.util.Arrays;
021    
022    import org.apache.commons.math.MathRuntimeException;
023    import org.apache.commons.math.stat.descriptive.AbstractUnivariateStatistic;
024    
025    /**
026     * Provides percentile computation.
027     * <p>
028     * There are several commonly used methods for estimating percentiles (a.k.a. 
029     * quantiles) based on sample data.  For large samples, the different methods 
030     * agree closely, but when sample sizes are small, different methods will give
031     * significantly different results.  The algorithm implemented here works as follows:
032     * <ol>
033     * <li>Let <code>n</code> be the length of the (sorted) array and 
034     * <code>0 < p <= 100</code> be the desired percentile.</li>
035     * <li>If <code> n = 1 </code> return the unique array element (regardless of 
036     * the value of <code>p</code>); otherwise </li>
037     * <li>Compute the estimated percentile position  
038     * <code> pos = p * (n + 1) / 100</code> and the difference, <code>d</code>
039     * between <code>pos</code> and <code>floor(pos)</code> (i.e. the fractional
040     * part of <code>pos</code>).  If <code>pos >= n</code> return the largest
041     * element in the array; otherwise</li>
042     * <li>Let <code>lower</code> be the element in position 
043     * <code>floor(pos)</code> in the array and let <code>upper</code> be the
044     * next element in the array.  Return <code>lower + d * (upper - lower)</code>
045     * </li>
046     * </ol></p>
047     * <p>
048     * To compute percentiles, the data must be (totally) ordered.  Input arrays
049     * are copied and then sorted using  {@link java.util.Arrays#sort(double[])}.
050     * The ordering used by <code>Arrays.sort(double[])</code> is the one determined
051     * by {@link java.lang.Double#compareTo(Double)}.  This ordering makes 
052     * <code>Double.NaN</code> larger than any other value (including 
053     * <code>Double.POSITIVE_INFINITY</code>).  Therefore, for example, the median
054     * (50th percentile) of  
055     * <code>{0, 1, 2, 3, 4, Double.NaN}</code> evaluates to <code>2.5.</code></p>
056     * <p>
057     * Since percentile estimation usually involves interpolation between array 
058     * elements, arrays containing  <code>NaN</code> or infinite values will often
059     * result in <code>NaN<code> or infinite values returned.</p>
060     * <p>
061     * <strong>Note that this implementation is not synchronized.</strong> If 
062     * multiple threads access an instance of this class concurrently, and at least
063     * one of the threads invokes the <code>increment()</code> or 
064     * <code>clear()</code> method, it must be synchronized externally.</p>
065     * 
066     * @version $Revision: 772119 $ $Date: 2009-05-06 05:43:28 -0400 (Wed, 06 May 2009) $
067     */
068    public class Percentile extends AbstractUnivariateStatistic implements Serializable {
069    
070        /** Serializable version identifier */
071        private static final long serialVersionUID = -8091216485095130416L; 
072           
073        /** Determines what percentile is computed when evaluate() is activated 
074         * with no quantile argument */
075        private double quantile = 0.0;
076    
077        /**
078         * Constructs a Percentile with a default quantile
079         * value of 50.0.
080         */
081        public Percentile() {
082            this(50.0);
083        }
084    
085        /**
086         * Constructs a Percentile with the specific quantile value.
087         * @param p the quantile
088         * @throws IllegalArgumentException  if p is not greater than 0 and less
089         * than or equal to 100
090         */
091        public Percentile(final double p) {
092            setQuantile(p);
093        }
094    
095        /**
096         * Copy constructor, creates a new {@code Percentile} identical
097         * to the {@code original}
098         * 
099         * @param original the {@code Percentile} instance to copy
100         */
101        public Percentile(Percentile original) {
102            copy(original, this);
103        }        
104        
105        /**
106         * Returns an estimate of the <code>p</code>th percentile of the values
107         * in the <code>values</code> array.
108         * <p>
109         * Calls to this method do not modify the internal <code>quantile</code>
110         * state of this statistic.</p>
111         * <p>
112         * <ul>
113         * <li>Returns <code>Double.NaN</code> if <code>values</code> has length 
114         * <code>0</code></li>
115         * <li>Returns (for any value of <code>p</code>) <code>values[0]</code>
116         *  if <code>values</code> has length <code>1</code></li>
117         * <li>Throws <code>IllegalArgumentException</code> if <code>values</code>
118         * is null or p is not a valid quantile value (p must be greater than 0
119         * and less than or equal to 100) </li>
120         * </ul></p>
121         * <p>
122         * See {@link Percentile} for a description of the percentile estimation
123         * algorithm used.</p>
124         * 
125         * @param values input array of values
126         * @param p the percentile value to compute
127         * @return the percentile value or Double.NaN if the array is empty
128         * @throws IllegalArgumentException if <code>values</code> is null 
129         *     or p is invalid
130         */
131        public double evaluate(final double[] values, final double p) {
132            test(values, 0, 0);
133            return evaluate(values, 0, values.length, p);
134        }
135    
136        /**
137         * Returns an estimate of the <code>quantile</code>th percentile of the
138         * designated values in the <code>values</code> array.  The quantile
139         * estimated is determined by the <code>quantile</code> property.
140         * <p>
141         * <ul>
142         * <li>Returns <code>Double.NaN</code> if <code>length = 0</code></li>
143         * <li>Returns (for any value of <code>quantile</code>) 
144         * <code>values[begin]</code> if <code>length = 1 </code></li>
145         * <li>Throws <code>IllegalArgumentException</code> if <code>values</code>
146         * is null,  or <code>start</code> or <code>length</code> 
147         * is invalid</li>
148         * </ul></p>
149         * <p>
150         * See {@link Percentile} for a description of the percentile estimation
151         * algorithm used.</p>
152         * 
153         * @param values the input array
154         * @param start index of the first array element to include
155         * @param length the number of elements to include
156         * @return the percentile value
157         * @throws IllegalArgumentException if the parameters are not valid
158         * 
159         */
160        @Override
161        public double evaluate( final double[] values, final int start, final int length) {
162            return evaluate(values, start, length, quantile);
163        }
164    
165         /**
166         * Returns an estimate of the <code>p</code>th percentile of the values
167         * in the <code>values</code> array, starting with the element in (0-based)
168         * position <code>begin</code> in the array and including <code>length</code>
169         * values.
170         * <p>
171         * Calls to this method do not modify the internal <code>quantile</code>
172         * state of this statistic.</p>
173         * <p>
174         * <ul>
175         * <li>Returns <code>Double.NaN</code> if <code>length = 0</code></li>
176         * <li>Returns (for any value of <code>p</code>) <code>values[begin]</code>
177         *  if <code>length = 1 </code></li>
178         * <li>Throws <code>IllegalArgumentException</code> if <code>values</code>
179         *  is null , <code>begin</code> or <code>length</code> is invalid, or 
180         * <code>p</code> is not a valid quantile value (p must be greater than 0
181         * and less than or equal to 100)</li>
182         * </ul></p>
183         * <p>
184         * See {@link Percentile} for a description of the percentile estimation
185         * algorithm used.</p>
186         * 
187         * @param values array of input values
188         * @param p  the percentile to compute
189         * @param begin  the first (0-based) element to include in the computation
190         * @param length  the number of array elements to include
191         * @return  the percentile value
192         * @throws IllegalArgumentException if the parameters are not valid or the
193         * input array is null
194         */
195        public double evaluate(final double[] values, final int begin, 
196                final int length, final double p) {
197    
198            test(values, begin, length);
199    
200            if ((p > 100) || (p <= 0)) {
201                throw MathRuntimeException.createIllegalArgumentException(
202                      "out of bounds quantile value: {0}, must be in (0, 100]", p);
203            }
204            if (length == 0) {
205                return Double.NaN;
206            }
207            if (length == 1) {
208                return values[begin]; // always return single value for n = 1
209            }
210            double n = length;
211            double pos = p * (n + 1) / 100;
212            double fpos = Math.floor(pos);
213            int intPos = (int) fpos;
214            double dif = pos - fpos;
215            double[] sorted = new double[length];
216            System.arraycopy(values, begin, sorted, 0, length);
217            Arrays.sort(sorted);
218    
219            if (pos < 1) {
220                return sorted[0];
221            }
222            if (pos >= n) {
223                return sorted[length - 1];
224            }
225            double lower = sorted[intPos - 1];
226            double upper = sorted[intPos];
227            return lower + dif * (upper - lower);
228        }
229    
230        /**
231         * Returns the value of the quantile field (determines what percentile is
232         * computed when evaluate() is called with no quantile argument).
233         * 
234         * @return quantile
235         */
236        public double getQuantile() {
237            return quantile;
238        }
239    
240        /**
241         * Sets the value of the quantile field (determines what percentile is 
242         * computed when evaluate() is called with no quantile argument).
243         * 
244         * @param p a value between 0 < p <= 100 
245         * @throws IllegalArgumentException  if p is not greater than 0 and less
246         * than or equal to 100
247         */
248        public void setQuantile(final double p) {
249            if (p <= 0 || p > 100) {
250                throw MathRuntimeException.createIllegalArgumentException(
251                      "out of bounds quantile value: {0}, must be in (0, 100]", p);
252            }
253            quantile = p;
254        }
255        
256        /**
257         * {@inheritDoc}
258         */
259        @Override
260        public Percentile copy() {
261            Percentile result = new Percentile();
262            copy(this, result);
263            return result;
264        }
265        
266        /**
267         * Copies source to dest.
268         * <p>Neither source nor dest can be null.</p>
269         * 
270         * @param source Percentile to copy
271         * @param dest Percentile to copy to
272         * @throws NullPointerException if either source or dest is null
273         */
274        public static void copy(Percentile source, Percentile dest) {
275            dest.quantile = source.quantile;
276        }
277    
278    }