View Javadoc

1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *      http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  
18  package org.apache.commons.math.stat.ranking;
19  
20  import java.util.ArrayList;
21  import java.util.Arrays;
22  import java.util.Iterator;
23  import java.util.List;
24  
25  import org.apache.commons.math.random.RandomData;
26  import org.apache.commons.math.random.RandomDataImpl;
27  import org.apache.commons.math.random.RandomGenerator;
28  
29  
30  /**
31   * <p> Ranking based on the natural ordering on doubles.</p>
32   * <p>NaNs are treated according to the configured {@link NaNStrategy} and ties
33   * are handled using the selected {@link TiesStrategy}. 
34   * Configuration settings are supplied in optional constructor arguments.
35   * Defaults are {@link NaNStrategy#MAXIMAL} and {@link TiesStrategy#AVERAGE},
36   * respectively. When using {@link TiesStrategy#RANDOM}, a 
37   * {@link RandomGenerator} may be supplied as a constructor argument.</p>
38   * <p>Examples:
39   * <table border="1" cellpadding="3">
40   * <tr><th colspan="3">
41   * Input data: (20, 17, 30, 42.3, 17, 50, Double.NaN, Double.NEGATIVE_INFINITY, 17)
42   * </th></tr>
43   * <tr><th>NaNStrategy</th><th>TiesStrategy</th>
44   * <th><code>rank(data)</code></th>
45   * <tr>
46   * <td>default (NaNs maximal)</td>
47   * <td>default (ties averaged)</td>
48   * <td>(5, 3, 6, 7, 3, 8, 9, 1, 3)</td></tr>
49   * <tr>
50   * <td>default (NaNs maximal)</td>
51   * <td>MINIMUM</td>
52   * <td>(5, 2, 6, 7, 2, 8, 9, 1, 2)</td></tr>
53   * <tr>
54   * <td>MINIMAL</td>
55   * <td>default (ties averaged)</td>
56   * <td>(6, 4, 7, 8, 4, 9, 1.5, 1.5, 4)</td></tr>
57   * <tr>
58   * <td>REMOVED</td>
59   * <td>SEQUENTIAL</td>
60   * <td>(5, 2, 6, 7, 3, 8, 1, 4)</td></tr>
61   * <tr>
62   * <td>MINIMAL</td>
63   * <td>MAXIMUM</td>
64   * <td>(6, 5, 7, 8, 5, 9, 2, 2, 5)</td></tr></table></p>
65   * 
66   * @since 2.0
67   * @version $Revision: 799857 $ $Date: 2009-08-01 09:07:12 -0400 (Sat, 01 Aug 2009) $
68   */
69  public class NaturalRanking implements RankingAlgorithm {
70     
71      /** NaN strategy - defaults to NaNs maximal */
72      private final NaNStrategy nanStrategy;
73      
74      /** Ties strategy - defaults to ties averaged */
75      private final TiesStrategy tiesStrategy;
76      
77      /** Source of random data - used only when ties strategy is RANDOM */
78      private final RandomData randomData;
79      
80      /** default NaN strategy */
81      public static final NaNStrategy DEFAULT_NAN_STRATEGY = NaNStrategy.MAXIMAL;
82      
83      /** default ties strategy */
84      public static final TiesStrategy DEFAULT_TIES_STRATEGY = TiesStrategy.AVERAGE;
85      
86      /**
87       * Create a NaturalRanking with default strategies for handling ties and NaNs.
88       */
89      public NaturalRanking() {
90          super();
91          tiesStrategy = DEFAULT_TIES_STRATEGY;
92          nanStrategy = DEFAULT_NAN_STRATEGY;
93          randomData = null;
94      }
95  
96      /**
97       * Create a NaturalRanking with the given TiesStrategy.
98       * 
99       * @param tiesStrategy the TiesStrategy to use
100      */
101     public NaturalRanking(TiesStrategy tiesStrategy) {
102         super();
103         this.tiesStrategy = tiesStrategy;
104         nanStrategy = DEFAULT_NAN_STRATEGY;
105         randomData = new RandomDataImpl();
106     }
107 
108     /**
109      * Create a NaturalRanking with the given NaNStrategy.
110      * 
111      * @param nanStrategy the NaNStrategy to use
112      */
113     public NaturalRanking(NaNStrategy nanStrategy) {
114         super();
115         this.nanStrategy = nanStrategy;
116         tiesStrategy = DEFAULT_TIES_STRATEGY;
117         randomData = null; 
118     }
119 
120     /**
121      * Create a NaturalRanking with the given NaNStrategy and TiesStrategy.
122      * 
123      * @param nanStrategy NaNStrategy to use
124      * @param tiesStrategy TiesStrategy to use
125      */
126     public NaturalRanking(NaNStrategy nanStrategy, TiesStrategy tiesStrategy) {
127         super();
128         this.nanStrategy = nanStrategy;
129         this.tiesStrategy = tiesStrategy;
130         randomData = new RandomDataImpl();
131     }
132     
133     /**
134      * Create a NaturalRanking with TiesStrategy.RANDOM and the given
135      * RandomGenerator as the source of random data.
136      * 
137      * @param randomGenerator source of random data
138      */
139     public NaturalRanking(RandomGenerator randomGenerator) {
140         super();
141         this.tiesStrategy = TiesStrategy.RANDOM;
142         nanStrategy = DEFAULT_NAN_STRATEGY;
143         randomData = new RandomDataImpl(randomGenerator);
144     }
145 
146 
147     /**
148      * Create a NaturalRanking with the given NaNStrategy, TiesStrategy.RANDOM
149      * and the given source of random data.
150      * 
151      * @param nanStrategy NaNStrategy to use
152      * @param randomGenerator source of random data
153      */
154     public NaturalRanking(NaNStrategy nanStrategy,
155             RandomGenerator randomGenerator) {
156         super();
157         this.nanStrategy = nanStrategy;
158         this.tiesStrategy = TiesStrategy.RANDOM;
159         randomData = new RandomDataImpl(randomGenerator);
160     }
161     
162     /**
163      * Return the NaNStrategy
164      * 
165      * @return returns the NaNStrategy
166      */
167     public NaNStrategy getNanStrategy() {
168         return nanStrategy;
169     }
170 
171     /**
172      * Return the TiesStrategy
173      * 
174      * @return the TiesStrategy
175      */
176     public TiesStrategy getTiesStrategy() {
177         return tiesStrategy;
178     }
179 
180     /**
181      * Rank <code>data</code> using the natural ordering on Doubles, with
182      * NaN values handled according to <code>nanStrategy</code> and ties
183      * resolved using <code>tiesStrategy.</code>
184      * 
185      * @param data array to be ranked
186      * @return array of ranks
187      */
188     public double[] rank(double[] data) {
189         
190         // Array recording initial positions of data to be ranked
191         IntDoublePair[] ranks = new IntDoublePair[data.length];  
192         for (int i = 0; i < data.length; i++) {
193             ranks[i] = new IntDoublePair(data[i], i);
194         }
195         
196         // Recode, remove or record positions of NaNs
197         List<Integer> nanPositions = null;
198         switch (nanStrategy) {
199             case MAXIMAL: // Replace NaNs with +INFs
200                 recodeNaNs(ranks, Double.POSITIVE_INFINITY);
201                 break;
202             case MINIMAL: // Replace NaNs with -INFs
203                 recodeNaNs(ranks, Double.NEGATIVE_INFINITY);
204                 break;
205             case REMOVED: // Drop NaNs from data
206                 ranks = removeNaNs(ranks);
207                 break;
208             case FIXED:   // Record positions of NaNs
209                 nanPositions = getNanPositions(ranks);
210                 break;
211         }
212         
213         // Sort the IntDoublePairs
214         Arrays.sort(ranks);
215         
216         // Walk the sorted array, filling output array using sorted positions,
217         // resolving ties as we go
218         double[] out = new double[ranks.length];
219         int pos = 1;  // position in sorted array 
220         out[ranks[0].getPosition()] = pos;
221         List<Integer> tiesTrace = new ArrayList<Integer>();
222         tiesTrace.add(ranks[0].getPosition());
223         for (int i = 1; i < ranks.length; i++) {
224             if (Double.compare(ranks[i].getValue(), ranks[i - 1].getValue()) > 0) {
225                 // tie sequence has ended (or had length 1)
226                 pos = i + 1;
227                 if (tiesTrace.size() > 1) {  // if seq is nontrivial, resolve
228                     resolveTie(out, tiesTrace);
229                 }
230                 tiesTrace = new ArrayList<Integer>();
231                 tiesTrace.add(ranks[i].getPosition());
232             } else {
233                 // tie sequence continues
234                 tiesTrace.add(ranks[i].getPosition());
235             }
236             out[ranks[i].getPosition()] = pos;
237         }
238         if (tiesTrace.size() > 1) {  // handle tie sequence at end
239             resolveTie(out, tiesTrace);
240         }
241         if (nanStrategy == NaNStrategy.FIXED) {
242             restoreNaNs(out, nanPositions);
243         }
244         return out;
245     }
246     
247     /**
248      * Returns an array that is a copy of the input array with IntDoublePairs
249      * having NaN values removed.
250      * 
251      * @param ranks input array
252      * @return array with NaN-valued entries removed
253      */
254     private IntDoublePair[] removeNaNs(IntDoublePair[] ranks) {
255         if (!containsNaNs(ranks)) {
256             return ranks;
257         }
258         IntDoublePair[] outRanks = new IntDoublePair[ranks.length];
259         int j = 0;
260         for (int i = 0; i < ranks.length; i++) {
261             if (Double.isNaN(ranks[i].getValue())) {
262                 // drop, but adjust original ranks of later elements
263                 for (int k = i + 1; k < ranks.length; k++) {
264                     ranks[k] = new IntDoublePair(
265                             ranks[k].getValue(), ranks[k].getPosition() - 1);
266                 }
267             } else {
268                 outRanks[j] = new IntDoublePair(
269                         ranks[i].getValue(), ranks[i].getPosition());
270                 j++;
271             }
272         }
273         IntDoublePair[] returnRanks = new IntDoublePair[j];
274         System.arraycopy(outRanks, 0, returnRanks, 0, j);
275         return returnRanks;
276     }
277 
278     /**
279      * Recodes NaN values to the given value. 
280      * 
281      * @param ranks array to recode
282      * @param value the value to replace NaNs with
283      */
284     private void recodeNaNs(IntDoublePair[] ranks, double value) {
285         for (int i = 0; i < ranks.length; i++) {
286             if (Double.isNaN(ranks[i].getValue())) {
287                 ranks[i] = new IntDoublePair(
288                         value, ranks[i].getPosition());
289             }
290         }
291     }
292     
293     /**
294      * Checks for presence of NaNs in <code>ranks.</code>
295      * 
296      * @param ranks array to be searched for NaNs
297      * @return true iff ranks contains one or more NaNs
298      */
299     private boolean containsNaNs(IntDoublePair[] ranks) {
300         for (int i = 0; i < ranks.length; i++) {
301             if (Double.isNaN(ranks[i].getValue())) {
302                 return true;
303             }
304         }
305         return false;
306     }
307     
308     /**
309      * Resolve a sequence of ties, using the configured {@link TiesStrategy}.
310      * The input <code>ranks</code> array is expected to take the same value
311      * for all indices in <code>tiesTrace</code>.  The common value is recoded
312      * according to the tiesStrategy. For example, if ranks = <5,8,2,6,2,7,1,2>,
313      * tiesTrace = <2,4,7> and tiesStrategy is MINIMUM, ranks will be unchanged.
314      * The same array and trace with tiesStrategy AVERAGE will come out
315      * <5,8,3,6,3,7,1,3>.
316      * 
317      * @param ranks array of ranks 
318      * @param tiesTrace list of indices where <code>ranks</code> is constant
319      * -- that is, for any i and j in TiesTrace, <code> ranks[i] == ranks[j] 
320      * </code>
321      */
322     private void resolveTie(double[] ranks, List<Integer> tiesTrace) {
323         
324         // constant value of ranks over tiesTrace
325         final double c = ranks[tiesTrace.get(0)];
326         
327         // length of sequence of tied ranks
328         final int length = tiesTrace.size();
329         
330         switch (tiesStrategy) {
331             case  AVERAGE:  // Replace ranks with average
332                 fill(ranks, tiesTrace, (2 * c + length - 1) / 2d);
333                 break;
334             case MAXIMUM:   // Replace ranks with maximum values
335                 fill(ranks, tiesTrace, c + length - 1);
336                 break;
337             case MINIMUM:   // Replace ties with minimum
338                 fill(ranks, tiesTrace, c);
339                 break;
340             case RANDOM:    // Fill with random integral values in [c, c + length - 1]
341                 Iterator<Integer> iterator = tiesTrace.iterator();
342                 long f = Math.round(c);
343                 while (iterator.hasNext()) {
344                     ranks[iterator.next()] = 
345                         randomData.nextLong(f, f + length - 1);
346                 }
347                 break;
348             case SEQUENTIAL:  // Fill sequentially from c to c + length - 1
349                 // walk and fill
350                 iterator = tiesTrace.iterator();
351                 f = Math.round(c);
352                 int i = 0;
353                 while (iterator.hasNext()) {
354                     ranks[iterator.next()] = f + i++;
355                 }
356                 break;
357         }   
358     }
359     
360     /**
361      * Sets<code>data[i] = value</code> for each i in <code>tiesTrace.</code>
362      * 
363      * @param data array to modify
364      * @param tiesTrace list of index values to set
365      * @param value value to set
366      */
367     private void fill(double[] data, List<Integer> tiesTrace, double value) {
368         Iterator<Integer> iterator = tiesTrace.iterator();
369         while (iterator.hasNext()) {
370             data[iterator.next()] = value;
371         }
372     }
373     
374     /**
375      * Set <code>ranks[i] = Double.NaN</code> for each i in <code>nanPositions.</code>
376      * 
377      * @param ranks array to modify
378      * @param nanPositions list of index values to set to <code>Double.NaN</code>
379      */
380     private void restoreNaNs(double[] ranks, List<Integer> nanPositions) {
381         if (nanPositions.size() == 0) {
382             return;
383         }
384         Iterator<Integer> iterator = nanPositions.iterator();
385         while (iterator.hasNext()) {
386             ranks[iterator.next().intValue()] = Double.NaN;  
387         }
388         
389     }
390     
391     /**
392      * Returns a list of indexes where <code>ranks</code> is <code>NaN.</code>
393      * 
394      * @param ranks array to search for <code>NaNs</code>
395      * @return list of indexes i such that <code>ranks[i] = NaN</code>
396      */
397     private List<Integer> getNanPositions(IntDoublePair[] ranks) {
398         ArrayList<Integer> out = new ArrayList<Integer>();
399         for (int i = 0; i < ranks.length; i++) {
400             if (Double.isNaN(ranks[i].getValue())) {
401                 out.add(Integer.valueOf(i));
402             }
403         }
404         return out;     
405     }
406     
407     /**
408      * Represents the position of a double value in an ordering.
409      * Comparable interface is implemented so Arrays.sort can be used
410      * to sort an array of IntDoublePairs by value.  Note that the
411      * implicitly defined natural ordering is NOT consistent with equals.
412      */
413     private static class IntDoublePair implements Comparable<IntDoublePair>  {
414 
415         /** Value of the pair */
416         final private double value;
417 
418         /** Original position of the pair */
419         final private int position;
420 
421         /**
422          * Construct an IntDoublePair with the given value and position.
423          * @param value the value of the pair
424          * @param position the original position
425          */
426         public IntDoublePair(double value, int position) {
427             this.value = value;
428             this.position = position;
429         }
430 
431         /**
432          * Compare this IntDoublePair to another pair.
433          * Only the <strong>values</strong> are compared.
434          * 
435          * @param other the other pair to compare this to
436          * @return result of <code>Double.compare(value, other.value)</code>
437          */
438         public int compareTo(IntDoublePair other) {
439             return Double.compare(value, other.value);
440         }
441 
442         /**
443          * Returns the value of the pair.
444          * @return value
445          */
446         public double getValue() {
447             return value;
448         }
449 
450         /**
451          * Returns the original position of the pair.
452          * @return position
453          */
454         public int getPosition() {
455             return position;
456         }
457     }
458 }