1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16 package org.apache.commons.math.stat;
17
18 import java.io.Serializable;
19 import java.text.NumberFormat;
20 import java.util.Iterator;
21 import java.util.Comparator;
22 import java.util.TreeMap;
23
24 /**
25 * Maintains a frequency distribution.
26 * <p>
27 * Accepts int, long, char or Object values. New values added must be
28 * comparable to those that have been added, otherwise the add method will
29 * throw an IllegalArgumentException.
30 * <p>
31 * Integer values (int, long, Integer, Long) are not distinguished by type --
32 * i.e. <code>addValue(new Long(2)), addValue(2), addValue(2l)</code> all have
33 * the same effect (similarly for arguments to <code>getCount,</code> etc.).
34 * <p>
35 * The values are ordered using the default (natural order), unless a
36 * <code>Comparator</code> is supplied in the constructor.
37 *
38 * @version $Revision: 348519 $ $Date: 2005-11-23 12:12:18 -0700 (Wed, 23 Nov 2005) $
39 */
40 public class Frequency implements Serializable {
41
42 /** Serializable version identifier */
43 private static final long serialVersionUID = -3845586908418844111L;
44
45 /** underlying collection */
46 private TreeMap freqTable = null;
47
48 /**
49 * Default constructor.
50 */
51 public Frequency() {
52 freqTable = new TreeMap();
53 }
54
55 /**
56 * Constructor allowing values Comparator to be specified.
57 *
58 * @param comparator Comparator used to order values
59 */
60 public Frequency(Comparator comparator) {
61 freqTable = new TreeMap(comparator);
62 }
63
64 /**
65 * Return a string representation of this frequency
66 * distribution.
67 *
68 * @return a string representation.
69 */
70 public String toString() {
71 NumberFormat nf = NumberFormat.getPercentInstance();
72 StringBuffer outBuffer = new StringBuffer();
73 outBuffer.append("Value \t Freq. \t Pct. \t Cum Pct. \n");
74 Iterator iter = freqTable.keySet().iterator();
75 while (iter.hasNext()) {
76 Object value = iter.next();
77 outBuffer.append(value);
78 outBuffer.append('\t');
79 outBuffer.append(getCount(value));
80 outBuffer.append('\t');
81 outBuffer.append(nf.format(getPct(value)));
82 outBuffer.append('\t');
83 outBuffer.append(nf.format(getCumPct(value)));
84 outBuffer.append('\n');
85 }
86 return outBuffer.toString();
87 }
88
89 /**
90 * Adds 1 to the frequency count for v.
91 *
92 * @param v the value to add.
93 * @throws IllegalArgumentException if <code>v</code> is not comparable.
94 */
95 public void addValue(Object v) {
96 Object obj = v;
97 if (v instanceof Integer) {
98 obj = new Long(((Integer) v).longValue());
99 }
100 try {
101 Long count = (Long) freqTable.get(obj);
102 if (count == null) {
103 freqTable.put(obj, new Long(1));
104 } else {
105 freqTable.put(obj, new Long(count.longValue() + 1));
106 }
107 } catch (ClassCastException ex) {
108
109 throw new IllegalArgumentException("Value not comparable to existing values.");
110 }
111 }
112
113 /**
114 * Adds 1 to the frequency count for v.
115 *
116 * @param v the value to add.
117 */
118 public void addValue(int v) {
119 addValue(new Long(v));
120 }
121
122 /**
123 * Adds 1 to the frequency count for v.
124 *
125 * @param v the value to add.
126 */
127 public void addValue(Integer v) {
128 addValue(new Long(v.longValue()));
129 }
130
131 /**
132 * Adds 1 to the frequency count for v.
133 *
134 * @param v the value to add.
135 */
136 public void addValue(long v) {
137 addValue(new Long(v));
138 }
139
140 /**
141 * Adds 1 to the frequency count for v.
142 *
143 * @param v the value to add.
144 */
145 public void addValue(char v) {
146 addValue(new Character(v));
147 }
148
149 /** Clears the frequency table */
150 public void clear() {
151 freqTable.clear();
152 }
153
154 /**
155 * Returns an Iterator over the set of values that have been added.
156 * <p>
157 * If added values are itegral (i.e., integers, longs, Integers, or Longs),
158 * they are converted to Longs when they are added, so the objects returned
159 * by the Iterator will in this case be Longs.
160 *
161 * @return values Iterator
162 */
163 public Iterator valuesIterator() {
164 return freqTable.keySet().iterator();
165 }
166
167
168
169 /**
170 * Returns the sum of all frequencies.
171 *
172 * @return the total frequency count.
173 */
174 public long getSumFreq() {
175 long result = 0;
176 Iterator iterator = freqTable.values().iterator();
177 while (iterator.hasNext()) {
178 result += ((Long) iterator.next()).longValue();
179 }
180 return result;
181 }
182
183 /**
184 * Returns the number of values = v.
185 *
186 * @param v the value to lookup.
187 * @return the frequency of v.
188 */
189 public long getCount(Object v) {
190 if (v instanceof Integer) {
191 return getCount(((Integer) v).longValue());
192 }
193 long result = 0;
194 try {
195 Long count = (Long) freqTable.get(v);
196 if (count != null) {
197 result = count.longValue();
198 }
199 } catch (ClassCastException ex) {
200
201 }
202 return result;
203 }
204
205 /**
206 * Returns the number of values = v.
207 *
208 * @param v the value to lookup.
209 * @return the frequency of v.
210 */
211 public long getCount(int v) {
212 return getCount(new Long(v));
213 }
214
215 /**
216 * Returns the number of values = v.
217 *
218 * @param v the value to lookup.
219 * @return the frequency of v.
220 */
221 public long getCount(long v) {
222 return getCount(new Long(v));
223 }
224
225 /**
226 * Returns the number of values = v.
227 *
228 * @param v the value to lookup.
229 * @return the frequency of v.
230 */
231 public long getCount(char v) {
232 return getCount(new Character(v));
233 }
234
235
236
237 /**
238 * Returns the percentage of values that are equal to v
239 * (as a proportion between 0 and 1).
240 * <p>
241 * Returns <code>Double.NaN</code> if no values have been added.
242 *
243 * @param v the value to lookup
244 * @return the proportion of values equal to v
245 */
246 public double getPct(Object v) {
247 if (getSumFreq() == 0) {
248 return Double.NaN;
249 }
250 return (double) getCount(v) / (double) getSumFreq();
251 }
252
253 /**
254 * Returns the percentage of values that are equal to v
255 * (as a proportion between 0 and 1).
256 *
257 * @param v the value to lookup
258 * @return the proportion of values equal to v
259 */
260 public double getPct(int v) {
261 return getPct(new Long(v));
262 }
263
264 /**
265 * Returns the percentage of values that are equal to v
266 * (as a proportion between 0 and 1).
267 *
268 * @param v the value to lookup
269 * @return the proportion of values equal to v
270 */
271 public double getPct(long v) {
272 return getPct(new Long(v));
273 }
274
275 /**
276 * Returns the percentage of values that are equal to v
277 * (as a proportion between 0 and 1).
278 *
279 * @param v the value to lookup
280 * @return the proportion of values equal to v
281 */
282 public double getPct(char v) {
283 return getPct(new Character(v));
284 }
285
286
287
288 /**
289 * Returns the cumulative frequency of values less than or equal to v.
290 * <p>
291 * Returns 0 if v is not comparable to the values set.
292 *
293 * @param v the value to lookup.
294 * @return the proportion of values equal to v
295 */
296 public long getCumFreq(Object v) {
297 if (getSumFreq() == 0) {
298 return 0;
299 }
300 if (v instanceof Integer) {
301 return getCumFreq(((Integer) v).longValue());
302 }
303 Comparator c = freqTable.comparator();
304 if (c == null) {
305 c = new NaturalComparator();
306 }
307 long result = 0;
308
309 try {
310 Long value = (Long) freqTable.get(v);
311 if (value != null) {
312 result = value.longValue();
313 }
314 } catch (ClassCastException ex) {
315 return result;
316 }
317
318 if (c.compare(v, freqTable.firstKey()) < 0) {
319 return 0;
320 }
321
322 if (c.compare(v, freqTable.lastKey()) >= 0) {
323 return getSumFreq();
324 }
325
326 Iterator values = valuesIterator();
327 while (values.hasNext()) {
328 Object nextValue = values.next();
329 if (c.compare(v, nextValue) > 0) {
330 result += getCount(nextValue);
331 } else {
332 return result;
333 }
334 }
335 return result;
336 }
337
338 /**
339 * Returns the cumulative frequency of values less than or equal to v.
340 * <p>
341 * Returns 0 if v is not comparable to the values set.
342 *
343 * @param v the value to lookup
344 * @return the proportion of values equal to v
345 */
346 public long getCumFreq(int v) {
347 return getCumFreq(new Long(v));
348 }
349
350 /**
351 * Returns the cumulative frequency of values less than or equal to v.
352 * <p>
353 * Returns 0 if v is not comparable to the values set.
354 *
355 * @param v the value to lookup
356 * @return the proportion of values equal to v
357 */
358 public long getCumFreq(long v) {
359 return getCumFreq(new Long(v));
360 }
361
362 /**
363 * Returns the cumulative frequency of values less than or equal to v.
364 * <p>
365 * Returns 0 if v is not comparable to the values set.
366 *
367 * @param v the value to lookup
368 * @return the proportion of values equal to v
369 */
370 public long getCumFreq(char v) {
371 return getCumFreq(new Character(v));
372 }
373
374
375
376 /**
377 * Returns the cumulative percentage of values less than or equal to v
378 * (as a proportion between 0 and 1).
379 * <p>
380 * Returns <code>Double.NaN</code> if no values have been added.
381 * Returns 0 if at least one value has been added, but v is not comparable
382 * to the values set.
383 *
384 * @param v the value to lookup
385 * @return the proportion of values less than or equal to v
386 */
387 public double getCumPct(Object v) {
388 if (getSumFreq() == 0) {
389 return Double.NaN;
390 }
391 return (double) getCumFreq(v) / (double) getSumFreq();
392 }
393
394 /**
395 * Returns the cumulative percentage of values less than or equal to v
396 * (as a proportion between 0 and 1).
397 * <p>
398 * Returns 0 if v is not comparable to the values set.
399 *
400 * @param v the value to lookup
401 * @return the proportion of values less than or equal to v
402 */
403 public double getCumPct(int v) {
404 return getCumPct(new Long(v));
405 }
406
407 /**
408 * Returns the cumulative percentage of values less than or equal to v
409 * (as a proportion between 0 and 1).
410 * <p>
411 * Returns 0 if v is not comparable to the values set.
412 *
413 * @param v the value to lookup
414 * @return the proportion of values less than or equal to v
415 */
416 public double getCumPct(long v) {
417 return getCumPct(new Long(v));
418 }
419
420 /**
421 * Returns the cumulative percentage of values less than or equal to v
422 * (as a proportion between 0 and 1).
423 * <p>
424 * Returns 0 if v is not comparable to the values set.
425 *
426 * @param v the value to lookup
427 * @return the proportion of values less than or equal to v
428 */
429 public double getCumPct(char v) {
430 return getCumPct(new Character(v));
431 }
432
433 /**
434 * A Comparator that compares comparable objects using the
435 * natural order. Copied from Commons Collections ComparableComparator.
436 */
437 private static class NaturalComparator implements Comparator {
438 /**
439 * Compare the two {@link Comparable Comparable} arguments.
440 * This method is equivalent to:
441 * <pre>(({@link Comparable Comparable})o1).{@link Comparable#compareTo compareTo}(o2)</pre>
442 *
443 * @param o1 the first object
444 * @param o2 the second object
445 * @return result of comparison
446 * @throws NullPointerException when <i>o1</i> is <code>null</code>,
447 * or when <code>((Comparable)o1).compareTo(o2)</code> does
448 * @throws ClassCastException when <i>o1</i> is not a {@link Comparable Comparable},
449 * or when <code>((Comparable)o1).compareTo(o2)</code> does
450 */
451 public int compare(Object o1, Object o2) {
452 return ((Comparable)o1).compareTo(o2);
453 }
454 }
455 }