1 /* 2 * Licensed to the Apache Software Foundation (ASF) under one or more 3 * contributor license agreements. See the NOTICE file distributed with 4 * this work for additional information regarding copyright ownership. 5 * The ASF licenses this file to You under the Apache License, Version 2.0 6 * (the "License"); you may not use this file except in compliance with 7 * the License. You may obtain a copy of the License at 8 * 9 * http://www.apache.org/licenses/LICENSE-2.0 10 * 11 * Unless required by applicable law or agreed to in writing, software 12 * distributed under the License is distributed on an "AS IS" BASIS, 13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 * See the License for the specific language governing permissions and 15 * limitations under the License. 16 */ 17 package org.apache.commons.math.stat; 18 19 import java.io.Serializable; 20 import java.text.NumberFormat; 21 import java.util.Iterator; 22 import java.util.Comparator; 23 import java.util.TreeMap; 24 25 import org.apache.commons.math.MathRuntimeException; 26 27 /** 28 * Maintains a frequency distribution. 29 * <p> 30 * Accepts int, long, char or Comparable values. New values added must be 31 * comparable to those that have been added, otherwise the add method will 32 * throw an IllegalArgumentException.</p> 33 * <p> 34 * Integer values (int, long, Integer, Long) are not distinguished by type -- 35 * i.e. <code>addValue(Long.valueOf(2)), addValue(2), addValue(2l)</code> all have 36 * the same effect (similarly for arguments to <code>getCount,</code> etc.).</p> 37 * <p> 38 * char values are converted by <code>addValue</code> to Character instances. 39 * As such, these values are not comparable to integral values, so attempts 40 * to combine integral types with chars in a frequency distribution will fail. 41 * </p> 42 * <p> 43 * The values are ordered using the default (natural order), unless a 44 * <code>Comparator</code> is supplied in the constructor.</p> 45 * 46 * @version $Revision: 791733 $ $Date: 2009-07-07 03:44:52 -0400 (Tue, 07 Jul 2009) $ 47 */ 48 public class Frequency implements Serializable { 49 50 /** Serializable version identifier */ 51 private static final long serialVersionUID = -3845586908418844111L; 52 53 /** underlying collection */ 54 private final TreeMap<Comparable<?>, Long> freqTable; 55 56 /** 57 * Default constructor. 58 */ 59 public Frequency() { 60 freqTable = new TreeMap<Comparable<?>, Long>(); 61 } 62 63 /** 64 * Constructor allowing values Comparator to be specified. 65 * 66 * @param comparator Comparator used to order values 67 */ 68 @SuppressWarnings("unchecked") 69 public Frequency(Comparator<?> comparator) { 70 freqTable = new TreeMap<Comparable<?>, Long>((Comparator<? super Comparable<?>>) comparator); 71 } 72 73 /** 74 * Return a string representation of this frequency 75 * distribution. 76 * 77 * @return a string representation. 78 */ 79 @Override 80 public String toString() { 81 NumberFormat nf = NumberFormat.getPercentInstance(); 82 StringBuffer outBuffer = new StringBuffer(); 83 outBuffer.append("Value \t Freq. \t Pct. \t Cum Pct. \n"); 84 Iterator<Comparable<?>> iter = freqTable.keySet().iterator(); 85 while (iter.hasNext()) { 86 Comparable<?> value = iter.next(); 87 outBuffer.append(value); 88 outBuffer.append('\t'); 89 outBuffer.append(getCount(value)); 90 outBuffer.append('\t'); 91 outBuffer.append(nf.format(getPct(value))); 92 outBuffer.append('\t'); 93 outBuffer.append(nf.format(getCumPct(value))); 94 outBuffer.append('\n'); 95 } 96 return outBuffer.toString(); 97 } 98 99 /** 100 * Adds 1 to the frequency count for v. 101 * <p> 102 * If other objects have already been added to this Frequency, v must 103 * be comparable to those that have already been added. 104 * </p> 105 * 106 * @param v the value to add. 107 * @throws IllegalArgumentException if <code>v</code> is not Comparable, 108 * or is not comparable with previous entries 109 * @deprecated use {@link #addValue(Comparable)} instead 110 */ 111 @Deprecated 112 public void addValue(Object v) { 113 if (v instanceof Comparable<?>){ 114 addValue((Comparable<?>) v); 115 } else { 116 throw MathRuntimeException.createIllegalArgumentException( 117 "class ({0}) does not implement Comparable", 118 v.getClass().getName()); 119 } 120 } 121 122 /** 123 * Adds 1 to the frequency count for v. 124 * <p> 125 * If other objects have already been added to this Frequency, v must 126 * be comparable to those that have already been added. 127 * </p> 128 * 129 * @param v the value to add. 130 * @throws IllegalArgumentException if <code>v</code> is not comparable with previous entries 131 */ 132 public void addValue(Comparable<?> v){ 133 Comparable<?> obj = v; 134 if (v instanceof Integer) { 135 obj = Long.valueOf(((Integer) v).longValue()); 136 } 137 try { 138 Long count = freqTable.get(obj); 139 if (count == null) { 140 freqTable.put(obj, Long.valueOf(1)); 141 } else { 142 freqTable.put(obj, Long.valueOf(count.longValue() + 1)); 143 } 144 } catch (ClassCastException ex) { 145 //TreeMap will throw ClassCastException if v is not comparable 146 throw MathRuntimeException.createIllegalArgumentException( 147 "instance of class {0} not comparable to existing values", 148 v.getClass().getName()); 149 } 150 } 151 152 /** 153 * Adds 1 to the frequency count for v. 154 * 155 * @param v the value to add. 156 */ 157 public void addValue(int v) { 158 addValue(Long.valueOf(v)); 159 } 160 161 /** 162 * Adds 1 to the frequency count for v. 163 * 164 * @param v the value to add. 165 */ 166 public void addValue(Integer v) { 167 addValue(Long.valueOf(v.longValue())); 168 } 169 170 /** 171 * Adds 1 to the frequency count for v. 172 * 173 * @param v the value to add. 174 */ 175 public void addValue(long v) { 176 addValue(Long.valueOf(v)); 177 } 178 179 /** 180 * Adds 1 to the frequency count for v. 181 * 182 * @param v the value to add. 183 */ 184 public void addValue(char v) { 185 addValue(Character.valueOf(v)); 186 } 187 188 /** Clears the frequency table */ 189 public void clear() { 190 freqTable.clear(); 191 } 192 193 /** 194 * Returns an Iterator over the set of values that have been added. 195 * <p> 196 * If added values are integral (i.e., integers, longs, Integers, or Longs), 197 * they are converted to Longs when they are added, so the objects returned 198 * by the Iterator will in this case be Longs.</p> 199 * 200 * @return values Iterator 201 */ 202 public Iterator<Comparable<?>> valuesIterator() { 203 return freqTable.keySet().iterator(); 204 } 205 206 //------------------------------------------------------------------------- 207 208 /** 209 * Returns the sum of all frequencies. 210 * 211 * @return the total frequency count. 212 */ 213 public long getSumFreq() { 214 long result = 0; 215 Iterator<Long> iterator = freqTable.values().iterator(); 216 while (iterator.hasNext()) { 217 result += iterator.next().longValue(); 218 } 219 return result; 220 } 221 222 /** 223 * Returns the number of values = v. 224 * Returns 0 if the value is not comparable. 225 * 226 * @param v the value to lookup. 227 * @return the frequency of v. 228 * @deprecated replaced by {@link #getCount(Comparable)} as of 2.0 229 */ 230 @Deprecated 231 public long getCount(Object v) { 232 return getCount((Comparable<?>) v); 233 } 234 235 /** 236 * Returns the number of values = v. 237 * Returns 0 if the value is not comparable. 238 * 239 * @param v the value to lookup. 240 * @return the frequency of v. 241 */ 242 public long getCount(Comparable<?> v) { 243 if (v instanceof Integer) { 244 return getCount(((Integer) v).longValue()); 245 } 246 long result = 0; 247 try { 248 Long count = freqTable.get(v); 249 if (count != null) { 250 result = count.longValue(); 251 } 252 } catch (ClassCastException ex) { 253 // ignore and return 0 -- ClassCastException will be thrown if value is not comparable 254 } 255 return result; 256 } 257 258 /** 259 * Returns the number of values = v. 260 * 261 * @param v the value to lookup. 262 * @return the frequency of v. 263 */ 264 public long getCount(int v) { 265 return getCount(Long.valueOf(v)); 266 } 267 268 /** 269 * Returns the number of values = v. 270 * 271 * @param v the value to lookup. 272 * @return the frequency of v. 273 */ 274 public long getCount(long v) { 275 return getCount(Long.valueOf(v)); 276 } 277 278 /** 279 * Returns the number of values = v. 280 * 281 * @param v the value to lookup. 282 * @return the frequency of v. 283 */ 284 public long getCount(char v) { 285 return getCount(Character.valueOf(v)); 286 } 287 288 //------------------------------------------------------------- 289 290 /** 291 * Returns the percentage of values that are equal to v 292 * (as a proportion between 0 and 1). 293 * <p> 294 * Returns <code>Double.NaN</code> if no values have been added.</p> 295 * 296 * @param v the value to lookup 297 * @return the proportion of values equal to v 298 * @deprecated replaced by {@link #getPct(Comparable)} as of 2.0 299 */ 300 @Deprecated 301 public double getPct(Object v) { 302 return getCumPct((Comparable<?>) v); 303 } 304 305 /** 306 * Returns the percentage of values that are equal to v 307 * (as a proportion between 0 and 1). 308 * <p> 309 * Returns <code>Double.NaN</code> if no values have been added.</p> 310 * 311 * @param v the value to lookup 312 * @return the proportion of values equal to v 313 */ 314 public double getPct(Comparable<?> v) { 315 final long sumFreq = getSumFreq(); 316 if (sumFreq == 0) { 317 return Double.NaN; 318 } 319 return (double) getCount(v) / (double) sumFreq; 320 } 321 322 /** 323 * Returns the percentage of values that are equal to v 324 * (as a proportion between 0 and 1). 325 * 326 * @param v the value to lookup 327 * @return the proportion of values equal to v 328 */ 329 public double getPct(int v) { 330 return getPct(Long.valueOf(v)); 331 } 332 333 /** 334 * Returns the percentage of values that are equal to v 335 * (as a proportion between 0 and 1). 336 * 337 * @param v the value to lookup 338 * @return the proportion of values equal to v 339 */ 340 public double getPct(long v) { 341 return getPct(Long.valueOf(v)); 342 } 343 344 /** 345 * Returns the percentage of values that are equal to v 346 * (as a proportion between 0 and 1). 347 * 348 * @param v the value to lookup 349 * @return the proportion of values equal to v 350 */ 351 public double getPct(char v) { 352 return getPct(Character.valueOf(v)); 353 } 354 355 //----------------------------------------------------------------------------------------- 356 357 /** 358 * Returns the cumulative frequency of values less than or equal to v. 359 * <p> 360 * Returns 0 if v is not comparable to the values set.</p> 361 * 362 * @param v the value to lookup. 363 * @return the proportion of values equal to v 364 * @deprecated replaced by {@link #getCumFreq(Comparable)} as of 2.0 365 */ 366 @Deprecated 367 public long getCumFreq(Object v) { 368 return getCumFreq((Comparable<?>) v); 369 } 370 371 /** 372 * Returns the cumulative frequency of values less than or equal to v. 373 * <p> 374 * Returns 0 if v is not comparable to the values set.</p> 375 * 376 * @param v the value to lookup. 377 * @return the proportion of values equal to v 378 */ 379 @SuppressWarnings("unchecked") 380 public long getCumFreq(Comparable<?> v) { 381 if (getSumFreq() == 0) { 382 return 0; 383 } 384 if (v instanceof Integer) { 385 return getCumFreq(((Integer) v).longValue()); 386 } 387 Comparator<Comparable<?>> c = (Comparator<Comparable<?>>) freqTable.comparator(); 388 if (c == null) { 389 c = new NaturalComparator(); 390 } 391 long result = 0; 392 393 try { 394 Long value = freqTable.get(v); 395 if (value != null) { 396 result = value.longValue(); 397 } 398 } catch (ClassCastException ex) { 399 return result; // v is not comparable 400 } 401 402 if (c.compare(v, freqTable.firstKey()) < 0) { 403 return 0; // v is comparable, but less than first value 404 } 405 406 if (c.compare(v, freqTable.lastKey()) >= 0) { 407 return getSumFreq(); // v is comparable, but greater than the last value 408 } 409 410 Iterator<Comparable<?>> values = valuesIterator(); 411 while (values.hasNext()) { 412 Comparable<?> nextValue = values.next(); 413 if (c.compare(v, nextValue) > 0) { 414 result += getCount(nextValue); 415 } else { 416 return result; 417 } 418 } 419 return result; 420 } 421 422 /** 423 * Returns the cumulative frequency of values less than or equal to v. 424 * <p> 425 * Returns 0 if v is not comparable to the values set.</p> 426 * 427 * @param v the value to lookup 428 * @return the proportion of values equal to v 429 */ 430 public long getCumFreq(int v) { 431 return getCumFreq(Long.valueOf(v)); 432 } 433 434 /** 435 * Returns the cumulative frequency of values less than or equal to v. 436 * <p> 437 * Returns 0 if v is not comparable to the values set.</p> 438 * 439 * @param v the value to lookup 440 * @return the proportion of values equal to v 441 */ 442 public long getCumFreq(long v) { 443 return getCumFreq(Long.valueOf(v)); 444 } 445 446 /** 447 * Returns the cumulative frequency of values less than or equal to v. 448 * <p> 449 * Returns 0 if v is not comparable to the values set.</p> 450 * 451 * @param v the value to lookup 452 * @return the proportion of values equal to v 453 */ 454 public long getCumFreq(char v) { 455 return getCumFreq(Character.valueOf(v)); 456 } 457 458 //---------------------------------------------------------------------------------------------- 459 460 /** 461 * Returns the cumulative percentage of values less than or equal to v 462 * (as a proportion between 0 and 1). 463 * <p> 464 * Returns <code>Double.NaN</code> if no values have been added. 465 * Returns 0 if at least one value has been added, but v is not comparable 466 * to the values set.</p> 467 * 468 * @param v the value to lookup 469 * @return the proportion of values less than or equal to v 470 * @deprecated replaced by {@link #getCumPct(Comparable)} as of 2.0 471 */ 472 @Deprecated 473 public double getCumPct(Object v) { 474 return getCumPct((Comparable<?>) v); 475 476 } 477 478 /** 479 * Returns the cumulative percentage of values less than or equal to v 480 * (as a proportion between 0 and 1). 481 * <p> 482 * Returns <code>Double.NaN</code> if no values have been added. 483 * Returns 0 if at least one value has been added, but v is not comparable 484 * to the values set.</p> 485 * 486 * @param v the value to lookup 487 * @return the proportion of values less than or equal to v 488 */ 489 public double getCumPct(Comparable<?> v) { 490 final long sumFreq = getSumFreq(); 491 if (sumFreq == 0) { 492 return Double.NaN; 493 } 494 return (double) getCumFreq(v) / (double) sumFreq; 495 } 496 497 /** 498 * Returns the cumulative percentage of values less than or equal to v 499 * (as a proportion between 0 and 1). 500 * <p> 501 * Returns 0 if v is not comparable to the values set.</p> 502 * 503 * @param v the value to lookup 504 * @return the proportion of values less than or equal to v 505 */ 506 public double getCumPct(int v) { 507 return getCumPct(Long.valueOf(v)); 508 } 509 510 /** 511 * Returns the cumulative percentage of values less than or equal to v 512 * (as a proportion between 0 and 1). 513 * <p> 514 * Returns 0 if v is not comparable to the values set.</p> 515 * 516 * @param v the value to lookup 517 * @return the proportion of values less than or equal to v 518 */ 519 public double getCumPct(long v) { 520 return getCumPct(Long.valueOf(v)); 521 } 522 523 /** 524 * Returns the cumulative percentage of values less than or equal to v 525 * (as a proportion between 0 and 1). 526 * <p> 527 * Returns 0 if v is not comparable to the values set.</p> 528 * 529 * @param v the value to lookup 530 * @return the proportion of values less than or equal to v 531 */ 532 public double getCumPct(char v) { 533 return getCumPct(Character.valueOf(v)); 534 } 535 536 /** 537 * A Comparator that compares comparable objects using the 538 * natural order. Copied from Commons Collections ComparableComparator. 539 */ 540 private static class NaturalComparator<T extends Comparable<T>> implements Comparator<Comparable<T>>, Serializable { 541 542 /** Serializable version identifier */ 543 private static final long serialVersionUID = -3852193713161395148L; 544 545 /** 546 * Compare the two {@link Comparable Comparable} arguments. 547 * This method is equivalent to: 548 * <pre>(({@link Comparable Comparable})o1).{@link Comparable#compareTo compareTo}(o2)</pre> 549 * 550 * @param o1 the first object 551 * @param o2 the second object 552 * @return result of comparison 553 * @throws NullPointerException when <i>o1</i> is <code>null</code>, 554 * or when <code>((Comparable)o1).compareTo(o2)</code> does 555 * @throws ClassCastException when <i>o1</i> is not a {@link Comparable Comparable}, 556 * or when <code>((Comparable)o1).compareTo(o2)</code> does 557 */ 558 @SuppressWarnings("unchecked") 559 public int compare(Comparable<T> o1, Comparable<T> o2) { 560 return (o1.compareTo((T) o2)); 561 } 562 } 563 564 /** {@inheritDoc} */ 565 @Override 566 public int hashCode() { 567 final int prime = 31; 568 int result = 1; 569 result = prime * result + 570 ((freqTable == null) ? 0 : freqTable.hashCode()); 571 return result; 572 } 573 574 /** {@inheritDoc} */ 575 @Override 576 public boolean equals(Object obj) { 577 if (this == obj) 578 return true; 579 if (obj == null) 580 return false; 581 if (!(obj instanceof Frequency)) 582 return false; 583 Frequency other = (Frequency) obj; 584 if (freqTable == null) { 585 if (other.freqTable != null) 586 return false; 587 } else if (!freqTable.equals(other.freqTable)) 588 return false; 589 return true; 590 } 591 592 }