001/*
002 * Copyright (C) 2008 The Guava Authors
003 *
004 * Licensed under the Apache License, Version 2.0 (the "License");
005 * you may not use this file except in compliance with the License.
006 * You may obtain a copy of the License at
007 *
008 * http://www.apache.org/licenses/LICENSE-2.0
009 *
010 * Unless required by applicable law or agreed to in writing, software
011 * distributed under the License is distributed on an "AS IS" BASIS,
012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
013 * See the License for the specific language governing permissions and
014 * limitations under the License.
015 */
016
017package com.google.common.base;
018
019import static com.google.common.base.Preconditions.checkArgument;
020import static com.google.common.base.Preconditions.checkNotNull;
021
022import com.google.common.annotations.Beta;
023import com.google.common.annotations.GwtCompatible;
024
025import java.util.ArrayList;
026import java.util.Arrays;
027import java.util.List;
028
029/**
030 * Determines a true or false value for any Java {@code char} value, just as {@link Predicate} does
031 * for any {@link Object}. Also offers basic text processing methods based on this function.
032 * Implementations are strongly encouraged to be side-effect-free and immutable.
033 *
034 * <p>Throughout the documentation of this class, the phrase "matching character" is used to mean
035 * "any character {@code c} for which {@code this.matches(c)} returns {@code true}".
036 *
037 * <p><b>Note:</b> This class deals only with {@code char} values; it does not understand
038 * supplementary Unicode code points in the range {@code 0x10000} to {@code 0x10FFFF}. Such logical
039 * characters are encoded into a {@code String} using surrogate pairs, and a {@code CharMatcher}
040 * treats these just as two separate characters.
041 *
042 * @author Kevin Bourrillion
043 * @since 1
044 */
045@Beta // Possibly change from chars to code points; decide constants vs. methods
046@GwtCompatible
047public abstract class CharMatcher implements Predicate<Character> {
048  // Constants
049
050  // Excludes 2000-2000a, which is handled as a range
051  private static final String BREAKING_WHITESPACE_CHARS =
052      "\t\n\013\f\r \u0085\u1680\u2028\u2029\u205f\u3000";
053
054  // Excludes 2007, which is handled as a gap in a pair of ranges
055  private static final String NON_BREAKING_WHITESPACE_CHARS =
056      "\u00a0\u180e\u202f";
057
058  /**
059   * Determines whether a character is whitespace according to the latest Unicode standard, as
060   * illustrated
061   * <a href="http://unicode.org/cldr/utility/list-unicodeset.jsp?a=%5Cp%7Bwhitespace%7D">here</a>.
062   * This is not the same definition used by other Java APIs. (See a
063   * <a href="http://spreadsheets.google.com/pub?key=pd8dAQyHbdewRsnE5x5GzKQ">comparison of several
064   * definitions of "whitespace"</a>.)
065   *
066   * <p><b>Note:</b> as the Unicode definition evolves, we will modify this constant to keep it up
067   * to date.
068   */
069  public static final CharMatcher WHITESPACE =
070      anyOf(BREAKING_WHITESPACE_CHARS + NON_BREAKING_WHITESPACE_CHARS)
071          .or(inRange('\u2000', '\u200a'))
072          .precomputed();
073
074  /**
075   * Determines whether a character is a breaking whitespace (that is, a whitespace which can be
076   * interpreted as a break between words for formatting purposes). See {@link #WHITESPACE} for a
077   * discussion of that term.
078   *
079   * @since 2
080   */
081  public static final CharMatcher BREAKING_WHITESPACE =
082      anyOf(BREAKING_WHITESPACE_CHARS)
083          .or(inRange('\u2000', '\u2006'))
084          .or(inRange('\u2008', '\u200a'))
085          .precomputed();
086
087  /**
088   * Determines whether a character is ASCII, meaning that its code point is less than 128.
089   */
090  public static final CharMatcher ASCII = inRange('\0', '\u007f');
091
092  /**
093   * Determines whether a character is a digit according to
094   * <a href="http://unicode.org/cldr/utility/list-unicodeset.jsp?a=%5Cp%7Bdigit%7D">Unicode</a>.
095   */
096  public static final CharMatcher DIGIT;
097
098  static {
099    CharMatcher digit = inRange('0', '9');
100    String zeroes =
101        "\u0660\u06f0\u07c0\u0966\u09e6\u0a66\u0ae6\u0b66\u0be6\u0c66"
102            + "\u0ce6\u0d66\u0e50\u0ed0\u0f20\u1040\u1090\u17e0\u1810\u1946"
103            + "\u19d0\u1b50\u1bb0\u1c40\u1c50\ua620\ua8d0\ua900\uaa50\uff10";
104    for (char base : zeroes.toCharArray()) {
105      digit = digit.or(inRange(base, (char) (base + 9)));
106    }
107    DIGIT = digit.precomputed();
108  }
109
110  /**
111   * Determines whether a character is whitespace according to {@link Character#isWhitespace(char)
112   * Java's definition}; it is usually preferable to use {@link #WHITESPACE}. (See a
113   * <a href="http://spreadsheets.google.com/pub?key=pd8dAQyHbdewRsnE5x5GzKQ">comparison of several
114   * definitions of "whitespace"</a>.)
115   */
116  public static final CharMatcher JAVA_WHITESPACE =
117      inRange('\u0009', (char) 13)  // \\u000d doesn't work as a char literal
118          .or(inRange('\u001c', '\u0020'))
119          .or(is('\u1680'))
120          .or(is('\u180e'))
121          .or(inRange('\u2000', '\u2006'))
122          .or(inRange('\u2008', '\u200b'))
123          .or(inRange('\u2028', '\u2029'))
124          .or(is('\u205f'))
125          .or(is('\u3000'))
126          .precomputed();
127
128  /**
129   * Determines whether a character is a digit according to {@link Character#isDigit(char) Java's
130   * definition}. If you only care to match ASCII digits, you can use {@code inRange('0', '9')}.
131   */
132  public static final CharMatcher JAVA_DIGIT = new CharMatcher() {
133    @Override public boolean matches(char c) {
134      return Character.isDigit(c);
135    }
136  };
137
138  /**
139   * Determines whether a character is a letter according to {@link Character#isLetter(char) Java's
140   * definition}. If you only care to match letters of the Latin alphabet, you can use {@code
141   * inRange('a', 'z').or(inRange('A', 'Z'))}.
142   */
143  public static final CharMatcher JAVA_LETTER = new CharMatcher() {
144    @Override public boolean matches(char c) {
145      return Character.isLetter(c);
146    }
147  };
148
149  /**
150   * Determines whether a character is a letter or digit according to {@link
151   * Character#isLetterOrDigit(char) Java's definition}.
152   */
153  public static final CharMatcher JAVA_LETTER_OR_DIGIT = new CharMatcher() {
154    @Override public boolean matches(char c) {
155      return Character.isLetterOrDigit(c);
156    }
157  };
158
159  /**
160   * Determines whether a character is upper case according to {@link Character#isUpperCase(char)
161   * Java's definition}.
162   */
163  public static final CharMatcher JAVA_UPPER_CASE = new CharMatcher() {
164    @Override public boolean matches(char c) {
165      return Character.isUpperCase(c);
166    }
167  };
168
169  /**
170   * Determines whether a character is lower case according to {@link Character#isLowerCase(char)
171   * Java's definition}.
172   */
173  public static final CharMatcher JAVA_LOWER_CASE = new CharMatcher() {
174    @Override public boolean matches(char c) {
175      return Character.isLowerCase(c);
176    }
177  };
178
179  /**
180   * Determines whether a character is an ISO control character as specified by {@link
181   * Character#isISOControl(char)}.
182   */
183  public static final CharMatcher JAVA_ISO_CONTROL =
184      inRange('\u0000', '\u001f').or(inRange('\u007f', '\u009f'));
185
186  /**
187   * Determines whether a character is invisible; that is, if its Unicode category is any of
188   * SPACE_SEPARATOR, LINE_SEPARATOR, PARAGRAPH_SEPARATOR, CONTROL, FORMAT, SURROGATE, and
189   * PRIVATE_USE according to ICU4J.
190   */
191  public static final CharMatcher INVISIBLE = inRange('\u0000', '\u0020')
192      .or(inRange('\u007f', '\u00a0'))
193      .or(is('\u00ad'))
194      .or(inRange('\u0600', '\u0603'))
195      .or(anyOf("\u06dd\u070f\u1680\u17b4\u17b5\u180e"))
196      .or(inRange('\u2000', '\u200f'))
197      .or(inRange('\u2028', '\u202f'))
198      .or(inRange('\u205f', '\u2064'))
199      .or(inRange('\u206a', '\u206f'))
200      .or(is('\u3000'))
201      .or(inRange('\ud800', '\uf8ff'))
202      .or(anyOf("\ufeff\ufff9\ufffa\ufffb"))
203      .precomputed();
204
205  /**
206   * Determines whether a character is single-width (not double-width). When in doubt, this matcher
207   * errs on the side of returning {@code false} (that is, it tends to assume a character is
208   * double-width).
209   *
210   * <p><b>Note:</b> as the reference file evolves, we will modify this constant to keep it up to
211   * date.
212   */
213  public static final CharMatcher SINGLE_WIDTH = inRange('\u0000', '\u04f9')
214      .or(is('\u05be'))
215      .or(inRange('\u05d0', '\u05ea'))
216      .or(is('\u05f3'))
217      .or(is('\u05f4'))
218      .or(inRange('\u0600', '\u06ff'))
219      .or(inRange('\u0750', '\u077f'))
220      .or(inRange('\u0e00', '\u0e7f'))
221      .or(inRange('\u1e00', '\u20af'))
222      .or(inRange('\u2100', '\u213a'))
223      .or(inRange('\ufb50', '\ufdff'))
224      .or(inRange('\ufe70', '\ufeff'))
225      .or(inRange('\uff61', '\uffdc'))
226      .precomputed();
227
228  /** Matches any character. */
229  public static final CharMatcher ANY =
230      new CharMatcher() {
231        @Override public boolean matches(char c) {
232          return true;
233        }
234
235        @Override public int indexIn(CharSequence sequence) {
236          return (sequence.length() == 0) ? -1 : 0;
237        }
238
239        @Override public int indexIn(CharSequence sequence, int start) {
240          int length = sequence.length();
241          Preconditions.checkPositionIndex(start, length);
242          return (start == length) ? -1 : start;
243        }
244
245        @Override public int lastIndexIn(CharSequence sequence) {
246          return sequence.length() - 1;
247        }
248
249        @Override public boolean matchesAllOf(CharSequence sequence) {
250          checkNotNull(sequence);
251          return true;
252        }
253
254        @Override public boolean matchesNoneOf(CharSequence sequence) {
255          return sequence.length() == 0;
256        }
257
258        @Override public String removeFrom(CharSequence sequence) {
259          checkNotNull(sequence);
260          return "";
261        }
262
263        @Override public String replaceFrom(CharSequence sequence, char replacement) {
264          char[] array = new char[sequence.length()];
265          Arrays.fill(array, replacement);
266          return new String(array);
267        }
268
269        @Override public String replaceFrom(CharSequence sequence, CharSequence replacement) {
270          StringBuilder retval = new StringBuilder(sequence.length() * replacement.length());
271          for (int i = 0; i < sequence.length(); i++) {
272            retval.append(replacement);
273          }
274          return retval.toString();
275        }
276
277        @Override public String collapseFrom(CharSequence sequence, char replacement) {
278          return (sequence.length() == 0) ? "" : String.valueOf(replacement);
279        }
280
281        @Override public String trimFrom(CharSequence sequence) {
282          checkNotNull(sequence);
283          return "";
284        }
285
286        @Override public int countIn(CharSequence sequence) {
287          return sequence.length();
288        }
289
290        @Override public CharMatcher and(CharMatcher other) {
291          return checkNotNull(other);
292        }
293
294        @Override public CharMatcher or(CharMatcher other) {
295          checkNotNull(other);
296          return this;
297        }
298
299        @Override public CharMatcher negate() {
300          return NONE;
301        }
302
303        @Override public CharMatcher precomputed() {
304          return this;
305        }
306      };
307
308  /** Matches no characters. */
309  public static final CharMatcher NONE =
310      new CharMatcher() {
311        @Override public boolean matches(char c) {
312          return false;
313        }
314
315        @Override public int indexIn(CharSequence sequence) {
316          checkNotNull(sequence);
317          return -1;
318        }
319
320        @Override public int indexIn(CharSequence sequence, int start) {
321          int length = sequence.length();
322          Preconditions.checkPositionIndex(start, length);
323          return -1;
324        }
325
326        @Override public int lastIndexIn(CharSequence sequence) {
327          checkNotNull(sequence);
328          return -1;
329        }
330
331        @Override public boolean matchesAllOf(CharSequence sequence) {
332          return sequence.length() == 0;
333        }
334
335        @Override public boolean matchesNoneOf(CharSequence sequence) {
336          checkNotNull(sequence);
337          return true;
338        }
339
340        @Override public String removeFrom(CharSequence sequence) {
341          return sequence.toString();
342        }
343
344        @Override public String replaceFrom(CharSequence sequence, char replacement) {
345          return sequence.toString();
346        }
347
348        @Override public String replaceFrom(CharSequence sequence, CharSequence replacement) {
349          checkNotNull(replacement);
350          return sequence.toString();
351        }
352
353        @Override public String collapseFrom(CharSequence sequence, char replacement) {
354          return sequence.toString();
355        }
356
357        @Override public String trimFrom(CharSequence sequence) {
358          return sequence.toString();
359        }
360
361        @Override public int countIn(CharSequence sequence) {
362          checkNotNull(sequence);
363          return 0;
364        }
365
366        @Override public CharMatcher and(CharMatcher other) {
367          checkNotNull(other);
368          return this;
369        }
370
371        @Override public CharMatcher or(CharMatcher other) {
372          return checkNotNull(other);
373        }
374
375        @Override public CharMatcher negate() {
376          return ANY;
377        }
378
379        @Override void setBits(LookupTable table) {}
380
381        @Override public CharMatcher precomputed() {
382          return this;
383        }
384      };
385
386  // Static factories
387
388  /**
389   * Returns a {@code char} matcher that matches only one specified character.
390   */
391  public static CharMatcher is(final char match) {
392    return new CharMatcher() {
393      @Override public boolean matches(char c) {
394        return c == match;
395      }
396
397      @Override public String replaceFrom(CharSequence sequence, char replacement) {
398        return sequence.toString().replace(match, replacement);
399      }
400
401      @Override public CharMatcher and(CharMatcher other) {
402        return other.matches(match) ? this : NONE;
403      }
404
405      @Override public CharMatcher or(CharMatcher other) {
406        return other.matches(match) ? other : super.or(other);
407      }
408
409      @Override public CharMatcher negate() {
410        return isNot(match);
411      }
412
413      @Override void setBits(LookupTable table) {
414        table.set(match);
415      }
416
417      @Override public CharMatcher precomputed() {
418        return this;
419      }
420    };
421  }
422
423  /**
424   * Returns a {@code char} matcher that matches any character except the one specified.
425   *
426   * <p>To negate another {@code CharMatcher}, use {@link #negate()}.
427   */
428  public static CharMatcher isNot(final char match) {
429    return new CharMatcher() {
430      @Override public boolean matches(char c) {
431        return c != match;
432      }
433
434      @Override public CharMatcher and(CharMatcher other) {
435        return other.matches(match) ? super.and(other) : other;
436      }
437
438      @Override public CharMatcher or(CharMatcher other) {
439        return other.matches(match) ? ANY : this;
440      }
441
442      @Override public CharMatcher negate() {
443        return is(match);
444      }
445    };
446  }
447
448  /**
449   * Returns a {@code char} matcher that matches any character present in the given character
450   * sequence.
451   */
452  public static CharMatcher anyOf(final CharSequence sequence) {
453    switch (sequence.length()) {
454      case 0:
455        return NONE;
456      case 1:
457        return is(sequence.charAt(0));
458      case 2:
459        final char match1 = sequence.charAt(0);
460        final char match2 = sequence.charAt(1);
461        return new CharMatcher() {
462          @Override public boolean matches(char c) {
463            return c == match1 || c == match2;
464          }
465
466          @Override void setBits(LookupTable table) {
467            table.set(match1);
468            table.set(match2);
469          }
470
471          @Override public CharMatcher precomputed() {
472            return this;
473          }
474        };
475    }
476
477    final char[] chars = sequence.toString().toCharArray();
478    Arrays.sort(chars); // not worth collapsing duplicates
479
480    return new CharMatcher() {
481      @Override public boolean matches(char c) {
482        return Arrays.binarySearch(chars, c) >= 0;
483      }
484
485      @Override void setBits(LookupTable table) {
486        for (char c : chars) {
487          table.set(c);
488        }
489      }
490    };
491  }
492
493  /**
494   * Returns a {@code char} matcher that matches any character not present in the given character
495   * sequence.
496   */
497  public static CharMatcher noneOf(CharSequence sequence) {
498    return anyOf(sequence).negate();
499  }
500
501  /**
502   * Returns a {@code char} matcher that matches any character in a given range (both endpoints are
503   * inclusive). For example, to match any lowercase letter of the English alphabet, use {@code
504   * CharMatcher.inRange('a', 'z')}.
505   *
506   * @throws IllegalArgumentException if {@code endInclusive < startInclusive}
507   */
508  public static CharMatcher inRange(final char startInclusive, final char endInclusive) {
509    checkArgument(endInclusive >= startInclusive);
510    return new CharMatcher() {
511      @Override public boolean matches(char c) {
512        return startInclusive <= c && c <= endInclusive;
513      }
514
515      @Override void setBits(LookupTable table) {
516        char c = startInclusive;
517        while (true) {
518          table.set(c);
519          if (c++ == endInclusive) {
520            break;
521          }
522        }
523      }
524
525      @Override public CharMatcher precomputed() {
526        return this;
527      }
528    };
529  }
530
531  /**
532   * Returns a matcher with identical behavior to the given {@link Character}-based predicate, but
533   * which operates on primitive {@code char} instances instead.
534   */
535  public static CharMatcher forPredicate(final Predicate<? super Character> predicate) {
536    checkNotNull(predicate);
537    if (predicate instanceof CharMatcher) {
538      return (CharMatcher) predicate;
539    }
540    return new CharMatcher() {
541      @Override public boolean matches(char c) {
542        return predicate.apply(c);
543      }
544
545      @Override public boolean apply(Character character) {
546        return predicate.apply(checkNotNull(character));
547      }
548    };
549  }
550
551  // Abstract methods
552
553  /** Determines a true or false value for the given character. */
554  public abstract boolean matches(char c);
555
556  // Non-static factories
557
558  /**
559   * Returns a matcher that matches any character not matched by this matcher.
560   */
561  public CharMatcher negate() {
562    final CharMatcher original = this;
563    return new CharMatcher() {
564      @Override public boolean matches(char c) {
565        return !original.matches(c);
566      }
567
568      @Override public boolean matchesAllOf(CharSequence sequence) {
569        return original.matchesNoneOf(sequence);
570      }
571
572      @Override public boolean matchesNoneOf(CharSequence sequence) {
573        return original.matchesAllOf(sequence);
574      }
575
576      @Override public int countIn(CharSequence sequence) {
577        return sequence.length() - original.countIn(sequence);
578      }
579
580      @Override public CharMatcher negate() {
581        return original;
582      }
583    };
584  }
585
586  /**
587   * Returns a matcher that matches any character matched by both this matcher and {@code other}.
588   */
589  public CharMatcher and(CharMatcher other) {
590    return new And(Arrays.asList(this, checkNotNull(other)));
591  }
592
593  private static class And extends CharMatcher {
594    List<CharMatcher> components;
595
596    And(List<CharMatcher> components) {
597      this.components = components; // Skip defensive copy (private)
598    }
599
600    @Override public boolean matches(char c) {
601      for (CharMatcher matcher : components) {
602        if (!matcher.matches(c)) {
603          return false;
604        }
605      }
606      return true;
607    }
608
609    @Override public CharMatcher and(CharMatcher other) {
610      List<CharMatcher> newComponents = new ArrayList<CharMatcher>(components);
611      newComponents.add(checkNotNull(other));
612      return new And(newComponents);
613    }
614  }
615
616  /**
617   * Returns a matcher that matches any character matched by either this matcher or {@code other}.
618   */
619  public CharMatcher or(CharMatcher other) {
620    return new Or(Arrays.asList(this, checkNotNull(other)));
621  }
622
623  private static class Or extends CharMatcher {
624    List<CharMatcher> components;
625
626    Or(List<CharMatcher> components) {
627      this.components = components; // Skip defensive copy (private)
628    }
629
630    @Override public boolean matches(char c) {
631      for (CharMatcher matcher : components) {
632        if (matcher.matches(c)) {
633          return true;
634        }
635      }
636      return false;
637    }
638
639    @Override public CharMatcher or(CharMatcher other) {
640      List<CharMatcher> newComponents = new ArrayList<CharMatcher>(components);
641      newComponents.add(checkNotNull(other));
642      return new Or(newComponents);
643    }
644
645    @Override void setBits(LookupTable table) {
646      for (CharMatcher matcher : components) {
647        matcher.setBits(table);
648      }
649    }
650  }
651
652  /**
653   * Returns a {@code char} matcher functionally equivalent to this one, but which may be faster to
654   * query than the original; your mileage may vary. Precomputation takes time and is likely to be
655   * worthwhile only if the precomputed matcher is queried many thousands of times.
656   *
657   * <p>This method has no effect (returns {@code this}) when called in GWT: it's unclear whether a
658   * precomputed matcher is faster, but it certainly consumes more memory, which doesn't seem like a
659   * worthwhile tradeoff in a browser.
660   */
661  public CharMatcher precomputed() {
662    return Platform.precomputeCharMatcher(this);
663  }
664
665  /**
666   * This is the actual implementation of {@link #precomputed}, but we bounce calls through a method
667   * on {@link Platform} so that we can have different behavior in GWT.
668   *
669   * <p>The default precomputation is to cache the configuration of the original matcher in an
670   * eight-kilobyte bit array. In some situations this produces a matcher which is faster to query
671   * than the original.
672   *
673   * <p>The default implementation creates a new bit array and passes it to {@link
674   * #setBits(LookupTable)}.
675   */
676  CharMatcher precomputedInternal() {
677    final LookupTable table = new LookupTable();
678    setBits(table);
679
680    return new CharMatcher() {
681      @Override public boolean matches(char c) {
682        return table.get(c);
683      }
684
685      // TODO(kevinb): make methods like negate() smart?
686
687      @Override public CharMatcher precomputed() {
688        return this;
689      }
690    };
691  }
692
693  /**
694   * For use by implementors; sets the bit corresponding to each character ('\0' to '{@literal
695   * \}uFFFF') that matches this matcher in the given bit array, leaving all other bits untouched.
696   *
697   * <p>The default implementation loops over every possible character value, invoking {@link
698   * #matches} for each one.
699   */
700  void setBits(LookupTable table) {
701    char c = Character.MIN_VALUE;
702    while (true) {
703      if (matches(c)) {
704        table.set(c);
705      }
706      if (c++ == Character.MAX_VALUE) {
707        break;
708      }
709    }
710  }
711
712  /**
713   * A bit array with one bit per {@code char} value, used by {@link CharMatcher#precomputed}.
714   *
715   * <p>TODO(kevinb): possibly share a common BitArray class with BloomFilter and others... a
716   * simpler java.util.BitSet.
717   */
718  private static final class LookupTable {
719    int[] data = new int[2048];
720
721    void set(char index) {
722      data[index >> 5] |= (1 << index);
723    }
724
725    boolean get(char index) {
726      return (data[index >> 5] & (1 << index)) != 0;
727    }
728  }
729
730  // Text processing routines
731
732  /**
733   * Returns {@code true} if a character sequence contains at least one matching character.
734   * Equivalent to {@code !matchesNoneOf(sequence)}.
735   *
736   * <p>The default implementation iterates over the sequence, invoking {@link #matches} for each
737   * character, until this returns {@code true} or the end is reached.
738   *
739   * @param sequence the character sequence to examine, possibly empty
740   * @return {@code true} if this matcher matches at least one character in the sequence
741   * @since 8
742   */
743  public boolean matchesAnyOf(CharSequence sequence) {
744    return !matchesNoneOf(sequence);
745  }
746
747  /**
748   * Returns {@code true} if a character sequence contains only matching characters.
749   *
750   * <p>The default implementation iterates over the sequence, invoking {@link #matches} for each
751   * character, until this returns {@code false} or the end is reached.
752   *
753   * @param sequence the character sequence to examine, possibly empty
754   * @return {@code true} if this matcher matches every character in the sequence, including when
755   *         the sequence is empty
756   */
757  public boolean matchesAllOf(CharSequence sequence) {
758    for (int i = sequence.length() - 1; i >= 0; i--) {
759      if (!matches(sequence.charAt(i))) {
760        return false;
761      }
762    }
763    return true;
764  }
765
766  /**
767   * Returns {@code true} if a character sequence contains no matching characters. Equivalent to
768   * {@code !matchesAnyOf(sequence)}.
769   *
770   * <p>The default implementation iterates over the sequence, invoking {@link #matches} for each
771   * character, until this returns {@code false} or the end is reached.
772   *
773   * @param sequence the character sequence to examine, possibly empty
774   * @return {@code true} if this matcher matches every character in the sequence, including when
775   *         the sequence is empty
776   */
777  public boolean matchesNoneOf(CharSequence sequence) {
778    return indexIn(sequence) == -1;
779  }
780
781  // TODO(kevinb): add matchesAnyOf()
782
783  /**
784   * Returns the index of the first matching character in a character sequence, or {@code -1} if no
785   * matching character is present.
786   *
787   * <p>The default implementation iterates over the sequence in forward order calling {@link
788   * #matches} for each character.
789   *
790   * @param sequence the character sequence to examine from the beginning
791   * @return an index, or {@code -1} if no character matches
792   */
793  public int indexIn(CharSequence sequence) {
794    int length = sequence.length();
795    for (int i = 0; i < length; i++) {
796      if (matches(sequence.charAt(i))) {
797        return i;
798      }
799    }
800    return -1;
801  }
802
803  /**
804   * Returns the index of the first matching character in a character sequence, starting from a
805   * given position, or {@code -1} if no character matches after that position.
806   *
807   * <p>The default implementation iterates over the sequence in forward order, beginning at {@code
808   * start}, calling {@link #matches} for each character.
809   *
810   * @param sequence the character sequence to examine
811   * @param start the first index to examine; must be nonnegative and no greater than {@code
812   *        sequence.length()}
813   * @return the index of the first matching character, guaranteed to be no less than {@code start},
814   *         or {@code -1} if no character matches
815   * @throws IndexOutOfBoundsException if start is negative or greater than {@code
816   *         sequence.length()}
817   */
818  public int indexIn(CharSequence sequence, int start) {
819    int length = sequence.length();
820    Preconditions.checkPositionIndex(start, length);
821    for (int i = start; i < length; i++) {
822      if (matches(sequence.charAt(i))) {
823        return i;
824      }
825    }
826    return -1;
827  }
828
829  /**
830   * Returns the index of the last matching character in a character sequence, or {@code -1} if no
831   * matching character is present.
832   *
833   * <p>The default implementation iterates over the sequence in reverse order calling {@link
834   * #matches} for each character.
835   *
836   * @param sequence the character sequence to examine from the end
837   * @return an index, or {@code -1} if no character matches
838   */
839  public int lastIndexIn(CharSequence sequence) {
840    for (int i = sequence.length() - 1; i >= 0; i--) {
841      if (matches(sequence.charAt(i))) {
842        return i;
843      }
844    }
845    return -1;
846  }
847
848  /**
849   * Returns the number of matching characters found in a character sequence.
850   */
851  public int countIn(CharSequence sequence) {
852    int count = 0;
853    for (int i = 0; i < sequence.length(); i++) {
854      if (matches(sequence.charAt(i))) {
855        count++;
856      }
857    }
858    return count;
859  }
860
861  /**
862   * Returns a string containing all non-matching characters of a character sequence, in order. For
863   * example: <pre>   {@code
864   *
865   *   CharMatcher.is('a').removeFrom("bazaar")}</pre>
866   *
867   * ... returns {@code "bzr"}.
868   */
869  public String removeFrom(CharSequence sequence) {
870    String string = sequence.toString();
871    int pos = indexIn(string);
872    if (pos == -1) {
873      return string;
874    }
875
876    char[] chars = string.toCharArray();
877    int spread = 1;
878
879    // This unusual loop comes from extensive benchmarking
880    OUT: while (true) {
881      pos++;
882      while (true) {
883        if (pos == chars.length) {
884          break OUT;
885        }
886        if (matches(chars[pos])) {
887          break;
888        }
889        chars[pos - spread] = chars[pos];
890        pos++;
891      }
892      spread++;
893    }
894    return new String(chars, 0, pos - spread);
895  }
896
897  /**
898   * Returns a string containing all matching characters of a character sequence, in order. For
899   * example: <pre>   {@code
900   *
901   *   CharMatcher.is('a').retainFrom("bazaar")}</pre>
902   *
903   * ... returns {@code "aaa"}.
904   */
905  public String retainFrom(CharSequence sequence) {
906    return negate().removeFrom(sequence);
907  }
908
909  /**
910   * Returns a string copy of the input character sequence, with each character that matches this
911   * matcher replaced by a given replacement character. For example: <pre>   {@code
912   *
913   *   CharMatcher.is('a').replaceFrom("radar", 'o')}</pre>
914   *
915   * ... returns {@code "rodor"}.
916   *
917   * <p>The default implementation uses {@link #indexIn(CharSequence)} to find the first matching
918   * character, then iterates the remainder of the sequence calling {@link #matches(char)} for each
919   * character.
920   *
921   * @param sequence the character sequence to replace matching characters in
922   * @param replacement the character to append to the result string in place of each matching
923   *        character in {@code sequence}
924   * @return the new string
925   */
926  public String replaceFrom(CharSequence sequence, char replacement) {
927    String string = sequence.toString();
928    int pos = indexIn(string);
929    if (pos == -1) {
930      return string;
931    }
932    char[] chars = string.toCharArray();
933    chars[pos] = replacement;
934    for (int i = pos + 1; i < chars.length; i++) {
935      if (matches(chars[i])) {
936        chars[i] = replacement;
937      }
938    }
939    return new String(chars);
940  }
941
942  /**
943   * Returns a string copy of the input character sequence, with each character that matches this
944   * matcher replaced by a given replacement sequence. For example: <pre>   {@code
945   *
946   *   CharMatcher.is('a').replaceFrom("yaha", "oo")}</pre>
947   *
948   * ... returns {@code "yoohoo"}.
949   *
950   * <p><b>Note:</b> If the replacement is a fixed string with only one character, you are better
951   * off calling {@link #replaceFrom(CharSequence, char)} directly.
952   *
953   * @param sequence the character sequence to replace matching characters in
954   * @param replacement the characters to append to the result string in place of each matching
955   *        character in {@code sequence}
956   * @return the new string
957   */
958  public String replaceFrom(CharSequence sequence, CharSequence replacement) {
959    int replacementLen = replacement.length();
960    if (replacementLen == 0) {
961      return removeFrom(sequence);
962    }
963    if (replacementLen == 1) {
964      return replaceFrom(sequence, replacement.charAt(0));
965    }
966
967    String string = sequence.toString();
968    int pos = indexIn(string);
969    if (pos == -1) {
970      return string;
971    }
972
973    int len = string.length();
974    StringBuilder buf = new StringBuilder((len * 3 / 2) + 16);
975
976    int oldpos = 0;
977    do {
978      buf.append(string, oldpos, pos);
979      buf.append(replacement);
980      oldpos = pos + 1;
981      pos = indexIn(string, oldpos);
982    } while (pos != -1);
983
984    buf.append(string, oldpos, len);
985    return buf.toString();
986  }
987
988  /**
989   * Returns a substring of the input character sequence that omits all characters this matcher
990   * matches from the beginning and from the end of the string. For example: <pre>   {@code
991   *
992   *   CharMatcher.anyOf("ab").trimFrom("abacatbab")}</pre>
993   *
994   * ... returns {@code "cat"}.
995   *
996   * <p>Note that: <pre>   {@code
997   *
998   *   CharMatcher.inRange('\0', ' ').trimFrom(str)}</pre>
999   *
1000   * ... is equivalent to {@link String#trim()}.
1001   */
1002  public String trimFrom(CharSequence sequence) {
1003    int len = sequence.length();
1004    int first;
1005    int last;
1006
1007    for (first = 0; first < len; first++) {
1008      if (!matches(sequence.charAt(first))) {
1009        break;
1010      }
1011    }
1012    for (last = len - 1; last > first; last--) {
1013      if (!matches(sequence.charAt(last))) {
1014        break;
1015      }
1016    }
1017
1018    return sequence.subSequence(first, last + 1).toString();
1019  }
1020
1021  /**
1022   * Returns a substring of the input character sequence that omits all characters this matcher
1023   * matches from the beginning of the string. For example: <pre> {@code
1024   *
1025   *   CharMatcher.anyOf("ab").trimLeadingFrom("abacatbab")}</pre>
1026   *
1027   * ... returns {@code "catbab"}.
1028   */
1029  public String trimLeadingFrom(CharSequence sequence) {
1030    int len = sequence.length();
1031    int first;
1032
1033    for (first = 0; first < len; first++) {
1034      if (!matches(sequence.charAt(first))) {
1035        break;
1036      }
1037    }
1038
1039    return sequence.subSequence(first, len).toString();
1040  }
1041
1042  /**
1043   * Returns a substring of the input character sequence that omits all characters this matcher
1044   * matches from the end of the string. For example: <pre> {@code
1045   *
1046   *   CharMatcher.anyOf("ab").trimTrailingFrom("abacatbab")}</pre>
1047   *
1048   * ... returns {@code "abacat"}.
1049   */
1050  public String trimTrailingFrom(CharSequence sequence) {
1051    int len = sequence.length();
1052    int last;
1053
1054    for (last = len - 1; last >= 0; last--) {
1055      if (!matches(sequence.charAt(last))) {
1056        break;
1057      }
1058    }
1059
1060    return sequence.subSequence(0, last + 1).toString();
1061  }
1062
1063  /**
1064   * Returns a string copy of the input character sequence, with each group of consecutive
1065   * characters that match this matcher replaced by a single replacement character. For example:
1066   * <pre>   {@code
1067   *
1068   *   CharMatcher.anyOf("eko").collapseFrom("bookkeeper", '-')}</pre>
1069   *
1070   * ... returns {@code "b-p-r"}.
1071   *
1072   * <p>The default implementation uses {@link #indexIn(CharSequence)} to find the first matching
1073   * character, then iterates the remainder of the sequence calling {@link #matches(char)} for each
1074   * character.
1075   *
1076   * @param sequence the character sequence to replace matching groups of characters in
1077   * @param replacement the character to append to the result string in place of each group of
1078   *        matching characters in {@code sequence}
1079   * @return the new string
1080   */
1081  public String collapseFrom(CharSequence sequence, char replacement) {
1082    int first = indexIn(sequence);
1083    if (first == -1) {
1084      return sequence.toString();
1085    }
1086
1087    // TODO(kevinb): see if this implementation can be made faster
1088    StringBuilder builder = new StringBuilder(sequence.length())
1089        .append(sequence.subSequence(0, first))
1090        .append(replacement);
1091    boolean in = true;
1092    for (int i = first + 1; i < sequence.length(); i++) {
1093      char c = sequence.charAt(i);
1094      if (apply(c)) {
1095        if (!in) {
1096          builder.append(replacement);
1097          in = true;
1098        }
1099      } else {
1100        builder.append(c);
1101        in = false;
1102      }
1103    }
1104    return builder.toString();
1105  }
1106
1107  /**
1108   * Collapses groups of matching characters exactly as {@link #collapseFrom} does, except that
1109   * groups of matching characters at the start or end of the sequence are removed without
1110   * replacement.
1111   */
1112  public String trimAndCollapseFrom(CharSequence sequence, char replacement) {
1113    int first = negate().indexIn(sequence);
1114    if (first == -1) {
1115      return ""; // everything matches. nothing's left.
1116    }
1117    StringBuilder builder = new StringBuilder(sequence.length());
1118    boolean inMatchingGroup = false;
1119    for (int i = first; i < sequence.length(); i++) {
1120      char c = sequence.charAt(i);
1121      if (apply(c)) {
1122        inMatchingGroup = true;
1123      } else {
1124        if (inMatchingGroup) {
1125          builder.append(replacement);
1126          inMatchingGroup = false;
1127        }
1128        builder.append(c);
1129      }
1130    }
1131    return builder.toString();
1132  }
1133
1134  // Predicate interface
1135
1136  /**
1137   * Returns {@code true} if this matcher matches the given character.
1138   *
1139   * @throws NullPointerException if {@code character} is null
1140   */
1141  @Override public boolean apply(Character character) {
1142    return matches(character);
1143  }
1144}