Main Page   Class Hierarchy   Alphabetical List   Compound List   File List   Compound Members   File Members   Search  

uniset.h

Go to the documentation of this file.
00001 /*
00002 **********************************************************************
00003 * Copyright (C) 1999-2003, International Business Machines Corporation and others. All Rights Reserved.
00004 **********************************************************************
00005 *   Date        Name        Description
00006 *   10/20/99    alan        Creation.
00007 **********************************************************************
00008 */
00009 
00010 #ifndef UNICODESET_H
00011 #define UNICODESET_H
00012 
00013 #include "unicode/unifilt.h"
00014 #include "unicode/utypes.h"
00015 #include "unicode/unistr.h"
00016 #include "unicode/uchar.h"
00017 #include "unicode/uset.h"
00018 
00019 U_NAMESPACE_BEGIN
00020 
00021 class ParsePosition;
00022 class SymbolTable;
00023 class UVector;
00024 class CaseEquivClass;
00025 class RuleCharacterIterator;
00026     
// UnicodeSet: a public subclass of UnicodeFilter, exported with U_COMMON_API.
// Its state is a UChar32 array (`list`), a second internal UChar32 `buffer`
// that may be NULL, a UVector of `strings`, and a UnicodeString `pat`.
// Only declarations appear inside the class; the four inline members
// (operator!= and the three containsSome overloads) are defined right after
// the class, and everything else is implemented outside this header.
// NOTE(review): the 5-digit prefix on every line is the doxygen listing's own
// line counter, not C++. The gaps in that counter presumably mark doc
// comments the listing dropped -- confirm against the real uniset.h.
00258 class U_COMMON_API UnicodeSet : public UnicodeFilter {
00259 
          // Data members. No access specifier precedes them, so they are
          // private (the default for `class`).
          // NOTE(review): HIGH (see `list`) is not declared anywhere in this
          // listing; presumably a sentinel defined by the implementation.
00260     int32_t len; // length of list used; 0 <= len <= capacity
00261     int32_t capacity; // capacity of list
00262     int32_t bufferCapacity; // capacity of buffer
00263     UChar32* list; // MUST be terminated with HIGH
00264     UChar32* buffer; // internal buffer, may be NULL
00265 
00266     UVector* strings; // maintained in sorted order
00267 
          // NOTE(review): no comment survives for `pat`; presumably the
          // pattern text associated with this set -- confirm in uniset.cpp.
00277     UnicodeString pat;
00278 
00279 public:
00280 
          // Static UChar32 constants; their values are defined outside this
          // header. NOTE(review): presumably the lowest and highest code
          // points a set can hold -- confirm.
00285     static const UChar32 MIN_VALUE;
00286 
00291     static const UChar32 MAX_VALUE;
00292 
00293     //----------------------------------------------------------------
00294     // Constructors &c
00295     //----------------------------------------------------------------
00296 
00297 public:
00298 
          // Default constructor.
00303     UnicodeSet();
00304 
          // Construct from a pair of code points.
          // NOTE(review): presumably the range start..end -- confirm whether
          // `end` is inclusive.
00313     UnicodeSet(UChar32 start, UChar32 end);
00314 
          // Pattern constructors. All take the pattern text and a UErrorCode
          // reference; the longer forms add `options`, an optional
          // SymbolTable pointer and, in the last form, a ParsePosition
          // passed by non-const reference.
00323     UnicodeSet(const UnicodeString& pattern,
00324                UErrorCode& status);
00325 
00336     UnicodeSet(const UnicodeString& pattern,
00337                uint32_t options,
00338                const SymbolTable* symbols,
00339                UErrorCode& status);
00340 
00354     UnicodeSet(const UnicodeString& pattern, ParsePosition& pos,
00355                uint32_t options,
00356                const SymbolTable* symbols,
00357                UErrorCode& status);
00358 
          // This overload is only declared when U_USE_UNICODESET_DEPRECATES
          // is defined (the macro name suggests it is a deprecated API).
00359 #ifdef U_USE_UNICODESET_DEPRECATES
00360 
00366     UnicodeSet(int8_t category, UErrorCode& status);
00367 #endif
00368 
          // Copy constructor, virtual destructor and copy assignment.
00373     UnicodeSet(const UnicodeSet& o);
00374 
00379     virtual ~UnicodeSet();
00380 
00385     UnicodeSet& operator=(const UnicodeSet& o);
00386 
          // Equality is virtual; operator!= is non-virtual and is defined
          // inline after the class as the negation of operator==.
00398     virtual UBool operator==(const UnicodeSet& o) const;
00399 
00405     UBool operator!=(const UnicodeSet& o) const;
00406 
          // Returns UnicodeFunctor*, not UnicodeSet*.
          // NOTE(review): presumably overrides a base-class clone(); the base
          // declaration and the ownership of the result are not visible here.
00413     virtual UnicodeFunctor* clone() const;
00414 
00422     virtual int32_t hashCode(void) const;
00423 
00424     //----------------------------------------------------------------
00425     // Public API
00426     //----------------------------------------------------------------
00427 
          // Mutators in this section return UnicodeSet&.
          // NOTE(review): presumably *this, to allow call chaining -- confirm.
00437     UnicodeSet& set(UChar32 start, UChar32 end);
00438 
          // Static check on `pattern` at index `pos`; needs no instance.
00444     static UBool resemblesPattern(const UnicodeString& pattern,
00445                                   int32_t pos);
00446 
          // applyPattern overloads. Only the two-argument form is virtual.
          // The others add `options` and `symbols`, and the last one also
          // takes a ParsePosition by non-const reference.
00458     virtual UnicodeSet& applyPattern(const UnicodeString& pattern,
00459                                      UErrorCode& status);
00460 
00476     UnicodeSet& applyPattern(const UnicodeString& pattern,
00477                              uint32_t options,
00478                              const SymbolTable* symbols,
00479                              UErrorCode& status);
00480 
00509     UnicodeSet& applyPattern(const UnicodeString& pattern,
00510                              ParsePosition& pos,
00511                              uint32_t options,
00512                              const SymbolTable* symbols,
00513                              UErrorCode& status);
00514 
          // Takes `result` by non-const reference and returns a
          // UnicodeString&; `escapeUnprintable` defaults to FALSE.
00527     virtual UnicodeString& toPattern(UnicodeString& result,
00528                                      UBool escapeUnprintable = FALSE) const;
00529 
          // Property-based setup: by UProperty enum plus integer value, or by
          // property and value strings. Both take a UErrorCode reference.
00551     UnicodeSet& applyIntPropertyValue(UProperty prop,
00552                                       int32_t value,
00553                                       UErrorCode& ec);
00554 
00582     UnicodeSet& applyPropertyAlias(const UnicodeString& prop,
00583                                    const UnicodeString& value,
00584                                    UErrorCode& ec);
00585 
          // Const queries. The contains / containsAll / containsNone /
          // containsSome families are overloaded for a single code point, a
          // code point pair, another UnicodeSet and a UnicodeString (not
          // every family has every overload).
00593     virtual int32_t size(void) const;
00594 
00601     virtual UBool isEmpty(void) const;
00602 
00609     virtual UBool contains(UChar32 c) const;
00610     
00619     virtual UBool contains(UChar32 start, UChar32 end) const;
00620 
00628     UBool contains(const UnicodeString& s) const;
00629     
00637     virtual UBool containsAll(const UnicodeSet& c) const;
00638     
00646     UBool containsAll(const UnicodeString& s) const;
00647     
00656     UBool containsNone(UChar32 start, UChar32 end) const;
00657 
00665     UBool containsNone(const UnicodeSet& c) const;
00666     
00674     UBool containsNone(const UnicodeString& s) const;
00675         
          // Each containsSome overload is defined inline after the class as
          // the negation of the containsNone overload with the same arguments.
00684     inline UBool containsSome(UChar32 start, UChar32 end) const;
00685         
00693     inline UBool containsSome(const UnicodeSet& s) const;
00694         
00702     inline UBool containsSome(const UnicodeString& s) const;
00703         
          // Non-const member; `offset` is passed by non-const reference, so
          // the call is able to modify it.
          // NOTE(review): presumably implements a matcher interface inherited
          // through UnicodeFilter -- the base declaration is not visible here.
00708     UMatchDegree matches(const Replaceable& text,
00709                          int32_t& offset,
00710                          int32_t limit,
00711                          UBool incremental);
00712 
00713  private:    
          // Private helpers: matchRest is static, findCodePoint is const.
00735     static int32_t matchRest(const Replaceable& text,
00736                              int32_t start, int32_t limit,
00737                              const UnicodeString& s);
00738     
00748     int32_t findCodePoint(UChar32 c) const;
00749 
00750  public:
00751 
          // Const member that writes into the caller-supplied `toUnionTo`.
00759     void addMatchSetTo(UnicodeSet& toUnionTo) const;
00760 
          // Conversions between a code point and an int32_t index.
          // NOTE(review): the out-of-range / not-found return values are not
          // visible here -- confirm in the implementation.
00769     int32_t indexOf(UChar32 c) const;
00770 
00780     UChar32 charAt(int32_t index) const;
00781 
          // add overloads: code point pair (virtual), single code point,
          // and string.
00795     virtual UnicodeSet& add(UChar32 start, UChar32 end);
00796 
00803     UnicodeSet& add(UChar32 c);
00804 
00815     UnicodeSet& add(const UnicodeString& s);
00816 
00817  private:    
          // Private string helpers.
          // NOTE(review): getSingleCP presumably yields the code point when
          // `s` is exactly one code point and a sentinel otherwise -- confirm.
00823     static int32_t getSingleCP(const UnicodeString& s);
00824 
00825     void _add(const UnicodeString& s);
00826     
00827  public:
          // UnicodeString-argument forms of addAll / retainAll /
          // complementAll / removeAll.
00835     UnicodeSet& addAll(const UnicodeString& s);
00836 
00844     UnicodeSet& retainAll(const UnicodeString& s);
00845 
00853     UnicodeSet& complementAll(const UnicodeString& s);
00854 
00862     UnicodeSet& removeAll(const UnicodeString& s);
00863 
          // Static factories returning a raw UnicodeSet*.
          // NOTE(review): ownership of the returned pointer is not stated
          // here; presumably the caller must delete it -- confirm.
00872     static UnicodeSet* createFrom(const UnicodeString& s);
00873 
00874     
00882     static UnicodeSet* createFromAll(const UnicodeString& s);
00883 
          // retain / remove / complement. In each family the code-point-pair
          // overload (and the no-argument complement) is virtual; the
          // single-code-point and string overloads are not.
00896     virtual UnicodeSet& retain(UChar32 start, UChar32 end);
00897 
00898 
00903     UnicodeSet& retain(UChar32 c);
00904 
00917     virtual UnicodeSet& remove(UChar32 start, UChar32 end);
00918 
00925     UnicodeSet& remove(UChar32 c);
00926 
00935     UnicodeSet& remove(const UnicodeString& s);
00936 
00943     virtual UnicodeSet& complement(void);
00944 
00958     virtual UnicodeSet& complement(UChar32 start, UChar32 end);
00959 
00966     UnicodeSet& complement(UChar32 c);
00967 
00977     UnicodeSet& complement(const UnicodeString& s);
00978 
          // UnicodeSet-argument forms of the bulk operations; all virtual.
00990     virtual UnicodeSet& addAll(const UnicodeSet& c);
00991 
01002     virtual UnicodeSet& retainAll(const UnicodeSet& c);
01003 
01014     virtual UnicodeSet& removeAll(const UnicodeSet& c);
01015 
01025     virtual UnicodeSet& complementAll(const UnicodeSet& c);
01026 
01032     virtual UnicodeSet& clear(void);
01033 
          // NOTE(review): the accepted `attribute` values are not declared in
          // this class; presumably flags from uset.h -- confirm.
01057     UnicodeSet& closeOver(int32_t attribute);
01058 
          // Range accessors: a count plus start/end lookup by int32_t index.
01066     virtual int32_t getRangeCount(void) const;
01067 
01075     virtual UChar32 getRangeStart(int32_t index) const;
01076 
01084     virtual UChar32 getRangeEnd(int32_t index) const;
01085 
          // Const member writing to a caller-supplied uint16_t buffer of
          // `destCapacity` elements, with a UErrorCode reference.
          // NOTE(review): the meaning of the int32_t result and the overflow
          // behaviour are not visible here -- confirm.
01134     int32_t serialize(uint16_t *dest, int32_t destCapacity, UErrorCode& ec) const;
01135 
01141     virtual UnicodeSet& compact();
01142 
          // Class-ID accessors: one static, one virtual per-instance.
01154     static UClassID getStaticClassID(void);
01155 
01164     virtual UClassID getDynamicClassID(void) const;
01165 
01166 private:
01167 
01168     // Private API for the USet API
01169 
          // The friend declaration lets USetAccess call the two private
          // accessors below.
01170     friend class USetAccess;
01171 
01172     int32_t getStringCount() const;
01173 
01174     const UnicodeString* getString(int32_t index) const;
01175 
01176     //----------------------------------------------------------------
01177     // RuleBasedTransliterator support
01178     //----------------------------------------------------------------
01179 
01180 private:
01181 
          // Private virtual.
          // NOTE(review): presumably overrides a base-class hook; the base
          // declaration is not visible in this listing.
01187     virtual UBool matchesIndexValue(uint8_t v) const;
01188 
01189 private:
01190 
01191     //----------------------------------------------------------------
01192     // Implementation: Pattern parsing
01193     //----------------------------------------------------------------
01194 
          // Private overload of applyPattern: reads from a
          // RuleCharacterIterator, takes `rebuiltPat` by non-const reference,
          // and returns void instead of UnicodeSet&.
01195     void applyPattern(RuleCharacterIterator& chars,
01196                       const SymbolTable* symbols,
01197                       UnicodeString& rebuiltPat,
01198                       uint32_t options,
01199                       UErrorCode& ec);
01200 
01201     //----------------------------------------------------------------
01202     // Implementation: Utility methods
01203     //----------------------------------------------------------------
01204 
          // NOTE(review): presumably these manage the `list` / `buffer`
          // arrays and the `strings` vector declared at the top of the
          // class -- confirm.
01205     void ensureCapacity(int32_t newLen);
01206 
01207     void ensureBufferCapacity(int32_t newLen);
01208 
01209     void swapBuffers(void);
01210 
01211     UBool allocateStrings();
01212 
          // Pattern-generation helpers. Unlike the public toPattern(),
          // `escapeUnprintable` has no default value here.
01213     UnicodeString& _toPattern(UnicodeString& result,
01214                               UBool escapeUnprintable) const;
01215 
01216     UnicodeString& _generatePattern(UnicodeString& result,
01217                                     UBool escapeUnprintable) const;
01218 
01219     static void _appendToPat(UnicodeString& buf, const UnicodeString& s, UBool escapeUnprintable);
01220 
01221     static void _appendToPat(UnicodeString& buf, UChar32 c, UBool escapeUnprintable);
01222 
01223     //----------------------------------------------------------------
01224     // Implementation: Fundamental operators
01225     //----------------------------------------------------------------
01226 
          // Private array-based overloads: each takes a UChar32 array, its
          // length and an int8_t `polarity`.
          // NOTE(review): the meaning of the `polarity` values is not visible
          // here -- confirm in the implementation.
01227     void exclusiveOr(const UChar32* other, int32_t otherLen, int8_t polarity);
01228 
01229     void add(const UChar32* other, int32_t otherLen, int8_t polarity);
01230 
01231     void retain(const UChar32* other, int32_t otherLen, int8_t polarity);
01232 
          // Property-pattern support: static "resembles" checks for a string
          // plus index or a RuleCharacterIterator, and matching apply
          // overloads.
01238     static UBool resemblesPropertyPattern(const UnicodeString& pattern,
01239                                           int32_t pos);
01240 
01241     static UBool resemblesPropertyPattern(RuleCharacterIterator& chars,
01242                                           int32_t iterOpts);
01243 
01282     UnicodeSet& applyPropertyPattern(const UnicodeString& pattern,
01283                                      ParsePosition& ppos,
01284                                      UErrorCode &ec);
01285 
01286     void applyPropertyPattern(RuleCharacterIterator& chars,
01287                               UnicodeString& rebuiltPat,
01288                               UErrorCode& ec);
01289 
          // Filter: pointer to a function taking a code point and an opaque
          // context pointer and returning UBool. applyFilter receives such a
          // function together with the context pointer.
          // NOTE(review): presumably `context` is forwarded to each call and
          // code points for which the filter returns true are kept -- confirm.
01294     typedef UBool (*Filter)(UChar32 codePoint, void* context);
01295 
01304     void applyFilter(Filter filter,
01305                      void* context,
01306                      UErrorCode &status);
01307 
          // Static accessor returning a pointer to a const UnicodeSet.
          // NOTE(review): lifetime and ownership of the returned set are not
          // visible here -- confirm.
01312     static const UnicodeSet* getInclusions(UErrorCode &errorCode);
01313 
          // Grants UnicodeSetIterator access to this class's private members.
01314     friend class UnicodeSetIterator;
01315 
01316     //----------------------------------------------------------------
01317     // Implementation: closeOver
01318     //----------------------------------------------------------------
01319 
          // Helpers overloaded on a folded string, a CaseEquivClass or a
          // single UChar. Note they take UChar, not UChar32.
01320     void caseCloseOne(const UnicodeString& folded);
01321 
01322     void caseCloseOne(const CaseEquivClass& c);
01323 
01324     void caseCloseOne(UChar folded);
01325 
01326     static const CaseEquivClass* getCaseMapOf(const UnicodeString& folded);
01327 
01328     static const CaseEquivClass* getCaseMapOf(UChar folded);
01329 };
01330 
// Inequality is the logical negation of the (virtual) equality operator.
01331 inline UBool UnicodeSet::operator!=(const UnicodeSet& o) const {
01332     return !(*this == o);
01333 }
01334 
// Code point pair form: the negation of containsNone(start, end).
01335 inline UBool UnicodeSet::containsSome(UChar32 start, UChar32 end) const {
01336     return !(this->containsNone(start, end));
01337 }
01338 
// UnicodeSet form: the negation of containsNone(s).
01339 inline UBool UnicodeSet::containsSome(const UnicodeSet& s) const {
01340     return !(this->containsNone(s));
01341 }
01342 
// UnicodeString form: the negation of containsNone(s).
01343 inline UBool UnicodeSet::containsSome(const UnicodeString& s) const {
01344     return !(this->containsNone(s));
01345 }
01346 
01347 U_NAMESPACE_END
01348 
01349 #endif

Generated on Mon Nov 24 14:35:42 2003 for ICU 2.8 by doxygen 1.2.11.1, written by Dimitri van Heesch, © 1997-2001