ICU 49.1.1
49.1.1
|
00001 /* 00002 ******************************************************************************* 00003 * 00004 * Copyright (C) 2011-2012 International Business Machines 00005 * Corporation and others. All Rights Reserved. 00006 * 00007 ******************************************************************************* 00008 */ 00009 00010 #ifndef INDEXCHARS_H 00011 #define INDEXCHARS_H 00012 00013 #include "unicode/utypes.h" 00014 #include "unicode/uobject.h" 00015 #include "unicode/locid.h" 00016 00017 00018 #if !UCONFIG_NO_COLLATION && !UCONFIG_NO_NORMALIZATION 00019 00025 U_CDECL_BEGIN 00026 00033 typedef enum UAlphabeticIndexLabelType { 00039 U_ALPHAINDEX_NORMAL = 0, 00040 00046 U_ALPHAINDEX_UNDERFLOW = 1, 00047 00056 U_ALPHAINDEX_INFLOW = 2, 00057 00063 U_ALPHAINDEX_OVERFLOW = 3 00064 } UAlphabeticIndexLabelType; 00065 00066 00067 struct UHashtable; 00068 U_CDECL_END 00069 00070 U_NAMESPACE_BEGIN 00071 00072 // Forward Declarations 00073 00074 class Collator; 00075 class RuleBasedCollator; 00076 class StringEnumeration; 00077 class UnicodeSet; 00078 class UVector; 00079 00080 00081 00165 class U_I18N_API AlphabeticIndex: public UObject { 00166 00167 public: 00168 00181 AlphabeticIndex(const Locale &locale, UErrorCode &status); 00182 00183 00184 00195 virtual AlphabeticIndex &addLabels(const UnicodeSet &additions, UErrorCode &status); 00196 00210 virtual AlphabeticIndex &addLabels(const Locale &locale, UErrorCode &status); 00211 00216 virtual ~AlphabeticIndex(); 00217 00218 00231 virtual const RuleBasedCollator &getCollator() const; 00232 00233 00242 virtual const UnicodeString &getInflowLabel() const; 00243 00255 virtual AlphabeticIndex &setInflowLabel(const UnicodeString &inflowLabel, UErrorCode &status); 00256 00257 00258 00266 virtual const UnicodeString &getOverflowLabel() const; 00267 00268 00278 virtual AlphabeticIndex &setOverflowLabel(const UnicodeString &overflowLabel, UErrorCode &status); 00279 00287 virtual const UnicodeString &getUnderflowLabel() const; 00288 00298 virtual AlphabeticIndex &setUnderflowLabel(const UnicodeString &underflowLabel, UErrorCode &status); 00299 00300 00308 virtual int32_t getMaxLabelCount() const; 00309 00322 virtual AlphabeticIndex &setMaxLabelCount(int32_t maxLabelCount, UErrorCode &status); 00323 00324 00337 virtual const UnicodeString &getOverflowComparisonString(const UnicodeString &lowerLimit, 00338 UErrorCode &status); 00339 00340 00357 virtual AlphabeticIndex &addRecord(const UnicodeString &name, const void *data, UErrorCode &status); 00358 00367 virtual AlphabeticIndex &clearRecords(UErrorCode &status); 00368 00369 00378 virtual int32_t getBucketCount(UErrorCode &status); 00379 00380 00389 virtual int32_t getRecordCount(UErrorCode &status); 00390 00391 00392 00405 virtual int32_t getBucketIndex(const UnicodeString &itemName, UErrorCode &status); 00406 00407 00414 virtual int32_t getBucketIndex() const; 00415 00416 00428 virtual UBool nextBucket(UErrorCode &status); 00429 00438 virtual const UnicodeString &getBucketLabel() const; 00439 00447 virtual UAlphabeticIndexLabelType getBucketLabelType() const; 00448 00457 virtual int32_t getBucketRecordCount() const; 00458 00459 00468 virtual AlphabeticIndex &resetBucketIterator(UErrorCode &status); 00469 00481 virtual UBool nextRecord(UErrorCode &status); 00482 00491 virtual const UnicodeString &getRecordName() const; 00492 00493 00502 virtual const void *getRecordData() const; 00503 00504 00511 virtual AlphabeticIndex &resetRecordIterator(); 00512 00513 private: 00514 // No ICU "poor man's RTTI" for this class nor its subclasses. 00515 virtual UClassID getDynamicClassID() const; 00516 00521 AlphabeticIndex(const AlphabeticIndex &other); 00522 00526 AlphabeticIndex &operator =(const AlphabeticIndex & /*other*/) { return *this;}; 00527 00532 virtual UBool operator==(const AlphabeticIndex& other) const; 00533 00538 virtual UBool operator!=(const AlphabeticIndex& other) const; 00539 00540 // Common initialization, for use from all constructors. 00541 void init(UErrorCode &status); 00542 00543 // Initialize & destruct static constants used by this class. 00544 static void staticInit(UErrorCode &status); 00545 00546 // Pinyin stuff. If the input name is Chinese, add the Pinyin prefix to the dest string. 00547 void hackName(UnicodeString &dest, const UnicodeString &name, const Collator *coll); 00548 void initPinyinBounds(const Collator *coll, UErrorCode &status); 00549 00550 public: 00551 #ifndef U_HIDE_INTERNAL_API 00552 00557 static void staticCleanup(); 00558 #endif /* U_HIDE_INTERNAL_API */ 00559 private: 00560 00561 // Add index characters from the specified locale to the dest set. 00562 // Does not remove any previous contents from dest. 00563 static void getIndexExemplars(UnicodeSet &dest, const Locale &locale, UErrorCode &status); 00564 00565 UVector *firstStringsInScript(UErrorCode &status); 00566 00567 static UnicodeString separated(const UnicodeString &item); 00568 00569 static UnicodeSet *getScriptSet(UnicodeSet &dest, const UnicodeString &codePoint, UErrorCode &status); 00570 00571 void buildIndex(UErrorCode &status); 00572 void buildBucketList(UErrorCode &status); 00573 void bucketRecords(UErrorCode &status); 00574 00575 00576 public: 00577 00578 // The following internal items are declared public only to allow access from 00579 // implementation code written in plain C. They are not intended for 00580 // public use. 00581 00582 #ifndef U_HIDE_INTERNAL_API 00583 00587 struct Record: public UMemory { 00588 AlphabeticIndex *alphaIndex_; 00589 const UnicodeString name_; 00590 UnicodeString sortingName_; // Usually the same as name_; different for Pinyin. 00591 const void *data_; 00592 int32_t serialNumber_; // Defines sorting order for names that compare equal. 00593 Record(AlphabeticIndex *alphaIndex, const UnicodeString &name, const void *data); 00594 ~Record(); 00595 }; 00596 #endif /* U_HIDE_INTERNAL_API */ 00597 00603 UVector *inputRecords_; 00604 00610 struct Bucket: public UMemory { 00611 UnicodeString label_; 00612 UnicodeString lowerBoundary_; 00613 UAlphabeticIndexLabelType labelType_; 00614 UVector *records_; // Records are owned by inputRecords_ vector. 00615 00616 Bucket(const UnicodeString &label, // Parameter strings are copied. 00617 const UnicodeString &lowerBoundary, 00618 UAlphabeticIndexLabelType type, UErrorCode &status); 00619 ~Bucket(); 00620 }; 00621 00622 public: 00623 00628 enum ELangType { 00630 kNormal, 00632 kSimplified, 00634 kTraditional 00635 }; 00636 00641 static ELangType langTypeFromLocale(const Locale &loc); 00642 00643 00644 private: 00645 00646 // Holds the contents of this index, buckets of user items. 00647 // UVector elements are of type (Bucket *) 00648 UVector *bucketList_; 00649 00650 int32_t labelsIterIndex_; // Index of next item to return. 00651 int32_t itemsIterIndex_; 00652 Bucket *currentBucket_; // While an iteration of the index in underway, 00653 // point to the bucket for the current label. 00654 // NULL when no iteration underway. 00655 00656 UBool indexBuildRequired_; // Caller has made changes to the index that 00657 // require rebuilding & bucketing before the 00658 // contents can be iterated. 00659 00660 int32_t maxLabelCount_; // Limit on # of labels permitted in the index. 00661 00662 UHashtable *alreadyIn_; // Key=UnicodeString, value=UnicodeSet 00663 00664 UnicodeSet *initialLabels_; // Initial (unprocessed) set of Labels. Union 00665 // of those explicitly set by the user plus 00666 // those from locales. Raw values, before 00667 // crunching into bucket labels. 00668 00669 UVector *labels_; // List of Labels, after processing, sorting. 00670 // Contents are (UnicodeString *) 00671 00672 UnicodeSet *noDistinctSorting_; // As the set of labels is built, strings may 00673 // be discarded from the exemplars. This contains 00674 // some of the discards, and is 00675 // intended for debugging. 00676 00677 UnicodeSet *notAlphabetic_; // As the set of labels is built, strings may 00678 // be discarded from the exemplars. This contains 00679 // some of the discards, and is 00680 // intended for debugging. 00681 00682 00683 UVector *firstScriptCharacters_; // The first character from each script, 00684 // in collation order. 00685 00686 Locale locale_; 00687 Collator *collator_; 00688 Collator *collatorPrimaryOnly_; 00689 00690 UnicodeString inflowLabel_; 00691 UnicodeString overflowLabel_; 00692 UnicodeString underflowLabel_; 00693 UnicodeString overflowComparisonString_; 00694 00695 ELangType langType_; // The language type, simplified Chinese, Traditional Chinese, 00696 // or not Chinese (Normal). Part of the Pinyin support 00697 00698 typedef const UChar PinyinLookup[24][3]; 00699 static PinyinLookup HACK_PINYIN_LOOKUP_SHORT; 00700 static PinyinLookup HACK_PINYIN_LOOKUP_LONG; 00701 00702 // These will be lazily set to the short or long tables based on which 00703 // Chinese collation has been configured into the ICU library. 00704 static PinyinLookup *HACK_PINYIN_LOOKUP; 00705 static const UChar *PINYIN_LOWER_BOUNDS; 00706 00707 00708 00709 int32_t recordCounter_; // Counts Records created. For minting record serial numbers. 00710 00711 // Constants. Lazily initialized the first time an AlphabeticIndex object is created. 00712 00713 static UnicodeSet *ALPHABETIC; 00714 static UnicodeSet *CORE_LATIN; 00715 static UnicodeSet *ETHIOPIC; 00716 static UnicodeSet *HANGUL; 00717 static UnicodeSet *IGNORE_SCRIPTS; 00718 static UnicodeSet *TO_TRY; 00719 static UnicodeSet *UNIHAN; 00720 static const UnicodeString *EMPTY_STRING; 00721 00722 }; 00723 00724 U_NAMESPACE_END 00725 00726 #endif /* UCONFIG_NO_COLLATION / UCONFIG_NO_NORMALIZATION */ 00727 #endif