ICU 49.1.1
49.1.1
|
00001 /* 00002 ******************************************************************************* 00003 * 00004 * Copyright (C) 2009-2012, International Business Machines 00005 * Corporation and others. All Rights Reserved. 00006 * 00007 ******************************************************************************* 00008 * file name: normalizer2.h 00009 * encoding: US-ASCII 00010 * tab size: 8 (not used) 00011 * indentation:4 00012 * 00013 * created on: 2009nov22 00014 * created by: Markus W. Scherer 00015 */ 00016 00017 #ifndef __NORMALIZER2_H__ 00018 #define __NORMALIZER2_H__ 00019 00025 #include "unicode/utypes.h" 00026 00027 #if !UCONFIG_NO_NORMALIZATION 00028 00029 #include "unicode/uniset.h" 00030 #include "unicode/unistr.h" 00031 #include "unicode/unorm2.h" 00032 00033 U_NAMESPACE_BEGIN 00034 00078 class U_COMMON_API Normalizer2 : public UObject { 00079 public: 00084 ~Normalizer2(); 00085 00086 #ifndef U_HIDE_DRAFT_API 00087 00098 static const Normalizer2 * 00099 getNFCInstance(UErrorCode &errorCode); 00100 00112 static const Normalizer2 * 00113 getNFDInstance(UErrorCode &errorCode); 00114 00126 static const Normalizer2 * 00127 getNFKCInstance(UErrorCode &errorCode); 00128 00140 static const Normalizer2 * 00141 getNFKDInstance(UErrorCode &errorCode); 00142 00154 static const Normalizer2 * 00155 getNFKCCasefoldInstance(UErrorCode &errorCode); 00156 #endif /* U_HIDE_DRAFT_API */ 00157 00179 static const Normalizer2 * 00180 getInstance(const char *packageName, 00181 const char *name, 00182 UNormalization2Mode mode, 00183 UErrorCode &errorCode); 00184 00195 UnicodeString 00196 normalize(const UnicodeString &src, UErrorCode &errorCode) const { 00197 UnicodeString result; 00198 normalize(src, result, errorCode); 00199 return result; 00200 } 00214 virtual UnicodeString & 00215 normalize(const UnicodeString &src, 00216 UnicodeString &dest, 00217 UErrorCode &errorCode) const = 0; 00232 virtual UnicodeString & 00233 normalizeSecondAndAppend(UnicodeString &first, 00234 const UnicodeString &second, 00235 UErrorCode &errorCode) const = 0; 00250 virtual UnicodeString & 00251 append(UnicodeString &first, 00252 const UnicodeString &second, 00253 UErrorCode &errorCode) const = 0; 00254 00268 virtual UBool 00269 getDecomposition(UChar32 c, UnicodeString &decomposition) const = 0; 00270 00295 virtual UBool 00296 getRawDecomposition(UChar32 c, UnicodeString &decomposition) const; 00297 00313 virtual UChar32 00314 composePair(UChar32 a, UChar32 b) const; 00315 00324 virtual uint8_t 00325 getCombiningClass(UChar32 c) const; 00326 00341 virtual UBool 00342 isNormalized(const UnicodeString &s, UErrorCode &errorCode) const = 0; 00343 00359 virtual UNormalizationCheckResult 00360 quickCheck(const UnicodeString &s, UErrorCode &errorCode) const = 0; 00361 00384 virtual int32_t 00385 spanQuickCheckYes(const UnicodeString &s, UErrorCode &errorCode) const = 0; 00386 00400 virtual UBool hasBoundaryBefore(UChar32 c) const = 0; 00401 00416 virtual UBool hasBoundaryAfter(UChar32 c) const = 0; 00417 00431 virtual UBool isInert(UChar32 c) const = 0; 00432 00433 private: 00434 // No ICU "poor man's RTTI" for this class nor its subclasses. 00435 virtual UClassID getDynamicClassID() const; 00436 }; 00437 00449 class U_COMMON_API FilteredNormalizer2 : public Normalizer2 { 00450 public: 00461 FilteredNormalizer2(const Normalizer2 &n2, const UnicodeSet &filterSet) : 00462 norm2(n2), set(filterSet) {} 00463 00468 ~FilteredNormalizer2(); 00469 00483 virtual UnicodeString & 00484 normalize(const UnicodeString &src, 00485 UnicodeString &dest, 00486 UErrorCode &errorCode) const; 00501 virtual UnicodeString & 00502 normalizeSecondAndAppend(UnicodeString &first, 00503 const UnicodeString &second, 00504 UErrorCode &errorCode) const; 00519 virtual UnicodeString & 00520 append(UnicodeString &first, 00521 const UnicodeString &second, 00522 UErrorCode &errorCode) const; 00523 00535 virtual UBool 00536 getDecomposition(UChar32 c, UnicodeString &decomposition) const; 00537 00549 virtual UBool 00550 getRawDecomposition(UChar32 c, UnicodeString &decomposition) const; 00551 00562 virtual UChar32 00563 composePair(UChar32 a, UChar32 b) const; 00564 00573 virtual uint8_t 00574 getCombiningClass(UChar32 c) const; 00575 00587 virtual UBool 00588 isNormalized(const UnicodeString &s, UErrorCode &errorCode) const; 00600 virtual UNormalizationCheckResult 00601 quickCheck(const UnicodeString &s, UErrorCode &errorCode) const; 00613 virtual int32_t 00614 spanQuickCheckYes(const UnicodeString &s, UErrorCode &errorCode) const; 00615 00624 virtual UBool hasBoundaryBefore(UChar32 c) const; 00625 00634 virtual UBool hasBoundaryAfter(UChar32 c) const; 00635 00643 virtual UBool isInert(UChar32 c) const; 00644 private: 00645 UnicodeString & 00646 normalize(const UnicodeString &src, 00647 UnicodeString &dest, 00648 USetSpanCondition spanCondition, 00649 UErrorCode &errorCode) const; 00650 00651 UnicodeString & 00652 normalizeSecondAndAppend(UnicodeString &first, 00653 const UnicodeString &second, 00654 UBool doNormalize, 00655 UErrorCode &errorCode) const; 00656 00657 const Normalizer2 &norm2; 00658 const UnicodeSet &set; 00659 }; 00660 00661 U_NAMESPACE_END 00662 00663 #endif // !UCONFIG_NO_NORMALIZATION 00664 #endif // __NORMALIZER2_H__