ICU 4.6
normalizer2.h
Go to the documentation of this file.
00001 /*
00002 *******************************************************************************
00003 *
00004 *   Copyright (C) 2009-2010, International Business Machines
00005 *   Corporation and others.  All Rights Reserved.
00006 *
00007 *******************************************************************************
00008 *   file name:  normalizer2.h
00009 *   encoding:   US-ASCII
00010 *   tab size:   8 (not used)
00011 *   indentation:4
00012 *
00013 *   created on: 2009nov22
00014 *   created by: Markus W. Scherer
00015 */
00016 
00017 #ifndef __NORMALIZER2_H__
00018 #define __NORMALIZER2_H__
00019 
00025 #include "unicode/utypes.h"
00026 
00027 #if !UCONFIG_NO_NORMALIZATION
00028 
00029 #include "unicode/uniset.h"
00030 #include "unicode/unistr.h"
00031 #include "unicode/unorm2.h"
00032 
00033 U_NAMESPACE_BEGIN
00034 
00078 class U_COMMON_API Normalizer2 : public UObject {
          // Abstract normalization interface derived from UObject.
          //
          // Instances are obtained through the static getInstance() factory, which
          // returns a pointer to const; every non-static member declared here is a
          // const member function. Apart from getInstance() and the two-argument
          // normalize() convenience overload, all public members are pure virtual
          // and have to be supplied by a concrete subclass.
          //
          // NOTE(review): this header shows declarations only. Statements below about
          // what an operation computes are derived from names and signatures and are
          // marked as assumptions; confirm them against the ICU API reference.
00079 public:
          /**
           * Static factory returning a pointer to a const Normalizer2.
           *
           * @param packageName C string; presumably identifies the data package to
           *                    load from (whether NULL is accepted is not visible
           *                    here -- TODO confirm)
           * @param name        C string; presumably the name of the normalization data
           * @param mode        UNormalization2Mode value selecting the mode of the
           *                    returned instance
           * @param errorCode   ICU error code, passed by non-const reference
           * @return pointer to a const Normalizer2. Ownership of the object and the
           *         value returned on failure are not visible in this header --
           *         TODO confirm before deleting or dereferencing unconditionally.
           */
00101     static const Normalizer2 *
00102     getInstance(const char *packageName,
00103                 const char *name,
00104                 UNormalization2Mode mode,
00105                 UErrorCode &errorCode);
00106 
          /**
           * Convenience overload that returns the result by value.
           *
           * Creates a local UnicodeString, forwards to the pure virtual
           * normalize(src, dest, errorCode) overload with that local string as the
           * destination, and returns the local string. The reference returned by the
           * forwarded call is ignored.
           *
           * @param src       source string
           * @param errorCode ICU error code, handed through to the forwarded call
           * @return the local result string, in whatever state the three-argument
           *         overload left it (this function does not inspect errorCode)
           */
00117     UnicodeString
00118     normalize(const UnicodeString &src, UErrorCode &errorCode) const {
00119         UnicodeString result;
00120         normalize(src, result, errorCode);
00121         return result;
00122     }
          /**
           * Pure virtual normalization entry point writing into a caller-supplied
           * string.
           *
           * @param src       source string (const reference, not modified)
           * @param dest      destination string (non-const reference)
           * @param errorCode ICU error code, passed by non-const reference
           * @return reference to a UnicodeString -- presumably dest; confirm with the
           *         implementations
           */
00136     virtual UnicodeString &
00137     normalize(const UnicodeString &src,
00138               UnicodeString &dest,
00139               UErrorCode &errorCode) const = 0;
          /**
           * Pure virtual. Going by the name, normalizes second and appends it to
           * first.
           * NOTE(review): how the junction between the existing content of first and
           * the appended text is treated is not visible here -- confirm.
           *
           * @param first     string that receives the result (non-const reference)
           * @param second    string to be added (const reference, not modified)
           * @param errorCode ICU error code, passed by non-const reference
           * @return reference to a UnicodeString -- presumably first; TODO confirm
           */
00154     virtual UnicodeString &
00155     normalizeSecondAndAppend(UnicodeString &first,
00156                              const UnicodeString &second,
00157                              UErrorCode &errorCode) const = 0;
          /**
           * Pure virtual with the same parameter list and return type as
           * normalizeSecondAndAppend().
           * NOTE(review): presumably expects second to be normalized already and only
           * joins it onto first -- confirm against the API reference.
           *
           * @param first     string that receives the result (non-const reference)
           * @param second    string to be added (const reference, not modified)
           * @param errorCode ICU error code, passed by non-const reference
           * @return reference to a UnicodeString -- presumably first; TODO confirm
           */
00172     virtual UnicodeString &
00173     append(UnicodeString &first,
00174            const UnicodeString &second,
00175            UErrorCode &errorCode) const = 0;
00176 
          /**
           * Pure virtual query for the decomposition of a single code point.
           * Takes no UErrorCode; the only status channel is the UBool result.
           *
           * @param c             code point to look up
           * @param decomposition output string (non-const reference)
           * @return UBool -- presumably true when c has a decomposition and it was
           *         written to the output string; TODO confirm, including the state
           *         of the output string when the result is false
           */
00187     virtual UBool
00188     getDecomposition(UChar32 c, UnicodeString &decomposition) const = 0;
00189 
          /**
           * Pure virtual test of a whole string.
           *
           * @param s         string to test (const reference, not modified)
           * @param errorCode ICU error code, passed by non-const reference
           * @return UBool -- presumably true when s is normalized; the result to
           *         expect after a failure is not visible here
           */
00204     virtual UBool
00205     isNormalized(const UnicodeString &s, UErrorCode &errorCode) const = 0;
00206 
          /**
           * Pure virtual check of a string that reports a UNormalizationCheckResult
           * rather than a plain UBool.
           * NOTE(review): the name suggests a fast check that is allowed to give an
           * inconclusive answer -- confirm the possible result values.
           *
           * @param s         string to test (const reference, not modified)
           * @param errorCode ICU error code, passed by non-const reference
           * @return a UNormalizationCheckResult value
           */
00222     virtual UNormalizationCheckResult
00223     quickCheck(const UnicodeString &s, UErrorCode &errorCode) const = 0;
00224 
          /**
           * Pure virtual returning a 32-bit signed integer for a string.
           * NOTE(review): presumably the end index of the leading part of s for which
           * the quick check answers yes -- confirm the unit and exact meaning.
           *
           * @param s         string to scan (const reference, not modified)
           * @param errorCode ICU error code, passed by non-const reference
           * @return int32_t result; see the note above
           */
00247     virtual int32_t
00248     spanQuickCheckYes(const UnicodeString &s, UErrorCode &errorCode) const = 0;
00249 
          /**
           * Pure virtual per-code-point predicate; takes no UErrorCode.
           * NOTE(review): presumably reports a normalization boundary in front of
           * c -- confirm.
           *
           * @param c code point to test
           * @return UBool result of the test
           */
00263     virtual UBool hasBoundaryBefore(UChar32 c) const = 0;
00264 
          /**
           * Pure virtual per-code-point predicate; takes no UErrorCode.
           * NOTE(review): presumably reports a normalization boundary following
           * c -- confirm.
           *
           * @param c code point to test
           * @return UBool result of the test
           */
00279     virtual UBool hasBoundaryAfter(UChar32 c) const = 0;
00280 
          /**
           * Pure virtual per-code-point predicate; takes no UErrorCode.
           * NOTE(review): presumably true when c does not take part in
           * normalization at all -- confirm.
           *
           * @param c code point to test
           * @return UBool result of the test
           */
00294     virtual UBool isInert(UChar32 c) const = 0;
00295 
00296 private:
00297     // No ICU "poor man's RTTI" for this class nor its subclasses.
          // Declared in the private section, so code outside this class cannot call
          // it through a Normalizer2. Only the declaration is visible here; the
          // definition lives elsewhere. Presumably this overrides the class ID hook
          // of UObject -- TODO confirm.
00298     virtual UClassID getDynamicClassID() const;
00299 };
00300 
00312 class U_COMMON_API FilteredNormalizer2 : public Normalizer2 {
          // Normalizer2 subclass that pairs another Normalizer2 with a UnicodeSet
          // (reference members norm2 and set at the end of the class).
          //
          // It declares a non-pure override for each of the ten pure virtual functions
          // of Normalizer2; the definitions are not part of this header.
          // NOTE(review): presumably the wrapped normalizer is applied only to text
          // selected by the set and other text is passed through -- confirm against
          // the implementation.
          //
          // NOTE(review): because this class declares its own normalize() overloads,
          // the inherited two-argument Normalizer2::normalize(src, errorCode) is
          // hidden for calls made on an expression of type FilteredNormalizer2; it
          // stays reachable through a Normalizer2 reference or pointer.
00313 public:
          /**
           * Binds this object to an existing normalizer and an existing set.
           *
           * Both arguments are stored as references in norm2 and set; nothing is
           * copied. The caller therefore has to keep both objects alive for as long
           * as this FilteredNormalizer2 is in use.
           *
           * @param n2        normalizer to wrap
           * @param filterSet set to pair with it
           */
00324     FilteredNormalizer2(const Normalizer2 &n2, const UnicodeSet &filterSet) :
00325             norm2(n2), set(filterSet) {}
00326 
          /**
           * Override of Normalizer2::normalize(src, dest, errorCode).
           *
           * @param src       source string (const reference, not modified)
           * @param dest      destination string (non-const reference)
           * @param errorCode ICU error code, passed by non-const reference
           * @return reference to a UnicodeString -- presumably dest; TODO confirm
           */
00340     virtual UnicodeString &
00341     normalize(const UnicodeString &src,
00342               UnicodeString &dest,
00343               UErrorCode &errorCode) const;
          /**
           * Override of Normalizer2::normalizeSecondAndAppend().
           *
           * @param first     string that receives the result (non-const reference)
           * @param second    string to be added (const reference, not modified)
           * @param errorCode ICU error code, passed by non-const reference
           * @return reference to a UnicodeString -- presumably first; TODO confirm
           */
00358     virtual UnicodeString &
00359     normalizeSecondAndAppend(UnicodeString &first,
00360                              const UnicodeString &second,
00361                              UErrorCode &errorCode) const;
          /**
           * Override of Normalizer2::append().
           *
           * @param first     string that receives the result (non-const reference)
           * @param second    string to be added (const reference, not modified)
           * @param errorCode ICU error code, passed by non-const reference
           * @return reference to a UnicodeString -- presumably first; TODO confirm
           */
00376     virtual UnicodeString &
00377     append(UnicodeString &first,
00378            const UnicodeString &second,
00379            UErrorCode &errorCode) const;
00380 
          /**
           * Override of Normalizer2::getDecomposition(); takes no UErrorCode.
           * NOTE(review): whether the set influences the answer is not visible
           * here -- confirm.
           *
           * @param c             code point to look up
           * @param decomposition output string (non-const reference)
           * @return UBool result of the lookup
           */
00391     virtual UBool
00392     getDecomposition(UChar32 c, UnicodeString &decomposition) const;
00393 
          /**
           * Override of Normalizer2::isNormalized().
           *
           * @param s         string to test (const reference, not modified)
           * @param errorCode ICU error code, passed by non-const reference
           * @return UBool result of the test
           */
00405     virtual UBool
00406     isNormalized(const UnicodeString &s, UErrorCode &errorCode) const;
          /**
           * Override of Normalizer2::quickCheck().
           *
           * @param s         string to test (const reference, not modified)
           * @param errorCode ICU error code, passed by non-const reference
           * @return a UNormalizationCheckResult value
           */
00418     virtual UNormalizationCheckResult
00419     quickCheck(const UnicodeString &s, UErrorCode &errorCode) const;
          /**
           * Override of Normalizer2::spanQuickCheckYes().
           *
           * @param s         string to scan (const reference, not modified)
           * @param errorCode ICU error code, passed by non-const reference
           * @return int32_t result -- presumably an end index into s; TODO confirm
           */
00431     virtual int32_t
00432     spanQuickCheckYes(const UnicodeString &s, UErrorCode &errorCode) const;
00433 
          /**
           * Override of Normalizer2::hasBoundaryBefore(); takes no UErrorCode.
           *
           * @param c code point to test
           * @return UBool result of the test
           */
00442     virtual UBool hasBoundaryBefore(UChar32 c) const;
00443 
          /**
           * Override of Normalizer2::hasBoundaryAfter(); takes no UErrorCode.
           *
           * @param c code point to test
           * @return UBool result of the test
           */
00452     virtual UBool hasBoundaryAfter(UChar32 c) const;
00453 
          /**
           * Override of Normalizer2::isInert(); takes no UErrorCode.
           *
           * @param c code point to test
           * @return UBool result of the test
           */
00461     virtual UBool isInert(UChar32 c) const;
00462 private:
          // Private, non-virtual overload of normalize() that takes an additional
          // USetSpanCondition argument.
          // NOTE(review): presumably the shared worker behind the public normalize(),
          // with spanCondition describing how the first span of src relates to the
          // set -- confirm in the implementation file.
00463     UnicodeString &
00464     normalize(const UnicodeString &src,
00465               UnicodeString &dest,
00466               USetSpanCondition spanCondition,
00467               UErrorCode &errorCode) const;
00468 
          // Private, non-virtual overload of normalizeSecondAndAppend() that takes an
          // additional UBool flag.
          // NOTE(review): presumably the shared worker behind the public
          // normalizeSecondAndAppend() and append(), with doNormalize choosing
          // between the two behaviors -- confirm in the implementation file.
00469     UnicodeString &
00470     normalizeSecondAndAppend(UnicodeString &first,
00471                              const UnicodeString &second,
00472                              UBool doNormalize,
00473                              UErrorCode &errorCode) const;
00474 
          // Both members are references bound once in the constructor: this object
          // owns neither the normalizer nor the set, and reference members cannot be
          // rebound afterwards.
00475     const Normalizer2 &norm2;
00476     const UnicodeSet &set;
00477 };
00478 
00479 U_NAMESPACE_END
00480 
00481 #endif  // !UCONFIG_NO_NORMALIZATION
00482 #endif  // __NORMALIZER2_H__
 All Data Structures Files Functions Variables Typedefs Enumerations Enumerator Friends Defines