ICU 49.1.1
49.1.1
|
00001 /* 00002 ******************************************************************************* 00003 * Copyright (C) 2011-2012, International Business Machines 00004 * Corporation and others. All Rights Reserved. 00005 ******************************************************************************* 00006 * file name: messagepattern.h 00007 * encoding: US-ASCII 00008 * tab size: 8 (not used) 00009 * indentation:4 00010 * 00011 * created on: 2011mar14 00012 * created by: Markus W. Scherer 00013 */ 00014 00015 #ifndef __MESSAGEPATTERN_H__ 00016 #define __MESSAGEPATTERN_H__ 00017 00023 #include "unicode/utypes.h" 00024 00025 #if !UCONFIG_NO_FORMATTING 00026 00027 #include "unicode/parseerr.h" 00028 #include "unicode/unistr.h" 00029 00066 enum UMessagePatternApostropheMode { 00078 UMSGPAT_APOS_DOUBLE_OPTIONAL, 00087 UMSGPAT_APOS_DOUBLE_REQUIRED 00088 }; 00092 typedef enum UMessagePatternApostropheMode UMessagePatternApostropheMode; 00093 00098 enum UMessagePatternPartType { 00108 UMSGPAT_PART_TYPE_MSG_START, 00117 UMSGPAT_PART_TYPE_MSG_LIMIT, 00125 UMSGPAT_PART_TYPE_SKIP_SYNTAX, 00132 UMSGPAT_PART_TYPE_INSERT_CHAR, 00140 UMSGPAT_PART_TYPE_REPLACE_NUMBER, 00151 UMSGPAT_PART_TYPE_ARG_START, 00158 UMSGPAT_PART_TYPE_ARG_LIMIT, 00163 UMSGPAT_PART_TYPE_ARG_NUMBER, 00169 UMSGPAT_PART_TYPE_ARG_NAME, 00175 UMSGPAT_PART_TYPE_ARG_TYPE, 00181 UMSGPAT_PART_TYPE_ARG_STYLE, 00187 UMSGPAT_PART_TYPE_ARG_SELECTOR, 00194 UMSGPAT_PART_TYPE_ARG_INT, 00202 UMSGPAT_PART_TYPE_ARG_DOUBLE 00203 }; 00207 typedef enum UMessagePatternPartType UMessagePatternPartType; 00208 00217 enum UMessagePatternArgType { 00222 UMSGPAT_ARG_TYPE_NONE, 00228 UMSGPAT_ARG_TYPE_SIMPLE, 00234 UMSGPAT_ARG_TYPE_CHOICE, 00244 UMSGPAT_ARG_TYPE_PLURAL, 00249 UMSGPAT_ARG_TYPE_SELECT 00250 }; 00254 typedef enum UMessagePatternArgType UMessagePatternArgType; 00255 00256 enum { 00262 UMSGPAT_ARG_NAME_NOT_NUMBER=-1, 00263 00271 UMSGPAT_ARG_NAME_NOT_VALID=-2 00272 }; 00273 00280 #define UMSGPAT_NO_NUMERIC_VALUE ((double)(-123456789)) 00281 00282 U_NAMESPACE_BEGIN 00283 00284 class MessagePatternDoubleList; 00285 class MessagePatternPartsList; 00286 00343 class U_COMMON_API MessagePattern : public UObject { 00344 public: 00353 MessagePattern(UErrorCode &errorCode); 00354 00364 MessagePattern(UMessagePatternApostropheMode mode, UErrorCode &errorCode); 00365 00384 MessagePattern(const UnicodeString &pattern, UParseError *parseError, UErrorCode &errorCode); 00385 00391 MessagePattern(const MessagePattern &other); 00392 00399 MessagePattern &operator=(const MessagePattern &other); 00400 00405 virtual ~MessagePattern(); 00406 00424 MessagePattern &parse(const UnicodeString &pattern, 00425 UParseError *parseError, UErrorCode &errorCode); 00426 00444 MessagePattern &parseChoiceStyle(const UnicodeString &pattern, 00445 UParseError *parseError, UErrorCode &errorCode); 00446 00464 MessagePattern &parsePluralStyle(const UnicodeString &pattern, 00465 UParseError *parseError, UErrorCode &errorCode); 00466 00484 MessagePattern &parseSelectStyle(const UnicodeString &pattern, 00485 UParseError *parseError, UErrorCode &errorCode); 00486 00492 void clear(); 00493 00500 void clearPatternAndSetApostropheMode(UMessagePatternApostropheMode mode) { 00501 clear(); 00502 aposMode=mode; 00503 } 00504 00510 UBool operator==(const MessagePattern &other) const; 00511 00517 inline UBool operator!=(const MessagePattern &other) const { 00518 return !operator==(other); 00519 } 00520 00525 int32_t hashCode() const; 00526 00531 UMessagePatternApostropheMode getApostropheMode() const { 00532 return aposMode; 00533 } 00534 00535 // Java has package-private jdkAposMode() here. 00536 // In C++, this is declared in the MessageImpl class. 00537 00542 const UnicodeString &getPatternString() const { 00543 return msg; 00544 } 00545 00551 UBool hasNamedArguments() const { 00552 return hasArgNames; 00553 } 00554 00560 UBool hasNumberedArguments() const { 00561 return hasArgNumbers; 00562 } 00563 00575 static int32_t validateArgumentName(const UnicodeString &name); 00576 00587 UnicodeString autoQuoteApostropheDeep() const; 00588 00589 class Part; 00590 00597 int32_t countParts() const { 00598 return partsLength; 00599 } 00600 00607 const Part &getPart(int32_t i) const { 00608 return parts[i]; 00609 } 00610 00618 UMessagePatternPartType getPartType(int32_t i) const { 00619 return getPart(i).type; 00620 } 00621 00629 int32_t getPatternIndex(int32_t partIndex) const { 00630 return getPart(partIndex).index; 00631 } 00632 00640 UnicodeString getSubstring(const Part &part) const { 00641 return msg.tempSubString(part.index, part.length); 00642 } 00643 00651 UBool partSubstringMatches(const Part &part, const UnicodeString &s) const { 00652 return 0==msg.compare(part.index, part.length, s); 00653 } 00654 00661 double getNumericValue(const Part &part) const; 00662 00669 double getPluralOffset(int32_t pluralStart) const; 00670 00679 int32_t getLimitPartIndex(int32_t start) const { 00680 int32_t limit=getPart(start).limitPartIndex; 00681 if(limit<start) { 00682 return start; 00683 } 00684 return limit; 00685 } 00686 00694 class Part : public UMemory { 00695 public: 00700 Part() {} 00701 00707 UMessagePatternPartType getType() const { 00708 return type; 00709 } 00710 00716 int32_t getIndex() const { 00717 return index; 00718 } 00719 00726 int32_t getLength() const { 00727 return length; 00728 } 00729 00736 int32_t getLimit() const { 00737 return index+length; 00738 } 00739 00746 int32_t getValue() const { 00747 return value; 00748 } 00749 00756 UMessagePatternArgType getArgType() const { 00757 UMessagePatternPartType type=getType(); 00758 if(type==UMSGPAT_PART_TYPE_ARG_START || type==UMSGPAT_PART_TYPE_ARG_LIMIT) { 00759 return (UMessagePatternArgType)value; 00760 } else { 00761 return UMSGPAT_ARG_TYPE_NONE; 00762 } 00763 } 00764 00772 static UBool hasNumericValue(UMessagePatternPartType type) { 00773 return type==UMSGPAT_PART_TYPE_ARG_INT || type==UMSGPAT_PART_TYPE_ARG_DOUBLE; 00774 } 00775 00781 UBool operator==(const Part &other) const; 00782 00788 inline UBool operator!=(const Part &other) const { 00789 return !operator==(other); 00790 } 00791 00796 int32_t hashCode() const { 00797 return ((type*37+index)*37+length)*37+value; 00798 } 00799 00800 private: 00801 friend class MessagePattern; 00802 00803 static const int32_t MAX_LENGTH=0xffff; 00804 static const int32_t MAX_VALUE=0x7fff; 00805 00806 // Some fields are not final because they are modified during pattern parsing. 00807 // After pattern parsing, the parts are effectively immutable. 00808 UMessagePatternPartType type; 00809 int32_t index; 00810 uint16_t length; 00811 int16_t value; 00812 int32_t limitPartIndex; 00813 }; 00814 00815 private: 00816 void preParse(const UnicodeString &pattern, UParseError *parseError, UErrorCode &errorCode); 00817 00818 void postParse(); 00819 00820 int32_t parseMessage(int32_t index, int32_t msgStartLength, 00821 int32_t nestingLevel, UMessagePatternArgType parentType, 00822 UParseError *parseError, UErrorCode &errorCode); 00823 00824 int32_t parseArg(int32_t index, int32_t argStartLength, int32_t nestingLevel, 00825 UParseError *parseError, UErrorCode &errorCode); 00826 00827 int32_t parseSimpleStyle(int32_t index, UParseError *parseError, UErrorCode &errorCode); 00828 00829 int32_t parseChoiceStyle(int32_t index, int32_t nestingLevel, 00830 UParseError *parseError, UErrorCode &errorCode); 00831 00832 int32_t parsePluralOrSelectStyle(UMessagePatternArgType argType, int32_t index, int32_t nestingLevel, 00833 UParseError *parseError, UErrorCode &errorCode); 00834 00843 static int32_t parseArgNumber(const UnicodeString &s, int32_t start, int32_t limit); 00844 00845 int32_t parseArgNumber(int32_t start, int32_t limit) { 00846 return parseArgNumber(msg, start, limit); 00847 } 00848 00857 void parseDouble(int32_t start, int32_t limit, UBool allowInfinity, 00858 UParseError *parseError, UErrorCode &errorCode); 00859 00860 // Java has package-private appendReducedApostrophes() here. 00861 // In C++, this is declared in the MessageImpl class. 00862 00863 int32_t skipWhiteSpace(int32_t index); 00864 00865 int32_t skipIdentifier(int32_t index); 00866 00871 int32_t skipDouble(int32_t index); 00872 00873 static UBool isArgTypeChar(UChar32 c); 00874 00875 UBool isChoice(int32_t index); 00876 00877 UBool isPlural(int32_t index); 00878 00879 UBool isSelect(int32_t index); 00880 00885 UBool inMessageFormatPattern(int32_t nestingLevel); 00886 00891 UBool inTopLevelChoiceMessage(int32_t nestingLevel, UMessagePatternArgType parentType); 00892 00893 void addPart(UMessagePatternPartType type, int32_t index, int32_t length, 00894 int32_t value, UErrorCode &errorCode); 00895 00896 void addLimitPart(int32_t start, 00897 UMessagePatternPartType type, int32_t index, int32_t length, 00898 int32_t value, UErrorCode &errorCode); 00899 00900 void addArgDoublePart(double numericValue, int32_t start, int32_t length, UErrorCode &errorCode); 00901 00902 void setParseError(UParseError *parseError, int32_t index); 00903 00904 // No ICU "poor man's RTTI" for this class nor its subclasses. 00905 virtual UClassID getDynamicClassID() const; 00906 00907 UBool init(UErrorCode &errorCode); 00908 UBool copyStorage(const MessagePattern &other, UErrorCode &errorCode); 00909 00910 UMessagePatternApostropheMode aposMode; 00911 UnicodeString msg; 00912 // ArrayList<Part> parts=new ArrayList<Part>(); 00913 MessagePatternPartsList *partsList; 00914 Part *parts; 00915 int32_t partsLength; 00916 // ArrayList<Double> numericValues; 00917 MessagePatternDoubleList *numericValuesList; 00918 double *numericValues; 00919 int32_t numericValuesLength; 00920 UBool hasArgNames; 00921 UBool hasArgNumbers; 00922 UBool needsAutoQuoting; 00923 }; 00924 00925 U_NAMESPACE_END 00926 00927 #endif // !UCONFIG_NO_FORMATTING 00928 00929 #endif // __MESSAGEPATTERN_H__