ICU 4.8 4.8
|
00001 /* 00002 ********************************************************************** 00003 * Copyright (C) 1998-2011, International Business Machines 00004 * Corporation and others. All Rights Reserved. 00005 ********************************************************************** 00006 * 00007 * File unistr.h 00008 * 00009 * Modification History: 00010 * 00011 * Date Name Description 00012 * 09/25/98 stephen Creation. 00013 * 11/11/98 stephen Changed per 11/9 code review. 00014 * 04/20/99 stephen Overhauled per 4/16 code review. 00015 * 11/18/99 aliu Made to inherit from Replaceable. Added method 00016 * handleReplaceBetween(); other methods unchanged. 00017 * 06/25/01 grhoten Remove dependency on iostream. 00018 ****************************************************************************** 00019 */ 00020 00021 #ifndef UNISTR_H 00022 #define UNISTR_H 00023 00029 #include "unicode/utypes.h" 00030 #include "unicode/rep.h" 00031 #include "unicode/std_string.h" 00032 #include "unicode/stringpiece.h" 00033 #include "unicode/bytestream.h" 00034 00035 struct UConverter; // unicode/ucnv.h 00036 class StringThreadTest; 00037 00038 #ifndef U_COMPARE_CODE_POINT_ORDER 00039 /* see also ustring.h and unorm.h */ 00045 #define U_COMPARE_CODE_POINT_ORDER 0x8000 00046 #endif 00047 00048 #ifndef USTRING_H 00049 00052 U_STABLE int32_t U_EXPORT2 00053 u_strlen(const UChar *s); 00054 #endif 00055 00056 U_NAMESPACE_BEGIN 00057 00058 class BreakIterator; // unicode/brkiter.h 00059 class Locale; // unicode/locid.h 00060 class StringCharacterIterator; 00061 class UnicodeStringAppendable; // unicode/appendable.h 00062 00063 /* The <iostream> include has been moved to unicode/ustream.h */ 00064 00075 #define US_INV U_NAMESPACE_QUALIFIER UnicodeString::kInvariant 00076 00094 #if defined(U_DECLARE_UTF16) 00095 # define UNICODE_STRING(cs, _length) U_NAMESPACE_QUALIFIER UnicodeString(TRUE, (const UChar *)U_DECLARE_UTF16(cs), _length) 00096 #elif U_SIZEOF_WCHAR_T==U_SIZEOF_UCHAR && 
(U_CHARSET_FAMILY==U_ASCII_FAMILY || (U_SIZEOF_UCHAR == 2 && defined(U_WCHAR_IS_UTF16))) 00097 # define UNICODE_STRING(cs, _length) U_NAMESPACE_QUALIFIER UnicodeString(TRUE, (const UChar *)L ## cs, _length) 00098 #elif U_SIZEOF_UCHAR==1 && U_CHARSET_FAMILY==U_ASCII_FAMILY 00099 # define UNICODE_STRING(cs, _length) U_NAMESPACE_QUALIFIER UnicodeString(TRUE, (const UChar *)cs, _length) 00100 #else 00101 # define UNICODE_STRING(cs, _length) U_NAMESPACE_QUALIFIER UnicodeString(cs, _length, US_INV) 00102 #endif 00103 00117 #define UNICODE_STRING_SIMPLE(cs) UNICODE_STRING(cs, -1) 00118 00188 class U_COMMON_API UnicodeString : public Replaceable 00189 { 00190 public: 00191 00200 enum EInvariant { 00205 kInvariant 00206 }; 00207 00208 //======================================== 00209 // Read-only operations 00210 //======================================== 00211 00212 /* Comparison - bitwise only - for international comparison use collation */ 00213 00221 inline UBool operator== (const UnicodeString& text) const; 00222 00230 inline UBool operator!= (const UnicodeString& text) const; 00231 00239 inline UBool operator> (const UnicodeString& text) const; 00240 00248 inline UBool operator< (const UnicodeString& text) const; 00249 00257 inline UBool operator>= (const UnicodeString& text) const; 00258 00266 inline UBool operator<= (const UnicodeString& text) const; 00267 00279 inline int8_t compare(const UnicodeString& text) const; 00280 00295 inline int8_t compare(int32_t start, 00296 int32_t length, 00297 const UnicodeString& text) const; 00298 00316 inline int8_t compare(int32_t start, 00317 int32_t length, 00318 const UnicodeString& srcText, 00319 int32_t srcStart, 00320 int32_t srcLength) const; 00321 00334 inline int8_t compare(const UChar *srcChars, 00335 int32_t srcLength) const; 00336 00351 inline int8_t compare(int32_t start, 00352 int32_t length, 00353 const UChar *srcChars) const; 00354 00372 inline int8_t compare(int32_t start, 00373 int32_t length, 00374 const UChar 
*srcChars, 00375 int32_t srcStart, 00376 int32_t srcLength) const; 00377 00395 inline int8_t compareBetween(int32_t start, 00396 int32_t limit, 00397 const UnicodeString& srcText, 00398 int32_t srcStart, 00399 int32_t srcLimit) const; 00400 00418 inline int8_t compareCodePointOrder(const UnicodeString& text) const; 00419 00439 inline int8_t compareCodePointOrder(int32_t start, 00440 int32_t length, 00441 const UnicodeString& srcText) const; 00442 00464 inline int8_t compareCodePointOrder(int32_t start, 00465 int32_t length, 00466 const UnicodeString& srcText, 00467 int32_t srcStart, 00468 int32_t srcLength) const; 00469 00488 inline int8_t compareCodePointOrder(const UChar *srcChars, 00489 int32_t srcLength) const; 00490 00510 inline int8_t compareCodePointOrder(int32_t start, 00511 int32_t length, 00512 const UChar *srcChars) const; 00513 00535 inline int8_t compareCodePointOrder(int32_t start, 00536 int32_t length, 00537 const UChar *srcChars, 00538 int32_t srcStart, 00539 int32_t srcLength) const; 00540 00562 inline int8_t compareCodePointOrderBetween(int32_t start, 00563 int32_t limit, 00564 const UnicodeString& srcText, 00565 int32_t srcStart, 00566 int32_t srcLimit) const; 00567 00586 inline int8_t caseCompare(const UnicodeString& text, uint32_t options) const; 00587 00608 inline int8_t caseCompare(int32_t start, 00609 int32_t length, 00610 const UnicodeString& srcText, 00611 uint32_t options) const; 00612 00635 inline int8_t caseCompare(int32_t start, 00636 int32_t length, 00637 const UnicodeString& srcText, 00638 int32_t srcStart, 00639 int32_t srcLength, 00640 uint32_t options) const; 00641 00661 inline int8_t caseCompare(const UChar *srcChars, 00662 int32_t srcLength, 00663 uint32_t options) const; 00664 00685 inline int8_t caseCompare(int32_t start, 00686 int32_t length, 00687 const UChar *srcChars, 00688 uint32_t options) const; 00689 00712 inline int8_t caseCompare(int32_t start, 00713 int32_t length, 00714 const UChar *srcChars, 00715 int32_t 
srcStart, 00716 int32_t srcLength, 00717 uint32_t options) const; 00718 00741 inline int8_t caseCompareBetween(int32_t start, 00742 int32_t limit, 00743 const UnicodeString& srcText, 00744 int32_t srcStart, 00745 int32_t srcLimit, 00746 uint32_t options) const; 00747 00755 inline UBool startsWith(const UnicodeString& text) const; 00756 00767 inline UBool startsWith(const UnicodeString& srcText, 00768 int32_t srcStart, 00769 int32_t srcLength) const; 00770 00779 inline UBool startsWith(const UChar *srcChars, 00780 int32_t srcLength) const; 00781 00791 inline UBool startsWith(const UChar *srcChars, 00792 int32_t srcStart, 00793 int32_t srcLength) const; 00794 00802 inline UBool endsWith(const UnicodeString& text) const; 00803 00814 inline UBool endsWith(const UnicodeString& srcText, 00815 int32_t srcStart, 00816 int32_t srcLength) const; 00817 00826 inline UBool endsWith(const UChar *srcChars, 00827 int32_t srcLength) const; 00828 00839 inline UBool endsWith(const UChar *srcChars, 00840 int32_t srcStart, 00841 int32_t srcLength) const; 00842 00843 00844 /* Searching - bitwise only */ 00845 00854 inline int32_t indexOf(const UnicodeString& text) const; 00855 00865 inline int32_t indexOf(const UnicodeString& text, 00866 int32_t start) const; 00867 00879 inline int32_t indexOf(const UnicodeString& text, 00880 int32_t start, 00881 int32_t length) const; 00882 00899 inline int32_t indexOf(const UnicodeString& srcText, 00900 int32_t srcStart, 00901 int32_t srcLength, 00902 int32_t start, 00903 int32_t length) const; 00904 00916 inline int32_t indexOf(const UChar *srcChars, 00917 int32_t srcLength, 00918 int32_t start) const; 00919 00932 inline int32_t indexOf(const UChar *srcChars, 00933 int32_t srcLength, 00934 int32_t start, 00935 int32_t length) const; 00936 00953 int32_t indexOf(const UChar *srcChars, 00954 int32_t srcStart, 00955 int32_t srcLength, 00956 int32_t start, 00957 int32_t length) const; 00958 00966 inline int32_t indexOf(UChar c) const; 00967 00976 inline 
int32_t indexOf(UChar32 c) const; 00977 00986 inline int32_t indexOf(UChar c, 00987 int32_t start) const; 00988 00998 inline int32_t indexOf(UChar32 c, 00999 int32_t start) const; 01000 01011 inline int32_t indexOf(UChar c, 01012 int32_t start, 01013 int32_t length) const; 01014 01026 inline int32_t indexOf(UChar32 c, 01027 int32_t start, 01028 int32_t length) const; 01029 01038 inline int32_t lastIndexOf(const UnicodeString& text) const; 01039 01049 inline int32_t lastIndexOf(const UnicodeString& text, 01050 int32_t start) const; 01051 01063 inline int32_t lastIndexOf(const UnicodeString& text, 01064 int32_t start, 01065 int32_t length) const; 01066 01083 inline int32_t lastIndexOf(const UnicodeString& srcText, 01084 int32_t srcStart, 01085 int32_t srcLength, 01086 int32_t start, 01087 int32_t length) const; 01088 01099 inline int32_t lastIndexOf(const UChar *srcChars, 01100 int32_t srcLength, 01101 int32_t start) const; 01102 01115 inline int32_t lastIndexOf(const UChar *srcChars, 01116 int32_t srcLength, 01117 int32_t start, 01118 int32_t length) const; 01119 01136 int32_t lastIndexOf(const UChar *srcChars, 01137 int32_t srcStart, 01138 int32_t srcLength, 01139 int32_t start, 01140 int32_t length) const; 01141 01149 inline int32_t lastIndexOf(UChar c) const; 01150 01159 inline int32_t lastIndexOf(UChar32 c) const; 01160 01169 inline int32_t lastIndexOf(UChar c, 01170 int32_t start) const; 01171 01181 inline int32_t lastIndexOf(UChar32 c, 01182 int32_t start) const; 01183 01194 inline int32_t lastIndexOf(UChar c, 01195 int32_t start, 01196 int32_t length) const; 01197 01209 inline int32_t lastIndexOf(UChar32 c, 01210 int32_t start, 01211 int32_t length) const; 01212 01213 01214 /* Character access */ 01215 01224 inline UChar charAt(int32_t offset) const; 01225 01233 inline UChar operator[] (int32_t offset) const; 01234 01246 inline UChar32 char32At(int32_t offset) const; 01247 01263 inline int32_t getChar32Start(int32_t offset) const; 01264 01281 inline int32_t 
getChar32Limit(int32_t offset) const; 01282 01333 int32_t moveIndex32(int32_t index, int32_t delta) const; 01334 01335 /* Substring extraction */ 01336 01352 inline void extract(int32_t start, 01353 int32_t length, 01354 UChar *dst, 01355 int32_t dstStart = 0) const; 01356 01378 int32_t 01379 extract(UChar *dest, int32_t destCapacity, 01380 UErrorCode &errorCode) const; 01381 01392 inline void extract(int32_t start, 01393 int32_t length, 01394 UnicodeString& target) const; 01395 01407 inline void extractBetween(int32_t start, 01408 int32_t limit, 01409 UChar *dst, 01410 int32_t dstStart = 0) const; 01411 01421 virtual void extractBetween(int32_t start, 01422 int32_t limit, 01423 UnicodeString& target) const; 01424 01446 int32_t extract(int32_t start, 01447 int32_t startLength, 01448 char *target, 01449 int32_t targetCapacity, 01450 enum EInvariant inv) const; 01451 01452 #if U_CHARSET_IS_UTF8 || !UCONFIG_NO_CONVERSION 01453 01473 int32_t extract(int32_t start, 01474 int32_t startLength, 01475 char *target, 01476 uint32_t targetLength) const; 01477 01478 #endif 01479 01480 #if !UCONFIG_NO_CONVERSION 01481 01507 inline int32_t extract(int32_t start, 01508 int32_t startLength, 01509 char *target, 01510 const char *codepage = 0) const; 01511 01541 int32_t extract(int32_t start, 01542 int32_t startLength, 01543 char *target, 01544 uint32_t targetLength, 01545 const char *codepage) const; 01546 01564 int32_t extract(char *dest, int32_t destCapacity, 01565 UConverter *cnv, 01566 UErrorCode &errorCode) const; 01567 01568 #endif 01569 01583 UnicodeString tempSubString(int32_t start=0, int32_t length=INT32_MAX) const; 01584 01595 inline UnicodeString tempSubStringBetween(int32_t start, int32_t limit=INT32_MAX) const; 01596 01608 void toUTF8(ByteSink &sink) const; 01609 01610 #if U_HAVE_STD_STRING 01611 01624 template<typename StringClass> 01625 StringClass &toUTF8String(StringClass &result) const { 01626 StringByteSink<StringClass> sbs(&result); 01627 toUTF8(sbs); 01628 
return result; 01629 } 01630 01631 #endif 01632 01648 int32_t toUTF32(UChar32 *utf32, int32_t capacity, UErrorCode &errorCode) const; 01649 01650 /* Length operations */ 01651 01660 inline int32_t length(void) const; 01661 01675 int32_t 01676 countChar32(int32_t start=0, int32_t length=INT32_MAX) const; 01677 01701 UBool 01702 hasMoreChar32Than(int32_t start, int32_t length, int32_t number) const; 01703 01709 inline UBool isEmpty(void) const; 01710 01720 inline int32_t getCapacity(void) const; 01721 01722 /* Other operations */ 01723 01729 inline int32_t hashCode(void) const; 01730 01743 inline UBool isBogus(void) const; 01744 01745 01746 //======================================== 01747 // Write operations 01748 //======================================== 01749 01750 /* Assignment operations */ 01751 01759 UnicodeString &operator=(const UnicodeString &srcText); 01760 01781 UnicodeString &fastCopyFrom(const UnicodeString &src); 01782 01790 inline UnicodeString& operator= (UChar ch); 01791 01799 inline UnicodeString& operator= (UChar32 ch); 01800 01812 inline UnicodeString& setTo(const UnicodeString& srcText, 01813 int32_t srcStart); 01814 01828 inline UnicodeString& setTo(const UnicodeString& srcText, 01829 int32_t srcStart, 01830 int32_t srcLength); 01831 01840 inline UnicodeString& setTo(const UnicodeString& srcText); 01841 01850 inline UnicodeString& setTo(const UChar *srcChars, 01851 int32_t srcLength); 01852 01861 UnicodeString& setTo(UChar srcChar); 01862 01871 UnicodeString& setTo(UChar32 srcChar); 01872 01893 UnicodeString &setTo(UBool isTerminated, 01894 const UChar *text, 01895 int32_t textLength); 01896 01916 UnicodeString &setTo(UChar *buffer, 01917 int32_t buffLength, 01918 int32_t buffCapacity); 01919 01960 void setToBogus(); 01961 01969 UnicodeString& setCharAt(int32_t offset, 01970 UChar ch); 01971 01972 01973 /* Append operations */ 01974 01982 inline UnicodeString& operator+= (UChar ch); 01983 01991 inline UnicodeString& operator+= (UChar32 ch); 
01992 02000 inline UnicodeString& operator+= (const UnicodeString& srcText); 02001 02016 inline UnicodeString& append(const UnicodeString& srcText, 02017 int32_t srcStart, 02018 int32_t srcLength); 02019 02027 inline UnicodeString& append(const UnicodeString& srcText); 02028 02042 inline UnicodeString& append(const UChar *srcChars, 02043 int32_t srcStart, 02044 int32_t srcLength); 02045 02055 inline UnicodeString& append(const UChar *srcChars, 02056 int32_t srcLength); 02057 02064 inline UnicodeString& append(UChar srcChar); 02065 02072 inline UnicodeString& append(UChar32 srcChar); 02073 02074 02075 /* Insert operations */ 02076 02090 inline UnicodeString& insert(int32_t start, 02091 const UnicodeString& srcText, 02092 int32_t srcStart, 02093 int32_t srcLength); 02094 02103 inline UnicodeString& insert(int32_t start, 02104 const UnicodeString& srcText); 02105 02119 inline UnicodeString& insert(int32_t start, 02120 const UChar *srcChars, 02121 int32_t srcStart, 02122 int32_t srcLength); 02123 02133 inline UnicodeString& insert(int32_t start, 02134 const UChar *srcChars, 02135 int32_t srcLength); 02136 02145 inline UnicodeString& insert(int32_t start, 02146 UChar srcChar); 02147 02156 inline UnicodeString& insert(int32_t start, 02157 UChar32 srcChar); 02158 02159 02160 /* Replace operations */ 02161 02179 UnicodeString& replace(int32_t start, 02180 int32_t length, 02181 const UnicodeString& srcText, 02182 int32_t srcStart, 02183 int32_t srcLength); 02184 02197 UnicodeString& replace(int32_t start, 02198 int32_t length, 02199 const UnicodeString& srcText); 02200 02218 UnicodeString& replace(int32_t start, 02219 int32_t length, 02220 const UChar *srcChars, 02221 int32_t srcStart, 02222 int32_t srcLength); 02223 02236 inline UnicodeString& replace(int32_t start, 02237 int32_t length, 02238 const UChar *srcChars, 02239 int32_t srcLength); 02240 02252 inline UnicodeString& replace(int32_t start, 02253 int32_t length, 02254 UChar srcChar); 02255 02267 inline 
UnicodeString& replace(int32_t start, 02268 int32_t length, 02269 UChar32 srcChar); 02270 02280 inline UnicodeString& replaceBetween(int32_t start, 02281 int32_t limit, 02282 const UnicodeString& srcText); 02283 02298 inline UnicodeString& replaceBetween(int32_t start, 02299 int32_t limit, 02300 const UnicodeString& srcText, 02301 int32_t srcStart, 02302 int32_t srcLimit); 02303 02314 virtual void handleReplaceBetween(int32_t start, 02315 int32_t limit, 02316 const UnicodeString& text); 02317 02323 virtual UBool hasMetaData() const; 02324 02340 virtual void copy(int32_t start, int32_t limit, int32_t dest); 02341 02342 /* Search and replace operations */ 02343 02352 inline UnicodeString& findAndReplace(const UnicodeString& oldText, 02353 const UnicodeString& newText); 02354 02366 inline UnicodeString& findAndReplace(int32_t start, 02367 int32_t length, 02368 const UnicodeString& oldText, 02369 const UnicodeString& newText); 02370 02388 UnicodeString& findAndReplace(int32_t start, 02389 int32_t length, 02390 const UnicodeString& oldText, 02391 int32_t oldStart, 02392 int32_t oldLength, 02393 const UnicodeString& newText, 02394 int32_t newStart, 02395 int32_t newLength); 02396 02397 02398 /* Remove operations */ 02399 02405 inline UnicodeString& remove(void); 02406 02415 inline UnicodeString& remove(int32_t start, 02416 int32_t length = (int32_t)INT32_MAX); 02417 02426 inline UnicodeString& removeBetween(int32_t start, 02427 int32_t limit = (int32_t)INT32_MAX); 02428 02438 inline UnicodeString &retainBetween(int32_t start, int32_t limit = INT32_MAX); 02439 02440 /* Length operations */ 02441 02453 UBool padLeading(int32_t targetLength, 02454 UChar padChar = 0x0020); 02455 02467 UBool padTrailing(int32_t targetLength, 02468 UChar padChar = 0x0020); 02469 02476 inline UBool truncate(int32_t targetLength); 02477 02483 UnicodeString& trim(void); 02484 02485 02486 /* Miscellaneous operations */ 02487 02493 inline UnicodeString& reverse(void); 02494 02503 inline 
UnicodeString& reverse(int32_t start, 02504 int32_t length); 02505 02512 UnicodeString& toUpper(void); 02513 02521 UnicodeString& toUpper(const Locale& locale); 02522 02529 UnicodeString& toLower(void); 02530 02538 UnicodeString& toLower(const Locale& locale); 02539 02540 #if !UCONFIG_NO_BREAK_ITERATION 02541 02568 UnicodeString &toTitle(BreakIterator *titleIter); 02569 02597 UnicodeString &toTitle(BreakIterator *titleIter, const Locale &locale); 02598 02630 UnicodeString &toTitle(BreakIterator *titleIter, const Locale &locale, uint32_t options); 02631 02632 #endif 02633 02645 UnicodeString &foldCase(uint32_t options=0 /*U_FOLD_CASE_DEFAULT*/); 02646 02647 //======================================== 02648 // Access to the internal buffer 02649 //======================================== 02650 02694 UChar *getBuffer(int32_t minCapacity); 02695 02716 void releaseBuffer(int32_t newLength=-1); 02717 02748 inline const UChar *getBuffer() const; 02749 02783 inline const UChar *getTerminatedBuffer(); 02784 02785 //======================================== 02786 // Constructors 02787 //======================================== 02788 02792 UnicodeString(); 02793 02805 UnicodeString(int32_t capacity, UChar32 c, int32_t count); 02806 02812 UnicodeString(UChar ch); 02813 02819 UnicodeString(UChar32 ch); 02820 02827 UnicodeString(const UChar *text); 02828 02836 UnicodeString(const UChar *text, 02837 int32_t textLength); 02838 02858 UnicodeString(UBool isTerminated, 02859 const UChar *text, 02860 int32_t textLength); 02861 02880 UnicodeString(UChar *buffer, int32_t buffLength, int32_t buffCapacity); 02881 02882 #if U_CHARSET_IS_UTF8 || !UCONFIG_NO_CONVERSION 02883 02890 UnicodeString(const char *codepageData); 02891 02898 UnicodeString(const char *codepageData, int32_t dataLength); 02899 02900 #endif 02901 02902 #if !UCONFIG_NO_CONVERSION 02903 02921 UnicodeString(const char *codepageData, const char *codepage); 02922 02940 UnicodeString(const char *codepageData, int32_t dataLength, 
const char *codepage); 02941 02963 UnicodeString( 02964 const char *src, int32_t srcLength, 02965 UConverter *cnv, 02966 UErrorCode &errorCode); 02967 02968 #endif 02969 02994 UnicodeString(const char *src, int32_t length, enum EInvariant inv); 02995 02996 03002 UnicodeString(const UnicodeString& that); 03003 03010 UnicodeString(const UnicodeString& src, int32_t srcStart); 03011 03019 UnicodeString(const UnicodeString& src, int32_t srcStart, int32_t srcLength); 03020 03037 virtual Replaceable *clone() const; 03038 03042 virtual ~UnicodeString(); 03043 03057 static UnicodeString fromUTF8(const StringPiece &utf8); 03058 03070 static UnicodeString fromUTF32(const UChar32 *utf32, int32_t length); 03071 03072 /* Miscellaneous operations */ 03073 03108 UnicodeString unescape() const; 03109 03129 UChar32 unescapeAt(int32_t &offset) const; 03130 03136 static UClassID U_EXPORT2 getStaticClassID(); 03137 03143 virtual UClassID getDynamicClassID() const; 03144 03145 //======================================== 03146 // Implementation methods 03147 //======================================== 03148 03149 protected: 03154 virtual int32_t getLength() const; 03155 03161 virtual UChar getCharAt(int32_t offset) const; 03162 03168 virtual UChar32 getChar32At(int32_t offset) const; 03169 03170 private: 03171 // For char* constructors. Could be made public. 03172 UnicodeString &setToUTF8(const StringPiece &utf8); 03173 // For extract(char*). 03174 // We could make a toUTF8(target, capacity, errorCode) public but not 03175 // this version: New API will be cleaner if we make callers create substrings 03176 // rather than having start+length on every method, 03177 // and it should take a UErrorCode&. 
03178 int32_t 03179 toUTF8(int32_t start, int32_t len, 03180 char *target, int32_t capacity) const; 03181 03182 03183 inline int8_t 03184 doCompare(int32_t start, 03185 int32_t length, 03186 const UnicodeString& srcText, 03187 int32_t srcStart, 03188 int32_t srcLength) const; 03189 03190 int8_t doCompare(int32_t start, 03191 int32_t length, 03192 const UChar *srcChars, 03193 int32_t srcStart, 03194 int32_t srcLength) const; 03195 03196 inline int8_t 03197 doCompareCodePointOrder(int32_t start, 03198 int32_t length, 03199 const UnicodeString& srcText, 03200 int32_t srcStart, 03201 int32_t srcLength) const; 03202 03203 int8_t doCompareCodePointOrder(int32_t start, 03204 int32_t length, 03205 const UChar *srcChars, 03206 int32_t srcStart, 03207 int32_t srcLength) const; 03208 03209 inline int8_t 03210 doCaseCompare(int32_t start, 03211 int32_t length, 03212 const UnicodeString &srcText, 03213 int32_t srcStart, 03214 int32_t srcLength, 03215 uint32_t options) const; 03216 03217 int8_t 03218 doCaseCompare(int32_t start, 03219 int32_t length, 03220 const UChar *srcChars, 03221 int32_t srcStart, 03222 int32_t srcLength, 03223 uint32_t options) const; 03224 03225 int32_t doIndexOf(UChar c, 03226 int32_t start, 03227 int32_t length) const; 03228 03229 int32_t doIndexOf(UChar32 c, 03230 int32_t start, 03231 int32_t length) const; 03232 03233 int32_t doLastIndexOf(UChar c, 03234 int32_t start, 03235 int32_t length) const; 03236 03237 int32_t doLastIndexOf(UChar32 c, 03238 int32_t start, 03239 int32_t length) const; 03240 03241 void doExtract(int32_t start, 03242 int32_t length, 03243 UChar *dst, 03244 int32_t dstStart) const; 03245 03246 inline void doExtract(int32_t start, 03247 int32_t length, 03248 UnicodeString& target) const; 03249 03250 inline UChar doCharAt(int32_t offset) const; 03251 03252 UnicodeString& doReplace(int32_t start, 03253 int32_t length, 03254 const UnicodeString& srcText, 03255 int32_t srcStart, 03256 int32_t srcLength); 03257 03258 UnicodeString& 
doReplace(int32_t start, 03259 int32_t length, 03260 const UChar *srcChars, 03261 int32_t srcStart, 03262 int32_t srcLength); 03263 03264 UnicodeString& doReverse(int32_t start, 03265 int32_t length); 03266 03267 // calculate hash code 03268 int32_t doHashCode(void) const; 03269 03270 // get pointer to start of array 03271 // these do not check for kOpenGetBuffer, unlike the public getBuffer() function 03272 inline UChar* getArrayStart(void); 03273 inline const UChar* getArrayStart(void) const; 03274 03275 // A UnicodeString object (not necessarily its current buffer) 03276 // is writable unless it isBogus() or it has an "open" getBuffer(minCapacity). 03277 inline UBool isWritable() const; 03278 03279 // Is the current buffer writable? 03280 inline UBool isBufferWritable() const; 03281 03282 // None of the following does releaseArray(). 03283 inline void setLength(int32_t len); // sets only fShortLength and fLength 03284 inline void setToEmpty(); // sets fFlags=kShortString 03285 inline void setArray(UChar *array, int32_t len, int32_t capacity); // does not set fFlags 03286 03287 // allocate the array; result may be fStackBuffer 03288 // sets refCount to 1 if appropriate 03289 // sets fArray, fCapacity, and fFlags 03290 // returns boolean for success or failure 03291 UBool allocate(int32_t capacity); 03292 03293 // release the array if owned 03294 void releaseArray(void); 03295 03296 // turn a bogus string into an empty one 03297 void unBogus(); 03298 03299 // implements assignment operator, copy constructor, and fastCopyFrom() 03300 UnicodeString &copyFrom(const UnicodeString &src, UBool fastCopy=FALSE); 03301 03302 // Pin start and limit to acceptable values. 03303 inline void pinIndex(int32_t& start) const; 03304 inline void pinIndices(int32_t& start, 03305 int32_t& length) const; 03306 03307 #if !UCONFIG_NO_CONVERSION 03308 03309 /* Internal extract() using UConverter. 
*/ 03310 int32_t doExtract(int32_t start, int32_t length, 03311 char *dest, int32_t destCapacity, 03312 UConverter *cnv, 03313 UErrorCode &errorCode) const; 03314 03315 /* 03316 * Real constructor for converting from codepage data. 03317 * It assumes that it is called with !fRefCounted. 03318 * 03319 * If <code>codepage==0</code>, then the default converter 03320 * is used for the platform encoding. 03321 * If <code>codepage</code> is an empty string (<code>""</code>), 03322 * then a simple conversion is performed on the codepage-invariant 03323 * subset ("invariant characters") of the platform encoding. See utypes.h. 03324 */ 03325 void doCodepageCreate(const char *codepageData, 03326 int32_t dataLength, 03327 const char *codepage); 03328 03329 /* 03330 * Worker function for creating a UnicodeString from 03331 * a codepage string using a UConverter. 03332 */ 03333 void 03334 doCodepageCreate(const char *codepageData, 03335 int32_t dataLength, 03336 UConverter *converter, 03337 UErrorCode &status); 03338 03339 #endif 03340 03341 /* 03342 * This function is called when write access to the array 03343 * is necessary. 03344 * 03345 * We need to make a copy of the array if 03346 * the buffer is read-only, or 03347 * the buffer is refCounted (shared), and refCount>1, or 03348 * the buffer is too small. 03349 * 03350 * Return FALSE if memory could not be allocated. 
03351 */ 03352 UBool cloneArrayIfNeeded(int32_t newCapacity = -1, 03353 int32_t growCapacity = -1, 03354 UBool doCopyArray = TRUE, 03355 int32_t **pBufferToDelete = 0, 03356 UBool forceClone = FALSE); 03357 03358 // common function for case mappings 03359 UnicodeString & 03360 caseMap(BreakIterator *titleIter, 03361 const char *locale, 03362 uint32_t options, 03363 int32_t toWhichCase); 03364 03365 // ref counting 03366 void addRef(void); 03367 int32_t removeRef(void); 03368 int32_t refCount(void) const; 03369 03370 // constants 03371 enum { 03372 // Set the stack buffer size so that sizeof(UnicodeString) is, 03373 // naturally (without padding), a multiple of sizeof(pointer). 03374 US_STACKBUF_SIZE= sizeof(void *)==4 ? 13 : 15, // Size of stack buffer for short strings 03375 kInvalidUChar=0xffff, // invalid UChar index 03376 kGrowSize=128, // grow size for this buffer 03377 kInvalidHashCode=0, // invalid hash code 03378 kEmptyHashCode=1, // hash code for empty string 03379 03380 // bit flag values for fFlags 03381 kIsBogus=1, // this string is bogus, i.e., not valid or NULL 03382 kUsingStackBuffer=2,// using fUnion.fStackBuffer instead of fUnion.fFields 03383 kRefCounted=4, // there is a refCount field before the characters in fArray 03384 kBufferIsReadonly=8,// do not write to this buffer 03385 kOpenGetBuffer=16, // getBuffer(minCapacity) was called (is "open"), 03386 // and releaseBuffer(newLength) must be called 03387 03388 // combined values for convenience 03389 kShortString=kUsingStackBuffer, 03390 kLongString=kRefCounted, 03391 kReadonlyAlias=kBufferIsReadonly, 03392 kWritableAlias=0 03393 }; 03394 03395 friend class StringThreadTest; 03396 friend class UnicodeStringAppendable; 03397 03398 union StackBufferOrFields; // forward declaration necessary before friend declaration 03399 friend union StackBufferOrFields; // make US_STACKBUF_SIZE visible inside fUnion 03400 03401 /* 03402 * The following are all the class fields that are stored 03403 * in each 
UnicodeString object. 03404 * Note that UnicodeString has virtual functions, 03405 * therefore there is an implicit vtable pointer 03406 * as the first real field. 03407 * The fields should be aligned such that no padding is necessary. 03408 * On 32-bit machines, the size should be 32 bytes, 03409 * on 64-bit machines (8-byte pointers), it should be 40 bytes. 03410 * 03411 * We use a hack to achieve this. 03412 * 03413 * With at least some compilers, each of the following is forced to 03414 * a multiple of sizeof(pointer) [the largest field base unit here is a data pointer], 03415 * rounded up with additional padding if the fields do not already fit that requirement: 03416 * - sizeof(class UnicodeString) 03417 * - offsetof(UnicodeString, fUnion) 03418 * - sizeof(fUnion) 03419 * - sizeof(fFields) 03420 * 03421 * In order to avoid padding, we make sizeof(fStackBuffer)=16 (=8 UChars) 03422 * which is at least as large as sizeof(fFields) on 32-bit and 64-bit machines. 03423 * (Padding at the end of fFields is ok: 03424 * As long as there is no padding after fStackBuffer, it is not wasted space.) 03425 * 03426 * We further assume that the compiler does not reorder the fields, 03427 * so that fRestOfStackBuffer (which holds a few more UChars) immediately follows after fUnion, 03428 * with at most some padding (but no other field) in between. 03429 * (Padding there would be wasted space, but functionally harmless.) 03430 * 03431 * We use a few more sizeof(pointer)'s chunks of space with 03432 * fRestOfStackBuffer, fShortLength and fFlags, 03433 * to get up exactly to the intended sizeof(UnicodeString). 
03434 */ 03435 // (implicit) *vtable; 03436 union StackBufferOrFields { 03437 // fStackBuffer is used iff (fFlags&kUsingStackBuffer) 03438 // else fFields is used 03439 UChar fStackBuffer[8]; // buffer for short strings, together with fRestOfStackBuffer 03440 struct { 03441 UChar *fArray; // the Unicode data 03442 int32_t fCapacity; // capacity of fArray (in UChars) 03443 int32_t fLength; // number of characters in fArray if >127; else undefined 03444 } fFields; 03445 } fUnion; 03446 UChar fRestOfStackBuffer[US_STACKBUF_SIZE-8]; 03447 int8_t fShortLength; // 0..127: length <0: real length is in fUnion.fFields.fLength 03448 uint8_t fFlags; // bit flags: see constants above 03449 }; 03450 03459 U_COMMON_API UnicodeString U_EXPORT2 03460 operator+ (const UnicodeString &s1, const UnicodeString &s2); 03461 03462 //======================================== 03463 // Inline members 03464 //======================================== 03465 03466 //======================================== 03467 // Privates 03468 //======================================== 03469 03470 inline void 03471 UnicodeString::pinIndex(int32_t& start) const 03472 { 03473 // pin index 03474 if(start < 0) { 03475 start = 0; 03476 } else if(start > length()) { 03477 start = length(); 03478 } 03479 } 03480 03481 inline void 03482 UnicodeString::pinIndices(int32_t& start, 03483 int32_t& _length) const 03484 { 03485 // pin indices 03486 int32_t len = length(); 03487 if(start < 0) { 03488 start = 0; 03489 } else if(start > len) { 03490 start = len; 03491 } 03492 if(_length < 0) { 03493 _length = 0; 03494 } else if(_length > (len - start)) { 03495 _length = (len - start); 03496 } 03497 } 03498 03499 inline UChar* 03500 UnicodeString::getArrayStart() 03501 { return (fFlags&kUsingStackBuffer) ? fUnion.fStackBuffer : fUnion.fFields.fArray; } 03502 03503 inline const UChar* 03504 UnicodeString::getArrayStart() const 03505 { return (fFlags&kUsingStackBuffer) ? 
fUnion.fStackBuffer : fUnion.fFields.fArray; } 03506 03507 //======================================== 03508 // Read-only implementation methods 03509 //======================================== 03510 inline int32_t 03511 UnicodeString::length() const 03512 { return fShortLength>=0 ? fShortLength : fUnion.fFields.fLength; } 03513 03514 inline int32_t 03515 UnicodeString::getCapacity() const 03516 { return (fFlags&kUsingStackBuffer) ? US_STACKBUF_SIZE : fUnion.fFields.fCapacity; } 03517 03518 inline int32_t 03519 UnicodeString::hashCode() const 03520 { return doHashCode(); } 03521 03522 inline UBool 03523 UnicodeString::isBogus() const 03524 { return (UBool)(fFlags & kIsBogus); } 03525 03526 inline UBool 03527 UnicodeString::isWritable() const 03528 { return (UBool)!(fFlags&(kOpenGetBuffer|kIsBogus)); } 03529 03530 inline UBool 03531 UnicodeString::isBufferWritable() const 03532 { 03533 return (UBool)( 03534 !(fFlags&(kOpenGetBuffer|kIsBogus|kBufferIsReadonly)) && 03535 (!(fFlags&kRefCounted) || refCount()==1)); 03536 } 03537 03538 inline const UChar * 03539 UnicodeString::getBuffer() const { 03540 if(fFlags&(kIsBogus|kOpenGetBuffer)) { 03541 return 0; 03542 } else if(fFlags&kUsingStackBuffer) { 03543 return fUnion.fStackBuffer; 03544 } else { 03545 return fUnion.fFields.fArray; 03546 } 03547 } 03548 03549 //======================================== 03550 // Read-only alias methods 03551 //======================================== 03552 inline int8_t 03553 UnicodeString::doCompare(int32_t start, 03554 int32_t thisLength, 03555 const UnicodeString& srcText, 03556 int32_t srcStart, 03557 int32_t srcLength) const 03558 { 03559 if(srcText.isBogus()) { 03560 return (int8_t)!isBogus(); // 0 if both are bogus, 1 otherwise 03561 } else { 03562 srcText.pinIndices(srcStart, srcLength); 03563 return doCompare(start, thisLength, srcText.getArrayStart(), srcStart, srcLength); 03564 } 03565 } 03566 03567 inline UBool 03568 UnicodeString::operator== (const UnicodeString& text) const 
03569 { 03570 if(isBogus()) { 03571 return text.isBogus(); 03572 } else { 03573 int32_t len = length(), textLength = text.length(); 03574 return 03575 !text.isBogus() && 03576 len == textLength && 03577 doCompare(0, len, text, 0, textLength) == 0; 03578 } 03579 } 03580 03581 inline UBool 03582 UnicodeString::operator!= (const UnicodeString& text) const 03583 { return (! operator==(text)); } 03584 03585 inline UBool 03586 UnicodeString::operator> (const UnicodeString& text) const 03587 { return doCompare(0, length(), text, 0, text.length()) == 1; } 03588 03589 inline UBool 03590 UnicodeString::operator< (const UnicodeString& text) const 03591 { return doCompare(0, length(), text, 0, text.length()) == -1; } 03592 03593 inline UBool 03594 UnicodeString::operator>= (const UnicodeString& text) const 03595 { return doCompare(0, length(), text, 0, text.length()) != -1; } 03596 03597 inline UBool 03598 UnicodeString::operator<= (const UnicodeString& text) const 03599 { return doCompare(0, length(), text, 0, text.length()) != 1; } 03600 03601 inline int8_t 03602 UnicodeString::compare(const UnicodeString& text) const 03603 { return doCompare(0, length(), text, 0, text.length()); } 03604 03605 inline int8_t 03606 UnicodeString::compare(int32_t start, 03607 int32_t _length, 03608 const UnicodeString& srcText) const 03609 { return doCompare(start, _length, srcText, 0, srcText.length()); } 03610 03611 inline int8_t 03612 UnicodeString::compare(const UChar *srcChars, 03613 int32_t srcLength) const 03614 { return doCompare(0, length(), srcChars, 0, srcLength); } 03615 03616 inline int8_t 03617 UnicodeString::compare(int32_t start, 03618 int32_t _length, 03619 const UnicodeString& srcText, 03620 int32_t srcStart, 03621 int32_t srcLength) const 03622 { return doCompare(start, _length, srcText, srcStart, srcLength); } 03623 03624 inline int8_t 03625 UnicodeString::compare(int32_t start, 03626 int32_t _length, 03627 const UChar *srcChars) const 03628 { return doCompare(start, 
_length, srcChars, 0, _length); } 03629 03630 inline int8_t 03631 UnicodeString::compare(int32_t start, 03632 int32_t _length, 03633 const UChar *srcChars, 03634 int32_t srcStart, 03635 int32_t srcLength) const 03636 { return doCompare(start, _length, srcChars, srcStart, srcLength); } 03637 03638 inline int8_t 03639 UnicodeString::compareBetween(int32_t start, 03640 int32_t limit, 03641 const UnicodeString& srcText, 03642 int32_t srcStart, 03643 int32_t srcLimit) const 03644 { return doCompare(start, limit - start, 03645 srcText, srcStart, srcLimit - srcStart); } 03646 03647 inline int8_t 03648 UnicodeString::doCompareCodePointOrder(int32_t start, 03649 int32_t thisLength, 03650 const UnicodeString& srcText, 03651 int32_t srcStart, 03652 int32_t srcLength) const 03653 { 03654 if(srcText.isBogus()) { 03655 return (int8_t)!isBogus(); // 0 if both are bogus, 1 otherwise 03656 } else { 03657 srcText.pinIndices(srcStart, srcLength); 03658 return doCompareCodePointOrder(start, thisLength, srcText.getArrayStart(), srcStart, srcLength); 03659 } 03660 } 03661 03662 inline int8_t 03663 UnicodeString::compareCodePointOrder(const UnicodeString& text) const 03664 { return doCompareCodePointOrder(0, length(), text, 0, text.length()); } 03665 03666 inline int8_t 03667 UnicodeString::compareCodePointOrder(int32_t start, 03668 int32_t _length, 03669 const UnicodeString& srcText) const 03670 { return doCompareCodePointOrder(start, _length, srcText, 0, srcText.length()); } 03671 03672 inline int8_t 03673 UnicodeString::compareCodePointOrder(const UChar *srcChars, 03674 int32_t srcLength) const 03675 { return doCompareCodePointOrder(0, length(), srcChars, 0, srcLength); } 03676 03677 inline int8_t 03678 UnicodeString::compareCodePointOrder(int32_t start, 03679 int32_t _length, 03680 const UnicodeString& srcText, 03681 int32_t srcStart, 03682 int32_t srcLength) const 03683 { return doCompareCodePointOrder(start, _length, srcText, srcStart, srcLength); } 03684 03685 inline int8_t 03686 
UnicodeString::compareCodePointOrder(int32_t start, 03687 int32_t _length, 03688 const UChar *srcChars) const 03689 { return doCompareCodePointOrder(start, _length, srcChars, 0, _length); } 03690 03691 inline int8_t 03692 UnicodeString::compareCodePointOrder(int32_t start, 03693 int32_t _length, 03694 const UChar *srcChars, 03695 int32_t srcStart, 03696 int32_t srcLength) const 03697 { return doCompareCodePointOrder(start, _length, srcChars, srcStart, srcLength); } 03698 03699 inline int8_t 03700 UnicodeString::compareCodePointOrderBetween(int32_t start, 03701 int32_t limit, 03702 const UnicodeString& srcText, 03703 int32_t srcStart, 03704 int32_t srcLimit) const 03705 { return doCompareCodePointOrder(start, limit - start, 03706 srcText, srcStart, srcLimit - srcStart); } 03707 03708 inline int8_t 03709 UnicodeString::doCaseCompare(int32_t start, 03710 int32_t thisLength, 03711 const UnicodeString &srcText, 03712 int32_t srcStart, 03713 int32_t srcLength, 03714 uint32_t options) const 03715 { 03716 if(srcText.isBogus()) { 03717 return (int8_t)!isBogus(); // 0 if both are bogus, 1 otherwise 03718 } else { 03719 srcText.pinIndices(srcStart, srcLength); 03720 return doCaseCompare(start, thisLength, srcText.getArrayStart(), srcStart, srcLength, options); 03721 } 03722 } 03723 03724 inline int8_t 03725 UnicodeString::caseCompare(const UnicodeString &text, uint32_t options) const { 03726 return doCaseCompare(0, length(), text, 0, text.length(), options); 03727 } 03728 03729 inline int8_t 03730 UnicodeString::caseCompare(int32_t start, 03731 int32_t _length, 03732 const UnicodeString &srcText, 03733 uint32_t options) const { 03734 return doCaseCompare(start, _length, srcText, 0, srcText.length(), options); 03735 } 03736 03737 inline int8_t 03738 UnicodeString::caseCompare(const UChar *srcChars, 03739 int32_t srcLength, 03740 uint32_t options) const { 03741 return doCaseCompare(0, length(), srcChars, 0, srcLength, options); 03742 } 03743 03744 inline int8_t 03745 
UnicodeString::caseCompare(int32_t start, 03746 int32_t _length, 03747 const UnicodeString &srcText, 03748 int32_t srcStart, 03749 int32_t srcLength, 03750 uint32_t options) const { 03751 return doCaseCompare(start, _length, srcText, srcStart, srcLength, options); 03752 } 03753 03754 inline int8_t 03755 UnicodeString::caseCompare(int32_t start, 03756 int32_t _length, 03757 const UChar *srcChars, 03758 uint32_t options) const { 03759 return doCaseCompare(start, _length, srcChars, 0, _length, options); 03760 } 03761 03762 inline int8_t 03763 UnicodeString::caseCompare(int32_t start, 03764 int32_t _length, 03765 const UChar *srcChars, 03766 int32_t srcStart, 03767 int32_t srcLength, 03768 uint32_t options) const { 03769 return doCaseCompare(start, _length, srcChars, srcStart, srcLength, options); 03770 } 03771 03772 inline int8_t 03773 UnicodeString::caseCompareBetween(int32_t start, 03774 int32_t limit, 03775 const UnicodeString &srcText, 03776 int32_t srcStart, 03777 int32_t srcLimit, 03778 uint32_t options) const { 03779 return doCaseCompare(start, limit - start, srcText, srcStart, srcLimit - srcStart, options); 03780 } 03781 03782 inline int32_t 03783 UnicodeString::indexOf(const UnicodeString& srcText, 03784 int32_t srcStart, 03785 int32_t srcLength, 03786 int32_t start, 03787 int32_t _length) const 03788 { 03789 if(!srcText.isBogus()) { 03790 srcText.pinIndices(srcStart, srcLength); 03791 if(srcLength > 0) { 03792 return indexOf(srcText.getArrayStart(), srcStart, srcLength, start, _length); 03793 } 03794 } 03795 return -1; 03796 } 03797 03798 inline int32_t 03799 UnicodeString::indexOf(const UnicodeString& text) const 03800 { return indexOf(text, 0, text.length(), 0, length()); } 03801 03802 inline int32_t 03803 UnicodeString::indexOf(const UnicodeString& text, 03804 int32_t start) const { 03805 pinIndex(start); 03806 return indexOf(text, 0, text.length(), start, length() - start); 03807 } 03808 03809 inline int32_t 03810 UnicodeString::indexOf(const 
UnicodeString& text, 03811 int32_t start, 03812 int32_t _length) const 03813 { return indexOf(text, 0, text.length(), start, _length); } 03814 03815 inline int32_t 03816 UnicodeString::indexOf(const UChar *srcChars, 03817 int32_t srcLength, 03818 int32_t start) const { 03819 pinIndex(start); 03820 return indexOf(srcChars, 0, srcLength, start, length() - start); 03821 } 03822 03823 inline int32_t 03824 UnicodeString::indexOf(const UChar *srcChars, 03825 int32_t srcLength, 03826 int32_t start, 03827 int32_t _length) const 03828 { return indexOf(srcChars, 0, srcLength, start, _length); } 03829 03830 inline int32_t 03831 UnicodeString::indexOf(UChar c, 03832 int32_t start, 03833 int32_t _length) const 03834 { return doIndexOf(c, start, _length); } 03835 03836 inline int32_t 03837 UnicodeString::indexOf(UChar32 c, 03838 int32_t start, 03839 int32_t _length) const 03840 { return doIndexOf(c, start, _length); } 03841 03842 inline int32_t 03843 UnicodeString::indexOf(UChar c) const 03844 { return doIndexOf(c, 0, length()); } 03845 03846 inline int32_t 03847 UnicodeString::indexOf(UChar32 c) const 03848 { return indexOf(c, 0, length()); } 03849 03850 inline int32_t 03851 UnicodeString::indexOf(UChar c, 03852 int32_t start) const { 03853 pinIndex(start); 03854 return doIndexOf(c, start, length() - start); 03855 } 03856 03857 inline int32_t 03858 UnicodeString::indexOf(UChar32 c, 03859 int32_t start) const { 03860 pinIndex(start); 03861 return indexOf(c, start, length() - start); 03862 } 03863 03864 inline int32_t 03865 UnicodeString::lastIndexOf(const UChar *srcChars, 03866 int32_t srcLength, 03867 int32_t start, 03868 int32_t _length) const 03869 { return lastIndexOf(srcChars, 0, srcLength, start, _length); } 03870 03871 inline int32_t 03872 UnicodeString::lastIndexOf(const UChar *srcChars, 03873 int32_t srcLength, 03874 int32_t start) const { 03875 pinIndex(start); 03876 return lastIndexOf(srcChars, 0, srcLength, start, length() - start); 03877 } 03878 03879 inline int32_t 
03880 UnicodeString::lastIndexOf(const UnicodeString& srcText, 03881 int32_t srcStart, 03882 int32_t srcLength, 03883 int32_t start, 03884 int32_t _length) const 03885 { 03886 if(!srcText.isBogus()) { 03887 srcText.pinIndices(srcStart, srcLength); 03888 if(srcLength > 0) { 03889 return lastIndexOf(srcText.getArrayStart(), srcStart, srcLength, start, _length); 03890 } 03891 } 03892 return -1; 03893 } 03894 03895 inline int32_t 03896 UnicodeString::lastIndexOf(const UnicodeString& text, 03897 int32_t start, 03898 int32_t _length) const 03899 { return lastIndexOf(text, 0, text.length(), start, _length); } 03900 03901 inline int32_t 03902 UnicodeString::lastIndexOf(const UnicodeString& text, 03903 int32_t start) const { 03904 pinIndex(start); 03905 return lastIndexOf(text, 0, text.length(), start, length() - start); 03906 } 03907 03908 inline int32_t 03909 UnicodeString::lastIndexOf(const UnicodeString& text) const 03910 { return lastIndexOf(text, 0, text.length(), 0, length()); } 03911 03912 inline int32_t 03913 UnicodeString::lastIndexOf(UChar c, 03914 int32_t start, 03915 int32_t _length) const 03916 { return doLastIndexOf(c, start, _length); } 03917 03918 inline int32_t 03919 UnicodeString::lastIndexOf(UChar32 c, 03920 int32_t start, 03921 int32_t _length) const { 03922 return doLastIndexOf(c, start, _length); 03923 } 03924 03925 inline int32_t 03926 UnicodeString::lastIndexOf(UChar c) const 03927 { return doLastIndexOf(c, 0, length()); } 03928 03929 inline int32_t 03930 UnicodeString::lastIndexOf(UChar32 c) const { 03931 return lastIndexOf(c, 0, length()); 03932 } 03933 03934 inline int32_t 03935 UnicodeString::lastIndexOf(UChar c, 03936 int32_t start) const { 03937 pinIndex(start); 03938 return doLastIndexOf(c, start, length() - start); 03939 } 03940 03941 inline int32_t 03942 UnicodeString::lastIndexOf(UChar32 c, 03943 int32_t start) const { 03944 pinIndex(start); 03945 return lastIndexOf(c, start, length() - start); 03946 } 03947 03948 inline UBool 03949 
UnicodeString::startsWith(const UnicodeString& text) const 03950 { return compare(0, text.length(), text, 0, text.length()) == 0; } 03951 03952 inline UBool 03953 UnicodeString::startsWith(const UnicodeString& srcText, 03954 int32_t srcStart, 03955 int32_t srcLength) const 03956 { return doCompare(0, srcLength, srcText, srcStart, srcLength) == 0; } 03957 03958 inline UBool 03959 UnicodeString::startsWith(const UChar *srcChars, 03960 int32_t srcLength) const 03961 { return doCompare(0, srcLength, srcChars, 0, srcLength) == 0; } 03962 03963 inline UBool 03964 UnicodeString::startsWith(const UChar *srcChars, 03965 int32_t srcStart, 03966 int32_t srcLength) const 03967 { return doCompare(0, srcLength, srcChars, srcStart, srcLength) == 0;} 03968 03969 inline UBool 03970 UnicodeString::endsWith(const UnicodeString& text) const 03971 { return doCompare(length() - text.length(), text.length(), 03972 text, 0, text.length()) == 0; } 03973 03974 inline UBool 03975 UnicodeString::endsWith(const UnicodeString& srcText, 03976 int32_t srcStart, 03977 int32_t srcLength) const { 03978 srcText.pinIndices(srcStart, srcLength); 03979 return doCompare(length() - srcLength, srcLength, 03980 srcText, srcStart, srcLength) == 0; 03981 } 03982 03983 inline UBool 03984 UnicodeString::endsWith(const UChar *srcChars, 03985 int32_t srcLength) const { 03986 if(srcLength < 0) { 03987 srcLength = u_strlen(srcChars); 03988 } 03989 return doCompare(length() - srcLength, srcLength, 03990 srcChars, 0, srcLength) == 0; 03991 } 03992 03993 inline UBool 03994 UnicodeString::endsWith(const UChar *srcChars, 03995 int32_t srcStart, 03996 int32_t srcLength) const { 03997 if(srcLength < 0) { 03998 srcLength = u_strlen(srcChars + srcStart); 03999 } 04000 return doCompare(length() - srcLength, srcLength, 04001 srcChars, srcStart, srcLength) == 0; 04002 } 04003 04004 //======================================== 04005 // replace 04006 //======================================== 04007 inline UnicodeString& 04008 
UnicodeString::replace(int32_t start, 04009 int32_t _length, 04010 const UnicodeString& srcText) 04011 { return doReplace(start, _length, srcText, 0, srcText.length()); } 04012 04013 inline UnicodeString& 04014 UnicodeString::replace(int32_t start, 04015 int32_t _length, 04016 const UnicodeString& srcText, 04017 int32_t srcStart, 04018 int32_t srcLength) 04019 { return doReplace(start, _length, srcText, srcStart, srcLength); } 04020 04021 inline UnicodeString& 04022 UnicodeString::replace(int32_t start, 04023 int32_t _length, 04024 const UChar *srcChars, 04025 int32_t srcLength) 04026 { return doReplace(start, _length, srcChars, 0, srcLength); } 04027 04028 inline UnicodeString& 04029 UnicodeString::replace(int32_t start, 04030 int32_t _length, 04031 const UChar *srcChars, 04032 int32_t srcStart, 04033 int32_t srcLength) 04034 { return doReplace(start, _length, srcChars, srcStart, srcLength); } 04035 04036 inline UnicodeString& 04037 UnicodeString::replace(int32_t start, 04038 int32_t _length, 04039 UChar srcChar) 04040 { return doReplace(start, _length, &srcChar, 0, 1); } 04041 04042 inline UnicodeString& 04043 UnicodeString::replace(int32_t start, 04044 int32_t _length, 04045 UChar32 srcChar) { 04046 UChar buffer[U16_MAX_LENGTH]; 04047 int32_t count = 0; 04048 UBool isError = FALSE; 04049 U16_APPEND(buffer, count, U16_MAX_LENGTH, srcChar, isError); 04050 return doReplace(start, _length, buffer, 0, count); 04051 } 04052 04053 inline UnicodeString& 04054 UnicodeString::replaceBetween(int32_t start, 04055 int32_t limit, 04056 const UnicodeString& srcText) 04057 { return doReplace(start, limit - start, srcText, 0, srcText.length()); } 04058 04059 inline UnicodeString& 04060 UnicodeString::replaceBetween(int32_t start, 04061 int32_t limit, 04062 const UnicodeString& srcText, 04063 int32_t srcStart, 04064 int32_t srcLimit) 04065 { return doReplace(start, limit - start, srcText, srcStart, srcLimit - srcStart); } 04066 04067 inline UnicodeString& 04068 
UnicodeString::findAndReplace(const UnicodeString& oldText, 04069 const UnicodeString& newText) 04070 { return findAndReplace(0, length(), oldText, 0, oldText.length(), 04071 newText, 0, newText.length()); } 04072 04073 inline UnicodeString& 04074 UnicodeString::findAndReplace(int32_t start, 04075 int32_t _length, 04076 const UnicodeString& oldText, 04077 const UnicodeString& newText) 04078 { return findAndReplace(start, _length, oldText, 0, oldText.length(), 04079 newText, 0, newText.length()); } 04080 04081 // ============================ 04082 // extract 04083 // ============================ 04084 inline void 04085 UnicodeString::doExtract(int32_t start, 04086 int32_t _length, 04087 UnicodeString& target) const 04088 { target.replace(0, target.length(), *this, start, _length); } 04089 04090 inline void 04091 UnicodeString::extract(int32_t start, 04092 int32_t _length, 04093 UChar *target, 04094 int32_t targetStart) const 04095 { doExtract(start, _length, target, targetStart); } 04096 04097 inline void 04098 UnicodeString::extract(int32_t start, 04099 int32_t _length, 04100 UnicodeString& target) const 04101 { doExtract(start, _length, target); } 04102 04103 #if !UCONFIG_NO_CONVERSION 04104 04105 inline int32_t 04106 UnicodeString::extract(int32_t start, 04107 int32_t _length, 04108 char *dst, 04109 const char *codepage) const 04110 04111 { 04112 // This dstSize value will be checked explicitly 04113 return extract(start, _length, dst, dst!=0 ? 
0xffffffff : 0, codepage); 04114 } 04115 04116 #endif 04117 04118 inline void 04119 UnicodeString::extractBetween(int32_t start, 04120 int32_t limit, 04121 UChar *dst, 04122 int32_t dstStart) const { 04123 pinIndex(start); 04124 pinIndex(limit); 04125 doExtract(start, limit - start, dst, dstStart); 04126 } 04127 04128 inline UnicodeString 04129 UnicodeString::tempSubStringBetween(int32_t start, int32_t limit) const { 04130 return tempSubString(start, limit - start); 04131 } 04132 04133 inline UChar 04134 UnicodeString::doCharAt(int32_t offset) const 04135 { 04136 if((uint32_t)offset < (uint32_t)length()) { 04137 return getArrayStart()[offset]; 04138 } else { 04139 return kInvalidUChar; 04140 } 04141 } 04142 04143 inline UChar 04144 UnicodeString::charAt(int32_t offset) const 04145 { return doCharAt(offset); } 04146 04147 inline UChar 04148 UnicodeString::operator[] (int32_t offset) const 04149 { return doCharAt(offset); } 04150 04151 inline UChar32 04152 UnicodeString::char32At(int32_t offset) const 04153 { 04154 int32_t len = length(); 04155 if((uint32_t)offset < (uint32_t)len) { 04156 const UChar *array = getArrayStart(); 04157 UChar32 c; 04158 U16_GET(array, 0, offset, len, c); 04159 return c; 04160 } else { 04161 return kInvalidUChar; 04162 } 04163 } 04164 04165 inline int32_t 04166 UnicodeString::getChar32Start(int32_t offset) const { 04167 if((uint32_t)offset < (uint32_t)length()) { 04168 const UChar *array = getArrayStart(); 04169 U16_SET_CP_START(array, 0, offset); 04170 return offset; 04171 } else { 04172 return 0; 04173 } 04174 } 04175 04176 inline int32_t 04177 UnicodeString::getChar32Limit(int32_t offset) const { 04178 int32_t len = length(); 04179 if((uint32_t)offset < (uint32_t)len) { 04180 const UChar *array = getArrayStart(); 04181 U16_SET_CP_LIMIT(array, 0, offset, len); 04182 return offset; 04183 } else { 04184 return len; 04185 } 04186 } 04187 04188 inline UBool 04189 UnicodeString::isEmpty() const { 04190 return fShortLength == 0; 04191 } 04192 
04193 //======================================== 04194 // Write implementation methods 04195 //======================================== 04196 inline void 04197 UnicodeString::setLength(int32_t len) { 04198 if(len <= 127) { 04199 fShortLength = (int8_t)len; 04200 } else { 04201 fShortLength = (int8_t)-1; 04202 fUnion.fFields.fLength = len; 04203 } 04204 } 04205 04206 inline void 04207 UnicodeString::setToEmpty() { 04208 fShortLength = 0; 04209 fFlags = kShortString; 04210 } 04211 04212 inline void 04213 UnicodeString::setArray(UChar *array, int32_t len, int32_t capacity) { 04214 setLength(len); 04215 fUnion.fFields.fArray = array; 04216 fUnion.fFields.fCapacity = capacity; 04217 } 04218 04219 inline const UChar * 04220 UnicodeString::getTerminatedBuffer() { 04221 if(!isWritable()) { 04222 return 0; 04223 } else { 04224 UChar *array = getArrayStart(); 04225 int32_t len = length(); 04226 if(len < getCapacity() && ((fFlags&kRefCounted) == 0 || refCount() == 1)) { 04227 /* 04228 * kRefCounted: Do not write the NUL if the buffer is shared. 04229 * That is mostly safe, except when the length of one copy was modified 04230 * without copy-on-write, e.g., via truncate(newLength) or remove(void). 04231 * Then the NUL would be written into the middle of another copy's string. 04232 */ 04233 if(!(fFlags&kBufferIsReadonly)) { 04234 /* 04235 * We must not write to a readonly buffer, but it is known to be 04236 * NUL-terminated if len<capacity. 04237 * A shared, allocated buffer (refCount()>1) must not have its contents 04238 * modified, but the NUL at [len] is beyond the string contents, 04239 * and multiple string objects and threads writing the same NUL into the 04240 * same location is harmless. 04241 * In all other cases, the buffer is fully writable and it is anyway safe 04242 * to write the NUL. 
04243 * 04244 * Note: An earlier version of this code tested whether there is a NUL 04245 * at [len] already, but, while safe, it generated lots of warnings from 04246 * tools like valgrind and Purify. 04247 */ 04248 array[len] = 0; 04249 } 04250 return array; 04251 } else if(cloneArrayIfNeeded(len+1)) { 04252 array = getArrayStart(); 04253 array[len] = 0; 04254 return array; 04255 } else { 04256 return 0; 04257 } 04258 } 04259 } 04260 04261 inline UnicodeString& 04262 UnicodeString::operator= (UChar ch) 04263 { return doReplace(0, length(), &ch, 0, 1); } 04264 04265 inline UnicodeString& 04266 UnicodeString::operator= (UChar32 ch) 04267 { return replace(0, length(), ch); } 04268 04269 inline UnicodeString& 04270 UnicodeString::setTo(const UnicodeString& srcText, 04271 int32_t srcStart, 04272 int32_t srcLength) 04273 { 04274 unBogus(); 04275 return doReplace(0, length(), srcText, srcStart, srcLength); 04276 } 04277 04278 inline UnicodeString& 04279 UnicodeString::setTo(const UnicodeString& srcText, 04280 int32_t srcStart) 04281 { 04282 unBogus(); 04283 srcText.pinIndex(srcStart); 04284 return doReplace(0, length(), srcText, srcStart, srcText.length() - srcStart); 04285 } 04286 04287 inline UnicodeString& 04288 UnicodeString::setTo(const UnicodeString& srcText) 04289 { 04290 return copyFrom(srcText); 04291 } 04292 04293 inline UnicodeString& 04294 UnicodeString::setTo(const UChar *srcChars, 04295 int32_t srcLength) 04296 { 04297 unBogus(); 04298 return doReplace(0, length(), srcChars, 0, srcLength); 04299 } 04300 04301 inline UnicodeString& 04302 UnicodeString::setTo(UChar srcChar) 04303 { 04304 unBogus(); 04305 return doReplace(0, length(), &srcChar, 0, 1); 04306 } 04307 04308 inline UnicodeString& 04309 UnicodeString::setTo(UChar32 srcChar) 04310 { 04311 unBogus(); 04312 return replace(0, length(), srcChar); 04313 } 04314 04315 inline UnicodeString& 04316 UnicodeString::append(const UnicodeString& srcText, 04317 int32_t srcStart, 04318 int32_t srcLength) 04319 { 
return doReplace(length(), 0, srcText, srcStart, srcLength); } 04320 04321 inline UnicodeString& 04322 UnicodeString::append(const UnicodeString& srcText) 04323 { return doReplace(length(), 0, srcText, 0, srcText.length()); } 04324 04325 inline UnicodeString& 04326 UnicodeString::append(const UChar *srcChars, 04327 int32_t srcStart, 04328 int32_t srcLength) 04329 { return doReplace(length(), 0, srcChars, srcStart, srcLength); } 04330 04331 inline UnicodeString& 04332 UnicodeString::append(const UChar *srcChars, 04333 int32_t srcLength) 04334 { return doReplace(length(), 0, srcChars, 0, srcLength); } 04335 04336 inline UnicodeString& 04337 UnicodeString::append(UChar srcChar) 04338 { return doReplace(length(), 0, &srcChar, 0, 1); } 04339 04340 inline UnicodeString& 04341 UnicodeString::append(UChar32 srcChar) { 04342 UChar buffer[U16_MAX_LENGTH]; 04343 int32_t _length = 0; 04344 UBool isError = FALSE; 04345 U16_APPEND(buffer, _length, U16_MAX_LENGTH, srcChar, isError); 04346 return doReplace(length(), 0, buffer, 0, _length); 04347 } 04348 04349 inline UnicodeString& 04350 UnicodeString::operator+= (UChar ch) 04351 { return doReplace(length(), 0, &ch, 0, 1); } 04352 04353 inline UnicodeString& 04354 UnicodeString::operator+= (UChar32 ch) { 04355 return append(ch); 04356 } 04357 04358 inline UnicodeString& 04359 UnicodeString::operator+= (const UnicodeString& srcText) 04360 { return doReplace(length(), 0, srcText, 0, srcText.length()); } 04361 04362 inline UnicodeString& 04363 UnicodeString::insert(int32_t start, 04364 const UnicodeString& srcText, 04365 int32_t srcStart, 04366 int32_t srcLength) 04367 { return doReplace(start, 0, srcText, srcStart, srcLength); } 04368 04369 inline UnicodeString& 04370 UnicodeString::insert(int32_t start, 04371 const UnicodeString& srcText) 04372 { return doReplace(start, 0, srcText, 0, srcText.length()); } 04373 04374 inline UnicodeString& 04375 UnicodeString::insert(int32_t start, 04376 const UChar *srcChars, 04377 int32_t srcStart, 
04378 int32_t srcLength) 04379 { return doReplace(start, 0, srcChars, srcStart, srcLength); } 04380 04381 inline UnicodeString& 04382 UnicodeString::insert(int32_t start, 04383 const UChar *srcChars, 04384 int32_t srcLength) 04385 { return doReplace(start, 0, srcChars, 0, srcLength); } 04386 04387 inline UnicodeString& 04388 UnicodeString::insert(int32_t start, 04389 UChar srcChar) 04390 { return doReplace(start, 0, &srcChar, 0, 1); } 04391 04392 inline UnicodeString& 04393 UnicodeString::insert(int32_t start, 04394 UChar32 srcChar) 04395 { return replace(start, 0, srcChar); } 04396 04397 04398 inline UnicodeString& 04399 UnicodeString::remove() 04400 { 04401 // remove() of a bogus string makes the string empty and non-bogus 04402 // we also un-alias a read-only alias to deal with NUL-termination 04403 // issues with getTerminatedBuffer() 04404 if(fFlags & (kIsBogus|kBufferIsReadonly)) { 04405 setToEmpty(); 04406 } else { 04407 fShortLength = 0; 04408 } 04409 return *this; 04410 } 04411 04412 inline UnicodeString& 04413 UnicodeString::remove(int32_t start, 04414 int32_t _length) 04415 { 04416 if(start <= 0 && _length == INT32_MAX) { 04417 // remove(guaranteed everything) of a bogus string makes the string empty and non-bogus 04418 return remove(); 04419 } 04420 return doReplace(start, _length, NULL, 0, 0); 04421 } 04422 04423 inline UnicodeString& 04424 UnicodeString::removeBetween(int32_t start, 04425 int32_t limit) 04426 { return doReplace(start, limit - start, NULL, 0, 0); } 04427 04428 inline UnicodeString & 04429 UnicodeString::retainBetween(int32_t start, int32_t limit) { 04430 truncate(limit); 04431 return doReplace(0, start, NULL, 0, 0); 04432 } 04433 04434 inline UBool 04435 UnicodeString::truncate(int32_t targetLength) 04436 { 04437 if(isBogus() && targetLength == 0) { 04438 // truncate(0) of a bogus string makes the string empty and non-bogus 04439 unBogus(); 04440 return FALSE; 04441 } else if((uint32_t)targetLength < (uint32_t)length()) { 04442 
setLength(targetLength); 04443 if(fFlags&kBufferIsReadonly) { 04444 fUnion.fFields.fCapacity = targetLength; // not NUL-terminated any more 04445 } 04446 return TRUE; 04447 } else { 04448 return FALSE; 04449 } 04450 } 04451 04452 inline UnicodeString& 04453 UnicodeString::reverse() 04454 { return doReverse(0, length()); } 04455 04456 inline UnicodeString& 04457 UnicodeString::reverse(int32_t start, 04458 int32_t _length) 04459 { return doReverse(start, _length); } 04460 04461 U_NAMESPACE_END 04462 04463 #endif