MyGUI
3.2.1
|
00001 /* 00002 * This source file is part of MyGUI. For the latest info, see http://mygui.info/ 00003 * Distributed under the MIT License 00004 * (See accompanying file COPYING.MIT or copy at http://opensource.org/licenses/MIT) 00005 */ 00006 00007 #include "MyGUI_Precompiled.h" 00008 #include "MyGUI_UString.h" 00009 00010 namespace MyGUI 00011 { 00012 00013 //-------------------------------------------------------------------------- 00014 UString::_base_iterator::_base_iterator() 00015 { 00016 mString = 0; 00017 } 00018 //-------------------------------------------------------------------------- 00019 void UString::_base_iterator::_seekFwd( size_type c ) 00020 { 00021 mIter += c; 00022 } 00023 //-------------------------------------------------------------------------- 00024 void UString::_base_iterator::_seekRev( size_type c ) 00025 { 00026 mIter -= c; 00027 } 00028 //-------------------------------------------------------------------------- 00029 void UString::_base_iterator::_become( const _base_iterator& i ) 00030 { 00031 mIter = i.mIter; 00032 mString = i.mString; 00033 } 00034 //-------------------------------------------------------------------------- 00035 bool UString::_base_iterator::_test_begin() const 00036 { 00037 return mIter == mString->mData.begin(); 00038 } 00039 //-------------------------------------------------------------------------- 00040 bool UString::_base_iterator::_test_end() const 00041 { 00042 return mIter == mString->mData.end(); 00043 } 00044 //-------------------------------------------------------------------------- 00045 UString::size_type UString::_base_iterator::_get_index() const 00046 { 00047 return mIter - mString->mData.begin(); 00048 } 00049 //-------------------------------------------------------------------------- 00050 void UString::_base_iterator::_jump_to( size_type index ) 00051 { 00052 mIter = mString->mData.begin() + index; 00053 } 00054 //-------------------------------------------------------------------------- 00055 UString::unicode_char UString::_base_iterator::_getCharacter() const 00056 { 00057 size_type current_index = _get_index(); 00058 return mString->getChar( current_index ); 00059 } 00060 //-------------------------------------------------------------------------- 00061 int UString::_base_iterator::_setCharacter( unicode_char uc ) 00062 { 00063 size_type current_index = _get_index(); 00064 int change = mString->setChar( current_index, uc ); 00065 _jump_to( current_index ); 00066 return change; 00067 } 00068 //-------------------------------------------------------------------------- 00069 void UString::_base_iterator::_moveNext() 00070 { 00071 _seekFwd( 1 ); // move 1 code point forward 00072 if ( _test_end() ) return; // exit if we hit the end 00073 if ( _utf16_surrogate_follow( mIter[0] ) ) { 00074 // landing on a follow code point means we might be part of a bigger character 00075 // so we test for that 00076 code_point lead_half = 0; 00077 //NB: we can't possibly be at the beginning here, so no need to test 00078 lead_half = mIter[-1]; // check the previous code point to see if we're part of a surrogate pair 00079 if ( _utf16_surrogate_lead( lead_half ) ) { 00080 _seekFwd( 1 ); // if so, then advance 1 more code point 00081 } 00082 } 00083 } 00084 //-------------------------------------------------------------------------- 00085 void UString::_base_iterator::_movePrev() 00086 { 00087 _seekRev( 1 ); // move 1 code point backwards 00088 if ( _test_begin() ) return; // exit if we hit the beginning 00089 if ( _utf16_surrogate_follow( mIter[0] ) ) { 00090 // landing on a follow code point means we might be part of a bigger character 00091 // so we test for that 00092 code_point lead_half = 0; 00093 lead_half = mIter[-1]; // check the previous character to see if we're part of a surrogate pair 00094 if ( _utf16_surrogate_lead( lead_half ) ) { 00095 _seekRev( 1 ); // if so, then rewind 1 more code point 00096 } 00097 } 00098 } 00099 //-------------------------------------------------------------------------- 00100 //-------------------------------------------------------------------------- 00101 //-------------------------------------------------------------------------- 00102 //-------------------------------------------------------------------------- 00103 UString::_fwd_iterator::_fwd_iterator() 00104 { 00105 00106 } 00107 //-------------------------------------------------------------------------- 00108 UString::_fwd_iterator::_fwd_iterator( const _fwd_iterator& i ) 00109 { 00110 _become( i ); 00111 } 00112 //-------------------------------------------------------------------------- 00113 UString::_fwd_iterator& UString::_fwd_iterator::operator++() 00114 { 00115 _seekFwd( 1 ); 00116 return *this; 00117 } 00118 //-------------------------------------------------------------------------- 00119 UString::_fwd_iterator UString::_fwd_iterator::operator++( int ) 00120 { 00121 _fwd_iterator tmp( *this ); 00122 _seekFwd( 1 ); 00123 return tmp; 00124 } 00125 //-------------------------------------------------------------------------- 00126 UString::_fwd_iterator& UString::_fwd_iterator::operator--() 00127 { 00128 _seekRev( 1 ); 00129 return *this; 00130 } 00131 //-------------------------------------------------------------------------- 00132 UString::_fwd_iterator UString::_fwd_iterator::operator--( int ) 00133 { 00134 _fwd_iterator tmp( *this ); 00135 _seekRev( 1 ); 00136 return tmp; 00137 } 00138 //-------------------------------------------------------------------------- 00139 UString::_fwd_iterator UString::_fwd_iterator::operator+( difference_type n ) 00140 { 00141 _fwd_iterator tmp( *this ); 00142 if ( n < 0 ) 00143 tmp._seekRev( -n ); 00144 else 00145 tmp._seekFwd( n ); 00146 return tmp; 00147 } 00148 //-------------------------------------------------------------------------- 00149 UString::_fwd_iterator UString::_fwd_iterator::operator-( difference_type n ) 00150 { 00151 _fwd_iterator tmp( *this ); 00152 if ( n < 0 ) 00153 tmp._seekFwd( -n ); 00154 else 00155 tmp._seekRev( n ); 00156 return tmp; 00157 } 00158 //-------------------------------------------------------------------------- 00159 UString::_fwd_iterator& UString::_fwd_iterator::operator+=( difference_type n ) 00160 { 00161 if ( n < 0 ) 00162 _seekRev( -n ); 00163 else 00164 _seekFwd( n ); 00165 return *this; 00166 } 00167 //-------------------------------------------------------------------------- 00168 UString::_fwd_iterator& UString::_fwd_iterator::operator-=( difference_type n ) 00169 { 00170 if ( n < 0 ) 00171 _seekFwd( -n ); 00172 else 00173 _seekRev( n ); 00174 return *this; 00175 } 00176 //-------------------------------------------------------------------------- 00177 UString::value_type& UString::_fwd_iterator::operator*() const 00178 { 00179 return *mIter; 00180 } 00181 //-------------------------------------------------------------------------- 00182 UString::value_type& UString::_fwd_iterator::operator[]( difference_type n ) const 00183 { 00184 _fwd_iterator tmp( *this ); 00185 tmp += n; 00186 return *tmp; 00187 } 00188 //-------------------------------------------------------------------------- 00189 UString::_fwd_iterator& UString::_fwd_iterator::moveNext() 00190 { 00191 _moveNext(); 00192 return *this; 00193 } 00194 //-------------------------------------------------------------------------- 00195 UString::_fwd_iterator& UString::_fwd_iterator::movePrev() 00196 { 00197 _movePrev(); 00198 return *this; 00199 } 00200 //-------------------------------------------------------------------------- 00201 UString::unicode_char UString::_fwd_iterator::getCharacter() const 00202 { 00203 return _getCharacter(); 00204 } 00205 //-------------------------------------------------------------------------- 00206 int UString::_fwd_iterator::setCharacter( unicode_char uc ) 00207 { 00208 return _setCharacter( uc ); 00209 } 00210 //-------------------------------------------------------------------------- 00211 //-------------------------------------------------------------------------- 00212 //-------------------------------------------------------------------------- 00213 //-------------------------------------------------------------------------- 00214 UString::_const_fwd_iterator::_const_fwd_iterator() 00215 { 00216 00217 } 00218 //-------------------------------------------------------------------------- 00219 UString::_const_fwd_iterator::_const_fwd_iterator( const _const_fwd_iterator& i ) 00220 { 00221 _become( i ); 00222 } 00223 //-------------------------------------------------------------------------- 00224 UString::_const_fwd_iterator::_const_fwd_iterator( const _fwd_iterator& i ) 00225 { 00226 _become( i ); 00227 } 00228 //-------------------------------------------------------------------------- 00229 UString::_const_fwd_iterator& UString::_const_fwd_iterator::operator++() 00230 { 00231 _seekFwd( 1 ); 00232 return *this; 00233 } 00234 //-------------------------------------------------------------------------- 00235 UString::_const_fwd_iterator UString::_const_fwd_iterator::operator++( int ) 00236 { 00237 _const_fwd_iterator tmp( *this ); 00238 _seekFwd( 1 ); 00239 return tmp; 00240 } 00241 //-------------------------------------------------------------------------- 00242 UString::_const_fwd_iterator& UString::_const_fwd_iterator::operator--() 00243 { 00244 _seekRev( 1 ); 00245 return *this; 00246 } 00247 //-------------------------------------------------------------------------- 00248 UString::_const_fwd_iterator UString::_const_fwd_iterator::operator--( int ) 00249 { 00250 _const_fwd_iterator tmp( *this ); 00251 _seekRev( 1 ); 00252 return tmp; 00253 } 00254 //-------------------------------------------------------------------------- 00255 UString::_const_fwd_iterator UString::_const_fwd_iterator::operator+( difference_type n ) 00256 { 00257 _const_fwd_iterator tmp( *this ); 00258 if ( n < 0 ) 00259 tmp._seekRev( -n ); 00260 else 00261 tmp._seekFwd( n ); 00262 return tmp; 00263 } 00264 //-------------------------------------------------------------------------- 00265 UString::_const_fwd_iterator UString::_const_fwd_iterator::operator-( difference_type n ) 00266 { 00267 _const_fwd_iterator tmp( *this ); 00268 if ( n < 0 ) 00269 tmp._seekFwd( -n ); 00270 else 00271 tmp._seekRev( n ); 00272 return tmp; 00273 } 00274 //-------------------------------------------------------------------------- 00275 UString::_const_fwd_iterator& UString::_const_fwd_iterator::operator+=( difference_type n ) 00276 { 00277 if ( n < 0 ) 00278 _seekRev( -n ); 00279 else 00280 _seekFwd( n ); 00281 return *this; 00282 } 00283 //-------------------------------------------------------------------------- 00284 UString::_const_fwd_iterator& UString::_const_fwd_iterator::operator-=( difference_type n ) 00285 { 00286 if ( n < 0 ) 00287 _seekFwd( -n ); 00288 else 00289 _seekRev( n ); 00290 return *this; 00291 } 00292 //-------------------------------------------------------------------------- 00293 const UString::value_type& UString::_const_fwd_iterator::operator*() const 00294 { 00295 return *mIter; 00296 } 00297 //-------------------------------------------------------------------------- 00298 const UString::value_type& UString::_const_fwd_iterator::operator[]( difference_type n ) const 00299 { 00300 _const_fwd_iterator tmp( *this ); 00301 tmp += n; 00302 return *tmp; 00303 } 00304 //-------------------------------------------------------------------------- 00305 UString::_const_fwd_iterator& UString::_const_fwd_iterator::moveNext() 00306 { 00307 _moveNext(); 00308 return *this; 00309 } 00310 //-------------------------------------------------------------------------- 00311 UString::_const_fwd_iterator& UString::_const_fwd_iterator::movePrev() 00312 { 00313 _movePrev(); 00314 return *this; 00315 } 00316 //-------------------------------------------------------------------------- 00317 UString::unicode_char UString::_const_fwd_iterator::getCharacter() const 00318 { 00319 return _getCharacter(); 00320 } 00321 //-------------------------------------------------------------------------- 00322 //-------------------------------------------------------------------------- 00323 //-------------------------------------------------------------------------- 00324 //-------------------------------------------------------------------------- 00325 UString::_rev_iterator::_rev_iterator() 00326 { 00327 00328 } 00329 //-------------------------------------------------------------------------- 00330 UString::_rev_iterator::_rev_iterator( const _rev_iterator& i ) 00331 { 00332 _become( i ); 00333 } 00334 //-------------------------------------------------------------------------- 00335 UString::_rev_iterator& UString::_rev_iterator::operator++() 00336 { 00337 _seekRev( 1 ); 00338 return *this; 00339 } 00340 //-------------------------------------------------------------------------- 00341 UString::_rev_iterator UString::_rev_iterator::operator++( int ) 00342 { 00343 _rev_iterator tmp( *this ); 00344 _seekRev( 1 ); 00345 return tmp; 00346 } 00347 //-------------------------------------------------------------------------- 00348 UString::_rev_iterator& UString::_rev_iterator::operator--() 00349 { 00350 _seekFwd( 1 ); 00351 return *this; 00352 } 00353 //-------------------------------------------------------------------------- 00354 UString::_rev_iterator UString::_rev_iterator::operator--( int ) 00355 { 00356 _rev_iterator tmp( *this ); 00357 _seekFwd( 1 ); 00358 return tmp; 00359 } 00360 //-------------------------------------------------------------------------- 00361 UString::_rev_iterator UString::_rev_iterator::operator+( difference_type n ) 00362 { 00363 _rev_iterator tmp( *this ); 00364 if ( n < 0 ) 00365 tmp._seekFwd( -n ); 00366 else 00367 tmp._seekRev( n ); 00368 return tmp; 00369 } 00370 //-------------------------------------------------------------------------- 00371 UString::_rev_iterator UString::_rev_iterator::operator-( difference_type n ) 00372 { 00373 _rev_iterator tmp( *this ); 00374 if ( n < 0 ) 00375 tmp._seekRev( -n ); 00376 else 00377 tmp._seekFwd( n ); 00378 return tmp; 00379 } 00380 //-------------------------------------------------------------------------- 00381 UString::_rev_iterator& UString::_rev_iterator::operator+=( difference_type n ) 00382 { 00383 if ( n < 0 ) 00384 _seekFwd( -n ); 00385 else 00386 _seekRev( n ); 00387 return *this; 00388 } 00389 //-------------------------------------------------------------------------- 00390 UString::_rev_iterator& UString::_rev_iterator::operator-=( difference_type n ) 00391 { 00392 if ( n < 0 ) 00393 _seekRev( -n ); 00394 else 00395 _seekFwd( n ); 00396 return *this; 00397 } 00398 //-------------------------------------------------------------------------- 00399 UString::value_type& UString::_rev_iterator::operator*() const 00400 { 00401 return mIter[-1]; 00402 } 00403 //-------------------------------------------------------------------------- 00404 UString::value_type& UString::_rev_iterator::operator[]( difference_type n ) const 00405 { 00406 _rev_iterator tmp( *this ); 00407 tmp -= n; 00408 return *tmp; 00409 } 00410 //-------------------------------------------------------------------------- 00411 //-------------------------------------------------------------------------- 00412 //-------------------------------------------------------------------------- 00413 //-------------------------------------------------------------------------- 00414 UString::_const_rev_iterator::_const_rev_iterator() 00415 { 00416 00417 } 00418 //-------------------------------------------------------------------------- 00419 UString::_const_rev_iterator::_const_rev_iterator( const _const_rev_iterator& i ) 00420 { 00421 _become( i ); 00422 } 00423 //-------------------------------------------------------------------------- 00424 UString::_const_rev_iterator::_const_rev_iterator( const _rev_iterator& i ) 00425 { 00426 _become( i ); 00427 } 00428 //-------------------------------------------------------------------------- 00429 UString::_const_rev_iterator& UString::_const_rev_iterator::operator++() 00430 { 00431 _seekRev( 1 ); 00432 return *this; 00433 } 00434 //-------------------------------------------------------------------------- 00435 UString::_const_rev_iterator UString::_const_rev_iterator::operator++( int ) 00436 { 00437 _const_rev_iterator tmp( *this ); 00438 _seekRev( 1 ); 00439 return tmp; 00440 } 00441 //-------------------------------------------------------------------------- 00442 UString::_const_rev_iterator& UString::_const_rev_iterator::operator--() 00443 { 00444 _seekFwd( 1 ); 00445 return *this; 00446 } 00447 //-------------------------------------------------------------------------- 00448 UString::_const_rev_iterator UString::_const_rev_iterator::operator--( int ) 00449 { 00450 _const_rev_iterator tmp( *this ); 00451 _seekFwd( 1 ); 00452 return tmp; 00453 } 00454 //-------------------------------------------------------------------------- 00455 UString::_const_rev_iterator UString::_const_rev_iterator::operator+( difference_type n ) 00456 { 00457 _const_rev_iterator tmp( *this ); 00458 if ( n < 0 ) 00459 tmp._seekFwd( -n ); 00460 else 00461 tmp._seekRev( n ); 00462 return tmp; 00463 } 00464 //-------------------------------------------------------------------------- 00465 UString::_const_rev_iterator UString::_const_rev_iterator::operator-( difference_type n ) 00466 { 00467 _const_rev_iterator tmp( *this ); 00468 if ( n < 0 ) 00469 tmp._seekRev( -n ); 00470 else 00471 tmp._seekFwd( n ); 00472 return tmp; 00473 } 00474 //-------------------------------------------------------------------------- 00475 UString::_const_rev_iterator& UString::_const_rev_iterator::operator+=( difference_type n ) 00476 { 00477 if ( n < 0 ) 00478 _seekFwd( -n ); 00479 else 00480 _seekRev( n ); 00481 return *this; 00482 } 00483 //-------------------------------------------------------------------------- 00484 UString::_const_rev_iterator& UString::_const_rev_iterator::operator-=( difference_type n ) 00485 { 00486 if ( n < 0 ) 00487 _seekRev( -n ); 00488 else 00489 _seekFwd( n ); 00490 return *this; 00491 } 00492 //-------------------------------------------------------------------------- 00493 const UString::value_type& UString::_const_rev_iterator::operator*() const 00494 { 00495 return mIter[-1]; 00496 } 00497 //-------------------------------------------------------------------------- 00498 const UString::value_type& UString::_const_rev_iterator::operator[]( difference_type n ) const 00499 { 00500 _const_rev_iterator tmp( *this ); 00501 tmp -= n; 00502 return *tmp; 00503 } 00504 //-------------------------------------------------------------------------- 00505 //-------------------------------------------------------------------------- 00506 //-------------------------------------------------------------------------- 00507 //-------------------------------------------------------------------------- 00508 UString::UString() 00509 { 00510 _init(); 00511 } 00512 //-------------------------------------------------------------------------- 00513 UString::UString( const UString& copy ) 00514 { 00515 _init(); 00516 mData = copy.mData; 00517 } 00518 //-------------------------------------------------------------------------- 00519 UString::UString( size_type length, const code_point& ch ) 00520 { 00521 _init(); 00522 assign( length, ch ); 00523 } 00524 //-------------------------------------------------------------------------- 00525 UString::UString( const code_point* str ) 00526 { 00527 _init(); 00528 assign( str ); 00529 } 00530 //-------------------------------------------------------------------------- 00531 UString::UString( const code_point* str, size_type length ) 00532 { 00533 _init(); 00534 assign( str, length ); 00535 } 00536 //-------------------------------------------------------------------------- 00537 UString::UString( const UString& str, size_type index, size_type length ) 00538 { 00539 _init(); 00540 assign( str, index, length ); 00541 } 00542 //-------------------------------------------------------------------------- 00543 #if MYGUI_IS_NATIVE_WCHAR_T 00544 UString::UString( const wchar_t* w_str ) 00545 { 00546 _init(); 00547 assign( w_str ); 00548 } 00549 //-------------------------------------------------------------------------- 00550 UString::UString( const wchar_t* w_str, size_type length ) 00551 { 00552 _init(); 00553 assign( w_str, length ); 00554 } 00555 #endif 00556 //-------------------------------------------------------------------------- 00557 UString::UString( const std::wstring& wstr ) 00558 { 00559 _init(); 00560 assign( wstr ); 00561 } 00562 //-------------------------------------------------------------------------- 00563 UString::UString( const char* c_str ) 00564 { 00565 _init(); 00566 assign( c_str ); 00567 } 00568 //-------------------------------------------------------------------------- 00569 UString::UString( const char* c_str, size_type length ) 00570 { 00571 _init(); 00572 assign( c_str, length ); 00573 } 00574 //-------------------------------------------------------------------------- 00575 UString::UString( const std::string& str ) 00576 { 00577 _init(); 00578 assign( str ); 00579 } 00580 //-------------------------------------------------------------------------- 00581 UString::~UString() 00582 { 00583 _cleanBuffer(); 00584 } 00585 //-------------------------------------------------------------------------- 00586 UString::size_type UString::size() const 00587 { 00588 return mData.size(); 00589 } 00590 //-------------------------------------------------------------------------- 00591 UString::size_type UString::length() const 00592 { 00593 return size(); 00594 } 00595 //-------------------------------------------------------------------------- 00596 UString::size_type UString::length_Characters() const 00597 { 00598 const_iterator i = begin(), ie = end(); 00599 size_type c = 0; 00600 while ( i != ie ) { 00601 i.moveNext(); 00602 ++c; 00603 } 00604 return c; 00605 } 00606 //-------------------------------------------------------------------------- 00607 UString::size_type UString::max_size() const 00608 { 00609 return mData.max_size(); 00610 } 00611 //-------------------------------------------------------------------------- 00612 void UString::reserve( size_type size ) 00613 { 00614 mData.reserve( size ); 00615 } 00616 //-------------------------------------------------------------------------- 00617 void UString::resize( size_type num, const code_point& val /*= 0 */ ) 00618 { 00619 mData.resize( num, val ); 00620 } 00621 //-------------------------------------------------------------------------- 00622 void UString::swap( UString& from ) 00623 { 00624 mData.swap( from.mData ); 00625 } 00626 //-------------------------------------------------------------------------- 00627 bool UString::empty() const 00628 { 00629 return mData.empty(); 00630 } 00631 //-------------------------------------------------------------------------- 00632 const UString::code_point* UString::c_str() const 00633 { 00634 return mData.c_str(); 00635 } 00636 //-------------------------------------------------------------------------- 00637 const UString::code_point* UString::data() const 00638 { 00639 return c_str(); 00640 } 00641 //-------------------------------------------------------------------------- 00642 UString::size_type UString::capacity() const 00643 { 00644 return mData.capacity(); 00645 } 00646 //-------------------------------------------------------------------------- 00647 void UString::clear() 00648 { 00649 mData.clear(); 00650 } 00651 //-------------------------------------------------------------------------- 00652 UString UString::substr( size_type index, size_type num /*= npos */ ) const 00653 { 00654 // this could avoid the extra copy if we used a private specialty constructor 00655 dstring data = mData.substr( index, num ); 00656 UString tmp; 00657 tmp.mData.swap( data ); 00658 return tmp; 00659 } 00660 //-------------------------------------------------------------------------- 00661 void UString::push_back( unicode_char val ) 00662 { 00663 code_point cp[2]; 00664 size_t c = _utf32_to_utf16( val, cp ); 00665 if ( c > 0 ) push_back( cp[0] ); 00666 if ( c > 1 ) push_back( cp[1] ); 00667 } 00668 //-------------------------------------------------------------------------- 00669 #if MYGUI_IS_NATIVE_WCHAR_T 00670 void UString::push_back( wchar_t val ) 00671 { 00672 // we do this because the Unicode method still preserves UTF-16 code points 00673 mData.push_back( static_cast<code_point>( val ) ); 00674 } 00675 #endif 00676 //-------------------------------------------------------------------------- 00677 void UString::push_back( code_point val ) 00678 { 00679 mData.push_back( val ); 00680 } 00681 00682 void UString::push_back( char val ) 00683 { 00684 mData.push_back( static_cast<code_point>( val ) ); 00685 } 00686 00687 bool UString::inString( unicode_char ch ) const 00688 { 00689 const_iterator i, ie = end(); 00690 for ( i = begin(); i != ie; i.moveNext() ) { 00691 if ( i.getCharacter() == ch ) 00692 return true; 00693 } 00694 return false; 00695 } 00696 00697 const std::string& UString::asUTF8() const 00698 { 00699 _load_buffer_UTF8(); 00700 return *m_buffer.mStrBuffer; 00701 } 00702 00703 const char* UString::asUTF8_c_str() const 00704 { 00705 _load_buffer_UTF8(); 00706 return m_buffer.mStrBuffer->c_str(); 00707 } 00708 00709 const UString::utf32string& UString::asUTF32() const 00710 { 00711 _load_buffer_UTF32(); 00712 return *m_buffer.mUTF32StrBuffer; 00713 } 00714 00715 const UString::unicode_char* UString::asUTF32_c_str() const 00716 { 00717 _load_buffer_UTF32(); 00718 return m_buffer.mUTF32StrBuffer->c_str(); 00719 } 00720 00721 const std::wstring& UString::asWStr() const 00722 { 00723 _load_buffer_WStr(); 00724 return *m_buffer.mWStrBuffer; 00725 } 00726 00727 const wchar_t* UString::asWStr_c_str() const 00728 { 00729 _load_buffer_WStr(); 00730 return m_buffer.mWStrBuffer->c_str(); 00731 } 00732 00733 UString::code_point& UString::at( size_type loc ) 00734 { 00735 return mData.at( loc ); 00736 } 00737 00738 const UString::code_point& UString::at( size_type loc ) const 00739 { 00740 return mData.at( loc ); 00741 } 00742 00743 UString::unicode_char UString::getChar( size_type loc ) const 00744 { 00745 const code_point* ptr = c_str(); 00746 unicode_char uc; 00747 size_t l = _utf16_char_length( ptr[loc] ); 00748 code_point cp[2] = { /* blame the code beautifier */ 00749 0, 0 00750 }; 00751 cp[0] = ptr[loc]; 00752 00753 if ( l == 2 && ( loc + 1 ) < mData.length() ) { 00754 cp[1] = ptr[loc+1]; 00755 } 00756 _utf16_to_utf32( cp, uc ); 00757 return uc; 00758 } 00759 00760 int UString::setChar( size_type loc, unicode_char ch ) 00761 { 00762 code_point cp[2] = { /* blame the code beautifier */ 00763 0, 0 00764 }; 00765 size_t l = _utf32_to_utf16( ch, cp ); 00766 unicode_char existingChar = getChar( loc ); 00767 size_t existingSize = _utf16_char_length( existingChar ); 00768 size_t newSize = _utf16_char_length( ch ); 00769 00770 if ( newSize > existingSize ) { 00771 at( loc ) = cp[0]; 00772 insert( loc + 1, 1, cp[1] ); 00773 return 1; 00774 } 00775 if ( newSize < existingSize ) { 00776 erase( loc, 1 ); 00777 at( loc ) = cp[0]; 00778 return -1; 00779 } 00780 00781 // newSize == existingSize 00782 at( loc ) = cp[0]; 00783 if ( l == 2 ) at( loc + 1 ) = cp[1]; 00784 return 0; 00785 } 00786 00787 UString::iterator UString::begin() 00788 { 00789 iterator i; 00790 i.mIter = mData.begin(); 00791 i.mString = this; 00792 return i; 00793 } 00794 00795 UString::const_iterator UString::begin() const 00796 { 00797 const_iterator i; 00798 i.mIter = const_cast<UString*>( this )->mData.begin(); 00799 i.mString = const_cast<UString*>( this ); 00800 return i; 00801 } 00802 00803 UString::iterator UString::end() 00804 { 00805 iterator i; 00806 i.mIter = mData.end(); 00807 i.mString = this; 00808 return i; 00809 } 00810 00811 UString::const_iterator UString::end() const 00812 { 00813 const_iterator i; 00814 i.mIter = const_cast<UString*>( this )->mData.end(); 00815 i.mString = const_cast<UString*>( this ); 00816 return i; 00817 } 00818 00819 UString::reverse_iterator UString::rbegin() 00820 { 00821 reverse_iterator i; 00822 i.mIter = mData.end(); 00823 i.mString = this; 00824 return i; 00825 } 00826 00827 UString::const_reverse_iterator UString::rbegin() const 00828 { 00829 const_reverse_iterator i; 00830 i.mIter = const_cast<UString*>( this )->mData.end(); 00831 i.mString = const_cast<UString*>( this ); 00832 return i; 00833 } 00834 00835 UString::reverse_iterator UString::rend() 00836 { 00837 reverse_iterator i; 00838 i.mIter = mData.begin(); 00839 i.mString = this; 00840 return i; 00841 } 00842 00843 UString::const_reverse_iterator UString::rend() const 00844 { 00845 const_reverse_iterator i; 00846 i.mIter = const_cast<UString*>( this )->mData.begin(); 00847 i.mString = const_cast<UString*>( this ); 00848 return i; 00849 } 00850 00851 UString& UString::assign( iterator start, iterator end ) 00852 { 00853 mData.assign( start.mIter, end.mIter ); 00854 return *this; 00855 } 00856 00857 UString& UString::assign( const UString& str ) 00858 { 00859 mData.assign( str.mData ); 00860 return *this; 00861 } 00862 00863 UString& UString::assign( const code_point* str ) 00864 { 00865 mData.assign( str ); 00866 return *this; 00867 } 00868 00869 UString& UString::assign( const code_point* str, size_type num ) 00870 { 00871 mData.assign( str, num ); 00872 return *this; 00873 } 00874 00875 UString& UString::assign( const UString& str, size_type index, size_type len ) 00876 { 00877 mData.assign( str.mData, index, len ); 00878 return *this; 00879 } 00880 00881 UString& UString::assign( size_type num, const code_point& ch ) 00882 { 00883 mData.assign( num, ch ); 00884 return *this; 00885 } 00886 00887 UString& UString::assign( const std::wstring& wstr ) 00888 { 00889 mData.clear(); 00890 mData.reserve( wstr.length() ); // best guess bulk allocate 00891 #ifdef WCHAR_UTF16 // if we're already working in UTF-16, this is easy 00892 code_point tmp; 00893 std::wstring::const_iterator i, ie = wstr.end(); 00894 for ( i = wstr.begin(); i != ie; i++ ) { 00895 tmp = static_cast<code_point>( *i ); 00896 mData.push_back( tmp ); 00897 } 00898 #else // otherwise we do it the safe way (which is still 100% safe to pass UTF-16 through, just slower) 00899 code_point cp[3] = {0, 0, 0}; 00900 unicode_char tmp; 00901 std::wstring::const_iterator i, ie = wstr.end(); 00902 for ( i = wstr.begin(); i != ie; i++ ) { 00903 tmp = static_cast<unicode_char>( *i ); 00904 size_t l = _utf32_to_utf16( tmp, cp ); 00905 if ( l > 0 ) mData.push_back( cp[0] ); 00906 if ( l > 1 ) mData.push_back( cp[1] ); 00907 } 00908 #endif 00909 return *this; 00910 } 00911 00912 #if MYGUI_IS_NATIVE_WCHAR_T 00913 UString& UString::assign( const wchar_t* w_str ) 00914 { 00915 std::wstring tmp; 00916 tmp.assign( w_str ); 00917 return assign( tmp ); 00918 } 00919 00920 UString& UString::assign( const wchar_t* w_str, size_type num ) 00921 { 00922 std::wstring tmp; 00923 tmp.assign( w_str, num ); 00924 return assign( tmp ); 00925 } 00926 #endif 00927 00928 UString& UString::assign( const std::string& str ) 00929 { 00930 size_type len = _verifyUTF8( str ); 00931 clear(); // empty our contents, if there are any 00932 reserve( len ); // best guess bulk capacity growth 00933 00934 // This is a 3 step process, converting each byte in the UTF-8 stream to UTF-32, 00935 // then converting it to UTF-16, then finally appending the data buffer 00936 00937 unicode_char uc; // temporary Unicode character buffer 00938 unsigned char utf8buf[7]; // temporary UTF-8 buffer 00939 utf8buf[6] = 0; 00940 size_t utf8len; // UTF-8 length 00941 code_point utf16buff[3]; // temporary UTF-16 buffer 00942 utf16buff[2] = 0; 00943 size_t utf16len; // UTF-16 length 00944 00945 std::string::const_iterator i, ie = str.end(); 00946 for ( i = str.begin(); i != ie; i++ ) { 00947 utf8len = _utf8_char_length( static_cast<unsigned char>( *i ) ); // estimate bytes to load 00948 for ( size_t j = 0; j < utf8len; j++ ) { // load the needed UTF-8 bytes 00949 utf8buf[j] = ( static_cast<unsigned char>( *( i + j ) ) ); // we don't increment 'i' here just in case the estimate is wrong (shouldn't happen, but we're being careful) 00950 } 00951 utf8buf[utf8len] = 0; // nul terminate so we throw an exception before running off the end of the buffer 00952 utf8len = _utf8_to_utf32( utf8buf, uc ); // do the UTF-8 -> UTF-32 conversion 00953 i += utf8len - 1; // we subtract 1 for the increment of the 'for' loop 00954 00955 utf16len = _utf32_to_utf16( uc, utf16buff ); // UTF-32 -> UTF-16 conversion 00956 append( utf16buff, utf16len ); // append the characters to the string 00957 } 00958 return *this; 00959 } 00960 00961 UString& UString::assign( const char* c_str ) 00962 { 00963 std::string tmp( c_str ); 00964 return assign( tmp ); 00965 } 00966 00967 UString& UString::assign( const char* c_str, size_type num ) 00968 { 00969 std::string tmp; 00970 tmp.assign( c_str, num ); 00971 return assign( tmp ); 00972 } 00973 00974 UString& UString::append( const UString& str ) 00975 { 00976 mData.append( str.mData ); 00977 return *this; 00978 } 00979 00980 UString& UString::append( const code_point* str ) 00981 { 00982 mData.append( str ); 00983 return *this; 00984 } 00985 00986 UString& UString::append( const UString& str, size_type index, size_type len ) 00987 { 00988 mData.append( str.mData, index, len ); 00989 return *this; 00990 } 00991 00992 UString& UString::append( const code_point* str, size_type num ) 00993 { 00994 mData.append( str, num ); 00995 return *this; 00996 } 00997 00998 UString& UString::append( size_type num, code_point ch ) 00999 { 01000 mData.append( num, ch ); 01001 return *this; 01002 } 01003 01004 UString& UString::append( iterator start, iterator end ) 01005 { 01006 mData.append( start.mIter, end.mIter ); 01007 return *this; 01008 } 01009 01010 #if MYGUI_IS_NATIVE_WCHAR_T 01011 UString& UString::append( const wchar_t* w_str, size_type num ) 01012 { 01013 std::wstring tmp( w_str, num ); 01014 return append( tmp ); 01015 } 01016 01017 UString& UString::append( size_type num, wchar_t ch ) 01018 { 01019 return append( num, static_cast<unicode_char>( ch ) ); 01020 } 01021 #endif 01022 UString& UString::append( const char* c_str, size_type num ) 01023 { 01024 UString tmp( c_str, num ); 01025 append( tmp ); 01026 return *this; 01027 } 01028 01029 UString& UString::append( size_type num, char ch ) 01030 { 01031 append( num, static_cast<code_point>( ch ) ); 01032 return *this; 01033 } 01034 01035 UString& UString::append( size_type num, unicode_char ch ) 01036 { 01037 code_point cp[2] = {0, 0}; 01038 if ( _utf32_to_utf16( ch, cp ) == 2 ) { 01039 for ( size_type i = 0; i < num; i++ ) { 01040 append( 1, cp[0] ); 01041 append( 1, cp[1] ); 01042 } 01043 } else { 01044 for ( size_type i = 0; i < num; i++ ) { 01045 append( 1, cp[0] ); 01046 } 01047 } 01048 return *this; 01049 } 01050 01051 UString::iterator UString::insert( iterator i, const code_point& ch ) 01052 { 01053 iterator ret; 01054 ret.mIter = mData.insert( i.mIter, ch ); 01055 ret.mString = this; 01056 return ret; 01057 } 01058 01059 UString& UString::insert( size_type index, const UString& str ) 01060 { 01061 mData.insert( index, str.mData ); 01062 return *this; 01063 } 01064 01065 UString& UString::insert( size_type index1, const UString& str, size_type index2, size_type num ) 01066 { 01067 mData.insert( index1, str.mData, index2, num ); 01068 return *this; 01069 } 01070 01071 void UString::insert( iterator i, iterator start, iterator end ) 01072 { 01073 mData.insert( i.mIter, start.mIter, end.mIter ); 01074 } 01075 01076 UString& UString::insert( size_type index, const code_point* str, size_type num ) 01077 { 01078 mData.insert( index, str, num ); 01079 return *this; 01080 } 01081 01082 #if MYGUI_IS_NATIVE_WCHAR_T 01083 UString& UString::insert( size_type index, const wchar_t* w_str, size_type num ) 01084 { 01085 UString tmp( w_str, num ); 01086 insert( index, tmp ); 01087 return *this; 01088 } 01089 #endif 01090 01091 UString& UString::insert( size_type index, const char* c_str, size_type num ) 01092 { 01093 UString tmp( c_str, num ); 01094 insert( index, tmp ); 01095 return *this; 01096 } 01097 01098 UString& UString::insert( size_type index, size_type num, code_point ch ) 01099 { 01100 mData.insert( index, num, ch ); 01101 return *this; 01102 } 01103 01104 #if MYGUI_IS_NATIVE_WCHAR_T 01105 UString& UString::insert( size_type index, size_type num, wchar_t ch ) 01106 { 01107 insert( index, num, static_cast<unicode_char>( ch ) ); 01108 return *this; 01109 } 01110 #endif 01111 01112 UString& UString::insert( size_type index, size_type num, char ch ) 01113 { 01114 insert( index, num, static_cast<code_point>( ch ) ); 01115 return *this; 01116 } 01117 01118 UString& UString::insert( size_type index, size_type num, unicode_char ch ) 01119 { 01120 code_point cp[3] = {0, 0, 0}; 01121 size_t l = _utf32_to_utf16( ch, cp ); 01122 if ( l == 1 ) { 01123 return insert( index, num, cp[0] ); 01124 } 01125 for ( size_type c = 0; c < num; c++ ) { 01126 // insert in reverse order to preserve ordering after insert 01127 insert( index, 1, cp[1] ); 01128 insert( index, 1, cp[0] ); 01129 } 01130 return *this; 01131 } 01132 01133 void UString::insert( iterator i, size_type num, const code_point& ch ) 01134 { 01135 mData.insert( i.mIter, num, ch ); 01136 } 01137 #if MYGUI_IS_NATIVE_WCHAR_T 01138 void UString::insert( iterator i, size_type num, const wchar_t& ch ) 01139 { 01140 insert( i, num, static_cast<unicode_char>( ch ) ); 01141 } 01142 #endif 01143 01144 void UString::insert( iterator i, size_type num, const char& ch ) 01145 { 01146 insert( i, num, static_cast<code_point>( ch ) ); 01147 } 01148 01149 void UString::insert( iterator i, size_type num, const unicode_char& ch ) 01150 { 01151 code_point cp[3] = {0, 0, 0}; 01152 size_t l = _utf32_to_utf16( ch, cp ); 01153 if ( l == 1 ) { 01154 insert( i, num, cp[0] ); 01155 } else { 01156 for ( size_type c = 0; c < num; c++ ) { 01157 // insert in reverse order to preserve ordering after insert 01158 insert( i, 1, cp[1] ); 01159 insert( i, 1, cp[0] ); 01160 } 01161 } 01162 } 01163 01164 UString::iterator UString::erase( iterator loc ) 01165 { 01166 iterator ret; 01167 ret.mIter = mData.erase( loc.mIter ); 01168 ret.mString = this; 01169 return ret; 01170 } 01171 01172 UString::iterator UString::erase( iterator start, iterator end ) 01173 { 01174 iterator ret; 01175 ret.mIter = mData.erase( start.mIter, end.mIter ); 01176 ret.mString = this; 01177 return ret; 01178 } 01179 01180 UString& UString::erase( size_type index /*= 0*/, size_type num /*= npos */ ) 01181 { 01182 if ( num == npos ) 01183 mData.erase( index ); 01184 else 01185 mData.erase( index, num ); 01186 return *this; 01187 } 01188 01189 UString& UString::replace( size_type index1, size_type num1, const UString& str ) 01190 { 01191 mData.replace( index1, num1, str.mData, 0, npos ); 01192 return *this; 01193 } 01194 01195 UString& UString::replace( size_type index1, size_type num1, const UString& str, size_type num2 ) 01196 { 01197 mData.replace( index1, num1, str.mData, 0, num2 ); 01198 return *this; 01199 } 01200 01201 UString& UString::replace( size_type index1, size_type num1, const UString& str, size_type index2, size_type num2 ) 01202 { 01203 mData.replace( index1, num1, str.mData, index2, num2 ); 01204 return *this; 01205 } 01206 01207 UString& UString::replace( iterator start, iterator end, const UString& str, size_type num /*= npos */ ) 01208 { 01209 _const_fwd_iterator st(start); //Work around for gcc, allow it to find correct overload 01210 01211 size_type index1 = begin() - st; 01212 size_type num1 = end - st; 01213 return replace( index1, num1, str, 0, num ); 01214 } 01215 01216 UString& UString::replace( size_type index, size_type num1, size_type num2, code_point ch ) 01217 { 01218 mData.replace( index, num1, num2, ch ); 01219 return *this; 01220 } 01221 01222 UString& UString::replace( iterator start, iterator end, size_type num, code_point ch ) 01223 { 01224 _const_fwd_iterator st(start); //Work around for gcc, allow it to find correct overload 01225 01226 size_type index1 = begin() - st; 01227 size_type num1 = end - st; 01228 return replace( index1, num1, num, ch ); 01229 } 01230 01231 int UString::compare( const UString& str ) const 01232 { 01233 return mData.compare( str.mData ); 01234 } 01235 01236 int UString::compare( const code_point* str ) const 01237 { 01238 return mData.compare( str ); 01239 } 01240 01241 int UString::compare( size_type index, size_type length, const UString& str ) const 01242 { 01243 return mData.compare( index, length, str.mData ); 01244 } 01245 01246 int UString::compare( size_type index, size_type length, const UString& str, size_type index2, size_type length2 ) const 01247 { 01248 return mData.compare( index, length, str.mData, index2, length2 ); 01249 } 01250 01251 int UString::compare( size_type index, size_type length, const code_point* str, size_type length2 ) const 01252 { 01253 return mData.compare( index, length, str, length2 ); 01254 } 01255 01256 #if MYGUI_IS_NATIVE_WCHAR_T 01257 int UString::compare( size_type index, size_type length, const wchar_t* w_str, size_type length2 ) const 01258 { 01259 UString tmp( w_str, length2 ); 01260 return compare( index, length, tmp ); 01261 } 01262 #endif 01263 01264 int UString::compare( size_type index, size_type length, const char* c_str, size_type length2 ) const 01265 { 01266 UString tmp( c_str, length2 ); 01267 return compare( index, length, tmp ); 01268 } 01269 01270 UString::size_type UString::find( const UString& str, size_type index /*= 0 */ ) const 01271 { 01272 return mData.find( str.c_str(), index ); 01273 } 01274 01275 UString::size_type UString::find( const code_point* cp_str, size_type index, size_type length ) const 01276 { 01277 UString tmp( cp_str ); 01278 return mData.find( tmp.c_str(), index, length ); 01279 } 01280 01281 UString::size_type UString::find( const char* c_str, size_type index, size_type length ) const 01282 { 01283 UString tmp( c_str ); 01284 return mData.find( tmp.c_str(), index, length ); 01285 } 01286 01287 #if MYGUI_IS_NATIVE_WCHAR_T 01288 UString::size_type UString::find( const wchar_t* w_str, size_type index, size_type length ) const 01289 { 01290 UString tmp( w_str ); 01291 return mData.find( tmp.c_str(), index, length ); 01292 } 01293 #endif 01294 01295 UString::size_type UString::find( char ch, size_type index /*= 0 */ ) const 01296 { 01297 return find( static_cast<code_point>( ch ), index ); 01298 } 01299 01300 UString::size_type UString::find( code_point ch, size_type index /*= 0 */ ) const 01301 { 01302 return mData.find( ch, index ); 01303 } 01304 01305 #if MYGUI_IS_NATIVE_WCHAR_T 01306 UString::size_type UString::find( wchar_t ch, size_type index /*= 0 */ ) const 01307 { 01308 return find( static_cast<unicode_char>( ch ), index ); 01309 } 01310 #endif 01311 01312 UString::size_type UString::find( unicode_char ch, size_type index /*= 0 */ ) const 01313 { 01314 code_point cp[3] = {0, 0, 0}; 01315 size_t l = _utf32_to_utf16( ch, cp ); 01316 return find( UString( cp, l ), index ); 01317 } 01318 01319 UString::size_type UString::rfind( const UString& str, size_type index /*= 0 */ ) const 01320 { 01321 return mData.rfind( str.c_str(), index ); 01322 } 01323 01324 UString::size_type UString::rfind( const code_point* cp_str, size_type index, size_type num ) const 01325 { 01326 UString tmp( cp_str ); 01327 return mData.rfind( tmp.c_str(), index, num ); 01328 } 01329 01330 UString::size_type UString::rfind( const char* c_str, size_type index, size_type num ) const 01331 { 01332 UString tmp( c_str ); 01333 return mData.rfind( tmp.c_str(), index, num ); 01334 } 01335 01336 #if MYGUI_IS_NATIVE_WCHAR_T 01337 UString::size_type UString::rfind( const wchar_t* w_str, size_type index, size_type num ) const 01338 { 01339 UString tmp( w_str ); 01340 return mData.rfind( tmp.c_str(), index, num ); 01341 } 01342 #endif 01343 01344 UString::size_type UString::rfind( char ch, size_type index /*= 0 */ ) const 01345 { 01346 return rfind( static_cast<code_point>( ch ), index ); 01347 } 01348 01349 UString::size_type UString::rfind( code_point ch, size_type index ) const 01350 { 01351 return mData.rfind( ch, index ); 01352 } 01353 01354 #if MYGUI_IS_NATIVE_WCHAR_T 01355 UString::size_type UString::rfind( wchar_t ch, size_type index /*= 0 */ ) const 01356 { 01357 return rfind( static_cast<unicode_char>( ch ), index ); 01358 } 01359 #endif 01360 01361 UString::size_type UString::rfind( unicode_char ch, size_type index /*= 0 */ ) const 01362 { 01363 code_point cp[3] = {0, 0, 0}; 01364 size_t l = _utf32_to_utf16( ch, cp ); 01365 return rfind( UString( cp, l ), index ); 01366 } 01367 01368 UString::size_type UString::find_first_of( const UString &str, size_type index /*= 0*/, size_type num /*= npos */ ) const 01369 { 01370 size_type i = 0; 01371 const size_type len = length(); 01372 while ( i < num && ( index + i ) < len ) { 01373 unicode_char ch = getChar( index + i ); 01374 if ( str.inString( ch ) ) 01375 return index + i; 01376 i += _utf16_char_length( ch ); // increment by the Unicode character length 01377 } 01378 return npos; 01379 } 01380 01381 UString::size_type UString::find_first_of( code_point ch, size_type index /*= 0 */ ) const 01382 { 01383 UString tmp; 01384 tmp.assign( 1, ch ); 01385 return find_first_of( tmp, index ); 01386 } 01387 01388 UString::size_type UString::find_first_of( char ch, size_type index /*= 0 */ ) const 01389 { 01390 return find_first_of( static_cast<code_point>( ch ), index ); 01391 } 01392 01393 #if MYGUI_IS_NATIVE_WCHAR_T 01394 UString::size_type UString::find_first_of( wchar_t ch, size_type index /*= 0 */ ) const 01395 { 01396 return find_first_of( static_cast<unicode_char>( ch ), index ); 01397 } 01398 #endif 01399 01400 UString::size_type UString::find_first_of( unicode_char ch, size_type index /*= 0 */ ) const 01401 { 01402 code_point cp[3] = {0, 0, 0}; 01403 size_t l = _utf32_to_utf16( ch, cp ); 01404 return find_first_of( UString( cp, l ), index ); 01405 } 01406 01407 UString::size_type UString::find_first_not_of( const UString& str, size_type index /*= 0*/, size_type num /*= npos */ ) const 01408 { 01409 size_type i = 0; 01410 const size_type len = length(); 01411 while ( i < num && ( index + i ) < len ) { 01412 unicode_char ch = getChar( index + i ); 01413 if ( !str.inString( ch ) ) 01414 return index + i; 01415 i += _utf16_char_length( ch ); // increment by the Unicode character length 01416 } 01417 return npos; 01418 } 01419 01420 UString::size_type UString::find_first_not_of( code_point ch, size_type index /*= 0 */ ) const 01421 { 01422 UString tmp; 01423 tmp.assign( 1, ch ); 01424 return find_first_not_of( tmp, index ); 01425 } 01426 01427 UString::size_type UString::find_first_not_of( char ch, size_type index /*= 0 */ ) const 01428 { 01429 return find_first_not_of( static_cast<code_point>( ch ), index ); 01430 } 01431 01432 #if MYGUI_IS_NATIVE_WCHAR_T 01433 UString::size_type UString::find_first_not_of( wchar_t ch, size_type index /*= 0 */ ) const 01434 { 01435 return find_first_not_of( static_cast<unicode_char>( ch ), index ); 01436 } 01437 #endif 01438 01439 UString::size_type UString::find_first_not_of( unicode_char ch, size_type index /*= 0 */ ) const 01440 { 01441 code_point cp[3] = {0, 0, 0}; 01442 size_t l = _utf32_to_utf16( ch, cp ); 01443 return find_first_not_of( UString( cp, l ), index ); 01444 } 01445 01446 UString::size_type UString::find_last_of( const UString& str, size_type index /*= npos*/, size_type num /*= npos */ ) const 01447 { 01448 size_type i = 0; 01449 const size_type len = length(); 01450 if ( index > len ) index = len - 1; 01451 01452 while ( i < num && ( index - i ) != npos ) { 01453 size_type j = index - i; 01454 // careful to step full Unicode characters 01455 if ( j != 0 && _utf16_surrogate_follow( at( j ) ) && _utf16_surrogate_lead( at( j - 1 ) ) ) { 01456 j = index - ++i; 01457 } 01458 // and back to the usual dull test 01459 unicode_char ch = getChar( j ); 01460 if ( str.inString( ch ) ) 01461 return j; 01462 i++; 01463 } 01464 return npos; 01465 } 01466 01467 UString::size_type UString::find_last_of( code_point ch, size_type index /*= npos */ ) const 01468 { 01469 UString tmp; 01470 tmp.assign( 1, ch ); 01471 return find_last_of( tmp, index ); 01472 } 01473 01474 #if MYGUI_IS_NATIVE_WCHAR_T 01475 UString::size_type UString::find_last_of( wchar_t ch, size_type index /*= npos */ ) const 01476 { 01477 return find_last_of( static_cast<unicode_char>( ch ), index ); 01478 } 01479 #endif 01480 01481 UString::size_type UString::find_last_of( unicode_char ch, size_type index /*= npos */ ) const 01482 { 01483 code_point cp[3] = {0, 0, 0}; 01484 size_t l = _utf32_to_utf16( ch, cp ); 01485 return find_last_of( UString( cp, l ), index ); 01486 } 01487 01488 UString::size_type UString::find_last_not_of( const UString& str, size_type index /*= npos*/, size_type num /*= npos */ ) const 01489 { 01490 size_type i = 0; 01491 const size_type len = length(); 01492 if ( index > len ) index = len - 1; 01493 01494 while ( i < num && ( index - i ) != npos ) { 01495 size_type j = index - i; 01496 // careful to step full Unicode characters 01497 if ( j != 0 && _utf16_surrogate_follow( at( j ) ) && _utf16_surrogate_lead( at( j - 1 ) ) ) { 01498 j = index - ++i; 01499 } 01500 // and back to the usual dull test 01501 unicode_char ch = getChar( j ); 01502 if ( !str.inString( ch ) ) 01503 return j; 01504 i++; 01505 } 01506 return npos; 01507 } 01508 01509 UString::size_type UString::find_last_not_of( code_point ch, size_type index /*= npos */ ) const 01510 { 01511 UString tmp; 01512 tmp.assign( 1, ch ); 01513 return find_last_not_of( tmp, index ); 01514 } 01515 01516 UString::size_type UString::find_last_not_of( char ch, size_type index /*= npos */ ) const 01517 { 01518 return find_last_not_of( static_cast<code_point>( ch ), index ); 01519 } 01520 01521 #if MYGUI_IS_NATIVE_WCHAR_T 01522 UString::size_type UString::find_last_not_of( wchar_t ch, size_type index /*= npos */ ) const 01523 { 01524 return find_last_not_of( static_cast<unicode_char>( ch ), index ); 01525 } 01526 #endif 01527 01528 UString::size_type UString::find_last_not_of( unicode_char ch, size_type index /*= npos */ ) const 01529 { 01530 code_point cp[3] = {0, 0, 0}; 01531 size_t l = _utf32_to_utf16( ch, cp ); 01532 return find_last_not_of( UString( cp, l ), index ); 01533 } 01534 01535 bool UString::operator<( const UString& right ) const 01536 { 01537 return compare( right ) < 0; 01538 } 01539 01540 bool UString::operator<=( const UString& right ) const 01541 { 01542 return compare( right ) <= 0; 01543 } 01544 01545 UString& UString::operator=( const UString& s ) 01546 { 01547 return assign( s ); 01548 } 01549 01550 UString& UString::operator=( code_point ch ) 01551 { 01552 clear(); 01553 return append( 1, ch ); 01554 } 01555 01556 UString& UString::operator=( char ch ) 01557 { 01558 clear(); 01559 return append( 1, ch ); 01560 } 01561 01562 #if MYGUI_IS_NATIVE_WCHAR_T 01563 UString& UString::operator=( wchar_t ch ) 01564 { 01565 clear(); 01566 return append( 1, ch ); 01567 } 01568 #endif 01569 01570 UString& UString::operator=( unicode_char ch ) 01571 { 01572 clear(); 01573 return append( 1, ch ); 01574 } 01575 01576 bool UString::operator>( const UString& right ) const 01577 { 01578 return compare( right ) > 0; 01579 } 01580 01581 bool UString::operator>=( const UString& right ) const 01582 { 01583 return compare( right ) >= 0; 01584 } 01585 01586 bool UString::operator==( const UString& right ) const 01587 { 01588 return compare( right ) == 0; 01589 } 01590 01591 bool UString::operator!=( const UString& right ) const 01592 { 01593 return !operator==( right ); 01594 } 01595 01596 UString::code_point& UString::operator[]( size_type index ) 01597 { 01598 return at( index ); 01599 } 01600 01601 const UString::code_point& UString::operator[]( size_type index ) const 01602 { 01603 return at( index ); 01604 } 01605 01606 UString::operator std::string() const 01607 { 01608 return std::string( asUTF8() ); 01609 } 01610 01612 UString::operator std::wstring() const 01613 { 01614 return std::wstring( asWStr() ); 01615 } 01616 01617 01618 bool UString::_utf16_independent_char( code_point cp ) 01619 { 01620 if ( 0xD800 <= cp && cp <= 0xDFFF ) // tests if the cp is within the surrogate pair range 01621 return false; // it matches a surrogate pair signature 01622 return true; // everything else is a standalone code point 01623 } 01624 01625 bool UString::_utf16_surrogate_lead( code_point cp ) 01626 { 01627 if ( 0xD800 <= cp && cp <= 0xDBFF ) // tests if the cp is within the 2nd word of a surrogate pair 01628 return true; // it is a 1st word 01629 return false; // it isn't 01630 } 01631 01632 bool UString::_utf16_surrogate_follow( code_point cp ) 01633 { 01634 if ( 0xDC00 <= cp && cp <= 0xDFFF ) // tests if the cp is within the 2nd word of a surrogate pair 01635 return true; // it is a 2nd word 01636 return false; // everything else isn't 01637 } 01638 01639 size_t UString::_utf16_char_length( code_point cp ) 01640 { 01641 if ( 0xD800 <= cp && cp <= 0xDBFF ) // test if cp is the beginning of a surrogate pair 01642 return 2; // if it is, then we are 2 words long 01643 return 1; // otherwise we are only 1 word long 01644 } 01645 01646 size_t UString::_utf16_char_length( unicode_char uc ) 01647 { 01648 if ( uc > 0xFFFF ) // test if uc is greater than the single word maximum 01649 return 2; // if so, we need a surrogate pair 01650 return 1; // otherwise we can stuff it into a single word 01651 } 01652 01653 size_t UString::_utf16_to_utf32( const code_point in_cp[2], unicode_char& out_uc ) 01654 { 01655 const code_point& cp1 = in_cp[0]; 01656 const code_point& cp2 = in_cp[1]; 01657 bool wordPair = false; 01658 01659 // does it look like a surrogate pair? 01660 if ( 0xD800 <= cp1 && cp1 <= 0xDBFF ) { 01661 // looks like one, but does the other half match the algorithm as well? 01662 if ( 0xDC00 <= cp2 && cp2 <= 0xDFFF ) 01663 wordPair = true; // yep! 01664 } 01665 01666 if ( !wordPair ) { // if we aren't a 100% authentic surrogate pair, then just copy the value 01667 out_uc = cp1; 01668 return 1; 01669 } 01670 01671 unsigned short cU = cp1, cL = cp2; // copy upper and lower words of surrogate pair to writable buffers 01672 cU -= 0xD800; // remove the encoding markers 01673 cL -= 0xDC00; 01674 01675 out_uc = ( cU & 0x03FF ) << 10; // grab the 10 upper bits and set them in their proper location 01676 out_uc |= ( cL & 0x03FF ); // combine in the lower 10 bits 01677 out_uc += 0x10000; // add back in the value offset 01678 01679 return 2; // this whole operation takes to words, so that's what we'll return 01680 } 01681 01682 size_t UString::_utf32_to_utf16( const unicode_char& in_uc, code_point out_cp[2] ) 01683 { 01684 if ( in_uc <= 0xFFFF ) { // we blindly preserve sentinel values because our decoder understands them 01685 out_cp[0] = static_cast<code_point>(in_uc); 01686 return 1; 01687 } 01688 unicode_char uc = in_uc; // copy to writable buffer 01689 unsigned short tmp; // single code point buffer 01690 uc -= 0x10000; // subtract value offset 01691 01692 //process upper word 01693 tmp = static_cast<unsigned short>(( uc >> 10 ) & 0x03FF); // grab the upper 10 bits 01694 tmp += 0xD800; // add encoding offset 01695 out_cp[0] = tmp; // write 01696 01697 // process lower word 01698 tmp = static_cast<unsigned short>(uc & 0x03FF); // grab the lower 10 bits 01699 tmp += 0xDC00; // add encoding offset 01700 out_cp[1] = tmp; // write 01701 01702 return 2; // return used word count (2 for surrogate pairs) 01703 } 01704 01705 bool UString::_utf8_start_char( unsigned char cp ) 01706 { 01707 return ( cp & ~_cont_mask ) != _cont; 01708 } 01709 01710 size_t UString::_utf8_char_length( unsigned char cp ) 01711 { 01712 if ( !( cp & 0x80 ) ) return 1; 01713 if (( cp & ~_lead1_mask ) == _lead1 ) return 2; 01714 if (( cp & ~_lead2_mask ) == _lead2 ) return 3; 01715 if (( cp & ~_lead3_mask ) == _lead3 ) return 4; 01716 if (( cp & ~_lead4_mask ) == _lead4 ) return 5; 01717 if (( cp & ~_lead5_mask ) == _lead5 ) return 6; 01718 01719 return 1; 01720 //throw invalid_data( "invalid UTF-8 sequence header value" ); 01721 } 01722 01723 size_t UString::_utf8_char_length( unicode_char uc ) 01724 { 01725 /* 01726 7 bit: U-00000000 - U-0000007F: 0xxxxxxx 01727 11 bit: U-00000080 - U-000007FF: 110xxxxx 10xxxxxx 01728 16 bit: U-00000800 - U-0000FFFF: 1110xxxx 10xxxxxx 10xxxxxx 01729 21 bit: U-00010000 - U-001FFFFF: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx 01730 26 bit: U-00200000 - U-03FFFFFF: 111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 01731 31 bit: U-04000000 - U-7FFFFFFF: 1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 01732 */ 01733 if ( !( uc & ~0x0000007F ) ) return 1; 01734 if ( !( uc & ~0x000007FF ) ) return 2; 01735 if ( !( uc & ~0x0000FFFF ) ) return 3; 01736 if ( !( uc & ~0x001FFFFF ) ) return 4; 01737 if ( !( uc & ~0x03FFFFFF ) ) return 5; 01738 if ( !( uc & ~0x7FFFFFFF ) ) return 6; 01739 01740 return 1; 01741 //throw invalid_data( "invalid UTF-32 value" ); 01742 } 01743 01744 size_t UString::_utf8_to_utf32( const unsigned char in_cp[6], unicode_char& out_uc ) 01745 { 01746 size_t len = _utf8_char_length( in_cp[0] ); 01747 if ( len == 1 ) { // if we are only 1 byte long, then just grab it and exit 01748 out_uc = in_cp[0]; 01749 return 1; 01750 } 01751 01752 unicode_char c = 0; // temporary buffer 01753 size_t i = 0; 01754 switch ( len ) { // load header byte 01755 case 6: 01756 c = in_cp[i] & _lead5_mask; 01757 break; 01758 case 5: 01759 c = in_cp[i] & _lead4_mask; 01760 break; 01761 case 4: 01762 c = in_cp[i] & _lead3_mask; 01763 break; 01764 case 3: 01765 c = in_cp[i] & _lead2_mask; 01766 break; 01767 case 2: 01768 c = in_cp[i] & _lead1_mask; 01769 break; 01770 } 01771 01772 // load each continuation byte 01773 for ( ++i; i < len; i++ ) 01774 { 01775 if (( in_cp[i] & ~_cont_mask ) != _cont ) 01776 { 01777 //throw invalid_data( "bad UTF-8 continuation byte" ); 01778 out_uc = in_cp[0]; 01779 return 1; 01780 } 01781 c <<= 6; 01782 c |= ( in_cp[i] & _cont_mask ); 01783 } 01784 01785 out_uc = c; // write the final value and return the used byte length 01786 return len; 01787 } 01788 01789 size_t UString::_utf32_to_utf8( const unicode_char& in_uc, unsigned char out_cp[6] ) 01790 { 01791 size_t len = _utf8_char_length( in_uc ); // predict byte length of sequence 01792 unicode_char c = in_uc; // copy to temp buffer 01793 01794 //stuff all of the lower bits 01795 for ( size_t i = len - 1; i > 0; i-- ) { 01796 out_cp[i] = static_cast<unsigned char>((( c ) & _cont_mask ) | _cont); 01797 c >>= 6; 01798 } 01799 01800 //now write the header byte 01801 switch ( len ) { 01802 case 6: 01803 out_cp[0] = static_cast<unsigned char>((( c ) & _lead5_mask ) | _lead5); 01804 break; 01805 case 5: 01806 out_cp[0] = static_cast<unsigned char>((( c ) & _lead4_mask ) | _lead4); 01807 break; 01808 case 4: 01809 out_cp[0] = static_cast<unsigned char>((( c ) & _lead3_mask ) | _lead3); 01810 break; 01811 case 3: 01812 out_cp[0] = static_cast<unsigned char>((( c ) & _lead2_mask ) | _lead2); 01813 break; 01814 case 2: 01815 out_cp[0] = static_cast<unsigned char>((( c ) & _lead1_mask ) | _lead1); 01816 break; 01817 case 1: 01818 default: 01819 out_cp[0] = static_cast<unsigned char>(( c ) & 0x7F); 01820 break; 01821 } 01822 01823 // return the byte length of the sequence 01824 return len; 01825 } 01826 01827 UString::size_type UString::_verifyUTF8( const unsigned char* c_str ) 01828 { 01829 std::string tmp( reinterpret_cast<const char*>( c_str ) ); 01830 return _verifyUTF8( tmp ); 01831 } 01832 01833 UString::size_type UString::_verifyUTF8( const std::string& str ) 01834 { 01835 std::string::const_iterator i, ie = str.end(); 01836 i = str.begin(); 01837 size_type length = 0; 01838 01839 while ( i != ie ) { 01840 // characters pass until we find an extended sequence 01841 if (( *i ) & 0x80 ) { 01842 unsigned char c = ( *i ); 01843 size_t contBytes = 0; 01844 01845 // get continuation byte count and test for overlong sequences 01846 if (( c & ~_lead1_mask ) == _lead1 ) { // 1 additional byte 01847 if ( c == _lead1 ) 01848 { 01849 //throw invalid_data( "overlong UTF-8 sequence" ); 01850 return str.size(); 01851 } 01852 contBytes = 1; 01853 01854 } else if (( c & ~_lead2_mask ) == _lead2 ) { // 2 additional bytes 01855 contBytes = 2; 01856 if ( c == _lead2 ) { // possible overlong UTF-8 sequence 01857 c = ( *( i + 1 ) ); // look ahead to next byte in sequence 01858 if (( c & _lead2 ) == _cont ) 01859 { 01860 //throw invalid_data( "overlong UTF-8 sequence" ); 01861 return str.size(); 01862 } 01863 } 01864 01865 } else if (( c & ~_lead3_mask ) == _lead3 ) { // 3 additional bytes 01866 contBytes = 3; 01867 if ( c == _lead3 ) { // possible overlong UTF-8 sequence 01868 c = ( *( i + 1 ) ); // look ahead to next byte in sequence 01869 if (( c & _lead3 ) == _cont ) 01870 { 01871 //throw invalid_data( "overlong UTF-8 sequence" ); 01872 return str.size(); 01873 } 01874 } 01875 01876 } else if (( c & ~_lead4_mask ) == _lead4 ) { // 4 additional bytes 01877 contBytes = 4; 01878 if ( c == _lead4 ) { // possible overlong UTF-8 sequence 01879 c = ( *( i + 1 ) ); // look ahead to next byte in sequence 01880 if (( c & _lead4 ) == _cont ) 01881 { 01882 //throw invalid_data( "overlong UTF-8 sequence" ); 01883 return str.size(); 01884 } 01885 } 01886 01887 } else if (( c & ~_lead5_mask ) == _lead5 ) { // 5 additional bytes 01888 contBytes = 5; 01889 if ( c == _lead5 ) { // possible overlong UTF-8 sequence 01890 c = ( *( i + 1 ) ); // look ahead to next byte in sequence 01891 if (( c & _lead5 ) == _cont ) 01892 { 01893 //throw invalid_data( "overlong UTF-8 sequence" ); 01894 return str.size(); 01895 } 01896 } 01897 } 01898 01899 // check remaining continuation bytes for 01900 while ( contBytes-- ) { 01901 c = ( *( ++i ) ); // get next byte in sequence 01902 if (( c & ~_cont_mask ) != _cont ) 01903 { 01904 //throw invalid_data( "bad UTF-8 continuation byte" ); 01905 return str.size(); 01906 } 01907 } 01908 } 01909 length++; 01910 i++; 01911 } 01912 return length; 01913 } 01914 01915 void UString::_init() 01916 { 01917 m_buffer.mVoidBuffer = 0; 01918 m_bufferType = bt_none; 01919 m_bufferSize = 0; 01920 } 01921 01922 void UString::_cleanBuffer() const 01923 { 01924 if ( m_buffer.mVoidBuffer != 0 ) { 01925 switch ( m_bufferType ) { 01926 case bt_string: 01927 delete m_buffer.mStrBuffer; 01928 break; 01929 case bt_wstring: 01930 delete m_buffer.mWStrBuffer; 01931 break; 01932 case bt_utf32string: 01933 delete m_buffer.mUTF32StrBuffer; 01934 break; 01935 case bt_none: // under the worse of circumstances, this is all we can do, and hope it works out 01936 default: 01937 //delete m_buffer.mVoidBuffer; 01938 // delete void* is undefined, don't do that 01939 assert("This should never happen - mVoidBuffer should never contain something if we " 01940 "don't know the type"); 01941 break; 01942 } 01943 m_buffer.mVoidBuffer = 0; 01944 m_bufferSize = 0; 01945 m_bufferType = bt_none; 01946 } 01947 } 01948 01949 void UString::_getBufferStr() const 01950 { 01951 if ( m_bufferType != bt_string ) { 01952 _cleanBuffer(); 01953 m_buffer.mStrBuffer = new std::string(); 01954 m_bufferType = bt_string; 01955 } 01956 m_buffer.mStrBuffer->clear(); 01957 } 01958 01959 void UString::_getBufferWStr() const 01960 { 01961 if ( m_bufferType != bt_wstring ) { 01962 _cleanBuffer(); 01963 m_buffer.mWStrBuffer = new std::wstring(); 01964 m_bufferType = bt_wstring; 01965 } 01966 m_buffer.mWStrBuffer->clear(); 01967 } 01968 01969 void UString::_getBufferUTF32Str() const 01970 { 01971 if ( m_bufferType != bt_utf32string ) { 01972 _cleanBuffer(); 01973 m_buffer.mUTF32StrBuffer = new utf32string(); 01974 m_bufferType = bt_utf32string; 01975 } 01976 m_buffer.mUTF32StrBuffer->clear(); 01977 } 01978 01979 void UString::_load_buffer_UTF8() const 01980 { 01981 _getBufferStr(); 01982 std::string& buffer = ( *m_buffer.mStrBuffer ); 01983 buffer.reserve( length() ); 01984 01985 unsigned char utf8buf[6]; 01986 char* charbuf = ( char* )utf8buf; 01987 unicode_char c; 01988 size_t len; 01989 01990 const_iterator i, ie = end(); 01991 for ( i = begin(); i != ie; i.moveNext() ) { 01992 c = i.getCharacter(); 01993 len = _utf32_to_utf8( c, utf8buf ); 01994 size_t j = 0; 01995 while ( j < len ) 01996 buffer.push_back( charbuf[j++] ); 01997 } 01998 } 01999 02000 void UString::_load_buffer_WStr() const 02001 { 02002 _getBufferWStr(); 02003 std::wstring& buffer = ( *m_buffer.mWStrBuffer ); 02004 buffer.reserve( length() ); // may over reserve, but should be close enough 02005 #ifdef WCHAR_UTF16 // wchar_t matches UTF-16 02006 const_iterator i, ie = end(); 02007 for ( i = begin(); i != ie; ++i ) { 02008 buffer.push_back(( wchar_t )( *i ) ); 02009 } 02010 #else // wchar_t fits UTF-32 02011 unicode_char c; 02012 const_iterator i, ie = end(); 02013 for ( i = begin(); i != ie; i.moveNext() ) { 02014 c = i.getCharacter(); 02015 buffer.push_back(( wchar_t )c ); 02016 } 02017 #endif 02018 } 02019 02020 void UString::_load_buffer_UTF32() const 02021 { 02022 _getBufferUTF32Str(); 02023 utf32string& buffer = ( *m_buffer.mUTF32StrBuffer ); 02024 buffer.reserve( length() ); // may over reserve, but should be close enough 02025 02026 unicode_char c; 02027 02028 const_iterator i, ie = end(); 02029 for ( i = begin(); i != ie; i.moveNext() ) { 02030 c = i.getCharacter(); 02031 buffer.push_back( c ); 02032 } 02033 } 02034 02035 } // namespace MyGUI