MyGUI
3.0.3
|
00001 00006 /* 00007 This file is part of MyGUI. 00008 00009 MyGUI is free software: you can redistribute it and/or modify 00010 it under the terms of the GNU Lesser General Public License as published by 00011 the Free Software Foundation, either version 3 of the License, or 00012 (at your option) any later version. 00013 00014 MyGUI is distributed in the hope that it will be useful, 00015 but WITHOUT ANY WARRANTY; without even the implied warranty of 00016 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00017 GNU Lesser General Public License for more details. 00018 00019 You should have received a copy of the GNU Lesser General Public License 00020 along with MyGUI. If not, see <http://www.gnu.org/licenses/>. 00021 */ 00022 #include "MyGUI_Precompiled.h" 00023 #include "MyGUI_UString.h" 00024 00025 namespace MyGUI 00026 { 00027 00028 //-------------------------------------------------------------------------- 00029 UString::_base_iterator::_base_iterator() 00030 { 00031 mString = 0; 00032 } 00033 //-------------------------------------------------------------------------- 00034 void UString::_base_iterator::_seekFwd( size_type c ) 00035 { 00036 mIter += c; 00037 } 00038 //-------------------------------------------------------------------------- 00039 void UString::_base_iterator::_seekRev( size_type c ) 00040 { 00041 mIter -= c; 00042 } 00043 //-------------------------------------------------------------------------- 00044 void UString::_base_iterator::_become( const _base_iterator& i ) 00045 { 00046 mIter = i.mIter; 00047 mString = i.mString; 00048 } 00049 //-------------------------------------------------------------------------- 00050 bool UString::_base_iterator::_test_begin() const 00051 { 00052 return mIter == mString->mData.begin(); 00053 } 00054 //-------------------------------------------------------------------------- 00055 bool UString::_base_iterator::_test_end() const 00056 { 00057 return mIter == mString->mData.end(); 00058 } 00059 
//-------------------------------------------------------------------------- 00060 UString::size_type UString::_base_iterator::_get_index() const 00061 { 00062 return mIter - mString->mData.begin(); 00063 } 00064 //-------------------------------------------------------------------------- 00065 void UString::_base_iterator::_jump_to( size_type index ) 00066 { 00067 mIter = mString->mData.begin() + index; 00068 } 00069 //-------------------------------------------------------------------------- 00070 UString::unicode_char UString::_base_iterator::_getCharacter() const 00071 { 00072 size_type current_index = _get_index(); 00073 return mString->getChar( current_index ); 00074 } 00075 //-------------------------------------------------------------------------- 00076 int UString::_base_iterator::_setCharacter( unicode_char uc ) 00077 { 00078 size_type current_index = _get_index(); 00079 int change = mString->setChar( current_index, uc ); 00080 _jump_to( current_index ); 00081 return change; 00082 } 00083 //-------------------------------------------------------------------------- 00084 void UString::_base_iterator::_moveNext() 00085 { 00086 _seekFwd( 1 ); // move 1 code point forward 00087 if ( _test_end() ) return; // exit if we hit the end 00088 if ( _utf16_surrogate_follow( mIter[0] ) ) { 00089 // landing on a follow code point means we might be part of a bigger character 00090 // so we test for that 00091 code_point lead_half = 0; 00092 //NB: we can't possibly be at the beginning here, so no need to test 00093 lead_half = mIter[-1]; // check the previous code point to see if we're part of a surrogate pair 00094 if ( _utf16_surrogate_lead( lead_half ) ) { 00095 _seekFwd( 1 ); // if so, then advance 1 more code point 00096 } 00097 } 00098 } 00099 //-------------------------------------------------------------------------- 00100 void UString::_base_iterator::_movePrev() 00101 { 00102 _seekRev( 1 ); // move 1 code point backwards 00103 if ( _test_begin() ) return; // 
exit if we hit the beginning 00104 if ( _utf16_surrogate_follow( mIter[0] ) ) { 00105 // landing on a follow code point means we might be part of a bigger character 00106 // so we test for that 00107 code_point lead_half = 0; 00108 lead_half = mIter[-1]; // check the previous character to see if we're part of a surrogate pair 00109 if ( _utf16_surrogate_lead( lead_half ) ) { 00110 _seekRev( 1 ); // if so, then rewind 1 more code point 00111 } 00112 } 00113 } 00114 //-------------------------------------------------------------------------- 00115 //-------------------------------------------------------------------------- 00116 //-------------------------------------------------------------------------- 00117 //-------------------------------------------------------------------------- 00118 UString::_fwd_iterator::_fwd_iterator() 00119 { 00120 00121 } 00122 //-------------------------------------------------------------------------- 00123 UString::_fwd_iterator::_fwd_iterator( const _fwd_iterator& i ) 00124 { 00125 _become( i ); 00126 } 00127 //-------------------------------------------------------------------------- 00128 UString::_fwd_iterator& UString::_fwd_iterator::operator++() 00129 { 00130 _seekFwd( 1 ); 00131 return *this; 00132 } 00133 //-------------------------------------------------------------------------- 00134 UString::_fwd_iterator UString::_fwd_iterator::operator++( int ) 00135 { 00136 _fwd_iterator tmp( *this ); 00137 _seekFwd( 1 ); 00138 return tmp; 00139 } 00140 //-------------------------------------------------------------------------- 00141 UString::_fwd_iterator& UString::_fwd_iterator::operator--() 00142 { 00143 _seekRev( 1 ); 00144 return *this; 00145 } 00146 //-------------------------------------------------------------------------- 00147 UString::_fwd_iterator UString::_fwd_iterator::operator--( int ) 00148 { 00149 _fwd_iterator tmp( *this ); 00150 _seekRev( 1 ); 00151 return tmp; 00152 } 00153 
//-------------------------------------------------------------------------- 00154 UString::_fwd_iterator UString::_fwd_iterator::operator+( difference_type n ) 00155 { 00156 _fwd_iterator tmp( *this ); 00157 if ( n < 0 ) 00158 tmp._seekRev( -n ); 00159 else 00160 tmp._seekFwd( n ); 00161 return tmp; 00162 } 00163 //-------------------------------------------------------------------------- 00164 UString::_fwd_iterator UString::_fwd_iterator::operator-( difference_type n ) 00165 { 00166 _fwd_iterator tmp( *this ); 00167 if ( n < 0 ) 00168 tmp._seekFwd( -n ); 00169 else 00170 tmp._seekRev( n ); 00171 return tmp; 00172 } 00173 //-------------------------------------------------------------------------- 00174 UString::_fwd_iterator& UString::_fwd_iterator::operator+=( difference_type n ) 00175 { 00176 if ( n < 0 ) 00177 _seekRev( -n ); 00178 else 00179 _seekFwd( n ); 00180 return *this; 00181 } 00182 //-------------------------------------------------------------------------- 00183 UString::_fwd_iterator& UString::_fwd_iterator::operator-=( difference_type n ) 00184 { 00185 if ( n < 0 ) 00186 _seekFwd( -n ); 00187 else 00188 _seekRev( n ); 00189 return *this; 00190 } 00191 //-------------------------------------------------------------------------- 00192 UString::value_type& UString::_fwd_iterator::operator*() const 00193 { 00194 return *mIter; 00195 } 00196 //-------------------------------------------------------------------------- 00197 UString::value_type& UString::_fwd_iterator::operator[]( difference_type n ) const 00198 { 00199 _fwd_iterator tmp( *this ); 00200 tmp += n; 00201 return *tmp; 00202 } 00203 //-------------------------------------------------------------------------- 00204 UString::_fwd_iterator& UString::_fwd_iterator::moveNext() 00205 { 00206 _moveNext(); 00207 return *this; 00208 } 00209 //-------------------------------------------------------------------------- 00210 UString::_fwd_iterator& UString::_fwd_iterator::movePrev() 00211 { 00212 
_movePrev(); 00213 return *this; 00214 } 00215 //-------------------------------------------------------------------------- 00216 UString::unicode_char UString::_fwd_iterator::getCharacter() const 00217 { 00218 return _getCharacter(); 00219 } 00220 //-------------------------------------------------------------------------- 00221 int UString::_fwd_iterator::setCharacter( unicode_char uc ) 00222 { 00223 return _setCharacter( uc ); 00224 } 00225 //-------------------------------------------------------------------------- 00226 //-------------------------------------------------------------------------- 00227 //-------------------------------------------------------------------------- 00228 //-------------------------------------------------------------------------- 00229 UString::_const_fwd_iterator::_const_fwd_iterator() 00230 { 00231 00232 } 00233 //-------------------------------------------------------------------------- 00234 UString::_const_fwd_iterator::_const_fwd_iterator( const _const_fwd_iterator& i ) 00235 { 00236 _become( i ); 00237 } 00238 //-------------------------------------------------------------------------- 00239 UString::_const_fwd_iterator::_const_fwd_iterator( const _fwd_iterator& i ) 00240 { 00241 _become( i ); 00242 } 00243 //-------------------------------------------------------------------------- 00244 UString::_const_fwd_iterator& UString::_const_fwd_iterator::operator++() 00245 { 00246 _seekFwd( 1 ); 00247 return *this; 00248 } 00249 //-------------------------------------------------------------------------- 00250 UString::_const_fwd_iterator UString::_const_fwd_iterator::operator++( int ) 00251 { 00252 _const_fwd_iterator tmp( *this ); 00253 _seekFwd( 1 ); 00254 return tmp; 00255 } 00256 //-------------------------------------------------------------------------- 00257 UString::_const_fwd_iterator& UString::_const_fwd_iterator::operator--() 00258 { 00259 _seekRev( 1 ); 00260 return *this; 00261 } 00262 
//-------------------------------------------------------------------------- 00263 UString::_const_fwd_iterator UString::_const_fwd_iterator::operator--( int ) 00264 { 00265 _const_fwd_iterator tmp( *this ); 00266 _seekRev( 1 ); 00267 return tmp; 00268 } 00269 //-------------------------------------------------------------------------- 00270 UString::_const_fwd_iterator UString::_const_fwd_iterator::operator+( difference_type n ) 00271 { 00272 _const_fwd_iterator tmp( *this ); 00273 if ( n < 0 ) 00274 tmp._seekRev( -n ); 00275 else 00276 tmp._seekFwd( n ); 00277 return tmp; 00278 } 00279 //-------------------------------------------------------------------------- 00280 UString::_const_fwd_iterator UString::_const_fwd_iterator::operator-( difference_type n ) 00281 { 00282 _const_fwd_iterator tmp( *this ); 00283 if ( n < 0 ) 00284 tmp._seekFwd( -n ); 00285 else 00286 tmp._seekRev( n ); 00287 return tmp; 00288 } 00289 //-------------------------------------------------------------------------- 00290 UString::_const_fwd_iterator& UString::_const_fwd_iterator::operator+=( difference_type n ) 00291 { 00292 if ( n < 0 ) 00293 _seekRev( -n ); 00294 else 00295 _seekFwd( n ); 00296 return *this; 00297 } 00298 //-------------------------------------------------------------------------- 00299 UString::_const_fwd_iterator& UString::_const_fwd_iterator::operator-=( difference_type n ) 00300 { 00301 if ( n < 0 ) 00302 _seekFwd( -n ); 00303 else 00304 _seekRev( n ); 00305 return *this; 00306 } 00307 //-------------------------------------------------------------------------- 00308 const UString::value_type& UString::_const_fwd_iterator::operator*() const 00309 { 00310 return *mIter; 00311 } 00312 //-------------------------------------------------------------------------- 00313 const UString::value_type& UString::_const_fwd_iterator::operator[]( difference_type n ) const 00314 { 00315 _const_fwd_iterator tmp( *this ); 00316 tmp += n; 00317 return *tmp; 00318 } 00319 
//--------------------------------------------------------------------------
// Advances to the next character, stepping over a surrogate pair as a unit.
UString::_const_fwd_iterator& UString::_const_fwd_iterator::moveNext()
{
    _moveNext();
    return *this;
}
//--------------------------------------------------------------------------
// Steps back to the previous character, stepping over a surrogate pair as a unit.
UString::_const_fwd_iterator& UString::_const_fwd_iterator::movePrev()
{
    _movePrev();
    return *this;
}
//--------------------------------------------------------------------------
// Returns the Unicode character at the current position.
UString::unicode_char UString::_const_fwd_iterator::getCharacter() const
{
    return _getCharacter();
}
//--------------------------------------------------------------------------
//--------------------------------------------------------------------------
//--------------------------------------------------------------------------
//--------------------------------------------------------------------------
// Default constructor; relies on the base class default constructor.
UString::_rev_iterator::_rev_iterator()
{
}
//--------------------------------------------------------------------------
// Copy constructor: takes over the string and position of i.
UString::_rev_iterator::_rev_iterator( const _rev_iterator& i )
{
    _become( i );
}
//--------------------------------------------------------------------------
// Pre-increment: advancing a reverse iterator moves the underlying position backward.
UString::_rev_iterator& UString::_rev_iterator::operator++()
{
    _seekRev( 1 );
    return *this;
}
//--------------------------------------------------------------------------
// Post-increment: moves the underlying position backward and returns the previous iterator.
UString::_rev_iterator UString::_rev_iterator::operator++( int )
{
    _rev_iterator tmp( *this );
    _seekRev( 1 );
    return tmp;
}
//--------------------------------------------------------------------------
// Pre-decrement: moves the underlying position forward.
UString::_rev_iterator& UString::_rev_iterator::operator--()
{
    _seekFwd( 1 );
    return *this;
}
//--------------------------------------------------------------------------
// Post-decrement: moves the underlying position forward and returns the previous iterator.
UString::_rev_iterator UString::_rev_iterator::operator--( int )
{
    _rev_iterator tmp( *this );
    _seekFwd( 1 );
    return tmp;
}
//--------------------------------------------------------------------------
// Returns a copy advanced by n in reverse order (underlying position moves backward for n >= 0).
UString::_rev_iterator UString::_rev_iterator::operator+( difference_type n )
{
    _rev_iterator tmp( *this );
    if ( n < 0 )
        tmp._seekFwd( -n );
    else
        tmp._seekRev( n );
    return tmp;
}
//--------------------------------------------------------------------------
// Returns a copy moved back by n in reverse order (underlying position moves forward for n >= 0).
UString::_rev_iterator UString::_rev_iterator::operator-( difference_type n )
{
    _rev_iterator tmp( *this );
    if ( n < 0 )
        tmp._seekRev( -n );
    else
        tmp._seekFwd( n );
    return tmp;
}
//--------------------------------------------------------------------------
// Advances this iterator by n in reverse order.
UString::_rev_iterator& UString::_rev_iterator::operator+=( difference_type n )
{
    if ( n < 0 )
        _seekFwd( -n );
    else
        _seekRev( n );
    return *this;
}
//--------------------------------------------------------------------------
// Moves this iterator back by n in reverse order.
UString::_rev_iterator& UString::_rev_iterator::operator-=( difference_type n )
{
    if ( n < 0 )
        _seekRev( -n );
    else
        _seekFwd( n );
    return *this;
}
//--------------------------------------------------------------------------
// Dereference: a reverse iterator refers to the code point just before its underlying position.
UString::value_type& UString::_rev_iterator::operator*() const
{
    return mIter[-1];
}
//--------------------------------------------------------------------------
// Indexed access: returns the code point n steps further along in reverse order,
// i.e. the same element as *( *this + n ).
UString::value_type& UString::_rev_iterator::operator[]( difference_type n ) const
{
    _rev_iterator tmp( *this );
    // operator+= already accounts for the reversed direction. The previous code used
    // "tmp -= n", which moved the underlying position forward, so rbegin()[1]
    // dereferenced the end of the data instead of the second-to-last code point.
    tmp += n;
    return *tmp;
}
//--------------------------------------------------------------------------
//--------------------------------------------------------------------------
//-------------------------------------------------------------------------- 00428 //-------------------------------------------------------------------------- 00429 UString::_const_rev_iterator::_const_rev_iterator() 00430 { 00431 00432 } 00433 //-------------------------------------------------------------------------- 00434 UString::_const_rev_iterator::_const_rev_iterator( const _const_rev_iterator& i ) 00435 { 00436 _become( i ); 00437 } 00438 //-------------------------------------------------------------------------- 00439 UString::_const_rev_iterator::_const_rev_iterator( const _rev_iterator& i ) 00440 { 00441 _become( i ); 00442 } 00443 //-------------------------------------------------------------------------- 00444 UString::_const_rev_iterator& UString::_const_rev_iterator::operator++() 00445 { 00446 _seekRev( 1 ); 00447 return *this; 00448 } 00449 //-------------------------------------------------------------------------- 00450 UString::_const_rev_iterator UString::_const_rev_iterator::operator++( int ) 00451 { 00452 _const_rev_iterator tmp( *this ); 00453 _seekRev( 1 ); 00454 return tmp; 00455 } 00456 //-------------------------------------------------------------------------- 00457 UString::_const_rev_iterator& UString::_const_rev_iterator::operator--() 00458 { 00459 _seekFwd( 1 ); 00460 return *this; 00461 } 00462 //-------------------------------------------------------------------------- 00463 UString::_const_rev_iterator UString::_const_rev_iterator::operator--( int ) 00464 { 00465 _const_rev_iterator tmp( *this ); 00466 _seekFwd( 1 ); 00467 return tmp; 00468 } 00469 //-------------------------------------------------------------------------- 00470 UString::_const_rev_iterator UString::_const_rev_iterator::operator+( difference_type n ) 00471 { 00472 _const_rev_iterator tmp( *this ); 00473 if ( n < 0 ) 00474 tmp._seekFwd( -n ); 00475 else 00476 tmp._seekRev( n ); 00477 return tmp; 00478 } 00479 
//-------------------------------------------------------------------------- 00480 UString::_const_rev_iterator UString::_const_rev_iterator::operator-( difference_type n ) 00481 { 00482 _const_rev_iterator tmp( *this ); 00483 if ( n < 0 ) 00484 tmp._seekRev( -n ); 00485 else 00486 tmp._seekFwd( n ); 00487 return tmp; 00488 } 00489 //-------------------------------------------------------------------------- 00490 UString::_const_rev_iterator& UString::_const_rev_iterator::operator+=( difference_type n ) 00491 { 00492 if ( n < 0 ) 00493 _seekFwd( -n ); 00494 else 00495 _seekRev( n ); 00496 return *this; 00497 } 00498 //-------------------------------------------------------------------------- 00499 UString::_const_rev_iterator& UString::_const_rev_iterator::operator-=( difference_type n ) 00500 { 00501 if ( n < 0 ) 00502 _seekRev( -n ); 00503 else 00504 _seekFwd( n ); 00505 return *this; 00506 } 00507 //-------------------------------------------------------------------------- 00508 const UString::value_type& UString::_const_rev_iterator::operator*() const 00509 { 00510 return mIter[-1]; 00511 } 00512 //-------------------------------------------------------------------------- 00513 const UString::value_type& UString::_const_rev_iterator::operator[]( difference_type n ) const 00514 { 00515 _const_rev_iterator tmp( *this ); 00516 tmp -= n; 00517 return *tmp; 00518 } 00519 //-------------------------------------------------------------------------- 00520 //-------------------------------------------------------------------------- 00521 //-------------------------------------------------------------------------- 00522 //-------------------------------------------------------------------------- 00523 UString::UString() 00524 { 00525 _init(); 00526 } 00527 //-------------------------------------------------------------------------- 00528 UString::UString( const UString& copy ) 00529 { 00530 _init(); 00531 mData = copy.mData; 00532 } 00533 
//-------------------------------------------------------------------------- 00534 UString::UString( size_type length, const code_point& ch ) 00535 { 00536 _init(); 00537 assign( length, ch ); 00538 } 00539 //-------------------------------------------------------------------------- 00540 UString::UString( const code_point* str ) 00541 { 00542 _init(); 00543 assign( str ); 00544 } 00545 //-------------------------------------------------------------------------- 00546 UString::UString( const code_point* str, size_type length ) 00547 { 00548 _init(); 00549 assign( str, length ); 00550 } 00551 //-------------------------------------------------------------------------- 00552 UString::UString( const UString& str, size_type index, size_type length ) 00553 { 00554 _init(); 00555 assign( str, index, length ); 00556 } 00557 //-------------------------------------------------------------------------- 00558 #if MYGUI_IS_NATIVE_WCHAR_T 00559 UString::UString( const wchar_t* w_str ) 00560 { 00561 _init(); 00562 assign( w_str ); 00563 } 00564 //-------------------------------------------------------------------------- 00565 UString::UString( const wchar_t* w_str, size_type length ) 00566 { 00567 _init(); 00568 assign( w_str, length ); 00569 } 00570 #endif 00571 //-------------------------------------------------------------------------- 00572 UString::UString( const std::wstring& wstr ) 00573 { 00574 _init(); 00575 assign( wstr ); 00576 } 00577 //-------------------------------------------------------------------------- 00578 UString::UString( const char* c_str ) 00579 { 00580 _init(); 00581 assign( c_str ); 00582 } 00583 //-------------------------------------------------------------------------- 00584 UString::UString( const char* c_str, size_type length ) 00585 { 00586 _init(); 00587 assign( c_str, length ); 00588 } 00589 //-------------------------------------------------------------------------- 00590 UString::UString( const std::string& str ) 00591 { 00592 _init(); 00593 
assign( str ); 00594 } 00595 //-------------------------------------------------------------------------- 00596 UString::~UString() 00597 { 00598 _cleanBuffer(); 00599 } 00600 //-------------------------------------------------------------------------- 00601 UString::size_type UString::size() const 00602 { 00603 return mData.size(); 00604 } 00605 //-------------------------------------------------------------------------- 00606 UString::size_type UString::length() const 00607 { 00608 return size(); 00609 } 00610 //-------------------------------------------------------------------------- 00611 UString::size_type UString::length_Characters() const 00612 { 00613 const_iterator i = begin(), ie = end(); 00614 size_type c = 0; 00615 while ( i != ie ) { 00616 i.moveNext(); 00617 ++c; 00618 } 00619 return c; 00620 } 00621 //-------------------------------------------------------------------------- 00622 UString::size_type UString::max_size() const 00623 { 00624 return mData.max_size(); 00625 } 00626 //-------------------------------------------------------------------------- 00627 void UString::reserve( size_type size ) 00628 { 00629 mData.reserve( size ); 00630 } 00631 //-------------------------------------------------------------------------- 00632 void UString::resize( size_type num, const code_point& val /*= 0 */ ) 00633 { 00634 mData.resize( num, val ); 00635 } 00636 //-------------------------------------------------------------------------- 00637 void UString::swap( UString& from ) 00638 { 00639 mData.swap( from.mData ); 00640 } 00641 //-------------------------------------------------------------------------- 00642 bool UString::empty() const 00643 { 00644 return mData.empty(); 00645 } 00646 //-------------------------------------------------------------------------- 00647 const UString::code_point* UString::c_str() const 00648 { 00649 return mData.c_str(); 00650 } 00651 //-------------------------------------------------------------------------- 00652 const 
UString::code_point* UString::data() const 00653 { 00654 return c_str(); 00655 } 00656 //-------------------------------------------------------------------------- 00657 UString::size_type UString::capacity() const 00658 { 00659 return mData.capacity(); 00660 } 00661 //-------------------------------------------------------------------------- 00662 void UString::clear() 00663 { 00664 mData.clear(); 00665 } 00666 //-------------------------------------------------------------------------- 00667 UString UString::substr( size_type index, size_type num /*= npos */ ) const 00668 { 00669 // this could avoid the extra copy if we used a private specialty constructor 00670 dstring data = mData.substr( index, num ); 00671 UString tmp; 00672 tmp.mData.swap( data ); 00673 return tmp; 00674 } 00675 //-------------------------------------------------------------------------- 00676 void UString::push_back( unicode_char val ) 00677 { 00678 code_point cp[2]; 00679 size_t c = _utf32_to_utf16( val, cp ); 00680 if ( c > 0 ) push_back( cp[0] ); 00681 if ( c > 1 ) push_back( cp[1] ); 00682 } 00683 //-------------------------------------------------------------------------- 00684 #if MYGUI_IS_NATIVE_WCHAR_T 00685 void UString::push_back( wchar_t val ) 00686 { 00687 // we do this because the Unicode method still preserves UTF-16 code points 00688 mData.push_back( static_cast<code_point>( val ) ); 00689 } 00690 #endif 00691 //-------------------------------------------------------------------------- 00692 void UString::push_back( code_point val ) 00693 { 00694 mData.push_back( val ); 00695 } 00696 00697 void UString::push_back( char val ) 00698 { 00699 mData.push_back( static_cast<code_point>( val ) ); 00700 } 00701 00702 bool UString::inString( unicode_char ch ) const 00703 { 00704 const_iterator i, ie = end(); 00705 for ( i = begin(); i != ie; i.moveNext() ) { 00706 if ( i.getCharacter() == ch ) 00707 return true; 00708 } 00709 return false; 00710 } 00711 00712 const std::string& 
UString::asUTF8() const 00713 { 00714 _load_buffer_UTF8(); 00715 return *m_buffer.mStrBuffer; 00716 } 00717 00718 const char* UString::asUTF8_c_str() const 00719 { 00720 _load_buffer_UTF8(); 00721 return m_buffer.mStrBuffer->c_str(); 00722 } 00723 00724 const UString::utf32string& UString::asUTF32() const 00725 { 00726 _load_buffer_UTF32(); 00727 return *m_buffer.mUTF32StrBuffer; 00728 } 00729 00730 const UString::unicode_char* UString::asUTF32_c_str() const 00731 { 00732 _load_buffer_UTF32(); 00733 return m_buffer.mUTF32StrBuffer->c_str(); 00734 } 00735 00736 const std::wstring& UString::asWStr() const 00737 { 00738 _load_buffer_WStr(); 00739 return *m_buffer.mWStrBuffer; 00740 } 00741 00742 const wchar_t* UString::asWStr_c_str() const 00743 { 00744 _load_buffer_WStr(); 00745 return m_buffer.mWStrBuffer->c_str(); 00746 } 00747 00748 UString::code_point& UString::at( size_type loc ) 00749 { 00750 return mData.at( loc ); 00751 } 00752 00753 const UString::code_point& UString::at( size_type loc ) const 00754 { 00755 return mData.at( loc ); 00756 } 00757 00758 UString::unicode_char UString::getChar( size_type loc ) const 00759 { 00760 const code_point* ptr = c_str(); 00761 unicode_char uc; 00762 size_t l = _utf16_char_length( ptr[loc] ); 00763 code_point cp[2] = { /* blame the code beautifier */ 00764 0, 0 00765 }; 00766 cp[0] = ptr[loc]; 00767 00768 if ( l == 2 && ( loc + 1 ) < mData.length() ) { 00769 cp[1] = ptr[loc+1]; 00770 } 00771 _utf16_to_utf32( cp, uc ); 00772 return uc; 00773 } 00774 00775 int UString::setChar( size_type loc, unicode_char ch ) 00776 { 00777 code_point cp[2] = { /* blame the code beautifier */ 00778 0, 0 00779 }; 00780 size_t l = _utf32_to_utf16( ch, cp ); 00781 unicode_char existingChar = getChar( loc ); 00782 size_t existingSize = _utf16_char_length( existingChar ); 00783 size_t newSize = _utf16_char_length( ch ); 00784 00785 if ( newSize > existingSize ) { 00786 at( loc ) = cp[0]; 00787 insert( loc + 1, 1, cp[1] ); 00788 return 1; 00789 } 
00790 if ( newSize < existingSize ) { 00791 erase( loc, 1 ); 00792 at( loc ) = cp[0]; 00793 return -1; 00794 } 00795 00796 // newSize == existingSize 00797 at( loc ) = cp[0]; 00798 if ( l == 2 ) at( loc + 1 ) = cp[1]; 00799 return 0; 00800 } 00801 00802 UString::iterator UString::begin() 00803 { 00804 iterator i; 00805 i.mIter = mData.begin(); 00806 i.mString = this; 00807 return i; 00808 } 00809 00810 UString::const_iterator UString::begin() const 00811 { 00812 const_iterator i; 00813 i.mIter = const_cast<UString*>( this )->mData.begin(); 00814 i.mString = const_cast<UString*>( this ); 00815 return i; 00816 } 00817 00818 UString::iterator UString::end() 00819 { 00820 iterator i; 00821 i.mIter = mData.end(); 00822 i.mString = this; 00823 return i; 00824 } 00825 00826 UString::const_iterator UString::end() const 00827 { 00828 const_iterator i; 00829 i.mIter = const_cast<UString*>( this )->mData.end(); 00830 i.mString = const_cast<UString*>( this ); 00831 return i; 00832 } 00833 00834 UString::reverse_iterator UString::rbegin() 00835 { 00836 reverse_iterator i; 00837 i.mIter = mData.end(); 00838 i.mString = this; 00839 return i; 00840 } 00841 00842 UString::const_reverse_iterator UString::rbegin() const 00843 { 00844 const_reverse_iterator i; 00845 i.mIter = const_cast<UString*>( this )->mData.end(); 00846 i.mString = const_cast<UString*>( this ); 00847 return i; 00848 } 00849 00850 UString::reverse_iterator UString::rend() 00851 { 00852 reverse_iterator i; 00853 i.mIter = mData.begin(); 00854 i.mString = this; 00855 return i; 00856 } 00857 00858 UString::const_reverse_iterator UString::rend() const 00859 { 00860 const_reverse_iterator i; 00861 i.mIter = const_cast<UString*>( this )->mData.begin(); 00862 i.mString = const_cast<UString*>( this ); 00863 return i; 00864 } 00865 00866 UString& UString::assign( iterator start, iterator end ) 00867 { 00868 mData.assign( start.mIter, end.mIter ); 00869 return *this; 00870 } 00871 00872 UString& UString::assign( const 
UString& str ) 00873 { 00874 mData.assign( str.mData ); 00875 return *this; 00876 } 00877 00878 UString& UString::assign( const code_point* str ) 00879 { 00880 mData.assign( str ); 00881 return *this; 00882 } 00883 00884 UString& UString::assign( const code_point* str, size_type num ) 00885 { 00886 mData.assign( str, num ); 00887 return *this; 00888 } 00889 00890 UString& UString::assign( const UString& str, size_type index, size_type len ) 00891 { 00892 mData.assign( str.mData, index, len ); 00893 return *this; 00894 } 00895 00896 UString& UString::assign( size_type num, const code_point& ch ) 00897 { 00898 mData.assign( num, ch ); 00899 return *this; 00900 } 00901 00902 UString& UString::assign( const std::wstring& wstr ) 00903 { 00904 mData.clear(); 00905 mData.reserve( wstr.length() ); // best guess bulk allocate 00906 #ifdef WCHAR_UTF16 // if we're already working in UTF-16, this is easy 00907 code_point tmp; 00908 std::wstring::const_iterator i, ie = wstr.end(); 00909 for ( i = wstr.begin(); i != ie; i++ ) { 00910 tmp = static_cast<code_point>( *i ); 00911 mData.push_back( tmp ); 00912 } 00913 #else // otherwise we do it the safe way (which is still 100% safe to pass UTF-16 through, just slower) 00914 code_point cp[3] = {0, 0, 0}; 00915 unicode_char tmp; 00916 std::wstring::const_iterator i, ie = wstr.end(); 00917 for ( i = wstr.begin(); i != ie; i++ ) { 00918 tmp = static_cast<unicode_char>( *i ); 00919 size_t l = _utf32_to_utf16( tmp, cp ); 00920 if ( l > 0 ) mData.push_back( cp[0] ); 00921 if ( l > 1 ) mData.push_back( cp[1] ); 00922 } 00923 #endif 00924 return *this; 00925 } 00926 00927 #if MYGUI_IS_NATIVE_WCHAR_T 00928 UString& UString::assign( const wchar_t* w_str ) 00929 { 00930 std::wstring tmp; 00931 tmp.assign( w_str ); 00932 return assign( tmp ); 00933 } 00934 00935 UString& UString::assign( const wchar_t* w_str, size_type num ) 00936 { 00937 std::wstring tmp; 00938 tmp.assign( w_str, num ); 00939 return assign( tmp ); 00940 } 00941 #endif 00942 
00943 UString& UString::assign( const std::string& str ) 00944 { 00945 size_type len = _verifyUTF8( str ); 00946 clear(); // empty our contents, if there are any 00947 reserve( len ); // best guess bulk capacity growth 00948 00949 // This is a 3 step process, converting each byte in the UTF-8 stream to UTF-32, 00950 // then converting it to UTF-16, then finally appending the data buffer 00951 00952 unicode_char uc; // temporary Unicode character buffer 00953 unsigned char utf8buf[7]; // temporary UTF-8 buffer 00954 utf8buf[6] = 0; 00955 size_t utf8len; // UTF-8 length 00956 code_point utf16buff[3]; // temporary UTF-16 buffer 00957 utf16buff[2] = 0; 00958 size_t utf16len; // UTF-16 length 00959 00960 std::string::const_iterator i, ie = str.end(); 00961 for ( i = str.begin(); i != ie; i++ ) { 00962 utf8len = _utf8_char_length( static_cast<unsigned char>( *i ) ); // estimate bytes to load 00963 for ( size_t j = 0; j < utf8len; j++ ) { // load the needed UTF-8 bytes 00964 utf8buf[j] = ( static_cast<unsigned char>( *( i + j ) ) ); // we don't increment 'i' here just in case the estimate is wrong (shouldn't happen, but we're being careful) 00965 } 00966 utf8buf[utf8len] = 0; // nul terminate so we throw an exception before running off the end of the buffer 00967 utf8len = _utf8_to_utf32( utf8buf, uc ); // do the UTF-8 -> UTF-32 conversion 00968 i += utf8len - 1; // we subtract 1 for the increment of the 'for' loop 00969 00970 utf16len = _utf32_to_utf16( uc, utf16buff ); // UTF-32 -> UTF-16 conversion 00971 append( utf16buff, utf16len ); // append the characters to the string 00972 } 00973 return *this; 00974 } 00975 00976 UString& UString::assign( const char* c_str ) 00977 { 00978 std::string tmp( c_str ); 00979 return assign( tmp ); 00980 } 00981 00982 UString& UString::assign( const char* c_str, size_type num ) 00983 { 00984 std::string tmp; 00985 tmp.assign( c_str, num ); 00986 return assign( tmp ); 00987 } 00988 00989 UString& UString::append( const UString& str ) 
00990 { 00991 mData.append( str.mData ); 00992 return *this; 00993 } 00994 00995 UString& UString::append( const code_point* str ) 00996 { 00997 mData.append( str ); 00998 return *this; 00999 } 01000 01001 UString& UString::append( const UString& str, size_type index, size_type len ) 01002 { 01003 mData.append( str.mData, index, len ); 01004 return *this; 01005 } 01006 01007 UString& UString::append( const code_point* str, size_type num ) 01008 { 01009 mData.append( str, num ); 01010 return *this; 01011 } 01012 01013 UString& UString::append( size_type num, code_point ch ) 01014 { 01015 mData.append( num, ch ); 01016 return *this; 01017 } 01018 01019 UString& UString::append( iterator start, iterator end ) 01020 { 01021 mData.append( start.mIter, end.mIter ); 01022 return *this; 01023 } 01024 01025 #if MYGUI_IS_NATIVE_WCHAR_T 01026 UString& UString::append( const wchar_t* w_str, size_type num ) 01027 { 01028 std::wstring tmp( w_str, num ); 01029 return append( tmp ); 01030 } 01031 01032 UString& UString::append( size_type num, wchar_t ch ) 01033 { 01034 return append( num, static_cast<unicode_char>( ch ) ); 01035 } 01036 #endif 01037 UString& UString::append( const char* c_str, size_type num ) 01038 { 01039 UString tmp( c_str, num ); 01040 append( tmp ); 01041 return *this; 01042 } 01043 01044 UString& UString::append( size_type num, char ch ) 01045 { 01046 append( num, static_cast<code_point>( ch ) ); 01047 return *this; 01048 } 01049 01050 UString& UString::append( size_type num, unicode_char ch ) 01051 { 01052 code_point cp[2] = {0, 0}; 01053 if ( _utf32_to_utf16( ch, cp ) == 2 ) { 01054 for ( size_type i = 0; i < num; i++ ) { 01055 append( 1, cp[0] ); 01056 append( 1, cp[1] ); 01057 } 01058 } else { 01059 for ( size_type i = 0; i < num; i++ ) { 01060 append( 1, cp[0] ); 01061 } 01062 } 01063 return *this; 01064 } 01065 01066 UString::iterator UString::insert( iterator i, const code_point& ch ) 01067 { 01068 iterator ret; 01069 ret.mIter = mData.insert( i.mIter, 
ch ); 01070 ret.mString = this; 01071 return ret; 01072 } 01073 01074 UString& UString::insert( size_type index, const UString& str ) 01075 { 01076 mData.insert( index, str.mData ); 01077 return *this; 01078 } 01079 01080 UString& UString::insert( size_type index1, const UString& str, size_type index2, size_type num ) 01081 { 01082 mData.insert( index1, str.mData, index2, num ); 01083 return *this; 01084 } 01085 01086 void UString::insert( iterator i, iterator start, iterator end ) 01087 { 01088 mData.insert( i.mIter, start.mIter, end.mIter ); 01089 } 01090 01091 UString& UString::insert( size_type index, const code_point* str, size_type num ) 01092 { 01093 mData.insert( index, str, num ); 01094 return *this; 01095 } 01096 01097 #if MYGUI_IS_NATIVE_WCHAR_T 01098 UString& UString::insert( size_type index, const wchar_t* w_str, size_type num ) 01099 { 01100 UString tmp( w_str, num ); 01101 insert( index, tmp ); 01102 return *this; 01103 } 01104 #endif 01105 01106 UString& UString::insert( size_type index, const char* c_str, size_type num ) 01107 { 01108 UString tmp( c_str, num ); 01109 insert( index, tmp ); 01110 return *this; 01111 } 01112 01113 UString& UString::insert( size_type index, size_type num, code_point ch ) 01114 { 01115 mData.insert( index, num, ch ); 01116 return *this; 01117 } 01118 01119 #if MYGUI_IS_NATIVE_WCHAR_T 01120 UString& UString::insert( size_type index, size_type num, wchar_t ch ) 01121 { 01122 insert( index, num, static_cast<unicode_char>( ch ) ); 01123 return *this; 01124 } 01125 #endif 01126 01127 UString& UString::insert( size_type index, size_type num, char ch ) 01128 { 01129 insert( index, num, static_cast<code_point>( ch ) ); 01130 return *this; 01131 } 01132 01133 UString& UString::insert( size_type index, size_type num, unicode_char ch ) 01134 { 01135 code_point cp[3] = {0, 0, 0}; 01136 size_t l = _utf32_to_utf16( ch, cp ); 01137 if ( l == 1 ) { 01138 return insert( index, num, cp[0] ); 01139 } 01140 for ( size_type c = 0; c < num; 
c++ ) { 01141 // insert in reverse order to preserve ordering after insert 01142 insert( index, 1, cp[1] ); 01143 insert( index, 1, cp[0] ); 01144 } 01145 return *this; 01146 } 01147 01148 void UString::insert( iterator i, size_type num, const code_point& ch ) 01149 { 01150 mData.insert( i.mIter, num, ch ); 01151 } 01152 #if MYGUI_IS_NATIVE_WCHAR_T 01153 void UString::insert( iterator i, size_type num, const wchar_t& ch ) 01154 { 01155 insert( i, num, static_cast<unicode_char>( ch ) ); 01156 } 01157 #endif 01158 01159 void UString::insert( iterator i, size_type num, const char& ch ) 01160 { 01161 insert( i, num, static_cast<code_point>( ch ) ); 01162 } 01163 01164 void UString::insert( iterator i, size_type num, const unicode_char& ch ) 01165 { 01166 code_point cp[3] = {0, 0, 0}; 01167 size_t l = _utf32_to_utf16( ch, cp ); 01168 if ( l == 1 ) { 01169 insert( i, num, cp[0] ); 01170 } else { 01171 for ( size_type c = 0; c < num; c++ ) { 01172 // insert in reverse order to preserve ordering after insert 01173 insert( i, 1, cp[1] ); 01174 insert( i, 1, cp[0] ); 01175 } 01176 } 01177 } 01178 01179 UString::iterator UString::erase( iterator loc ) 01180 { 01181 iterator ret; 01182 ret.mIter = mData.erase( loc.mIter ); 01183 ret.mString = this; 01184 return ret; 01185 } 01186 01187 UString::iterator UString::erase( iterator start, iterator end ) 01188 { 01189 iterator ret; 01190 ret.mIter = mData.erase( start.mIter, end.mIter ); 01191 ret.mString = this; 01192 return ret; 01193 } 01194 01195 UString& UString::erase( size_type index /*= 0*/, size_type num /*= npos */ ) 01196 { 01197 if ( num == npos ) 01198 mData.erase( index ); 01199 else 01200 mData.erase( index, num ); 01201 return *this; 01202 } 01203 01204 UString& UString::replace( size_type index1, size_type num1, const UString& str ) 01205 { 01206 mData.replace( index1, num1, str.mData, 0, npos ); 01207 return *this; 01208 } 01209 01210 UString& UString::replace( size_type index1, size_type num1, const UString& str, 
size_type num2 ) 01211 { 01212 mData.replace( index1, num1, str.mData, 0, num2 ); 01213 return *this; 01214 } 01215 01216 UString& UString::replace( size_type index1, size_type num1, const UString& str, size_type index2, size_type num2 ) 01217 { 01218 mData.replace( index1, num1, str.mData, index2, num2 ); 01219 return *this; 01220 } 01221 01222 UString& UString::replace( iterator start, iterator end, const UString& str, size_type num /*= npos */ ) 01223 { 01224 _const_fwd_iterator st(start); //Work around for gcc, allow it to find correct overload 01225 01226 size_type index1 = begin() - st; 01227 size_type num1 = end - st; 01228 return replace( index1, num1, str, 0, num ); 01229 } 01230 01231 UString& UString::replace( size_type index, size_type num1, size_type num2, code_point ch ) 01232 { 01233 mData.replace( index, num1, num2, ch ); 01234 return *this; 01235 } 01236 01237 UString& UString::replace( iterator start, iterator end, size_type num, code_point ch ) 01238 { 01239 _const_fwd_iterator st(start); //Work around for gcc, allow it to find correct overload 01240 01241 size_type index1 = begin() - st; 01242 size_type num1 = end - st; 01243 return replace( index1, num1, num, ch ); 01244 } 01245 01246 int UString::compare( const UString& str ) const 01247 { 01248 return mData.compare( str.mData ); 01249 } 01250 01251 int UString::compare( const code_point* str ) const 01252 { 01253 return mData.compare( str ); 01254 } 01255 01256 int UString::compare( size_type index, size_type length, const UString& str ) const 01257 { 01258 return mData.compare( index, length, str.mData ); 01259 } 01260 01261 int UString::compare( size_type index, size_type length, const UString& str, size_type index2, size_type length2 ) const 01262 { 01263 return mData.compare( index, length, str.mData, index2, length2 ); 01264 } 01265 01266 int UString::compare( size_type index, size_type length, const code_point* str, size_type length2 ) const 01267 { 01268 return mData.compare( index, 
length, str, length2 ); 01269 } 01270 01271 #if MYGUI_IS_NATIVE_WCHAR_T 01272 int UString::compare( size_type index, size_type length, const wchar_t* w_str, size_type length2 ) const 01273 { 01274 UString tmp( w_str, length2 ); 01275 return compare( index, length, tmp ); 01276 } 01277 #endif 01278 01279 int UString::compare( size_type index, size_type length, const char* c_str, size_type length2 ) const 01280 { 01281 UString tmp( c_str, length2 ); 01282 return compare( index, length, tmp ); 01283 } 01284 01285 UString::size_type UString::find( const UString& str, size_type index /*= 0 */ ) const 01286 { 01287 return mData.find( str.c_str(), index ); 01288 } 01289 01290 UString::size_type UString::find( const code_point* cp_str, size_type index, size_type length ) const 01291 { 01292 UString tmp( cp_str ); 01293 return mData.find( tmp.c_str(), index, length ); 01294 } 01295 01296 UString::size_type UString::find( const char* c_str, size_type index, size_type length ) const 01297 { 01298 UString tmp( c_str ); 01299 return mData.find( tmp.c_str(), index, length ); 01300 } 01301 01302 #if MYGUI_IS_NATIVE_WCHAR_T 01303 UString::size_type UString::find( const wchar_t* w_str, size_type index, size_type length ) const 01304 { 01305 UString tmp( w_str ); 01306 return mData.find( tmp.c_str(), index, length ); 01307 } 01308 #endif 01309 01310 UString::size_type UString::find( char ch, size_type index /*= 0 */ ) const 01311 { 01312 return find( static_cast<code_point>( ch ), index ); 01313 } 01314 01315 UString::size_type UString::find( code_point ch, size_type index /*= 0 */ ) const 01316 { 01317 return mData.find( ch, index ); 01318 } 01319 01320 #if MYGUI_IS_NATIVE_WCHAR_T 01321 UString::size_type UString::find( wchar_t ch, size_type index /*= 0 */ ) const 01322 { 01323 return find( static_cast<unicode_char>( ch ), index ); 01324 } 01325 #endif 01326 01327 UString::size_type UString::find( unicode_char ch, size_type index /*= 0 */ ) const 01328 { 01329 code_point cp[3] = 
{0, 0, 0}; 01330 size_t l = _utf32_to_utf16( ch, cp ); 01331 return find( UString( cp, l ), index ); 01332 } 01333 01334 UString::size_type UString::rfind( const UString& str, size_type index /*= 0 */ ) const 01335 { 01336 return mData.rfind( str.c_str(), index ); 01337 } 01338 01339 UString::size_type UString::rfind( const code_point* cp_str, size_type index, size_type num ) const 01340 { 01341 UString tmp( cp_str ); 01342 return mData.rfind( tmp.c_str(), index, num ); 01343 } 01344 01345 UString::size_type UString::rfind( const char* c_str, size_type index, size_type num ) const 01346 { 01347 UString tmp( c_str ); 01348 return mData.rfind( tmp.c_str(), index, num ); 01349 } 01350 01351 #if MYGUI_IS_NATIVE_WCHAR_T 01352 UString::size_type UString::rfind( const wchar_t* w_str, size_type index, size_type num ) const 01353 { 01354 UString tmp( w_str ); 01355 return mData.rfind( tmp.c_str(), index, num ); 01356 } 01357 #endif 01358 01359 UString::size_type UString::rfind( char ch, size_type index /*= 0 */ ) const 01360 { 01361 return rfind( static_cast<code_point>( ch ), index ); 01362 } 01363 01364 UString::size_type UString::rfind( code_point ch, size_type index ) const 01365 { 01366 return mData.rfind( ch, index ); 01367 } 01368 01369 #if MYGUI_IS_NATIVE_WCHAR_T 01370 UString::size_type UString::rfind( wchar_t ch, size_type index /*= 0 */ ) const 01371 { 01372 return rfind( static_cast<unicode_char>( ch ), index ); 01373 } 01374 #endif 01375 01376 UString::size_type UString::rfind( unicode_char ch, size_type index /*= 0 */ ) const 01377 { 01378 code_point cp[3] = {0, 0, 0}; 01379 size_t l = _utf32_to_utf16( ch, cp ); 01380 return rfind( UString( cp, l ), index ); 01381 } 01382 01383 UString::size_type UString::find_first_of( const UString &str, size_type index /*= 0*/, size_type num /*= npos */ ) const 01384 { 01385 size_type i = 0; 01386 const size_type len = length(); 01387 while ( i < num && ( index + i ) < len ) { 01388 unicode_char ch = getChar( index + i ); 
01389 if ( str.inString( ch ) ) 01390 return index + i; 01391 i += _utf16_char_length( ch ); // increment by the Unicode character length 01392 } 01393 return npos; 01394 } 01395 01396 UString::size_type UString::find_first_of( code_point ch, size_type index /*= 0 */ ) const 01397 { 01398 UString tmp; 01399 tmp.assign( 1, ch ); 01400 return find_first_of( tmp, index ); 01401 } 01402 01403 UString::size_type UString::find_first_of( char ch, size_type index /*= 0 */ ) const 01404 { 01405 return find_first_of( static_cast<code_point>( ch ), index ); 01406 } 01407 01408 #if MYGUI_IS_NATIVE_WCHAR_T 01409 UString::size_type UString::find_first_of( wchar_t ch, size_type index /*= 0 */ ) const 01410 { 01411 return find_first_of( static_cast<unicode_char>( ch ), index ); 01412 } 01413 #endif 01414 01415 UString::size_type UString::find_first_of( unicode_char ch, size_type index /*= 0 */ ) const 01416 { 01417 code_point cp[3] = {0, 0, 0}; 01418 size_t l = _utf32_to_utf16( ch, cp ); 01419 return find_first_of( UString( cp, l ), index ); 01420 } 01421 01422 UString::size_type UString::find_first_not_of( const UString& str, size_type index /*= 0*/, size_type num /*= npos */ ) const 01423 { 01424 size_type i = 0; 01425 const size_type len = length(); 01426 while ( i < num && ( index + i ) < len ) { 01427 unicode_char ch = getChar( index + i ); 01428 if ( !str.inString( ch ) ) 01429 return index + i; 01430 i += _utf16_char_length( ch ); // increment by the Unicode character length 01431 } 01432 return npos; 01433 } 01434 01435 UString::size_type UString::find_first_not_of( code_point ch, size_type index /*= 0 */ ) const 01436 { 01437 UString tmp; 01438 tmp.assign( 1, ch ); 01439 return find_first_not_of( tmp, index ); 01440 } 01441 01442 UString::size_type UString::find_first_not_of( char ch, size_type index /*= 0 */ ) const 01443 { 01444 return find_first_not_of( static_cast<code_point>( ch ), index ); 01445 } 01446 01447 #if MYGUI_IS_NATIVE_WCHAR_T 01448 UString::size_type 
UString::find_first_not_of( wchar_t ch, size_type index /*= 0 */ ) const 01449 { 01450 return find_first_not_of( static_cast<unicode_char>( ch ), index ); 01451 } 01452 #endif 01453 01454 UString::size_type UString::find_first_not_of( unicode_char ch, size_type index /*= 0 */ ) const 01455 { 01456 code_point cp[3] = {0, 0, 0}; 01457 size_t l = _utf32_to_utf16( ch, cp ); 01458 return find_first_not_of( UString( cp, l ), index ); 01459 } 01460 01461 UString::size_type UString::find_last_of( const UString& str, size_type index /*= npos*/, size_type num /*= npos */ ) const 01462 { 01463 size_type i = 0; 01464 const size_type len = length(); 01465 if ( index > len ) index = len - 1; 01466 01467 while ( i < num && ( index - i ) != npos ) { 01468 size_type j = index - i; 01469 // careful to step full Unicode characters 01470 if ( j != 0 && _utf16_surrogate_follow( at( j ) ) && _utf16_surrogate_lead( at( j - 1 ) ) ) { 01471 j = index - ++i; 01472 } 01473 // and back to the usual dull test 01474 unicode_char ch = getChar( j ); 01475 if ( str.inString( ch ) ) 01476 return j; 01477 i++; 01478 } 01479 return npos; 01480 } 01481 01482 UString::size_type UString::find_last_of( code_point ch, size_type index /*= npos */ ) const 01483 { 01484 UString tmp; 01485 tmp.assign( 1, ch ); 01486 return find_last_of( tmp, index ); 01487 } 01488 01489 #if MYGUI_IS_NATIVE_WCHAR_T 01490 UString::size_type UString::find_last_of( wchar_t ch, size_type index /*= npos */ ) const 01491 { 01492 return find_last_of( static_cast<unicode_char>( ch ), index ); 01493 } 01494 #endif 01495 01496 UString::size_type UString::find_last_of( unicode_char ch, size_type index /*= npos */ ) const 01497 { 01498 code_point cp[3] = {0, 0, 0}; 01499 size_t l = _utf32_to_utf16( ch, cp ); 01500 return find_last_of( UString( cp, l ), index ); 01501 } 01502 01503 UString::size_type UString::find_last_not_of( const UString& str, size_type index /*= npos*/, size_type num /*= npos */ ) const 01504 { 01505 size_type i = 0; 
01506 const size_type len = length(); 01507 if ( index > len ) index = len - 1; 01508 01509 while ( i < num && ( index - i ) != npos ) { 01510 size_type j = index - i; 01511 // careful to step full Unicode characters 01512 if ( j != 0 && _utf16_surrogate_follow( at( j ) ) && _utf16_surrogate_lead( at( j - 1 ) ) ) { 01513 j = index - ++i; 01514 } 01515 // and back to the usual dull test 01516 unicode_char ch = getChar( j ); 01517 if ( !str.inString( ch ) ) 01518 return j; 01519 i++; 01520 } 01521 return npos; 01522 } 01523 01524 UString::size_type UString::find_last_not_of( code_point ch, size_type index /*= npos */ ) const 01525 { 01526 UString tmp; 01527 tmp.assign( 1, ch ); 01528 return find_last_not_of( tmp, index ); 01529 } 01530 01531 UString::size_type UString::find_last_not_of( char ch, size_type index /*= npos */ ) const 01532 { 01533 return find_last_not_of( static_cast<code_point>( ch ), index ); 01534 } 01535 01536 #if MYGUI_IS_NATIVE_WCHAR_T 01537 UString::size_type UString::find_last_not_of( wchar_t ch, size_type index /*= npos */ ) const 01538 { 01539 return find_last_not_of( static_cast<unicode_char>( ch ), index ); 01540 } 01541 #endif 01542 01543 UString::size_type UString::find_last_not_of( unicode_char ch, size_type index /*= npos */ ) const 01544 { 01545 code_point cp[3] = {0, 0, 0}; 01546 size_t l = _utf32_to_utf16( ch, cp ); 01547 return find_last_not_of( UString( cp, l ), index ); 01548 } 01549 01550 bool UString::operator<( const UString& right ) const 01551 { 01552 return compare( right ) < 0; 01553 } 01554 01555 bool UString::operator<=( const UString& right ) const 01556 { 01557 return compare( right ) <= 0; 01558 } 01559 01560 UString& UString::operator=( const UString& s ) 01561 { 01562 return assign( s ); 01563 } 01564 01565 UString& UString::operator=( code_point ch ) 01566 { 01567 clear(); 01568 return append( 1, ch ); 01569 } 01570 01571 UString& UString::operator=( char ch ) 01572 { 01573 clear(); 01574 return append( 1, ch ); 
01575 } 01576 01577 #if MYGUI_IS_NATIVE_WCHAR_T 01578 UString& UString::operator=( wchar_t ch ) 01579 { 01580 clear(); 01581 return append( 1, ch ); 01582 } 01583 #endif 01584 01585 UString& UString::operator=( unicode_char ch ) 01586 { 01587 clear(); 01588 return append( 1, ch ); 01589 } 01590 01591 bool UString::operator>( const UString& right ) const 01592 { 01593 return compare( right ) > 0; 01594 } 01595 01596 bool UString::operator>=( const UString& right ) const 01597 { 01598 return compare( right ) >= 0; 01599 } 01600 01601 bool UString::operator==( const UString& right ) const 01602 { 01603 return compare( right ) == 0; 01604 } 01605 01606 bool UString::operator!=( const UString& right ) const 01607 { 01608 return !operator==( right ); 01609 } 01610 01611 UString::code_point& UString::operator[]( size_type index ) 01612 { 01613 return at( index ); 01614 } 01615 01616 const UString::code_point& UString::operator[]( size_type index ) const 01617 { 01618 return at( index ); 01619 } 01620 01621 UString::operator std::string() const 01622 { 01623 return std::string( asUTF8() ); 01624 } 01625 01627 UString::operator std::wstring() const 01628 { 01629 return std::wstring( asWStr() ); 01630 } 01631 01632 01633 bool UString::_utf16_independent_char( code_point cp ) 01634 { 01635 if ( 0xD800 <= cp && cp <= 0xDFFF ) // tests if the cp is within the surrogate pair range 01636 return false; // it matches a surrogate pair signature 01637 return true; // everything else is a standalone code point 01638 } 01639 01640 bool UString::_utf16_surrogate_lead( code_point cp ) 01641 { 01642 if ( 0xD800 <= cp && cp <= 0xDBFF ) // tests if the cp is within the 2nd word of a surrogate pair 01643 return true; // it is a 1st word 01644 return false; // it isn't 01645 } 01646 01647 bool UString::_utf16_surrogate_follow( code_point cp ) 01648 { 01649 if ( 0xDC00 <= cp && cp <= 0xDFFF ) // tests if the cp is within the 2nd word of a surrogate pair 01650 return true; // it is a 2nd word 
01651 return false; // everything else isn't 01652 } 01653 01654 size_t UString::_utf16_char_length( code_point cp ) 01655 { 01656 if ( 0xD800 <= cp && cp <= 0xDBFF ) // test if cp is the beginning of a surrogate pair 01657 return 2; // if it is, then we are 2 words long 01658 return 1; // otherwise we are only 1 word long 01659 } 01660 01661 size_t UString::_utf16_char_length( unicode_char uc ) 01662 { 01663 if ( uc > 0xFFFF ) // test if uc is greater than the single word maximum 01664 return 2; // if so, we need a surrogate pair 01665 return 1; // otherwise we can stuff it into a single word 01666 } 01667 01668 size_t UString::_utf16_to_utf32( const code_point in_cp[2], unicode_char& out_uc ) 01669 { 01670 const code_point& cp1 = in_cp[0]; 01671 const code_point& cp2 = in_cp[1]; 01672 bool wordPair = false; 01673 01674 // does it look like a surrogate pair? 01675 if ( 0xD800 <= cp1 && cp1 <= 0xDBFF ) { 01676 // looks like one, but does the other half match the algorithm as well? 01677 if ( 0xDC00 <= cp2 && cp2 <= 0xDFFF ) 01678 wordPair = true; // yep! 
01679 } 01680 01681 if ( !wordPair ) { // if we aren't a 100% authentic surrogate pair, then just copy the value 01682 out_uc = cp1; 01683 return 1; 01684 } 01685 01686 unsigned short cU = cp1, cL = cp2; // copy upper and lower words of surrogate pair to writable buffers 01687 cU -= 0xD800; // remove the encoding markers 01688 cL -= 0xDC00; 01689 01690 out_uc = ( cU & 0x03FF ) << 10; // grab the 10 upper bits and set them in their proper location 01691 out_uc |= ( cL & 0x03FF ); // combine in the lower 10 bits 01692 out_uc += 0x10000; // add back in the value offset 01693 01694 return 2; // this whole operation takes to words, so that's what we'll return 01695 } 01696 01697 size_t UString::_utf32_to_utf16( const unicode_char& in_uc, code_point out_cp[2] ) 01698 { 01699 if ( in_uc <= 0xFFFF ) { // we blindly preserve sentinel values because our decoder understands them 01700 out_cp[0] = static_cast<code_point>(in_uc); 01701 return 1; 01702 } 01703 unicode_char uc = in_uc; // copy to writable buffer 01704 unsigned short tmp; // single code point buffer 01705 uc -= 0x10000; // subtract value offset 01706 01707 //process upper word 01708 tmp = static_cast<unsigned short>(( uc >> 10 ) & 0x03FF); // grab the upper 10 bits 01709 tmp += 0xD800; // add encoding offset 01710 out_cp[0] = tmp; // write 01711 01712 // process lower word 01713 tmp = static_cast<unsigned short>(uc & 0x03FF); // grab the lower 10 bits 01714 tmp += 0xDC00; // add encoding offset 01715 out_cp[1] = tmp; // write 01716 01717 return 2; // return used word count (2 for surrogate pairs) 01718 } 01719 01720 bool UString::_utf8_start_char( unsigned char cp ) 01721 { 01722 return ( cp & ~_cont_mask ) != _cont; 01723 } 01724 01725 size_t UString::_utf8_char_length( unsigned char cp ) 01726 { 01727 if ( !( cp & 0x80 ) ) return 1; 01728 if (( cp & ~_lead1_mask ) == _lead1 ) return 2; 01729 if (( cp & ~_lead2_mask ) == _lead2 ) return 3; 01730 if (( cp & ~_lead3_mask ) == _lead3 ) return 4; 01731 if (( cp & 
~_lead4_mask ) == _lead4 ) return 5; 01732 if (( cp & ~_lead5_mask ) == _lead5 ) return 6; 01733 throw invalid_data( "invalid UTF-8 sequence header value" ); 01734 } 01735 01736 size_t UString::_utf8_char_length( unicode_char uc ) 01737 { 01738 /* 01739 7 bit: U-00000000 - U-0000007F: 0xxxxxxx 01740 11 bit: U-00000080 - U-000007FF: 110xxxxx 10xxxxxx 01741 16 bit: U-00000800 - U-0000FFFF: 1110xxxx 10xxxxxx 10xxxxxx 01742 21 bit: U-00010000 - U-001FFFFF: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx 01743 26 bit: U-00200000 - U-03FFFFFF: 111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 01744 31 bit: U-04000000 - U-7FFFFFFF: 1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 01745 */ 01746 if ( !( uc & ~0x0000007F ) ) return 1; 01747 if ( !( uc & ~0x000007FF ) ) return 2; 01748 if ( !( uc & ~0x0000FFFF ) ) return 3; 01749 if ( !( uc & ~0x001FFFFF ) ) return 4; 01750 if ( !( uc & ~0x03FFFFFF ) ) return 5; 01751 if ( !( uc & ~0x7FFFFFFF ) ) return 6; 01752 throw invalid_data( "invalid UTF-32 value" ); 01753 } 01754 01755 size_t UString::_utf8_to_utf32( const unsigned char in_cp[6], unicode_char& out_uc ) 01756 { 01757 size_t len = _utf8_char_length( in_cp[0] ); 01758 if ( len == 1 ) { // if we are only 1 byte long, then just grab it and exit 01759 out_uc = in_cp[0]; 01760 return 1; 01761 } 01762 01763 unicode_char c = 0; // temporary buffer 01764 size_t i = 0; 01765 switch ( len ) { // load header byte 01766 case 6: 01767 c = in_cp[i] & _lead5_mask; 01768 break; 01769 case 5: 01770 c = in_cp[i] & _lead4_mask; 01771 break; 01772 case 4: 01773 c = in_cp[i] & _lead3_mask; 01774 break; 01775 case 3: 01776 c = in_cp[i] & _lead2_mask; 01777 break; 01778 case 2: 01779 c = in_cp[i] & _lead1_mask; 01780 break; 01781 } 01782 01783 for ( ++i; i < len; i++ ) { // load each continuation byte 01784 if (( in_cp[i] & ~_cont_mask ) != _cont ) 01785 throw invalid_data( "bad UTF-8 continuation byte" ); 01786 c <<= 6; 01787 c |= ( in_cp[i] & _cont_mask ); 01788 } 01789 01790 out_uc = c; // write the 
final value and return the used byte length 01791 return len; 01792 } 01793 01794 size_t UString::_utf32_to_utf8( const unicode_char& in_uc, unsigned char out_cp[6] ) 01795 { 01796 size_t len = _utf8_char_length( in_uc ); // predict byte length of sequence 01797 unicode_char c = in_uc; // copy to temp buffer 01798 01799 //stuff all of the lower bits 01800 for ( size_t i = len - 1; i > 0; i-- ) { 01801 out_cp[i] = static_cast<unsigned char>((( c ) & _cont_mask ) | _cont); 01802 c >>= 6; 01803 } 01804 01805 //now write the header byte 01806 switch ( len ) { 01807 case 6: 01808 out_cp[0] = static_cast<unsigned char>((( c ) & _lead5_mask ) | _lead5); 01809 break; 01810 case 5: 01811 out_cp[0] = static_cast<unsigned char>((( c ) & _lead4_mask ) | _lead4); 01812 break; 01813 case 4: 01814 out_cp[0] = static_cast<unsigned char>((( c ) & _lead3_mask ) | _lead3); 01815 break; 01816 case 3: 01817 out_cp[0] = static_cast<unsigned char>((( c ) & _lead2_mask ) | _lead2); 01818 break; 01819 case 2: 01820 out_cp[0] = static_cast<unsigned char>((( c ) & _lead1_mask ) | _lead1); 01821 break; 01822 case 1: 01823 default: 01824 out_cp[0] = static_cast<unsigned char>(( c ) & 0x7F); 01825 break; 01826 } 01827 01828 // return the byte length of the sequence 01829 return len; 01830 } 01831 01832 UString::size_type UString::_verifyUTF8( const unsigned char* c_str ) 01833 { 01834 std::string tmp( reinterpret_cast<const char*>( c_str ) ); 01835 return _verifyUTF8( tmp ); 01836 } 01837 01838 UString::size_type UString::_verifyUTF8( const std::string& str ) 01839 { 01840 std::string::const_iterator i, ie = str.end(); 01841 i = str.begin(); 01842 size_type length = 0; 01843 01844 while ( i != ie ) { 01845 // characters pass until we find an extended sequence 01846 if (( *i ) & 0x80 ) { 01847 unsigned char c = ( *i ); 01848 size_t contBytes = 0; 01849 01850 // get continuation byte count and test for overlong sequences 01851 if (( c & ~_lead1_mask ) == _lead1 ) { // 1 additional byte 01852 if ( 
c == _lead1 ) throw invalid_data( "overlong UTF-8 sequence" ); 01853 contBytes = 1; 01854 01855 } else if (( c & ~_lead2_mask ) == _lead2 ) { // 2 additional bytes 01856 contBytes = 2; 01857 if ( c == _lead2 ) { // possible overlong UTF-8 sequence 01858 c = ( *( i + 1 ) ); // look ahead to next byte in sequence 01859 if (( c & _lead2 ) == _cont ) throw invalid_data( "overlong UTF-8 sequence" ); 01860 } 01861 01862 } else if (( c & ~_lead3_mask ) == _lead3 ) { // 3 additional bytes 01863 contBytes = 3; 01864 if ( c == _lead3 ) { // possible overlong UTF-8 sequence 01865 c = ( *( i + 1 ) ); // look ahead to next byte in sequence 01866 if (( c & _lead3 ) == _cont ) throw invalid_data( "overlong UTF-8 sequence" ); 01867 } 01868 01869 } else if (( c & ~_lead4_mask ) == _lead4 ) { // 4 additional bytes 01870 contBytes = 4; 01871 if ( c == _lead4 ) { // possible overlong UTF-8 sequence 01872 c = ( *( i + 1 ) ); // look ahead to next byte in sequence 01873 if (( c & _lead4 ) == _cont ) throw invalid_data( "overlong UTF-8 sequence" ); 01874 } 01875 01876 } else if (( c & ~_lead5_mask ) == _lead5 ) { // 5 additional bytes 01877 contBytes = 5; 01878 if ( c == _lead5 ) { // possible overlong UTF-8 sequence 01879 c = ( *( i + 1 ) ); // look ahead to next byte in sequence 01880 if (( c & _lead5 ) == _cont ) throw invalid_data( "overlong UTF-8 sequence" ); 01881 } 01882 } 01883 01884 // check remaining continuation bytes for 01885 while ( contBytes-- ) { 01886 c = ( *( ++i ) ); // get next byte in sequence 01887 if (( c & ~_cont_mask ) != _cont ) 01888 throw invalid_data( "bad UTF-8 continuation byte" ); 01889 } 01890 } 01891 length++; 01892 i++; 01893 } 01894 return length; 01895 } 01896 01897 void UString::_init() 01898 { 01899 m_buffer.mVoidBuffer = 0; 01900 m_bufferType = bt_none; 01901 m_bufferSize = 0; 01902 } 01903 01904 void UString::_cleanBuffer() const 01905 { 01906 if ( m_buffer.mVoidBuffer != 0 ) { 01907 switch ( m_bufferType ) { 01908 case bt_string: 01909 delete 
m_buffer.mStrBuffer; 01910 break; 01911 case bt_wstring: 01912 delete m_buffer.mWStrBuffer; 01913 break; 01914 case bt_utf32string: 01915 delete m_buffer.mUTF32StrBuffer; 01916 break; 01917 case bt_none: // under the worse of circumstances, this is all we can do, and hope it works out 01918 default: 01919 //delete m_buffer.mVoidBuffer; 01920 // delete void* is undefined, don't do that 01921 assert("This should never happen - mVoidBuffer should never contain something if we " 01922 "don't know the type"); 01923 break; 01924 } 01925 m_buffer.mVoidBuffer = 0; 01926 m_bufferSize = 0; 01927 m_bufferType = bt_none; 01928 } 01929 } 01930 01931 void UString::_getBufferStr() const 01932 { 01933 if ( m_bufferType != bt_string ) { 01934 _cleanBuffer(); 01935 m_buffer.mStrBuffer = new std::string(); 01936 m_bufferType = bt_string; 01937 } 01938 m_buffer.mStrBuffer->clear(); 01939 } 01940 01941 void UString::_getBufferWStr() const 01942 { 01943 if ( m_bufferType != bt_wstring ) { 01944 _cleanBuffer(); 01945 m_buffer.mWStrBuffer = new std::wstring(); 01946 m_bufferType = bt_wstring; 01947 } 01948 m_buffer.mWStrBuffer->clear(); 01949 } 01950 01951 void UString::_getBufferUTF32Str() const 01952 { 01953 if ( m_bufferType != bt_utf32string ) { 01954 _cleanBuffer(); 01955 m_buffer.mUTF32StrBuffer = new utf32string(); 01956 m_bufferType = bt_utf32string; 01957 } 01958 m_buffer.mUTF32StrBuffer->clear(); 01959 } 01960 01961 void UString::_load_buffer_UTF8() const 01962 { 01963 _getBufferStr(); 01964 std::string& buffer = ( *m_buffer.mStrBuffer ); 01965 buffer.reserve( length() ); 01966 01967 unsigned char utf8buf[6]; 01968 char* charbuf = ( char* )utf8buf; 01969 unicode_char c; 01970 size_t len; 01971 01972 const_iterator i, ie = end(); 01973 for ( i = begin(); i != ie; i.moveNext() ) { 01974 c = i.getCharacter(); 01975 len = _utf32_to_utf8( c, utf8buf ); 01976 size_t j = 0; 01977 while ( j < len ) 01978 buffer.push_back( charbuf[j++] ); 01979 } 01980 } 01981 01982 void 
UString::_load_buffer_WStr() const 01983 { 01984 _getBufferWStr(); 01985 std::wstring& buffer = ( *m_buffer.mWStrBuffer ); 01986 buffer.reserve( length() ); // may over reserve, but should be close enough 01987 #ifdef WCHAR_UTF16 // wchar_t matches UTF-16 01988 const_iterator i, ie = end(); 01989 for ( i = begin(); i != ie; ++i ) { 01990 buffer.push_back(( wchar_t )( *i ) ); 01991 } 01992 #else // wchar_t fits UTF-32 01993 unicode_char c; 01994 const_iterator i, ie = end(); 01995 for ( i = begin(); i != ie; i.moveNext() ) { 01996 c = i.getCharacter(); 01997 buffer.push_back(( wchar_t )c ); 01998 } 01999 #endif 02000 } 02001 02002 void UString::_load_buffer_UTF32() const 02003 { 02004 _getBufferUTF32Str(); 02005 utf32string& buffer = ( *m_buffer.mUTF32StrBuffer ); 02006 buffer.reserve( length() ); // may over reserve, but should be close enough 02007 02008 unicode_char c; 02009 02010 const_iterator i, ie = end(); 02011 for ( i = begin(); i != ie; i.moveNext() ) { 02012 c = i.getCharacter(); 02013 buffer.push_back( c ); 02014 } 02015 } 02016 02017 }