00001
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022 #include "MyGUI_Precompiled.h"
00023 #include "MyGUI_UString.h"
00024
00025 namespace MyGUI
00026 {
00027
00028
00029 UString::_base_iterator::_base_iterator()
00030 {
00031 mString = 0;
00032 }
00033
00034 void UString::_base_iterator::_seekFwd( size_type c )
00035 {
00036 mIter += c;
00037 }
00038
00039 void UString::_base_iterator::_seekRev( size_type c )
00040 {
00041 mIter -= c;
00042 }
00043
00044 void UString::_base_iterator::_become( const _base_iterator& i )
00045 {
00046 mIter = i.mIter;
00047 mString = i.mString;
00048 }
00049
00050 bool UString::_base_iterator::_test_begin() const
00051 {
00052 return mIter == mString->mData.begin();
00053 }
00054
00055 bool UString::_base_iterator::_test_end() const
00056 {
00057 return mIter == mString->mData.end();
00058 }
00059
00060 UString::size_type UString::_base_iterator::_get_index() const
00061 {
00062 return mIter - mString->mData.begin();
00063 }
00064
00065 void UString::_base_iterator::_jump_to( size_type index )
00066 {
00067 mIter = mString->mData.begin() + index;
00068 }
00069
00070 UString::unicode_char UString::_base_iterator::_getCharacter() const
00071 {
00072 size_type current_index = _get_index();
00073 return mString->getChar( current_index );
00074 }
00075
00076 int UString::_base_iterator::_setCharacter( unicode_char uc )
00077 {
00078 size_type current_index = _get_index();
00079 int change = mString->setChar( current_index, uc );
00080 _jump_to( current_index );
00081 return change;
00082 }
00083
00084 void UString::_base_iterator::_moveNext()
00085 {
00086 _seekFwd( 1 );
00087 if ( _test_end() ) return;
00088 if ( _utf16_surrogate_follow( mIter[0] ) ) {
00089
00090
00091 code_point lead_half = 0;
00092
00093 lead_half = mIter[-1];
00094 if ( _utf16_surrogate_lead( lead_half ) ) {
00095 _seekFwd( 1 );
00096 }
00097 }
00098 }
00099
00100 void UString::_base_iterator::_movePrev()
00101 {
00102 _seekRev( 1 );
00103 if ( _test_begin() ) return;
00104 if ( _utf16_surrogate_follow( mIter[0] ) ) {
00105
00106
00107 code_point lead_half = 0;
00108 lead_half = mIter[-1];
00109 if ( _utf16_surrogate_lead( lead_half ) ) {
00110 _seekRev( 1 );
00111 }
00112 }
00113 }
00114
00115
00116
00117
00118 UString::_fwd_iterator::_fwd_iterator()
00119 {
00120
00121 }
00122
00123 UString::_fwd_iterator::_fwd_iterator( const _fwd_iterator& i )
00124 {
00125 _become( i );
00126 }
00127
00128 UString::_fwd_iterator& UString::_fwd_iterator::operator++()
00129 {
00130 _seekFwd( 1 );
00131 return *this;
00132 }
00133
00134 UString::_fwd_iterator UString::_fwd_iterator::operator++( int )
00135 {
00136 _fwd_iterator tmp( *this );
00137 _seekFwd( 1 );
00138 return tmp;
00139 }
00140
00141 UString::_fwd_iterator& UString::_fwd_iterator::operator--()
00142 {
00143 _seekRev( 1 );
00144 return *this;
00145 }
00146
00147 UString::_fwd_iterator UString::_fwd_iterator::operator--( int )
00148 {
00149 _fwd_iterator tmp( *this );
00150 _seekRev( 1 );
00151 return tmp;
00152 }
00153
00154 UString::_fwd_iterator UString::_fwd_iterator::operator+( difference_type n )
00155 {
00156 _fwd_iterator tmp( *this );
00157 if ( n < 0 )
00158 tmp._seekRev( -n );
00159 else
00160 tmp._seekFwd( n );
00161 return tmp;
00162 }
00163
00164 UString::_fwd_iterator UString::_fwd_iterator::operator-( difference_type n )
00165 {
00166 _fwd_iterator tmp( *this );
00167 if ( n < 0 )
00168 tmp._seekFwd( -n );
00169 else
00170 tmp._seekRev( n );
00171 return tmp;
00172 }
00173
00174 UString::_fwd_iterator& UString::_fwd_iterator::operator+=( difference_type n )
00175 {
00176 if ( n < 0 )
00177 _seekRev( -n );
00178 else
00179 _seekFwd( n );
00180 return *this;
00181 }
00182
00183 UString::_fwd_iterator& UString::_fwd_iterator::operator-=( difference_type n )
00184 {
00185 if ( n < 0 )
00186 _seekFwd( -n );
00187 else
00188 _seekRev( n );
00189 return *this;
00190 }
00191
00192 UString::value_type& UString::_fwd_iterator::operator*() const
00193 {
00194 return *mIter;
00195 }
00196
00197 UString::value_type& UString::_fwd_iterator::operator[]( difference_type n ) const
00198 {
00199 _fwd_iterator tmp( *this );
00200 tmp += n;
00201 return *tmp;
00202 }
00203
00204 UString::_fwd_iterator& UString::_fwd_iterator::moveNext()
00205 {
00206 _moveNext();
00207 return *this;
00208 }
00209
00210 UString::_fwd_iterator& UString::_fwd_iterator::movePrev()
00211 {
00212 _movePrev();
00213 return *this;
00214 }
00215
00216 UString::unicode_char UString::_fwd_iterator::getCharacter() const
00217 {
00218 return _getCharacter();
00219 }
00220
00221 int UString::_fwd_iterator::setCharacter( unicode_char uc )
00222 {
00223 return _setCharacter( uc );
00224 }
00225
00226
00227
00228
00229 UString::_const_fwd_iterator::_const_fwd_iterator()
00230 {
00231
00232 }
00233
00234 UString::_const_fwd_iterator::_const_fwd_iterator( const _const_fwd_iterator& i )
00235 {
00236 _become( i );
00237 }
00238
00239 UString::_const_fwd_iterator::_const_fwd_iterator( const _fwd_iterator& i )
00240 {
00241 _become( i );
00242 }
00243
00244 UString::_const_fwd_iterator& UString::_const_fwd_iterator::operator++()
00245 {
00246 _seekFwd( 1 );
00247 return *this;
00248 }
00249
00250 UString::_const_fwd_iterator UString::_const_fwd_iterator::operator++( int )
00251 {
00252 _const_fwd_iterator tmp( *this );
00253 _seekFwd( 1 );
00254 return tmp;
00255 }
00256
00257 UString::_const_fwd_iterator& UString::_const_fwd_iterator::operator--()
00258 {
00259 _seekRev( 1 );
00260 return *this;
00261 }
00262
00263 UString::_const_fwd_iterator UString::_const_fwd_iterator::operator--( int )
00264 {
00265 _const_fwd_iterator tmp( *this );
00266 _seekRev( 1 );
00267 return tmp;
00268 }
00269
00270 UString::_const_fwd_iterator UString::_const_fwd_iterator::operator+( difference_type n )
00271 {
00272 _const_fwd_iterator tmp( *this );
00273 if ( n < 0 )
00274 tmp._seekRev( -n );
00275 else
00276 tmp._seekFwd( n );
00277 return tmp;
00278 }
00279
00280 UString::_const_fwd_iterator UString::_const_fwd_iterator::operator-( difference_type n )
00281 {
00282 _const_fwd_iterator tmp( *this );
00283 if ( n < 0 )
00284 tmp._seekFwd( -n );
00285 else
00286 tmp._seekRev( n );
00287 return tmp;
00288 }
00289
00290 UString::_const_fwd_iterator& UString::_const_fwd_iterator::operator+=( difference_type n )
00291 {
00292 if ( n < 0 )
00293 _seekRev( -n );
00294 else
00295 _seekFwd( n );
00296 return *this;
00297 }
00298
00299 UString::_const_fwd_iterator& UString::_const_fwd_iterator::operator-=( difference_type n )
00300 {
00301 if ( n < 0 )
00302 _seekFwd( -n );
00303 else
00304 _seekRev( n );
00305 return *this;
00306 }
00307
00308 const UString::value_type& UString::_const_fwd_iterator::operator*() const
00309 {
00310 return *mIter;
00311 }
00312
00313 const UString::value_type& UString::_const_fwd_iterator::operator[]( difference_type n ) const
00314 {
00315 _const_fwd_iterator tmp( *this );
00316 tmp += n;
00317 return *tmp;
00318 }
00319
00320 UString::_const_fwd_iterator& UString::_const_fwd_iterator::moveNext()
00321 {
00322 _moveNext();
00323 return *this;
00324 }
00325
00326 UString::_const_fwd_iterator& UString::_const_fwd_iterator::movePrev()
00327 {
00328 _movePrev();
00329 return *this;
00330 }
00331
00332 UString::unicode_char UString::_const_fwd_iterator::getCharacter() const
00333 {
00334 return _getCharacter();
00335 }
00336
00337
00338
00339
00340 UString::_rev_iterator::_rev_iterator()
00341 {
00342
00343 }
00344
00345 UString::_rev_iterator::_rev_iterator( const _rev_iterator& i )
00346 {
00347 _become( i );
00348 }
00349
00350 UString::_rev_iterator& UString::_rev_iterator::operator++()
00351 {
00352 _seekRev( 1 );
00353 return *this;
00354 }
00355
00356 UString::_rev_iterator UString::_rev_iterator::operator++( int )
00357 {
00358 _rev_iterator tmp( *this );
00359 _seekRev( 1 );
00360 return tmp;
00361 }
00362
00363 UString::_rev_iterator& UString::_rev_iterator::operator--()
00364 {
00365 _seekFwd( 1 );
00366 return *this;
00367 }
00368
00369 UString::_rev_iterator UString::_rev_iterator::operator--( int )
00370 {
00371 _rev_iterator tmp( *this );
00372 _seekFwd( 1 );
00373 return tmp;
00374 }
00375
00376 UString::_rev_iterator UString::_rev_iterator::operator+( difference_type n )
00377 {
00378 _rev_iterator tmp( *this );
00379 if ( n < 0 )
00380 tmp._seekFwd( -n );
00381 else
00382 tmp._seekRev( n );
00383 return tmp;
00384 }
00385
00386 UString::_rev_iterator UString::_rev_iterator::operator-( difference_type n )
00387 {
00388 _rev_iterator tmp( *this );
00389 if ( n < 0 )
00390 tmp._seekRev( -n );
00391 else
00392 tmp._seekFwd( n );
00393 return tmp;
00394 }
00395
00396 UString::_rev_iterator& UString::_rev_iterator::operator+=( difference_type n )
00397 {
00398 if ( n < 0 )
00399 _seekFwd( -n );
00400 else
00401 _seekRev( n );
00402 return *this;
00403 }
00404
00405 UString::_rev_iterator& UString::_rev_iterator::operator-=( difference_type n )
00406 {
00407 if ( n < 0 )
00408 _seekRev( -n );
00409 else
00410 _seekFwd( n );
00411 return *this;
00412 }
00413
00414 UString::value_type& UString::_rev_iterator::operator*() const
00415 {
00416 return mIter[-1];
00417 }
00418
00419 UString::value_type& UString::_rev_iterator::operator[]( difference_type n ) const
00420 {
00421 _rev_iterator tmp( *this );
00422 tmp -= n;
00423 return *tmp;
00424 }
00425
00426
00427
00428
00429 UString::_const_rev_iterator::_const_rev_iterator()
00430 {
00431
00432 }
00433
00434 UString::_const_rev_iterator::_const_rev_iterator( const _const_rev_iterator& i )
00435 {
00436 _become( i );
00437 }
00438
00439 UString::_const_rev_iterator::_const_rev_iterator( const _rev_iterator& i )
00440 {
00441 _become( i );
00442 }
00443
00444 UString::_const_rev_iterator& UString::_const_rev_iterator::operator++()
00445 {
00446 _seekRev( 1 );
00447 return *this;
00448 }
00449
00450 UString::_const_rev_iterator UString::_const_rev_iterator::operator++( int )
00451 {
00452 _const_rev_iterator tmp( *this );
00453 _seekRev( 1 );
00454 return tmp;
00455 }
00456
00457 UString::_const_rev_iterator& UString::_const_rev_iterator::operator--()
00458 {
00459 _seekFwd( 1 );
00460 return *this;
00461 }
00462
00463 UString::_const_rev_iterator UString::_const_rev_iterator::operator--( int )
00464 {
00465 _const_rev_iterator tmp( *this );
00466 _seekFwd( 1 );
00467 return tmp;
00468 }
00469
00470 UString::_const_rev_iterator UString::_const_rev_iterator::operator+( difference_type n )
00471 {
00472 _const_rev_iterator tmp( *this );
00473 if ( n < 0 )
00474 tmp._seekFwd( -n );
00475 else
00476 tmp._seekRev( n );
00477 return tmp;
00478 }
00479
00480 UString::_const_rev_iterator UString::_const_rev_iterator::operator-( difference_type n )
00481 {
00482 _const_rev_iterator tmp( *this );
00483 if ( n < 0 )
00484 tmp._seekRev( -n );
00485 else
00486 tmp._seekFwd( n );
00487 return tmp;
00488 }
00489
00490 UString::_const_rev_iterator& UString::_const_rev_iterator::operator+=( difference_type n )
00491 {
00492 if ( n < 0 )
00493 _seekFwd( -n );
00494 else
00495 _seekRev( n );
00496 return *this;
00497 }
00498
00499 UString::_const_rev_iterator& UString::_const_rev_iterator::operator-=( difference_type n )
00500 {
00501 if ( n < 0 )
00502 _seekRev( -n );
00503 else
00504 _seekFwd( n );
00505 return *this;
00506 }
00507
00508 const UString::value_type& UString::_const_rev_iterator::operator*() const
00509 {
00510 return mIter[-1];
00511 }
00512
00513 const UString::value_type& UString::_const_rev_iterator::operator[]( difference_type n ) const
00514 {
00515 _const_rev_iterator tmp( *this );
00516 tmp -= n;
00517 return *tmp;
00518 }
00519
00520
00521
00522
00523 UString::UString()
00524 {
00525 _init();
00526 }
00527
00528 UString::UString( const UString& copy )
00529 {
00530 _init();
00531 mData = copy.mData;
00532 }
00533
00534 UString::UString( size_type length, const code_point& ch )
00535 {
00536 _init();
00537 assign( length, ch );
00538 }
00539
00540 UString::UString( const code_point* str )
00541 {
00542 _init();
00543 assign( str );
00544 }
00545
00546 UString::UString( const code_point* str, size_type length )
00547 {
00548 _init();
00549 assign( str, length );
00550 }
00551
00552 UString::UString( const UString& str, size_type index, size_type length )
00553 {
00554 _init();
00555 assign( str, index, length );
00556 }
00557
00558 #if MYGUI_IS_NATIVE_WCHAR_T
00559 UString::UString( const wchar_t* w_str )
00560 {
00561 _init();
00562 assign( w_str );
00563 }
00564
00565 UString::UString( const wchar_t* w_str, size_type length )
00566 {
00567 _init();
00568 assign( w_str, length );
00569 }
00570 #endif
00571
00572 UString::UString( const std::wstring& wstr )
00573 {
00574 _init();
00575 assign( wstr );
00576 }
00577
00578 UString::UString( const char* c_str )
00579 {
00580 _init();
00581 assign( c_str );
00582 }
00583
00584 UString::UString( const char* c_str, size_type length )
00585 {
00586 _init();
00587 assign( c_str, length );
00588 }
00589
00590 UString::UString( const std::string& str )
00591 {
00592 _init();
00593 assign( str );
00594 }
00595
00596 UString::~UString()
00597 {
00598 _cleanBuffer();
00599 }
00600
00601 UString::size_type UString::size() const
00602 {
00603 return mData.size();
00604 }
00605
00606 UString::size_type UString::length() const
00607 {
00608 return size();
00609 }
00610
00611 UString::size_type UString::length_Characters() const
00612 {
00613 const_iterator i = begin(), ie = end();
00614 size_type c = 0;
00615 while ( i != ie ) {
00616 i.moveNext();
00617 ++c;
00618 }
00619 return c;
00620 }
00621
00622 UString::size_type UString::max_size() const
00623 {
00624 return mData.max_size();
00625 }
00626
00627 void UString::reserve( size_type size )
00628 {
00629 mData.reserve( size );
00630 }
00631
00632 void UString::resize( size_type num, const code_point& val )
00633 {
00634 mData.resize( num, val );
00635 }
00636
00637 void UString::swap( UString& from )
00638 {
00639 mData.swap( from.mData );
00640 }
00641
00642 bool UString::empty() const
00643 {
00644 return mData.empty();
00645 }
00646
00647 const UString::code_point* UString::c_str() const
00648 {
00649 return mData.c_str();
00650 }
00651
00652 const UString::code_point* UString::data() const
00653 {
00654 return c_str();
00655 }
00656
00657 UString::size_type UString::capacity() const
00658 {
00659 return mData.capacity();
00660 }
00661
00662 void UString::clear()
00663 {
00664 mData.clear();
00665 }
00666
00667 UString UString::substr( size_type index, size_type num ) const
00668 {
00669
00670 dstring data = mData.substr( index, num );
00671 UString tmp;
00672 tmp.mData.swap( data );
00673 return tmp;
00674 }
00675
00676 void UString::push_back( unicode_char val )
00677 {
00678 code_point cp[2];
00679 size_t c = _utf32_to_utf16( val, cp );
00680 if ( c > 0 ) push_back( cp[0] );
00681 if ( c > 1 ) push_back( cp[1] );
00682 }
00683
00684 #if MYGUI_IS_NATIVE_WCHAR_T
00685 void UString::push_back( wchar_t val )
00686 {
00687
00688 mData.push_back( static_cast<code_point>( val ) );
00689 }
00690 #endif
00691
00692 void UString::push_back( code_point val )
00693 {
00694 mData.push_back( val );
00695 }
00696
00697 void UString::push_back( char val )
00698 {
00699 mData.push_back( static_cast<code_point>( val ) );
00700 }
00701
00702 bool UString::inString( unicode_char ch ) const
00703 {
00704 const_iterator i, ie = end();
00705 for ( i = begin(); i != ie; i.moveNext() ) {
00706 if ( i.getCharacter() == ch )
00707 return true;
00708 }
00709 return false;
00710 }
00711
00712 const std::string& UString::asUTF8() const
00713 {
00714 _load_buffer_UTF8();
00715 return *m_buffer.mStrBuffer;
00716 }
00717
00718 const char* UString::asUTF8_c_str() const
00719 {
00720 _load_buffer_UTF8();
00721 return m_buffer.mStrBuffer->c_str();
00722 }
00723
00724 const UString::utf32string& UString::asUTF32() const
00725 {
00726 _load_buffer_UTF32();
00727 return *m_buffer.mUTF32StrBuffer;
00728 }
00729
00730 const UString::unicode_char* UString::asUTF32_c_str() const
00731 {
00732 _load_buffer_UTF32();
00733 return m_buffer.mUTF32StrBuffer->c_str();
00734 }
00735
00736 const std::wstring& UString::asWStr() const
00737 {
00738 _load_buffer_WStr();
00739 return *m_buffer.mWStrBuffer;
00740 }
00741
00742 const wchar_t* UString::asWStr_c_str() const
00743 {
00744 _load_buffer_WStr();
00745 return m_buffer.mWStrBuffer->c_str();
00746 }
00747
00748 UString::code_point& UString::at( size_type loc )
00749 {
00750 return mData.at( loc );
00751 }
00752
00753 const UString::code_point& UString::at( size_type loc ) const
00754 {
00755 return mData.at( loc );
00756 }
00757
00758 UString::unicode_char UString::getChar( size_type loc ) const
00759 {
00760 const code_point* ptr = c_str();
00761 unicode_char uc;
00762 size_t l = _utf16_char_length( ptr[loc] );
00763 code_point cp[2] = {
00764 0, 0
00765 };
00766 cp[0] = ptr[loc];
00767
00768 if ( l == 2 && ( loc + 1 ) < mData.length() ) {
00769 cp[1] = ptr[loc+1];
00770 }
00771 _utf16_to_utf32( cp, uc );
00772 return uc;
00773 }
00774
00775 int UString::setChar( size_type loc, unicode_char ch )
00776 {
00777 code_point cp[2] = {
00778 0, 0
00779 };
00780 size_t l = _utf32_to_utf16( ch, cp );
00781 unicode_char existingChar = getChar( loc );
00782 size_t existingSize = _utf16_char_length( existingChar );
00783 size_t newSize = _utf16_char_length( ch );
00784
00785 if ( newSize > existingSize ) {
00786 at( loc ) = cp[0];
00787 insert( loc + 1, 1, cp[1] );
00788 return 1;
00789 }
00790 if ( newSize < existingSize ) {
00791 erase( loc, 1 );
00792 at( loc ) = cp[0];
00793 return -1;
00794 }
00795
00796
00797 at( loc ) = cp[0];
00798 if ( l == 2 ) at( loc + 1 ) = cp[1];
00799 return 0;
00800 }
00801
00802 UString::iterator UString::begin()
00803 {
00804 iterator i;
00805 i.mIter = mData.begin();
00806 i.mString = this;
00807 return i;
00808 }
00809
00810 UString::const_iterator UString::begin() const
00811 {
00812 const_iterator i;
00813 i.mIter = const_cast<UString*>( this )->mData.begin();
00814 i.mString = const_cast<UString*>( this );
00815 return i;
00816 }
00817
00818 UString::iterator UString::end()
00819 {
00820 iterator i;
00821 i.mIter = mData.end();
00822 i.mString = this;
00823 return i;
00824 }
00825
00826 UString::const_iterator UString::end() const
00827 {
00828 const_iterator i;
00829 i.mIter = const_cast<UString*>( this )->mData.end();
00830 i.mString = const_cast<UString*>( this );
00831 return i;
00832 }
00833
00834 UString::reverse_iterator UString::rbegin()
00835 {
00836 reverse_iterator i;
00837 i.mIter = mData.end();
00838 i.mString = this;
00839 return i;
00840 }
00841
00842 UString::const_reverse_iterator UString::rbegin() const
00843 {
00844 const_reverse_iterator i;
00845 i.mIter = const_cast<UString*>( this )->mData.end();
00846 i.mString = const_cast<UString*>( this );
00847 return i;
00848 }
00849
00850 UString::reverse_iterator UString::rend()
00851 {
00852 reverse_iterator i;
00853 i.mIter = mData.begin();
00854 i.mString = this;
00855 return i;
00856 }
00857
00858 UString::const_reverse_iterator UString::rend() const
00859 {
00860 const_reverse_iterator i;
00861 i.mIter = const_cast<UString*>( this )->mData.begin();
00862 i.mString = const_cast<UString*>( this );
00863 return i;
00864 }
00865
00866 UString& UString::assign( iterator start, iterator end )
00867 {
00868 mData.assign( start.mIter, end.mIter );
00869 return *this;
00870 }
00871
00872 UString& UString::assign( const UString& str )
00873 {
00874 mData.assign( str.mData );
00875 return *this;
00876 }
00877
00878 UString& UString::assign( const code_point* str )
00879 {
00880 mData.assign( str );
00881 return *this;
00882 }
00883
00884 UString& UString::assign( const code_point* str, size_type num )
00885 {
00886 mData.assign( str, num );
00887 return *this;
00888 }
00889
00890 UString& UString::assign( const UString& str, size_type index, size_type len )
00891 {
00892 mData.assign( str.mData, index, len );
00893 return *this;
00894 }
00895
00896 UString& UString::assign( size_type num, const code_point& ch )
00897 {
00898 mData.assign( num, ch );
00899 return *this;
00900 }
00901
00902 UString& UString::assign( const std::wstring& wstr )
00903 {
00904 mData.clear();
00905 mData.reserve( wstr.length() );
00906 #ifdef WCHAR_UTF16 // if we're already working in UTF-16, this is easy
00907 code_point tmp;
00908 std::wstring::const_iterator i, ie = wstr.end();
00909 for ( i = wstr.begin(); i != ie; i++ ) {
00910 tmp = static_cast<code_point>( *i );
00911 mData.push_back( tmp );
00912 }
00913 #else // otherwise we do it the safe way (which is still 100% safe to pass UTF-16 through, just slower)
00914 code_point cp[3] = {0, 0, 0};
00915 unicode_char tmp;
00916 std::wstring::const_iterator i, ie = wstr.end();
00917 for ( i = wstr.begin(); i != ie; i++ ) {
00918 tmp = static_cast<unicode_char>( *i );
00919 size_t l = _utf32_to_utf16( tmp, cp );
00920 if ( l > 0 ) mData.push_back( cp[0] );
00921 if ( l > 1 ) mData.push_back( cp[1] );
00922 }
00923 #endif
00924 return *this;
00925 }
00926
00927 #if MYGUI_IS_NATIVE_WCHAR_T
00928 UString& UString::assign( const wchar_t* w_str )
00929 {
00930 std::wstring tmp;
00931 tmp.assign( w_str );
00932 return assign( tmp );
00933 }
00934
00935 UString& UString::assign( const wchar_t* w_str, size_type num )
00936 {
00937 std::wstring tmp;
00938 tmp.assign( w_str, num );
00939 return assign( tmp );
00940 }
00941 #endif
00942
00943 UString& UString::assign( const std::string& str )
00944 {
00945 size_type len = _verifyUTF8( str );
00946 clear();
00947 reserve( len );
00948
00949
00950
00951
00952 unicode_char uc;
00953 unsigned char utf8buf[7];
00954 utf8buf[6] = 0;
00955 size_t utf8len;
00956 code_point utf16buff[3];
00957 utf16buff[2] = 0;
00958 size_t utf16len;
00959
00960 std::string::const_iterator i, ie = str.end();
00961 for ( i = str.begin(); i != ie; i++ ) {
00962 utf8len = _utf8_char_length( static_cast<unsigned char>( *i ) );
00963 for ( size_t j = 0; j < utf8len; j++ ) {
00964 utf8buf[j] = ( static_cast<unsigned char>( *( i + j ) ) );
00965 }
00966 utf8buf[utf8len] = 0;
00967 utf8len = _utf8_to_utf32( utf8buf, uc );
00968 i += utf8len - 1;
00969
00970 utf16len = _utf32_to_utf16( uc, utf16buff );
00971 append( utf16buff, utf16len );
00972 }
00973 return *this;
00974 }
00975
00976 UString& UString::assign( const char* c_str )
00977 {
00978 std::string tmp( c_str );
00979 return assign( tmp );
00980 }
00981
00982 UString& UString::assign( const char* c_str, size_type num )
00983 {
00984 std::string tmp;
00985 tmp.assign( c_str, num );
00986 return assign( tmp );
00987 }
00988
00989 UString& UString::append( const UString& str )
00990 {
00991 mData.append( str.mData );
00992 return *this;
00993 }
00994
00995 UString& UString::append( const code_point* str )
00996 {
00997 mData.append( str );
00998 return *this;
00999 }
01000
01001 UString& UString::append( const UString& str, size_type index, size_type len )
01002 {
01003 mData.append( str.mData, index, len );
01004 return *this;
01005 }
01006
01007 UString& UString::append( const code_point* str, size_type num )
01008 {
01009 mData.append( str, num );
01010 return *this;
01011 }
01012
01013 UString& UString::append( size_type num, code_point ch )
01014 {
01015 mData.append( num, ch );
01016 return *this;
01017 }
01018
01019 UString& UString::append( iterator start, iterator end )
01020 {
01021 mData.append( start.mIter, end.mIter );
01022 return *this;
01023 }
01024
01025 #if MYGUI_IS_NATIVE_WCHAR_T
01026 UString& UString::append( const wchar_t* w_str, size_type num )
01027 {
01028 std::wstring tmp( w_str, num );
01029 return append( tmp );
01030 }
01031
01032 UString& UString::append( size_type num, wchar_t ch )
01033 {
01034 return append( num, static_cast<unicode_char>( ch ) );
01035 }
01036 #endif
01037 UString& UString::append( const char* c_str, size_type num )
01038 {
01039 UString tmp( c_str, num );
01040 append( tmp );
01041 return *this;
01042 }
01043
01044 UString& UString::append( size_type num, char ch )
01045 {
01046 append( num, static_cast<code_point>( ch ) );
01047 return *this;
01048 }
01049
01050 UString& UString::append( size_type num, unicode_char ch )
01051 {
01052 code_point cp[2] = {0, 0};
01053 if ( _utf32_to_utf16( ch, cp ) == 2 ) {
01054 for ( size_type i = 0; i < num; i++ ) {
01055 append( 1, cp[0] );
01056 append( 1, cp[1] );
01057 }
01058 } else {
01059 for ( size_type i = 0; i < num; i++ ) {
01060 append( 1, cp[0] );
01061 }
01062 }
01063 return *this;
01064 }
01065
01066 UString::iterator UString::insert( iterator i, const code_point& ch )
01067 {
01068 iterator ret;
01069 ret.mIter = mData.insert( i.mIter, ch );
01070 ret.mString = this;
01071 return ret;
01072 }
01073
01074 UString& UString::insert( size_type index, const UString& str )
01075 {
01076 mData.insert( index, str.mData );
01077 return *this;
01078 }
01079
01080 UString& UString::insert( size_type index1, const UString& str, size_type index2, size_type num )
01081 {
01082 mData.insert( index1, str.mData, index2, num );
01083 return *this;
01084 }
01085
01086 void UString::insert( iterator i, iterator start, iterator end )
01087 {
01088 mData.insert( i.mIter, start.mIter, end.mIter );
01089 }
01090
01091 UString& UString::insert( size_type index, const code_point* str, size_type num )
01092 {
01093 mData.insert( index, str, num );
01094 return *this;
01095 }
01096
01097 #if MYGUI_IS_NATIVE_WCHAR_T
01098 UString& UString::insert( size_type index, const wchar_t* w_str, size_type num )
01099 {
01100 UString tmp( w_str, num );
01101 insert( index, tmp );
01102 return *this;
01103 }
01104 #endif
01105
01106 UString& UString::insert( size_type index, const char* c_str, size_type num )
01107 {
01108 UString tmp( c_str, num );
01109 insert( index, tmp );
01110 return *this;
01111 }
01112
01113 UString& UString::insert( size_type index, size_type num, code_point ch )
01114 {
01115 mData.insert( index, num, ch );
01116 return *this;
01117 }
01118
01119 #if MYGUI_IS_NATIVE_WCHAR_T
01120 UString& UString::insert( size_type index, size_type num, wchar_t ch )
01121 {
01122 insert( index, num, static_cast<unicode_char>( ch ) );
01123 return *this;
01124 }
01125 #endif
01126
01127 UString& UString::insert( size_type index, size_type num, char ch )
01128 {
01129 insert( index, num, static_cast<code_point>( ch ) );
01130 return *this;
01131 }
01132
01133 UString& UString::insert( size_type index, size_type num, unicode_char ch )
01134 {
01135 code_point cp[3] = {0, 0, 0};
01136 size_t l = _utf32_to_utf16( ch, cp );
01137 if ( l == 1 ) {
01138 return insert( index, num, cp[0] );
01139 }
01140 for ( size_type c = 0; c < num; c++ ) {
01141
01142 insert( index, 1, cp[1] );
01143 insert( index, 1, cp[0] );
01144 }
01145 return *this;
01146 }
01147
01148 void UString::insert( iterator i, size_type num, const code_point& ch )
01149 {
01150 mData.insert( i.mIter, num, ch );
01151 }
01152 #if MYGUI_IS_NATIVE_WCHAR_T
01153 void UString::insert( iterator i, size_type num, const wchar_t& ch )
01154 {
01155 insert( i, num, static_cast<unicode_char>( ch ) );
01156 }
01157 #endif
01158
01159 void UString::insert( iterator i, size_type num, const char& ch )
01160 {
01161 insert( i, num, static_cast<code_point>( ch ) );
01162 }
01163
01164 void UString::insert( iterator i, size_type num, const unicode_char& ch )
01165 {
01166 code_point cp[3] = {0, 0, 0};
01167 size_t l = _utf32_to_utf16( ch, cp );
01168 if ( l == 1 ) {
01169 insert( i, num, cp[0] );
01170 } else {
01171 for ( size_type c = 0; c < num; c++ ) {
01172
01173 insert( i, 1, cp[1] );
01174 insert( i, 1, cp[0] );
01175 }
01176 }
01177 }
01178
01179 UString::iterator UString::erase( iterator loc )
01180 {
01181 iterator ret;
01182 ret.mIter = mData.erase( loc.mIter );
01183 ret.mString = this;
01184 return ret;
01185 }
01186
01187 UString::iterator UString::erase( iterator start, iterator end )
01188 {
01189 iterator ret;
01190 ret.mIter = mData.erase( start.mIter, end.mIter );
01191 ret.mString = this;
01192 return ret;
01193 }
01194
01195 UString& UString::erase( size_type index , size_type num )
01196 {
01197 if ( num == npos )
01198 mData.erase( index );
01199 else
01200 mData.erase( index, num );
01201 return *this;
01202 }
01203
01204 UString& UString::replace( size_type index1, size_type num1, const UString& str )
01205 {
01206 mData.replace( index1, num1, str.mData, 0, npos );
01207 return *this;
01208 }
01209
01210 UString& UString::replace( size_type index1, size_type num1, const UString& str, size_type num2 )
01211 {
01212 mData.replace( index1, num1, str.mData, 0, num2 );
01213 return *this;
01214 }
01215
01216 UString& UString::replace( size_type index1, size_type num1, const UString& str, size_type index2, size_type num2 )
01217 {
01218 mData.replace( index1, num1, str.mData, index2, num2 );
01219 return *this;
01220 }
01221
01222 UString& UString::replace( iterator start, iterator end, const UString& str, size_type num )
01223 {
01224 _const_fwd_iterator st(start);
01225
01226 size_type index1 = begin() - st;
01227 size_type num1 = end - st;
01228 return replace( index1, num1, str, 0, num );
01229 }
01230
01231 UString& UString::replace( size_type index, size_type num1, size_type num2, code_point ch )
01232 {
01233 mData.replace( index, num1, num2, ch );
01234 return *this;
01235 }
01236
01237 UString& UString::replace( iterator start, iterator end, size_type num, code_point ch )
01238 {
01239 _const_fwd_iterator st(start);
01240
01241 size_type index1 = begin() - st;
01242 size_type num1 = end - st;
01243 return replace( index1, num1, num, ch );
01244 }
01245
01246 int UString::compare( const UString& str ) const
01247 {
01248 return mData.compare( str.mData );
01249 }
01250
01251 int UString::compare( const code_point* str ) const
01252 {
01253 return mData.compare( str );
01254 }
01255
01256 int UString::compare( size_type index, size_type length, const UString& str ) const
01257 {
01258 return mData.compare( index, length, str.mData );
01259 }
01260
01261 int UString::compare( size_type index, size_type length, const UString& str, size_type index2, size_type length2 ) const
01262 {
01263 return mData.compare( index, length, str.mData, index2, length2 );
01264 }
01265
01266 int UString::compare( size_type index, size_type length, const code_point* str, size_type length2 ) const
01267 {
01268 return mData.compare( index, length, str, length2 );
01269 }
01270
01271 #if MYGUI_IS_NATIVE_WCHAR_T
01272 int UString::compare( size_type index, size_type length, const wchar_t* w_str, size_type length2 ) const
01273 {
01274 UString tmp( w_str, length2 );
01275 return compare( index, length, tmp );
01276 }
01277 #endif
01278
01279 int UString::compare( size_type index, size_type length, const char* c_str, size_type length2 ) const
01280 {
01281 UString tmp( c_str, length2 );
01282 return compare( index, length, tmp );
01283 }
01284
01285 UString::size_type UString::find( const UString& str, size_type index ) const
01286 {
01287 return mData.find( str.c_str(), index );
01288 }
01289
01290 UString::size_type UString::find( const code_point* cp_str, size_type index, size_type length ) const
01291 {
01292 UString tmp( cp_str );
01293 return mData.find( tmp.c_str(), index, length );
01294 }
01295
01296 UString::size_type UString::find( const char* c_str, size_type index, size_type length ) const
01297 {
01298 UString tmp( c_str );
01299 return mData.find( tmp.c_str(), index, length );
01300 }
01301
01302 #if MYGUI_IS_NATIVE_WCHAR_T
01303 UString::size_type UString::find( const wchar_t* w_str, size_type index, size_type length ) const
01304 {
01305 UString tmp( w_str );
01306 return mData.find( tmp.c_str(), index, length );
01307 }
01308 #endif
01309
01310 UString::size_type UString::find( char ch, size_type index ) const
01311 {
01312 return find( static_cast<code_point>( ch ), index );
01313 }
01314
01315 UString::size_type UString::find( code_point ch, size_type index ) const
01316 {
01317 return mData.find( ch, index );
01318 }
01319
#if MYGUI_IS_NATIVE_WCHAR_T
// Searches forward from index for a single wide character.
// The character is widened to a unicode_char and the unicode_char overload is used.
UString::size_type UString::find( wchar_t ch, size_type index ) const
{
	const unicode_char scalar = static_cast<unicode_char>( ch );
	return find( scalar, index );
}
#endif
01326
01327 UString::size_type UString::find( unicode_char ch, size_type index ) const
01328 {
01329 code_point cp[3] = {0, 0, 0};
01330 size_t l = _utf32_to_utf16( ch, cp );
01331 return find( UString( cp, l ), index );
01332 }
01333
01334 UString::size_type UString::rfind( const UString& str, size_type index ) const
01335 {
01336 return mData.rfind( str.c_str(), index );
01337 }
01338
01339 UString::size_type UString::rfind( const code_point* cp_str, size_type index, size_type num ) const
01340 {
01341 UString tmp( cp_str );
01342 return mData.rfind( tmp.c_str(), index, num );
01343 }
01344
01345 UString::size_type UString::rfind( const char* c_str, size_type index, size_type num ) const
01346 {
01347 UString tmp( c_str );
01348 return mData.rfind( tmp.c_str(), index, num );
01349 }
01350
#if MYGUI_IS_NATIVE_WCHAR_T
// Searches backward from index for a wide-character needle.
// w_str is converted to a temporary UString; the first `num` code points of
// that converted string are then searched for in mData.
// NOTE(review): `num` is applied to the converted buffer and is not clamped
// to its size here - callers presumably must keep it within range; confirm.
UString::size_type UString::rfind( const wchar_t* w_str, size_type index, size_type num ) const
{
	const UString needle( w_str );
	return mData.rfind( needle.c_str(), index, num );
}
#endif
01358
01359 UString::size_type UString::rfind( char ch, size_type index ) const
01360 {
01361 return rfind( static_cast<code_point>( ch ), index );
01362 }
01363
01364 UString::size_type UString::rfind( code_point ch, size_type index ) const
01365 {
01366 return mData.rfind( ch, index );
01367 }
01368
#if MYGUI_IS_NATIVE_WCHAR_T
// Searches backward from index for a single wide character.
// The character is widened to a unicode_char and the unicode_char overload is used.
UString::size_type UString::rfind( wchar_t ch, size_type index ) const
{
	const unicode_char scalar = static_cast<unicode_char>( ch );
	return rfind( scalar, index );
}
#endif
01375
01376 UString::size_type UString::rfind( unicode_char ch, size_type index ) const
01377 {
01378 code_point cp[3] = {0, 0, 0};
01379 size_t l = _utf32_to_utf16( ch, cp );
01380 return rfind( UString( cp, l ), index );
01381 }
01382
01383 UString::size_type UString::find_first_of( const UString &str, size_type index , size_type num ) const
01384 {
01385 size_type i = 0;
01386 const size_type len = length();
01387 while ( i < num && ( index + i ) < len ) {
01388 unicode_char ch = getChar( index + i );
01389 if ( str.inString( ch ) )
01390 return index + i;
01391 i += _utf16_char_length( ch );
01392 }
01393 return npos;
01394 }
01395
01396 UString::size_type UString::find_first_of( code_point ch, size_type index ) const
01397 {
01398 UString tmp;
01399 tmp.assign( 1, ch );
01400 return find_first_of( tmp, index );
01401 }
01402
01403 UString::size_type UString::find_first_of( char ch, size_type index ) const
01404 {
01405 return find_first_of( static_cast<code_point>( ch ), index );
01406 }
01407
#if MYGUI_IS_NATIVE_WCHAR_T
// Wide character variant: widens ch to a unicode_char and delegates.
UString::size_type UString::find_first_of( wchar_t ch, size_type index ) const
{
	const unicode_char scalar = static_cast<unicode_char>( ch );
	return find_first_of( scalar, index );
}
#endif
01414
01415 UString::size_type UString::find_first_of( unicode_char ch, size_type index ) const
01416 {
01417 code_point cp[3] = {0, 0, 0};
01418 size_t l = _utf32_to_utf16( ch, cp );
01419 return find_first_of( UString( cp, l ), index );
01420 }
01421
01422 UString::size_type UString::find_first_not_of( const UString& str, size_type index , size_type num ) const
01423 {
01424 size_type i = 0;
01425 const size_type len = length();
01426 while ( i < num && ( index + i ) < len ) {
01427 unicode_char ch = getChar( index + i );
01428 if ( !str.inString( ch ) )
01429 return index + i;
01430 i += _utf16_char_length( ch );
01431 }
01432 return npos;
01433 }
01434
01435 UString::size_type UString::find_first_not_of( code_point ch, size_type index ) const
01436 {
01437 UString tmp;
01438 tmp.assign( 1, ch );
01439 return find_first_not_of( tmp, index );
01440 }
01441
01442 UString::size_type UString::find_first_not_of( char ch, size_type index ) const
01443 {
01444 return find_first_not_of( static_cast<code_point>( ch ), index );
01445 }
01446
#if MYGUI_IS_NATIVE_WCHAR_T
// Wide character variant: widens ch to a unicode_char and delegates.
UString::size_type UString::find_first_not_of( wchar_t ch, size_type index ) const
{
	const unicode_char scalar = static_cast<unicode_char>( ch );
	return find_first_not_of( scalar, index );
}
#endif
01453
01454 UString::size_type UString::find_first_not_of( unicode_char ch, size_type index ) const
01455 {
01456 code_point cp[3] = {0, 0, 0};
01457 size_t l = _utf32_to_utf16( ch, cp );
01458 return find_first_not_of( UString( cp, l ), index );
01459 }
01460
01461 UString::size_type UString::find_last_of( const UString& str, size_type index , size_type num ) const
01462 {
01463 size_type i = 0;
01464 const size_type len = length();
01465 if ( index > len ) index = len - 1;
01466
01467 while ( i < num && ( index - i ) != npos ) {
01468 size_type j = index - i;
01469
01470 if ( j != 0 && _utf16_surrogate_follow( at( j ) ) && _utf16_surrogate_lead( at( j - 1 ) ) ) {
01471 j = index - ++i;
01472 }
01473
01474 unicode_char ch = getChar( j );
01475 if ( str.inString( ch ) )
01476 return j;
01477 i++;
01478 }
01479 return npos;
01480 }
01481
01482 UString::size_type UString::find_last_of( code_point ch, size_type index ) const
01483 {
01484 UString tmp;
01485 tmp.assign( 1, ch );
01486 return find_last_of( tmp, index );
01487 }
01488
#if MYGUI_IS_NATIVE_WCHAR_T
// Wide character variant: widens ch to a unicode_char and delegates.
UString::size_type UString::find_last_of( wchar_t ch, size_type index ) const
{
	const unicode_char scalar = static_cast<unicode_char>( ch );
	return find_last_of( scalar, index );
}
#endif
01495
01496 UString::size_type UString::find_last_of( unicode_char ch, size_type index ) const
01497 {
01498 code_point cp[3] = {0, 0, 0};
01499 size_t l = _utf32_to_utf16( ch, cp );
01500 return find_last_of( UString( cp, l ), index );
01501 }
01502
01503 UString::size_type UString::find_last_not_of( const UString& str, size_type index , size_type num ) const
01504 {
01505 size_type i = 0;
01506 const size_type len = length();
01507 if ( index > len ) index = len - 1;
01508
01509 while ( i < num && ( index - i ) != npos ) {
01510 size_type j = index - i;
01511
01512 if ( j != 0 && _utf16_surrogate_follow( at( j ) ) && _utf16_surrogate_lead( at( j - 1 ) ) ) {
01513 j = index - ++i;
01514 }
01515
01516 unicode_char ch = getChar( j );
01517 if ( !str.inString( ch ) )
01518 return j;
01519 i++;
01520 }
01521 return npos;
01522 }
01523
01524 UString::size_type UString::find_last_not_of( code_point ch, size_type index ) const
01525 {
01526 UString tmp;
01527 tmp.assign( 1, ch );
01528 return find_last_not_of( tmp, index );
01529 }
01530
01531 UString::size_type UString::find_last_not_of( char ch, size_type index ) const
01532 {
01533 return find_last_not_of( static_cast<code_point>( ch ), index );
01534 }
01535
#if MYGUI_IS_NATIVE_WCHAR_T
// Wide character variant: widens ch to a unicode_char and delegates.
UString::size_type UString::find_last_not_of( wchar_t ch, size_type index ) const
{
	const unicode_char scalar = static_cast<unicode_char>( ch );
	return find_last_not_of( scalar, index );
}
#endif
01542
01543 UString::size_type UString::find_last_not_of( unicode_char ch, size_type index ) const
01544 {
01545 code_point cp[3] = {0, 0, 0};
01546 size_t l = _utf32_to_utf16( ch, cp );
01547 return find_last_not_of( UString( cp, l ), index );
01548 }
01549
01550 bool UString::operator<( const UString& right ) const
01551 {
01552 return compare( right ) < 0;
01553 }
01554
01555 bool UString::operator<=( const UString& right ) const
01556 {
01557 return compare( right ) <= 0;
01558 }
01559
01560 UString& UString::operator=( const UString& s )
01561 {
01562 return assign( s );
01563 }
01564
01565 UString& UString::operator=( code_point ch )
01566 {
01567 clear();
01568 return append( 1, ch );
01569 }
01570
01571 UString& UString::operator=( char ch )
01572 {
01573 clear();
01574 return append( 1, ch );
01575 }
01576
#if MYGUI_IS_NATIVE_WCHAR_T
// Replaces the contents with a single wide character: the string is emptied
// and one copy of ch is appended.
UString& UString::operator=( wchar_t ch )
{
	clear();
	UString& appended = append( 1, ch );
	return appended;
}
#endif
01584
01585 UString& UString::operator=( unicode_char ch )
01586 {
01587 clear();
01588 return append( 1, ch );
01589 }
01590
01591 bool UString::operator>( const UString& right ) const
01592 {
01593 return compare( right ) > 0;
01594 }
01595
01596 bool UString::operator>=( const UString& right ) const
01597 {
01598 return compare( right ) >= 0;
01599 }
01600
01601 bool UString::operator==( const UString& right ) const
01602 {
01603 return compare( right ) == 0;
01604 }
01605
01606 bool UString::operator!=( const UString& right ) const
01607 {
01608 return !operator==( right );
01609 }
01610
01611 UString::code_point& UString::operator[]( size_type index )
01612 {
01613 return at( index );
01614 }
01615
01616 const UString::code_point& UString::operator[]( size_type index ) const
01617 {
01618 return at( index );
01619 }
01620
01621 UString::operator std::string() const
01622 {
01623 return std::string( asUTF8() );
01624 }
01625
01627 UString::operator std::wstring() const
01628 {
01629 return std::wstring( asWStr() );
01630 }
01631
01632
01633 bool UString::_utf16_independent_char( code_point cp )
01634 {
01635 if ( 0xD800 <= cp && cp <= 0xDFFF )
01636 return false;
01637 return true;
01638 }
01639
01640 bool UString::_utf16_surrogate_lead( code_point cp )
01641 {
01642 if ( 0xD800 <= cp && cp <= 0xDBFF )
01643 return true;
01644 return false;
01645 }
01646
01647 bool UString::_utf16_surrogate_follow( code_point cp )
01648 {
01649 if ( 0xDC00 <= cp && cp <= 0xDFFF )
01650 return true;
01651 return false;
01652 }
01653
01654 size_t UString::_utf16_char_length( code_point cp )
01655 {
01656 if ( 0xD800 <= cp && cp <= 0xDBFF )
01657 return 2;
01658 return 1;
01659 }
01660
01661 size_t UString::_utf16_char_length( unicode_char uc )
01662 {
01663 if ( uc > 0xFFFF )
01664 return 2;
01665 return 1;
01666 }
01667
01668 size_t UString::_utf16_to_utf32( const code_point in_cp[2], unicode_char& out_uc )
01669 {
01670 const code_point& cp1 = in_cp[0];
01671 const code_point& cp2 = in_cp[1];
01672 bool wordPair = false;
01673
01674
01675 if ( 0xD800 <= cp1 && cp1 <= 0xDBFF ) {
01676
01677 if ( 0xDC00 <= cp2 && cp2 <= 0xDFFF )
01678 wordPair = true;
01679 }
01680
01681 if ( !wordPair ) {
01682 out_uc = cp1;
01683 return 1;
01684 }
01685
01686 unsigned short cU = cp1, cL = cp2;
01687 cU -= 0xD800;
01688 cL -= 0xDC00;
01689
01690 out_uc = ( cU & 0x03FF ) << 10;
01691 out_uc |= ( cL & 0x03FF );
01692 out_uc += 0x10000;
01693
01694 return 2;
01695 }
01696
01697 size_t UString::_utf32_to_utf16( const unicode_char& in_uc, code_point out_cp[2] )
01698 {
01699 if ( in_uc <= 0xFFFF ) {
01700 out_cp[0] = static_cast<code_point>(in_uc);
01701 return 1;
01702 }
01703 unicode_char uc = in_uc;
01704 unsigned short tmp;
01705 uc -= 0x10000;
01706
01707
01708 tmp = static_cast<unsigned short>(( uc >> 10 ) & 0x03FF);
01709 tmp += 0xD800;
01710 out_cp[0] = tmp;
01711
01712
01713 tmp = static_cast<unsigned short>(uc & 0x03FF);
01714 tmp += 0xDC00;
01715 out_cp[1] = tmp;
01716
01717 return 2;
01718 }
01719
01720 bool UString::_utf8_start_char( unsigned char cp )
01721 {
01722 return ( cp & ~_cont_mask ) != _cont;
01723 }
01724
01725 size_t UString::_utf8_char_length( unsigned char cp )
01726 {
01727 if ( !( cp & 0x80 ) ) return 1;
01728 if (( cp & ~_lead1_mask ) == _lead1 ) return 2;
01729 if (( cp & ~_lead2_mask ) == _lead2 ) return 3;
01730 if (( cp & ~_lead3_mask ) == _lead3 ) return 4;
01731 if (( cp & ~_lead4_mask ) == _lead4 ) return 5;
01732 if (( cp & ~_lead5_mask ) == _lead5 ) return 6;
01733 throw invalid_data( "invalid UTF-8 sequence header value" );
01734 }
01735
01736 size_t UString::_utf8_char_length( unicode_char uc )
01737 {
01738
01739
01740
01741
01742
01743
01744
01745
01746 if ( !( uc & ~0x0000007F ) ) return 1;
01747 if ( !( uc & ~0x000007FF ) ) return 2;
01748 if ( !( uc & ~0x0000FFFF ) ) return 3;
01749 if ( !( uc & ~0x001FFFFF ) ) return 4;
01750 if ( !( uc & ~0x03FFFFFF ) ) return 5;
01751 if ( !( uc & ~0x7FFFFFFF ) ) return 6;
01752 throw invalid_data( "invalid UTF-32 value" );
01753 }
01754
01755 size_t UString::_utf8_to_utf32( const unsigned char in_cp[6], unicode_char& out_uc )
01756 {
01757 size_t len = _utf8_char_length( in_cp[0] );
01758 if ( len == 1 ) {
01759 out_uc = in_cp[0];
01760 return 1;
01761 }
01762
01763 unicode_char c = 0;
01764 size_t i = 0;
01765 switch ( len ) {
01766 case 6:
01767 c = in_cp[i] & _lead5_mask;
01768 break;
01769 case 5:
01770 c = in_cp[i] & _lead4_mask;
01771 break;
01772 case 4:
01773 c = in_cp[i] & _lead3_mask;
01774 break;
01775 case 3:
01776 c = in_cp[i] & _lead2_mask;
01777 break;
01778 case 2:
01779 c = in_cp[i] & _lead1_mask;
01780 break;
01781 }
01782
01783 for ( ++i; i < len; i++ ) {
01784 if (( in_cp[i] & ~_cont_mask ) != _cont )
01785 throw invalid_data( "bad UTF-8 continuation byte" );
01786 c <<= 6;
01787 c |= ( in_cp[i] & _cont_mask );
01788 }
01789
01790 out_uc = c;
01791 return len;
01792 }
01793
01794 size_t UString::_utf32_to_utf8( const unicode_char& in_uc, unsigned char out_cp[6] )
01795 {
01796 size_t len = _utf8_char_length( in_uc );
01797 unicode_char c = in_uc;
01798
01799
01800 for ( size_t i = len - 1; i > 0; i-- ) {
01801 out_cp[i] = static_cast<unsigned char>((( c ) & _cont_mask ) | _cont);
01802 c >>= 6;
01803 }
01804
01805
01806 switch ( len ) {
01807 case 6:
01808 out_cp[0] = static_cast<unsigned char>((( c ) & _lead5_mask ) | _lead5);
01809 break;
01810 case 5:
01811 out_cp[0] = static_cast<unsigned char>((( c ) & _lead4_mask ) | _lead4);
01812 break;
01813 case 4:
01814 out_cp[0] = static_cast<unsigned char>((( c ) & _lead3_mask ) | _lead3);
01815 break;
01816 case 3:
01817 out_cp[0] = static_cast<unsigned char>((( c ) & _lead2_mask ) | _lead2);
01818 break;
01819 case 2:
01820 out_cp[0] = static_cast<unsigned char>((( c ) & _lead1_mask ) | _lead1);
01821 break;
01822 case 1:
01823 default:
01824 out_cp[0] = static_cast<unsigned char>(( c ) & 0x7F);
01825 break;
01826 }
01827
01828
01829 return len;
01830 }
01831
01832 UString::size_type UString::_verifyUTF8( const unsigned char* c_str )
01833 {
01834 std::string tmp( reinterpret_cast<const char*>( c_str ) );
01835 return _verifyUTF8( tmp );
01836 }
01837
01838 UString::size_type UString::_verifyUTF8( const std::string& str )
01839 {
01840 std::string::const_iterator i, ie = str.end();
01841 i = str.begin();
01842 size_type length = 0;
01843
01844 while ( i != ie ) {
01845
01846 if (( *i ) & 0x80 ) {
01847 unsigned char c = ( *i );
01848 size_t contBytes = 0;
01849
01850
01851 if (( c & ~_lead1_mask ) == _lead1 ) {
01852 if ( c == _lead1 ) throw invalid_data( "overlong UTF-8 sequence" );
01853 contBytes = 1;
01854
01855 } else if (( c & ~_lead2_mask ) == _lead2 ) {
01856 contBytes = 2;
01857 if ( c == _lead2 ) {
01858 c = ( *( i + 1 ) );
01859 if (( c & _lead2 ) == _cont ) throw invalid_data( "overlong UTF-8 sequence" );
01860 }
01861
01862 } else if (( c & ~_lead3_mask ) == _lead3 ) {
01863 contBytes = 3;
01864 if ( c == _lead3 ) {
01865 c = ( *( i + 1 ) );
01866 if (( c & _lead3 ) == _cont ) throw invalid_data( "overlong UTF-8 sequence" );
01867 }
01868
01869 } else if (( c & ~_lead4_mask ) == _lead4 ) {
01870 contBytes = 4;
01871 if ( c == _lead4 ) {
01872 c = ( *( i + 1 ) );
01873 if (( c & _lead4 ) == _cont ) throw invalid_data( "overlong UTF-8 sequence" );
01874 }
01875
01876 } else if (( c & ~_lead5_mask ) == _lead5 ) {
01877 contBytes = 5;
01878 if ( c == _lead5 ) {
01879 c = ( *( i + 1 ) );
01880 if (( c & _lead5 ) == _cont ) throw invalid_data( "overlong UTF-8 sequence" );
01881 }
01882 }
01883
01884
01885 while ( contBytes-- ) {
01886 c = ( *( ++i ) );
01887 if (( c & ~_cont_mask ) != _cont )
01888 throw invalid_data( "bad UTF-8 continuation byte" );
01889 }
01890 }
01891 length++;
01892 i++;
01893 }
01894 return length;
01895 }
01896
01897 void UString::_init()
01898 {
01899 m_buffer.mVoidBuffer = 0;
01900 m_bufferType = bt_none;
01901 m_bufferSize = 0;
01902 }
01903
01904 void UString::_cleanBuffer() const
01905 {
01906 if ( m_buffer.mVoidBuffer != 0 ) {
01907 switch ( m_bufferType ) {
01908 case bt_string:
01909 delete m_buffer.mStrBuffer;
01910 break;
01911 case bt_wstring:
01912 delete m_buffer.mWStrBuffer;
01913 break;
01914 case bt_utf32string:
01915 delete m_buffer.mUTF32StrBuffer;
01916 break;
01917 case bt_none:
01918 default:
01919
01920
01921 assert("This should never happen - mVoidBuffer should never contain something if we "
01922 "don't know the type");
01923 break;
01924 }
01925 m_buffer.mVoidBuffer = 0;
01926 m_bufferSize = 0;
01927 m_bufferType = bt_none;
01928 }
01929 }
01930
01931 void UString::_getBufferStr() const
01932 {
01933 if ( m_bufferType != bt_string ) {
01934 _cleanBuffer();
01935 m_buffer.mStrBuffer = new std::string();
01936 m_bufferType = bt_string;
01937 }
01938 m_buffer.mStrBuffer->clear();
01939 }
01940
01941 void UString::_getBufferWStr() const
01942 {
01943 if ( m_bufferType != bt_wstring ) {
01944 _cleanBuffer();
01945 m_buffer.mWStrBuffer = new std::wstring();
01946 m_bufferType = bt_wstring;
01947 }
01948 m_buffer.mWStrBuffer->clear();
01949 }
01950
01951 void UString::_getBufferUTF32Str() const
01952 {
01953 if ( m_bufferType != bt_utf32string ) {
01954 _cleanBuffer();
01955 m_buffer.mUTF32StrBuffer = new utf32string();
01956 m_bufferType = bt_utf32string;
01957 }
01958 m_buffer.mUTF32StrBuffer->clear();
01959 }
01960
01961 void UString::_load_buffer_UTF8() const
01962 {
01963 _getBufferStr();
01964 std::string& buffer = ( *m_buffer.mStrBuffer );
01965 buffer.reserve( length() );
01966
01967 unsigned char utf8buf[6];
01968 char* charbuf = ( char* )utf8buf;
01969 unicode_char c;
01970 size_t len;
01971
01972 const_iterator i, ie = end();
01973 for ( i = begin(); i != ie; i.moveNext() ) {
01974 c = i.getCharacter();
01975 len = _utf32_to_utf8( c, utf8buf );
01976 size_t j = 0;
01977 while ( j < len )
01978 buffer.push_back( charbuf[j++] );
01979 }
01980 }
01981
01982 void UString::_load_buffer_WStr() const
01983 {
01984 _getBufferWStr();
01985 std::wstring& buffer = ( *m_buffer.mWStrBuffer );
01986 buffer.reserve( length() );
01987 #ifdef WCHAR_UTF16 // wchar_t matches UTF-16
01988 const_iterator i, ie = end();
01989 for ( i = begin(); i != ie; ++i ) {
01990 buffer.push_back(( wchar_t )( *i ) );
01991 }
01992 #else // wchar_t fits UTF-32
01993 unicode_char c;
01994 const_iterator i, ie = end();
01995 for ( i = begin(); i != ie; i.moveNext() ) {
01996 c = i.getCharacter();
01997 buffer.push_back(( wchar_t )c );
01998 }
01999 #endif
02000 }
02001
02002 void UString::_load_buffer_UTF32() const
02003 {
02004 _getBufferUTF32Str();
02005 utf32string& buffer = ( *m_buffer.mUTF32StrBuffer );
02006 buffer.reserve( length() );
02007
02008 unicode_char c;
02009
02010 const_iterator i, ie = end();
02011 for ( i = begin(); i != ie; i.moveNext() ) {
02012 c = i.getCharacter();
02013 buffer.push_back( c );
02014 }
02015 }
02016
02017 }