MyGUI  3.2.1
MyGUI_UString.cpp
Go to the documentation of this file.
00001 /*
00002  * This source file is part of MyGUI. For the latest info, see http://mygui.info/
00003  * Distributed under the MIT License
00004  * (See accompanying file COPYING.MIT or copy at http://opensource.org/licenses/MIT)
00005  */
00006 
00007 #include "MyGUI_Precompiled.h"
00008 #include "MyGUI_UString.h"
00009 
00010 namespace MyGUI
00011 {
00012 
00013     //--------------------------------------------------------------------------
00014     UString::_base_iterator::_base_iterator()
00015     {
00016         mString = 0;
00017     }
00018     //--------------------------------------------------------------------------
00019     void UString::_base_iterator::_seekFwd( size_type c )
00020     {
00021         mIter += c;
00022     }
00023     //--------------------------------------------------------------------------
00024     void UString::_base_iterator::_seekRev( size_type c )
00025     {
00026         mIter -= c;
00027     }
00028     //--------------------------------------------------------------------------
00029     void UString::_base_iterator::_become( const _base_iterator& i )
00030     {
00031         mIter = i.mIter;
00032         mString = i.mString;
00033     }
00034     //--------------------------------------------------------------------------
00035     bool UString::_base_iterator::_test_begin() const
00036     {
00037         return mIter == mString->mData.begin();
00038     }
00039     //--------------------------------------------------------------------------
00040     bool UString::_base_iterator::_test_end() const
00041     {
00042         return mIter == mString->mData.end();
00043     }
00044     //--------------------------------------------------------------------------
00045     UString::size_type UString::_base_iterator::_get_index() const
00046     {
00047         return mIter - mString->mData.begin();
00048     }
00049     //--------------------------------------------------------------------------
00050     void UString::_base_iterator::_jump_to( size_type index )
00051     {
00052         mIter = mString->mData.begin() + index;
00053     }
00054     //--------------------------------------------------------------------------
00055     UString::unicode_char UString::_base_iterator::_getCharacter() const
00056     {
00057         size_type current_index = _get_index();
00058         return mString->getChar( current_index );
00059     }
00060     //--------------------------------------------------------------------------
00061     int UString::_base_iterator::_setCharacter( unicode_char uc )
00062     {
00063         size_type current_index = _get_index();
00064         int change = mString->setChar( current_index, uc );
00065         _jump_to( current_index );
00066         return change;
00067     }
00068     //--------------------------------------------------------------------------
00069     void UString::_base_iterator::_moveNext()
00070     {
00071         _seekFwd( 1 ); // move 1 code point forward
00072         if ( _test_end() ) return; // exit if we hit the end
00073         if ( _utf16_surrogate_follow( mIter[0] ) ) {
00074             // landing on a follow code point means we might be part of a bigger character
00075             // so we test for that
00076             code_point lead_half = 0;
00077             //NB: we can't possibly be at the beginning here, so no need to test
00078             lead_half = mIter[-1]; // check the previous code point to see if we're part of a surrogate pair
00079             if ( _utf16_surrogate_lead( lead_half ) ) {
00080                 _seekFwd( 1 ); // if so, then advance 1 more code point
00081             }
00082         }
00083     }
00084     //--------------------------------------------------------------------------
00085     void UString::_base_iterator::_movePrev()
00086     {
00087         _seekRev( 1 ); // move 1 code point backwards
00088         if ( _test_begin() ) return; // exit if we hit the beginning
00089         if ( _utf16_surrogate_follow( mIter[0] ) ) {
00090             // landing on a follow code point means we might be part of a bigger character
00091             // so we test for that
00092             code_point lead_half = 0;
00093             lead_half = mIter[-1]; // check the previous character to see if we're part of a surrogate pair
00094             if ( _utf16_surrogate_lead( lead_half ) ) {
00095                 _seekRev( 1 ); // if so, then rewind 1 more code point
00096             }
00097         }
00098     }
00099     //--------------------------------------------------------------------------
00100     //--------------------------------------------------------------------------
00101     //--------------------------------------------------------------------------
00102     //--------------------------------------------------------------------------
00103     UString::_fwd_iterator::_fwd_iterator()
00104     {
00105 
00106     }
00107     //--------------------------------------------------------------------------
00108     UString::_fwd_iterator::_fwd_iterator( const _fwd_iterator& i )
00109     {
00110         _become( i );
00111     }
00112     //--------------------------------------------------------------------------
00113     UString::_fwd_iterator& UString::_fwd_iterator::operator++()
00114     {
00115         _seekFwd( 1 );
00116         return *this;
00117     }
00118     //--------------------------------------------------------------------------
00119     UString::_fwd_iterator UString::_fwd_iterator::operator++( int )
00120     {
00121         _fwd_iterator tmp( *this );
00122         _seekFwd( 1 );
00123         return tmp;
00124     }
00125     //--------------------------------------------------------------------------
00126     UString::_fwd_iterator& UString::_fwd_iterator::operator--()
00127     {
00128         _seekRev( 1 );
00129         return *this;
00130     }
00131     //--------------------------------------------------------------------------
00132     UString::_fwd_iterator UString::_fwd_iterator::operator--( int )
00133     {
00134         _fwd_iterator tmp( *this );
00135         _seekRev( 1 );
00136         return tmp;
00137     }
00138     //--------------------------------------------------------------------------
00139     UString::_fwd_iterator UString::_fwd_iterator::operator+( difference_type n )
00140     {
00141         _fwd_iterator tmp( *this );
00142         if ( n < 0 )
00143             tmp._seekRev( -n );
00144         else
00145             tmp._seekFwd( n );
00146         return tmp;
00147     }
00148     //--------------------------------------------------------------------------
00149     UString::_fwd_iterator UString::_fwd_iterator::operator-( difference_type n )
00150     {
00151         _fwd_iterator tmp( *this );
00152         if ( n < 0 )
00153             tmp._seekFwd( -n );
00154         else
00155             tmp._seekRev( n );
00156         return tmp;
00157     }
00158     //--------------------------------------------------------------------------
00159     UString::_fwd_iterator& UString::_fwd_iterator::operator+=( difference_type n )
00160     {
00161         if ( n < 0 )
00162             _seekRev( -n );
00163         else
00164             _seekFwd( n );
00165         return *this;
00166     }
00167     //--------------------------------------------------------------------------
00168     UString::_fwd_iterator& UString::_fwd_iterator::operator-=( difference_type n )
00169     {
00170         if ( n < 0 )
00171             _seekFwd( -n );
00172         else
00173             _seekRev( n );
00174         return *this;
00175     }
00176     //--------------------------------------------------------------------------
00177     UString::value_type& UString::_fwd_iterator::operator*() const
00178     {
00179         return *mIter;
00180     }
00181     //--------------------------------------------------------------------------
00182     UString::value_type& UString::_fwd_iterator::operator[]( difference_type n ) const
00183     {
00184         _fwd_iterator tmp( *this );
00185         tmp += n;
00186         return *tmp;
00187     }
00188     //--------------------------------------------------------------------------
00189     UString::_fwd_iterator& UString::_fwd_iterator::moveNext()
00190     {
00191         _moveNext();
00192         return *this;
00193     }
00194     //--------------------------------------------------------------------------
00195     UString::_fwd_iterator& UString::_fwd_iterator::movePrev()
00196     {
00197         _movePrev();
00198         return *this;
00199     }
00200     //--------------------------------------------------------------------------
00201     UString::unicode_char UString::_fwd_iterator::getCharacter() const
00202     {
00203         return _getCharacter();
00204     }
00205     //--------------------------------------------------------------------------
00206     int UString::_fwd_iterator::setCharacter( unicode_char uc )
00207     {
00208         return _setCharacter( uc );
00209     }
00210     //--------------------------------------------------------------------------
00211     //--------------------------------------------------------------------------
00212     //--------------------------------------------------------------------------
00213     //--------------------------------------------------------------------------
00214     UString::_const_fwd_iterator::_const_fwd_iterator()
00215     {
00216 
00217     }
00218     //--------------------------------------------------------------------------
00219     UString::_const_fwd_iterator::_const_fwd_iterator( const _const_fwd_iterator& i )
00220     {
00221         _become( i );
00222     }
00223     //--------------------------------------------------------------------------
00224     UString::_const_fwd_iterator::_const_fwd_iterator( const _fwd_iterator& i )
00225     {
00226         _become( i );
00227     }
00228     //--------------------------------------------------------------------------
00229     UString::_const_fwd_iterator& UString::_const_fwd_iterator::operator++()
00230     {
00231         _seekFwd( 1 );
00232         return *this;
00233     }
00234     //--------------------------------------------------------------------------
00235     UString::_const_fwd_iterator UString::_const_fwd_iterator::operator++( int )
00236     {
00237         _const_fwd_iterator tmp( *this );
00238         _seekFwd( 1 );
00239         return tmp;
00240     }
00241     //--------------------------------------------------------------------------
00242     UString::_const_fwd_iterator& UString::_const_fwd_iterator::operator--()
00243     {
00244         _seekRev( 1 );
00245         return *this;
00246     }
00247     //--------------------------------------------------------------------------
00248     UString::_const_fwd_iterator UString::_const_fwd_iterator::operator--( int )
00249     {
00250         _const_fwd_iterator tmp( *this );
00251         _seekRev( 1 );
00252         return tmp;
00253     }
00254     //--------------------------------------------------------------------------
00255     UString::_const_fwd_iterator UString::_const_fwd_iterator::operator+( difference_type n )
00256     {
00257         _const_fwd_iterator tmp( *this );
00258         if ( n < 0 )
00259             tmp._seekRev( -n );
00260         else
00261             tmp._seekFwd( n );
00262         return tmp;
00263     }
00264     //--------------------------------------------------------------------------
00265     UString::_const_fwd_iterator UString::_const_fwd_iterator::operator-( difference_type n )
00266     {
00267         _const_fwd_iterator tmp( *this );
00268         if ( n < 0 )
00269             tmp._seekFwd( -n );
00270         else
00271             tmp._seekRev( n );
00272         return tmp;
00273     }
00274     //--------------------------------------------------------------------------
00275     UString::_const_fwd_iterator& UString::_const_fwd_iterator::operator+=( difference_type n )
00276     {
00277         if ( n < 0 )
00278             _seekRev( -n );
00279         else
00280             _seekFwd( n );
00281         return *this;
00282     }
00283     //--------------------------------------------------------------------------
00284     UString::_const_fwd_iterator& UString::_const_fwd_iterator::operator-=( difference_type n )
00285     {
00286         if ( n < 0 )
00287             _seekFwd( -n );
00288         else
00289             _seekRev( n );
00290         return *this;
00291     }
00292     //--------------------------------------------------------------------------
00293     const UString::value_type& UString::_const_fwd_iterator::operator*() const
00294     {
00295         return *mIter;
00296     }
00297     //--------------------------------------------------------------------------
00298     const UString::value_type& UString::_const_fwd_iterator::operator[]( difference_type n ) const
00299     {
00300         _const_fwd_iterator tmp( *this );
00301         tmp += n;
00302         return *tmp;
00303     }
00304     //--------------------------------------------------------------------------
00305     UString::_const_fwd_iterator& UString::_const_fwd_iterator::moveNext()
00306     {
00307         _moveNext();
00308         return *this;
00309     }
00310     //--------------------------------------------------------------------------
00311     UString::_const_fwd_iterator& UString::_const_fwd_iterator::movePrev()
00312     {
00313         _movePrev();
00314         return *this;
00315     }
00316     //--------------------------------------------------------------------------
00317     UString::unicode_char UString::_const_fwd_iterator::getCharacter() const
00318     {
00319         return _getCharacter();
00320     }
00321     //--------------------------------------------------------------------------
00322     //--------------------------------------------------------------------------
00323     //--------------------------------------------------------------------------
00324     //--------------------------------------------------------------------------
00325     UString::_rev_iterator::_rev_iterator()
00326     {
00327 
00328     }
00329     //--------------------------------------------------------------------------
00330     UString::_rev_iterator::_rev_iterator( const _rev_iterator& i )
00331     {
00332         _become( i );
00333     }
00334     //--------------------------------------------------------------------------
00335     UString::_rev_iterator& UString::_rev_iterator::operator++()
00336     {
00337         _seekRev( 1 );
00338         return *this;
00339     }
00340     //--------------------------------------------------------------------------
00341     UString::_rev_iterator UString::_rev_iterator::operator++( int )
00342     {
00343         _rev_iterator tmp( *this );
00344         _seekRev( 1 );
00345         return tmp;
00346     }
00347     //--------------------------------------------------------------------------
00348     UString::_rev_iterator& UString::_rev_iterator::operator--()
00349     {
00350         _seekFwd( 1 );
00351         return *this;
00352     }
00353     //--------------------------------------------------------------------------
00354     UString::_rev_iterator UString::_rev_iterator::operator--( int )
00355     {
00356         _rev_iterator tmp( *this );
00357         _seekFwd( 1 );
00358         return tmp;
00359     }
00360     //--------------------------------------------------------------------------
00361     UString::_rev_iterator UString::_rev_iterator::operator+( difference_type n )
00362     {
00363         _rev_iterator tmp( *this );
00364         if ( n < 0 )
00365             tmp._seekFwd( -n );
00366         else
00367             tmp._seekRev( n );
00368         return tmp;
00369     }
00370     //--------------------------------------------------------------------------
00371     UString::_rev_iterator UString::_rev_iterator::operator-( difference_type n )
00372     {
00373         _rev_iterator tmp( *this );
00374         if ( n < 0 )
00375             tmp._seekRev( -n );
00376         else
00377             tmp._seekFwd( n );
00378         return tmp;
00379     }
00380     //--------------------------------------------------------------------------
00381     UString::_rev_iterator& UString::_rev_iterator::operator+=( difference_type n )
00382     {
00383         if ( n < 0 )
00384             _seekFwd( -n );
00385         else
00386             _seekRev( n );
00387         return *this;
00388     }
00389     //--------------------------------------------------------------------------
00390     UString::_rev_iterator& UString::_rev_iterator::operator-=( difference_type n )
00391     {
00392         if ( n < 0 )
00393             _seekRev( -n );
00394         else
00395             _seekFwd( n );
00396         return *this;
00397     }
00398     //--------------------------------------------------------------------------
00399     UString::value_type& UString::_rev_iterator::operator*() const
00400     {
00401         return mIter[-1];
00402     }
00403     //--------------------------------------------------------------------------
00404     UString::value_type& UString::_rev_iterator::operator[]( difference_type n ) const
00405     {
00406         _rev_iterator tmp( *this );
00407         tmp -= n;
00408         return *tmp;
00409     }
00410     //--------------------------------------------------------------------------
00411     //--------------------------------------------------------------------------
00412     //--------------------------------------------------------------------------
00413     //--------------------------------------------------------------------------
00414     UString::_const_rev_iterator::_const_rev_iterator()
00415     {
00416 
00417     }
00418     //--------------------------------------------------------------------------
00419     UString::_const_rev_iterator::_const_rev_iterator( const _const_rev_iterator& i )
00420     {
00421         _become( i );
00422     }
00423     //--------------------------------------------------------------------------
00424     UString::_const_rev_iterator::_const_rev_iterator( const _rev_iterator& i )
00425     {
00426         _become( i );
00427     }
00428     //--------------------------------------------------------------------------
00429     UString::_const_rev_iterator& UString::_const_rev_iterator::operator++()
00430     {
00431         _seekRev( 1 );
00432         return *this;
00433     }
00434     //--------------------------------------------------------------------------
00435     UString::_const_rev_iterator UString::_const_rev_iterator::operator++( int )
00436     {
00437         _const_rev_iterator tmp( *this );
00438         _seekRev( 1 );
00439         return tmp;
00440     }
00441     //--------------------------------------------------------------------------
00442     UString::_const_rev_iterator& UString::_const_rev_iterator::operator--()
00443     {
00444         _seekFwd( 1 );
00445         return *this;
00446     }
00447     //--------------------------------------------------------------------------
00448     UString::_const_rev_iterator UString::_const_rev_iterator::operator--( int )
00449     {
00450         _const_rev_iterator tmp( *this );
00451         _seekFwd( 1 );
00452         return tmp;
00453     }
00454     //--------------------------------------------------------------------------
00455     UString::_const_rev_iterator UString::_const_rev_iterator::operator+( difference_type n )
00456     {
00457         _const_rev_iterator tmp( *this );
00458         if ( n < 0 )
00459             tmp._seekFwd( -n );
00460         else
00461             tmp._seekRev( n );
00462         return tmp;
00463     }
00464     //--------------------------------------------------------------------------
00465     UString::_const_rev_iterator UString::_const_rev_iterator::operator-( difference_type n )
00466     {
00467         _const_rev_iterator tmp( *this );
00468         if ( n < 0 )
00469             tmp._seekRev( -n );
00470         else
00471             tmp._seekFwd( n );
00472         return tmp;
00473     }
00474     //--------------------------------------------------------------------------
00475     UString::_const_rev_iterator& UString::_const_rev_iterator::operator+=( difference_type n )
00476     {
00477         if ( n < 0 )
00478             _seekFwd( -n );
00479         else
00480             _seekRev( n );
00481         return *this;
00482     }
00483     //--------------------------------------------------------------------------
00484     UString::_const_rev_iterator& UString::_const_rev_iterator::operator-=( difference_type n )
00485     {
00486         if ( n < 0 )
00487             _seekRev( -n );
00488         else
00489             _seekFwd( n );
00490         return *this;
00491     }
00492     //--------------------------------------------------------------------------
00493     const UString::value_type& UString::_const_rev_iterator::operator*() const
00494     {
00495         return mIter[-1];
00496     }
00497     //--------------------------------------------------------------------------
00498     const UString::value_type& UString::_const_rev_iterator::operator[]( difference_type n ) const
00499     {
00500         _const_rev_iterator tmp( *this );
00501         tmp -= n;
00502         return *tmp;
00503     }
00504     //--------------------------------------------------------------------------
00505     //--------------------------------------------------------------------------
00506     //--------------------------------------------------------------------------
00507     //--------------------------------------------------------------------------
00508     UString::UString()
00509     {
00510         _init();
00511     }
00512     //--------------------------------------------------------------------------
00513     UString::UString( const UString& copy )
00514     {
00515         _init();
00516         mData = copy.mData;
00517     }
00518     //--------------------------------------------------------------------------
00519     UString::UString( size_type length, const code_point& ch )
00520     {
00521         _init();
00522         assign( length, ch );
00523     }
00524     //--------------------------------------------------------------------------
00525     UString::UString( const code_point* str )
00526     {
00527         _init();
00528         assign( str );
00529     }
00530     //--------------------------------------------------------------------------
00531     UString::UString( const code_point* str, size_type length )
00532     {
00533         _init();
00534         assign( str, length );
00535     }
00536     //--------------------------------------------------------------------------
00537     UString::UString( const UString& str, size_type index, size_type length )
00538     {
00539         _init();
00540         assign( str, index, length );
00541     }
00542     //--------------------------------------------------------------------------
00543 #if MYGUI_IS_NATIVE_WCHAR_T
00544     UString::UString( const wchar_t* w_str )
00545     {
00546         _init();
00547         assign( w_str );
00548     }
00549     //--------------------------------------------------------------------------
00550     UString::UString( const wchar_t* w_str, size_type length )
00551     {
00552         _init();
00553         assign( w_str, length );
00554     }
00555 #endif
00556     //--------------------------------------------------------------------------
00557     UString::UString( const std::wstring& wstr )
00558     {
00559         _init();
00560         assign( wstr );
00561     }
00562     //--------------------------------------------------------------------------
00563     UString::UString( const char* c_str )
00564     {
00565         _init();
00566         assign( c_str );
00567     }
00568     //--------------------------------------------------------------------------
00569     UString::UString( const char* c_str, size_type length )
00570     {
00571         _init();
00572         assign( c_str, length );
00573     }
00574     //--------------------------------------------------------------------------
00575     UString::UString( const std::string& str )
00576     {
00577         _init();
00578         assign( str );
00579     }
00580     //--------------------------------------------------------------------------
00581     UString::~UString()
00582     {
00583         _cleanBuffer();
00584     }
00585     //--------------------------------------------------------------------------
00586     UString::size_type UString::size() const
00587     {
00588         return mData.size();
00589     }
00590     //--------------------------------------------------------------------------
00591     UString::size_type UString::length() const
00592     {
00593         return size();
00594     }
00595     //--------------------------------------------------------------------------
00596     UString::size_type UString::length_Characters() const
00597     {
00598         const_iterator i = begin(), ie = end();
00599         size_type c = 0;
00600         while ( i != ie ) {
00601             i.moveNext();
00602             ++c;
00603         }
00604         return c;
00605     }
00606     //--------------------------------------------------------------------------
00607     UString::size_type UString::max_size() const
00608     {
00609         return mData.max_size();
00610     }
00611     //--------------------------------------------------------------------------
00612     void UString::reserve( size_type size )
00613     {
00614         mData.reserve( size );
00615     }
00616     //--------------------------------------------------------------------------
00617     void UString::resize( size_type num, const code_point& val /*= 0 */ )
00618     {
00619         mData.resize( num, val );
00620     }
00621     //--------------------------------------------------------------------------
00622     void UString::swap( UString& from )
00623     {
00624         mData.swap( from.mData );
00625     }
00626     //--------------------------------------------------------------------------
00627     bool UString::empty() const
00628     {
00629         return mData.empty();
00630     }
00631     //--------------------------------------------------------------------------
00632     const UString::code_point* UString::c_str() const
00633     {
00634         return mData.c_str();
00635     }
00636     //--------------------------------------------------------------------------
00637     const UString::code_point* UString::data() const
00638     {
00639         return c_str();
00640     }
00641     //--------------------------------------------------------------------------
00642     UString::size_type UString::capacity() const
00643     {
00644         return mData.capacity();
00645     }
00646     //--------------------------------------------------------------------------
00647     void UString::clear()
00648     {
00649         mData.clear();
00650     }
00651     //--------------------------------------------------------------------------
00652     UString UString::substr( size_type index, size_type num /*= npos */ ) const
00653     {
00654         // this could avoid the extra copy if we used a private specialty constructor
00655         dstring data = mData.substr( index, num );
00656         UString tmp;
00657         tmp.mData.swap( data );
00658         return tmp;
00659     }
00660     //--------------------------------------------------------------------------
00661     void UString::push_back( unicode_char val )
00662     {
00663         code_point cp[2];
00664         size_t c = _utf32_to_utf16( val, cp );
00665         if ( c > 0 ) push_back( cp[0] );
00666         if ( c > 1 ) push_back( cp[1] );
00667     }
00668     //--------------------------------------------------------------------------
00669 #if MYGUI_IS_NATIVE_WCHAR_T
00670     void UString::push_back( wchar_t val )
00671     {
00672         // we do this because the Unicode method still preserves UTF-16 code points
00673         mData.push_back( static_cast<code_point>( val ) );
00674     }
00675 #endif
00676     //--------------------------------------------------------------------------
00677     void UString::push_back( code_point val )
00678     {
00679         mData.push_back( val );
00680     }
00681 
00682     void UString::push_back( char val )
00683     {
00684         mData.push_back( static_cast<code_point>( val ) );
00685     }
00686 
00687     bool UString::inString( unicode_char ch ) const
00688     {
00689         const_iterator i, ie = end();
00690         for ( i = begin(); i != ie; i.moveNext() ) {
00691             if ( i.getCharacter() == ch )
00692                 return true;
00693         }
00694         return false;
00695     }
00696 
00697     const std::string& UString::asUTF8() const
00698     {
00699         _load_buffer_UTF8();
00700         return *m_buffer.mStrBuffer;
00701     }
00702 
00703     const char* UString::asUTF8_c_str() const
00704     {
00705         _load_buffer_UTF8();
00706         return m_buffer.mStrBuffer->c_str();
00707     }
00708 
00709     const UString::utf32string& UString::asUTF32() const
00710     {
00711         _load_buffer_UTF32();
00712         return *m_buffer.mUTF32StrBuffer;
00713     }
00714 
00715     const UString::unicode_char* UString::asUTF32_c_str() const
00716     {
00717         _load_buffer_UTF32();
00718         return m_buffer.mUTF32StrBuffer->c_str();
00719     }
00720 
00721     const std::wstring& UString::asWStr() const
00722     {
00723         _load_buffer_WStr();
00724         return *m_buffer.mWStrBuffer;
00725     }
00726 
00727     const wchar_t* UString::asWStr_c_str() const
00728     {
00729         _load_buffer_WStr();
00730         return m_buffer.mWStrBuffer->c_str();
00731     }
00732 
00733     UString::code_point& UString::at( size_type loc )
00734     {
00735         return mData.at( loc );
00736     }
00737 
00738     const UString::code_point& UString::at( size_type loc ) const
00739     {
00740         return mData.at( loc );
00741     }
00742 
00743     UString::unicode_char UString::getChar( size_type loc ) const
00744     {
00745         const code_point* ptr = c_str();
00746         unicode_char uc;
00747         size_t l = _utf16_char_length( ptr[loc] );
00748         code_point cp[2] = { /* blame the code beautifier */
00749             0, 0
00750         };
00751         cp[0] = ptr[loc];
00752 
00753         if ( l == 2 && ( loc + 1 ) < mData.length() ) {
00754             cp[1] = ptr[loc+1];
00755         }
00756         _utf16_to_utf32( cp, uc );
00757         return uc;
00758     }
00759 
00760     int UString::setChar( size_type loc, unicode_char ch )
00761     {
00762         code_point cp[2] = { /* blame the code beautifier */
00763             0, 0
00764         };
00765         size_t l = _utf32_to_utf16( ch, cp );
00766         unicode_char existingChar = getChar( loc );
00767         size_t existingSize = _utf16_char_length( existingChar );
00768         size_t newSize = _utf16_char_length( ch );
00769 
00770         if ( newSize > existingSize ) {
00771             at( loc ) = cp[0];
00772             insert( loc + 1, 1, cp[1] );
00773             return 1;
00774         }
00775         if ( newSize < existingSize ) {
00776             erase( loc, 1 );
00777             at( loc ) = cp[0];
00778             return -1;
00779         }
00780 
00781         // newSize == existingSize
00782         at( loc ) = cp[0];
00783         if ( l == 2 ) at( loc + 1 ) = cp[1];
00784         return 0;
00785     }
00786 
00787     UString::iterator UString::begin()
00788     {
00789         iterator i;
00790         i.mIter = mData.begin();
00791         i.mString = this;
00792         return i;
00793     }
00794 
00795     UString::const_iterator UString::begin() const
00796     {
00797         const_iterator i;
00798         i.mIter = const_cast<UString*>( this )->mData.begin();
00799         i.mString = const_cast<UString*>( this );
00800         return i;
00801     }
00802 
00803     UString::iterator UString::end()
00804     {
00805         iterator i;
00806         i.mIter = mData.end();
00807         i.mString = this;
00808         return i;
00809     }
00810 
00811     UString::const_iterator UString::end() const
00812     {
00813         const_iterator i;
00814         i.mIter = const_cast<UString*>( this )->mData.end();
00815         i.mString = const_cast<UString*>( this );
00816         return i;
00817     }
00818 
00819     UString::reverse_iterator UString::rbegin()
00820     {
00821         reverse_iterator i;
00822         i.mIter = mData.end();
00823         i.mString = this;
00824         return i;
00825     }
00826 
00827     UString::const_reverse_iterator UString::rbegin() const
00828     {
00829         const_reverse_iterator i;
00830         i.mIter = const_cast<UString*>( this )->mData.end();
00831         i.mString = const_cast<UString*>( this );
00832         return i;
00833     }
00834 
00835     UString::reverse_iterator UString::rend()
00836     {
00837         reverse_iterator i;
00838         i.mIter = mData.begin();
00839         i.mString = this;
00840         return i;
00841     }
00842 
00843     UString::const_reverse_iterator UString::rend() const
00844     {
00845         const_reverse_iterator i;
00846         i.mIter = const_cast<UString*>( this )->mData.begin();
00847         i.mString = const_cast<UString*>( this );
00848         return i;
00849     }
00850 
00851     UString& UString::assign( iterator start, iterator end )
00852     {
00853         mData.assign( start.mIter, end.mIter );
00854         return *this;
00855     }
00856 
00857     UString& UString::assign( const UString& str )
00858     {
00859         mData.assign( str.mData );
00860         return *this;
00861     }
00862 
00863     UString& UString::assign( const code_point* str )
00864     {
00865         mData.assign( str );
00866         return *this;
00867     }
00868 
00869     UString& UString::assign( const code_point* str, size_type num )
00870     {
00871         mData.assign( str, num );
00872         return *this;
00873     }
00874 
00875     UString& UString::assign( const UString& str, size_type index, size_type len )
00876     {
00877         mData.assign( str.mData, index, len );
00878         return *this;
00879     }
00880 
00881     UString& UString::assign( size_type num, const code_point& ch )
00882     {
00883         mData.assign( num, ch );
00884         return *this;
00885     }
00886 
00887     UString& UString::assign( const std::wstring& wstr )
00888     {
00889         mData.clear();
00890         mData.reserve( wstr.length() ); // best guess bulk allocate
00891 #ifdef WCHAR_UTF16 // if we're already working in UTF-16, this is easy
00892         code_point tmp;
00893         std::wstring::const_iterator i, ie = wstr.end();
00894         for ( i = wstr.begin(); i != ie; i++ ) {
00895             tmp = static_cast<code_point>( *i );
00896             mData.push_back( tmp );
00897         }
00898 #else // otherwise we do it the safe way (which is still 100% safe to pass UTF-16 through, just slower)
00899         code_point cp[3] = {0, 0, 0};
00900         unicode_char tmp;
00901         std::wstring::const_iterator i, ie = wstr.end();
00902         for ( i = wstr.begin(); i != ie; i++ ) {
00903             tmp = static_cast<unicode_char>( *i );
00904             size_t l = _utf32_to_utf16( tmp, cp );
00905             if ( l > 0 ) mData.push_back( cp[0] );
00906             if ( l > 1 ) mData.push_back( cp[1] );
00907         }
00908 #endif
00909         return *this;
00910     }
00911 
00912 #if MYGUI_IS_NATIVE_WCHAR_T
00913     UString& UString::assign( const wchar_t* w_str )
00914     {
00915         std::wstring tmp;
00916         tmp.assign( w_str );
00917         return assign( tmp );
00918     }
00919 
00920     UString& UString::assign( const wchar_t* w_str, size_type num )
00921     {
00922         std::wstring tmp;
00923         tmp.assign( w_str, num );
00924         return assign( tmp );
00925     }
00926 #endif
00927 
00928     UString& UString::assign( const std::string& str )
00929     {
00930         size_type len = _verifyUTF8( str );
00931         clear(); // empty our contents, if there are any
00932         reserve( len ); // best guess bulk capacity growth
00933 
00934         // This is a 3 step process, converting each byte in the UTF-8 stream to UTF-32,
00935         // then converting it to UTF-16, then finally appending the data buffer
00936 
00937         unicode_char uc;          // temporary Unicode character buffer
00938         unsigned char utf8buf[7]; // temporary UTF-8 buffer
00939         utf8buf[6] = 0;
00940         size_t utf8len;           // UTF-8 length
00941         code_point utf16buff[3];  // temporary UTF-16 buffer
00942         utf16buff[2] = 0;
00943         size_t utf16len;          // UTF-16 length
00944 
00945         std::string::const_iterator i, ie = str.end();
00946         for ( i = str.begin(); i != ie; i++ ) {
00947             utf8len = _utf8_char_length( static_cast<unsigned char>( *i ) ); // estimate bytes to load
00948             for ( size_t j = 0; j < utf8len; j++ ) { // load the needed UTF-8 bytes
00949                 utf8buf[j] = ( static_cast<unsigned char>( *( i + j ) ) ); // we don't increment 'i' here just in case the estimate is wrong (shouldn't happen, but we're being careful)
00950             }
00951             utf8buf[utf8len] = 0; // nul terminate so we throw an exception before running off the end of the buffer
00952             utf8len = _utf8_to_utf32( utf8buf, uc ); // do the UTF-8 -> UTF-32 conversion
00953             i += utf8len - 1; // we subtract 1 for the increment of the 'for' loop
00954 
00955             utf16len = _utf32_to_utf16( uc, utf16buff ); // UTF-32 -> UTF-16 conversion
00956             append( utf16buff, utf16len ); // append the characters to the string
00957         }
00958         return *this;
00959     }
00960 
00961     UString& UString::assign( const char* c_str )
00962     {
00963         std::string tmp( c_str );
00964         return assign( tmp );
00965     }
00966 
00967     UString& UString::assign( const char* c_str, size_type num )
00968     {
00969         std::string tmp;
00970         tmp.assign( c_str, num );
00971         return assign( tmp );
00972     }
00973 
00974     UString& UString::append( const UString& str )
00975     {
00976         mData.append( str.mData );
00977         return *this;
00978     }
00979 
00980     UString& UString::append( const code_point* str )
00981     {
00982         mData.append( str );
00983         return *this;
00984     }
00985 
00986     UString& UString::append( const UString& str, size_type index, size_type len )
00987     {
00988         mData.append( str.mData, index, len );
00989         return *this;
00990     }
00991 
00992     UString& UString::append( const code_point* str, size_type num )
00993     {
00994         mData.append( str, num );
00995         return *this;
00996     }
00997 
00998     UString& UString::append( size_type num, code_point ch )
00999     {
01000         mData.append( num, ch );
01001         return *this;
01002     }
01003 
01004     UString& UString::append( iterator start, iterator end )
01005     {
01006         mData.append( start.mIter, end.mIter );
01007         return *this;
01008     }
01009 
01010 #if MYGUI_IS_NATIVE_WCHAR_T
01011     UString& UString::append( const wchar_t* w_str, size_type num )
01012     {
01013         std::wstring tmp( w_str, num );
01014         return append( tmp );
01015     }
01016 
01017     UString& UString::append( size_type num, wchar_t ch )
01018     {
01019         return append( num, static_cast<unicode_char>( ch ) );
01020     }
01021 #endif
01022     UString& UString::append( const char* c_str, size_type num )
01023     {
01024         UString tmp( c_str, num );
01025         append( tmp );
01026         return *this;
01027     }
01028 
01029     UString& UString::append( size_type num, char ch )
01030     {
01031         append( num, static_cast<code_point>( ch ) );
01032         return *this;
01033     }
01034 
01035     UString& UString::append( size_type num, unicode_char ch )
01036     {
01037         code_point cp[2] = {0, 0};
01038         if ( _utf32_to_utf16( ch, cp ) == 2 ) {
01039             for ( size_type i = 0; i < num; i++ ) {
01040                 append( 1, cp[0] );
01041                 append( 1, cp[1] );
01042             }
01043         } else {
01044             for ( size_type i = 0; i < num; i++ ) {
01045                 append( 1, cp[0] );
01046             }
01047         }
01048         return *this;
01049     }
01050 
01051     UString::iterator UString::insert( iterator i, const code_point& ch )
01052     {
01053         iterator ret;
01054         ret.mIter = mData.insert( i.mIter, ch );
01055         ret.mString = this;
01056         return ret;
01057     }
01058 
01059     UString& UString::insert( size_type index, const UString& str )
01060     {
01061         mData.insert( index, str.mData );
01062         return *this;
01063     }
01064 
01065     UString& UString::insert( size_type index1, const UString& str, size_type index2, size_type num )
01066     {
01067         mData.insert( index1, str.mData, index2, num );
01068         return *this;
01069     }
01070 
01071     void UString::insert( iterator i, iterator start, iterator end )
01072     {
01073         mData.insert( i.mIter, start.mIter, end.mIter );
01074     }
01075 
01076     UString& UString::insert( size_type index, const code_point* str, size_type num )
01077     {
01078         mData.insert( index, str, num );
01079         return *this;
01080     }
01081 
01082 #if MYGUI_IS_NATIVE_WCHAR_T
01083     UString& UString::insert( size_type index, const wchar_t* w_str, size_type num )
01084     {
01085         UString tmp( w_str, num );
01086         insert( index, tmp );
01087         return *this;
01088     }
01089 #endif
01090 
01091     UString& UString::insert( size_type index, const char* c_str, size_type num )
01092     {
01093         UString tmp( c_str, num );
01094         insert( index, tmp );
01095         return *this;
01096     }
01097 
01098     UString& UString::insert( size_type index, size_type num, code_point ch )
01099     {
01100         mData.insert( index, num, ch );
01101         return *this;
01102     }
01103 
01104 #if MYGUI_IS_NATIVE_WCHAR_T
01105     UString& UString::insert( size_type index, size_type num, wchar_t ch )
01106     {
01107         insert( index, num, static_cast<unicode_char>( ch ) );
01108         return *this;
01109     }
01110 #endif
01111 
01112     UString& UString::insert( size_type index, size_type num, char ch )
01113     {
01114         insert( index, num, static_cast<code_point>( ch ) );
01115         return *this;
01116     }
01117 
01118     UString& UString::insert( size_type index, size_type num, unicode_char ch )
01119     {
01120         code_point cp[3] = {0, 0, 0};
01121         size_t l = _utf32_to_utf16( ch, cp );
01122         if ( l == 1 ) {
01123             return insert( index, num, cp[0] );
01124         }
01125         for ( size_type c = 0; c < num; c++ ) {
01126             // insert in reverse order to preserve ordering after insert
01127             insert( index, 1, cp[1] );
01128             insert( index, 1, cp[0] );
01129         }
01130         return *this;
01131     }
01132 
01133     void UString::insert( iterator i, size_type num, const code_point& ch )
01134     {
01135         mData.insert( i.mIter, num, ch );
01136     }
01137 #if MYGUI_IS_NATIVE_WCHAR_T
01138     void UString::insert( iterator i, size_type num, const wchar_t& ch )
01139     {
01140         insert( i, num, static_cast<unicode_char>( ch ) );
01141     }
01142 #endif
01143 
01144     void UString::insert( iterator i, size_type num, const char& ch )
01145     {
01146         insert( i, num, static_cast<code_point>( ch ) );
01147     }
01148 
01149     void UString::insert( iterator i, size_type num, const unicode_char& ch )
01150     {
01151         code_point cp[3] = {0, 0, 0};
01152         size_t l = _utf32_to_utf16( ch, cp );
01153         if ( l == 1 ) {
01154             insert( i, num, cp[0] );
01155         } else {
01156             for ( size_type c = 0; c < num; c++ ) {
01157                 // insert in reverse order to preserve ordering after insert
01158                 insert( i, 1, cp[1] );
01159                 insert( i, 1, cp[0] );
01160             }
01161         }
01162     }
01163 
01164     UString::iterator UString::erase( iterator loc )
01165     {
01166         iterator ret;
01167         ret.mIter = mData.erase( loc.mIter );
01168         ret.mString = this;
01169         return ret;
01170     }
01171 
01172     UString::iterator UString::erase( iterator start, iterator end )
01173     {
01174         iterator ret;
01175         ret.mIter = mData.erase( start.mIter, end.mIter );
01176         ret.mString = this;
01177         return ret;
01178     }
01179 
01180     UString& UString::erase( size_type index /*= 0*/, size_type num /*= npos */ )
01181     {
01182         if ( num == npos )
01183             mData.erase( index );
01184         else
01185             mData.erase( index, num );
01186         return *this;
01187     }
01188 
01189     UString& UString::replace( size_type index1, size_type num1, const UString& str )
01190     {
01191         mData.replace( index1, num1, str.mData, 0, npos );
01192         return *this;
01193     }
01194 
01195     UString& UString::replace( size_type index1, size_type num1, const UString& str, size_type num2 )
01196     {
01197         mData.replace( index1, num1, str.mData, 0, num2 );
01198         return *this;
01199     }
01200 
01201     UString& UString::replace( size_type index1, size_type num1, const UString& str, size_type index2, size_type num2 )
01202     {
01203         mData.replace( index1, num1, str.mData, index2, num2 );
01204         return *this;
01205     }
01206 
01207     UString& UString::replace( iterator start, iterator end, const UString& str, size_type num /*= npos */ )
01208     {
01209         _const_fwd_iterator st(start); //Work around for gcc, allow it to find correct overload
01210 
01211         size_type index1 = begin() - st;
01212         size_type num1 = end - st;
01213         return replace( index1, num1, str, 0, num );
01214     }
01215 
01216     UString& UString::replace( size_type index, size_type num1, size_type num2, code_point ch )
01217     {
01218         mData.replace( index, num1, num2, ch );
01219         return *this;
01220     }
01221 
01222     UString& UString::replace( iterator start, iterator end, size_type num, code_point ch )
01223     {
01224         _const_fwd_iterator st(start); //Work around for gcc, allow it to find correct overload
01225 
01226         size_type index1 = begin() - st;
01227         size_type num1 = end - st;
01228         return replace( index1, num1, num, ch );
01229     }
01230 
01231     int UString::compare( const UString& str ) const
01232     {
01233         return mData.compare( str.mData );
01234     }
01235 
01236     int UString::compare( const code_point* str ) const
01237     {
01238         return mData.compare( str );
01239     }
01240 
01241     int UString::compare( size_type index, size_type length, const UString& str ) const
01242     {
01243         return mData.compare( index, length, str.mData );
01244     }
01245 
01246     int UString::compare( size_type index, size_type length, const UString& str, size_type index2, size_type length2 ) const
01247     {
01248         return mData.compare( index, length, str.mData, index2, length2 );
01249     }
01250 
01251     int UString::compare( size_type index, size_type length, const code_point* str, size_type length2 ) const
01252     {
01253         return mData.compare( index, length, str, length2 );
01254     }
01255 
01256 #if MYGUI_IS_NATIVE_WCHAR_T
01257     int UString::compare( size_type index, size_type length, const wchar_t* w_str, size_type length2 ) const
01258     {
01259         UString tmp( w_str, length2 );
01260         return compare( index, length, tmp );
01261     }
01262 #endif
01263 
01264     int UString::compare( size_type index, size_type length, const char* c_str, size_type length2 ) const
01265     {
01266         UString tmp( c_str, length2 );
01267         return compare( index, length, tmp );
01268     }
01269 
01270     UString::size_type UString::find( const UString& str, size_type index /*= 0 */ ) const
01271     {
01272         return mData.find( str.c_str(), index );
01273     }
01274 
01275     UString::size_type UString::find( const code_point* cp_str, size_type index, size_type length ) const
01276     {
01277         UString tmp( cp_str );
01278         return mData.find( tmp.c_str(), index, length );
01279     }
01280 
01281     UString::size_type UString::find( const char* c_str, size_type index, size_type length ) const
01282     {
01283         UString tmp( c_str );
01284         return mData.find( tmp.c_str(), index, length );
01285     }
01286 
01287 #if MYGUI_IS_NATIVE_WCHAR_T
01288     UString::size_type UString::find( const wchar_t* w_str, size_type index, size_type length ) const
01289     {
01290         UString tmp( w_str );
01291         return mData.find( tmp.c_str(), index, length );
01292     }
01293 #endif
01294 
01295     UString::size_type UString::find( char ch, size_type index /*= 0 */ ) const
01296     {
01297         return find( static_cast<code_point>( ch ), index );
01298     }
01299 
01300     UString::size_type UString::find( code_point ch, size_type index /*= 0 */ ) const
01301     {
01302         return mData.find( ch, index );
01303     }
01304 
01305 #if MYGUI_IS_NATIVE_WCHAR_T
01306     UString::size_type UString::find( wchar_t ch, size_type index /*= 0 */ ) const
01307     {
01308         return find( static_cast<unicode_char>( ch ), index );
01309     }
01310 #endif
01311 
01312     UString::size_type UString::find( unicode_char ch, size_type index /*= 0 */ ) const
01313     {
01314         code_point cp[3] = {0, 0, 0};
01315         size_t l = _utf32_to_utf16( ch, cp );
01316         return find( UString( cp, l ), index );
01317     }
01318 
01319     UString::size_type UString::rfind( const UString& str, size_type index /*= 0 */ ) const
01320     {
01321         return mData.rfind( str.c_str(), index );
01322     }
01323 
01324     UString::size_type UString::rfind( const code_point* cp_str, size_type index, size_type num ) const
01325     {
01326         UString tmp( cp_str );
01327         return mData.rfind( tmp.c_str(), index, num );
01328     }
01329 
01330     UString::size_type UString::rfind( const char* c_str, size_type index, size_type num ) const
01331     {
01332         UString tmp( c_str );
01333         return mData.rfind( tmp.c_str(), index, num );
01334     }
01335 
01336 #if MYGUI_IS_NATIVE_WCHAR_T
01337     UString::size_type UString::rfind( const wchar_t* w_str, size_type index, size_type num ) const
01338     {
01339         UString tmp( w_str );
01340         return mData.rfind( tmp.c_str(), index, num );
01341     }
01342 #endif
01343 
01344     UString::size_type UString::rfind( char ch, size_type index /*= 0 */ ) const
01345     {
01346         return rfind( static_cast<code_point>( ch ), index );
01347     }
01348 
01349     UString::size_type UString::rfind( code_point ch, size_type index ) const
01350     {
01351         return mData.rfind( ch, index );
01352     }
01353 
01354 #if MYGUI_IS_NATIVE_WCHAR_T
01355     UString::size_type UString::rfind( wchar_t ch, size_type index /*= 0 */ ) const
01356     {
01357         return rfind( static_cast<unicode_char>( ch ), index );
01358     }
01359 #endif
01360 
01361     UString::size_type UString::rfind( unicode_char ch, size_type index /*= 0 */ ) const
01362     {
01363         code_point cp[3] = {0, 0, 0};
01364         size_t l = _utf32_to_utf16( ch, cp );
01365         return rfind( UString( cp, l ), index );
01366     }
01367 
01368     UString::size_type UString::find_first_of( const UString &str, size_type index /*= 0*/, size_type num /*= npos */ ) const
01369     {
01370         size_type i = 0;
01371         const size_type len = length();
01372         while ( i < num && ( index + i ) < len ) {
01373             unicode_char ch = getChar( index + i );
01374             if ( str.inString( ch ) )
01375                 return index + i;
01376             i += _utf16_char_length( ch ); // increment by the Unicode character length
01377         }
01378         return npos;
01379     }
01380 
01381     UString::size_type UString::find_first_of( code_point ch, size_type index /*= 0 */ ) const
01382     {
01383         UString tmp;
01384         tmp.assign( 1, ch );
01385         return find_first_of( tmp, index );
01386     }
01387 
01388     UString::size_type UString::find_first_of( char ch, size_type index /*= 0 */ ) const
01389     {
01390         return find_first_of( static_cast<code_point>( ch ), index );
01391     }
01392 
01393 #if MYGUI_IS_NATIVE_WCHAR_T
01394     UString::size_type UString::find_first_of( wchar_t ch, size_type index /*= 0 */ ) const
01395     {
01396         return find_first_of( static_cast<unicode_char>( ch ), index );
01397     }
01398 #endif
01399 
01400     UString::size_type UString::find_first_of( unicode_char ch, size_type index /*= 0 */ ) const
01401     {
01402         code_point cp[3] = {0, 0, 0};
01403         size_t l = _utf32_to_utf16( ch, cp );
01404         return find_first_of( UString( cp, l ), index );
01405     }
01406 
01407     UString::size_type UString::find_first_not_of( const UString& str, size_type index /*= 0*/, size_type num /*= npos */ ) const
01408     {
01409         size_type i = 0;
01410         const size_type len = length();
01411         while ( i < num && ( index + i ) < len ) {
01412             unicode_char ch = getChar( index + i );
01413             if ( !str.inString( ch ) )
01414                 return index + i;
01415             i += _utf16_char_length( ch ); // increment by the Unicode character length
01416         }
01417         return npos;
01418     }
01419 
01420     UString::size_type UString::find_first_not_of( code_point ch, size_type index /*= 0 */ ) const
01421     {
01422         UString tmp;
01423         tmp.assign( 1, ch );
01424         return find_first_not_of( tmp, index );
01425     }
01426 
01427     UString::size_type UString::find_first_not_of( char ch, size_type index /*= 0 */ ) const
01428     {
01429         return find_first_not_of( static_cast<code_point>( ch ), index );
01430     }
01431 
01432 #if MYGUI_IS_NATIVE_WCHAR_T
01433     UString::size_type UString::find_first_not_of( wchar_t ch, size_type index /*= 0 */ ) const
01434     {
01435         return find_first_not_of( static_cast<unicode_char>( ch ), index );
01436     }
01437 #endif
01438 
01439     UString::size_type UString::find_first_not_of( unicode_char ch, size_type index /*= 0 */ ) const
01440     {
01441         code_point cp[3] = {0, 0, 0};
01442         size_t l = _utf32_to_utf16( ch, cp );
01443         return find_first_not_of( UString( cp, l ), index );
01444     }
01445 
01446     UString::size_type UString::find_last_of( const UString& str, size_type index /*= npos*/, size_type num /*= npos */ ) const
01447     {
01448         size_type i = 0;
01449         const size_type len = length();
01450         if ( index > len ) index = len - 1;
01451 
01452         while ( i < num && ( index - i ) != npos ) {
01453             size_type j = index - i;
01454             // careful to step full Unicode characters
01455             if ( j != 0 && _utf16_surrogate_follow( at( j ) ) && _utf16_surrogate_lead( at( j - 1 ) ) ) {
01456                 j = index - ++i;
01457             }
01458             // and back to the usual dull test
01459             unicode_char ch = getChar( j );
01460             if ( str.inString( ch ) )
01461                 return j;
01462             i++;
01463         }
01464         return npos;
01465     }
01466 
01467     UString::size_type UString::find_last_of( code_point ch, size_type index /*= npos */ ) const
01468     {
01469         UString tmp;
01470         tmp.assign( 1, ch );
01471         return find_last_of( tmp, index );
01472     }
01473 
01474 #if MYGUI_IS_NATIVE_WCHAR_T
01475     UString::size_type UString::find_last_of( wchar_t ch, size_type index /*= npos */ ) const
01476     {
01477         return find_last_of( static_cast<unicode_char>( ch ), index );
01478     }
01479 #endif
01480 
01481     UString::size_type UString::find_last_of( unicode_char ch, size_type index /*= npos */ ) const
01482     {
01483         code_point cp[3] = {0, 0, 0};
01484         size_t l = _utf32_to_utf16( ch, cp );
01485         return find_last_of( UString( cp, l ), index );
01486     }
01487 
01488     UString::size_type UString::find_last_not_of( const UString& str, size_type index /*= npos*/, size_type num /*= npos */ ) const
01489     {
01490         size_type i = 0;
01491         const size_type len = length();
01492         if ( index > len ) index = len - 1;
01493 
01494         while ( i < num && ( index - i ) != npos ) {
01495             size_type j = index - i;
01496             // careful to step full Unicode characters
01497             if ( j != 0 && _utf16_surrogate_follow( at( j ) ) && _utf16_surrogate_lead( at( j - 1 ) ) ) {
01498                 j = index - ++i;
01499             }
01500             // and back to the usual dull test
01501             unicode_char ch = getChar( j );
01502             if ( !str.inString( ch ) )
01503                 return j;
01504             i++;
01505         }
01506         return npos;
01507     }
01508 
01509     UString::size_type UString::find_last_not_of( code_point ch, size_type index /*= npos */ ) const
01510     {
01511         UString tmp;
01512         tmp.assign( 1, ch );
01513         return find_last_not_of( tmp, index );
01514     }
01515 
01516     UString::size_type UString::find_last_not_of( char ch, size_type index /*= npos */ ) const
01517     {
01518         return find_last_not_of( static_cast<code_point>( ch ), index );
01519     }
01520 
01521 #if MYGUI_IS_NATIVE_WCHAR_T
01522     UString::size_type UString::find_last_not_of( wchar_t ch, size_type index /*= npos */ ) const
01523     {
01524         return find_last_not_of( static_cast<unicode_char>( ch ), index );
01525     }
01526 #endif
01527 
01528     UString::size_type UString::find_last_not_of( unicode_char ch, size_type index /*= npos */ ) const
01529     {
01530         code_point cp[3] = {0, 0, 0};
01531         size_t l = _utf32_to_utf16( ch, cp );
01532         return find_last_not_of( UString( cp, l ), index );
01533     }
01534 
01535     bool UString::operator<( const UString& right ) const
01536     {
01537         return compare( right ) < 0;
01538     }
01539 
01540     bool UString::operator<=( const UString& right ) const
01541     {
01542         return compare( right ) <= 0;
01543     }
01544 
01545     UString& UString::operator=( const UString& s )
01546     {
01547         return assign( s );
01548     }
01549 
01550     UString& UString::operator=( code_point ch )
01551     {
01552         clear();
01553         return append( 1, ch );
01554     }
01555 
01556     UString& UString::operator=( char ch )
01557     {
01558         clear();
01559         return append( 1, ch );
01560     }
01561 
01562 #if MYGUI_IS_NATIVE_WCHAR_T
01563     UString& UString::operator=( wchar_t ch )
01564     {
01565         clear();
01566         return append( 1, ch );
01567     }
01568 #endif
01569 
01570     UString& UString::operator=( unicode_char ch )
01571     {
01572         clear();
01573         return append( 1, ch );
01574     }
01575 
01576     bool UString::operator>( const UString& right ) const
01577     {
01578         return compare( right ) > 0;
01579     }
01580 
01581     bool UString::operator>=( const UString& right ) const
01582     {
01583         return compare( right ) >= 0;
01584     }
01585 
01586     bool UString::operator==( const UString& right ) const
01587     {
01588         return compare( right ) == 0;
01589     }
01590 
01591     bool UString::operator!=( const UString& right ) const
01592     {
01593         return !operator==( right );
01594     }
01595 
01596     UString::code_point& UString::operator[]( size_type index )
01597     {
01598         return at( index );
01599     }
01600 
01601     const UString::code_point& UString::operator[]( size_type index ) const
01602     {
01603         return at( index );
01604     }
01605 
01606     UString::operator std::string() const 
01607     {
01608         return std::string( asUTF8() );
01609     }
01610     
01612     UString::operator std::wstring() const 
01613     {
01614         return std::wstring( asWStr() );
01615     }
01616 
01617 
01618     bool UString::_utf16_independent_char( code_point cp )
01619     {
01620         if ( 0xD800 <= cp && cp <= 0xDFFF ) // tests if the cp is within the surrogate pair range
01621             return false; // it matches a surrogate pair signature
01622         return true; // everything else is a standalone code point
01623     }
01624 
01625     bool UString::_utf16_surrogate_lead( code_point cp )
01626     {
01627         if ( 0xD800 <= cp && cp <= 0xDBFF ) // tests if the cp is within the 2nd word of a surrogate pair
01628             return true; // it is a 1st word
01629         return false; // it isn't
01630     }
01631 
01632     bool UString::_utf16_surrogate_follow( code_point cp )
01633     {
01634         if ( 0xDC00 <= cp && cp <= 0xDFFF ) // tests if the cp is within the 2nd word of a surrogate pair
01635             return true; // it is a 2nd word
01636         return false; // everything else isn't
01637     }
01638 
01639     size_t UString::_utf16_char_length( code_point cp )
01640     {
01641         if ( 0xD800 <= cp && cp <= 0xDBFF ) // test if cp is the beginning of a surrogate pair
01642             return 2; // if it is, then we are 2 words long
01643         return 1; // otherwise we are only 1 word long
01644     }
01645 
01646     size_t UString::_utf16_char_length( unicode_char uc )
01647     {
01648         if ( uc > 0xFFFF ) // test if uc is greater than the single word maximum
01649             return 2; // if so, we need a surrogate pair
01650         return 1; // otherwise we can stuff it into a single word
01651     }
01652 
01653     size_t UString::_utf16_to_utf32( const code_point in_cp[2], unicode_char& out_uc )
01654     {
01655         const code_point& cp1 = in_cp[0];
01656         const code_point& cp2 = in_cp[1];
01657         bool wordPair = false;
01658 
01659         // does it look like a surrogate pair?
01660         if ( 0xD800 <= cp1 && cp1 <= 0xDBFF ) {
01661             // looks like one, but does the other half match the algorithm as well?
01662             if ( 0xDC00 <= cp2 && cp2 <= 0xDFFF )
01663                 wordPair = true; // yep!
01664         }
01665 
01666         if ( !wordPair ) { // if we aren't a 100% authentic surrogate pair, then just copy the value
01667             out_uc = cp1;
01668             return 1;
01669         }
01670 
01671         unsigned short cU = cp1, cL = cp2; // copy upper and lower words of surrogate pair to writable buffers
01672         cU -= 0xD800; // remove the encoding markers
01673         cL -= 0xDC00;
01674 
01675         out_uc = ( cU & 0x03FF ) << 10; // grab the 10 upper bits and set them in their proper location
01676         out_uc |= ( cL & 0x03FF ); // combine in the lower 10 bits
01677         out_uc += 0x10000; // add back in the value offset
01678 
01679         return 2; // this whole operation takes to words, so that's what we'll return
01680     }
01681 
01682     size_t UString::_utf32_to_utf16( const unicode_char& in_uc, code_point out_cp[2] )
01683     {
01684         if ( in_uc <= 0xFFFF ) { // we blindly preserve sentinel values because our decoder understands them
01685             out_cp[0] = static_cast<code_point>(in_uc);
01686             return 1;
01687         }
01688         unicode_char uc = in_uc; // copy to writable buffer
01689         unsigned short tmp; // single code point buffer
01690         uc -= 0x10000; // subtract value offset
01691 
01692         //process upper word
01693         tmp = static_cast<unsigned short>(( uc >> 10 ) & 0x03FF); // grab the upper 10 bits
01694         tmp += 0xD800; // add encoding offset
01695         out_cp[0] = tmp; // write
01696 
01697         // process lower word
01698         tmp = static_cast<unsigned short>(uc & 0x03FF); // grab the lower 10 bits
01699         tmp += 0xDC00; // add encoding offset
01700         out_cp[1] = tmp; // write
01701 
01702         return 2; // return used word count (2 for surrogate pairs)
01703     }
01704 
01705     bool UString::_utf8_start_char( unsigned char cp )
01706     {
01707         return ( cp & ~_cont_mask ) != _cont;
01708     }
01709 
01710     size_t UString::_utf8_char_length( unsigned char cp )
01711     {
01712         if ( !( cp & 0x80 ) ) return 1;
01713         if (( cp & ~_lead1_mask ) == _lead1 ) return 2;
01714         if (( cp & ~_lead2_mask ) == _lead2 ) return 3;
01715         if (( cp & ~_lead3_mask ) == _lead3 ) return 4;
01716         if (( cp & ~_lead4_mask ) == _lead4 ) return 5;
01717         if (( cp & ~_lead5_mask ) == _lead5 ) return 6;
01718 
01719         return 1;
01720         //throw invalid_data( "invalid UTF-8 sequence header value" );
01721     }
01722 
01723     size_t UString::_utf8_char_length( unicode_char uc )
01724     {
01725         /*
01726         7 bit:  U-00000000 - U-0000007F: 0xxxxxxx
01727         11 bit: U-00000080 - U-000007FF: 110xxxxx 10xxxxxx
01728         16 bit: U-00000800 - U-0000FFFF: 1110xxxx 10xxxxxx 10xxxxxx
01729         21 bit: U-00010000 - U-001FFFFF: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
01730         26 bit: U-00200000 - U-03FFFFFF: 111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
01731         31 bit: U-04000000 - U-7FFFFFFF: 1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
01732         */
01733         if ( !( uc & ~0x0000007F ) ) return 1;
01734         if ( !( uc & ~0x000007FF ) ) return 2;
01735         if ( !( uc & ~0x0000FFFF ) ) return 3;
01736         if ( !( uc & ~0x001FFFFF ) ) return 4;
01737         if ( !( uc & ~0x03FFFFFF ) ) return 5;
01738         if ( !( uc & ~0x7FFFFFFF ) ) return 6;
01739 
01740         return 1;
01741         //throw invalid_data( "invalid UTF-32 value" );
01742     }
01743 
01744     size_t UString::_utf8_to_utf32( const unsigned char in_cp[6], unicode_char& out_uc )
01745     {
01746         size_t len = _utf8_char_length( in_cp[0] );
01747         if ( len == 1 ) { // if we are only 1 byte long, then just grab it and exit
01748             out_uc = in_cp[0];
01749             return 1;
01750         }
01751 
01752         unicode_char c = 0; // temporary buffer
01753         size_t i = 0;
01754         switch ( len ) { // load header byte
01755             case 6:
01756                 c = in_cp[i] & _lead5_mask;
01757                 break;
01758             case 5:
01759                 c = in_cp[i] & _lead4_mask;
01760                 break;
01761             case 4:
01762                 c = in_cp[i] & _lead3_mask;
01763                 break;
01764             case 3:
01765                 c = in_cp[i] & _lead2_mask;
01766                 break;
01767             case 2:
01768                 c = in_cp[i] & _lead1_mask;
01769                 break;
01770         }
01771 
01772         // load each continuation byte
01773         for ( ++i; i < len; i++ )
01774         {
01775             if (( in_cp[i] & ~_cont_mask ) != _cont )
01776             {
01777                 //throw invalid_data( "bad UTF-8 continuation byte" );
01778                 out_uc = in_cp[0];
01779                 return 1;
01780             }
01781             c <<= 6;
01782             c |= ( in_cp[i] & _cont_mask );
01783         }
01784 
01785         out_uc = c; // write the final value and return the used byte length
01786         return len;
01787     }
01788 
01789     size_t UString::_utf32_to_utf8( const unicode_char& in_uc, unsigned char out_cp[6] )
01790     {
01791         size_t len = _utf8_char_length( in_uc ); // predict byte length of sequence
01792         unicode_char c = in_uc; // copy to temp buffer
01793 
01794         //stuff all of the lower bits
01795         for ( size_t i = len - 1; i > 0; i-- ) {
01796             out_cp[i] = static_cast<unsigned char>((( c ) & _cont_mask ) | _cont);
01797             c >>= 6;
01798         }
01799 
01800         //now write the header byte
01801         switch ( len ) {
01802             case 6:
01803                 out_cp[0] = static_cast<unsigned char>((( c ) & _lead5_mask ) | _lead5);
01804                 break;
01805             case 5:
01806                 out_cp[0] = static_cast<unsigned char>((( c ) & _lead4_mask ) | _lead4);
01807                 break;
01808             case 4:
01809                 out_cp[0] = static_cast<unsigned char>((( c ) & _lead3_mask ) | _lead3);
01810                 break;
01811             case 3:
01812                 out_cp[0] = static_cast<unsigned char>((( c ) & _lead2_mask ) | _lead2);
01813                 break;
01814             case 2:
01815                 out_cp[0] = static_cast<unsigned char>((( c ) & _lead1_mask ) | _lead1);
01816                 break;
01817             case 1:
01818             default:
01819                 out_cp[0] = static_cast<unsigned char>(( c ) & 0x7F);
01820                 break;
01821         }
01822 
01823         // return the byte length of the sequence
01824         return len;
01825     }
01826 
01827     UString::size_type UString::_verifyUTF8( const unsigned char* c_str )
01828     {
01829         std::string tmp( reinterpret_cast<const char*>( c_str ) );
01830         return _verifyUTF8( tmp );
01831     }
01832 
01833     UString::size_type UString::_verifyUTF8( const std::string& str )
01834     {
01835         std::string::const_iterator i, ie = str.end();
01836         i = str.begin();
01837         size_type length = 0;
01838 
01839         while ( i != ie ) {
01840             // characters pass until we find an extended sequence
01841             if (( *i ) & 0x80 ) {
01842                 unsigned char c = ( *i );
01843                 size_t contBytes = 0;
01844 
01845                 // get continuation byte count and test for overlong sequences
01846                 if (( c & ~_lead1_mask ) == _lead1 ) { // 1 additional byte
01847                     if ( c == _lead1 )
01848                     {
01849                         //throw invalid_data( "overlong UTF-8 sequence" );
01850                         return str.size();
01851                     }
01852                     contBytes = 1;
01853 
01854                 } else if (( c & ~_lead2_mask ) == _lead2 ) { // 2 additional bytes
01855                     contBytes = 2;
01856                     if ( c == _lead2 ) { // possible overlong UTF-8 sequence
01857                         c = ( *( i + 1 ) ); // look ahead to next byte in sequence
01858                         if (( c & _lead2 ) == _cont )
01859                         {
01860                             //throw invalid_data( "overlong UTF-8 sequence" );
01861                             return str.size();
01862                         }
01863                     }
01864 
01865                 } else if (( c & ~_lead3_mask ) == _lead3 ) { // 3 additional bytes
01866                     contBytes = 3;
01867                     if ( c == _lead3 ) { // possible overlong UTF-8 sequence
01868                         c = ( *( i + 1 ) ); // look ahead to next byte in sequence
01869                         if (( c & _lead3 ) == _cont )
01870                         {
01871                             //throw invalid_data( "overlong UTF-8 sequence" );
01872                             return str.size();
01873                         }
01874                     }
01875 
01876                 } else if (( c & ~_lead4_mask ) == _lead4 ) { // 4 additional bytes
01877                     contBytes = 4;
01878                     if ( c == _lead4 ) { // possible overlong UTF-8 sequence
01879                         c = ( *( i + 1 ) ); // look ahead to next byte in sequence
01880                         if (( c & _lead4 ) == _cont )
01881                         {
01882                             //throw invalid_data( "overlong UTF-8 sequence" );
01883                             return str.size();
01884                         }
01885                     }
01886 
01887                 } else if (( c & ~_lead5_mask ) == _lead5 ) { // 5 additional bytes
01888                     contBytes = 5;
01889                     if ( c == _lead5 ) { // possible overlong UTF-8 sequence
01890                         c = ( *( i + 1 ) ); // look ahead to next byte in sequence
01891                         if (( c & _lead5 ) == _cont )
01892                         {
01893                             //throw invalid_data( "overlong UTF-8 sequence" );
01894                             return str.size();
01895                         }
01896                     }
01897                 }
01898 
01899                 // check remaining continuation bytes for
01900                 while ( contBytes-- ) {
01901                     c = ( *( ++i ) ); // get next byte in sequence
01902                     if (( c & ~_cont_mask ) != _cont )
01903                     {
01904                         //throw invalid_data( "bad UTF-8 continuation byte" );
01905                         return str.size();
01906                     }
01907                 }
01908             }
01909             length++;
01910             i++;
01911         }
01912         return length;
01913     }
01914 
01915     void UString::_init()
01916     {
01917         m_buffer.mVoidBuffer = 0;
01918         m_bufferType = bt_none;
01919         m_bufferSize = 0;
01920     }
01921 
01922     void UString::_cleanBuffer() const
01923     {
01924         if ( m_buffer.mVoidBuffer != 0 ) {
01925             switch ( m_bufferType ) {
01926                 case bt_string:
01927                     delete m_buffer.mStrBuffer;
01928                     break;
01929                 case bt_wstring:
01930                     delete m_buffer.mWStrBuffer;
01931                     break;
01932                 case bt_utf32string:
01933                     delete m_buffer.mUTF32StrBuffer;
01934                     break;
01935                 case bt_none: // under the worse of circumstances, this is all we can do, and hope it works out
01936                 default:
01937                     //delete m_buffer.mVoidBuffer;
01938                     // delete void* is undefined, don't do that
01939                     assert("This should never happen - mVoidBuffer should never contain something if we "
01940                         "don't know the type");
01941                     break;
01942             }
01943             m_buffer.mVoidBuffer = 0;
01944             m_bufferSize = 0;
01945             m_bufferType = bt_none;
01946         }
01947     }
01948 
01949     void UString::_getBufferStr() const
01950     {
01951         if ( m_bufferType != bt_string ) {
01952             _cleanBuffer();
01953             m_buffer.mStrBuffer = new std::string();
01954             m_bufferType = bt_string;
01955         }
01956         m_buffer.mStrBuffer->clear();
01957     }
01958 
01959     void UString::_getBufferWStr() const
01960     {
01961         if ( m_bufferType != bt_wstring ) {
01962             _cleanBuffer();
01963             m_buffer.mWStrBuffer = new std::wstring();
01964             m_bufferType = bt_wstring;
01965         }
01966         m_buffer.mWStrBuffer->clear();
01967     }
01968 
01969     void UString::_getBufferUTF32Str() const
01970     {
01971         if ( m_bufferType != bt_utf32string ) {
01972             _cleanBuffer();
01973             m_buffer.mUTF32StrBuffer = new utf32string();
01974             m_bufferType = bt_utf32string;
01975         }
01976         m_buffer.mUTF32StrBuffer->clear();
01977     }
01978 
01979     void UString::_load_buffer_UTF8() const
01980     {
01981         _getBufferStr();
01982         std::string& buffer = ( *m_buffer.mStrBuffer );
01983         buffer.reserve( length() );
01984 
01985         unsigned char utf8buf[6];
01986         char* charbuf = ( char* )utf8buf;
01987         unicode_char c;
01988         size_t len;
01989 
01990         const_iterator i, ie = end();
01991         for ( i = begin(); i != ie; i.moveNext() ) {
01992             c = i.getCharacter();
01993             len = _utf32_to_utf8( c, utf8buf );
01994             size_t j = 0;
01995             while ( j < len )
01996                 buffer.push_back( charbuf[j++] );
01997         }
01998     }
01999 
02000     void UString::_load_buffer_WStr() const
02001     {
02002         _getBufferWStr();
02003         std::wstring& buffer = ( *m_buffer.mWStrBuffer );
02004         buffer.reserve( length() ); // may over reserve, but should be close enough
02005 #ifdef WCHAR_UTF16 // wchar_t matches UTF-16
02006         const_iterator i, ie = end();
02007         for ( i = begin(); i != ie; ++i ) {
02008             buffer.push_back(( wchar_t )( *i ) );
02009         }
02010 #else // wchar_t fits UTF-32
02011         unicode_char c;
02012         const_iterator i, ie = end();
02013         for ( i = begin(); i != ie; i.moveNext() ) {
02014             c = i.getCharacter();
02015             buffer.push_back(( wchar_t )c );
02016         }
02017 #endif
02018     }
02019 
02020     void UString::_load_buffer_UTF32() const
02021     {
02022         _getBufferUTF32Str();
02023         utf32string& buffer = ( *m_buffer.mUTF32StrBuffer );
02024         buffer.reserve( length() ); // may over reserve, but should be close enough
02025 
02026         unicode_char c;
02027 
02028         const_iterator i, ie = end();
02029         for ( i = begin(); i != ie; i.moveNext() ) {
02030             c = i.getCharacter();
02031             buffer.push_back( c );
02032         }
02033     }
02034 
02035 } // namespace MyGUI