00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024 #include "kmime_util.h"
00025 #include "kmime_util_p.h"
00026 #include "kmime_header_parsing.h"
00027 #include "kmime_charfreq.h"
00028 #include "kmime_warning.h"
00029
00030 #include <config-kmime.h>
00031 #include <kdefakes.h>
00032 #include <kglobal.h>
00033 #include <klocale.h>
00034 #include <kcharsets.h>
00035 #include <kcodecs.h>
00036 #include <kdebug.h>
00037
00038 #include <QtCore/QList>
00039 #include <QtCore/QString>
00040 #include <QtCore/QTextCodec>
00041
00042 #include <ctype.h>
00043 #include <time.h>
00044 #include <stdlib.h>
00045 #include <unistd.h>
00046 #include <boost/concept_check.hpp>
00047 #include "kmime_codecs.h"
00048
00049 using namespace KMime;
00050
00051 namespace KMime {
00052
// Canonical (upper-cased) charset names handed out by cachedCharset().
QList<QByteArray> c_harsetCache;
// Canonical (upper-cased) language names handed out by cachedLanguage().
QList<QByteArray> l_anguageCache;
// Name of the encoding decodeRFC2047String() falls back to when the decoded
// bytes are not valid UTF-8; empty means "no fallback".
QString f_allbackCharEnc;
// Flag stored by setUseOutlookAttachmentEncoding(); off by default.
bool u_seOutlookEncoding = false;
00057
00058 QByteArray cachedCharset( const QByteArray &name )
00059 {
00060 foreach ( const QByteArray& charset, c_harsetCache ) {
00061 if ( qstricmp( name.data(), charset.data() ) == 0 ) {
00062 return charset;
00063 }
00064 }
00065
00066 c_harsetCache.append( name.toUpper() );
00067
00068 return c_harsetCache.last();
00069 }
00070
00071 QByteArray cachedLanguage( const QByteArray &name )
00072 {
00073 foreach ( const QByteArray& language, l_anguageCache ) {
00074 if ( qstricmp( name.data(), language.data() ) == 0 ) {
00075 return language;
00076 }
00077 }
00078
00079 l_anguageCache.append( name.toUpper() );
00080
00081 return l_anguageCache.last();
00082 }
00083
00084 bool isUsAscii( const QString &s )
00085 {
00086 uint sLength = s.length();
00087 for ( uint i=0; i<sLength; i++ ) {
00088 if ( s.at( i ).toLatin1() <= 0 ) {
00089 return false;
00090 }
00091 }
00092 return true;
00093 }
00094
00095 QString nameForEncoding( Headers::contentEncoding enc )
00096 {
00097 switch( enc ) {
00098 case Headers::CE7Bit: return QString::fromLatin1( "7bit" );
00099 case Headers::CE8Bit: return QString::fromLatin1( "8bit" );
00100 case Headers::CEquPr: return QString::fromLatin1( "quoted-printable" );
00101 case Headers::CEbase64: return QString::fromLatin1( "base64" );
00102 case Headers::CEuuenc: return QString::fromLatin1( "uuencode" );
00103 case Headers::CEbinary: return QString::fromLatin1( "binary" );
00104 default: return QString::fromLatin1( "unknown" );
00105 }
00106 }
00107
00108 QList<Headers::contentEncoding> encodingsForData( const QByteArray &data )
00109 {
00110 QList<Headers::contentEncoding> allowed;
00111 CharFreq cf( data );
00112
00113 switch ( cf.type() ) {
00114 case CharFreq::SevenBitText:
00115 allowed << Headers::CE7Bit;
00116 case CharFreq::EightBitText:
00117 allowed << Headers::CE8Bit;
00118 case CharFreq::SevenBitData:
00119 if ( cf.printableRatio() > 5.0/6.0 ) {
00120
00121
00122
00123 allowed << Headers::CEquPr;
00124 allowed << Headers::CEbase64;
00125 } else {
00126 allowed << Headers::CEbase64;
00127 allowed << Headers::CEquPr;
00128 }
00129 break;
00130 case CharFreq::EightBitData:
00131 allowed << Headers::CEbase64;
00132 break;
00133 case CharFreq::None:
00134 default:
00135 Q_ASSERT( false );
00136 }
00137
00138 return allowed;
00139 }
00140
00141
// 128-bit character-class bitmaps, one bit per 7-bit character.
// NOTE(review): the code that reads these tables is not in this file. The
// character lists below assume that bit (0x80 >> (ch % 8)) of byte [ch / 8]
// marks character ch -- confirm against the lookup helper.

// Under that reading: " ( ) , . : ; < > @ [ \ ]
const uchar specialsMap[16] = {
  0x00, 0x00, 0x00, 0x00,
  0x20, 0xCA, 0x00, 0x3A,
  0x80, 0x00, 0x00, 0x1C,
  0x00, 0x00, 0x00, 0x00
};

// Under that reading: " ( ) , / : ; < = > ? @ [ \ ]
const uchar tSpecialsMap[16] = {
  0x00, 0x00, 0x00, 0x00,
  0x20, 0xC9, 0x00, 0x3F,
  0x80, 0x00, 0x00, 0x1C,
  0x00, 0x00, 0x00, 0x00
};

// Under that reading: letters, digits and ! # $ % & ' * + - / = ? ^ _ ` { | } ~
const uchar aTextMap[16] = {
  0x00, 0x00, 0x00, 0x00,
  0x5F, 0x35, 0xFF, 0xC5,
  0x7F, 0xFF, 0xFF, 0xE3,
  0xFF, 0xFF, 0xFF, 0xFE
};

// Under that reading: letters, digits and ! # $ % & ' * + - . ^ _ ` { | } ~
const uchar tTextMap[16] = {
  0x00, 0x00, 0x00, 0x00,
  0x5F, 0x36, 0xFF, 0xC0,
  0x7F, 0xFF, 0xFF, 0xE3,
  0xFF, 0xFF, 0xFF, 0xFE
};

// Under that reading: letters, digits and ! * + - /
const uchar eTextMap[16] = {
  0x00, 0x00, 0x00, 0x00,
  0x40, 0x35, 0xFF, 0xC0,
  0x7F, 0xFF, 0xFF, 0xE0,
  0x7F, 0xFF, 0xFF, 0xE0
};
00180
00181 void setFallbackCharEncoding(const QString& fallbackCharEnc)
00182 {
00183 f_allbackCharEnc = fallbackCharEnc;
00184 }
00185
00186 QString fallbackCharEncoding()
00187 {
00188 return f_allbackCharEnc;
00189 }
00190
00191 void setUseOutlookAttachmentEncoding( bool violateStandard )
00192 {
00193 u_seOutlookEncoding = violateStandard;
00194 }
00195
00196 bool useOutlookAttachmentEncoding()
00197 {
00198 return u_seOutlookEncoding;
00199 }
00200
00201
00202 QString decodeRFC2047String( const QByteArray &src, QByteArray &usedCS,
00203 const QByteArray &defaultCS, bool forceCS )
00204 {
00205 QByteArray result;
00206 QByteArray spaceBuffer;
00207 const char *scursor = src.constData();
00208 const char *send = scursor + src.length();
00209 bool onlySpacesSinceLastWord = false;
00210
00211 while ( scursor != send ) {
00212
00213 if ( isspace( *scursor ) && onlySpacesSinceLastWord ) {
00214 spaceBuffer += *scursor++;
00215 continue;
00216 }
00217
00218
00219 if ( *scursor == '=' ) {
00220 QByteArray language;
00221 QString decoded;
00222 ++scursor;
00223 const char *start = scursor;
00224 if ( HeaderParsing::parseEncodedWord( scursor, send, decoded, language, usedCS, defaultCS, forceCS ) ) {
00225 result += decoded.toUtf8();
00226 onlySpacesSinceLastWord = true;
00227 spaceBuffer.clear();
00228 } else {
00229 if ( onlySpacesSinceLastWord ) {
00230 result += spaceBuffer;
00231 onlySpacesSinceLastWord = false;
00232 }
00233 result += '=';
00234 scursor = start;
00235 }
00236 continue;
00237 } else {
00238
00239 if ( onlySpacesSinceLastWord ) {
00240 result += spaceBuffer;
00241 onlySpacesSinceLastWord = false;
00242 }
00243 result += *scursor;
00244 ++scursor;
00245 }
00246 }
00247
00248
00249 const QString tryUtf8 = QString::fromUtf8( result );
00250 if ( tryUtf8.contains( 0xFFFD ) && !f_allbackCharEnc.isEmpty() ) {
00251 QTextCodec* codec = KGlobal::charsets()->codecForName( f_allbackCharEnc );
00252 return codec->toUnicode( result );
00253 } else {
00254 return tryUtf8;
00255 }
00256 }
00257
00258 QString decodeRFC2047String( const QByteArray &src )
00259 {
00260 QByteArray usedCS;
00261 return decodeRFC2047String( src, usedCS, "utf-8", false );
00262 }
00263
00264 QByteArray encodeRFC2047String( const QString &src, const QByteArray &charset,
00265 bool addressHeader, bool allow8BitHeaders )
00266 {
00267 QByteArray encoded8Bit, result;
00268 int start=0, end=0;
00269 bool nonAscii=false, ok=true, useQEncoding=false;
00270
00271 const QTextCodec *codec = KGlobal::charsets()->codecForName( charset, ok );
00272
00273 QByteArray usedCS;
00274 if ( !ok ) {
00275
00276 usedCS = KGlobal::locale()->encoding();
00277 codec = KGlobal::charsets()->codecForName( usedCS, ok );
00278 }
00279 else {
00280 Q_ASSERT( codec );
00281 if ( charset.isEmpty() )
00282 usedCS = codec->name();
00283 else
00284 usedCS = charset;
00285 }
00286
00287 if ( usedCS.contains( "8859-" ) ) {
00288 useQEncoding = true;
00289 }
00290
00291 encoded8Bit = codec->fromUnicode( src );
00292
00293 if ( allow8BitHeaders ) {
00294 return encoded8Bit;
00295 }
00296
00297 uint encoded8BitLength = encoded8Bit.length();
00298 for ( unsigned int i=0; i<encoded8BitLength; i++ ) {
00299 if ( encoded8Bit[i] == ' ' ) {
00300 start = i + 1;
00301 }
00302
00303
00304 if ( ( (signed char)encoded8Bit[i] < 0 ) || ( encoded8Bit[i] == '\033' ) ||
00305 ( addressHeader && ( strchr( "\"()<>@,.;:\\[]=", encoded8Bit[i] ) != 0 ) ) ) {
00306 end = start;
00307 nonAscii = true;
00308 break;
00309 }
00310 }
00311
00312 if ( nonAscii ) {
00313 while ( ( end < encoded8Bit.length() ) && ( encoded8Bit[end] != ' ' ) ) {
00314
00315 end++;
00316 }
00317
00318 for ( int x=end; x<encoded8Bit.length(); x++ ) {
00319 if ( ( (signed char)encoded8Bit[x]<0) || ( encoded8Bit[x] == '\033' ) ||
00320 ( addressHeader && ( strchr("\"()<>@,.;:\\[]=",encoded8Bit[x]) != 0 ) ) ) {
00321 end = x;
00322
00323 while ( ( end < encoded8Bit.length() ) && ( encoded8Bit[end] != ' ' ) ) {
00324
00325 end++;
00326 }
00327 }
00328 }
00329
00330 result = encoded8Bit.left( start ) + "=?" + usedCS;
00331
00332 if ( useQEncoding ) {
00333 result += "?Q?";
00334
00335 char c, hexcode;
00336 for ( int i=start; i<end; i++ ) {
00337 c = encoded8Bit[i];
00338 if ( c == ' ' ) {
00339 result += '_';
00340 } else {
00341 if ( ( ( c >= 'a' ) && ( c <= 'z' ) ) ||
00342 ( ( c >= 'A' ) && ( c <= 'Z' ) ) ||
00343 ( ( c >= '0' ) && ( c <= '9' ) ) ) {
00344 result += c;
00345 } else {
00346 result += '=';
00347 hexcode = ((c & 0xF0) >> 4) + 48;
00348 if ( hexcode >= 58 ) {
00349 hexcode += 7;
00350 }
00351 result += hexcode;
00352 hexcode = (c & 0x0F) + 48;
00353 if ( hexcode >= 58 ) {
00354 hexcode += 7;
00355 }
00356 result += hexcode;
00357 }
00358 }
00359 }
00360 } else {
00361 result += "?B?" + encoded8Bit.mid( start, end - start ).toBase64();
00362 }
00363
00364 result +="?=";
00365 result += encoded8Bit.right( encoded8Bit.length() - end );
00366 } else {
00367 result = encoded8Bit;
00368 }
00369
00370 return result;
00371 }
00372
00373
00374
00375 QByteArray encodeRFC2231String( const QString& str, const QByteArray& charset )
00376 {
00377 if ( str.isEmpty() )
00378 return QByteArray();
00379
00380
00381 const QTextCodec *codec = KGlobal::charsets()->codecForName( charset );
00382 QByteArray latin;
00383 if ( charset == "us-ascii" )
00384 latin = str.toAscii();
00385 else if ( codec )
00386 latin = codec->fromUnicode( str );
00387 else
00388 latin = str.toLocal8Bit();
00389
00390 char *l;
00391 for ( l = latin.data(); *l; ++l ) {
00392 if ( ( ( *l & 0xE0 ) == 0 ) || ( *l & 0x80 ) )
00393
00394 break;
00395 }
00396 if ( !*l )
00397 return latin;
00398
00399 QByteArray result = charset + "''";
00400 for ( l = latin.data(); *l; ++l ) {
00401 bool needsQuoting = ( *l & 0x80 ) || ( *l == '%' );
00402 if( !needsQuoting ) {
00403 const QByteArray especials = "()<>@,;:\"/[]?.= \033";
00404 int len = especials.length();
00405 for ( int i = 0; i < len; i++ )
00406 if ( *l == especials[i] ) {
00407 needsQuoting = true;
00408 break;
00409 }
00410 }
00411 if ( needsQuoting ) {
00412 result += '%';
00413 unsigned char hexcode;
00414 hexcode = ( ( *l & 0xF0 ) >> 4 ) + 48;
00415 if ( hexcode >= 58 )
00416 hexcode += 7;
00417 result += hexcode;
00418 hexcode = ( *l & 0x0F ) + 48;
00419 if ( hexcode >= 58 )
00420 hexcode += 7;
00421 result += hexcode;
00422 } else {
00423 result += *l;
00424 }
00425 }
00426 return result;
00427 }
00428
00429
00430
00431 QString decodeRFC2231String( const QByteArray &str, QByteArray &usedCS, const QByteArray &defaultCS,
00432 bool forceCS )
00433 {
00434 int p = str.indexOf('\'');
00435 if (p < 0) return KGlobal::charsets()->codecForName( defaultCS )->toUnicode( str );
00436
00437
00438 QByteArray charset = str.left(p);
00439
00440 QByteArray st = str.mid( str.lastIndexOf('\'') + 1 );
00441
00442 char ch, ch2;
00443 p = 0;
00444 while (p < (int)st.length())
00445 {
00446 if (st.at(p) == 37)
00447 {
00448
00449
00450 if ( p + 2 < st.length() ) {
00451 ch = st.at(p+1) - 48;
00452 if (ch > 16)
00453 ch -= 7;
00454 ch2 = st.at(p+2) - 48;
00455 if (ch2 > 16)
00456 ch2 -= 7;
00457 st[p] = ch * 16 + ch2;
00458 st.remove( p+1, 2 );
00459 }
00460 }
00461 p++;
00462 }
00463 kDebug() << "Got pre-decoded:" << st;
00464 QString result;
00465 const QTextCodec * charsetcodec = KGlobal::charsets()->codecForName( charset );
00466 if ( !charsetcodec || forceCS )
00467 charsetcodec = KGlobal::charsets()->codecForName( defaultCS );
00468
00469 usedCS = charsetcodec->name();
00470 return charsetcodec->toUnicode( st );
00471 }
00472
00473 QString decodeRFC2231String( const QByteArray &src )
00474 {
00475 QByteArray usedCS;
00476 return decodeRFC2231String( src, usedCS, "utf-8", false );
00477 }
00478
00479 QByteArray uniqueString()
00480 {
00481 static char chars[] = "0123456789abcdefghijklmnopqrstuvxyzABCDEFGHIJKLMNOPQRSTUVWXYZ";
00482 time_t now;
00483 char p[11];
00484 int pos, ran;
00485 unsigned int timeval;
00486
00487 p[10] = '\0';
00488 now = time( 0 );
00489 ran = 1 + (int)(1000.0*rand() / (RAND_MAX + 1.0));
00490 timeval = (now / ran) + getpid();
00491
00492 for ( int i=0; i<10; i++ ) {
00493 pos = (int) (61.0*rand() / (RAND_MAX + 1.0));
00494
00495 p[i] = chars[pos];
00496 }
00497
00498 QByteArray ret;
00499 ret.setNum( timeval );
00500 ret += '.';
00501 ret += p;
00502
00503 return ret;
00504 }
00505
00506 QByteArray multiPartBoundary()
00507 {
00508 return "nextPart" + uniqueString();
00509 }
00510
00511 QByteArray unfoldHeader( const QByteArray &header )
00512 {
00513 QByteArray result;
00514 int pos = 0, foldBegin = 0, foldMid = 0, foldEnd = 0;
00515 while ( ( foldMid = header.indexOf( '\n', pos ) ) >= 0 ) {
00516 foldBegin = foldEnd = foldMid;
00517
00518 while ( foldBegin > 0 ) {
00519 if ( !QChar( header[foldBegin - 1] ).isSpace() ) {
00520 break;
00521 }
00522 --foldBegin;
00523 }
00524
00525 while ( foldEnd <= header.length() - 1 ) {
00526 if ( QChar( header[foldEnd] ).isSpace() ) {
00527 ++foldEnd;
00528 }
00529 else if ( foldEnd > 0 && header[foldEnd - 1] == '\n' &&
00530 header[foldEnd] == '=' && foldEnd + 2 < header.length() &&
00531 ( ( header[foldEnd + 1] == '0' &&
00532 header[foldEnd + 2] == '9' ) ||
00533 ( header[foldEnd + 1] == '2' &&
00534 header[foldEnd + 2] == '0' ) ) ) {
00535
00536 foldEnd += 3;
00537 }
00538 else {
00539 break;
00540 }
00541 }
00542
00543 result += header.mid( pos, foldBegin - pos );
00544 if ( foldEnd < header.length() -1 )
00545 result += ' ';
00546 pos = foldEnd;
00547 }
00548 result += header.mid( pos, header.length() - pos );
00549 return result;
00550 }
00551
00552 int findHeaderLineEnd( const QByteArray &src, int &dataBegin, bool *folded )
00553 {
00554 int end = dataBegin;
00555 int len = src.length() - 1;
00556
00557 if ( folded )
00558 *folded = false;
00559
00560 if ( dataBegin < 0 ) {
00561
00562 return -1;
00563 }
00564
00565 if ( dataBegin > len ) {
00566
00567 return len + 1;
00568 }
00569
00570
00571
00572
00573 if ( src.at(end) == '\n' && end + 1 < len &&
00574 ( src[end+1] == ' ' || src[end+1] == '\t' ) ) {
00575
00576
00577 dataBegin += 2;
00578 end += 2;
00579 }
00580
00581 if ( src.at(end) != '\n' ) {
00582 while ( true ) {
00583 end = src.indexOf( '\n', end + 1 );
00584 if ( end == -1 || end == len ) {
00585
00586 break;
00587 }
00588 else if ( src[end+1] == ' ' || src[end+1] == '\t' ||
00589 ( src[end+1] == '=' && end+3 <= len &&
00590 ( ( src[end+2] == '0' && src[end+3] == '9' ) ||
00591 ( src[end+2] == '2' && src[end+3] == '0' ) ) ) ) {
00592
00593 if ( folded )
00594 *folded = true;
00595 } else {
00596
00597 break;
00598 }
00599 }
00600 }
00601
00602 if ( end < 0 ) {
00603 end = len + 1;
00604 }
00605 return end;
00606 }
00607
00608 int indexOfHeader( const QByteArray &src, const QByteArray &name, int &end, int &dataBegin, bool *folded )
00609 {
00610 QByteArray n = name;
00611 n.append( ':' );
00612 int begin = -1;
00613
00614 if ( qstrnicmp( n.constData(), src.constData(), n.length() ) == 0 ) {
00615 begin = 0;
00616 } else {
00617 n.prepend('\n');
00618 const char *p = strcasestr( src.constData(), n.constData() );
00619 if ( !p ) {
00620 begin = -1;
00621 } else {
00622 begin = p - src.constData();
00623 ++begin;
00624 }
00625 }
00626
00627 if ( begin > -1) {
00628 dataBegin = begin + name.length() + 1;
00629
00630 if ( src.at( dataBegin ) == ' ' ) {
00631 ++dataBegin;
00632 }
00633 end = findHeaderLineEnd( src, dataBegin, folded );
00634 return begin;
00635
00636 } else {
00637 dataBegin = -1;
00638 return -1;
00639 }
00640 }
00641
00642 QByteArray extractHeader( const QByteArray &src, const QByteArray &name )
00643 {
00644 int begin, end;
00645 bool folded;
00646 indexOfHeader( src, name, end, begin, &folded );
00647
00648 if ( begin >= 0 ) {
00649 if ( !folded ) {
00650 return src.mid( begin, end - begin );
00651 } else {
00652 QByteArray hdrValue = src.mid( begin, end - begin );
00653 return unfoldHeader( hdrValue );
00654 }
00655 } else {
00656 return QByteArray();
00657 }
00658 }
00659
00660 QList<QByteArray> extractHeaders( const QByteArray &src, const QByteArray &name )
00661 {
00662 int begin, end;
00663 bool folded;
00664 QList<QByteArray> result;
00665 QByteArray copySrc( src );
00666
00667 indexOfHeader( copySrc, name, end, begin, &folded );
00668 while ( begin >= 0 ) {
00669 if ( !folded ) {
00670 result.append( copySrc.mid( begin, end - begin ) );
00671 } else {
00672 QByteArray hdrValue = copySrc.mid( begin, end - begin );
00673 result.append( unfoldHeader( hdrValue ) );
00674 }
00675
00676
00677 copySrc = copySrc.mid( end );
00678 indexOfHeader( copySrc, name, end, begin, &folded );
00679 }
00680
00681 return result;
00682 }
00683
00684 void removeHeader( QByteArray &header, const QByteArray &name )
00685 {
00686 int begin, end, dummy;
00687 begin = indexOfHeader( header, name, end, dummy );
00688 if ( begin >= 0 ) {
00689 header.remove( begin, end - begin + 1 );
00690 }
00691 }
00692
00693 QByteArray CRLFtoLF( const QByteArray &s )
00694 {
00695 QByteArray ret = s;
00696 ret.replace( "\r\n", "\n" );
00697 return ret;
00698 }
00699
00700 QByteArray CRLFtoLF( const char *s )
00701 {
00702 QByteArray ret = s;
00703 return CRLFtoLF( ret );
00704 }
00705
00706 QByteArray LFtoCRLF( const QByteArray &s )
00707 {
00708 QByteArray ret = s;
00709 ret.replace( '\n', "\r\n" );
00710 return ret;
00711 }
00712
00713 QByteArray LFtoCRLF( const char *s )
00714 {
00715 QByteArray ret = s;
00716 return LFtoCRLF( ret );
00717 }
00718
namespace {
// Strips quoting from @p str in place: every '"' is removed, and inside a
// quoted section a backslash is removed while the character it escapes is
// kept untouched.
template < typename T > void removeQuotesGeneric( T & str )
{
  bool quoted = false;
  int pos = 0;
  while ( pos < str.length() ) {
    if ( str[pos] == '"' ) {
      // Drop the quote and look at the character that moved into its place.
      str.remove( pos, 1 );
      quoted = !quoted;
      continue;
    }
    if ( quoted && ( str[pos] == '\\' ) ) {
      // Drop the backslash; the increment below steps over the escaped
      // character so that it is never interpreted.
      str.remove( pos, 1 );
    }
    ++pos;
  }
}
}
00736
00737 void removeQuots( QByteArray &str )
00738 {
00739 removeQuotesGeneric( str );
00740 }
00741
00742 void removeQuots( QString &str )
00743 {
00744 removeQuotesGeneric( str );
00745 }
00746
00747
00748
00749
00750
00751
00752
00753 static char getCharFromQByteArray( const QByteArray &array, int index )
00754 {
00755 return array.at( index );
00756 }
00757
00758 static char getCharFromQString( const QString &string, int index )
00759 {
00760 return string.at( index ).toAscii();
00761 }
00762
// Escapes and, if required, quotes @p str in place.
//
// Every backslash and double quote gets a backslash in front of it. The
// string is then wrapped in double quotes when @p forceQuotes is set or when
// it contains a character for which strchr() finds a match in the specials
// list below.
//
// @param convertFunction returns character @c index of the string as a char
template<class StringType>
void addQuotes_impl( StringType &str, bool forceQuotes,
                     char (*convertFunction)( const StringType&, int ) )
{
  static const char specials[] = "()<>@,.;:[]=\\\"";

  bool mustQuote = forceQuotes;
  int pos = 0;
  while ( pos < str.length() ) {
    const char ch = convertFunction( str, pos );
    if ( strchr( specials, ch ) != 0 ) {
      mustQuote = true;
    }
    if ( ch == '\\' || ch == '\"' ) {
      str.insert( pos, '\\' );
      ++pos; // step over the backslash that was just inserted
    }
    ++pos;
  }

  if ( mustQuote ) {
    str.insert( 0, '\"' );
    str.append( "\"" );
  }
}
00784
00785 void addQuotes( QByteArray &str, bool forceQuotes )
00786 {
00787 addQuotes_impl( str, forceQuotes, &getCharFromQByteArray );
00788 }
00789
00790 void addQuotes( QString &str, bool forceQuotes )
00791 {
00792 addQuotes_impl( str, forceQuotes, &getCharFromQString );
00793 }
00794
00795 KMIME_EXPORT QString balanceBidiState( const QString &input )
00796 {
00797 const int LRO = 0x202D;
00798 const int RLO = 0x202E;
00799 const int LRE = 0x202A;
00800 const int RLE = 0x202B;
00801 const int PDF = 0x202C;
00802
00803 QString result = input;
00804
00805 int openDirChangers = 0;
00806 int numPDFsRemoved = 0;
00807 for ( int i = 0; i < input.length(); i++ ) {
00808 const ushort &code = input.at( i ).unicode();
00809 if ( code == LRO || code == RLO || code == LRE || code == RLE ) {
00810 openDirChangers++;
00811 }
00812 else if ( code == PDF ) {
00813 if ( openDirChangers > 0 ) {
00814 openDirChangers--;
00815 }
00816 else {
00817
00818 kWarning() << "Possible Unicode spoofing (unexpected PDF) detected in" << input;
00819 result.remove( i - numPDFsRemoved, 1 );
00820 numPDFsRemoved++;
00821 }
00822 }
00823 }
00824
00825 if ( openDirChangers > 0 ) {
00826 kWarning() << "Possible Unicode spoofing detected in" << input;
00827
00828
00829
00830
00831 for ( int i = openDirChangers; i > 0; i-- ) {
00832 if ( result.endsWith( '"' ) )
00833 result.insert( result.length() - 1, QChar( PDF ) );
00834 else
00835 result += QChar( PDF );
00836 }
00837 }
00838
00839 return result;
00840 }
00841
00842 QString removeBidiControlChars( const QString &input )
00843 {
00844 const int LRO = 0x202D;
00845 const int RLO = 0x202E;
00846 const int LRE = 0x202A;
00847 const int RLE = 0x202B;
00848 QString result = input;
00849 result.remove( LRO );
00850 result.remove( RLO );
00851 result.remove( LRE );
00852 result.remove( RLE );
00853 return result;
00854 }
00855
00856 }