00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00035 #include "kmime_codec_qp.h"
00036 #include "kmime_util.h"
00037
00038 #include <kdebug.h>
00039
00040 #include <cassert>
00041
00042 using namespace KMime;
00043
00044 namespace KMime {
00045
00046
00047
00054 static inline char binToHex( uchar value )
00055 {
00056 if ( value > 9 ) {
00057 return value + 'A' - 10;
00058 } else {
00059 return value + '0';
00060 }
00061 }
00062
00067 static inline uchar highNibble( uchar ch )
00068 {
00069 return ch >> 4;
00070 }
00071
00076 static inline uchar lowNibble( uchar ch )
00077 {
00078 return ch & 0xF;
00079 }
00080
00086 static inline bool keep( uchar ch )
00087 {
00088
00089 return !( ( ch < ' ' && ch != '\t' ) || ch == '?' );
00090 }
00091
00092
00093
00094
00095
00096 class QuotedPrintableEncoder : public Encoder
00097 {
00098 char mInputBuffer[16];
00099 uchar mCurrentLineLength;
00100 uchar mAccu;
00101 uint mInputBufferReadCursor : 4;
00102 uint mInputBufferWriteCursor : 4;
00103 enum {
00104 Never, AtBOL, Definitely
00105 } mAccuNeedsEncoding : 2;
00106 bool mSawLineEnd : 1;
00107 bool mSawCR : 1;
00108 bool mFinishing : 1;
00109 bool mFinished : 1;
00110 protected:
00111 friend class QuotedPrintableCodec;
00112 QuotedPrintableEncoder( bool withCRLF=false )
00113 : Encoder( withCRLF ), mCurrentLineLength( 0 ), mAccu( 0 ),
00114 mInputBufferReadCursor( 0 ), mInputBufferWriteCursor( 0 ),
00115 mAccuNeedsEncoding( Never ),
00116 mSawLineEnd( false ), mSawCR( false ), mFinishing( false ),
00117 mFinished( false ) {}
00118
00119 bool needsEncoding( uchar ch )
00120 { return ch > '~' || ( ch < ' ' && ch != '\t' ) || ch == '='; }
00121 bool needsEncodingAtEOL( uchar ch )
00122 { return ch == ' ' || ch == '\t'; }
00123 bool needsEncodingAtBOL( uchar ch )
00124 { return ch == 'F' || ch == '.' || ch == '-'; }
00125 bool fillInputBuffer( const char* &scursor, const char * const send );
00126 bool processNextChar();
00127 void createOutputBuffer( char* &dcursor, const char * const dend );
00128 public:
00129 virtual ~QuotedPrintableEncoder() {}
00130
00131 bool encode( const char* &scursor, const char * const send,
00132 char* &dcursor, const char * const dend );
00133
00134 bool finish( char* &dcursor, const char * const dend );
00135 };
00136
00137 class QuotedPrintableDecoder : public Decoder
00138 {
00139 const char mEscapeChar;
00140 char mBadChar;
00142 uchar mAccu;
00152 const bool mQEncoding;
00153 bool mInsideHexChar;
00154 bool mFlushing;
00155 bool mExpectLF;
00156 bool mHaveAccu;
00159 char mLastChar;
00160 protected:
00161 friend class QuotedPrintableCodec;
00162 friend class Rfc2047QEncodingCodec;
00163 friend class Rfc2231EncodingCodec;
00164 QuotedPrintableDecoder( bool withCRLF=false,
00165 bool aQEncoding=false, char aEscapeChar='=' )
00166 : Decoder( withCRLF ),
00167 mEscapeChar( aEscapeChar ),
00168 mBadChar( 0 ),
00169 mAccu( 0 ),
00170 mQEncoding( aQEncoding ),
00171 mInsideHexChar( false ),
00172 mFlushing( false ),
00173 mExpectLF( false ),
00174 mHaveAccu( false ),
00175 mLastChar( 0 ) {}
00176 public:
00177 virtual ~QuotedPrintableDecoder() {}
00178
00179 bool decode( const char* &scursor, const char * const send,
00180 char* &dcursor, const char * const dend );
00181 bool finish( char* & dcursor, const char * const dend );
00182 };
00183
00184 class Rfc2047QEncodingEncoder : public Encoder
00185 {
00186 uchar mAccu;
00187 uchar mStepNo;
00188 const char mEscapeChar;
00189 bool mInsideFinishing : 1;
00190 protected:
00191 friend class Rfc2047QEncodingCodec;
00192 friend class Rfc2231EncodingCodec;
00193 Rfc2047QEncodingEncoder( bool withCRLF=false, char aEscapeChar='=' )
00194 : Encoder( withCRLF ),
00195 mAccu( 0 ), mStepNo( 0 ), mEscapeChar( aEscapeChar ),
00196 mInsideFinishing( false )
00197 {
00198
00199 assert( aEscapeChar == '=' || aEscapeChar == '%' );
00200 }
00201
00202
00203 bool needsEncoding( uchar ch )
00204 {
00205 if ( ch > 'z' ) {
00206 return true;
00207 }
00208 if ( !isEText( ch ) ) {
00209 return true;
00210 }
00211 if ( mEscapeChar == '%' && ( ch == '*' || ch == '/' ) ) {
00212 return true;
00213 }
00214 return false;
00215 }
00216
00217 public:
00218 virtual ~Rfc2047QEncodingEncoder() {}
00219
00220 bool encode( const char* & scursor, const char * const send,
00221 char* & dcursor, const char * const dend );
00222 bool finish( char* & dcursor, const char * const dend );
00223 };
00224
00225
00226
/**
 * Upper bound for the number of bytes QuotedPrintableDecoder can produce.
 *
 * @param insize   number of encoded input bytes.
 * @param withCRLF if true, room for doubling every input byte is reserved.
 * @return @p insize (or twice @p insize when @p withCRLF is set) plus two
 *         extra bytes of slack.
 */
static int QuotedPrintableDecoder_maxDecodedSizeFor( int insize, bool withCRLF )
{
  // Slack for what the decoder may hold back and emit late.
  // NOTE(review): presumably the escape char plus one pending hex digit.
  const int heldBack = 2;

  const int payload = withCRLF ? insize + insize : insize;
  return payload + heldBack;
}
00240
00241 Encoder *QuotedPrintableCodec::makeEncoder( bool withCRLF ) const
00242 {
00243 return new QuotedPrintableEncoder( withCRLF );
00244 }
00245
00246 Decoder *QuotedPrintableCodec::makeDecoder( bool withCRLF ) const
00247 {
00248 return new QuotedPrintableDecoder( withCRLF );
00249 }
00250
00251 int QuotedPrintableCodec::maxDecodedSizeFor( int insize, bool withCRLF ) const
00252 {
00253 return QuotedPrintableDecoder_maxDecodedSizeFor(insize, withCRLF);
00254 }
00255
00256 Encoder *Rfc2047QEncodingCodec::makeEncoder( bool withCRLF ) const
00257 {
00258 return new Rfc2047QEncodingEncoder( withCRLF );
00259 }
00260
00261 Decoder *Rfc2047QEncodingCodec::makeDecoder( bool withCRLF ) const
00262 {
00263 return new QuotedPrintableDecoder( withCRLF, true );
00264 }
00265
00266 int Rfc2047QEncodingCodec::maxDecodedSizeFor( int insize, bool withCRLF ) const
00267 {
00268 return QuotedPrintableDecoder_maxDecodedSizeFor(insize, withCRLF);
00269 }
00270
00271 Encoder *Rfc2231EncodingCodec::makeEncoder( bool withCRLF ) const
00272 {
00273 return new Rfc2047QEncodingEncoder( withCRLF, '%' );
00274 }
00275
00276 Decoder *Rfc2231EncodingCodec::makeDecoder( bool withCRLF ) const
00277 {
00278 return new QuotedPrintableDecoder( withCRLF, true, '%' );
00279 }
00280
00281 int Rfc2231EncodingCodec::maxDecodedSizeFor( int insize, bool withCRLF ) const
00282 {
00283 return QuotedPrintableDecoder_maxDecodedSizeFor(insize, withCRLF);
00284 }
00285
00286
00287
00288
00289
00290 bool QuotedPrintableDecoder::decode( const char* &scursor,
00291 const char * const send,
00292 char* &dcursor, const char * const dend )
00293 {
00294 if ( mWithCRLF ) {
00295 kWarning() << "CRLF output for decoders isn't yet supported!";
00296 }
00297
00298 while ( scursor != send && dcursor != dend ) {
00299 if ( mFlushing ) {
00300
00301
00302
00303
00304
00305
00306
00307
00308
00309 if ( mInsideHexChar ) {
00310
00311 *dcursor++ = mEscapeChar;
00312 mInsideHexChar = false;
00313 } else if ( mHaveAccu ) {
00314
00315 *dcursor++ = mLastChar;
00316 mHaveAccu = false;
00317 mAccu = 0;
00318 } else {
00319
00320 assert( mAccu == 0 );
00321 if ( mBadChar ) {
00322 if ( mBadChar == '=' ) {
00323 mInsideHexChar = true;
00324 } else {
00325 *dcursor++ = mBadChar;
00326 }
00327 mBadChar = 0;
00328 }
00329 mFlushing = false;
00330 }
00331 continue;
00332 }
00333 assert( mBadChar == 0 );
00334
00335 uchar ch = *scursor++;
00336 uchar value = 255;
00337
00338 if ( mExpectLF && ch != '\n' ) {
00339 kWarning() << "QuotedPrintableDecoder:"
00340 "illegally formed soft linebreak or lonely CR!";
00341 mInsideHexChar = false;
00342 mExpectLF = false;
00343 assert( mAccu == 0 );
00344 }
00345
00346 if ( mInsideHexChar ) {
00347
00348 if ( ch <= '9' ) {
00349 if ( ch >= '0' ) {
00350 value = ch - '0';
00351 } else {
00352 switch ( ch ) {
00353 case '\r':
00354 mExpectLF = true;
00355 break;
00356 case '\n':
00357
00358 if ( !mHaveAccu ) {
00359 mExpectLF = false;
00360 mInsideHexChar = false;
00361 break;
00362 }
00363
00364 default:
00365 kWarning() << "QuotedPrintableDecoder:"
00366 "illegally formed hex char! Outputting verbatim.";
00367 mBadChar = ch;
00368 mFlushing = true;
00369 }
00370 continue;
00371 }
00372 } else {
00373 if ( ch <= 'F' ) {
00374 if ( ch >= 'A' ) {
00375 value = 10 + ch - 'A';
00376 } else {
00377 mBadChar = ch;
00378 mFlushing = true;
00379 continue;
00380 }
00381 } else {
00382 if ( ch <= 'f' && ch >= 'a' ) {
00383 value = 10 + ch - 'a';
00384 } else {
00385 mBadChar = ch;
00386 mFlushing = true;
00387 continue;
00388 }
00389 }
00390 }
00391
00392 assert( value < 16 );
00393 assert( mBadChar == 0 );
00394 assert( !mExpectLF );
00395
00396 if ( mHaveAccu ) {
00397 *dcursor++ = char( mAccu | value );
00398 mAccu = 0;
00399 mHaveAccu = false;
00400 mInsideHexChar = false;
00401 } else {
00402 mHaveAccu = true;
00403 mAccu = value << 4;
00404 mLastChar = ch;
00405 }
00406 } else {
00407 if ( ( ch <= '~' && ch >= ' ' ) || ch == '\t' ) {
00408 if ( ch == mEscapeChar ) {
00409 mInsideHexChar = true;
00410 } else if ( mQEncoding && ch == '_' ) {
00411 *dcursor++ = char( 0x20 );
00412 } else {
00413 *dcursor++ = char( ch );
00414 }
00415 } else if ( ch == '\n' ) {
00416 *dcursor++ = '\n';
00417 mExpectLF = false;
00418 } else if ( ch == '\r' ) {
00419 mExpectLF = true;
00420 } else {
00421 kWarning() << "QuotedPrintableDecoder:" << ch <<
00422 "illegal character in input stream! Ignoring.";
00423 }
00424 }
00425 }
00426
00427 return scursor == send;
00428 }
00429
00430 bool QuotedPrintableDecoder::finish( char* &dcursor, const char * const dend )
00431 {
00432 while ( ( mInsideHexChar || mHaveAccu || mFlushing ) && dcursor != dend ) {
00433
00434 if ( mInsideHexChar ) {
00435
00436 *dcursor++ = mEscapeChar;
00437 mInsideHexChar = false;
00438 }
00439 else if ( mHaveAccu ) {
00440
00441 *dcursor++ = mLastChar;
00442 mHaveAccu = false;
00443 mAccu = 0;
00444 } else {
00445
00446 assert( mAccu == 0 );
00447 if ( mBadChar ) {
00448 *dcursor++ = mBadChar;
00449 mBadChar = 0;
00450 }
00451 mFlushing = false;
00452 }
00453 }
00454
00455
00456 return !( mHaveAccu || mFlushing );
00457 }
00458
00459 bool QuotedPrintableEncoder::fillInputBuffer( const char* &scursor,
00460 const char * const send ) {
00461
00462 if ( mSawLineEnd ) {
00463 return true;
00464 }
00465
00466
00467
00468 for ( ; ( mInputBufferWriteCursor + 1 ) % 16 != mInputBufferReadCursor
00469 && scursor != send ; mInputBufferWriteCursor++ ) {
00470 char ch = *scursor++;
00471 if ( ch == '\r' ) {
00472 mSawCR = true;
00473 } else if ( ch == '\n' ) {
00474
00475
00476 if ( mSawCR ) {
00477 mSawCR = false;
00478 assert( mInputBufferWriteCursor != mInputBufferReadCursor );
00479 mInputBufferWriteCursor--;
00480 }
00481 mSawLineEnd = true;
00482 return true;
00483 } else {
00484 mSawCR = false;
00485 }
00486 mInputBuffer[ mInputBufferWriteCursor ] = ch;
00487 }
00488 mSawLineEnd = false;
00489 return false;
00490 }
00491
00492 bool QuotedPrintableEncoder::processNextChar()
00493 {
00494
00495
00496
00497
00498
00499
00500 const int minBufferFillWithoutLineEnd = 4;
00501
00502 assert( mOutputBufferCursor == 0 );
00503
00504 int bufferFill =
00505 int( mInputBufferWriteCursor ) - int( mInputBufferReadCursor ) ;
00506 if ( bufferFill < 0 ) {
00507 bufferFill += 16;
00508 }
00509
00510 assert( bufferFill >=0 && bufferFill <= 15 );
00511
00512 if ( !mFinishing && !mSawLineEnd &&
00513 bufferFill < minBufferFillWithoutLineEnd ) {
00514 return false;
00515 }
00516
00517
00518 if ( mInputBufferReadCursor == mInputBufferWriteCursor ) {
00519 return false;
00520 }
00521
00522
00523 mAccu = mInputBuffer[ mInputBufferReadCursor++ ];
00524 if ( needsEncoding( mAccu ) ) {
00525 mAccuNeedsEncoding = Definitely;
00526 } else if ( ( mSawLineEnd || mFinishing )
00527 && bufferFill == 1
00528 && needsEncodingAtEOL( mAccu ) ) {
00529 mAccuNeedsEncoding = Definitely;
00530 } else if ( needsEncodingAtBOL( mAccu ) ) {
00531 mAccuNeedsEncoding = AtBOL;
00532 } else {
00533
00534 mAccuNeedsEncoding = Never;
00535 }
00536
00537 return true;
00538 }
00539
00540
00541
00542
00543
00544 void QuotedPrintableEncoder::createOutputBuffer( char* &dcursor,
00545 const char * const dend )
00546 {
00547 const int maxLineLength = 76;
00548
00549 assert( mOutputBufferCursor == 0 );
00550
00551 bool lastOneOnThisLine = mSawLineEnd
00552 && mInputBufferReadCursor == mInputBufferWriteCursor;
00553
00554 int neededSpace = 1;
00555 if ( mAccuNeedsEncoding == Definitely ) {
00556 neededSpace = 3;
00557 }
00558
00559
00560 if ( !lastOneOnThisLine ) {
00561 neededSpace++;
00562 }
00563
00564 if ( mCurrentLineLength > maxLineLength - neededSpace ) {
00565
00566 write( '=', dcursor, dend );
00567 writeCRLF( dcursor, dend );
00568 mCurrentLineLength = 0;
00569 }
00570
00571 if ( Never == mAccuNeedsEncoding ||
00572 ( AtBOL == mAccuNeedsEncoding && mCurrentLineLength != 0 ) ) {
00573 write( mAccu, dcursor, dend );
00574 mCurrentLineLength++;
00575 } else {
00576 write( '=', dcursor, dend );
00577 write( binToHex( highNibble( mAccu ) ), dcursor, dend );
00578 write( binToHex( lowNibble( mAccu ) ), dcursor, dend );
00579 mCurrentLineLength += 3;
00580 }
00581 }
00582
00583 bool QuotedPrintableEncoder::encode( const char* &scursor,
00584 const char * const send,
00585 char* &dcursor, const char * const dend )
00586 {
00587
00588 if ( mFinishing ) {
00589 return true;
00590 }
00591
00592 while ( scursor != send && dcursor != dend ) {
00593 if ( mOutputBufferCursor && !flushOutputBuffer( dcursor, dend ) ) {
00594 return scursor == send;
00595 }
00596
00597 assert( mOutputBufferCursor == 0 );
00598
00599
00600
00601 fillInputBuffer( scursor, send );
00602
00603 if ( processNextChar() ) {
00604
00605 createOutputBuffer( dcursor, dend );
00606 } else if ( mSawLineEnd &&
00607 mInputBufferWriteCursor == mInputBufferReadCursor ) {
00608
00609 writeCRLF( dcursor, dend );
00610
00611 mSawLineEnd = false;
00612 mCurrentLineLength = 0;
00613 } else {
00614
00615 break;
00616 }
00617 }
00618
00619
00620
00621 if ( mOutputBufferCursor ) {
00622 flushOutputBuffer( dcursor, dend );
00623 }
00624
00625 return scursor == send;
00626
00627 }
00628
00629 bool QuotedPrintableEncoder::finish( char* &dcursor, const char * const dend )
00630 {
00631 mFinishing = true;
00632
00633 if ( mFinished ) {
00634 return flushOutputBuffer( dcursor, dend );
00635 }
00636
00637 while ( dcursor != dend ) {
00638 if ( mOutputBufferCursor && !flushOutputBuffer( dcursor, dend ) ) {
00639 return false;
00640 }
00641
00642 assert( mOutputBufferCursor == 0 );
00643
00644 if ( processNextChar() ) {
00645
00646 createOutputBuffer( dcursor, dend );
00647 } else if ( mSawLineEnd &&
00648 mInputBufferWriteCursor == mInputBufferReadCursor ) {
00649
00650 writeCRLF( dcursor, dend );
00651 mSawLineEnd = false;
00652 mCurrentLineLength = 0;
00653 } else {
00654 mFinished = true;
00655 return flushOutputBuffer( dcursor, dend );
00656 }
00657 }
00658
00659 return mFinished && !mOutputBufferCursor;
00660
00661 }
00662
00663 bool Rfc2047QEncodingEncoder::encode( const char* &scursor,
00664 const char * const send,
00665 char* &dcursor, const char * const dend )
00666 {
00667 if ( mInsideFinishing ) {
00668 return true;
00669 }
00670
00671 while ( scursor != send && dcursor != dend ) {
00672 uchar value;
00673 switch ( mStepNo ) {
00674 case 0:
00675
00676 mAccu = *scursor++;
00677 if ( !needsEncoding( mAccu ) ) {
00678 *dcursor++ = char( mAccu );
00679 } else if ( mEscapeChar == '=' && mAccu == 0x20 ) {
00680
00681
00682 *dcursor++ = '_';
00683 } else {
00684
00685 *dcursor++ = mEscapeChar;
00686 mStepNo = 1;
00687 }
00688 continue;
00689 case 1:
00690
00691 value = highNibble( mAccu );
00692 mStepNo = 2;
00693 break;
00694 case 2:
00695
00696 value = lowNibble( mAccu );
00697 mStepNo = 0;
00698 break;
00699 default: assert( 0 );
00700 }
00701
00702
00703 *dcursor++ = binToHex( value );
00704 }
00705
00706 return scursor == send;
00707 }
00708
00709 #include <QtCore/QString>
00710
00711 bool Rfc2047QEncodingEncoder::finish( char* &dcursor, const char * const dend )
00712 {
00713 mInsideFinishing = true;
00714
00715
00716 while ( mStepNo != 0 && dcursor != dend ) {
00717 uchar value;
00718 switch ( mStepNo ) {
00719 case 1:
00720
00721 value = highNibble( mAccu );
00722 mStepNo = 2;
00723 break;
00724 case 2:
00725
00726 value = lowNibble( mAccu );
00727 mStepNo = 0;
00728 break;
00729 default: assert( 0 );
00730 }
00731
00732
00733 *dcursor++ = binToHex( value );
00734 }
00735
00736 return mStepNo == 0;
00737 }
00738
00739 }