• Skip to content
  • Skip to link menu
KDE 4.5 API Reference
  • KDE API Reference
  • KDE-PIM Libraries
  • Sitemap
  • Contact Us
 

KMIME Library

kmime_header_parsing.cpp

00001 /*  -*- c++ -*-
00002     kmime_header_parsing.cpp
00003 
00004     KMime, the KDE Internet mail/usenet news message library.
00005     Copyright (c) 2001-2002 Marc Mutz <mutz@kde.org>
00006 
00007     This library is free software; you can redistribute it and/or
00008     modify it under the terms of the GNU Library General Public
00009     License as published by the Free Software Foundation; either
00010     version 2 of the License, or (at your option) any later version.
00011 
00012     This library is distributed in the hope that it will be useful,
00013     but WITHOUT ANY WARRANTY; without even the implied warranty of
00014     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00015     Library General Public License for more details.
00016 
00017     You should have received a copy of the GNU Library General Public License
00018     along with this library; see the file COPYING.LIB.  If not, write to
00019     the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
00020     Boston, MA 02110-1301, USA.
00021 */
00022 
00023 #include "kmime_header_parsing.h"
00024 
00025 #include "kmime_codecs.h"
00026 #include "kmime_headerfactory_p.h"
00027 #include "kmime_headers.h"
00028 #include "kmime_util.h"
00029 #include "kmime_util_p.h"
00030 #include "kmime_dateformatter.h"
00031 #include "kmime_warning.h"
00032 
00033 #include <kglobal.h>
00034 #include <kcharsets.h>
00035 
00036 #include <QtCore/QTextCodec>
00037 #include <QtCore/QMap>
00038 #include <QtCore/QStringList>
00039 #include <QtCore/QUrl>
00040 
00041 #include <ctype.h> // for isdigit
00042 #include <cassert>
00043 
00044 using namespace KMime;
00045 using namespace KMime::Types;
00046 
00047 namespace KMime {
00048 
00049 namespace Types {
00050 
00051 // QUrl::fromAce is extremely expensive, so only use it when necessary.
00052 // Fortunately, the presence of IDNA is readily detected with a substring match...
00053 static inline QString QUrl_fromAce_wrapper( const QString & domain )
00054 {
00055     if ( domain.contains( QLatin1String( "xn--" ) ) )
00056         return QUrl::fromAce( domain.toLatin1() );
00057     else
00058         return domain;
00059 }
00060 
00061 static QString addr_spec_as_string( const AddrSpec & as, bool pretty )
00062 {
00063   if ( as.isEmpty() ) {
00064     return QString();
00065   }
00066 
00067   bool needsQuotes = false;
00068   QString result;
00069   result.reserve( as.localPart.length() + as.domain.length() + 1 );
00070   for ( int i = 0 ; i < as.localPart.length() ; ++i ) {
00071     const char ch = as.localPart[i].toLatin1();
00072     if ( ch == '.' || isAText( ch ) ) {
00073       result += ch;
00074     } else {
00075       needsQuotes = true;
00076       if ( ch == '\\' || ch == '"' ) {
00077         result += '\\';
00078       }
00079       result += ch;
00080     }
00081   }
00082   const QString dom = pretty ? QUrl_fromAce_wrapper( as.domain ) : as.domain ;
00083   if ( needsQuotes ) {
00084     result = '"' + result + "\"";
00085   }
00086   if( dom.isEmpty() ) {
00087     return result;
00088   } else {
00089     return result + '@' + dom;
00090   }
00091 }
00092 
00093 QString AddrSpec::asString() const
00094 {
00095     return addr_spec_as_string( *this, false );
00096 }
00097 
00098 QString AddrSpec::asPrettyString() const
00099 {
00100     return addr_spec_as_string( *this, true );
00101 }
00102 
00103 bool AddrSpec::isEmpty() const
00104 {
00105   return localPart.isEmpty() && domain.isEmpty();
00106 }
00107 
00108 QByteArray Mailbox::address() const
00109 {
00110   return mAddrSpec.asString().toLatin1();
00111 }
00112 
00113 AddrSpec Mailbox::addrSpec() const
00114 {
00115   return mAddrSpec;
00116 }
00117 
00118 QString Mailbox::name() const
00119 {
00120   return mDisplayName;
00121 }
00122 
00123 void Mailbox::setAddress( const AddrSpec &addr )
00124 {
00125   mAddrSpec = addr;
00126 }
00127 
00128 void Mailbox::setAddress( const QByteArray &addr )
00129 {
00130   const char *cursor = addr.constData();
00131   if ( !HeaderParsing::parseAngleAddr( cursor,
00132                                        cursor + addr.length(), mAddrSpec ) ) {
00133     if ( !HeaderParsing::parseAddrSpec( cursor, cursor + addr.length(),
00134                                         mAddrSpec ) ) {
00135       kWarning() << "Invalid address";
00136       return;
00137     }
00138   }
00139 }
00140 
00141 void Mailbox::setName( const QString &name )
00142 {
00143   mDisplayName = removeBidiControlChars( name );
00144 }
00145 
00146 void Mailbox::setNameFrom7Bit( const QByteArray &name,
00147                                const QByteArray &defaultCharset )
00148 {
00149   QByteArray cs;
00150   setName( decodeRFC2047String( name, cs, defaultCharset, false ) );
00151 }
00152 
00153 bool Mailbox::hasAddress() const
00154 {
00155   return !mAddrSpec.isEmpty();
00156 }
00157 
00158 bool Mailbox::hasName() const
00159 {
00160   return !mDisplayName.isEmpty();
00161 }
00162 
00163 QString Mailbox::prettyAddress() const
00164 {
00165   return prettyAddress( QuoteNever );
00166 }
00167 
00168 QString Mailbox::prettyAddress( Quoting quoting ) const
00169 {
00170   if ( !hasName() ) {
00171     return address();
00172   }
00173   QString s = name();
00174   if ( quoting != QuoteNever ) {
00175     addQuotes( s, quoting == QuoteAlways /*bool force*/ );
00176   }
00177 
00178   if ( hasAddress() ) {
00179     s += QLatin1String(" <") + address() + QLatin1Char('>');
00180   }
00181   return s;
00182 }
00183 
00184 void Mailbox::fromUnicodeString( const QString &s )
00185 {
00186   from7BitString( encodeRFC2047String( s, "utf-8", false ) );
00187 }
00188 
00189 void Mailbox::from7BitString( const QByteArray &s )
00190 {
00191   const char *cursor = s.constData();
00192   HeaderParsing::parseMailbox( cursor, cursor + s.length(), *this );
00193 }
00194 
00195 QByteArray KMime::Types::Mailbox::as7BitString( const QByteArray &encCharset ) const
00196 {
00197   if ( !hasName() ) {
00198     return address();
00199   }
00200   QByteArray rv;
00201   if ( isUsAscii( name() ) ) {
00202     QByteArray tmp = name().toLatin1();
00203     addQuotes( tmp, false );
00204     rv += tmp;
00205   } else {
00206     rv += encodeRFC2047String( name(), encCharset, true );
00207   }
00208   if ( hasAddress() ) {
00209     rv += " <" + address() + '>';
00210   }
00211   return rv;
00212 }
00213 
00214 } // namespace Types
00215 
00216 namespace HeaderParsing {
00217 
00218 // parse the encoded-word (scursor points to after the initial '=')
00219 bool parseEncodedWord( const char* &scursor, const char * const send,
00220                        QString &result, QByteArray &language,
00221                        QByteArray &usedCS, const QByteArray &defaultCS,
00222                        bool forceCS )
00223 {
00224   // make sure the caller already did a bit of the work.
00225   assert( *(scursor-1) == '=' );
00226 
00227   //
00228   // STEP 1:
00229   // scan for the charset/language portion of the encoded-word
00230   //
00231 
00232   char ch = *scursor++;
00233 
00234   if ( ch != '?' ) {
00235     // kDebug() << "first";
00236     //KMIME_WARN_PREMATURE_END_OF( EncodedWord );
00237     return false;
00238   }
00239 
00240   // remember start of charset (ie. just after the initial "=?") and
00241   // language (just after the first '*') fields:
00242   const char * charsetStart = scursor;
00243   const char * languageStart = 0;
00244 
00245   // find delimiting '?' (and the '*' separating charset and language
00246   // tags, if any):
00247   for ( ; scursor != send ; scursor++ ) {
00248     if ( *scursor == '?') {
00249       break;
00250     } else if ( *scursor == '*' && languageStart == 0 ) {
00251       languageStart = scursor + 1;
00252     }
00253   }
00254 
00255   // not found? can't be an encoded-word!
00256   if ( scursor == send || *scursor != '?' ) {
00257     // kDebug() << "second";
00258     KMIME_WARN_PREMATURE_END_OF( EncodedWord );
00259     return false;
00260   }
00261 
00262   // extract the language information, if any (if languageStart is 0,
00263   // language will be null, too):
00264   QByteArray maybeLanguage( languageStart, scursor - languageStart );
00265   // extract charset information (keep in mind: the size given to the
00266   // ctor is one off due to the \0 terminator):
00267   QByteArray maybeCharset( charsetStart,
00268                            ( languageStart ? languageStart - 1 : scursor ) - charsetStart );
00269 
00270   //
00271   // STEP 2:
00272   // scan for the encoding portion of the encoded-word
00273   //
00274 
00275   // remember start of encoding (just _after_ the second '?'):
00276   scursor++;
00277   const char * encodingStart = scursor;
00278 
00279   // find next '?' (ending the encoding tag):
00280   for ( ; scursor != send ; scursor++ ) {
00281     if ( *scursor == '?' ) {
00282       break;
00283     }
00284   }
00285 
00286   // not found? Can't be an encoded-word!
00287   if ( scursor == send || *scursor != '?' ) {
00288     // kDebug() << "third";
00289     KMIME_WARN_PREMATURE_END_OF( EncodedWord );
00290     return false;
00291   }
00292 
00293   // extract the encoding information:
00294   QByteArray maybeEncoding( encodingStart, scursor - encodingStart );
00295 
00296   // kDebug() << "parseEncodedWord: found charset == \"" << maybeCharset
00297   //         << "\"; language == \"" << maybeLanguage
00298   //         << "\"; encoding == \"" << maybeEncoding << "\"";
00299 
00300   //
00301   // STEP 3:
00302   // scan for encoded-text portion of encoded-word
00303   //
00304 
00305   // remember start of encoded-text (just after the third '?'):
00306   scursor++;
00307   const char * encodedTextStart = scursor;
00308 
00309   // find the '?=' sequence (ending the encoded-text):
00310   for ( ; scursor != send ; scursor++ ) {
00311     if ( *scursor == '?' ) {
00312       if ( scursor + 1 != send ) {
00313         if ( *( scursor + 1 ) != '=' ) { // We expect a '=' after the '?', but we got something else; ignore
00314           KMIME_WARN << "Stray '?' in q-encoded word, ignoring this.";
00315           continue;
00316         }
00317         else { // yep, found a '?=' sequence
00318           scursor += 2;
00319           break;
00320         }
00321       }
00322       else { // The '?' is the last char, but we need a '=' after it!
00323         KMIME_WARN_PREMATURE_END_OF( EncodedWord );
00324         return false;
00325       }
00326     }
00327   }
00328 
00329   if ( *( scursor - 2 ) != '?' || *( scursor - 1 ) != '=' ||
00330        scursor < encodedTextStart + 2 ) {
00331     KMIME_WARN_PREMATURE_END_OF( EncodedWord );
00332     return false;
00333   }
00334 
00335   // set end sentinel for encoded-text:
00336   const char * const encodedTextEnd = scursor - 2;
00337 
00338   //
00339   // STEP 4:
00340   // setup decoders for the transfer encoding and the charset
00341   //
00342 
00343   // try if there's a codec for the encoding found:
00344   Codec * codec = Codec::codecForName( maybeEncoding );
00345   if ( !codec ) {
00346     KMIME_WARN_UNKNOWN( Encoding, maybeEncoding );
00347     return false;
00348   }
00349 
00350   // get an instance of a corresponding decoder:
00351   Decoder * dec = codec->makeDecoder();
00352   assert( dec );
00353 
00354   // try if there's a (text)codec for the charset found:
00355   bool matchOK = false;
00356   QTextCodec *textCodec = 0;
00357   if ( forceCS || maybeCharset.isEmpty() ) {
00358     textCodec = KGlobal::charsets()->codecForName( defaultCS, matchOK );
00359     usedCS = cachedCharset( defaultCS );
00360   } else {
00361     textCodec = KGlobal::charsets()->codecForName( maybeCharset, matchOK );
00362     if ( !matchOK ) {  //no suitable codec found => use default charset
00363       textCodec = KGlobal::charsets()->codecForName( defaultCS, matchOK );
00364       usedCS = cachedCharset( defaultCS );
00365     } else {
00366       usedCS = cachedCharset( maybeCharset );
00367     }
00368   }
00369 
00370   if ( !matchOK || !textCodec ) {
00371     KMIME_WARN_UNKNOWN( Charset, maybeCharset );
00372     delete dec;
00373     return false;
00374   };
00375 
00376   // kDebug() << "mimeName(): \"" << textCodec->name() << "\"";
00377 
00378   // allocate a temporary buffer to store the 8bit text:
00379   int encodedTextLength = encodedTextEnd - encodedTextStart;
00380   QByteArray buffer;
00381   buffer.resize( codec->maxDecodedSizeFor( encodedTextLength ) );
00382   char *bbegin = buffer.data();
00383   char *bend = bbegin + buffer.length();
00384 
00385   //
00386   // STEP 5:
00387   // do the actual decoding
00388   //
00389 
00390   if ( !dec->decode( encodedTextStart, encodedTextEnd, bbegin, bend ) ) {
00391     KMIME_WARN << codec->name() << "codec lies about its maxDecodedSizeFor("
00392                << encodedTextLength << ")\nresult may be truncated";
00393   }
00394 
00395   result = textCodec->toUnicode( buffer.data(), bbegin - buffer.data() );
00396 
00397   // kDebug() << "result now: \"" << result << "\"";
00398   // cleanup:
00399   delete dec;
00400   language = maybeLanguage;
00401 
00402   return true;
00403 }
00404 
// Advances scursor past any run of spaces, tabs, CRs and LFs, stopping at
// the first other character or at send.
static inline void eatWhiteSpace( const char* &scursor, const char * const send )
{
  for ( ; scursor != send ; ++scursor ) {
    switch ( *scursor ) {
    case ' ':
    case '\n':
    case '\t':
    case '\r':
      continue; // still whitespace: keep skipping
    default:
      return;
    }
  }
}
00412 
00413 bool parseAtom( const char * &scursor, const char * const send,
00414                 QString &result, bool allow8Bit )
00415 {
00416   QPair<const char*,int> maybeResult;
00417 
00418   if ( parseAtom( scursor, send, maybeResult, allow8Bit ) ) {
00419     result += QString::fromLatin1( maybeResult.first, maybeResult.second );
00420     return true;
00421   }
00422 
00423   return false;
00424 }
00425 
00426 bool parseAtom( const char * &scursor, const char * const send,
00427                 QPair<const char*,int> &result, bool allow8Bit )
00428 {
00429   bool success = false;
00430   const char *start = scursor;
00431 
00432   while ( scursor != send ) {
00433     signed char ch = *scursor++;
00434     if ( ch > 0 && isAText( ch ) ) {
00435       // AText: OK
00436       success = true;
00437     } else if ( allow8Bit && ch < 0 ) {
00438       // 8bit char: not OK, but be tolerant.
00439       KMIME_WARN_8BIT( ch );
00440       success = true;
00441     } else {
00442       // CTL or special - marking the end of the atom:
00443       // re-set sursor to point to the offending
00444       // char and return:
00445       scursor--;
00446       break;
00447     }
00448   }
00449   result.first = start;
00450   result.second = scursor - start;
00451   return success;
00452 }
00453 
00454 // FIXME: Remove this and the other parseToken() method. add a new one where "result" is a
00455 //        QByteArray.
00456 bool parseToken( const char * &scursor, const char * const send,
00457                  QString &result, bool allow8Bit )
00458 {
00459   QPair<const char*,int> maybeResult;
00460 
00461   if ( parseToken( scursor, send, maybeResult, allow8Bit ) ) {
00462     result += QString::fromLatin1( maybeResult.first, maybeResult.second );
00463     return true;
00464   }
00465 
00466   return false;
00467 }
00468 
00469 bool parseToken( const char * &scursor, const char * const send,
00470                  QPair<const char*,int> &result, bool allow8Bit )
00471 {
00472   bool success = false;
00473   const char * start = scursor;
00474 
00475   while ( scursor != send ) {
00476     signed char ch = *scursor++;
00477     if ( ch > 0 && isTText( ch ) ) {
00478       // TText: OK
00479       success = true;
00480     } else if ( allow8Bit && ch < 0 ) {
00481       // 8bit char: not OK, but be tolerant.
00482       KMIME_WARN_8BIT( ch );
00483       success = true;
00484     } else {
00485       // CTL or tspecial - marking the end of the atom:
00486       // re-set sursor to point to the offending
00487       // char and return:
00488       scursor--;
00489       break;
00490     }
00491   }
00492   result.first = start;
00493   result.second = scursor - start;
00494   return success;
00495 }
00496 
// Reads the next input character into 'ch' and advances 'scursor'. If the
// input is exhausted (scursor == send) it logs a premature-end warning and
// makes the *enclosing function* return false.
// Expects 'scursor', 'send' and 'ch' to be in scope at the point of use.
#define READ_ch_OR_FAIL if ( scursor != send ) {        \
    ch = *scursor++;                                    \
  } else {                                              \
    KMIME_WARN_PREMATURE_END_OF( GenericQuotedString ); \
    return false;                                       \
  }
00503 
00504 // known issues:
00505 //
00506 // - doesn't handle quoted CRLF
00507 
00508 // FIXME: Why is result a QString? This should be a QByteArray, since at this level, we don't
00509 //        know about encodings yet!
00510 bool parseGenericQuotedString( const char* &scursor, const char * const send,
00511                                QString &result, bool isCRLF,
00512                                const char openChar, const char closeChar )
00513 {
00514   char ch;
00515   // We are in a quoted-string or domain-literal or comment and the
00516   // cursor points to the first char after the openChar.
00517   // We will apply unfolding and quoted-pair removal.
00518   // We return when we either encounter the end or unescaped openChar
00519   // or closeChar.
00520 
00521   assert( *(scursor-1) == openChar || *(scursor-1) == closeChar );
00522 
00523   while ( scursor != send ) {
00524     ch = *scursor++;
00525 
00526     if ( ch == closeChar || ch == openChar ) {
00527       // end of quoted-string or another opening char:
00528       // let caller decide what to do.
00529       return true;
00530     }
00531 
00532     switch( ch ) {
00533     case '\\':      // quoted-pair
00534       // misses "\" CRLF LWSP-char handling, see rfc822, 3.4.5
00535       READ_ch_OR_FAIL;
00536       KMIME_WARN_IF_8BIT( ch );
00537       result += QChar( ch );
00538       break;
00539     case '\r':
00540       // ###
00541       // The case of lonely '\r' is easy to solve, as they're
00542       // not part of Unix Line-ending conventions.
00543       // But I see a problem if we are given Unix-native
00544       // line-ending-mails, where we cannot determine anymore
00545       // whether a given '\n' was part of a CRLF or was occurring
00546       // on it's own.
00547       READ_ch_OR_FAIL;
00548       if ( ch != '\n' ) {
00549         // CR on it's own...
00550         KMIME_WARN_LONE( CR );
00551         result += QChar('\r');
00552         scursor--; // points to after the '\r' again
00553       } else {
00554         // CRLF encountered.
00555         // lookahead: check for folding
00556         READ_ch_OR_FAIL;
00557         if ( ch == ' ' || ch == '\t' ) {
00558           // correct folding;
00559           // position cursor behind the CRLF WSP (unfolding)
00560           // and add the WSP to the result
00561           result += QChar( ch );
00562         } else {
00563           // this is the "shouldn't happen"-case. There is a CRLF
00564           // inside a quoted-string without it being part of FWS.
00565           // We take it verbatim.
00566           KMIME_WARN_NON_FOLDING( CRLF );
00567           result += "\r\n";
00568           // the cursor is decremented again, so's we need not
00569           // duplicate the whole switch here. "ch" could've been
00570           // everything (incl. openChar or closeChar).
00571           scursor--;
00572         }
00573       }
00574       break;
00575     case '\n':
00576       // Note: CRLF has been handled above already!
00577       // ### LF needs special treatment, depending on whether isCRLF
00578       // is true (we can be sure a lonely '\n' was meant this way) or
00579       // false ('\n' alone could have meant LF or CRLF in the original
00580       // message. This parser assumes CRLF iff the LF is followed by
00581       // either WSP (folding) or NULL (premature end of quoted-string;
00582       // Should be fixed, since NULL is allowed as per rfc822).
00583       READ_ch_OR_FAIL;
00584       if ( !isCRLF && ( ch == ' ' || ch == '\t' ) ) {
00585         // folding
00586         // correct folding
00587         result += QChar( ch );
00588       } else {
00589         // non-folding
00590         KMIME_WARN_LONE( LF );
00591         result += QChar('\n');
00592         // pos is decremented, so's we need not duplicate the whole
00593         // switch here. ch could've been everything (incl. <">, "\").
00594         scursor--;
00595       }
00596       break;
00597     case '=':
00598     {
00599       // ### Work around broken clients that send encoded words in quoted-strings
00600       //     For example, older KMail versions.
00601       if( scursor == send )
00602         break;
00603       
00604       const char *oldscursor = scursor;
00605       QString tmp;
00606       QByteArray lang, charset;
00607       if( *scursor++ == '?' ) {
00608         --scursor;
00609         if( parseEncodedWord( scursor, send, tmp, lang, charset ) ) {
00610           result += tmp;
00611           break;
00612         } else {
00613           scursor = oldscursor;
00614         }
00615       } else {
00616         scursor = oldscursor;
00617       }
00618       // fall through
00619     }
00620     default:
00621       KMIME_WARN_IF_8BIT( ch );
00622       result += QChar( ch );
00623     }
00624   }
00625 
00626   return false;
00627 }
00628 
00629 // known issues:
00630 //
00631 // - doesn't handle encoded-word inside comments.
00632 
00633 bool parseComment( const char* &scursor, const char * const send,
00634                    QString &result, bool isCRLF, bool reallySave )
00635 {
00636   int commentNestingDepth = 1;
00637   const char *afterLastClosingParenPos = 0;
00638   QString maybeCmnt;
00639   const char *oldscursor = scursor;
00640 
00641   assert( *(scursor-1) == '(' );
00642 
00643   while ( commentNestingDepth ) {
00644     QString cmntPart;
00645     if ( parseGenericQuotedString( scursor, send, cmntPart, isCRLF, '(', ')' ) ) {
00646       assert( *(scursor-1) == ')' || *(scursor-1) == '(' );
00647       // see the kdoc for above function for the possible conditions
00648       // we have to check:
00649       switch ( *(scursor-1) ) {
00650       case ')':
00651         if ( reallySave ) {
00652           // add the chunk that's now surely inside the comment.
00653           result += maybeCmnt;
00654           result += cmntPart;
00655           if ( commentNestingDepth > 1 ) {
00656             // don't add the outermost ')'...
00657             result += QChar(')');
00658           }
00659           maybeCmnt.clear();
00660         }
00661         afterLastClosingParenPos = scursor;
00662         --commentNestingDepth;
00663         break;
00664       case '(':
00665         if ( reallySave ) {
00666           // don't add to "result" yet, because we might find that we
00667           // are already outside the (broken) comment...
00668           maybeCmnt += cmntPart;
00669           maybeCmnt += QChar('(');
00670         }
00671         ++commentNestingDepth;
00672         break;
00673       default: assert( 0 );
00674       } // switch
00675     } else {
00676       // !parseGenericQuotedString, ie. premature end
00677       if ( afterLastClosingParenPos ) {
00678         scursor = afterLastClosingParenPos;
00679       } else {
00680         scursor = oldscursor;
00681       }
00682       return false;
00683     }
00684   } // while
00685 
00686   return true;
00687 }
00688 
00689 // known issues: none.
00690 
00691 bool parsePhrase( const char* &scursor, const char * const send,
00692                   QString &result, bool isCRLF )
00693 {
00694   enum {
00695     None, Phrase, Atom, EncodedWord, QuotedString
00696   } found = None;
00697 
00698   QString tmp;
00699   QByteArray lang, charset;
00700   const char *successfullyParsed = 0;
00701   // only used by the encoded-word branch
00702   const char *oldscursor;
00703   // used to suppress whitespace between adjacent encoded-words
00704   // (rfc2047, 6.2):
00705   bool lastWasEncodedWord = false;
00706 
00707   while ( scursor != send ) {
00708     char ch = *scursor++;
00709     switch ( ch ) {
00710     case '.': // broken, but allow for intorop's sake
00711       if ( found == None ) {
00712         --scursor;
00713         return false;
00714       } else {
00715         if ( scursor != send && ( *scursor == ' ' || *scursor == '\t' ) ) {
00716           result += ". ";
00717         } else {
00718           result += '.';
00719         }
00720         successfullyParsed = scursor;
00721       }
00722       break;
00723     case '"': // quoted-string
00724       tmp.clear();
00725       if ( parseGenericQuotedString( scursor, send, tmp, isCRLF, '"', '"' ) ) {
00726         successfullyParsed = scursor;
00727         assert( *(scursor-1) == '"' );
00728         switch ( found ) {
00729         case None:
00730           found = QuotedString;
00731           break;
00732         case Phrase:
00733         case Atom:
00734         case EncodedWord:
00735         case QuotedString:
00736           found = Phrase;
00737           result += QChar(' '); // rfc822, 3.4.4
00738           break;
00739         default:
00740           assert( 0 );
00741         }
00742         lastWasEncodedWord = false;
00743         result += tmp;
00744       } else {
00745         // premature end of quoted string.
00746         // What to do? Return leading '"' as special? Return as quoted-string?
00747         // We do the latter if we already found something, else signal failure.
00748         if ( found == None ) {
00749           return false;
00750         } else {
00751           result += QChar(' '); // rfc822, 3.4.4
00752           result += tmp;
00753           return true;
00754         }
00755       }
00756       break;
00757     case '(': // comment
00758       // parse it, but ignore content:
00759       tmp.clear();
00760       if ( parseComment( scursor, send, tmp, isCRLF,
00761                          false /*don't bother with the content*/ ) ) {
00762         successfullyParsed = scursor;
00763         lastWasEncodedWord = false; // strictly interpreting rfc2047, 6.2
00764       } else {
00765         if ( found == None ) {
00766           return false;
00767         } else {
00768           scursor = successfullyParsed;
00769           return true;
00770         }
00771       }
00772       break;
00773     case '=': // encoded-word
00774       tmp.clear();
00775       oldscursor = scursor;
00776       lang.clear();
00777       charset.clear();
00778       if ( parseEncodedWord( scursor, send, tmp, lang, charset ) ) {
00779         successfullyParsed = scursor;
00780         switch ( found ) {
00781         case None:
00782           found = EncodedWord;
00783           break;
00784         case Phrase:
00785         case EncodedWord:
00786         case Atom:
00787         case QuotedString:
00788           if ( !lastWasEncodedWord ) {
00789             result += QChar(' '); // rfc822, 3.4.4
00790           }
00791           found = Phrase;
00792           break;
00793         default: assert( 0 );
00794         }
00795         lastWasEncodedWord = true;
00796         result += tmp;
00797         break;
00798       } else {
00799         // parse as atom:
00800         scursor = oldscursor;
00801       }
00802       // fall though...
00803 
00804     default: //atom
00805       tmp.clear();
00806       scursor--;
00807       if ( parseAtom( scursor, send, tmp, true /* allow 8bit */ ) ) {
00808         successfullyParsed = scursor;
00809         switch ( found ) {
00810         case None:
00811           found = Atom;
00812           break;
00813         case Phrase:
00814         case Atom:
00815         case EncodedWord:
00816         case QuotedString:
00817           found = Phrase;
00818           result += QChar(' '); // rfc822, 3.4.4
00819           break;
00820         default:
00821           assert( 0 );
00822         }
00823         lastWasEncodedWord = false;
00824         result += tmp;
00825       } else {
00826         if ( found == None ) {
00827           return false;
00828         } else {
00829           scursor = successfullyParsed;
00830           return true;
00831         }
00832       }
00833     }
00834     eatWhiteSpace( scursor, send );
00835   }
00836 
00837   return found != None;
00838 }
00839 
00840 bool parseDotAtom( const char* &scursor, const char * const send,
00841                    QString &result, bool isCRLF )
00842 {
00843   eatCFWS( scursor, send, isCRLF );
00844 
00845   // always points to just after the last atom parsed:
00846   const char *successfullyParsed;
00847 
00848   QString tmp;
00849   if ( !parseAtom( scursor, send, tmp, false /* no 8bit */ ) ) {
00850     return false;
00851   }
00852   result += tmp;
00853   successfullyParsed = scursor;
00854 
00855   while ( scursor != send ) {
00856 
00857     // end of header or no '.' -> return
00858     if ( scursor == send || *scursor != '.' ) {
00859       return true;
00860     }
00861     scursor++; // eat '.'
00862 
00863     if ( scursor == send || !isAText( *scursor ) ) {
00864       // end of header or no AText, but this time following a '.'!:
00865       // reset cursor to just after last successfully parsed char and
00866       // return:
00867       scursor = successfullyParsed;
00868       return true;
00869     }
00870 
00871     // try to parse the next atom:
00872     QString maybeAtom;
00873     if ( !parseAtom( scursor, send, maybeAtom, false /*no 8bit*/ ) ) {
00874       scursor = successfullyParsed;
00875       return true;
00876     }
00877 
00878     result += QChar('.');
00879     result += maybeAtom;
00880     successfullyParsed = scursor;
00881   }
00882 
00883   scursor = successfullyParsed;
00884   return true;
00885 }
00886 
00887 void eatCFWS( const char* &scursor, const char * const send, bool isCRLF )
00888 {
00889   QString dummy;
00890 
00891   while ( scursor != send ) {
00892     const char *oldscursor = scursor;
00893 
00894     char ch = *scursor++;
00895 
00896     switch( ch ) {
00897     case ' ':
00898     case '\t': // whitespace
00899     case '\r':
00900     case '\n': // folding
00901       continue;
00902 
00903     case '(': // comment
00904       if ( parseComment( scursor, send, dummy, isCRLF, false /*don't save*/ ) ) {
00905         continue;
00906       }
00907       scursor = oldscursor;
00908       return;
00909 
00910     default:
00911       scursor = oldscursor;
00912       return;
00913     }
00914   }
00915 }
00916 
00917 bool parseDomain( const char* &scursor, const char * const send,
00918                   QString &result, bool isCRLF )
00919 {
00920   eatCFWS( scursor, send, isCRLF );
00921   if ( scursor == send ) {
00922     return false;
00923   }
00924 
00925   // domain := dot-atom / domain-literal / atom *("." atom)
00926   //
00927   // equivalent to:
00928   // domain = dot-atom / domain-literal,
00929   // since parseDotAtom does allow CFWS between atoms and dots
00930 
00931   if ( *scursor == '[' ) {
00932     // domain-literal:
00933     QString maybeDomainLiteral;
00934     // eat '[':
00935     scursor++;
00936     while ( parseGenericQuotedString( scursor, send, maybeDomainLiteral,
00937                                       isCRLF, '[', ']' ) ) {
00938       if ( scursor == send ) {
00939         // end of header: check for closing ']':
00940         if ( *(scursor-1) == ']' ) {
00941           // OK, last char was ']':
00942           result = maybeDomainLiteral;
00943           return true;
00944         } else {
00945           // not OK, domain-literal wasn't closed:
00946           return false;
00947         }
00948       }
00949       // we hit openChar in parseGenericQuotedString.
00950       // include it in maybeDomainLiteral and keep on parsing:
00951       if ( *(scursor-1) == '[' ) {
00952         maybeDomainLiteral += QChar('[');
00953         continue;
00954       }
00955       // OK, real end of domain-literal:
00956       result = maybeDomainLiteral;
00957       return true;
00958     }
00959   } else {
00960     // dot-atom:
00961     QString maybeDotAtom;
00962     if ( parseDotAtom( scursor, send, maybeDotAtom, isCRLF ) ) {
00963       result = maybeDotAtom;
00964       // Domain may end with '.', if so preserve it'
00965       if ( scursor != send && *scursor == '.' ) {
00966         result += QChar('.');
00967         scursor++;
00968       }
00969       return true;
00970     }
00971   }
00972   return false;
00973 }
00974 
00975 bool parseObsRoute( const char* &scursor, const char* const send,
00976                     QStringList &result, bool isCRLF, bool save )
00977 {
00978   while ( scursor != send ) {
00979     eatCFWS( scursor, send, isCRLF );
00980     if ( scursor == send ) {
00981       return false;
00982     }
00983 
00984     // empty entry:
00985     if ( *scursor == ',' ) {
00986       scursor++;
00987       if ( save ) {
00988         result.append( QString() );
00989       }
00990       continue;
00991     }
00992 
00993     // empty entry ending the list:
00994     if ( *scursor == ':' ) {
00995       scursor++;
00996       if ( save ) {
00997         result.append( QString() );
00998       }
00999       return true;
01000     }
01001 
01002     // each non-empty entry must begin with '@':
01003     if ( *scursor != '@' ) {
01004       return false;
01005     } else {
01006       scursor++;
01007     }
01008 
01009     QString maybeDomain;
01010     if ( !parseDomain( scursor, send, maybeDomain, isCRLF ) ) {
01011       return false;
01012     }
01013     if ( save ) {
01014       result.append( maybeDomain );
01015     }
01016 
01017     // eat the following (optional) comma:
01018     eatCFWS( scursor, send, isCRLF );
01019     if ( scursor == send ) {
01020       return false;
01021     }
01022     if ( *scursor == ':' ) {
01023       scursor++;
01024       return true;
01025     }
01026     if ( *scursor == ',' ) {
01027       scursor++;
01028     }
01029   }
01030 
01031   return false;
01032 }
01033 
01034 bool parseAddrSpec( const char* &scursor, const char * const send,
01035                     AddrSpec &result, bool isCRLF )
01036 {
01037   //
01038   // STEP 1:
01039   // local-part := dot-atom / quoted-string / word *("." word)
01040   //
01041   // this is equivalent to:
01042   // local-part := word *("." word)
01043 
01044   QString maybeLocalPart;
01045   QString tmp;
01046 
01047   while ( scursor != send ) {
01048     // first, eat any whitespace
01049     eatCFWS( scursor, send, isCRLF );
01050 
01051     char ch = *scursor++;
01052     switch ( ch ) {
01053     case '.': // dot
01054       maybeLocalPart += QChar('.');
01055       break;
01056 
01057     case '@':
01058       goto SAW_AT_SIGN;
01059       break;
01060 
01061     case '"': // quoted-string
01062       tmp.clear();
01063       if ( parseGenericQuotedString( scursor, send, tmp, isCRLF, '"', '"' ) ) {
01064         maybeLocalPart += tmp;
01065       } else {
01066         return false;
01067       }
01068       break;
01069 
01070     default: // atom
01071       scursor--; // re-set scursor to point to ch again
01072       tmp.clear();
01073       if ( parseAtom( scursor, send, tmp, false /* no 8bit */ ) ) {
01074         maybeLocalPart += tmp;
01075       } else {
01076         return false; // parseAtom can only fail if the first char is non-atext.
01077       }
01078       break;
01079     }
01080   }
01081 
01082   return false;
01083 
01084   //
01085   // STEP 2:
01086   // domain
01087   //
01088 
01089 SAW_AT_SIGN:
01090 
01091   assert( *(scursor-1) == '@' );
01092 
01093   QString maybeDomain;
01094   if ( !parseDomain( scursor, send, maybeDomain, isCRLF ) ) {
01095     return false;
01096   }
01097 
01098   result.localPart = maybeLocalPart;
01099   result.domain = maybeDomain;
01100 
01101   return true;
01102 }
01103 
01104 bool parseAngleAddr( const char* &scursor, const char * const send,
01105                      AddrSpec &result, bool isCRLF )
01106 {
01107   // first, we need an opening angle bracket:
01108   eatCFWS( scursor, send, isCRLF );
01109   if ( scursor == send || *scursor != '<' ) {
01110     return false;
01111   }
01112   scursor++; // eat '<'
01113 
01114   eatCFWS( scursor, send, isCRLF );
01115   if ( scursor == send ) {
01116     return false;
01117   }
01118 
01119   if ( *scursor == '@' || *scursor == ',' ) {
01120     // obs-route: parse, but ignore:
01121     KMIME_WARN << "obsolete source route found! ignoring.";
01122     QStringList dummy;
01123     if ( !parseObsRoute( scursor, send, dummy,
01124                          isCRLF, false /* don't save */ ) ) {
01125       return false;
01126     }
01127     // angle-addr isn't complete until after the '>':
01128     if ( scursor == send ) {
01129       return false;
01130     }
01131   }
01132 
01133   // parse addr-spec:
01134   AddrSpec maybeAddrSpec;
01135   if ( !parseAddrSpec( scursor, send, maybeAddrSpec, isCRLF ) ) {
01136     return false;
01137   }
01138 
01139   eatCFWS( scursor, send, isCRLF );
01140   if ( scursor == send || *scursor != '>' ) {
01141     return false;
01142   }
01143   scursor++;
01144 
01145   result = maybeAddrSpec;
01146   return true;
01147 
01148 }
01149 
01150 static QString stripQuotes( const QString &input )
01151 {
01152   if ( input.startsWith( '"' ) && input.endsWith( '"' ) ) {
01153     QString stripped( input.mid( 1, input.size() - 2 ) );
01154     return stripped;
01155   }
01156   else return input;
01157 }
01158 
01159 bool parseMailbox( const char* &scursor, const char * const send,
01160                    Mailbox &result, bool isCRLF )
01161 {
01162   eatCFWS( scursor, send, isCRLF );
01163   if ( scursor == send ) {
01164     return false;
01165   }
01166 
01167   AddrSpec maybeAddrSpec;
01168   QString maybeDisplayName;
01169 
01170   // first, try if it's a vanilla addr-spec:
01171   const char * oldscursor = scursor;
01172   if ( parseAddrSpec( scursor, send, maybeAddrSpec, isCRLF ) ) {
01173     result.setAddress( maybeAddrSpec );
01174     // check for the obsolete form of display-name (as comment):
01175     eatWhiteSpace( scursor, send );
01176     if ( scursor != send && *scursor == '(' ) {
01177       scursor++;
01178       if ( !parseComment( scursor, send, maybeDisplayName, isCRLF, true /*keep*/ ) ) {
01179         return false;
01180       }
01181     }
01182     result.setName( stripQuotes( maybeDisplayName ) );
01183     return true;
01184   }
01185   scursor = oldscursor;
01186 
01187   // second, see if there's a display-name:
01188   if ( !parsePhrase( scursor, send, maybeDisplayName, isCRLF ) ) {
01189     // failed: reset cursor, note absent display-name
01190     maybeDisplayName.clear();
01191     scursor = oldscursor;
01192   } else {
01193     // succeeded: eat CFWS
01194     eatCFWS( scursor, send, isCRLF );
01195     if ( scursor == send ) {
01196       return false;
01197     }
01198   }
01199 
01200   // third, parse the angle-addr:
01201   if ( !parseAngleAddr( scursor, send, maybeAddrSpec, isCRLF ) ) {
01202     return false;
01203   }
01204 
01205   if ( maybeDisplayName.isNull() ) {
01206     // check for the obsolete form of display-name (as comment):
01207     eatWhiteSpace( scursor, send );
01208     if ( scursor != send && *scursor == '(' ) {
01209       scursor++;
01210       if ( !parseComment( scursor, send, maybeDisplayName, isCRLF, true /*keep*/ ) ) {
01211         return false;
01212       }
01213     }
01214   }
01215 
01216   result.setName( stripQuotes( maybeDisplayName ) );
01217   result.setAddress( maybeAddrSpec );
01218   return true;
01219 }
01220 
01221 bool parseGroup( const char* &scursor, const char * const send,
01222                  Address &result, bool isCRLF )
01223 {
01224   // group         := display-name ":" [ mailbox-list / CFWS ] ";" [CFWS]
01225   //
01226   // equivalent to:
01227   // group   := display-name ":" [ obs-mbox-list ] ";"
01228 
01229   eatCFWS( scursor, send, isCRLF );
01230   if ( scursor == send ) {
01231     return false;
01232   }
01233 
01234   // get display-name:
01235   QString maybeDisplayName;
01236   if ( !parsePhrase( scursor, send, maybeDisplayName, isCRLF ) ) {
01237     return false;
01238   }
01239 
01240   // get ":":
01241   eatCFWS( scursor, send, isCRLF );
01242   if ( scursor == send || *scursor != ':' ) {
01243     return false;
01244   }
01245 
01246   // KDE5 TODO: Don't expose displayName as public, but rather add setter for it that
01247   //            automatically calls removeBidiControlChars
01248   result.displayName = removeBidiControlChars( maybeDisplayName );
01249 
01250   // get obs-mbox-list (may contain empty entries):
01251   scursor++;
01252   while ( scursor != send ) {
01253     eatCFWS( scursor, send, isCRLF );
01254     if ( scursor == send ) {
01255       return false;
01256     }
01257 
01258     // empty entry:
01259     if ( *scursor == ',' ) {
01260       scursor++;
01261       continue;
01262     }
01263 
01264     // empty entry ending the list:
01265     if ( *scursor == ';' ) {
01266       scursor++;
01267       return true;
01268     }
01269 
01270     Mailbox maybeMailbox;
01271     if ( !parseMailbox( scursor, send, maybeMailbox, isCRLF ) ) {
01272       return false;
01273     }
01274     result.mailboxList.append( maybeMailbox );
01275 
01276     eatCFWS( scursor, send, isCRLF );
01277     // premature end:
01278     if ( scursor == send ) {
01279       return false;
01280     }
01281     // regular end of the list:
01282     if ( *scursor == ';' ) {
01283       scursor++;
01284       return true;
01285     }
01286     // eat regular list entry separator:
01287     if ( *scursor == ',' ) {
01288       scursor++;
01289     }
01290   }
01291   return false;
01292 }
01293 
01294 bool parseAddress( const char* &scursor, const char * const send,
01295                    Address &result, bool isCRLF )
01296 {
01297   // address       := mailbox / group
01298 
01299   eatCFWS( scursor, send, isCRLF );
01300   if ( scursor == send ) {
01301     return false;
01302   }
01303 
01304   // first try if it's a single mailbox:
01305   Mailbox maybeMailbox;
01306   const char * oldscursor = scursor;
01307   if ( parseMailbox( scursor, send, maybeMailbox, isCRLF ) ) {
01308     // yes, it is:
01309     result.displayName.clear();
01310     result.mailboxList.append( maybeMailbox );
01311     return true;
01312   }
01313   scursor = oldscursor;
01314 
01315   Address maybeAddress;
01316 
01317   // no, it's not a single mailbox. Try if it's a group:
01318   if ( !parseGroup( scursor, send, maybeAddress, isCRLF ) ) {
01319     return false;
01320   }
01321 
01322   result = maybeAddress;
01323   return true;
01324 }
01325 
01326 bool parseAddressList( const char* &scursor, const char * const send,
01327                        AddressList &result, bool isCRLF )
01328 {
01329   while ( scursor != send ) {
01330     eatCFWS( scursor, send, isCRLF );
01331     // end of header: this is OK.
01332     if ( scursor == send ) {
01333       return true;
01334     }
01335     // empty entry: ignore:
01336     if ( *scursor == ',' ) {
01337       scursor++;
01338       continue;
01339     }
01340     // broken clients might use ';' as list delimiter, accept that as well
01341     if ( *scursor == ';' ) {
01342       scursor++;
01343       continue;
01344     }
01345 
01346     // parse one entry
01347     Address maybeAddress;
01348     if ( !parseAddress( scursor, send, maybeAddress, isCRLF ) ) {
01349       return false;
01350     }
01351     result.append( maybeAddress );
01352 
01353     eatCFWS( scursor, send, isCRLF );
01354     // end of header: this is OK.
01355     if ( scursor == send ) {
01356       return true;
01357     }
01358     // comma separating entries: eat it.
01359     if ( *scursor == ',' ) {
01360       scursor++;
01361     }
01362   }
01363   return true;
01364 }
01365 
01366 static QString asterisk = QString::fromLatin1( "*0*", 1 );
01367 static QString asteriskZero = QString::fromLatin1( "*0*", 2 );
01368 //static QString asteriskZeroAsterisk = QString::fromLatin1( "*0*", 3 );
01369 
01370 // FIXME: Get rid of the very ugly "QStringOrQPair" thing. At this level, we are supposed to work
01371 //        on byte arrays, not strings! The result parameter should be a simple
01372 //        QPair<QByteArray,QByteArray>, which is the attribute name and the value.
01373 bool parseParameter( const char* &scursor, const char * const send,
01374                      QPair<QString,QStringOrQPair> &result, bool isCRLF )
01375 {
01376   // parameter = regular-parameter / extended-parameter
01377   // regular-parameter = regular-parameter-name "=" value
01378   // extended-parameter =
01379   // value = token / quoted-string
01380   //
01381   // note that rfc2231 handling is out of the scope of this function.
01382   // Therefore we return the attribute as QString and the value as
01383   // (start,length) tupel if we see that the value is encoded
01384   // (trailing asterisk), for parseParameterList to decode...
01385 
01386   eatCFWS( scursor, send, isCRLF );
01387   if ( scursor == send ) {
01388     return false;
01389   }
01390 
01391   //
01392   // parse the parameter name:
01393   //
01394   // FIXME: maybeAttribute should be a QByteArray
01395   QString maybeAttribute;
01396   if ( !parseToken( scursor, send, maybeAttribute, false /* no 8bit */ ) ) {
01397     return false;
01398   }
01399 
01400   eatCFWS( scursor, send, isCRLF );
01401   // premature end: not OK (haven't seen '=' yet).
01402   if ( scursor == send || *scursor != '=' ) {
01403     return false;
01404   }
01405   scursor++; // eat '='
01406 
01407   eatCFWS( scursor, send, isCRLF );
01408   if ( scursor == send ) {
01409     // don't choke on attribute=, meaning the value was omitted:
01410     if ( maybeAttribute.endsWith( asterisk ) ) {
01411       KMIME_WARN << "attribute ends with \"*\", but value is empty!"
01412         "Chopping away \"*\".";
01413       maybeAttribute.truncate( maybeAttribute.length() - 1 );
01414     }
01415     result = qMakePair( maybeAttribute.toLower(), QStringOrQPair() );
01416     return true;
01417   }
01418 
01419   const char * oldscursor = scursor;
01420 
01421   //
01422   // parse the parameter value:
01423   //
01424   QStringOrQPair maybeValue;
01425   if ( *scursor == '"' ) {
01426     // value is a quoted-string:
01427     scursor++;
01428     if ( maybeAttribute.endsWith( asterisk ) ) {
01429       // attributes ending with "*" designate extended-parameters,
01430       // which cannot have quoted-strings as values. So we remove the
01431       // trailing "*" to not confuse upper layers.
01432       KMIME_WARN << "attribute ends with \"*\", but value is a quoted-string!"
01433         "Chopping away \"*\".";
01434       maybeAttribute.truncate( maybeAttribute.length() - 1 );
01435     }
01436 
01437     if ( !parseGenericQuotedString( scursor, send, maybeValue.qstring, isCRLF ) ) {
01438       scursor = oldscursor;
01439       result = qMakePair( maybeAttribute.toLower(), QStringOrQPair() );
01440       return false; // this case needs further processing by upper layers!!
01441     }
01442   } else {
01443     // value is a token:
01444     if ( !parseToken( scursor, send, maybeValue.qpair, false /* no 8bit */ ) ) {
01445       scursor = oldscursor;
01446       result = qMakePair( maybeAttribute.toLower(), QStringOrQPair() );
01447       return false; // this case needs further processing by upper layers!!
01448     }
01449   }
01450 
01451   result = qMakePair( maybeAttribute.toLower(), maybeValue );
01452   return true;
01453 }
01454 
01455 // FIXME: Get rid of QStringOrQPair: Use a simply QMap<QByteArray, QByteArray> for "result"
01456 //        instead!
01457 bool parseRawParameterList( const char* &scursor, const char * const send,
01458                             QMap<QString,QStringOrQPair> &result,
01459                             bool isCRLF )
01460 {
01461   // we use parseParameter() consecutively to obtain a map of raw
01462   // attributes to raw values. "Raw" here means that we don't do
01463   // rfc2231 decoding and concatenation. This is left to
01464   // parseParameterList(), which will call this function.
01465   //
01466   // The main reason for making this chunk of code a separate
01467   // (private) method is that we can deal with broken parameters
01468   // _here_ and leave the rfc2231 handling solely to
01469   // parseParameterList(), which will still be enough work.
01470 
01471   while ( scursor != send ) {
01472     eatCFWS( scursor, send, isCRLF );
01473     // empty entry ending the list: OK.
01474     if ( scursor == send ) {
01475       return true;
01476     }
01477     // empty list entry: ignore.
01478     if ( *scursor == ';' ) {
01479       scursor++;
01480       continue;
01481     }
01482 
01483     QPair<QString,QStringOrQPair> maybeParameter;
01484     if ( !parseParameter( scursor, send, maybeParameter, isCRLF ) ) {
01485       // we need to do a bit of work if the attribute is not
01486       // NULL. These are the cases marked with "needs further
01487       // processing" in parseParameter(). Specifically, parsing of the
01488       // token or the quoted-string, which should represent the value,
01489       // failed. We take the easy way out and simply search for the
01490       // next ';' to start parsing again. (Another option would be to
01491       // take the text between '=' and ';' as value)
01492       if ( maybeParameter.first.isNull() ) {
01493         return false;
01494       }
01495       while ( scursor != send ) {
01496         if ( *scursor++ == ';' ) {
01497           goto IS_SEMICOLON;
01498         }
01499       }
01500       // scursor == send case: end of list.
01501       return true;
01502     IS_SEMICOLON:
01503       // *scursor == ';' case: parse next entry.
01504       continue;
01505     }
01506     // successful parsing brings us here:
01507     result.insert( maybeParameter.first, maybeParameter.second );
01508 
01509     eatCFWS( scursor, send, isCRLF );
01510     // end of header: ends list.
01511     if ( scursor == send ) {
01512       return true;
01513     }
01514     // regular separator: eat it.
01515     if ( *scursor == ';' ) {
01516       scursor++;
01517     }
01518   }
01519   return true;
01520 }
01521 
01522 static void decodeRFC2231Value( Codec* &rfc2231Codec,
01523                                 QTextCodec* &textcodec,
01524                                 bool isContinuation, QString &value,
01525                                 QPair<const char*,int> &source, QByteArray& charset )
01526 {
01527   //
01528   // parse the raw value into (charset,language,text):
01529   //
01530 
01531   const char * decBegin = source.first;
01532   const char * decCursor = decBegin;
01533   const char * decEnd = decCursor + source.second;
01534 
01535   if ( !isContinuation ) {
01536     // find the first single quote
01537     while ( decCursor != decEnd ) {
01538       if ( *decCursor == '\'' ) {
01539         break;
01540       } else {
01541         decCursor++;
01542       }
01543     }
01544 
01545     if ( decCursor == decEnd ) {
01546       // there wasn't a single single quote at all!
01547       // take the whole value to be in latin-1:
01548       KMIME_WARN << "No charset in extended-initial-value."
01549         "Assuming \"iso-8859-1\".";
01550       value += QString::fromLatin1( decBegin, source.second );
01551       return;
01552     }
01553 
01554     charset = QByteArray( decBegin, decCursor - decBegin );
01555 
01556     const char * oldDecCursor = ++decCursor;
01557     // find the second single quote (we ignore the language tag):
01558     while ( decCursor != decEnd ) {
01559       if ( *decCursor == '\'' ) {
01560         break;
01561       } else {
01562         decCursor++;
01563       }
01564     }
01565     if ( decCursor == decEnd ) {
01566       KMIME_WARN << "No language in extended-initial-value."
01567         "Trying to recover.";
01568       decCursor = oldDecCursor;
01569     } else {
01570       decCursor++;
01571     }
01572 
01573     // decCursor now points to the start of the
01574     // "extended-other-values":
01575 
01576     //
01577     // get the decoders:
01578     //
01579 
01580     bool matchOK = false;
01581     textcodec = KGlobal::charsets()->codecForName( charset, matchOK );
01582     if ( !matchOK ) {
01583       textcodec = 0;
01584       KMIME_WARN_UNKNOWN( Charset, charset );
01585     }
01586   }
01587 
01588   if ( !rfc2231Codec ) {
01589     rfc2231Codec = Codec::codecForName("x-kmime-rfc2231");
01590     assert( rfc2231Codec );
01591   }
01592 
01593   if ( !textcodec ) {
01594     value += QString::fromLatin1( decCursor, decEnd - decCursor );
01595     return;
01596   }
01597 
01598   Decoder * dec = rfc2231Codec->makeDecoder();
01599   assert( dec );
01600 
01601   //
01602   // do the decoding:
01603   //
01604 
01605   QByteArray buffer;
01606   buffer.resize( rfc2231Codec->maxDecodedSizeFor( decEnd - decCursor ) );
01607   QByteArray::Iterator bit = buffer.begin();
01608   QByteArray::ConstIterator bend = buffer.end();
01609 
01610   if ( !dec->decode( decCursor, decEnd, bit, bend ) ) {
01611     KMIME_WARN << rfc2231Codec->name()
01612                << "codec lies about its maxDecodedSizeFor()" << endl
01613                << "result may be truncated";
01614   }
01615 
01616   value += textcodec->toUnicode( buffer.begin(), bit - buffer.begin() );
01617 
01618   // kDebug() << "value now: \"" << value << "\"";
01619   // cleanup:
01620   delete dec;
01621 }
01622 
01623 // known issues:
01624 //  - permutes rfc2231 continuations when the total number of parts
01625 //    exceeds 10 (other-sections then becomes *xy, ie. two digits)
01626 
01627 bool parseParameterListWithCharset( const char* &scursor,
01628                                                 const char * const send,
01629                                                 QMap<QString,QString> &result,
01630                                                 QByteArray& charset, bool isCRLF )
01631 {
01632 // parse the list into raw attribute-value pairs:
01633   QMap<QString,QStringOrQPair> rawParameterList;
01634   if (!parseRawParameterList( scursor, send, rawParameterList, isCRLF ) ) {
01635     return false;
01636   }
01637 
01638   if ( rawParameterList.isEmpty() ) {
01639     return true;
01640   }
01641 
01642   // decode rfc 2231 continuations and alternate charset encoding:
01643 
01644   // NOTE: this code assumes that what QMapIterator delivers is sorted
01645   // by the key!
01646 
01647   Codec * rfc2231Codec = 0;
01648   QTextCodec * textcodec = 0;
01649   QString attribute;
01650   QString value;
01651   enum Mode {
01652     NoMode = 0x0, Continued = 0x1, Encoded = 0x2
01653   };
01654 
01655   enum EncodingMode {
01656     NoEncoding,
01657     RFC2047,
01658     RFC2231
01659   };
01660 
01661   QMap<QString,QStringOrQPair>::Iterator it, end = rawParameterList.end();
01662 
01663   for ( it = rawParameterList.begin() ; it != end ; ++it ) {
01664     if ( attribute.isNull() || !it.key().startsWith( attribute ) ) {
01665       //
01666       // new attribute:
01667       //
01668 
01669       // store the last attribute/value pair in the result map now:
01670       if ( !attribute.isNull() ) {
01671         result.insert( attribute, value );
01672       }
01673       // and extract the information from the new raw attribute:
01674       value.clear();
01675       attribute = it.key();
01676       int mode = NoMode;
01677       EncodingMode encodingMode = NoEncoding;
01678 
01679       // is the value rfc2331-encoded?
01680       if ( attribute.endsWith( asterisk ) ) {
01681         attribute.truncate( attribute.length() - 1 );
01682         mode |= Encoded;
01683         encodingMode = RFC2231;
01684       }
01685       // is the value rfc2047-encoded?
01686       if( !(*it).qstring.isNull() && (*it).qstring.contains( "=?" ) ) {
01687         mode |= Encoded;
01688         encodingMode = RFC2047;
01689       }
01690       // is the value continued?
01691       if ( attribute.endsWith( asteriskZero ) ) {
01692         attribute.truncate( attribute.length() - 2 );
01693         mode |= Continued;
01694       }
01695       //
01696       // decode if necessary:
01697       //
01698       if ( mode & Encoded ) {
01699         if ( encodingMode == RFC2231 ) {
01700           decodeRFC2231Value( rfc2231Codec, textcodec,
01701                               false, /* isn't continuation */
01702                               value, (*it).qpair, charset );
01703         }
01704         else if ( encodingMode == RFC2047 ) {
01705           value += decodeRFC2047String( (*it).qstring.toLatin1(), charset );
01706         }
01707       } else {
01708         // not encoded.
01709         if ( (*it).qpair.first ) {
01710           value += QString::fromLatin1( (*it).qpair.first, (*it).qpair.second );
01711         } else {
01712           value += (*it).qstring;
01713         }
01714       }
01715 
01716       //
01717       // shortcut-processing when the value isn't encoded:
01718       //
01719 
01720       if ( !(mode & Continued) ) {
01721         // save result already:
01722         result.insert( attribute, value );
01723         // force begin of a new attribute:
01724         attribute.clear();
01725       }
01726     } else { // it.key().startsWith( attribute )
01727       //
01728       // continuation
01729       //
01730 
01731       // ignore the section and trust QMap to have sorted the keys:
01732       if ( it.key().endsWith( asterisk ) ) {
01733         // encoded
01734         decodeRFC2231Value( rfc2231Codec, textcodec,
01735                             true, /* is continuation */
01736                             value, (*it).qpair, charset );
01737       } else {
01738         // not encoded
01739         if ( (*it).qpair.first ) {
01740           value += QString::fromLatin1( (*it).qpair.first, (*it).qpair.second );
01741         } else {
01742           value += (*it).qstring;
01743         }
01744       }
01745     }
01746   }
01747 
01748   // write last attr/value pair:
01749   if ( !attribute.isNull() ) {
01750     result.insert( attribute, value );
01751   }
01752 
01753   return true;
01754 }
01755 
01756 
01757 bool parseParameterList( const char* &scursor, const char * const send,
01758                          QMap<QString,QString> &result, bool isCRLF )
01759 {
01760   QByteArray charset;
01761   return parseParameterListWithCharset( scursor, send, result, charset, isCRLF );
01762 }
01763 
01764 static const char * const stdDayNames[] = {
01765   "Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat"
01766 };
01767 static const int stdDayNamesLen = sizeof stdDayNames / sizeof *stdDayNames;
01768 
01769 static bool parseDayName( const char* &scursor, const char * const send )
01770 {
01771   // check bounds:
01772   if ( send - scursor < 3 ) {
01773     return false;
01774   }
01775 
01776   for ( int i = 0 ; i < stdDayNamesLen ; ++i ) {
01777     if ( qstrnicmp( scursor, stdDayNames[i], 3 ) == 0 ) {
01778       scursor += 3;
01779       // kDebug() << "found" << stdDayNames[i];
01780       return true;
01781     }
01782   }
01783 
01784   return false;
01785 }
01786 
01787 static const char * const stdMonthNames[] = {
01788   "Jan", "Feb", "Mar", "Apr", "May", "Jun",
01789   "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"
01790 };
01791 static const int stdMonthNamesLen =
01792                               sizeof stdMonthNames / sizeof *stdMonthNames;
01793 
01794 static bool parseMonthName( const char* &scursor, const char * const send,
01795                             int &result )
01796 {
01797   // check bounds:
01798   if ( send - scursor < 3 ) {
01799     return false;
01800   }
01801 
01802   for ( result = 0 ; result < stdMonthNamesLen ; ++result ) {
01803     if ( qstrnicmp( scursor, stdMonthNames[result], 3 ) == 0 ) {
01804       scursor += 3;
01805       return true;
01806     }
01807   }
01808 
01809   // not found:
01810   return false;
01811 }
01812 
// Table of the alphabetic time zone names we know about, with their
// offsets in seconds east of GMT. Lookups take the first entry whose
// name matches, so every name must appear only once.
//
// NOTE(review): RFC 2822, section 4.3, says the military zones were
// specified incorrectly in RFC 822 and should be treated like "-0000";
// the offsets below still follow RFC 822.
static const struct {
  const char * tzName;
  long int secsEastOfGMT;
} timeZones[] = {
  // rfc 822 timezones:
  { "GMT", 0 },
  { "UT", 0 },
  { "EDT", -4*3600 },
  { "EST", -5*3600 },
  { "CDT", -5*3600 }, // used to read "MST", which shadowed the real MST entry
  { "CST", -6*3600 },
  { "MDT", -6*3600 },
  { "MST", -7*3600 },
  { "PDT", -7*3600 },
  { "PST", -8*3600 },
  // common, non-rfc-822 zones:
  { "CET", 1*3600 },
  { "MET", 1*3600 },
  { "UTC", 0 },
  { "CEST", 2*3600 },
  { "BST", 1*3600 },
  // rfc 822 military timezones:
  { "Z", 0 },
  { "A", -1*3600 },
  { "B", -2*3600 },
  { "C", -3*3600 },
  { "D", -4*3600 },
  { "E", -5*3600 },
  { "F", -6*3600 },
  { "G", -7*3600 },
  { "H", -8*3600 },
  { "I", -9*3600 },
  // J is not used!
  { "K", -10*3600 },
  { "L", -11*3600 },
  { "M", -12*3600 },
  { "N", 1*3600 },
  { "O", 2*3600 },
  { "P", 3*3600 },
  { "Q", 4*3600 },
  { "R", 5*3600 },
  { "S", 6*3600 },
  { "T", 7*3600 },
  { "U", 8*3600 },
  { "V", 9*3600 },
  { "W", 10*3600 },
  { "X", 11*3600 },
  { "Y", 12*3600 },
};
static const int timeZonesLen = sizeof timeZones / sizeof *timeZones;
01863 
01864 static bool parseAlphaNumericTimeZone( const char* &scursor,
01865                                        const char * const send,
01866                                        long int &secsEastOfGMT,
01867                                        bool &timeZoneKnown )
01868 {
01869   QPair<const char*,int> maybeTimeZone( 0, 0 );
01870   if ( !parseToken( scursor, send, maybeTimeZone, false /*no 8bit*/ ) ) {
01871     return false;
01872   }
01873   for ( int i = 0 ; i < timeZonesLen ; ++i ) {
01874     if ( qstrnicmp( timeZones[i].tzName,
01875                     maybeTimeZone.first, maybeTimeZone.second ) == 0 ) {
01876       scursor += maybeTimeZone.second;
01877       secsEastOfGMT = timeZones[i].secsEastOfGMT;
01878       timeZoneKnown = true;
01879       return true;
01880     }
01881   }
01882 
01883   // don't choke just because we don't happen to know the time zone
01884   KMIME_WARN_UNKNOWN( time zone,
01885                       QByteArray( maybeTimeZone.first, maybeTimeZone.second ) );
01886   secsEastOfGMT = 0;
01887   timeZoneKnown = false;
01888   return true;
01889 }
01890 
// Parses a run of ASCII decimal digits starting at scursor.
//
// result receives the value of the number (0 if there are no digits)
// and scursor is left on the first character that is not a digit.
// Returns the number of digits consumed. There is no protection
// against overflow of result for very long runs of digits.
int parseDigits( const char* &scursor, const char * const send, int &result )
{
  result = 0;
  int digits = 0;
  // The range check replaces isdigit(): passing a plain char with the
  // high bit set (8-bit input) to isdigit() is undefined behaviour.
  for ( ; scursor != send && *scursor >= '0' && *scursor <= '9' ; scursor++, digits++ ) {
    result *= 10;
    result += int( *scursor - '0' );
  }
  return digits;
}
01902 
01903 static bool parseTimeOfDay( const char* &scursor, const char * const send,
01904                             int &hour, int &min, int &sec, bool isCRLF=false )
01905 {
01906   // time-of-day := 2DIGIT [CFWS] ":" [CFWS] 2DIGIT [ [CFWS] ":" 2DIGIT ]
01907 
01908   //
01909   // 2DIGIT representing "hour":
01910   //
01911   if ( !parseDigits( scursor, send, hour ) ) {
01912     return false;
01913   }
01914 
01915   eatCFWS( scursor, send, isCRLF );
01916   if ( scursor == send || *scursor != ':' ) {
01917     return false;
01918   }
01919   scursor++; // eat ':'
01920 
01921   eatCFWS( scursor, send, isCRLF );
01922   if ( scursor == send ) {
01923     return false;
01924   }
01925 
01926   //
01927   // 2DIGIT representing "minute":
01928   //
01929   if ( !parseDigits( scursor, send, min ) ) {
01930     return false;
01931   }
01932 
01933   eatCFWS( scursor, send, isCRLF );
01934   if ( scursor == send ) {
01935     return true; // seconds are optional
01936   }
01937 
01938   //
01939   // let's see if we have a 2DIGIT representing "second":
01940   //
01941   if ( *scursor == ':' ) {
01942     // yepp, there are seconds:
01943     scursor++; // eat ':'
01944     eatCFWS( scursor, send, isCRLF );
01945     if ( scursor == send ) {
01946       return false;
01947     }
01948 
01949     if ( !parseDigits( scursor, send, sec ) ) {
01950       return false;
01951     }
01952   } else {
01953     sec = 0;
01954   }
01955 
01956   return true;
01957 }
01958 
01959 bool parseTime( const char* &scursor, const char * send,
01960                 int &hour, int &min, int &sec, long int &secsEastOfGMT,
01961                 bool &timeZoneKnown, bool isCRLF )
01962 {
01963   // time := time-of-day CFWS ( zone / obs-zone )
01964   //
01965   // obs-zone    := "UT" / "GMT" /
01966   //                "EST" / "EDT" / ; -0500 / -0400
01967   //                "CST" / "CDT" / ; -0600 / -0500
01968   //                "MST" / "MDT" / ; -0700 / -0600
01969   //                "PST" / "PDT" / ; -0800 / -0700
01970   //                "A"-"I" / "a"-"i" /
01971   //                "K"-"Z" / "k"-"z"
01972 
01973   eatCFWS( scursor, send, isCRLF );
01974   if ( scursor == send ) {
01975     return false;
01976   }
01977 
01978   if ( !parseTimeOfDay( scursor, send, hour, min, sec, isCRLF ) ) {
01979     return false;
01980   }
01981 
01982   eatCFWS( scursor, send, isCRLF );
01983   if ( scursor == send ) {
01984     timeZoneKnown = false;
01985     secsEastOfGMT = 0;
01986     return true; // allow missing timezone
01987   }
01988 
01989   timeZoneKnown = true;
01990   if ( *scursor == '+' || *scursor == '-' ) {
01991     // remember and eat '-'/'+':
01992     const char sign = *scursor++;
01993     // numerical timezone:
01994     int maybeTimeZone;
01995     if ( parseDigits( scursor, send, maybeTimeZone ) != 4 ) {
01996       return false;
01997     }
01998     secsEastOfGMT = 60 * ( maybeTimeZone / 100 * 60 + maybeTimeZone % 100 );
01999     if ( sign == '-' ) {
02000       secsEastOfGMT *= -1;
02001       if ( secsEastOfGMT == 0 ) {
02002         timeZoneKnown = false; // -0000 means indetermined tz
02003       }
02004     }
02005   } else {
02006     // maybe alphanumeric timezone:
02007     if ( !parseAlphaNumericTimeZone( scursor, send, secsEastOfGMT, timeZoneKnown ) ) {
02008       return false;
02009     }
02010   }
02011   return true;
02012 }
02013 
02014 bool parseDateTime( const char* &scursor, const char * const send,
02015                     KDateTime &result, bool isCRLF )
02016 {
02017   // Parsing date-time; strict mode:
02018   //
02019   // date-time   := [ [CFWS] day-name [CFWS] "," ]                      ; wday
02020   // (expanded)     [CFWS] 1*2DIGIT CFWS month-name CFWS 2*DIGIT [CFWS] ; date
02021   //                time
02022   //
02023   // day-name    := "Mon" / "Tue" / "Wed" / "Thu" / "Fri" / "Sat" / "Sun"
02024   // month-name  := "Jan" / "Feb" / "Mar" / "Apr" / "May" / "Jun" /
02025   //                "Jul" / "Aug" / "Sep" / "Oct" / "Nov" / "Dec"
02026 
02027   result = KDateTime();
02028   QDateTime maybeDateTime;
02029 
02030   eatCFWS( scursor, send, isCRLF );
02031   if ( scursor == send ) {
02032     return false;
02033   }
02034 
02035   //
02036   // let's see if there's a day-of-week:
02037   //
02038   if ( parseDayName( scursor, send ) ) {
02039     eatCFWS( scursor, send, isCRLF );
02040     if ( scursor == send ) {
02041       return false;
02042     }
02043     // day-name should be followed by ',' but we treat it as optional:
02044     if ( *scursor == ',' ) {
02045       scursor++; // eat ','
02046       eatCFWS( scursor, send, isCRLF );
02047     }
02048   }
02049 
02050   //
02051   // 1*2DIGIT representing "day" (of month):
02052   //
02053   int maybeDay;
02054   if ( !parseDigits( scursor, send, maybeDay ) ) {
02055     return false;
02056   }
02057 
02058   eatCFWS( scursor, send, isCRLF );
02059   if ( scursor == send ) {
02060     return false;
02061   }
02062 
02063   //
02064   // month-name:
02065   //
02066   int maybeMonth = 0;
02067   if ( !parseMonthName( scursor, send, maybeMonth ) ) {
02068     return false;
02069   }
02070   if ( scursor == send ) {
02071     return false;
02072   }
02073   assert( maybeMonth >= 0 ); assert( maybeMonth <= 11 );
02074   ++maybeMonth; // 0-11 -> 1-12
02075 
02076   eatCFWS( scursor, send, isCRLF );
02077   if ( scursor == send ) {
02078     return false;
02079   }
02080 
02081   //
02082   // 2*DIGIT representing "year":
02083   //
02084   int maybeYear;
02085   if ( !parseDigits( scursor, send, maybeYear ) ) {
02086     return false;
02087   }
02088   // RFC 2822 4.3 processing:
02089   if ( maybeYear < 50 ) {
02090     maybeYear += 2000;
02091   } else if ( maybeYear < 1000 ) {
02092     maybeYear += 1900;
02093   }
02094   // else keep as is
02095   if ( maybeYear < 1900 ) {
02096     return false; // rfc2822, 3.3
02097   }
02098 
02099   eatCFWS( scursor, send, isCRLF );
02100   if ( scursor == send ) {
02101     return false;
02102   }
02103 
02104   maybeDateTime.setDate( QDate( maybeYear, maybeMonth, maybeDay ) );
02105 
02106   //
02107   // time
02108   //
02109   int maybeHour, maybeMinute, maybeSecond;
02110   long int secsEastOfGMT;
02111   bool timeZoneKnown = true;
02112 
02113   if ( !parseTime( scursor, send,
02114                    maybeHour, maybeMinute, maybeSecond,
02115                    secsEastOfGMT, timeZoneKnown, isCRLF ) ) {
02116     return false;
02117   }
02118 
02119   maybeDateTime.setTime( QTime( maybeHour, maybeMinute, maybeSecond ) );
02120   if ( !maybeDateTime.isValid() )
02121     return false;
02122 
02123   result = KDateTime( maybeDateTime, KDateTime::Spec( KDateTime::OffsetFromUTC, secsEastOfGMT ) );
02124   if ( !result.isValid() )
02125     return false;
02126   return true;
02127 }
02128 
02129 Headers::Base *extractFirstHeader( QByteArray &head )
02130 {
02131   int endOfFieldBody = 0;
02132   bool folded = false;
02133   Headers::Base *header = 0;
02134 
02135   int startOfFieldBody = head.indexOf( ':' );
02136   const int endOfFieldHeader = startOfFieldBody;
02137 
02138   if ( startOfFieldBody > -1 ) {    //there is another header
02139     startOfFieldBody++; //skip the ':'
02140     if ( head[startOfFieldBody] == ' ' ) { // skip the space after the ':', if there
02141       startOfFieldBody++;
02142     }
02143     endOfFieldBody = findHeaderLineEnd( head, startOfFieldBody, &folded );
02144 
02145     QByteArray rawType = head.left( endOfFieldHeader );
02146     QByteArray rawFieldBody = head.mid( startOfFieldBody, endOfFieldBody - startOfFieldBody );
02147     if ( folded ) {
02148       rawFieldBody = unfoldHeader( rawFieldBody );
02149     }
02150     // We might get an invalid mail without a field name, don't crash on that.
02151     if ( !rawType.isEmpty() ) {
02152       header = HeaderFactory::self()->createHeader( rawType );
02153     }
02154     if( !header ) {
02155       //kWarning() << "Returning Generic header of type" << rawType;
02156       header = new Headers::Generic( rawType );
02157     }
02158     header->from7BitString( rawFieldBody );
02159 
02160     head.remove( 0, endOfFieldBody + 1 );
02161   } else {
02162     head.clear();
02163   }
02164 
02165   return header;
02166 }
02167 
02168 Headers::Base::List parseHeaders( const QByteArray &head )
02169 {
02170   Headers::Base::List ret;
02171   Headers::Base *h;
02172 
02173   QByteArray copy = head;
02174   while( ( h = extractFirstHeader( copy ) ) ) {
02175     ret << h;
02176   }
02177 
02178   return ret;
02179 }
02180 
02181 } // namespace HeaderParsing
02182 
02183 } // namespace KMime

KMIME Library

Skip menu "KMIME Library"
  • Main Page
  • Namespace List
  • Class Hierarchy
  • Alphabetical List
  • Class List
  • File List
  • Namespace Members
  • Class Members
  • Related Pages

KDE-PIM Libraries

Skip menu "KDE-PIM Libraries"
  • akonadi
  •   contact
  •   kmime
  • kabc
  • kblog
  • kcal
  • kholidays
  • kimap
  • kioslave
  •   imap4
  •   mbox
  •   nntp
  • kldap
  • kmime
  • kontactinterface
  • kpimidentities
  • kpimtextedit
  •   richtextbuilders
  • kpimutils
  • kresources
  • ktnef
  • kxmlrpcclient
  • mailtransport
  • microblog
  • qgpgme
  • syndication
  •   atom
  •   rdf
  •   rss2
Generated for KDE-PIM Libraries by doxygen 1.7.1
This website is maintained by Adriaan de Groot and Allen Winter.
KDE® and the K Desktop Environment® logo are registered trademarks of KDE e.V. | Legal