KDevelop API Documentation

lexer.h

Go to the documentation of this file.
00001 /* This file is part of KDevelop
00002     Copyright (C) 2002,2003 Roberto Raggi <roberto@kdevelop.org>
00003 
00004     This library is free software; you can redistribute it and/or
00005     modify it under the terms of the GNU Library General Public
00006     License as published by the Free Software Foundation; either
00007     version 2 of the License, or (at your option) any later version.
00008 
00009     This library is distributed in the hope that it will be useful,
00010     but WITHOUT ANY WARRANTY; without even the implied warranty of
00011     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00012     Library General Public License for more details.
00013 
00014     You should have received a copy of the GNU Library General Public License
00015     along with this library; see the file COPYING.LIB.  If not, write to
00016     the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
00017     Boston, MA 02111-1307, USA.
00018 */
00019 
00020 #ifndef LEXER_H
00021 #define LEXER_H
00022 
00023 #include "driver.h"
00024 
00025 #include <qstring.h>
00026 #include <qmap.h>
00027 #include <qvaluestack.h>
00028 #include <qpair.h>
00029 #include <qptrvector.h>
00030 
/**
 * Integer codes identifying the kind of a token.
 *
 * Only three enumerators carry an explicit value (0, 1000 and 2000); every
 * other one is the previous value plus one, so the order of the entries
 * below is part of the numbering and must not be changed.
 */
enum Type
{
    // end of input
    Token_eof = 0,

    // identifiers, literals, whitespace, comments, preprocessor text
    Token_identifier = 1000,
    Token_number_literal,
    Token_char_literal,
    Token_string_literal,
    Token_whitespaces,
    Token_comment,
    Token_preproc,

    // multi-character operators
    Token_assign = 2000,
    Token_ptrmem,
    Token_ellipsis,
    Token_scope,
    Token_shift,
    Token_eq,
    Token_leq,
    Token_geq,
    Token_incr,
    Token_decr,
    Token_arrow,

    Token_concat,

    // K_DCOP / k_dcop markers
    Token_K_DCOP,
    Token_k_dcop,
    Token_k_dcop_signals,

    // Q_OBJECT / signals / slots / emit
    Token_Q_OBJECT,
    Token_signals,
    Token_slots,
    Token_emit,

    // C++ keywords and alternative operator spellings, alphabetical
    Token_and,
    Token_and_eq,
    Token_asm,
    Token_auto,
    Token_bitand,
    Token_bitor,
    Token_bool,
    Token_break,
    Token_case,
    Token_catch,
    Token_char,
    Token_class,
    Token_compl,
    Token_const,
    Token_const_cast,
    Token_continue,
    Token_default,
    Token_delete,
    Token_do,
    Token_double,
    Token_dynamic_cast,
    Token_else,
    Token_enum,
    Token_explicit,
    Token_export,
    Token_extern,
    Token_false,
    Token_float,
    Token_for,
    Token_friend,
    Token_goto,
    Token_if,
    Token_inline,
    Token_int,
    Token_long,
    Token_mutable,
    Token_namespace,
    Token_new,
    Token_not,
    Token_not_eq,
    Token_operator,
    Token_or,
    Token_or_eq,
    Token_private,
    Token_protected,
    Token_public,
    Token_register,
    Token_reinterpret_cast,
    Token_return,
    Token_short,
    Token_signed,
    Token_sizeof,
    Token_static,
    Token_static_cast,
    Token_struct,
    Token_switch,
    Token_template,
    Token_this,
    Token_throw,
    Token_true,
    Token_try,
    Token_typedef,
    Token_typeid,
    Token_typename,
    Token_union,
    Token_unsigned,
    Token_using,
    Token_virtual,
    Token_void,
    Token_volatile,
    Token_wchar_t,
    Token_while,
    Token_xor,
    Token_xor_eq
};
00139 
/**
 * Selects how a registered skip word is handled (see Lexer::addSkipWord()).
 * The names suggest "the word only" versus "the word plus its argument
 * list"; the handling itself is implemented outside this header.
 */
enum SkipType
{
    SkipWord,
    SkipWordAndArguments
};
00144 
00145 struct LexerData;
00146 
00147 class Token
00148 {
00149 public:
00150     Token();
00151     Token( int type, int position, int length, const QString& text );
00152     Token( const Token& source );
00153 
00154     Token& operator = ( const Token& source );
00155     bool operator == ( const Token& token ) const;
00156     operator int () const;
00157 
00158     bool isNull() const;
00159 
00160     int type() const;
00161     void setType( int type );
00162 
00163     void getStartPosition( int* line, int* column ) const;
00164     void setStartPosition( int line, int column );
00165     void getEndPosition( int* line, int* column ) const;
00166     void setEndPosition( int line, int column );
00167 
00168     unsigned int length() const;
00169     void setLength( unsigned int length );
00170 
00171     int position() const;
00172     void setPosition( int position );
00173 
00174     QString text() const;
00175     
00176 private:
00177     int m_type;
00178     int m_position;
00179     int m_length;
00180     int m_startLine;
00181     int m_startColumn;
00182     int m_endLine;
00183     int m_endColumn;
00184     QString m_text;
00185 
00186     friend class Lexer;
00187     friend class Parser;
00188 }; // class Token
00189 
00190 class Lexer
00191 {
00192 public:
00193     Lexer( Driver* driver );
00194     ~Lexer();
00195 
00196     bool recordComments() const;
00197     void setRecordComments( bool record );
00198 
00199     bool recordWhiteSpaces() const;
00200     void setRecordWhiteSpaces( bool record );
00201 
00202     bool reportWarnings() const;
00203     void setReportWarnings( bool enable );
00204 
00205     bool reportMessages() const;
00206     void setReportMessages( bool enable );
00207 
00208     bool skipWordsEnabled() const;
00209     void setSkipWordsEnabled( bool enabled );
00210 
00211     bool preprocessorEnabled() const;
00212     void setPreprocessorEnabled( bool enabled );
00213 
00214     void resetSkipWords();
00215     void addSkipWord( const QString& word, SkipType skipType=SkipWord, const QString& str = QString::null );
00216 
00217     QString source() const;
00218     void setSource( const QString& source );
00219 
00220     int index() const;
00221     void setIndex( int index );
00222 
00223     void reset();
00224 
00225     const Token& tokenAt( int position ) const;
00226     const Token& nextToken();
00227     const Token& lookAhead( int n ) const;
00228 
00229     static int toInt( const Token& token );
00230 
00231     int tokenPosition( const Token& token ) const;
00232     void getTokenPosition( const Token& token, int* line, int* col );
00233 
00234     int currentLine() const { return m_currentLine; }
00235     int currentColumn() const { return m_currentColumn; }
00236 
00237 private:
00238     QChar currentChar() const;
00239     QChar peekChar( int n=1 ) const;
00240     int currentPosition() const;
00241 
00242     void tokenize();
00243     void nextToken( Token& token, bool stopOnNewline=false );
00244     void nextChar();
00245     void nextChar( int n );
00246     void skip( int l, int r );
00247     void readIdentifier();
00248     void readWhiteSpaces( bool skipNewLine=true );
00249     void readLineComment();
00250     void readMultiLineComment();
00251     void readCharLiteral();
00252     void readStringLiteral();
00253     void readNumberLiteral();
00254 
00255     int findOperator3() const;
00256     int findOperator2() const;
00257     bool eof() const;
00258 
00259     // preprocessor (based on an article of Al Stevens on Dr.Dobb's journal)
00260     int testIfLevel();
00261     int macroDefined();
00262     QString readArgument();
00263 
00264     int macroPrimary();
00265     int macroMultiplyDivide();
00266     int macroAddSubtract();
00267     int macroRelational();
00268     int macroEquality();
00269     int macroBoolAnd();
00270     int macroBoolXor();
00271     int macroBoolOr();
00272     int macroLogicalAnd();
00273     int macroLogicalOr();
00274     int macroExpression();
00275 
00276     void handleDirective( const QString& directive );
00277     void processDefine( Macro& macro );
00278     void processElse();
00279     void processElif();
00280     void processEndif();
00281     void processIf();
00282     void processIfdef();
00283     void processIfndef();
00284     void processInclude();
00285     void processUndef();
00286 
00287 private:
00288     LexerData* d;
00289     Driver* m_driver;
00290     QPtrVector< Token > m_tokens;
00291     int m_size;
00292     int m_index;
00293     QString m_source;
00294     int m_ptr;
00295     int m_endPtr;
00296     bool m_recordComments;
00297     bool m_recordWhiteSpaces;
00298     bool m_startLine;
00299     QMap< QString, QPair<SkipType, QString> > m_words;
00300 
00301     int m_currentLine;
00302     int m_currentColumn;
00303     bool m_skipWordsEnabled;
00304 
00305     // preprocessor
00306     QMemArray<bool> m_skipping;
00307     QMemArray<bool> m_trueTest;
00308     int m_ifLevel;
00309     bool m_preprocessorEnabled;
00310     bool m_inPreproc;
00311 
00312     bool m_reportWarnings;
00313     bool m_reportMessages;
00314 
00315 private:
00316     Lexer( const Lexer& source );
00317     void operator = ( const Lexer& source );
00318 };
00319 
00320 
00321 inline Token::Token()
00322     : m_type( -1 ),
00323       m_position( 0 ),
00324       m_length( 0 ),
00325       m_text( 0 )
00326 {
00327 }
00328 
00329 inline Token::Token( int type, int position, int length, const QString& text )
00330     : m_type( type ),
00331       m_position( position ),
00332       m_length( length ),
00333       m_text( text )
00334 {
00335 }
00336 
00337 inline Token::Token( const Token& source )
00338     : m_type( source.m_type ),
00339       m_position( source.m_position ),
00340       m_length( source.m_length ),
00341       m_startLine( source.m_startLine ),
00342       m_startColumn( source.m_startColumn ),
00343       m_endLine( source.m_endLine ),
00344       m_endColumn( source.m_endColumn ),
00345       m_text( source.m_text )
00346 {
00347 }
00348 
00349 inline Token& Token::operator = ( const Token& source )
00350 {
00351     m_type = source.m_type;
00352     m_position = source.m_position;
00353     m_length = source.m_length;
00354     m_startLine = source.m_startLine;
00355     m_startColumn = source.m_startColumn;
00356     m_endLine = source.m_endLine;
00357     m_endColumn = source.m_endColumn;
00358     m_text = source.m_text;
00359     return( *this );
00360 }
00361 
00362 inline Token::operator int () const
00363 {
00364     return m_type;
00365 }
00366 
00367 inline bool Token::operator == ( const Token& token ) const
00368 {
00369     return m_type == token.m_type &&
00370        m_position == token.m_position &&
00371          m_length == token.m_length &&
00372       m_startLine == token.m_startLine &&
00373     m_startColumn == token.m_startColumn &&
00374         m_endLine == token.m_endLine &&
00375       m_endColumn == token.m_endColumn &&
00376            m_text == token.m_text;
00377 }
00378 
00379 inline bool Token::isNull() const
00380 {
00381     return m_type == Token_eof || m_length == 0;
00382 }
00383 
00384 inline int Token::type() const
00385 {
00386     return m_type;
00387 }
00388 
00389 inline void Token::setType( int type )
00390 {
00391     m_type = type;
00392 }
00393 
00394 inline int Token::position() const
00395 {
00396     return m_position;
00397 }
00398 
00399 inline QString Token::text() const
00400 {
00401     return m_text.mid(m_position, m_length);
00402 }
00403 
00404 inline void Token::setStartPosition( int line, int column )
00405 {
00406     m_startLine = line;
00407     m_startColumn = column;
00408 }
00409 
00410 inline void Token::setEndPosition( int line, int column )
00411 {
00412     m_endLine = line;
00413     m_endColumn = column;
00414 }
00415 
00416 inline void Token::getStartPosition( int* line, int* column ) const
00417 {
00418     if( line ) *line = m_startLine;
00419     if( column ) *column = m_startColumn;
00420 }
00421 
00422 inline void Token::getEndPosition( int* line, int* column ) const
00423 {
00424     if( line ) *line = m_endLine;
00425     if( column ) *column = m_endColumn;
00426 }
00427 
00428 inline void Token::setPosition( int position )
00429 {
00430     m_position = position;
00431 }
00432 
00433 inline unsigned int Token::length() const
00434 {
00435     return m_length;
00436 }
00437 
00438 inline void Token::setLength( unsigned int length )
00439 {
00440     m_length = length;
00441 }
00442 
00443 inline bool Lexer::recordComments() const
00444 {
00445     return m_recordComments;
00446 }
00447 
00448 inline void Lexer::setRecordComments( bool record )
00449 {
00450     m_recordComments = record;
00451 }
00452 
00453 inline bool Lexer::recordWhiteSpaces() const
00454 {
00455     return m_recordWhiteSpaces;
00456 }
00457 
00458 inline void Lexer::setRecordWhiteSpaces( bool record )
00459 {
00460     m_recordWhiteSpaces = record;
00461 }
00462 
00463 inline QString Lexer::source() const
00464 {
00465     return m_source;
00466 }
00467 
00468 inline int Lexer::index() const
00469 {
00470     return m_index;
00471 }
00472 
00473 inline void Lexer::setIndex( int index )
00474 {
00475     m_index = index;
00476 }
00477 
00478 inline const Token& Lexer::nextToken()
00479 {
00480     if( m_index < m_size )
00481         return *m_tokens[ m_index++ ];
00482 
00483     return *m_tokens[ m_index ];
00484 }
00485 
00486 inline const Token& Lexer::tokenAt( int n ) const
00487 {
00488     return *m_tokens[ QMIN(n, m_size-1) ];
00489 }
00490 
00491 inline const Token& Lexer::lookAhead( int n ) const
00492 {
00493     return *m_tokens[ QMIN(m_index + n, m_size-1) ];
00494 }
00495 
00496 inline int Lexer::tokenPosition( const Token& token ) const
00497 {
00498     return token.position();
00499 }
00500 
00501 inline void Lexer::nextChar()
00502 {
00503     if(m_source[m_ptr++] == '\n') {
00504         ++m_currentLine;
00505         m_currentColumn = 0;
00506         m_startLine = true;
00507     } else {
00508     ++m_currentColumn;
00509     }
00510 }
00511 
00512 inline void Lexer::nextChar( int n )
00513 {
00514     m_currentColumn += n;
00515     m_ptr += n;
00516 }
00517 
00518 inline void Lexer::readIdentifier()
00519 {
00520     while( currentChar().isLetterOrNumber() || currentChar() == '_' )
00521         nextChar();
00522 }
00523 
00524 inline void Lexer::readWhiteSpaces( bool skipNewLine )
00525 {
00526     while( !currentChar().isNull() ){
00527         QChar ch = currentChar();
00528 
00529         if( ch == '\n' && !skipNewLine ){
00530             break;
00531         } else if( ch.isSpace() ){
00532             nextChar();
00533         } else if( m_inPreproc && currentChar() == '\\' ){
00534             nextChar();
00535             readWhiteSpaces( true );
00536         } else {
00537             break;
00538         }
00539     }
00540 }
00541 
00542 inline void Lexer::readLineComment()
00543 {
00544     while( !currentChar().isNull() && currentChar() != '\n' ){
00545     if( m_reportMessages && currentChar() == '@' && m_source.mid(currentPosition()+1, 4).lower() == "todo" ){
00546         nextChar( 5 );
00547         QString msg;
00548         int line = m_currentLine;
00549         int col = m_currentColumn;
00550 
00551         while( currentChar() ){
00552         if( currentChar() == '*' && peekChar() == '/' )
00553             break;
00554         else if( currentChar() == '\n' )
00555             break;
00556 
00557         msg += currentChar();
00558         nextChar();
00559         }
00560         m_driver->addProblem( m_driver->currentFileName(), Problem(msg, line, col, Problem::Level_Todo) );
00561     } else
00562         if( m_reportMessages && m_source.mid(currentPosition(), 5).lower() == "fixme" ){
00563             nextChar( 5 );
00564             QString msg;
00565             int line = m_currentLine;
00566             int col = m_currentColumn;
00567 
00568             while( currentChar() ){
00569             if( currentChar() == '*' && peekChar() == '/' )
00570                 break;
00571             else if( currentChar() == '\n' )
00572                 break;
00573 
00574             msg += currentChar();
00575             nextChar();
00576             }
00577             m_driver->addProblem( m_driver->currentFileName(), Problem(msg, line, col, Problem::Level_Fixme) );
00578         } else
00579             nextChar();
00580     }
00581 }
00582 
00583 inline void Lexer::readMultiLineComment()
00584 {
00585     while( !currentChar().isNull() ){
00586         if( currentChar() == '*' && peekChar() == '/' ){
00587             nextChar( 2 );
00588             return;
00589     } else if( m_reportMessages && currentChar() == '@' && m_source.mid(currentPosition()+1, 4).lower() == "todo" ){
00590         nextChar( 5 );
00591         QString msg;
00592         int line = m_currentLine;
00593         int col = m_currentColumn;
00594 
00595         while( currentChar() ){
00596         if( currentChar() == '*' && peekChar() == '/' )
00597             break;
00598         else if( currentChar() == '\n' )
00599             break;
00600         msg += currentChar();
00601         nextChar();
00602         }
00603         m_driver->addProblem( m_driver->currentFileName(), Problem(msg, line, col, Problem::Level_Todo) );
00604     } else
00605         if( m_reportMessages && m_source.mid(currentPosition(), 5).lower() == "fixme" ){
00606             nextChar( 5 );
00607             QString msg;
00608             int line = m_currentLine;
00609             int col = m_currentColumn;
00610 
00611             while( currentChar() ){
00612             if( currentChar() == '*' && peekChar() == '/' )
00613                 break;
00614             else if( currentChar() == '\n' )
00615                 break;
00616 
00617             msg += currentChar();
00618             nextChar();
00619             }
00620             m_driver->addProblem( m_driver->currentFileName(), Problem(msg, line, col, Problem::Level_Fixme) );
00621         } else
00622             nextChar();
00623     }
00624 }
00625 
00626 inline void Lexer::readCharLiteral()
00627 {
00628     if( currentChar() == '\'' )
00629         nextChar(); // skip '
00630     else if( currentChar() == 'L' && peekChar() == '\'' )
00631     nextChar( 2 ); // slip L'
00632     else
00633         return;
00634 
00635     while( !currentChar().isNull() ){
00636         int len = m_endPtr - currentPosition();
00637 
00638         if( len>=2 && (currentChar() == '\\' && peekChar() == '\'') ){
00639             nextChar( 2 );
00640         } else if( len>=2 && (currentChar() == '\\' && peekChar() == '\\') ){
00641             nextChar( 2 );
00642         } else if( currentChar() == '\'' ){
00643             nextChar();
00644             break;
00645         } else {
00646         nextChar();
00647     }
00648     }
00649 }
00650 
00651 inline void Lexer::readStringLiteral()
00652 {
00653     if( currentChar() != '"' )
00654         return;
00655 
00656     nextChar(); // skip "
00657 
00658     while( !currentChar().isNull() ){
00659         int len = m_endPtr - m_ptr;
00660 
00661         if( len>=2 && currentChar() == '\\' && peekChar() == '"' ){
00662             nextChar( 2 );
00663         } else if( len>=2 && currentChar() == '\\' && peekChar() == '\\' ){
00664             nextChar( 2 );
00665         } else if( currentChar() == '"' ){
00666             nextChar();
00667             break;
00668         } else {
00669         nextChar();
00670     }
00671     }
00672 }
00673 
00674 inline void Lexer::readNumberLiteral()
00675 {
00676     while( currentChar().isLetterOrNumber() || currentChar() == '.' )
00677         nextChar();
00678 }
00679 
00680 inline int Lexer::findOperator3() const
00681 {
00682     int n = int(m_endPtr - m_ptr);
00683 
00684     if( n >= 3){
00685     QChar ch = currentChar(), ch1=peekChar(), ch2=peekChar(2);
00686 
00687     if( ch == '<' && ch1 == '<' && ch2 == '=' ) return Token_assign;
00688     else if( ch == '>' && ch1 == '>' && ch2 == '=' ) return Token_assign;
00689     else if( ch == '-' && ch1 == '>' && ch2 == '*' ) return Token_ptrmem;
00690     else if( ch == '.' && ch1 == '.' && ch2 == '.' ) return Token_ellipsis;
00691     }
00692 
00693     return -1;
00694 }
00695 
00696 inline int Lexer::findOperator2() const
00697 {
00698     int n = int(m_endPtr - m_ptr);
00699 
00700     if( n>=2 ){
00701     QChar ch = currentChar(), ch1=peekChar();
00702 
00703     if( ch == ':' && ch1 == ':' ) return Token_scope;
00704     else if( ch == '.' && ch1 == '*' ) return Token_ptrmem;
00705     else if( ch == '+' && ch1 == '=' ) return Token_assign;
00706     else if( ch == '-' && ch1 == '=' ) return Token_assign;
00707     else if( ch == '*' && ch1 == '=' ) return Token_assign;
00708     else if( ch == '/' && ch1 == '=' ) return Token_assign;
00709     else if( ch == '%' && ch1 == '=' ) return Token_assign;
00710     else if( ch == '^' && ch1 == '=' ) return Token_assign;
00711     else if( ch == '&' && ch1 == '=' ) return Token_assign;
00712     else if( ch == '|' && ch1 == '=' ) return Token_assign;
00713     else if( ch == '<' && ch1 == '<' ) return Token_shift;
00714     else if( ch == '>' && ch1 == '>' ) return Token_shift;
00715     else if( ch == '=' && ch1 == '=' ) return Token_eq;
00716     else if( ch == '!' && ch1 == '=' ) return Token_eq;
00717     else if( ch == '<' && ch1 == '=' ) return Token_leq;
00718     else if( ch == '>' && ch1 == '=' ) return Token_geq;
00719     else if( ch == '&' && ch1 == '&' ) return Token_and;
00720     else if( ch == '|' && ch1 == '|' ) return Token_or;
00721     else if( ch == '+' && ch1 == '+' ) return Token_incr;
00722     else if( ch == '-' && ch1 == '-' ) return Token_decr;
00723     else if( ch == '-' && ch1 == '>' ) return Token_arrow;
00724     else if( ch == '#' && ch1 == '#' ) return Token_concat;
00725     }
00726 
00727     return -1;
00728 }
00729 
00730 inline bool Lexer::skipWordsEnabled() const
00731 {
00732     return m_skipWordsEnabled;
00733 }
00734 
00735 inline void Lexer::setSkipWordsEnabled( bool enabled )
00736 {
00737     m_skipWordsEnabled = enabled;
00738 }
00739 
00740 inline bool Lexer::preprocessorEnabled() const
00741 {
00742     return m_preprocessorEnabled;
00743 }
00744 
00745 inline void Lexer::setPreprocessorEnabled( bool enabled )
00746 {
00747     m_preprocessorEnabled = enabled;
00748 }
00749 
00750 inline int Lexer::currentPosition() const
00751 {
00752     return m_ptr;
00753 }
00754 
00755 inline QChar Lexer::currentChar() const
00756 {
00757     return m_ptr < m_endPtr ? m_source[m_ptr] : QChar::null;
00758 }
00759 
00760 inline QChar Lexer::peekChar( int n ) const
00761 {
00762     return m_ptr+n < m_endPtr ? m_source[m_ptr + n] : QChar::null;
00763 }
00764 
00765 inline bool Lexer::eof() const
00766 {
00767     return m_ptr >= m_endPtr;
00768 }
00769 
00770 inline bool Lexer::reportWarnings() const
00771 {
00772     return m_reportWarnings;
00773 }
00774 
00775 inline void Lexer::setReportWarnings( bool enable )
00776 {
00777     m_reportWarnings = enable;
00778 }
00779 
00780 inline bool Lexer::reportMessages() const
00781 {
00782     return m_reportMessages;
00783 }
00784 
00785 inline void Lexer::setReportMessages( bool enable )
00786 {
00787     m_reportMessages = enable;
00788 }
00789 
00790 
00791 #endif
KDE Logo
This file is part of the documentation for KDevelop Version 3.1.2.
Documentation copyright © 1996-2004 the KDE developers.
Generated on Wed Mar 23 00:03:51 2005 by doxygen 1.3.9.1 written by Dimitri van Heesch, © 1997-2003