00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020 #ifndef LEXER_H
00021 #define LEXER_H
00022
00023 #include "driver.h"
00024
00025 #include <qstring.h>
00026 #include <qmap.h>
00027 #include <qvaluestack.h>
00028 #include <qpair.h>
00029 #include <qptrvector.h>
00030
// Token type codes.
//
// Only three enumerators carry an explicit value (0, 1000 and 2000); every
// other enumerator continues counting from the one before it, so the order
// of the entries below determines their numeric values.
enum Type {
    // End of input.
    Token_eof = 0,

    // Identifiers, literals and non-code text.
    Token_identifier = 1000,
    Token_number_literal,
    Token_char_literal,
    Token_string_literal,
    Token_whitespaces,
    Token_comment,
    Token_preproc,

    // Multi-character operators.
    Token_assign = 2000,
    Token_ptrmem,
    Token_ellipsis,
    Token_scope,
    Token_shift,
    Token_eq,
    Token_leq,
    Token_geq,
    Token_incr,
    Token_decr,
    Token_arrow,

    // Preprocessor "##".
    Token_concat,

    // DCOP keywords.
    Token_K_DCOP,
    Token_k_dcop,
    Token_k_dcop_signals,

    // Qt keywords.
    Token_Q_OBJECT,
    Token_signals,
    Token_slots,
    Token_emit,

    // C++ keywords and alternative operator spellings, alphabetical.
    Token_and,
    Token_and_eq,
    Token_asm,
    Token_auto,
    Token_bitand,
    Token_bitor,
    Token_bool,
    Token_break,
    Token_case,
    Token_catch,
    Token_char,
    Token_class,
    Token_compl,
    Token_const,
    Token_const_cast,
    Token_continue,
    Token_default,
    Token_delete,
    Token_do,
    Token_double,
    Token_dynamic_cast,
    Token_else,
    Token_enum,
    Token_explicit,
    Token_export,
    Token_extern,
    Token_false,
    Token_float,
    Token_for,
    Token_friend,
    Token_goto,
    Token_if,
    Token_inline,
    Token_int,
    Token_long,
    Token_mutable,
    Token_namespace,
    Token_new,
    Token_not,
    Token_not_eq,
    Token_operator,
    Token_or,
    Token_or_eq,
    Token_private,
    Token_protected,
    Token_public,
    Token_register,
    Token_reinterpret_cast,
    Token_return,
    Token_short,
    Token_signed,
    Token_sizeof,
    Token_static,
    Token_static_cast,
    Token_struct,
    Token_switch,
    Token_template,
    Token_this,
    Token_throw,
    Token_true,
    Token_try,
    Token_typedef,
    Token_typeid,
    Token_typename,
    Token_union,
    Token_unsigned,
    Token_using,
    Token_virtual,
    Token_void,
    Token_volatile,
    Token_wchar_t,
    Token_while,
    Token_xor,
    Token_xor_eq
};
00139
// Kind of a skip word registered through Lexer::addSkipWord().
// NOTE(review): judging by the names, SkipWordAndArguments also covers the
// argument list following the word — confirm against the lexer
// implementation.
enum SkipType {
    SkipWord,
    SkipWordAndArguments
};
00144
00145 struct LexerData;
00146
00147 class Token
00148 {
00149 public:
00150 Token();
00151 Token( int type, int position, int length, const QString& text );
00152 Token( const Token& source );
00153
00154 Token& operator = ( const Token& source );
00155 bool operator == ( const Token& token ) const;
00156 operator int () const;
00157
00158 bool isNull() const;
00159
00160 int type() const;
00161 void setType( int type );
00162
00163 void getStartPosition( int* line, int* column ) const;
00164 void setStartPosition( int line, int column );
00165 void getEndPosition( int* line, int* column ) const;
00166 void setEndPosition( int line, int column );
00167
00168 unsigned int length() const;
00169 void setLength( unsigned int length );
00170
00171 int position() const;
00172 void setPosition( int position );
00173
00174 QString text() const;
00175
00176 private:
00177 int m_type;
00178 int m_position;
00179 int m_length;
00180 int m_startLine;
00181 int m_startColumn;
00182 int m_endLine;
00183 int m_endColumn;
00184 QString m_text;
00185
00186 friend class Lexer;
00187 friend class Parser;
00188 };
00189
00190 class Lexer
00191 {
00192 public:
00193 Lexer( Driver* driver );
00194 ~Lexer();
00195
00196 bool recordComments() const;
00197 void setRecordComments( bool record );
00198
00199 bool recordWhiteSpaces() const;
00200 void setRecordWhiteSpaces( bool record );
00201
00202 bool reportWarnings() const;
00203 void setReportWarnings( bool enable );
00204
00205 bool reportMessages() const;
00206 void setReportMessages( bool enable );
00207
00208 bool skipWordsEnabled() const;
00209 void setSkipWordsEnabled( bool enabled );
00210
00211 bool preprocessorEnabled() const;
00212 void setPreprocessorEnabled( bool enabled );
00213
00214 void resetSkipWords();
00215 void addSkipWord( const QString& word, SkipType skipType=SkipWord, const QString& str = QString::null );
00216
00217 QString source() const;
00218 void setSource( const QString& source );
00219
00220 int index() const;
00221 void setIndex( int index );
00222
00223 void reset();
00224
00225 const Token& tokenAt( int position ) const;
00226 const Token& nextToken();
00227 const Token& lookAhead( int n ) const;
00228
00229 static int toInt( const Token& token );
00230
00231 int tokenPosition( const Token& token ) const;
00232 void getTokenPosition( const Token& token, int* line, int* col );
00233
00234 int currentLine() const { return m_currentLine; }
00235 int currentColumn() const { return m_currentColumn; }
00236
00237 private:
00238 QChar currentChar() const;
00239 QChar peekChar( int n=1 ) const;
00240 int currentPosition() const;
00241
00242 void tokenize();
00243 void nextToken( Token& token, bool stopOnNewline=false );
00244 void nextChar();
00245 void nextChar( int n );
00246 void skip( int l, int r );
00247 void readIdentifier();
00248 void readWhiteSpaces( bool skipNewLine=true );
00249 void readLineComment();
00250 void readMultiLineComment();
00251 void readCharLiteral();
00252 void readStringLiteral();
00253 void readNumberLiteral();
00254
00255 int findOperator3() const;
00256 int findOperator2() const;
00257 bool eof() const;
00258
00259
00260 int testIfLevel();
00261 int macroDefined();
00262 QString readArgument();
00263
00264 int macroPrimary();
00265 int macroMultiplyDivide();
00266 int macroAddSubtract();
00267 int macroRelational();
00268 int macroEquality();
00269 int macroBoolAnd();
00270 int macroBoolXor();
00271 int macroBoolOr();
00272 int macroLogicalAnd();
00273 int macroLogicalOr();
00274 int macroExpression();
00275
00276 void handleDirective( const QString& directive );
00277 void processDefine( Macro& macro );
00278 void processElse();
00279 void processElif();
00280 void processEndif();
00281 void processIf();
00282 void processIfdef();
00283 void processIfndef();
00284 void processInclude();
00285 void processUndef();
00286
00287 private:
00288 LexerData* d;
00289 Driver* m_driver;
00290 QPtrVector< Token > m_tokens;
00291 int m_size;
00292 int m_index;
00293 QString m_source;
00294 int m_ptr;
00295 int m_endPtr;
00296 bool m_recordComments;
00297 bool m_recordWhiteSpaces;
00298 bool m_startLine;
00299 QMap< QString, QPair<SkipType, QString> > m_words;
00300
00301 int m_currentLine;
00302 int m_currentColumn;
00303 bool m_skipWordsEnabled;
00304
00305
00306 QMemArray<bool> m_skipping;
00307 QMemArray<bool> m_trueTest;
00308 int m_ifLevel;
00309 bool m_preprocessorEnabled;
00310 bool m_inPreproc;
00311
00312 bool m_reportWarnings;
00313 bool m_reportMessages;
00314
00315 private:
00316 Lexer( const Lexer& source );
00317 void operator = ( const Lexer& source );
00318 };
00319
00320
00321 inline Token::Token()
00322 : m_type( -1 ),
00323 m_position( 0 ),
00324 m_length( 0 ),
00325 m_text( 0 )
00326 {
00327 }
00328
00329 inline Token::Token( int type, int position, int length, const QString& text )
00330 : m_type( type ),
00331 m_position( position ),
00332 m_length( length ),
00333 m_text( text )
00334 {
00335 }
00336
00337 inline Token::Token( const Token& source )
00338 : m_type( source.m_type ),
00339 m_position( source.m_position ),
00340 m_length( source.m_length ),
00341 m_startLine( source.m_startLine ),
00342 m_startColumn( source.m_startColumn ),
00343 m_endLine( source.m_endLine ),
00344 m_endColumn( source.m_endColumn ),
00345 m_text( source.m_text )
00346 {
00347 }
00348
00349 inline Token& Token::operator = ( const Token& source )
00350 {
00351 m_type = source.m_type;
00352 m_position = source.m_position;
00353 m_length = source.m_length;
00354 m_startLine = source.m_startLine;
00355 m_startColumn = source.m_startColumn;
00356 m_endLine = source.m_endLine;
00357 m_endColumn = source.m_endColumn;
00358 m_text = source.m_text;
00359 return( *this );
00360 }
00361
00362 inline Token::operator int () const
00363 {
00364 return m_type;
00365 }
00366
00367 inline bool Token::operator == ( const Token& token ) const
00368 {
00369 return m_type == token.m_type &&
00370 m_position == token.m_position &&
00371 m_length == token.m_length &&
00372 m_startLine == token.m_startLine &&
00373 m_startColumn == token.m_startColumn &&
00374 m_endLine == token.m_endLine &&
00375 m_endColumn == token.m_endColumn &&
00376 m_text == token.m_text;
00377 }
00378
00379 inline bool Token::isNull() const
00380 {
00381 return m_type == Token_eof || m_length == 0;
00382 }
00383
00384 inline int Token::type() const
00385 {
00386 return m_type;
00387 }
00388
00389 inline void Token::setType( int type )
00390 {
00391 m_type = type;
00392 }
00393
00394 inline int Token::position() const
00395 {
00396 return m_position;
00397 }
00398
00399 inline QString Token::text() const
00400 {
00401 return m_text.mid(m_position, m_length);
00402 }
00403
00404 inline void Token::setStartPosition( int line, int column )
00405 {
00406 m_startLine = line;
00407 m_startColumn = column;
00408 }
00409
00410 inline void Token::setEndPosition( int line, int column )
00411 {
00412 m_endLine = line;
00413 m_endColumn = column;
00414 }
00415
00416 inline void Token::getStartPosition( int* line, int* column ) const
00417 {
00418 if( line ) *line = m_startLine;
00419 if( column ) *column = m_startColumn;
00420 }
00421
00422 inline void Token::getEndPosition( int* line, int* column ) const
00423 {
00424 if( line ) *line = m_endLine;
00425 if( column ) *column = m_endColumn;
00426 }
00427
00428 inline void Token::setPosition( int position )
00429 {
00430 m_position = position;
00431 }
00432
00433 inline unsigned int Token::length() const
00434 {
00435 return m_length;
00436 }
00437
00438 inline void Token::setLength( unsigned int length )
00439 {
00440 m_length = length;
00441 }
00442
00443 inline bool Lexer::recordComments() const
00444 {
00445 return m_recordComments;
00446 }
00447
00448 inline void Lexer::setRecordComments( bool record )
00449 {
00450 m_recordComments = record;
00451 }
00452
00453 inline bool Lexer::recordWhiteSpaces() const
00454 {
00455 return m_recordWhiteSpaces;
00456 }
00457
00458 inline void Lexer::setRecordWhiteSpaces( bool record )
00459 {
00460 m_recordWhiteSpaces = record;
00461 }
00462
00463 inline QString Lexer::source() const
00464 {
00465 return m_source;
00466 }
00467
00468 inline int Lexer::index() const
00469 {
00470 return m_index;
00471 }
00472
00473 inline void Lexer::setIndex( int index )
00474 {
00475 m_index = index;
00476 }
00477
00478 inline const Token& Lexer::nextToken()
00479 {
00480 if( m_index < m_size )
00481 return *m_tokens[ m_index++ ];
00482
00483 return *m_tokens[ m_index ];
00484 }
00485
00486 inline const Token& Lexer::tokenAt( int n ) const
00487 {
00488 return *m_tokens[ QMIN(n, m_size-1) ];
00489 }
00490
00491 inline const Token& Lexer::lookAhead( int n ) const
00492 {
00493 return *m_tokens[ QMIN(m_index + n, m_size-1) ];
00494 }
00495
00496 inline int Lexer::tokenPosition( const Token& token ) const
00497 {
00498 return token.position();
00499 }
00500
00501 inline void Lexer::nextChar()
00502 {
00503 if(m_source[m_ptr++] == '\n') {
00504 ++m_currentLine;
00505 m_currentColumn = 0;
00506 m_startLine = true;
00507 } else {
00508 ++m_currentColumn;
00509 }
00510 }
00511
00512 inline void Lexer::nextChar( int n )
00513 {
00514 m_currentColumn += n;
00515 m_ptr += n;
00516 }
00517
00518 inline void Lexer::readIdentifier()
00519 {
00520 while( currentChar().isLetterOrNumber() || currentChar() == '_' )
00521 nextChar();
00522 }
00523
00524 inline void Lexer::readWhiteSpaces( bool skipNewLine )
00525 {
00526 while( !currentChar().isNull() ){
00527 QChar ch = currentChar();
00528
00529 if( ch == '\n' && !skipNewLine ){
00530 break;
00531 } else if( ch.isSpace() ){
00532 nextChar();
00533 } else if( m_inPreproc && currentChar() == '\\' ){
00534 nextChar();
00535 readWhiteSpaces( true );
00536 } else {
00537 break;
00538 }
00539 }
00540 }
00541
00542 inline void Lexer::readLineComment()
00543 {
00544 while( !currentChar().isNull() && currentChar() != '\n' ){
00545 if( m_reportMessages && currentChar() == '@' && m_source.mid(currentPosition()+1, 4).lower() == "todo" ){
00546 nextChar( 5 );
00547 QString msg;
00548 int line = m_currentLine;
00549 int col = m_currentColumn;
00550
00551 while( currentChar() ){
00552 if( currentChar() == '*' && peekChar() == '/' )
00553 break;
00554 else if( currentChar() == '\n' )
00555 break;
00556
00557 msg += currentChar();
00558 nextChar();
00559 }
00560 m_driver->addProblem( m_driver->currentFileName(), Problem(msg, line, col, Problem::Level_Todo) );
00561 } else
00562 if( m_reportMessages && m_source.mid(currentPosition(), 5).lower() == "fixme" ){
00563 nextChar( 5 );
00564 QString msg;
00565 int line = m_currentLine;
00566 int col = m_currentColumn;
00567
00568 while( currentChar() ){
00569 if( currentChar() == '*' && peekChar() == '/' )
00570 break;
00571 else if( currentChar() == '\n' )
00572 break;
00573
00574 msg += currentChar();
00575 nextChar();
00576 }
00577 m_driver->addProblem( m_driver->currentFileName(), Problem(msg, line, col, Problem::Level_Fixme) );
00578 } else
00579 nextChar();
00580 }
00581 }
00582
00583 inline void Lexer::readMultiLineComment()
00584 {
00585 while( !currentChar().isNull() ){
00586 if( currentChar() == '*' && peekChar() == '/' ){
00587 nextChar( 2 );
00588 return;
00589 } else if( m_reportMessages && currentChar() == '@' && m_source.mid(currentPosition()+1, 4).lower() == "todo" ){
00590 nextChar( 5 );
00591 QString msg;
00592 int line = m_currentLine;
00593 int col = m_currentColumn;
00594
00595 while( currentChar() ){
00596 if( currentChar() == '*' && peekChar() == '/' )
00597 break;
00598 else if( currentChar() == '\n' )
00599 break;
00600 msg += currentChar();
00601 nextChar();
00602 }
00603 m_driver->addProblem( m_driver->currentFileName(), Problem(msg, line, col, Problem::Level_Todo) );
00604 } else
00605 if( m_reportMessages && m_source.mid(currentPosition(), 5).lower() == "fixme" ){
00606 nextChar( 5 );
00607 QString msg;
00608 int line = m_currentLine;
00609 int col = m_currentColumn;
00610
00611 while( currentChar() ){
00612 if( currentChar() == '*' && peekChar() == '/' )
00613 break;
00614 else if( currentChar() == '\n' )
00615 break;
00616
00617 msg += currentChar();
00618 nextChar();
00619 }
00620 m_driver->addProblem( m_driver->currentFileName(), Problem(msg, line, col, Problem::Level_Fixme) );
00621 } else
00622 nextChar();
00623 }
00624 }
00625
00626 inline void Lexer::readCharLiteral()
00627 {
00628 if( currentChar() == '\'' )
00629 nextChar();
00630 else if( currentChar() == 'L' && peekChar() == '\'' )
00631 nextChar( 2 );
00632 else
00633 return;
00634
00635 while( !currentChar().isNull() ){
00636 int len = m_endPtr - currentPosition();
00637
00638 if( len>=2 && (currentChar() == '\\' && peekChar() == '\'') ){
00639 nextChar( 2 );
00640 } else if( len>=2 && (currentChar() == '\\' && peekChar() == '\\') ){
00641 nextChar( 2 );
00642 } else if( currentChar() == '\'' ){
00643 nextChar();
00644 break;
00645 } else {
00646 nextChar();
00647 }
00648 }
00649 }
00650
00651 inline void Lexer::readStringLiteral()
00652 {
00653 if( currentChar() != '"' )
00654 return;
00655
00656 nextChar();
00657
00658 while( !currentChar().isNull() ){
00659 int len = m_endPtr - m_ptr;
00660
00661 if( len>=2 && currentChar() == '\\' && peekChar() == '"' ){
00662 nextChar( 2 );
00663 } else if( len>=2 && currentChar() == '\\' && peekChar() == '\\' ){
00664 nextChar( 2 );
00665 } else if( currentChar() == '"' ){
00666 nextChar();
00667 break;
00668 } else {
00669 nextChar();
00670 }
00671 }
00672 }
00673
00674 inline void Lexer::readNumberLiteral()
00675 {
00676 while( currentChar().isLetterOrNumber() || currentChar() == '.' )
00677 nextChar();
00678 }
00679
00680 inline int Lexer::findOperator3() const
00681 {
00682 int n = int(m_endPtr - m_ptr);
00683
00684 if( n >= 3){
00685 QChar ch = currentChar(), ch1=peekChar(), ch2=peekChar(2);
00686
00687 if( ch == '<' && ch1 == '<' && ch2 == '=' ) return Token_assign;
00688 else if( ch == '>' && ch1 == '>' && ch2 == '=' ) return Token_assign;
00689 else if( ch == '-' && ch1 == '>' && ch2 == '*' ) return Token_ptrmem;
00690 else if( ch == '.' && ch1 == '.' && ch2 == '.' ) return Token_ellipsis;
00691 }
00692
00693 return -1;
00694 }
00695
00696 inline int Lexer::findOperator2() const
00697 {
00698 int n = int(m_endPtr - m_ptr);
00699
00700 if( n>=2 ){
00701 QChar ch = currentChar(), ch1=peekChar();
00702
00703 if( ch == ':' && ch1 == ':' ) return Token_scope;
00704 else if( ch == '.' && ch1 == '*' ) return Token_ptrmem;
00705 else if( ch == '+' && ch1 == '=' ) return Token_assign;
00706 else if( ch == '-' && ch1 == '=' ) return Token_assign;
00707 else if( ch == '*' && ch1 == '=' ) return Token_assign;
00708 else if( ch == '/' && ch1 == '=' ) return Token_assign;
00709 else if( ch == '%' && ch1 == '=' ) return Token_assign;
00710 else if( ch == '^' && ch1 == '=' ) return Token_assign;
00711 else if( ch == '&' && ch1 == '=' ) return Token_assign;
00712 else if( ch == '|' && ch1 == '=' ) return Token_assign;
00713 else if( ch == '<' && ch1 == '<' ) return Token_shift;
00714 else if( ch == '>' && ch1 == '>' ) return Token_shift;
00715 else if( ch == '=' && ch1 == '=' ) return Token_eq;
00716 else if( ch == '!' && ch1 == '=' ) return Token_eq;
00717 else if( ch == '<' && ch1 == '=' ) return Token_leq;
00718 else if( ch == '>' && ch1 == '=' ) return Token_geq;
00719 else if( ch == '&' && ch1 == '&' ) return Token_and;
00720 else if( ch == '|' && ch1 == '|' ) return Token_or;
00721 else if( ch == '+' && ch1 == '+' ) return Token_incr;
00722 else if( ch == '-' && ch1 == '-' ) return Token_decr;
00723 else if( ch == '-' && ch1 == '>' ) return Token_arrow;
00724 else if( ch == '#' && ch1 == '#' ) return Token_concat;
00725 }
00726
00727 return -1;
00728 }
00729
00730 inline bool Lexer::skipWordsEnabled() const
00731 {
00732 return m_skipWordsEnabled;
00733 }
00734
00735 inline void Lexer::setSkipWordsEnabled( bool enabled )
00736 {
00737 m_skipWordsEnabled = enabled;
00738 }
00739
00740 inline bool Lexer::preprocessorEnabled() const
00741 {
00742 return m_preprocessorEnabled;
00743 }
00744
00745 inline void Lexer::setPreprocessorEnabled( bool enabled )
00746 {
00747 m_preprocessorEnabled = enabled;
00748 }
00749
00750 inline int Lexer::currentPosition() const
00751 {
00752 return m_ptr;
00753 }
00754
00755 inline QChar Lexer::currentChar() const
00756 {
00757 return m_ptr < m_endPtr ? m_source[m_ptr] : QChar::null;
00758 }
00759
00760 inline QChar Lexer::peekChar( int n ) const
00761 {
00762 return m_ptr+n < m_endPtr ? m_source[m_ptr + n] : QChar::null;
00763 }
00764
00765 inline bool Lexer::eof() const
00766 {
00767 return m_ptr >= m_endPtr;
00768 }
00769
00770 inline bool Lexer::reportWarnings() const
00771 {
00772 return m_reportWarnings;
00773 }
00774
00775 inline void Lexer::setReportWarnings( bool enable )
00776 {
00777 m_reportWarnings = enable;
00778 }
00779
00780 inline bool Lexer::reportMessages() const
00781 {
00782 return m_reportMessages;
00783 }
00784
00785 inline void Lexer::setReportMessages( bool enable )
00786 {
00787 m_reportMessages = enable;
00788 }
00789
00790
00791 #endif