KDevelop API Documentation

lib/cppparser/lexer.h

Go to the documentation of this file.
00001 /* This file is part of KDevelop 00002 Copyright (C) 2002,2003 Roberto Raggi <roberto@kdevelop.org> 00003 00004 This library is free software; you can redistribute it and/or 00005 modify it under the terms of the GNU Library General Public 00006 License as published by the Free Software Foundation; either 00007 version 2 of the License, or (at your option) any later version. 00008 00009 This library is distributed in the hope that it will be useful, 00010 but WITHOUT ANY WARRANTY; without even the implied warranty of 00011 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 00012 Library General Public License for more details. 00013 00014 You should have received a copy of the GNU Library General Public License 00015 along with this library; see the file COPYING.LIB. If not, write to 00016 the Free Software Foundation, Inc., 59 Temple Place - Suite 330, 00017 Boston, MA 02111-1307, USA. 00018 */ 00019 00020 #ifndef LEXER_H 00021 #define LEXER_H 00022 00023 #include "driver.h" 00024 00025 #include <qstring.h> 00026 #include <qmap.h> 00027 #include <qvaluestack.h> 00028 #include <qpair.h> 00029 #include <qptrvector.h> 00030 00031 enum Type { 00032 Token_eof = 0, 00033 Token_identifier = 1000, 00034 Token_number_literal, 00035 Token_char_literal, 00036 Token_string_literal, 00037 Token_whitespaces, 00038 Token_comment, 00039 Token_preproc, 00040 00041 Token_assign = 2000, 00042 Token_ptrmem, 00043 Token_ellipsis, 00044 Token_scope, 00045 Token_shift, 00046 Token_eq, 00047 Token_leq, 00048 Token_geq, 00049 Token_incr, 00050 Token_decr, 00051 Token_arrow, 00052 00053 Token_concat, 00054 00055 Token_K_DCOP, 00056 Token_k_dcop, 00057 Token_k_dcop_signals, 00058 00059 Token_Q_OBJECT, 00060 Token_signals, 00061 Token_slots, 00062 Token_emit, 00063 00064 Token_and, 00065 Token_and_eq, 00066 Token_asm, 00067 Token_auto, 00068 Token_bitand, 00069 Token_bitor, 00070 Token_bool, 00071 Token_break, 00072 Token_case, 00073 Token_catch, 00074 Token_char, 00075 Token_class, 00076 Token_compl, 00077 Token_const, 00078 Token_const_cast, 00079 Token_continue, 00080 Token_default, 00081 Token_delete, 00082 Token_do, 00083 Token_double, 00084 Token_dynamic_cast, 00085 Token_else, 00086 Token_enum, 00087 Token_explicit, 00088 Token_export, 00089 Token_extern, 00090 Token_false, 00091 Token_float, 00092 Token_for, 00093 Token_friend, 00094 Token_goto, 00095 Token_if, 00096 Token_inline, 00097 Token_int, 00098 Token_long, 00099 Token_mutable, 00100 Token_namespace, 00101 Token_new, 00102 Token_not, 00103 Token_not_eq, 00104 Token_operator, 00105 Token_or, 00106 Token_or_eq, 00107 Token_private, 00108 Token_protected, 00109 Token_public, 00110 Token_register, 00111 Token_reinterpret_cast, 00112 Token_return, 00113 Token_short, 00114 Token_signed, 00115 Token_sizeof, 00116 Token_static, 00117 Token_static_cast, 00118 Token_struct, 00119 Token_switch, 00120 Token_template, 00121 Token_this, 00122 Token_throw, 00123 Token_true, 00124 Token_try, 00125 Token_typedef, 00126 Token_typeid, 00127 Token_typename, 00128 Token_union, 00129 Token_unsigned, 00130 Token_using, 00131 Token_virtual, 00132 Token_void, 00133 Token_volatile, 00134 Token_wchar_t, 00135 Token_while, 00136 Token_xor, 00137 Token_xor_eq 00138 }; 00139 00140 enum SkipType { 00141 SkipWord, 00142 SkipWordAndArguments 00143 }; 00144 00145 struct LexerData; 00146 00147 class Token 00148 { 00149 public: 00150 Token(); 00151 Token( int type, int position, int length, const QString& text ); 00152 Token( const Token& source ); 00153 00154 Token& operator = ( const Token& source ); 00155 bool operator == ( const Token& token ) const; 00156 operator int () const; 00157 00158 bool isNull() const; 00159 00160 int type() const; 00161 void setType( int type ); 00162 00163 void getStartPosition( int* line, int* column ) const; 00164 void setStartPosition( int line, int column ); 00165 void getEndPosition( int* line, int* column ) const; 00166 void setEndPosition( int line, int column ); 00167 00168 unsigned int length() const; 00169 void setLength( unsigned int length ); 00170 00171 int position() const; 00172 void setPosition( int position ); 00173 00174 QString text() const; 00175 00176 private: 00177 int m_type; 00178 int m_position; 00179 int m_length; 00180 int m_startLine; 00181 int m_startColumn; 00182 int m_endLine; 00183 int m_endColumn; 00184 QString m_text; 00185 00186 friend class Lexer; 00187 friend class Parser; 00188 }; // class Token 00189 00190 class Lexer 00191 { 00192 public: 00193 Lexer( Driver* driver ); 00194 ~Lexer(); 00195 00196 bool recordComments() const; 00197 void setRecordComments( bool record ); 00198 00199 bool recordWhiteSpaces() const; 00200 void setRecordWhiteSpaces( bool record ); 00201 00202 bool reportWarnings() const; 00203 void setReportWarnings( bool enable ); 00204 00205 bool reportMessages() const; 00206 void setReportMessages( bool enable ); 00207 00208 bool skipWordsEnabled() const; 00209 void setSkipWordsEnabled( bool enabled ); 00210 00211 bool preprocessorEnabled() const; 00212 void setPreprocessorEnabled( bool enabled ); 00213 00214 void resetSkipWords(); 00215 void addSkipWord( const QString& word, SkipType skipType=SkipWord, const QString& str = QString::null ); 00216 00217 QString source() const; 00218 void setSource( const QString& source ); 00219 00220 int index() const; 00221 void setIndex( int index ); 00222 00223 void reset(); 00224 00225 const Token& tokenAt( int position ) const; 00226 const Token& nextToken(); 00227 const Token& lookAhead( int n ) const; 00228 00229 static int toInt( const Token& token ); 00230 00231 int tokenPosition( const Token& token ) const; 00232 void getTokenPosition( const Token& token, int* line, int* col ); 00233 00234 int currentLine() const { return m_currentLine; } 00235 int currentColumn() const { return m_currentColumn; } 00236 00237 private: 00238 QChar currentChar() const; 00239 QChar peekChar( int n=1 ) const; 00240 int currentPosition() const; 00241 00242 void tokenize(); 00243 void nextToken( Token& token, bool stopOnNewline=false ); 00244 void nextChar(); 00245 void nextChar( int n ); 00246 void skip( int l, int r ); 00247 void readIdentifier(); 00248 void readWhiteSpaces( bool skipNewLine=true ); 00249 void readLineComment(); 00250 void readMultiLineComment(); 00251 void readCharLiteral(); 00252 void readStringLiteral(); 00253 void readNumberLiteral(); 00254 00255 int findOperator3() const; 00256 int findOperator2() const; 00257 bool eof() const; 00258 00259 // preprocessor (based on an article of Al Stevens on Dr.Dobb's journal) 00260 int testIfLevel(); 00261 int macroDefined(); 00262 QString readArgument(); 00263 00264 int macroPrimary(); 00265 int macroMultiplyDivide(); 00266 int macroAddSubtract(); 00267 int macroRelational(); 00268 int macroEquality(); 00269 int macroBoolAnd(); 00270 int macroBoolXor(); 00271 int macroBoolOr(); 00272 int macroLogicalAnd(); 00273 int macroLogicalOr(); 00274 int macroExpression(); 00275 00276 void handleDirective( const QString& directive ); 00277 void processDefine( Macro& macro ); 00278 void processElse(); 00279 void processElif(); 00280 void processEndif(); 00281 void processIf(); 00282 void processIfdef(); 00283 void processIfndef(); 00284 void processInclude(); 00285 void processUndef(); 00286 00287 private: 00288 LexerData* d; 00289 Driver* m_driver; 00290 QPtrVector< Token > m_tokens; 00291 int m_size; 00292 int m_index; 00293 QString m_source; 00294 int m_ptr; 00295 int m_endPtr; 00296 bool m_recordComments; 00297 bool m_recordWhiteSpaces; 00298 bool m_startLine; 00299 QMap< QString, QPair<SkipType, QString> > m_words; 00300 00301 int m_currentLine; 00302 int m_currentColumn; 00303 bool m_skipWordsEnabled; 00304 00305 // preprocessor 00306 QMemArray<bool> m_skipping; 00307 QMemArray<bool> m_trueTest; 00308 int m_ifLevel; 00309 bool m_preprocessorEnabled; 00310 bool m_inPreproc; 00311 00312 bool m_reportWarnings; 00313 bool m_reportMessages; 00314 00315 private: 00316 Lexer( const Lexer& source ); 00317 void operator = ( const Lexer& source ); 00318 }; 00319 00320 00321 inline Token::Token() 00322 : m_type( -1 ), 00323 m_position( 0 ), 00324 m_length( 0 ), 00325 m_text( 0 ) 00326 { 00327 } 00328 00329 inline Token::Token( int type, int position, int length, const QString& text ) 00330 : m_type( type ), 00331 m_position( position ), 00332 m_length( length ), 00333 m_text( text ) 00334 { 00335 } 00336 00337 inline Token::Token( const Token& source ) 00338 : m_type( source.m_type ), 00339 m_position( source.m_position ), 00340 m_length( source.m_length ), 00341 m_startLine( source.m_startLine ), 00342 m_startColumn( source.m_startColumn ), 00343 m_endLine( source.m_endLine ), 00344 m_endColumn( source.m_endColumn ), 00345 m_text( source.m_text ) 00346 { 00347 } 00348 00349 inline Token& Token::operator = ( const Token& source ) 00350 { 00351 m_type = source.m_type; 00352 m_position = source.m_position; 00353 m_length = source.m_length; 00354 m_startLine = source.m_startLine; 00355 m_startColumn = source.m_startColumn; 00356 m_endLine = source.m_endLine; 00357 m_endColumn = source.m_endColumn; 00358 m_text = source.m_text; 00359 return( *this ); 00360 } 00361 00362 inline Token::operator int () const 00363 { 00364 return m_type; 00365 } 00366 00367 inline bool Token::operator == ( const Token& token ) const 00368 { 00369 return m_type == token.m_type && 00370 m_position == token.m_position && 00371 m_length == token.m_length && 00372 m_startLine == token.m_startLine && 00373 m_startColumn == token.m_startColumn && 00374 m_endLine == token.m_endLine && 00375 m_endColumn == token.m_endColumn && 00376 m_text == token.m_text; 00377 } 00378 00379 inline bool Token::isNull() const 00380 { 00381 return m_type == Token_eof || m_length == 0; 00382 } 00383 00384 inline int Token::type() const 00385 { 00386 return m_type; 00387 } 00388 00389 inline void Token::setType( int type ) 00390 { 00391 m_type = type; 00392 } 00393 00394 inline int Token::position() const 00395 { 00396 return m_position; 00397 } 00398 00399 inline QString Token::text() const 00400 { 00401 return m_text.mid(m_position, m_length); 00402 } 00403 00404 inline void Token::setStartPosition( int line, int column ) 00405 { 00406 m_startLine = line; 00407 m_startColumn = column; 00408 } 00409 00410 inline void Token::setEndPosition( int line, int column ) 00411 { 00412 m_endLine = line; 00413 m_endColumn = column; 00414 } 00415 00416 inline void Token::getStartPosition( int* line, int* column ) const 00417 { 00418 if( line ) *line = m_startLine; 00419 if( column ) *column = m_startColumn; 00420 } 00421 00422 inline void Token::getEndPosition( int* line, int* column ) const 00423 { 00424 if( line ) *line = m_endLine; 00425 if( column ) *column = m_endColumn; 00426 } 00427 00428 inline void Token::setPosition( int position ) 00429 { 00430 m_position = position; 00431 } 00432 00433 inline unsigned int Token::length() const 00434 { 00435 return m_length; 00436 } 00437 00438 inline void Token::setLength( unsigned int length ) 00439 { 00440 m_length = length; 00441 } 00442 00443 inline bool Lexer::recordComments() const 00444 { 00445 return m_recordComments; 00446 } 00447 00448 inline void Lexer::setRecordComments( bool record ) 00449 { 00450 m_recordComments = record; 00451 } 00452 00453 inline bool Lexer::recordWhiteSpaces() const 00454 { 00455 return m_recordWhiteSpaces; 00456 } 00457 00458 inline void Lexer::setRecordWhiteSpaces( bool record ) 00459 { 00460 m_recordWhiteSpaces = record; 00461 } 00462 00463 inline QString Lexer::source() const 00464 { 00465 return m_source; 00466 } 00467 00468 inline int Lexer::index() const 00469 { 00470 return m_index; 00471 } 00472 00473 inline void Lexer::setIndex( int index ) 00474 { 00475 m_index = index; 00476 } 00477 00478 inline const Token& Lexer::nextToken() 00479 { 00480 if( m_index < m_size ) 00481 return *m_tokens[ m_index++ ]; 00482 00483 return *m_tokens[ m_index ]; 00484 } 00485 00486 inline const Token& Lexer::tokenAt( int n ) const 00487 { 00488 return *m_tokens[ QMIN(n, m_size-1) ]; 00489 } 00490 00491 inline const Token& Lexer::lookAhead( int n ) const 00492 { 00493 return *m_tokens[ QMIN(m_index + n, m_size-1) ]; 00494 } 00495 00496 inline int Lexer::tokenPosition( const Token& token ) const 00497 { 00498 return token.position(); 00499 } 00500 00501 inline void Lexer::nextChar() 00502 { 00503 if(m_source[m_ptr++] == '\n') { 00504 ++m_currentLine; 00505 m_currentColumn = 0; 00506 m_startLine = true; 00507 } else { 00508 ++m_currentColumn; 00509 } 00510 } 00511 00512 inline void Lexer::nextChar( int n ) 00513 { 00514 m_currentColumn += n; 00515 m_ptr += n; 00516 } 00517 00518 inline void Lexer::readIdentifier() 00519 { 00520 while( currentChar().isLetterOrNumber() || currentChar() == '_' ) 00521 nextChar(); 00522 } 00523 00524 inline void Lexer::readWhiteSpaces( bool skipNewLine ) 00525 { 00526 while( !currentChar().isNull() ){ 00527 QChar ch = currentChar(); 00528 00529 if( ch == '\n' && !skipNewLine ){ 00530 break; 00531 } else if( ch.isSpace() ){ 00532 nextChar(); 00533 } else if( m_inPreproc && currentChar() == '\\' ){ 00534 nextChar(); 00535 readWhiteSpaces( true ); 00536 } else { 00537 break; 00538 } 00539 } 00540 } 00541 00542 inline void Lexer::readLineComment() 00543 { 00544 while( !currentChar().isNull() && currentChar() != '\n' ){ 00545 if( m_reportMessages && currentChar() == '@' && m_source.mid(currentPosition()+1, 4).lower() == "todo" ){ 00546 nextChar( 5 ); 00547 QString msg; 00548 int line = m_currentLine; 00549 int col = m_currentColumn; 00550 00551 while( currentChar() ){ 00552 if( currentChar() == '*' && peekChar() == '/' ) 00553 break; 00554 else if( currentChar() == '\n' ) 00555 break; 00556 00557 msg += currentChar(); 00558 nextChar(); 00559 } 00560 m_driver->addProblem( m_driver->currentFileName(), Problem(msg, line, col, Problem::Level_Todo) ); 00561 } else 00562 if( m_reportMessages && m_source.mid(currentPosition(), 5).lower() == "fixme" ){ 00563 nextChar( 5 ); 00564 QString msg; 00565 int line = m_currentLine; 00566 int col = m_currentColumn; 00567 00568 while( currentChar() ){ 00569 if( currentChar() == '*' && peekChar() == '/' ) 00570 break; 00571 else if( currentChar() == '\n' ) 00572 break; 00573 00574 msg += currentChar(); 00575 nextChar(); 00576 } 00577 m_driver->addProblem( m_driver->currentFileName(), Problem(msg, line, col, Problem::Level_Fixme) ); 00578 } else 00579 nextChar(); 00580 } 00581 } 00582 00583 inline void Lexer::readMultiLineComment() 00584 { 00585 while( !currentChar().isNull() ){ 00586 if( currentChar() == '*' && peekChar() == '/' ){ 00587 nextChar( 2 ); 00588 return; 00589 } else if( m_reportMessages && currentChar() == '@' && m_source.mid(currentPosition()+1, 4).lower() == "todo" ){ 00590 nextChar( 5 ); 00591 QString msg; 00592 int line = m_currentLine; 00593 int col = m_currentColumn; 00594 00595 while( currentChar() ){ 00596 if( currentChar() == '*' && peekChar() == '/' ) 00597 break; 00598 else if( currentChar() == '\n' ) 00599 break; 00600 msg += currentChar(); 00601 nextChar(); 00602 } 00603 m_driver->addProblem( m_driver->currentFileName(), Problem(msg, line, col, Problem::Level_Todo) ); 00604 } else 00605 if( m_reportMessages && m_source.mid(currentPosition(), 5).lower() == "fixme" ){ 00606 nextChar( 5 ); 00607 QString msg; 00608 int line = m_currentLine; 00609 int col = m_currentColumn; 00610 00611 while( currentChar() ){ 00612 if( currentChar() == '*' && peekChar() == '/' ) 00613 break; 00614 else if( currentChar() == '\n' ) 00615 break; 00616 00617 msg += currentChar(); 00618 nextChar(); 00619 } 00620 m_driver->addProblem( m_driver->currentFileName(), Problem(msg, line, col, Problem::Level_Fixme) ); 00621 } else 00622 nextChar(); 00623 } 00624 } 00625 00626 inline void Lexer::readCharLiteral() 00627 { 00628 if( currentChar() == '\'' ) 00629 nextChar(); // skip ' 00630 else if( currentChar() == 'L' && peekChar() == '\'' ) 00631 nextChar( 2 ); // slip L' 00632 else 00633 return; 00634 00635 while( !currentChar().isNull() ){ 00636 int len = m_endPtr - currentPosition(); 00637 00638 if( len>=2 && (currentChar() == '\\' && peekChar() == '\'') ){ 00639 nextChar( 2 ); 00640 } else if( len>=2 && (currentChar() == '\\' && peekChar() == '\\') ){ 00641 nextChar( 2 ); 00642 } else if( currentChar() == '\'' ){ 00643 nextChar(); 00644 break; 00645 } else { 00646 nextChar(); 00647 } 00648 } 00649 } 00650 00651 inline void Lexer::readStringLiteral() 00652 { 00653 if( currentChar() != '"' ) 00654 return; 00655 00656 nextChar(); // skip " 00657 00658 while( !currentChar().isNull() ){ 00659 int len = m_endPtr - m_ptr; 00660 00661 if( len>=2 && currentChar() == '\\' && peekChar() == '"' ){ 00662 nextChar( 2 ); 00663 } else if( len>=2 && currentChar() == '\\' && peekChar() == '\\' ){ 00664 nextChar( 2 ); 00665 } else if( currentChar() == '"' ){ 00666 nextChar(); 00667 break; 00668 } else { 00669 nextChar(); 00670 } 00671 } 00672 } 00673 00674 inline void Lexer::readNumberLiteral() 00675 { 00676 while( currentChar().isLetterOrNumber() || currentChar() == '.' ) 00677 nextChar(); 00678 } 00679 00680 inline int Lexer::findOperator3() const 00681 { 00682 int n = int(m_endPtr - m_ptr); 00683 00684 if( n >= 3){ 00685 QChar ch = currentChar(), ch1=peekChar(), ch2=peekChar(2); 00686 00687 if( ch == '<' && ch1 == '<' && ch2 == '=' ) return Token_assign; 00688 else if( ch == '>' && ch1 == '>' && ch2 == '=' ) return Token_assign; 00689 else if( ch == '-' && ch1 == '>' && ch2 == '*' ) return Token_ptrmem; 00690 else if( ch == '.' && ch1 == '.' && ch2 == '.' ) return Token_ellipsis; 00691 } 00692 00693 return -1; 00694 } 00695 00696 inline int Lexer::findOperator2() const 00697 { 00698 int n = int(m_endPtr - m_ptr); 00699 00700 if( n>=2 ){ 00701 QChar ch = currentChar(), ch1=peekChar(); 00702 00703 if( ch == ':' && ch1 == ':' ) return Token_scope; 00704 else if( ch == '.' && ch1 == '*' ) return Token_ptrmem; 00705 else if( ch == '+' && ch1 == '=' ) return Token_assign; 00706 else if( ch == '-' && ch1 == '=' ) return Token_assign; 00707 else if( ch == '*' && ch1 == '=' ) return Token_assign; 00708 else if( ch == '/' && ch1 == '=' ) return Token_assign; 00709 else if( ch == '%' && ch1 == '=' ) return Token_assign; 00710 else if( ch == '^' && ch1 == '=' ) return Token_assign; 00711 else if( ch == '&' && ch1 == '=' ) return Token_assign; 00712 else if( ch == '|' && ch1 == '=' ) return Token_assign; 00713 else if( ch == '<' && ch1 == '<' ) return Token_shift; 00714 else if( ch == '>' && ch1 == '>' ) return Token_shift; 00715 else if( ch == '=' && ch1 == '=' ) return Token_eq; 00716 else if( ch == '!' && ch1 == '=' ) return Token_eq; 00717 else if( ch == '<' && ch1 == '=' ) return Token_leq; 00718 else if( ch == '>' && ch1 == '=' ) return Token_geq; 00719 else if( ch == '&' && ch1 == '&' ) return Token_and; 00720 else if( ch == '|' && ch1 == '|' ) return Token_or; 00721 else if( ch == '+' && ch1 == '+' ) return Token_incr; 00722 else if( ch == '-' && ch1 == '-' ) return Token_decr; 00723 else if( ch == '-' && ch1 == '>' ) return Token_arrow; 00724 else if( ch == '#' && ch1 == '#' ) return Token_concat; 00725 } 00726 00727 return -1; 00728 } 00729 00730 inline bool Lexer::skipWordsEnabled() const 00731 { 00732 return m_skipWordsEnabled; 00733 } 00734 00735 inline void Lexer::setSkipWordsEnabled( bool enabled ) 00736 { 00737 m_skipWordsEnabled = enabled; 00738 } 00739 00740 inline bool Lexer::preprocessorEnabled() const 00741 { 00742 return m_preprocessorEnabled; 00743 } 00744 00745 inline void Lexer::setPreprocessorEnabled( bool enabled ) 00746 { 00747 m_preprocessorEnabled = enabled; 00748 } 00749 00750 inline int Lexer::currentPosition() const 00751 { 00752 return m_ptr; 00753 } 00754 00755 inline QChar Lexer::currentChar() const 00756 { 00757 return m_ptr < m_endPtr ? m_source[m_ptr] : QChar::null; 00758 } 00759 00760 inline QChar Lexer::peekChar( int n ) const 00761 { 00762 return m_ptr+n < m_endPtr ? m_source[m_ptr + n] : QChar::null; 00763 } 00764 00765 inline bool Lexer::eof() const 00766 { 00767 return m_ptr >= m_endPtr; 00768 } 00769 00770 inline bool Lexer::reportWarnings() const 00771 { 00772 return m_reportWarnings; 00773 } 00774 00775 inline void Lexer::setReportWarnings( bool enable ) 00776 { 00777 m_reportWarnings = enable; 00778 } 00779 00780 inline bool Lexer::reportMessages() const 00781 { 00782 return m_reportMessages; 00783 } 00784 00785 inline void Lexer::setReportMessages( bool enable ) 00786 { 00787 m_reportMessages = enable; 00788 } 00789 00790 00791 #endif
KDE Logo
This file is part of the documentation for KDevelop Version 3.0.4.
Documentation copyright © 1996-2004 the KDE developers.
Generated on Wed Oct 6 17:39:07 2004 by doxygen 1.3.7 written by Dimitri van Heesch, © 1997-2003