00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
#ifndef LEXER_H
00021
#define LEXER_H
00022
00023
#include "driver.h"
00024
00025
#include <qstring.h>
00026
#include <qmap.h>
00027
#include <qvaluestack.h>
00028
#include <qpair.h>
00029
#include <qptrvector.h>
00030
// Integer token kinds. Token::isNull() treats Token_eof as "no token";
// Lexer::findOperator2()/findOperator3() return values from the 2000 range.
// Only the three explicit values below are fixed by hand; every other
// enumerator is its predecessor plus one, so the order must not change.
enum Type {
    // End of input.
    Token_eof = 0,

    // Generic lexical classes, numbered from 1000.
    Token_identifier = 1000,
    Token_number_literal, Token_char_literal, Token_string_literal,
    Token_whitespaces, Token_comment, Token_preproc,

    // Multi-character operators, numbered from 2000.
    Token_assign = 2000,
    Token_ptrmem, Token_ellipsis, Token_scope, Token_shift,
    Token_eq, Token_leq, Token_geq,
    Token_incr, Token_decr, Token_arrow,

    Token_concat,

    Token_K_DCOP, Token_k_dcop, Token_k_dcop_signals,

    Token_Q_OBJECT, Token_signals, Token_slots, Token_emit,

    // Keyword tokens, in alphabetical order.
    Token_and, Token_and_eq, Token_asm, Token_auto,
    Token_bitand, Token_bitor, Token_bool, Token_break,
    Token_case, Token_catch, Token_char, Token_class,
    Token_compl, Token_const, Token_const_cast, Token_continue,
    Token_default, Token_delete, Token_do, Token_double,
    Token_dynamic_cast, Token_else, Token_enum, Token_explicit,
    Token_export, Token_extern, Token_false, Token_float,
    Token_for, Token_friend, Token_goto, Token_if,
    Token_inline, Token_int, Token_long, Token_mutable,
    Token_namespace, Token_new, Token_not, Token_not_eq,
    Token_operator, Token_or, Token_or_eq, Token_private,
    Token_protected, Token_public, Token_register, Token_reinterpret_cast,
    Token_return, Token_short, Token_signed, Token_sizeof,
    Token_static, Token_static_cast, Token_struct, Token_switch,
    Token_template, Token_this, Token_throw, Token_true,
    Token_try, Token_typedef, Token_typeid, Token_typename,
    Token_union, Token_unsigned, Token_using, Token_virtual,
    Token_void, Token_volatile, Token_wchar_t, Token_while,
    Token_xor, Token_xor_eq
};
00139
// Second argument of Lexer::addSkipWord(); stored per word in Lexer::m_words.
// NOTE(review): judging by the names, SkipWord drops just the word and
// SkipWordAndArguments also drops a following argument list -- confirm
// against the lexer implementation.
enum SkipType {
    SkipWord,
    SkipWordAndArguments
};
00144
00145
// Forward declaration only; Lexer keeps a pointer to it in its member `d`.
struct LexerData;
00146
00147
class Token
00148 {
00149
public:
00150
Token();
00151
Token(
int type,
int position,
int length,
const QString& text );
00152
Token(
const Token& source );
00153
00154
Token& operator = (
const Token& source );
00155
bool operator == (
const Token& token )
const;
00156
operator int () const;
00157
00158
bool isNull() const;
00159
00160
int type() const;
00161
void setType(
int type );
00162
00163
void getStartPosition(
int* line,
int* column ) const;
00164
void setStartPosition(
int line,
int column );
00165
void getEndPosition(
int* line,
int* column ) const;
00166
void setEndPosition(
int line,
int column );
00167
00168
unsigned int length() const;
00169
void setLength(
unsigned int length );
00170
00171
int position() const;
00172
void setPosition(
int position );
00173
00174
QString text() const;
00175
00176 private:
00177 int m_type;
00178 int m_position;
00179 int m_length;
00180 int m_startLine;
00181 int m_startColumn;
00182 int m_endLine;
00183 int m_endColumn;
00184 QString m_text;
00185
00186 friend class
Lexer;
00187 friend class
Parser;
00188 };
00189
00190 class Lexer
00191 {
00192
public:
00193 Lexer(
Driver* driver );
00194 ~Lexer();
00195
00196
bool recordComments()
const;
00197
void setRecordComments(
bool record );
00198
00199
bool recordWhiteSpaces()
const;
00200
void setRecordWhiteSpaces(
bool record );
00201
00202
bool reportWarnings()
const;
00203
void setReportWarnings(
bool enable );
00204
00205
bool reportMessages()
const;
00206
void setReportMessages(
bool enable );
00207
00208
bool skipWordsEnabled()
const;
00209
void setSkipWordsEnabled(
bool enabled );
00210
00211
bool preprocessorEnabled()
const;
00212
void setPreprocessorEnabled(
bool enabled );
00213
00214
void resetSkipWords();
00215
void addSkipWord(
const QString& word,
SkipType skipType=
SkipWord,
const QString& str = QString::null );
00216
00217
QString source()
const;
00218
void setSource(
const QString& source );
00219
00220
int index()
const;
00221
void setIndex(
int index );
00222
00223
void reset();
00224
00225
const Token& tokenAt(
int position )
const;
00226
const Token& nextToken();
00227
const Token& lookAhead(
int n )
const;
00228
00229
static int toInt(
const Token& token );
00230
00231
int tokenPosition(
const Token& token )
const;
00232
void getTokenPosition(
const Token& token,
int* line,
int* col );
00233
00234 int currentLine()
const {
return m_currentLine; }
00235 int currentColumn()
const {
return m_currentColumn; }
00236
00237
private:
00238
QChar currentChar() const;
00239
QChar peekChar(
int n=1 ) const;
00240
int currentPosition() const;
00241
00242
void tokenize();
00243
void nextToken(
Token& token,
bool stopOnNewline=false );
00244
void nextChar();
00245
void nextChar(
int n );
00246
void skip(
int l,
int r );
00247
void readIdentifier();
00248
void readWhiteSpaces(
bool skipNewLine=true );
00249
void readLineComment();
00250
void readMultiLineComment();
00251
void readCharLiteral();
00252
void readStringLiteral();
00253
void readNumberLiteral();
00254
00255
int findOperator3() const;
00256
int findOperator2() const;
00257
bool eof() const;
00258
00259
00260
int testIfLevel();
00261
int macroDefined();
00262
QString readArgument();
00263
00264
int macroPrimary();
00265
int macroMultiplyDivide();
00266
int macroAddSubtract();
00267
int macroRelational();
00268
int macroEquality();
00269
int macroBoolAnd();
00270
int macroBoolXor();
00271
int macroBoolOr();
00272
int macroLogicalAnd();
00273
int macroLogicalOr();
00274
int macroExpression();
00275
00276
void handleDirective( const
QString& directive );
00277
void processDefine(
Macro& macro );
00278
void processElse();
00279
void processElif();
00280
void processEndif();
00281
void processIf();
00282
void processIfdef();
00283
void processIfndef();
00284
void processInclude();
00285
void processUndef();
00286
00287 private:
00288 LexerData* d;
00289 Driver* m_driver;
00290 QPtrVector<
Token > m_tokens;
00291 int m_size;
00292 int m_index;
00293 QString m_source;
00294 int m_ptr;
00295 int m_endPtr;
00296 bool m_recordComments;
00297 bool m_recordWhiteSpaces;
00298 bool m_startLine;
00299 QMap<
QString,
QPair<SkipType, QString> > m_words;
00300
00301 int m_currentLine;
00302 int m_currentColumn;
00303 bool m_skipWordsEnabled;
00304
00305
00306 QMemArray<
bool> m_skipping;
00307 QMemArray<
bool> m_trueTest;
00308 int m_ifLevel;
00309 bool m_preprocessorEnabled;
00310 bool m_inPreproc;
00311
00312 bool m_reportWarnings;
00313 bool m_reportMessages;
00314
00315 private:
00316 Lexer( const Lexer& source );
00317
void operator = ( const Lexer& source );
00318 };
00319
00320
00321 inline
Token::
Token()
00322 : m_type( -1 ),
00323 m_position( 0 ),
00324 m_length( 0 ),
00325 m_text( 0 )
00326 {
00327 }
00328
00329 inline Token::Token(
int type,
int position,
int length,
const QString& text )
00330 : m_type( type ),
00331 m_position( position ),
00332 m_length(
length ),
00333 m_text(
text )
00334 {
00335 }
00336
00337 inline Token::Token(
const Token& source )
00338 : m_type( source.m_type ),
00339 m_position( source.m_position ),
00340 m_length( source.m_length ),
00341 m_startLine( source.m_startLine ),
00342 m_startColumn( source.m_startColumn ),
00343 m_endLine( source.m_endLine ),
00344 m_endColumn( source.m_endColumn ),
00345 m_text( source.m_text )
00346 {
00347 }
00348
00349 inline Token& Token::operator = (
const Token& source )
00350 {
00351 m_type = source.
m_type;
00352 m_position = source.
m_position;
00353 m_length = source.
m_length;
00354 m_startLine = source.
m_startLine;
00355 m_startColumn = source.
m_startColumn;
00356 m_endLine = source.
m_endLine;
00357 m_endColumn = source.
m_endColumn;
00358 m_text = source.
m_text;
00359
return( *this );
00360 }
00361
00362 inline Token::operator int ()
const
00363
{
00364
return m_type;
00365 }
00366
00367 inline bool Token::operator == (
const Token& token )
const
00368
{
00369
return m_type == token.
m_type &&
00370 m_position == token.
m_position &&
00371 m_length == token.
m_length &&
00372 m_startLine == token.
m_startLine &&
00373 m_startColumn == token.
m_startColumn &&
00374 m_endLine == token.
m_endLine &&
00375 m_endColumn == token.
m_endColumn &&
00376 m_text == token.
m_text;
00377 }
00378
00379 inline bool Token::isNull()
const
00380
{
00381
return m_type ==
Token_eof || m_length == 0;
00382 }
00383
00384 inline int Token::type()
const
00385
{
00386
return m_type;
00387 }
00388
00389
inline void Token::setType(
int type )
00390 {
00391 m_type = type;
00392 }
00393
00394 inline int Token::position()
const
00395
{
00396
return m_position;
00397 }
00398
00399 inline QString Token::text()
const
00400
{
00401
return m_text.mid(m_position, m_length);
00402 }
00403
00404 inline void Token::setStartPosition(
int line,
int column )
00405 {
00406 m_startLine = line;
00407 m_startColumn = column;
00408 }
00409
00410 inline void Token::setEndPosition(
int line,
int column )
00411 {
00412 m_endLine = line;
00413 m_endColumn = column;
00414 }
00415
00416 inline void Token::getStartPosition(
int* line,
int* column )
const
00417
{
00418
if( line ) *line = m_startLine;
00419
if( column ) *column = m_startColumn;
00420 }
00421
00422 inline void Token::getEndPosition(
int* line,
int* column )
const
00423
{
00424
if( line ) *line = m_endLine;
00425
if( column ) *column = m_endColumn;
00426 }
00427
00428 inline void Token::setPosition(
int position )
00429 {
00430 m_position = position;
00431 }
00432
00433 inline unsigned int Token::length()
const
00434
{
00435
return m_length;
00436 }
00437
00438 inline void Token::setLength(
unsigned int length )
00439 {
00440 m_length =
length;
00441 }
00442
00443 inline bool Lexer::recordComments()
const
00444
{
00445
return m_recordComments;
00446 }
00447
00448 inline void Lexer::setRecordComments(
bool record )
00449 {
00450
m_recordComments = record;
00451 }
00452
00453 inline bool Lexer::recordWhiteSpaces()
const
00454
{
00455
return m_recordWhiteSpaces;
00456 }
00457
00458 inline void Lexer::setRecordWhiteSpaces(
bool record )
00459 {
00460
m_recordWhiteSpaces = record;
00461 }
00462
00463 inline QString Lexer::source()
const
00464
{
00465
return m_source;
00466 }
00467
00468 inline int Lexer::index()
const
00469
{
00470
return m_index;
00471 }
00472
00473 inline void Lexer::setIndex(
int index )
00474 {
00475
m_index = index;
00476 }
00477
00478 inline const Token&
Lexer::nextToken()
00479 {
00480
if(
m_index <
m_size )
00481
return *
m_tokens[
m_index++ ];
00482
00483
return *
m_tokens[
m_index ];
00484 }
00485
00486 inline const Token&
Lexer::tokenAt(
int n )
const
00487
{
00488
return *
m_tokens[ QMIN(n,
m_size-1) ];
00489 }
00490
00491 inline const Token&
Lexer::lookAhead(
int n )
const
00492
{
00493
return *
m_tokens[ QMIN(
m_index + n,
m_size-1) ];
00494 }
00495
00496 inline int Lexer::tokenPosition(
const Token& token )
const
00497
{
00498
return token.
position();
00499 }
00500
00501 inline void Lexer::nextChar()
00502 {
00503
if(
m_source[
m_ptr++] ==
'\n') {
00504 ++
m_currentLine;
00505
m_currentColumn = 0;
00506 m_startLine =
true;
00507 }
else {
00508 ++
m_currentColumn;
00509 }
00510 }
00511
00512 inline void Lexer::nextChar(
int n )
00513 {
00514
m_currentColumn += n;
00515
m_ptr += n;
00516 }
00517
00518 inline void Lexer::readIdentifier()
00519 {
00520
while(
currentChar().isLetterOrNumber() ||
currentChar() ==
'_' )
00521
nextChar();
00522 }
00523
00524 inline void Lexer::readWhiteSpaces(
bool skipNewLine )
00525 {
00526
while( !
currentChar().isNull() ){
00527
QChar ch =
currentChar();
00528
00529
if( ch ==
'\n' && !skipNewLine ){
00530
break;
00531 }
else if( ch.isSpace() ){
00532
nextChar();
00533 }
else if(
m_inPreproc &&
currentChar() ==
'\\' ){
00534
nextChar();
00535
readWhiteSpaces(
true );
00536 }
else {
00537
break;
00538 }
00539 }
00540 }
00541
00542 inline void Lexer::readLineComment()
00543 {
00544
while( !
currentChar().isNull() &&
currentChar() !=
'\n' ){
00545
if(
m_reportMessages &&
currentChar() ==
'@' &&
m_source.mid(
currentPosition()+1, 4).lower() ==
"todo" ){
00546
nextChar( 5 );
00547
QString msg;
00548
int line =
m_currentLine;
00549
int col =
m_currentColumn;
00550
00551
while(
currentChar() ){
00552
if(
currentChar() ==
'*' &&
peekChar() ==
'/' )
00553
break;
00554
else if(
currentChar() ==
'\n' )
00555
break;
00556
00557 msg +=
currentChar();
00558
nextChar();
00559 }
00560
m_driver->
addProblem(
m_driver->
currentFileName(),
Problem(msg, line, col, Problem::Level_Todo) );
00561 }
else
00562
if(
m_reportMessages &&
m_source.mid(
currentPosition(), 5).lower() ==
"fixme" ){
00563
nextChar( 5 );
00564
QString msg;
00565
int line =
m_currentLine;
00566
int col =
m_currentColumn;
00567
00568
while(
currentChar() ){
00569
if(
currentChar() ==
'*' &&
peekChar() ==
'/' )
00570
break;
00571
else if(
currentChar() ==
'\n' )
00572
break;
00573
00574 msg +=
currentChar();
00575
nextChar();
00576 }
00577
m_driver->
addProblem(
m_driver->
currentFileName(),
Problem(msg, line, col, Problem::Level_Fixme) );
00578 }
else
00579
nextChar();
00580 }
00581 }
00582
00583 inline void Lexer::readMultiLineComment()
00584 {
00585
while( !
currentChar().isNull() ){
00586
if(
currentChar() ==
'*' &&
peekChar() ==
'/' ){
00587
nextChar( 2 );
00588
return;
00589 }
else if(
m_reportMessages &&
currentChar() ==
'@' &&
m_source.mid(
currentPosition()+1, 4).lower() ==
"todo" ){
00590
nextChar( 5 );
00591
QString msg;
00592
int line =
m_currentLine;
00593
int col =
m_currentColumn;
00594
00595
while(
currentChar() ){
00596
if(
currentChar() ==
'*' &&
peekChar() ==
'/' )
00597
break;
00598
else if(
currentChar() ==
'\n' )
00599
break;
00600 msg +=
currentChar();
00601
nextChar();
00602 }
00603
m_driver->
addProblem(
m_driver->
currentFileName(),
Problem(msg, line, col, Problem::Level_Todo) );
00604 }
else
00605
if(
m_reportMessages &&
m_source.mid(
currentPosition(), 5).lower() ==
"fixme" ){
00606
nextChar( 5 );
00607
QString msg;
00608
int line =
m_currentLine;
00609
int col =
m_currentColumn;
00610
00611
while(
currentChar() ){
00612
if(
currentChar() ==
'*' &&
peekChar() ==
'/' )
00613
break;
00614
else if(
currentChar() ==
'\n' )
00615
break;
00616
00617 msg +=
currentChar();
00618
nextChar();
00619 }
00620
m_driver->
addProblem(
m_driver->
currentFileName(),
Problem(msg, line, col, Problem::Level_Fixme) );
00621 }
else
00622
nextChar();
00623 }
00624 }
00625
00626 inline void Lexer::readCharLiteral()
00627 {
00628
if(
currentChar() ==
'\'' )
00629
nextChar();
00630
else if(
currentChar() ==
'L' &&
peekChar() ==
'\'' )
00631
nextChar( 2 );
00632
else
00633
return;
00634
00635
while( !
currentChar().isNull() ){
00636
int len =
m_endPtr -
currentPosition();
00637
00638
if(
len>=2 && (
currentChar() ==
'\\' &&
peekChar() ==
'\'') ){
00639
nextChar( 2 );
00640 }
else if(
len>=2 && (
currentChar() ==
'\\' && peekChar() ==
'\\') ){
00641
nextChar( 2 );
00642 }
else if(
currentChar() ==
'\'' ){
00643
nextChar();
00644
break;
00645 }
else {
00646
nextChar();
00647 }
00648 }
00649 }
00650
00651 inline void Lexer::readStringLiteral()
00652 {
00653
if(
currentChar() !=
'"' )
00654
return;
00655
00656
nextChar();
00657
00658
while( !
currentChar().isNull() ){
00659
int len =
m_endPtr -
m_ptr;
00660
00661
if(
len>=2 &&
currentChar() ==
'\\' &&
peekChar() ==
'"' ){
00662
nextChar( 2 );
00663 }
else if(
len>=2 &&
currentChar() ==
'\\' && peekChar() ==
'\\' ){
00664
nextChar( 2 );
00665 }
else if(
currentChar() ==
'"' ){
00666
nextChar();
00667
break;
00668 }
else {
00669
nextChar();
00670 }
00671 }
00672 }
00673
00674 inline void Lexer::readNumberLiteral()
00675 {
00676
while(
currentChar().isLetterOrNumber() ||
currentChar() ==
'.' )
00677
nextChar();
00678 }
00679
00680 inline int Lexer::findOperator3()
const
00681
{
00682
int n = int(
m_endPtr -
m_ptr);
00683
00684
if( n >= 3){
00685
QChar ch =
currentChar(), ch1=
peekChar(), ch2=peekChar(2);
00686
00687
if( ch ==
'<' && ch1 ==
'<' && ch2 ==
'=' )
return Token_assign;
00688
else if( ch ==
'>' && ch1 ==
'>' && ch2 ==
'=' )
return Token_assign;
00689
else if( ch ==
'-' && ch1 ==
'>' && ch2 ==
'*' )
return Token_ptrmem;
00690
else if( ch ==
'.' && ch1 ==
'.' && ch2 ==
'.' )
return Token_ellipsis;
00691 }
00692
00693
return -1;
00694 }
00695
00696 inline int Lexer::findOperator2()
const
00697
{
00698
int n = int(
m_endPtr -
m_ptr);
00699
00700
if( n>=2 ){
00701
QChar ch =
currentChar(), ch1=
peekChar();
00702
00703
if( ch ==
':' && ch1 ==
':' )
return Token_scope;
00704
else if( ch ==
'.' && ch1 ==
'*' )
return Token_ptrmem;
00705
else if( ch ==
'+' && ch1 ==
'=' )
return Token_assign;
00706
else if( ch ==
'-' && ch1 ==
'=' )
return Token_assign;
00707
else if( ch ==
'*' && ch1 ==
'=' )
return Token_assign;
00708
else if( ch ==
'/' && ch1 ==
'=' )
return Token_assign;
00709
else if( ch ==
'%' && ch1 ==
'=' )
return Token_assign;
00710
else if( ch ==
'^' && ch1 ==
'=' )
return Token_assign;
00711
else if( ch ==
'&' && ch1 ==
'=' )
return Token_assign;
00712
else if( ch ==
'|' && ch1 ==
'=' )
return Token_assign;
00713
else if( ch ==
'<' && ch1 ==
'<' )
return Token_shift;
00714
else if( ch ==
'>' && ch1 ==
'>' )
return Token_shift;
00715
else if( ch ==
'=' && ch1 ==
'=' )
return Token_eq;
00716
else if( ch ==
'!' && ch1 ==
'=' )
return Token_eq;
00717
else if( ch ==
'<' && ch1 ==
'=' )
return Token_leq;
00718
else if( ch ==
'>' && ch1 ==
'=' )
return Token_geq;
00719
else if( ch ==
'&' && ch1 ==
'&' )
return Token_and;
00720
else if( ch ==
'|' && ch1 ==
'|' )
return Token_or;
00721
else if( ch ==
'+' && ch1 ==
'+' )
return Token_incr;
00722
else if( ch ==
'-' && ch1 ==
'-' )
return Token_decr;
00723
else if( ch ==
'-' && ch1 ==
'>' )
return Token_arrow;
00724
else if( ch ==
'#' && ch1 ==
'#' )
return Token_concat;
00725 }
00726
00727
return -1;
00728 }
00729
00730 inline bool Lexer::skipWordsEnabled()
const
00731
{
00732
return m_skipWordsEnabled;
00733 }
00734
00735 inline void Lexer::setSkipWordsEnabled(
bool enabled )
00736 {
00737
m_skipWordsEnabled = enabled;
00738 }
00739
00740 inline bool Lexer::preprocessorEnabled()
const
00741
{
00742
return m_preprocessorEnabled;
00743 }
00744
00745 inline void Lexer::setPreprocessorEnabled(
bool enabled )
00746 {
00747
m_preprocessorEnabled = enabled;
00748 }
00749
00750 inline int Lexer::currentPosition()
const
00751
{
00752
return m_ptr;
00753 }
00754
00755 inline QChar Lexer::currentChar()
const
00756
{
00757
return m_ptr <
m_endPtr ?
m_source[
m_ptr] : QChar::null;
00758 }
00759
00760 inline QChar Lexer::peekChar(
int n )
const
00761
{
00762
return m_ptr+n <
m_endPtr ?
m_source[
m_ptr + n] : QChar::null;
00763 }
00764
00765 inline bool Lexer::eof()
const
00766
{
00767
return m_ptr >=
m_endPtr;
00768 }
00769
00770 inline bool Lexer::reportWarnings()
const
00771
{
00772
return m_reportWarnings;
00773 }
00774
00775 inline void Lexer::setReportWarnings(
bool enable )
00776 {
00777
m_reportWarnings = enable;
00778 }
00779
00780 inline bool Lexer::reportMessages()
const
00781
{
00782
return m_reportMessages;
00783 }
00784
00785 inline void Lexer::setReportMessages(
bool enable )
00786 {
00787
m_reportMessages = enable;
00788 }
00789
00790
00791
#endif