CharScanner.hpp
Go to the documentation of this file.
00001 #ifndef INC_CharScanner_hpp__
00002 #define INC_CharScanner_hpp__
00003
00004
00005
00006
00007
00008
00009
00010
00011 #include <antlr/config.hpp>
00012
00013 #include <map>
00014
00015 #ifdef HAS_NOT_CCTYPE_H
00016 #include <ctype.h>
00017 #else
00018 #include <cctype>
00019 #endif
00020
00021 #include <antlr/TokenStream.hpp>
00022 #include <antlr/RecognitionException.hpp>
00023 #include <antlr/MismatchedCharException.hpp>
00024 #include <antlr/InputBuffer.hpp>
00025 #include <antlr/BitSet.hpp>
00026 #include <antlr/LexerSharedInputState.hpp>
00027
00028 #ifdef ANTLR_CXX_SUPPORTS_NAMESPACE
00029 namespace antlr {
00030 #endif
00031
00032 class ANTLR_API CharScanner;
00033
00034 ANTLR_C_USING(tolower)
00035
00036 #ifdef ANTLR_REALLY_NO_STRCASECMP
00037
00038
/** Fallback case-insensitive C-string comparison, compiled only when
 * ANTLR_REALLY_NO_STRCASECMP is defined.
 *
 * Both strings are walked in lock step; each character is folded with
 * tolower() and the first difference decides the result.
 *
 * @param s1 first NUL-terminated string
 * @param s2 second NUL-terminated string
 * @return -1 if s1 orders before s2, 1 if it orders after, 0 if the two
 *         strings are equal ignoring case
 */
inline int strcasecmp(const char *s1, const char *s2)
{
	for (;;)
	{
		// Convert through unsigned char: handing a negative plain char to
		// tolower() is undefined, and comparing as (possibly signed) char
		// would order bytes >= 0x80 before ASCII, unlike POSIX strcasecmp.
		const int c1 = tolower(static_cast<unsigned char>(*s1++));
		const int c2 = tolower(static_cast<unsigned char>(*s2++));

		if (c1 != c2)
			return (c1 < c2) ? -1 : 1;
		// Equal characters: a terminator here means both strings ended.
		if (c1 == 0)
			return 0;
	}
}
00050 #else
00051 #ifdef NO_STRCASECMP
00052 ANTLR_C_USING(stricmp)
00053 #else
00054 ANTLR_C_USING(strcasecmp)
00055 #endif
00056 #endif
00057
00060 class ANTLR_API CharScannerLiteralsLess : public ANTLR_USE_NAMESPACE(std)binary_function<ANTLR_USE_NAMESPACE(std)string,ANTLR_USE_NAMESPACE(std)string,bool> {
00061 private:
00062 const CharScanner* scanner;
00063 public:
00064 #ifdef NO_TEMPLATE_PARTS
00065 CharScannerLiteralsLess();
00066 #endif
00067 CharScannerLiteralsLess(const CharScanner* theScanner)
00068 : scanner(theScanner)
00069 {
00070 }
00071 bool operator() (const ANTLR_USE_NAMESPACE(std)string& x,const ANTLR_USE_NAMESPACE(std)string& y) const;
00072
00073
00074
00075 };
00076
00079 class ANTLR_API CharScanner : public TokenStream {
00080 protected:
00081 typedef RefToken (*factory_type)();
00082 public:
00083 CharScanner(InputBuffer& cb, bool case_sensitive );
00084 CharScanner(InputBuffer* cb, bool case_sensitive );
00085 CharScanner(const LexerSharedInputState& state, bool case_sensitive );
00086
00087 virtual ~CharScanner()
00088 {
00089 }
00090
00091 virtual int LA(int i);
00092
00093 virtual void append(char c)
00094 {
00095 if (saveConsumedInput) {
00096 int l = text.length();
00097 if ((l%256) == 0)
00098 text.reserve(l+256);
00099 text.replace(l,0,&c,1);
00100 }
00101 }
00102
00103 virtual void append(const ANTLR_USE_NAMESPACE(std)string& s)
00104 {
00105 if (saveConsumedInput)
00106 text+=s;
00107 }
00108
00109 virtual void commit()
00110 {
00111 inputState->getInput().commit();
00112 }
00113
00114 virtual void consume();
00115
00117 virtual void consumeUntil(int c)
00118 {
00119 for(;;)
00120 {
00121 int la_1 = LA(1);
00122 if( la_1 == EOF_CHAR || la_1 == c )
00123 break;
00124 consume();
00125 }
00126 }
00127
00129 virtual void consumeUntil(const BitSet& set)
00130 {
00131 for(;;)
00132 {
00133 int la_1 = LA(1);
00134 if( la_1 == EOF_CHAR || set.member(la_1) )
00135 break;
00136 consume();
00137 }
00138 }
00139
00141 virtual int mark()
00142 {
00143 return inputState->getInput().mark();
00144 }
00146 virtual void rewind(int pos)
00147 {
00148 inputState->getInput().rewind(pos);
00149 }
00150
00152 virtual void match(int c)
00153 {
00154 int la_1 = LA(1);
00155 if ( la_1 != c )
00156 throw MismatchedCharException(la_1, c, false, this);
00157 consume();
00158 }
00159
00163 virtual void match(const BitSet& b)
00164 {
00165 if (!b.member(LA(1))) {
00166 throw MismatchedCharException(LA(1),b,false,this);
00167 }
00168 consume();
00169 }
00170
00172 virtual void match(const ANTLR_USE_NAMESPACE(std)string& s)
00173 {
00174 int len = s.length();
00175
00176 for (int i = 0; i < len; i++)
00177 {
00178 int la_1 = LA(1);
00179
00180 if ( la_1 != s[i] )
00181 throw MismatchedCharException(la_1, s[i], false, this);
00182
00183 consume();
00184 }
00185 }
00189 virtual void matchNot(int c)
00190 {
00191 int la_1 = LA(1);
00192
00193 if ( la_1 == c )
00194 throw MismatchedCharException(la_1, c, true, this);
00195
00196 consume();
00197 }
00201 virtual void matchRange(int c1, int c2)
00202 {
00203 int la_1 = LA(1);
00204
00205 if ( la_1 < c1 || la_1 > c2 )
00206 throw MismatchedCharException(la_1, c1, c2, false, this);
00207
00208 consume();
00209 }
00210
00211 virtual bool getCaseSensitive() const
00212 {
00213 return caseSensitive;
00214 }
00215
00216 virtual void setCaseSensitive(bool t)
00217 {
00218 caseSensitive = t;
00219 }
00220
00221 virtual bool getCaseSensitiveLiterals() const=0;
00222
00224 virtual int getLine() const
00225 {
00226 return inputState->line;
00227 }
00228
00230 virtual void setLine(int l)
00231 {
00232 inputState->line = l;
00233 }
00234
00236 virtual int getColumn() const
00237 {
00238 return inputState->column;
00239 }
00241 virtual void setColumn(int c)
00242 {
00243 inputState->column = c;
00244 }
00245
00247 virtual const ANTLR_USE_NAMESPACE(std)string& getFilename() const
00248 {
00249 return inputState->filename;
00250 }
00252 virtual void setFilename(const ANTLR_USE_NAMESPACE(std)string& f)
00253 {
00254 inputState->filename = f;
00255 }
00256
00257 virtual bool getCommitToPath() const
00258 {
00259 return commitToPath;
00260 }
00261
00262 virtual void setCommitToPath(bool commit)
00263 {
00264 commitToPath = commit;
00265 }
00266
00268 virtual const ANTLR_USE_NAMESPACE(std)string& getText() const
00269 {
00270 return text;
00271 }
00272
00273 virtual void setText(const ANTLR_USE_NAMESPACE(std)string& s)
00274 {
00275 text = s;
00276 }
00277
00278 virtual void resetText()
00279 {
00280 text = "";
00281 inputState->tokenStartColumn = inputState->column;
00282 inputState->tokenStartLine = inputState->line;
00283 }
00284
00285 virtual RefToken getTokenObject() const
00286 {
00287 return _returnToken;
00288 }
00289
00293 virtual void newline()
00294 {
00295 ++inputState->line;
00296 inputState->column = 1;
00297 }
00298
00302 virtual void tab()
00303 {
00304 int c = getColumn();
00305 int nc = ( ((c-1)/tabsize) + 1) * tabsize + 1;
00306 setColumn( nc );
00307 }
00309 int setTabsize( int size )
00310 {
00311 int oldsize = tabsize;
00312 tabsize = size;
00313 return oldsize;
00314 }
00316 int getTabSize() const
00317 {
00318 return tabsize;
00319 }
00320
00322 void panic();
00324 void panic(const ANTLR_USE_NAMESPACE(std)string& s);
00325
00327 virtual void reportError(const RecognitionException& e);
00328
00330 virtual void reportError(const ANTLR_USE_NAMESPACE(std)string& s);
00331
00333 virtual void reportWarning(const ANTLR_USE_NAMESPACE(std)string& s);
00334
00335 virtual InputBuffer& getInputBuffer()
00336 {
00337 return inputState->getInput();
00338 }
00339
00340 virtual LexerSharedInputState getInputState()
00341 {
00342 return inputState;
00343 }
00344
00347 virtual void setInputState(LexerSharedInputState state)
00348 {
00349 inputState = state;
00350 }
00351
00353 virtual void setTokenObjectFactory(factory_type factory)
00354 {
00355 tokenFactory = factory;
00356 }
00357
00361 virtual int testLiteralsTable(int ttype) const
00362 {
00363 ANTLR_USE_NAMESPACE(std)map<ANTLR_USE_NAMESPACE(std)string,int,CharScannerLiteralsLess>::const_iterator i = literals.find(text);
00364 if (i != literals.end())
00365 ttype = (*i).second;
00366 return ttype;
00367 }
00368
00374 virtual int testLiteralsTable(const ANTLR_USE_NAMESPACE(std)string& txt,int ttype) const
00375 {
00376 ANTLR_USE_NAMESPACE(std)map<ANTLR_USE_NAMESPACE(std)string,int,CharScannerLiteralsLess>::const_iterator i = literals.find(txt);
00377 if (i != literals.end())
00378 ttype = (*i).second;
00379 return ttype;
00380 }
00381
00383 virtual int toLower(int c) const
00384 {
00385
00386
00387
00388 return (c == EOF_CHAR ? EOF_CHAR : tolower(c));
00389 }
00390
00406 virtual void uponEOF()
00407 {
00408 }
00409
00411 virtual void traceIndent();
00412 virtual void traceIn(const char* rname);
00413 virtual void traceOut(const char* rname);
00414
00415 #ifndef NO_STATIC_CONSTS
00416 static const int EOF_CHAR = EOF;
00417 #else
00418 enum {
00419 EOF_CHAR = EOF
00420 };
00421 #endif
00422 protected:
00423 ANTLR_USE_NAMESPACE(std)string text;
00424
00425 bool saveConsumedInput;
00426 factory_type tokenFactory;
00427 bool caseSensitive;
00428 ANTLR_USE_NAMESPACE(std)map<ANTLR_USE_NAMESPACE(std)string,int,CharScannerLiteralsLess> literals;
00429
00430 RefToken _returnToken;
00431
00433 LexerSharedInputState inputState;
00434
00439 bool commitToPath;
00440
00441 int tabsize;
00442
00444 virtual RefToken makeToken(int t)
00445 {
00446 RefToken tok = tokenFactory();
00447 tok->setType(t);
00448 tok->setColumn(inputState->tokenStartColumn);
00449 tok->setLine(inputState->tokenStartLine);
00450 return tok;
00451 }
00452
00455 class Tracer {
00456 private:
00457 CharScanner* parser;
00458 const char* text;
00459
00460 Tracer(const Tracer& other);
00461 Tracer& operator=(const Tracer& other);
00462 public:
00463 Tracer( CharScanner* p,const char* t )
00464 : parser(p), text(t)
00465 {
00466 parser->traceIn(text);
00467 }
00468 ~Tracer()
00469 {
00470 parser->traceOut(text);
00471 }
00472 };
00473
00474 int traceDepth;
00475 private:
00476 CharScanner( const CharScanner& other );
00477 CharScanner& operator=( const CharScanner& other );
00478
00479 #ifndef NO_STATIC_CONSTS
00480 static const int NO_CHAR = 0;
00481 #else
00482 enum {
00483 NO_CHAR = 0
00484 };
00485 #endif
00486 };
00487
00488 inline int CharScanner::LA(int i)
00489 {
00490 int c = inputState->getInput().LA(i);
00491
00492 if ( caseSensitive )
00493 return c;
00494 else
00495 return toLower(c);
00496 }
00497
00498 inline bool CharScannerLiteralsLess::operator() (const ANTLR_USE_NAMESPACE(std)string& x,const ANTLR_USE_NAMESPACE(std)string& y) const
00499 {
00500 if (scanner->getCaseSensitiveLiterals())
00501 return ANTLR_USE_NAMESPACE(std)less<ANTLR_USE_NAMESPACE(std)string>()(x,y);
00502 else
00503 {
00504 #ifdef NO_STRCASECMP
00505 return (stricmp(x.c_str(),y.c_str())<0);
00506 #else
00507 return (strcasecmp(x.c_str(),y.c_str())<0);
00508 #endif
00509 }
00510 }
00511
00512 #ifdef ANTLR_CXX_SUPPORTS_NAMESPACE
00513 }
00514 #endif
00515
00516 #endif //INC_CharScanner_hpp__
This file is part of the documentation for KDevelop Version 3.1.2.