00001
#ifndef INC_CharScanner_hpp__
00002
#define INC_CharScanner_hpp__
00003
00004
00005
00006
00007
00008
00009
00010
00011
#include <antlr/config.hpp>
00012
00013
#include <map>
00014
00015
#ifdef HAS_NOT_CCTYPE_H
00016
#include <ctype.h>
00017
#else
00018
#include <cctype>
00019
#endif
00020
00021
#include <antlr/TokenStream.hpp>
00022
#include <antlr/RecognitionException.hpp>
00023
#include <antlr/MismatchedCharException.hpp>
00024
#include <antlr/InputBuffer.hpp>
00025
#include <antlr/BitSet.hpp>
00026
#include <antlr/LexerSharedInputState.hpp>
00027
00028
#ifdef ANTLR_CXX_SUPPORTS_NAMESPACE
00029
namespace antlr {
00030
#endif
00031
00032 class ANTLR_API CharScanner;
00033
00034
ANTLR_C_USING(tolower)
00035
00036 #ifdef ANTLR_REALLY_NO_STRCASECMP
00037
00038
00039
/**
 * Fallback case-insensitive comparison of two NUL-terminated C strings,
 * compiled only when ANTLR_REALLY_NO_STRCASECMP is defined.
 *
 * Both strings are walked in lock-step; every character is lower-cased with
 * tolower() before being compared.
 *
 * @param s1 first NUL-terminated string
 * @param s2 second NUL-terminated string
 * @return -1 if s1 orders before s2, 1 if it orders after, 0 if both strings
 *         are equal when case is ignored
 */
inline int strcasecmp(const char *s1, const char *s2)
{
    while (true)
    {
        // Widen through unsigned char: handing a negative plain char to
        // tolower() is undefined, and comparing signed chars would order
        // bytes >= 0x80 before ASCII instead of after it.
        const int c1 = tolower(static_cast<unsigned char>(*s1++));
        const int c2 = tolower(static_cast<unsigned char>(*s2++));

        if (c1 < c2)
            return -1;
        if (c1 > c2)
            return 1;
        // c1 == c2 here, so a zero means both strings ended together.
        if (c1 == 0)
            return 0;
    }
}
00050
#else
00051
#ifdef NO_STRCASECMP
00052
ANTLR_C_USING(stricmp)
00053 #
else
00054
ANTLR_C_USING(strcasecmp)
00055 #endif
00056
#endif
00057
00060 class ANTLR_API CharScannerLiteralsLess :
public ANTLR_USE_NAMESPACE(std)
binary_function<ANTLR_USE_NAMESPACE(std)string,ANTLR_USE_NAMESPACE(std)string,bool> {
00061
private:
00062 const CharScanner* scanner;
00063
public:
00064
#ifdef NO_TEMPLATE_PARTS
00065
CharScannerLiteralsLess();
00066
#endif
00067 CharScannerLiteralsLess(
const CharScanner* theScanner)
00068 : scanner(theScanner)
00069 {
00070 }
00071
bool operator() (
const ANTLR_USE_NAMESPACE(std)string& x,
const ANTLR_USE_NAMESPACE(std)string& y)
const;
00072
00073
00074
00075 };
00076
00079 class ANTLR_API CharScanner :
public TokenStream {
00080
protected:
00081 typedef RefToken (*
factory_type)();
00082
public:
00083
CharScanner(
InputBuffer& cb,
bool case_sensitive );
00084
CharScanner(
InputBuffer* cb,
bool case_sensitive );
00085
CharScanner(
const LexerSharedInputState& state,
bool case_sensitive );
00086
00087 virtual ~
CharScanner()
00088 {
00089 }
00090
00091
virtual int LA(
int i);
00092
00093 virtual void append(
char c)
00094 {
00095
if (saveConsumedInput) {
00096
int l =
text.length();
00097
if ((l%256) == 0)
00098
text.reserve(l+256);
00099
text.replace(l,0,&c,1);
00100 }
00101 }
00102
00103 virtual void append(
const ANTLR_USE_NAMESPACE(std)string& s)
00104 {
00105
if (saveConsumedInput)
00106
text+=s;
00107 }
00108
00109 virtual void commit()
00110 {
00111 inputState->getInput().commit();
00112 }
00113
00114
virtual void consume();
00115
00117 virtual void consumeUntil(
int c)
00118 {
00119
for(;;)
00120 {
00121
int la_1 = LA(1);
00122
if( la_1 == EOF_CHAR || la_1 == c )
00123
break;
00124 consume();
00125 }
00126 }
00127
00129 virtual void consumeUntil(
const BitSet& set)
00130 {
00131
for(;;)
00132 {
00133
int la_1 = LA(1);
00134
if( la_1 == EOF_CHAR || set.
member(la_1) )
00135
break;
00136 consume();
00137 }
00138 }
00139
00141 virtual int mark()
00142 {
00143
return inputState->getInput().mark();
00144 }
00146 virtual void rewind(
int pos)
00147 {
00148 inputState->getInput().rewind(pos);
00149 }
00150
00152 virtual void match(
int c)
00153 {
00154
int la_1 = LA(1);
00155
if ( la_1 != c )
00156
throw MismatchedCharException(la_1, c,
false,
this);
00157 consume();
00158 }
00159
00163 virtual void match(
const BitSet& b)
00164 {
00165
if (!b.
member(LA(1))) {
00166
throw MismatchedCharException(LA(1),b,
false,
this);
00167 }
00168 consume();
00169 }
00170
00172 virtual void match(
const ANTLR_USE_NAMESPACE(std)string& s)
00173 {
00174
int len = s.length();
00175
00176
for (
int i = 0; i <
len; i++)
00177 {
00178
int la_1 = LA(1);
00179
00180
if ( la_1 != s[i] )
00181
throw MismatchedCharException(la_1, s[i],
false,
this);
00182
00183 consume();
00184 }
00185 }
00189 virtual void matchNot(
int c)
00190 {
00191
int la_1 = LA(1);
00192
00193
if ( la_1 == c )
00194
throw MismatchedCharException(la_1, c,
true,
this);
00195
00196 consume();
00197 }
00201 virtual void matchRange(
int c1,
int c2)
00202 {
00203
int la_1 = LA(1);
00204
00205
if ( la_1 < c1 || la_1 > c2 )
00206
throw MismatchedCharException(la_1, c1, c2,
false,
this);
00207
00208 consume();
00209 }
00210
00211 virtual bool getCaseSensitive()
const
00212
{
00213
return caseSensitive;
00214 }
00215
00216 virtual void setCaseSensitive(
bool t)
00217 {
00218 caseSensitive = t;
00219 }
00220
00221
virtual bool getCaseSensitiveLiterals() const=0;
00222
00224 virtual
int getLine()
const
00225
{
00226
return inputState->line;
00227 }
00228
00230 virtual void setLine(
int l)
00231 {
00232 inputState->line = l;
00233 }
00234
00236 virtual int getColumn()
const
00237
{
00238
return inputState->column;
00239 }
00241 virtual void setColumn(
int c)
00242 {
00243 inputState->column = c;
00244 }
00245
00247 virtual const ANTLR_USE_NAMESPACE(std)string& getFilename()
const
00248
{
00249
return inputState->filename;
00250 }
00252 virtual void setFilename(
const ANTLR_USE_NAMESPACE(std)string& f)
00253 {
00254 inputState->filename = f;
00255 }
00256
00257 virtual bool getCommitToPath()
const
00258
{
00259
return commitToPath;
00260 }
00261
00262 virtual void setCommitToPath(
bool commit)
00263 {
00264 commitToPath = commit;
00265 }
00266
00268 virtual const ANTLR_USE_NAMESPACE(std)string& getText()
const
00269
{
00270
return text;
00271 }
00272
00273 virtual void setText(
const ANTLR_USE_NAMESPACE(std)string& s)
00274 {
00275
text = s;
00276 }
00277
00278 virtual void resetText()
00279 {
00280
text =
"";
00281 inputState->tokenStartColumn = inputState->column;
00282 inputState->tokenStartLine = inputState->line;
00283 }
00284
00285 virtual RefToken getTokenObject()
const
00286
{
00287
return _returnToken;
00288 }
00289
00293 virtual void newline()
00294 {
00295 ++inputState->line;
00296 inputState->column = 1;
00297 }
00298
00302 virtual void tab()
00303 {
00304
int c = getColumn();
00305
int nc = ( ((c-1)/tabsize) + 1) * tabsize + 1;
00306 setColumn( nc );
00307 }
00309 int setTabsize(
int size )
00310 {
00311
int oldsize = tabsize;
00312 tabsize =
size;
00313
return oldsize;
00314 }
00316 int getTabSize()
const
00317
{
00318
return tabsize;
00319 }
00320
00322
void panic();
00324
void panic(
const ANTLR_USE_NAMESPACE(std)string& s);
00325
00327
virtual void reportError(
const RecognitionException& e);
00328
00330
virtual void reportError(
const ANTLR_USE_NAMESPACE(std)string& s);
00331
00333
virtual void reportWarning(
const ANTLR_USE_NAMESPACE(std)string& s);
00334
00335 virtual InputBuffer& getInputBuffer()
00336 {
00337
return inputState->getInput();
00338 }
00339
00340 virtual LexerSharedInputState getInputState()
00341 {
00342
return inputState;
00343 }
00344
00347 virtual void setInputState(
LexerSharedInputState state)
00348 {
00349 inputState = state;
00350 }
00351
00353 virtual void setTokenObjectFactory(factory_type factory)
00354 {
00355 tokenFactory = factory;
00356 }
00357
00361 virtual int testLiteralsTable(
int ttype)
const
00362
{
00363
ANTLR_USE_NAMESPACE(std)map<ANTLR_USE_NAMESPACE(std)string,int,CharScannerLiteralsLess>::const_iterator i = literals.find(
text);
00364
if (i != literals.end())
00365 ttype = (*i).second;
00366
return ttype;
00367 }
00368
00374 virtual int testLiteralsTable(
const ANTLR_USE_NAMESPACE(std)string& txt,
int ttype)
const
00375
{
00376
ANTLR_USE_NAMESPACE(std)map<ANTLR_USE_NAMESPACE(std)string,int,CharScannerLiteralsLess>::const_iterator i = literals.find(txt);
00377
if (i != literals.end())
00378 ttype = (*i).second;
00379
return ttype;
00380 }
00381
00383 virtual int toLower(
int c)
const
00384
{
00385
00386
00387
00388
return (c == EOF_CHAR ? EOF_CHAR : tolower(c));
00389 }
00390
00406 virtual void uponEOF()
00407 {
00408 }
00409
00411
virtual void traceIndent();
00412
virtual void traceIn(
const char* rname);
00413
virtual void traceOut(
const char* rname);
00414
00415
#ifndef NO_STATIC_CONSTS
00416
static const int EOF_CHAR = EOF;
00417
#else
00418
enum {
00419 EOF_CHAR = EOF
00420 };
00421
#endif
00422
protected:
00423 ANTLR_USE_NAMESPACE(std)string text;
00424
00425 bool saveConsumedInput;
00426 factory_type tokenFactory;
00427 bool caseSensitive;
00428 ANTLR_USE_NAMESPACE(std)map<ANTLR_USE_NAMESPACE(std)string,
int,CharScannerLiteralsLess> literals;
00429
00430 RefToken _returnToken;
00431
00433 LexerSharedInputState inputState;
00434
00439 bool commitToPath;
00440
00441 int tabsize;
00442
00444 virtual
RefToken makeToken(
int t)
00445 {
00446
RefToken tok = tokenFactory();
00447 tok->setType(t);
00448 tok->setColumn(inputState->tokenStartColumn);
00449 tok->setLine(inputState->tokenStartLine);
00450
return tok;
00451 }
00452
00455 class Tracer {
00456
private:
00457 CharScanner* parser;
00458 const char*
text;
00459
00460
Tracer(
const Tracer& other);
00461
Tracer& operator=(
const Tracer& other);
00462
public:
00463 Tracer( CharScanner* p,
const char* t )
00464 : parser(p),
text(t)
00465 {
00466 parser->traceIn(
text);
00467 }
00468 ~
Tracer()
00469 {
00470 parser->traceOut(
text);
00471 }
00472 };
00473
00474 int traceDepth;
00475
private:
00476
CharScanner(
const CharScanner& other );
00477
CharScanner& operator=(
const CharScanner& other );
00478
00479
#ifndef NO_STATIC_CONSTS
00480
static const int NO_CHAR = 0;
00481
#else
00482
enum {
00483 NO_CHAR = 0
00484 };
00485
#endif
00486
};
00487
00488 inline int CharScanner::LA(
int i)
00489 {
00490
int c =
inputState->getInput().LA(i);
00491
00492
if (
caseSensitive )
00493
return c;
00494
else
00495
return toLower(c);
00496 }
00497
00498 inline bool CharScannerLiteralsLess::operator() (
const ANTLR_USE_NAMESPACE(std)string& x,
const ANTLR_USE_NAMESPACE(std)string& y)
const
00499
{
00500
if (
scanner->
getCaseSensitiveLiterals())
00501
return ANTLR_USE_NAMESPACE(std)less<ANTLR_USE_NAMESPACE(std)string>()(x,y);
00502
else
00503 {
00504
#ifdef NO_STRCASECMP
00505
return (stricmp(x.c_str(),y.c_str())<0);
00506
#else
00507
return (strcasecmp(x.c_str(),y.c_str())<0);
00508
#endif
00509
}
00510 }
00511
00512
#ifdef ANTLR_CXX_SUPPORTS_NAMESPACE
00513
}
00514
#endif
00515
00516
#endif //INC_CharScanner_hpp__