KDevelop API Documentation

CharScanner.hpp

Go to the documentation of this file.
00001 #ifndef INC_CharScanner_hpp__
00002 #define INC_CharScanner_hpp__
00003 
00004 /* ANTLR Translator Generator
00005  * Project led by Terence Parr at http://www.jGuru.com
00006  * Software rights: http://www.antlr.org/RIGHTS.html
00007  *
00008  * $Id: CharScanner.hpp,v 1.3 2003/11/21 17:31:01 aclu Exp $
00009  */
00010 
00011 #include <antlr/config.hpp>
00012 
00013 #include <map>
00014 
00015 #ifdef HAS_NOT_CCTYPE_H
00016 #include <ctype.h>
00017 #else
00018 #include <cctype>
00019 #endif
00020 
00021 #include <antlr/TokenStream.hpp>
00022 #include <antlr/RecognitionException.hpp>
00023 #include <antlr/MismatchedCharException.hpp>
00024 #include <antlr/InputBuffer.hpp>
00025 #include <antlr/BitSet.hpp>
00026 #include <antlr/LexerSharedInputState.hpp>
00027 
00028 #ifdef ANTLR_CXX_SUPPORTS_NAMESPACE
00029 namespace antlr {
00030 #endif
00031 
00032 class ANTLR_API CharScanner;
00033 
00034 ANTLR_C_USING(tolower)
00035 
00036 #ifdef ANTLR_REALLY_NO_STRCASECMP
00037 // Apparently, neither strcasecmp nor stricmp is standard, and Codewarrior
00038 // on the mac has neither...
/**
 * Fallback case-insensitive comparison of two NUL-terminated strings for
 * platforms that provide neither strcasecmp nor stricmp.
 *
 * Both strings are folded character by character with tolower() and the
 * folded values are compared as unsigned character codes.
 *
 * @param s1 first string; dereferenced unconditionally, so must not be null
 * @param s2 second string; dereferenced unconditionally, so must not be null
 * @return -1 if s1 orders before s2, 1 if it orders after, 0 if the strings
 *         are equal ignoring case
 */
inline int strcasecmp(const char *s1, const char *s2)
{
    for (;;)
    {
        // tolower() is only defined for arguments representable as unsigned
        // char (or EOF), so a plain char that may be negative must be
        // converted first. Keeping the results as int also makes bytes with
        // the high bit set compare above ASCII instead of below it.
        const int c1 = tolower(static_cast<unsigned char>(*s1++));
        const int c2 = tolower(static_cast<unsigned char>(*s2++));
        if (c1 < c2) return -1;
        if (c1 > c2) return 1;
        if (c1 == 0) return 0;      // both strings ended at the same position
    }
}
00050 #else
00051 #ifdef NO_STRCASECMP
00052 ANTLR_C_USING(stricmp)
00053 #else
00054 ANTLR_C_USING(strcasecmp)
00055 #endif
00056 #endif
00057 
00060 class ANTLR_API CharScannerLiteralsLess : public ANTLR_USE_NAMESPACE(std)binary_function<ANTLR_USE_NAMESPACE(std)string,ANTLR_USE_NAMESPACE(std)string,bool> {
00061 private:
00062     const CharScanner* scanner;
00063 public:
00064 #ifdef NO_TEMPLATE_PARTS
00065     CharScannerLiteralsLess(); // not really used
00066 #endif
00067     CharScannerLiteralsLess(const CharScanner* theScanner)
00068     : scanner(theScanner)
00069     {
00070     }
00071     bool operator() (const ANTLR_USE_NAMESPACE(std)string& x,const ANTLR_USE_NAMESPACE(std)string& y) const;
00072 // defaults are good enough..
00073     //  CharScannerLiteralsLess(const CharScannerLiteralsLess&);
00074     //  CharScannerLiteralsLess& operator=(const CharScannerLiteralsLess&);
00075 };
00076 
00079 class ANTLR_API CharScanner : public TokenStream {
00080 protected:
00081     typedef RefToken (*factory_type)();
00082 public:
00083     CharScanner(InputBuffer& cb, bool case_sensitive );
00084     CharScanner(InputBuffer* cb, bool case_sensitive );
00085     CharScanner(const LexerSharedInputState& state, bool case_sensitive );
00086 
00087     virtual ~CharScanner()
00088     {
00089     }
00090 
00091     virtual int LA(int i);
00092 
00093     virtual void append(char c)
00094     {
00095         if (saveConsumedInput) {
00096             int l = text.length();
00097             if ((l%256) == 0)
00098                 text.reserve(l+256);
00099             text.replace(l,0,&c,1);
00100         }
00101     }
00102 
00103     virtual void append(const ANTLR_USE_NAMESPACE(std)string& s)
00104     {
00105         if (saveConsumedInput)
00106             text+=s;
00107     }
00108 
00109     virtual void commit()
00110     {
00111         inputState->getInput().commit();
00112     }
00113 
00114     virtual void consume();
00115 
00117     virtual void consumeUntil(int c)
00118     {
00119         for(;;)
00120         {
00121             int la_1 = LA(1);
00122             if( la_1 == EOF_CHAR || la_1 == c )
00123                 break;
00124             consume();
00125         }
00126     }
00127 
00129     virtual void consumeUntil(const BitSet& set)
00130     {
00131         for(;;)
00132         {
00133             int la_1 = LA(1);
00134             if( la_1 == EOF_CHAR || set.member(la_1) )
00135                 break;
00136             consume();
00137         }
00138     }
00139 
00141     virtual int mark()
00142     {
00143         return inputState->getInput().mark();
00144     }
00146     virtual void rewind(int pos)
00147     {
00148         inputState->getInput().rewind(pos);
00149     }
00150 
00152     virtual void match(int c)
00153     {
00154         int la_1 = LA(1);
00155         if ( la_1 != c )
00156             throw MismatchedCharException(la_1, c, false, this);
00157         consume();
00158     }
00159 
00163     virtual void match(const BitSet& b)
00164     {
00165         if (!b.member(LA(1))) {
00166             throw MismatchedCharException(LA(1),b,false,this);
00167         }
00168         consume();
00169     }
00170 
00172     virtual void match(const ANTLR_USE_NAMESPACE(std)string& s)
00173     {
00174         int len = s.length();
00175 
00176         for (int i = 0; i < len; i++)
00177         {
00178             int la_1 = LA(1);
00179 
00180             if ( la_1 != s[i] )
00181                 throw MismatchedCharException(la_1, s[i], false, this);
00182 
00183             consume();
00184         }
00185     }
00189     virtual void matchNot(int c)
00190     {
00191         int la_1 = LA(1);
00192 
00193         if ( la_1 == c )
00194             throw MismatchedCharException(la_1, c, true, this);
00195 
00196         consume();
00197     }
00201     virtual void matchRange(int c1, int c2)
00202     {
00203         int la_1 = LA(1);
00204 
00205         if ( la_1 < c1 || la_1 > c2 )
00206             throw MismatchedCharException(la_1, c1, c2, false, this);
00207 
00208         consume();
00209     }
00210 
00211     virtual bool getCaseSensitive() const
00212     {
00213         return caseSensitive;
00214     }
00215 
00216     virtual void setCaseSensitive(bool t)
00217     {
00218         caseSensitive = t;
00219     }
00220 
00221     virtual bool getCaseSensitiveLiterals() const=0;
00222 
00224     virtual int getLine() const
00225     {
00226         return inputState->line;
00227     }
00228 
00230     virtual void setLine(int l)
00231     {
00232         inputState->line = l;
00233     }
00234 
00236     virtual int getColumn() const
00237     {
00238         return inputState->column;
00239     }
00241     virtual void setColumn(int c)
00242     {
00243         inputState->column = c;
00244     }
00245 
00247     virtual const ANTLR_USE_NAMESPACE(std)string& getFilename() const
00248     {
00249         return inputState->filename;
00250     }
00252     virtual void setFilename(const ANTLR_USE_NAMESPACE(std)string& f)
00253     {
00254         inputState->filename = f;
00255     }
00256 
00257     virtual bool getCommitToPath() const
00258     {
00259         return commitToPath;
00260     }
00261 
00262     virtual void setCommitToPath(bool commit)
00263     {
00264         commitToPath = commit;
00265     }
00266 
00268     virtual const ANTLR_USE_NAMESPACE(std)string& getText() const
00269     {
00270         return text;
00271     }
00272 
00273     virtual void setText(const ANTLR_USE_NAMESPACE(std)string& s)
00274     {
00275         text = s;
00276     }
00277 
00278     virtual void resetText()
00279     {
00280         text = "";
00281         inputState->tokenStartColumn = inputState->column;
00282         inputState->tokenStartLine = inputState->line;
00283     }
00284 
00285     virtual RefToken getTokenObject() const
00286     {
00287         return _returnToken;
00288     }
00289 
00293     virtual void newline()
00294     {
00295         ++inputState->line;
00296         inputState->column = 1;
00297     }
00298 
00302     virtual void tab()
00303     {
00304         int c = getColumn();
00305         int nc = ( ((c-1)/tabsize) + 1) * tabsize + 1;      // calculate tab stop
00306         setColumn( nc );
00307     }
00309     int setTabsize( int size )
00310     {
00311         int oldsize = tabsize;
00312         tabsize = size;
00313         return oldsize;
00314     }
00316     int getTabSize() const
00317     {
00318         return tabsize;
00319     }
00320 
00322     void panic();
00324     void panic(const ANTLR_USE_NAMESPACE(std)string& s);
00325 
00327     virtual void reportError(const RecognitionException& e);
00328 
00330     virtual void reportError(const ANTLR_USE_NAMESPACE(std)string& s);
00331 
00333     virtual void reportWarning(const ANTLR_USE_NAMESPACE(std)string& s);
00334 
00335     virtual InputBuffer& getInputBuffer()
00336     {
00337         return inputState->getInput();
00338     }
00339 
00340     virtual LexerSharedInputState getInputState()
00341     {
00342         return inputState;
00343     }
00344 
00347     virtual void setInputState(LexerSharedInputState state)
00348     {
00349         inputState = state;
00350     }
00351 
00353     virtual void setTokenObjectFactory(factory_type factory)
00354     {
00355         tokenFactory = factory;
00356     }
00357 
00361     virtual int testLiteralsTable(int ttype) const
00362     {
00363         ANTLR_USE_NAMESPACE(std)map<ANTLR_USE_NAMESPACE(std)string,int,CharScannerLiteralsLess>::const_iterator i = literals.find(text);
00364         if (i != literals.end())
00365             ttype = (*i).second;
00366         return ttype;
00367     }
00368 
00374     virtual int testLiteralsTable(const ANTLR_USE_NAMESPACE(std)string& txt,int ttype) const
00375     {
00376         ANTLR_USE_NAMESPACE(std)map<ANTLR_USE_NAMESPACE(std)string,int,CharScannerLiteralsLess>::const_iterator i = literals.find(txt);
00377         if (i != literals.end())
00378             ttype = (*i).second;
00379         return ttype;
00380     }
00381 
00383     virtual int toLower(int c) const
00384     {
00385         // test on EOF_CHAR for buggy (?) STLPort tolower (or HPUX tolower?)
00386         // also VC++ 6.0 does this. (see fix 422 (is reverted by this fix)
00387         // this one is more structural. Maybe make this configurable.
00388         return (c == EOF_CHAR ? EOF_CHAR : tolower(c));
00389     }
00390 
00406     virtual void uponEOF()
00407     {
00408     }
00409 
00411     virtual void traceIndent();
00412     virtual void traceIn(const char* rname);
00413     virtual void traceOut(const char* rname);
00414 
00415 #ifndef NO_STATIC_CONSTS
00416     static const int EOF_CHAR = EOF;
00417 #else
00418     enum {
00419         EOF_CHAR = EOF
00420     };
00421 #endif
00422 protected:
00423     ANTLR_USE_NAMESPACE(std)string text; 
00424 
00425     bool saveConsumedInput;
00426     factory_type tokenFactory;              
00427     bool caseSensitive;                         
00428     ANTLR_USE_NAMESPACE(std)map<ANTLR_USE_NAMESPACE(std)string,int,CharScannerLiteralsLess> literals; // set by subclass
00429 
00430     RefToken _returnToken;      
00431 
00433     LexerSharedInputState inputState;
00434 
00439     bool commitToPath;
00440 
00441     int tabsize;    
00442 
00444     virtual RefToken makeToken(int t)
00445     {
00446         RefToken tok = tokenFactory();
00447         tok->setType(t);
00448         tok->setColumn(inputState->tokenStartColumn);
00449         tok->setLine(inputState->tokenStartLine);
00450         return tok;
00451     }
00452 
00455     class Tracer {
00456     private:
00457         CharScanner* parser;
00458         const char* text;
00459 
00460         Tracer(const Tracer& other);                    // undefined
00461         Tracer& operator=(const Tracer& other);     // undefined
00462     public:
00463         Tracer( CharScanner* p,const char* t )
00464         : parser(p), text(t)
00465         {
00466             parser->traceIn(text);
00467         }
00468         ~Tracer()
00469         {
00470             parser->traceOut(text);
00471         }
00472     };
00473 
00474     int traceDepth;
00475 private:
00476     CharScanner( const CharScanner& other );                    // undefined
00477     CharScanner& operator=( const CharScanner& other ); // undefined
00478 
00479 #ifndef NO_STATIC_CONSTS
00480     static const int NO_CHAR = 0;
00481 #else
00482     enum {
00483         NO_CHAR = 0
00484     };
00485 #endif
00486 };
00487 
00488 inline int CharScanner::LA(int i)
00489 {
00490     int c = inputState->getInput().LA(i);
00491 
00492     if ( caseSensitive )
00493         return c;
00494     else
00495         return toLower(c);  // VC 6 tolower bug caught in toLower.
00496 }
00497 
00498 inline bool CharScannerLiteralsLess::operator() (const ANTLR_USE_NAMESPACE(std)string& x,const ANTLR_USE_NAMESPACE(std)string& y) const
00499 {
00500     if (scanner->getCaseSensitiveLiterals())
00501         return ANTLR_USE_NAMESPACE(std)less<ANTLR_USE_NAMESPACE(std)string>()(x,y);
00502     else
00503     {
00504 #ifdef NO_STRCASECMP
00505         return (stricmp(x.c_str(),y.c_str())<0);
00506 #else
00507         return (strcasecmp(x.c_str(),y.c_str())<0);
00508 #endif
00509     }
00510 }
00511 
00512 #ifdef ANTLR_CXX_SUPPORTS_NAMESPACE
00513 }
00514 #endif
00515 
00516 #endif //INC_CharScanner_hpp__
KDE Logo
This file is part of the documentation for KDevelop Version 3.1.2.
Documentation copyright © 1996-2004 the KDE developers.
Generated on Tue Feb 22 09:22:34 2005 by doxygen 1.3.9.1 written by Dimitri van Heesch, © 1997-2003