00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023 #ifndef _KJSLEXER_H_
00024 #define _KJSLEXER_H_
00025
00026 #include "ustring.h"
00027
00028
00029 namespace KJS {
00030
// Forward declarations: this header needs only the names, not the full
// definitions. Identifier is used further down solely through pointers
// (KJS::Identifier * and KJS::Identifier **).
00031 class Identifier;
00032
// NOTE(review): RegExp is not referenced anywhere else in this header as
// shown; confirm whether this forward declaration is still required.
00033 class RegExp;
00034
// Lexer: declaration of the KJS scanner class. This header only declares the
// interface; apart from the three inline accessors (lineNo, prevTerminator,
// hadError) every member function is defined in another translation unit.
// NOTE(review): the class stores several raw pointers (buffer8, buffer16,
// strings, identifiers, priv) and declares a destructor, but no copy
// constructor or copy assignment operator. Confirm who owns these pointers
// and whether copying a Lexer should be disallowed.
00035 class Lexer {
00036 public:
00037 Lexer();
00038 ~Lexer();
// Static accessor returning a Lexer pointer; presumably the lexer currently
// in use. TODO confirm against the definition.
00039 static Lexer *curr();
00040
// Takes a pointer to UChar data and its length; presumably the source text
// to scan. NOTE(review): ownership and lifetime of `c` are not visible here.
00041 void setCode(const UChar *c, unsigned int len);
// Returns an int; presumably the code of the next token. TODO confirm the
// return convention (end of input, errors) against the definition.
00042 int lex();
00043
// Reports yylineno + 1, i.e. the stored counter is one less than the
// line number handed to callers.
00044 int lineNo() const { return yylineno + 1; }
00045
// Returns the private `terminator` flag unchanged.
00046 bool prevTerminator() const { return terminator; }
00047
// Scanner states; the current one is kept in the private member `state`.
// Note that the enumerator `Identifier` hides the class KJS::Identifier
// inside Lexer's scope, which is why later declarations in this class spell
// the type as KJS::Identifier.
00048 enum State { Start,
00049 IdentifierOrKeyword,
00050 Identifier,
00051 InIdentifierOrKeyword,
00052 InIdentifier,
00053 InIdentifierUnicodeEscapeStart,
00054 InIdentifierUnicodeEscape,
00055 InSingleLineComment,
00056 InMultiLineComment,
00057 InNum,
00058 InNum0,
00059 InHex,
00060 InOctal,
00061 InDecimal,
00062 InExponentIndicator,
00063 InExponent,
00064 Hex,
00065 Octal,
00066 Number,
00067 String,
00068 Eof,
00069 InString,
00070 InEscapeSequence,
00071 InHexEscape,
00072 InUnicodeEscape,
00073 Other,
00074 Bad };
00075
// scanRegExp() returns a bool; `pattern` and `flags` are public UString
// members. Presumably scanRegExp() fills them in on success; verify against
// the definition.
00076 bool scanRegExp();
00077 UString pattern, flags;
// Returns the private `foundBad` flag unchanged.
00078 bool hadError() const { return foundBad; }
00079
// Static predicates, each taking a character as an unsigned short and
// returning bool. The exact character sets accepted are defined in the
// implementation, not in this header.
00080 static bool isWhiteSpace(unsigned short c);
00081 static bool isIdentLetter(unsigned short c);
00082 static bool isDecimalDigit(unsigned short c);
00083 static bool isHexDigit(unsigned short c);
00084 static bool isOctalDigit(unsigned short c);
00085
00086 private:
// Line counter; lineNo() reports this value plus one.
00087 int yylineno;
00088 bool done;
// Two buffers, one of char and one of UChar, each with its own size and
// position counters. Presumably filled by record8()/record16(); TODO
// confirm allocation and ownership in the implementation.
00089 char *buffer8;
00090 UChar *buffer16;
00091 unsigned int size8, size16;
00092 unsigned int pos8, pos16;
// Exposed read-only through prevTerminator().
00093 bool terminator;
// Scanner bookkeeping flags and token slots. Their meaning is defined by the
// implementation and cannot be determined from this header.
00094 bool restrKeyword;
00095
00096 bool delimited;
00097 bool skipLF;
00098 bool skipCR;
00099 bool convertNextIdentifier;
00100 int stackToken;
00101 int lastToken;
// Exposed read-only through hadError().
00102 bool foundBad;
00103
// Current value of the State enum declared above.
00104 State state;
00105 void setDone(State s);
00106 unsigned int pos;
00107 void shift(unsigned int p);
00108 void nextLine();
// Takes a narrow (char) string and returns an int; presumably a token code
// for keywords. TODO confirm the "not a keyword" return value.
00109 int lookupKeyword(const char *);
00110
// Receives four character values (c1..c4) and returns an int.
00111 int matchPunctuator(unsigned short c1, unsigned short c2,
00112 unsigned short c3, unsigned short c4);
// Const helpers mapping one character (singleEscape) or three characters
// (convertOctal) to a single unsigned short.
00113 unsigned short singleEscape(unsigned short c) const;
00114 unsigned short convertOctal(unsigned short c1, unsigned short c2,
00115 unsigned short c3) const;
// Access switches back to public here: the conversion helpers below are
// static and callable without a Lexer instance.
00116 public:
// Presumably interpret one or two hex-digit characters as a byte value, and
// four as a UChar. NOTE(review): behaviour for non-hex input is not visible
// here.
00117 static unsigned char convertHex(unsigned short c1);
00118 static unsigned char convertHex(unsigned short c1, unsigned short c2);
00119 static UChar convertUnicode(unsigned short c1, unsigned short c2,
00120 unsigned short c3, unsigned short c4);
00121
// globalClear() is declared only when KJS_DEBUG_MEM is defined.
00122 #ifdef KJS_DEBUG_MEM
00123
00126 static void globalClear();
00127 #endif
00128
// Public, takes no arguments; presumably called when a parse has finished.
// TODO confirm what it releases or resets.
00129 void doneParsing();
00130
00131 private:
00132
// record8 takes an unsigned short; record16 is overloaded for int and UChar.
// Presumably they append to buffer8 / buffer16 respectively; verify.
00133 void record8(unsigned short c);
00134 void record16(int c);
00135 void record16(UChar c);
00136
// Both take a UChar buffer plus an unsigned int `pos` and return a pointer.
// KJS:: qualification is needed because State::Identifier hides the class
// name inside this scope.
// NOTE(review): ownership of the returned pointers is not visible here.
00137 KJS::Identifier *makeIdentifier(UChar *buffer, unsigned int pos);
00138 UString *makeUString(UChar *buffer, unsigned int pos);
00139
// Pointer to constant UChar data and a length; presumably the input handed
// to setCode(). TODO confirm.
00140 const UChar *code;
00141 unsigned int length;
00142 int yycolumn;
// `bol` exists only when KJS_PURE_ECMA is NOT defined, so the object layout
// depends on that macro; every translation unit including this header must
// agree on its setting.
00143 #ifndef KJS_PURE_ECMA
00144 int bol;
00145 #endif
00146
00147
// Four ints; presumably the current character and three characters of
// lookahead. TODO confirm against shift().
00148 int current, next1, next2, next3;
00149
// Array of UString pointers with a count and a capacity.
// NOTE(review): presumably tracks the results of makeUString(); confirm.
00150 UString **strings;
00151 unsigned int numStrings;
00152 unsigned int stringsCapacity;
00153
// Array of KJS::Identifier pointers with a count and a capacity.
// NOTE(review): presumably tracks the results of makeIdentifier(); confirm.
00154 KJS::Identifier **identifiers;
00155 unsigned int numIdentifiers;
00156 unsigned int identifiersCapacity;
00157
00158
// Nested class declared but not defined here; only a pointer to it is
// stored, so its definition is not needed by code including this header.
00159 class LexerPrivate;
00160 LexerPrivate *priv;
00161 };
00162
00163 }
00164
00165 #endif