1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16 """Tokenize DNS master file format"""
17
18 import cStringIO
19 import sys
20
21 import dns.exception
22 import dns.name
23 import dns.ttl
24
# Characters that end an unquoted token.  Kept as a dict so that membership
# tests use the same lookup style as the tokenizer's 'delimiters' attribute.
_DELIMITERS = dict.fromkeys((' ', '\t', '\n', ';', '(', ')', '"'), True)

# While a quoted string is being read only the closing quote ends the token.
_QUOTING_DELIMITERS = dict.fromkeys(('"',), True)

# Token type codes.
(EOF,
 EOL,
 WHITESPACE,
 IDENTIFIER,
 QUOTED_STRING,
 COMMENT,
 DELIMITER) = range(7)
43
# NOTE(review): the class header is not visible in this view; the base class
# below is reconstructed and should be confirmed against the real file.
class UngetBufferFull(dns.exception.DNSException):
    """An unget was attempted while the corresponding one-slot unget
    buffer was already occupied."""
48
50 """A DNS master file format token.
51
52 @ivar ttype: The token type
53 @type ttype: int
54 @ivar value: The token value
55 @type value: string
56 @ivar has_escape: Does the token value contain escapes?
57 @type has_escape: bool
58 """
59
60 - def __init__(self, ttype, value='', has_escape=False):
61 """Initialize a token instance.
62
63 @param ttype: The token type
64 @type ttype: int
65 @param value: The token value
66 @type value: string
67 @param has_escape: Does the token value contain escapes?
68 @type has_escape: bool
69 """
70 self.ttype = ttype
71 self.value = value
72 self.has_escape = has_escape
73
def is_eof(self):
    """Report whether this token has type EOF.

    @rtype: bool
    """
    return EOF == self.ttype
76
def is_eol(self):
    """Report whether this token has type EOL.

    @rtype: bool
    """
    return EOL == self.ttype
79
82
85
88
91
94
def is_eol_or_eof(self):
    """Report whether this token has type EOL or type EOF.

    @rtype: bool
    """
    return self.ttype in (EOL, EOF)
97
def __eq__(self, other):
    """Compare two tokens by type and value.

    The has_escape flag does not take part in the comparison, and a
    non-Token operand is never equal.
    """
    if isinstance(other, Token):
        return self.ttype == other.ttype and self.value == other.value
    return False
103
def __ne__(self, other):
    """Return True unless I{other} is a Token with the same type and value.

    The has_escape flag does not take part in the comparison.
    """
    if isinstance(other, Token):
        return self.ttype != other.ttype or self.value != other.value
    return True
109
111 return '%d "%s"' % (self.ttype, self.value)
112
def unescape(self):
    """Return a token whose value has its backslash escapes resolved.

    A backslash followed by three decimal digits is replaced by the
    character with that decimal code; a backslash followed by any other
    character is replaced by that character.  If the token is not flagged
    as containing escapes, the token itself is returned unchanged.

    @raises dns.exception.UnexpectedEnd: the value ends in the middle of
    an escape sequence
    @raises dns.exception.SyntaxError: a decimal escape is not made of
    three digits, or its value is greater than 255
    @rtype: Token object
    """
    if not self.has_escape:
        return self
    text = self.value
    l = len(text)
    # Collect the pieces in a list and join once, instead of growing a
    # string one character at a time.
    pieces = []
    i = 0
    while i < l:
        c = text[i]
        i += 1
        if c == '\\':
            if i >= l:
                raise dns.exception.UnexpectedEnd
            c = text[i]
            i += 1
            if c.isdigit():
                # \DDD form: two more characters are required.
                if i >= l:
                    raise dns.exception.UnexpectedEnd
                c2 = text[i]
                i += 1
                if i >= l:
                    raise dns.exception.UnexpectedEnd
                c3 = text[i]
                i += 1
                if not (c2.isdigit() and c3.isdigit()):
                    raise dns.exception.SyntaxError
                codepoint = int(c) * 100 + int(c2) * 10 + int(c3)
                # Reject values that do not fit in one octet here, rather
                # than letting chr() raise an unrelated ValueError.
                if codepoint > 255:
                    raise dns.exception.SyntaxError
                c = chr(codepoint)
        pieces.append(c)
    return Token(self.ttype, ''.join(pieces))
141
142
143
146
148 return iter((self.ttype, self.value))
149
151 if i == 0:
152 return self.ttype
153 elif i == 1:
154 return self.value
155 else:
156 raise IndexError
157
159 """A DNS master file format tokenizer.
160
A token is a L{Token} object carrying a type, a value and an escape flag;
it can also be iterated or indexed like a (type, value) pair, where
I{type} is an int and I{value} is a string.  The valid types are EOF, EOL,
WHITESPACE, IDENTIFIER, QUOTED_STRING, COMMENT, and DELIMITER.
164
165 @ivar file: The file to tokenize
166 @type file: file
167 @ivar ungotten_char: The most recently ungotten character, or None.
168 @type ungotten_char: string
169 @ivar ungotten_token: The most recently ungotten token, or None.
@type ungotten_token: Token object
171 @ivar multiline: The current multiline level. This value is increased
172 by one every time a '(' delimiter is read, and decreased by one every time
173 a ')' delimiter is read.
174 @type multiline: int
175 @ivar quoting: This variable is true if the tokenizer is currently
176 reading a quoted string.
177 @type quoting: bool
178 @ivar eof: This variable is true if the tokenizer has encountered EOF.
179 @type eof: bool
180 @ivar delimiters: The current delimiter dictionary.
181 @type delimiters: dict
182 @ivar line_number: The current line number
183 @type line_number: int
184 @ivar filename: A filename that will be returned by the L{where} method.
185 @type filename: string
186 """
187
def __init__(self, f=sys.stdin, filename=None):
    """Set up a tokenizer over a file or a string.

    @param f: The input to tokenize.  The default is sys.stdin.  If a
    string is given, its contents are used as the input.
    @type f: file or string
    @param filename: The name reported by the L{where} method.  If
    omitted, '<string>', '<stdin>' or '<file>' is used depending on the
    kind of input.
    @type filename: string
    """

    from_string = isinstance(f, str)
    if from_string:
        # Wrap string input so it can be read like a file.
        f = cStringIO.StringIO(f)
    if filename is None:
        if from_string:
            filename = '<string>'
        elif f is sys.stdin:
            filename = '<stdin>'
        else:
            filename = '<file>'

    self.file = f
    self.filename = filename
    self.line_number = 1
    self.eof = False
    # One-slot pushback buffers for characters and tokens.
    self.ungotten_char = None
    self.ungotten_token = None
    # Scanner state: parenthesis depth and quoted-string mode.
    self.multiline = 0
    self.quoting = False
    self.delimiters = _DELIMITERS
219
221 """Read a character from input.
222 @rtype: string
223 """
224
225 if self.ungotten_char is None:
226 if self.eof:
227 c = ''
228 else:
229 c = self.file.read(1)
230 if c == '':
231 self.eof = True
232 elif c == '\n':
233 self.line_number += 1
234 else:
235 c = self.ungotten_char
236 self.ungotten_char = None
237 return c
238
def where(self):
    """Report the tokenizer's current position in the input.

    @rtype: (string, int) tuple.  The first item is the filename of
    the input, the second is the current line number.
    """

    name = self.filename
    line = self.line_number
    return (name, line)
247
249 """Unget a character.
250
251 The unget buffer for characters is only one character large; it is
252 an error to try to unget a character when the unget buffer is not
253 empty.
254
255 @param c: the character to unget
256 @type c: string
257 @raises UngetBufferFull: there is already an ungotten char
258 """
259
260 if not self.ungotten_char is None:
261 raise UngetBufferFull
262 self.ungotten_char = c
263
265 """Consume input until a non-whitespace character is encountered.
266
267 The non-whitespace character is then ungotten, and the number of
268 whitespace characters consumed is returned.
269
270 If the tokenizer is in multiline mode, then newlines are whitespace.
271
272 @rtype: int
273 """
274
275 skipped = 0
276 while True:
277 c = self._get_char()
278 if c != ' ' and c != '\t':
279 if (c != '\n') or not self.multiline:
280 self._unget_char(c)
281 return skipped
282 skipped += 1
283
def get(self, want_leading=False, want_comment=False):
    """Get the next token.

    @param want_leading: If True, return a WHITESPACE token if the
    first character read is whitespace.  The default is False.
    @type want_leading: bool
    @param want_comment: If True, return a COMMENT token if the
    first token read is a comment.  The default is False.
    @type want_comment: bool
    @rtype: Token object
    @raises dns.exception.UnexpectedEnd: input ended prematurely
    @raises dns.exception.SyntaxError: input was badly formed
    """

    if self.ungotten_token is not None:
        token = self.ungotten_token
        self.ungotten_token = None
        # An ungotten WHITESPACE or COMMENT token is handed back only if
        # the caller asked for that kind; otherwise it is dropped and
        # scanning continues from the input.
        if token.is_whitespace():
            if want_leading:
                return token
        elif token.is_comment():
            if want_comment:
                return token
        else:
            return token
    skipped = self.skip_whitespace()
    if want_leading and skipped > 0:
        return Token(WHITESPACE, ' ')
    token = ''
    ttype = IDENTIFIER
    has_escape = False
    while True:
        c = self._get_char()
        if c == '' or c in self.delimiters:
            if c == '' and self.quoting:
                # Input ended inside a quoted string.
                raise dns.exception.UnexpectedEnd
            if token == '' and ttype != QUOTED_STRING:
                # Nothing accumulated yet, so the delimiter itself decides
                # what happens next.
                if c == '(':
                    self.multiline += 1
                    self.skip_whitespace()
                    continue
                elif c == ')':
                    if self.multiline <= 0:
                        raise dns.exception.SyntaxError
                    self.multiline -= 1
                    self.skip_whitespace()
                    continue
                elif c == '"':
                    if not self.quoting:
                        # Opening quote: only '"' delimits from here on.
                        self.quoting = True
                        self.delimiters = _QUOTING_DELIMITERS
                        ttype = QUOTED_STRING
                        continue
                    else:
                        self.quoting = False
                        self.delimiters = _DELIMITERS
                        self.skip_whitespace()
                        continue
                elif c == '\n':
                    return Token(EOL, '\n')
                elif c == ';':
                    # Collect the comment text up to end of line or input.
                    while True:
                        c = self._get_char()
                        if c == '\n' or c == '':
                            break
                        token += c
                    if want_comment:
                        # Push the terminator back so the next call sees it.
                        self._unget_char(c)
                        return Token(COMMENT, token)
                    elif c == '':
                        if self.multiline:
                            raise dns.exception.SyntaxError('unbalanced parentheses')
                        return Token(EOF)
                    elif self.multiline:
                        # Inside parentheses the newline ending a comment
                        # is just whitespace; discard the comment text.
                        self.skip_whitespace()
                        token = ''
                        continue
                    else:
                        return Token(EOL, '\n')
                else:
                    token = c
                    ttype = DELIMITER
            else:
                # The delimiter ends the current token; leave it for the
                # next call.
                self._unget_char(c)
            break
        elif self.quoting:
            if c == '\\':
                c = self._get_char()
                if c == '':
                    raise dns.exception.UnexpectedEnd
                if c.isdigit():
                    c2 = self._get_char()
                    if c2 == '':
                        raise dns.exception.UnexpectedEnd
                    c3 = self._get_char()
                    # Test the character just read (c3), so a truncated
                    # escape is reported as UnexpectedEnd.
                    if c3 == '':
                        raise dns.exception.UnexpectedEnd
                    if not (c2.isdigit() and c3.isdigit()):
                        raise dns.exception.SyntaxError
                    codepoint = int(c) * 100 + int(c2) * 10 + int(c3)
                    # Reject values that do not fit in one octet here,
                    # rather than letting chr() raise a ValueError.
                    if codepoint > 255:
                        raise dns.exception.SyntaxError
                    c = chr(codepoint)
            elif c == '\n':
                raise dns.exception.SyntaxError('newline in quoted string')
        elif c == '\\':
            # Escape outside a quoted string: keep the backslash and the
            # following character verbatim and flag the token, so that
            # Token.unescape() can resolve it later.
            token += c
            has_escape = True
            c = self._get_char()
            if c == '' or c == '\n':
                raise dns.exception.UnexpectedEnd
        token += c
    if token == '' and ttype != QUOTED_STRING:
        if self.multiline:
            raise dns.exception.SyntaxError('unbalanced parentheses')
        ttype = EOF
    return Token(ttype, token, has_escape)
405
def unget(self, token):
    """Push one token back so that it is considered by the next L{get}.

    Only a single token can be held; pushing back a second one before
    the first has been consumed is an error.

    @param token: the token to unget
    @type token: Token object
    @raises UngetBufferFull: there is already an ungotten token
    """

    if self.ungotten_token is not None:
        raise UngetBufferFull
    self.ungotten_token = token
421
def next(self):
    """Return the next token for iteration, stopping at EOF.

    @raises StopIteration: the next token is EOF
    @rtype: Token object
    """

    tok = self.get()
    if not tok.is_eof():
        return tok
    raise StopIteration
431
434
435
436
def get_int(self):
    """Read the next token and return it as a non-negative integer.

    @raises dns.exception.SyntaxError: the token is not an identifier,
    or is not made up entirely of digits
    @rtype: int
    """

    tok = self.get().unescape()
    if tok.is_identifier():
        text = tok.value
        if text.isdigit():
            return int(text)
        raise dns.exception.SyntaxError('expecting an integer')
    raise dns.exception.SyntaxError('expecting an identifier')
450
def get_uint8(self):
    """Read the next token and return it as an integer in 0..255.

    @raises dns.exception.SyntaxError: the token is not an integer or
    is out of range
    @rtype: int
    """

    number = self.get_int()
    if not 0 <= number <= 255:
        raise dns.exception.SyntaxError('%d is not an unsigned 8-bit integer' % number)
    return number
463
def get_uint16(self):
    """Read the next token and return it as an integer in 0..65535.

    @raises dns.exception.SyntaxError: the token is not an integer or
    is out of range
    @rtype: int
    """

    number = self.get_int()
    if not 0 <= number <= 65535:
        raise dns.exception.SyntaxError('%d is not an unsigned 16-bit integer' % number)
    return number
476
def get_uint32(self):
    """Read the next token and interpret it as a 32-bit unsigned
    integer.

    The accepted range is 0 through 4294967295 (2**32 - 1) inclusive.

    @raises dns.exception.SyntaxError: the token is not an identifier,
    is not made up entirely of digits, or is out of range
    @rtype: long
    """

    token = self.get().unescape()
    if not token.is_identifier():
        raise dns.exception.SyntaxError('expecting an identifier')
    if not token.value.isdigit():
        raise dns.exception.SyntaxError('expecting an integer')
    value = long(token.value)
    # The largest unsigned 32-bit value is 2**32 - 1; 2**32 itself must
    # be rejected.
    if value < 0 or value > 0xFFFFFFFF:
        raise dns.exception.SyntaxError('%d is not an unsigned 32-bit integer' % value)
    return value
494
506
# NOTE(review): the def line is not visible in this view; the 'origin'
# parameter is reconstructed and should be confirmed against the real file.
def get_identifier(self, origin=None):
    """Read the next token and return its value, which must be an
    identifier.

    @raises dns.exception.SyntaxError: the token is not an identifier
    @rtype: string
    """

    tok = self.get().unescape()
    if tok.is_identifier():
        return tok.value
    raise dns.exception.SyntaxError('expecting an identifier')
518
# NOTE(review): the def line is not visible in this view; the default for
# 'origin' is reconstructed and should be confirmed against the real file.
def get_name(self, origin=None):
    """Read the next token and convert it to a DNS name.

    The token text is passed to dns.name.from_text together with
    I{origin}.

    @raises dns.exception.SyntaxError: the token is not an identifier
    @rtype: dns.name.Name object"""

    tok = self.get()
    if tok.is_identifier():
        return dns.name.from_text(tok.value, origin)
    raise dns.exception.SyntaxError('expecting an identifier')
529
def get_eol(self):
    """Read the next token, which must be EOL or EOF, and return its
    value.

    @raises dns.exception.SyntaxError: the token is neither EOL nor EOF
    @rtype: string
    """

    tok = self.get()
    if tok.is_eol_or_eof():
        return tok.value
    raise dns.exception.SyntaxError('expected EOL or EOF, got %d "%s"' % (tok.ttype, tok.value))
542
548