1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16 """Tokenize DNS master file format"""
17
18 import io
19 import sys
20
21 import dns.exception
22 import dns.name
23 import dns.ttl
24
# Characters that terminate a token when the tokenizer is not inside a
# quoted string; this is the tokenizer's default delimiter set.
_DELIMITERS = frozenset(' \t\n;()"')
# Delimiter set installed while reading a quoted string: only the closing
# double quote ends the token.
_QUOTING_DELIMITERS = frozenset('"')

# Token type codes, stored in a token's ttype attribute.
EOF = 0
EOL = 1
WHITESPACE = 2
IDENTIFIER = 3
QUOTED_STRING = 4
COMMENT = 5
DELIMITER = 6
35
37 """Raised when an attempt is made to unget a token when the unget
38 buffer is full."""
39 pass
40
42 """A DNS master file format token.
43
44 @ivar ttype: The token type
45 @type ttype: int
46 @ivar value: The token value
47 @type value: string
48 @ivar has_escape: Does the token value contain escapes?
49 @type has_escape: bool
50 """
51
52 - def __init__(self, ttype, value='', has_escape=False):
53 """Initialize a token instance.
54
55 @param ttype: The token type
56 @type ttype: int
57 @param value: The token value
58 @type value: string
59 @param has_escape: Does the token value contain escapes?
60 @type has_escape: bool
61 """
62 self.ttype = ttype
63 self.value = value
64 self.has_escape = has_escape
65
67 return self.ttype == EOF
68
70 return self.ttype == EOL
71
74
77
80
83
86
88 return (self.ttype == EOL or self.ttype == EOF)
89
91 if not isinstance(other, Token):
92 return False
93 return (self.ttype == other.ttype and
94 self.value == other.value)
95
97 if not isinstance(other, Token):
98 return True
99 return (self.ttype != other.ttype or
100 self.value != other.value)
101
103 return '%d "%s"' % (self.ttype, self.value)
104
106 if not self.has_escape:
107 return self
108 unescaped = ''
109 l = len(self.value)
110 i = 0
111 while i < l:
112 c = self.value[i]
113 i += 1
114 if c == '\\':
115 if i >= l:
116 raise dns.exception.UnexpectedEnd
117 c = self.value[i]
118 i += 1
119 if c.isdigit():
120 if i >= l:
121 raise dns.exception.UnexpectedEnd
122 c2 = self.value[i]
123 i += 1
124 if i >= l:
125 raise dns.exception.UnexpectedEnd
126 c3 = self.value[i]
127 i += 1
128 if not (c2.isdigit() and c3.isdigit()):
129 raise dns.exception.SyntaxError
130 c = chr(int(c) * 100 + int(c2) * 10 + int(c3))
131 unescaped += c
132 return Token(self.ttype, unescaped)
133
134
135
138
140 return iter((self.ttype, self.value))
141
143 if i == 0:
144 return self.ttype
145 elif i == 1:
146 return self.value
147 else:
148 raise IndexError
149
151 """A DNS master file format tokenizer.
152
153 A token is a (type, value) tuple, where I{type} is an int, and
154 I{value} is a string. The valid types are EOF, EOL, WHITESPACE,
155 IDENTIFIER, QUOTED_STRING, COMMENT, and DELIMITER.
156
157 @ivar file: The file to tokenize
158 @type file: file
159 @ivar ungotten_char: The most recently ungotten character, or None.
160 @type ungotten_char: string
161 @ivar ungotten_token: The most recently ungotten token, or None.
162 @type ungotten_token: (int, string) token tuple
163 @ivar multiline: The current multiline level. This value is increased
164 by one every time a '(' delimiter is read, and decreased by one every time
165 a ')' delimiter is read.
166 @type multiline: int
167 @ivar quoting: This variable is true if the tokenizer is currently
168 reading a quoted string.
169 @type quoting: bool
170 @ivar eof: This variable is true if the tokenizer has encountered EOF.
171 @type eof: bool
172 @ivar delimiters: The current delimiter dictionary.
173 @type delimiters: dict
174 @ivar line_number: The current line number
175 @type line_number: int
176 @ivar filename: A filename that will be returned by the L{where} method.
177 @type filename: string
178 """
179
180 - def __init__(self, f=sys.stdin, filename=None):
181 """Initialize a tokenizer instance.
182
183 @param f: The file to tokenize. The default is sys.stdin.
184 This parameter may also be a string, in which case the tokenizer
185 will take its input from the contents of the string.
186 @type f: file or string
187 @param filename: the name of the filename that the L{where} method
188 will return.
189 @type filename: string
190 """
191
192 if isinstance(f, str):
193 f = io.StringIO(f)
194 if filename is None:
195 filename = '<string>'
196 else:
197 if filename is None:
198 if f is sys.stdin:
199 filename = '<stdin>'
200 else:
201 filename = '<file>'
202 self.file = f
203 self.ungotten_char = None
204 self.ungotten_token = None
205 self.multiline = 0
206 self.quoting = False
207 self.eof = False
208 self.delimiters = _DELIMITERS
209 self.line_number = 1
210 self.filename = filename
211
213 """Read a character from input.
214 @rtype: string
215 """
216
217 if self.ungotten_char is None:
218 if self.eof:
219 c = ''
220 else:
221 c = self.file.read(1)
222 if c == '':
223 self.eof = True
224 elif c == '\n':
225 self.line_number += 1
226 else:
227 c = self.ungotten_char
228 self.ungotten_char = None
229 return c
230
232 """Return the current location in the input.
233
234 @rtype: (string, int) tuple. The first item is the filename of
235 the input, the second is the current line number.
236 """
237
238 return (self.filename, self.line_number)
239
241 """Unget a character.
242
243 The unget buffer for characters is only one character large; it is
244 an error to try to unget a character when the unget buffer is not
245 empty.
246
247 @param c: the character to unget
248 @type c: string
249 @raises UngetBufferFull: there is already an ungotten char
250 """
251
252 if not self.ungotten_char is None:
253 raise UngetBufferFull
254 self.ungotten_char = c
255
257 """Consume input until a non-whitespace character is encountered.
258
259 The non-whitespace character is then ungotten, and the number of
260 whitespace characters consumed is returned.
261
262 If the tokenizer is in multiline mode, then newlines are whitespace.
263
264 @rtype: int
265 """
266
267 skipped = 0
268 while True:
269 c = self._get_char()
270 if c != ' ' and c != '\t':
271 if (c != '\n') or not self.multiline:
272 self._unget_char(c)
273 return skipped
274 skipped += 1
275
276 - def get(self, want_leading = False, want_comment = False):
277 """Get the next token.
278
279 @param want_leading: If True, return a WHITESPACE token if the
280 first character read is whitespace. The default is False.
281 @type want_leading: bool
282 @param want_comment: If True, return a COMMENT token if the
283 first token read is a comment. The default is False.
284 @type want_comment: bool
285 @rtype: Token object
286 @raises dns.exception.UnexpectedEnd: input ended prematurely
287 @raises dns.exception.SyntaxError: input was badly formed
288 """
289
290 if not self.ungotten_token is None:
291 token = self.ungotten_token
292 self.ungotten_token = None
293 if token.is_whitespace():
294 if want_leading:
295 return token
296 elif token.is_comment():
297 if want_comment:
298 return token
299 else:
300 return token
301 skipped = self.skip_whitespace()
302 if want_leading and skipped > 0:
303 return Token(WHITESPACE, ' ')
304 token = ''
305 ttype = IDENTIFIER
306 has_escape = False
307 while True:
308 c = self._get_char()
309 if c == '' or c in self.delimiters:
310 if c == '' and self.quoting:
311 raise dns.exception.UnexpectedEnd
312 if token == '' and ttype != QUOTED_STRING:
313 if c == '(':
314 self.multiline += 1
315 self.skip_whitespace()
316 continue
317 elif c == ')':
318 if not self.multiline > 0:
319 raise dns.exception.SyntaxError
320 self.multiline -= 1
321 self.skip_whitespace()
322 continue
323 elif c == '"':
324 if not self.quoting:
325 self.quoting = True
326 self.delimiters = _QUOTING_DELIMITERS
327 ttype = QUOTED_STRING
328 continue
329 else:
330 self.quoting = False
331 self.delimiters = _DELIMITERS
332 self.skip_whitespace()
333 continue
334 elif c == '\n':
335 return Token(EOL, '\n')
336 elif c == ';':
337 while 1:
338 c = self._get_char()
339 if c == '\n' or c == '':
340 break
341 token += c
342 if want_comment:
343 self._unget_char(c)
344 return Token(COMMENT, token)
345 elif c == '':
346 if self.multiline:
347 raise dns.exception.SyntaxError('unbalanced parentheses')
348 return Token(EOF)
349 elif self.multiline:
350 self.skip_whitespace()
351 token = ''
352 continue
353 else:
354 return Token(EOL, '\n')
355 else:
356
357
358
359 token = c
360 ttype = DELIMITER
361 else:
362 self._unget_char(c)
363 break
364 elif self.quoting:
365 if c == '\\':
366 c = self._get_char()
367 if c == '':
368 raise dns.exception.UnexpectedEnd
369 if c.isdigit():
370 c2 = self._get_char()
371 if c2 == '':
372 raise dns.exception.UnexpectedEnd
373 c3 = self._get_char()
374 if c == '':
375 raise dns.exception.UnexpectedEnd
376 if not (c2.isdigit() and c3.isdigit()):
377 raise dns.exception.SyntaxError
378 c = chr(int(c) * 100 + int(c2) * 10 + int(c3))
379 elif c == '\n':
380 raise dns.exception.SyntaxError('newline in quoted string')
381 elif c == '\\':
382
383
384
385
386 token += c
387 has_escape = True
388 c = self._get_char()
389 if c == '' or c == '\n':
390 raise dns.exception.UnexpectedEnd
391 token += c
392 if token == '' and ttype != QUOTED_STRING:
393 if self.multiline:
394 raise dns.exception.SyntaxError('unbalanced parentheses')
395 ttype = EOF
396 return Token(ttype, token, has_escape)
397
399 """Unget a token.
400
401 The unget buffer for tokens is only one token large; it is
402 an error to try to unget a token when the unget buffer is not
403 empty.
404
405 @param token: the token to unget
406 @type token: Token object
407 @raises UngetBufferFull: there is already an ungotten token
408 """
409
410 if not self.ungotten_token is None:
411 raise UngetBufferFull
412 self.ungotten_token = token
413
415 """Return the next item in an iteration.
416 @rtype: (int, string)
417 """
418
419 token = self.get()
420 if token.is_eof():
421 raise StopIteration
422 return token
423
426
427
428
430 """Read the next token and interpret it as an integer.
431
432 @raises dns.exception.SyntaxError:
433 @rtype: int
434 """
435
436 token = self.get().unescape()
437 if not token.is_identifier():
438 raise dns.exception.SyntaxError('expecting an identifier')
439 if not token.value.isdigit():
440 raise dns.exception.SyntaxError('expecting an integer')
441 return int(token.value)
442
444 """Read the next token and interpret it as an 8-bit unsigned
445 integer.
446
447 @raises dns.exception.SyntaxError:
448 @rtype: int
449 """
450
451 value = self.get_int()
452 if value < 0 or value > 255:
453 raise dns.exception.SyntaxError('%d is not an unsigned 8-bit integer' % value)
454 return value
455
457 """Read the next token and interpret it as a 16-bit unsigned
458 integer.
459
460 @raises dns.exception.SyntaxError:
461 @rtype: int
462 """
463
464 value = self.get_int()
465 if value < 0 or value > 65535:
466 raise dns.exception.SyntaxError('%d is not an unsigned 16-bit integer' % value)
467 return value
468
470 """Read the next token and interpret it as a 32-bit unsigned
471 integer.
472
473 @raises dns.exception.SyntaxError:
474 @rtype: int
475 """
476
477 token = self.get().unescape()
478 if not token.is_identifier():
479 raise dns.exception.SyntaxError('expecting an identifier')
480 if not token.value.isdigit():
481 raise dns.exception.SyntaxError('expecting an integer')
482 value = int(token.value)
483 if value < 0 or value > 4294967296:
484 raise dns.exception.SyntaxError('%d is not an unsigned 32-bit integer' % value)
485 return value
486
498
500 """Read the next token and raise an exception if it is not an identifier.
501
502 @raises dns.exception.SyntaxError:
503 @rtype: string
504 """
505
506 token = self.get().unescape()
507 if not token.is_identifier():
508 raise dns.exception.SyntaxError('expecting an identifier')
509 return token.value
510
512 """Read the next token and interpret it as a DNS name.
513
514 @raises dns.exception.SyntaxError:
515 @rtype: dns.name.Name object"""
516
517 token = self.get()
518 if not token.is_identifier():
519 raise dns.exception.SyntaxError('expecting an identifier')
520 return dns.name.from_text(token.value, origin)
521
523 """Read the next token and raise an exception if it isn't EOL or
524 EOF.
525
526 @raises dns.exception.SyntaxError:
527 @rtype: string
528 """
529
530 token = self.get()
531 if not token.is_eol_or_eof():
532 raise dns.exception.SyntaxError('expected EOL or EOF, got %d "%s"' % (token.ttype, token.value))
533 return token.value
534
540