1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
""" Stream-oriented reading of a BibTeX file, with no actual semantic
operation on the content. Tries to return _everything_ from the file,
including comments, strings, etc. """
25
26 import re
27 from gettext import gettext as _
28
29 from Pyblio.Exceptions import ParserError
30 from Pyblio.Parsers.Syntax.BibTeX import Coding
31
34 """ Return a textual version of the field, with no visible BibTeX / LaTeX markup """
35 pass
36
38 """ Return a flattened list of the balanced expressions composing the field """
39 pass
40
42 """ Execute the known LaTeX commands forming the field,
43 substitute the known symbols, and return the resulting string"""
44 pass
45
47 """ Return the BibTeX version of the field """
48 pass
49
50
56
57
61
63
65
66 self.type = type
67 self.key = key
68 self.fields = fields
69
70 return
71
73
74 if self.type != other.type: return 1
75 if self.key != other.key : return 1
76
77 return cmp (self.fields, other.fields)
78
80
81 return 'Record (%s, %s, %s)' % (
82 repr (self.type),
83 repr (self.key),
84 repr (self.fields))
85
87 """ A value, as a concatenation of blocks """
88
90 return 'Join (%s)' % list.__repr__ (self)
91
93 v = []
94 for data in self:
95 v = v + data.subst ()
96
97 return v
98
100 r = []
101 for v in self:
102 r += v.join ()
103 return r
104
106
107 def subjoin (fragment):
108 if isinstance (fragment, Text):
109 try:
110 return env.strings [fragment]
111 except KeyError:
112 pass
113 return fragment.execute (env)
114
115 return Join ([ subjoin (x) for x in self ])
116
117
119 try:
120 return ''.join (map (lambda x: x.flat (), self))
121 except AttributeError:
122 print repr (self)
123 raise
124
125
127 return ' # '.join (map (lambda x: x.tobib (), self))
128
129 -class Text(unicode):
130
132 return self.replace ('~', u'\xa0')
133
134 - def __repr__ (self):
135 return 'Text(%s)' % unicode.__repr__(self)
136
139
141 return Coding.encode(self)
142
143 - def execute (self, env):
145
147 """ A LaTeX \-command """
148
150 self._cmd = cmd
151 return
152
154 return 'Cmd (%s)' % `self._cmd`
155
158
161
163 return '\\%s' % self._cmd
164
166 if not isinstance (other, Cmd): return 1
167
168 return cmp (self._cmd, other._cmd)
169
171 """ A textual block, as a sequence of text and commands """
172
173 closer = {
174 '"': '"',
175 '{': '}',
176 '(': ')',
177 }
178
179 - def __init__ (self, opening, data = None):
180 self._o = opening
181
182 if data is None: self._d = ()
183 else: self._d = data
184 return
185
187 r = ''
188 for o in self._d:
189 r = r + o.flat ()
190
191 return r
192
195
197 final = []
198 stack = [] + list (self._d)
199
200 while stack:
201 d = stack.pop (0)
202
203 if isinstance (d, Cmd):
204 r = env.run (d._cmd, stack)
205 else:
206 r = d.execute (env)
207
208 final.append (r)
209
210 return Block (self._o, final)
211
212
214 return list (self._d)
215
217 return 'Block (%s, %s)' % (`self._o`,
218 `self._d`)
219
221 r = []
222 for d in self._d:
223 try:
224 r = r + d.subst ()
225 except AttributeError:
226 print repr (d)
227 return r
228
230 if not isinstance (other, Block): return 1
231
232 if self._o != other._o: return 1
233 return cmp (self._d, other._d)
234
236 return '%s%s%s' % (
237 self._o,
238 ''.join([x.tobib() for x in self._d]),
239 self.closer[self._o])
240
241
245
247
249
250 self.fd = fd
251 self.ln = 0
252 self.cs = charset
253
254 self._buf = []
255 return
256
258 self.ln += 1
259 if self._buf:
260 return self._buf.pop()
261
262 l = self.fd.readline()
263 if not l:
264 raise EndOfFile()
265 try:
266 return l.decode(self.cs)
267 except UnicodeDecodeError, err:
268 raise ParserError(str(err), self.ln)
269
271 self.ln -= 1
272 self._buf.append (line)
273
class Context (object):
    """ Mutable state handed to the state handlers while reading.

    rectype is None while the reader is outside of any record, and
    holds the type of the record (the word following '@') while that
    record is being read.
    """

    def __init__ (self):
        self.rectype = None
280
# States of the reader: outside of any record, at the opening of a
# record, and input exhausted.
ST_OUT, ST_OPEN, ST_DONE = range (3)

# Matches the '@type' starting an entry. Group 1 is the entry type,
# group 2 the remainder of the line (without the line ending).
_record_start = re.compile (r'\s*@\s*(\w+)(.*)')
284
def _on_out (fd, ctx):
    """ Called when the parser is not in a record.

    Accumulates the lines read from fd until either the input is
    exhausted or a line starting an '@' entry is met.

    Returns a (state, data) pair:

      - (ST_DONE, data) once fd.readline () raises EndOfFile;
      - (ST_OUT, list) for an '@comment' entry: the list holds a
        Comment with the accumulated text, if any, followed by an
        ATComment built from the remainder of the line;
      - (ST_OPEN, data) for any other entry: ctx.rectype is set to
        the entry type and the remainder of the line, stripped of its
        leading blanks, is pushed back into fd.

    In the first and last cases, data is a Comment holding the text
    accumulated so far, or None when there is none.
    """

    assert ctx.rectype is None

    comment = ''

    while True:
        try:
            l = fd.readline ()

        except EndOfFile:
            if comment:
                return ST_DONE, Comment (comment)
            return ST_DONE, None

        m = _record_start.match (l)
        if not m:
            # still outside of any entry: keep the raw line
            comment += l
            continue

        if m.group (1).lower () == 'comment':
            r = []
            if comment:
                r.append (Comment (comment))

            r.append (ATComment (m.group (2)))
            return ST_OUT, r

        ctx.rectype = m.group (1)
        fd.unreadline (m.group (2).lstrip ())

        if comment:
            return ST_OPEN, Comment (comment)
        return ST_OPEN, None
320
# Characters that are handled specially while reading a record:
# parentheses, double quote, braces and backslash.
_brace_re = re.compile (r'[()"{}\\]')

# Applied to the text following a backslash. Group 1 is the command
# (a word, a single non-blank character, or a space), group 2 the
# remainder of the line.
_cmd_re = re.compile (r'(\w+|\S| )(.*)')

# Separators used to split the text of a record into tokens.
_inline_re = re.compile (r'([,#=])')
324
def _on_open (fd, ctx):
    """ Called at the opening of a record.

    Reads from fd the balanced content of the record whose type is in
    ctx.rectype, splits it into a key and a list of (name, Join)
    fields, then resets ctx.rectype to None.

    Returns the pair (ST_OUT, Record).

    Raises ParserError for a backslash with nothing usable after it,
    for data or a closing symbol met before the record is opened, for
    a mismatched '}', and for an invalid key / field layout. No
    exception raised by fd.readline () is caught here.
    """

    assert ctx.rectype is not None

    # -- Step 1: read the record as nested blocks ---------------------
    #
    # stack:     (content, container) pairs of the enclosing blocks
    # curr:      content of the block being filled
    # container: opening symbol of the block being filled
    # data:      raw text not yet stored in curr
    stack = []
    curr = []

    container = None
    data = ''

    l = fd.readline ()
    start = fd.ln

    while True:
        m = _brace_re.search (l)
        if not m:
            # nothing special left on this line, fetch the next one
            data += l
            l = fd.readline ()
            continue

        idx = m.start (0)
        before, brace, l = l [:idx], l [idx], l [idx+1:]

        data += before

        if brace == '\\':
            m = _cmd_re.match (l)

            if not m:
                raise ParserError ('backslash at the end of a line', fd.ln)

            if data: curr.append (Text (data))
            curr.append (Cmd (m.group (1)))

            l = m.group (2)
            data = ''
            continue

        if not container:
            # the first symbol met opens the record itself
            if data:
                raise ParserError (
                    'unexpected data before '
                    'the opening of the record: %s' % repr (data),
                    fd.ln)

            if brace in ')}':
                raise ParserError ('unexpected closing symbol %s' % repr (brace),
                                   fd.ln)

            container = brace

        else:
            if brace in '})':

                if (brace == '}' and container != '{'):
                    raise ParserError ('mismatched "%s"' % brace, fd.ln)

                if brace == ')' and container != '(':
                    # a ')' that does not close the container is text
                    data += ')'
                    continue

                if data: curr.append (Text (data))
                data = ''

                # closing the outermost container ends the record
                if not stack: break

                v = Block (container, curr)

                curr, container = stack.pop ()
                curr.append (v)
                continue

            elif brace == '(':
                # an opening parenthesis inside a record is plain text
                data += '('
                continue

            elif brace == '"':

                if container == '"':

                    if data: curr.append (Text (data))
                    data = ''

                    if not stack: break

                    v = Block ('"', curr)
                    curr, container = stack.pop ()
                    curr.append (v)
                    continue

                else:
                    # a quote opens a block only directly inside the
                    # record; deeper, it is kept as text
                    if len (stack) == 0:

                        if data: curr.append (Text (data))
                        stack.append ((curr, container))

                        curr = []
                        data = ''
                        container = '"'

                    else:
                        data += '"'

            elif brace == '{':
                if data: curr.append (Text (data))
                stack.append ((curr, container))

                curr = []
                data = ''
                container = '{'

    # -- Step 2: turn the content into a flat stream of tokens --------
    #
    # Text items are split on blanks and on the ',', '#' and '='
    # separators (each separator becomes its own token); any other
    # item is passed through unchanged.
    stream = []

    while curr:
        l = curr.pop (0)

        if not isinstance (l, Text):
            stream.append (l)
            continue

        i = 0
        for m in _inline_re.finditer (l):
            s, e = m.start (1), m.end (1)

            stream += [ Text (x) for x in l [i:s].split () ]
            stream.append (Text (l [s]))
            i = e

        if i < len (l): stream += [ Text (x) for x in l [i:].split () ]

    # -- Step 3: extract the key and the fields from the stream -------
    key = None
    field = []

    while stream:

        k = stream.pop (0)

        # a token that is last, or followed by a comma, is the key
        if not stream or stream [0] == ',':
            if key: raise ParserError (
                "key is defined twice", start)

            if field: raise ParserError (
                "key is defined in the middle of the record", start)

            key = k
            if stream: stream.pop (0)
            continue

        v = stream.pop (0)
        if v != '=':
            raise ParserError (
                "invalid syntax after field %s" % repr (k), start)

        vs = Join ()

        while stream:
            v = stream.pop (0)
            if v == ',': break

            if vs:
                if v == '#':
                    if not stream:
                        raise ParserError (
                            "field %s: unexpected #" % k, start)
                    vs.append (stream.pop (0))

                else:
                    if isinstance (v, Text):
                        # no comma before this token: put it back so
                        # that it is read as the start of what follows
                        stream.insert (0, v)
                        break

                    raise ParserError (
                        "field %s: missing #" % k, start)
            else:
                vs.append (v)

        field.append ((k, vs))

    rec = Record (ctx.rectype, key, field)

    ctx.rectype = None

    return ST_OUT, rec
516
# State machine dispatch table: the handler to run in each non-final
# state. A handler is called as handler (fd, ctx) and returns a
# (next state, data) pair.
_fstm = {
    ST_OUT:  _on_out,
    ST_OPEN: _on_open,
}
521
def read (fd, charset = 'utf-8'):
    """ Iterate over the content of a BibTeX stream.

    fd is an object providing readline (); it is wrapped in a Cache
    together with charset.

    This is a generator. It runs the state handlers of _fstm, starting
    in ST_OUT, until one of them returns ST_DONE, and yields every
    piece of data they return: a None is skipped, a plain list is
    yielded item by item, anything else is yielded as is.

    Exceptions raised by the handlers propagate to the caller.
    """

    ctx = Context ()

    fd = Cache (fd, charset)
    st = ST_OUT

    while st != ST_DONE:
        st, data = _fstm [st] (fd, ctx)

        if data is None:
            continue

        # exact type check on purpose: only a plain list is a batch of
        # results; list subclasses are yielded whole
        if type (data) is list:
            for d in data:
                yield d
        else:
            yield data
539