1
2
3
4
5
6
7
8
9
10 """
11 Visualization tools for CFGs.
12
13 """
14
15 """
16 Idea for a nice demo:
17 - 3 panes: grammar, treelet, working area
18 - grammar is a list of productions
19 - when you select a production, the treelet that it licenses appears
20 in the treelet area
21 - the working area has the text on the bottom, and S at top. When
22 you select a production, it shows (ghosted) the locations where
23 that production's treelet could be attached to either the text
24 or the tree rooted at S.
25 - the user can drag the treelet onto one of those (or click on them?)
26 - the user can delete pieces of the tree from the working area
27 (right click?)
28 - connecting top to bottom? drag one NP onto another?
29
30 +-------------------------------------------------------------+
31 | S -> NP VP | S |
32 |[NP -> Det N ]| / \ |
33 | ... | NP VP |
34 | N -> 'dog' | |
35 | N -> 'cat' | |
36 | ... | |
37 +--------------+ |
38 | NP | Det N |
39 | / \ | | | |
40 | Det N | the cat saw the dog |
41 | | |
42 +--------------+----------------------------------------------+
43
44 Operations:
45 - connect a new treelet -- drag or click shadow
46 - delete a treelet -- right click
47 - if only connected to top, delete everything below
48 - if only connected to bottom, delete everything above
49 - connect top & bottom -- drag a leaf to a root or a root to a leaf
50 - disconnect top & bottom -- right click
51 - if connected to top & bottom, then disconnect
52 """
53
54 from nltk_lite.draw import *
55 from nltk_lite.parse.cfg import *
56 from Tkinter import *
57 from nltk_lite.parse.tree import *
58 from nltk_lite.draw.tree import *
59
60
61
62
63
64
84
85
86
87
88
# Help text for the CFG editor (presumably what the editor's help
# command displays -- the use site is not visible here).
_CFGEditor_HELP = """

The CFG Editor can be used to create or modify context free grammars.
A context free grammar consists of a start symbol and a list of
productions. The start symbol is specified by the text entry field in
the upper right hand corner of the editor; and the list of productions
is specified in the main text editing box.

Every non-blank line specifies a single production. Each production
has the form "LHS -> RHS," where LHS is a single nonterminal, and RHS
is a list of nonterminals and terminals.

Nonterminals must be a single word, such as S or NP or NP_subj.
Currently, nonterminals must consist of alphanumeric characters and
underscores (_). Nonterminals are colored blue. If you place the
mouse over any nonterminal, then all occurrences of that nonterminal
will be highlighted.

Terminals must be surrounded by single quotes (') or double
quotes (\"). For example, "dog" and "New York" are terminals.
Currently, the string within the quotes must consist of alphanumeric
characters, underscores, and spaces.

To enter a new production, go to a blank line, and type a nonterminal,
followed by an arrow (->), followed by a sequence of terminals and
nonterminals. Note that "->" (dash + greater-than) is automatically
converted to an arrow symbol. When you move your cursor to a
different line, your production will automatically be colorized. If
there are any errors, they will be highlighted in red.

Note that the order of the productions is significant for some
algorithms. To re-order the productions, use cut and paste to move
them.

Use the buttons at the bottom of the window when you are done editing
the CFG:
- Ok: apply the new CFG, and exit the editor.
- Apply: apply the new CFG, and do not exit the editor.
- Reset: revert to the original CFG, and do not exit the editor.
- Cancel: revert to the original CFG, and exit the editor.

"""
131
133 """
134 A dialog window for creating and editing context free grammars.
135 C{CFGEditor} places the following restrictions on what C{CFG}s can
136 be edited:
137 - All nonterminals must be strings consisting of word
138 characters.
139 - All terminals must be strings consisting of word characters
140 and space characters.
141 """
142
143
# Character that is displayed as the "->" arrow; the text widget shows
# it with the 'arrow' tag, which is configured to use the symbol font.
ARROW = SymbolWidget.SYMBOLS['rightarrow']

# Every pattern fragment that contains a regex escape is written as a
# raw string, so that no pattern relies on Python passing unknown
# string escapes (such as "\s") through unchanged.  The compiled
# patterns are the same as before.

# Left-hand side of a production, up to and including the arrow.
_LHS_RE = re.compile(r"(^\s*\w+\s*)(->|(" + ARROW + r"))")
# The arrow itself ("->" or the arrow character) with surrounding space.
_ARROW_RE = re.compile(r"\s*(->|(" + ARROW + r"))\s*")
# A complete production line: LHS, arrow, then any sequence of
# nonterminals, quoted terminals and "|" separators.
_PRODUCTION_RE = re.compile(r"(^\s*\w+\s*)" +
                            r"(->|(" + ARROW + r"))\s*" +
                            r"((\w+|'[\w ]*'|\"[\w ]*\"|\|)\s*)*$")
# A single token: nonterminal, arrow, or quoted terminal.
_TOKEN_RE = re.compile("\\w+|->|'[\\w ]+'|\"[\\w ]+\"|(" + ARROW + ")")
# Font used for nonterminals.
_BOLD = ('helvetica', -12, 'bold')
152
153 - def __init__(self, parent, cfg=None, set_cfg_callback=None):
173
181
194
# Set the window title, then install the keyboard shortcuts.  Each
# handler is listed once together with every key sequence that
# triggers it; the sequences are bound in the order given.
self._top.title('CFG Editor')
for handler, sequences in (
    (self._cancel, ('<Control-q>', '<Alt-q>', '<Control-d>',
                    '<Alt-x>', '<Escape>', '<Alt-c>')),
    (self._ok, ('<Control-o>', '<Alt-o>')),
    (self._apply, ('<Control-a>', '<Alt-a>')),
    (self._reset, ('<Control-r>', '<Alt-r>')),
    (self._help, ('<Control-h>', '<Alt-h>', '<F1>')),
    ):
    for sequence in sequences:
        self._top.bind(sequence, handler)
215
# Frame that holds the production text box and its scrollbar.
self._prodframe = Frame(self._top)

# Text widget in which the productions are edited, plus a vertical
# scrollbar; the two are wired to each other in both directions.
self._textwidget = Text(self._prodframe, background='#e0e0e0',
                        exportselection=1)
self._textscroll = Scrollbar(self._prodframe, takefocus=0,
                             orient='vertical')
self._textwidget.config(yscrollcommand = self._textscroll.set)
self._textscroll.config(command=self._textwidget.yview)
self._textscroll.pack(side='right', fill='y')
self._textwidget.pack(expand=1, fill='both', side='left')

# Fixed tags used to colorize the text.
self._textwidget.tag_config('terminal', foreground='#006000')
self._textwidget.tag_config('arrow', font='symbol')
self._textwidget.tag_config('error', background='red')

# Line number most recently recorded for the insertion cursor.
self._linenum = 0

# Typing '>' may complete a "->", so convert arrows on that key.
self._top.bind('>', self._replace_arrows)

# Re-analyze the buffer after a paste, and check whether the cursor
# changed lines after every key press or mouse click.
self._top.bind('<<Paste>>', self._analyze)
self._top.bind('<KeyPress>', self._check_analyze)
self._top.bind('<ButtonPress>', self._check_analyze)

# Pressing Tab in the text widget moves the focus to the next widget.
def cycle(e, textwidget=self._textwidget):
    textwidget.tk_focusNext().focus()
self._textwidget.bind('<Tab>', cycle)

# Collect (lhs, [rhs, ...]) pairs, merging each production into the
# one before it when both have the same left-hand side, so that they
# are shown on one line as "LHS -> a | b".  Entries that contain an
# empty right-hand side are never merged.  (The debugging "print"
# statements that used to dump these lists to stdout were removed.)
prod_tuples = [(p.lhs(),[p.rhs()]) for p in self._cfg.productions()]
for i in range(len(prod_tuples)-1,0,-1):
    if (prod_tuples[i][0] == prod_tuples[i-1][0]):
        if () in prod_tuples[i][1]: continue
        if () in prod_tuples[i-1][1]: continue
        prod_tuples[i-1][1].extend(prod_tuples[i][1])
        del prod_tuples[i]

# Write one line of text per entry.
for lhs, rhss in prod_tuples:
    s = '%s ->' % lhs
    for rhs in rhss:
        for elt in rhs:
            # Nonterminals are written bare; terminals are quoted.
            if isinstance(elt, Nonterminal): s += ' %s' % elt
            else: s += ' %r' % elt
        s += ' |'
    # Drop the trailing ' |' left by the last alternative.
    s = s[:-2] + '\n'
    self._textwidget.insert('end', s)

# Convert arrows and colorize everything that was just inserted.
self._analyze()
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
310
"""
If the insertion cursor is on a different line than the one last
recorded, strip the colorization from the line it is on now,
re-colorize the line that was recorded, and record the new line.
"""
cursor_index = self._textwidget.index('insert')
current_line = int(cursor_index.split('.')[0])
if current_line != self._linenum:
    self._clear_tags(current_line)
    self._analyze_line(self._linenum)
    self._linenum = current_line
322
"""
Convert every C{'->'} in the text widget into a tab followed by
the arrow character (inserted with the C{'arrow'} tag), and then
tag the arrow characters found in the buffer with C{'arrow'}.
The whole buffer is scanned on every call.
"""
text = self._textwidget

# Pass 1: swap each '->' for the arrow character, preceded by a tab.
pos = '1.0'
while True:
    pos = text.search('->', pos, 'end+1char')
    if pos == '': break
    text.delete(pos, pos+'+2char')
    text.insert(pos, self.ARROW, 'arrow')
    text.insert(pos, '\t')

# Pass 2: tag the arrow characters.  Each search starts one character
# past the previous position (initially one past '1.0').
pos = '1.0'
while True:
    pos = text.search(self.ARROW, pos+'+1char', 'end+1char')
    if pos == '': break
    text.tag_add('arrow', pos, pos+'+1char')
343
"""
Colorize one token.  C{match} is a regexp match for the token on
line C{linenum}; it supplies the token's text and its start and
end offsets within that line.
"""
token = match.group()

# Choose the tag from the kind of token.
if token[0] in "'\"":
    tag = 'terminal'
elif token in ('->', self.ARROW):
    tag = 'arrow'
else:
    # Each nonterminal has a tag of its own, created the first time
    # that nonterminal is seen.
    tag = 'nonterminal_' + token
    if tag not in self._textwidget.tag_names():
        self._init_nonterminal_tag(tag)

# Apply the tag to the span of text covered by the match.
(first, last) = match.span()
self._textwidget.tag_add(tag,
                         '%d.%d' % (linenum, first),
                         '%d.%d' % (linenum, last))
365
# Give the tag its color and the bold font.
self._textwidget.tag_config(tag, foreground=foreground,
                            font=CFGEditor._BOLD)

# When highlighting of matching nonterminals is enabled, change the
# tag's background while the mouse is over any text carrying the tag.
if self._highlight_matching_nonterminals:
    def enter(e, textwidget=self._textwidget, tag=tag):
        textwidget.tag_config(tag, background='#80ff80')
    def leave(e, textwidget=self._textwidget, tag=tag):
        textwidget.tag_config(tag, background='')
    self._textwidget.tag_bind(tag, '<Enter>', enter)
    self._textwidget.tag_bind(tag, '<Leave>', leave)
377
395 CFGEditor._TOKEN_RE.sub(analyze_token, line)
396 elif line.strip() != '':
397
398 self._mark_error(linenum, line)
399
"""
Tag the erroneous part of line C{linenum} (whose text is C{line})
with the C{'error'} tag.
"""
line_start = '%d.0' % linenum
line_end = '%d.end' % linenum

arrowmatch = CFGEditor._ARROW_RE.search(line)
if not arrowmatch:
    # There is no arrow at all: the whole line is marked.
    (start, end) = (line_start, line_end)
elif not CFGEditor._LHS_RE.match(line):
    # The left-hand side is bad: mark everything before the arrow.
    (start, end) = (line_start, '%d.%d' % (linenum, arrowmatch.start()))
else:
    # Otherwise mark everything after the arrow.
    (start, end) = ('%d.%d' % (linenum, arrowmatch.end()), line_end)

# An empty range would mark nothing, so fall back to the whole line.
if self._textwidget.compare(start, '==', end):
    (start, end) = (line_start, line_end)
self._textwidget.tag_add('error', start, end)
423
"""
Convert every C{->} into an arrow, then analyze each line of the
buffer, from line 1 through the line of the C{'end'} index.
"""
self._replace_arrows()
last_line = int(self._textwidget.index('end').split('.')[0])
linenum = 1
while linenum <= last_line:
    self._analyze_line(linenum)
    linenum += 1
432
468
# Destroy the window once; after that self._top is None, so calling
# this again does nothing.
if self._top is not None:
    self._top.destroy()
    self._top = None
473
477
484
# Replace the buffer contents with one line per production of the
# current grammar, then re-analyze the buffer.
text = self._textwidget
text.delete('1.0', 'end')
for production in self._cfg.productions():
    text.insert('end', '%s\n' % production)
self._analyze()

# Report the grammar through the callback, when one was supplied.
if self._set_cfg_callback is not None:
    self._set_cfg_callback(self._cfg)
492
497
506
507
508
509
510
535
536
537
538
539
542
544
546
553
# White canvas along the bottom of the parent, on which the treelet
# is drawn; no treelet exists yet.
canvas = Canvas(parent, background='white')
canvas.pack(side='bottom', fill='x')
self._treelet_canvas = canvas
self._treelet = None
558
564
565
566
567
568
canvas = self._workspace.canvas()

# Fonts are derived from the currently selected size.
size = int(self._size.get())
node_font = ('helvetica', -(size+4), 'bold')
leaf_font = ('helvetica', -(size+2))

# Remove the tree that is currently displayed, if there is one.
if self._tree is not None:
    self._workspace.remove_widget(self._tree)

# The root node is labelled with the grammar's start symbol.
root_label = self._grammar.start().symbol()
rootnode = TextWidget(canvas, root_label, font=node_font, draggable=1)

# One leaf widget per word of the text; a Token is replaced by the
# value of its type().
leaves = []
for word in self._text:
    if isinstance(word, Token):
        word = word.type()
    leaf = TextWidget(canvas, word, font=leaf_font, draggable=1)
    leaves.append(leaf)

# Join root and leaves in one tree segment, drawn in white, and add
# it to the workspace.
self._tree = TreeSegmentWidget(canvas, rootnode, leaves,
                               color='white')
self._workspace.add_widget(self._tree)

# Shift every leaf by (0, 100).
for leaf in leaves:
    leaf.move(0,100)
598
599
600
601
604
625
626
627
628
629
treelet_canvas = self._treelet_canvas

# Highlight the production in the list and discard the treelet that
# is currently displayed, if any.
self._prodlist.highlight(production)
if self._treelet is not None:
    self._treelet.destroy()

# Build a one-level tree for the production.
# NOTE(review): the loop below only rebinds its loop variable, so
# rhs is left unchanged and the Tree objects it creates are
# discarded; the tree is built from the original right-hand side.
# Presumably the intent was to wrap nonterminals in Trees -- confirm
# before changing.
rhs = production.rhs()
for (i, elt) in enumerate(rhs):
    if isinstance(elt, Nonterminal): elt = Tree(elt)
tree = Tree(production.lhs().symbol(), *rhs)

# Draw the tree as a draggable tree segment, using fonts derived from
# the currently selected size.
size = int(self._size.get())
self._treelet = tree_to_treesegment(
    treelet_canvas, tree,
    node_font=('helvetica', -(size+4), 'bold'),
    leaf_font=('helvetica', -(size+2)))
self._treelet['draggable'] = 1

# Center the treelet with respect to the canvas's configured width
# and height.
(x1, y1, x2, y2) = self._treelet.bbox()
width = int(treelet_canvas['width'])
height = int(treelet_canvas['height'])
self._treelet.move((width-x1-x2)/2, (height-y1-y2)/2)

# Mark the production.
self._markproduction(production)
658
661
def mainloop(self, *args, **kwargs):
    """
    Run the main loop of this object's top-level window.  All
    positional and keyword arguments are passed through unchanged
    to the window's own C{mainloop} method.
    """
    top = self._top
    top.mainloop(*args, **kwargs)
664
from nltk_lite.parse import cfg

# Nonterminal symbols used by the demo grammar.
(S, VP, NP, PP, P, N, Name, V, Det) = [
    cfg.Nonterminal(symbol)
    for symbol in 'S VP NP PP P N Name V Det'.split()]

# Syntactic productions, in their original order.
syntactic = [
    cfg.Production(S, [NP, VP]),
    cfg.Production(NP, [Det, N]),
    cfg.Production(NP, [NP, PP]),
    cfg.Production(VP, [VP, PP]),
    cfg.Production(VP, [V, NP, PP]),
    cfg.Production(VP, [V, NP]),
    cfg.Production(PP, [P, NP]),
    cfg.Production(PP, []),
    cfg.Production(PP, ['up', 'over', NP]),
    ]

# Lexical productions, as (left-hand side, word) pairs in their
# original order.
lexical = [
    (NP, 'I'), (Det, 'the'), (Det, 'a'), (N, 'man'),
    (V, 'saw'), (P, 'in'), (P, 'with'), (N, 'park'),
    (N, 'dog'), (N, 'statue'), (Det, 'my'),
    ]

productions = tuple(
    syntactic + [cfg.Production(lhs, [word]) for (lhs, word) in lexical])
grammar = cfg.Grammar(S, productions)

# Open the demo on a sample sentence and run its main loop.
sentence = 'I saw a man in the park'.split()
CFGDemo(grammar, sentence).mainloop()
696
697
698
699
700
from nltk_lite.parse import cfg

# Grammar handed to the editor.  (The Nonterminal objects that used
# to be created here were never used and have been dropped.)
grammar = cfg.parse_cfg("""
S -> NP VP
PP -> P NP
NP -> Det N
NP -> NP PP
VP -> V NP
VP -> VP PP
Det -> 'a'
Det -> 'the'
Det -> 'my'
NP -> 'I'
N -> 'dog'
N -> 'man'
N -> 'park'
N -> 'statue'
V -> 'saw'
P -> 'in'
P -> 'up'
P -> 'over'
P -> 'with'
""")

# Callback passed to the editor; it prints the grammar it is given.
# The single-argument call form prints the same text whether print is
# a statement or a function.
def cb(grammar): print(grammar)

# Open the editor on top of a small root window with a Quit button,
# and run the main loop.
top = Tk()
editor = CFGEditor(top, grammar, cb)
Label(top, text='\nTesting CFG Editor\n').pack()
Button(top, text='Quit', command=top.destroy).pack()
top.mainloop()
735
from nltk_lite.parse import cfg

# Nonterminal symbols used by the productions below.
(S, VP, NP, PP, P, N, Name, V, Det) = \
    nonterminals('S, VP, NP, PP, P, N, Name, V, Det')

productions = (
    # Syntactic productions
    cfg.Production(S, [NP, VP]),
    cfg.Production(NP, [Det, N]),
    cfg.Production(NP, [NP, PP]),
    cfg.Production(VP, [VP, PP]),
    cfg.Production(VP, [V, NP, PP]),
    cfg.Production(VP, [V, NP]),
    cfg.Production(PP, [P, NP]),
    cfg.Production(PP, []),
    cfg.Production(PP, ['up', 'over', NP]),
    # Lexical productions
    cfg.Production(NP, ['I']),
    cfg.Production(Det, ['the']),
    cfg.Production(Det, ['a']),
    cfg.Production(N, ['man']),
    cfg.Production(V, ['saw']),
    cfg.Production(P, ['in']),
    cfg.Production(P, ['with']),
    cfg.Production(N, ['park']),
    cfg.Production(N, ['dog']),
    cfg.Production(N, ['statue']),
    cfg.Production(Det, ['my']),
    )

# Root window; pressing 'q' destroys it.
root = Tk()
root.bind('q', lambda e, root=root: root.destroy())

# Production list filling the window; selecting or moving calls its
# markonly method.
plist = ProductionList(root, productions)
plist.pack(expand=1, fill='both')
for event in ('select', 'move'):
    plist.add_callback(event, plist.markonly)
plist.focus()

# Start with two productions marked.
for index in (2, 8):
    plist.mark(productions[index])
773
# Run the demo when this file is executed as a script.
if __name__ == '__main__':
    demo()
775