1
2
3 from nltk_lite import tokenize
4 from nltk_lite.parse.featurechart import *
5 from nltk_lite.parse import GrammarFile, bracket_parse, tree
6 from nltk_lite.draw.tree import draw_trees
7
8 """
9 An implementation of the Hole Semantics model, following Blackburn and Bos,
10 Representation and Inference for Natural Language (CSLI, 2005).
11
12 The semantic representations are built by the grammar hole.cfg.
13 This module contains driver code to read in sentences and parse them
14 according to a hole semantics grammar.
15
16 After parsing, the semantic representation is in the form of an underspecified
17 representation that is not easy to read. We use a "plugging" algorithm to
18 convert that representation into first-order logic formulas. These can be
19 displayed textually or graphically.
20 """
21
22
23
24
25
26
27
28
29
31 """
32 This class holds the broken-down components of a hole semantics, i.e. it
33 extracts the holes, labels, logic formula fragments and constraints out of
34 a big conjunction of such as produced by the hole semantics grammar. It
35 then provides some operations on the semantics dealing with holes, labels
36 and finding legal ways to plug holes with labels.
37 """
39 """
40 Constructor. `usr' is a tree of nodes that can take the forms:
41 (and t t'), (hole v), (label v), (: v phi), (leq v v)
42 where t, t' are subtrees, v is a variable, and phi is a formula fragment.
43 """
44 self.holes = set()
45 self.labels = set()
46 self.fragments = {}
47 self.constraints = set()
48 self._break_down(usr)
49 self.top_most_labels = self._find_top_most_labels()
50 self.top_hole = self._find_top_hole()
51
53 """Return true if x is a label in this semantic representation."""
54 return x in self.labels
55
57 """Return true if x is a hole in this semantic representation."""
58 return x in self.holes
59
61 """
62 Return true if x is a node (label or hole) in this semantic
63 representation.
64 """
65 return self.is_label(x) or self.is_hole(x)
66
68 """
69 Extract holes, labels, formula fragments and constraints from the hole
70 semantics underspecified representation (USR).
71 """
72 assert isinstance(usr, Tree)
73
74
75 if usr.node == 'and':
76 self._break_down(usr[0])
77 self._break_down(usr[1])
78
79
80 elif usr.node == 'hole':
81 hole = usr[0]
82 self.holes.add(hole)
83 assert not self.is_label(hole)
84
85
86 elif usr.node == 'label':
87 label = usr[0]
88 self.labels.add(label)
89 assert not self.is_hole(label)
90
91
92 elif usr.node == ':':
93 label = usr[0]
94 phi = usr[1]
95 assert not self.fragments.has_key(label)
96 self.fragments[label] = phi
97
98
99 elif usr.node == 'leq':
100 lhs = usr[0]
101 rhs = usr[1]
102 self.constraints.add(Constraint(lhs, rhs))
103
104 else:
105 raise ValueError(usr.node)
106
108 """
109 Return the set of labels which are not referenced directly as part of
110 another formula fragment. These will be the top-most labels for the
111 subtree that they are part of.
112 """
113 top_most_labels = self.labels.copy()
114 for f in self.fragments.itervalues():
115 for arg in f:
116 if self.is_label(arg):
117 top_most_labels.discard(arg)
118 return top_most_labels
119
121 """
122 Return the hole that will be the top of the formula tree.
123 """
124 top_hole = self.holes.copy()
125 for f in self.fragments.itervalues():
126 for arg in f:
127 if self.is_hole(arg):
128 top_hole.discard(arg)
129 assert len(top_hole) == 1
130 return top_hole.pop()
131
133 """
134 Calculate and return all the legal pluggings (mappings of labels to
135 holes) of this semantics given the constraints.
136 """
137 record = []
138 self._plug_nodes([(self.top_hole, [])], self.top_most_labels, {},
139 record)
140 return record
141
142 - def _plug_nodes(self, queue, potential_labels, plug_acc, record):
143 """
144 Plug the nodes in `queue' with the labels in `potential_labels'.
145
146 Each element of `queue' is a tuple of the node to plug and the list of
147 ancestor holes from the root of the graph to that node.
148
149 `potential_labels' is a set of the labels which are still available for
150 plugging.
151
152 `plug_acc' is the incomplete mapping of holes to labels made on the
153 current branch of the search tree so far.
154
155 `record' is a list of all the complete pluggings that we have found in
156 total so far. It is the only parameter that is destructively updated.
157 """
158 assert queue != []
159 (node, ancestors) = queue[0]
160 if self.is_hole(node):
161
162 self._plug_hole(node, ancestors, queue[1:], potential_labels,
163 plug_acc, record)
164 else:
165 assert self.is_label(node)
166
167
168 phi = self.fragments[node]
169 head = [(a, ancestors) for a in phi if self.is_node(a)]
170 self._plug_nodes(head + queue[1:], potential_labels,
171 plug_acc, record)
172
173 - def _plug_hole(self, hole, ancestors0, queue, potential_labels0,
174 plug_acc0, record):
175 """
176 Try all possible ways of plugging a single hole.
177 See _plug_nodes for the meanings of the parameters.
178 """
179
180 assert hole not in ancestors0
181 ancestors = [hole] + ancestors0
182
183
184 for l in potential_labels0:
185
186 if self._violates_constraints(l, ancestors):
187 continue
188
189 plug_acc = plug_acc0.copy()
190 plug_acc[hole] = l
191 potential_labels = potential_labels0.copy()
192 potential_labels.remove(l)
193
194 if len(potential_labels) == 0:
195
196
197
198
199
200
201
202 self._sanity_check_plugging(plug_acc, self.top_hole, [])
203 record.append(plug_acc)
204 else:
205
206
207
208
209
210
211
212
213 self._plug_nodes(queue + [(l, ancestors)], potential_labels,
214 plug_acc, record)
215
217 """
218 Return True if the `label' cannot be placed underneath the holes given
219 by the set `ancestors' because it would violate the constraints imposed
220 on it.
221 """
222 for c in self.constraints:
223 if c.lhs == label:
224 if c.rhs not in ancestors:
225 return True
226 return False
227
229 """
230 Make sure that a given plugging is legal. We recursively go through
231 each node and make sure that no constraints are violated.
232 We also check that all holes have been filled.
233 """
234 if self.is_hole(node):
235 ancestors = [node] + ancestors
236 label = plugging[node]
237 else:
238 label = node
239 assert self.is_label(label)
240 for c in self.constraints:
241 if c.lhs == label:
242 assert c.rhs in ancestors
243 phi = self.fragments[label]
244 for arg in phi:
245 if self.is_node(arg):
246 self._sanity_check_plugging(plugging, arg, [label] + ancestors)
247
254
264
265
267 """
268 This class represents a constraint of the form (L =< N),
269 where L is a label and N is a node (a label or a hole).
270 """
275 if self.__class__ == other.__class__:
276 return self.lhs == other.lhs and self.rhs == other.rhs
277 else:
278 return False
280 return not (self == other)
282 return hash(repr(self))
284 return '(%s =< %s)' % (self.lhs, self.rhs)
285
286
288 """
289 A Tree for first-order logic formulas that prints differently. Nodes with
290 operator names are printed in infix. Nodes which have unrecognised names
291 are assumed to be predicates.
292 """
294 if self.node == 'ALL':
295 var = self[0]
296 st = self[1]
297 return '(ALL %s %s)' % (var, st)
298 elif self.node == 'SOME':
299 var = self[0]
300 st = self[1]
301 return '(SOME %s %s)' % (var, st)
302 elif self.node == 'AND':
303 return '(%s /\ %s)' % (self[0], self[1])
304 elif self.node == 'IMP':
305 return '(%s -> %s)' % (self[0], self[1])
306
307 else:
308
309 args = ', '.join([str(arg) for arg in self])
310 return '%s(%s)' % (self.node, args)
311
312
314 import sys
315 from optparse import OptionParser, OptionGroup
316 usage = """%%prog [options] [grammar_file]""" % globals()
317
318 opts = OptionParser(usage=usage)
319 opts.add_option("-c", "--components",
320 action="store_true", dest="show_components", default=0,
321 help="show hole semantics components")
322 opts.add_option("-r", "--raw",
323 action="store_true", dest="show_raw", default=0,
324 help="show the raw hole semantics expression")
325 opts.add_option("-d", "--drawtrees",
326 action="store_true", dest="draw_trees", default=0,
327 help="show formula trees in a GUI window")
328 opts.add_option("-v", "--verbose",
329 action="count", dest="verbosity", default=0,
330 help="show more information during parse")
331
332 (options, args) = opts.parse_args()
333
334 if len(args) > 0:
335 filename = args[0]
336 else:
337 filename = 'hole.cfg'
338
339 print 'Reading grammar file', filename
340 grammar = GrammarFile.read_file(filename)
341 parser = grammar.earley_parser(trace=options.verbosity)
342
343
344 print 'Sentence: ',
345 line = sys.stdin.readline()[:-1]
346
347
348 tokens = list(tokenize.whitespace(line))
349 trees = parser.get_parse_list(tokens)
350 print 'Got %d different parses' % len(trees)
351
352 for tree in trees:
353
354 sem = tree[0].node['sem'].simplify()
355
356
357 sem = sem.skolemise()
358
359
360
361
362 usr = bracket_parse(str(sem))
363
364
365
366 hole_sem = HoleSemantics(usr)
367
368
369 if options.show_raw:
370 print
371 print 'Raw expression'
372 print usr
373
374
375 if options.show_components:
376 print
377 print 'Holes: ', hole_sem.holes
378 print 'Labels: ', hole_sem.labels
379 print 'Constraints: ', hole_sem.constraints
380 print 'Top hole: ', hole_sem.top_hole
381 print 'Top labels: ', hole_sem.top_most_labels
382 print 'Fragments:'
383 for (l,f) in hole_sem.fragments.items():
384 print '\t%s: %s' % (l, f)
385
386
387 pluggings = hole_sem.pluggings()
388
389
390 trees = map(hole_sem.formula_tree, pluggings)
391
392
393 n = 1
394 for tree in trees:
395 print
396 print '%d. %s' % (n, tree)
397 n += 1
398
399
400 if options.draw_trees:
401 draw_trees(*trees)
402
403 print
404 print 'Done.'
405
406 if __name__ == '__main__':
407 main()
408