Package nltk_lite :: Package parse :: Module category
[hide private]
[frames] | no frames]

Source Code for Module nltk_lite.parse.category

  1  # Natural Language Toolkit: Categories 
  2  # 
  3  # Copyright (C) 2001-2007 University of Pennsylvania 
  4  # Author: Contributed by Rob Speer (NLTK version) 
  5  #         Steven Bird <sb@csse.unimelb.edu.au> (NLTK-Lite Port) 
  6  #         Ewan Klein <ewan@inf.ed.ac.uk> (Hooks for semantics) 
  7  #         Peter Wang <wangp@csse.unimelb.edu.au> (Overhaul) 
  8  # URL: <http://nltk.sourceforge.net> 
  9  # For license information, see LICENSE.TXT 
 10  # 
 11  # $Id: category.py 4519 2007-05-16 08:16:25Z stevenbird $ 
 12   
 13  from nltk_lite.parse import * 
 14  from nltk_lite.featurestructure import * 
 15  from nltk_lite.semantics import logic 
 16  import string 
 17   
18 -class Category(FeatureStructure, Nonterminal):
19 """ 20 A C{Category} is a specialized feature structure, intended for use in 21 parsing. It can act as a C{Nonterminal}. 22 23 A C{Category} differs from a C{FeatureStructure} in these ways: 24 25 - Categories may not be re-entrant. 26 27 - Categories use value-based equality, while FeatureStructures use 28 identity-based equality. 29 30 - Strings in Categories are compared case-insensitively. 31 32 - Categories have one feature marked as the 'head', which prints 33 differently than other features if it has a value. For example, 34 in the C{repr()} representation of a Category, the head goes to the 35 left, on the outside of the brackets. Subclasses of C{Category} 36 may change the feature name that is designated as the head, which is 37 _head by default. 38 39 - Subclasses of C{Category} may contain a list of I{required features}, 40 which are names of features whose value is None if unspecified. A 41 Category lacking a feature that is required in it will not unify with 42 any Category that has that feature. If a required feature's value is 43 C{None}, it is considered to be not present. (Mixing different 44 subclasses of C{Category} is probably a bad idea.) 45 46 - C{True} and C{False} are allowed as values. A feature named C{foo} 47 with a value of C{True} is simply expressed as C{+foo}. Similarly, if 48 it is C{False}, it is expressed as C{-foo}. 49 """ 50 51 headname = '_head' 52 requiredFeatures = [] 53
54 - def __init__(self, **features):
55 FeatureStructure.__init__(self, **features) 56 self._required = self.__class__.requiredFeatures 57 for name in self._required: 58 if not self._features.has_key(name): 59 self._features[name] = None 60 items = self._features.items() 61 items.sort() 62 self._hash = None 63 self._frozen = False 64 self._memorepr = None
65
66 - def required_features(self):
67 "@return: A list of the names of all required features." 68 return self._required
69
70 - def __cmp__(self, other):
71 return cmp(repr(self), repr(other))
72
73 - def __div__(self, other):
74 """ 75 @return: A new Category based on this one, with its C{/} feature set to 76 C{other}. 77 """ 78 temp = self.deepcopy() 79 dict = temp._features 80 dict['/'] = other 81 return self.__class__(**dict)
82
83 - def __eq__(self, other):
84 """ 85 @return: True if C{self} and C{other} assign the same value to 86 to every feature. In particular, return true if 87 C{self[M{p}]==other[M{p}]} for every feature path M{p} such 88 that C{self[M{p}]} or C{other[M{p}]} is a base value (i.e., 89 not a nested Category). 90 @rtype: C{bool} 91 """ 92 93 # Get the result of equal_values, and make it a real boolean while 94 # we're at it. 95 if not other.__class__ == self.__class__: return False 96 if hash(self) != hash(other): return False 97 return (self.equal_values(other) == True)
98
99 - def __ne__(self, other):
100 return not (self == other)
101
102 - def __hash__(self):
103 if self._hash is not None: return self._hash 104 items = self._features.items() 105 items.sort() 106 return hash(tuple(items))
107
108 - def freeze(self):
109 """ 110 Freezing a Category memoizes its hash value, to make comparisons on it 111 faster. After freezing, the Category and all its values are immutable. 112 113 @return: self 114 """ 115 for val in self._features.values(): 116 if isinstance(val, Category) and not val.frozen(): 117 val.freeze() 118 self._hash = hash(self) 119 self._memorepr = self._repr({}, {}) 120 self._frozen = True 121 return self
122
123 - def frozen(self):
124 """ 125 Returns whether this Category is frozen (immutable). 126 127 @rtype: C{bool} 128 """ 129 return self._frozen
130
131 - def __setitem__(self, name, value):
132 if self._frozen: raise "Cannot modify a frozen Category" 133 self._features[name] = value
134
135 - def symbol(self):
136 """ 137 @return: The node value corresponding to this C{Category}. 138 @rtype: C{Category} 139 """ 140 return self
141
142 - def head(self):
143 """ 144 @return: The head of this category (the value shown outside the 145 brackets in its string representation). If there is no head, returns 146 None. 147 @rtype: C{str} or C{None} 148 """ 149 return self._features.get(self.__class__.headname)
150
151 - def deepcopy(self, memo=None):
152 """ 153 @return: A deep copy of C{self}. 154 """ 155 newcopy = self.__class__() 156 features = newcopy._features 157 158 # Fill out the features. 159 for (fname, fval) in self._features.items(): 160 if isinstance(fval, FeatureStructure): 161 features[fname] = fval.deepcopy() 162 else: 163 features[fname] = fval 164 165 return newcopy
166
167 - def reentrances(self):
168 return []
169
170 - def feature_names(self):
171 """ 172 @return: a list of all features that have values. 173 """ 174 return filter(lambda x: not (x in self._required and self[x] is None), 175 self._features.keys())
176
177 - def get_feature(self, *args):
178 try: 179 return self.__getitem__(*args) 180 except IndexError: 181 return StarValue()
182
183 - def has_feature(self, name):
184 return (name in self.feature_names())
185 186 ################################################################# 187 ## Variables 188 ################################################################# 189
190 - def remove_unbound_vars(self):
191 selfcopy = self.deepcopy() 192 selfcopy._remove_unbound_vars() 193 return selfcopy
194
195 - def _remove_unbound_vars(self):
196 for (fname, fval) in self._features.items(): 197 if isinstance(fval, FeatureVariable): 198 del self._features[fname] 199 elif isinstance(fval, Category): 200 fval._remove_unbound_vars()
201 202 ################################################################# 203 ## Unification 204 ################################################################# 205
206 - def _destructively_unify(self, other, bindings, trace=False, depth=0):
207 FeatureStructure._destructively_unify(self, other, bindings, \ 208 trace=trace, ci_str_cmp=True, depth=depth)
209 210 ################################################################# 211 ## String Representations 212 ################################################################# 213 214
215 - def __repr__(self):
216 """ 217 @return: A string representation of this feature structure. 218 """ 219 if self._memorepr is not None: return self._memorepr 220 else: return self._repr({}, {})
221
222 - def _repr(self, reentrances, reentrance_ids):
223 segments = [] 224 225 items = self.feature_names() 226 items.sort() # sorting note: keys are unique strings, so we'll 227 # never fall through to comparing values. 228 for fname in items: 229 if fname == self.__class__.headname: continue 230 fval = self[fname] 231 if isinstance(fval, bool): 232 if fval: segments.append('+%s' % fname) 233 else: segments.append('-%s' % fname) 234 elif not isinstance(fval, Category): 235 segments.append('%s=%r' % (fname, fval)) 236 else: 237 fval_repr = fval._repr(reentrances, reentrance_ids) 238 segments.append('%s=%s' % (fname, fval_repr)) 239 240 head = self._features.get(self.__class__.headname) 241 if head is None: head = '' 242 if head and not len(segments): return head 243 return '%s[%s]' % (head, ', '.join(segments))
244
245 - def _str(self, reentrances, reentrance_ids):
246 # This code is very similar to FeatureStructure._str but 247 # we print the head feature very differently, so it's hard to 248 # combine the two methods. 249 250 # Special case: 251 if len(self.feature_names()) == 0: 252 return ['[]'] 253 if self.feature_names() == [self.__class__.headname]: 254 return ['%s[]' % self[self.__class__.headname]] 255 256 257 # What's the longest feature name? Use this to align names. 258 maxfnamelen = max([len(k) for k in self.feature_names()]) 259 260 lines = [] 261 items = self.feature_names() 262 items.sort() # sorting note: keys are unique strings, so we'll 263 # never fall through to comparing values. 264 if self.__class__.headname in items: 265 items.remove(self.__class__.headname) 266 # items.insert(0, self.__class__.headname) 267 for fname in items: 268 fval = self[fname] 269 270 if not isinstance(fval, FeatureStructure): 271 # It's not a nested feature structure -- just print it. 272 lines.append('%s = %r' % (fname.ljust(maxfnamelen), fval)) 273 274 else: 275 # It's a new feature structure. Separate it from 276 # other values by a blank line. 277 if lines and lines[-1] != '': lines.append('') 278 279 # Recursively print the feature's value (fval). 280 fval_lines = fval._str(reentrances, reentrance_ids) 281 282 # Indent each line to make room for fname. 283 fval_lines = [(' '*(maxfnamelen+3))+l for l in fval_lines] 284 285 # Pick which line we'll display fname on. 286 nameline = (len(fval_lines)-1)/2 287 288 fval_lines[nameline] = ( 289 fname.ljust(maxfnamelen)+' ='+ 290 fval_lines[nameline][maxfnamelen+2:]) 291 292 # Add the feature structure to the output. 293 lines += fval_lines 294 295 # Separate FeatureStructures by a blank line. 296 lines.append('') 297 298 # Get rid of any excess blank lines. 299 if lines[-1] == '': lines = lines[:-1] 300 301 # Add brackets around everything. 302 headline = (len(lines) - 1)/2 303 if self.has_feature(self.__class__.headname): 304 head = self[self.__class__.headname] 305 else: 306 head = '' 307 maxlen = max([len(line) for line in lines]) 308 for l in range(len(lines)): 309 line = lines[l] 310 if l == headline: 311 lines[l] = ('%s[ %s%s ]' % (head, line, ' '*(maxlen-len(line)))) 312 else: 313 lines[l] = ('%s[ %s%s ]' % (' '*len(head), line, ' '*(maxlen-len(line)))) 314 315 return lines
316 317 ################################################################# 318 ## Parsing 319 ################################################################# 320 321 # Regular expressions for parsing. 322 # Extend the expressions already present in FeatureStructure._PARSE_RE 323 _PARSE_RE = {'categorystart': re.compile(r'\s*([^\s\(\)"\'\-=,\[\]]*)\s*\['), 324 'bool': re.compile(r'\s*([-\+])'), 325 'arrow': re.compile(r'\s*->\s*'), 326 #'application': re.compile(r'(app)\((\?[a-z][a-z]*)\s*,\s*(\?[a-z][a-z]*)\)'), 327 'disjunct': re.compile(r'\s*\|\s*'), 328 'whitespace': re.compile(r'\s*')} 329 for (k, v) in FeatureStructure._PARSE_RE.iteritems(): 330 assert k not in _PARSE_RE 331 _PARSE_RE[k] = v 332 333 # [classmethod]
334 - def _parse(cls, s, position=0, reentrances=None):
335 """ 336 Helper function that parses a Category. 337 @param s: The string to parse. 338 @param position: The position in the string to start parsing. 339 @param reentrances: A dictionary from reentrance ids to values. 340 @return: A tuple (val, pos) of the feature structure created 341 by parsing and the position where the parsed feature 342 structure ends. 343 """ 344 # A set of useful regular expressions (precompiled) 345 _PARSE_RE = cls._PARSE_RE 346 347 # Find the head, if there is one. 348 match = _PARSE_RE['name'].match(s, position) 349 if match is not None: 350 head = match.group(1) 351 position = match.end() 352 else: head = None 353 354 # Check that the name is followed by an open bracket. 355 if position >= len(s) or s[position] != '[': 356 return cls(**{cls.headname: head}), position 357 position += 1 358 359 # If it's immediately followed by a close bracket, then just 360 # return an empty feature structure. 361 match = _PARSE_RE['bracket'].match(s, position) 362 if match is not None: 363 if head is None: return cls(), match.end() 364 else: return cls(**{cls.headname: head}), match.end() 365 366 # Build a list of the features defined by the structure. 367 # Each feature has one of the three following forms: 368 # name = value 369 # +name 370 # -name 371 features = {} 372 if head is not None: features[cls.headname] = head 373 while position < len(s): 374 # Use these variables to hold info about the feature: 375 name = target = val = None 376 377 # Is this a shorthand boolean value? 378 match = _PARSE_RE['bool'].match(s, position) 379 if match is not None: 380 if match.group(1) == '+': val = True 381 else: val = False 382 position = match.end() 383 384 # Find the next feature's name. 385 match = _PARSE_RE['name'].match(s, position) 386 if match is None: raise ValueError('feature name', position) 387 name = match.group(1) 388 position = match.end() 389 390 # If it's not a shorthand boolean, it must be an assignment. 391 if val is None: 392 match = _PARSE_RE['assign'].match(s, position) 393 if match is None: raise ValueError('equals sign', position) 394 position = match.end() 395 val, position = cls._parseval(s, position, reentrances) 396 features[name] = val 397 398 # Check for a close bracket 399 match = _PARSE_RE['bracket'].match(s, position) 400 if match is not None: 401 return cls(**features), match.end() 402 403 # Otherwise, there should be a comma 404 match = _PARSE_RE['comma'].match(s, position) 405 if match is None: raise ValueError('comma', position) 406 position = match.end() 407 408 # We never saw a close bracket. 409 raise ValueError('close bracket', position)
410 411 # [classmethod]
412 - def _parseval(cls, s, position, reentrances):
413 """ 414 Helper function that parses a feature value. Currently 415 supports: None, bools, integers, variables, strings, nested feature 416 structures. 417 @param s: The string to parse. 418 @param position: The position in the string to start parsing. 419 @param reentrances: A dictionary from reentrance ids to values. 420 @return: A tuple (val, pos) of the value created by parsing 421 and the position where the parsed value ends. 422 """ 423 # A set of useful regular expressions (precompiled) 424 _PARSE_RE = cls._PARSE_RE 425 426 # End of string (error) 427 if position == len(s): raise ValueError('value', position) 428 429 # Semantic value of the form <app(?x, ?y) >'; return an ApplicationExpression 430 match = _PARSE_RE['application'].match(s, position) 431 if match is not None: 432 fun = ParserSubstitute(match.group(2)).next() 433 arg = ParserSubstitute(match.group(3)).next() 434 return ApplicationExpressionSubst(fun, arg), match.end() 435 436 # other semantic value enclosed by '< >'; return value given by the lambda expr parser 437 match = _PARSE_RE['semantics'].match(s, position) 438 if match is not None: 439 return ParserSubstitute(match.group(1)).next(), match.end() 440 441 # String value 442 if s[position] in "'\"": 443 start = position 444 quotemark = s[position:position+1] 445 position += 1 446 while 1: 447 match = _PARSE_RE['stringmarker'].search(s, position) 448 if not match: raise ValueError('close quote', position) 449 position = match.end() 450 if match.group() == '\\': position += 1 451 elif match.group() == quotemark: 452 return eval(s[start:position]), position 453 454 # Nested category 455 if _PARSE_RE['categorystart'].match(s, position) is not None: 456 return cls._parse(s, position, reentrances) 457 458 # Variable 459 match = _PARSE_RE['var'].match(s, position) 460 if match is not None: 461 return FeatureVariable.parse(match.group()), match.end() 462 463 # None 464 match = _PARSE_RE['none'].match(s, position) 465 if match is not None: 466 return None, match.end() 467 468 # Integer value 469 match = _PARSE_RE['int'].match(s, position) 470 if match is not None: 471 return int(match.group()), match.end() 472 473 # Alphanumeric symbol (must be checked after integer) 474 match = _PARSE_RE['symbol'].match(s, position) 475 if match is not None: 476 return cls(**{cls.headname: match.group()}), match.end() 477 478 # We don't know how to parse this value. 479 raise ValueError('value', position)
480 481 # [classmethod] 482 # Used by GrammarFile
483 - def parse_rules(cls, s):
484 """ 485 Parse a L{CFG} line involving C{Categories}. A line has this form: 486 487 C{lhs -> rhs | rhs | ...} 488 489 where C{lhs} is a Category, and each C{rhs} is a sequence of 490 Categories. 491 492 @returns: a list of C{Productions}, one for each C{rhs}. 493 """ 494 _PARSE_RE = cls._PARSE_RE 495 position = 0 496 try: 497 lhs, position = cls._parse(s, position) 498 except ValueError, e: 499 estr = ('Error parsing field structure\n\n\t' + 500 s + '\n\t' + ' '*e.args[1] + '^ ' + 501 'Expected %s\n' % e.args[0]) 502 raise ValueError, estr 503 lhs.freeze() 504 505 match = _PARSE_RE['arrow'].match(s, position) 506 if match is None: raise ValueError('arrow', position) 507 else: position = match.end() 508 rules = [] 509 while position < len(s): 510 rhs = [] 511 while position < len(s) and _PARSE_RE['disjunct'].match(s, position) is None: 512 try: 513 val, position = cls._parseval(s, position, {}) 514 except ValueError, e: 515 estr = ('Error parsing field structure\n\n\t' + 516 s + '\n\t' + ' '*e.args[1] + '^ ' + 517 'Expected %s\n' % e.args[0]) 518 raise ValueError, estr 519 if isinstance(val, Category): val.freeze() 520 rhs.append(val) 521 position = _PARSE_RE['whitespace'].match(s, position).end() 522 rules.append(Production(lhs, rhs)) 523 524 if position < len(s): 525 match = _PARSE_RE['disjunct'].match(s, position) 526 position = match.end() 527 528 # Special case: if there's nothing after the arrow, it is one rule with 529 # an empty RHS, instead of no rules. 530 if len(rules) == 0: rules = [Production(lhs, ())] 531 return rules
532 533 _parseval=classmethod(_parseval) 534 _parse=classmethod(_parse) 535 parse_rules=classmethod(parse_rules)
536 537
538 -class GrammarCategory(Category):
539 """ 540 A class of C{Category} for use in parsing. 541 542 The name of the head feature in a C{GrammarCategory} is C{pos} (for "part 543 of speech"). There is one required feature, C{/}, which is intended to 544 indicate a type of phrase that is missing from the grammatical structure. 545 546 In addition, GrammarCategories are displayed and parse differently, to be 547 consistent with NLP teaching materials: the value of the C{/} feature can 548 be written with a slash after the right bracket, so that the string 549 representation looks like: C{head[...]/value}. 550 551 An example of a C{GrammarCategory} is C{VP[+fin]/NP}, for a verb phrase 552 that is finite and has an omitted noun phrase inside it. 553 """ 554 555 headname = 'pos' 556 requiredFeatures = ['/'] 557
558 - def _repr(self, reentrances, reentrance_ids):
559 segments = [] 560 561 items = self.feature_names() 562 items.sort() # sorting note: keys are unique strings, so we'll 563 # never fall through to comparing values. 564 for fname in items: 565 if fname == self.__class__.headname or fname == '/': continue 566 fval = self[fname] 567 if isinstance(fval, bool): 568 if fval: segments.append('+%s' % fname) 569 else: segments.append('-%s' % fname) 570 elif isinstance(fval, logic.Expression): 571 segments.append('%s=%r' % (fname, fval.__str__())) 572 elif not isinstance(fval, Category): 573 segments.append('%s=%r' % (fname, fval)) 574 else: 575 fval_repr = fval._repr(reentrances, reentrance_ids) 576 segments.append('%s=%s' % (fname, fval_repr)) 577 578 head = self._features.get(self.__class__.headname) 579 if head is None: head = '' 580 if not len(segments): features = '' 581 else: features = "[%s]" % ', '.join(segments) 582 slash = self._features.get('/') 583 if slash is None: slash = '' 584 else: slash = '/%r' % slash 585 586 return '%s%s%s' % (head, features, slash)
587 588 # Regular expressions for parsing. 589 # Extend the expressions from Category and FeatureStructure. 590 # Assumes that Applications in sem always take FeatureVariable arguments 591 _PARSE_RE = {'semantics': re.compile(r'<([^>]+)>'), 592 'application': re.compile(r'<(app)\((\?[a-z][a-z]*)\s*,\s*(\?[a-z][a-z]*)\)>'), 593 'slash': re.compile(r'\s*/\s*')} 594 for (k, v) in Category._PARSE_RE.iteritems(): 595 assert k not in _PARSE_RE 596 _PARSE_RE[k] = v 597 # These we actually do want to override. 598 _PARSE_RE['name'] = re.compile(r'\s*([^\s\(\)"\'\-=,\[\]/]+)\s*') 599 _PARSE_RE['categorystart'] = re.compile(r'\s*([^\s\(\)"\'\-=,\[\]/]*)\s*(\[|/)') 600 601 # [classmethod]
602 - def _parse(cls, s, position=0, reentrances=None):
603 # A set of useful regular expressions (precompiled) 604 _PARSE_RE = cls._PARSE_RE 605 606 features = {} 607 608 # Find the head, if there is one. 609 match = _PARSE_RE['name'].match(s, position) 610 if match is not None: 611 features[cls.headname] = match.group(1) 612 position = match.end() 613 614 # If the name is followed by an open bracket, start looking for 615 # features. 616 if position < len(s) and s[position] == '[': 617 position += 1 618 619 # Build a list of the features defined by the structure. 620 # Each feature has one of the three following forms: 621 # name = value 622 # +name 623 # -name 624 while True: 625 if not position < len(s): 626 raise ValueError('close bracket', position) 627 628 # Use these variables to hold info about the feature: 629 name = target = val = None 630 631 # Check for a close bracket at the beginning 632 match = _PARSE_RE['bracket'].match(s, position) 633 if match is not None: 634 position = match.end() 635 # Get out and check for a slash value. 636 break 637 638 # Is this a shorthand boolean value? 639 match = _PARSE_RE['bool'].match(s, position) 640 if match is not None: 641 if match.group(1) == '+': val = True 642 else: val = False 643 position = match.end() 644 645 # Find the next feature's name. 646 match = _PARSE_RE['name'].match(s, position) 647 if match is None: raise ValueError('feature name', position) 648 name = match.group(1) 649 position = match.end() 650 651 # If it's not a shorthand boolean, it must be an assignment. 652 if val is None: 653 match = _PARSE_RE['assign'].match(s, position) 654 if match is None: raise ValueError('equals sign', position) 655 position = match.end() 656 657 val, position = cls._parseval(s, position, reentrances) 658 features[name] = val 659 660 # Check for a close bracket 661 match = _PARSE_RE['bracket'].match(s, position) 662 if match is not None: 663 position = match.end() 664 # Get out and check for a slash value. 665 break 666 667 # Otherwise, there should be a comma 668 match = _PARSE_RE['comma'].match(s, position) 669 if match is None: raise ValueError('comma', position) 670 position = match.end() 671 672 # Check for a slash value 673 match = _PARSE_RE['slash'].match(s, position) 674 if match is not None: 675 position = match.end() 676 slash, position = cls._parseval(s, position, 0) 677 features['/'] = slash 678 679 return cls(**features), position
680 681 _parse = classmethod(_parse)
682
683 -class ParserSubstitute(logic.Parser):
684 """ 685 A lambda calculus expression parser, extended to create application 686 expressions which support the SubstituteBindingsI interface. 687 """
688 - def make_ApplicationExpression(self, first, second):
689 return ApplicationExpressionSubst(first, second)
690
691 -class ApplicationExpressionSubst(logic.ApplicationExpression, SubstituteBindingsI):
692 """ 693 A lambda application expression, extended to implement the 694 SubstituteBindingsI interface. 695 """
696 - def substitute_bindings(self, bindings):
697 newval = self 698 for semvar in self.variables(): 699 varstr = str(semvar) 700 # discard Variables which are not FeatureVariables 701 if varstr.startswith('?'): 702 var = FeatureVariable.parse(varstr) 703 if bindings.is_bound(var): 704 newval = newval.replace(semvar, bindings.lookup(var)) 705 return newval
706 707 ############################################################################ 708 # Read a grammar from a file 709 ############################################################################ 710
711 -class GrammarFile(object):
712 - def __init__(self):
713 self.grammatical_productions = [] 714 self.lexical_productions = [] 715 self.start = GrammarCategory(pos='Start').freeze()
716
717 - def grammar(self):
718 return cfg.Grammar(self.start, self.grammatical_productions +\ 719 self.lexical_productions)
720
721 - def earley_grammar(self):
722 return cfg.Grammar(self.start, self.grammatical_productions)
723
724 - def earley_lexicon(self):
725 lexicon = {} 726 for prod in self.lexical_productions: 727 lexicon.setdefault(prod.rhs()[0], []).append(prod.lhs()) 728 return lexicon
729
730 - def earley_parser(self, trace=1):
734
735 - def apply_lines(self, lines):
736 for line in lines: 737 line = line.strip() 738 if not len(line): continue 739 if line[0] == '#': continue 740 if line[0] == '%': 741 parts = line[1:].split() 742 directive = parts[0] 743 args = string.join(parts[1:]) 744 if directive == 'start': 745 self.start = GrammarCategory.parse(args).freeze() 746 elif directive == 'include': 747 filename = args.strip('"') 748 self.apply_file(filename) 749 else: 750 rules = GrammarCategory.parse_rules(line) 751 for rule in rules: 752 if len(rule.rhs()) == 1 and isinstance(rule.rhs()[0], str): 753 self.lexical_productions.append(rule) 754 else: 755 self.grammatical_productions.append(rule)
756
757 - def apply_file(self, filename):
758 f = open(filename) 759 lines = f.readlines() 760 self.apply_lines(lines) 761 f.close()
762
763 - def read_file(filename):
764 result = GrammarFile() 765 result.apply_file(filename) 766 return result
767 read_file = staticmethod(read_file)
768 769
770 -def demo():
771 from nltk_lite.parse import category, GrammarFile 772 print "Category(pos='n', agr=Category(number='pl', gender='f')):" 773 print 774 print category.Category(pos='n', agr=Category(number='pl', gender='f')) 775 print 776 print "GrammarCategory.parse('VP[+fin]/NP[+pl]'):" 777 print 778 print category.GrammarCategory.parse('VP[+fin]/NP[+pl]') 779 print 780 g = GrammarFile.read_file("test.cfg") 781 print g.grammar()
782 783 if __name__ == '__main__': 784 demo() 785