Package Martel :: Module msre_parse
[hide private]
[frames | no frames]

Source Code for Module Martel.msre_parse

  1  # 
  2  # Secret Labs' Regular Expression Engine 
  3  # 
  4  # convert re-style regular expression to sre pattern 
  5  # 
  6  # Copyright (c) 1998-2001 by Secret Labs AB.  All rights reserved. 
  7  # 
  8  # See the sre.py file in Python 2.1 for information on usage and 
  9  # redistribution. 
 10  # 
 11  # Changes for Martel Copyright 2000-2001 by Dalke Scientific Software, LLC 
 12  # Distributed under the Biopython License Agreement (see the LICENSE file). 
 13   
 14  # XXX: show string offset and offending character for all errors 
 15   
 16  # this module works under 1.5.2 and later.  don't use string methods 
 17  import string, sys 
 18   
 19  from msre_constants import *  # Modified version of Secret Labs' sre_constants 
 20  import re  # needed to verify the attr format 
 21   
 22  SPECIAL_CHARS = ".\\[{()*+?^$|" 
 23  REPEAT_CHARS = "*+?{" 
 24   
 25  DIGITS = tuple("0123456789") 
 26   
 27  OCTDIGITS = tuple("01234567") 
 28  HEXDIGITS = tuple("0123456789abcdefABCDEF") 
 29   
 30  WHITESPACE = tuple(" \t\n\r\v\f") 
 31   
 32  ESCAPES = { 
 33      r"\a": (LITERAL, ord("\a")), 
 34      r"\b": (LITERAL, ord("\b")), 
 35      r"\f": (LITERAL, ord("\f")), 
 36      r"\n": (LITERAL, ord("\n")), 
 37      r"\r": (LITERAL, ord("\r")), 
 38      r"\R": (IN, [(CATEGORY, CATEGORY_NEWLINE)]),  # Martel extension 
 39      r"\t": (LITERAL, ord("\t")), 
 40      r"\v": (LITERAL, ord("\v")), 
 41      r"\\": (LITERAL, ord("\\")) 
 42  } 
 43   
 44  CATEGORIES = { 
 45      r"\A": (AT, AT_BEGINNING_STRING), # start of string 
 46      r"\b": (AT, AT_BOUNDARY), 
 47      r"\B": (AT, AT_NON_BOUNDARY), 
 48      r"\d": (IN, [(CATEGORY, CATEGORY_DIGIT)]), 
 49      r"\D": (IN, [(CATEGORY, CATEGORY_NOT_DIGIT)]), 
 50      r"\R": (NEWLINE, None),  # Martel extension 
 51      r"\s": (IN, [(CATEGORY, CATEGORY_SPACE)]), 
 52      r"\S": (IN, [(CATEGORY, CATEGORY_NOT_SPACE)]), 
 53      r"\w": (IN, [(CATEGORY, CATEGORY_WORD)]), 
 54      r"\W": (IN, [(CATEGORY, CATEGORY_NOT_WORD)]), 
 55      r"\Z": (AT, AT_END_STRING), # end of string 
 56  } 
 57   
 58  FLAGS = { 
 59      # standard flags 
 60      "i": SRE_FLAG_IGNORECASE, 
 61      "L": SRE_FLAG_LOCALE, 
 62      "m": SRE_FLAG_MULTILINE, 
 63      "s": SRE_FLAG_DOTALL, 
 64      "x": SRE_FLAG_VERBOSE, 
 65      # extensions 
 66      "t": SRE_FLAG_TEMPLATE, 
 67      "u": SRE_FLAG_UNICODE, 
 68  } 
 69   
 70  # figure out best way to convert hex/octal numbers to integers 
 71  try: 
 72      int("10", 8) 
 73      atoi = int # 2.0 and later 
 74  except TypeError: 
 75      atoi = string.atoi # 1.5.2 
 76   
class Pattern:
    """Master pattern object; keeps track of pattern-wide state.

    Attributes:
      flags     -- flag bits in effect (starts at 0)
      open      -- ids of groups that have been opened but not yet closed
      groups    -- the id the next opened group will receive (starts at 1)
      groupdict -- maps group names to group ids
    """

    def __init__(self):
        self.groupdict = {}
        self.groups = 1
        self.open = []
        self.flags = 0

    def opengroup(self, name=None):
        """Allocate the next group id, mark it open and return it.

        A true 'name' is also recorded in groupdict.
        """
        new_gid = self.groups
        if name:
            self.groupdict[name] = new_gid
        self.open.append(new_gid)
        self.groups = new_gid + 1
        return new_gid

    def closegroup(self, gid):
        """Mark group 'gid' as closed (raises ValueError if it is not open)."""
        self.open.remove(gid)

    def checkgroup(self, gid):
        """Return true if 'gid' has been allocated and is no longer open."""
        return not (gid >= self.groups or gid in self.open)
95
96 -class SubPattern:
97 # a subpattern, in intermediate form
98 - def __init__(self, pattern, data=None):
99 self.pattern = pattern 100 if not data: 101 data = [] 102 self.data = data 103 self.width = None
104 - def dump(self, level=0):
105 nl = 1 106 for op, av in self.data: 107 print level*" " + op,; nl = 0 108 if op == "in": 109 # member sublanguage 110 print; nl = 1 111 for op, a in av: 112 print (level+1)*" " + op, a 113 elif op == "branch": 114 print; nl = 1 115 i = 0 116 for a in av[1]: 117 if i > 0: 118 print level*" " + "or" 119 a.dump(level+1); nl = 1 120 i = i + 1 121 elif type(av) in (type(()), type([])): 122 for a in av: 123 if isinstance(a, SubPattern): 124 if not nl: print 125 a.dump(level+1); nl = 1 126 else: 127 print a, ; nl = 0 128 else: 129 print av, ; nl = 0 130 if not nl: print
131 - def __repr__(self):
132 return repr(self.data)
133 - def __len__(self):
134 return len(self.data)
135 - def __delitem__(self, index):
136 del self.data[index]
137 - def __getitem__(self, index):
138 return self.data[index]
139 - def __setitem__(self, index, code):
140 self.data[index] = code
141 - def __getslice__(self, start, stop):
142 return SubPattern(self.pattern, self.data[start:stop])
143 - def insert(self, index, code):
144 self.data.insert(index, code)
145 - def append(self, code):
146 self.data.append(code)
147 - def getwidth(self):
148 # determine the width (min, max) for this subpattern 149 if self.width: 150 return self.width 151 lo = hi = 0L 152 for op, av in self.data: 153 if op is BRANCH: 154 i = sys.maxint 155 j = 0 156 for av in av[1]: 157 l, h = av.getwidth() 158 i = min(i, l) 159 j = max(j, h) 160 lo = lo + i 161 hi = hi + j 162 elif op is CALL: 163 i, j = av.getwidth() 164 lo = lo + i 165 hi = hi + j 166 elif op is SUBPATTERN: 167 i, j = av[1].getwidth() 168 lo = lo + i 169 hi = hi + j 170 elif op in (MIN_REPEAT, MAX_REPEAT): 171 i, j = av[2].getwidth() 172 lo = lo + long(i) * av[0] 173 hi = hi + long(j) * av[1] 174 elif op in (ANY, RANGE, IN, LITERAL, NOT_LITERAL, CATEGORY): 175 lo = lo + 1 176 hi = hi + 1 177 elif op == SUCCESS: 178 break 179 self.width = int(min(lo, sys.maxint)), int(min(hi, sys.maxint)) 180 return self.width
181
class Tokenizer:
    """Splits a pattern string into tokens with one token of lookahead.

    A token is a single character, or a backslash together with the
    character that follows it.  'next' always holds the upcoming token
    (None once the input is exhausted) and 'index' is the offset just
    past that token.
    """

    def __init__(self, string):
        self.string = string
        self.index = 0
        self.__next()

    def __next(self):
        # load the upcoming token into self.next and step past it
        text = self.string
        pos = self.index
        if pos >= len(text):
            self.next = None
            return
        token = text[pos]
        if token[0] == "\\":
            # a backslash always swallows the following character
            try:
                token = token + text[pos + 1]
            except IndexError:
                raise error("bogus escape")
        self.index = pos + len(token)
        self.next = token

    def match(self, char, skip=1):
        """Return 1 if the upcoming token equals 'char', else 0.

        On a match the token is consumed unless 'skip' is false.
        """
        if char != self.next:
            return 0
        if skip:
            self.__next()
        return 1

    def get(self):
        """Consume and return the upcoming token (None at end of input)."""
        token = self.next
        self.__next()
        return token

    def tell(self):
        """Return the current state as an (index, next) pair for seek()."""
        return self.index, self.next

    def seek(self, index):
        """Restore a state previously returned by tell()."""
        self.index, self.next = index
214 215 ### Martel changes to allow all XML identifiers 216 # '[a-zA-Z_:][-a-zA-Z0-9._:]*' 217 218 # Any (optional) attrs are stored after a '?' 219 # key/values are done in URL-style 220 # key1=value1&key2=value2; 221 # only [-a-zA-Z0-9._] are allowed to be unescaped 222 # escaping is done with URL-style hex escapes, so '=' becomes '%3D' 223 # For example: 224 # seqdb format="swissprot" version="38" 225 # can be represented as 226 # seqdb?format=swissprot&version=38 227 228 229 # Martel specific changes
def is_firstchar(char):
    """Return true if 'char' may start an XML-style identifier.

    Allowed first characters are ASCII letters, '_' and ':'.  'char' must
    be exactly one character long; anything else is rejected.
    """
    # Guard the length explicitly: the "in" test below is a substring
    # test, so without it "" and "_:" would be accepted, and the range
    # comparisons would accept any longer string starting with a letter.
    if len(char) != 1:
        return 0
    return "a" <= char <= "z" or "A" <= char <= "Z" or char in "_:"
232 233 # Martel specific changes
def is_char(char):
    """Return true if 'char' may appear inside an XML-style identifier.

    Allowed characters are ASCII letters, digits, '.', '_', ':' and '-'.
    'char' must be exactly one character long; anything else is rejected.
    """
    # Guard the length explicitly: the "in" test below is a substring
    # test, so without it "" and "._" would be accepted, and the range
    # comparisons would accept longer strings as well.
    if len(char) != 1:
        return 0
    return "a" <= char <= "z" or "A" <= char <= "Z" or \
           "0" <= char <= "9" or char in "._:-"
237 238 # Martel specific changes
def isname(name):
    """Return 1 if 'name' is a valid group name, else 0.

    A valid name is non-empty, starts with a character accepted by
    is_firstchar() and contains only characters accepted by is_char().
    """
    if not name or not is_firstchar(name[0]):
        return 0
    for char in name:
        if not is_char(char):
            return 0
    return 1
# More Martel specific changes
# Checks if the serialized form of attrs is correct
#
# The pattern ends in \Z rather than $ because $ also matches just before
# a trailing newline, which would let "name\n" through.
_name_with_attr_pattern = re.compile(r"""
[a-zA-Z_:]         # first character of the tag
[-a-zA-Z0-9._:]*   # rest of the tag
(\?                # optional attrs flagged with '?'
  (
    ([-a-zA-Z0-9._]|(%[0-9A-Fa-f]{2}))+   # name can contain % escapes
    =                                     # '=' flags value
    ([-a-zA-Z0-9._]|(%[0-9A-Fa-f]{2}))*   # value can contain % escapes
    (&                                    # flag for additional args
      ([-a-zA-Z0-9._]|(%[0-9A-Fa-f]{2}))+ # name
      =                                   # '='
      ([-a-zA-Z0-9._]|(%[0-9A-Fa-f]{2}))* # value
    )*                                    # 0 or more add'l args
  )?               # can have nothing after the '?'
)?                 # attrs are optional
\Z                 # must get full string
""", re.X)


def isname_with_attrs(name):
    """Return true if 'name' is a valid group name with optional attrs.

    The accepted form is a tag, optionally followed by '?' and URL-style
    'key=value' pairs joined by '&', e.g.
    "seqdb?format=swissprot&version=38".  Returns 0 for an empty name.
    """
    if not name:
        return 0
    return _name_with_attr_pattern.match(name) is not None
276 277 278 ### End of Martel changes 279
def _group(escape, groups):
    """Return the group id named by 'escape', or None if it is not valid.

    'escape' is a backslash followed by decimal digits.  The id is valid
    when it is non-zero and smaller than 'groups'.
    """
    try:
        gid = atoi(escape[1:])
    except ValueError:
        return None  # not a number at all
    if gid and gid < groups:
        return gid
    return None  # not a valid group
289
def _class_escape(source, escape):
    """Translate an escape found inside a character class.

    'escape' is the two-character escape token already read from 'source';
    further hex or octal digits are consumed from 'source' as needed.
    Returns an (opcode, argument) pair.  Raises error for an escape that
    cannot be interpreted.
    """
    # inside a class the single-character escapes win over the categories,
    # so "\b" means backspace here
    code = ESCAPES.get(escape)
    if code:
        return code
    code = CATEGORIES.get(escape)
    if code:
        return code
    try:
        if escape[1:2] == "x":
            # hexadecimal escape (exactly two digits)
            while source.next in HEXDIGITS and len(escape) < 4:
                escape = escape + source.get()
            digits = escape[2:]
            if len(digits) != 2:
                # report the escape as it appeared, including the "x"
                raise error("bogus escape: %s" % repr("\\x" + digits))
            return LITERAL, atoi(digits, 16) & 0xff
        elif str(escape[1:2]) in OCTDIGITS:
            # octal escape (up to three digits): the backslash plus three
            # digits is four characters, so stop reading at length 4
            while source.next in OCTDIGITS and len(escape) < 4:
                escape = escape + source.get()
            return LITERAL, atoi(escape[1:], 8) & 0xff
        if len(escape) == 2:
            # any other escaped character stands for itself
            return LITERAL, ord(escape[1])
    except ValueError:
        pass
    raise error("bogus escape: %s" % repr(escape))
318
def _escape(source, escape, state):
    """Translate an escape found outside a character class.

    'escape' is the two-character escape token already read from 'source';
    further digits are consumed from 'source' as needed.  'state' supplies
    the group count and the open-group check for backreferences.  Returns
    an (opcode, argument) pair and raises error for anything that cannot
    be interpreted.
    """
    # categories are consulted first, so "\b" is a word boundary here
    for table in (CATEGORIES, ESCAPES):
        code = table.get(escape)
        if code:
            return code
    try:
        kind = escape[1:2]
        if kind == "x":
            # hexadecimal escape: exactly two digits are required
            while source.next in HEXDIGITS and len(escape) < 4:
                escape = escape + source.get()
            if len(escape) != 4:
                raise ValueError
            return LITERAL, atoi(escape[2:], 16) & 0xff
        elif kind == "0":
            # octal escape
            while source.next in OCTDIGITS and len(escape) < 4:
                escape = escape + source.get()
            return LITERAL, atoi(escape[1:], 8) & 0xff
        elif kind in DIGITS:
            # octal escape *or* decimal group reference (sigh)
            if source.next in DIGITS:
                escape = escape + source.get()
                if (escape[1] in OCTDIGITS and escape[2] in OCTDIGITS and
                    source.next in OCTDIGITS):
                    # got three octal digits; this is an octal escape
                    escape = escape + source.get()
                    return LITERAL, atoi(escape[1:], 8) & 0xff
            # got at least one decimal digit; this is a group reference
            group = _group(escape, state.groups)
            if not group:
                raise ValueError
            if not state.checkgroup(group):
                raise error("cannot refer to open group")
            return GROUPREF, group
        if len(escape) == 2:
            # any other escaped character stands for itself
            return LITERAL, ord(escape[1])
    except ValueError:
        pass
    raise error("bogus escape: %s" % repr(escape))
362
def _parse_sub(source, state, nested=1):
    """Parse an alternation (a|b|c) and return it as a SubPattern.

    When 'nested' is true the alternation must be followed by ")" or the
    end of the pattern, otherwise error is raised.  A single alternative
    is returned unchanged; otherwise leading items shared by every
    alternative are hoisted out, and alternatives that are all single
    literals are stored as one character set instead of a branch.
    """
    branches = []
    while 1:
        branches.append(_parse(source, state))
        if source.match("|"):
            continue
        if not nested or not source.next or source.match(")", 0):
            break
        raise error("pattern not properly closed")

    if len(branches) == 1:
        return branches[0]

    result = SubPattern(state)

    # hoist items that every alternative starts with out of the branch
    while 1:
        head = None
        shared = 1
        for alternative in branches:
            if not alternative:
                shared = 0
                break
            if head is None:
                head = alternative[0]
            elif alternative[0] != head:
                shared = 0
                break
        if not shared:
            break
        for alternative in branches:
            del alternative[0]
        result.append(head)

    # check if the branch can be replaced by a character set
    only_literals = 1
    for alternative in branches:
        if len(alternative) != 1 or alternative[0][0] != LITERAL:
            only_literals = 0
            break

    if only_literals:
        # we can store this as a character set instead of a
        # branch (the compiler may optimize this even more)
        members = []
        for alternative in branches:
            members.append(alternative[0])
        result.append((IN, members))
    else:
        result.append((BRANCH, (None, branches)))
    return result
417
def _read_repeat_bound(source):
    # Read one bound of a {lo,hi} repeat: a run of digits or a group name
    # (Martel extension).  Returns "" when no bound is present.
    bound = ""
    # source.next is None at the end of the pattern; it must be checked
    # before being concatenated onto the text read so far
    while source.next is not None and \
          (source.next in DIGITS or isname(bound + source.next)):
        bound = bound + source.get()
    return bound


def _read_group_name(source, terminator):
    # Read characters up to (and consuming) 'terminator'; return the name.
    name = ""
    while 1:
        char = source.get()
        if char is None:
            raise error("unterminated name")
        if char == terminator:
            return name
        name = name + char


def _parse_set(source):
    # Parse a character set whose opening "[" has already been consumed
    # and return the (opcode, argument) item that represents it.
    members = []
    if source.match("^"):
        members.append((NEGATE, None))
    # a "]" only closes the set once something has been added to it
    start = members[:]
    while 1:
        this = source.get()
        if this == "]" and members != start:
            break
        elif this and this[0] == "\\":
            code1 = _class_escape(source, this)
        elif this:
            code1 = LITERAL, ord(this)
        else:
            raise error("unexpected end of regular expression")
        if source.match("-"):
            # potential range
            this = source.get()
            if this is None:
                # pattern ended right after the "-"
                raise error("unexpected end of regular expression")
            if this == "]":
                # trailing "-" is a literal dash
                if code1[0] is IN:
                    code1 = code1[1][0]
                members.append(code1)
                members.append((LITERAL, ord("-")))
                break
            if this[0] == "\\":
                code2 = _class_escape(source, this)
            else:
                code2 = LITERAL, ord(this)
            if code1[0] != LITERAL or code2[0] != LITERAL:
                raise error("bad character range")
            lo = code1[1]
            hi = code2[1]
            if hi < lo:
                raise error("bad character range")
            members.append((RANGE, (lo, hi)))
        else:
            if code1[0] is IN:
                # unwrap the single member of an escape such as \d
                code1 = code1[1][0]
            members.append(code1)

    # XXX: <fl> should move set optimization to compiler!
    if len(members) == 1 and members[0][0] is LITERAL:
        return members[0]  # optimization
    if (len(members) == 2 and members[0][0] is NEGATE and
        members[1][0] is LITERAL):
        return NOT_LITERAL, members[1][1]  # optimization
    # XXX: <fl> should add charmap optimization here
    return IN, members


def _parse(source, state):
    """Parse a simple pattern (one alternative) into a SubPattern.

    Tokens are read from 'source' until "|", ")" or the end of the
    pattern.  'state' is the shared pattern state: its flags may be
    updated by inline flag groups and its group bookkeeping is used for
    capturing groups and backreferences.  Raises error on malformed
    input.

    Martel extensions: group names may carry URL-style attributes, and
    the bounds of a {lo,hi} repeat may be group names instead of numbers.
    """
    subpattern = SubPattern(state)

    while 1:

        if source.next in ("|", ")"):
            break  # end of subpattern
        this = source.get()
        if this is None:
            break  # end of pattern

        if state.flags & SRE_FLAG_VERBOSE:
            # skip whitespace and comments
            if this in WHITESPACE:
                continue
            if this == "#":
                while 1:
                    this = source.get()
                    if this in (None, "\n"):
                        break
                continue

        if this and this[0] not in SPECIAL_CHARS:
            subpattern.append((LITERAL, ord(this)))

        elif this == "[":
            # character set
            subpattern.append(_parse_set(source))

        elif this and this[0] in REPEAT_CHARS:
            # repeat previous item
            if this == "?":
                min_count, max_count = 0, 1
            elif this == "*":
                min_count, max_count = 0, MAXREPEAT
            elif this == "+":
                min_count, max_count = 1, MAXREPEAT
            elif this == "{":
                here = source.tell()
                min_count, max_count = 0, MAXREPEAT
                ### Martel changes to allow a named reference for the
                ### repeat count (APD -- 21 Aug 2000)
                lo = _read_repeat_bound(source)
                if source.match(","):
                    hi = _read_repeat_bound(source)
                else:
                    hi = lo
                if not source.match("}"):
                    # not a repeat after all: treat "{" as a literal and
                    # re-read what followed it
                    subpattern.append((LITERAL, ord(this)))
                    source.seek(here)
                    continue
                named = 0
                if lo:
                    if is_firstchar(lo[:1]):
                        min_count = lo
                        named = 1
                    else:
                        min_count = atoi(lo)
                if hi:
                    if is_firstchar(hi[:1]):
                        max_count = hi
                        named = 1
                    else:
                        max_count = atoi(hi)
                # the interval can only be validated when both bounds
                # are numbers
                if not named and max_count < min_count:
                    raise error("bad repeat interval")
                ### End of Martel named group repeat changes
            else:
                raise error("not supported")
            # figure out which item to repeat
            if subpattern:
                item = subpattern[-1:]
            else:
                item = None
            if not item or (len(item) == 1 and item[0][0] == AT):
                raise error("nothing to repeat")
            if item[0][0] in (MIN_REPEAT, MAX_REPEAT):
                raise error("multiple repeat")
            if source.match("?"):
                subpattern[-1] = (MIN_REPEAT, (min_count, max_count, item))
            else:
                subpattern[-1] = (MAX_REPEAT, (min_count, max_count, item))

        elif this == ".":
            subpattern.append((ANY, None))

        elif this == "(":
            # group: 1 = capturing, 2 = non-capturing, 0 = no contents
            group = 1
            name = None
            if source.match("?"):
                group = 0
                # options
                if source.match("P"):
                    # python extensions
                    if source.match("<"):
                        # named group: skip forward to end of name
                        name = _read_group_name(source, ">")
                        group = 1
                        if not isname_with_attrs(name):
                            raise error("bad character in group name")
                    elif source.match("="):
                        # named backreference
                        name = _read_group_name(source, ")")
                        if not isname(name):
                            raise error("bad character in group name")
                        gid = state.groupdict.get(name)
                        if gid is None:
                            raise error("unknown group name")
                        subpattern.append((GROUPREF, gid))
                        continue
                    else:
                        char = source.get()
                        if char is None:
                            raise error("unexpected end of pattern")
                        raise error("unknown specifier: ?P%s" % char)
                elif source.match(":"):
                    # non-capturing group
                    group = 2
                elif source.match("#"):
                    # comment
                    while 1:
                        if source.next is None or source.next == ")":
                            break
                        source.get()
                    if not source.match(")"):
                        raise error("unbalanced parenthesis")
                    continue
                elif source.next in ("=", "!", "<"):
                    # lookahead assertions
                    char = source.get()
                    direction = 1
                    if char == "<":
                        if source.next not in ("=", "!"):
                            raise error("syntax error")
                        direction = -1  # lookbehind
                        char = source.get()
                    p = _parse_sub(source, state)
                    if not source.match(")"):
                        raise error("unbalanced parenthesis")
                    if char == "=":
                        subpattern.append((ASSERT, (direction, p)))
                    else:
                        subpattern.append((ASSERT_NOT, (direction, p)))
                    continue
                else:
                    # flags
                    if not FLAGS.has_key(source.next):
                        raise error("unexpected end of pattern")
                    while FLAGS.has_key(source.next):
                        state.flags = state.flags | FLAGS[source.get()]
            if group:
                # parse group contents
                if group == 2:
                    # anonymous group
                    group = None
                else:
                    group = state.opengroup(name)
                p = _parse_sub(source, state)
                if not source.match(")"):
                    raise error("unbalanced parenthesis")
                if group is not None:
                    state.closegroup(group)
                subpattern.append((SUBPATTERN, (group, p)))
            else:
                # a flags group must be closed immediately
                while 1:
                    char = source.get()
                    if char is None:
                        raise error("unexpected end of pattern")
                    if char == ")":
                        break
                    raise error("unknown extension")

        elif this == "^":
            subpattern.append((AT, AT_BEGINNING))

        elif this == "$":
            subpattern.append((AT, AT_END))

        elif this and this[0] == "\\":
            code = _escape(source, this, state)
            subpattern.append(code)

        else:
            raise error("parser error")

    return subpattern
691
def parse(str, flags=0, pattern=None):
    """Parse an 're' pattern into a SubPattern of (opcode, argument) items.

    'str' is the pattern text, 'flags' the initial flag bits and 'pattern'
    an optional state object (a new Pattern is created when it is None).
    Raises error if anything is left over after the top-level alternation.
    """
    source = Tokenizer(str)

    if pattern is None:
        pattern = Pattern()
    pattern.flags = flags
    pattern.str = str

    p = _parse_sub(source, pattern, 0)

    tail = source.get()
    if tail == ")":
        raise error("unbalanced parenthesis")
    if tail:
        raise error("bogus characters at end of regular expression")

    if flags & SRE_FLAG_DEBUG:
        p.dump()

    verbose_requested = flags & SRE_FLAG_VERBOSE
    if not verbose_requested and p.pattern.flags & SRE_FLAG_VERBOSE:
        # the VERBOSE flag was switched on inside the pattern.  to be
        # on the safe side, we'll parse the whole thing again...
        return parse(str, p.pattern.flags)

    return p
719
def parse_template(source, pattern):
    """Parse an 're' replacement string into group references and literals.

    'source' is the replacement text; 'pattern' must provide 'groups'
    (the number of groups) and 'groupindex' (name -> group number).

    Returns (groups, literals): 'literals' has one entry per template
    item, with None where a group value belongs, and 'groups' lists the
    (position, group) pairs for those slots.

    Raises error for malformed group references and IndexError for an
    unknown group name.
    """
    s = Tokenizer(source)
    p = []
    a = p.append

    def literal(text, p=p):
        # merge adjacent literal text into a single LITERAL entry
        if p and p[-1][0] is LITERAL:
            p[-1] = LITERAL, p[-1][1] + text
        else:
            p.append((LITERAL, text))

    # pick the character constructor that matches the template's string
    # type.  It gets its own name so the loop below, which reads single
    # characters, cannot overwrite it.
    sep = source[:0]
    if type(sep) is type(""):
        makechar = chr
    else:
        makechar = unichr

    while 1:
        this = s.get()
        if this is None:
            break  # end of replacement string
        if this and this[0] == "\\":
            # group
            if this == "\\g":
                name = ""
                if s.match("<"):
                    while 1:
                        char = s.get()
                        if char is None:
                            raise error("unterminated group name")
                        if char == ">":
                            break
                        name = name + char
                if not name:
                    raise error("bad group name")
                try:
                    index = atoi(name)
                except ValueError:
                    if not isname(name):
                        raise error("bad character in group name")
                    try:
                        index = pattern.groupindex[name]
                    except KeyError:
                        raise IndexError("unknown group name")
                a((MARK, index))
            elif len(this) > 1 and this[1] in DIGITS:
                code = None
                while 1:
                    group = _group(this, pattern.groups+1)
                    if group:
                        if (s.next not in DIGITS or
                            not _group(this + s.next, pattern.groups+1)):
                            code = MARK, group
                            break
                        # the longer number is a valid group too: consume
                        # the digit so the loop makes progress
                        this = this + s.get()
                    elif s.next in OCTDIGITS:
                        this = this + s.get()
                    else:
                        break
                if not code:
                    # not a group reference; treat the digits as octal
                    this = this[1:]
                    code = LITERAL, makechar(atoi(this[-6:], 8) & 0xff)
                if code[0] is LITERAL:
                    literal(code[1])
                else:
                    a(code)
            else:
                # only escapes that map to a single character are
                # translated; everything else (including \R, whose entry
                # is a category set) is kept as written
                code = ESCAPES.get(this)
                if code is not None and code[0] is LITERAL:
                    this = makechar(code[1])
                literal(this)
        else:
            literal(this)

    # convert template to groups and literals lists
    position = 0
    groups = []
    literals = []
    for opcode, value in p:
        if opcode is MARK:
            groups.append((position, value))
            literals.append(None)
        else:
            literals.append(value)
        position = position + 1
    return groups, literals
def expand_template(template, match):
    """Fill a parsed template with the groups of 'match'.

    'template' is the (groups, literals) pair built by parse_template().
    Returns the expanded string; raises error ("empty group") when a
    referenced group is None or match.group() raises IndexError.
    """
    fetch = match.group
    empty = match.string[:0]  # empty string of the subject's own type
    groups, literals = template
    pieces = literals[:]  # work on a copy; the template is reused
    try:
        for index, group in groups:
            value = fetch(group)
            pieces[index] = value
            if value is None:
                raise IndexError
    except IndexError:
        raise error("empty group")
    return string.join(pieces, empty)
818