1
2
3
4 """Converts a regular expression pattern string into an Expression tree.
5
6 This is not meant to be an externally usable module.
7
8 This works by using msre_parse.py to parse the pattern. The result is
9 a tree data structure, where the nodes in the tree are tuples. The
10 first element of the tuple is the name of the node type. The format
11 of the other elements depends on the type.
12
13 The conversion routine is pretty simple - convert each msre_parse tuple
14 node into a Martel Expression node. It's a recursive implementation.
15
16 'msre_parse.py' is a modified version of Secret Labs' 'sre_parse.py'
17
18 """
19
20 import string
21 import msre_parse, Expression
22
23
24
25
26
27
28
31 self.flags = 0
32 self.open = []
33 self.groups = 1
34 self.groupdict = {}
45 return gid < self.groups and gid not in self.open
46
48 """group number -> group name, or None if there is no name"""
49 for key, val in self.groupdict.items():
50 if id in val:
51 return key
52
53
54
55
56
59
60
63
64
65
67 if where == "at_beginning":
68 return Expression.AtBeginning()
69 elif where == "at_end":
70 return Expression.AtEnd()
71 raise AssertionError("Unknown at name: %s" % repr(where))
72
73
75 assert ignore is None, "what does it mean when the field is '%s'?" % ignore
76 return Expression.Dot()
77
78
82
83
87
88
89
91 assert ignore is None, "what is %s?" % repr(ignore)
92 results = []
93 for branch in branches:
94 results.append(convert_list(group_names, branch))
95 if len(results) == 1:
96 return results[0]
97 return Expression.Alt(tuple(results))
98
99
101 """s -> a string containing all the characters not present in s"""
102 letters = []
103 if not(isinstance(s, type(""))):
104 s = str(s)
105 for c in map(chr, range(256)):
106 if c not in s:
107 letters.append(c)
108 return string.join(letters, "")
109
110
111
# Character sets for the sre "category" codes that can appear inside an
# 'in' (character class) term.  Each "category_not_*" entry is the
# complement of its positive counterpart, as computed by invert().
_word_chars = string.letters + "0123456789_"
_digit_chars = string.digits
_space_chars = "\t\n\v\f\r "

categories = {}
categories["category_word"] = _word_chars
categories["category_digit"] = _digit_chars
categories["category_space"] = _space_chars
categories["category_newline"] = "\n\r"

categories["category_not_word"] = invert(_word_chars)
categories["category_not_digit"] = invert(_digit_chars)
categories["category_not_space"] = invert(_space_chars)

# Keep the module namespace the same as before: only 'categories' remains.
del _word_chars, _digit_chars, _space_chars
122
123
124
126 negate = (terms[0][0] == 'negate')
127 s = ""
128 for c in terms[negate:]:
129 if c[0] == 'literal':
130 s = s + chr(c[1])
131 elif c[0] == 'range':
132 for i in range(c[1][0], c[1][1]+1):
133 s = s + chr(i)
134 elif c[0] == 'category':
135 s = s + categories[c[1]]
136 else:
137 raise AssertionError("unknown option for 'in': %s" % c[0])
138 return Expression.Any(s, negate)
139
140
141
143 pattern_name = group_names.reverse_name(id)
144
145
146
147 pos = -1
148 attrs = {}
149 if pattern_name is not None:
150 pos = string.find(pattern_name, "?")
151
152 if pos != -1:
153 import cgi
154 qs = pattern_name[pos+1:]
155 if not qs:
156
157 attrs = {}
158 else:
159 attrs = cgi.parse_qs(pattern_name[pos+1:],
160 keep_blank_values = 1,
161 strict_parsing = 1)
162 pattern_name = pattern_name[:pos]
163
164 for k, v in attrs.items():
165 if len(v) != 1:
166 raise AssertionError(
167 "The attribute name %s was found more than once (%d times) in the tag %s" %
168 (repr(k), len(v), repr(pattern_name)))
169
170 attrs[k] = v[0]
171
172 return Expression.Group(pattern_name, convert_list(group_names, terms),
173 attrs)
174
175
177 assert ignore is None, "what does it mean when field is %s?" % `ignore`
178 return Expression.AnyEol()
179
180
184
185
193
194
195
196
# Dispatch table: the node type name found in the first element of a parsed
# term tuple -> the function that converts that kind of node.  A name that
# is missing from this table is reported as an unknown sre expression by
# the code that performs the lookup.
converter_table = dict([
    ("any", convert_any),
    ("assert", convert_assert),
    ("assert_not", convert_assert_not),
    ("at", convert_at),
    ("branch", convert_branch),
    ("groupref", convert_groupref),
    ("in", convert_in),
    ("literal", convert_literal),
    ("max_repeat", convert_max_repeat),
    ("newline", convert_newline),
    ("not_literal", convert_not_literal),
    ("subpattern", convert_subpattern),
])
211
212
214
215 results = []
216 for term in terms:
217 name = term[0]
218 try:
219 func = converter_table[name]
220 except KeyError:
221 raise AssertionError, "Do not understand sre expression %s" % \
222 repr(name)
223
224 results.append( func(*(group_names,) + term) )
225 if len(results) == 1:
226 return results[0]
227 return Expression.Seq(tuple(results))
228
229
230
242