1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51 import string, re
52 from Bio import Seq, Alphabet
53
54
55
56
# Character translation table used when rewriting a Prosite pattern as a
# regular expression: lowercase residue letters are upper-cased, the
# wildcard "x"/"X" becomes ".", and the Prosite punctuation is mapped onto
# its re counterpart: "}" -> "]", "(" -> "{", ")" -> "}", "<" -> "^",
# ">" -> "$".
_prosite_trans = string.maketrans("abcdefghijklmnopqrstuvwxyzX}()<>",
                                  "ABCDEFGHIJKLMNOPQRSTUVW.YZ.]{}^$")
59
60
61
def prosite_to_re(pattern):
    """Convert a valid Prosite pattern into an re string.

    pattern -- a Prosite pattern string such as "[AP](2,3)-D.".

    Returns the regular expression source as a string.  No validation is
    done here: the conversion is purely textual, so it is only meaningful
    for patterns that are already known to be well formed.
    """
    # "[<" cannot go through the character translation unchanged: "<"
    # becomes "^", which would turn the class into a negated one.
    starts_with_begin_class = (pattern[:2] == "[<")

    # "{...}" becomes "[^...]"; the closing "}" is turned into "]" by the
    # translation table.
    s = pattern.replace("{", "[^")
    # Translate the remaining syntax, dropping the "-" separators and the
    # final ".".
    s = s.translate(_prosite_trans, "-.")

    if starts_with_begin_class:
        # "[<AC]" means "start of sequence, or one of A/C".
        i = s.index("]")
        s = "(?:^|[" + s[2:i] + "])" + s[i+1:]

    # "[AC>]" (translated to "[AC$]") means "one of A/C, or end of
    # sequence"; it may itself be followed by a pattern-level ">" ("$").
    if s[-2:] == "$]":
        i = s.rindex("[")
        s = s[:i] + "(?:" + s[i:-2] + "]|$)"
    elif s[-3:] == "$]$":
        i = s.rindex("[")
        s = s[:i] + "(?:" + s[i:-3] + "]|$)$"
    return s
78
79
80
81
def prosite_to_grouped_re(pattern):
    """Convert a valid Prosite pattern into an re with groups for each term.

    pattern -- a Prosite pattern string such as "[AP](2,3)-D.".

    Returns the regular expression source as a string in which every
    "-"-separated term of the pattern is wrapped in its own capturing
    group.  No validation is done here: the conversion is purely textual.
    """
    # "[<" cannot go through the character translation unchanged: "<"
    # becomes "^", which would turn the class into a negated one.
    starts_with_begin_class = (pattern[:2] == "[<")

    # "{...}" becomes "[^...]"; the closing "}" is turned into "]" by the
    # translation table.
    s = pattern.replace("{", "[^")

    # Only the final "." is dropped here; the "-" separators are kept so
    # they can be turned into group boundaries at the end.
    s = s.translate(_prosite_trans, ".")

    if starts_with_begin_class:
        # "[<AC]" means "start of sequence, or one of A/C".
        i = s.index("]")
        s = "(?:^|[" + s[2:i] + "])" + s[i+1:]

    # "[AC>]" (translated to "[AC$]") means "one of A/C, or end of
    # sequence"; it may itself be followed by a pattern-level ">" ("$").
    # The two cases are mutually exclusive.
    if s[-2:] == "$]":
        i = s.rindex("[")
        s = s[:i] + "(?:" + s[i:-2] + "]|$)"
    elif s[-3:] == "$]$":
        i = s.rindex("[")
        s = s[:i] + "(?:" + s[i:-3] + "]|$)$"

    # Open the first group after a leading "^" anchor and close the last
    # group before a trailing "$" anchor, so the anchors stay outside.
    if s[:1] == "^":
        s = "^(" + s[1:]
    else:
        s = "(" + s
    if s[-1:] == "$":
        s = s[:-1] + ")$"
    else:
        s = s + ")"

    # Every remaining "-" is a term boundary: close one group, open the next.
    return s.replace("-", ")(")
111
112
113
114
117 prosite_alphabet = PrositeAlphabet()
118
123
125 alphabet = prosite_alphabet
126
127
128
129
130 - def __init__(self, pattern = None, data = None):
138
140 return "Prosite(%s)" % repr(str(self))
142 return string.join(map(str, self.data), "-") + "."
163
166
def search(self, seq, pos=0, endpos=None):
    """Search seq for this pattern using self.grouped_re.search.

    seq    -- object whose .data attribute is wrapped in buffer() and
              handed to the regular expression
    pos    -- forwarded to grouped_re.search
    endpos -- forwarded to grouped_re.search only when it is not None

    Returns None when nothing is found, otherwise a PrositeMatch built
    from this pattern, seq and the re match object.
    """
    finder = self.grouped_re.search
    target = buffer(seq.data)
    if endpos is None:
        found = finder(target, pos)
    else:
        found = finder(target, pos, endpos)
    if found is not None:
        return PrositeMatch(self, seq, found)
    return None
def match(self, seq, pos=0, endpos=None):
    """Match this pattern against seq using self.grouped_re.match.

    seq    -- object whose .data attribute is wrapped in buffer() and
              handed to the regular expression
    pos    -- forwarded to grouped_re.match
    endpos -- forwarded to grouped_re.match only when it is not None

    Returns None when there is no match, otherwise a PrositeMatch built
    from this pattern, seq and the re match object.
    """
    matcher = self.grouped_re.match
    target = buffer(seq.data)
    if endpos is None:
        found = matcher(target, pos)
    else:
        found = matcher(target, pos, endpos)
    if found is not None:
        return PrositeMatch(self, seq, found)
    return None
183
184
185
186
187
188
189
class PrositeTerm:
    """One term of a Prosite pattern, e.g. "A", "[AP](2,3)" or "{CD}".

    The constructor stores its arguments unchanged:
      letters    -- the residue letters of the term
      ignore     -- true if the term is written "{letters}"
      is_begin   -- true if the term is written with a leading "<"
      is_end     -- true if the term is written with a trailing ">"
      min_count, max_count -- the repeat count(s) of the term
      can_begin  -- true if the term is written "[<letters]"
      can_end    -- true if the term is written "[letters>]"
    """

    def __init__(self, letters, ignore, is_begin, is_end,
                 min_count, max_count, can_begin, can_end):
        self.letters, self.ignore = letters, ignore
        self.is_begin, self.is_end = is_begin, is_end
        self.min_count, self.max_count = min_count, max_count
        self.can_begin, self.can_end = can_begin, can_end

    def copy(self):
        """Return a new PrositeTerm holding the same eight field values."""
        fields = (self.letters, self.ignore, self.is_begin, self.is_end,
                  self.min_count, self.max_count,
                  self.can_begin, self.can_end)
        return PrositeTerm(*fields)

    def __str__(self):
        """Return the term in Prosite notation, repeat count included."""
        text = self.base_str()
        low, high = self.min_count, self.max_count
        if low != high:
            text = text + "(%d,%d)" % (low, high)
        elif low != 1:
            # a count of exactly one is left implicit
            text = text + "(%d)" % low
        if self.is_end:
            text = text + ">"
        return text

    def base_str(self):
        """Return the term in Prosite notation without any repeat count.

        The trailing ">" of an is_end term is not part of this string; it
        is appended by __str__ after the count.
        """
        pieces = []
        if self.is_begin:
            pieces.append("<")
        if self.ignore:
            pieces.append("{" + self.letters + "}")
        elif len(self.letters) != 1 or self.can_begin or self.can_end:
            # brackets are needed for several letters or for "<" / ">"
            # placed inside the class
            pieces.append("[")
            if self.can_begin:
                pieces.append("<")
            pieces.append(self.letters)
            if self.can_end:
                pieces.append(">")
            pieces.append("]")
        else:
            pieces.append(self.letters)
        return "".join(pieces)
242
243
244
245
247 - def __init__(self, prosite, seq, match):
257
259
260 return "<PrositeMatch instance at %x>" % id(self)
262 return str(self.data)
268
def mapping(self):
    """Return a list of numbers mapping to items of the original pattern.

    There is one entry per matched character; each entry is the
    zero-based index of the pattern term (regular expression group
    index minus one) that the character belongs to.

    For example, if the Prosite pattern is "[AP](2)-D." matched against
    "PAD", then the mapping is [0, 0, 1], meaning the first character
    of the match ("P") is from the first Prosite term ("[AP]"), as
    is the second letter ("A").  The 3rd letter ("D") is mapped to
    the second term of the pattern.
    """
    vals = []
    i = 0
    start = self.start(0)
    try:
        # Walk the groups in order until the match object reports that
        # group i+1 does not exist (IndexError).
        while 1:
            end = self.match.end(i+1)
            if end > start:
                # characters start..end-1 were consumed by term i
                vals.extend([i] * (end - start))
                start = end
            i = i + 1
    except IndexError:
        pass
    return vals
292
def mapped_pattern(self):
    """Return the Prosite pattern that was used, one term per matched item.

    The result is pattern_mapping() applied to the pattern of this match
    and to self.mapping().

    >>> p = Prosite.compile("[AP](2,3)-D.")
    >>> m = p.search(Seq.Seq("PAD"))
    >>> mapping = m.mapping()
    >>> mapped = m.mapped_pattern()
    >>> print str(m[1]), str(p[mapping[1]]), str(mapped[1])
    P [AP](2,3) [AP]
    >>> print str(mapped)
    [AP]-[AP]-D.
    >>>

    As the example shows, a term of the original pattern still carries
    its repeat count, whereas the mapped pattern has the repeats
    expanded.

    """
    source_pattern = self.prosite
    term_indices = self.mapping()
    return pattern_mapping(source_pattern, term_indices)
311
314 - def end(self, g=0):
318 - def groups(self, default=None):
324 - def group(self, *groups):
334
342
# Matches one Prosite term (already stripped of any leading "<" or
# trailing ">").  Groups:
#   1 -- a single residue letter (or the wildcard "x")
#   2 -- the optional "<" inside a "[...]" term
#   3 -- the letters of a "[...]" term
#   4 -- the optional ">" inside a "[...]" term
#   5 -- the letters of a "{...}" term
#   6 -- the first repeat count, if a count is given
#   7 -- ",j" (comma included) when a second count is given
prosite_term_re = re.compile(r"""
(?:
([ABCDEFGHIKLMNPQRSTVWXYZx])| # a character OR
\[(<?)([ABCDEFGHIKLMNPQRSTVWXYZ]+)(>?)\]| # something in []s OR
\{([ABCDEFGHIKLMNPQRSTVWXYZ]+)\} # something in {}s
)(?:\((\d+)(,\d+)?\))? # optional count of the form "(i,j)", ",j" optional
$
""", re.VERBOSE)
351
352
353
def find_terms(pattern):
    """Split a Prosite pattern into a list of PrositeTerm objects.

    pattern -- a Prosite pattern string; it must end with "." and its
               terms are separated by "-".

    Returns a list with one PrositeTerm per term, in pattern order.

    Raises TypeError when the final "." is missing, when a term does not
    match prosite_term_re, when "[<...]" is used in any term but the
    first, or when "[...>]" is used in any term but the last.
    """
    if pattern[-1:] != ".":
        raise TypeError("not a prosite pattern - needs a final '.'")
    terms = pattern[:-1].split("-")
    last_index = len(terms) - 1
    result = []
    i = 0
    for term in terms:
        can_begin = can_end = 0

        # A "<" in front of the term and a ">" behind it are recorded and
        # stripped before the term itself is parsed.
        if term[:1] == "<":
            term = term[1:]
            is_begin = 1
        else:
            is_begin = 0

        if term[-1:] == ">":
            term = term[:-1]
            is_end = 1
        else:
            is_end = 0

        match = prosite_term_re.match(term)
        if match is None:
            raise TypeError("not a Prosite term (%s)" % repr(term))

        if match.group(1) is not None:
            # a single character
            ignore = 0
            letters = match.group(1)
        elif match.group(3) is not None:
            # letters inside "[...]", possibly with "<" and/or ">"
            ignore = 0
            letters = match.group(3)
            if match.group(2):
                can_begin = 1
                if i != 0:
                    raise TypeError("[<] only allowed for first term (%s)"
                                    % repr(term))
            if match.group(4):
                can_end = 1
                if i != last_index:
                    raise TypeError("[>] only allowed for last term (%s)"
                                    % repr(term))
        elif match.group(5) is not None:
            # letters inside "{...}"
            ignore = 1
            letters = match.group(5)
        else:
            raise TypeError("not a prosite term (%s)" % repr(term))

        # Repeat counts: "(i)" sets both bounds to i, "(i,j)" sets them
        # separately, and no count at all means exactly one.
        if match.group(6) is not None:
            min_count = int(match.group(6))
        else:
            min_count = 1
        if match.group(7) is not None:
            # group 7 still carries its leading comma
            max_count = int(match.group(7)[1:])
        else:
            max_count = min_count

        result.append(PrositeTerm(letters, ignore, is_begin,
                                  is_end, min_count, max_count,
                                  can_begin, can_end))

        i = i + 1
    return result
426
427
428
429
# Matches a complete Prosite pattern: an optional leading "<", one or
# more "-"-separated terms (each with an optional repeat count), an
# optional trailing ">" and the closing ".".  Only the first term may be
# written "[<...]"; any bracketed term is allowed to end in ">]" as far
# as this expression is concerned.
prosite_re = re.compile(r"""
^<? # starts with an optional "<"
(
[ABCDEFGHIKLMNPQRSTVWXYZx]| # a character OR
(\[<?[ABCDEFGHIKLMNPQRSTVWXYZ]+>?\])| # something in []s OR
\{[ABCDEFGHIKLMNPQRSTVWXYZ]+\} # something in {}s
)(\(\d+(,\d+)?\))? # optional count of the form "(i,j)" (",j" is optional)
(- # new terms seperated by a '-'
(
[ABCDEFGHIKLMNPQRSTVWXYZx]| # a character OR
\[[ABCDEFGHIKLMNPQRSTVWXYZ]+>?\]| # something in []s OR
\{[ABCDEFGHIKLMNPQRSTVWXYZ]+\} # something in {}s
)(\(\d+(,\d+)?\))? # optional count
)* # repeat until done
>? # pattern ends with an optional ">"
\.$ # description ends with a required "."
""", re.VERBOSE)
447
448
def verify_pattern(pattern):
    """Return 1 if the Prosite pattern is syntactically correct, else 0.

    pattern -- the Prosite pattern string to check, final "." included.

    The pattern must match prosite_re and additionally pass two checks
    that the regular expression does not enforce on its own.
    """
    if prosite_re.match(pattern) is None:
        return 0

    # "[<" is only legal at the very start of the pattern.
    if pattern.find("[<", 1) != -1:
        return 0
    # ">]" is only legal directly in front of the final "."; the search
    # window stops two characters short of the end so that this one
    # position is not reported.
    if pattern.find(">]", 0, len(pattern)-2) != -1:
        return 0
    return 1
460
487
488
489
490
491
492
493
494