1
2
3
4
5
6 """
7 This module is OBSOLETE.
8 Most of the functionality in this module has moved to Bio.ExPASy.Prodoc;
9 please see
10
11 Bio.ExPASy.Prodoc.read To read a Prodoc file containing one entry.
12 Bio.ExPASy.Prodoc.parse Iterates over entries in a Prodoc file.
13 Bio.ExPASy.Prodoc.Record Holds Prodoc data.
14 Bio.ExPASy.Prodoc.Reference Holds data from a Prodoc reference.
15
16 The other functions and classes in Bio.Prosite.Prodoc (including
17 Bio.Prosite.Prodoc.index_file and Bio.Prosite.Prodoc.Dictionary) are
18 considered deprecated, and were not moved to Bio.ExPASy.Prodoc. If you use
19 this functionality, please contact the Biopython developers at
20 biopython-dev@biopython.org to avoid permanent removal of this module from
21 Biopython.
22
23
24
25
26 This module provides code to work with the prosite.doc file from
27 Prosite, available at http://www.expasy.ch/prosite/.
28
29 Tested with:
30 Release 15.0, July 1998
31 Release 16.0, July 1999
32 Release 20.22, 13 November 2007
33
34
35 Functions:
36 parse Iterates over entries in a Prodoc file.
37 index_file Index a Prodoc file for a Dictionary.
38 _extract_record Extract Prodoc data from a web page.
39
40
41 Classes:
42 Record Holds Prodoc data.
43 Reference Holds data from a Prodoc reference.
44 Dictionary Accesses a Prodoc file using a dictionary interface.
45 RecordParser Parses a Prodoc record into a Record object.
46
47 _Scanner Scans Prodoc-formatted data.
48 _RecordConsumer Consumes Prodoc data to a Record object.
49 """
50
import warnings

# Emitted once at import time so that users of this obsolete module are
# pointed to its replacement, Bio.ExPASy.Prodoc.
warnings.warn(
    "This module is OBSOLETE. Most of the functionality in this module has moved to Bio.ExPASy.Prodoc.",
    PendingDeprecationWarning,
)
53
54 from types import *
55 import os
56 import sgmllib
57 from Bio import File
58 from Bio import Index
59 from Bio.ParserSupport import *
60
72
81
82
83
84
85
87 """Holds information from a Prodoc record.
88
89 Members:
90 accession Accession number of the record.
91 prosite_refs List of tuples (prosite accession, prosite name).
92 text Free format text.
93 references List of reference objects.
94
95 """
97 self.accession = ''
98 self.prosite_refs = []
99 self.text = ''
100 self.references = []
101
103 """Holds information from a Prodoc citation.
104
105 Members:
106 number Number of the reference. (string)
107 authors Names of the authors.
108 citation Describes the citation.
109
110 """
112 self.number = ''
113 self.authors = ''
114 self.citation = ''
115
117 """Accesses a Prodoc file using a dictionary interface.
118
119 """
120 __filename_key = '__filename'
121
def __init__(self, indexname, parser=None):
    """Open a Prodoc Dictionary backed by an existing index.

    indexname is the name of the index for the dictionary; the index
    should have been created with the index_file function.  parser is
    an optional Parser object used to change the results into another
    form.  If it is None, the raw contents of the file are returned.

    """
    self._index = Index.Index(indexname)
    # The index stores the name of the data file under a reserved key;
    # look it up first, then open that file for reading.
    data_filename = self._index[Dictionary.__filename_key]
    self._handle = open(data_filename)
    self._parser = parser
135
138
146
149
151 """Parses Prodoc data into a Record object.
152
153 """
157
def parse(self, handle):
    """Scan handle and return the data gathered by the consumer.

    handle is passed to the scanner's feed method together with this
    parser's consumer; the consumer's data attribute is returned once
    feeding has finished.

    """
    self._scanner.feed(handle, self._consumer)
    record = self._consumer.data
    return record
161
163 """Scans Prodoc-formatted data.
164
165 Tested with:
166 Release 15.0, July 1998
167
168 """
def feed(self, handle, consumer):
    """Feed in Prodoc data for scanning.

    handle is a file-like object that contains prosite data.  consumer
    is a Consumer object that will receive events as the report is
    scanned.

    """
    # Lines must be peeked at and pushed back, so wrap the handle
    # unless it already is an UndoHandle.
    uhandle = handle
    if not isinstance(uhandle, File.UndoHandle):
        uhandle = File.UndoHandle(handle)

    upcoming = uhandle.peekline()
    while upcoming:
        if is_blank_line(upcoming):
            # Consume and discard the blank line.
            uhandle.readline()
        else:
            self._scan_record(uhandle, consumer)
        upcoming = uhandle.peekline()
192
205
208
213
def _scan_text(self, uhandle, consumer):
    """Pass free-text lines to consumer.text until the text ends.

    Lines are read from uhandle one at a time with safe_readline.
    Reading stops at the first line that either has '[' in column 0
    and '] ' in columns 3-4, or starts with '{END}'.  That line is
    pushed back onto uhandle with saveline so the caller can handle
    it; every earlier line is handed to consumer.text.

    """
    while 1:
        line = safe_readline(uhandle)
        # Compare slices instead of single indexes: a short text line
        # that begins with '[' (fewer than five characters) must be
        # treated as ordinary text rather than raising IndexError.
        has_bracket_marker = line[:1] == '[' and line[3:5] == '] '
        if has_bracket_marker or line[:5] == '{END}':
            uhandle.saveline(line)
            break
        consumer.text(line)
222
230
232
233
234 read_and_call_while(uhandle, consumer.noevent, blank=1)
235 if attempt_read_and_call(uhandle, consumer.noevent, start='+----'):
236 read_and_call_until(uhandle, consumer.noevent, start='+----')
237 read_and_call(uhandle, consumer.noevent, start='+----')
238 read_and_call_while(uhandle, consumer.noevent, blank=1)
239
241 """Consumer that converts a Prodoc record to a Record object.
242
243 Members:
244 data Record with Prodoc data.
245
246 """
249
252
255
257 line = line.rstrip()
258 if line[0] != '{' or line[-1] != '}':
259 raise ValueError("I don't understand accession line\n%s" % line)
260 acc = line[1:-1]
261 if acc[:4] != 'PDOC':
262 raise ValueError("Invalid accession in line\n%s" % line)
263 self.data.accession = acc
264
266 line = line.rstrip()
267 if line[0] != '{' or line[-1] != '}':
268 raise ValueError("I don't understand accession line\n%s" % line)
269 acc, name = line[1:-1].split('; ')
270 self.data.prosite_refs.append((acc, name))
271
def text(self, line):
    """Append line to the free-format text of the record being built."""
    record = self.data
    record.text = record.text + line
274
276 if line[0] == '[' and line[3] == ']':
277 self._ref = Reference()
278 self._ref.number = line[1:3].strip()
279 if line[1] == 'E':
280
281
282 self._ref.citation = line[4:].strip()
283 else:
284 self._ref.authors = line[4:].strip()
285 self.data.references.append(self._ref)
286 elif line[:4] == ' ':
287 if not self._ref:
288 raise ValueError("Unnumbered reference lines\n%s" % line)
289 self._ref.citation = self._ref.citation + line[5:]
290 else:
291 raise Exception("I don't understand the reference line\n%s" % line)
292
298
def index_file(filename, indexname, rec2key=None):
    """index_file(filename, indexname, rec2key=None)

    Index a Prodoc file.  filename is the name of the file.
    indexname is the name of the dictionary.  rec2key is an
    optional callback that takes a Record and generates a unique key
    (e.g. the accession number) for the record.  If not specified,
    the record's accession will be used.

    Each key is stored in the index with a (start, length) tuple
    computed from the file positions reported by handle.tell().  The
    file name itself is stored under the Dictionary's private filename
    key.

    Raises ValueError if filename does not exist, and KeyError if a
    record produces an empty key or a key that is already in the index.

    """
    import os
    if not os.path.exists(filename):
        raise ValueError("%s does not exist" % filename)

    index = Index.Index(indexname, truncate=1)
    index[Dictionary._Dictionary__filename_key] = filename

    handle = open(filename)
    # Always release the data file, including when a bad key aborts
    # the indexing part-way through.
    try:
        records = parse(handle)
        # A plain 0 is enough; integers are promoted as needed, and the
        # old '0L' spelling is a syntax error on Python 3.
        end = 0
        for record in records:
            # The previous record's end is this record's start.
            start = end
            end = handle.tell()
            length = end - start

            if rec2key is not None:
                key = rec2key(record)
            else:
                key = record.accession

            if not key:
                raise KeyError("empty key was produced")
            elif key in index:
                raise KeyError("duplicate key %s found" % key)

            index[key] = start, length
    finally:
        handle.close()
335