Package Bio :: Package Prosite :: Module Prodoc
[hide private]
[frames | no frames]

Source Code for Module Bio.Prosite.Prodoc

  1  # Copyright 2000 by Jeffrey Chang.  All rights reserved. 
  2  # This code is part of the Biopython distribution and governed by its 
  3  # license.  Please see the LICENSE file that should have been included 
  4  # as part of this package. 
  5   
  6  """ 
  7  This module is OBSOLETE. 
  8  Most of the functionality in this module has moved to Bio.ExPASy.Prodoc; 
  9  please see 
 10   
 11  Bio.ExPASy.Prodoc.read          To read a Prodoc file containing one entry. 
 12  Bio.ExPASy.Prodoc.parse         Iterates over entries in a Prodoc file. 
 13  Bio.ExPASy.Prodoc.Record        Holds Prodoc data. 
 14  Bio.ExPASy.Prodoc.Reference     Holds data from a Prodoc reference. 
 15   
 16  The other functions and classes in Bio.Prosite.Prodoc (including 
 17  Bio.Prosite.Prodoc.index_file and Bio.Prosite.Prodoc.Dictionary) are 
 18  considered deprecated, and were not moved to Bio.ExPASy.Prodoc. If you use 
 19  this functionality, please contact the Biopython developers at 
 20  biopython-dev@biopython.org to avoid permanent removal of this module from 
 21  Biopython. 
 22   
 23   
 24   
 25   
 26  This module provides code to work with the prosite.doc file from 
 27  Prosite, available at http://www.expasy.ch/prosite/. 
 28   
 29  Tested with: 
 30  Release 15.0, July 1998 
 31  Release 16.0, July 1999 
 32  Release 20.22, 13 November 2007 
 33   
 34   
 35  Functions: 
 36  parse              Iterates over entries in a Prodoc file. 
 37  index_file         Index a Prodoc file for a Dictionary. 
 38  _extract_record    Extract Prodoc data from a web page. 
 39   
 40   
 41  Classes: 
 42  Record             Holds Prodoc data. 
 43  Reference          Holds data from a Prodoc reference. 
 44  Dictionary         Accesses a Prodoc file using a dictionary interface. 
 45  RecordParser       Parses a Prodoc record into a Record object. 
 46   
 47  _Scanner           Scans Prodoc-formatted data. 
 48  _RecordConsumer    Consumes Prodoc data to a Record object. 
 49  """ 
 50   
 51  import warnings 
 52  warnings.warn("This module is OBSOLETE. Most of the functionality in this module has moved to Bio.ExPASy.Prodoc.", PendingDeprecationWarning) 
 53   
 54  from types import * 
 55  import os 
 56  import sgmllib 
 57  from Bio import File 
 58  from Bio import Index 
 59  from Bio.ParserSupport import * 
 60   
def parse(handle):
    """Iterate over the records in a Prodoc file.

    handle is any iterable of text lines (typically an open file).  Lines
    are accumulated until one starting with '{END}' is seen; the
    accumulated text is then handed to a RecordParser and the value it
    returns is yielded.

    Lines that follow the last '{END}' marker are discarded without being
    parsed.
    """
    # cStringIO only exists on Python 2; fall back to io.StringIO elsewhere.
    try:
        from cStringIO import StringIO
    except ImportError:
        from io import StringIO

    parser = RecordParser()
    # Collect the lines in a list and join them once per record instead of
    # growing a string with "+=", which may copy the buffer on every line.
    lines = []
    for line in handle:
        lines.append(line)
        if line[:5] == '{END}':
            # Use a separate name for the per-record buffer so the handle
            # being iterated over is never rebound inside its own loop.
            record_handle = StringIO("".join(lines))
            lines = []
            yield parser.parse(record_handle)
72
def read(handle):
    """Parse the contents of handle as one Prodoc record and return it.

    After parsing, whatever is left on handle is read; if anything remains
    a ValueError is raised, because the handle was expected to hold
    exactly one record.
    """
    record = RecordParser().parse(handle)
    # We should have reached the end of the record by now
    leftover = handle.read()
    if not leftover:
        return record
    raise ValueError("More than one Prodoc record found")
81 82 83 # It may be a good idea to rewrite read(), parse() at some point to avoid 84 # using the old-style "parser = RecordParser(); parser.parse(handle)" approach. 85
class Record:
    """Container for the data of a single Prodoc record.

    Members:
    accession      Accession number of the record.
    prosite_refs   List of tuples (prosite accession, prosite name).
    text           Free format text.
    references     List of reference objects.

    """

    def __init__(self):
        # String members start out empty; every list member is a fresh
        # list so that instances never share state.
        self.accession, self.prosite_refs = '', []
        self.text, self.references = '', []
101
class Reference:
    """Container for one citation of a Prodoc record.

    Members:
    number      Number of the reference. (string)
    authors     Names of the authors.
    citation    Describes the citation.

    """

    def __init__(self):
        # All three members are strings and start out empty.
        self.number = self.authors = self.citation = ''
115
class Dictionary:
    """Accesses a Prodoc file using a dictionary interface.

    Records are looked up by key in an index (created with index_file)
    that maps each key to the (start, length) span of the record inside
    the data file.  Attributes that are not defined on this class are
    forwarded to the underlying index object.

    """
    # Key under which the index stores the name of the indexed data file.
    __filename_key = '__filename'

    def __init__(self, indexname, parser=None):
        """__init__(self, indexname, parser=None)

        Open a Prodoc Dictionary.  indexname is the name of the
        index for the dictionary.  The index should have been created
        using the index_file function.  parser is an optional Parser
        object to change the results into another form.  If set to None,
        then the raw contents of the file will be returned.

        """
        self._index = Index.Index(indexname)
        self._handle = open(self._index[Dictionary.__filename_key])
        self._parser = parser

    def __len__(self):
        """Return the number of entries held by the index."""
        return len(self._index)

    def __getitem__(self, key):
        """Return the record stored under key.

        The raw text of the record is read from the data file; when a
        parser was supplied, the result of parsing that text is returned
        instead.
        """
        # "length" rather than "len" so the builtin is not shadowed.
        start, length = self._index[key]
        self._handle.seek(start)
        data = self._handle.read(length)
        if self._parser is None:
            return data
        return self._parser.parse(File.StringHandle(data))

    def __getattr__(self, name):
        """Forward unknown attribute lookups to the index object."""
        # __getattr__ is only called when normal lookup fails.  If _index
        # itself is missing (e.g. __init__ did not complete), reading
        # self._index below would call __getattr__ again without end, so
        # fail with a normal AttributeError instead.
        if name == '_index':
            raise AttributeError(name)
        return getattr(self._index, name)
149
class RecordParser(AbstractParser):
    """Parser that turns Prodoc data into a Record object.

    """

    def __init__(self):
        # One scanner/consumer pair is created here and reused by every
        # call to parse().
        self._scanner = _Scanner()
        self._consumer = _RecordConsumer()

    def parse(self, handle):
        """Scan handle and return the data collected by the consumer."""
        consumer = self._consumer
        self._scanner.feed(handle, consumer)
        return consumer.data
161
class _Scanner:
    """Scans Prodoc-formatted data.

    Tested with:
    Release 15.0, July 1998

    """

    def feed(self, handle, consumer):
        """feed(self, handle, consumer)

        Feed in Prodoc data for scanning.  handle is a file-like
        object that contains prosite data.  consumer is a
        Consumer object that will receive events as the report is scanned.

        """
        # Wrap the handle (unless it already is an UndoHandle) so lines
        # can be peeked at and pushed back.
        if isinstance(handle, File.UndoHandle):
            uhandle = handle
        else:
            uhandle = File.UndoHandle(handle)

        while True:
            line = uhandle.peekline()
            if not line:
                break
            if is_blank_line(line):
                # Skip blank lines between records
                uhandle.readline()
            else:
                self._scan_record(uhandle, consumer)

    def _scan_record(self, uhandle, consumer):
        """Scan one record, bracketed by start_record/end_record events."""
        consumer.start_record()

        self._scan_accession(uhandle, consumer)
        self._scan_prosite_refs(uhandle, consumer)
        read_and_call(uhandle, consumer.noevent, start='{BEGIN}')
        self._scan_text(uhandle, consumer)
        self._scan_refs(uhandle, consumer)
        # NOTE(review): _scan_copyright is not defined in the visible part
        # of this class -- confirm it exists in the complete source.
        self._scan_copyright(uhandle, consumer)
        read_and_call(uhandle, consumer.noevent, start='{END}')

        consumer.end_record()

    def _scan_accession(self, uhandle, consumer):
        """Pass the line starting with '{PDOC' to consumer.accession."""
        read_and_call(uhandle, consumer.accession, start='{PDOC')

    def _scan_prosite_refs(self, uhandle, consumer):
        """Pass each line starting with '{PS' to consumer.prosite_reference."""
        while attempt_read_and_call(uhandle, consumer.prosite_reference,
                                    start='{PS'):
            pass

    def _is_reference_start(self, line):
        """Return True if line has '[' at index 0, ']' at 3 and ' ' at 4."""
        # Slices instead of plain indexing: a short line such as "[a\n"
        # then compares unequal instead of raising IndexError.
        return line[:1] == '[' and line[3:4] == ']' and line[4:5] == ' '

    def _scan_text(self, uhandle, consumer):
        """Send lines to consumer.text up to the references or '{END}'.

        The line that ends the text section is pushed back onto uhandle
        so the next scanning step can read it.
        """
        while True:
            line = safe_readline(uhandle)
            if self._is_reference_start(line) or line[:5] == '{END}':
                uhandle.saveline(line)
                break
            consumer.text(line)

    def _scan_refs(self, uhandle, consumer):
        """Send lines to consumer.reference up to a blank or '{END}' line.

        The terminating line is pushed back onto uhandle.
        """
        while True:
            line = safe_readline(uhandle)
            if line[:5] == '{END}' or is_blank_line(line):
                uhandle.saveline(line)
                break
            consumer.reference(line)
230
239
class _RecordConsumer(AbstractConsumer):
    """Consumer that converts a Prodoc record to a Record object.

    Members:
    data    Record with Prodoc data.

    """

    def __init__(self):
        self.data = None
        # Reference currently being filled in; continuation lines are
        # appended to its citation.
        self._ref = None

    def start_record(self):
        """Begin a new record with an empty Record and no open reference."""
        self.data = Record()
        # Forget any reference left over from a previous record so that a
        # stray continuation line cannot be attached to it.
        self._ref = None

    def end_record(self):
        """Finish the record by tidying up the collected references."""
        self._clean_data()

    def accession(self, line):
        """Store the accession from a line of the form '{PDOCxxxxx}'.

        Raises ValueError if the line is not wrapped in braces or the
        text inside them does not start with 'PDOC'.
        """
        line = line.rstrip()
        # Slices keep an empty line from raising IndexError here; it is
        # reported through the ValueError below instead.
        if line[:1] != '{' or line[-1:] != '}':
            raise ValueError("I don't understand accession line\n%s" % line)
        acc = line[1:-1]
        if acc[:4] != 'PDOC':
            raise ValueError("Invalid accession in line\n%s" % line)
        self.data.accession = acc

    def prosite_reference(self, line):
        """Append (accession, name) from a line of the form '{acc; name}'.

        Raises ValueError if the line is not wrapped in braces, or if the
        text inside them does not split into exactly two parts on '; '.
        """
        line = line.rstrip()
        if line[:1] != '{' or line[-1:] != '}':
            raise ValueError(
                "I don't understand prosite reference line\n%s" % line)
        acc, name = line[1:-1].split('; ')
        self.data.prosite_refs.append((acc, name))

    def text(self, line):
        """Append line to the free-format text of the record."""
        self.data.text = self.data.text + line

    def reference(self, line):
        """Handle one line of the reference section.

        A line with '[' at index 0 and ']' at index 3 starts a new
        Reference; a line starting with four spaces continues the
        citation of the current one.  Raises ValueError for a
        continuation line with no reference open and for any other line.
        """
        if line[:1] == '[' and line[3:4] == ']':  # new reference
            self._ref = Reference()
            self._ref.number = line[1:3].strip()
            if line[1] == 'E':
                # If it's an electronic reference, then the URL is on the
                # line, instead of the author.
                self._ref.citation = line[4:].strip()
            else:
                self._ref.authors = line[4:].strip()
            self.data.references.append(self._ref)
        elif line[:4] == ' ' * 4:
            # The four-character slice has to be compared against four
            # spaces; a single space could never match an indented line.
            if self._ref is None:
                raise ValueError("Unnumbered reference lines\n%s" % line)
            self._ref.citation = self._ref.citation + line[5:]
        else:
            # ValueError (a subclass of Exception) matches the other
            # parse errors raised by this class.
            raise ValueError(
                "I don't understand the reference line\n%s" % line)

    def _clean_data(self):
        """Strip trailing whitespace from every citation and author list."""
        # get rid of trailing newlines
        for ref in self.data.references:
            ref.citation = ref.citation.rstrip()
            ref.authors = ref.authors.rstrip()
298
def index_file(filename, indexname, rec2key=None):
    """index_file(filename, indexname, rec2key=None)

    Index a Prodoc file.  filename is the name of the file.
    indexname is the name of the dictionary.  rec2key is an
    optional callback that takes a Record and generates a unique key
    (e.g. the accession number) for the record.  If not specified,
    the accession of the record will be used.

    Each key is stored with the (start, length) span of its record in the
    file.  Raises ValueError if filename does not exist and KeyError if a
    record yields an empty key or a key that is already in the index.

    """
    import os
    if not os.path.exists(filename):
        raise ValueError("%s does not exist" % filename)

    index = Index.Index(indexname, truncate=1)
    index[Dictionary._Dictionary__filename_key] = filename

    handle = open(filename)
    try:
        # Feed parse() through readline() instead of iterating over the
        # file object directly: file iteration reads ahead, so tell()
        # would not report the end of the record that was just parsed.
        records = parse(iter(handle.readline, ''))
        end = 0
        for record in records:
            # A record spans from the end of the previous one to the
            # current file position.
            start = end
            end = handle.tell()
            length = end - start

            if rec2key is not None:
                key = rec2key(record)
            else:
                key = record.accession

            if not key:
                raise KeyError("empty key was produced")
            elif key in index:
                raise KeyError("duplicate key %s found" % key)

            index[key] = start, length
    finally:
        # Close the data file even when parsing or indexing fails.
        handle.close()
335