Trees | Indices | Help |
---|
|
1 # Copyright 2004 by James Casbon. All rights reserved. 2 # This code is part of the Biopython distribution and governed by its 3 # license. Please see the LICENSE file that should have been included 4 # as part of this package. 5 6 """ 7 Code to deal with COMPASS output, a program for profile/profile comparison. 8 9 Compass is described in: 10 11 Sadreyev R, Grishin N. COMPASS: a tool for comparison of multiple protein 12 alignments with assessment of statistical significance. J Mol Biol. 2003 Feb 13 7;326(1):317-36. 14 15 Tested with COMPASS 1.24. 16 17 Functions: 18 read Reads a COMPASS file containing one COMPASS record 19 parse Iterates over records in a COMPASS file. 20 21 Classes: 22 Record One result of a COMPASS file 23 24 DEPRECATED CLASSES: 25 26 _Scanner Scan compass results 27 _Consumer Consume scanner events 28 RecordParser Parse one compass record 29 Iterator Iterate through a number of compass records 30 """ 31 import re 32 33 3436 record = None 37 try: 38 line = handle.next() 39 record = Record() 40 __read_names(record, line) 41 line = handle.next() 42 __read_threshold(record, line) 43 line = handle.next() 44 __read_lengths(record, line) 45 line = handle.next() 46 __read_profilewidth(record, line) 47 line = handle.next() 48 __read_scores(record, line) 49 except StopIteration: 50 if not record: 51 raise ValueError("No record found in handle") 52 else: 53 raise ValueError("Unexpected end of stream.") 54 for line in handle: 55 if is_blank_line(line): 56 continue 57 __read_query_alignment(record, line) 58 try: 59 line = handle.next() 60 __read_positive_alignment(record, line) 61 line = handle.next() 62 __read_hit_alignment(record, line) 63 except StopIteration: 64 raise ValueError("Unexpected end of stream.") 65 return record6668 record = None 69 try: 70 line = handle.next() 71 except StopIteration: 72 return 73 while True: 74 try: 75 record = Record() 76 __read_names(record, line) 77 line = handle.next() 78 __read_threshold(record, line) 79 line = handle.next() 80 __read_lengths(record, line) 81 line = handle.next() 82 __read_profilewidth(record, line) 83 line = handle.next() 84 __read_scores(record, line) 85 except StopIteration: 86 raise ValueError("Unexpected end of stream.") 87 for line in handle: 88 if not line.strip(): 89 continue 90 if "Ali1:" in line: 91 yield record 92 break 93 __read_query_alignment(record, line) 94 try: 95 line = handle.next() 96 __read_positive_alignment(record, line) 97 line = handle.next() 98 __read_hit_alignment(record, line) 99 except StopIteration: 100 raise ValueError("Unexpected end of stream.") 101 else: 102 yield record 103 break104106 """ 107 Hold information from one compass hit. 108 Ali1 one is the query, Ali2 the hit. 109 """ 110141 142 # Everything below is private 143 144 __regex = {"names": re.compile("Ali1:\s+(\S+)\s+Ali2:\s+(\S+)\s+"), 145 "threshold": re.compile("Threshold of effective gap content in columns: (\S+)"), 146 "lengths": re.compile("length1=(\S+)\s+filtered_length1=(\S+)\s+length2=(\S+)\s+filtered_length2=(\S+)"), 147 "profilewidth": re.compile("Nseqs1=(\S+)\s+Neff1=(\S+)\s+Nseqs2=(\S+)\s+Neff2=(\S+)"), 148 "scores": re.compile("Smith-Waterman score = (\S+)\s+Evalue = (\S+)"), 149 "start": re.compile("(\d+)"), 150 "align": re.compile("^.{15}(\S+)"), 151 "positive_alignment": re.compile("^.{15}(.+)"), 152 } 153112 self.query='' 113 self.hit='' 114 self.gap_threshold=0 115 self.query_length=0 116 self.query_filtered_length=0 117 self.query_nseqs=0 118 self.query_neffseqs=0 119 self.hit_length=0 120 self.hit_filtered_length=0 121 self.hit_nseqs=0 122 self.hit_neffseqs=0 123 self.sw_score=0 124 self.evalue=-1 125 self.query_start=-1 126 self.hit_start=-1 127 self.query_aln='' 128 self.hit_aln='' 129 self.positives=''130 131133 """Return the length of the query covered in alignment""" 134 s = self.query_aln.replace("=", "") 135 return len(s)136155 """ 156 Ali1: 60456.blo.gz.aln Ali2: allscop//14984.blo.gz.aln 157 ------query----- -------hit------------- 158 """ 159 if not "Ali1:" in line: 160 raise ValueError("Line does not contain 'Ali1:':\n%s" % line) 161 m = __regex["names"].search(line) 162 record.query = m.group(1) 163 record.hit = m.group(2)164166 if not line.startswith("Threshold"): 167 raise ValueError("Line does not start with 'Threshold':\n%s" % line) 168 m = __regex["threshold"].search(line) 169 record.gap_threshold = float(m.group(1))170172 if not line.startswith("length1="): 173 raise ValueError("Line does not start with 'length1=':\n%s" % line) 174 m = __regex["lengths"].search(line) 175 record.query_length = int(m.group(1)) 176 record.query_filtered_length = float(m.group(2)) 177 record.hit_length = int(m.group(3)) 178 record.hit_filtered_length = float(m.group(4))179181 if not "Nseqs1" in line: 182 raise ValueError("Line does not contain 'Nseqs1':\n%s" % line) 183 m = __regex["profilewidth"].search(line) 184 record.query_nseqs = int(m.group(1)) 185 record.query_neffseqs = float(m.group(2)) 186 record.hit_nseqs = int(m.group(3)) 187 record.hit_neffseqs = float(m.group(4))188190 if not line.startswith("Smith-Waterman"): 191 raise ValueError("Line does not start with 'Smith-Waterman':\n%s" % line) 192 m = __regex["scores"].search(line) 193 if m: 194 record.sw_score = int(m.group(1)) 195 record.evalue = float(m.group(2)) 196 else: 197 record.sw_score = 0 198 record.evalue = -1.0199201 m = __regex["start"].search(line) 202 if m: 203 record.query_start = int(m.group(1)) 204 m = __regex["align"].match(line) 205 assert m!=None, "invalid match" 206 record.query_aln += m.group(1)207209 m = __regex["positive_alignment"].match(line) 210 assert m!=None, "invalid match" 211 record.positives += m.group(1)212214 m = __regex["start"].search(line) 215 if m: 216 record.hit_start = int(m.group(1)) 217 m = __regex["align"].match(line) 218 assert m!=None, "invalid match" 219 record.hit_aln += m.group(1)220 221 # Everything below is deprecated 222 223 from Bio import File 224 from Bio.ParserSupport import * 225 import Bio 226 227229 """Reads compass output and generate events (DEPRECATED)"""309231 import warnings 232 warnings.warn("Bio.Compass._Scanner is deprecated; please use the read() and parse() functions in this module instead", Bio.BiopythonDeprecationWarning)233235 """Feed in COMPASS ouput""" 236 237 if isinstance(handle, File.UndoHandle): 238 pass 239 else: 240 handle = File.UndoHandle(handle) 241 242 243 assert isinstance(handle, File.UndoHandle), \ 244 "handle must be an UndoHandle" 245 if handle.peekline(): 246 self._scan_record(handle, consumer)247 248250 self._scan_names(handle, consumer) 251 self._scan_threshold(handle, consumer) 252 self._scan_lengths(handle,consumer) 253 self._scan_profilewidth(handle, consumer) 254 self._scan_scores(handle,consumer) 255 self._scan_alignment(handle,consumer)256258 """ 259 Ali1: 60456.blo.gz.aln Ali2: allscop//14984.blo.gz.aln 260 """ 261 read_and_call(handle, consumer.names, contains="Ali1:")262264 """ 265 Threshold of effective gap content in columns: 0.5 266 """ 267 read_and_call(handle, consumer.threshold, start="Threshold")268270 """ 271 length1=388 filtered_length1=386 length2=145 filtered_length2=137 272 """ 273 read_and_call(handle, consumer.lengths, start="length1=")274276 """ 277 Nseqs1=399 Neff1=12.972 Nseqs2=1 Neff2=6.099 278 """ 279 read_and_call(handle, consumer.profilewidth, contains="Nseqs1")280282 """ 283 Smith-Waterman score = 37 Evalue = 5.75e+02 284 """ 285 read_and_call(handle, consumer.scores, start="Smith-Waterman")286288 """ 289 QUERY 2 LSDRLELVSASEIRKLFDIAAGMKDVISLGIGEPDFDTPQHIKEYAKEALDKGLTHYGPN 290 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 291 QUERY 2 LSDRLELVSASEIRKLFDIAAGMKDVISLGIGEPDFDTPQHIKEYAKEALDKGLTHYGPN 292 293 294 QUERY IGLLELREAIAEKLKKQNGIEADPKTEIMVLLGANQAFLMGLSAFLKDGEEVLIPTPAFV 295 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 296 QUERY IGLLELREAIAEKLKKQNGIEADPKTEIMVLLGANQAFLMGLSAFLKDGEEVLIPTPAFV 297 298 """ 299 while 1: 300 line = handle.readline() 301 if not line: 302 break 303 if is_blank_line(line): 304 continue 305 else: 306 consumer.query_alignment(line) 307 read_and_call(handle, consumer.positive_alignment) 308 read_and_call(handle, consumer.hit_alignment)311 # all regular expressions used -- compile only once 312 _re_names = re.compile("Ali1:\s+(\S+)\s+Ali2:\s+(\S+)\s+") 313 _re_threshold = \ 314 re.compile("Threshold of effective gap content in columns: (\S+)") 315 _re_lengths = \ 316 re.compile("length1=(\S+)\s+filtered_length1=(\S+)\s+length2=(\S+)" 317 + "\s+filtered_length2=(\S+)") 318 _re_profilewidth = \ 319 re.compile("Nseqs1=(\S+)\s+Neff1=(\S+)\s+Nseqs2=(\S+)\s+Neff2=(\S+)") 320 _re_scores = re.compile("Smith-Waterman score = (\S+)\s+Evalue = (\S+)") 321 _re_start = re.compile("(\d+)") 322 _re_align = re.compile("^.{15}(\S+)") 323 _re_positive_alignment = re.compile("^.{15}(.+)") 324387326 import warnings 327 warnings.warn("Bio.Compass._Consumer is deprecated; please use the read() and parse() functions in this module instead", Bio.BiopythonDeprecationWarning) 328 self.data = None329331 """ 332 Ali1: 60456.blo.gz.aln Ali2: allscop//14984.blo.gz.aln 333 ------query----- -------hit------------- 334 """ 335 self.data = Record() 336 m = self.__class__._re_names.search(line) 337 self.data.query = m.group(1) 338 self.data.hit = m.group(2)339 343345 m = self.__class__._re_lengths.search(line) 346 self.data.query_length = int(m.group(1)) 347 self.data.query_filtered_length = float(m.group(2)) 348 self.data.hit_length = int(m.group(3)) 349 self.data.hit_filtered_length = float(m.group(4))350352 m = self.__class__._re_profilewidth.search(line) 353 self.data.query_nseqs = int(m.group(1)) 354 self.data.query_neffseqs = float(m.group(2)) 355 self.data.hit_nseqs = int(m.group(3)) 356 self.data.hit_neffseqs = float(m.group(4))357359 m = self.__class__._re_scores.search(line) 360 if m: 361 self.data.sw_score = int(m.group(1)) 362 self.data.evalue = float(m.group(2)) 363 else: 364 self.data.sw_score = 0 365 self.data.evalue = -1.0366368 m = self.__class__._re_start.search(line) 369 if m: 370 self.data.query_start = int(m.group(1)) 371 m = self.__class__._re_align.match(line) 372 assert m!=None, "invalid match" 373 self.data.query_aln = self.data.query_aln + m.group(1)374376 m = self.__class__._re_positive_alignment.match(line) 377 assert m!=None, "invalid match" 378 self.data.positives = self.data.positives + m.group(1)379389 """Parses compass results into a Record object (DEPRECATED). 390 """405392 import warnings 393 warnings.warn("Bio.Compass._RecordParser is deprecated; please use the read() and parse() functions in this module instead", Bio.BiopythonDeprecationWarning) 394 self._scanner = _Scanner() 395 self._consumer = _Consumer()396 397399 if isinstance(handle, File.UndoHandle): 400 uhandle = handle 401 else: 402 uhandle = File.UndoHandle(handle) 403 self._scanner.feed(uhandle, self._consumer) 404 return self._consumer.data407 """Iterate through a file of compass results (DEPRECATED)."""433409 import warnings 410 warnings.warn("Bio.Compass.Iterator is deprecated; please use the parse() function in this module instead", Bio.BiopythonDeprecationWarning) 411 412 self._uhandle = File.UndoHandle(handle) 413 self._parser = RecordParser()414416 lines = [] 417 while 1: 418 line = self._uhandle.readline() 419 if not line: 420 break 421 if line[0:4] == "Ali1" and lines: 422 self._uhandle.saveline(line) 423 break 424 425 lines.append(line) 426 427 428 if not lines: 429 return None 430 431 data = ''.join(lines) 432 return self._parser.parse(File.StringHandle(data))
Trees | Indices | Help |
---|
Generated by Epydoc 3.0.1 on Fri Nov 26 16:20:18 2010 | http://epydoc.sourceforge.net |