Package Bio :: Package Sequencing :: Module Ace
[hide private]
[frames] | no frames]

Source Code for Module Bio.Sequencing.Ace

  1  # Copyright 2004 by Frank Kauff and Cymon J. Cox.  All rights reserved. 
  2  # This code is part of the Biopython distribution and governed by its 
  3  # license.  Please see the LICENSE file that should have been included 
  4  # as part of this package. 
  5  """ 
  6  Parser for ACE files output by PHRAP. 
  7   
  8  Written by Frank Kauff (fkauff@duke.edu) and 
  9  Cymon J. Cox (cymon@duke.edu) 
 10   
 11  Uses the Biopython Parser interface: ParserSupport.py 
 12   
 13  Usage: 
 14   
 15  There are two ways of reading an ace file: 
 16  1) The function 'read' reads the whole file at once; 
 17  2) The function 'parse' reads the file contig after contig. 
 18       
 19  1) Parse whole ace file at once: 
 20   
 21          from Bio.Sequencing import Ace 
 22          acefilerecord=Ace.read(open('my_ace_file.ace')) 
 23   
 24  This gives you: 
 25          acefilerecord.ncontigs (the number of contigs in the ace file) 
 26          acefilerecord.nreads (the number of reads in the ace file) 
 27          acefilerecord.contigs[] (one instance of the Contig class for each contig) 
 28   
 29  The Contig class holds the info of the CO tag, CT and WA tags, and all the reads used 
 30  for this contig in a list of instances of the Read class, e.g.: 
 31   
 32          contig3=acefilerecord.contigs[2] 
 33          read4=contig3.reads[3] 
 34          RD_of_read4=read4.rd 
 35          DS_of_read4=read4.ds 
 36   
 37  CT, WA, RT tags from the end of the file can appear anywhere are automatically 
 38  sorted into the right place. 
 39   
 40  see _RecordConsumer for details. 
 41   
 42  2) Or you can iterate over the contigs of an ace file one by one in the ususal way: 
 43   
 44          from Bio.Sequencing import Ace 
 45          contigs=Ace.parse(open('my_ace_file.ace')) 
 46          for contig in contigs: 
 47              print contig.name 
 48              ... 
 49   
 50  Please note that for memory efficiency, when using the iterator approach, only one 
 51  contig is kept in memory at once.  However, there can be a footer to the ACE file 
 52  containing WA, CT, RT or WR tags which contain additional meta-data on the contigs. 
 53  Because the parser doesn't see this data until the final record, it cannot be added to 
 54  the appropriate records.  Instead these tags will be returned with the last contig record. 
 55  Thus an ace file does not entirerly suit the concept of iterating. If WA, CT, RT, WR tags 
 56  are needed, the 'read' function rather than the 'parse' function might be more appropriate. 
 57  """ 
 58   
 59   
60 -class rd:
61 """RD (reads), store a read with its name, sequence etc."""
62 - def __init__(self):
63 self.name='' 64 self.padded_bases=None 65 self.info_items=None 66 self.read_tags=None 67 self.sequence=''
68
69 -class qa:
70 """QA (read quality), including which part if any was used as the consensus."""
71 - def __init__(self, line=None):
72 self.qual_clipping_start=None 73 self.qual_clipping_end=None 74 self.align_clipping_start=None 75 self.align_clipping_end=None 76 if line: 77 header=map(eval,line.split()[1:]) 78 self.qual_clipping_start=header[0] 79 self.qual_clipping_end=header[1] 80 self.align_clipping_start=header[2] 81 self.align_clipping_end=header[3]
82
83 -class ds:
84 """DS lines, include file name of a read's chromatogram file."""
85 - def __init__(self, line=None):
86 self.chromat_file='' 87 self.phd_file='' 88 self.time='' 89 self.chem='' 90 self.dye='' 91 self.template='' 92 self.direction='' 93 if line: 94 tags=['CHROMAT_FILE','PHD_FILE','TIME','CHEM','DYE','TEMPLATE','DIRECTION'] 95 poss=map(line.find,tags) 96 tagpos=dict(zip(poss,tags)) 97 if -1 in tagpos: 98 del tagpos[-1] 99 ps=tagpos.keys() 100 ps.sort() 101 for (p1,p2) in zip(ps,ps[1:]+[len(line)+1]): 102 setattr(self,tagpos[p1].lower(),line[p1+len(tagpos[p1])+1:p2].strip())
103 104
105 -class af:
106 """AF lines, define the location of the read within the contig."""
107 - def __init__(self, line=None):
108 self.name='' 109 self.coru=None 110 self.padded_start=None 111 if line: 112 header = line.split() 113 self.name = header[1] 114 self.coru = header[2] 115 self.padded_start = int(header[3])
116
117 -class bs:
118 """"BS (base segment), which read was chosen as the consensus at each position."""
119 - def __init__(self, line=None):
120 self.name='' 121 self.padded_start=None 122 self.padded_end=None 123 if line: 124 header = line.split() 125 self.padded_start = int(header[1]) 126 self.padded_end = int(header[2]) 127 self.name = header[3]
128
129 -class rt:
130 """RT (transient read tags), generated by crossmatch and phrap."""
131 - def __init__(self, line=None):
132 self.name='' 133 self.tag_type='' 134 self.program='' 135 self.padded_start=None 136 self.padded_end=None 137 self.date='' 138 if line: 139 header=line.split() 140 self.name=header[0] 141 self.tag_type=header[1] 142 self.program=header[2] 143 self.padded_start=int(header[3]) 144 self.padded_end=int(header[4]) 145 self.date=header[5]
146
147 -class ct:
148 """CT (consensus tags)."""
149 - def __init__(self, line=None):
150 self.name='' 151 self.tag_type='' 152 self.program='' 153 self.padded_start=None 154 self.padded_end=None 155 self.date='' 156 self.notrans='' 157 self.info=[] 158 self.comment=[] 159 if line: 160 header=line.split() 161 self.name = header[0] 162 self.tag_type = header[1] 163 self.program = header[2] 164 self.padded_start = int(header[3]) 165 self.padded_end = int(header[4]) 166 self.date = header[5] 167 if len(header)==7: 168 self.notrans = header[6]
169
170 -class wa:
171 """WA (whole assembly tag), holds the assembly program name, version, etc."""
172 - def __init__(self, line=None):
173 self.tag_type='' 174 self.program='' 175 self.date='' 176 self.info=[] 177 if line: 178 header = line.split() 179 self.tag_type = header[0] 180 self.program = header[1] 181 self.date = header[2]
182
183 -class wr:
184 """WR lines."""
185 - def __init__(self, line=None):
186 self.name='' 187 self.aligned='' 188 self.program='' 189 self.date=[] 190 if line: 191 header = line.split() 192 self.name = header[0] 193 self.aligned = header[1] 194 self.program = header[2] 195 self.date = header[3]
196
197 -class Reads:
198 """Holds information about a read supporting an ACE contig."""
199 - def __init__(self, line=None):
200 self.rd=None # one per read 201 self.qa=None # one per read 202 self.ds=None # none or one per read 203 self.rt=None # none or many per read 204 self.wr=None # none or many per read 205 if line: 206 self.rd = rd() 207 header = line.split() 208 self.rd.name = header[1] 209 self.rd.padded_bases = int(header[2]) 210 self.rd.info_items = int(header[3]) 211 self.rd.read_tags = int(header[4])
212
213 -class Contig:
214 """Holds information about a contig from an ACE record."""
215 - def __init__(self, line=None):
216 self.name = '' 217 self.nbases=None 218 self.nreads=None 219 self.nsegments=None 220 self.uorc=None 221 self.sequence="" 222 self.quality=[] 223 self.af=[] 224 self.bs=[] 225 self.reads=[] 226 self.ct=None # none or many 227 self.wa=None # none or many 228 if line: 229 header = line.split() 230 self.name = header[1] 231 self.nbases = int(header[2]) 232 self.nreads = int(header[3]) 233 self.nsegments = int(header[4]) 234 self.uorc = header[5]
235
236 -def parse(handle):
237 """parse(handle) 238 239 where handle is a file-like object. 240 241 This function returns an iterator that allows you to iterate 242 over the ACE file record by record: 243 244 records = parse(handle) 245 for record in records: 246 # do something with the record 247 248 where each record is a Contig object. 249 """ 250 251 handle = iter(handle) 252 253 line = "" 254 while True: 255 # at beginning, skip the AS and look for first CO command 256 try: 257 while True: 258 if line.startswith('CO'): 259 break 260 line = handle.next() 261 except StopIteration: 262 return 263 264 record = Contig(line) 265 266 for line in handle: 267 line = line.strip() 268 if not line: 269 break 270 record.sequence+=line 271 272 for line in handle: 273 if line.strip(): 274 break 275 if not line.startswith("BQ"): 276 raise ValueError("Failed to find BQ line") 277 278 for line in handle: 279 if not line.strip(): 280 break 281 record.quality.extend(map(int,line.split())) 282 283 for line in handle: 284 if line.strip(): 285 break 286 287 while True: 288 if not line.startswith("AF "): 289 break 290 record.af.append(af(line)) 291 try: 292 line = handle.next() 293 except StopIteration: 294 raise ValueError("Unexpected end of AF block") 295 296 while True: 297 if line.strip(): 298 break 299 try: 300 line = handle.next() 301 except StopIteration: 302 raise ValueError("Unexpected end of file") 303 304 while True: 305 if not line.startswith("BS "): 306 break 307 record.bs.append(bs(line)) 308 try: 309 line = handle.next() 310 except StopIteration: 311 raise ValueError("Failed to find end of BS block") 312 313 # now read all the read data 314 # it starts with a 'RD', and then a mandatory QA 315 # then follows an optional DS 316 # CT,RT,WA,WR may or may not be there in unlimited quantity. They might refer to the actual read or contig, 317 # or, if encountered at the end of file, to any previous read or contig. the sort() method deals 318 # with that later. 319 while True: 320 321 # each read must have a rd and qa 322 try: 323 while True: 324 # If I've met the condition, then stop reading the line. 325 if line.startswith("RD "): 326 break 327 line = handle.next() 328 except StopIteration: 329 raise ValueError("Failed to find RD line") 330 331 record.reads.append(Reads(line)) 332 333 for line in handle: 334 line = line.strip() 335 if not line: 336 break 337 record.reads[-1].rd.sequence+=line 338 339 for line in handle: 340 if line.strip(): 341 break 342 if not line.startswith("QA "): 343 raise ValueError("Failed to find QA line") 344 record.reads[-1].qa = qa(line) 345 346 # now one ds can follow 347 for line in handle: 348 if line.strip(): 349 break 350 else: 351 break 352 353 if line.startswith("DS "): 354 record.reads[-1].ds = ds(line) 355 line = "" 356 # the file could just end, or there's some more stuff. In ace files, anything can happen. 357 # the following tags are interspersed between reads and can appear multiple times. 358 while True: 359 # something left 360 try: 361 while True: 362 if line.strip(): 363 break 364 line = handle.next() 365 except StopIteration: 366 # file ends here 367 break 368 if line.startswith("RT{"): 369 # now if we're at the end of the file, this rt could 370 # belong to a previous read, not the actual one. 371 # we store it here were it appears, the user can sort later. 372 if record.reads[-1].rt is None: 373 record.reads[-1].rt=[] 374 for line in handle: 375 line=line.strip() 376 if line=='}': break 377 record.reads[-1].rt.append(rt(line)) 378 line = "" 379 elif line.startswith("WR{"): 380 if record.reads[-1].wr is None: 381 record.reads[-1].wr=[] 382 for line in handle: 383 line=line.strip() 384 if line=='}': break 385 record.reads[-1].wr.append(wr(line)) 386 line = "" 387 elif line.startswith("WA{"): 388 if record.wa is None: 389 record.wa=[] 390 try: 391 line = handle.next() 392 except StopIteration: 393 raise ValueError("Failed to read WA block") 394 record.wa.append(wa(line)) 395 for line in handle: 396 line=line.strip() 397 if line=='}': break 398 record.wa[-1].info.append(line) 399 line = "" 400 elif line.startswith("CT{"): 401 if record.ct is None: 402 record.ct=[] 403 try: 404 line = handle.next() 405 except StopIteration: 406 raise ValueError("Failed to read CT block") 407 record.ct.append(ct(line)) 408 for line in handle: 409 line=line.strip() 410 if line=="COMMENT{": 411 for line in handle: 412 line = line.strip() 413 if line.endswith("C}"): 414 break 415 record.ct[-1].comment.append(line) 416 elif line=='}': 417 break 418 else: 419 record.ct[-1].info.append(line) 420 line = "" 421 else: 422 break 423 424 if not line.startswith('RD'): # another read? 425 break 426 427 yield record
428
429 -class ACEFileRecord:
430 """Holds data of an ACE file. 431 """
432 - def __init__(self):
433 self.ncontigs=None 434 self.nreads=None 435 self.contigs=[] 436 self.wa=None # none or many
437
438 - def sort(self):
439 """Sorts wr, rt and ct tags into the appropriate contig / read instance, if possible. """ 440 441 ct=[] 442 rt=[] 443 wr=[] 444 # search for tags that aren't in the right position 445 for i in range(len(self.contigs)): 446 c = self.contigs[i] 447 if c.wa: 448 if not self.wa: 449 self.wa=[] 450 self.wa.extend(c.wa) 451 if c.ct: 452 newcts=[ct_tag for ct_tag in c.ct if ct_tag.name!=c.name] 453 map(self.contigs[i].ct.remove,newcts) 454 ct.extend(newcts) 455 for j in range(len(c.reads)): 456 r = c.reads[j] 457 if r.rt: 458 newrts=[rt_tag for rt_tag in r.rt if rt_tag.name!=r.rd.name] 459 map(self.contigs[i].reads[j].rt.remove,newrts) 460 rt.extend(newrts) 461 if r.wr: 462 newwrs=[wr_tag for wr_tag in r.wr if wr_tag.name!=r.rd.name] 463 map(self.contigs[i].reads[j].wr.remove,newwrs) 464 wr.extend(newwrs) 465 # now sort them into their proper place 466 for i in range(len(self.contigs)): 467 c = self.contigs[i] 468 for ct_tag in ct: 469 if ct_tag.name==c.name: 470 if self.contigs[i].ct is None: 471 self.contigs[i].ct=[] 472 self.contigs[i].ct.append(ct_tag) 473 if rt or wr: 474 for j in range(len(c.reads)): 475 r = c.reads[j] 476 for rt_tag in rt: 477 if rt_tag.name==r.rd.name: 478 if self.contigs[i].reads[j].rt is None: 479 self.contigs[i].reads[j].rt=[] 480 self.contigs[i].reads[j].rt.append(rt_tag) 481 for wr_tag in wr: 482 if wr_tag.name==r.rd.name: 483 if self.contigs[i].reads[j].wr is None: 484 self.contigs[i].reads[j].wr=[] 485 self.contigs[i].reads[j].wr.append(wr_tag)
486
487 -def read(handle):
488 """Parses the full ACE file in list of contigs. 489 490 """ 491 492 handle = iter(handle) 493 494 record=ACEFileRecord() 495 496 try: 497 line = handle.next() 498 except StopIteration: 499 raise ValueError("Premature end of file") 500 501 # check if the file starts correctly 502 if not line.startswith('AS'): 503 raise ValueError("File does not start with 'AS'.") 504 505 words = line.split() 506 record.ncontigs, record.nreads = map(int, words[1:3]) 507 508 # now read all the records 509 record.contigs = list(parse(handle)) 510 # wa, ct, rt rags are usually at the end of the file, but not necessarily (correct?). 511 # If the iterator is used, the tags are returned with the contig or the read after which they appear, 512 # if all tags are at the end, they are read with the last contig. The concept of an 513 # iterator leaves no other choice. But if the user uses the ACEParser, we can check 514 # them and put them into the appropriate contig/read instance. 515 # Conclusion: An ACE file is not a filetype for which iteration is 100% suitable... 516 record.sort() 517 return record
518 519 #### Everything below is deprecated 520 521 from Bio import File 522 from Bio.ParserSupport import * 523
524 -class Iterator:
525 """Iterates over an ACE-file with multiple contigs. 526 527 Methods: 528 next Return the next record from the stream, or None. 529 """ 530
531 - def __init__(self, handle, parser=None):
532 """__init__(self, handle, parser=None) 533 534 Create a new iterator. handle is a file-like object. parser 535 is an optional Parser object to change the results into another form. 536 If set to None, then the raw contents of the file will be returned. 537 """ 538 import warnings 539 warnings.warn("Ace.Iterator is deprecated. Instead of Ace.Iterator(handle, Ace.RecordParser()), please use Ace.parse(handle)", DeprecationWarning) 540 self._uhandle = File.UndoHandle(handle) 541 self._parser = parser
542
543 - def next(self):
544 """next(self) -> object 545 546 Return the next contig record from the file. If no more records 547 return None. 548 """ 549 550 lines = [] 551 while 1: 552 # if at beginning, skip the AS and look for first CO command 553 line=self._uhandle.readline() 554 if not line: # empty or corrupt file 555 return None 556 if line[:2]=='CO': 557 lines.append(line) 558 break 559 while 1: 560 line = self._uhandle.readline() 561 if not line: 562 break 563 # If a new record, then put the line back and stop. 564 if lines and line[:2] == 'CO': 565 self._uhandle.saveline(line) 566 break 567 lines.append(line) 568 569 if not lines: 570 return None 571 572 data = ''.join(lines) 573 if self._parser is not None: 574 return self._parser.parse(File.StringHandle(data)) 575 return data
576
577 - def __iter__(self) :
578 """Iterate over the ACE file record by record.""" 579 return iter(self.next, None)
580
581 -class RecordParser(AbstractParser):
582 """Parses ACE file data into a Record object."""
583 - def __init__(self):
584 self._scanner = _Scanner() 585 self._consumer = _RecordConsumer()
586
587 - def parse(self, handle):
588 if isinstance(handle, File.UndoHandle): 589 uhandle = handle 590 else: 591 uhandle = File.UndoHandle(handle) 592 self._scanner.feed(uhandle, self._consumer) 593 return self._consumer.data
594
595 -class ACEParser(AbstractParser):
596 """Parses full ACE file in list of contigs. 597 598 """ 599
600 - def __init__(self):
601 import warnings 602 warnings.warn("Ace.ACEParser is deprecated. Instead of Ace.ACEParser().parse(handle), please use Ace.read(handle)", DeprecationWarning) 603 self.data=ACEFileRecord()
604
605 - def parse(self,handle):
606 firstline=handle.readline() 607 # check if the file starts correctly 608 if firstline[:2]!='AS': 609 raise ValueError("File does not start with 'AS'.") 610 self.data.ncontigs=eval(firstline.split()[1]) 611 self.data.nreads=eval(firstline.split()[2]) 612 # now read all the records 613 records = parse(handle) 614 self.data.contigs = list(records) 615 # wa, ct, rt rags are usually at the end of the file, but not necessarily (correct?). 616 # If the iterator is used, the tags are returned with the contig or the read after which they appear, 617 # if all tags are at the end, they are read with the last contig. The concept of an 618 # iterator leaves no other choice. But if the user uses the ACEParser, we can check 619 # them and put them into the appropriate contig/read instance. 620 # Conclusion: An ACE file is not a filetype for which iteration is 100% suitable... 621 self.data.sort() 622 return self.data
623
624 -class _Scanner:
625 """Scans an ACE-formatted file. 626 627 Methods: 628 feed - Feed one ACE record. 629 """
630 - def feed(self, handle, consumer):
631 """feed(self, handle, consumer) 632 633 Feed in ACE data for scanning. handle is a file-like object 634 containing ACE data. consumer is a Consumer object that will 635 receive events as the ACE data is scanned. 636 """ 637 assert isinstance(handle, File.UndoHandle), \ 638 "handle must be an UndoHandle" 639 if handle.peekline(): 640 self._scan_record(handle, consumer)
641
642 - def _scan_record(self, uhandle, consumer):
643 consumer.begin_contig() 644 read_and_call(uhandle,consumer.co_header,start='CO ') 645 consumer.co_data(self._scan_sequence_data(uhandle)) 646 read_and_call_while(uhandle,consumer.noevent,blank=1) 647 read_and_call(uhandle,consumer.bq_header,start='BQ') 648 consumer.bq_data(self._scan_bq_data(uhandle, consumer)) 649 read_and_call_while(uhandle,consumer.noevent,blank=1) 650 read_and_call_while(uhandle,consumer.af,start='AF ') 651 read_and_call_while(uhandle,consumer.noevent,blank=1) 652 read_and_call_while(uhandle,consumer.bs,start='BS ') 653 # now read all the read data 654 # it starts with a 'RD', and then a mandatory QA 655 # then follows an optional DS 656 # CT,RT,WA,WR may or may not be there in unlimited quantity. They might refer to the actial read or contig, 657 # or, if encountered at the end of file, to any previous read or contig. the sort() method deals 658 # with that later. 659 while 1: 660 # each read must have a rd and qa 661 read_and_call_until(uhandle,consumer.noevent,start='RD ') 662 read_and_call(uhandle,consumer.rd_header,start='RD ') 663 consumer.rd_data(self._scan_sequence_data(uhandle)) 664 read_and_call_while(uhandle,consumer.noevent,blank=1) 665 read_and_call(uhandle,consumer.qa,start='QA ') 666 # now one ds can follow 667 try: 668 read_and_call_while(uhandle,consumer.noevent,blank=1) 669 attempt_read_and_call(uhandle,consumer.ds,start='DS ') 670 except ValueError: 671 # file ends 672 consumer.end_contig() 673 return 674 # the file could just end, or there's some more stuff. In ace files, everything can happen. 675 # the following tags are interspersed between reads and can ap[pear multiple times. 676 while 1: 677 # something left 678 try: 679 read_and_call_while(uhandle,consumer.noevent,blank=1) 680 except ValueError: 681 # file ends here 682 consumer.end_contig() 683 return 684 else: 685 if attempt_read_and_call(uhandle,consumer.rt_start,start='RT'): 686 consumer.rt_data(self._scan_bracket_tags(uhandle)) 687 elif attempt_read_and_call(uhandle,consumer.wr_start,start='WR'): 688 consumer.wr_data(self._scan_bracket_tags(uhandle)) 689 elif attempt_read_and_call(uhandle,consumer.wa_start,start='WA'): 690 consumer.wa_data(self._scan_bracket_tags(uhandle)) 691 elif attempt_read_and_call(uhandle,consumer.ct_start,start='CT'): 692 consumer.ct_data(self._scan_bracket_tags(uhandle)) 693 else: 694 line=safe_peekline(uhandle) 695 break 696 if not line.startswith('RD'): # another read? 697 consumer.end_contig() 698 break
699
700 - def _scan_bq_data(self, uhandle, consumer):
701 """Scans multiple lines of quality data and concatenates them.""" 702 703 qual='' 704 while 1: 705 line=uhandle.readline() 706 if is_blank_line(line): 707 uhandle.saveline(line) 708 break 709 qual+=' '+line 710 return qual
711
712 - def _scan_sequence_data(self,uhandle):
713 """Scans multiple lines of sequence data and concatenates them.""" 714 715 seq='' 716 while 1: 717 line=uhandle.readline() 718 if is_blank_line(line): 719 uhandle.saveline(line) 720 break 721 seq+=line.strip() 722 return seq
723
724 - def _scan_bracket_tags(self,uhandle):
725 """Reads the data lines of a {} tag.""" 726 727 fulltag=[] 728 while 1: 729 line=uhandle.readline().strip() 730 fulltag.append(line) 731 if line.endswith('}'): 732 fulltag[-1]=fulltag[-1][:-1] # delete the ending } 733 if fulltag[-1]=='': 734 fulltag=fulltag[:-1] # delete empty line 735 break 736 return fulltag
737
738 -class _RecordConsumer(AbstractConsumer):
739 """Reads the ace tags into data records.""" 740
741 - def __init__(self):
742 self.data = None
743
744 - def begin_contig(self):
745 self.data = Contig()
746
747 - def end_contig(self):
748 pass
749
750 - def co_header(self,line):
751 header=line.split() 752 self.data.name=header[1] 753 self.data.nbases=eval(header[2]) 754 self.data.nreads=eval(header[3]) 755 self.data.nsegments=eval(header[4]) 756 self.data.uorc=header[5]
757
758 - def co_data(self,seq):
759 self.data.sequence=seq
760
761 - def bq_header(self,line):
762 pass
763
764 - def bq_data(self,qual):
765 self.data.quality=map(eval,qual.split())
766
767 - def af(self,line):
768 header=line.split() 769 afdata=af() 770 afdata.name=header[1] 771 afdata.coru=header[2] 772 afdata.padded_start=eval(header[3]) 773 self.data.af.append(afdata)
774
775 - def bs(self,line):
776 header=line.split() 777 bsdata=bs() 778 bsdata.padded_start=eval(header[1]) 779 bsdata.padded_end=eval(header[2]) 780 bsdata.name=header[3] 781 self.data.bs.append(bsdata)
782
783 - def rd_header(self,line):
784 header=line.split() 785 # Reads start with rd, so we create a new read record here 786 self.data.reads.append(Reads()) 787 rddata=rd() 788 rddata.name=header[1] 789 rddata.padded_bases=eval(header[2]) 790 rddata.info_items=eval(header[3]) 791 rddata.read_tags=eval(header[4]) 792 self.data.reads[-1].rd=rddata
793
794 - def rd_data(self,seq):
795 self.data.reads[-1].rd.sequence=seq
796
797 - def qa(self,line):
798 header=map(eval,line.split()[1:]) 799 qadata=qa() 800 qadata.qual_clipping_start=header[0] 801 qadata.qual_clipping_end=header[1] 802 qadata.align_clipping_start=header[2] 803 qadata.align_clipping_end=header[3] 804 self.data.reads[-1].qa=qadata
805
806 - def ds(self,line):
807 dsdata=ds() 808 tags=['CHROMAT_FILE','PHD_FILE','TIME','CHEM','DYE','TEMPLATE','DIRECTION'] 809 poss=map(line.find,tags) 810 tagpos=dict(zip(poss,tags)) 811 if -1 in tagpos: 812 del tagpos[-1] 813 ps=tagpos.keys() 814 ps.sort() 815 for (p1,p2) in zip(ps,ps[1:]+[len(line)+1]): 816 setattr(dsdata,tagpos[p1].lower(),line[p1+len(tagpos[p1])+1:p2].strip()) 817 self.data.reads[-1].ds=dsdata
818
819 - def ct_start(self,line):
820 if not line.strip().endswith('{'): 821 raise ValueError('CT tag does not start with CT{') 822 ctdata=ct() 823 if self.data.ct is None: 824 self.data.ct=[] 825 self.data.ct.append(ctdata)
826
827 - def ct_data(self,taglines):
828 if len(taglines)<1: 829 raise ValueError('Missing header line in CT tag') 830 header=taglines[0].split() 831 self.data.ct[-1].name=header[0] 832 self.data.ct[-1].tag_type=header[1] 833 self.data.ct[-1].program=header[2] 834 self.data.ct[-1].padded_start=eval(header[3]) 835 self.data.ct[-1].padded_end=eval(header[4]) 836 self.data.ct[-1].date=header[5] 837 if len(header)==7: 838 self.data.ct[-1].notrans=header[6] 839 self.data.ct[-1].info=taglines[1:]
840
841 - def rt_start(self,line):
842 if not line.strip().endswith('{'): 843 raise ValueError('RT tag does not start with RT{') 844 rtdata=rt() 845 # now if we're at the end of the file, this rt could belong to a previous read, not the actual one 846 # we store it here were it appears, the user can sort later. 847 if self.data.reads[-1].rt is None: 848 self.data.reads[-1].rt=[] 849 self.data.reads[-1].rt.append(rtdata)
850
851 - def rt_data(self,taglines):
852 if len(taglines)<1: 853 raise ValueError('Missing header line in RT tag') 854 header=taglines[0].split() 855 self.data.reads[-1].rt[-1].name=header[0] 856 self.data.reads[-1].rt[-1].tag_type=header[1] 857 self.data.reads[-1].rt[-1].program=header[2] 858 self.data.reads[-1].rt[-1].padded_start=eval(header[3]) 859 self.data.reads[-1].rt[-1].padded_end=eval(header[4]) 860 self.data.reads[-1].rt[-1].date=header[5]
861
862 - def wa_start(self,line):
863 if not line.strip().endswith('{'): 864 raise ValueError('WA tag does not start with WA{') 865 wadata=wa() 866 if self.data.wa is None: 867 self.data.wa=[] 868 self.data.wa.append(wadata)
869
870 - def wa_data(self,taglines):
871 if len(taglines)<1: 872 raise ValueError('Missing header line in WA tag') 873 header=taglines[0].split() 874 self.data.wa[-1].tag_type=header[0] 875 self.data.wa[-1].program=header[1] 876 self.data.wa[-1].date=header[2] 877 self.data.wa[-1].info=taglines[1:]
878
879 - def wr_start(self,line):
880 if not line.strip().endswith('{'): 881 raise ValueError('WR tag does not start with WR{') 882 wrdata=wr() 883 if self.data.reads[-1].wr is None: 884 self.data.reads[-1].wr=[] 885 self.data.reads[-1].wr.append(wrdata)
886
887 - def wr_data(self,taglines):
888 if len(taglines)<1: 889 raise ValueError('Missing header line in WR tag') 890 header=taglines[0].split() 891 self.data.reads[-1].wr[-1].name=header[0] 892 self.data.reads[-1].wr[-1].aligned=header[1] 893 self.data.reads[-1].wr[-1].program=header[2] 894 self.data.reads[-1].wr[-1].date=header[3]
895 896 if __name__ == "__main__" : 897 print "Quick self test" 898 #Test the iterator, 899 handle = open("../../Tests/Ace/contig1.ace") 900 contigs = parse(handle) 901 for contig in contigs: 902 print contig.name, len(contig.sequence), len(contig.reads) 903 handle.close() 904 print "Done" 905