Package Bio :: Package AlignIO :: Module EmbossIO
[hide private]
[frames] | no frames]

Source Code for Module Bio.AlignIO.EmbossIO

  1  # Copyright 2008 by Peter Cock.  All rights reserved. 
  2  # 
  3  # This code is part of the Biopython distribution and governed by its 
  4  # license.  Please see the LICENSE file that should have been included 
  5  # as part of this package. 
  6  """ 
  7  Bio.AlignIO support for the "emboss" alignment output from EMBOSS tools. 
  8   
  9  You are expected to use this module via the Bio.AlignIO functions (or the 
 10  Bio.SeqIO functions if you want to work directly with the gapped sequences). 
 11   
 12  This module contains a parser for the EMBOSS pairs/simple file format, for 
 13  example from the alignret, water and needle tools. 
 14  """ 
 15   
 16  from Bio.Align.Generic import Alignment 
 17  from Interfaces import AlignmentIterator, SequentialAlignmentWriter 
 18   
19 -class EmbossWriter(SequentialAlignmentWriter) :
20 """Emboss alignment writer (WORK IN PROGRESS). 21 22 Writes a simplified version of the EMBOSS pairs/simple file format. 23 A lot of the information their tools record in their headers is not 24 available and is omitted. 25 """ 26
def write_header(self):
    """Write the file-level banner that opens an EMBOSS style report.

    The banner is a rule of '#' characters, a "Program" line, an optional
    "Report_file" line and a closing rule.  Everything is written to
    self.handle.
    """
    out = self.handle
    rule = "########################################\n"

    out.write(rule)
    out.write("# Program: Biopython\n")
    try:
        out.write("# Report_file: %s\n" % out.name)
    except AttributeError:
        # The handle has no usable name attribute, so the optional
        # Report_file line is simply left out.
        pass
    out.write(rule)
36 41
def write_alignment(self, alignment):
    """Use this to write (another) single alignment to an open file.

    WORK IN PROGRESS: only the per-alignment header (the number of
    sequences, their identifiers and the alignment length) is written
    to self.handle.  Writing the aligned sequences themselves has not
    been implemented yet, so this method always finishes by raising
    NotImplementedError.

    Arguments:
     - alignment - object providing get_all_seqs() (records with an
       ``id`` attribute) and get_alignment_length().

    Raises NotImplementedError after the header has been written.
    """
    handle = self.handle
    records = alignment.get_all_seqs()
    rule = "#=======================================\n"

    handle.write(rule)
    handle.write("#\n")
    handle.write("# Aligned_sequences: %i\n" % len(records))
    for index, record in enumerate(records):
        # EMBOSS numbers the sequences from one, not zero.
        handle.write("# %i: %s\n" % (index + 1, record.id))
    handle.write("#\n")
    handle.write("# Length: %i\n" % alignment.get_alignment_length())
    handle.write("#\n")
    handle.write(rule)
    handle.write("\n")

    # An explicit exception is used rather than "assert False" because
    # assert statements are removed under "python -O", which would make
    # this unfinished writer silently produce an alignment with no
    # sequence data.
    raise NotImplementedError("Writing the aligned sequences in EMBOSS "
                              "format is not implemented yet")
60
61 -class EmbossIterator(AlignmentIterator) :
62 """Emboss alignment iterator. 63 64 For reading the (pairwise) alignments from EMBOSS tools in what they 65 call the "pairs" and "simple" formats. 66 """ 67
def next(self):
    """Parse and return the next alignment found on self.handle.

    Lines are skipped until an alignment header (a '#====...' rule) is
    found.  The header supplies the number of sequences, their
    identifiers and the alignment length; the sequence lines that follow
    are then collected until the next '#----...' or '#====...' rule (or
    the end of the file).

    Returns an Alignment built with self.alphabet, or None if the handle
    is exhausted before another alignment header is found.

    Raises:
     - SyntaxError - the header has no "Aligned_sequences" or "Length"
       entry, or a parsed sequence does not have the stated length.
     - ValueError - the number of records differs from
       self.records_per_alignment, or a line is malformed or
       inconsistent with the data read so far (bad identifier, bad
       start/end coordinate, unrecognised line, incomplete block).
    """
    header_marker = "#======================================="
    footer_marker = "#---------------------------------------"
    handle = self.handle

    try:
        # Rule line saved when the previous call read one line past the
        # end of its own alignment.
        line = self._header
        del self._header
    except AttributeError:
        line = handle.readline()

    # Skip the file banner (and anything else) before the next header.
    while line and line.rstrip() != header_marker:
        line = handle.readline()
    if not line:
        return None

    length_of_seqs = None
    number_of_seqs = None
    ids = []

    # Read the alignment header to discover the number of records
    # expected and their length.  startswith() is used instead of
    # line[0] so that a file truncated inside the header ends the loop
    # rather than raising IndexError on the empty string.
    while line.startswith("#"):
        parts = line[1:].split(":", 1)
        key = parts[0].lower().strip()
        if key in ("aligned_sequences", "length") and len(parts) != 2:
            raise ValueError("Header line has no value: %r" % line)
        if key == "aligned_sequences":
            if ids:
                raise ValueError("Repeated Aligned_sequences header line")
            number_of_seqs = int(parts[1].strip())
            # The record identifiers follow, one numbered line each.
            for expected in range(1, number_of_seqs + 1):
                line = handle.readline()
                parts = line[1:].strip().split(":", 1)
                try:
                    found = int(parts[0].strip())
                except ValueError:
                    found = None
                if len(parts) != 2 or found != expected:
                    raise ValueError("Expected identifier line %i of %i, "
                                     "found: %r"
                                     % (expected, number_of_seqs, line))
                ids.append(parts[1].strip())
        elif key == "length":
            length_of_seqs = int(parts[1].strip())
        line = handle.readline()

    if number_of_seqs is None:
        raise SyntaxError("Number of sequences missing!")
    if length_of_seqs is None:
        raise SyntaxError("Length of sequences missing!")

    if self.records_per_alignment is not None \
    and self.records_per_alignment != number_of_seqs:
        raise ValueError("Found %i records in this alignment, told to expect %i"
                         % (number_of_seqs, self.records_per_alignment))

    # Each aligned sequence is broken up over several lines.  The pieces
    # are collected per record and joined once at the end, and the
    # number of non-gap letters is tracked as a running total, so the
    # growing sequence is never re-scanned.
    pieces = [[] for name in ids]
    ungapped = [0] * number_of_seqs
    index = 0

    while line:
        if len(line) > 21:
            id_start = line[:21].strip().split(None, 1)
            seq_end = line[21:].strip().split(None, 1)
            if len(id_start) == 2 and len(seq_end) == 2:
                # identifier, seq start position, seq, seq end position
                name, start = id_start
                seq, end = seq_end
                start = int(start)
                end = int(end)

                if index >= number_of_seqs:
                    raise ValueError("Expected index %i in range [0,%i)"
                                     % (index, number_of_seqs))
                # The identifier on a sequence line may be truncated.
                if not ids[index].startswith(name):
                    raise ValueError("Identifier %r does not match %r:\n%s"
                                     % (name, ids[index], line))

                letters = len(seq.replace("-", ""))
                so_far = ungapped[index]
                if start == 0:
                    # Special case when one sequence starts long before
                    # the other: nothing read yet and only gaps here.
                    consistent = (so_far == 0 and letters == 0)
                elif start == so_far:
                    # Special case when one sequence ends long before
                    # the other: only gaps may follow.
                    consistent = (letters == 0)
                else:
                    consistent = (start - 1 == so_far)
                if not consistent:
                    raise ValueError("Found %i chars so far for sequence "
                                     "%i (%s), file says start %i:\n%s"
                                     % (so_far, index, name, start, line))

                pieces[index].append(seq)
                ungapped[index] = so_far + letters

                if end != ungapped[index]:
                    raise ValueError("Found %i chars so far for %s, "
                                     "file says end %i:\n%s"
                                     % (ungapped[index], name, end, line))

                index += 1
                if index >= number_of_seqs:
                    index = 0
            # Otherwise this is alignment annotation (e.g. the match
            # line between the sequences), which is skipped.
        elif line.strip():
            # A short, non-blank line is not part of the format.
            raise ValueError("Unrecognised EMBOSS alignment line: %r" % line)

        line = handle.readline()
        if line.rstrip() in (footer_marker, header_marker):
            # End of alignment; keep the rule line for the next call.
            self._header = line
            break

    if index != 0:
        raise ValueError("Alignment ended part way through a block of "
                         "sequence lines")

    alignment = Alignment(self.alphabet)
    for name, parts in zip(ids, pieces):
        seq = "".join(parts)
        if len(seq) != length_of_seqs:
            raise SyntaxError("Error parsing alignment - sequences of different length?")
        alignment.add_sequence(name, seq)
    return alignment
200 201 if __name__ == "__main__" : 202 print "Running a quick self-test" 203 204 #http://emboss.sourceforge.net/docs/themes/alnformats/align.simple 205 simple_example = \ 206 """######################################## 207 # Program: alignret 208 # Rundate: Wed Jan 16 17:16:13 2002 209 # Report_file: stdout 210 ######################################## 211 #======================================= 212 # 213 # Aligned_sequences: 4 214 # 1: IXI_234 215 # 2: IXI_235 216 # 3: IXI_236 217 # 4: IXI_237 218 # Matrix: EBLOSUM62 219 # Gap_penalty: 10.0 220 # Extend_penalty: 0.5 221 # 222 # Length: 131 223 # Identity: 95/131 (72.5%) 224 # Similarity: 127/131 (96.9%) 225 # Gaps: 25/131 (19.1%) 226 # Score: 100.0 227 # 228 # 229 #======================================= 230 231 IXI_234 1 TSPASIRPPAGPSSRPAMVSSRRTRPSPPGPRRPTGRPCCSAAPRRPQAT 50 232 IXI_235 1 TSPASIRPPAGPSSR---------RPSPPGPRRPTGRPCCSAAPRRPQAT 41 233 IXI_236 1 TSPASIRPPAGPSSRPAMVSSR--RPSPPPPRRPPGRPCCSAAPPRPQAT 48 234 IXI_237 1 TSPASLRPPAGPSSRPAMVSSRR-RPSPPGPRRPT----CSAAPRRPQAT 45 235 |||||:|||||||||::::::: |||||:||||:::::|||||:||||| 236 237 IXI_234 51 GGWKTCSGTCTTSTSTRHRGRSGWSARTTTAACLRASRKSMRAACSRSAG 100 238 IXI_235 42 GGWKTCSGTCTTSTSTRHRGRSGW----------RASRKSMRAACSRSAG 81 239 IXI_236 49 GGWKTCSGTCTTSTSTRHRGRSGWSARTTTAACLRASRKSMRAACSR--G 96 240 IXI_237 46 GGYKTCSGTCTTSTSTRHRGRSGYSARTTTAACLRASRKSMRAACSR--G 93 241 ||:||||||||||||||||||||:::::::::::||||||||||||| | 242 243 IXI_234 101 SRPNRFAPTLMSSCITSTTGPPAWAGDRSHE 131 244 IXI_235 82 SRPNRFAPTLMSSCITSTTGPPAWAGDRSHE 112 245 IXI_236 97 SRPPRFAPPLMSSCITSTTGPPPPAGDRSHE 127 246 IXI_237 94 SRPNRFAPTLMSSCLTSTTGPPAYAGDRSHE 124 247 |||:||||:|||||:|||||||::||||||| 248 249 250 #--------------------------------------- 251 #--------------------------------------- 252 253 """ 254 255 #http://emboss.sourceforge.net/docs/themes/alnformats/align.pair 256 pair_example = \ 257 """######################################## 258 # Program: water 259 # Rundate: Wed Jan 16 17:23:19 2002 260 # 
Report_file: stdout 261 ######################################## 262 #======================================= 263 # 264 # Aligned_sequences: 2 265 # 1: IXI_234 266 # 2: IXI_235 267 # Matrix: EBLOSUM62 268 # Gap_penalty: 10.0 269 # Extend_penalty: 0.5 270 # 271 # Length: 131 272 # Identity: 112/131 (85.5%) 273 # Similarity: 112/131 (85.5%) 274 # Gaps: 19/131 (14.5%) 275 # Score: 591.5 276 # 277 # 278 #======================================= 279 280 IXI_234 1 TSPASIRPPAGPSSRPAMVSSRRTRPSPPGPRRPTGRPCCSAAPRRPQAT 50 281 ||||||||||||||| |||||||||||||||||||||||||| 282 IXI_235 1 TSPASIRPPAGPSSR---------RPSPPGPRRPTGRPCCSAAPRRPQAT 41 283 284 IXI_234 51 GGWKTCSGTCTTSTSTRHRGRSGWSARTTTAACLRASRKSMRAACSRSAG 100 285 |||||||||||||||||||||||| |||||||||||||||| 286 IXI_235 42 GGWKTCSGTCTTSTSTRHRGRSGW----------RASRKSMRAACSRSAG 81 287 288 IXI_234 101 SRPNRFAPTLMSSCITSTTGPPAWAGDRSHE 131 289 ||||||||||||||||||||||||||||||| 290 IXI_235 82 SRPNRFAPTLMSSCITSTTGPPAWAGDRSHE 112 291 292 293 #--------------------------------------- 294 #--------------------------------------- 295 296 297 """ 298 299 pair_example2 = \ 300 """######################################## 301 # Program: needle 302 # Rundate: Sun 27 Apr 2007 17:20:35 303 # Commandline: needle 304 # [-asequence] Spo0F.faa 305 # [-bsequence] paired_r.faa 306 # -sformat2 pearson 307 # Align_format: srspair 308 # Report_file: ref_rec .needle 309 ######################################## 310 311 #======================================= 312 # 313 # Aligned_sequences: 2 314 # 1: ref_rec 315 # 2: gi|94968718|receiver 316 # Matrix: EBLOSUM62 317 # Gap_penalty: 10.0 318 # Extend_penalty: 0.5 319 # 320 # Length: 124 321 # Identity: 32/124 (25.8%) 322 # Similarity: 64/124 (51.6%) 323 # Gaps: 17/124 (13.7%) 324 # Score: 112.0 325 # 326 # 327 #======================================= 328 329 ref_rec 1 KILIVDD----QYGIRILLNEVFNKEGYQTFQAANGLQALDIVTKERPDL 46 330 :|:.|| :.|.|::|.: :.|.....:|.:|.||:.:..:..|.: 331 gi|94968718|r 1 
-VLLADDHALVRRGFRLMLED--DPEIEIVAEAGDGAQAVKLAGELHPRV 47 332 333 ref_rec 47 VLLDMKIPGMDGIEILKRMKVIDENIRVIIMTAYGELDMIQESKELGALT 96 334 |::|..:|||.|::..|:::....:|.|:::|.:.|...::.:.|.||.. 335 gi|94968718|r 48 VVMDCAMPGMSGMDATKQIRTQWPDIAVLMLTMHSEDTWVRLALEAGANG 97 336 337 ref_rec 97 HFAK-PFDIDEIRDAV-------- 111 338 :..| ..|:|.|: || 339 gi|94968718|r 98 YILKSAIDLDLIQ-AVRRVANGET 120 340 341 342 #======================================= 343 # 344 # Aligned_sequences: 2 345 # 1: ref_rec 346 # 2: gi|94968761|receiver 347 # Matrix: EBLOSUM62 348 # Gap_penalty: 10.0 349 # Extend_penalty: 0.5 350 # 351 # Length: 119 352 # Identity: 34/119 (28.6%) 353 # Similarity: 58/119 (48.7%) 354 # Gaps: 9/119 ( 7.6%) 355 # Score: 154.0 356 # 357 # 358 #======================================= 359 360 ref_rec 1 KILIVDDQYGIRILLNEVFNKEGYQTFQAANGLQALDIVTKERPDLVLLD 50 361 ||||||:......|:..|...|::.....|.::||:|...:..||:|.| 362 gi|94968761|r 1 -ILIVDDEANTLASLSRAFRLAGHEATVCDNAVRALEIAKSKPFDLILSD 49 363 364 ref_rec 51 MKIPGMDGIEILKRMKVIDENIRVIIMTAYGELDMIQESKELGALTHFAK 100 365 :.:||.||:.:|:.:|.......|::|:....::|..::..||||....| 366 gi|94968761|r 50 VVMPGRDGLTLLEDLKTAGVQAPVVMMSGQAHIEMAVKATRLGALDFLEK 99 367 368 ref_rec 101 PFDIDEIRDAV-------- 111 369 |...|::...| 370 gi|94968761|r 100 PLSTDKLLLTVENALKLKR 118 371 372 373 #======================================= 374 # 375 # Aligned_sequences: 2 376 # 1: ref_rec 377 # 2: gi|94967506|receiver 378 # Matrix: EBLOSUM62 379 # Gap_penalty: 10.0 380 # Extend_penalty: 0.5 381 # 382 # Length: 120 383 # Identity: 29/120 (24.2%) 384 # Similarity: 53/120 (44.2%) 385 # Gaps: 9/120 ( 7.5%) 386 # Score: 121.0 387 # 388 # 389 #======================================= 390 391 ref_rec 1 -KILIVDDQYGIRILLNEVFNKEGYQTFQAANGLQALDIVTKERPDLVLL 49 392 .|::|||..|..:.:..||.:.|:..........|.:.:.....||.:: 393 gi|94967506|r 1 LHIVVVDDDPGTCVYIESVFAELGHTCKSFVRPEAAEEYILTHPVDLAIV 50 394 395 ref_rec 50 DMKIPGMDGIEILKRMKVIDENIRVIIMTAYGELDMIQESKELGALTHFA 99 396 
|:.:....|:|:|:|.:|....:..:|:|....|:|...|...||:.:.. 397 gi|94967506|r 51 DVYLGSTTGVEVLRRCRVHRPKLYAVIITGQISLEMAARSIAEGAVDYIQ 100 398 399 ref_rec 100 KPFDIDEIRDAV-------- 111 400 ||.|||.:.:.. 401 gi|94967506|r 101 KPIDIDALLNIAERALEHKE 120 402 403 404 #======================================= 405 # 406 # Aligned_sequences: 2 407 # 1: ref_rec 408 # 2: gi|94970045|receiver 409 # Matrix: EBLOSUM62 410 # Gap_penalty: 10.0 411 # Extend_penalty: 0.5 412 # 413 # Length: 118 414 # Identity: 30/118 (25.4%) 415 # Similarity: 64/118 (54.2%) 416 # Gaps: 9/118 ( 7.6%) 417 # Score: 126.0 418 # 419 # 420 #======================================= 421 422 ref_rec 1 KILIVDDQYGIRILLNEVFNKEGYQTFQAANGLQALDIVTK--ERPDLVL 48 423 :|:|:|:..:|....:.....||:...|.:|.:||.:.:| ||.|::: 424 gi|94970045|r 1 -VLLVEDEEALRAAAGDFLETRGYKIMTARDGTEALSMASKFAERIDVLI 49 425 426 ref_rec 49 LDMKIPGMDGIEILKRMKVIDENIRVIIMTAYGELDMIQESKELGALTHF 98 427 .|:.:||:.|..:.:.:..|....:|:.|:.|.: :.:..:.|:.:.:.| 428 gi|94970045|r 50 TDLVMPGISGRVLAQELVKIHPETKVMYMSGYDD-ETVMVNGEIDSSSAF 98 429 430 ref_rec 99 -AKPFDID----EIRDAV 111 431 .|||.:| :||:.: 432 gi|94970045|r 99 LRKPFRMDALSAKIREVL 116 433 434 435 #======================================= 436 # 437 # Aligned_sequences: 2 438 # 1: ref_rec 439 # 2: gi|94970041|receiver 440 # Matrix: EBLOSUM62 441 # Gap_penalty: 10.0 442 # Extend_penalty: 0.5 443 # 444 # Length: 125 445 # Identity: 35/125 (28.0%) 446 # Similarity: 70/125 (56.0%) 447 # Gaps: 18/125 (14.4%) 448 # Score: 156.5 449 # 450 # 451 #======================================= 452 453 ref_rec 1 KILIVDDQYGIRILLNEVFNKEGYQTFQAANGLQALDIV--TKERPDLVL 48 454 .:|:|:|:.|:|.|:..:.:::||...:|.:|.:||:|| :.::.|::| 455 gi|94970041|r 1 TVLLVEDEEGVRKLVRGILSRQGYHVLEATSGEEALEIVRESTQKIDMLL 50 456 457 ref_rec 49 LDMKIPGMDGIEILKRMKVIDENIRVIIMTAYGELDMIQESKELGALTHF 98 458 .|:.:.||.|.|:.:|:::...:::||.|:.|.:..:::. |.||.. 
459 gi|94970041|r 51 SDVVLVGMSGRELSERLRIQMPSLKVIYMSGYTDDAIVRH----GVLTES 96 460 461 ref_rec 99 A----KPFDIDEIRDAV-------- 111 462 | |||..|.:...| 463 gi|94970041|r 97 AEFLQKPFTSDSLLRKVRAVLQKRQ 121 464 465 466 #--------------------------------------- 467 #--------------------------------------- 468 469 """ 470 471 pair_example3 = """######################################## 472 # Program: needle 473 # Rundate: Mon 14 Jul 2008 11:45:42 474 # Commandline: needle 475 # [-asequence] asis:TGTGGTTAGGTTTGGTTTTATTGGGGGCTTGGTTTGGGCCCACCCCAAATAGGGAGTGGGGGTATGACCTCAGATAGACGAGCTTATTTTAGGGCGGCGACTATAATTATTTCGTTTCCTACAAGGATTAAAGTTTTTTCTTTTACTGTGGGAGGGGGTTTGGTATTAAGAAACGCTAGTCCGGATGTGGCTCTCCATGATACTTATTGTGTAGTAGCTCATTTTCATTATGTTCTTCGAATGGGAGCAGTCATTGGTATTTTTTTGGTTTTTTTTTGAAATTTTTAGGTTATTTAGACCATTTTTTTTTGTTTCGCTAATTAGAATTTTATTAGCCTTTGGTTTTTTTTTATTTTTTGGGGTTAAGACAAGGTGTCGTTGAATTAGTTTAGCAAAATACTGCTTAAGGTAGGCTATAGGATCTACCTTTTATCTTTCTAATCTTTTGTTTTAGTATAATTGGTCTTCGATTCAACAATTTTTAGTCTTCAGTCTTTTTTTTTATTTTGAAAAGGTTTTAACACTCTTGGTTTTGGAGGCTTTGGCTTTCTTCTTACTCTTAGGAGGATGGGCGCTAGAAAGAGTTTTAAGAGGGTGTGAAAGGGGGTTAATAGC 476 # [-bsequence] asis:TTATTAATCTTATGGTTTTGCCGTAAAATTTCTTTCTTTATTTTTTATTGTTAGGATTTTGTTGATTTTATTTTTCTCAAGAATTTTTAGGTCAATTAGACCGGCTTATTTTTTTGTCAGTGTTTAAAGTTTTATTAATTTTTGGGGGGGGGGGGAGACGGGGTGTTATCTGAATTAGTTTTTGGGAGTCTCTAGACATCTCATGGGTTGGCCGGGGGCCTGCCGTCTATAGTTCTTATTCCTTTTAAGGGAGTAAGAATTTCGATTCAGCAACTTTAGTTCACAGTCTTTTTTTTTATTAAGAAAGGTTT 477 # -filter 478 # Align_format: srspair 479 # Report_file: stdout 480 ######################################## 481 482 #======================================= 483 # 484 # Aligned_sequences: 2 485 # 1: asis 486 # 2: asis 487 # Matrix: EDNAFULL 488 # Gap_penalty: 10.0 489 # Extend_penalty: 0.5 490 # 491 # Length: 667 492 # Identity: 210/667 (31.5%) 493 # Similarity: 210/667 (31.5%) 494 # Gaps: 408/667 (61.2%) 495 # Score: 561.0 496 # 497 # 498 #======================================= 499 500 asis 1 TGTGGTTAGGTTTGGTTTTATTGGGGGCTTGGTTTGGGCCCACCCCAAAT 50 501 
502 asis 0 -------------------------------------------------- 0 503 504 asis 51 AGGGAGTGGGGGTATGACCTCAGATAGACGAGCTTATTTTAGGGCGGCGA 100 505 506 asis 0 -------------------------------------------------- 0 507 508 asis 101 CTATAATTATTTCGTTTCCTACAAGGATTAAAGTTTTTTCTTTTACTGTG 150 509 510 asis 0 -------------------------------------------------- 0 511 512 asis 151 GGAGGGGGTTTGGTATTAAGAAACGCTAGTCCGGATGTGGCTCTCCATGA 200 513 .|||||| 514 asis 1 ------------TTATTAA------------------------------- 7 515 516 asis 201 TACTTATTGT------GTAGTAGCTCATTTTCATTATGTTCTTCGAATGG 244 517 .|||||.|| |||..|..|| ||||.||||.||.| ||.| 518 asis 8 -TCTTATGGTTTTGCCGTAAAATTTC--TTTCTTTATTTTTT----ATTG 50 519 520 asis 245 GAGCAGTCATTGGTATTTTTTTGGTTTTTTTTT------GAAATTTTTAG 288 521 ||.|.|||||.|||.||||.|||| | ||||||||| 522 asis 51 ---------TTAGGATTTTGTTGATTTTATTTTTCTCAAG-AATTTTTAG 90 523 524 asis 289 GTTATTTAGACC-----ATTTTTTTTT--GTTTCGCTAATTAGAATTTTA 331 525 ||.|.||||||| ||||||||.| ||.| |||.|.||||| 526 asis 91 GTCAATTAGACCGGCTTATTTTTTTGTCAGTGT------TTAAAGTTTTA 134 527 528 asis 332 TTAGCCTTTGGTTTTTTTTTATTTTT----TGGGGTTAAGACAAGGTGTC 377 529 ||| |||||| .||||...||||..|||||. 
530 asis 135 TTA-----------------ATTTTTGGGGGGGGGGGGAGACGGGGTGTT 167 531 532 asis 378 GT-TGAATTAGTTTAGCAAAATACTGCTTAAGGTAGGCTATA-------- 418 533 .| ||||||||||| || ||.||.||.|| 534 asis 168 ATCTGAATTAGTTT-------------TT--GGGAGTCTCTAGACATCTC 202 535 536 asis 419 -------------GGATCTACCTTTTATCTTTCTAAT--CTTTT----GT 449 537 ||..||.||.|.|||..||||.|| ||||| | 538 asis 203 ATGGGTTGGCCGGGGGCCTGCCGTCTATAGTTCTTATTCCTTTTAAGGG- 251 539 540 asis 450 TTTAGT-ATAATTGGTCTTCGATTCAACAATTTTTAGTCTTCAGTCTTTT 498 541 ||| |.||| |||||||||.||| .||||||...||||||||| 542 asis 252 ---AGTAAGAAT-----TTCGATTCAGCAA-CTTTAGTTCACAGTCTTTT 292 543 544 asis 499 TTTTTATTTTGAAAAGGTTTTAACACTCTTGGTTTTGGAGGCTTTGGCTT 548 545 ||||||||..| |||||||| 546 asis 293 TTTTTATTAAG-AAAGGTTT------------------------------ 311 547 548 asis 549 TCTTCTTACTCTTAGGAGGATGGGCGCTAGAAAGAGTTTTAAGAGGGTGT 598 549 550 asis 311 -------------------------------------------------- 311 551 552 asis 599 GAAAGGGGGTTAATAGC 615 553 554 asis 311 ----------------- 311 555 556 557 #--------------------------------------- 558 #---------------------------------------""" 559 560 from StringIO import StringIO 561 562 alignments = list(EmbossIterator(StringIO(pair_example))) 563 assert len(alignments) == 1 564 assert len(alignments[0].get_all_seqs()) == 2 565 assert [r.id for r in alignments[0].get_all_seqs()] \ 566 == ["IXI_234", "IXI_235"] 567 568 alignments = list(EmbossIterator(StringIO(simple_example))) 569 assert len(alignments) == 1 570 assert len(alignments[0].get_all_seqs()) == 4 571 assert [r.id for r in alignments[0].get_all_seqs()] \ 572 == ["IXI_234", "IXI_235", "IXI_236", "IXI_237"] 573 574 alignments = list(EmbossIterator(StringIO(pair_example + simple_example))) 575 assert len(alignments) == 2 576 assert len(alignments[0].get_all_seqs()) == 2 577 assert len(alignments[1].get_all_seqs()) == 4 578 assert [r.id for r in alignments[0].get_all_seqs()] \ 579 == ["IXI_234", "IXI_235"] 580 assert [r.id for r in alignments[1].get_all_seqs()] \ 581 == 
["IXI_234", "IXI_235", "IXI_236", "IXI_237"] 582 583 584 #for a in EmbossIterator(StringIO(pair_example2)) : 585 # print "Next:" 586 # for r in a.get_all_seqs() : 587 # print r.seq.tostring()[:20] + "...", r.id 588 589 alignments = list(EmbossIterator(StringIO(pair_example2))) 590 assert len(alignments) == 5 591 assert len(alignments[0].get_all_seqs()) == 2 592 assert [r.id for r in alignments[0].get_all_seqs()] \ 593 == ["ref_rec", "gi|94968718|receiver"] 594 assert [r.id for r in alignments[4].get_all_seqs()] \ 595 == ["ref_rec", "gi|94970041|receiver"] 596 597 598 599 alignments = list(EmbossIterator(StringIO(pair_example3))) 600 assert len(alignments) == 1 601 assert len(alignments[0].get_all_seqs()) == 2 602 assert [r.id for r in alignments[0].get_all_seqs()] \ 603 == ["asis","asis"] 604 605 print "Done" 606