1
2
3
4
5
6
7
8
9
10 """Bio.SeqIO support for the "ig" (IntelliGenetics or MASE) file format.
11
12 You are expected to use this module via the Bio.SeqIO functions."""
13
14 from Bio.Alphabet import single_letter_alphabet
15 from Bio.Seq import Seq
16 from Bio.SeqRecord import SeqRecord
17
18
def IgIterator(handle, alphabet=single_letter_alphabet):
    """Iterate over IntelliGenetics records (as SeqRecord objects).

    handle - input file
    alphabet - optional alphabet

    The optional free format file header lines (which start with two
    semi-colons) are ignored.

    The free format commentary lines at the start of each record (which
    start with a semi-colon) are recorded as a single string with embedded
    new line characters in the SeqRecord's annotations dictionary under the
    key 'comment'.

    The line following a record's commentary is used (with trailing
    whitespace removed) as both the id and the name of the SeqRecord.  The
    remaining lines, up to the next line starting with a semi-colon or the
    end of the file, are joined to form the sequence after removing
    trailing whitespace and any embedded spaces.  A trailing digit one
    (the format's optional end of sequence terminator) is not included in
    the sequence.

    Raises ValueError if a record does not start with a semi-colon.
    """
    # Skip any file header text before the first record (";;" lines).
    # Afterwards `line` holds the first non-header line, or an empty string
    # if the file is empty or contains only header lines.
    line = handle.readline()
    while line.startswith(";;"):
        line = handle.readline()

    # Each pass of this loop consumes exactly one record.  The sequence
    # reading loop below leaves the first line of the following record (or
    # an empty string at the end of the file) in `line`.
    while line:
        if line[0] != ";":
            raise ValueError(
                "Records should start with ';' and not:\n%s" % repr(line))

        # Commentary lines: drop the leading semi-colon and surrounding
        # whitespace, so both "; text" and ";text" give "text".
        comment_lines = []
        while line.startswith(";"):
            comment_lines.append(line[1:].strip())
            line = handle.readline()

        # The first line after the commentary is the record title.
        title = line.rstrip()

        seq_lines = []
        while True:
            line = handle.readline()
            if not line or line[0] == ";":
                # End of file, or the start of the next record.
                break
            # Remove trailing whitespace, and any internal spaces.
            seq_lines.append(line.rstrip().replace(" ", ""))

        seq_str = "".join(seq_lines)
        if seq_str.endswith("1"):
            # Remove the optional terminator (digit one); it marks the end
            # of the sequence and is not part of the sequence data.
            seq_str = seq_str[:-1]

        record = SeqRecord(Seq(seq_str, alphabet), id=title, name=title)
        record.annotations['comment'] = "\n".join(comment_lines)
        yield record

    # The outer loop only finishes once readline() has returned an empty
    # string, i.e. the whole file has been consumed.
73
if __name__ == "__main__":
    # Quick self test: parse every ".txt" file found in the example
    # directory and print the id and length of each record.
    #
    # print(...) is always called with a single argument here, so the
    # output is the same whether print is a statement or a function.
    print("Running quick self test")

    import os
    test_dir = "../../Tests/Intelligenetics/"
    for filename in os.listdir(test_dir):
        if os.path.splitext(filename)[-1] == ".txt":
            print("")
            print(filename)
            print("-" * len(filename))
            handle = open(os.path.join(test_dir, filename))
            try:
                for record in IgIterator(handle):
                    print("%s %s" % (record.id, len(record)))
            finally:
                # Close the file even if parsing raises part way through.
                handle.close()
    print("Done")
88