1
2
3
4
5
6
7
8
9
10 """Bio.SeqIO support for the "ig" (IntelliGenetics or MASE) file format.
11
12 You are expected to use this module via the Bio.SeqIO functions."""
13
14 from Bio.Alphabet import single_letter_alphabet
15 from Bio.Seq import Seq
16 from Bio.SeqRecord import SeqRecord
17
18
def IgIterator(handle, alphabet=single_letter_alphabet):
    """Iterate over IntelliGenetics records (as SeqRecord objects).

    Arguments:
     - handle - input file handle, read line by line with readline()
     - alphabet - optional alphabet, passed to each record's Seq object

    The optional free format file header lines (which start with two
    semi-colons) are ignored.

    The free format commentary lines at the start of each record (which
    start with a semi-colon) are recorded as a single string with embedded
    new line characters in the SeqRecord's annotations dictionary under the
    key 'comment'.

    The first line after the commentary is used as both the record's id and
    its name.  The sequence lines follow, up to the next line starting with
    a semi-colon or the end of the file; spaces are removed and a single
    trailing digit one (the optional terminator) is dropped.

    Raises ValueError if a record does not start with a semi-colon, if the
    file ends before a record's title line, or if a digit one is still
    present in the sequence after the terminator has been removed.
    """
    # NOTE(review): the "def" line was missing from the reviewed source.
    # This signature is reconstructed from the docstring, the otherwise
    # unused single_letter_alphabet import and the IgIterator(handle) call
    # in the self test - confirm against other callers.

    # Skip any file header text before the first record (";;" lines).
    # An empty string from readline() means end of file and ends the loop.
    line = handle.readline()
    while line.startswith(";;"):
        line = handle.readline()

    while line:
        if not line.startswith(";"):
            raise ValueError(
                "Records should start with ';' and not:\n%s" % repr(line))

        # Drop the leading semi-colon and strip the rest, so "; text" and
        # ";text" give the same comment line.
        comment_lines = []
        while line.startswith(";"):
            comment_lines.append(line[1:].strip())
            line = handle.readline()
        if not line:
            # Without this check a file ending inside the commentary would
            # produce a record with an empty id and an empty sequence.
            raise ValueError(
                "Premature end of file, record commentary with no title line.")
        title = line.rstrip()

        # Collect sequence lines until the next record (";") or end of file.
        seq_lines = []
        line = handle.readline()
        while line and not line.startswith(";"):
            # Remove trailing whitespace, and any internal spaces.
            seq_lines.append(line.rstrip().replace(" ", ""))
            line = handle.readline()
        seq_str = "".join(seq_lines)
        if seq_str.endswith("1"):
            # Remove the optional terminator (digit one).
            seq_str = seq_str[:-1]
        if "1" in seq_str:
            raise ValueError(
                "Potential terminator digit one found within sequence.")

        # Return the record, then carry on with the line already read
        # (either the next record's first ";" line, or "" at end of file).
        record = SeqRecord(Seq(seq_str, alphabet), id=title, name=title)
        record.annotations['comment'] = "\n".join(comment_lines)
        yield record
82
if __name__ == "__main__":
    # Quick self test: parse every ".txt" file found in the IntelliGenetics
    # test directory and print the id and length of each record.
    # Single-argument print(...) calls give the same output under the
    # Python 2 print statement and the Python 3 print function.
    print("Running quick self test")

    import os
    # Relative path, so it is resolved against the current working directory.
    path = "../../Tests/IntelliGenetics/"
    if os.path.isdir(path):
        for filename in os.listdir(path):
            if os.path.splitext(filename)[-1] == ".txt":
                print("")
                print(filename)
                print("-" * len(filename))
                handle = open(os.path.join(path, filename))
                try:
                    for record in IgIterator(handle):
                        print("%s %s" % (record.id, len(record)))
                finally:
                    # Close the file even if parsing raises part way through.
                    handle.close()
        print("Done")
    else:
        print("Could not find input files")
101