1
2
3
4
5
6
7
8
9
10 """Bio.SeqIO support for the "fasta" (aka FastA or Pearson) file format.
11
12 You are expected to use this module via the Bio.SeqIO functions."""
13
14 from Bio.Alphabet import single_letter_alphabet
15 from Bio.Seq import Seq
16 from Bio.SeqRecord import SeqRecord
17 from Bio.SeqIO.Interfaces import SequentialSequenceWriter
18
19
21 """Generator function to iterate over Fasta records (as SeqRecord objects).
22
23 handle - input file
24 alphabet - optional alphabet
25 title2ids - A function that, when given the title of the FASTA
26 file (without the beginning >), will return the id, name and
27 description (in that order) for the record as a tuple of strings.
28
29 If this is not given, then the entire title line will be used
30 as the description, and the first word as the id and name.
31
32 Note that use of title2ids matches that of Bio.Fasta.SequenceParser
33 but the defaults are slightly different.
34 """
35
36 while True:
37 line = handle.readline()
38 if line == "" : return
39 if line[0] == ">":
40 break
41
42 while True:
43 if line[0]!=">":
44 raise ValueError("Records in Fasta files should start with '>' character")
45 if title2ids:
46 id, name, descr = title2ids(line[1:].rstrip())
47 else:
48 descr = line[1:].rstrip()
49 id = descr.split()[0]
50 name = id
51
52 lines = []
53 line = handle.readline()
54 while True:
55 if not line : break
56 if line[0] == ">": break
57
58
59
60 lines.append(line.rstrip().replace(" ","").replace("\r",""))
61 line = handle.readline()
62
63
64 yield SeqRecord(Seq("".join(lines), alphabet),
65 id = id, name = name, description = descr)
66
67 if not line : return
68 assert False, "Should not reach this line"
69
71 """Class to write Fasta format files."""
72 - def __init__(self, handle, wrap=60, record2title=None):
73 """Create a Fasta writer.
74
75 handle - Handle to an output file, e.g. as returned
76 by open(filename, "w")
77 wrap - Optional line length used to wrap sequence lines.
78 Defaults to wrapping the sequence at 60 characters
79 Use zero (or None) for no wrapping, giving a single
80 long line for the sequence.
81 record2title - Optional function to return the text to be
82 used for the title line of each record. By default the
83 a combination of the record.id and record.description
84 is used. If the record.description starts with the
85 record.id, then just the record.description is used.
86
87 You can either use:
88
89 myWriter = FastaWriter(open(filename,"w"))
90 writer.write_file(myRecords)
91
92 Or, follow the sequential file writer system, for example:
93
94 myWriter = FastaWriter(open(filename,"w"))
95 writer.write_header() # does nothing for Fasta files
96 ...
97 Multiple calls to writer.write_record() and/or writer.write_records()
98 ...
99 writer.write_footer() # does nothing for Fasta files
100 writer.close()
101 """
102 SequentialSequenceWriter.__init__(self, handle)
103
104 self.wrap = None
105 if wrap:
106 if wrap < 1:
107 raise ValueError
108 self.wrap = wrap
109 self.record2title = record2title
110
146
147 if __name__ == "__main__":
148 print "Running quick self test"
149
150 import os
151 from Bio.Alphabet import generic_protein, generic_nucleotide
152
153
154
155 fna_filename = "NC_005213.fna"
156 faa_filename = "NC_005213.faa"
157
163
176
177 if os.path.isfile(fna_filename):
178 print "--------"
179 print "FastaIterator (single sequence)"
180 iterator = FastaIterator(open(fna_filename, "r"), alphabet=generic_nucleotide, title2ids=genbank_name_function)
181 count=0
182 for record in iterator:
183 count=count+1
184 print_record(record)
185 assert count == 1
186 print str(record.__class__)
187
188 if os.path.isfile(faa_filename):
189 print "--------"
190 print "FastaIterator (multiple sequences)"
191 iterator = FastaIterator(open(faa_filename, "r"), alphabet=generic_protein, title2ids=genbank_name_function)
192 count=0
193 for record in iterator:
194 count=count+1
195 print_record(record)
196 break
197 assert count>0
198 print str(record.__class__)
199
200 from cStringIO import StringIO
201 print "--------"
202 print "FastaIterator (empty input file)"
203
204 iterator = FastaIterator(StringIO(""))
205 count = 0
206 for record in iterator:
207 count = count+1
208 assert count==0
209
210 print "Done"
211