1 """Utilities for working with FASTA-formatted sequences (OBSOLETE).
2
3 Classes:
4 Record Holds FASTA sequence data.
5 Iterator Iterates over sequence data in a FASTA file.
6 Dictionary Accesses a FASTA file using a dictionary interface.
7 RecordParser Parses FASTA sequence data into a Record object.
8 SequenceParser Parses FASTA sequence data into a SeqRecord object.
9
10 For a long time this module was the most commonly used and best documented
11 FASTA parser in Biopython. However, we now recommend using Bio.SeqIO instead.
12
13 In view of this, while you can continue to use Bio.Fasta for the moment, it is
14 considered to be a legacy module and should not be used if you are writing new
15 code. At some point Bio.Fasta may be officially deprecated (with warning
16 messages when used) before finally being removed.
17
18 If you are already using Bio.Fasta with the SequenceParser to get SeqRecord
19 objects, then you should be able to switch to the more recent Bio.SeqIO module
20 very easily as that too uses SeqRecord objects. For example,
21
22 from Bio import Fasta
23 handle = open("example.fas")
24 for seq_record in Fasta.Iterator(handle, Fasta.SequenceParser()) :
25 print seq_record.description
26 print seq_record.seq
27 handle.close()
28
29 Using Bio.SeqIO instead this becomes:
30
31 from Bio import SeqIO
32 handle = open("example.fas")
33 for seq_record in SeqIO.parse(handle, "fasta") :
34 print seq_record.description
35 print seq_record.seq
36 handle.close()
37
38 Converting existing code which uses the RecordParser is a little more
39 complicated, as the Bio.Fasta.Record object differs from the SeqRecord:
40
41 from Bio import Fasta
42 handle = open("example.fas")
43 for record in Fasta.Iterator(handle, Fasta.RecordParser()) :
44 #record is a Bio.Fasta.Record object
45 print record.title #The full title line as a string
46 print record.sequence #The sequence as a string
47 handle.close()
48
49 Using Bio.SeqIO instead this becomes:
50
51 from Bio import SeqIO
52 handle = open("example.fas")
53 for seq_record in SeqIO.parse(handle, "fasta") :
54 print seq_record.description #The full title line as a string
55 print seq_record.seq.tostring() #The sequence as a string
56 handle.close()
57
58
59
60 """
61 from Bio import Seq
62 from Bio import SeqRecord
63 from Bio import Alphabet
64
65
class Record:
    """Holds information from a FASTA record.

    Members:
    title       Title line ('>' character not included).
    sequence    The sequence.

    """
    def __init__(self, colwidth=60):
        """Create a new, empty Record.

        colwidth specifies the number of residues to put on each line
        when generating FASTA format.

        """
        self.title = ''
        self.sequence = ''
        # Only stored here; nothing in this block reads it back.
        self._colwidth = colwidth
84
95
class Iterator:
    """Returns one record at a time from a FASTA file.

    Each record is handed back either as the raw record text (title line
    and sequence lines joined with newlines, trailing whitespace removed)
    or, when a parser was supplied, as whatever parser.parse_string()
    returns for that text.
    """
    def __init__(self, handle, parser = None, debug = 0):
        """Initialize a new iterator.

        Arguments:
        o handle - Object with a readline() method to read the FASTA
        data from.
        o parser - Optional object with a parse_string() method.  If it
        is None, records are returned as plain strings.
        o debug - If true, progress messages are printed.
        """
        self.handle = handle
        self._parser = parser
        self._debug = debug

        # Discard anything before the first '>' line.  The loop stops with
        # line holding either that title line or '' at end of file.
        while True:
            line = handle.readline()
            if not line or line[0] == ">":
                break
            if debug:
                print("Skipping: " + line)
        self._lookahead = line

    def __iter__(self):
        """Iterate over the records, stopping when next() returns None."""
        return iter(self.next, None)

    def next(self):
        """Return the next record in the file, or None at end of file."""
        line = self._lookahead
        if not line:
            return None
        # Internal invariant: the look-ahead is always '' or a title line.
        assert line[0] == ">", line
        lines = [line.rstrip()]
        line = self.handle.readline()
        while line and line[0] != ">":
            if line[0] == "#":
                # Lines starting with '#' inside a record are dropped.
                if self._debug:
                    print("Ignoring comment line")
            else:
                lines.append(line.rstrip())
            line = self.handle.readline()
        # Keep the title line of the following record (or '') for next time.
        self._lookahead = line
        if self._debug:
            # The original referenced an undefined name 'title' here, which
            # raised NameError whenever debug was enabled.
            title = lines[0][1:]
            print("Debug: '%s' and '%s'" % (title, "".join(lines[1:])))
        data = "\n".join(lines)
        if self._parser is None:
            return data
        return self._parser.parse_string(data)
139
141 """Parses FASTA sequence data into a Fasta.Record object.
142 """
145
156
157 - def parse(self, handle):
159
161 """Parses FASTA sequence data into a SeqRecord object.
162 """
165 """Initialize a Scanner and Sequence Consumer.
166
167 Arguments:
168 o alphabet - The alphabet of the sequences to be parsed. If not
169 passed, this will be set as generic_alphabet.
170 o title2ids - A function that, when given the title line of a FASTA
171 record (without the leading >), will return the id, name and
172 description (in that order) for the record. If this is not given,
173 then the entire title line will be used as the description.
174 """
175 self.alphabet = alphabet
176 self.title2ids = title2ids
177
197
198 - def parse(self, handle):
200