Package Bio :: Package Fasta
[hide private]
[frames] | no frames]

Source Code for Package Bio.Fasta

  1  """Utilities for working with FASTA-formatted sequences (OBSOLETE). 
  2   
  3  Classes: 
  4  Record             Holds FASTA sequence data. 
  5  Iterator           Iterates over sequence data in a FASTA file. 
  6  Dictionary         Accesses a FASTA file using a dictionary interface. 
  7  RecordParser       Parses FASTA sequence data into a Record object. 
  8  SequenceParser     Parses FASTA sequence data into a SeqRecord object. 
  9   
 10  For a long time this module was the most commonly used and best documented 
 11  FASTA parser in Biopython.  However, we now recommend using Bio.SeqIO instead. 
 12   
 13  In view of this, while you can continue to use Bio.Fasta for the moment, it is 
 14  considered to be a legacy module and should not be used if you are writing new 
 15  code.  At some point Bio.Fasta may be officially deprecated (with warning 
 16  messages when used) before finally being removed. 
 17   
 18  If you are already using Bio.Fasta with the SequenceParser to get SeqRecord 
 19  objects, then you should be able to switch to the more recent Bio.SeqIO module 
 20  very easily as that too uses SeqRecord objects.  For example, 
 21   
 22  from Bio import Fasta 
 23  handle = open("example.fas") 
 24  for seq_record in Fasta.Iterator(handle, Fasta.SequenceParser()) : 
 25      print seq_record.description 
 26      print seq_record.seq 
 27  handle.close() 
 28   
 29  Using Bio.SeqIO instead this becomes: 
 30   
 31  from Bio import SeqIO 
 32  handle = open("example.fas") 
 33  for seq_record in SeqIO.parse(handle, "fasta") : 
 34      print seq_record.description 
 35      print seq_record.seq 
 36  handle.close() 
 37   
 38  Converting existing code which uses the RecordParser is a little more 
 39  complicated, as the Bio.Fasta.Record object differs from the SeqRecord. 
 40   
 41  from Bio import Fasta 
 42  handle = open("example.fas") 
 43  for record in Fasta.Iterator(handle, Fasta.RecordParser()) : 
 44      #record is a Bio.Fasta.Record object 
 45      print record.title #The full title line as a string 
 46      print record.sequence #The sequence as a string 
 47  handle.close() 
 48   
 49  Using Bio.SeqIO instead this becomes: 
 50   
 51  from Bio import SeqIO 
 52  handle = open("example.fas") 
 53  for seq_record in SeqIO.parse(handle, "fasta") : 
 54      print seq_record.description #The full title line as a string 
 55      print seq_record.seq.tostring() #The sequence as a string 
 56  handle.close() 
 57   
 58   
 59   
 60  """ 
 61  from Bio import Seq 
 62  from Bio import SeqRecord 
 63  from Bio import Alphabet 
 64   
 65   
class Record:
    """Holds information from a FASTA record.

    Members:
    title       Title line ('>' character not included).
    sequence    The sequence.

    """

    def __init__(self, colwidth=60):
        """Create a new, empty Record.

        colwidth specifies the number of residues to put on each line
        when generating FASTA format (see __str__).

        """
        self.title = ''
        self.sequence = ''
        self._colwidth = colwidth

    def __str__(self):
        """Return the record as FASTA text.

        The first line is '>' followed by the title; the sequence follows,
        wrapped so that each line holds at most colwidth residues.  Lines
        are joined with "\\n" and there is no trailing newline.

        Raises ValueError if there is a sequence to wrap but the column
        width is zero or negative, since wrapping could never finish.

        """
        lines = ['>%s' % self.title]
        sequence = self.sequence
        if len(sequence) > 0:
            width = self._colwidth
            if width <= 0:
                # Previously this case looped forever appending empty strings.
                raise ValueError(
                    "colwidth must be a positive integer, got %r" % (width,))
            for start in range(0, len(sequence), width):
                lines.append(sequence[start:start + width])
        # Always join with "\n" rather than os.linesep: using os.linesep
        # caused problems getting the tests to pass on Windows.
        return "\n".join(lines)
95
class Iterator:
    """Returns one record at a time from a FASTA file.

    Each record is returned either as the raw record text (title line and
    sequence lines joined with "\\n", trailing whitespace stripped) or, if
    a parser was supplied, as whatever parser.parse_string() returns for
    that text.
    """

    def __init__(self, handle, parser=None, debug=0):
        """Initialize a new iterator.

        Arguments:
        o handle - Object with a readline() method giving the FASTA data.
        o parser - Optional object with a parse_string(text) method used
        to convert each record's text.  If None, the text is returned.
        o debug - If true, print progress messages while reading.
        """
        self.handle = handle
        self._parser = parser
        self._debug = debug

        # Skip any text before the first record (e.g. blank lines)
        while True:
            line = handle.readline()
            if not line or line[0] == ">":
                break
            if debug:
                print("Skipping: " + line)
        # Either the first title line, or an empty string at end of file.
        self._lookahead = line

    def __iter__(self):
        """Iterate over the records, stopping when next() returns None."""
        return iter(self.next, None)

    def next(self):
        """Return the next record in the file, or None when exhausted.

        Lines starting with '#' inside a record are treated as comments
        and left out of the returned text.
        """
        line = self._lookahead
        if not line:
            return None
        assert line[0] == ">", line
        lines = [line.rstrip()]
        line = self.handle.readline()
        while line:
            if line[0] == ">":
                break
            if line[0] == "#":
                if self._debug:
                    print("Ignoring comment line")
            else:
                lines.append(line.rstrip())
            line = self.handle.readline()
        # Remember the next record's title line (or "" at end of file).
        self._lookahead = line
        if self._debug:
            # lines[0] is the title line; the remainder is the sequence.
            # (This used to reference an undefined name and raise NameError.)
            print("Debug: '%s' and '%s'" % (lines[0], "".join(lines[1:])))
        if self._parser is None:
            return "\n".join(lines)
        else:
            return self._parser.parse_string("\n".join(lines))
139
class RecordParser:
    """Parses FASTA sequence data into a Fasta.Record object.
    """

    def __init__(self, debug=0):
        """Create a parser.  The debug argument is accepted but unused."""
        pass

    def parse_string(self, text):
        """Parse the first FASTA record in text and return it as a Record.

        The text must start with '>'.  If it contains more than one record
        only the first is parsed.  A record consisting of a title line with
        no sequence lines gives a Record with an empty sequence.
        """
        text = text.replace("\r\n", "\n")  # Crude way of dealing with \r\n
        assert text[0] == ">", text
        text = text.split("\n>", 1)[0]  # Only do the first record if more than one
        # A title-only record has no newline left, so do not assume that
        # the split yields two parts.
        parts = text.split("\n", 1)
        title = parts[0][1:]  # Drop the leading '>'
        if len(parts) > 1:
            sequence = parts[1]
        else:
            sequence = ""
        rec = Record()
        rec.title = title
        rec.sequence = sequence.replace("\n", "")
        return rec

    def parse(self, handle):
        """Read everything from handle and parse the first record in it."""
        return self.parse_string(handle.read())
159
class SequenceParser:
    """Parses FASTA sequence data into a SeqRecord object.
    """

    def __init__(self, alphabet=Alphabet.generic_alphabet, title2ids=None,
                 debug=0):
        """Create a parser producing SeqRecord objects.

        Arguments:
        o alphabet - The alphabet of the sequences to be parsed. If not
        passed, this will be set as generic_alphabet.
        o title2ids - A function that, when given the title of the FASTA
        file (without the beginning >), will return the id, name and
        description (in that order) for the record. If this is not given,
        then the entire title line will be used as the description.
        o debug - Accepted but unused.
        """
        self.alphabet = alphabet
        self.title2ids = title2ids

    def parse_string(self, text):
        """Parse the first FASTA record in text and return it as a SeqRecord.

        The text must start with '>'.  If it contains more than one record
        only the first is parsed.  A record consisting of a title line with
        no sequence lines gives a SeqRecord with an empty sequence.
        """
        text = text.replace("\r\n", "\n")  # Crude way of dealing with \r\n
        assert text[0] == ">", text
        text = text.split("\n>", 1)[0]  # Only do the first record if more than one
        # A title-only record has no newline left, so do not assume that
        # the split yields two parts.
        parts = text.split("\n", 1)
        title = parts[0][1:]  # Drop the leading '>'
        if len(parts) > 1:
            sequence = parts[1]
        else:
            sequence = ""

        seq = Seq.Seq(sequence.replace("\n", ""), self.alphabet)
        rec = SeqRecord.SeqRecord(seq)

        if self.title2ids:
            seq_id, name, descr = self.title2ids(title)
            rec.id = seq_id
            rec.name = name
            rec.description = descr
        else:
            rec.description = title

        return rec

    def parse(self, handle):
        """Read everything from handle and parse the first record in it."""
        return self.parse_string(handle.read())
200