Package Bio :: Package writers :: Package SeqRecord :: Module embl
[hide private]
[frames | no frames]

Source Code for Module Bio.writers.SeqRecord.embl

 1  """Part of an old unused and undocumented sequence writing framework (DEPRECATED).""" 
 2  # Not clear on the distinction, if any, between 'embl' and 'embl/65'.  This 
 3  # code might apply to either or both. 
 4   
 5  # See 'http://www.ebi.ac.uk/embl/Documentation/User_manual/usrman.html' for a 
 6  # definition of this file format. 
 7   
 8  # This code only makes a best effort--the output may not be strictly valid. 
 9  # So, for example, the EMBL ID is supposed to be alphanumeric, starting with a 
10  # letter, but we don't check for this, etc. 
11   
12   
13  # Example: 
14  # ID   AA03518    standard; DNA; FUN; 237 BP. 
15  # XX 
16  # AC   U03518; 
17  # XX 
18  # DE   Aspergillus awamori internal transcribed spacer 1 (ITS1) and 18S 
19  # DE   rRNA and 5.8S rRNA genes, partial sequence. 
20  # XX 
21  # SQ   Sequence 237 BP; 41 A; 77 C; 67 G; 52 T; 0 other; 
22  #      aacctgcgga aggatcatta ccgagtgcgg gtcctttggg cccaacctcc catccgtgtc        60 
23  #      tattgtaccc tgttgcttcg gcgggcccgc cgcttgtcgg ccgccggggg ggcgcctctg       120 
24  #      ccccccgggc ccgtgcccgc cggagacccc aacacgaaca ctgtctgaaa gcgtgcagtc       180 
25  #      tgagttgatt gaatgcaatc agttaaaact ttcaacaatg gatctcttgg ttccggc          237 
26  # // 
27   
28   
29  import textwrap 
30   
31  from Bio import Alphabet 
32  from Bio import Writer 
33   
class WriteEmbl(Writer.Writer):
    """Write sequence records as best-effort EMBL flat-file entries (DEPRECATED).

    Only the ID line, the DE lines, the SQ header, the sequence rows and the
    terminating ``//`` are written.  The record contents are not validated,
    so the output may not be strictly valid EMBL.
    """

    def __init__(self, outfile):
        """Create a writer for ``outfile``; the handle is passed to Writer.Writer."""
        Writer.Writer.__init__(self, outfile)

    def write(self, record):
        """Write ``record`` to ``self.outfile`` as a single EMBL entry.

        The record must provide ``id``, ``description`` and ``seq``; the
        sequence must provide ``alphabet`` (with a ``size``) and ``data``.

        Raises AssertionError if the alphabet size is not 1.
        """
        # NOTE(review): self.outfile is presumably set by Writer.Writer.__init__
        # -- confirm against Bio.Writer.
        out = self.outfile

        seq = record.seq
        assert seq.alphabet.size == 1, \
            "cannot handle alphabet of size %d" % seq.alphabet.size
        data = seq.data
        upperdata = data.upper()

        # It'd be nice if the alphabet was usefully set, but for many
        # interesting cases (e.g., reading from FASTA files), it's not.
        if isinstance(seq.alphabet, Alphabet.RNAAlphabet):
            molecule = 'mRNA'
            letters = ['A', 'C', 'G', 'U']
        else:
            molecule = 'DNA'
            letters = ['A', 'C', 'G', 'T']

        division = 'UNC'  # unknown

        # Every EMBL line starts with a two-letter line code followed by
        # three blanks, so the data begins in column 6 (the sequence rows
        # below use the same five-column indent).
        out.write("ID   %s standard; %s; %s; %d BP.\n"
                  % (record.id, molecule, division, len(data)))

        # Wrap at 74 so that prefix + text stays within an 80-column line.
        for desc_line in textwrap.wrap(record.description, 74):
            out.write("DE   %s\n" % desc_line)

        # Per-letter composition; anything that is not one of the four
        # expected letters is reported as "other".
        counts = [upperdata.count(letter) for letter in letters]
        othercount = len(upperdata) - sum(counts)
        countstring = ''.join([" %d %s;" % pair
                               for pair in zip(counts, letters)])

        out.write("SQ   Sequence %s BP;%s %d other;\n"
                  % (len(data), countstring, othercount))

        rowlength = 60
        blocklength = 10
        # range() rather than xrange() so the loop runs on Python 2 and 3.
        for start in range(0, len(data), rowlength):
            out.write(" " * 5)
            row = data[start:start + rowlength]
            # Always emit all six block slots, left-justified and padded, so
            # the trailing position counter stays right-aligned on a short
            # final row.
            for offset in range(0, rowlength, blocklength):
                block = row[offset:offset + blocklength]
                out.write("%-*s" % (blocklength + 1, block))
            # Position of the last residue on this row.
            out.write("%9d\n" % min(start + rowlength, len(data)))

        out.write("//\n")


# Name under which the writer class is exposed at module level.
make_writer = WriteEmbl