1
2 try:
3 set = set
4 except NameError:
5 from sets import Set as set
6
7 import string
8 from Bio import Alphabet
9 from Bio.Alphabet import IUPAC
10 from Bio.Data import IUPACData
11
# Registries of the plain (unambiguous) codon tables.  They start empty and
# are filled in as each NCBI table is registered, keyed either by table
# name or by numeric table id.
unambiguous_dna_by_name = {}
unambiguous_dna_by_id = {}
unambiguous_rna_by_name = {}
unambiguous_rna_by_id = {}
generic_by_name = {}
generic_by_id = {}
# Ambiguity-aware wrappers around the generic tables; populated near the
# end of the module from generic_by_name / generic_by_id.
ambiguous_generic_by_name = {}
ambiguous_generic_by_id = {}


# Rebound to the DNA / RNA table objects when the table with id 1 is
# registered.
standard_dna_table = None
standard_rna_table = None
24
25
26
27
28
31
108
119
120
132
133
136
139
140
141
def register_ncbi_table(name, alt_name, id, table, start_codons, stop_codons):
    """Register one NCBI genetic code in the module-level registries.

    Three table objects are built from the DNA-alphabet input: a DNA
    table, an RNA table (every "T" replaced by "U") and a generic table
    holding both spellings.  Each is stored under ``id`` and under every
    name of the table.

    Arguments:
     - name - table name; several names may be given in one string,
       separated by "; ".
     - alt_name - additional short name, or None.
     - id - numeric table identifier; the table with id 1 also becomes
       ``standard_dna_table`` / ``standard_rna_table``.
     - table - dict mapping DNA codons to one-letter amino acid codes.
     - start_codons, stop_codons - lists of DNA codons.
    """
    global standard_dna_table, standard_rna_table

    # str.split replaces the deprecated string.split() module function.
    names = name.split("; ")

    # alt_name is always passed as the last entry of the names list handed
    # to the table objects, even when it is None.
    dna = NCBICodonTableDNA(id, names + [alt_name], table, start_codons,
                            stop_codons)

    # The generic table accepts both the DNA and the RNA spelling of a codon.
    rna_table = {}
    generic_table = {}
    for dna_codon, amino in table.items():
        rna_codon = dna_codon.replace("T", "U")
        generic_table[dna_codon] = amino
        generic_table[rna_codon] = amino
        rna_table[rna_codon] = amino

    # Generic codon lists interleave the two spellings: DNA first, then RNA.
    rna_start_codons = []
    generic_start_codons = []
    for dna_codon in start_codons:
        rna_codon = dna_codon.replace("T", "U")
        generic_start_codons.extend((dna_codon, rna_codon))
        rna_start_codons.append(rna_codon)

    rna_stop_codons = []
    generic_stop_codons = []
    for dna_codon in stop_codons:
        rna_codon = dna_codon.replace("T", "U")
        generic_stop_codons.extend((dna_codon, rna_codon))
        rna_stop_codons.append(rna_codon)

    generic = NCBICodonTable(id, names + [alt_name], generic_table,
                             generic_start_codons, generic_stop_codons)
    rna = NCBICodonTableRNA(id, names + [alt_name], rna_table,
                            rna_start_codons, rna_stop_codons)

    if id == 1:
        standard_dna_table = dna
        standard_rna_table = rna

    unambiguous_dna_by_id[id] = dna
    unambiguous_rna_by_id[id] = rna
    generic_by_id[id] = generic

    # Only a real alternative name becomes a lookup key.
    if alt_name is not None:
        names.append(alt_name)

    for table_name in names:
        unambiguous_dna_by_name[table_name] = dna
        unambiguous_rna_by_name[table_name] = rna
        generic_by_name[table_name] = generic
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
# Table 1: the standard genetic code.  TAA, TAG and TGA are stop codons and
# therefore have no entry in the codon -> amino acid mapping.
register_ncbi_table(name = 'Standard',
                    alt_name = 'SGC0', id = 1,
                    table = {
     'TTT': 'F', 'TTC': 'F', 'TTA': 'L', 'TTG': 'L', 'TCT': 'S',
     'TCC': 'S', 'TCA': 'S', 'TCG': 'S', 'TAT': 'Y', 'TAC': 'Y',
     'TGT': 'C', 'TGC': 'C', 'TGG': 'W', 'CTT': 'L', 'CTC': 'L',
     'CTA': 'L', 'CTG': 'L', 'CCT': 'P', 'CCC': 'P', 'CCA': 'P',
     'CCG': 'P', 'CAT': 'H', 'CAC': 'H', 'CAA': 'Q', 'CAG': 'Q',
     'CGT': 'R', 'CGC': 'R', 'CGA': 'R', 'CGG': 'R', 'ATT': 'I',
     'ATC': 'I', 'ATA': 'I', 'ATG': 'M', 'ACT': 'T', 'ACC': 'T',
     'ACA': 'T', 'ACG': 'T', 'AAT': 'N', 'AAC': 'N', 'AAA': 'K',
     'AAG': 'K', 'AGT': 'S', 'AGC': 'S', 'AGA': 'R', 'AGG': 'R',
     'GTT': 'V', 'GTC': 'V', 'GTA': 'V', 'GTG': 'V', 'GCT': 'A',
     'GCC': 'A', 'GCA': 'A', 'GCG': 'A', 'GAT': 'D', 'GAC': 'D',
     'GAA': 'E', 'GAG': 'E', 'GGT': 'G', 'GGC': 'G', 'GGA': 'G',
     'GGG': 'G', },
                    stop_codons = [ 'TAA', 'TAG', 'TGA', ],
                    start_codons = [ 'TTG', 'CTG', 'ATG', ]
                    )
# Table 2.  Relative to table 1: TGA codes for W, ATA codes for M, and
# AGA / AGG are stop codons instead of R.
register_ncbi_table(name = 'Vertebrate Mitochondrial',
                    alt_name = 'SGC1', id = 2,
                    table = {
     'TTT': 'F', 'TTC': 'F', 'TTA': 'L', 'TTG': 'L', 'TCT': 'S',
     'TCC': 'S', 'TCA': 'S', 'TCG': 'S', 'TAT': 'Y', 'TAC': 'Y',
     'TGT': 'C', 'TGC': 'C', 'TGA': 'W', 'TGG': 'W', 'CTT': 'L',
     'CTC': 'L', 'CTA': 'L', 'CTG': 'L', 'CCT': 'P', 'CCC': 'P',
     'CCA': 'P', 'CCG': 'P', 'CAT': 'H', 'CAC': 'H', 'CAA': 'Q',
     'CAG': 'Q', 'CGT': 'R', 'CGC': 'R', 'CGA': 'R', 'CGG': 'R',
     'ATT': 'I', 'ATC': 'I', 'ATA': 'M', 'ATG': 'M', 'ACT': 'T',
     'ACC': 'T', 'ACA': 'T', 'ACG': 'T', 'AAT': 'N', 'AAC': 'N',
     'AAA': 'K', 'AAG': 'K', 'AGT': 'S', 'AGC': 'S', 'GTT': 'V',
     'GTC': 'V', 'GTA': 'V', 'GTG': 'V', 'GCT': 'A', 'GCC': 'A',
     'GCA': 'A', 'GCG': 'A', 'GAT': 'D', 'GAC': 'D', 'GAA': 'E',
     'GAG': 'E', 'GGT': 'G', 'GGC': 'G', 'GGA': 'G', 'GGG': 'G', },
                    stop_codons = [ 'TAA', 'TAG', 'AGA', 'AGG', ],
                    start_codons = [ 'ATT', 'ATC', 'ATA', 'ATG', 'GTG', ]
                    )
# Table 3.  Relative to table 1: TGA codes for W, ATA codes for M, and the
# four CTN codons code for T instead of L.
register_ncbi_table(name = 'Yeast Mitochondrial',
                    alt_name = 'SGC2', id = 3,
                    table = {
     'TTT': 'F', 'TTC': 'F', 'TTA': 'L', 'TTG': 'L', 'TCT': 'S',
     'TCC': 'S', 'TCA': 'S', 'TCG': 'S', 'TAT': 'Y', 'TAC': 'Y',
     'TGT': 'C', 'TGC': 'C', 'TGA': 'W', 'TGG': 'W', 'CTT': 'T',
     'CTC': 'T', 'CTA': 'T', 'CTG': 'T', 'CCT': 'P', 'CCC': 'P',
     'CCA': 'P', 'CCG': 'P', 'CAT': 'H', 'CAC': 'H', 'CAA': 'Q',
     'CAG': 'Q', 'CGT': 'R', 'CGC': 'R', 'CGA': 'R', 'CGG': 'R',
     'ATT': 'I', 'ATC': 'I', 'ATA': 'M', 'ATG': 'M', 'ACT': 'T',
     'ACC': 'T', 'ACA': 'T', 'ACG': 'T', 'AAT': 'N', 'AAC': 'N',
     'AAA': 'K', 'AAG': 'K', 'AGT': 'S', 'AGC': 'S', 'AGA': 'R',
     'AGG': 'R', 'GTT': 'V', 'GTC': 'V', 'GTA': 'V', 'GTG': 'V',
     'GCT': 'A', 'GCC': 'A', 'GCA': 'A', 'GCG': 'A', 'GAT': 'D',
     'GAC': 'D', 'GAA': 'E', 'GAG': 'E', 'GGT': 'G', 'GGC': 'G',
     'GGA': 'G', 'GGG': 'G', },
                    stop_codons = [ 'TAA', 'TAG', ],
                    start_codons = [ 'ATG', ]
                    )
# Table 4.  The name string holds five "; "-separated names, each of which
# becomes a lookup key.  Relative to table 1: TGA codes for W.
register_ncbi_table(name = 'Mold Mitochondrial; Protozoan Mitochondrial; Coelenterate Mitochondrial; Mycoplasma; Spiroplasma',
                    alt_name = 'SGC3', id = 4,
                    table = {
     'TTT': 'F', 'TTC': 'F', 'TTA': 'L', 'TTG': 'L', 'TCT': 'S',
     'TCC': 'S', 'TCA': 'S', 'TCG': 'S', 'TAT': 'Y', 'TAC': 'Y',
     'TGT': 'C', 'TGC': 'C', 'TGA': 'W', 'TGG': 'W', 'CTT': 'L',
     'CTC': 'L', 'CTA': 'L', 'CTG': 'L', 'CCT': 'P', 'CCC': 'P',
     'CCA': 'P', 'CCG': 'P', 'CAT': 'H', 'CAC': 'H', 'CAA': 'Q',
     'CAG': 'Q', 'CGT': 'R', 'CGC': 'R', 'CGA': 'R', 'CGG': 'R',
     'ATT': 'I', 'ATC': 'I', 'ATA': 'I', 'ATG': 'M', 'ACT': 'T',
     'ACC': 'T', 'ACA': 'T', 'ACG': 'T', 'AAT': 'N', 'AAC': 'N',
     'AAA': 'K', 'AAG': 'K', 'AGT': 'S', 'AGC': 'S', 'AGA': 'R',
     'AGG': 'R', 'GTT': 'V', 'GTC': 'V', 'GTA': 'V', 'GTG': 'V',
     'GCT': 'A', 'GCC': 'A', 'GCA': 'A', 'GCG': 'A', 'GAT': 'D',
     'GAC': 'D', 'GAA': 'E', 'GAG': 'E', 'GGT': 'G', 'GGC': 'G',
     'GGA': 'G', 'GGG': 'G', },
                    stop_codons = [ 'TAA', 'TAG', ],
                    start_codons = [ 'TTA', 'TTG', 'CTG', 'ATT', 'ATC',
                                     'ATA', 'ATG', 'GTG', ]
                    )
# Table 5.  Relative to table 1: TGA codes for W, ATA codes for M, and
# AGA / AGG code for S instead of R.
register_ncbi_table(name = 'Invertebrate Mitochondrial',
                    alt_name = 'SGC4', id = 5,
                    table = {
     'TTT': 'F', 'TTC': 'F', 'TTA': 'L', 'TTG': 'L', 'TCT': 'S',
     'TCC': 'S', 'TCA': 'S', 'TCG': 'S', 'TAT': 'Y', 'TAC': 'Y',
     'TGT': 'C', 'TGC': 'C', 'TGA': 'W', 'TGG': 'W', 'CTT': 'L',
     'CTC': 'L', 'CTA': 'L', 'CTG': 'L', 'CCT': 'P', 'CCC': 'P',
     'CCA': 'P', 'CCG': 'P', 'CAT': 'H', 'CAC': 'H', 'CAA': 'Q',
     'CAG': 'Q', 'CGT': 'R', 'CGC': 'R', 'CGA': 'R', 'CGG': 'R',
     'ATT': 'I', 'ATC': 'I', 'ATA': 'M', 'ATG': 'M', 'ACT': 'T',
     'ACC': 'T', 'ACA': 'T', 'ACG': 'T', 'AAT': 'N', 'AAC': 'N',
     'AAA': 'K', 'AAG': 'K', 'AGT': 'S', 'AGC': 'S', 'AGA': 'S',
     'AGG': 'S', 'GTT': 'V', 'GTC': 'V', 'GTA': 'V', 'GTG': 'V',
     'GCT': 'A', 'GCC': 'A', 'GCA': 'A', 'GCG': 'A', 'GAT': 'D',
     'GAC': 'D', 'GAA': 'E', 'GAG': 'E', 'GGT': 'G', 'GGC': 'G',
     'GGA': 'G', 'GGG': 'G', },
                    stop_codons = [ 'TAA', 'TAG', ],
                    start_codons = [ 'TTG', 'ATT', 'ATC', 'ATA', 'ATG',
                                     'GTG', ]
                    )
# Table 6.  The name string holds three "; "-separated names.  Relative to
# table 1: TAA and TAG code for Q, leaving TGA as the only stop codon.
register_ncbi_table(name = 'Ciliate Nuclear; Dasycladacean Nuclear; Hexamita Nuclear',
                    alt_name = 'SGC5', id = 6,
                    table = {
     'TTT': 'F', 'TTC': 'F', 'TTA': 'L', 'TTG': 'L', 'TCT': 'S',
     'TCC': 'S', 'TCA': 'S', 'TCG': 'S', 'TAT': 'Y', 'TAC': 'Y',
     'TAA': 'Q', 'TAG': 'Q', 'TGT': 'C', 'TGC': 'C', 'TGG': 'W',
     'CTT': 'L', 'CTC': 'L', 'CTA': 'L', 'CTG': 'L', 'CCT': 'P',
     'CCC': 'P', 'CCA': 'P', 'CCG': 'P', 'CAT': 'H', 'CAC': 'H',
     'CAA': 'Q', 'CAG': 'Q', 'CGT': 'R', 'CGC': 'R', 'CGA': 'R',
     'CGG': 'R', 'ATT': 'I', 'ATC': 'I', 'ATA': 'I', 'ATG': 'M',
     'ACT': 'T', 'ACC': 'T', 'ACA': 'T', 'ACG': 'T', 'AAT': 'N',
     'AAC': 'N', 'AAA': 'K', 'AAG': 'K', 'AGT': 'S', 'AGC': 'S',
     'AGA': 'R', 'AGG': 'R', 'GTT': 'V', 'GTC': 'V', 'GTA': 'V',
     'GTG': 'V', 'GCT': 'A', 'GCC': 'A', 'GCA': 'A', 'GCG': 'A',
     'GAT': 'D', 'GAC': 'D', 'GAA': 'E', 'GAG': 'E', 'GGT': 'G',
     'GGC': 'G', 'GGA': 'G', 'GGG': 'G', },
                    stop_codons = [ 'TGA', ],
                    start_codons = [ 'ATG', ]
                    )
# Table 9.  Relative to table 1: TGA codes for W, AAA codes for N instead
# of K, and AGA / AGG code for S instead of R.
register_ncbi_table(name = 'Echinoderm Mitochondrial',
                    alt_name = 'SGC8', id = 9,
                    table = {
     'TTT': 'F', 'TTC': 'F', 'TTA': 'L', 'TTG': 'L', 'TCT': 'S',
     'TCC': 'S', 'TCA': 'S', 'TCG': 'S', 'TAT': 'Y', 'TAC': 'Y',
     'TGT': 'C', 'TGC': 'C', 'TGA': 'W', 'TGG': 'W', 'CTT': 'L',
     'CTC': 'L', 'CTA': 'L', 'CTG': 'L', 'CCT': 'P', 'CCC': 'P',
     'CCA': 'P', 'CCG': 'P', 'CAT': 'H', 'CAC': 'H', 'CAA': 'Q',
     'CAG': 'Q', 'CGT': 'R', 'CGC': 'R', 'CGA': 'R', 'CGG': 'R',
     'ATT': 'I', 'ATC': 'I', 'ATA': 'I', 'ATG': 'M', 'ACT': 'T',
     'ACC': 'T', 'ACA': 'T', 'ACG': 'T', 'AAT': 'N', 'AAC': 'N',
     'AAA': 'N', 'AAG': 'K', 'AGT': 'S', 'AGC': 'S', 'AGA': 'S',
     'AGG': 'S', 'GTT': 'V', 'GTC': 'V', 'GTA': 'V', 'GTG': 'V',
     'GCT': 'A', 'GCC': 'A', 'GCA': 'A', 'GCG': 'A', 'GAT': 'D',
     'GAC': 'D', 'GAA': 'E', 'GAG': 'E', 'GGT': 'G', 'GGC': 'G',
     'GGA': 'G', 'GGG': 'G', },
                    stop_codons = [ 'TAA', 'TAG', ],
                    start_codons = [ 'ATG', ]
                    )
# Table 10.  Relative to table 1: TGA codes for C instead of being a stop
# codon.
register_ncbi_table(name = 'Euplotid Nuclear',
                    alt_name = 'SGC9', id = 10,
                    table = {
     'TTT': 'F', 'TTC': 'F', 'TTA': 'L', 'TTG': 'L', 'TCT': 'S',
     'TCC': 'S', 'TCA': 'S', 'TCG': 'S', 'TAT': 'Y', 'TAC': 'Y',
     'TGT': 'C', 'TGC': 'C', 'TGA': 'C', 'TGG': 'W', 'CTT': 'L',
     'CTC': 'L', 'CTA': 'L', 'CTG': 'L', 'CCT': 'P', 'CCC': 'P',
     'CCA': 'P', 'CCG': 'P', 'CAT': 'H', 'CAC': 'H', 'CAA': 'Q',
     'CAG': 'Q', 'CGT': 'R', 'CGC': 'R', 'CGA': 'R', 'CGG': 'R',
     'ATT': 'I', 'ATC': 'I', 'ATA': 'I', 'ATG': 'M', 'ACT': 'T',
     'ACC': 'T', 'ACA': 'T', 'ACG': 'T', 'AAT': 'N', 'AAC': 'N',
     'AAA': 'K', 'AAG': 'K', 'AGT': 'S', 'AGC': 'S', 'AGA': 'R',
     'AGG': 'R', 'GTT': 'V', 'GTC': 'V', 'GTA': 'V', 'GTG': 'V',
     'GCT': 'A', 'GCC': 'A', 'GCA': 'A', 'GCG': 'A', 'GAT': 'D',
     'GAC': 'D', 'GAA': 'E', 'GAG': 'E', 'GGT': 'G', 'GGC': 'G',
     'GGA': 'G', 'GGG': 'G', },
                    stop_codons = [ 'TAA', 'TAG', ],
                    start_codons = [ 'ATG', ]
                    )
# Table 11.  No alternative name.  Codon assignments and stop codons are the
# same as in table 1; only the list of start codons is longer.
register_ncbi_table(name = 'Bacterial',
                    alt_name = None, id = 11,
                    table = {
     'TTT': 'F', 'TTC': 'F', 'TTA': 'L', 'TTG': 'L', 'TCT': 'S',
     'TCC': 'S', 'TCA': 'S', 'TCG': 'S', 'TAT': 'Y', 'TAC': 'Y',
     'TGT': 'C', 'TGC': 'C', 'TGG': 'W', 'CTT': 'L', 'CTC': 'L',
     'CTA': 'L', 'CTG': 'L', 'CCT': 'P', 'CCC': 'P', 'CCA': 'P',
     'CCG': 'P', 'CAT': 'H', 'CAC': 'H', 'CAA': 'Q', 'CAG': 'Q',
     'CGT': 'R', 'CGC': 'R', 'CGA': 'R', 'CGG': 'R', 'ATT': 'I',
     'ATC': 'I', 'ATA': 'I', 'ATG': 'M', 'ACT': 'T', 'ACC': 'T',
     'ACA': 'T', 'ACG': 'T', 'AAT': 'N', 'AAC': 'N', 'AAA': 'K',
     'AAG': 'K', 'AGT': 'S', 'AGC': 'S', 'AGA': 'R', 'AGG': 'R',
     'GTT': 'V', 'GTC': 'V', 'GTA': 'V', 'GTG': 'V', 'GCT': 'A',
     'GCC': 'A', 'GCA': 'A', 'GCG': 'A', 'GAT': 'D', 'GAC': 'D',
     'GAA': 'E', 'GAG': 'E', 'GGT': 'G', 'GGC': 'G', 'GGA': 'G',
     'GGG': 'G', },
                    stop_codons = [ 'TAA', 'TAG', 'TGA', ],
                    start_codons = [ 'TTG', 'CTG', 'ATT', 'ATC', 'ATA',
                                     'ATG', 'GTG', ]
                    )
# Table 12.  No alternative name.  Relative to table 1: CTG codes for S
# instead of L.
register_ncbi_table(name = 'Alternative Yeast Nuclear',
                    alt_name = None, id = 12,
                    table = {
     'TTT': 'F', 'TTC': 'F', 'TTA': 'L', 'TTG': 'L', 'TCT': 'S',
     'TCC': 'S', 'TCA': 'S', 'TCG': 'S', 'TAT': 'Y', 'TAC': 'Y',
     'TGT': 'C', 'TGC': 'C', 'TGG': 'W', 'CTT': 'L', 'CTC': 'L',
     'CTA': 'L', 'CTG': 'S', 'CCT': 'P', 'CCC': 'P', 'CCA': 'P',
     'CCG': 'P', 'CAT': 'H', 'CAC': 'H', 'CAA': 'Q', 'CAG': 'Q',
     'CGT': 'R', 'CGC': 'R', 'CGA': 'R', 'CGG': 'R', 'ATT': 'I',
     'ATC': 'I', 'ATA': 'I', 'ATG': 'M', 'ACT': 'T', 'ACC': 'T',
     'ACA': 'T', 'ACG': 'T', 'AAT': 'N', 'AAC': 'N', 'AAA': 'K',
     'AAG': 'K', 'AGT': 'S', 'AGC': 'S', 'AGA': 'R', 'AGG': 'R',
     'GTT': 'V', 'GTC': 'V', 'GTA': 'V', 'GTG': 'V', 'GCT': 'A',
     'GCC': 'A', 'GCA': 'A', 'GCG': 'A', 'GAT': 'D', 'GAC': 'D',
     'GAA': 'E', 'GAG': 'E', 'GGT': 'G', 'GGC': 'G', 'GGA': 'G',
     'GGG': 'G', },
                    stop_codons = [ 'TAA', 'TAG', 'TGA', ],
                    start_codons = [ 'CTG', 'ATG', ]
                    )
# Table 13.  No alternative name.  Relative to table 1: TGA codes for W,
# ATA codes for M, and AGA / AGG code for G instead of R.
register_ncbi_table(name = 'Ascidian Mitochondrial',
                    alt_name = None, id = 13,
                    table = {
     'TTT': 'F', 'TTC': 'F', 'TTA': 'L', 'TTG': 'L', 'TCT': 'S',
     'TCC': 'S', 'TCA': 'S', 'TCG': 'S', 'TAT': 'Y', 'TAC': 'Y',
     'TGT': 'C', 'TGC': 'C', 'TGA': 'W', 'TGG': 'W', 'CTT': 'L',
     'CTC': 'L', 'CTA': 'L', 'CTG': 'L', 'CCT': 'P', 'CCC': 'P',
     'CCA': 'P', 'CCG': 'P', 'CAT': 'H', 'CAC': 'H', 'CAA': 'Q',
     'CAG': 'Q', 'CGT': 'R', 'CGC': 'R', 'CGA': 'R', 'CGG': 'R',
     'ATT': 'I', 'ATC': 'I', 'ATA': 'M', 'ATG': 'M', 'ACT': 'T',
     'ACC': 'T', 'ACA': 'T', 'ACG': 'T', 'AAT': 'N', 'AAC': 'N',
     'AAA': 'K', 'AAG': 'K', 'AGT': 'S', 'AGC': 'S', 'AGA': 'G',
     'AGG': 'G', 'GTT': 'V', 'GTC': 'V', 'GTA': 'V', 'GTG': 'V',
     'GCT': 'A', 'GCC': 'A', 'GCA': 'A', 'GCG': 'A', 'GAT': 'D',
     'GAC': 'D', 'GAA': 'E', 'GAG': 'E', 'GGT': 'G', 'GGC': 'G',
     'GGA': 'G', 'GGG': 'G', },
                    stop_codons = [ 'TAA', 'TAG', ],
                    start_codons = [ 'ATG', ]
                    )
# Table 14.  No alternative name.  Relative to table 1: TAA codes for Y,
# TGA codes for W, AAA codes for N, and AGA / AGG code for S; TAG is the
# only stop codon.
register_ncbi_table(name = 'Flatworm Mitochondrial',
                    alt_name = None, id = 14,
                    table = {
     'TTT': 'F', 'TTC': 'F', 'TTA': 'L', 'TTG': 'L', 'TCT': 'S',
     'TCC': 'S', 'TCA': 'S', 'TCG': 'S', 'TAT': 'Y', 'TAC': 'Y',
     'TAA': 'Y', 'TGT': 'C', 'TGC': 'C', 'TGA': 'W', 'TGG': 'W',
     'CTT': 'L', 'CTC': 'L', 'CTA': 'L', 'CTG': 'L', 'CCT': 'P',
     'CCC': 'P', 'CCA': 'P', 'CCG': 'P', 'CAT': 'H', 'CAC': 'H',
     'CAA': 'Q', 'CAG': 'Q', 'CGT': 'R', 'CGC': 'R', 'CGA': 'R',
     'CGG': 'R', 'ATT': 'I', 'ATC': 'I', 'ATA': 'I', 'ATG': 'M',
     'ACT': 'T', 'ACC': 'T', 'ACA': 'T', 'ACG': 'T', 'AAT': 'N',
     'AAC': 'N', 'AAA': 'N', 'AAG': 'K', 'AGT': 'S', 'AGC': 'S',
     'AGA': 'S', 'AGG': 'S', 'GTT': 'V', 'GTC': 'V', 'GTA': 'V',
     'GTG': 'V', 'GCT': 'A', 'GCC': 'A', 'GCA': 'A', 'GCG': 'A',
     'GAT': 'D', 'GAC': 'D', 'GAA': 'E', 'GAG': 'E', 'GGT': 'G',
     'GGC': 'G', 'GGA': 'G', 'GGG': 'G', },
                    stop_codons = [ 'TAG', ],
                    start_codons = [ 'ATG', ]
                    )
# Table 15.  No alternative name.  Relative to table 1: TAG codes for Q,
# leaving TAA and TGA as the stop codons.
register_ncbi_table(name = 'Blepharisma Macronuclear',
                    alt_name = None, id = 15,
                    table = {
     'TTT': 'F', 'TTC': 'F', 'TTA': 'L', 'TTG': 'L', 'TCT': 'S',
     'TCC': 'S', 'TCA': 'S', 'TCG': 'S', 'TAT': 'Y', 'TAC': 'Y',
     'TAG': 'Q', 'TGT': 'C', 'TGC': 'C', 'TGG': 'W', 'CTT': 'L',
     'CTC': 'L', 'CTA': 'L', 'CTG': 'L', 'CCT': 'P', 'CCC': 'P',
     'CCA': 'P', 'CCG': 'P', 'CAT': 'H', 'CAC': 'H', 'CAA': 'Q',
     'CAG': 'Q', 'CGT': 'R', 'CGC': 'R', 'CGA': 'R', 'CGG': 'R',
     'ATT': 'I', 'ATC': 'I', 'ATA': 'I', 'ATG': 'M', 'ACT': 'T',
     'ACC': 'T', 'ACA': 'T', 'ACG': 'T', 'AAT': 'N', 'AAC': 'N',
     'AAA': 'K', 'AAG': 'K', 'AGT': 'S', 'AGC': 'S', 'AGA': 'R',
     'AGG': 'R', 'GTT': 'V', 'GTC': 'V', 'GTA': 'V', 'GTG': 'V',
     'GCT': 'A', 'GCC': 'A', 'GCA': 'A', 'GCG': 'A', 'GAT': 'D',
     'GAC': 'D', 'GAA': 'E', 'GAG': 'E', 'GGT': 'G', 'GGC': 'G',
     'GGA': 'G', 'GGG': 'G', },
                    stop_codons = [ 'TAA', 'TGA', ],
                    start_codons = [ 'ATG', ]
                    )
516
517
518
class AmbiguousCodonTable(CodonTable):
    """Codon table that also accepts ambiguous nucleotide letters.

    Wraps an existing codon table: forward lookups go through an
    AmbiguousForwardTable, the start and stop codon lists are extended
    with the ambiguous codons that cover them, and the back table of the
    wrapped table is reused unchanged.
    """

    def __init__(self, codon_table,
                 ambiguous_nucleotide_alphabet,
                 ambiguous_nucleotide_values,
                 ambiguous_protein_alphabet,
                 ambiguous_protein_values):
        """Build the ambiguous view of ``codon_table``.

        Arguments:
         - codon_table - the table to wrap; its forward_table,
           back_table, start_codons and stop_codons are read.
         - ambiguous_nucleotide_alphabet, ambiguous_protein_alphabet -
           alphabets handed to CodonTable.__init__.
         - ambiguous_nucleotide_values - dict mapping each nucleotide
           letter to a string of the letters it may stand for.
         - ambiguous_protein_values - the same kind of mapping for amino
           acid letters.
        """
        forward_table = AmbiguousForwardTable(codon_table.forward_table,
                                              ambiguous_nucleotide_values,
                                              ambiguous_protein_values)
        start_codons = list_ambiguous_codons(codon_table.start_codons,
                                             ambiguous_nucleotide_values)
        stop_codons = list_ambiguous_codons(codon_table.stop_codons,
                                            ambiguous_nucleotide_values)
        CodonTable.__init__(self,
                            ambiguous_nucleotide_alphabet,
                            ambiguous_protein_alphabet,
                            forward_table,
                            codon_table.back_table,
                            start_codons,
                            stop_codons)
        self._codon_table = codon_table

    def __getattr__(self, name):
        """Forward lookups of unknown attributes to the wrapped table.

        Python calls this only when normal attribute lookup has failed.
        """
        # Without this guard, a lookup made while _codon_table is not yet
        # set (e.g. by copy or pickle, which create the instance without
        # calling __init__) would re-enter __getattr__ without end.
        if name == "_codon_table":
            raise AttributeError(name)
        return getattr(self._codon_table, name)
546
def list_possible_proteins(codon, forward_table, ambiguous_nucleotide_values):
    """Return the amino acids an ambiguous codon can code for.

    Arguments:
     - codon - three-letter codon, possibly using ambiguous letters.
     - forward_table - dict mapping unambiguous codons to amino acids;
       codons missing from it are treated as stop codons.
     - ambiguous_nucleotide_values - dict mapping each nucleotide letter
       to a string of the letters it may stand for.

    Returns a list of the distinct amino acids.  Raises KeyError if every
    expansion of the codon is a stop codon, and TranslationError if some
    expansions code for an amino acid while others are stop codons.
    """
    c1, c2, c3 = codon
    x1 = ambiguous_nucleotide_values[c1]
    x2 = ambiguous_nucleotide_values[c2]
    x3 = ambiguous_nucleotide_values[c3]
    possible = {}  # used as a set of amino acids
    stops = []
    for y1 in x1:
        for y2 in x2:
            for y3 in x3:
                expanded = y1 + y2 + y3
                try:
                    possible[forward_table[expanded]] = 1
                except KeyError:
                    # Not in the forward table: a stop codon.
                    stops.append(expanded)
    if stops:
        if possible:
            raise TranslationError("ambiguous codon '%s' codes " % codon
                                   + "for both proteins and stop codons")
        # Only stop codons: report it the way a plain dict lookup would.
        raise KeyError(codon)
    # Return a real list; callers index into the result.
    return list(possible)
569
def list_ambiguous_codons(codons, ambiguous_nucleotide_values):
    """Extends a codon list to include all possible ambiguous codons.

    e.g. ['TAG', 'TAA'] -> ['TAG', 'TAA', 'TAR']
         ['UAG', 'UGA'] -> ['UAG', 'UGA', 'URA']

    Note that ['TAG', 'TGA'] -> ['TAG', 'TGA'], this does not add 'TRR'.
    Thus only two more codons are added in the following:

    e.g. ['TGA', 'TAA', 'TAG'] -> ['TGA', 'TAA', 'TAG', 'TAR', 'TRA']

    Arguments:
     - codons - list of codon strings.
     - ambiguous_nucleotide_values - dict mapping each nucleotide letter
       to a string of the letters it may stand for.

    Returns a new (longer) list of codon strings: the given codons in
    their original order, followed by the added ambiguous codons in
    sorted order.
    """
    known = set(codons)

    # For each codon position, collect the letters whose every meaning is
    # seen at that position in at least one of the given codons.
    letters_by_position = []
    for position in range(3):
        seen = set([codon[position] for codon in codons])
        letters_by_position.append(
            [letter for (letter, meanings)
             in ambiguous_nucleotide_values.items()
             if seen.issuperset(set(meanings))])
    c1_list, c2_list, c3_list = letters_by_position

    candidates = set([c1 + c2 + c3
                      for c1 in c1_list
                      for c2 in c2_list
                      for c3 in c3_list])
    candidates.difference_update(known)
    # Sort so the result does not depend on set iteration order.
    candidates = list(candidates)
    candidates.sort()

    answer = list(codons)
    for ambig_codon in candidates:
        # Keep the candidate only if every codon it can stand for is one
        # of the codons we were given.
        expansions = [c1 + c2 + c3
                      for c1 in ambiguous_nucleotide_values[ambig_codon[0]]
                      for c2 in ambiguous_nucleotide_values[ambig_codon[1]]
                      for c3 in ambiguous_nucleotide_values[ambig_codon[2]]]
        for codon in expansions:
            if codon not in known:
                break
        else:
            answer.append(ambig_codon)
    return answer
# Import-time sanity checks for list_ambiguous_codons.  Being assert
# statements, they are not executed when Python runs with -O.
assert list_ambiguous_codons(['TGA', 'TAA'],IUPACData.ambiguous_dna_values) == ['TGA', 'TAA', 'TRA']
assert list_ambiguous_codons(['TAG', 'TGA'],IUPACData.ambiguous_dna_values) == ['TAG', 'TGA']
assert list_ambiguous_codons(['TAG', 'TAA'],IUPACData.ambiguous_dna_values) == ['TAG', 'TAA', 'TAR']
assert list_ambiguous_codons(['UAG', 'UAA'],IUPACData.ambiguous_rna_values) == ['UAG', 'UAA', 'UAR']
assert list_ambiguous_codons(['TGA', 'TAA', 'TAG'],IUPACData.ambiguous_dna_values) == ['TGA', 'TAA', 'TAG', 'TAR', 'TRA']
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
class AmbiguousForwardTable(object):
    """Forward (codon -> amino acid) table that accepts ambiguous codons.

    Behaves like a read-only dict over an unambiguous forward table.  A
    codon using ambiguous letters is translated to the single amino acid
    it always codes for, or otherwise to the most specific ambiguous
    amino acid letter covering all the possibilities.  Results, including
    failures, are remembered in a per-instance cache.
    """

    def __init__(self, forward_table, ambiguous_nucleotide, ambiguous_protein):
        """Store the tables and precompute the reverse protein mapping.

        Arguments:
         - forward_table - dict mapping unambiguous codons to amino acids.
         - ambiguous_nucleotide - dict mapping each nucleotide letter to a
           string of the letters it may stand for.
         - ambiguous_protein - the same kind of mapping for amino acid
           letters.
        """
        self.forward_table = forward_table

        self.ambiguous_nucleotide = ambiguous_nucleotide
        self.ambiguous_protein = ambiguous_protein

        # Invert the protein mapping: for each concrete amino acid, the
        # list of (ambiguous) letters that can stand for it.  A dict is
        # used as a set first so repeated letters are counted only once.
        inverted = {}
        for name, val in ambiguous_protein.items():
            for c in val:
                inverted.setdefault(c, {})[name] = 1
        for c, names in list(inverted.items()):
            inverted[c] = list(names)
        self._inverted = inverted

        # codon -> amino acid, or the exception class to raise for it.
        self._cache = {}

    def get(self, codon, failobj = None):
        """Return the translation of ``codon``, or ``failobj`` on KeyError."""
        try:
            return self.__getitem__(codon)
        except KeyError:
            return failobj

    def __getitem__(self, codon):
        """Translate ``codon``.

        Raises KeyError if the codon can only be a stop codon, and
        TranslationError if it mixes stop codons with amino acids or if no
        ambiguous amino acid letter covers all the possibilities.
        """
        try:
            x = self._cache[codon]
        except KeyError:
            pass
        else:
            # Failures are cached as the exception class itself.
            if x is TranslationError:
                raise TranslationError(codon)
            if x is KeyError:
                raise KeyError(codon)
            return x

        # Unambiguous codons are answered straight from the wrapped table.
        try:
            x = self.forward_table[codon]
            self._cache[codon] = x
            return x
        except KeyError:
            pass

        # Expand the ambiguous letters and collect what they can code for.
        try:
            possible = list_possible_proteins(codon,
                                              self.forward_table,
                                              self.ambiguous_nucleotide)
        except KeyError:
            self._cache[codon] = KeyError
            raise KeyError(codon)
        except TranslationError:
            self._cache[codon] = TranslationError
            raise TranslationError(codon)
        # Indexed below, so make sure this is a list and not a dict view.
        possible = list(possible)
        assert len(possible) > 0, "unambiguous codons must code"

        # Every expansion codes for the same amino acid.
        if len(possible) == 1:
            self._cache[codon] = possible[0]
            return possible[0]

        # Count, for each ambiguous protein letter, how many of the
        # possible amino acids it can stand for.
        ambiguous_possible = {}
        for amino in possible:
            for term in self._inverted[amino]:
                ambiguous_possible[term] = ambiguous_possible.get(term, 0) + 1

        # Keep only the letters that cover all of the possibilities.
        n = len(possible)
        possible = []
        for amino, val in ambiguous_possible.items():
            if val == n:
                possible.append(amino)

        if len(possible) == 0:
            self._cache[codon] = TranslationError
            raise TranslationError(codon)

        # Prefer the most specific letter (fewest meanings); ties are
        # broken alphabetically.
        table = self.ambiguous_protein
        possible.sort(key=lambda term: (len(table[term]), term))

        x = possible[0]
        self._cache[codon] = x
        return x
731
732
# Wrap every registered DNA and RNA table in an AmbiguousCodonTable, keeping
# the same name / id keys as the unambiguous registries.
ambiguous_dna_by_name = {}
ambiguous_dna_by_id = {}
ambiguous_rna_by_name = {}
ambiguous_rna_by_id = {}

for _plain, _wrapped, _alphabet, _values in (
        (unambiguous_dna_by_name, ambiguous_dna_by_name,
         IUPAC.ambiguous_dna, IUPACData.ambiguous_dna_values),
        (unambiguous_dna_by_id, ambiguous_dna_by_id,
         IUPAC.ambiguous_dna, IUPACData.ambiguous_dna_values),
        (unambiguous_rna_by_name, ambiguous_rna_by_name,
         IUPAC.ambiguous_rna, IUPACData.ambiguous_rna_values),
        (unambiguous_rna_by_id, ambiguous_rna_by_id,
         IUPAC.ambiguous_rna, IUPACData.ambiguous_rna_values)):
    for key, val in _plain.items():
        _wrapped[key] = AmbiguousCodonTable(val,
                                            _alphabet,
                                            _values,
                                            IUPAC.extended_protein,
                                            IUPACData.extended_protein_values)
# Do not leave the loop helpers behind in the module namespace.
del _plain, _wrapped, _alphabet, _values
762
763
# The generic tables are wrapped using the RNA ambiguity map plus an entry
# that treats "T" as standing for "U".  dict(mapping) makes the copy
# without the Python 2 only iteritems().
_merged_values = dict(IUPACData.ambiguous_rna_values)
_merged_values["T"] = "U"

for key, val in generic_by_name.items():
    ambiguous_generic_by_name[key] = AmbiguousCodonTable(val,
                                     Alphabet.NucleotideAlphabet(),
                                     _merged_values,
                                     IUPAC.extended_protein,
                                     IUPACData.extended_protein_values)

for key, val in generic_by_id.items():
    ambiguous_generic_by_id[key] = AmbiguousCodonTable(val,
                                   Alphabet.NucleotideAlphabet(),
                                   _merged_values,
                                   IUPAC.extended_protein,
                                   IUPACData.extended_protein_values)
# Remove the temporaries from the module namespace.
del _merged_values
del key, val
782
783
# Import-time sanity checks on the registered tables.  Being assert
# statements, they are not executed when Python runs with -O.
for n in ambiguous_generic_by_id:
    assert ambiguous_rna_by_id[n].forward_table["GUU"] == "V"
    assert ambiguous_rna_by_id[n].forward_table["GUN"] == "V"
    assert ambiguous_rna_by_id[n].forward_table["UUN"] == "X"
    # Where both UAA and UGA are stop codons, the ambiguous codon URA / TRA
    # must be a stop codon too and must not translate to anything.
    if "UAA" in unambiguous_rna_by_id[n].stop_codons \
    and "UGA" in unambiguous_rna_by_id[n].stop_codons:
        try:
            # print() with one argument behaves the same on Python 2 and 3.
            print(ambiguous_dna_by_id[n].forward_table["TRA"])
            assert False, "Should be a stop only"
        except KeyError:
            pass
        assert "URA" in ambiguous_generic_by_id[n].stop_codons
        assert "URA" in ambiguous_rna_by_id[n].stop_codons
        assert "TRA" in ambiguous_generic_by_id[n].stop_codons
        assert "TRA" in ambiguous_dna_by_id[n].stop_codons
del n
# Lookup by id and lookup by name must give the same stop codons.
assert ambiguous_generic_by_id[1].stop_codons == ambiguous_generic_by_name["Standard"].stop_codons
assert ambiguous_generic_by_id[4].stop_codons == ambiguous_generic_by_name["SGC3"].stop_codons
assert ambiguous_generic_by_id[15].stop_codons == ambiguous_generic_by_name['Blepharisma Macronuclear'].stop_codons
804