1 """Code to interact with and run various EMBOSS programs.
2
3 These classes follow the AbstractCommandline interfaces for running
4 programs.
5 """
6
7 from Bio import Application
8 from Bio.Application import _Option
9
11 """Commandline object for the Primer3 interface from EMBOSS.
12 """
# Initialise the base class first, then record the executable name.
Application.AbstractCommandline.__init__(self)
self.program_name = cmd

# The two options created with a fourth argument of 1.
# NOTE(review): the 1/0 argument is presumably _Option's "is required"
# flag -- confirm against Bio.Application._Option.
leading_options = [
    _Option(["-sequence"], ["input"], None, 1,
            "Sequence to choose primers from"),
    _Option(["-outfile"], ["output", "file"], None, 1,
            "Output file name"),
]

# The remaining switches are all built the same way: typed "input", no
# checker function, flag 0 and no description.  Each entry is the list of
# names handed to _Option.
tuning_names = [
    ["-task"],
    ["-numreturn"],
    ["-includedregion"],
    ["-target"],
    ["-excludedregion"],
    ["-forwardinput"],
    ["-reverseinput"],
    ["-gcclamp"],
    ["-osize"],
    ["-minsize"],
    ["-maxsize"],
    ["-otm"],
    ["-mintm"],
    ["-maxtm"],
    ["-maxdifftm"],
    ["-ogcpercent"],
    ["-mingc"],
    ["-maxgc"],
    ["-saltconc"],
    ["-dnaconc"],
    # Fix: this switch used to be spelled "-maxployx", which does not
    # match the "polyx" spelling used by "-oligomaxpolyx" below.  The old
    # spelling is kept as a second name so existing callers keep working.
    # NOTE(review): assumes _Option writes the first name to the command
    # line and accepts any listed name for lookup -- confirm against
    # Bio.Application._Option.
    ["-maxpolyx", "-maxployx"],
    ["-productosize"],
    ["-productsizerange"],
    ["-productotm"],
    ["-productmintm"],
    ["-productmaxtm"],
    ["-oligoexcluderegion"],
    ["-oligoinput"],
    ["-oligosize"],
    ["-oligominsize"],
    ["-oligomaxsize"],
    ["-oligotm"],
    ["-oligomintm"],
    ["-oligomaxtm"],
    ["-oligoogcpercent"],
    ["-oligomingc"],
    ["-oligomaxgc"],
    ["-oligosaltconc"],
    ["-oligodnaconc"],
    ["-oligoselfany"],
    ["-oligoselfend"],
    ["-oligomaxpolyx"],
    ["-mispriminglibraryfile"],
    ["-maxmispriming"],
    ["-oligomishyblibraryfile"],
    ["-oligomaxmishyb"],
]
tuning_options = [_Option(names, ["input"], None, 0)
                  for names in tuning_names]

self.parameters = leading_options + tuning_options
68
70 """Commandline object for the primersearch program from EMBOSS.
71 """
def __init__(self, cmd="primersearch"):
    """Set up the primersearch command line and its option list.

    cmd -- value stored as the program name (default "primersearch").
    """
    Application.AbstractCommandline.__init__(self)
    self.program_name = cmd

    # One row per option: (switch, parameter types, flag, help text).
    # NOTE(review): the 1/0 flag is presumably _Option's "is required"
    # argument -- confirm against Bio.Application._Option.
    option_table = [
        ("-sequences", ["input"], 1,
         "Sequence to look for the primer pairs in."),
        ("-primers", ["input", "file"], 1,
         "File containing the primer pairs to search for."),
        ("-out", ["output", "file"], 1,
         "Name of the output file."),
        ("-mismatchpercent", ["input"], 1,
         "Allowed percentage mismatch."),
    ]
    self.parameters = [_Option([switch], kinds, None, flag, help_text)
                       for switch, kinds, flag, help_text in option_table]
85
87 """Commandline object for the eprotdist program from EMBOSS.
88
89 This is an EMBOSS wrapper around protdist from PHYLIP.
90 """
# Initialise the base class first, then record the executable name.
Application.AbstractCommandline.__init__(self)
self.program_name = cmd

# One row per option: (switch, parameter types, flag, help text).
# NOTE(review): the 1/0 flag is presumably _Option's "is required"
# argument -- confirm against Bio.Application._Option.
option_table = [
    ("-msf", ["input"], 1, "File containing sequences"),
    ("-outfile", ["output"], 1, "Output file name"),
    ("-method", ["input"], 1, "Choose the method to use"),
    ("-categ", ["input"], 0, "Choose the categorie to use"),
    ("-gencode", ["input"], 0, "Which genetic code"),
    ("-prob", ["input"], 0, "Prob change category (1.0=easy)"),
    ("-tranrate", ["input"], 0, "Transition/transversion ratio"),
    ("-freqa", ["input"], 0, "Frequency for A"),
    ("-freqc", ["input"], 0, "Frequency for C"),
    ("-freqg", ["input"], 0, "Frequency for G"),
    ("-freqt", ["input"], 0, "Frequency for T"),
    ("-printdata", ["input"], 0, "Print out the data at start of run"),
    ("-progress", ["input"], 0, "Print indications of progress of run"),
    ("-basefrequency", ["input"], 0, "Use empirical base frequencies"),
]
self.parameters = [_Option([switch], kinds, None, flag, help_text)
                   for switch, kinds, flag, help_text in option_table]
124
126 """Commandline object for the eneighbor program from EMBOSS.
127
128 This is an EMBOSS wrapper around neighbor from PHYLIP.
129 """
# Initialise the base class first, then record the executable name.
Application.AbstractCommandline.__init__(self)
self.program_name = cmd

# Options created with a fourth argument of 1.
# NOTE(review): that argument is presumably _Option's "is required"
# flag -- confirm against Bio.Application._Option.
flagged_options = [
    _Option(["-infile"], ["input"], None, 1, "infile value"),
    _Option(["-outfile"], ["output"], None, 1, "Output file name"),
    _Option(["-trout"], ["input"], None, 1, "Create a tree file"),
    _Option(["-treefile"], ["input"], None, 1, "Tree file name"),
    _Option(["-nj"], ["input"], None, 1, "Neighbor-joining"),
    _Option(["-noog"], ["input"], None, 1, "Outgroup root"),
]

# Options created with a fourth argument of 0.
unflagged_options = [
    _Option(["-outgnum"], ["input"], None, 0, "number of the outgroup"),
    _Option(["-randseed"], ["input"], None, 0,
            "Random number seed (must be odd)"),
    _Option(["-datasets"], ["input"], None, 0, "How many data sets"),
    _Option(["-drawtree"], ["input"], None, 0, "Draw tree"),
    _Option(["-lt"], ["input"], None, 0, "Lower-triangular data matrix"),
    _Option(["-ut"], ["input"], None, 0, "Upper-triangular data matrix"),
    _Option(["-sr"], ["input"], None, 0, "Subreplicates"),
    _Option(["-random"], ["input"], None, 0,
            "Randomize input order of species"),
    _Option(["-multsets"], ["input"], None, 0,
            "Analyze multiple data sets"),
    _Option(["-printdata"], ["input"], None, 0,
            "Print out the data at start of run"),
    _Option(["-progress"], ["input"], None, 0,
            "Print indications of progress of run"),
]

# Concatenating keeps the options in their original order.
self.parameters = flagged_options + unflagged_options
169
171 """Commandline object for the eprotpars program from EMBOSS.
172
173 This is an EMBOSS wrapper around protpars from PHYLIP.
174 """
# Initialise the base class first, then record the executable name.
Application.AbstractCommandline.__init__(self)
self.program_name = cmd

# Build the option list one entry at a time, in command-line order.
# NOTE(review): the 1/0 argument is presumably _Option's "is required"
# flag -- confirm against Bio.Application._Option.
options = []
register = options.append
register(_Option(["-msf"], ["input", "file"], None, 1,
                 "Sequences file to be read in"))
register(_Option(["-outfile"], ["output", "file"], None, 1,
                 "Output file"))
register(_Option(["-besttree"], ["input"], None, 0,
                 "Search for the best tree"))
register(_Option(["-random"], ["input"], None, 0,
                 "Randomize input order of species"))
register(_Option(["-norandom"], ["input"], None, 0,
                 "Do not randomize input order of species"))
register(_Option(["-randseed"], ["input"], None, 0,
                 "Random number seed (must be odd)"))
register(_Option(["-randtimes"], ["input"], None, 0,
                 "How many times to randomize"))
register(_Option(["-og"], ["input"], None, 0,
                 "Use an outgroup root"))
register(_Option(["-noog"], ["input"], None, 0,
                 "Do not use an outgroup root"))
register(_Option(["-outgnum"], ["input"], None, 0,
                 "Number of the outgroup"))
register(_Option(["-thresh"], ["input"], None, 0,
                 "Use Threshold parsimony"))
register(_Option(["-valthresh"], ["input"], None, 0,
                 "threshold value"))
register(_Option(["-printdata"], ["input"], None, 0,
                 "Print out the data at start of run"))
register(_Option(["-progress"], ["input"], None, 0,
                 "Print indications of progress of run"))
register(_Option(["-steps"], ["input"], None, 0,
                 "Print out steps in each site"))
register(_Option(["-seqatnodes"], ["input"], None, 0,
                 "Print sequences at all nodes of tree"))
register(_Option(["-drawtree"], ["input"], None, 0,
                 "Draw tree"))
register(_Option(["-trout"], ["input"], None, 0,
                 "Create a tree file"))
register(_Option(["-notrout"], ["input"], None, 0,
                 "Do not create a tree file"))
register(_Option(["-treefile"], ["output", "file"], None, 0,
                 "Output treefile name"))
self.parameters = options
220
222 """Commandline object for the econsense program from EMBOSS.
223
224 This is an EMBOSS wrapper around consense from PHYLIP.
225 """
# Initialise the base class first, then record the executable name.
Application.AbstractCommandline.__init__(self)
self.program_name = cmd

# Build the option list one entry at a time, in command-line order.
# NOTE(review): the 1/0 argument is presumably _Option's "is required"
# flag -- confirm against Bio.Application._Option.
options = []
register = options.append
register(_Option(["-infile"], ["input", "file"], None, 1,
                 "file to read in (New Hampshire standard form)"))
register(_Option(["-outfile"], ["output", "file"], None, 1,
                 "Output file name"))
register(_Option(["-notrout"], ["input"], None, 0,
                 "Do not create a tree file"))
register(_Option(["-trout"], ["input"], None, 0,
                 "Create a tree file"))
register(_Option(["-treefile"], ["output", "file"], None, 0,
                 "tree file name"))
register(_Option(["-noog"], ["input"], None, 0,
                 "Do not use an outgroup"))
register(_Option(["-og"], ["input"], None, 0,
                 "Use an outgroup"))
register(_Option(["-outgnum"], ["input"], None, 0,
                 "number of the outgroup"))
register(_Option(["-nodrawtree"], ["input"], None, 0,
                 "Do not draw a tree"))
register(_Option(["-drawtree"], ["input"], None, 0,
                 "Draw tree"))
register(_Option(["-root"], ["input"], None, 0,
                 "Trees to be treated as Rooted"))
register(_Option(["-progress"], ["input"], None, 0,
                 "Print indications of the progress of run"))
register(_Option(["-noprintsets"], ["input"], None, 0,
                 "Do not print out the sets of species"))
register(_Option(["-printsets"], ["input"], None, 0,
                 "Print out the sets of species"))
self.parameters = options
259
261 """Commandline object for the eseqboot program from EMBOSS.
262
263 This is an EMBOSS wrapper around seqboot from PHYLIP.
264 """
# Initialise the base class first, then record the executable name.
Application.AbstractCommandline.__init__(self)
self.program_name = cmd

# One row per option: (switch, parameter types, flag, help text).
# NOTE(review): the 1/0 flag is presumably _Option's "is required"
# argument -- confirm against Bio.Application._Option.
option_table = [
    ("-datafile", ["input", "file"], 1, "Input file"),
    ("-outfile", ["output", "file"], 1, "Output file name"),
    ("-randseed", ["input"], 1, "Random number seed (must be odd)"),
    ("-method", ["input"], 1, "Choose the method"),
    ("-test", ["input"], 1, "Choose test"),
    ("-reps", ["input"], 1, "How many replicates"),
    ("-inter", ["input"], 0, "Interleaved input"),
    ("-enzymes", ["input"], 0, "Present in input file"),
    ("-all", ["input"], 0, "All alleles present at each locus"),
    ("-printdata", ["input"], 0, "Print out the data at start of run"),
    ("-progress", ["input"], 0, "Print indications of progress of run"),
]
self.parameters = [_Option([switch], kinds, None, flag, help_text)
                   for switch, kinds, flag, help_text in option_table]
292
294 """Commandline object for the water program from EMBOSS.
295 """
# Initialise the base class first, then record the executable name.
Application.AbstractCommandline.__init__(self)
self.program_name = cmd

# Options created with a fourth argument of 1.
# NOTE(review): that argument is presumably _Option's "is required"
# flag -- confirm against Bio.Application._Option.
flagged_options = [
    _Option(["-asequence"], ["input", "file"], None, 1,
            "First sequence to align"),
    _Option(["-bsequence"], ["input", "file"], None, 1,
            "Second sequence to align"),
    _Option(["-gapopen"], ["input"], None, 1,
            "Gap open penalty"),
    _Option(["-gapextend"], ["input"], None, 1,
            "Gap extension penalty"),
    _Option(["-outfile"], ["output", "file"], None, 1,
            "Output file for the alignment"),
]

# Options created with a fourth argument of 0.
unflagged_options = [
    _Option(["-datafile"], ["input", "file"], None, 0,
            "Matrix file"),
    _Option(["-similarity"], ["input"], None, 0,
            "Display percent identity and similarity"),
    _Option(["-nosimilarity"], ["input"], None, 0,
            "Do not display percent identity and similarity"),
    _Option(["-aformat"], ["input"], None, 0,
            "Display output in a different specified output format"),
]

# Concatenating keeps the options in their original order.
self.parameters = flagged_options + unflagged_options
319
321 """Commandline object for the fuzznuc program from EMBOSS.
322 """
# Initialise the base class first, then record the executable name.
Application.AbstractCommandline.__init__(self)
self.program_name = cmd

# Collect the options in two batches, preserving command-line order.
# NOTE(review): the 1/0 argument is presumably _Option's "is required"
# flag -- confirm against Bio.Application._Option.
options = []
options.extend([
    _Option(["-sequence"], ["input"], None, 1,
            "Sequence database USA"),
    _Option(["-pattern"], ["input"], None, 1,
            "Search pattern, using standard IUPAC one-letter codes"),
    _Option(["-mismatch"], ["input"], None, 1,
            "Number of mismatches"),
    _Option(["-outfile"], ["output", "file"], None, 1,
            "Output report file name"),
])
options.extend([
    _Option(["-complement"], ["input"], None, 0,
            "Search complementary strand"),
    _Option(["-rformat"], ["input"], None, 0,
            "Specify the report format to output in."),
])
self.parameters = options
340
342 """Commandline object for the est2genome program from EMBOSS.
343 """
def __init__(self, cmd="est2genome"):
    """Set up the est2genome command line and its option list.

    cmd -- value stored as the program name (default "est2genome").
    """
    Application.AbstractCommandline.__init__(self)
    self.program_name = cmd

    # Build the option list one entry at a time, in command-line order.
    # Help texts that were joined with "+" are written as adjacent string
    # literals, which yields the same text.
    # NOTE(review): the 1/0 argument is presumably _Option's "is required"
    # flag -- confirm against Bio.Application._Option.
    options = []
    register = options.append
    register(_Option(["-est"], ["input"], None, 1,
                     "EST sequence(s)"))
    register(_Option(["-genome"], ["input"], None, 1,
                     "Genomic sequence"))
    register(_Option(["-outfile"], ["output", "file"], None, 1,
                     "Output file name"))
    register(_Option(["-match"], ["input"], None, 0,
                     "Score for matching two bases"))
    register(_Option(["-mismatch"], ["input"], None, 0,
                     "Cost for mismatching two bases"))
    register(_Option(["-gappenalty"], ["input"], None, 0,
                     "Cost for deleting a single base in either sequence, "
                     "excluding introns"))
    register(_Option(["-intronpenalty"], ["input"], None, 0,
                     "Cost for an intron, independent of length."))
    register(_Option(["-splicepenalty"], ["input"], None, 0,
                     "Cost for an intron, independent of length "
                     "and starting/ending on donor-acceptor sites"))
    register(_Option(["-minscore"], ["input"], None, 0,
                     "Exclude alignments with scores below this threshold score."))
    register(_Option(["-reverse"], ["input"], None, 0,
                     "Reverse the orientation of the EST sequence"))
    register(_Option(["-splice"], ["input"], None, 0,
                     "Use donor and acceptor splice sites."))
    register(_Option(["-mode"], ["input"], None, 0,
                     "This determines the comparion mode. 'both', 'forward' "
                     "'reverse'"))
    register(_Option(["-best"], ["input"], None, 0,
                     "You can print out all comparisons instead of just the best"))
    register(_Option(["-space"], ["input"], None, 0,
                     "for linear-space recursion."))
    register(_Option(["-shuffle"], ["input"], None, 0,
                     "Shuffle"))
    register(_Option(["-seed"], ["input"], None, 0,
                     "Random number seed"))
    register(_Option(["-align"], ["input"], None, 0,
                     "Show the alignment."))
    register(_Option(["-width"], ["input"], None, 0,
                     "Alignment width"))
    self.parameters = options
389
391 """Commandline object for the etandem program from EMBOSS.
392 """
# Initialise the base class first, then record the executable name.
Application.AbstractCommandline.__init__(self)
self.program_name = cmd

# Options created with a fourth argument of 1.
# NOTE(review): that argument is presumably _Option's "is required"
# flag -- confirm against Bio.Application._Option.
flagged_options = [
    _Option(["-sequence"], ["input", "file"], None, 1,
            "Sequence"),
    _Option(["-minrepeat"], ["input"], None, 1,
            "Minimum repeat size"),
    _Option(["-maxrepeat"], ["input"], None, 1,
            "Maximum repeat size"),
    _Option(["-outfile"], ["output", "file"], None, 1,
            "Output report file name"),
]

# Options created with a fourth argument of 0.
unflagged_options = [
    _Option(["-threshold"], ["input"], None, 0,
            "Threshold score"),
    _Option(["-mismatch"], ["input"], None, 0,
            "Allow N as a mismatch"),
    _Option(["-uniform"], ["input"], None, 0,
            "Allow uniform consensus"),
    _Option(["-rformat"], ["output"], None, 0,
            "Output report format"),
]

# Concatenating keeps the options in their original order.
self.parameters = flagged_options + unflagged_options
414
416 """Commandline object for the einverted program from EMBOSS.
417 """
# Initialise the base class first, then record the executable name.
Application.AbstractCommandline.__init__(self)
self.program_name = cmd

# Collect the options in two batches, preserving command-line order.
# NOTE(review): the 1/0 argument is presumably _Option's "is required"
# flag -- confirm against Bio.Application._Option.
options = []
options.extend([
    _Option(["-sequence"], ["input", "file"], None, 1,
            "Sequence"),
    # NOTE(review): -gap is typed as a file although its help text
    # describes a penalty -- confirm whether "file" is intended here.
    _Option(["-gap"], ["input", "file"], None, 1,
            "Gap penalty"),
    _Option(["-threshold"], ["input"], None, 1,
            "Minimum score threshold"),
    _Option(["-match"], ["input"], None, 1,
            "Match score"),
    _Option(["-mismatch"], ["input"], None, 1,
            "Mismatch score"),
    _Option(["-outfile"], ["output", "file"], None, 1,
            "Output report file name"),
])
options.extend([
    _Option(["-maxrepeat"], ["input"], None, 0,
            "Maximum separation between the start and end of repeat"),
])
self.parameters = options
438
440 """Commandline object for the palindrome program from EMBOSS.
441 """
def __init__(self, cmd="palindrome"):
    """Set up the palindrome command line and its option list.

    cmd -- value stored as the program name (default "palindrome").
    """
    Application.AbstractCommandline.__init__(self)
    self.program_name = cmd

    # One row per option: (switch, parameter types, help text).  Every
    # option in this list is created with a fourth argument of 1.
    # NOTE(review): that argument is presumably _Option's "is required"
    # flag -- confirm against Bio.Application._Option.
    option_table = [
        ("-sequence", ["input", "file"], "Sequence"),
        ("-minpallen", ["input"], "Minimum palindrome length"),
        ("-maxpallen", ["input"], "Maximum palindrome length"),
        ("-gaplimit", ["input"], "Maximum gap between repeats"),
        ("-nummismatches", ["input"], "Number of mismatches allowed"),
        ("-overlap", ["input"], "Report overlapping matches"),
        ("-outfile", ["output", "file"], "Output report file name"),
    ]
    self.parameters = [_Option([switch], kinds, None, 1, help_text)
                       for switch, kinds, help_text in option_table]
462
464 """Commandline object for the tranalign program from EMBOSS.
465 """
# Initialise the base class first, then record the executable name.
Application.AbstractCommandline.__init__(self)
self.program_name = cmd

# Name each option before assembling the list in command-line order.
# NOTE(review): the 1/0 argument is presumably _Option's "is required"
# flag -- confirm against Bio.Application._Option.
nucleotide_input = _Option(["-asequence"], ["input", "file"], None, 1,
                           "Nucleotide sequences to be aligned.")
protein_input = _Option(["-bsequence"], ["input", "file"], None, 1,
                        "Protein sequence alignment")
sequence_output = _Option(["-outseq"], ["output", "file"], None, 1,
                          "Output sequence file.")
code_table = _Option(["-table"], ["input"], None, 0,
                     "Code to use")

self.parameters = [nucleotide_input, protein_input,
                   sequence_output, code_table]
479
481 """Commandline object for the diffseq program from EMBOSS.
482 """
# Initialise the base class first, then record the executable name.
Application.AbstractCommandline.__init__(self)
self.program_name = cmd

# Options created with a fourth argument of 1.
# NOTE(review): that argument is presumably _Option's "is required"
# flag -- confirm against Bio.Application._Option.
flagged_options = [
    _Option(["-asequence"], ["input", "file"], None, 1,
            "First sequence to compare"),
    _Option(["-bsequence"], ["input", "file"], None, 1,
            "Second sequence to compare"),
    _Option(["-wordsize"], ["input"], None, 1,
            "Word size to use for comparisons (10 default)"),
    _Option(["-outfile"], ["output", "file"], None, 1,
            "Output report file name"),
    _Option(["-aoutfeat"], ["output", "file"], None, 1,
            "File for output of first sequence's features"),
    _Option(["-boutfeat"], ["output", "file"], None, 1,
            "File for output of second sequence's features"),
]

# The single option created with a fourth argument of 0.
unflagged_options = [
    _Option(["-rformat"], ["output"], None, 0,
            "Output report file format"),
]

# Concatenating keeps the options in their original order.
self.parameters = flagged_options + unflagged_options
503