1
2
3
4
5
6
7 """Definitions for interacting with BLAST related applications.
8
9 Obsolete wrappers for the old/classic NCBI BLAST tools (written in C):
10
11 - FastacmdCommandline
12 - BlastallCommandline
13 - BlastpgpCommandline
14 - RpsBlastCommandline
15
16 Wrappers for the new NCBI BLAST+ tools (written in C++):
17
18 - NcbiblastpCommandline - Protein-Protein BLAST
19 - NcbiblastnCommandline - Nucleotide-Nucleotide BLAST
20 - NcbiblastxCommandline - Translated Query-Protein Subject BLAST
21 - NcbitblastnCommandline - Protein Query-Translated Subject BLAST
22 - NcbitblastxCommandline - Translated Query-Translated Subject BLAST
23 - NcbipsiblastCommandline - Position-Specific Iterated BLAST
24 - NcbirpsblastCommandline - Reverse Position Specific BLAST
25 - NcbirpstblastnCommandline - Translated Reverse Position Specific BLAST
26
27 For further details, see:
28
29 Camacho et al. BLAST+: architecture and applications
30 BMC Bioinformatics 2009, 10:421
31 doi:10.1186/1471-2105-10-421
32 """
33 from Bio.Application import _Option, AbstractCommandline, _Switch
34
36 """Create a commandline for the fastacmd program from NCBI (OBSOLETE).
37
38 """
def __init__(self, cmd="fastacmd", **kwargs):
    """Declare the fastacmd options and initialise the wrapper.

    cmd -- name or path of the executable (default "fastacmd").
    kwargs -- option values, forwarded to AbstractCommandline.__init__.
    """
    database_option = _Option(
        ["-d", "database"], ["input"], None, 1,
        "The database to retrieve from.")
    search_option = _Option(
        ["-s", "search_string"], ["input"], None, 1,
        "The id to search for.")
    self.parameters = [database_option, search_option]
    AbstractCommandline.__init__(self, cmd, **kwargs)
48
49
51 """Base Commandline object for (classic) NCBI BLAST wrappers (PRIVATE).
52
53 This is provided for subclassing, it deals with shared options
54 common to all the BLAST tools (blastall, rpsblast, blastpgp).
55 """
# NOTE(review): these statements are the body of an initializer whose ``def``
# line is not visible in this excerpt; ``self``, ``cmd`` and ``kwargs`` are
# presumably its arguments.
assert cmd is not None
# Options common to the classic BLAST tools.
# NOTE(review): the trailing False on each _Option is presumably the "equate"
# flag (emit "-x value" rather than "-x=value") -- confirm against
# Bio.Application.  The -X option originally lacked it, unlike every sibling
# in this table; it is passed here for consistency.
extra_parameters = [
    _Switch(["--help", "help"], ["input"],
            "Print USAGE, DESCRIPTION and ARGUMENTS description; ignore other arguments."),
    _Option(["-d", "database"], ["input"], None, 1,
            "The database to BLAST against.", False),
    _Option(["-i", "infile"], ["input", "file"], None, 1,
            "The sequence to search with.", False),
    _Option(["-e", "expectation"], ["input"], None, 0,
            "Expectation value cutoff.", False),
    _Option(["-m", "align_view"], ["input"], None, 0,
            "Alignment view. Integer 0-11. Use 7 for XML output.",
            False),
    _Option(["-o", "align_outfile", "outfile"], ["output", "file"], None, 0,
            "Output file for alignment.", False),
    _Option(["-y", "xdrop_extension"], ["input"], None, 0,
            "Dropoff for blast extensions.", False),
    _Option(["-F", "filter"], ["input"], None, 0,
            "Filter query sequence with SEG? T/F", False),
    _Option(["-X", "xdrop"], ["input"], None, 0,
            "Dropoff value (bits) for gapped alignments.", False),
    _Option(["-I", "show_gi"], ["input"], None, 0,
            "Show GI's in deflines? T/F", False),
    _Option(["-J", "believe_query"], ["input"], None, 0,
            "Believe the query defline? T/F", False),
    _Option(["-Z", "xdrop_final"], ["input"], None, 0,
            "X dropoff for final gapped alignment.", False),
    _Option(["-z", "db_length"], ["input"], None, 0,
            "Effective database length.", False),
    _Option(["-O", "seqalign_file"], ["output", "file"], None, 0,
            "seqalign file to output.", False),
    _Option(["-v", "descriptions"], ["input"], None, 0,
            "Number of one-line descriptions.", False),
    _Option(["-b", "alignments"], ["input"], None, 0,
            "Number of alignments.", False),
    _Option(["-Y", "search_length"], ["input"], None, 0,
            "Effective length of search space (use zero for the "
            "real size).", False),
    _Option(["-T", "html"], ["input"], None, 0,
            "Produce HTML output? T/F", False),
    _Option(["-U", "case_filter"], ["input"], None, 0,
            "Use lower case filtering of FASTA sequence? T/F", False),
    _Option(["-a", "nprocessors"], ["input"], None, 0,
            "Number of processors to use.", False),
    _Option(["-g", "gapped"], ["input"], None, 0,
            "Whether to do a gapped alignment. T/F", False),
]
try:
    # A subclass may have set self.parameters already; its options are
    # kept after the shared ones.
    self.parameters = extra_parameters + self.parameters
except AttributeError:
    # No options defined yet: the shared ones are the whole list.
    self.parameters = extra_parameters
AbstractCommandline.__init__(self, cmd, **kwargs)
113
119
120
122 """Base Commandline object for NCBI BLAST wrappers (PRIVATE).
123
124 This is provided for subclassing, it deals with shared options
125 common to all the blastall and blastpgp tools (but not rpsblast).
126 """
def __init__(self, cmd=None, **kwargs):
    """Prepend the options shared by blastall and blastpgp, then chain up.

    cmd -- name or path of the executable; must not be None.
    kwargs -- option values, forwarded to _BlastCommandLine.__init__.
    """
    assert cmd is not None
    shared_options = [
        _Option(["-G", "gap_open"], ["input"], None, 0,
                "Gap open penalty", False),
        _Option(["-E", "gap_extend"], ["input"], None, 0,
                "Gap extension penalty", False),
        _Option(["-A", "window_size"], ["input"], None, 0,
                "Multiple hits window size", False),
        _Option(["-f", "hit_extend"], ["input"], None, 0,
                "Threshold for extending hits.", False),
        _Option(["-K", "keep_hits"], ["input"], None, 0,
                " Number of best hits from a region to keep.", False),
        _Option(["-W", "wordsize"], ["input"], None, 0,
                "Word size", False),
        _Option(["-P", "passes"], ["input"], None, 0,
                "Hits/passes. Integer 0-2. 0 for multiple hit, "
                "1 for single hit (does not apply to blastn)", False),
    ]
    # Options a subclass has already stored on the instance go after the
    # shared ones; without any, the shared list is used as is.
    try:
        subclass_options = self.parameters
    except AttributeError:
        self.parameters = shared_options
    else:
        self.parameters = shared_options + subclass_options
    _BlastCommandLine.__init__(self, cmd, **kwargs)
154
155
157 """Create a commandline for the blastall program from NCBI (OBSOLETE).
158
159 With the release of BLAST+ (BLAST rewritten in C++ instead of C), the NCBI
160 are replacing blastall with separate tools blastn, blastp, blastx, tblastn
161 and tblastx.
162
163 Like blastall, this wrapper is now obsolete, and will be deprecated and
164 removed in a future release of Biopython.
165
166 >>> from Bio.Blast.Applications import BlastallCommandline
167 >>> cline = BlastallCommandline(program="blastx", infile="m_cold.fasta",
168 ... database="nr", expectation=0.001)
169 >>> cline
170 BlastallCommandline(cmd='blastall', database='nr', infile='m_cold.fasta', expectation=0.001, program='blastx')
171 >>> print cline
172 blastall -d nr -i m_cold.fasta -e 0.001 -p blastx
173
174 You would typically run the command line with the Python subprocess module,
175 as described in the Biopython tutorial.
176 """
177
def __init__(self, cmd="blastall", **kwargs):
    """Declare the blastall-specific options and initialise the wrapper.

    Emits a PendingDeprecationWarning, stores the blastall-only options on
    self.parameters and delegates to _BlastAllOrPgpCommandLine.__init__.

    cmd -- name or path of the executable (default "blastall").
    kwargs -- option values, forwarded to the base class initializer.
    """
    import warnings
    warnings.warn("Like blastall, this wrapper is now obsolete, and will be deprecated and removed in a future release of Biopython.", PendingDeprecationWarning)
    self.parameters = [
        _Option(["-p", "program"], ["input"], None, 1,
                "The blast program to use (e.g. blastp, blastn).", False),
        _Option(["-q", "nuc_mismatch"], ["input"], None, 0,
                "Penalty for a nucleotide mismatch (blastn only).", False),
        _Option(["-r", "nuc_match"], ["input"], None, 0,
                "Reward for a nucleotide match (blastn only).", False),
        _Option(["-Q", "query_genetic_code"], ["input"], None, 0,
                "Query Genetic code to use.", False),
        _Option(["-D", "db_genetic_code"], ["input"], None, 0,
                "DB Genetic code (for tblast[nx] only).", False),
        _Option(["-M", "matrix"], ["input"], None, 0,
                "Matrix to use", False),
        _Option(["-S", "strands"], ["input"], None, 0,
                "Query strands to search against database (for blast[nx], "
                "and tblastx). 3 is both, 1 is top, 2 is bottom.", False),
        _Option(["-l", "restrict_gi"], ["input"], None, 0,
                "Restrict search of database to list of GI's.", False),
        _Option(["-R", "checkpoint"], ["input", "file"], None, 0,
                "PSI-TBLASTN checkpoint input file.", False),
        _Option(["-n", "megablast"], ["input"], None, 0,
                "MegaBlast search T/F.", False),
        _Option(["-L", "region_length", "range_restriction"], ["input"],
                None, 0,
                "Location on query sequence (string format start,end).\n"
                "\n"
                "In older versions of BLAST, -L set the length of region\n"
                "used to judge hits (see -K parameter).", False),
        # The historical name is misspelled; the correctly spelled
        # "frame_shift_penalty" is added as an extra name.
        # NOTE(review): assumes, as for the other three-name options in this
        # file (e.g. -o and -L), that the last name remains the attribute
        # name and earlier names are accepted as aliases -- confirm against
        # Bio.Application before release.
        _Option(["-w", "frame_shift_penalty", "frame_shit_penalty"],
                ["input"], None, 0,
                "Frame shift penalty (OOF algorithm for blastx).", False),
        _Option(["-t", "largest_intron"], ["input"], None, 0,
                "Length of the largest intron allowed in a translated "
                "nucleotide sequence when linking multiple distinct "
                "alignments. (0 invokes default behavior; a negative value "
                "disables linking.)", False),
        _Option(["-B", "num_concatenated_queries"], ["input"], None, 0,
                "Number of concatenated queries, for blastn and tblastn.",
                False),
        _Option(["-V", "oldengine"], ["input"], None, 0,
                "Force use of the legacy BLAST engine.", False),
        _Option(["-C", "composition_based"], ["input"], None, 0,
                "Use composition-based statistics for tblastn:\n"
                "D or d: default (equivalent to F)\n"
                "0 or F or f: no composition-based statistics\n"
                "1 or T or t: Composition-based statistics as in NAR 29:2994-3005, 2001\n"
                "2: Composition-based score adjustment as in Bioinformatics\n"
                "21:902-911, 2005, conditioned on sequence properties\n"
                "3: Composition-based score adjustment as in Bioinformatics\n"
                "21:902-911, 2005, unconditionally\n"
                "For programs other than tblastn, must either be absent or be\n"
                "D, F or 0.", False),
        _Option(["-s", "smith_waterman"], ["input"], None, 0,
                "Compute locally optimal Smith-Waterman alignments (This "
                "option is only available for gapped tblastn.) T/F", False),
    ]
    _BlastAllOrPgpCommandLine.__init__(self, cmd, **kwargs)
242
243
245 """Create a commandline for the blastpgp program from NCBI (OBSOLETE).
246
247 With the release of BLAST+ (BLAST rewritten in C++ instead of C), the NCBI
248 are replacing blastpgp with a renamed tool psiblast. This module provides
249 NcbipsiblastCommandline as a wrapper for the new tool psiblast.
250
251 Like blastpgp (and blastall), this wrapper is now obsolete, and will be
252 deprecated and removed in a future release of Biopython.
253
254 >>> from Bio.Blast.Applications import BlastpgpCommandline
255 >>> cline = BlastpgpCommandline(help=True)
256 >>> cline
257 BlastpgpCommandline(cmd='blastpgp', help=True)
258 >>> print cline
259 blastpgp --help
260
261 You would typically run the command line with the Python subprocess module,
262 as described in the Biopython tutorial.
263 """
def __init__(self, cmd="blastpgp", **kwargs):
    """Declare the blastpgp-specific options and initialise the wrapper.

    Emits a PendingDeprecationWarning, stores the blastpgp-only options on
    self.parameters and delegates to _BlastAllOrPgpCommandLine.__init__.

    cmd -- name or path of the executable (default "blastpgp").
    kwargs -- option values, forwarded to the base class initializer.
    """
    import warnings
    obsolete_notice = "Like blastpgp (and blastall), this wrapper is now obsolete, and will be deprecated and removed in a future release of Biopython."
    warnings.warn(obsolete_notice, PendingDeprecationWarning)
    blastpgp_options = [
        _Option(["-C", "checkpoint_outfile"], ["output", "file"], None, 0,
                "Output file for PSI-BLAST checkpointing.", False),
        _Option(["-R", "restart_infile"], ["input", "file"], None, 0,
                "Input file for PSI-BLAST restart.", False),
        _Option(["-k", "hit_infile"], ["input", "file"], None, 0,
                "Hit file for PHI-BLAST.", False),
        _Option(["-Q", "matrix_outfile"], ["output", "file"], None, 0,
                "Output file for PSI-BLAST matrix in ASCII.", False),
        _Option(["-B", "align_infile"], ["input", "file"], None, 0,
                "Input alignment file for PSI-BLAST restart.", False),
        _Option(["-S", "required_start"], ["input"], None, 0,
                "Start of required region in query.", False),
        _Option(["-H", "required_end"], ["input"], None, 0,
                "End of required region in query.", False),
        _Option(["-j", "npasses"], ["input"], None, 0,
                "Number of passes", False),
        _Option(["-N", "nbits_gapping"], ["input"], None, 0,
                "Number of bits to trigger gapping.", False),
        _Option(["-c", "pseudocounts"], ["input"], None, 0,
                "Pseudocounts constants for multiple passes.", False),
        _Option(["-h", "model_threshold"], ["input"], None, 0,
                "E-value threshold to include in multipass model.", False),
        _Option(["-L", "region_length"], ["input"], None, 0,
                "Cost to decline alignment (disabled when zero).", False),
        _Option(["-M", "matrix"], ["input"], None, 0,
                "Matrix (string, default BLOSUM62).", False),
        _Option(["-p", "program"], ["input"], None, 1,
                "The blast program to use (e.g blastpgp, patseedp or seedp).", False),
    ]
    self.parameters = blastpgp_options
    _BlastAllOrPgpCommandLine.__init__(self, cmd, **kwargs)
299
300
302 """Create a commandline for the classic rpsblast program from NCBI (OBSOLETE).
303
304 With the release of BLAST+ (BLAST rewritten in C++ instead of C), the NCBI
305 are replacing the old rpsblast with a new version of the same name plus a
306 second tool rpstblastn, both taking different command line arguments. This
307 module provides NcbirpsblastCommandline and NcbirpstblastnCommandline as
308 wrappers for the new tools.
309
310 Like the old rpsblast (and blastall), this wrapper is now obsolete, and will
311 be deprecated and removed in a future release of Biopython.
312
313 >>> from Bio.Blast.Applications import RpsBlastCommandline
314 >>> cline = RpsBlastCommandline(help=True)
315 >>> cline
316 RpsBlastCommandline(cmd='rpsblast', help=True)
317 >>> print cline
318 rpsblast --help
319
320 You would typically run the command line with the Python subprocess module,
321 as described in the Biopython tutorial.
322 """
def __init__(self, cmd="rpsblast", **kwargs):
    """Declare the classic rpsblast options and initialise the wrapper.

    Emits a PendingDeprecationWarning, stores the rpsblast-only options on
    self.parameters and delegates to _BlastCommandLine.__init__.

    cmd -- name or path of the executable (default "rpsblast").
    kwargs -- option values, forwarded to the base class initializer.
    """
    import warnings
    obsolete_notice = "Like the old rpsblast (and blastall), this wrapper is now obsolete, and will be deprecated and removed in a future release of Biopython."
    warnings.warn(obsolete_notice, PendingDeprecationWarning)
    rpsblast_options = [
        _Option(["-N", "nbits_gapping"], ["input"], None, 0,
                "Number of bits to trigger gapping.", False),
        _Option(["-P", "multihit"], ["input"], None, 0,
                "0 for multiple hit, 1 for single hit", False),
        _Option(["-l", "logfile"], ["output", "file"], None, 0,
                "Logfile name.", False),
        _Option(["-p", "protein"], ["input"], None, 0,
                "Query sequence is protein. T/F", False),
        _Option(["-L", "range_restriction"], ["input"], None, 0,
                "Location on query sequence (string format start,end).",
                False),
    ]
    self.parameters = rpsblast_options
    _BlastCommandLine.__init__(self, cmd, **kwargs)
343
344
346 """Base Commandline object for (new) NCBI BLAST+ wrappers (PRIVATE).
347
348 This is provided for subclassing, it deals with shared options
349 common to all the BLAST tools (blastn, rpsblast, rpstblastn, etc).
350 """
def __init__(self, cmd=None, **kwargs):
    """Prepend the options shared by the BLAST+ tools, then chain up.

    The shared switches and options are placed in front of any options
    already stored on self.parameters, and the result is handed to
    AbstractCommandline.__init__.

    cmd -- name or path of the executable; must not be None.
    kwargs -- option values, forwarded to AbstractCommandline.__init__.
    """
    assert cmd is not None
    extra_parameters = [
        # Informational switches.
        _Switch(["-h", "h"], ["input"],
                "Print USAGE and DESCRIPTION; ignore other arguments."),
        _Switch(["-help", "help"], ["input"],
                "Print USAGE, DESCRIPTION and ARGUMENTS description; ignore other arguments."),
        _Switch(["-version", "version"], ["input"],
                "Print version number; ignore other arguments."),
        # Query input.
        _Option(["-query", "query"], ["input", "file"], None, 0,
                "The sequence to search with.", False),
        _Option(["-query_loc", "query_loc"], ["input"], None, 0,
                "Location on the query sequence (Format: start-stop)", False),
        # General search settings.
        _Option(["-db", "db"], ["input"], None, 0,
                "The database to BLAST against.", False),
        _Option(["-out", "out"], ["output", "file"], None, 0,
                "Output file for alignment.", False),
        _Option(["-evalue", "evalue"], ["input"], None, 0,
                "Expectation value cutoff.", False),
        _Option(["-word_size", "word_size"], ["input"], None, 0,
                "Word size for wordfinder algorithm.\n"
                "\n"
                "Integer. Minimum 2.", False),
        # Output formatting.
        _Option(["-outfmt", "outfmt"], ["input"], None, 0,
                "Alignment view. Integer 0-10. Use 5 for XML output (differs from classic BLAST which used 7 for XML).",
                False),
        _Switch(["-show_gis", "show_gis"], ["input"],
                "Show NCBI GIs in deflines?"),
        _Option(["-num_descriptions", "num_descriptions"], ["input"], None, 0,
                "Number of database sequences to show one-line descriptions for.\n"
                "\n"
                "Integer argument (at least zero). Default is 500.\n"
                "See also num_alignments.", False),
        # The help text previously referred to num_alignments itself in
        # both places; it now names alignments and the sibling option.
        _Option(["-num_alignments", "num_alignments"], ["input"], None, 0,
                "Number of database sequences to show alignments for.\n"
                "\n"
                "Integer argument (at least zero). Default is 200.\n"
                "See also num_descriptions.", False),
        _Switch(["-html", "html"], ["input"],
                "Produce HTML output? See also the outfmt option."),
        # Query filtering.
        _Switch(["-lcase_masking", "lcase_masking"], ["input"],
                "Use lower case filtering in query and subject sequence(s)?"),
        # Restricting the search.
        _Option(["-gilist", "gilist"], ["input", "file"], None, 0,
                "Restrict search of database to list of GI's.\n"
                "\n"
                "Incompatible with: negative_gilist, seqidlist, remote, subject, subject_loc",
                False),
        _Option(["-negative_gilist", "negative_gilist"], ["input", "file"], None, 0,
                "Restrict search of database to everything except the listed GIs.\n"
                "\n"
                "Incompatible with: gilist, seqidlist, remote, subject, subject_loc",
                False),
        _Option(["-seqidlist", "seqidlist"], ["input", "file"], None, 0,
                "Restrict search of database to list of SeqID's.\n"
                "\n"
                "Incompatible with: gilist, negative_gilist, remote, subject, subject_loc",
                False),
        _Option(["-entrez_query", "entrez_query"], ["input"], None, 0,
                "Restrict search with the given Entrez query (requires remote).", False),
        _Option(["-max_target_seqs", "max_target_seqs"], ["input"], None, 0,
                "Maximum number of aligned sequences to keep.\n"
                "\n"
                "Integer argument (at least one).", False),
        # Statistics.
        _Option(["-dbsize", "dbsize"], ["input"], None, 0,
                "Effective length of the database (integer)", False),
        _Option(["-searchsp", "searchsp"], ["input"], None, 0,
                "Effective length of the search space (integer)", False),
        # Extension settings.
        _Option(["-xdrop_ungap", "xdrop_ungap"], ["input"], None, 0,
                "X-dropoff value (in bits) for ungapped extensions. Float.",
                False),
        _Option(["-xdrop_gap", "xdrop_gap"], ["input"], None, 0,
                "X-dropoff value (in bits) for preliminary gapped extensions. Float.",
                False),
        _Option(["-xdrop_gap_final", "xdrop_gap_final"], ["input"], None, 0,
                "X-dropoff value (in bits) for final gapped alignment. Float.",
                False),
        _Option(["-window_size", "window_size"], ["input"], None, 0,
                "Multiple hits window size, use 0 to specify 1-hit algorithm. Integer.",
                False),
        # Search strategy files.
        _Option(["-import_search_strategy", "import_search_strategy"],
                ["input", "file"], None, 0,
                "Search strategy to use.\n"
                "\n"
                "Incompatible with: export_search_strategy", False),
        _Option(["-export_search_strategy", "export_search_strategy"],
                ["output", "file"], None, 0,
                "File name to record the search strategy used.\n"
                "\n"
                "Incompatible with: import_search_strategy", False),
        # Miscellaneous.
        _Switch(["-parse_deflines", "parse_deflines"], ["input"],
                "Should the query and subject defline(s) be parsed?"),
        _Option(["-num_threads", "num_threads"], ["input"], None, 0,
                "Number of threads to use in the BLAST search.\n"
                "\n"
                "Integer of at least one. Default is one.\n"
                "Incompatible with: remote", False),
        _Switch(["-remote", "remote"], ["input"],
                "Execute search remotely?\n"
                "\n"
                "Incompatible with: gilist, negative_gilist, subject_loc, num_threads, ..."),
    ]
    try:
        # A subclass may have set self.parameters already; its options are
        # kept after the shared ones.
        self.parameters = extra_parameters + self.parameters
    except AttributeError:
        # No options defined yet: the shared ones are the whole list.
        self.parameters = extra_parameters
    AbstractCommandline.__init__(self, cmd, **kwargs)
475
# NOTE(review): these statements are the body of a method whose ``def`` line
# is not visible in this excerpt (presumably ``_validate(self)``).
# Each key must not be combined with any option listed as its value.
conflicts = {
    "remote": ["gilist", "negative_gilist", "num_threads"],
    "import_search_strategy": ["export_search_strategy"],
    "gilist": ["negative_gilist"],
    "seqidlist": ["gilist", "negative_gilist", "remote"],
}
self._validate_incompatibilities(conflicts)
# An Entrez query is only accepted together with the remote switch.
if self.entrez_query:
    if not self.remote:
        raise ValueError("Option entrez_query requires remote option.")
AbstractCommandline._validate(self)
485
# NOTE(review): these statements are the body of a method whose ``def`` line
# is not visible in this excerpt (presumably
# ``_validate_incompatibilities(self, incompatibles)``).
# Raise ValueError for the first set option that has a set rival.
for option in incompatibles:
    if not self._get_parameter(option):
        continue
    for rival in incompatibles[option]:
        if self._get_parameter(rival):
            message = "Options %s and %s are incompatible." % (option, rival)
            raise ValueError(message)
493
495 """Base Commandline object for (new) NCBI BLAST+ wrappers (PRIVATE).
496
497 This is provided for subclassing, it deals with shared options
498 common to all the BLAST tools supporting two-sequence BLAST
499 (blastn, psiblast, etc) but not rpsblast or rpstblastn.
500 """
def __init__(self, cmd=None, **kwargs):
    """Prepend the gap, subject and hit-culling options, then chain up.

    cmd -- name or path of the executable; must not be None.
    kwargs -- option values, forwarded to _NcbiblastCommandline.__init__.
    """
    assert cmd is not None
    gap_options = [
        _Option(["-gapopen", "gapopen"], ["input"], None, 0,
                "Cost to open a gap (integer).", False),
        _Option(["-gapextend", "gapextend"], ["input"], None, 0,
                "Cost to extend a gap (integer).", False),
    ]
    subject_options = [
        _Option(["-subject", "subject"], ["input", "file"], None, 0,
                "Subject sequence(s) to search.\n"
                "\n"
                "Incompatible with: db, gilist, negative_gilist.\n"
                "See also subject_loc.", False),
        _Option(["-subject_loc", "subject_loc"], ["input"], None, 0,
                "Location on the subject sequence (Format: start-stop)\n"
                "\n"
                "Incompatible with: db, gilist, negative_gilist, remote.\n"
                "See also subject.", False),
    ]
    hit_options = [
        _Option(["-culling_limit", "culling_limit"], ["input"], None, 0,
                "Hit culling limit (integer).\n"
                "\n"
                "If the query range of a hit is enveloped by that of at least this many\n"
                "higher-scoring hits, delete the hit.\n"
                "\n"
                "Incompatible with: best_hit_overhang, best_hit_score_edge.", False),
        _Option(["-best_hit_overhang", "best_hit_overhang"], ["input"], None, 0,
                "Best Hit algorithm overhang value (recommended value: 0.1)\n"
                "\n"
                "Float between 0.0 and 0.5 inclusive.\n"
                "\n"
                "Incompatible with: culling_limit.", False),
        _Option(["-best_hit_score_edge", "best_hit_score_edge"], ["input"], None, 0,
                "Best Hit algorithm score edge value (recommended value: 0.1)\n"
                "\n"
                "Float between 0.0 and 0.5 inclusive.\n"
                "\n"
                "Incompatible with: culling_limit.", False),
    ]
    shared_options = gap_options + subject_options + hit_options
    # Options a subclass has already stored on the instance go after the
    # shared ones; without any, the shared list is used as is.
    try:
        subclass_options = self.parameters
    except AttributeError:
        self.parameters = shared_options
    else:
        self.parameters = shared_options + subclass_options
    _NcbiblastCommandline.__init__(self, cmd, **kwargs)
548
549
# NOTE(review): these statements are the body of a method whose ``def`` line
# is not visible in this excerpt (presumably ``_validate(self)``).
# Each key must not be combined with any option listed as its value.
conflicts = {
    "subject_loc": ["db", "gilist", "negative_gilist", "seqidlist", "remote"],
    "culling_limit": ["best_hit_overhang", "best_hit_score_edge"],
    "subject": ["db", "gilist", "negative_gilist", "seqidlist"],
}
self._validate_incompatibilities(conflicts)
_NcbiblastCommandline._validate(self)
556
558 """Create a commandline for the NCBI BLAST+ program blastp (for proteins).
559
560 With the release of BLAST+ (BLAST rewritten in C++ instead of C), the NCBI
561 replaced the old blastall tool with separate tools for each of the searches.
562 This wrapper therefore replaces BlastallCommandline with option -p blastp.
563
564 >>> from Bio.Blast.Applications import NcbiblastpCommandline
565 >>> cline = NcbiblastpCommandline(query="rosemary.pro", db="nr",
566 ... evalue=0.001, remote=True, ungapped=True)
567 >>> cline
568 NcbiblastpCommandline(cmd='blastp', query='rosemary.pro', db='nr', evalue=0.001, remote=True, ungapped=True)
569 >>> print cline
570 blastp -query rosemary.pro -db nr -evalue 0.001 -remote -ungapped
571
572 You would typically run the command line with the Python subprocess module,
573 as described in the Biopython tutorial.
574 """
def __init__(self, cmd="blastp", **kwargs):
    """Declare the blastp-specific options and initialise the wrapper.

    Stores the blastp-only options on self.parameters and delegates to
    _Ncbiblast2SeqCommandline.__init__.

    cmd -- name or path of the executable (default "blastp").
    kwargs -- option values, forwarded to the base class initializer.
    """
    # Accepted comp_based_stats values, per the option's help text.  The
    # check compares whole values: the previous substring test against
    # "0Ft2TtDd" rejected "f", accepted "" and "Ft", and raised TypeError
    # for integers.  str() lets the integers 0 and 2 through as well.
    comp_based_stats_values = ("0", "F", "f", "2", "T", "t", "D", "d")
    self.parameters = [
        # General search options.
        _Option(["-task", "task"], ["input"],
                lambda value: value in ["blastp", "blastp-short"], 0,
                "Task to execute (string, blastp (default) or blastp-short).", False),
        _Option(["-matrix", "matrix"], ["input"], None, 0,
                "Scoring matrix name (default BLOSUM62).", False),
        _Option(["-threshold", "threshold"], ["input"], None, 0,
                "Minimum word score such that the word is added to the BLAST lookup table (float)", False),
        _Option(["-comp_based_stats", "comp_based_stats"], ["input"],
                lambda value: str(value) in comp_based_stats_values, 0,
                "Use composition-based statistics (string, default 2, i.e. True).\n"
                "\n"
                "0, F or f: no composition-based statistics\n"
                "2, T or t, D or d : Composition-based score adjustment as in\n"
                "Bioinformatics 21:902-911, 2005, conditioned on sequence properties\n"
                "\n"
                "Note that tblastn also supports values of 1 and 3.", False),
        # Query filtering.  The default value's closing quote was missing
        # from the help text.
        _Option(["-seg", "seg"], ["input"], None, 0,
                "Filter query sequence with SEG (string).\n"
                "\n"
                "Format: \"yes\", \"window locut hicut\", or \"no\" to disable.\n"
                "Default is \"12 2.2 2.5\"", False),
        # Restricting the search.
        _Option(["-db_soft_mask", "db_soft_mask"], ["input"], None, 0,
                "Filtering algorithm for soft masking (integer).\n"
                "\n"
                "Filtering algorithm ID to apply to the BLAST database as soft masking.\n"
                "\n"
                "Incompatible with: subject, subject_loc", False),
        # Extension options.
        _Switch(["-ungapped", "ungapped"], ["input"],
                "Perform ungapped alignment only?"),
        # Miscellaneous.
        _Switch(["-use_sw_tback", "use_sw_tback"], ["input"],
                "Compute locally optimal Smith-Waterman alignments?"),
    ]
    _Ncbiblast2SeqCommandline.__init__(self, cmd, **kwargs)
615
620
621
623 """Wrapper for the NCBI BLAST+ program blastn (for nucleotides).
624
625 With the release of BLAST+ (BLAST rewritten in C++ instead of C), the NCBI
626 replaced the old blastall tool with separate tools for each of the searches.
627 This wrapper therefore replaces BlastallCommandline with option -p blastn.
628
629 For example, to run a search against the "nt" nucleotide database using the
630 FASTA nucleotide file "m_code.fasta" as the query, with an expectation value
631 cut off of 0.001, saving the output to a file in XML format:
632
633 >>> from Bio.Blast.Applications import NcbiblastnCommandline
634 >>> cline = NcbiblastnCommandline(query="m_cold.fasta", db="nt", strand="plus",
635 ... evalue=0.001, out="m_cold.xml", outfmt=5)
636 >>> cline
637 NcbiblastnCommandline(cmd='blastn', query='m_cold.fasta', db='nt', out='m_cold.xml', evalue=0.001, outfmt=5, strand='plus')
638 >>> print cline
639 blastn -query m_cold.fasta -db nt -out m_cold.xml -evalue 0.001 -outfmt 5 -strand plus
640
641 You would typically run the command line with the Python subprocess module,
642 as described in the Biopython tutorial.
643 """
def __init__(self, cmd="blastn", **kwargs):
    """Declare the blastn-specific options and initialise the wrapper.

    Stores the blastn-only options on self.parameters and delegates to
    _Ncbiblast2SeqCommandline.__init__.

    cmd -- name or path of the executable (default "blastn").
    kwargs -- option values, forwarded to the base class initializer.
    """
    # Value sets used by the checker functions below.
    strand_choices = ("both", "minus", "plus")
    task_choices = ('blastn', 'blastn-short', 'dc-megablast',
                    'megablast', 'vecscreen')
    template_type_choices = ('coding', 'coding_and_optimal', 'optimal')
    template_length_choices = (16, 18, 21, '16', '18', '21')

    blastn_options = [
        # Input query.
        _Option(["-strand", "strand"], ["input"],
                lambda value: value in strand_choices, 0,
                "Query strand(s) to search against database/subject.\n"
                "\n"
                "Values allowed are \"both\" (default), \"minus\", \"plus\".", False),
        # General search.
        _Option(["-task", "task"], ["input"],
                lambda value: value in task_choices, 0,
                "Task to execute (string, default 'megablast')\n"
                "\n"
                "Allowed values 'blastn', 'blastn-short', 'dc-megablast', 'megablast'\n"
                "(the default), or 'vecscreen'.", False),
        _Option(["-penalty", "penalty"], ["input"], None, 0,
                "Penalty for a nucleotide mismatch (integer, at most zero).", False),
        _Option(["-reward", "reward"], ["input"], None, 0,
                "Reward for a nucleotide match (integer, at least zero).", False),
        _Option(["-index_name", "index_name"], ["input"], None, 0,
                "MegaBLAST database index name.", False),
        # Query filtering.
        _Option(["-dust", "dust"], ["input"], None, 0,
                "Filter query sequence with DUST (string).\n"
                "\n"
                "Format: 'yes', 'level window linker', or 'no' to disable.\n"
                "Default = '20 64 1'.\n", False),
        _Option(["-filtering_db", "filtering_db"], ["input"], None, 0,
                "BLAST database containing filtering elements (i.e. repeats).", False),
        _Option(["-window_masker_taxid", "window_masker_taxid"], ["input"], None, 0,
                "Enable WindowMasker filtering using a Taxonomic ID (integer).", False),
        _Option(["-window_masker_db", "window_masker_db"], ["input"], None, 0,
                "Enable WindowMasker filtering using this repeats database (string).", False),
        # Restricting the search.
        _Option(["-db_soft_mask", "db_soft_mask"], ["input"], None, 0,
                "Filtering algorithm for soft masking (integer).\n"
                "\n"
                "Filtering algorithm ID to apply to the BLAST database as soft masking.\n"
                "\n"
                "Incompatible with: subject, subject_loc", False),
        _Option(["-perc_identity", "perc_identity"], ["input"], None, 0,
                "Percent identity (real, 0 to 100 inclusive).", False),
        # Discontiguous MegaBLAST.
        _Option(["-template_type", "template_type"], ["input"],
                lambda value: value in template_type_choices, 0,
                "Discontiguous MegaBLAST template type (string).\n"
                "\n"
                "Allowed values: 'coding', 'coding_and_optimal' or 'optimal'\n"
                "Requires: template_length.", False),
        _Option(["-template_length", "template_length"], ["input"],
                lambda value: value in template_length_choices, 0,
                "Discontiguous MegaBLAST template length (integer).\n"
                "\n"
                "Allowed values: 16, 18, 21\n"
                "\n"
                "Requires: template_type.", False),
        # Extension.
        _Switch(["-no_greedy", "no_greedy"], ["input"],
                "Use non-greedy dynamic programming extension"),
        _Option(["-min_raw_gapped_score", "min_raw_gapped_score"], ["input"], None, 0,
                "Minimum raw gapped score to keep an alignment in the preliminary gapped and traceback stages (integer).", False),
        _Switch(["-ungapped", "ungapped"], ["input"],
                "Perform ungapped alignment only?"),
        _Option(["-off_diagonal_range", "off_diagonal_range"], ["input"], None, 0,
                "Number of off-diagonals to search for the 2nd hit (integer).\n"
                "\n"
                "Expects a positive integer, or 0 (default) to turn off.\n"
                "\n"
                "Added in BLAST 2.2.23+\n", False),
    ]
    self.parameters = blastn_options
    _Ncbiblast2SeqCommandline.__init__(self, cmd, **kwargs)
721
# NOTE(review): these statements are the body of a method whose ``def`` line
# is not visible in this excerpt (presumably ``_validate(self)``).
# db_soft_mask must not be combined with either subject option.
incompatibles = {"db_soft_mask": ["subject", "subject_loc"]}
self._validate_incompatibilities(incompatibles)
# template_type and template_length must be given together or not at all.
# The message previously named template_type twice.
if (self.template_type and not self.template_length) \
        or (self.template_length and not self.template_type):
    raise ValueError("Options template_type and template_length require each other.")
_Ncbiblast2SeqCommandline._validate(self)
729
730
732 """Wrapper for the NCBI BLAST+ program blastx (nucleotide query, protein database).
733
734 With the release of BLAST+ (BLAST rewritten in C++ instead of C), the NCBI
735 replaced the old blastall tool with separate tools for each of the searches.
736 This wrapper therefore replaces BlastallCommandline with option -p blastx.
737
738 >>> from Bio.Blast.Applications import NcbiblastxCommandline
739 >>> cline = NcbiblastxCommandline(query="m_cold.fasta", db="nr", evalue=0.001)
740 >>> cline
741 NcbiblastxCommandline(cmd='blastx', query='m_cold.fasta', db='nr', evalue=0.001)
742 >>> print cline
743 blastx -query m_cold.fasta -db nr -evalue 0.001
744
745 You would typically run the command line with the Python subprocess module,
746 as described in the Biopython tutorial.
747 """
def __init__(self, cmd="blastx", **kwargs):
    """Declare the blastx-specific options and initialise the wrapper.

    Stores the blastx-only options on self.parameters and delegates to
    _Ncbiblast2SeqCommandline.__init__.

    cmd -- name or path of the executable (default "blastx").
    kwargs -- option values, forwarded to the base class initializer.
    """
    self.parameters = [
        # Input query.
        _Option(["-strand", "strand"], ["input"],
                lambda value: value in ["both", "minus", "plus"], 0,
                "Query strand(s) to search against database/subject.\n"
                "\n"
                "Values allowed are \"both\" (default), \"minus\", \"plus\".", False),
        _Option(["-query_gencode", "query_gencode"], ["input"], None, 0,
                "Genetic code to use to translate query\n"
                "\n"
                "Integer. Default is one.", False),
        # General search.
        _Option(["-frame_shift_penalty", "frame_shift_penalty"], ["input"], None, 0,
                "Frame shift penalty (integer, at least 1, default ignored).", False),
        _Option(["-max_intron_length", "max_intron_length"], ["input"], None, 0,
                "Maximum intron length (integer).\n"
                "\n"
                "Length of the largest intron allowed in a translated nucleotide\n"
                "sequence when linking multiple distinct alignments (a negative\n"
                "value disables linking). Default zero.", False),
        _Option(["-matrix", "matrix"], ["input"], None, 0,
                "Scoring matrix name (default BLOSUM62).", False),
        _Option(["-threshold", "threshold"], ["input"], None, 0,
                "Minimum word score such that the word is added to the BLAST lookup table (float)", False),
        # Query filtering.  The default value's closing quote was missing
        # from the help text.
        _Option(["-seg", "seg"], ["input"], None, 0,
                "Filter query sequence with SEG (string).\n"
                "\n"
                "Format: \"yes\", \"window locut hicut\", or \"no\" to disable.\n"
                "Default is \"12 2.2 2.5\"", False),
        # Restricting the search.
        _Option(["-db_soft_mask", "db_soft_mask"], ["input"], None, 0,
                "Filtering algorithm for soft masking (integer).\n"
                "\n"
                "Filtering algorithm ID to apply to the BLAST database as soft masking.\n"
                "\n"
                "Incompatible with: subject, subject_loc", False),
        # Extension options.
        _Switch(["-ungapped", "ungapped"], ["input"],
                "Perform ungapped alignment only?"),
    ]
    _Ncbiblast2SeqCommandline.__init__(self, cmd, **kwargs)
792
797
798
class NcbitblastnCommandline(_Ncbiblast2SeqCommandline):
    """Wrapper for the NCBI BLAST+ program tblastn.

    With the release of BLAST+ (BLAST rewritten in C++ instead of C), the NCBI
    replaced the old blastall tool with separate tools for each of the searches.
    This wrapper therefore replaces BlastallCommandline with option -p tblastn.

    >>> from Bio.Blast.Applications import NcbitblastnCommandline
    >>> cline = NcbitblastnCommandline(help=True)
    >>> cline
    NcbitblastnCommandline(cmd='tblastn', help=True)
    >>> print cline
    tblastn -help

    You would typically run the command line with the Python subprocess module,
    as described in the Biopython tutorial.
    """
    def __init__(self, cmd="tblastn", **kwargs):
        """Declare the tblastn-specific options and initialise the wrapper.

        Arguments:
         - cmd - name (or path) of the executable, "tblastn" by default.
         - kwargs - option values keyed by property name; they are passed
           on untouched to _Ncbiblast2SeqCommandline.__init__.
        """
        # NOTE(review): the positional _Option arguments appear to be
        # (names, types, validator, required flag, description, equate) -
        # confirm against Bio.Application._Option.
        self.parameters = [
            # The query is protein here, so the genetic code applies to
            # the translated database/subject sequences.
            _Option(["-db_gencode", "db_gencode"], ["input"], None, 0,
                    """Genetic code to use to translate database/subjects

                    Integer. Default is one.""", False),
            _Option(["-frame_shift_penalty", "frame_shift_penalty"],
                    ["input"], None, 0,
                    "Frame shift penalty (integer, at least 1, default ignored).",
                    False),
            _Option(["-max_intron_length", "max_intron_length"],
                    ["input"], None, 0,
                    """Maximum intron length (integer).

                    Length of the largest intron allowed in a translated nucleotide
                    sequence when linking multiple distinct alignments (a negative
                    value disables linking). Default zero.""", False),
            _Option(["-matrix", "matrix"], ["input"], None, 0,
                    "Scoring matrix name (default BLOSUM62).", False),
            _Option(["-threshold", "threshold"], ["input"], None, 0,
                    "Minimum word score such that the word is added to the BLAST lookup table (float)", False),
            # Whole-value membership test: a substring test against one
            # long string would accept "", "12" or "Ft" and would reject
            # the documented value "f".
            _Option(["-comp_based_stats", "comp_based_stats"], ["input"],
                    lambda value: value in ("0", "F", "f", "1", "2",
                                            "T", "t", "D", "d", "3"), 0,
                    """Use composition-based statistics (string, default 2, i.e. True).

                    0, F or f: no composition-based statistics
                    1: Composition-based statistics as in NAR 29:2994-3005, 2001
                    2, T or t, D or d : Composition-based score adjustment as in
                       Bioinformatics 21:902-911, 2005, conditioned on sequence properties
                    3: Composition-based score adjustment as in Bioinformatics 21:902-911,
                       2005, unconditionally

                    Note that only tblastn supports values of 1 and 3.""", False),
            # The full stop after the closing quote keeps that quote from
            # being read as part of the triple-quote terminator.
            _Option(["-seg", "seg"], ["input"], None, 0,
                    """Filter query sequence with SEG (string).

                    Format: "yes", "window locut hicut", or "no" to disable.
                    Default is "12 2.2 2.5".""", False),
            _Option(["-db_soft_mask", "db_soft_mask"], ["input"], None, 0,
                    """Filtering algorithm ID to apply to the BLAST database as soft masking (string).

                    Incompatible with: subject, subject_loc
                    """, False),
            _Switch(["-ungapped", "ungapped"], ["input"],
                    "Perform ungapped alignment only?"),
            _Switch(["-use_sw_tback", "use_sw_tback"], ["input"],
                    "Compute locally optimal Smith-Waterman alignments?"),
            _Option(["-in_pssm", "in_pssm"], ["input", "file"], None, 0,
                    """PSI-BLAST checkpoint file

                    Incompatible with: remote, query""", False),
        ]
        # The base class initialiser receives the executable name and all
        # keyword arguments after self.parameters has been filled in.
        _Ncbiblast2SeqCommandline.__init__(self, cmd, **kwargs)
872
877
878
class NcbitblastxCommandline(_Ncbiblast2SeqCommandline):
    """Wrapper for the NCBI BLAST+ program tblastx.

    With the release of BLAST+ (BLAST rewritten in C++ instead of C), the NCBI
    replaced the old blastall tool with separate tools for each of the searches.
    This wrapper therefore replaces BlastallCommandline with option -p tblastx.

    >>> from Bio.Blast.Applications import NcbitblastxCommandline
    >>> cline = NcbitblastxCommandline(help=True)
    >>> cline
    NcbitblastxCommandline(cmd='tblastx', help=True)
    >>> print cline
    tblastx -help

    You would typically run the command line with the Python subprocess module,
    as described in the Biopython tutorial.
    """
    def __init__(self, cmd="tblastx", **kwargs):
        """Declare the tblastx-specific options and initialise the wrapper.

        Arguments:
         - cmd - name (or path) of the executable, "tblastx" by default.
         - kwargs - option values keyed by property name; they are passed
           on untouched to _Ncbiblast2SeqCommandline.__init__.
        """
        # NOTE(review): the positional _Option arguments appear to be
        # (names, types, validator, required flag, description, equate) -
        # confirm against Bio.Application._Option.
        self.parameters = [
            _Option(["-strand", "strand"], ["input"],
                    lambda value: value in ["both", "minus", "plus"], 0,
                    """Query strand(s) to search against database/subject.

                    Values allowed are "both" (default), "minus", "plus".""",
                    False),
            _Option(["-query_gencode", "query_gencode"], ["input"], None, 0,
                    """Genetic code to use to translate query

                    Integer. Default is one.""", False),
            # Distinct from query_gencode above: this one governs the
            # translation of the database/subject sequences.
            _Option(["-db_gencode", "db_gencode"], ["input"], None, 0,
                    """Genetic code to use to translate database/subjects

                    Integer. Default is one.""", False),
            _Option(["-max_intron_length", "max_intron_length"],
                    ["input"], None, 0,
                    """Maximum intron length (integer).

                    Length of the largest intron allowed in a translated nucleotide
                    sequence when linking multiple distinct alignments (a negative
                    value disables linking). Default zero.""", False),
            _Option(["-matrix", "matrix"], ["input"], None, 0,
                    "Scoring matrix name (default BLOSUM62).", False),
            _Option(["-threshold", "threshold"], ["input"], None, 0,
                    "Minimum word score such that the word is added to the BLAST lookup table (float)", False),
            # The full stop after the closing quote keeps that quote from
            # being read as part of the triple-quote terminator.
            _Option(["-seg", "seg"], ["input"], None, 0,
                    """Filter query sequence with SEG (string).

                    Format: "yes", "window locut hicut", or "no" to disable.
                    Default is "12 2.2 2.5".""", False),
            _Option(["-db_soft_mask", "db_soft_mask"], ["input"], None, 0,
                    """Filtering algorithm ID to apply to the BLAST database as soft masking (string).

                    Incompatible with: subject, subject_loc
                    """, False),
        ]
        # The base class initialiser receives the executable name and all
        # keyword arguments after self.parameters has been filled in.
        _Ncbiblast2SeqCommandline.__init__(self, cmd, **kwargs)
938
939
class NcbipsiblastCommandline(_Ncbiblast2SeqCommandline):
    """Wrapper for the NCBI BLAST+ program psiblast.

    With the release of BLAST+ (BLAST rewritten in C++ instead of C), the NCBI
    replaced the old blastpgp tool with a similar tool psiblast. This wrapper
    therefore replaces BlastpgpCommandline, the wrapper for blastpgp.

    >>> from Bio.Blast.Applications import NcbipsiblastCommandline
    >>> cline = NcbipsiblastCommandline(help=True)
    >>> cline
    NcbipsiblastCommandline(cmd='psiblast', help=True)
    >>> print cline
    psiblast -help

    You would typically run the command line with the Python subprocess module,
    as described in the Biopython tutorial.
    """
    def __init__(self, cmd="psiblast", **kwargs):
        """Declare the psiblast-specific options and initialise the wrapper.

        Arguments:
         - cmd - name (or path) of the executable, "psiblast" by default.
         - kwargs - option values keyed by property name; they are passed
           on untouched to _Ncbiblast2SeqCommandline.__init__.
        """
        # NOTE(review): the positional _Option arguments appear to be
        # (names, types, validator, required flag, description, equate) -
        # confirm against Bio.Application._Option.
        self.parameters = [
            _Option(["-matrix", "matrix"], ["input"], None, 0,
                    "Scoring matrix name (default BLOSUM62).", False),
            _Option(["-threshold", "threshold"], ["input"], None, 0,
                    "Minimum word score such that the word is added to the BLAST lookup table (float)", False),
            # Whole-value membership test: a substring test against one
            # long string would accept "", "Ft" or "2T" and would reject
            # the documented value "f".
            _Option(["-comp_based_stats", "comp_based_stats"], ["input"],
                    lambda value: value in ("0", "F", "f", "2",
                                            "T", "t", "D", "d"), 0,
                    """Use composition-based statistics (string, default 2, i.e. True).

                    0, F or f: no composition-based statistics
                    2, T or t, D or d : Composition-based score adjustment as in
                       Bioinformatics 21:902-911, 2005, conditioned on sequence properties

                    Note that tblastn also supports values of 1 and 3.""", False),
            # The full stop after the closing quote keeps that quote from
            # being read as part of the triple-quote terminator.
            _Option(["-seg", "seg"], ["input"], None, 0,
                    """Filter query sequence with SEG (string).

                    Format: "yes", "window locut hicut", or "no" to disable.
                    Default is "12 2.2 2.5".""", False),
            _Option(["-gap_trigger", "gap_trigger"], ["input"], None, 0,
                    "Number of bits to trigger gapping (float, default 22)",
                    False),
            _Switch(["-use_sw_tback", "use_sw_tback"], ["input"],
                    "Compute locally optimal Smith-Waterman alignments?"),
            _Option(["-num_iterations", "num_iterations"], ["input"], None, 0,
                    """Number of iterations to perform, integer

                    Integer of at least one. Default is one.
                    Incompatible with: remote""", False),
            _Option(["-out_pssm", "out_pssm"], ["output", "file"], None, 0,
                    "File name to store checkpoint file", False),
            _Option(["-out_ascii_pssm", "out_ascii_pssm"],
                    ["output", "file"], None, 0,
                    "File name to store ASCII version of PSSM", False),
            _Option(["-in_msa", "in_msa"], ["input", "file"], None, 0,
                    """File name of multiple sequence alignment to restart PSI-BLAST

                    Incompatible with: in_pssm, query""", False),
            _Option(["-in_pssm", "in_pssm"], ["input", "file"], None, 0,
                    """PSI-BLAST checkpoint file

                    Incompatible with: in_msa, query, phi_pattern""", False),
            _Option(["-pseudocount", "pseudocount"], ["input"], None, 0,
                    """Pseudo-count value used when constructing PSSM

                    Integer. Default is zero.""", False),
            _Option(["-inclusion_ethresh", "inclusion_ethresh"],
                    ["input"], None, 0,
                    """E-value inclusion threshold for pairwise alignments

                    Float. Default is 0.002.""", False),
            _Option(["-phi_pattern", "phi_pattern"],
                    ["input", "file"], None, 0,
                    """File name containing pattern to search

                    Incompatible with: in_pssm""", False),
        ]
        # The base class initialiser receives the executable name and all
        # keyword arguments after self.parameters has been filled in.
        _Ncbiblast2SeqCommandline.__init__(self, cmd, **kwargs)
1019
1026
1027
class NcbirpsblastCommandline(_NcbiblastCommandline):
    """Wrapper for the NCBI BLAST+ program rpsblast.

    With the release of BLAST+ (BLAST rewritten in C++ instead of C), the NCBI
    replaced the old rpsblast tool with a similar tool of the same name. This
    wrapper replaces RpsBlastCommandline, the wrapper for the old rpsblast.

    >>> from Bio.Blast.Applications import NcbirpsblastCommandline
    >>> cline = NcbirpsblastCommandline(help=True)
    >>> cline
    NcbirpsblastCommandline(cmd='rpsblast', help=True)
    >>> print cline
    rpsblast -help

    You would typically run the command line with the Python subprocess module,
    as described in the Biopython tutorial.
    """
    def __init__(self, cmd="rpsblast", **kwargs):
        """Declare the rpsblast-specific option and initialise the wrapper.

        Arguments:
         - cmd - name (or path) of the executable, "rpsblast" by default.
         - kwargs - option values keyed by property name; they are passed
           on untouched to _NcbiblastCommandline.__init__.
        """
        self.parameters = [
            # The full stop after the closing quote keeps that quote from
            # being read as part of the triple-quote terminator.
            _Option(["-seg", "seg"], ["input"], None, 0,
                    """Filter query sequence with SEG (string).

                    Format: "yes", "window locut hicut", or "no" to disable.
                    Default is "12 2.2 2.5".""", False),
        ]
        # Unlike the database/subject search wrappers, this one is built
        # directly on _NcbiblastCommandline.
        _NcbiblastCommandline.__init__(self, cmd, **kwargs)
1055
1056
class NcbirpstblastnCommandline(_NcbiblastCommandline):
    """Wrapper for the NCBI BLAST+ program rpstblastn.

    With the release of BLAST+ (BLAST rewritten in C++ instead of C), the NCBI
    replaced the old rpsblast tool with a similar tool of the same name, and a
    separate tool rpstblastn for Translated Reverse Position Specific BLAST.

    >>> from Bio.Blast.Applications import NcbirpstblastnCommandline
    >>> cline = NcbirpstblastnCommandline(help=True)
    >>> cline
    NcbirpstblastnCommandline(cmd='rpstblastn', help=True)
    >>> print cline
    rpstblastn -help

    You would typically run the command line with the Python subprocess module,
    as described in the Biopython tutorial.
    """
    def __init__(self, cmd="rpstblastn", **kwargs):
        """Declare the rpstblastn-specific options and initialise the wrapper.

        Arguments:
         - cmd - name (or path) of the executable, "rpstblastn" by default.
         - kwargs - option values keyed by property name; they are passed
           on untouched to _NcbiblastCommandline.__init__.
        """
        self.parameters = [
            _Option(["-strand", "strand"], ["input"],
                    lambda value: value in ["both", "minus", "plus"], 0,
                    """Query strand(s) to search against database/subject.

                    Values allowed are "both" (default), "minus", "plus".""",
                    False),
            _Option(["-query_gencode", "query_gencode"], ["input"], None, 0,
                    """Genetic code to use to translate query

                    Integer. Default is one.""", False),
            # The full stop after the closing quote keeps that quote from
            # being read as part of the triple-quote terminator.
            _Option(["-seg", "seg"], ["input"], None, 0,
                    """Filter query sequence with SEG (string).

                    Format: "yes", "window locut hicut", or "no" to disable.
                    Default is "12 2.2 2.5".""", False),
            _Switch(["-ungapped", "ungapped"], ["input"],
                    "Perform ungapped alignment only?"),
        ]
        # Unlike the database/subject search wrappers, this one is built
        # directly on _NcbiblastCommandline.
        _NcbiblastCommandline.__init__(self, cmd, **kwargs)
1098
1099
def _test():
    """Run the Bio.Blast.Applications module's doctests."""
    # Imported here so that doctest is only loaded when the tests are
    # actually requested.
    import doctest
    doctest.testmod(verbose=1)


if __name__ == "__main__":
    # Run as a script: exercise the examples embedded in the docstrings.
    _test()
1108