1
2
3
4
5
6
7 """General mechanisms to access applications in Biopython.
8
9 This module is not intended for direct use. It provides the basic objects which
10 are subclassed by our command line wrappers, such as:
11
12 - Bio.Align.Applications
13 - Bio.Blast.Applications
14 - Bio.Emboss.Applications
15 - Bio.Sequencing.Applications
16
17 These modules provide wrapper classes for command line tools to help you
18 construct command line strings by setting the values of each parameter.
19 The finished command line strings are then normally invoked via the built-in
20 Python module subprocess.
21
22 This module also includes some deprecated functionality (function generic_run
23 and class ApplicationResult) which should not be used anymore.
24 """
25 import os, sys
26 import StringIO
27 import subprocess
28 import re
29
30
# Base class for ApplicationError: subprocess.CalledProcessError where the
# running Python provides it, otherwise the generic Exception class.
try:
    from subprocess import CalledProcessError as _ProcessCalledError
except ImportError:
    # Only a missing name should trigger the fallback; any other failure
    # (e.g. KeyboardInterrupt during import) must not be silently swallowed.
    _ProcessCalledError = Exception
36
37 from Bio import File
38
39
40
# Pattern for names which may be used as a keyword argument / property name:
# a letter followed by letters, digits or underscores. The pattern is
# anchored at both ends because re.match() only anchors the start, so an
# unanchored pattern would accept any string that merely begins with a valid
# name (for example "test-name").
_re_prop_name = re.compile(r"^[a-zA-Z][a-zA-Z0-9_]*$")
# Import-time sanity checks of the pattern.
assert _re_prop_name.match("t")
assert _re_prop_name.match("test")
assert _re_prop_name.match("_test") is None
assert _re_prop_name.match("-test") is None
assert _re_prop_name.match("test_name")
assert _re_prop_name.match("test2")
assert _re_prop_name.match("test-name") is None
assert _re_prop_name.match("test name") is None
48
# Words reserved by Python itself; a parameter's final name is rejected if it
# is one of these, since it could not be used as an argument or property name.
_reserved_names = (
    "and del from not while as elif "
    "global or with assert else if pass "
    "yield break except import print class "
    "exec in raise continue finally is "
    "return def for lambda try"
).split()

# Names rejected because of the way the AbstractCommandline class itself works
# (set_parameter is one of its own methods).
_local_reserved_names = "set_parameter".split()
56
58 """Run an application with the given commandline (DEPRECATED).
59
60 This expects a pre-built commandline that derives from
61 AbstractCommandline, and returns a ApplicationResult object
62 to get results from a program, along with handles of the
63 standard output and standard error.
64
65 WARNING - This will read in the full program output into memory!
66 This may be an issue when the program writes a large amount of
67 data to standard output.
68
69 NOTE - This function is deprecated, and we intend to remove it in
70 future releases of Biopython.
71 We now recommend you invoke subprocess directly, using str(commandline)
72 to turn an AbstractCommandline wrapper into a command line string. This
73 will give you full control of the tool's input and output as well.
74 """
75 import warnings
76 import Bio
77 warnings.warn("Bio.Application.generic_run and the associated "
78 "Bio.Application.ApplicationResult are deprecated. "
79 "Please use the Bio.Application based wrappers with "
80 "the built in Python module subprocess instead, as "
81 "described in the Biopython Tutorial.",
82 Bio.BiopythonDeprecationWarning)
83
84
85
86
87 child = subprocess.Popen(str(commandline),
88 stdin=subprocess.PIPE,
89 stdout=subprocess.PIPE,
90 stderr=subprocess.PIPE,
91 universal_newlines=True,
92 shell=(sys.platform!="win32"))
93
94 r_out, e_out = child.communicate()
95
96 error_code = child.returncode
97 return ApplicationResult(commandline, error_code), \
98 File.UndoHandle(StringIO.StringIO(r_out)), \
99 File.UndoHandle(StringIO.StringIO(e_out))
100
101
103 """Raised when an application returns a non-zero exit status.
104
105 The exit status will be stored in the returncode attribute, similarly
106 the command line string used in the cmd attribute, and (if captured)
107 stdout and stderr as strings.
108
109 This exception is a subclass of subprocess.CalledProcessError
110 (unless run on Python 2.4 where that does not exist).
111
112 >>> err = ApplicationError(-11, "helloworld", "", "Some error text")
113 >>> err.returncode, err.cmd, err.stdout, err.stderr
114 (-11, 'helloworld', '', 'Some error text')
115 >>> print err
116 Command 'helloworld' returned non-zero exit status -11, 'Some error text'
117
118 """
def __init__(self, returncode, cmd, stdout="", stderr=""):
    """Record the exit status, command line and any captured output.

    Each argument is stored unchanged on the attribute of the same name;
    stdout and stderr default to empty strings when nothing was captured.
    """
    self.returncode, self.cmd = returncode, cmd
    self.stdout, self.stderr = stdout, stderr
124
126
127 try:
128 msg = self.stderr.lstrip().split("\n",1)[0].rstrip()
129 except:
130 msg = ""
131 if msg:
132 return "Command '%s' returned non-zero exit status %d, %r" \
133 % (self.cmd, self.returncode, msg)
134 else:
135 return "Command '%s' returned non-zero exit status %d" \
136 % (self.cmd, self.returncode)
137
139 return "ApplicationError(%i, %s, %s, %s)" \
140 % (self.returncode, self.cmd, self.stdout, self.stderr)
141
142
144 """Make results of a program available through a standard interface (DEPRECATED).
145
146 This tries to pick up output information available from the program
147 and make it available programmatically.
148
149 NOTE - This class has been deprecated and we intend to remove it in
150 a future release of Biopython.
151 """
def __init__(self, application_cl, return_code):
    """Initialize with the command line object and its return code.

    Arguments:
     - application_cl - the command line wrapper object; its ``parameters``
       are scanned for output files.
     - return_code - stored unchanged as ``self.return_code``.

    Issues a Bio.BiopythonDeprecationWarning each time it is called.
    """
    import warnings
    import Bio
    warnings.warn("Bio.Application.ApplicationResult and the "
                  "associated function Bio.Application.generic_run "
                  "are deprecated. Please use the Bio.Application "
                  "based wrappers with the built in Python module "
                  "subprocess instead, as described in the Biopython "
                  "Tutorial.", Bio.BiopythonDeprecationWarning)
    self._cl = application_cl
    self.return_code = return_code

    # Collect the value of every parameter which is typed as both "file"
    # and "output" and has been set, keyed by the last of its names.
    self._results = {}
    for param in self._cl.parameters:
        kinds = param.param_types
        is_output_file = "file" in kinds and "output" in kinds
        if is_output_file and param.is_set:
            self._results[param.names[-1]] = param.value
177
179 """Retrieve result information for the given output.
180
181 Supports any of the defined parameters aliases (assuming the
182 parameter is defined as an output).
183 """
184 try:
185 return self._results[output_name]
186 except KeyError, err:
187
188 for parameter in self._cl.parameters:
189 if output_name in parameter.names:
190 return self._results[parameter.names[-1]]
191
192 raise err
193
195 """Retrieve a list of all available results.
196 """
197 result_names = self._results.keys()
198 result_names.sort()
199 return result_names
200
202 """Generic interface for constructing command line strings.
203
204 This class shouldn't be called directly; it should be subclassed to
205 provide an implementation for a specific application.
206
207 For a usage example we'll show one of the EMBOSS wrappers. You can set
208 options when creating the wrapper object using keyword arguments - or
209 later using their corresponding properties:
210
211 >>> from Bio.Emboss.Applications import WaterCommandline
212 >>> cline = WaterCommandline(gapopen=10, gapextend=0.5)
213 >>> cline
214 WaterCommandline(cmd='water', gapopen=10, gapextend=0.5)
215
216 You can instead manipulate the parameters via their properties, e.g.
217
218 >>> cline.gapopen
219 10
220 >>> cline.gapopen = 20
221 >>> cline
222 WaterCommandline(cmd='water', gapopen=20, gapextend=0.5)
223
224 You can clear a parameter you have already added by 'deleting' the
225 corresponding property:
226
227 >>> del cline.gapopen
228 >>> cline.gapopen
229 >>> cline
230 WaterCommandline(cmd='water', gapextend=0.5)
231
232 Once you have set the parameters you need, turn the object into a string:
233
234 >>> str(cline)
235 Traceback (most recent call last):
236 ...
237 ValueError: You must either set outfile (output filename), or enable filter or stdout (output to stdout).
238
239 In this case the wrapper knows certain arguments are required to construct
240 a valid command line for the tool. For a complete example,
241
242 >>> from Bio.Emboss.Applications import WaterCommandline
243 >>> water_cmd = WaterCommandline(gapopen=10, gapextend=0.5)
244 >>> water_cmd.asequence = "asis:ACCCGGGCGCGGT"
245 >>> water_cmd.bsequence = "asis:ACCCGAGCGCGGT"
246 >>> water_cmd.outfile = "temp_water.txt"
247 >>> print water_cmd
248 water -outfile=temp_water.txt -asequence=asis:ACCCGGGCGCGGT -bsequence=asis:ACCCGAGCGCGGT -gapopen=10 -gapextend=0.5
249 >>> water_cmd
250 WaterCommandline(cmd='water', outfile='temp_water.txt', asequence='asis:ACCCGGGCGCGGT', bsequence='asis:ACCCGAGCGCGGT', gapopen=10, gapextend=0.5)
251
252 You would typically run the command line via a standard Python operating
253 system call using the subprocess module for full control. For the simple
254 case where you just want to run the command and get the output:
255
256 stdout, stderr = water_cmd()
257 """
258
259
261 """Create a new instance of a command line wrapper object."""
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278 self.program_name = cmd
279 try:
280 parameters = self.parameters
281 except AttributeError:
282 raise AttributeError("Subclass should have defined self.parameters")
283
284 aliases = set()
285 for p in parameters:
286 for name in p.names:
287 if name in aliases:
288 raise ValueError("Parameter alias %s multiply defined" \
289 % name)
290 aliases.add(name)
291 name = p.names[-1]
292 if _re_prop_name.match(name) is None:
293 raise ValueError("Final parameter name %s cannot be used as "
294 "an argument or property name in python"
295 % repr(name))
296 if name in _reserved_names:
297 raise ValueError("Final parameter name %s cannot be used as "
298 "an argument or property name because it is "
299 "a reserved word in python" % repr(name))
300 if name in _local_reserved_names:
301 raise ValueError("Final parameter name %s cannot be used as "
302 "an argument or property name due to the "
303 "way the AbstractCommandline class works"
304 % repr(name))
305
306 def getter(name):
307 return lambda x : x._get_parameter(name)
308 def setter(name):
309 return lambda x, value : x.set_parameter(name, value)
310 def deleter(name):
311 return lambda x : x._clear_parameter(name)
312 doc = p.description
313 if isinstance(p, _Switch):
314 doc += "\n\nThis property controls the addition of the %s " \
315 "switch, treat this property as a boolean." % p.names[0]
316 else:
317 doc += "\n\nThis controls the addition of the %s parameter " \
318 "and its associated value. Set this property to the " \
319 "argument value required." % p.names[0]
320 prop = property(getter(name), setter(name), deleter(name), doc)
321 setattr(self.__class__, name, prop)
322 for key, value in kwargs.iteritems():
323 self.set_parameter(key, value)
324
326 """Make sure the required parameters have been set (PRIVATE).
327
328 No return value - it either works or raises a ValueError.
329
330 This is a separate method (called from __str__) so that subclasses may
331 override it.
332 """
333 for p in self.parameters:
334
335 if p.is_required and not(p.is_set):
336 raise ValueError("Parameter %s is not set." \
337 % p.names[-1])
338
339
341 """Make the commandline string with the currently set options.
342
343 e.g.
344 >>> from Bio.Emboss.Applications import WaterCommandline
345 >>> cline = WaterCommandline(gapopen=10, gapextend=0.5)
346 >>> cline.asequence = "asis:ACCCGGGCGCGGT"
347 >>> cline.bsequence = "asis:ACCCGAGCGCGGT"
348 >>> cline.outfile = "temp_water.txt"
349 >>> print cline
350 water -outfile=temp_water.txt -asequence=asis:ACCCGGGCGCGGT -bsequence=asis:ACCCGAGCGCGGT -gapopen=10 -gapextend=0.5
351 >>> str(cline)
352 'water -outfile=temp_water.txt -asequence=asis:ACCCGGGCGCGGT -bsequence=asis:ACCCGAGCGCGGT -gapopen=10 -gapextend=0.5'
353 """
354 self._validate()
355 commandline = "%s " % self.program_name
356 for parameter in self.parameters:
357 if parameter.is_set:
358
359 commandline += str(parameter)
360 return commandline.strip()
361
363 """Return a representation of the command line object for debugging.
364
365 e.g.
366 >>> from Bio.Emboss.Applications import WaterCommandline
367 >>> cline = WaterCommandline(gapopen=10, gapextend=0.5)
368 >>> cline.asequence = "asis:ACCCGGGCGCGGT"
369 >>> cline.bsequence = "asis:ACCCGAGCGCGGT"
370 >>> cline.outfile = "temp_water.txt"
371 >>> print cline
372 water -outfile=temp_water.txt -asequence=asis:ACCCGGGCGCGGT -bsequence=asis:ACCCGAGCGCGGT -gapopen=10 -gapextend=0.5
373 >>> cline
374 WaterCommandline(cmd='water', outfile='temp_water.txt', asequence='asis:ACCCGGGCGCGGT', bsequence='asis:ACCCGAGCGCGGT', gapopen=10, gapextend=0.5)
375 """
376 answer = "%s(cmd=%s" % (self.__class__.__name__, repr(self.program_name))
377 for parameter in self.parameters:
378 if parameter.is_set:
379 if isinstance(parameter, _Switch):
380 answer += ", %s=True" % parameter.names[-1]
381 else:
382 answer += ", %s=%s" \
383 % (parameter.names[-1], repr(parameter.value))
384 answer += ")"
385 return answer
386
396
407
430
432 """Check whether the given value is valid.
433
434 No return value - it either works or raises a ValueError.
435
436 This uses the passed function 'check_function', which can either
437 return a [0, 1] (bad, good) value or raise an error. Either way
438 this function will raise an error if the value is not valid, or
439 finish silently otherwise.
440 """
441 if check_function is not None:
442 is_good = check_function(value)
443 assert is_good in [0,1,True,False]
444 if not is_good:
445 raise ValueError("Invalid parameter value %r for parameter %s" \
446 % (value, name))
447
449 """Set attribute name to value (PRIVATE).
450
451 This code implements a workaround for a user interface issue.
452 Without this __setattr__ attribute-based assignment of parameters
453 will silently accept invalid parameters, leading to known instances
454 of the user assuming that parameters for the application are set,
455 when they are not.
456
457 >>> from Bio.Emboss.Applications import WaterCommandline
458 >>> cline = WaterCommandline(gapopen=10, gapextend=0.5, stdout=True)
459 >>> cline.asequence = "a.fasta"
460 >>> cline.bsequence = "b.fasta"
461 >>> cline.csequence = "c.fasta"
462 Traceback (most recent call last):
463 ...
464 ValueError: Option name csequence was not found.
465 >>> print cline
466 water -stdout -asequence=a.fasta -bsequence=b.fasta -gapopen=10 -gapextend=0.5
467
468 This workaround uses a whitelist of object attributes, and sets the
469 object attribute list as normal, for these. Other attributes are
470 assumed to be parameters, and passed to the self.set_parameter method
471 for validation and assignment.
472 """
473 if name in ['parameters', 'program_name']:
474 self.__dict__[name] = value
475 else:
476 self.set_parameter(name, value)
477
def __call__(self, stdin=None, stdout=True, stderr=True):
    """Execute the command, wait for it to finish, and return its output.

    Runs the command line tool and waits for it to finish. If it returns
    a non-zero error level, an exception is raised. Otherwise a tuple
    (stdout, stderr) is returned.

    The optional stdin argument should be a string of data which will be
    passed to the tool as standard input.

    The optional stdout and stderr arguments are treated as booleans, and
    control if the output should be captured (True, default), or ignored
    by sending it to the null device to avoid wasting memory (False). In
    the latter case nothing is captured for that stream, and subprocess
    reports it as None rather than as a string.

    Default example usage:

    from Bio.Emboss.Applications import WaterCommandline
    water_cmd = WaterCommandline(gapopen=10, gapextend=0.5, stdout=True,
                                 asequence="a.fasta", bsequence="b.fasta")
    print "About to run:\n%s" % water_cmd
    std_output, err_output = water_cmd()

    This functionality is similar to subprocess.check_output() added in
    Python 2.7. In general if you require more control over running the
    command, use subprocess directly.

    As of Biopython 1.56, when the program called returns a non-zero error
    level, a custom ApplicationError exception is raised. This includes
    any stdout and stderr strings captured as attributes of the exception
    object, since they may be useful for diagnosing what went wrong.

    Raises whatever str(self) raises if the command line cannot be built,
    and ApplicationError if the tool exits with a non-zero status.
    """
    # Handles on the null device which are opened here and therefore must
    # be closed here, whatever happens while running the tool.
    devnull_handles = []
    try:
        if stdout:
            stdout_arg = subprocess.PIPE
        else:
            # Opened for writing: the child process writes its output to
            # this handle, which a read-only handle would not allow.
            stdout_arg = open(os.devnull, "w")
            devnull_handles.append(stdout_arg)
        if stderr:
            stderr_arg = subprocess.PIPE
        else:
            stderr_arg = open(os.devnull, "w")
            devnull_handles.append(stderr_arg)

        # Build the command line once; it is reused for the error report.
        command = str(self)
        # A pipe is always attached to stdin; the command string is run
        # through a shell on every platform except Windows.
        child_process = subprocess.Popen(command, stdin=subprocess.PIPE,
                                         stdout=stdout_arg, stderr=stderr_arg,
                                         universal_newlines=True,
                                         shell=(sys.platform!="win32"))
        # Use .communicate as it can do deadlock-free reading of both pipes.
        stdout_str, stderr_str = child_process.communicate(stdin)
    finally:
        for handle in devnull_handles:
            handle.close()

    # Nothing should come back for a stream which was not captured.
    if not stdout: assert not stdout_str
    if not stderr: assert not stderr_str
    return_code = child_process.returncode
    if return_code:
        raise ApplicationError(return_code, command,
                               stdout_str, stderr_str)
    return stdout_str, stderr_str
539
540
542 """A class to hold information about a parameter for a commandline.
543
544 Do not use this directly, instead use one of the subclasses.
545 """
547 raise NotImplementedError
548
550 raise NotImplementedError
551
553 """Represent an option that can be set for a program.
554
555 This holds UNIXish options like --append=yes and -a yes,
556 where a value (here "yes") is generally expected.
557
558 For UNIXish options like -kimura in clustalw which don't
559 take a value, use the _Switch object instead.
560
561 Attributes:
562
563 o names -- a list of string names by which the parameter can be
564 referenced (ie. ["-a", "--append", "append"]). The first name in
565 the list is considered to be the one that goes on the commandline,
566 for those parameters that print the option. The last name in the list
567 is assumed to be a "human readable" name describing the option in one
568 word.
569
570 o param_types -- a list of string describing the type of parameter,
571 which can help let programs know how to use it. Example descriptions
572 include 'input', 'output', 'file'. Note that if 'file' is included,
573 these argument values will automatically be escaped if the filename
574 contains spaces.
575
576 o checker_function -- a reference to a function that will determine
577 if a given value is valid for this parameter. This function can either
578 raise an error when given a bad value, or return a [0, 1] decision on
579 whether the value is correct.
580
581 o equate -- should an equals sign be inserted if a value is used?
582
583 o description -- a description of the option.
584
585 o is_required -- a flag to indicate if the parameter must be set for
586 the program to be run.
587
588 o is_set -- if the parameter has been set
589
590 o value -- the value of a parameter
591 """
def __init__(self, names=None, types=None, checker_function=None,
             is_required=False, description="", equate=True):
    """Create an option, initially unset and without a value.

    Arguments (all optional):
     - names - list of names for the option, stored as ``self.names``.
     - types - list of strings describing the parameter, stored as
       ``self.param_types``.
     - checker_function - function used to validate a value, or None.
     - is_required - whether the option must be set.
     - description - description of the option.
     - equate - whether "=" is placed between the name and the value.
    """
    # None stands for "not given" so that every instance gets its own new
    # list; a list literal as the default would be one object shared (and
    # mutated) by every option created without that argument.
    if names is None:
        names = []
    if types is None:
        types = []
    self.names = names
    self.param_types = types
    self.checker_function = checker_function
    self.description = description
    self.equate = equate
    self.is_required = is_required

    self.is_set = False
    self.value = None
603
605 """Return the value of this option for the commandline.
606
607 Includes a trailing space.
608 """
609
610
611
612
613 if self.value is None:
614 return "%s " % self.names[0]
615 if "file" in self.param_types:
616 v = _escape_filename(self.value)
617 else:
618 v = str(self.value)
619 if self.equate:
620 return "%s=%s " % (self.names[0], v)
621 else:
622 return "%s %s " % (self.names[0], v)
623
625 """Represent an optional argument switch for a program.
626
627 This holds UNIXish options like -kimura in clustalw which don't
628 take a value, they are either included in the command string
629 or omitted.
630
631 o names -- a list of string names by which the parameter can be
632 referenced (ie. ["-a", "--append", "append"]). The first name in
633 the list is considered to be the one that goes on the commandline,
634 for those parameters that print the option. The last name in the list
635 is assumed to be a "human readable" name describing the option in one
636 word.
637
638 o param_types -- a list of string describing the type of parameter,
639 which can help let programs know how to use it. Example descriptions
640 include 'input', 'output', 'file'. Note that if 'file' is included,
641 these argument values will automatically be escaped if the filename
642 contains spaces.
643
644 o description -- a description of the option.
645
646 o is_set -- if the parameter has been set
647
648 NOTE - There is no value attribute, see is_set instead,
649 """
650 - def __init__(self, names = [], types = [], description = ""):
656
658 """Return the value of this option for the commandline.
659
660 Includes a trailing space.
661 """
662 assert not hasattr(self, "value")
663 if self.is_set:
664 return "%s " % self.names[0]
665 else:
666 return ""
667
669 """Represent an argument on a commandline.
670 """
def __init__(self, names=None, types=None, checker_function=None,
             is_required=False, description=""):
    """Create an argument, initially unset and without a value.

    Arguments (all optional):
     - names - list of names for the argument, stored as ``self.names``.
     - types - list of strings describing the parameter, stored as
       ``self.param_types``.
     - checker_function - function used to validate a value, or None.
     - is_required - whether the argument must be set.
     - description - description of the argument.
    """
    # None stands for "not given" so that every instance gets its own new
    # list; a list literal as the default would be one object shared (and
    # mutated) by every argument created without that argument.
    if names is None:
        names = []
    if types is None:
        types = []
    self.names = names
    self.param_types = types
    self.checker_function = checker_function
    self.description = description
    self.is_required = is_required
    self.is_set = False
    self.value = None
680
682 if self.value is None:
683 return " "
684 else:
685 return "%s " % self.value
686
688 """Escape filenames with spaces by adding quotes (PRIVATE).
689
690 Note this will not add quotes if they are already included:
691
692 >>> print _escape_filename('example with spaces')
693 "example with spaces"
694 >>> print _escape_filename('"example with spaces"')
695 "example with spaces"
696 """
697
698
699
700
701
702
703
704
705
706
707
708
709
710 if " " not in filename:
711 return filename
712
713 if filename.startswith('"') and filename.endswith('"'):
714
715 return filename
716 else:
717 return '"%s"' % filename
718
720 """Run the Bio.Application module's doctests."""
721 import doctest
722 doctest.testmod(verbose=1)
723
if __name__ == "__main__":
    # Executed as a script rather than imported: run the self-test helper.
    _test()
727