1
2
3
4
5 """Command line wrapper for the short read aligner Novoalign by Novocraft (www.novocraft.com)
6
7 Last checked against version: 2.05.04
8 """
9 import types
10 from Bio.Application import _Option, AbstractCommandline
11
class NovoalignCommandline(AbstractCommandline):
    """Command line wrapper for the short read alignment program novoalign by Novocraft."""

    def __init__(self, cmd="novoalign", **kwargs):
        """Declare the supported novoalign options and initialise the base class.

        Arguments:
         - cmd - name of the executable to run (default "novoalign").
         - kwargs - forwarded unchanged to AbstractCommandline.__init__;
           the ``__main__`` demo in this file passes option values such as
           ``database=...`` and ``readfile=...`` this way.

        ``self.parameters`` is assigned before AbstractCommandline.__init__
        is called.
        """
        READ_FORMAT = ['FA', 'SLXFQ', 'STDFQ', 'ILMFQ', 'PRB', 'PRBnSEQ']
        REPORT_FORMAT = ['Native', 'Pairwise', 'SAM']
        REPEAT_METHOD = ['None', 'Random', 'All', 'Exhaustive', '0.99']

        # Shared validators.  ``int`` and ``str`` are the very objects that
        # ``types.IntType`` / ``types.StringType`` name on Python 2, so the
        # checks are unchanged there while also working on Python 3, where
        # those ``types`` attributes no longer exist.
        def is_int(value):
            return isinstance(value, int)

        def is_str(value):
            return isinstance(value, str)

        def is_repeat_method(value):
            # Only the first word names the method; the help text says
            # 'All' and 'Exhaustive' may be followed by a limit.  An empty
            # or blank value is rejected instead of raising IndexError.
            words = value.split()
            return bool(words) and words[0] in REPEAT_METHOD

        # Each _Option receives, positionally: [switch, attribute name],
        # a list of type tags, a validation callable (or None), a flag,
        # the help text and a trailing flag.
        # NOTE(review): both flags are 0 for every option here -- confirm
        # their exact meaning against Bio.Application._Option.
        self.parameters = [
            _Option(["-d", "database"], ["input", "file"],
                    None, 0, "database filename",
                    0),
            _Option(["-f", "readfile"], ["input", "file"],
                    None, 0, "read file",
                    0),
            _Option(["-F", "format"], ["input", "option"],
                    lambda x: x in READ_FORMAT,
                    0,
                    "Format of read files.\n\nAllowed values: %s"
                    % ", ".join(READ_FORMAT),
                    0),

            _Option(["-t", "threshold"], ["input"],
                    is_int,
                    0, "Threshold for alignment score",
                    0),
            _Option(["-g", "gap_open"], ["input"],
                    is_int,
                    0, "Gap opening penalty [default: 40]",
                    0),
            _Option(["-x", "gap_extend"], ["input"],
                    is_int,
                    0, "Gap extend penalty [default: 15]",
                    0),
            _Option(["-u", "unconverted"], ["input"],
                    is_int,
                    0,
                    "Experimental: unconverted cytosines penalty in bisulfite mode\n\n"
                    "Default: no penalty",
                    0),

            _Option(["-l", "good_bases"], ["input"],
                    is_int,
                    0,
                    "Minimum number of good quality bases [default: log(N_g, 4) + 5]",
                    0),
            _Option(["-h", "homopolymer"], ["input"],
                    is_int,
                    0,
                    "Homopolymer read filter [default: 20; disable: negative value]",
                    0),

            _Option(["-a", "adapter3"], ["input"],
                    is_str,
                    0,
                    "Strips a 3' adapter sequence prior to alignment.\n\n"
                    "With paired ends two adapters can be specified",
                    0),
            _Option(["-n", "truncate"], ["input"],
                    is_int,
                    0, "Truncate to specific length before alignment",
                    0),
            _Option(["-s", "trimming"], ["input"],
                    is_int,
                    0,
                    "If fail to align, trim by s bases until they map or become shorter than l.\n\n"
                    "Default: 2",
                    0),
            _Option(["-5", "adapter5"], ["input"],
                    is_str,
                    0,
                    "Strips a 5' adapter sequence.\n\n"
                    "Similar to -a (adapter3), but on the 5' end.",
                    0),

            _Option(["-o", "report"], ["input"],
                    lambda x: x in REPORT_FORMAT,
                    0,
                    "Specifies the report format.\n\nAllowed values: %s\nDefault: Native"
                    % ", ".join(REPORT_FORMAT),
                    0),
            _Option(["-Q", "quality"], ["input"],
                    is_int,
                    0,
                    "Lower threshold for an alignment to be reported [default: 0]",
                    0),
            _Option(["-R", "repeats"], ["input"],
                    is_int,
                    0,
                    "If score difference is higher, report repeats.\n\n"
                    "Otherwise -r read method applies [default: 5]",
                    0),
            _Option(["-r", "read_method"], ["input"],
                    is_repeat_method,
                    0,
                    ("Methods to report reads with multiple matches.\n\n"
                     "Allowed values: %s\n"
                     "'All' and 'Exhaustive' accept limits.")
                    % ", ".join(REPEAT_METHOD),
                    0),
            _Option(["-e", "recorded"], ["input"],
                    is_int,
                    0,
                    "Alignments recorded with score equal to the best.\n\n"
                    "Default: 1000 in default read method, otherwise no limit.",
                    0),
            _Option(["-q", "qual_digits"], ["input"],
                    is_int,
                    0, "Decimal digits for quality scores [default: 0]",
                    0),

            _Option(["-i", "fragment"], ["input"],
                    # Expects two whitespace-separated fields, e.g. "250 30".
                    lambda x: len(x.split()) == 2,
                    0,
                    "Fragment length (2 reads + insert) and standard deviation [default: 250 30]",
                    0),
            _Option(["-v", "variation"], ["input"],
                    is_int,
                    0, "Structural variation penalty [default: 70]",
                    0),

            _Option(["-m", "miRNA"], ["input"],
                    is_int,
                    0,
                    "Sets miRNA mode and optionally sets a value for the region scanned [default: off]",
                    0),

            _Option(["-c", "cores"], ["input"],
                    is_int,
                    0,
                    "Number of threads, disabled on free versions [default: number of cores]",
                    0),

            _Option(["-k", "read_cal"], ["input"],
                    is_str,
                    0, "Read quality calibration from file (mismatch counts)",
                    0),
            _Option(["-K", "write_cal"], ["input"],
                    is_str,
                    0, "Accumulate mismatch counts and write to file",
                    0),
        ]
        AbstractCommandline.__init__(self, cmd, **kwargs)
147
if __name__ == '__main__':
    # Small usage demo: build a command line with two keyword options,
    # set a few more through attribute assignment, then print the result.
    cml = NovoalignCommandline(database='~/some_dir/some_db',
                               readfile='~/some_dir/some_seq.txt')
    cml.format = 'PRBnSEQ'
    # The "-r" option is registered under the name "read_method"; the
    # previous "r_method" matched no declared option.
    # NOTE(review): presumably AbstractCommandline rejects unknown option
    # names on assignment -- confirm against Bio.Application.
    cml.read_method = '0.99'
    cml.fragment = '250 20'
    cml.miRNA = 100
    # Call form prints the same text on Python 2 and also runs on Python 3.
    print(cml)
156
157