1
2
3
4
5
6
7
8 """
9 This module allows to control fdist.
10
11 This will allow to call fdist and associated program (cplot, datacal, pv).
12
13 http://www.rubic.rdg.ac.uk/~mab/software.html
14 """
15
16 import os
17 import tempfile
18 import sys
19 from shutil import copyfile
20 from random import randint, random
21 from time import strftime, clock
22
23
24 if sys.version_info[0] == 3:
25 maxint = sys.maxsize
26 else:
27 maxint = sys.maxint
28
30
31 if f=="-nan": f="nan"
32 return float(f)
33
35 - def __init__(self, fdist_dir = '', ext = None):
36 """Initializes the controller.
37
38 fdist_dir is the directory where fdist2 is.
39 ext is the extension of binaries (.exe on windows,
40 none on Unix)
41
42 """
43 self.tmp_idx = 0
44 self.fdist_dir = fdist_dir
45 self.os_name = os.name
46 if sys.platform=='win32':
47 py_ext = '.exe'
48 else:
49 py_ext = ''
50 if ext == None:
51 self.ext = py_ext
52 else:
53 self.ext = ext
54 exec_counts = 0
55
57 """Returns the path to an fdist application.
58
59 Includes Path where fdist can be found plus executable extension.
60 """
61 if self.fdist_dir == '':
62 return app + self.ext
63 else:
64 return os.sep.join([self.fdist_dir, app]) + self.ext
65
67 """Gets a temporary file name.
68
69 Returns a temporary file name, if executing inside jython
70 tries to replace unexisting tempfile.mkstemp().
71 """
72 self.tmp_idx += 1
73 return strftime("%H%M%S") + str(int(clock()*100)) + str(randint(0,1000)) + str(self.tmp_idx)
74
75 - def run_datacal(self, data_dir='.', version=1,
76 crit_freq = 0.99, p = 0.5, beta= (0.25, 0.25)):
77 """Executes datacal.
78
79 data_dir - Where the data is found.
80 """
81 in_name = self._get_temp_file()
82 out_name = self._get_temp_file()
83 f = open(data_dir + os.sep + in_name, 'w')
84 if version==1:
85 f.write('a\n')
86 datacal_name = "datacal"
87 else:
88 f.write('%f\n%f\n%f %f\na\n' % (crit_freq, p, beta[0], beta[1]))
89 datacal_name = "Ddatacal"
90 f.close()
91 curr_dir = os.getcwd()
92 os.system('cd ' + data_dir + ' && ' +
93 self._get_path(datacal_name) + ' < ' + in_name + ' > ' + out_name)
94 f = open(data_dir + os.sep + out_name)
95 if version == 1:
96 fst_line = f.readline().rstrip().split(' ')
97 fst = my_float(fst_line[4])
98 sample_line = f.readline().rstrip().split(' ')
99 sample = int(sample_line[9])
100 else:
101 l = f.readline().rstrip().split(" ")
102 loci, pops = int(l[-5]), int(l[-2])
103 fst_line = f.readline().rstrip().split(' ')
104 fst = my_float(fst_line[4])
105 sample_line = f.readline().rstrip().split(' ')
106 sample = int(sample_line[9])
107 F_line = f.readline().rstrip().split(' ')
108 F, obs = my_float(F_line[5]), int (F_line[8])
109 f.close()
110 os.remove(data_dir + os.sep + in_name)
111 os.remove(data_dir + os.sep + out_name)
112 if version==1:
113 return fst, sample
114 else:
115 return fst, sample, loci, pops, F, obs
116
118 """Generates an INTFILE.
119
120 Parameter:
121 data_dir - data directory
122 """
123 inf = open(data_dir + os.sep + 'INTFILE', 'w')
124 for i in range(98):
125 inf.write(str(randint(-maxint+1,maxint-1)) + '\n')
126 inf.write('8\n')
127 inf.close()
128
129 - def run_fdist(self, npops, nsamples, fst, sample_size,
130 mut = 0, num_sims = 50000, data_dir='.',
131 is_dominant = False, theta = 0.06, beta = (0.25, 0.25),
132 max_freq = 0.99):
133 """Executes (d)fdist.
134
135 Parameters:
136 npops - Number of populations
137 nsamples - Number of populations sampled
138 fst - expected Fst
139 sample_size - Sample size per population
140 For dfdist: if zero a sample size file has to be provided
141 mut - 1=Stepwise, 0=Infinite allele
142 num_sims - number of simulations
143 data_dir - Where the data is found
144 is_dominant - If true executes dfdist
145 theta - Theta (=2Nmu)
146 beta - Parameters for the beta prior
147 max_freq - Maximum allowed frequency of the commonest allele
148
149 Returns:
150 fst - Average Fst
151
152 Important Note: This can take quite a while to run!
153 """
154 if fst >= 0.9:
155
156 fst = 0.899
157 if fst <= 0.0:
158
159 fst = 0.001
160 in_name = 'input.fd'
161 out_name = 'output.fd'
162
163 f = open(data_dir + os.sep + in_name, 'w')
164 f.write('y\n\n')
165 f.close()
166 if is_dominant:
167 config_name = "Dfdist_params"
168 else:
169 config_name = "fdist_params2.dat"
170
171 f = open(data_dir + os.sep + config_name, 'w')
172 f.write(str(npops) + '\n')
173 f.write(str(nsamples) + '\n')
174 f.write(str(fst) + '\n')
175 f.write(str(sample_size) + '\n')
176 if is_dominant:
177 f.write(str(theta) + '\n')
178 else:
179 f.write(str(mut) + '\n')
180 f.write(str(num_sims) + '\n')
181 if is_dominant:
182 f.write("%f %f\n" % beta)
183 f.write("%f\n" % max_freq)
184 f.close()
185 self._generate_intfile(data_dir)
186
187 if is_dominant:
188 bin_name = "Dfdist"
189 else:
190 bin_name = "fdist2"
191 os.system('cd ' + data_dir + ' && ' +
192 self._get_path(bin_name) + ' < ' + in_name + ' > ' + out_name)
193 f = open(data_dir + os.sep + out_name)
194 lines = f.readlines()
195 f.close()
196 for line in lines:
197 if line.startswith('average Fst'):
198 fst = my_float(line.rstrip().split(' ')[-1])
199 os.remove(data_dir + os.sep + in_name)
200 os.remove(data_dir + os.sep + out_name)
201 return fst
202
203 - def run_fdist_force_fst(self, npops, nsamples, fst, sample_size,
204 mut = 0, num_sims = 50000, data_dir='.',
205 try_runs = 5000, limit=0.001,
206 is_dominant = False, theta = 0.06, beta = (0.25, 0.25),
207 max_freq = 0.99):
208 """Executes fdist trying to force Fst.
209
210 Parameters:
211 try_runs - Number of simulations on the part trying to get
212 Fst correct
213 limit - Interval limit
214 Other parameters can be seen on run_fdist.
215 """
216 max_run_fst = 1
217 min_run_fst = 0
218 current_run_fst = fst
219 old_fst = fst
220 while True:
221
222 real_fst = self.run_fdist(npops, nsamples,
223 current_run_fst, sample_size,
224 mut, try_runs, data_dir,
225 is_dominant, theta, beta, max_freq)
226
227 if abs(real_fst - fst) < limit:
228
229 return self.run_fdist(npops, nsamples, current_run_fst,
230 sample_size,
231 mut, num_sims, data_dir,
232 is_dominant, theta, beta, max_freq)
233 old_fst = current_run_fst
234 if real_fst > fst:
235 max_run_fst = current_run_fst
236 if current_run_fst < min_run_fst + limit:
237
238
239 return self.run_fdist(npops, nsamples, current_run_fst,
240 sample_size, mut, num_sims, data_dir)
241 current_run_fst = (min_run_fst + current_run_fst)/2
242 else:
243 min_run_fst = current_run_fst
244 if current_run_fst > max_run_fst - limit:
245
246
247 return self.run_fdist(npops, nsamples, current_run_fst,
248 sample_size, mut, num_sims, data_dir,
249 is_dominant, theta, beta, max_freq)
250 current_run_fst = (max_run_fst + current_run_fst)/2
251
252 - def run_cplot(self, ci= 0.95, data_dir='.', version = 1, smooth=0.04):
253 """Executes cplot.
254
255 ci - Confidence interval.
256 data_dir - Where the data is found.
257 """
258 in_name = self._get_temp_file()
259 out_name = self._get_temp_file()
260 f = open(data_dir + os.sep + in_name, 'w')
261 if version == 1:
262 f.write('out.dat out.cpl\n' + str(ci) + '\n')
263 else:
264 f.write("\n".join([
265 "data_fst_outfile out.cpl out.dat",
266 str(ci), str(smooth)]))
267 f.close()
268 curr_dir = os.getcwd()
269 self._generate_intfile(data_dir)
270 if version == 1:
271 cplot_name = "cplot"
272 else:
273 cplot_name = "cplot2"
274 os.system('cd ' + data_dir + ' && ' +
275 self._get_path(cplot_name) + ' < ' + in_name + ' > ' + out_name)
276 os.remove(data_dir + os.sep + in_name)
277 os.remove(data_dir + os.sep + out_name)
278 f = open(data_dir + os.sep + 'out.cpl')
279 conf_lines = []
280 l = f.readline()
281 try:
282 while l!='':
283 conf_lines.append(
284 tuple(map(lambda x : my_float(x), l.rstrip().split(' ')))
285 )
286 l = f.readline()
287 except ValueError:
288 f.close()
289 return []
290 f.close()
291 return conf_lines
292
293 - def run_pv(self, out_file='probs.dat', data_dir='.',
294 version = 1, smooth=0.04):
295 """Executes pv.
296
297 out_file - Name of output file.
298 data_dir - Where the data is found.
299 """
300 in_name = self._get_temp_file()
301 out_name = self._get_temp_file()
302 f = open(data_dir + os.sep + in_name, 'w')
303 f.write('data_fst_outfile ' + out_file + ' out.dat\n')
304 f.write(str(smooth) + '\n')
305 f.close()
306 self._generate_intfile(data_dir)
307 if version == 1:
308 pv_name = "pv"
309 else:
310 pv_name = "pv2"
311 os.system('cd ' + data_dir + ' && ' +
312 self._get_path(pv_name) + ' < ' + in_name + ' > ' + out_name)
313 pvf = open(data_dir + os.sep + out_file, 'r')
314 result = map(lambda x: tuple(map(lambda y: my_float(y), x.rstrip().split(' '))),
315 pvf.readlines())
316 pvf.close()
317 os.remove(data_dir + os.sep + in_name)
318 os.remove(data_dir + os.sep + out_name)
319 return result
320