Package Bio :: Package PopGen :: Package GenePop :: Module Controller
[hide private]
[frames] | no frames]

Source Code for Module Bio.PopGen.GenePop.Controller

  1  # Copyright 2009 by Tiago Antao <tiagoantao@gmail.com>.  All rights reserved. 
  2  # This code is part of the Biopython distribution and governed by its 
  3  # license.  Please see the LICENSE file that should have been included 
  4  # as part of this package. 
  5   
  6   
  7   
  8  """ 
  9  This module allows to control GenePop. 
 10   
 11  """ 
 12   
 13  import os 
 14  import re 
 15  import shutil 
 16  import subprocess 
 17  import sys 
 18  import tempfile 
 19   
 20  from Bio.Application import AbstractCommandline, _Argument, _Option 
 21   
22 -def _gp_float(tok):
23 """Gets a float from a token, if it fails, returns the string. 24 """ 25 try: 26 return float(tok) 27 except ValueError: 28 return str(tok)
29
30 -def _gp_int(tok):
31 """Gets a int from a token, if it fails, returns the string. 32 """ 33 try: 34 return int(tok) 35 except ValueError: 36 return str(tok)
37 38
39 -def _read_allele_freq_table(f):
40 l = f.readline() 41 while l.find(" --")==-1: 42 if l == "": 43 raise StopIteration 44 l = f.readline() 45 alleles = filter(lambda x: x != '', f.readline().rstrip().split(" ")) 46 alleles = map(lambda x: _gp_int(x), alleles) 47 l = f.readline().rstrip() 48 table = [] 49 while l != "": 50 line = filter(lambda x: x != '', l.split(" ")) 51 try: 52 table.append( 53 (line[0], 54 map(lambda x: _gp_float(x), line[1:-1]), 55 _gp_int(line[-1]))) 56 except ValueError: 57 table.append( 58 (line[0], 59 [None] * len(alleles), 60 0)) 61 l = f.readline().rstrip() 62 return alleles, table
63
64 -def _read_table(f, funs):
65 table = [] 66 l = f.readline().rstrip() 67 while l.find("---")==-1: 68 l = f.readline().rstrip() 69 l = f.readline().rstrip() 70 while l.find("===")==-1 and l.find("---")==-1 and l != "": 71 toks = filter(lambda x: x != "", l.split(" ")) 72 line = [] 73 for i in range(len(toks)): 74 try: 75 line.append(funs[i](toks[i])) 76 except ValueError: 77 line.append(toks[i]) # Could not cast 78 table.append(tuple(line)) 79 l = f.readline().rstrip() 80 return table
81
82 -def _read_triangle_matrix(f):
83 matrix = [] 84 l = f.readline().rstrip() 85 while l != "": 86 matrix.append( 87 map(lambda x: _gp_float(x), 88 filter(lambda y: y != "", l.split(" ")))) 89 l = f.readline().rstrip() 90 return matrix
91
92 -def _read_headed_triangle_matrix(f):
93 matrix = {} 94 header = f.readline().rstrip() 95 if header.find("---")>-1 or header.find("===")>-1: 96 header = f.readline().rstrip() 97 nlines = len(filter(lambda x:x != '', header.split(' '))) - 1 98 for line_pop in range(nlines): 99 l = f.readline().rstrip() 100 vals = filter(lambda x:x != '', l.split(' ')[1:]) 101 clean_vals = [] 102 for val in vals: 103 try: 104 clean_vals.append(_gp_float(val)) 105 except ValueError: 106 clean_vals.append(None) 107 for col_pop in range(len(clean_vals)): 108 matrix[(line_pop+1, col_pop)] = clean_vals[col_pop] 109 return matrix
110
111 -def _hw_func(stream, is_locus, has_fisher = False):
112 l = stream.readline() 113 if is_locus: 114 hook = "Locus " 115 else: 116 hook = " Pop : " 117 while l != "": 118 if l.startswith(hook): 119 stream.readline() 120 stream.readline() 121 stream.readline() 122 table = _read_table(stream,[str,_gp_float,_gp_float,_gp_float,_gp_float,_gp_int,str]) 123 #loci might mean pop if hook="Locus " 124 loci = {} 125 for entry in table: 126 if len(entry) < 3: 127 loci[entry[0]] = None 128 else: 129 locus, p, se, fis_wc, fis_rh, steps = entry[:-1] 130 if se == "-": se = None 131 loci[locus] = p, se, fis_wc, fis_rh, steps 132 return loci 133 l = stream.readline() 134 #self.done = True 135 raise StopIteration 136
137 -class _FileIterator:
138 """Iterator which crawls over a stream of lines with a function. 139 140 The generator function is expected to yield a tuple, while 141 consuming input 142 """
143 - def __init__(self, func, stream, fname):
144 self.func = func 145 self.stream = stream 146 self.fname = fname 147 self.done = False
148
149 - def __iter__(self):
150 if self.done: 151 self.done = True 152 raise StopIteration 153 return self
154
155 - def next(self):
156 return self.func(self)
157
158 - def __del__(self):
159 self.stream.close() 160 try: 161 os.remove(self.fname) 162 except OSError: 163 #Jython seems to call the iterator twice 164 pass
165
166 -class _GenePopCommandline(AbstractCommandline):
167 """ Command Line Wrapper for GenePop. 168 """
169 - def __init__(self, genepop_dir=None, cmd='Genepop', **kwargs):
170 self.parameters = [ 171 _Argument(["command"], 172 ["INTEGER(.INTEGER)*"], 173 None, 174 True, 175 "GenePop option to be called"), 176 _Argument(["mode"], 177 ["Dont touch this"], 178 None, 179 True, 180 "Should allways be batch"), 181 _Argument(["input"], 182 ["input"], 183 None, 184 True, 185 "Input file"), 186 _Argument(["Dememorization"], 187 ["input"], 188 None, 189 False, 190 "Dememorization step"), 191 _Argument(["BatchNumber"], 192 ["input"], 193 None, 194 False, 195 "Number of MCMC batches"), 196 _Argument(["BatchLength"], 197 ["input"], 198 None, 199 False, 200 "Length of MCMC chains"), 201 _Argument(["HWtests"], 202 ["input"], 203 None, 204 False, 205 "Enumeration or MCMC"), 206 _Argument(["IsolBDstatistic"], 207 ["input"], 208 None, 209 False, 210 "IBD statistic (a or e)"), 211 _Argument(["MinimalDistance"], 212 ["input"], 213 None, 214 False, 215 "Minimal IBD distance"), 216 _Argument(["GeographicScale"], 217 ["input"], 218 None, 219 False, 220 "Log or Linear"), 221 ] 222 AbstractCommandline.__init__(self, cmd, **kwargs) 223 self.set_parameter("mode", "Mode=Batch")
224
225 - def set_menu(self, option_list):
226 """Sets the menu option. 227 228 Example set_menu([6,1]) = get all F statistics (menu 6.1) 229 """ 230 self.set_parameter("command", "MenuOptions="+ 231 ".".join(map(lambda x:str(x),option_list)))
232
233 - def set_input(self, fname):
234 """Sets the input file name. 235 """ 236 self.set_parameter("input", "InputFile="+fname)
237
238 -class GenePopController:
239 - def __init__(self, genepop_dir = None):
240 """Initializes the controller. 241 242 genepop_dir is the directory where GenePop is. 243 244 The binary should be called Genepop (capital G) 245 246 """ 247 self.controller = _GenePopCommandline(genepop_dir)
248
249 - def _remove_garbage(self, fname_out):
250 try: 251 if fname_out != None: os.remove(fname_out) 252 except OSError: 253 pass # safe 254 try: 255 os.remove("genepop.txt") 256 except OSError: 257 pass # safe 258 try: 259 os.remove("fichier.in") 260 except OSError: 261 pass # safe 262 try: 263 os.remove("cmdline.txt") 264 except OSError: 265 pass # safe
266
267 - def _get_opts(self, dememorization, batches, iterations, enum_test=None):
268 opts = {} 269 opts["Dememorization"]=dememorization 270 opts["BatchNumber"]=batches 271 opts["BatchLength"]=iterations 272 if enum_test != None: 273 if enum_test == True: 274 opts["HWtests"]="Enumeration" 275 else: 276 opts["HWtests"]="MCMC" 277 return opts
278
279 - def _run_genepop(self, extensions, option, fname, opts={}):
280 for extension in extensions: 281 self._remove_garbage(fname + extension) 282 self.controller.set_menu(option) 283 self.controller.set_input(fname) 284 for opt in opts: 285 self.controller.set_parameter(opt, opt+"="+str(opts[opt])) 286 self.controller() #checks error level is zero 287 self._remove_garbage(None) 288 return
289 290
291 - def _test_pop_hz_both(self, fname, type, ext, enum_test = True, 292 dememorization = 10000, batches = 20, iterations = 5000):
293 """Hardy-Weinberg test for heterozygote deficiency/excess. 294 295 Returns a population iterator containg 296 A dictionary[locus]=(P-val, SE, Fis-WC, Fis-RH, steps) 297 Some loci have a None if the info is not available 298 SE might be none (for enumerations) 299 """ 300 opts = self._get_opts(dememorization, batches, iterations, enum_test) 301 self._run_genepop([ext], [1, type], fname, opts) 302 f = open(fname + ext) 303 def hw_func(self): 304 return _hw_func(self.stream, False)
305 return _FileIterator(hw_func, f, fname + ext)
306
307 - def _test_global_hz_both(self, fname, type, ext, enum_test = True, 308 dememorization = 10000, batches = 20, iterations = 5000):
309 """Global Hardy-Weinberg test for heterozygote deficiency/excess. 310 311 Returns a triple with: 312 A list per population containg 313 (pop_name, P-val, SE, switches) 314 Some pops have a None if the info is not available 315 SE might be none (for enumerations) 316 A list per loci containg 317 (locus_name, P-val, SE, switches) 318 Some loci have a None if the info is not available 319 SE might be none (for enumerations) 320 Overall results (P-val, SE, switches) 321 322 """ 323 opts = self._get_opts(dememorization, batches, iterations, enum_test) 324 self._run_genepop([ext], [1, type], fname, opts) 325 def hw_pop_func(self): 326 return _read_table(self.stream, [str, _gp_float, _gp_float, _gp_float])
327 f1 = open(fname + ext) 328 l = f1.readline() 329 while l.find("by population") == -1: 330 l = f1.readline() 331 pop_p = _read_table(f1, [str, _gp_float, _gp_float, _gp_float]) 332 f2 = open(fname + ext) 333 l = f2.readline() 334 while l.find("by locus") == -1: 335 l = f2.readline() 336 loc_p = _read_table(f2, [str, _gp_float, _gp_float, _gp_float]) 337 f = open(fname + ext) 338 l = f.readline() 339 while l.find("all locus") == -1: 340 l = f.readline() 341 f.readline() 342 f.readline() 343 f.readline() 344 f.readline() 345 l = f.readline().rstrip() 346 p, se, switches = tuple(map(lambda x: _gp_float(x), 347 filter(lambda y: y != "",l.split(" ")))) 348 f.close() 349 return pop_p, loc_p, (p, se, switches) 350 351 #1.1
352 - def test_pop_hz_deficiency(self, fname, enum_test = True, 353 dememorization = 10000, batches = 20, iterations = 5000):
354 """Hardy-Weinberg test for heterozygote deficiency. 355 356 Returns a population iterator containg 357 A dictionary[locus]=(P-val, SE, Fis-WC, Fis-RH, steps) 358 Some loci have a None if the info is not available 359 SE might be none (for enumerations) 360 """ 361 return self._test_pop_hz_both(fname, 1, ".D", enum_test, 362 dememorization, batches, iterations)
363 364 #1.2
365 - def test_pop_hz_excess(self, fname, enum_test = True, 366 dememorization = 10000, batches = 20, iterations = 5000):
367 """Hardy-Weinberg test for heterozygote deficiency. 368 369 Returns a population iterator containg 370 A dictionary[locus]=(P-val, SE, Fis-WC, Fis-RH, steps) 371 Some loci have a None if the info is not available 372 SE might be none (for enumerations) 373 """ 374 return self._test_pop_hz_both(fname, 2, ".E", enum_test, 375 dememorization, batches, iterations)
376 377 #1.3 P file
378 - def test_pop_hz_prob(self, fname, ext, enum_test = False, 379 dememorization = 10000, batches = 20, iterations = 5000):
380 """Hardy-Weinberg test based on probability. 381 382 Returns 2 iterators and a final tuple: 383 384 1. Returns a loci iterator containing 385 b. A dictionary[pop_pos]=(P-val, SE, Fis-WC, Fis-RH, steps) 386 Some pops have a None if the info is not available 387 SE might be none (for enumerations) 388 c. Result of Fisher's test (Chi2, deg freedom, prob) 389 2. Returns a population iterator containg 390 a. A dictionary[locus]=(P-val, SE, Fis-WC, Fis-RH, steps) 391 Some loci have a None if the info is not available 392 SE might be none (for enumerations) 393 b. Result of Fisher's test (Chi2, deg freedom, prob) 394 3. (Chi2, deg freedom, prob) 395 """ 396 opts = self._get_opts(dememorization, batches, iterations, enum_test) 397 self._run_genepop([ext], [1, 3], fname, opts) 398 def hw_prob_loci_func(self): 399 return _hw_func(self.stream, True, True)
400 def hw_prob_pop_func(self): 401 return _hw_func(self.stream, False, True) 402 shutil.copyfile(fname+".P", fname+".P2") 403 f1 = open(fname + ".P") 404 f2 = open(fname + ".P2") 405 return _FileIterator(hw_prob_loci_func, f1, fname + ".P"), _FileIterator(hw_prob_pop_func, f2, fname + ".P2") 406 407 #1.4
408 - def test_global_hz_deficiency(self, fname, enum_test = True, 409 dememorization = 10000, batches = 20, iterations = 5000):
410 """Global Hardy-Weinberg test for heterozygote deficiency. 411 412 Returns a triple with: 413 An list per population containg 414 (pop_name, P-val, SE, switches) 415 Some pops have a None if the info is not available 416 SE might be none (for enumerations) 417 An list per loci containg 418 (locus_name, P-val, SE, switches) 419 Some loci have a None if the info is not available 420 SE might be none (for enumerations) 421 Overall results (P-val, SE, switches) 422 """ 423 return self._test_global_hz_both(fname, 4, ".DG", enum_test, 424 dememorization, batches, iterations)
425 426 427 #1.5
428 - def test_global_hz_excess(self, fname, enum_test = True, 429 dememorization = 10000, batches = 20, iterations = 5000):
430 """Global Hardy-Weinberg test for heterozygote excess. 431 432 Returns a triple with: 433 An list per population containg 434 (pop_name, P-val, SE, switches) 435 Some pops have a None if the info is not available 436 SE might be none (for enumerations) 437 An list per loci containg 438 (locus_name, P-val, SE, switches) 439 Some loci have a None if the info is not available 440 SE might be none (for enumerations) 441 Overall results (P-val, SE, switches) 442 """ 443 return self._test_global_hz_both(fname, 5, ".EG", enum_test, 444 dememorization, batches, iterations)
445 446 #2.1
447 - def test_ld(self, fname, 448 dememorization = 10000, batches = 20, iterations = 5000):
449 opts = self._get_opts(dememorization, batches, iterations) 450 self._run_genepop([".DIS"], [2, 1], fname, opts) 451 def ld_pop_func(self): 452 current_pop = None 453 l = self.stream.readline().rstrip() 454 if l == "": 455 self.done = True 456 raise StopIteration 457 toks = filter(lambda x: x != "", l.split(" ")) 458 pop, locus1, locus2 = toks[0], toks[1], toks[2] 459 if not hasattr(self, "start_locus1"): 460 start_locus1, start_locus2 = locus1, locus2 461 current_pop = -1 462 if locus1 == start_locus1 and locus2 == start_locus2: 463 current_pop += 1 464 if toks[3] == "No": 465 return current_pop, pop, (locus1, locus2), None 466 p, se, switches = _gp_float(toks[3]), _gp_float(toks[4]), _gp_int(toks[5]) 467 return current_pop, pop, (locus1, locus2), (p, se, switches)
468 def ld_func(self): 469 l = self.stream.readline().rstrip() 470 if l == "": 471 self.done = True 472 raise StopIteration 473 toks = filter(lambda x: x != "", l.split(" ")) 474 locus1, locus2 = toks[0], toks[2] 475 try: 476 chi2, df, p = _gp_float(toks[3]), _gp_int(toks[4]), _gp_float(toks[5]) 477 except ValueError: 478 return (locus1, locus2), None 479 return (locus1, locus2), (chi2, df, p) 480 f1 = open(fname + ".DIS") 481 l = f1.readline() 482 while l.find("----")==-1: 483 l = f1.readline() 484 shutil.copyfile(fname + ".DIS", fname + ".DI2") 485 f2 = open(fname + ".DI2") 486 l = f2.readline() 487 while l.find("Locus pair")==-1: 488 l = f2.readline() 489 while l.find("----")==-1: 490 l = f2.readline() 491 return _FileIterator(ld_pop_func, f1, fname+".DIS"), _FileIterator(ld_func, f2, fname + ".DI2") 492 493 #2.2
494 - def create_contingency_tables(self, fname):
495 raise NotImplementedError
496 497 #3.1 PR/GE files
498 - def test_genic_diff_all(self, fname, 499 dememorization = 10000, batches = 20, iterations = 5000):
500 raise NotImplementedError
501 502 #3.2 PR2/GE2 files
503 - def test_genic_diff_pair(self, fname, 504 dememorization = 10000, batches = 20, iterations = 5000):
505 raise NotImplementedError
506 507 #3.3 G files
508 - def test_genotypic_diff_all(self, fname, 509 dememorization = 10000, batches = 20, iterations = 5000):
510 raise NotImplementedError
511 512 #3.4 2G2 files
513 - def test_genotypic_diff_pair(self, fname, 514 dememorization = 10000, batches = 20, iterations = 5000):
515 raise NotImplementedError
516 517 #4
518 - def estimate_nm(self, fname):
519 self._run_genepop(["PRI"], [4], fname) 520 f = open(fname + ".PRI") 521 lines = f.readlines() # Small file, it is ok 522 f.close() 523 for line in lines: 524 m = re.search("Mean sample size: ([.0-9]+)", line) 525 if m != None: 526 mean_sample_size = _gp_float(m.group(1)) 527 m = re.search("Mean frequency of private alleles p\(1\)= ([.0-9]+)", line) 528 if m != None: 529 mean_priv_alleles = _gp_float(m.group(1)) 530 m = re.search("N=10: ([.0-9]+)", line) 531 if m != None: 532 mig10 = _gp_float(m.group(1)) 533 m = re.search("N=25: ([.0-9]+)", line) 534 if m != None: 535 mig25 = _gp_float(m.group(1)) 536 m = re.search("N=50: ([.0-9]+)", line) 537 if m != None: 538 mig50 = _gp_float(m.group(1)) 539 m = re.search("for size= ([.0-9]+)", line) 540 if m != None: 541 mig_corrected = _gp_float(m.group(1)) 542 os.remove(fname + ".PRI") 543 return mean_sample_size, mean_priv_alleles, mig10, mig25, mig50, mig_corrected
544 545 #5.1
546 - def calc_allele_genotype_freqs(self, fname):
547 """Calculates allele and genotype frequencies per locus and per sample. 548 549 Parameters: 550 fname - file name 551 552 Returns tuple with 2 elements: 553 Population iterator with 554 population name 555 Locus dictionary with key = locus name and content tuple as 556 Genotype List with 557 (Allele1, Allele2, observed, expected) 558 (expected homozygotes, observed hm, 559 expected heterozygotes, observed ht) 560 Allele frequency/Fis dictionary with allele as key and 561 (count, frequency, Fis Weir & Cockerham) 562 Totals as a pair 563 count 564 Fis Weir & Cockerham, 565 Fis Robertson & Hill 566 Locus iterator with 567 Locus name 568 allele list 569 Population list with a triple 570 population name 571 list of allele frequencies in the same order as allele list above 572 number of genes 573 574 575 Will create a file called fname.INF 576 """ 577 self._run_genepop(["INF"], [5,1], fname) 578 #First pass, general information 579 #num_loci = None 580 #num_pops = None 581 #f = open(fname + ".INF") 582 #l = f.readline() 583 #while (num_loci == None or num_pops == None) and l != '': 584 # m = re.search("Number of populations detected : ([0-9+])", l) 585 # if m != None: 586 # num_pops = _gp_int(m.group(1)) 587 # m = re.search("Number of loci detected : ([0-9+])", l) 588 # if m != None: 589 # num_loci = _gp_int(m.group(1)) 590 # l = f.readline() 591 #f.close() 592 def pop_parser(self): 593 if hasattr(self, "old_line"): 594 l = self.old_line 595 del self.old_line 596 else: 597 l = self.stream.readline() 598 loci_content = {} 599 while l != '': 600 l = l.rstrip() 601 if l.find("Tables of allelic frequencies for each locus")>-1: 602 return self.curr_pop, loci_content 603 match = re.match(".*Pop: (.+) Locus: (.+)", l) 604 if match != None: 605 pop = match.group(1) 606 locus = match.group(2) 607 if not hasattr(self, "first_locus"): 608 self.first_locus = locus 609 if hasattr(self, "curr_pop"): 610 if self.first_locus == locus: 611 old_pop = self.curr_pop 612 #self.curr_pop = pop 613 self.old_line = l 614 del self.first_locus 615 del self.curr_pop 616 return old_pop, loci_content 617 self.curr_pop = pop 618 else: 619 l = self.stream.readline() 620 continue 621 geno_list = [] 622 l = self.stream.readline() 623 if l.find("No data")>-1: continue 624 625 while l.find("Genotypes Obs.")==-1: 626 l = self.stream.readline() 627 628 while l != "\n": 629 m2 = re.match(" +([0-9]+) , ([0-9]+) *([0-9]+) *(.+)",l) 630 if m2 != None: 631 geno_list.append((_gp_int(m2.group(1)), _gp_int(m2.group(2)), 632 _gp_int(m2.group(3)), _gp_float(m2.group(4)))) 633 else: 634 l = self.stream.readline() 635 continue 636 l = self.stream.readline() 637 638 while l.find("Expected number of ho")==-1: 639 l = self.stream.readline() 640 expHo = _gp_float(l[38:]) 641 l = self.stream.readline() 642 obsHo = _gp_int(l[38:]) 643 l = self.stream.readline() 644 expHe = _gp_float(l[38:]) 645 l = self.stream.readline() 646 obsHe = _gp_int(l[38:]) 647 l = self.stream.readline() 648 649 while l.find("Sample count")==-1: 650 l = self.stream.readline() 651 l = self.stream.readline() 652 freq_fis={} 653 overall_fis = None 654 while l.find("----")==-1: 655 vals = filter(lambda x: x!='', 656 l.rstrip().split(' ')) 657 if vals[0]=="Tot": 658 overall_fis = _gp_int(vals[1]), \ 659 _gp_float(vals[2]), _gp_float(vals[3]) 660 else: 661 freq_fis[_gp_int(vals[0])] = _gp_int(vals[1]), \ 662 _gp_float(vals[2]), _gp_float(vals[3]) 663 l = self.stream.readline() 664 loci_content[locus] = geno_list, \ 665 (expHo, obsHo, expHe, obsHe), \ 666 freq_fis, overall_fis 667 self.done = True 668 raise StopIteration
669 def locus_parser(self): 670 l = self.stream.readline() 671 while l != "": 672 l = l.rstrip() 673 match = re.match(" Locus: (.+)", l) 674 if match != None: 675 locus = match.group(1) 676 alleles, table = _read_allele_freq_table(self.stream) 677 return locus, alleles, table 678 l = self.stream.readline() 679 self.done = True 680 raise StopIteration 681 682 popf = open(fname + ".INF") 683 shutil.copyfile(fname + ".INF", fname + ".IN2") 684 locf = open(fname + ".IN2") 685 pop_iter = _FileIterator(pop_parser, popf, fname + ".INF") 686 locus_iter = _FileIterator(locus_parser, locf, fname + ".IN2") 687 return (pop_iter, locus_iter) 688
689 - def _calc_diversities_fis(self, fname, ext):
690 self._run_genepop([ext], [5,2], fname) 691 f = open(fname + ext) 692 l = f.readline() 693 while l != "": 694 l = l.rstrip() 695 if l.startswith("Statistics per sample over all loci with at least two individuals typed"): 696 avg_fis = _read_table(f, [str, _gp_float, _gp_float, _gp_float]) 697 avg_Qintra = _read_table(f, [str, _gp_float]) 698 l = f.readline() 699 f.close() 700 def fis_func(self): 701 l = self.stream.readline() 702 while l != "": 703 l = l.rstrip() 704 m = re.search("Locus: (.+)", l) 705 if m != None: 706 locus = m.group(1) 707 self.stream.readline() 708 if self.stream.readline().find("No complete")>-1: return locus, None 709 self.stream.readline() 710 fis_table = _read_table(self.stream, [str, _gp_float, _gp_float, _gp_float]) 711 self.stream.readline() 712 avg_qinter, avg_fis = tuple(map (lambda x: _gp_float(x), 713 filter(lambda y:y != "", self.stream.readline().split(" ")))) 714 return locus, fis_table, avg_qinter, avg_fis 715 l = self.stream.readline() 716 self.done = True 717 raise StopIteration
718 dvf = open(fname + ext) 719 return _FileIterator(fis_func, dvf, fname + ext), avg_fis, avg_Qintra 720 721 #5.2
722 - def calc_diversities_fis_with_identity(self, fname):
723 return self._calc_diversities_fis(fname, ".DIV")
724 725 #5.3
726 - def calc_diversities_fis_with_size(self, fname):
727 raise NotImplementedError
728 729 #6.1 Less genotype frequencies
730 - def calc_fst_all(self, fname):
731 """Executes GenePop and gets Fst/Fis/Fit (all populations) 732 733 Parameters: 734 fname - file name 735 736 Returns: 737 (multiLocusFis, multiLocusFst, multiLocus Fit), 738 Iterator of tuples 739 (Locus name, Fis, Fst, Fit, Qintra, Qinter) 740 741 Will create a file called fname.FST . 742 743 This does not return the genotype frequencies. 744 745 """ 746 self._run_genepop([".FST"], [6,1], fname) 747 f = open(fname + ".FST") 748 l = f.readline() 749 while l != '': 750 if l.startswith(' All:'): 751 toks=filter(lambda x:x!="", l.rstrip().split(' ')) 752 try: 753 allFis = _gp_float(toks[1]) 754 except ValueError: 755 allFis = None 756 try: 757 allFst = _gp_float(toks[2]) 758 except ValueError: 759 allFst = None 760 try: 761 allFit = _gp_float(toks[3]) 762 except ValueError: 763 allFit = None 764 l = f.readline() 765 f.close() 766 f = open(fname + ".FST") 767 def proc(self): 768 if hasattr(self, "last_line"): 769 l = self.last_line 770 del self.last_line 771 else: 772 l = self.stream.readline() 773 locus = None 774 fis = None 775 fst = None 776 fit = None 777 qintra = None 778 qinter = None 779 while l != '': 780 l = l.rstrip() 781 if l.startswith(' Locus:'): 782 if locus != None: 783 self.last_line = l 784 return locus, fis, fst, fit, qintra, qinter 785 else: 786 locus = l.split(':')[1].lstrip() 787 elif l.startswith('Fis^='): 788 fis = _gp_float(l.split(' ')[1]) 789 elif l.startswith('Fst^='): 790 fst = _gp_float(l.split(' ')[1]) 791 elif l.startswith('Fit^='): 792 fit = _gp_float(l.split(' ')[1]) 793 elif l.startswith('1-Qintra^='): 794 qintra = _gp_float(l.split(' ')[1]) 795 elif l.startswith('1-Qinter^='): 796 qinter = _gp_float(l.split(' ')[1]) 797 return locus, fis, fst, fit, qintra, qinter 798 l = self.stream.readline() 799 if locus != None: 800 return locus, fis, fst, fit, qintra, qinter 801 self.stream.close() 802 self.done = True 803 raise StopIteration
804 return (allFis, allFst, allFit), _FileIterator(proc , f, fname + ".FST") 805 806 #6.2
807 - def calc_fst_pair(self, fname):
808 self._run_genepop([".ST2", ".MIG"], [6,2], fname) 809 f = open(fname + ".ST2") 810 l = f.readline() 811 while l != "": 812 l = l.rstrip() 813 if l.startswith("Estimates for all loci"): 814 avg_fst = _read_headed_triangle_matrix(f) 815 l = f.readline() 816 f.close() 817 def loci_func(self): 818 l = self.stream.readline() 819 while l != "": 820 l = l.rstrip() 821 m = re.search(" Locus: (.+)", l) 822 if m != None: 823 locus = m.group(1) 824 matrix = _read_headed_triangle_matrix(self.stream) 825 return locus, matrix 826 l = self.stream.readline() 827 self.done = True 828 raise StopIteration
829 stf = open(fname + ".ST2") 830 os.remove(fname + ".MIG") 831 return _FileIterator(loci_func, stf, fname + ".ST2"), avg_fst 832 833 #6.3
834 - def calc_rho_all(self, fname):
835 raise NotImplementedError
836 837 #6.4
838 - def calc_rho_pair(self, fname):
839 raise NotImplementedError
840
841 - def _calc_ibd(self, fname, sub, stat="a", scale="Log", min_dist=0.00001):
842 """Calculates isolation by distance statistics 843 """ 844 self._run_genepop([".GRA", ".MIG", ".ISO"], [6,sub], 845 fname, opts = { 846 "MinimalDistance" : min_dist, 847 "GeographicScale" : scale, 848 "IsolBDstatistic" : stat, 849 }) 850 f = open(fname + ".ISO") 851 f.readline() 852 f.readline() 853 f.readline() 854 f.readline() 855 estimate = _read_triangle_matrix(f) 856 f.readline() 857 f.readline() 858 distance = _read_triangle_matrix(f) 859 f.readline() 860 match = re.match("a = (.+), b = (.+)", f.readline().rstrip()) 861 a = _gp_float(match.group(1)) 862 b = _gp_float(match.group(2)) 863 f.readline() 864 f.readline() 865 match = re.match(" b=(.+)", f.readline().rstrip()) 866 bb = _gp_float(match.group(1)) 867 match = re.match(".*\[(.+) ; (.+)\]", f.readline().rstrip()) 868 bblow = _gp_float(match.group(1)) 869 bbhigh = _gp_float(match.group(2)) 870 f.close() 871 os.remove(fname + ".MIG") 872 os.remove(fname + ".GRA") 873 os.remove(fname + ".ISO") 874 return estimate, distance, (a, b), (bb, bblow, bbhigh)
875 876 #6.5
877 - def calc_ibd_diplo(self, fname, stat="a", scale="Log", min_dist=0.00001):
878 """Calculates isolation by distance statistics for diploid data. 879 880 See _calc_ibd for parameter details. 881 Note that each pop can only have a single individual and 882 the individual name has to be the sample coordinates. 883 """ 884 return self._calc_ibd(fname, 5, stat, scale, min_dist)
885 886 #6.6
887 - def calc_ibd_haplo(self, fname, stat="a", scale="Log", min_dist=0.00001):
888 """Calculates isolation by distance statistics for haploid data. 889 890 See _calc_ibd for parameter details. 891 Note that each pop can only have a single individual and 892 the individual name has to be the sample coordinates. 893 """ 894 return self._calc_ibd(fname, 6, stat, scale, min_dist)
895