Package Bio :: Package PopGen :: Package FDist :: Module Utils
[hide private]
[frames] | no frames]

Source Code for Module Bio.PopGen.FDist.Utils

  1  # Copyright 2007 by Tiago Antao <tiagoantao@gmail.com>.  All rights reserved. 
  2  # This code is part of the Biopython distribution and governed by its 
  3  # license.  Please see the LICENSE file that should have been included 
  4  # as part of this package. 
  5   
  6   
  7  import os 
  8  from Bio.PopGen import GenePop 
  9  from Bio.PopGen.GenePop import FileParser 
 10  import Bio.PopGen.FDist 
 11   
 12  # Quite a few utility functions could be done (like remove pop, 
 13  # add locus, etc...). The recommended strategy is convert back 
 14  # and forth from/to GenePop and use GenePop Utils 
 15   
16 -def convert_genepop_to_fdist(gp_rec, report_pops = None):
17 """Converts a GenePop record to a FDist one. 18 19 Parameters: 20 gp_rec - Genepop Record (either standard or big) 21 22 Returns: 23 FDist record. 24 """ 25 if hasattr(gp_rec, "populations"): 26 return _convert_genepop_to_fdist(gp_rec) 27 else: 28 return _convert_genepop_to_fdist_big(gp_rec, report_pops)
29
30 -def _convert_genepop_to_fdist(gp_rec):
31 """Converts a standard GenePop record to a FDist one. 32 33 Parameters: 34 gp_rec - Genepop Record (Standard) 35 36 Returns: 37 FDist record. 38 """ 39 fd_rec = Bio.PopGen.FDist.Record() 40 41 fd_rec.data_org = 0 42 fd_rec.num_loci = len(gp_rec.loci_list) 43 fd_rec.num_pops = len(gp_rec.populations) 44 for lc_i in range(len(gp_rec.loci_list)): 45 alleles = [] 46 pop_data = [] 47 for pop_i in range(len(gp_rec.populations)): 48 for indiv in gp_rec.populations[pop_i]: 49 for al in indiv[1][lc_i]: 50 if al is not None and al not in alleles: 51 alleles.append(al) 52 alleles.sort() #Dominance requires this 53 54 #here we go again (necessary...) 55 for pop_i in range(len(gp_rec.populations)): 56 allele_counts = {} 57 for indiv in gp_rec.populations[pop_i]: 58 for al in indiv[1][lc_i]: 59 if al is not None: 60 count = allele_counts.get(al, 0) 61 allele_counts[al] = count + 1 62 allele_array = [] #We need the same order as in alleles 63 for allele in alleles: 64 allele_array.append(allele_counts.get(allele, 0)) 65 pop_data.append(allele_array) 66 #if lc_i==3: 67 # print alleles, allele_counts#, pop_data 68 fd_rec.loci_data.append((len(alleles), pop_data)) 69 return fd_rec
70
71 -def _convert_genepop_to_fdist_big(gp_rec, report_pops = None):
72 """Converts a big GenePop record to a FDist one. 73 74 Parameters: 75 gp_rec - Genepop Record (Big) 76 77 Returns: 78 FDist record. 79 """ 80 fd_rec = Bio.PopGen.FDist.Record() 81 82 fd_rec.data_org = 1 83 fd_rec.num_loci = len(gp_rec.loci_list) 84 num_loci = len(gp_rec.loci_list) 85 loci = [] 86 for i in range(num_loci): 87 loci.append(set()) 88 pops = [] 89 work_rec = FileParser.read(gp_rec.fname) 90 lParser = work_rec.get_individual() 91 def init_pop(): 92 my_pop = [] 93 for i in range(num_loci): 94 my_pop.append({}) 95 return my_pop
96 curr_pop = init_pop() 97 num_pops = 1 98 if report_pops: 99 report_pops(num_pops) 100 while lParser: 101 if lParser != True: 102 for loci_pos in range(num_loci): 103 for al in lParser[1][loci_pos]: 104 if al is not None: 105 loci[loci_pos].add(al) 106 curr_pop[loci_pos][al]= curr_pop[loci_pos].get(al,0)+1 107 else: 108 pops.append(curr_pop) 109 num_pops += 1 110 if report_pops: 111 report_pops(num_pops) 112 curr_pop = init_pop() 113 lParser = work_rec.get_individual() 114 pops.append(curr_pop) 115 fd_rec.num_pops = num_pops 116 for loci_pos in range(num_loci): 117 alleles = list(loci[loci_pos]) 118 alleles.sort() 119 loci_rec = [len(alleles), []] 120 for pop in pops: 121 pop_rec = [] 122 for allele in alleles: 123 pop_rec.append(pop[loci_pos].get(allele, 0)) 124 loci_rec[1].append(pop_rec) 125 fd_rec.loci_data.append(tuple(loci_rec)) 126 return fd_rec 127 128
129 -def _convert_genepop_to_fdist_big_old(gp_rec, report_loci = None):
130 """Converts a big GenePop record to a FDist one. 131 132 Parameters: 133 gp_rec - Genepop Record (Big) 134 135 Returns: 136 FDist record. 137 """ 138 fd_rec = Bio.PopGen.FDist.Record() 139 140 def countPops(rec): 141 f2 = FileParser.read(rec.fname) 142 popCnt = 1 143 while f2.skip_population(): 144 popCnt += 1 145 return popCnt
146 147 148 fd_rec.data_org = 0 149 fd_rec.num_loci = len(gp_rec.loci_list) 150 work_rec0 = FileParser.read(gp_rec.fname) 151 fd_rec.num_pops = countPops(work_rec0) 152 153 num_loci = len(gp_rec.loci_list) 154 for lc_i in range(num_loci): 155 if report_loci: 156 report_loci(lc_i, num_loci) 157 work_rec = FileParser.read(gp_rec.fname) 158 work_rec2 = FileParser.read(gp_rec.fname) 159 160 alleles = [] 161 pop_data = [] 162 lParser = work_rec.get_individual() 163 while lParser: 164 if lParser != True: 165 for al in lParser[1][lc_i]: 166 if al is not None and al not in alleles: 167 alleles.append(al) 168 lParser = work_rec.get_individual() 169 #here we go again (necessary...) 170 alleles.sort() 171 def process_pop(pop_data, alleles, allele_counts): 172 allele_array = [] #We need the same order as in alleles 173 for allele in alleles: 174 allele_array.append(allele_counts.get(allele, 0)) 175 pop_data.append(allele_array) 176 lParser = work_rec2.get_individual() 177 allele_counts = {} 178 for allele in alleles: 179 allele_counts[allele] = 0 180 allele_counts[None]=0 181 while lParser: 182 if lParser == True: 183 process_pop(pop_data, alleles, allele_counts) 184 allele_counts = {} 185 for allele in alleles: 186 allele_counts[allele] = 0 187 allele_counts[None]=0 188 else: 189 for al in lParser[1][lc_i]: 190 allele_counts[al] += 1 191 lParser = work_rec2.get_individual() 192 process_pop(pop_data, alleles, allele_counts) 193 fd_rec.loci_data.append((len(alleles), pop_data)) 194 return fd_rec 195 196
197 -def approximate_fst(desired_fst, simulated_fst, parameter_fst, 198 max_run_fst = 1, min_run_fst = 0, limit = 0.005):
199 """Calculates the next Fst attempt in order to approximate a 200 desired Fst. 201 202 """ 203 if abs(simulated_fst - desired_fst) < limit: 204 return parameter_fst, max_run_fst, min_run_fst 205 if simulated_fst > desired_fst: 206 max_run_fst = parameter_fst 207 next_parameter_fst = (min_run_fst + parameter_fst)/2 208 else: 209 min_run_fst = parameter_fst 210 next_parameter_fst = (max_run_fst + parameter_fst)/2 211 return next_parameter_fst, max_run_fst, min_run_fst
212