1
2
3
4
5
"""
This module provides code to work with GenePop.

See http://wbiomed.curtin.edu.au/genepop/ ; the format is documented
here: http://wbiomed.curtin.edu.au/genepop/help_input.html .

Classes:
Record           Holds GenePop data.

Functions:
read             Parses a GenePop record (file) into a Record object.


Partially inspired by the MedLine code.

"""
22 from copy import deepcopy
23
24
def get_indiv(line):
    """Parse the line describing one individual.

    The line must hold a name, exactly one comma, and then the marker
    codes separated by spaces and/or tabs, e.g. ``Ind1, 0101 0203``.

    Returns a tuple (indiv_name, allele_list, marker_len):
    indiv_name   Text found before the comma (not stripped).
    allele_list  One tuple per marker: two alleles per marker, or a
                 single allele per marker for haploids. An allele whose
                 code is 0 is stored as None.
    marker_len   Digits per allele (2 or 3), deduced from the length of
                 the first marker only.

    Raises ValueError if the line does not contain exactly one comma or
    if a marker is not numeric, and IndexError if there is no marker
    after the comma.
    """

    def int_no_zero(val):
        """Convert val to an int, returning None when the result is 0."""
        number = int(val)
        return number if number != 0 else None

    indiv_name, marker_line = line.split(',')

    # Tabs count as spaces; consecutive separators produce empty strings
    # that are thrown away.
    pieces = marker_line.replace('\t', ' ').split(' ')
    codes = [piece for piece in pieces if piece != '']

    # A first marker of 2 or 4 characters means 2 digits per allele
    # (haploid or diploid respectively); anything else is read as 3.
    marker_len = 2 if len(codes[0]) in (2, 4) else 3

    try:
        allele_list = [
            (int_no_zero(code[:marker_len]), int_no_zero(code[marker_len:]))
            for code in codes
        ]
    except ValueError:
        # The second half of a marker could not be converted: fall back
        # to one allele per marker (haploid data).
        allele_list = [(int_no_zero(code[:marker_len]),) for code in codes]

    return indiv_name, allele_list, marker_len
46
def read(handle):
    """Parses a handle containing a GenePop file.

    handle is a file-like object that contains a GenePop record; it is
    consumed as an iterator over text lines.

    The content is read as follows:
    - the first line is stored as the comment line;
    - the second line holds loci names separated by spaces (commas are
      removed); every following line up to the first "Pop" line
      (case-insensitive) is taken as one more locus name;
    - each remaining line is either "Pop", which opens a new population,
      or one individual, which is added to the current population.

    Each population is named in pop_list after its last individual.

    Returns the resulting Record.

    Raises ValueError if no "Pop" line is found or if an individual line
    cannot be parsed; StopIteration if handle has fewer than two lines;
    IndexError if a population is empty or an individual has fewer
    markers than there are loci.
    """
    record = Record()
    # The built-in next() is used instead of handle.next(): the method
    # form only exists on Python 2 iterators.
    record.comment_line = str(next(handle)).rstrip()

    # Loci may be listed on one line, one per line, or a mix of both.
    first_loci_line = str(next(handle)).rstrip().replace(',', '')
    record.loci_list.extend(first_loci_line.split(' '))
    for line in handle:
        line = line.rstrip()
        if line.upper() == 'POP':
            break
        record.loci_list.append(line)
    else:
        # The handle ran out without ever reaching a "Pop" line.
        raise ValueError('No population data found, file probably not GenePop related')

    record.populations.append([])
    for line in handle:
        line = line.rstrip()
        if line.upper() == 'POP':
            record.populations.append([])
        else:
            indiv_name, allele_list, record.marker_len = get_indiv(line)
            record.populations[-1].append((indiv_name, allele_list))

    num_loci = len(record.loci_list)
    for pop in record.populations:
        record.pop_list.append(pop[-1][0])
        for _name, markers in pop:
            # Only the first num_loci markers are normalised; the list is
            # updated in place, so the individual's tuple sees the change.
            for mk_i in range(num_loci):
                markers[mk_i] = tuple(
                    None if al == 0 else al for al in markers[mk_i]
                )
    return record
89
90
class Record:
    """Holds information from a GenePop record.

    Members:
    marker_len         The marker length (2 or 3 digit code per allele).

    comment_line       Comment line.

    loci_list          List of loci names.

    pop_list           List of population names.

    populations        List of population data.

    In most genepop files, the population name is not trustable.
    It is strongly recommended that populations are referred by index.

    populations has one element per population. Each element is itself
    a list of individuals, each individual is a pair composed by individual
    name and a list of alleles (2 per marker or 1 for haploids): Example
    [
        [
            ('Ind1', [(1,2),    (3,3), (200,201)]),
            ('Ind2', [(2,None), (3,3), (None,None)]),
        ],
        [
            ('Other1', [(1,1),  (4,3), (200,200)]),
        ]
    ]

    """

    def __init__(self):
        """Creates an empty record (no loci, no populations)."""
        self.marker_len = 0
        self.comment_line = ""
        self.loci_list = []
        self.pop_list = []
        self.populations = []

    def __str__(self):
        """Returns (reconstructs) a GenePop textual representation.

        The output is the comment line, one locus name per line, and then
        for every population a "Pop" line followed by one line per
        individual ("name," then one space-prefixed code per marker).
        Alleles are left-padded with zeros to marker_len characters and
        None is written as 0.
        """
        rep = [self.comment_line + '\n']
        rep.append('\n'.join(self.loci_list) + '\n')
        for pop in self.populations:
            rep.append('Pop\n')
            for name, markers in pop:
                rep.append(name)
                rep.append(',')
                for marker in markers:
                    rep.append(' ')
                    for al in marker:
                        if al is None:
                            al = '0'
                        # rjust never truncates, so codes that are already
                        # marker_len or longer are written unchanged.
                        rep.append(str(al).rjust(self.marker_len, '0'))
                rep.append('\n')
        return "".join(rep)

    def split_in_pops(self, pop_names):
        """Splits a GP record in a dictionary with 1 pop per entry.

        Given a record with n pops and m loci returns a dictionary
        of records (key pop_name) where each item is a record
        with a single pop and m loci.

        Parameters:
        pop_names - Population names, indexed by population position.

        The loci list and the population data of every returned record
        are deep copies. Raises IndexError if pop_names has fewer entries
        than there are populations.
        """
        gp_pops = {}
        for i, pop in enumerate(self.populations):
            gp_pop = Record()
            gp_pop.marker_len = self.marker_len
            gp_pop.comment_line = self.comment_line
            gp_pop.loci_list = deepcopy(self.loci_list)
            gp_pop.populations = [deepcopy(pop)]
            gp_pops[pop_names[i]] = gp_pop
        return gp_pops

    def split_in_loci(self, gp=None):
        """Splits a GP record in a dictionary with 1 locus per entry.

        Given a record with n pops and m loci returns a dictionary
        of records (key locus name) where each item is a record
        with a single locus and n pops.

        Parameters:
        gp - Not used; accepted only so that callers may pass it.
        """
        gp_loci = {}
        for i, locus in enumerate(self.loci_list):
            gp_pop = Record()
            gp_pop.marker_len = self.marker_len
            gp_pop.comment_line = self.comment_line
            gp_pop.loci_list = [locus]
            # Every individual is kept, reduced to its i-th marker.
            gp_pop.populations = [
                [(name, [markers[i]]) for name, markers in pop]
                for pop in self.populations
            ]
            gp_loci[locus] = gp_pop
        return gp_loci

    def remove_population(self, pos):
        """Removes a population (by position).

        When pop_list holds exactly one name per population, the name at
        the same position is removed too, so that both lists stay
        aligned; otherwise pop_list is left untouched.
        """
        names_aligned = len(self.pop_list) == len(self.populations)
        del self.populations[pos]
        if names_aligned:
            del self.pop_list[pos]

    def remove_locus_by_position(self, pos):
        """Removes a locus by position.

        The locus name is dropped from loci_list and the marker at that
        position is dropped from every individual of every population.
        """
        del self.loci_list[pos]
        for pop in self.populations:
            for _name, loci in pop:
                del loci[pos]
209
217
218
219