1
2
3
4
5
6
7 """
8 This module provides code to work with the KEGG Enzyme database.
9
10 Functions:
11 parse - Returns an iterator giving Record objects.
12
13 Classes:
14 Record -- Holds the information from a KEGG Enzyme record.
15 """
16
17 from Bio.KEGG import _write_kegg
18 from Bio.KEGG import _wrap_kegg
19
20
21
# Wrapping rules passed to Bio.KEGG._wrap_kegg through its ``wrap_rule``
# argument.  Each rule is a list whose first item is an indent, followed by
# an empty string and then one 4-tuple per break pattern.  The meaning of
# the tuple fields is defined by _wrap_kegg, not here.
# NOTE(review): confirm the exact whitespace inside the separator strings
# against the upstream module before relying on them.
rxn_wrap = [
    0,
    "",
    (" + ", "", 1, 1),
    (" = ", "", 1, 1),
    (" ", "$", 1, 1),
    ("-", "$", 1, 1),
]

name_wrap = [
    0,
    "",
    (" ", "$", 1, 1),
    ("-", "$", 1, 1),
]


def id_wrap(indent):
    """Return a fresh wrap rule for identifier lists.

    Arguments:
    indent -- value stored as the first item of the rule (used in this
              module with 16 for PATHWAY and 5 for GENES).

    A new list is built on every call, so callers may mutate the result.
    """
    return [indent, "", (" ", "", 1, 0)]


def struct_wrap(indent):
    """Return a fresh wrap rule for structure accession lists.

    Arguments:
    indent -- value stored as the first item of the rule (used in this
              module with 5 for STRUCTURES).

    A new list is built on every call, so callers may mutate the result.
    """
    return [indent, "", (" ", "", 1, 1)]
34
36 """Holds info from a KEGG Enzyme record.
37
38 Members:
39 entry The EC number (without the 'EC ').
40 name A list of the enzyme names.
41 classname A list of the classification terms.
42 sysname The systematic name of the enzyme.
43 reaction A list of the reaction description strings.
44 substrate A list of the substrates.
45 product A list of the products.
46 inhibitor A list of the inhibitors.
47 cofactor A list of the cofactors.
48 effector A list of the effectors.
49 comment A list of the comment strings.
50 pathway A list of 3-tuples: (database, id, pathway)
51 genes A list of 2-tuples: (organism, list of gene ids)
52 disease A list of 3-tuples: (database, id, disease)
53 structures A list of 2-tuples: (database, list of struct ids)
54 dblinks A list of 2-tuples: (database, list of db ids)
55 """
57 """__init__(self)
58
59 Create a new Record.
60 """
61 self.entry = ""
62 self.name = []
63 self.classname = []
64 self.sysname = []
65 self.reaction = []
66 self.substrate = []
67 self.product = []
68 self.inhibitor = []
69 self.cofactor = []
70 self.effector = []
71 self.comment = []
72 self.pathway = []
73 self.genes = []
74 self.disease = []
75 self.structures = []
76 self.dblinks = []
100 return _write_kegg("ENTRY",
101 ["EC " + self.entry])
142 s = []
143 for entry in self.pathway:
144 s.append(entry[0] + ": " + entry[1] + " " + entry[2])
145 return _write_kegg("PATHWAY",
146 [_wrap_kegg(l, wrap_rule = id_wrap(16)) \
147 for l in s])
149 s = []
150 for entry in self.genes:
151 s.append(entry[0] + ": " + " ".join(entry[1]))
152 return _write_kegg("GENES",
153 [_wrap_kegg(l, wrap_rule = id_wrap(5)) \
154 for l in s])
163 s = []
164 for entry in self.structures:
165 s.append(entry[0] + ": " + " ".join(entry[1]) + " ")
166 return _write_kegg("STRUCTURES",
167 [_wrap_kegg(l, wrap_rule = struct_wrap(5)) \
168 for l in s])
170
171
172
173
174 s = []
175 for entry in self.dblinks:
176 s.append(entry[0] + ": " + " ".join(entry[1]))
177 return _write_kegg("DBLINKS", s)
178
179
180
def parse(handle):
    """Parse a KEGG Enzyme file, returning Record objects.

    This is an iterator function, typically used in a for loop.  For
    example, using one of the example KEGG files in the Biopython
    test suite,

    >>> handle = open("KEGG/enzyme.sample")
    >>> for record in parse(handle):
    ...     print("%s %s" % (record.entry, record.name[0]))
    ...
    1.1.1.1 Alcohol dehydrogenase
    1.1.1.62 Estradiol 17beta-dehydrogenase
    1.1.1.68 Transferred to EC 1.7.99.5
    1.6.5.3 NADH dehydrogenase (ubiquinone)
    1.14.13.28 3,9-Dihydroxypterocarpan 6a-monooxygenase
    2.4.1.68 Glycoprotein 6-alpha-L-fucosyltransferase
    3.1.1.6 Acetylesterase
    2.7.2.1 Acetate kinase
    >>> handle.close()

    Arguments:
    handle -- any iterable of text lines (e.g. an open file).

    Each line is split into a keyword column (the first 12 characters)
    and a data column (the rest).  A blank keyword column marks a
    continuation of the previous keyword.  A record is yielded each time
    a line starting with "///" is read; data after the last "///" is
    therefore not yielded.

    Raises IndexError if a continuation line for DBLINKS, DISEASE, GENES,
    PATHWAY or STRUCTURES arrives before any entry of that kind, or if an
    ENTRY line has fewer than two words.
    """
    record = Record()
    keyword = ""  # defined up front so a leading continuation line is harmless
    for line in handle:
        if line[:3] == "///":
            yield record
            record = Record()
            keyword = ""
            continue
        if not line.strip():
            # Whitespace-only lines carry neither a keyword nor data.
            continue
        # Compare the keyword with its column padding stripped, so the
        # tests below do not depend on the exact number of pad spaces.
        field = line[:12].strip()
        if field:
            keyword = field
        data = line[12:].strip()
        if keyword == "ENTRY":
            # Data looks like "EC <number> ..."; keep only the number.
            record.entry = data.split()[1]
        elif keyword == "CLASS":
            record.classname.append(data)
        elif keyword == "COFACTOR":
            record.cofactor.append(data)
        elif keyword == "COMMENT":
            record.comment.append(data)
        elif keyword == "DBLINKS":
            if ":" in data:
                # Split on the first colon only; ids may contain colons.
                key, values = data.split(":", 1)
                record.dblinks.append((key, values.split()))
            else:
                # The id list is shared with the stored tuple: extend it
                # in place.
                record.dblinks[-1][1].extend(data.split())
        elif keyword == "DISEASE":
            if ":" in data:
                database, rest = data.split(":", 1)
                number, name = rest.split(None, 1)
                record.disease.append((database, number, name))
            else:
                database, number, name = record.disease[-1]
                record.disease[-1] = (database, number, name + " " + data)
        elif keyword == "EFFECTOR":
            record.effector.append(data.strip(";"))
        elif keyword == "GENES":
            if data[3:5] == ': ':
                key, values = data.split(":", 1)
                # Drop the "(name)" suffix attached to each gene id.
                genes = [value.split("(")[0] for value in values.split()]
                record.genes.append((key, genes))
            else:
                record.genes[-1][1].extend(
                    value.split("(")[0] for value in data.split())
        elif keyword == "INHIBITOR":
            record.inhibitor.append(data.strip(";"))
        elif keyword == "NAME":
            record.name.append(data.strip(";"))
        elif keyword == "PATHWAY":
            if data[:5] == 'PATH:':
                path, map_id, name = data.split(None, 2)
                # path is "PATH:"; store it without the trailing colon.
                record.pathway.append((path[:-1], map_id, name))
            else:
                path, map_id, name = record.pathway[-1]
                record.pathway[-1] = (path, map_id, name + " " + data)
        elif keyword == "PRODUCT":
            record.product.append(data.strip(";"))
        elif keyword == "REACTION":
            record.reaction.append(data.strip(";"))
        elif keyword == "STRUCTURES":
            if data[:4] == 'PDB:':
                record.structures.append((data[:3], data[4:].split()))
            else:
                record.structures[-1][1].extend(data.split())
        elif keyword == "SUBSTRATE":
            record.substrate.append(data.strip(";"))
        elif keyword == "SYSNAME":
            record.sysname.append(data.strip(";"))
294
296 """Run the Bio.KEGG.Enzyme module's doctests.
297
298 This will try and locate the unit tests directory, and run the doctests
299 from there in order that the relative paths used in the examples work.
300 """
301 import doctest
302 import os
303 if os.path.isdir(os.path.join("..","..","..","Tests")):
304 print "Runing doctests..."
305 cur_dir = os.path.abspath(os.curdir)
306 os.chdir(os.path.join("..","..","..","Tests"))
307 doctest.testmod()
308 os.chdir(cur_dir)
309 del cur_dir
310 print "Done"
311
312 if __name__ == "__main__":
313 _test()
314