Package Bio :: Package KEGG :: Package Enzyme
[hide private]
[frames] | [no frames]

Source Code for Package Bio.KEGG.Enzyme

  1  # Copyright 2001 by Tarjei Mikkelsen.  All rights reserved. 
  2  # Copyright 2007 by Michiel de Hoon.  All rights reserved. 
  3  # This code is part of the Biopython distribution and governed by its 
  4  # license.  Please see the LICENSE file that should have been included 
  5  # as part of this package. 
  6   
  7  """ 
  8  This module provides code to work with the KEGG Enzyme database. 
  9   
 10  Functions: 
 11  parse - Returns an iterator giving Record objects. 
 12   
 13  Classes: 
 14  Record               -- Holds the information from a KEGG Enzyme record. 
 15  """ 
 16   
 17  from Bio.KEGG import _write_kegg 
 18  from Bio.KEGG import _wrap_kegg 
 19   
 20   
 21  # Set up line wrapping rules (see Bio.KEGG._wrap_kegg) 
 22  rxn_wrap = [0, "", 
 23              (" + ","",1,1), 
 24              (" = ","",1,1), 
 25              (" ","$",1,1), 
 26              ("-","$",1,1)] 
 27  name_wrap = [0, "", 
 28               (" ","$",1,1), 
 29               ("-","$",1,1)] 
 30  id_wrap = lambda indent : [indent, "", 
 31                             (" ","",1,0)] 
 32  struct_wrap = lambda indent : [indent, "", 
 33                                 ("  ","",1,1)] 
 34   
class Record:
    """Holds info from a KEGG Enzyme record.

    Members:
    entry       The EC number (without the 'EC ').
    name        A list of the enzyme names.
    classname   A list of the classification terms.
    sysname     A list of the systematic names of the enzyme.
    reaction    A list of the reaction description strings.
    substrate   A list of the substrates.
    product     A list of the products.
    inhibitor   A list of the inhibitors.
    cofactor    A list of the cofactors.
    effector    A list of the effectors.
    comment     A list of the comment strings.
    pathway     A list of 3-tuples: (database, id, pathway)
    genes       A list of 2-tuples: (organism, list of gene ids)
    disease     A list of 3-tuples: (database, id, disease)
    structures  A list of 2-tuples: (database, list of struct ids)
    dblinks     A list of 2-tuples: (database, list of db ids)
    """

    def __init__(self):
        """Create a new, empty Record."""
        self.entry = ""
        self.name = []
        self.classname = []
        self.sysname = []
        self.reaction = []
        self.substrate = []
        self.product = []
        self.inhibitor = []
        self.cofactor = []
        self.effector = []
        self.comment = []
        self.pathway = []
        self.genes = []
        self.disease = []
        self.structures = []
        self.dblinks = []

    def __str__(self):
        """Return a string representation of this Record.

        The sections are written in a fixed order and the result is
        terminated with the "///" marker.
        """
        sections = (
            self._entry,
            self._name,
            self._classname,
            self._sysname,
            self._reaction,
            self._substrate,
            self._product,
            self._inhibitor,
            self._cofactor,
            self._effector,
            self._comment,
            self._pathway,
            self._genes,
            self._disease,
            self._structures,
            self._dblinks,
        )
        return "".join([section() for section in sections]) + "///"

    def _wrapped(self, lines, rule):
        """Return every string in *lines* passed through _wrap_kegg with *rule*."""
        return [_wrap_kegg(line, wrap_rule=rule) for line in lines]

    def _entry(self):
        """Return the ENTRY section ("EC " is prepended to the EC number)."""
        return _write_kegg("ENTRY", ["EC " + self.entry])

    def _name(self):
        """Return the NAME section."""
        return _write_kegg("NAME", self._wrapped(self.name, name_wrap))

    def _classname(self):
        """Return the CLASS section (terms are written without wrapping)."""
        return _write_kegg("CLASS", self.classname)

    def _sysname(self):
        """Return the SYSNAME section."""
        return _write_kegg("SYSNAME", self._wrapped(self.sysname, name_wrap))

    def _reaction(self):
        """Return the REACTION section."""
        return _write_kegg("REACTION", self._wrapped(self.reaction, rxn_wrap))

    def _substrate(self):
        """Return the SUBSTRATE section."""
        return _write_kegg("SUBSTRATE",
                           self._wrapped(self.substrate, name_wrap))

    def _product(self):
        """Return the PRODUCT section."""
        return _write_kegg("PRODUCT", self._wrapped(self.product, name_wrap))

    def _inhibitor(self):
        """Return the INHIBITOR section."""
        return _write_kegg("INHIBITOR",
                           self._wrapped(self.inhibitor, name_wrap))

    def _cofactor(self):
        """Return the COFACTOR section."""
        return _write_kegg("COFACTOR",
                           self._wrapped(self.cofactor, name_wrap))

    def _effector(self):
        """Return the EFFECTOR section."""
        return _write_kegg("EFFECTOR",
                           self._wrapped(self.effector, name_wrap))

    def _comment(self):
        """Return the COMMENT section."""
        return _write_kegg("COMMENT", self._wrapped(self.comment, id_wrap(0)))

    def _pathway(self):
        """Return the PATHWAY section, one "db: id name" line per tuple."""
        lines = [database + ": " + ident + " " + name
                 for database, ident, name in self.pathway]
        return _write_kegg("PATHWAY", self._wrapped(lines, id_wrap(16)))

    def _genes(self):
        """Return the GENES section, one "organism: ids" line per tuple."""
        lines = [organism + ": " + " ".join(gene_ids)
                 for organism, gene_ids in self.genes]
        return _write_kegg("GENES", self._wrapped(lines, id_wrap(5)))

    def _disease(self):
        """Return the DISEASE section, one "db: id name" line per tuple."""
        lines = [database + ": " + ident + " " + name
                 for database, ident, name in self.disease]
        return _write_kegg("DISEASE", self._wrapped(lines, id_wrap(13)))

    def _structures(self):
        """Return the STRUCTURES section, one "db: ids" line per tuple."""
        # struct_wrap only lists a double space as a separator, so the ids
        # are joined (and terminated) with a double space; joined with a
        # single space the line would contain no separator for the rule.
        lines = [database + ": " + "  ".join(struct_ids) + "  "
                 for database, struct_ids in self.structures]
        return _write_kegg("STRUCTURES", self._wrapped(lines, struct_wrap(5)))

    def _dblinks(self):
        """Return the DBLINKS section, one "db: ids" line per tuple."""
        # NOTE(review): __str__ calls this method but the class carried no
        # definition for it, so str(record) raised AttributeError.  The
        # layout mirrors _genes without line wrapping -- confirm against the
        # upstream module.
        lines = [database + ": " + " ".join(link_ids)
                 for database, link_ids in self.dblinks]
        return _write_kegg("DBLINKS", lines)
def parse(handle):
    """Parse a KEGG Enzyme file, returning Record objects.

    This is an iterator function, typically used in a for loop.  For
    example, using one of the example KEGG files in the Biopython
    test suite,

    >>> handle = open("KEGG/enzyme.sample")
    >>> for record in parse(handle):
    ...     print record.entry, record.name[0]
    ...
    1.1.1.1 Alcohol dehydrogenase
    1.1.1.62 Estradiol 17beta-dehydrogenase
    1.1.1.68 Transferred to EC 1.7.99.5
    1.6.5.3 NADH dehydrogenase (ubiquinone)
    1.14.13.28 3,9-Dihydroxypterocarpan 6a-monooxygenase
    2.4.1.68 Glycoprotein 6-alpha-L-fucosyltransferase
    3.1.1.6 Acetylesterase
    2.7.2.1 Acetate kinase

    Arguments:
    handle -- any iterable yielding the lines of the file as strings.

    The first 12 characters of a line hold the field keyword; when they
    are all blanks the line continues the previous field.  A Record is
    yielded each time a line starting with "///" is read, so data that
    follows the last "///" is never returned.
    """
    keyword_width = 12
    continuation = " " * keyword_width

    # Fields whose data is appended as-is to a list attribute.
    plain_fields = {
        "CLASS": "classname",
        "COFACTOR": "cofactor",
        "COMMENT": "comment",
    }
    # Fields whose data is appended after stripping surrounding ";".
    stripped_fields = {
        "EFFECTOR": "effector",
        "INHIBITOR": "inhibitor",
        "NAME": "name",
        "PRODUCT": "product",
        "REACTION": "reaction",
        "SUBSTRATE": "substrate",
        "SYSNAME": "sysname",
    }

    record = Record()
    # Pre-set so a file that opens with a continuation line is skipped
    # instead of failing on an unbound local.
    keyword = ""
    for line in handle:
        if line[:3] == "///":
            yield record
            record = Record()
            continue
        if line[:keyword_width] != continuation:
            # Compare on the stripped keyword so the match does not depend
            # on the exact amount of padding after the field name.
            keyword = line[:keyword_width].strip()
        data = line[keyword_width:].strip()

        if keyword in stripped_fields:
            getattr(record, stripped_fields[keyword]).append(data.strip(";"))
        elif keyword in plain_fields:
            getattr(record, plain_fields[keyword]).append(data)
        elif keyword == "ENTRY":
            # The EC number is the second word of the entry data.
            record.entry = data.split()[1]
        elif keyword == "DBLINKS":
            if ":" in data:
                # Split on the first colon only: ids may contain colons.
                key, values = data.split(":", 1)
                record.dblinks.append((key, values.split()))
            else:
                # Continuation: the id list of the last row grows in place.
                record.dblinks[-1][1].extend(data.split())
        elif keyword == "DISEASE":
            if ":" in data:
                # Split on the first colon only: names may contain colons.
                database, rest = data.split(":", 1)
                number, name = rest.split(None, 1)
                record.disease.append((database, number, name))
            else:
                database, number, name = record.disease[-1]
                record.disease[-1] = (database, number, name + " " + data)
        elif keyword == "GENES":
            # Anything from "(" onwards in a gene word is dropped.
            if data[3:5] == ': ':
                key, values = data.split(":", 1)
                gene_ids = [value.split("(")[0] for value in values.split()]
                record.genes.append((key, gene_ids))
            else:
                record.genes[-1][1].extend(
                    [value.split("(")[0] for value in data.split()])
        elif keyword == "PATHWAY":
            if data[:5] == 'PATH:':
                path, map_id, name = data.split(None, 2)
                # path[:-1] drops the trailing ":" of "PATH:".
                record.pathway.append((path[:-1], map_id, name))
            else:
                path, map_id, name = record.pathway[-1]
                record.pathway[-1] = (path, map_id, name + " " + data)
        elif keyword == "STRUCTURES":
            if data[:4] == 'PDB:':
                record.structures.append((data[:3], data[4:].split()))
            else:
                record.structures[-1][1].extend(data.split())
def _test():
    """Run the Bio.KEGG.Enzyme module's doctests.

    This will try and locate the unit tests directory, and run the doctests
    from there in order that the relative paths used in the examples work.
    Nothing is run when the directory cannot be found.
    """
    import doctest
    import os
    tests_dir = os.path.join("..", "..", "..", "Tests")
    if os.path.isdir(tests_dir):
        # Parenthesised single-argument print behaves the same whether
        # print is a statement or a function.
        print("Runing doctests...")
        cur_dir = os.path.abspath(os.curdir)
        os.chdir(tests_dir)
        try:
            doctest.testmod()
        finally:
            # Always return to the starting directory, even if the doctest
            # run raises.
            os.chdir(cur_dir)
        print("Done")


if __name__ == "__main__":
    _test()