Package Bio :: Package PDB :: Module PDBParser'
[hide private]
[frames] | no frames]

Source Code for Module Bio.PDB.PDBParser'

  1  # Copyright (C) 2002, Thomas Hamelryck (thamelry@binf.ku.dk) 
  2  # This code is part of the Biopython distribution and governed by its 
  3  # license.  Please see the LICENSE file that should have been included 
  4  # as part of this package.   
  5   
  6  # Python stuff 
  7  import sys 
  8  from string import split 
  9   
 10  import numpy 
 11   
 12  # My stuff 
 13  from StructureBuilder import StructureBuilder 
 14  from PDBExceptions import PDBConstructionException 
 15  from parse_pdb_header import _parse_pdb_header_list 
 16   
 17  __doc__="Parser for PDB files." 
 18   
 19   
 20  # If PDB spec says "COLUMNS 18-20" this means line[17:20] 
 21   
 22   
23 -class PDBParser:
24 """ 25 Parse a PDB file and return a Structure object. 26 """ 27
28 - def __init__(self, PERMISSIVE=1, get_header=0, structure_builder=None):
29 """ 30 The PDB parser call a number of standard methods in an aggregated 31 StructureBuilder object. Normally this object is instanciated by the 32 PDBParser object itself, but if the user provides his own StructureBuilder 33 object, the latter is used instead. 34 35 Arguments: 36 o PERMISSIVE - int, if this is 0 exceptions in constructing the 37 SMCRA data structure are fatal. If 1 (DEFAULT), the exceptions are 38 caught, but some residues or atoms will be missing. THESE EXCEPTIONS 39 ARE DUE TO PROBLEMS IN THE PDB FILE!. 40 o structure_builder - an optional user implemented StructureBuilder class. 41 """ 42 if structure_builder!=None: 43 self.structure_builder=structure_builder 44 else: 45 self.structure_builder=StructureBuilder() 46 self.header=None 47 self.trailer=None 48 self.line_counter=0 49 self.PERMISSIVE=PERMISSIVE
50 51 # Public methods 52
53 - def get_structure(self, id, file):
54 """Return the structure. 55 56 Arguments: 57 o id - string, the id that will be used for the structure 58 o file - name of the PDB file OR an open filehandle 59 """ 60 self.header=None 61 self.trailer=None 62 # Make a StructureBuilder instance (pass id of structure as parameter) 63 self.structure_builder.init_structure(id) 64 if isinstance(file, basestring): 65 file=open(file) 66 self._parse(file.readlines()) 67 self.structure_builder.set_header(self.header) 68 # Return the Structure instance 69 return self.structure_builder.get_structure()
70
71 - def get_header(self):
72 "Return the header." 73 return self.header
74
75 - def get_trailer(self):
76 "Return the trailer." 77 return self.trailer
78 79 # Private methods 80
81 - def _parse(self, header_coords_trailer):
82 "Parse the PDB file." 83 # Extract the header; return the rest of the file 84 self.header, coords_trailer=self._get_header(header_coords_trailer) 85 # Parse the atomic data; return the PDB file trailer 86 self.trailer=self._parse_coordinates(coords_trailer)
87
88 - def _get_header(self, header_coords_trailer):
89 "Get the header of the PDB file, return the rest." 90 structure_builder=self.structure_builder 91 for i in range(0, len(header_coords_trailer)): 92 structure_builder.set_line_counter(i+1) 93 line=header_coords_trailer[i] 94 record_type=line[0:6] 95 if(record_type=='ATOM ' or record_type=='HETATM' or record_type=='MODEL '): 96 break 97 header=header_coords_trailer[0:i] 98 # Return the rest of the coords+trailer for further processing 99 self.line_counter=i 100 coords_trailer=header_coords_trailer[i:] 101 header_dict=_parse_pdb_header_list(header) 102 return header_dict, coords_trailer
103
104 - def _parse_coordinates(self, coords_trailer):
105 "Parse the atomic data in the PDB file." 106 local_line_counter=0 107 structure_builder=self.structure_builder 108 current_model_id=0 109 # Flag we have an open model 110 model_open=0 111 current_chain_id=None 112 current_segid=None 113 current_residue_id=None 114 current_resname=None 115 for i in range(0, len(coords_trailer)): 116 line=coords_trailer[i] 117 record_type=line[0:6] 118 global_line_counter=self.line_counter+local_line_counter+1 119 structure_builder.set_line_counter(global_line_counter) 120 if(record_type=='ATOM ' or record_type=='HETATM'): 121 # Initialize the Model - there was no explicit MODEL record 122 if not model_open: 123 structure_builder.init_model(current_model_id) 124 current_model_id+=1 125 model_open=1 126 fullname=line[12:16] 127 # get rid of whitespace in atom names 128 split_list=split(fullname) 129 if len(split_list)!=1: 130 # atom name has internal spaces, e.g. " N B ", so 131 # we do not strip spaces 132 name=fullname 133 else: 134 # atom name is like " CA ", so we can strip spaces 135 name=split_list[0] 136 altloc=line[16:17] 137 resname=line[17:20] 138 chainid=line[21:22] 139 try: 140 serial_number=int(line[6:11]) 141 except: 142 serial_number=0 143 resseq=int(split(line[22:26])[0]) # sequence identifier 144 icode=line[26:27] # insertion code 145 if record_type=='HETATM': # hetero atom flag 146 if resname=="HOH" or resname=="WAT": 147 hetero_flag="W" 148 else: 149 hetero_flag="H" 150 else: 151 hetero_flag=" " 152 residue_id=(hetero_flag, resseq, icode) 153 # atomic coordinates 154 x=float(line[30:38]) 155 y=float(line[38:46]) 156 z=float(line[46:54]) 157 coord=numpy.array((x, y, z), 'f') 158 # occupancy & B factor 159 occupancy=float(line[54:60]) 160 bfactor=float(line[60:66]) 161 segid=line[72:76] 162 if current_segid!=segid: 163 current_segid=segid 164 structure_builder.init_seg(current_segid) 165 if current_chain_id!=chainid: 166 current_chain_id=chainid 167 structure_builder.init_chain(current_chain_id) 168 current_residue_id=residue_id 169 current_resname=resname 170 try: 171 structure_builder.init_residue(resname, hetero_flag, resseq, icode) 172 except PDBConstructionException, message: 173 self._handle_PDB_exception(message, global_line_counter) 174 elif current_residue_id!=residue_id or current_resname!=resname: 175 current_residue_id=residue_id 176 current_resname=resname 177 try: 178 structure_builder.init_residue(resname, hetero_flag, resseq, icode) 179 except PDBConstructionException, message: 180 self._handle_PDB_exception(message, global_line_counter) 181 # init atom 182 try: 183 structure_builder.init_atom(name, coord, bfactor, occupancy, altloc, fullname, serial_number) 184 except PDBConstructionException, message: 185 self._handle_PDB_exception(message, global_line_counter) 186 elif(record_type=='ANISOU'): 187 anisou=map(float, (line[28:35], line[35:42], line[43:49], line[49:56], line[56:63], line[63:70])) 188 # U's are scaled by 10^4 189 anisou_array=(numpy.array(anisou, 'f')/10000.0).astype('f') 190 structure_builder.set_anisou(anisou_array) 191 elif(record_type=='MODEL '): 192 structure_builder.init_model(current_model_id) 193 current_model_id+=1 194 model_open=1 195 current_chain_id=None 196 current_residue_id=None 197 elif(record_type=='END ' or record_type=='CONECT'): 198 # End of atomic data, return the trailer 199 self.line_counter=self.line_counter+local_line_counter 200 return coords_trailer[local_line_counter:] 201 elif(record_type=='ENDMDL'): 202 model_open=0 203 current_chain_id=None 204 current_residue_id=None 205 elif(record_type=='SIGUIJ'): 206 # standard deviation of anisotropic B factor 207 siguij=map(float, (line[28:35], line[35:42], line[42:49], line[49:56], line[56:63], line[63:70])) 208 # U sigma's are scaled by 10^4 209 siguij_array=(numpy.array(siguij, 'f')/10000.0).astype('f') 210 structure_builder.set_siguij(siguij_array) 211 elif(record_type=='SIGATM'): 212 # standard deviation of atomic positions 213 sigatm=map(float, (line[30:38], line[38:45], line[46:54], line[54:60], line[60:66])) 214 sigatm_array=numpy.array(sigatm, 'f') 215 structure_builder.set_sigatm(sigatm_array) 216 local_line_counter=local_line_counter+1 217 # EOF (does not end in END or CONECT) 218 self.line_counter=self.line_counter+local_line_counter 219 return []
220
221 - def _handle_PDB_exception(self, message, line_counter):
222 """ 223 This method catches an exception that occurs in the StructureBuilder 224 object (if PERMISSIVE==1), or raises it again, this time adding the 225 PDB line number to the error message. 226 """ 227 message="%s at line %i." % (message, line_counter) 228 if self.PERMISSIVE: 229 # just print a warning - some residues/atoms will be missing 230 print "PDBConstructionException: %s" % message 231 print "Exception ignored.\nSome atoms or residues will be missing in the data structure." 232 else: 233 # exceptions are fatal - raise again with new message (including line nr) 234 raise PDBConstructionException(message)
235 236 237 if __name__=="__main__": 238 239 import sys 240 241 p=PDBParser(PERMISSIVE=1) 242 243 s=p.get_structure("scr", sys.argv[1]) 244 245 for m in s: 246 p=m.get_parent() 247 assert(p is s) 248 for c in m: 249 p=c.get_parent() 250 assert(p is m) 251 for r in c: 252 print r 253 p=r.get_parent() 254 assert(p is c) 255 for a in r: 256 p=a.get_parent() 257 if not p is r: 258 print p, r 259