Package Bio :: Package PDB :: Module DSSP'
[hide private]
[frames] | no frames]

Source Code for Module Bio.PDB.DSSP'

  1  # Copyright (C) 2002, Thomas Hamelryck (thamelry@binf.ku.dk) 
  2  # This code is part of the Biopython distribution and governed by its 
  3  # license.  Please see the LICENSE file that should have been included 
  4  # as part of this package. 
  5   
  6  """Use the DSSP program to calculate secondary structure and accessibility. 
  7   
  8  You need to have a working version of DSSP (and a license, free for academic 
  9  use) in order to use this. For DSSP, see U{http://www.cmbi.kun.nl/gv/dssp/}. 
 10   
 11  The DSSP codes for secondary structure used here are: 
 12   
 13      - H        Alpha helix (4-12) 
 14      - B        Isolated beta-bridge residue 
 15      - E        Strand 
 16      - G        3-10 helix 
 17      - I        pi helix 
 18      - T        Turn 
 19      - S        Bend 
 20      - -        None 
 21  """ 
 22   
 23  import os 
 24  import re 
 25  import tempfile 
 26   
 27  from Bio.SCOP.Raf import to_one_letter_code 
 28   
 29  from Bio.PDB.AbstractPropertyMap import AbstractResiduePropertyMap 
 30  from Bio.PDB.PDBExceptions import PDBException 
 31  from Bio.PDB.PDBParser import PDBParser 
 32   
 33   
 34  # Match C in DSSP 
 35  _dssp_cys=re.compile('[a-z]') 
 36   
 37  # Maximal ASA of amino acids 
 38  # Values from Sander & Rost, (1994), Proteins, 20:216-226 
 39  # Used for relative accessibility 
 40  MAX_ACC={} 
 41  MAX_ACC["ALA"]=106.0 
 42  MAX_ACC["CYS"]=135.0 
 43  MAX_ACC["ASP"]=163.0 
 44  MAX_ACC["GLU"]=194.0 
 45  MAX_ACC["PHE"]=197.0 
 46  MAX_ACC["GLY"]=84.0 
 47  MAX_ACC["HIS"]=184.0 
 48  MAX_ACC["ILE"]=169.0 
 49  MAX_ACC["LYS"]=205.0 
 50  MAX_ACC["LEU"]=164.0 
 51  MAX_ACC["MET"]=188.0 
 52  MAX_ACC["ASN"]=157.0 
 53  MAX_ACC["PRO"]=136.0 
 54  MAX_ACC["GLN"]=198.0 
 55  MAX_ACC["ARG"]=248.0 
 56  MAX_ACC["SER"]=130.0 
 57  MAX_ACC["THR"]=142.0 
 58  MAX_ACC["VAL"]=142.0 
 59  MAX_ACC["TRP"]=227.0 
 60  MAX_ACC["TYR"]=222.0 
 61   
 62   
63 -def ss_to_index(ss):
64 """ 65 Secondary structure symbol to index. 66 H=0 67 E=1 68 C=2 69 """ 70 if ss=='H': 71 return 0 72 if ss=='E': 73 return 1 74 if ss=='C': 75 return 2 76 assert(0)
77
78 -def dssp_dict_from_pdb_file(in_file, DSSP="dssp"):
79 """ 80 Create a DSSP dictionary from a PDB file. 81 82 Example: 83 >>> dssp_dict=dssp_dict_from_pdb_file("1fat.pdb") 84 >>> aa, ss, acc=dssp_dict[('A', 1)] 85 86 @param in_file: pdb file 87 @type in_file: string 88 89 @param DSSP: DSSP executable (argument to os.system) 90 @type DSSP: string 91 92 @return: a dictionary that maps (chainid, resid) to 93 amino acid type, secondary structure code and 94 accessibility. 95 @rtype: {} 96 """ 97 out_file=tempfile.mktemp() 98 os.system(DSSP+" %s > %s" % (in_file, out_file)) 99 dict, keys=make_dssp_dict(out_file) 100 # This can be dangerous... 101 #os.system("rm "+out_file) 102 return dict, keys
103
104 -def make_dssp_dict(filename):
105 """ 106 Return a DSSP dictionary that maps (chainid, resid) to 107 aa, ss and accessibility, from a DSSP file. 108 109 @param filename: the DSSP output file 110 @type filename: string 111 """ 112 dssp={} 113 fp=open(filename, "r") 114 start=0 115 keys=[] 116 for l in fp.readlines(): 117 sl=l.split() 118 if sl[1]=="RESIDUE": 119 # start 120 start=1 121 continue 122 if not start: 123 continue 124 if l[9]==" ": 125 # skip -- missing residue 126 continue 127 resseq=int(l[5:10]) 128 icode=l[10] 129 chainid=l[11] 130 aa=l[13] 131 ss=l[16] 132 if ss==" ": 133 ss="-" 134 acc=int(l[34:38]) 135 res_id=(" ", resseq, icode) 136 dssp[(chainid, res_id)]=(aa, ss, acc) 137 keys.append((chainid, res_id)) 138 fp.close() 139 return dssp, keys
140 141
142 -class DSSP(AbstractResiduePropertyMap):
143 """ 144 Run DSSP on a pdb file, and provide a handle to the 145 DSSP secondary structure and accessibility. 146 147 Note that DSSP can only handle one model. 148 149 Example: 150 >>> p=PDBParser() 151 >>> structure=parser.get_structure("1fat.pdb") 152 >>> model=structure[0] 153 >>> dssp=DSSP(model, "1fat.pdb") 154 >>> # print dssp data for a residue 155 >>> secondary_structure, accessibility=dssp[(chain_id, res_id)] 156 """
157 - def __init__(self, model, pdb_file, dssp="dssp"):
158 """ 159 @param model: the first model of the structure 160 @type model: L{Model} 161 162 @param pdb_file: a PDB file 163 @type pdb_file: string 164 165 @param dssp: the dssp executable (ie. the argument to os.system) 166 @type dssp: string 167 """ 168 # create DSSP dictionary 169 dssp_dict, dssp_keys=dssp_dict_from_pdb_file(pdb_file, dssp) 170 dssp_map={} 171 dssp_list=[] 172 # Now create a dictionary that maps Residue objects to 173 # secondary structure and accessibility, and a list of 174 # (residue, (secondary structure, accessibility)) tuples 175 for key in dssp_keys: 176 chain_id, res_id=key 177 chain=model[chain_id] 178 res=chain[res_id] 179 aa, ss, acc=dssp_dict[key] 180 res.xtra["SS_DSSP"]=ss 181 res.xtra["EXP_DSSP_ASA"]=acc 182 # relative accessibility 183 resname=res.get_resname() 184 rel_acc=acc/MAX_ACC[resname] 185 if rel_acc>1.0: 186 rel_acc=1.0 187 res.xtra["EXP_DSSP_RASA"]=rel_acc 188 # Verify if AA in DSSP == AA in Structure 189 # Something went wrong if this is not true! 190 resname=to_one_letter_code[resname] 191 if resname=="C": 192 # DSSP renames C in C-bridges to a,b,c,d,... 193 # - we rename it back to 'C' 194 if _dssp_cys.match(aa): 195 aa='C' 196 if not (resname==aa): 197 raise PDBException("Structure/DSSP mismatch at "+str(res)) 198 dssp_map[key]=((res, ss, acc, rel_acc)) 199 dssp_list.append((res, ss, acc, rel_acc)) 200 AbstractResiduePropertyMap.__init__(self, dssp_map, dssp_keys, dssp_list)
201 202 203 if __name__=="__main__": 204 205 import sys 206 207 p=PDBParser() 208 s=p.get_structure('X', sys.argv[1]) 209 210 model=s[0] 211 212 d=DSSP(model, sys.argv[1]) 213 214 for r in d: 215 print r 216 217 print d.keys() 218 219 print len(d) 220 221 print ('A', 1) in d 222 223 print d[('A', 1)] 224 225 print s[0]['A'][1].xtra 226