Package Bio :: Package FSSP
[hide private]
[frames | no frames]

Source Code for Package Bio.FSSP

  1  """Parser for FSSP files, used in a database of protein fold classifications. 
  2   
  3  This is a module to handle FSSP files. For now it parses only the header, 
  4  summary and alignment sections. 
  5   
  6  See: Holm and Sander (1996) The FSSP database: fold classification based on 
  7  structure-structure alignment of proteins. 
  8   
  9  functions: read_fssp(file_handle): reads an FSSP file into records. Returns a 
 10  tuple of three instances: the header, the summary records and the alignment records. 
 11  mult_align: returns a Biopython alignment object 
 12  """ 
 13  import string 
 14  import re 
 15  import fssp_rec 
 16  from Bio.Align import Generic 
 17  from Bio import Alphabet 
 18  fff_rec = fssp_rec.fff_rec 
 19  header_records = { 
 20     'database' : re.compile('^DATABASE'), 
 21     'pdbid': re.compile('^PDBID'), 
 22     'header': re.compile('^HEADER'), 
 23     'compnd': re.compile('^COMPND'), 
 24     'author': re.compile('^AUTHOR'), 
 25     'source': re.compile('^SOURCE'), 
 26     'seqlength': re.compile('^SEQLENGTH'), 
 27     'nalign': re.compile('^NALIGN') 
 28  } 
 29   
 30  summary_title = re.compile('## +SUMMARY') 
 31  summary_rec = re.compile(' *[0-9]+: +[1-9][0-9a-z]{3,3}') 
 32  alignments_title= re.compile('## +ALIGNMENTS') 
 33  alignments_rec = re.compile(' *[0-9]+ +-{0,1}[0-9]+') 
 34  equiv_title = re.compile('## +EQUIVALENCES') 
 35   
class FSSPHeader:
    """Holds the header fields of an FSSP file.

    The attributes are filled one line at a time by fill_header():
    database, seqlength and nalign hold integers, compnd and author hold
    lists of whitespace-separated words, and pdbid, header and source
    hold strings.
    """

    def __init__(self):
        self.database = None
        self.pdbid = ''
        self.header = ''
        self.compnd = ''
        self.source = ''
        self.author = []
        self.seqlength = 0
        self.nalign = 0

    def fill_header(self, inline):
        """Store the value carried by one header line.

        inline is a raw line of the file.  It is tested against every
        pattern in the module-level header_records table; a line that
        matches none of them is ignored.

        Raises IndexError when a matching line has no value after its
        keyword, and ValueError when a numeric field is not an integer.
        """
        for name, pattern in header_records.items():
            if not pattern.match(inline):
                continue
            if name in ('database', 'seqlength', 'nalign'):
                # Numeric fields: the value is the second token.
                setattr(self, name, int(inline.split()[1]))
            elif name in ('compnd', 'author'):
                # Multi-word fields are kept as a list of words.
                setattr(self, name, inline.split()[1:])
            elif name in ('source', 'header'):
                # Free text: everything after the first blank, trimmed.
                setattr(self, name, inline[inline.find(' ') + 1:].strip())
            else:
                setattr(self, name, inline.split()[1])
58
class PosAlign:
    """One position of an aligned sequence, parsed from an alignment token.

    Attributes:
        aa:  the residue character, or '-' for a gap.  A character that
             equals its own lower-case form is stored as 'C'
             (NOTE(review): presumably lower-case letters mark
             disulfide-bonded cysteines, as in DSSP - confirm).
        gap: 1 when the token is the gap marker '..', otherwise 0.
        ss:  the upper-cased second character of the token, or '0' when
             the token has none (this includes gaps).
    """

    def __init__(self, inStr):
        """Parse the one- or two-character token inStr.

        Surrounding whitespace is ignored.  Raises ValueError when the
        stripped token is not exactly 1 or 2 characters long.
        """
        inStr = inStr.strip()
        if len(inStr) not in (1, 2):
            raise ValueError(
                'PosAlign: expected 1 or 2 characters, got %r' % (inStr,))
        if inStr == '..':
            self.aa = '-'
            self.gap = 1
            # Always define ss so that reading it on a gap cannot fail.
            self.ss = '0'
        else:
            self.gap = 0
            self.aa = inStr[0]
            if self.aa == self.aa.lower():
                self.aa = 'C'
            if len(inStr) == 2:
                self.ss = inStr[1].upper()
            else:
                self.ss = '0'

    def __repr__(self):
        """Return '..' for a gap, otherwise aa followed by lower-cased ss."""
        if self.gap:
            return '..'
        return self.aa + self.ss.lower()

    __str__ = __repr__
85 86 87 88
class FSSPSumRec:
    """Contains info from an FSSP summary record.

    The record is split on whitespace and its fields are stored as:
    nr (int), pdb1/chain1 and pdb2/chain2 (strings), zscore, rmsd, lali,
    lseq2 and pID (floats), revers, permut and nfrag (ints), topo
    (string) and doc (the remaining words joined by single blanks and
    terminated by a newline).  raw keeps the unmodified input line.
    """

    def __init__(self, in_str):
        """Parse the summary line in_str.

        Raises ValueError for a malformed PDB identifier or number, and
        IndexError when the line has fewer than twelve fields.
        """
        self.raw = in_str
        in_rec = in_str.strip().split()
        # The record number is written with a trailing colon, e.g. "12:".
        self.nr = int(in_rec[0][:-1])
        self.pdb1, self.chain1 = self._split_pdb_id(in_rec[1], 'Bad PDB ID 1')
        self.pdb2, self.chain2 = self._split_pdb_id(in_rec[2], 'Bad PDB ID 2')
        self.zscore = float(in_rec[3])
        self.rmsd = float(in_rec[4])
        self.lali = float(in_rec[5])
        self.lseq2 = float(in_rec[6])
        self.pID = float(in_rec[7])
        self.revers = int(in_rec[8])
        self.permut = int(in_rec[9])
        self.nfrag = int(in_rec[10])
        self.topo = in_rec[11]
        self.doc = ' '.join(in_rec[12:]) + '\n'

    def _split_pdb_id(self, token, error_message):
        """Split a 4- or 5-character identifier into (pdb code, chain).

        A 4-character identifier has no chain, which is reported as '0'.
        Any other length raises ValueError(error_message).
        """
        if len(token) == 4:
            return token, '0'
        if len(token) == 5:
            return token[:4], token[4]
        raise ValueError(error_message)

    def __repr__(self):
        """Return the original, unparsed record line."""
        return self.raw

    __str__ = __repr__
127
class FSSPAlignRec:
    """One residue line of the alignments section.

    The fixed fields are read from a parsed record using the field
    selectors defined on fssp_rec.align.  The aligned positions are first
    collected in PosAlignList (one PosAlign per token) and later copied
    into pos_align_dict, keyed by their 1-based position in that list.
    """

    def __init__(self, in_fff_rec):
        """Read the fixed fields from in_fff_rec.

        NOTE(review): in_fff_rec is assumed to return a string for each
        fssp_rec.align selector - confirm against fssp_rec.fff_rec.
        """
        fields = fssp_rec.align
        self.abs_res_num = int(in_fff_rec[fields.abs_res_num])
        self.pdb_res_num = in_fff_rec[fields.pdb_res_num].strip()
        self.chain_id = in_fff_rec[fields.chain_id]
        if self.chain_id == ' ':
            # A blank chain column is reported as chain '0'.
            self.chain_id = '0'
        self.res_name = in_fff_rec[fields.res_name]
        if self.res_name == self.res_name.lower():
            # A residue name equal to its lower-case form is stored as 'C'.
            self.res_name = 'C'
        self.ss1 = in_fff_rec[fields.ss1]
        self.turn3 = in_fff_rec[fields.turn3]
        self.turn4 = in_fff_rec[fields.turn4]
        self.turn5 = in_fff_rec[fields.turn5]
        self.pos_align_dict = {}
        self.PosAlignList = []

    def add_align_list(self, align_list):
        """Append a PosAlign for every token in align_list."""
        for token in align_list:
            self.PosAlignList.append(PosAlign(token))

    def pos_align_list2dict(self):
        """Copy PosAlignList into pos_align_dict with keys 1, 2, 3, ..."""
        for index, pos in enumerate(self.PosAlignList):
            self.pos_align_dict[index + 1] = pos
153 154
class FSSPAlignDict(dict):
    """Dictionary of alignment records with lookups by residue number.

    Every value is expected to provide abs_res_num, pdb_res_num and
    pos_align_dict (a mapping from aligned-sequence number to an object
    with an aa attribute).
    """

    def __init__(self):
        dict.__init__(self)
        # The following two dictionaries are pointers to records in self.
        # The first maps "pdb_residue_number: self_key",
        # the second maps "absolute_residue_number: self_key".
        self.pdb_res_dict = {}
        self.abs_res_dict = {}
        # Not used by this class; kept because callers may rely on it.
        self.data = {}

    def build_resnum_list(self):
        """Rebuild the two residue-number lookup tables from the records."""
        for key, record in self.items():
            self.abs_res_dict[record.abs_res_num] = key
            self.pdb_res_dict[record.pdb_res_num] = key

    def abs(self, num):
        """Return the record whose absolute residue number is num."""
        return self[self.abs_res_dict[num]]

    def pdb(self, num):
        """Return the record whose PDB residue number is num."""
        return self[self.pdb_res_dict[num]]

    def sequence(self, num):
        """Return aligned sequence number num as a string.

        Residues are taken in increasing absolute residue number, so
        build_resnum_list() must have been called first.
        """
        return ''.join(
            [self.abs(abs_num).pos_align_dict[num].aa
             for abs_num in sorted(self.abs_res_dict)])

    def fasta_mult_align(self):
        """Return every aligned sequence as FASTA-style text.

        Each sequence is written as a '> <number>' title line followed by
        its residues.  Residues are emitted in increasing absolute
        residue number rather than in dictionary order, so the result
        does not depend on how the records were hashed or inserted.
        An empty dictionary yields an empty string.
        """
        columns = {}
        for record in sorted(self.values(), key=lambda rec: rec.abs_res_num):
            for seq_num, position in record.pos_align_dict.items():
                columns.setdefault(seq_num, []).append(position.aa)

        pieces = []
        for seq_num in sorted(columns):
            pieces.append('> %d\n' % seq_num)
            for count, residue in enumerate(columns[seq_num]):
                # Historical layout, kept unchanged: the line break goes
                # in front of every 72nd residue, so the first line holds
                # 71 residues and the following lines hold 72.
                if (count + 1) % 72 == 0:
                    pieces.append('\n')
                pieces.append(residue)
            pieces.append('\n')
        return ''.join(pieces)
205
class FSSPSumDict(dict):
    """Plain dictionary holding FSSPSumRec objects keyed by record number."""
def read_fssp(fssp_handle):
    """Process an FSSP file into its constituents.

    fssp_handle is an open handle; it is read with readline() only.

    Returns a 3-tuple (header, sum_dict, align_dict):
        header     - FSSPHeader filled from the lines before the summary
        sum_dict   - FSSPSumDict of FSSPSumRec keyed by record number
        align_dict - FSSPAlignDict of FSSPAlignRec keyed by
                     chain id + residue name + PDB residue number

    Raises ValueError when the SUMMARY title is missing, or when the file
    ends before an ALIGNMENTS or EQUIVALENCES title is found.  Raises
    EOFError when the file ends inside a block of alignment records.
    """
    header = FSSPHeader()
    sum_dict = FSSPSumDict()
    align_dict = FSSPAlignDict()

    # Everything before the SUMMARY title belongs to the header.  An empty
    # string from readline() means end of file: stop instead of looping
    # forever on it.
    curline = fssp_handle.readline()
    while curline and not summary_title.match(curline):
        header.fill_header(curline)
        curline = fssp_handle.readline()
    if not summary_title.match(curline):
        raise ValueError('Bad FSSP file: no summary record found')

    curline = fssp_handle.readline()  # Read the title line, discard
    curline = fssp_handle.readline()  # Read the next line
    # Process the summary records.
    while summary_rec.match(curline):
        cur_sum_rec = FSSPSumRec(curline)
        sum_dict[cur_sum_rec.nr] = cur_sum_rec
        curline = fssp_handle.readline()

    # Outer loop: process everything up to the EQUIVALENCES title record.
    while not equiv_title.match(curline):
        # Skip ahead to the next section title, stopping at end of file.
        while (curline and
               not alignments_title.match(curline) and
               not equiv_title.match(curline)):
            curline = fssp_handle.readline()
        if equiv_title.match(curline):
            break
        if not alignments_title.match(curline):
            raise ValueError('Bad FSSP file: no alignments title record found')

        # We are on an alignments title.  Parse its records in a loop.
        curline = fssp_handle.readline()  # Read the title line, discard
        curline = fssp_handle.readline()  # Read the next line
        while alignments_rec.match(curline):
            align_rec = FSSPAlignRec(fff_rec(curline))
            key = (align_rec.chain_id + align_rec.res_name +
                   str(align_rec.pdb_res_num))
            align_list = curline[fssp_rec.align.start_aa_list:].split()
            # A residue seen in an earlier alignments block keeps its
            # first record; later blocks only extend its aligned positions.
            if key not in align_dict:
                align_dict[key] = align_rec
            align_dict[key].add_align_list(align_list)
            curline = fssp_handle.readline()
            if not curline:
                raise EOFError('Unexpected end of FSSP file in alignments')

    for record in align_dict.values():
        record.pos_align_list2dict()
        del record.PosAlignList
    align_dict.build_resnum_list()
    return (header, sum_dict, align_dict)
268