1 """Parser for FSSP files, used in a database of protein fold classifications.
2
3 This is a module to handle FSSP files. For now it parses only the header,
4 summary and alignment sections.
5
6 See: Holm and Sander (1996) The FSSP database: fold classification based on
7 structure-structure alignment of proteins.
8
9 functions: read_fssp(file_handle): reads an fssp file into the records. Returns a
10 tuple of two instances.
11 mult_align: returns a Biopython alignment object
12 """
13 import string
14 import re
15 import fssp_rec
16 from Bio.Align import Generic
17 from Bio import Alphabet
# Module-level shortcut for the `fff_rec` attribute of the imported
# `fssp_rec` module, so later code can refer to it without the module prefix.
18 fff_rec = fssp_rec.fff_rec
# Header keywords of an FSSP file.  Each key is the attribute name under which
# the parsed value is stored; each pattern is anchored to the start of a line,
# so it is meant to be applied with ``.match()`` on a whole input line.
header_records = {
    'database': re.compile(r'^DATABASE'),
    'pdbid': re.compile(r'^PDBID'),
    'header': re.compile(r'^HEADER'),
    'compnd': re.compile(r'^COMPND'),
    'author': re.compile(r'^AUTHOR'),
    'source': re.compile(r'^SOURCE'),
    'seqlength': re.compile(r'^SEQLENGTH'),
    'nalign': re.compile(r'^NALIGN'),
}

# Title line that opens the summary section.
summary_title = re.compile(r'## +SUMMARY')
# Summary record: optional indent, a record number followed by ':', then a
# four-character identifier whose first character is a digit 1-9.
summary_rec = re.compile(r' *[0-9]+: +[1-9][0-9a-z]{3,3}')
# Title line that opens the alignments section.
alignments_title = re.compile(r'## +ALIGNMENTS')
# Alignment record: optional indent and two integer columns, the second of
# which may carry a leading minus sign.
alignments_rec = re.compile(r' *[0-9]+ +-{0,1}[0-9]+')
# Title line that opens the equivalences section.
equiv_title = re.compile(r'## +EQUIVALENCES')
35
# NOTE(review): the enclosing `class`/`def` lines and the indentation are not
# visible here; these statements look like the body of an initialiser for the
# header object - confirm against the full file.
# One attribute per key of `header_records`, each set to an empty default:
# None for `database`, '' for the text fields, [] for `author`, 0 for counts.
38 self.database = None
39 self.pdbid = ''
40 self.header = ''
# NOTE(review): `compnd` starts as '' but the header-parsing code stores a
# list of tokens for it (same branch as `author`) - confirm which is intended.
41 self.compnd = ''
42 self.source = ''
43 self.author = []
44 self.seqlength = 0
45 self.nalign = 0
# NOTE(review): the `def` line and the indentation are not visible here; the
# code reads `self` and `inline`, so presumably this is a method that receives
# one header line - confirm against the full file.
# Test the line against every header keyword pattern; on a match, store the
# parsed value on the attribute that has the same name as the keyword.
# NOTE(review): `string.split` is the Python 2 `string` module function; it
# does not exist on Python 3.
47 for i in header_records.keys():
48 if header_records[i].match(inline):
# Numeric fields: second whitespace-separated token converted to int.
49 if i == 'database' or i == 'seqlength' or i == 'nalign':
50 setattr(self,i,int(string.split(inline)[1]))
# Token-list fields: every token after the keyword, kept as a list.
51 elif i == 'compnd' or i == 'author':
52 setattr(self,i,string.split(inline)[1:])
# Free-text fields: everything after the first space, stripped of
# surrounding whitespace.
53 elif i == 'source' or i == 'header':
54 attr = inline[inline.find(' ')+1:].strip()
55 setattr(self,i,attr)
# Only 'pdbid' is left for this branch: second token, kept as a string.
56 else:
57 setattr(self,i,string.split(inline)[1])
58
# NOTE(review): the `class`/`def` lines and the indentation are not visible
# here; the error message suggests this is the initialiser of `PosAlign`,
# taking `inStr` - confirm against the full file.
# Parse one alignment cell, which must be one or two characters long once
# surrounding whitespace is removed.
61 inStr = string.strip(inStr)
62 if len(inStr) != 1 and len(inStr)!= 2:
# NOTE(review): the message says "not 2 chars" although a length of 1 is
# accepted, and the offending text is appended without a separator.
63 raise ValueError('PosAlign: length not 2 chars' + inStr)
# '..' marks a gap: the residue becomes '-' and the gap flag is set.
64 if inStr == '..':
65 self.aa = '-'
66 self.gap = 1
67 else:
68 self.gap = 0
69 self.aa = inStr[0]
# A residue character that equals its own lower-case form is replaced by 'C'.
# NOTE(review): presumably lower-case letters encode cysteines - confirm.
70 if self.aa == string.lower(self.aa):
71 self.aa = 'C'
# The optional second character is stored upper-cased in `ss`; '0' if absent.
72 if len(inStr) == 2:
73 self.ss = string.upper(inStr[1])
74 else:
75 self.ss = '0'
# NOTE(review): with the indentation lost it cannot be seen whether the two
# checks after `self.aa = inStr[0]` are nested under the `else:` branch;
# verify before restructuring this code.
76
# NOTE(review): the `def` line is not visible here; the alias at the end shows
# that the function being defined is `__repr__`.
# Build the two-character text for this cell: '..' when the gap flag is set,
# otherwise the residue followed by `ss` in lower case.
78 if self.gap:
79 outstring = '..'
80 else:
81 outstring = self.aa+string.lower(self.ss)
82 return outstring
83
# str() produces the same text as repr().
84 __str__ = __repr__
85
86
87
88
90 """ Contains info from an FSSP summary record"""
# NOTE(review): the `class`/`def` lines and the indentation are not visible
# here; the code reads `self` and `in_str`, so presumably this is the
# initialiser of the summary-record class - confirm against the full file.
# NOTE(review): `string.split`, `string.strip`, `string.rstrip`,
# `string.atoi` and `string.atof` are Python 2 `string` module functions and
# do not exist on Python 3.
# Keep the raw line, then split it into whitespace-separated fields.
92 self.raw = in_str
93 in_rec = string.split(string.strip(in_str))
94
# Field 0 is the record number followed by one trailing character, which is
# dropped (a ':' according to the `summary_rec` pattern).
95 self.nr = string.atoi(in_rec[0][:-1])
# Fields 1 and 2: the first four characters are stored as the PDB code; a
# fifth character, when present, is the chain, otherwise the chain is '0'.
# Any other field length is rejected.
96 self.pdb1 = in_rec[1][:4]
97 if len(in_rec[1]) == 4:
98 self.chain1='0'
99 elif len(in_rec[1]) == 5:
100 self.chain1=in_rec[1][4]
101 else:
102 raise ValueError('Bad PDB ID 1')
103 self.pdb2 = in_rec[2][:4]
104 if len(in_rec[2]) == 4:
105 self.chain2='0'
106 elif len(in_rec[2]) == 5:
107 self.chain2=in_rec[2][4]
108 else:
109 raise ValueError('Bad PDB ID 2')
# Fields 3-7 are parsed as floats, fields 8-10 as ints, field 11 is kept
# as text.
# NOTE(review): `lali` and `lseq2` are parsed as floats; presumably they are
# counts - confirm whether float is intended.
110 self.zscore = string.atof(in_rec[3])
111 self.rmsd = string.atof(in_rec[4])
112 self.lali = string.atof(in_rec[5])
113 self.lseq2 = string.atof(in_rec[6])
114 self.pID = string.atof(in_rec[7])
115 self.revers = string.atoi(in_rec[8])
116 self.permut = string.atoi(in_rec[9])
117 self.nfrag = string.atoi(in_rec[10])
118 self.topo = in_rec[11]
# All remaining tokens are re-joined with single spaces; trailing whitespace
# is removed and a newline appended.
119 self.doc = ''
120 for i in in_rec[12:]:
121 self.doc = self.doc + i + ' '
122 self.doc = string.rstrip(self.doc) + '\n'
123
# str() is bound to the same function as repr().
# NOTE(review): the `__repr__` definition this alias refers to is not visible
# here - confirm it precedes this line in the full file.
126 __str__ = __repr__
127
# NOTE(review): the `def` line and the indentation are not visible here -
# confirm the owning method against the full file.
# Copy the entries of `PosAlignList` into `pos_align_dict`, keyed by their
# position counted from 1.
149 j = 1
150 for i in self.PosAlignList:
151 self.pos_align_dict[j] = i
152 j = j + 1
153
154
157
158
159
# NOTE(review): the `class`/`def` lines are not visible here; presumably this
# is part of an initialiser - confirm against the full file.
# Start with three empty dicts.  Elsewhere in this file the values of
# `pdb_res_dict` and `abs_res_dict` are used as keys into `self`
# (`self[self.abs_res_dict[num]]`).
160 self.pdb_res_dict = {}
161 self.abs_res_dict = {}
162 self.data = {}
167
168
# NOTE(review): the `def` line is not visible here; callers in this file use
# `self.abs(...)`, so presumably this is the body of `abs(self, num)` -
# confirm.
# Translate `num` through `abs_res_dict` and return the entry of `self`
# stored under the resulting key.
170 return self[self.abs_res_dict[num]]
171
172
# NOTE(review): the `def` line is not visible here - confirm the method name
# and signature against the full file.
# Translate `num` through `pdb_res_dict` and return the entry of `self`
# stored under the resulting key.
174 return self[self.pdb_res_dict[num]]
175
176
# NOTE(review): the `def` line and the indentation are not visible here; the
# code reads `self` and `num` - confirm the signature against the full file.
# Build a string with one character per key of `abs_res_dict`, visiting the
# keys in sorted order and taking `.aa` of entry `num` in each record's
# `pos_align_dict`.
178 s = ''
# NOTE(review): calling `.sort()` on the result of `.keys()` requires a list,
# i.e. Python 2 behaviour; Python 3 dict views have no `.sort()`.
179 sorted_pos_nums = self.abs_res_dict.keys()
180 sorted_pos_nums.sort()
181 for i in sorted_pos_nums:
182 s += self.abs(i).pos_align_dict[num].aa
183 return s
184
# NOTE(review): the `def` line and the indentation are not visible here; the
# nesting described in the comments below is inferred from the statements and
# must be confirmed against the full file.
# Build a text with one '> <key>' header per alignment key, followed by the
# `aa` characters collected for that key.
186 mult_align_dict = {}
# Start one empty string per key of the `pos_align_dict` of the record
# returned by `self.abs(1)`.
187 for j in self.abs(1).pos_align_dict.keys():
188 mult_align_dict[j] = ''
# Append every record's `aa` to the string of the matching key.
# NOTE(review): records are visited in `self.values()` order, which is not
# sorted here - confirm that this order is intended.
# NOTE(review): the loop variable `fssp_rec` has the same name as the
# imported `fssp_rec` module.
189 for fssp_rec in self.values():
190 for j in fssp_rec.pos_align_dict.keys():
191 mult_align_dict[j] += fssp_rec.pos_align_dict[j].aa
# NOTE(review): `.keys()` followed by `.sort()` requires Python 2 lists.
192 seq_order = mult_align_dict.keys()
193 seq_order.sort()
194 out_str = ''
195 for i in seq_order:
196 out_str += '> %d\n' % i
197 k = 0
# A newline is inserted before every 72nd character, so (assuming
# `out_str += j` runs for every character) the first wrapped line holds 71
# characters and later lines hold 72.
198 for j in mult_align_dict[i]:
199 k += 1
200 if k % 72 == 0:
201 out_str += '\n'
202 out_str += j
203 out_str += '\n'
204 return out_str
205
208
209
210
211
212
268