1
2
3
4
5
6
7
8
9
10
11
12 """ Graph module
13
14 Provides:
15
16 o GraphData - Contains data from which a graph will be drawn, and
17 information about its presentation
18
19 For drawing capabilities, this module uses reportlab to draw and write
20 the diagram:
21
22 http://www.reportlab.com
23
24 For dealing with biological information, the package expects BioPython
25 objects:
26
27 http://www.biopython.org
28 """
29
30
31 from reportlab.lib import colors
32
33 from math import sqrt
34
36 """ GraphData
37
38 Provides:
39
40 Methods:
41
42 o __init__(self, id=None, data=None, name=None, style='bar',
43 color=colors.lightgreen, altcolor=colors.darkseagreen)
44 Called on instantiation
45
46 o set_data(self, data) Load the object with data to be plotted
47
48 o get_data(self) Returns the data to be plotted as a list of
49 (position, value) tuples
50
51 o add_point(self, point) Add a single point to the data set
52
53 o quartiles(self) Returns a tuple of the data quartiles
54
55 o range(self) Returns a tuple of the base range covered by the graph
56 data
57
58 o mean(self) Returns a float of the mean data point value
59
60 o stdev(self) Returns the sample standard deviation of the data values
61
62 o __len__(self) Returns the number of points in the data set
63
64 o __getitem__(self, index) Returns the value at the base specified,
65 or graph data in the base range
66
67 o __str__(self) Returns a formatted string describing the graph data
68
69 Attributes:
70
71 o id Unique identifier for the data
72
73 o data Dictionary holding the data values, keyed by position
74
75 o name String describing the data
76
77 o style String ('bar', 'heat', 'line') describing how to draw the data
78
79 o poscolor colors.Color for drawing high (some styles) or all
80 values
81
82 o negcolor colors.Color for drawing low values (some styles)
83
84 o linewidth Int, thickness to draw the line in 'line' styles
85 """
def __init__(self, id=None, data=None, name=None, style='bar',
             color=colors.lightgreen, altcolor=colors.darkseagreen,
             center=None, colour=None, altcolour=None, centre=None):
    """__init__(self, id=None, data=None, name=None, style='bar',
                color=colors.lightgreen, altcolor=colors.darkseagreen,
                center=None, colour=None, altcolour=None, centre=None)

    o id        Unique ID for the graph

    o data      List of (position, value) tuples; when given it is
                loaded through set_data()

    o name      String describing the graph

    o style     String describing the presentation style ('bar', 'line',
                'heat')

    o color     colors.Color used to draw all values, or the 'high'
                values in some styles (overridden by the backwards
                compatible argument with UK spelling, colour)

    o altcolor  colors.Color used to draw the 'low' values in some
                styles (overridden by the backwards compatible argument
                with UK spelling, altcolour)

    o center    Value at which x-axis crosses y-axis (overridden by the
                backwards compatible argument with UK spelling, centre)

    The line width used by 'line' styles is initialised to 2.
    """
    # A UK-spelled argument, when supplied, takes precedence over its
    # US-spelled counterpart
    color = color if colour is None else colour
    altcolor = altcolor if altcolour is None else altcolour
    center = center if centre is None else centre

    self.id = id
    # Start with an empty position -> value mapping, then load any data
    self.data = {}
    if data is not None:
        self.set_data(data)
    self.name = name

    # Presentation attributes
    self.style = style
    self.poscolor = color
    self.negcolor = altcolor
    self.linewidth = 2
    self.center = center
134
def _set_centre(self, value):
    """ _set_centre(self, value)

    o value     New value for the center attribute

    Setter behind the deprecated UK-spelled centre property: issues a
    BiopythonDeprecationWarning and then stores value as self.center.
    """
    # Imports are kept local to the setter, as they only matter when the
    # deprecated spelling is actually used
    import warnings
    from Bio import BiopythonDeprecationWarning
    message = "The _set_centre method and .centre attribute are deprecated; please use the .center attribute instead"
    warnings.warn(message, BiopythonDeprecationWarning)
    self.center = value


# Reading .centre returns .center silently; only assignment warns
centre = property(fget=lambda self: self.center,
                  fset=_set_centre,
                  doc="Backwards compatible alias for center (DEPRECATED)")
143
def set_data(self, data):
    """ set_data(self, data)

    o data      List of (position, value) tuples

    Store every (position, value) pair in the data dictionary.  A value
    already held for a position is replaced; positions not mentioned in
    data are left untouched.
    """
    # The generator unpacks each entry exactly as a for-loop would, so a
    # malformed entry fails at the same point with the same error
    self.data.update((position, value) for position, value in data)
153
154
def get_data(self):
    """ get_data(self) -> [(int, float), (int, float), ...]

    Return the stored data as a new list of (position, value) tuples,
    sorted in ascending order.
    """
    # items() already yields (position, value) pairs; sorted() returns
    # them as a fresh, ordered list
    return sorted(self.data.items())
166
167
def add_point(self, point):
    """ add_point(self, point)

    o point     (position, value) tuple

    Record one data point.  If the position already holds a value, that
    value is replaced.
    """
    position, value = point
    self.data[position] = value
177
178
def quartiles(self):
    """ quartiles(self) -> (float, float, float, float, float)

    Returns the (minimum, lowerQ, medianQ, upperQ, maximum) values as
    a tuple.

    The quartiles are picked by integer index from the sorted values
    (len//4, len//2 and 3*len//4); no interpolation is performed.

    Raises IndexError if the data set is empty.
    """
    # sorted() builds a new list from the values; calling .sort() on the
    # result of dict.values() fails on Python 3, where it is a view
    data = sorted(self.data.values())
    datalen = len(data)
    return (data[0], data[datalen // 4], data[datalen // 2],
            data[3 * datalen // 4], data[-1])
190
191
def range(self):
    """ range(self) -> (int, int)

    Returns the range of the data, i.e. its start and end points on
    the genome as a (start, end) tuple: the lowest and highest
    positions that hold a data value.

    Raises IndexError if the data set is empty.
    """
    # Iterating the dictionary yields its keys (the positions); sorted()
    # returns them as a list, whereas .sort() on dict.keys() fails on
    # Python 3 because keys() is a view
    positions = sorted(self.data)
    return (positions[0], positions[-1])
203
204
def mean(self):
    """ mean(self) -> Float

    Returns the arithmetic mean of the data values (positions are
    ignored).

    Raises ZeroDivisionError if the data set is empty.
    """
    values = self.data.values()
    total = 0.
    # Each value is converted to float before being accumulated
    for value in values:
        total += float(value)
    return total / len(values)
215
216
def stdev(self):
    """ stdev(self) -> Float

    Returns the sample standard deviation for the data values, i.e.
    the square root of the summed squared deviations from the mean
    divided by (number of points - 1).

    Raises ZeroDivisionError when the data set holds a single point.
    """
    values = self.data.values()
    average = self.mean()
    squared_deviations = 0.
    for value in values:
        squared_deviations += float((value - average)**2)

    # Sample (n - 1) rather than population (n) denominator
    degrees_of_freedom = len(values) - 1
    return sqrt(squared_deviations / degrees_of_freedom)
230
231
def __len__(self):
    """ __len__(self) -> Int

    Returns how many data points are stored, i.e. the number of
    positions that hold a value.
    """
    point_count = len(self.data)
    return point_count
238
239
def __getitem__(self, index):
    """ __getitem__(self, index) -> Float or list of tuples

    o index     Integer position, or a slice of positions

    Given an integer representing a position on the sequence, returns
    the data value stored at that position (KeyError if the position
    holds no value).

    Given a slice, returns the graph data from that region as a list of
    (position, value) tuples sorted by position.  Both the start and
    the stop of the slice are treated as inclusive.  An omitted start
    or stop leaves that end of the region open.

    Raises ValueError for a slice with a step other than 1, and
    TypeError if index is neither an integer nor a slice.
    """
    if isinstance(index, int):
        return self.data[index]
    if not isinstance(index, slice):
        raise TypeError("Need an integer or a slice")
    if index.step is not None and index.step != 1:
        raise ValueError("Slices with a step are not supported")

    low, high = index.start, index.stop
    # sorted() works on the dictionary's keys directly; .sort() on
    # dict.keys() fails on Python 3.  A None bound cannot be compared
    # with a position there either, so it is treated as an open end.
    return [(pos, self.data[pos]) for pos in sorted(self.data)
            if (low is None or pos >= low)
            and (high is None or pos <= high)]
269
270
def __str__(self):
    """ __str__(self) -> ""

    Returns a multi-line summary of the graph data: name and ID, number
    of points, mean, sample standard deviation, the five quartile
    values and the covered position range.
    """
    # The statistics are evaluated top to bottom, in the order in which
    # they appear in the report
    report_lines = [
        "\nGraphData: %s, ID: %s" % (self.name, self.id),
        "Number of points: %d" % len(self.data),
        "Mean data value: %s" % self.mean(),
        "Sample SD: %.3f" % self.stdev(),
        "Minimum: %s\n1Q: %s\n2Q: %s\n3Q: %s\nMaximum: %s" % self.quartiles(),
        "Sequence Range: %s..%s" % self.range(),
    ]
    return "\n".join(report_lines)
283