Package Bio :: Module File
[hide private]
[frames | no frames]

Source Code for Module Bio.File

  1  # Copyright 1999 by Jeffrey Chang.  All rights reserved. 
  2  # This code is part of the Biopython distribution and governed by its 
  3  # license.  Please see the LICENSE file that should have been included 
  4  # as part of this package. 
  5   
  6  """Code for more fancy file handles. 
  7   
  8   
  9  Classes: 
 10  UndoHandle     File object decorator with support for undo-like operations. 
 11  StringHandle   Wraps a file object around a string. 
 12  SGMLHandle     File object that automatically strips SGML tags from data. 
 13   
 14  SGMLStripper   Object that strips SGML. 
 15   
 16  """ 
 17  import os 
 18  import StringIO 
 19  import sgmllib 
 20   
class UndoHandle:
    """A Python handle that adds functionality for saving lines.

    Saves lines in a LIFO fashion.

    Added methods:
    saveline    Save a line to be returned next time.
    peekline    Peek at the next line without consuming it.

    Any attribute not defined here is looked up on the wrapped handle.

    """

    def __init__(self, handle):
        """Wrap handle, a file-like object, with an empty buffer of saved lines."""
        self._handle = handle
        self._saved = []

    def __iter__(self):
        """Return self; iteration yields lines until readline returns ''."""
        return self

    def next(self):
        """Return the next line, raising StopIteration at end of data."""
        line = self.readline()
        if not line:
            raise StopIteration
        return line

    # Same method under the name used by the newer iterator protocol.
    __next__ = next

    def readlines(self, *args, **keywds):
        """Return the saved lines followed by the handle's remaining lines.

        Arguments are passed through to the wrapped handle's readlines.
        The saved-line buffer is emptied.

        """
        lines = self._saved + self._handle.readlines(*args, **keywds)
        self._saved = []
        return lines

    def readline(self, *args, **keywds):
        """Return the most recently saved line, or else read a fresh one.

        Arguments are only used when the line comes from the wrapped
        handle; a saved line is returned whole.

        """
        if self._saved:
            return self._saved.pop(0)
        return self._handle.readline(*args, **keywds)

    def read(self, size=-1):
        """Read up to size characters, consuming saved lines first.

        A negative size (or None) means "read everything that is left":
        all saved lines followed by the rest of the wrapped handle.

        """
        if size is None or size < 0:
            # Treat every "no limit" spelling alike.  Only -1 used to be
            # recognised, so e.g. read(-2) skipped the saved lines and
            # left them queued behind data that had already been returned.
            saved = "".join(self._saved)
            self._saved[:] = []
            return saved + self._handle.read(-1)

        pieces = []
        while size > 0 and self._saved:
            head = self._saved[0]
            if len(head) <= size:
                # The whole saved line fits in what is still wanted.
                size = size - len(head)
                pieces.append(self._saved.pop(0))
            else:
                # Hand out only a prefix; keep the remainder saved.
                pieces.append(head[:size])
                self._saved[0] = head[size:]
                size = 0
        return "".join(pieces) + self._handle.read(size)

    def saveline(self, line):
        """Push line back so that it is the next line returned.

        Empty lines are ignored.

        """
        if line:
            self._saved.insert(0, line)

    def peekline(self):
        """Return the next line without consuming it."""
        if self._saved:
            return self._saved[0]
        line = self._handle.readline()
        self.saveline(line)
        return line

    def tell(self):
        """Return the handle's position minus the length of the saved lines."""
        pending = sum(len(line) for line in self._saved)
        return self._handle.tell() - pending

    def seek(self, *args):
        """Discard the saved lines and seek the wrapped handle."""
        self._saved = []
        self._handle.seek(*args)

    def __getattr__(self, attr):
        """Delegate unknown attributes to the wrapped handle."""
        if attr == "_handle":
            # Only reached when _handle has not been set yet (for example
            # on a bare instance); without this guard the lookup of
            # self._handle below would recurse without end.
            raise AttributeError(attr)
        return getattr(self._handle, attr)
# StringHandle wraps a file object around a string; it is simply an
# alias for StringIO.StringIO.
# I could make this faster by using cStringIO.
# However, cStringIO (in v1.52) does not implement the
# readlines method.
StringHandle = StringIO.StringIO


class SGMLHandle:
    """A Python handle that automatically strips SGML tags from data.

    Data read through read, readline and readlines is passed through an
    SGMLStripper before it is returned.  Any other attribute is looked
    up on the wrapped handle.

    """

    def __init__(self, handle):
        """SGMLHandle(handle)

        handle is a file handle to SGML-formatted data.

        """
        self._handle = handle
        self._stripper = SGMLStripper()

    def read(self, *args, **keywds):
        """Read from the wrapped handle and return the data without tags."""
        data = self._handle.read(*args, **keywds)
        return self._stripper.strip(data)

    def readline(self, *args, **keywds):
        """Read one line from the wrapped handle and return it without tags."""
        line = self._handle.readline(*args, **keywds)
        return self._stripper.strip(line)

    def readlines(self, *args, **keywds):
        """Read all lines from the wrapped handle, stripping tags from each."""
        lines = self._handle.readlines(*args, **keywds)
        # Strip each line itself; the builtin ``str`` type used to be
        # passed to the stripper here instead of the line.
        return [self._stripper.strip(line) for line in lines]

    def __getattr__(self, attr):
        """Delegate unknown attributes to the wrapped handle."""
        if attr == "_handle":
            # Only reached when _handle has not been set yet; without this
            # guard the lookup of self._handle below would recurse
            # without end.
            raise AttributeError(attr)
        return getattr(self._handle, attr)
134 -class SGMLStripper:
135 - class MyParser(sgmllib.SGMLParser):
136 - def __init__(self):
137 sgmllib.SGMLParser.__init__(self) 138 self.data = ''
139 - def handle_data(self, data):
140 self.data = self.data + data
141
142 - def __init__(self):
143 self._parser = SGMLStripper.MyParser()
144
145 - def strip(self, str):
146 """S.strip(str) -> string 147 148 Strip the SGML tags from str. 149 150 """ 151 if not str: # empty string, don't do anything. 152 return '' 153 # I need to make sure that I don't return an empty string if 154 # the buffer is not empty. This can happen if there's a newline 155 # character embedded within a tag. Thus, I'll first check to 156 # see if the last character is a newline. If it is, and it's stripped 157 # away, I'll add it back. 158 is_newline = str[-1] in ['\n', '\r'] 159 160 self._parser.data = '' # clear the parser's data (don't reset) 161 self._parser.feed(str) 162 if self._parser.data: 163 str = self._parser.data 164 elif is_newline: 165 str = '\n' 166 else: 167 str = '' 168 return str
169