Package nltk_lite :: Package contrib :: Package toolbox :: Module utilities
[hide private]
[frames | no frames]

Source Code for Module nltk_lite.contrib.toolbox.utilities

  1  # Natural Language Toolkit: Shoebox Utilities 
  2  # 
  3  # Copyright (C) 2001-2006 University of Pennsylvania 
  4  # Author: Stuart Robinson <stuart@zapata.org> 
  5  # URL: <http://nltk.sf.net> 
  6  # For license information, see LICENSE.TXT 
  7   
  8  """ 
  9  This module provides basic functionality for handling shoebox format files. 
 10  These feed into the more sophisticated Shoebox tools available in the 
 11  modules I{lexicon}, I{text}, and I{metadata}. 
 12  """ 
 13   
 14  import re 
 15  from UserDict import UserDict 
 16   
 17   
def parse_field(line):
    """
    Split a raw Shoebox field line into its field marker and field value.

    The line must start with a backslash, followed by the marker, a single
    space, and then the value.  The marker is everything between the
    backslash and the first space; the value is the rest of the line.

    @param line: the raw text of one field
    @type line: string
    @return: a C{(marker, value)} pair, or C{None} if the line does not
        have the expected shape (for example, it has no leading backslash
        or no space after the marker)
    @rtype: tuple
    """
    match = re.match(r"\\(.*?) (.*)", line)
    if match is None:
        return None
    marker, value = match.group(1), match.group(2)
    return (marker, value)
32 33
class Field:
    """
    Class used to represent a standard format field.  A field consists
    of a field marker and its value, stored together as a tuple.

    NOTE(review): the constructor documents the value as a string, while
    L{has_unique_value} and L{get_values} treat it as a sequence of
    values (C{len()} and C{sep.join()}).  Presumably callers pass a list
    of strings when a field can hold several values -- confirm against
    the callers.
    """

    def __init__(self, fieldMarker, fieldValue):
        """
        Construct a Field object as a tuple of a field marker and a
        field value.

        @param fieldMarker: a field's marker
        @type fieldMarker: string
        @param fieldValue: a field's value (the actual data)
        @type fieldValue: string
        """
        self._field = (fieldMarker, fieldValue)

    def __str__(self):
        """
        Return the string representation of a Field object: a backslash,
        the marker, a space, and the stored value formatted with C{%s}.

        @return: a Field object formatted as a string
        @rtype: string
        """
        # Use the accessors this class actually defines; the previous
        # getMarker()/getValue() names do not exist on Field.
        return "\\%s %s" % (self.get_marker(), self.get_values())

    def get_marker(self):
        """
        Return the marker for a field.

        @return: a field's marker
        @rtype: string
        """
        return self._field[0]

    def has_unique_value(self):
        """
        Check whether a field has exactly one value.

        Returns true for a single value, and false when the field has no
        value or more than one value.

        @return: whether the value for a given field is unique
        @rtype: boolean
        """
        values = self.get_values()
        # NOTE(review): if the stored value is a plain string, len()
        # counts characters rather than values -- confirm what callers
        # store here.
        return bool(values) and len(values) == 1

    def has_value(self):
        """
        Check whether a field has a value or not.

        @return: whether a given field has a (non-empty) value
        @rtype: boolean
        """
        return bool(self.get_values())

    def get_values(self, sep=None):
        """
        Return the values for a field, either as stored or, if a
        separator string is provided, joined into a single string.

        @param sep: separator placed between the values; if C{None} the
            stored value is returned unchanged
        @type sep: string
        @return: the values for a field; if sep provided, formatted as string
        @rtype: a list of values or a string of these values joined by I{sep}
        """
        values = self._field[1]
        if sep is None:
            return values
        return sep.join(values)
108 109 110 # class FieldParser: 111 # """ 112 # Parses raw Shoebox field into a field object. 113 # """ 114 # def __init__(self, rawText): 115 # self._rawText = rawText 116 117 # def getRawText(self): 118 # """ 119 # This method returns the raw text to be parsed as a field by the parser. 120 121 # @return: string 122 # @rtype: a string with a standard format field as raw text 123 # """ 124 # return self._rawText 125 126 # def setRawText(self, rawtext): 127 # """ 128 # This method constructs a Field object as a tuple of a field 129 # marker and a field value. 130 # @param rawtext: the raw text to be parsed into a field object 131 # @type rawtext: string 132 # """ 133 # self._rawtext = rawtext 134 # return self._rawtext 135 136 # def parse(self): 137 # regex = r"\\([A-Za-z][A-Za-z0-9\_\-]*) (.*)" 138 # mo = re.search(regex, 139 # self.getRawText()) 140 # fm = mo.group(1) 141 # fv = mo.group(2) 142 # return Field(fm, fv) 143 144
class SequentialDictionary(UserDict):
    """
    Dictionary that retains the order in which keys were added to it.

    Insertion order is tracked in the list C{self._keys}, which is kept
    alongside the mapping managed by C{UserDict}.  Assigning to a key
    that is already present keeps the key's original position.
    """

    def __init__(self, dict=None):
        """
        Create the dictionary, optionally filled from another mapping.

        @param dict: initial contents, passed on to C{UserDict.__init__}
        @type dict: mapping or C{None}
        """
        # Create the key list first so that it already exists if the base
        # constructor fills the mapping through this class's own methods.
        self._keys = []
        UserDict.__init__(self, dict)

    def __delitem__(self, key):
        """Delete I{key} from the mapping and from the key order."""
        UserDict.__delitem__(self, key)
        self._keys.remove(key)

    def __setitem__(self, key, item):
        """Store I{item} under I{key}; new keys go to the end of the order."""
        UserDict.__setitem__(self, key, item)
        if key not in self._keys:
            self._keys.append(key)

    def clear(self):
        """Remove every item and forget the recorded key order."""
        UserDict.clear(self)
        self._keys = []

    def copy(self):
        """
        Return a copy of this dictionary with the same key order.

        @return: a copy produced by C{UserDict.copy}
        @rtype: SequentialDictionary
        """
        duplicate = UserDict.copy(self)
        # Give the copy its own key list so that later changes to one
        # dictionary do not affect the other's ordering.  (Slice the list
        # attribute, not the keys() method.)
        duplicate._keys = self._keys[:]
        return duplicate

    def items(self):
        """
        @return: the C{(key, value)} pairs in insertion order
        @rtype: list
        """
        return list(zip(self._keys, self.values()))

    def keys(self):
        """
        @return: the keys in insertion order, as a new list so callers
            cannot disturb the internal ordering
        @rtype: list
        """
        return self._keys[:]

    def pop(self, key, *args):
        """
        Remove I{key} and return its value, or the optional default given
        as the next argument when I{key} is missing.

        @raise KeyError: if I{key} is missing and no default is given
        """
        value = UserDict.pop(self, key, *args)
        # The base implementation may or may not go through __delitem__,
        # so make sure the key is gone from the ordering either way.
        if key in self._keys:
            self._keys.remove(key)
        return value

    def popitem(self):
        """
        Remove and return the most recently added C{(key, value)} pair.

        @raise KeyError: if the dictionary is empty
        """
        try:
            key = self._keys[-1]
        except IndexError:
            raise KeyError('dictionary is empty')
        val = self[key]
        del self[key]

        return (key, val)

    def setdefault(self, key, failobj=None):
        """
        Return the value for I{key}, first storing I{failobj} under it
        if the key is not present yet.
        """
        # Ordering is recorded by __setitem__ when the base class inserts
        # the default, so _keys is untouched if the call fails.
        value = UserDict.setdefault(self, key, failobj)
        if key not in self._keys:
            self._keys.append(key)
        return value

    def update(self, dict):
        """
        Merge the items of I{dict} into this dictionary; keys that are
        new are appended in the order given by C{dict.keys()}.

        @param dict: the source of the new items; must provide C{keys()}
        @type dict: mapping
        """
        UserDict.update(self, dict)
        for key in dict.keys():
            if key not in self._keys:
                self._keys.append(key)

    def values(self):
        """
        @return: the values, ordered like L{keys}
        @rtype: list
        """
        return [self.get(key) for key in self._keys]
200