"""
Maximum Entropy code.

Uses Improved Iterative Scaling:
XXX ref

# XXX need to define terminology

"""

import numpy


MAX_IIS_ITERATIONS = 10000
IIS_CONVERGE = 1E-5
MAX_NEWTON_ITERATIONS = 100
NEWTON_CONVERGE = 1E-10


class MaxEntropy:
    """Holds information for a Maximum Entropy classifier.

    Members:
    classes      List of the possible classes of data.
    alphas       List of the weights for each feature.
    feature_fns  List of the feature functions.

    """

    def __init__(self):
        self.classes = []
        self.alphas = []
        self.feature_fns = []


42 """calculate(me, observation) -> list of log probs
43
44 Calculate the log of the probability for each class. me is a
45 MaxEntropy object that has been trained. observation is a vector
46 representing the observed data. The return value is a list of
47 unnormalized log probabilities for each class.
48
49 """
    scores = []
    for klass in range(len(me.classes)):
        lprob = 0.0
        for fn, alpha in zip(me.feature_fns, me.alphas):
            lprob += fn(observation, klass) * alpha
        scores.append(lprob)
    return scores


59 """classify(me, observation) -> class
60
61 Classify an observation into a class.
62
63 """
    scores = calculate(me, observation)
    max_score, klass = scores[0], me.classes[0]
    for i in range(1, len(scores)):
        if scores[i] > max_score:
            max_score, klass = scores[i], me.classes[i]
    return klass


72 """_eval_feature_fn(fn, xs, classes) -> dict of values
73
74 Evaluate a feature function on every instance of the training set
75 and class. fn is a callback function that takes two parameters: a
76 training instance and a class. Return a dictionary of (training
77 set index, class index) -> non-zero value. Values of 0 are not
78 stored in the dictionary.
79
80 """
    values = {}
    for i in range(len(xs)):
        for j in range(len(classes)):
            f = fn(xs[i], classes[j])
            if f != 0:
                values[(i, j)] = f
    return values


90 """_calc_empirical_expects(xs, ys, classes, features) -> list of expectations
91
92 Calculate the expectation of each function from the data. This is
93 the constraint for the maximum entropy distribution. Return a
94 list of expectations, parallel to the list of features.
95
96 """
    class2index = {}
    for index, key in enumerate(classes):
        class2index[key] = index
    ys_i = [class2index[y] for y in ys]

    expect = []
    N = len(xs)
    for feature in features:
        s = 0
        for i in range(N):
            s += feature.get((i, ys_i[i]), 0)
        expect.append(float(s) / N)
    return expect


114 """_calc_model_expects(xs, classes, features, alphas) -> list of expectations.
115
116 Calculate the expectation of each feature from the model. This is
117 not used in maximum entropy training, but provides a good function
118 for debugging.
119
120 """
    p_yx = _calc_p_class_given_x(xs, classes, features, alphas)

    expects = []
    for feature in features:
        total = 0.0
        for (i, j), f in feature.items():
            total += p_yx[i][j] * f
        expects.append(total / len(xs))
    return expects


134 """_calc_p_class_given_x(xs, classes, features, alphas) -> matrix
135
136 Calculate P(y|x), where y is the class and x is an instance from
137 the training set. Return a XSxCLASSES matrix of probabilities.
138
139 """
    prob_yx = numpy.zeros((len(xs), len(classes)))

    # Calculate log P(y, x), up to normalization.
    for feature, alpha in zip(features, alphas):
        for (x, y), f in feature.items():
            prob_yx[x][y] += alpha * f
    # Take an exponent to get the unnormalized P(y, x).
    prob_yx = numpy.exp(prob_yx)
    # Divide out the total probability for each instance, giving P(y|x).
    for i in range(len(xs)):
        z = sum(prob_yx[i])
        prob_yx[i] = prob_yx[i] / z

    return prob_yx


def _calc_f_sharp(N, nclasses, features):
    """_calc_f_sharp(N, nclasses, features) -> matrix of f sharp values."""
    f_sharp = numpy.zeros((N, nclasses))
    for feature in features:
        for (i, j), f in feature.items():
            f_sharp[i][j] += f
    return f_sharp


def _iis_solve_delta(N, feature, f_sharp, empirical, prob_yx):
    """_iis_solve_delta(N, feature, f_sharp, empirical, prob_yx) -> delta"""
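    # For each feature i, Newton's method solves the IIS update equation
    #     SUM_{x,y} p~(x) P(y|x) f_i(x, y) exp(delta_i * f#(x, y)) = E~[f_i]
    # f_newton below is the residual (E~[f_i] minus the left-hand side) and
    # df_newton is its derivative with respect to delta.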
    delta = 0.0
    iters = 0
    while iters < MAX_NEWTON_ITERATIONS:
        f_newton = df_newton = 0.0
        for (i, j), f in feature.items():
            prod = prob_yx[i][j] * f * numpy.exp(delta * f_sharp[i][j])
            f_newton += prod
            df_newton += prod * f_sharp[i][j]
        f_newton, df_newton = empirical - f_newton / N, -df_newton / N

        ratio = f_newton / df_newton
        delta -= ratio
        if numpy.fabs(ratio) < NEWTON_CONVERGE:
            break
        iters += 1
    else:
        raise RuntimeError("Newton's method did not converge")
    return delta


def _train_iis(xs, classes, features, f_sharp, alphas, e_empirical):
    """_train_iis(xs, classes, features, f_sharp, alphas, e_empirical) -> alphas

    Do one iteration of hill climbing to find better alphas.

    """
    # Precalculate P(y|x) under the current alphas, then solve a delta for
    # each feature with Newton's method and shift that feature's weight.
    p_yx = _calc_p_class_given_x(xs, classes, features, alphas)

    N = len(xs)
    newalphas = alphas[:]
    for i in range(len(alphas)):
        delta = _iis_solve_delta(N, features[i], f_sharp, e_empirical[i], p_yx)
        newalphas[i] += delta
    return newalphas


def train(training_set, results, feature_fns, update_fn=None):
    """train(training_set, results, feature_fns[, update_fn]) -> MaxEntropy object

    Train a maximum entropy classifier on a training set.
    training_set is a list of observations. results is a list of the
    class assignments for each observation. feature_fns is a list of
    the features. These are callback functions that take an
    observation and class and return a 1 or 0. update_fn is a
    callback function that's called at each training iteration. It is
    passed a MaxEntropy object that encapsulates the current state of
    the training.

    """
    if not len(training_set):
        raise ValueError("No data in the training set.")
    if len(training_set) != len(results):
        raise ValueError("training_set and results should be parallel lists.")

    xs, ys = training_set, results

    # Get a sorted list of all the classes that can be trained.
    classes = sorted(set(results))

    # Cache values for all features on the training set.
    features = [_eval_feature_fn(fn, training_set, classes)
                for fn in feature_fns]

    f_sharp = _calc_f_sharp(len(training_set), len(classes), features)

    # Pre-calculate the empirical expectations of the features.
    e_empirical = _calc_empirical_expects(xs, ys, classes, features)

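    # IIS: start all the weights at zero and repeatedly improve them until
    # the total (L1) change in the alphas falls below IIS_CONVERGE.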
    alphas = [0.0] * len(features)
    iters = 0
    while iters < MAX_IIS_ITERATIONS:
        nalphas = _train_iis(xs, classes, features, f_sharp,
                             alphas, e_empirical)
        diff = sum(numpy.fabs(x - y) for x, y in zip(alphas, nalphas))
        alphas = nalphas

        me = MaxEntropy()
        me.alphas, me.classes, me.feature_fns = alphas, classes, feature_fns
        if update_fn is not None:
            update_fn(me)

        if diff < IIS_CONVERGE:
            break
        iters += 1
    else:
        raise RuntimeError("IIS did not converge")

    return me


if __name__ == "__main__":

    xcar = [
        ['Red', 'Sports', 'Domestic'],
        ['Red', 'Sports', 'Domestic'],
        ['Red', 'Sports', 'Domestic'],
        ['Yellow', 'Sports', 'Domestic'],
        ['Yellow', 'Sports', 'Imported'],
        ['Yellow', 'SUV', 'Imported'],
        ['Yellow', 'SUV', 'Imported'],
        ['Yellow', 'SUV', 'Domestic'],
        ['Red', 'SUV', 'Imported'],
        ['Red', 'Sports', 'Imported'],
    ]

    ycar = [
        'Yes',
        'No',
        'Yes',
        'No',
        'Yes',
        'No',
        'Yes',
        'No',
        'No',
        'Yes',
    ]

    # Each feature function takes an observation and a class and returns
    # 0 or 1; the class argument is unused in this simple example.
    def udf1(ts, cl):
        if ts[0] == 'Red':
            return 0
        else:
            return 1

    def udf2(ts, cl):
        if ts[1] == 'Sports':
            return 0
        else:
            return 1

    def udf3(ts, cl):
        if ts[2] == 'Domestic':
            return 0
        else:
            return 1

    user_functions = [udf1, udf2, udf3]

    xe = train(xcar, ycar, user_functions)
    for xv, yv in zip(xcar, ycar):
        xc = classify(xe, xv)
        print("Pred:", xv, "gives", xc, "y is", yv)