1
2
3
4
5
6
7 from nltk_lite.contrib.classifier_tests import *
8 from nltk_lite.contrib.classifier import featureselect as fs, decisionstump as ds, format
9 from nltk_lite.contrib.classifier.exceptions import invaliddataerror as inv
10
# Parse a complete argument list (-a, -t, -T, -o all supplied) and read every
# stored value back from the parsed values object.
13 feature_select = fs.FeatureSelect()
14 feature_select.parse(['-a', 'RNK', '-t', 'path', '-T', 'path1,path2', '-o', 'IG,4'])
# ensure_value(name, None) is used as the accessor; None is the fallback
# handed to it for each option name.
15 algorithm = feature_select.values.ensure_value('algorithm', None)
16 training = feature_select.values.ensure_value('training', None)
17 test = feature_select.values.ensure_value('test', None)
18 options = feature_select.values.ensure_value('options', None)
19
# Each value must come back exactly as passed on the command line:
# -a -> algorithm, -t -> training, -T -> test, -o -> options. The comma
# separated strings are expected unsplit at this stage.
20 self.assertEqual('RNK', algorithm)
21 self.assertEqual('path', training)
22 self.assertEqual('path1,path2', test)
23 self.assertEqual('IG,4', options)
24
# 'RNL' is not an accepted value for -a: parsing must report it through the
# stub's error hook instead of accepting it.
26 feat_sel = FeatureSelectStub()
# Sanity check: a fresh stub has not recorded any error yet.
27 self.assertFalse(feat_sel.error_called)
28 feat_sel.parse(['-a', 'RNL', '-t', 'path', '-T', 'path1,path2', '-o', 'IG,4'])
29 self.assertTrue(feat_sel.error_called)
# The recorded message names the rejected value and lists 'RNK' as the only
# valid choice.
30 self.assertEqual('option -a: invalid choice: \'RNL\' (choose from \'RNK\')', feat_sel.message)
31
# Mandatory-argument validation: each scenario below drops one option from the
# argument list given to run() and checks whether the stub recorded an error.

# Scenario 1: -T (test) is missing -> error expected.
33 feat_sel = FeatureSelectStub()
34 self.assertFalse(feat_sel.error_called)
35 feat_sel.run(['-a', 'RNK', '-t', 'path', '-o', 'IG,4'])
36 self.assertTrue(feat_sel.error_called)
37 self.assertEqual('Invalid arguments. One or more required arguments are not present.', feat_sel.message)
38
# Scenario 2: -t (training) is missing -> same error expected.
39 feat_sel = FeatureSelectStub()
40 self.assertFalse(feat_sel.error_called)
41 feat_sel.run(['-a', 'RNK', '-T', 'path1,path2', '-o', 'IG,4'])
42 self.assertTrue(feat_sel.error_called)
43 self.assertEqual('Invalid arguments. One or more required arguments are not present.', feat_sel.message)
44
45
# Scenario 3: -a (algorithm) is omitted -> no error is expected, i.e. the
# algorithm option is not treated as mandatory by run().
46 feat_sel = FeatureSelectStub()
47 self.assertFalse(feat_sel.error_called)
48 feat_sel.run(['-t', 'path', '-T', 'path1,path2', '-o', 'IG,4'])
49 self.assertFalse(feat_sel.error_called)
50
# Scenario 4: -o (options) is missing -> error expected.
51 feat_sel = FeatureSelectStub()
52 self.assertFalse(feat_sel.error_called)
53 try:
54 feat_sel.run(['-a', 'RNK', '-t', 'path', '-T', 'path1,path2'])
55 except AttributeError:
# NOTE(review): the AttributeError is tolerated on purpose. The stub's error()
# only records the call, so run() presumably keeps going without options and
# trips over a missing value -- TODO confirm against FeatureSelect.run.
56
57 pass
# Regardless of the AttributeError, the error must have been recorded first.
58 self.assertTrue(feat_sel.error_called)
59 self.assertEqual('Invalid arguments. One or more required arguments are not present.', feat_sel.message)
60
73
# Rank the attributes of the minigolf/weather dataset by information gain and
# check which attribute the ranking marks for removal.

# Load every part of the dataset from the same C4.5-format base path.
75 path = datasetsDir(self) + 'minigolf' + SEP + 'weather'
76 training = format.C45_FORMAT.get_training_instances(path)
77 attributes = format.C45_FORMAT.get_attributes(path)
78 klass = format.C45_FORMAT.get_klass(path)
79 test = format.C45_FORMAT.get_test_instances(path)
80 gold = format.C45_FORMAT.get_gold_instances(path)
81
# The final argument is the option list; note the count is given as the
# string '3' here, whereas find_attributes_by_ranking below receives the int 3.
82 feature_selection = fs.FeatureSelection(training, attributes, klass, test, gold, ['IG','3'])
83
# Pin the information gain of each attribute, in file order. information_gain
# is not defined in the visible part of this file -- presumably it arrives via
# the star import or a helper defined elsewhere; verify.
# Expected values: outlook 0.324409 > temperature 0.102187 > humidity 0.091091
# > windy 0.072780, so 'windy' carries the lowest gain.
84 ig_for_attr1 = information_gain(attributes[0], klass, training)
85 self.assertAlmostEqual(0.324409, ig_for_attr1, 6)
86 self.assertEqual('outlook', attributes[0].name)
87 ig_for_attr2 = information_gain(attributes[1], klass, training)
88 self.assertAlmostEqual(0.102187, ig_for_attr2, 6)
89 self.assertEqual('temperature', attributes[1].name)
90 ig_for_attr3 = information_gain(attributes[2], klass, training)
91 self.assertAlmostEqual(0.091091, ig_for_attr3, 6)
92 self.assertEqual('humidity', attributes[2].name)
93 ig_for_attr4 = information_gain(attributes[3], klass, training)
94 self.assertAlmostEqual(0.072780, ig_for_attr4, 6)
95 self.assertEqual('windy', attributes[3].name)
# With four attributes and an argument of 3, exactly one attribute is returned
# for removal and it is the lowest-gain one ('windy'). Presumably 3 is the
# number of attributes to keep -- confirm against FeatureSelection.
96 attributes_to_remove = feature_selection.find_attributes_by_ranking('information_gain', 3)
97 self.assertEqual(1, len(attributes_to_remove))
98 self.assertEqual('windy', attributes_to_remove[0].name)
99
105
111
def error(self, message):
    """Record an error report instead of acting on it.

    The tests in this file inspect ``error_called`` and ``message`` on the
    stub after calling ``parse``/``run`` to verify that an error was (or was
    not) reported and with which text.

    Args:
        message: the error text passed in by the caller; stored unchanged.
    """
    # Keep the text first, then raise the flag the assertions check.
    self.message = message
    self.error_called = True
116
119