Package nltk_lite :: Package contrib :: Package classifier :: Module oner
[hide private]
[frames] | no frames]

Source Code for Module nltk_lite.contrib.classifier.oner

 1  # Natural Language Toolkit - OneR 
 2  #  Capable of classifying the test or gold data using the OneR algorithm 
 3  # 
 4  # Author: Sumukh Ghodke <sumukh dot ghodke at gmail dot com> 
 5  # 
 6  # URL: <http://nltk.sf.net> 
 7  # This software is distributed under GPL, for license information see LICENSE.TXT 
 8   
 9  from nltk_lite.contrib.classifier import instances as ins, decisionstump as ds, Classifier 
10  from nltk_lite.contrib.classifier.exceptions import invaliddataerror as inv 
11   
class OneR(Classifier):
    """Classifier that labels instances with a single decision stump (OneR).

    Candidate decision stumps are obtained from the attributes, their counts
    are updated from the training instances, and one stump is selected by a
    named selection method (by default ``minimum_error``).  The selected
    stump is then used to assign a class to every instance.
    """

    def __init__(self, training, attributes, klass):
        """Initialise the base Classifier with the given arguments.

        The best decision stump is not computed here; it is built lazily on
        the first call to classify().
        """
        Classifier.__init__(self, training, attributes, klass)
        self.__best_decision_stump = None

    def test(self, test_instances, printResults=True):
        """Classify ``test_instances`` in place and optionally print them.

        The instances are remembered as ``self.test_instances``.  When
        ``printResults`` is true, ``test_instances.print_all()`` is called
        after classification.
        """
        self.test_instances = test_instances
        self.classify(self.test_instances)
        if printResults:
            self.test_instances.print_all()

    def classify(self, instances):
        """Set the class of every instance using the best decision stump.

        The stump is computed from ``self.training`` the first time it is
        needed and reused on later calls.
        """
        # Identity test: '==' would dispatch to any __eq__ defined on the
        # stump object, which is not what is wanted for a "not yet built"
        # check.
        if self.__best_decision_stump is None:
            self.__best_decision_stump = self.best_decision_stump(self.training)
        for instance in instances:
            instance.set_klass(self.__best_decision_stump.klass(instance))

    def verify(self, gold_instances):
        """Classify ``gold_instances`` and return their confusion matrix.

        The instances are remembered as ``self.gold_instances``; the result
        is whatever ``gold_instances.confusion_matrix(self.klass)`` returns.
        """
        self.gold_instances = gold_instances
        self.classify(self.gold_instances)
        return self.gold_instances.confusion_matrix(self.klass)

    def best_decision_stump(self, instances, ignore_attributes=None,
                            algorithm='minimum_error'):
        """Build the candidate decision stumps and return the selected one.

        ``instances`` are used to update the counts of every stump.
        ``ignore_attributes`` is forwarded to
        ``self.attributes.empty_decision_stumps``; when omitted (or None) an
        empty list is used.  ``algorithm`` is the name of a method on this
        object that takes no arguments and returns the chosen stump.

        The candidate stumps are stored in ``self.decision_stumps``.

        Raises InvalidDataError if no attribute named ``algorithm`` exists on
        this object.
        """
        # A fresh list per call: a literal [] default would be one object
        # shared by all calls and handed to an external helper.
        if ignore_attributes is None:
            ignore_attributes = []
        self.decision_stumps = self.attributes.empty_decision_stumps(ignore_attributes, self.klass)
        for stump in self.decision_stumps:
            for instance in instances:
                stump.update_count(instance)
        # Only the lookup is guarded, so an AttributeError raised while the
        # selection method itself runs is not misreported as an unknown
        # algorithm.
        try:
            select = getattr(self, algorithm)
        except AttributeError:
            raise inv.InvalidDataError('Invalid algorithm to find the best decision stump. ' + str(algorithm) + ' is not defined.')
        return select()

    def minimum_error(self):
        """Return the stump in ``self.decision_stumps`` with the lowest error.

        Only stumps whose ``error()`` is strictly below 1 are considered and
        the first stump reaching the minimum wins ties.  Returns None when no
        stump qualifies (including when there are no stumps).
        """
        error, min_error_stump = 1, None
        for decision_stump in self.decision_stumps:
            new_error = decision_stump.error()
            if new_error < error:
                error = new_error
                min_error_stump = decision_stump
        return min_error_stump
52