Package nltk_lite :: Package contrib :: Package classifier :: Module classify
[hide private]
[frames | no frames]

Source Code for Module nltk_lite.contrib.classifier.classify

  1  from nltk_lite.contrib.classifier import commandline as cl 
  2  from nltk_lite.contrib.classifier import oner, zeror, decisiontree, format 
  3  import sys 
  4   
  5  a_help = "Selects the classification algorithm                  " \ 
  6          + "Options: 0R for Zero R, 1R for One R, DT for Decision" \ 
  7          + " Trees.                                              " \ 
  8          + "Default: 0R." 
  9   
 10  f_help = "Specifies the base name of test, training or gold files." \ 
 11          + "By default it searches for training and test files, look at the verify option for more details." 
 12   
 13  v_help = "Used in conjunction with the files option to verify  " \ 
 14          + "the efficiency with a gold file instead of testing " \ 
 15          + "the classifier on a test file. Setting this option " \ 
 16          + "will mean that a gold file is present with the common" \ 
 17          + "name.                                               " \ 
 18          + "Options: True/False or yes/no." 
 19   
 20  t_help = "When the files option is not used this option is used " \ 
 21          + "to specify the path to the training file without the " \ 
 22          + "extension." 
 23   
 24  T_help = "When the files option is not used this option is used " \ 
 25          + "to specify the path to the test file without the " \ 
 26          + "extension." 
 27   
 28  g_help = "When the files option is not used this option is used " \ 
 29          + "to specify the path to the gold file without the " \ 
 30          + "extension." 
 31   
 32  A_help = "Used to disable calculation of Accuracy.              " \ 
 33          + "Options: True/False or yes/no.                       " \ 
 34          + "Default: False.                                      " 
 35   
 36  e_help = "Used to enable calculation of Error rate.             " \ 
 37          + "Options: True/False or yes/no.                       " \ 
 38          + "Default: False.                                      " 
 39   
 40  F_help = "Used to disable calculation of F-score.               " \ 
 41          + "Options: True/False or yes/no.                       " \ 
 42          + "Default: False.                                      " 
 43   
 44  p_help = "Used to enable calculation of Precision.              " \ 
 45          + "Options: True/False or yes/no.                       " \ 
 46          + "Default: False.                                      " 
 47   
 48  r_help = "Used to enable calculation of Recall.                 " \ 
 49          + "Options: True/False or yes/no.                       " \ 
 50          + "Default: False.                                      " 
 51           
 52  ZERO_R = '0R' 
 53  ONE_R = '1R' 
 54  DECISION_TREE = 'DT' 
 55   
 56  ALGORITHM_MAPPINGS = {ZERO_R:zeror.ZeroR, ONE_R:oner.OneR, DECISION_TREE:decisiontree.DecisionTree} 
 57   
class Classify(cl.CommandLineInterface):
    """Command-line interface that builds a classifier and tests or verifies it.

    On top of the options registered by the base class, this adds a
    ``--verify`` switch and one switch per reported metric. Accuracy and
    F-score are reported unless switched off (``-A`` / ``-F``); error rate,
    precision and recall are reported only when switched on
    (``-e`` / ``-p`` / ``-r``).
    """

    def __init__(self):
        """Register the algorithm choices and the classify-specific options."""
        cl.CommandLineInterface.__init__(
            self, ALGORITHM_MAPPINGS.keys(), ONE_R,
            a_help, f_help, t_help, T_help, g_help)
        self.add_option("-v", "--verify", dest="verify", action="store_true", default=False, help=v_help)
        # Accuracy and F-score are on by default, so their flags turn them off.
        self.add_option("-A", "--accuracy", dest="accuracy", action="store_false", default=True, help=A_help)
        self.add_option("-e", "--error", dest="error", action="store_true", default=False, help=e_help)
        self.add_option("-F", "--f-score", dest="fscore", action="store_false", default=True, help=F_help)
        self.add_option("-p", "--precision", dest="precision", action="store_true", default=False, help=p_help)
        self.add_option("-r", "--recall", dest="recall", action="store_true", default=False, help=r_help)

    def execute(self):
        """Validate the arguments, build the chosen classifier and run it.

        Argument problems are reported through ``self.error``.
        NOTE(review): ``error`` looks like the optparse-style method that
        prints the message and exits -- confirm in the base class.
        """
        cl.CommandLineInterface.execute(self)
        self.validate_basic_arguments_are_present()
        self.validate_files_arg_is_exclusive()
        if self.test_path is not None and self.gold_path is not None:
            self.error('Invalid arguments. Test and gold files are mutually exclusive.')
        if self.files is None and self.test_path is not None and self.get_value('verify'):
            self.error('Invalid arguments. Cannot verify classification for test data.')
        if self.files is not None:
            # A common base name stands in for the training file and for
            # either the test or the gold file, depending on --verify.
            self.training_path = self.files
            self.test_path, self.gold_path = self.__test_and_gold(self.files)
        training, attributes, klass, test, gold = self.get_instances(
            self.training_path, self.test_path, self.gold_path)
        classifier = ALGORITHM_MAPPINGS[self.algorithm](training, attributes, klass)
        self.classify(classifier, test, gold)

    def __test_and_gold(self, files):
        """Return ``(test_path, gold_path)`` for the common base name *files*.

        Exactly one of the two is *files* and the other is ``None``: the
        gold slot is filled when the verify option is set, the test slot
        otherwise.
        """
        if self.get_value('verify'):
            return None, files
        return files, None

    def classify(self, classifier, test, gold):
        """Run *classifier* on *test*, or verify it against *gold* and report.

        When *test* is not ``None`` the classifier is only asked to test it.
        Otherwise it is verified against *gold*, the result is stored in
        ``self.confusion_matrix`` and the enabled metrics are printed.
        """
        if test is not None:
            classifier.test(test)
        else:
            self.confusion_matrix = classifier.verify(gold)
            # Metrics are read from the confusion matrix, which is only
            # assigned on this branch.
            self.print_value('accuracy', 'Accuracy')
            self.print_value('error', 'Error')
            self.print_value('fscore', 'F-score')
            self.print_value('precision', 'Precision')
            self.print_value('recall', 'Recall')

    def print_value(self, attribute, str_repn):
        """Print ``"<str_repn>: <value>"`` if the option *attribute* is enabled.

        The value comes from calling the method named *attribute* on
        ``self.confusion_matrix``.
        """
        if self.get_value(attribute):
            metric = getattr(self.confusion_matrix, attribute)()
            # A single parenthesised argument prints the same on Python 2 and 3.
            print(str_repn + ': ' + str(metric))
if __name__ == "__main__":
    # Run as a script: pass everything after the script name to the CLI.
    cli_args = sys.argv[1:]
    Classify().run(cli_args)