1 from nltk_lite.contrib.classifier import commandline as cl
2 from nltk_lite.contrib.classifier import oner, zeror, decisiontree, format
3 import sys
4
# Help text for the command-line options. Each string is built from adjacent
# literals, so every fragment that is followed by more text must end with a
# space; otherwise words run together in the rendered help output.

# NOTE(review): this text advertises 0R as the default, while the constructor
# call elsewhere in this file passes ONE_R right after the algorithm choices.
# Confirm which default is intended before changing either.
a_help = (
    "Selects the classification algorithm. "
    "Options: 0R for Zero R, 1R for One R, DT for Decision Trees. "
    "Default: 0R."
)

f_help = (
    "Specifies the base name of test, training or gold files. "
    "By default it searches for training and test files, look at the "
    "verify option for more details."
)

v_help = (
    "Used in conjunction with the files option to verify "
    "the efficiency with a gold file instead of testing "
    "the classifier on a test file. Setting this option "
    "will mean that a gold file is present with the common "
    "name. "
    "Options: True/False or yes/no."
)

t_help = (
    "When the files option is not used this option is used "
    "to specify the path to the training file without the "
    "extension."
)

T_help = (
    "When the files option is not used this option is used "
    "to specify the path to the test file without the "
    "extension."
)

g_help = (
    "When the files option is not used this option is used "
    "to specify the path to the gold file without the "
    "extension."
)

# The metric switches below keep their original wording, including the
# trailing space after the final sentence.
A_help = (
    "Used to disable calculation of Accuracy. "
    "Options: True/False or yes/no. "
    "Default: False. "
)

e_help = (
    "Used to enable calculation of Error rate. "
    "Options: True/False or yes/no. "
    "Default: False. "
)

F_help = (
    "Used to disable calculation of F-score. "
    "Options: True/False or yes/no. "
    "Default: False. "
)

p_help = (
    "Used to enable calculation of Precision. "
    "Options: True/False or yes/no. "
    "Default: False. "
)

r_help = (
    "Used to enable calculation of Recall. "
    "Options: True/False or yes/no. "
    "Default: False. "
)
51
# Codes the user passes to select an algorithm.
ZERO_R = '0R'
ONE_R = '1R'
DECISION_TREE = 'DT'

# Lookup from algorithm code to the callable that builds the classifier;
# elsewhere in this file the looked-up value is called with
# (training, attributes, klass).
ALGORITHM_MAPPINGS = {
    ZERO_R: zeror.ZeroR,
    ONE_R: oner.OneR,
    DECISION_TREE: decisiontree.DecisionTree,
}
57
60 cl.CommandLineInterface.__init__(self, ALGORITHM_MAPPINGS.keys(), ONE_R, a_help, f_help, t_help, T_help, g_help)
61 self.add_option("-v", "--verify", dest="verify", action="store_true", default=False, help=v_help)
62 self.add_option("-A", "--accuracy", dest="accuracy", action="store_false", default=True, help=A_help)
63 self.add_option("-e", "--error", dest="error", action="store_true", default=False, help=e_help)
64 self.add_option("-F", "--f-score", dest="fscore", action="store_false", default=True, help=F_help)
65 self.add_option("-p", "--precision", dest="precision", action="store_true", default=False, help=p_help)
66 self.add_option("-r", "--recall", dest="recall", action="store_true", default=False, help=r_help)
67
69 cl.CommandLineInterface.execute(self)
70 self.validate_basic_arguments_are_present()
71 self.validate_files_arg_is_exclusive()
72 if self.test_path is not None and self.gold_path is not None:
73 self.error('Invalid arguments. Test and gold files are mutually exclusive.')
74 if self.files is None and self.test_path is not None and self.get_value('verify'):
75 self.error('Invalid arguments. Cannot verify classification for test data.')
76 if self.files is not None:
77 self.training_path = self.files
78 self.test_path, self.gold_path = self.__test_and_gold(self.files)
79 training, attributes, klass, test, gold = self.get_instances(self.training_path, self.test_path, self.gold_path)
80 classifier = ALGORITHM_MAPPINGS[self.algorithm](training, attributes, klass)
81 self.classify(classifier, test, gold)
82
84 if self.get_value('verify'):
85 return [None, files]
86 return [files, None]
87
88 - def classify(self, classifier, test, gold):
98
102
# Script entry point: instantiate Classify and run it with the arguments
# that follow the program name.
if __name__ == "__main__":
    cli = Classify()
    cli.run(sys.argv[1:])
105