1
2
3
4 from nltk_lite.stem.porter import Porter
5 from nltk_lite.corpora import brown
6 from nltk_lite import tokenize
7
8 import sys
9 from collections import defaultdict
10 import operator
11
12 -def sortby(nlist ,n, reverse=0):
14
18
22
24 s = s.lower()
25 s = s.replace("z", "s")
26 s = s.replace("h", "")
27 for i in [chr(ord("a") + i) for i in range(26)]:
28 s = s.replace(i+i, i)
29 s = self.stemmer.stem(s)
30 return s
31
def test(self, token):
    """Check ``token`` against the learned words and suggest corrections.

    The token is reduced with ``self.specialhash`` and the result is looked
    up in ``self.learned``.  Each entry of ``self.learned`` must be a
    mapping whose keys are candidate words; the values are used as the sort
    key by ``sortby`` (presumably occurrence counts, most frequent first --
    confirm against ``learn`` and ``sortby``).

    Parameters:
        token: the word to check.

    Returns:
        A human-readable string, one of:
        * ``'This word seems OK'`` when ``token`` is itself a candidate;
        * ``'Did you mean "<word>" ?'`` when there is exactly one candidate;
        * ``'Did you mean "<word>" ? (or "<w2>", ...)'`` when there are
          several candidates;
        * ``"I can't found similar word in my learned db"`` when the hash is
          unknown or has no candidates.
    """
    # Kept byte-for-byte: callers may display or compare this message.
    not_found = "I can't found similar word in my learned db"

    hashed = self.specialhash(token)
    if hashed not in self.learned:
        return not_found

    # Materialise the pairs: on Python 3 ``dict.items()`` is a view that can
    # be neither sorted in place nor indexed.  On Python 2 this is a copy.
    words = list(self.learned[hashed].items())
    if not words:
        # Nothing to suggest; without this guard ``words[0]`` below would
        # raise IndexError.
        return not_found

    # ``sortby`` is relied upon to reorder ``words`` in place.
    sortby(words, 1, reverse=1)

    if any(word == token for word, _ in words):
        return 'This word seems OK'

    best = words[0][0]
    if len(words) == 1:
        return 'Did you mean "%s" ?' % best

    others = ", ".join('"' + word + '"' for word, _ in words[1:])
    return 'Did you mean "%s" ? (or %s)' % (best, others)
47
48 - def learn(self, listofsentences=[], n=2000):
57
64
if __name__ == "__main__":
    # Run the demonstration only when executed as a script, not on import.
    demo()
67