1
2
3
4
5
6
7
8
9
10 from nltk_lite.stem import *
11
"""
A stemmer that uses regular expressions to identify morphological
affixes.  Any substring that matches the regular expression will
be removed.
"""
"""
Create a new regexp stemmer.

@type regexp: C{string} or C{regexp}
@param regexp: The regular expression that should be used to
    identify morphological affixes.
@type min: int
@param min: The minimum length of string to stem; shorter strings
    are returned unchanged.
"""
28
29 if not hasattr(regexp, 'pattern'):
30 regexp = re.compile(regexp)
31 self._regexp = regexp
32 self._min = min
33
def stem(self, word):
    """
    Remove every substring of C{word} that matches this stemmer's
    regular expression.

    @type word: C{string}
    @param word: The word to stem.
    @rtype: C{string}
    @return: C{word} unchanged if it is shorter than the stemmer's
        minimum length; otherwise C{word} with all matches of the
        affix pattern deleted.
    """
    # Words below the minimum length are deliberately left untouched.
    if len(word) < self._min:
        return word
    # Replace each match with the empty string, i.e. strip the affix.
    return self._regexp.sub('', word)
39
41 return '<Regexp Stemmer: %r>' % self._regexp.pattern
42
56
57
# Run the demo when this module is executed directly as a script.
# NOTE(review): demo() is not defined in this excerpt; it is expected to be
# defined elsewhere in the module -- confirm.
if __name__ == '__main__':
    demo()
59