1
2
3
4
5
6
7
8
9
"""
Interfaces used to remove morphological affixes from words, leaving
only the word stem.  Stemming algorithms aim to remove those affixes
required for e.g. grammatical role, tense, or derivational morphology,
leaving only the stem of the word.  This is a difficult problem due to
irregular words (e.g. common verbs in English), complicated
morphological rules, and part-of-speech and sense ambiguities
(e.g. C{ceil-} is not the stem of C{ceiling}).

C{StemI} defines a standard interface for stemmers.
"""
21
22 import re
23
24
25
26
27
29 """
30 A processing interface for removing morphological affixes from
31 words. This process is known as X{stemming}.
32
33 """
def stem(self, token):
    """
    Strip affixes from the token and return the stem.

    This base implementation performs no stemming: it unconditionally
    raises C{NotImplementedError}.  NOTE(review): concrete stemmers
    are presumably expected to override this method -- confirm against
    the implementing classes.

    @param token: The token that should be stemmed.
    @type token: L{string}
    @return: The stem of C{token}.
    @raise NotImplementedError: Always, in this base implementation.
    """
    raise NotImplementedError()
42
43
44 from regexp import *
45 from porter import *
46 from lancaster import *
47