1
2
3
4
5
6
7
8 """
9 The Carnegie Mellon Pronouncing Dictionary [cmudict.0.6]
10 ftp://ftp.cs.cmu.edu/project/speech/dict/
11 Copyright 1998 Carnegie Mellon University
12
13 File Format: Each line consists of an uppercased word, a counter
14 (for alternative pronunciations), and a transcription. Vowels are
15 marked for stress (1=primary, 2=secondary, 0=no stress). E.g.:
16 NATURAL 1 N AE1 CH ER0 AH0 L
17
18 The dictionary contains 127069 entries. Of these, 119400 words are assigned
19 a unique pronunciation, 6830 words have two pronunciations, and 839 words have
20 three or more pronunciations. Many of these are fast-speech variants.
21
22 Phonemes: There are 39 phonemes, as shown below:
23
Phoneme Example Translation    Phoneme Example Translation
------- ------- -----------    ------- ------- -----------
AA      odd     AA D           AE      at      AE T
AH      hut     HH AH T        AO      ought   AO T
AW      cow     K AW           AY      hide    HH AY D
B       be      B IY           CH      cheese  CH IY Z
D       dee     D IY           DH      thee    DH IY
EH      Ed      EH D           ER      hurt    HH ER T
EY      ate     EY T           F       fee     F IY
G       green   G R IY N       HH      he      HH IY
IH      it      IH T           IY      eat     IY T
JH      gee     JH IY          K       key     K IY
L       lee     L IY           M       me      M IY
N       knee    N IY           NG      ping    P IH NG
OW      oat     OW T           OY      toy     T OY
P       pee     P IY           R       read    R IY D
S       sea     S IY           SH      she     SH IY
T       tea     T IY           TH      theta   TH EY T AH
UH      hood    HH UH D        UW      two     T UW
V       vee     V IY           W       we      W IY
Y       yield   Y IY L D       Z       zee     Z IY
ZH      seizure S IY ZH ER
46 """
47
48 from nltk_lite.corpora import get_basedir
49 import os
50
# Identifiers of the corpus items exposed by this module.
items = ['cmudict']

# Human-readable title for each identifier listed in `items`.
item_name = {'cmudict': 'CMU Pronunciation Dictionary, Version 0.6, 1998'}
57
58 -def raw(files = 'cmudict'):
73
75 d = {}
76 for word, num, pron in raw(files):
77 if num == 1:
78 d[word] = (pron,)
79 else:
80 d[word] += (pron,)
81 return d
82
98
# Run the demonstration only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    demo()
101