1
2
3
4
5
6
7
8
9
10
"""
Kimmo Morphological Analyzer.  Supports proper recognizer completion,
generator ordering, a Kimmo control class, and a loader for its own file
format as well as .rul files compatible with the old PC-KIMMO.
"""
16
17
18
19 import Tkinter
20 import os, re, sys, types, string, glob, time, md5
21
22 from nltk_lite.contrib.fsa import *
23 from nltk_lite.corpora import get_basedir
24 from nltk_lite import tokenize
25
26
"""
A GUI for input of generative & recognition models.
It needs three input boxes: one for text input, a lexicon box and a rules box.
One output box?

We need alternation rules and a lexicon,
plus one input test & recognition box.

We want to "step" through alternations,
we want to "show" the rules that fire,
and we want a batch mode: a big file, or a big input test with output.
"""
39
40 from ScrolledText import ScrolledText
41
def __init__(self, grammar, text, title='Kimmo Interface v1.78'):
    """
    Build the Kimmo main window, load 'kimmo.cfg' and enter the Tk main loop.

    The window holds a lexicon/alternations editor, a rules/subsets editor,
    a results pane, an entry for the word to generate or recognize, a batch
    file entry and a status entry, plus Load/Save/Configs/Graph menus and
    Quit/Tracing/Help buttons.

    This constructor blocks: it calls Tkinter.mainloop() and only returns
    once the main loop exits.

    @param grammar: not used by this method.
    @param text: not used by this method.
    @param title: title of the top-level window.
    """
    self.root = None
    # destroy() and the error handler below read these two attributes, so
    # they must exist even when construction fails early.
    self._root = None
    self.phOut = None
    try:
        self.dbgTracing = None
        self.highlightIds = []
        self.tagId = 0

        self.lexmd5 = None
        self.rulemd5 = None
        self.lexicalGraphWindow = None

        self.rulfilename = ''
        self.lexfilename = ''
        self.altfilename = ''
        self.kimmoResultFile = ''

        self.helpFilename = 'kimmo.help'

        self._root = Tkinter.Tk()
        self._root.title(title)

        # Top-level layout rows, packed top to bottom.
        ctlbuttons = Tkinter.Frame(self._root)
        ctlbuttons.pack(side='top', fill='x')
        level1 = Tkinter.Frame(self._root)
        level1.pack(side='top', fill='none')
        Tkinter.Frame(self._root).pack(side='top', fill='none')
        level2 = Tkinter.Frame(self._root)
        level2.pack(side='top', fill='x')
        buttons = Tkinter.Frame(self._root)
        buttons.pack(side='top', fill='none')
        batchFrame = Tkinter.Frame(self._root)
        batchFrame.pack(side='top', fill='x')

        # Batch row: batch file path + Go button on the left, status entry
        # (written by guiError via self.debugWin) on the right.
        self.batchpath = Tkinter.StringVar()
        Tkinter.Label(batchFrame, text="Batch File:").pack(side='left')
        Tkinter.Entry(batchFrame, background='white', foreground='black',
                      width=30, textvariable=self.batchpath).pack(side='left')
        Tkinter.Button(batchFrame, text='Go!',
                       background='#a0c0c0', foreground='black',
                       command=self.batch).pack(side='left')

        self.debugWin = Tkinter.StringVar()
        Tkinter.Entry(batchFrame, background='grey', foreground='red',
                      width=30, textvariable=self.debugWin).pack(side='right')

        # Word entry used by the Generate / Recognize buttons.
        self.wordIn = Tkinter.StringVar()
        Tkinter.Label(level2, text="Generate or Recognize:").pack(side='left')
        Tkinter.Entry(level2, background='white', foreground='black',
                      width=30, textvariable=self.wordIn).pack(side='left')

        # Left pane: lexicon and alternations share one editor.
        lexiconFrame = Tkinter.Frame(level1)
        Tkinter.Label(lexiconFrame, text="Lexicon & Alternations").pack(side='top',
                                                                        fill='x')
        self.lexicon = ScrolledText(lexiconFrame, background='white',
                                    foreground='black', width=50, height=36, wrap='none')
        scroll = Tkinter.Scrollbar(lexiconFrame, orient='horizontal',
                                   command=self.lexicon.xview)
        scroll.pack(side='bottom', fill='x')
        self.lexicon.configure(xscrollcommand=scroll.set)
        self.lexicon.pack(side='top')

        # Right pane: rules editor on top, results below.
        midFrame = Tkinter.Frame(level1)
        rulesFrame = Tkinter.Frame(midFrame)
        rulesFrame.pack(side='top', fill='x')
        Tkinter.Label(rulesFrame, text="Rules/Subsets").pack(side='top',
                                                             fill='x')
        self.rules = ScrolledText(rulesFrame, background='white',
                                  foreground='black', width=60, height=19, wrap='none')
        scroll = Tkinter.Scrollbar(rulesFrame, orient='horizontal',
                                   command=self.rules.xview)
        scroll.pack(side='bottom', fill='x')
        self.rules.configure(xscrollcommand=scroll.set)
        self.rules.pack(side='top')

        midbetweenFrame = Tkinter.Frame(midFrame)
        midbetweenFrame.pack(side='top', fill='x')

        # self.results is created just below; the lambda only looks it up
        # when the button is pressed.
        Tkinter.Button(midbetweenFrame, text='clear',
                       background='#f0f0f0', foreground='black',
                       command=lambda start=1.0, end=Tkinter.END: self.results.delete(start, end)
                       ).pack(side='right')

        Tkinter.Label(midbetweenFrame,
                      text="Results ").pack(side='right')

        self.results = ScrolledText(midFrame, background='white',
                                    foreground='black', width=60, height=13, wrap='none')
        scroll = Tkinter.Scrollbar(midFrame, orient='horizontal',
                                   command=self.results.xview)
        scroll.pack(side='bottom', fill='x')
        self.results.configure(xscrollcommand=scroll.set)
        self.results.pack(side='bottom')

        # NOTE: there is no separate alternations pane (self.alternation is
        # never created); alternations are edited in the lexicon pane.

        Tkinter.Button(ctlbuttons, text='Quit',
                       background='#a0c0c0', foreground='black',
                       command=self.destroy).pack(side='left')

        # Load menu.
        self.loadMenuButton = Tkinter.Menubutton(ctlbuttons, text='Load',
                                                 background='#a0c0c0', foreground='black',
                                                 relief='raised')
        self.loadMenuButton.pack(side='left')
        self.loadMenu = Tkinter.Menu(self.loadMenuButton, tearoff=0)
        self.loadMenu.add_command(
            label='Load Lexicon', underline=0,
            command=lambda filetype='.lex', targetWindow=self.lexicon, tf='l':
                self.loadTypetoTarget(filetype, targetWindow, tf))
        self.loadMenu.add_command(
            label='Load Rules', underline=0,
            command=lambda filetype='.rul', targetWindow=self.rules, tf='r':
                self.loadTypetoTarget(filetype, targetWindow, tf))
        self.loadMenuButton["menu"] = self.loadMenu

        # Save menu.  The current file names are read when the command runs;
        # binding them as lambda defaults here would freeze them at ''.
        self.saveMenuButton = Tkinter.Menubutton(ctlbuttons, text='Save',
                                                 background='#a0c0c0', foreground='black',
                                                 relief='raised')
        self.saveMenuButton.pack(side='left')
        self.saveMenu = Tkinter.Menu(self.saveMenuButton, tearoff=0)
        self.saveMenu.add_command(
            label='Save Lexicon', underline=0,
            command=lambda: self.writeToFilefromWindow(
                self.lexfilename, self.lexicon, 'w', 0, 'l'))
        self.saveMenu.add_command(
            label='Save Rules', underline=0,
            command=lambda: self.writeToFilefromWindow(
                self.rulfilename, self.rules, 'w', 0, 'r'))
        self.saveMenu.add_command(
            label='Save Results', underline=0,
            command=lambda filename='.results', sourceWindow=self.results:
                self.writeToFilefromWindow(filename, sourceWindow, 'w', 0))
        self.saveMenu.add_command(label='Save All', underline=0,
                                  command=self.saveAll)
        self.saveMenuButton["menu"] = self.saveMenu

        Tkinter.Label(ctlbuttons, text=" Preset:").pack(side='left')

        # Configs menu: one entry per *.cfg file in the current directory.
        self.configValue = Tkinter.StringVar()
        self.configsMenuButton = Tkinter.Menubutton(ctlbuttons, text='Configs',
                                                    background='#a0c0c0', foreground='black',
                                                    relief='raised')
        self.configsMenuButton.pack(side='left')
        self.configsMenu = Tkinter.Menu(self.configsMenuButton, tearoff=0)

        currentconfigfiles = glob.glob('*.cfg')
        for x in currentconfigfiles:
            # newname=x binds the file name per entry (avoids late binding).
            self.configsMenu.add_command(
                label=x, underline=0,
                command=lambda newname=x: self.configLoader(newname))

        if not currentconfigfiles:
            self.configsMenuButton.configure(text='<none>')

        self.configsMenuButton["menu"] = self.configsMenu

        # pack() returns None, so create the widget first and pack it
        # separately to keep a usable reference.
        self.tracingbtn = Tkinter.Button(ctlbuttons, text='Tracing',
                                         background='#fff0f0', foreground='black',
                                         command=lambda: self.create_destroyDebugTracing())
        self.tracingbtn.pack(side='right')

        # Graph menu.
        self.graphMenuButton = Tkinter.Menubutton(ctlbuttons, text='Graph',
                                                  background='#d0d0e8', foreground='black',
                                                  relief='raised')
        self.graphMenuButton.pack(side='right')
        self.graphMenu = Tkinter.Menu(self.graphMenuButton, tearoff=0)
        self.graphMenu.add_command(label='Graph Lexicon', underline=0,
                                   command=lambda which='l': self.graph(which))
        self.graphMenu.add_command(label='Graph FSA Rules', underline=0,
                                   command=lambda which='r': self.graph(which))
        self.graphMenuButton["menu"] = self.graphMenu

        self.helpbtn = Tkinter.Button(ctlbuttons, text='Help',
                                      background='#f0fff0', foreground='black',
                                      command=self.kimmoHelp)
        self.helpbtn.pack(side='right')

        lexiconFrame.pack(side='left')
        midFrame.pack(side='left')

        Tkinter.Button(level2, text='Generate',
                       background='#a0c0c0', foreground='black',
                       command=self.generate).pack(side='left')
        Tkinter.Button(level2, text='Recognize',
                       background='#a0c0c0', foreground='black',
                       command=self.recognize).pack(side='left')

        # Engine state; the Kimmo instance is created by initKimmo().
        self.kimmoinstance = None

        self.traceWindow = ''

        self.debug = False

        self.configLoader('kimmo.cfg')

        # Route printed output through capturePrint.
        self.phOut = PrintHook()
        self.phOut.Start(self.capturePrint)

        Tkinter.mainloop()
    except:
        print('Error creating Kimmo Interface')
        if self._root is not None:
            if self.phOut is not None:
                self.destroy()
            else:
                # The print hook was never installed, so destroy() (which
                # stops the hook first) cannot be used yet.
                self._root.destroy()
                self._root = None
        raise
275
277 menubar = Tkinter.Menu(self._root)
278
279 filemenu = Tkinter.Menu(menubar, tearoff=0)
280 filemenu.add_command(label='Save Rules', underline=0,
281 command=self.save, accelerator='Ctrl-s')
282 self._root.bind('<Control-s>', self.save)
283 filemenu.add_command(label='Load Rules', underline=0,
284 command=self.load, accelerator='Ctrl-o')
285 self._root.bind('<Control-o>', self.load)
286 filemenu.add_command(label='Clear Rules', underline=0,
287 command=self.clear, accelerator='Ctrl-r')
288 self._root.bind('<Control-r>', self.clear)
289 filemenu.add_command(label='Exit', underline=1,
290 command=self.destroy, accelerator='Ctrl-q')
291 self._root.bind('<Control-q>', self.destroy)
292 menubar.add_cascade(label='File', underline=0,
293 menu=filemenu)
294 self._root.config(menu=menubar)
295
297 self.debugWin.set(args[0].strip())
298
299
301
302
303 if (self.dbgTracing):
304 self.dbgTracing.destroy()
305 self.dbgTracing = None
306 self.debug = False
307
308 else:
309 try:
310
311 self.dbgTracing = Tkinter.Toplevel()
312 self.dbgTracing.title("Tracing/Debug")
313 dbgTraceFrame2 = Tkinter.Frame(self.dbgTracing)
314 dbgTraceFrame2.pack(side='top', fill='x')
315 dbgTraceFrame = Tkinter.Frame(self.dbgTracing)
316 dbgTraceFrame.pack(side='top', fill='x',expand='yes')
317 self.traceWindow = ScrolledText(dbgTraceFrame, background='#f4f4f4',
318 foreground='#aa0000', width=45, height=24, wrap='none')
319
320 Tkinter.Button(dbgTraceFrame2, text='clear',
321 background='#a0c0c0', foreground='black',
322 command= lambda start=1.0, end=Tkinter.END : self.traceWindow.delete(start,end)
323 ).pack(side='right')
324 Tkinter.Button(dbgTraceFrame2, text='Save',
325 background='#a0c0c0', foreground='black',
326 command= lambda file=self.kimmoResultFile,windowName=self.traceWindow,mode='w',auto=0 : self.writeToFilefromWindow(file,windowName,mode,auto)
327 ).pack(side='left')
328
329
330 scroll = Tkinter.Scrollbar(dbgTraceFrame, orient='horizontal',command=self.traceWindow.xview)
331 scroll.pack(side='bottom', fill='x')
332
333 self.traceWindow.configure(xscrollcommand = scroll.set)
334 self.traceWindow.pack(side='bottom')
335
336
337 self.debug = True
338
339
340 self.dbgTracing.protocol("WM_DELETE_WINDOW", self.create_destroyDebugTracing)
341
342 except:
343 print 'Error creating Tree View'
344 self.dbgTracing.destroy()
345 self.dbgTracing = None
346 self.debug = False
347 raise
348
349
351
352
353
354
355
356 if not (auto and windowName and filename):
357
358 from tkFileDialog import asksaveasfilename
359 ftypes = [('Text file', '.txt'),('Rule file', '.rul'),('Lexicon file', '.lex'),('Alternations file', '.alt'),
360 ('All files', '*')]
361 filename = asksaveasfilename(filetypes=ftypes,
362 defaultextension='', initialfile=filename)
363
364 if not filename:
365 self.guiError('Need File Name')
366 return
367 f = open(filename, 'w')
368 f.write(windowName.get(1.0,Tkinter.END))
369 f.close()
370
371 if filename:
372 if wt == 'l': self.lexfilename = filename
373 elif wt == 'r': self.rulfilename = filename
374
375
376
377
378
379
387
388 """
389 def save(self, *args):
390 "Save a rule/lexicon set to a text file"
391 from tkFileDialog import asksaveasfilename
392 ftypes = [('Text file', '.txt'),
393 ('All files', '*')]
394 filename = asksaveasfilename(filetypes=ftypes,
395 defaultextension='.txt')
396 if not filename: return
397 f = open(filename, 'w')
398 f.write('---- Rules -----\n%s\n' % '\n'.join(self.getRules(False)))
399 f.write('---- Lexicon -----\n%s\n' % '\n'.join(self.getLexicon(False)))
400 f.close()
401 """
402
404 print args[0]
405 filename = args[0]
406
407
408
409 if filename:
410 f = read_kimmo_file(filename, self)
411 lines = f.readlines()
412 f.close()
413
414
415 self.clear()
416
417
418 self.configsMenuButton.configure(text=filename)
419
420
421
422
423
424
425 self.rulfilename = ''
426 self.lexfilename = ''
427 self.altfilename = ''
428 self.kimmoResultFile = ''
429 self.batchpath.set('')
430
431 for line in lines:
432 line = line.strip()
433 cfgargs = line.split(":")
434 for x in range(len(cfgargs)): cfgargs[x] = cfgargs[x].strip()
435
436 if len(line) == 0: continue
437 elif (line[0] == '#') or (line[0] == ';'): continue
438 elif cfgargs[0] == 'lexicon':
439 self.lexfilename = self.loadIntoWindow(os.path.expanduser(cfgargs[1]),self.lexicon)
440 elif cfgargs[0] == 'rules':
441 self.rulfilename = self.loadIntoWindow(os.path.expanduser(cfgargs[1]),self.rules)
442
443
444
445 elif cfgargs[0] == 'results':
446 self.kimmoResultFile = os.path.expanduser(cfgargs[1])
447 self.resfilename = os.path.expanduser(cfgargs[1])
448 elif cfgargs[0] == 'batch': self.batchpath.set(os.path.expanduser(cfgargs[1]))
449
450 else: self.guiError('unknown line :' + line)
451
452
453 else: self.guiError('Empty Filename')
454
455
456
458 "Load rule/lexicon set from a text file directly into the window pane specified"
459
460
461
462 if filename:
463 filename = os.path.expanduser(filename)
464 f = read_kimmo_file(filename, self)
465 lines = f.readlines()
466 f.close()
467
468 text = []
469 for line in lines:
470 line = line.strip()
471 text.append(line)
472
473
474 windowField.delete(1.0, Tkinter.END)
475
476 windowField.insert(1.0, '\n'.join(text))
477
478 return filename
479 return ''
480
481
483
484 if not (fileType and targetWindow): return
485
486 from tkFileDialog import askopenfilename
487 ftypes = [(fileType, fileType)]
488
489 filename = askopenfilename(filetypes=ftypes, defaultextension=fileType)
490
491 self.loadIntoWindow(filename, targetWindow)
492
493
494 self.configsMenuButton.configure(text='<none>')
495
496
497 if filename:
498 if ftype == 'l': self.lexfilename = filename
499 elif ftype == 'r': self.rulfilename = filename
500
def load(self, *args):
    """
    Load the default lexicon and rule files into the editor panes.

    Reads 'kimmo.lex' and 'kimmo.rul' with read_kimmo_file, strips
    surrounding whitespace from every line and inserts the text at the top
    of the lexicon and rules panes.  The panes are cleared (self.clear())
    after the lexicon file has been read and before anything is inserted.
    'kimmo.alt' is only read when this object actually has an
    'alternation' pane to receive it.

    @param args: ignored; accepted so the method can be used as a Tk
        event or menu callback.
    """
    def read_stripped(filename):
        # Return the file's lines, each stripped, joined by newlines.
        f = read_kimmo_file(filename, self)
        try:
            return '\n'.join([line.strip() for line in f.readlines()])
        finally:
            f.close()

    lexicon_text = read_stripped('kimmo.lex')
    self.clear()
    self.lexicon.insert(1.0, lexicon_text)

    # Inserting into a missing pane would raise AttributeError and prevent
    # the rules from loading, so skip the alternations when there is none.
    alternation_pane = getattr(self, 'alternation', None)
    if alternation_pane is not None:
        alternation_pane.insert(1.0, read_stripped('kimmo.alt'))

    self.rules.insert(1.0, read_stripped('kimmo.rul'))
556
563
565 if self._root is None: return
566 self.phOut.Stop()
567 self._root.destroy()
568 self._root = None
569
570
571
572 - def step(self, *args):
574
577
579 filename = self.batchpath.get()
580 if filename:
581 f = read_kimmo_file(filename, self)
582 lines = f.readlines()
583 f.close()
584
585 self.initKimmo()
586
587
588 self.results.insert(1.0, '\n')
589
590 results_string = ''
591 for line in lines:
592
593 singleword = line.strip()
594 spcr = re.compile(r"\s+")
595 linevals = []
596 linevals = spcr.split(singleword)
597
598
599 batch_result = []
600 batch_result_str = ''
601 if not singleword: continue
602 elif (singleword[0] == '#') or (singleword[0] == ';'):
603 results_string += (singleword + '\n')
604
605
606 elif (linevals[0] == 'g') and (len(linevals) == 2):
607 batch_result = self.kimmoinstance.generate(linevals[1])
608 elif (linevals[0] == 'r') and (len(linevals) == 2):
609 batch_result = self.kimmoinstance.recognize(linevals[1])
610
611 elif '+' in singleword:
612 batch_result = self.kimmoinstance.generate(singleword)
613 else:
614 batch_result = self.kimmoinstance.recognize(singleword)
615
616
617 if len(batch_result) > 0:
618 for x in batch_result: batch_result_str = batch_result_str + x
619 batch_result_str = batch_result_str + '\n'
620 results_string += (batch_result_str)
621
622
623
624 self.results.insert(1.0, '----- '+ time.strftime("%a, %d %b %Y %I:%M %p", time.gmtime()) +' -----\n')
625 self.results.insert(2.0, results_string)
626 self.results.see(1.0)
627
628 if self.traceWindow:
629 self.highlightMatches(' BLOCKED',self.traceWindow,'#ffe0e0')
630 self.highlightMatches(' AT END OF WORD',self.traceWindow,'#e0ffe0')
631
632
633
634
635
636
637
638
639
641 if self._root is None: return
642
643 if len(self.wordIn.get()) > 0:
644 self.initKimmo()
645
646 tmpword = self.wordIn.get()
647
648 tmpword.strip()
649
650
651 generate_result = self.kimmoinstance.generate(tmpword)
652 generate_result_str = ''
653
654 for x in generate_result: generate_result_str = generate_result_str + x
655 generate_result_str = generate_result_str + '\n'
656 self.results.insert(1.0, generate_result_str)
657
658 if self.dbgTracing:
659 self.highlightMatches(' BLOCKED',self.traceWindow,'#ffe0e0')
660 self.highlightMatches(' AT END OF WORD',self.traceWindow,'#e0ffe0')
661 self.highlightMatches('SUCCESS!',self.traceWindow,'#e0ffe0')
662
663
665 self.lexicon.tag_delete("highlight")
666 if self._root is None: return
667
668 if len(self.wordIn.get()) > 0:
669 self.initKimmo()
670
671 tmpword = self.wordIn.get()
672
673 tmpword.strip()
674
675
676 recognize_result = self.kimmoinstance.recognize(tmpword)
677 recognize_result_str = ''
678
679 for x in recognize_result: recognize_result_str = recognize_result_str + x
680 recognize_result_str = recognize_result_str + '\n'
681 self.results.insert(1.0, recognize_result_str)
682
683 if self.dbgTracing:
684 self.highlightMatches(' BLOCKED',self.traceWindow,'#ffe0e0')
685 self.highlightMatches(' AT END OF WORD',self.traceWindow,'#e0ffe0')
686
687
688
689
690
691
693
694 self.initKimmo()
695 graphtitle = ''
696
697
698
699
700
701
702
703
704
705 path = ''
706 pathstatus = os.stat('./')
707 if not ((pathstatus[0] & 0600) == 0600):
708 path = '/tmp/' + str(os.environ.get("USER")) + '/'
709 if not os.path.exists(path):
710 os.mkdir(path,0777)
711
712 pathre = re.compile(r"^.*\/")
713
714 if which == 'l':
715 graphfname = path + pathre.sub("", self.lexfilename)
716 dotstring = dotformat(self.kimmoinstance.lexicalNodes)
717 leximagefile = dot2image(graphfname, dotstring)
718 graphtitle = 'Lexicon Graph'
719
720 elif which == 'r':
721 graphfname = path + pathre.sub("", self.rulfilename)
722
723 tmpOptions = []
724 for x in self.kimmoinstance.fsasNodes:
725
726 tmpOptions.append(x['name'])
727
728 ld = ListDialog(self._root,tmpOptions,"Select FSA")
729
730 if not ld.result: return
731
732
733 dotstring = dotformat(self.kimmoinstance.fsasNodes[string.atoi(ld.result[0])]['nodes'])
734 graphtitle = 'FSA ' + self.kimmoinstance.fsasNodes[string.atoi(ld.result[0])]['name']
735
736
737
738 graphfname += ('.' + str(ld.result[0]))
739
740
741
742 leximagefile = dot2image(graphfname, dotstring)
743
744
745
746 if leximagefile:
747 if self.lexicalGraphWindow: self.lexicalGraphWindow.destroy()
748 self.lexicalGraphWindow = tkImageView(leximagefile, graphtitle)
749
750
751
752
753
755 self.tagId = 1
756
757 for x in self.lexicon.tag_names(): self.lexicon.tag_delete(x)
758
759
760
761 for l in self.kimmoinstance.validateLexicon:
762 if not l in self.kimmoinstance.validateAlternations:
763 if l:
764 self.guiError('Unused Alternation')
765 self.highlightMatches(l,self.lexicon,'#ffffc0')
766
767 for a in self.kimmoinstance.validateAlternations:
768 if not a in self.kimmoinstance.validateLexicon:
769 if a:
770 self.guiError('Unknown Alternation Name')
771 self.highlightMatches(a,self.lexicon,'#ffffc0')
772
773
774
776
777 if not word: return
778
779 matchIdx = '1.0'
780 matchRight = '1.0'
781 while matchIdx != '':
782 matchIdx = window.search(word,matchRight,count=1,stopindex=Tkinter.END)
783 if matchIdx == '': break
784
785 strptr = matchIdx.split(".")
786 matchRight = strptr[0] + '.' + str((int(strptr[1],10) + len(word)))
787
788 window.tag_add(self.tagId, matchIdx, matchRight )
789 window.tag_configure(self.tagId,background=color, foreground='black')
790 self.highlightIds.append([window,self.tagId])
791 self.tagId = self.tagId + 1
792
793
794
795
797 """
798 Initialize the Kimmo engine from the lexicon. This will get called no matter generate
799 or recognize. (i.e. loading all rules, lexicon, and alternations
800 """
801
802 tmprmd5 = md5.new(self.rules.get(1.0, Tkinter.END))
803 tmplmd5 = md5.new(self.lexicon.get(1.0, Tkinter.END))
804 if (not self.kimmoinstance) or (self.rulemd5 != tmprmd5) or (self.lexmd5 != tmplmd5):
805 self.guiError("Creating new Kimmo instance")
806 self.kimmoinstance = KimmoControl(self.lexicon.get(1.0, Tkinter.END),self.rules.get(1.0, Tkinter.END),'','',self.debug)
807 self.guiError("")
808 self.rulemd5 = tmprmd5
809 self.lexmd5 = tmplmd5
810
811 if not self.kimmoinstance.ok:
812 self.guiError("Creation of Kimmo Instance Failed")
813 return
814 if not self.kimmoinstance.m.initial_state() :
815 self.guiError("Morphology Setup Failed")
816 elif self.kimmoinstance.errors:
817 self.guiError(self.kimmoinstance.errors)
818 self.kimmoinstance.errors = ''
819
820
822 if self._root is None: return
823 print self.wordIn.get()
824
825
826
827
829
830
831
832 if self.dbgTracing:
833 self.traceWindow.insert(Tkinter.END, string.join(args," "))
834 self.traceWindow.see(Tkinter.END)
835
836
837
838
839
840
841
842
843
844
845
846
847 return 0,0,''
848
849
850
852
853
854
855
856
857
858
859 helpText = ''
860 try: f = open(self.helpFilename, 'r')
861 except IOError, e:
862 self.guiError("HelpFile not loaded")
863 return
864
865 self.guiError("")
866
867
868
869 helpText = str(f.read())
870 f.close()
871
872
873 helpText = re.sub("\r","",helpText)
874
875
876 helpWindow = Tkinter.Toplevel()
877 helpWindow.title("PyKimmo Documentation & Help")
878
879
880 help = ScrolledText(helpWindow, background='#f0f0f0',
881 foreground='black', width=70, height=40,wrap='none',
882 font='Times 12 bold')
883
884 help.pack(side='top')
885 help.insert(1.0, helpText)
886
887 scroll = Tkinter.Scrollbar(helpWindow, orient='horizontal',command=help.xview)
888 scroll.pack(side='bottom', fill='x')
889 help.configure(xscrollcommand = scroll.set)
890
891
892 matchIdx = Tkinter.END
893 matchRight = Tkinter.END
894 matchLen = Tkinter.IntVar()
895 tagId = 1
896 while 1:
897 matchIdx = help.search(r"::[^\n]*::",matchIdx, stopindex=1.0, backwards=True, regexp=True, count=matchLen )
898 if not matchIdx: break
899
900 matchIdxFields = matchIdx.split(".")
901 matchLenStr = matchIdxFields[0] + "." + str(string.atoi(matchIdxFields[1],10) + matchLen.get())
902
903 print (matchIdx, matchLenStr)
904 help.tag_add(tagId, matchIdx, matchLenStr )
905 help.tag_configure(tagId, background='aquamarine', foreground='blue', underline=True)
906 tagId += 1
907
908
909
910
911
912
913
915
916
918 self.func = None
919 self.origOut = None
920 self.out = out
921
922
928 - def Start(self,func=None):
939
947
949 proceed = 1
950 lineNo = 0
951 addText = ''
952 if self.func != None:
953 proceed,lineNo,newText = self.func(text)
954 if proceed:
955 if text.split() == []:
956 self.origOut.write(text)
957 else:
958
959
960 if self.out:
961 if lineNo:
962 try:
963 raise "Dummy"
964 except:
965 newText = 'line('+str(sys.exc_info()[2].tb_frame.f_back.f_lineno)+'):'+newText
966 codeObject = sys.exc_info()[2].tb_frame.f_back.f_code
967 fileName = codeObject.co_filename
968 funcName = codeObject.co_name
969 self.origOut.write('file '+fileName+','+'func '+funcName+':')
970 self.origOut.write(newText)
971
974
def __init__(self, imagefileName, title):
    """
    Open a new top-level window that displays the image stored in a file.

    @param imagefileName: path of the image file handed to
        Tkinter.PhotoImage; it is also appended to the window title.
    @param title: leading part of the window title.
    """
    window = Tkinter.Toplevel()
    self._root = window
    window.title(''.join([title, ' (', imagefileName, ')']))

    # Keep the PhotoImage on the instance; the label only refers to it.
    picture = Tkinter.PhotoImage("LGraph", file=imagefileName)
    self.image = picture

    label = Tkinter.Label(window, image=picture)
    label.pack(side='top', fill='x')
982
983
985 if self._root:
986 self._root.destroy()
987 self._root = None
988 self.image = None
989
990
991
993
def __init__(self, parent, listOptions, title = None):
    """
    Build a selection dialog over `parent` and block until it is closed.

    The dialog shows the body frame, a caption, a listbox filled from
    `listOptions` and the button box.  It grabs input and then waits in
    wait_window(), so the constructor returns only after the dialog window
    has been destroyed.  self.result starts out as None.

    @param parent: widget the dialog is transient for and positioned
        relative to.
    @param listOptions: entries passed on to self.listbox().
    @param title: optional window title.
    """
    Tkinter.Toplevel.__init__(self, parent)
    self.transient(parent)
    if title:
        self.title(title)

    self.parent = parent
    self.result = None

    # Body area; body() may return the widget that should get focus.
    content = Tkinter.Frame(self)
    self.initial_focus = self.body(content)
    content.pack(padx=5, pady=5)

    caption = Tkinter.Frame(self)
    Tkinter.Label(caption, text="Select an FSA to graph").pack(side='top', fill='x')
    caption.pack()

    self.listbox(listOptions)
    self.buttonbox()
    self.grab_set()

    # Fall back to the dialog itself when body() named no focus widget.
    self.initial_focus = self.initial_focus or self

    # Closing the window through the window manager counts as cancel.
    self.protocol("WM_DELETE_WINDOW", self.cancel)

    # Place the dialog 50 pixels right of and below the parent's origin.
    offset_x = parent.winfo_rootx() + 50
    offset_y = parent.winfo_rooty() + 50
    self.geometry("+%d+%d" % (offset_x, offset_y))

    self.initial_focus.focus_set()
    self.wait_window(self)
1034
1035
1036
1037
1038 - def body(self, master):
1043
1044
1046 box = Tkinter.Frame(self)
1047 self.lb = Tkinter.Listbox(box,height=len(listOptions),width=30,background='#f0f0ff', selectbackground='#c0e0ff'
1048 ,selectmode='single')
1049 self.lb.pack()
1050
1051 for x in listOptions:
1052 self.lb.insert(Tkinter.END,x)
1053
1054 box.pack()
1055
1071
1072
1073
1074
def ok(self, event=None):
    """
    Accept the dialog: validate, record the listbox selection, then close.

    When self.validate() fails, focus goes back to the initial focus widget
    and nothing else happens.  Otherwise the window is withdrawn, apply()
    is run, the current listbox selection is stored in self.result and
    cancel() is called.

    @param event: unused; present so the method can serve as an event
        binding.
    """
    if self.validate():
        self.withdraw()
        self.update_idletasks()
        self.apply()
        # Read the selection before cancel() is called.
        self.result = self.lb.curselection()
        self.cancel()
    else:
        self.initial_focus.focus_set()
1090
1091
1092 - def cancel(self, event=None):
1097
1098
1099
1100
1104
1108
1109
1110
1111
1112
1113
1114
1115
1116 """
1117 + CNsib + s # y o @
1118 e CNsib @ s # i o @
1119 1: 0 2 1 2 1 2 7 1
1120 2: 3 2 5 2 1 2 7 1
1121 3. 0 0 0 4 0 0 0 0
1122 4. 0 0 1 0 1 0 0 0
1123 5: 0 1 1 6 1 1 1 1
1124 6: 0 1 0 1 0 1 1 1
1125 7: 3 2 1 2 1 2 7 1
1126 """
1127
1128
1129
1130
1131
1132
1133
1165
1167 """
1168 @return: true iff the given objects are instances of the same
1169 class.
1170 @rtype: C{bool}
1171 """
1172 return (type(instance1) == types.InstanceType and
1173 type(instance2) == types.InstanceType and
1174 instance1.__class__ == instance2.__class__)
1175
1176
1177
1178
1180 dotfilename = filename + '.dot'
1181
1182 psfilename = filename + '.ps'
1183 imgfilename = filename + '.ppm'
1184 pngfilename = filename + '.png'
1185
1186
1187 f = open(dotfilename, 'w')
1188 f.write(dotstring)
1189 f.close()
1190
1191 os.system('dot -Tps -o ' + psfilename +' ' + dotfilename)
1192
1193
1194
1195
1196
1197
1198 os.system('rm -f ' + imgfilename)
1199 os.system('pstopnm -stdout -portrait -ppm ' + psfilename + ' > ' + imgfilename)
1200
1201 if os.path.isfile(imgfilename) : return imgfilename
1202
1203 return ''
1204
1205
1206
1207
1208
1209
1210
1211
1212
1213
1214
def __init__(self, lexicon_string, rule_string, lexicon_file, rule_file, debug):
    """
    Set up a Kimmo engine from rule and lexicon text (or files).

    The rules are processed first, then the lexicon; a KimmoMorphology
    (self.m) and a KimmoRuleSet (self.s) are built from the results.
    self.ok is 1 on success.  If a RuntimeError is raised during setup it
    is caught, recorded at the front of self.errors, explained on stdout,
    and self.ok stays 0.

    @param lexicon_string: lexicon text; replaced by the contents of
        lexicon_file when that is given.
    @param rule_string: rule text; replaced by the contents of rule_file
        when that is given.
    @param lexicon_file: optional lexicon file name, read with
        read_kimmo_file.
    @param rule_file: optional rule file name, read with read_kimmo_file.
    @param debug: stored on the rule set as its debug flag.
    """
    self.validateLexicon = []
    self.validateAlternations = []

    self.lexicalNodes = []
    self.ruleNodes = []

    self.ok = 0
    self.errors = ''

    # A file name, when given, overrides the corresponding string.
    if lexicon_file:
        f = read_kimmo_file(lexicon_file)
        try:
            lexicon_string = ''.join(f.readlines())
        finally:
            f.close()

    if rule_file:
        f = read_kimmo_file(rule_file)
        try:
            rule_string = ''.join(f.readlines())
        finally:
            f.close()

    try:
        self.processRules(rule_string)
        self.processLexicon(lexicon_string)
        self.m = KimmoMorphology(self.kalternations, self.klexicons)
        self.m.set_boundary(self.boundary_char)
        self.s = KimmoRuleSet(self.ksubsets, self.kdefaults, self.krules)
        self.s.debug = debug
        self.ok = 1
    except RuntimeError as e:
        self.errors = ('Caught:' + str(e) + ' ' + self.errors)
        print('Caught: ' + str(e))
        print('\n'.join([
            "Setup of the kimmoinstance failed. Most likely cause",
            "is infinite recursion due to self-referential lexicon",
            "For instance:",
            "Begin: Begin Noun End",
            "Begin is pointing to itself. Simple example, but check",
            "to insure no directed loops",
        ]))
        self.ok = 0
1258
1259
1260
1262 if self.boundary_char: word += self.boundary_char
1263 genlist = _generate_test(self.s, word)
1264
1265 genliststr = genlist.__repr__()
1266 if self.boundary_char: genliststr = genliststr.replace(self.boundary_char,'')
1267
1268 return eval(genliststr)
1269
1272
1273
1274
1275
1276
1277
def batch(self, filename):
    """
    Run every entry of a batch file through the engine and print the results.

    Each non-blank line of the file is handled as follows:
      - a line starting with '#' or ';' is copied unchanged to the output;
      - 'g <form>' is passed to self.generate, 'r <form>' to
        self.recognize;
      - any other line containing '+' is generated, everything else is
        recognized.
    The results for one line are concatenated onto a single output line;
    lines with no results produce no output.  A timestamp header (built
    from time.gmtime()) followed by the collected output is printed to
    stdout.

    @param filename: batch file name, read with read_kimmo_file; nothing
        happens when it is empty.
    """
    if not filename:
        return

    f = read_kimmo_file(filename)
    try:
        lines = f.readlines()
    finally:
        f.close()

    whitespace = re.compile(r"\s+")   # compiled once, not once per line
    output = []
    for line in lines:
        entry = line.strip()
        if not entry:
            continue
        if entry[0] == '#' or entry[0] == ';':
            output.append(entry + '\n')
            continue

        fields = whitespace.split(entry)
        if len(fields) == 2 and fields[0] == 'g':
            found = self.generate(fields[1])
        elif len(fields) == 2 and fields[0] == 'r':
            found = self.recognize(fields[1])
        elif '+' in entry:
            found = self.generate(entry)
        else:
            found = self.recognize(entry)

        if len(found) > 0:
            output.append(''.join(found) + '\n')

    print('----- ' + time.strftime("%a, %d %b %Y %I:%M %p", time.gmtime()) + ' -----\n')
    print(''.join(output))
1318
1319
1320
1321
1322
1323
1325 """
1326 Takes the currently typed in lexicon and turns them from text into
1327 the kimmo lexicon array.
1328 """
1329
1330 testlex = []
1331 self.klexicons = []
1332 lexigroup = ''
1333 kimmoWords = []
1334 alternationText = ''
1335
1336 tmpnode = {}
1337 tmpnode['node'] = ''
1338 tmpnode['features'] = ''
1339 tmpnode['edges'] = []
1340 tmpnode['edgenames'] = []
1341 self.lexicalNodes = []
1342
1343 for item in text.split("\n"):
1344
1345 cleanLine = item.strip()
1346
1347
1348 if len(cleanLine) == 0 : continue
1349 elif cleanLine[0] == '#' : continue
1350 elif cleanLine[0] == ';' : continue
1351
1352
1353
1354
1355
1356 elif cleanLine[len(cleanLine)-1] == ':' :
1357 if (len(lexigroup) > 0):
1358 if len(kimmoWords):
1359
1360
1361 self.klexicons.append( KimmoLexicon(lexigroup, kimmoWords) )
1362 self.lexicalNodes.append(tmpnode)
1363 kimmoWords = []
1364 lexigroup = cleanLine[0:len(cleanLine)-1]
1365
1366
1367 tmpnode = {}
1368 tmpnode['node'] = lexigroup
1369 tmpnode['features'] = ''
1370 tmpnode['edges'] = []
1371 tmpnode['edgenames'] = []
1372
1373 self.validateLexicon.append(lexigroup)
1374
1375
1376
1377 elif ':' in cleanLine:
1378 alternationText += ( cleanLine + "\n")
1379
1380 elif lexigroup:
1381 p = re.compile(r"\s+")
1382 moreitems = []
1383
1384 moreitems = p.split(item)
1385
1386
1387
1388
1389
1390 rangestart = -1
1391 for x in range(len(moreitems)):
1392
1393 if (moreitems[x][0] == '"') and (rangestart < 0): rangestart = x
1394 elif (moreitems[x][len(moreitems[x])-1] == '"') and (rangestart > -1):
1395 rangeend = x
1396 moreitems[rangestart] = string.join(moreitems[rangestart:rangeend+1], " ")
1397
1398 i = 0
1399 for furtheritem in moreitems:
1400 furtheritem = furtheritem.strip()
1401 moreitems[i] = furtheritem
1402
1403 if not len(moreitems[i]): continue
1404 if i > 2 : continue
1405 else: testlex.append(moreitems[i])
1406 i += 1
1407
1408 for x in range(len(moreitems)):
1409 if x > 2: continue
1410 elif (moreitems[x] == '\'\'') or (moreitems[x] == '""'):
1411 moreitems[x] = ''
1412 elif (moreitems[x][0] == '"') and (moreitems[x][len(moreitems[x])-1] == '"'):
1413 moreitems[x] = moreitems[x][1:len(moreitems[x])-1]
1414 elif (moreitems[x][0] == '\'') and (moreitems[x][len(moreitems[x])-1] == '\''):
1415
1416 tmpitem = moreitems[x]
1417 moreitems[x] = tmpitem[1:(len(tmpitem)-1)]
1418
1419 elif moreitems[x] == 'None' : moreitems[x] = None
1420
1421
1422
1423 if len(moreitems) > 2 :
1424 kimmoWords.append( KimmoWord(moreitems[0], moreitems[2], moreitems[1]) )
1425 self.validateLexicon.append(moreitems[1])
1426
1427 elif len(moreitems) > 1 :
1428 kimmoWords.append( KimmoWord(moreitems[0], '', moreitems[1]) )
1429 self.validateLexicon.append(moreitems[1])
1430
1431 if (len(moreitems) > 1) and not (moreitems[1] in tmpnode['edges']):
1432 tmpnode['edges'].append(moreitems[1])
1433
1434 else :
1435
1436 self.errors += "Unknown Line in Lexicon (" + cleanLine + ")"
1437
1438
1439 if (len(lexigroup) > 0) and (len(kimmoWords)):
1440 self.klexicons.append( KimmoLexicon(lexigroup, kimmoWords) )
1441 self.lexicalNodes.append(tmpnode)
1442
1443
1444
1445 self.processAlternations(alternationText)
1446
1447
1448
1449 return self.lexicalNodes
1450
1451
1452
1453
1454
1455
1456
1458 """
1459 Takes the currently typed in alternations and turns them from text into
1460 the kimmo alternation array.
1461 """
1462
1463 testalt = []
1464 self.kalternations = []
1465 altgroup = ''
1466 kimmoAlts = []
1467
1468 for line in text.split("\n"):
1469
1470 cleanLine = line.strip()
1471
1472 if len(cleanLine) == 0 : continue
1473 elif cleanLine[0] == '#' : continue
1474 elif cleanLine[0] == ';' : continue
1475 else:
1476
1477
1478 p = re.compile(r"\s+")
1479 items = []
1480 items = p.split(cleanLine)
1481
1482 for item in items:
1483 item_tmp = item.strip()
1484
1485
1486 if len(item_tmp) == 0 : continue
1487
1488 elif ':' in item_tmp :
1489
1490 if len(altgroup) > 0:
1491 if len(kimmoAlts) > 0:
1492 self.kalternations.append(
1493 KimmoAlternation(altgroup, kimmoAlts) )
1494
1495 self.validateAlternations.append(altgroup)
1496 for x in kimmoAlts: self.validateAlternations.append(x)
1497 self.lexicalNodes.append(tmpnode)
1498
1499
1500
1501 altgroup = cleanLine[0:len(item_tmp)-1]
1502 kimmoAlts = []
1503
1504 tmpnode = {}
1505 tmpnode['node'] = altgroup
1506 tmpnode['features'] = 'color=\"aquamarine2\", style=filled'
1507 tmpnode['edges'] = []
1508 tmpnode['edgenames'] = []
1509
1510
1511 else :
1512
1513 if (item_tmp[0] == '\'') and (item_tmp[len(item_tmp)-1] == '\''):
1514 item_tmp = item_tmp[1:(len(item_tmp)-1)]
1515
1516 elif item_tmp == 'None' : item_tmp = None
1517
1518
1519 kimmoAlts.append(item_tmp)
1520
1521
1522 tmpnode['edges'].append(item_tmp)
1523
1524 if len(altgroup) > 0:
1525 if len(kimmoAlts) > 0:
1526 self.kalternations.append(
1527 KimmoAlternation(altgroup, kimmoAlts) )
1528 self.validateAlternations.append(altgroup)
1529 for x in kimmoAlts: self.validateAlternations.append(x)
1530 self.lexicalNodes.append(tmpnode)
1531
1532
1533
1534
1535
1536
1537
1538
1539
1540
1541
1542
1543
1544
1545
1546
1547
1549 """
1550 Takes the currently typed in rules and processes them into the python kimmo
1551 format. expects rules to be in c version of .rul file format. needs to
1552 be file compatible.
1553 """
1554
1555 testrule = []
1556 self.krules = []
1557 self.ksubsets = []
1558 self.kdefaults = []
1559 self.boundary_char = ''
1560 setgroup = ''
1561 rulegroup = ''
1562 rulerowcnt = 0
1563 rulecolcnt = 0
1564 kimmoRule = []
1565
1566
1567
1568 ruleFrom = []
1569 ruleTo = []
1570 ruleTran = []
1571
1572 anyset = ['','','','']
1573
1574
1575 tmpnode = {}
1576 tmpnode['node'] = ''
1577 tmpnode['features'] = ''
1578 tmpnode['edges'] = []
1579 tmpnode['edgenames'] = []
1580
1581 tmpfsanodes = {}
1582 tmpfsanodes['nodes'] = []
1583 tmpfsanodes['name'] = ''
1584 self.fsasNodes = []
1585
1586
1587 for line in text.split("\n"):
1588
1589 cleanLine = line.strip()
1590
1591
1592
1593 if len(cleanLine) == 0 : continue
1594
1595
1596
1597 elif (cleanLine[0] == '#') and (anyset[3] != '#'): continue
1598 elif (cleanLine[0] == ';') and (anyset[3] != ';') : continue
1599 else:
1600
1601
1602 p = re.compile(r"\s+")
1603 items = []
1604 items = p.split(cleanLine)
1605
1606
1607
1608
1609 if items[0] == 'SUBSET':
1610 if items[1] == 'ALL': items[1] = '@'
1611 self.ksubsets.append(
1612 KimmoSubset(items[1], string.join(items[2:len(items)]," ") ))
1613
1614
1615
1616
1617 elif items[0] == 'ALPHABET': anyset[1] = string.join(items[1:len(items)]," ")
1618
1619 elif items[0] == 'ANY': anyset[0] = items[1]
1620
1621 elif items[0] == 'NULL': anyset[2] = items[1]
1622
1623
1624 elif items[0] == 'BOUNDARY':
1625 anyset[3] = items[1]
1626 self.boundary_char = items[1]
1627
1628 elif items[0] == 'DEFAULT':
1629 self.kdefaults = [ KimmoDefaults(string.join(items[1:len(items)]," ")) ]
1630
1631 elif items[0] == 'ARROWRULE':
1632
1633
1634
1635 self.krules.append(
1636 KimmoArrowRule(items[1], string.join(items[2:len(items)]," "))
1637
1638 )
1639
1640 elif items[0] == 'RULE':
1641
1642
1643 if rulegroup: self.guiError('error, fsa rule not finished')
1644
1645 rulecolcnt = string.atoi(items[len(items)-1])
1646 rulerowcnt = string.atoi(items[len(items)-2])
1647 rulegroup = string.join(items[1:len(items)-2])
1648
1649
1650
1651 tmpfsanodes = {}
1652 tmpfsanodes['nodes'] = []
1653 tmpfsanodes['name'] = rulegroup
1654
1655
1656 tmpnode = {}
1657 tmpnode['node'] = '0'
1658 tmpnode['features'] = 'color="indianred1", style=filled, shape=box'
1659 tmpnode['edges'] = []
1660 tmpnode['edgenames'] = []
1661
1662 tmpfsanodes['nodes'].append(tmpnode)
1663
1664
1665
1666 elif rulegroup:
1667
1668
1669
1670 ct = re.compile('[^0-9:\.]')
1671
1672
1673
1674
1675
1676 if ((':' in items[0]) or ('.' in items[0])) and (not ct.match(items[0])):
1677
1678
1679
1680 if (items[0][len(items[0])-1] == ':') : finalstate = True
1681 elif (items[0][len(items[0])-1] == '.') : finalstate = False
1682 else :
1683 self.guiError("FSA table failure -- 'final state defn'")
1684 continue
1685
1686 items[0] = items[0][0:len(items[0])-1]
1687
1688
1689 for x in range(rulecolcnt + 1): items[x] = string.atoi(items[x])
1690
1691
1692 kimmoRule.append((items[0], finalstate, items[1:len(items)]))
1693
1694
1695 tmpnode = {}
1696 tmpnode['node'] = str(items[0])
1697 tmpnode['features'] = 'shape=box, fillcolor="lavender blush", style=filled'
1698 if finalstate and (items[0] == 1):
1699 tmpnode['features'] = 'shape=circle, color="paleturquoise2", style=filled'
1700 elif (items[0] == 1):
1701 tmpnode['features'] = 'color="paleturquoise2", style=filled, shape=box'
1702 elif (finalstate):
1703 tmpnode['features'] = 'shape=circle,fillcolor="honeydew2", style=filled'
1704 tmpnode['edges'] = []
1705 tmpnode['edgenames'] = []
1706
1707
1708
1709 tmpitems = items[1:len(items)]
1710 for i in range(len(tmpitems)):
1711 if str(tmpitems[i]) in tmpnode['edges']:
1712
1713 for j in range(len(tmpnode['edges'])):
1714 if str(tmpnode['edges'][j]) == str(tmpitems[i]):
1715
1716 m = re.match(r"(^|\\n)([^\\]*)$", tmpnode['edgenames'][j])
1717
1718
1719
1720 if not m:
1721 tmpnode['edgenames'][j] += (',' + ruleTran[i])
1722 elif (len(m.group(2)) >= 15):
1723 tmpnode['edgenames'][j] += ('\\n ' + ruleTran[i])
1724 else:
1725 tmpnode['edgenames'][j] += (',' + ruleTran[i])
1726 else:
1727 tmpnode['edges'].append(str(tmpitems[i]))
1728 tmpnode['edgenames'].append(ruleTran[i])
1729
1730
1731 """
1732 for x in items[1:len(items)]:
1733 # go through and check, already added?
1734 # for i in range(len(tmpnode['edges'])):
1735 # if tmpnode['edges'][i] == x:
1736 # tmpnode['edgenames'][i] += "," +
1737
1738 tmpnode['edges'].append(str(x))
1739 for x in ruleTran: tmpnode['edgenames'].append(x)
1740 """
1741 tmpfsanodes['nodes'].append(tmpnode)
1742
1743
1744
1745 if ( items[0] == rulerowcnt):
1746 self.krules.append(
1747 KimmoFSARule(str(rulerowcnt)+':'+rulegroup, string.join(ruleTran," "), kimmoRule))
1748
1749
1750 self.fsasNodes.append(tmpfsanodes)
1751
1752
1753 rulegroup = ''
1754 rulerowcnt = 0
1755 rulecolcnt = 0
1756 ruleTran = []
1757 kimmoRule = []
1758
1759
1760 elif len(items) == rulecolcnt:
1761
1762 if len(ruleFrom) == 0: ruleFrom = items
1763 elif len(ruleTo) == 0: ruleTo = items
1764
1765
1766
1767 if (len(ruleTo) != rulecolcnt) or (len(ruleFrom) != rulecolcnt): continue
1768 else:
1769 for x in range(rulecolcnt):
1770 if ruleTo[x] == ruleFrom[x]: ruleTran.append(ruleTo[x])
1771 else:
1772 ruleTran.append(ruleFrom[x] + ':' + ruleTo[x])
1773
1774 ruleTo = []
1775 ruleFrom = []
1776
1777
1778 if (anyset[0] and anyset[1]):
1779 self.ksubsets.append(KimmoSubset(anyset[0], string.join(anyset[1:len(anyset)]," ") ))
1780
1781
1782
1783
1784
1785
1786
1788 """
1789 Input/Output character pair
1790 """
def __init__(self, input_subset, output_subset):
    """Remember the two sides of an input/output pair.

    Both arguments are stored exactly as given, in ``_input`` and
    ``_output`` respectively.
    """
    self._input, self._output = input_subset, output_subset
1794
1795
def output(self):
    """Return the output side stored on this pair."""
    out_side = self._output
    return out_side
1798
1799
1801 sI = self.input()
1802 sO = self.output()
1803 s = sI + ':' + sO
1804 return s
1805
1806
1808 return (_classeq(self, other) and
1809 self._input == other._input and
1810 self._output == other._output)
1811
1812
1814 return hash( (self._input, self._output,) )
1815
1816
1817 - def matches(self, input, output, subsets, negatedOutputMatch=False):
1822
1823
def _matches(self, me, terminal, subsets):
    """Test one side of a pair specification against a single character.

    me       -- the specification: a literal, a subset name, or a subset
                name prefixed with '~' to negate it
    terminal -- the character being tested
    subsets  -- mapping from subset name to its members

    Returns True when ``me`` equals ``terminal``, when ``me`` names a
    subset containing ``terminal``, or when ``me`` negates a known subset
    that does not contain it.  A name missing from ``subsets`` never
    matches, negated or not.
    """
    if me == terminal:
        return True

    negated = (me[0] == '~')
    if negated:
        key = me[1:]
    else:
        key = me

    if key not in subsets:
        return False

    found = terminal in subsets[key]
    if negated:
        return not found
    return found
1836
# Regular-expression fragments used to tokenize rule and default
# descriptions.  They are raw strings so that every backslash reaches the
# regex engine verbatim instead of relying on Python leaving unrecognised
# escapes such as '\:' or '\s' untouched.
_kimmo_terminal_regexp = r"[a-zA-Z0-9\+'\-\#\@\$\%\!\^\`\}\{]+"
# FSA tables accept any run of characters other than ':' and whitespace.
_kimmo_terminal_regexp_fsa = r'[^:\s]+'

# A terminal optionally prefixed with '~' (subset negation).
_kimmo_terminal_regexp_ext = '~?' + _kimmo_terminal_regexp

# Token patterns: a terminal or a lone colon ...
_kimmo_defaults = _kimmo_terminal_regexp + r'|\:'
_kimmo_defaults_fsa = _kimmo_terminal_regexp_fsa + r'|\:'
# ... and, for arrow rules, the punctuation and the four arrows as well.
_kimmo_rule = _kimmo_terminal_regexp_ext + r'|[\:\(\)\[\]\?\&\*\_]|<=>|==>|<==|/<='

_arrows = ['==>', '<=>', '<==', '/<=']

# Tokens that can never be read as an identifier.
# NOTE(review): '?' is tokenized by _kimmo_rule but is not listed here;
# confirm whether that omission is intentional before adding it.
_special_tokens = ['(', ')', '[', ']', '*', '&', '_', ':'] + _arrows
# Special tokens that cannot begin a list: everything above except the
# opening '(' and '['.
_non_list_initial_special_tokens = [')', ']', '*', '&', '_', ':'] + _arrows
1853
1854
1856 """Read the description, which should be in form [X|X:Y]+, and return a list of pairs"""
1857
1858 if token_type == 'FSA':
1859 desc = list(tokenize.regexp(description, _kimmo_defaults_fsa))
1860 else:
1861 desc = list(tokenize.regexp(description, _kimmo_defaults))
1862
1863 prev = None
1864 colon = False
1865 result = []
1866 for token in desc:
1867 if token == ':':
1868 if colon: raise ValueError('two colons in a row')
1869 if prev == None: raise ValueError('colon must follow identifier')
1870 colon = True
1871 elif colon:
1872 result.append(KimmoPair(prev, token))
1873 prev = None
1874 colon = False
1875 else:
1876 if prev:
1877 result.append(KimmoPair(prev, prev))
1878 prev = token
1879 colon = False
1880 if colon: raise ValueError('colon with no following identifier')
1881 if prev: result.append(KimmoPair(prev, prev))
1882 return result
1883
1884
1885
1887 - def __init__(self, name, description):
def name(self):
    """Return the stored subset name."""
    return self._name

def subset(self):
    """Return the stored subset members."""
    return self._subset
1896
def defaults(self):
    """Return the stored default pairs."""
    stored = self._defaults
    return stored
1905 return '<KimmoDefaults %s>' % (self._description,)
1906
def pairs(self):
    """Abstract hook; the concrete rule classes in this file define their own.

    Raises NotImplementedError, which is a RuntimeError subclass, so
    callers that catch RuntimeError keep working.
    """
    raise NotImplementedError('unimplemented: KimmoRule.pairs()')

def right_advance(self, current_states, input, output, subsets):
    """Abstract hook; the concrete rule classes in this file define their own.

    Raises NotImplementedError, which is a RuntimeError subclass, so
    callers that catch RuntimeError keep working.
    """
    raise NotImplementedError('unimplemented: KimmoRule.right_advance()')
1911
1912
1914 """
1915 Two level rule
1916 """
1917
def leftFSA(self):
    """Return the automaton stored for the left context."""
    return self._left_fsa

def rightFSA(self):
    """Return the automaton stored for the right context."""
    return self._right_fsa

def pairs(self):
    """Return the collection of pairs stored on this rule."""
    return self._pairs

def arrow(self):
    """Return the arrow token stored on this rule."""
    return self._arrow

def lhpair(self):
    """Return the left-hand pair stored on this rule."""
    return self._lhpair
1923
1924 - def __init__(self, name, description):
1931
1933 return '<KimmoArrowRule %s: %s>' % (self._name, self._description)
1934
def advance(self, fsa, current_states, input, output, subsets):
    """Step a context automaton over one input/output character pair.

    For each state in ``current_states`` every successor reported by
    ``fsa.forward_traverse()`` is kept when at least one pair labelling
    that edge matches (input, output) under ``subsets``.

    Returns a tuple (next_states, contains_halt_state): the successors
    reached, without duplicates and in discovery order, and whether any
    of them is listed in ``fsa.finals()``.
    """
    reached = []
    hit_final = False
    for source in current_states:
        for target in fsa.forward_traverse(source):
            for label in fsa._labels[(source, target)]:
                if label.matches(input, output, subsets):
                    break
            else:
                # no label on this edge accepts the pair
                continue
            if target in fsa.finals():
                hit_final = True
            if target not in reached:
                reached.append(target)
    return (reached, hit_final)
1950
1951
1952 - def right_advance(self, current_states, input, output, subsets):
1954
def matches(self, input, output, subsets):
    """Decide whether this rule's left-hand pair applies to input/output.

    Returns None when the rule does not apply, True when the rule must
    then hold, and False when it must then fail.  The outcome depends on
    the arrow:

      '==>'  True if the left-hand pair matches
      '<=='  False if it matches with negatedOutputMatch=True
      '/<='  False if it matches with negatedOutputMatch=False
      '<=>'  True on a match with negatedOutputMatch=False, otherwise
             False on a match with negatedOutputMatch=True

    Raises RuntimeError for any other arrow.
    """
    arrow = self.arrow()
    if arrow not in ('==>', '<==', '/<=', '<=>'):
        raise RuntimeError('unknown arrow: ' + arrow)

    pair = self.lhpair()

    if arrow == '==>':
        if pair.matches(input, output, subsets):
            return True
        return None

    if arrow == '<==':
        if pair.matches(input, output, subsets, negatedOutputMatch=True):
            return False
        return None

    if arrow == '/<=':
        if pair.matches(input, output, subsets, negatedOutputMatch=False):
            return False
        return None

    # '<=>': try the plain match first, then the negated-output match
    if pair.matches(input, output, subsets, negatedOutputMatch=False):
        return True
    if pair.matches(input, output, subsets, negatedOutputMatch=True):
        return False
    return None
1986
1988
1989 (end_pair, tree) = self._parse_pair(tokens, 0)
1990 lhpair = self._pair_from_tree(tree)
1991 self._lhpair = lhpair
1992 self._pairs.add(lhpair)
1993
1994 end_arrow = self._parse_arrow(tokens, end_pair)
1995 (end_left, lfsa) = self._parse_context(tokens, end_arrow, True)
1996 end_slot = self._parse_slot(tokens, end_left)
1997 (end_right, rfsa) = self._parse_context(tokens, end_slot, False)
1998 if not(end_right == len(tokens)):
1999 raise ValueError('unidentified tokens')
2000
2001 self._left_fsa = lfsa
2002 self._right_fsa = rfsa
2003
2005 if i >= len(tokens):
2006 if raise_error:
2007 raise ValueError('ran off end of input')
2008 else:
2009 return None
2010 return tokens[i]
2011
2018
2020
2021 t1 = self._next_token(tokens, i, True)
2022 if t1 in _special_tokens: raise ValueError('expected identifier, not ' + t1)
2023 t2 = t1
2024 j = i + 1
2025 if self._next_token(tokens, j) == ':':
2026 t2 = self._next_token(tokens, j+1, True)
2027 if t2 in _special_tokens: raise ValueError('expected identifier, not ' + t2)
2028 j = j + 2
2029 tree = Tree('Pair', tokens[i:j])
2030 else:
2031 tree = Tree('Pair', [tokens[i]])
2032
2033 return (j, tree)
2034
2035
2037 self._arrow = self._next_token(tokens, i, True)
2038 if not(self.arrow() in _arrows):
2039 raise ValueError('expected arrow, not ' + self.arrow())
2040
2041 return i + 1
2042
2043
2045 slot = self._next_token(tokens, i, True)
2046 if slot != '_':
2047 raise ValueError('expected _, not ' + slot)
2048
2049 return i + 1
2050
2051
def _parse_context(self, tokens, i, reverse):
    """Parse the context expression starting at tokens[i] and compile it.

    Returns (next_index, automaton).  When ``_parse_list`` consumes no
    tokens the result is (i, None).  Otherwise an FSA over the alphabet
    collected from the parse tree is built (``reverse`` is handed on to
    ``_build_fsa``), converted with ``dfa()`` and pruned.
    """
    (stop, tree) = self._parse_list(tokens, i)
    if stop == i:
        return (i, None)

    alphabet = set()
    self._collect_alphabet(tree, alphabet)

    machine = FSA(alphabet)
    entry_state = machine.new_state()
    exit_state = self._build_fsa(machine, entry_state, tree, reverse)
    machine.set_final([exit_state])

    deterministic = machine.dfa()
    deterministic.prune()
    return (stop, deterministic)
2067
2068
2076
2077
2079
2080 t = self._next_token(tokens, i)
2081 if t == None or t in _non_list_initial_special_tokens:
2082
2083 return (i, None)
2084 (j, s) = self._parse_singleton(tokens, i)
2085 (k, r) = self._parse_list(tokens, j, type)
2086
2087 if r == None:
2088
2089 return (j, s)
2090 tree = Tree(type, [s, r])
2091
2092 return (k, tree)
2093
2094
2096
2097 t = self._next_token(tokens, i, True)
2098 j = i
2099 result = None
2100 if t == '(':
2101 (j, result) = self._parse_list(tokens, i + 1, 'Cons')
2102 if result == None: raise ValueError('missing contents of (...)')
2103 t = self._next_token(tokens, j, True)
2104 if t != ')': raise ValueError('missing final parenthesis, instead found ' + t)
2105 j = j + 1
2106 elif t == '[':
2107 (j, result) = self._parse_list(tokens, i + 1, 'Or')
2108 if result == None: raise ValueError('missing contents of [...]')
2109 t = self._next_token(tokens, j, True)
2110 if t != ']': raise ValueError('missing final bracket, instead found ' + t)
2111 j = j + 1
2112 elif t in _special_tokens:
2113 raise ValueError('expected identifier, found ' + t)
2114 else:
2115 (j, tree) = self._parse_pair(tokens, i)
2116 result = tree
2117 t = self._next_token(tokens, j)
2118 if t in ['*', '&', '?']:
2119 j = j + 1
2120 result = Tree(t, [result])
2121 return (j, result)
2122
2123
def _build_fsa(self, fsa, entry_node, tree, reverse):
    """Add the automaton fragment for ``tree`` to ``fsa`` at ``entry_node``.

    Dispatches on ``tree.node``: 'Pair' goes to ``_build_terminal``,
    'Cons' to ``_build_seq``, 'Or' to ``_build_or``, and the postfix
    operators '*', '&' and '?' to ``_build_star``, ``_build_plus`` and
    ``_build_qmk``.  Returns whatever the chosen builder returns; raises
    RuntimeError for any other node label.
    """
    kind = tree.node

    if kind == 'Pair':
        terminal = self._pair_from_tree(tree)
        return self._build_terminal(fsa, entry_node, terminal)

    # binary nodes carry their two operands as children 0 and 1
    if kind == 'Cons':
        return self._build_seq(fsa, entry_node, tree[0], tree[1], reverse)
    if kind == 'Or':
        return self._build_or(fsa, entry_node, tree[0], tree[1], reverse)

    # unary operator nodes wrap a single child
    if kind == '*':
        return self._build_star(fsa, entry_node, tree[0], reverse)
    if kind == '&':
        return self._build_plus(fsa, entry_node, tree[0], reverse)
    if kind == '?':
        return self._build_qmk(fsa, entry_node, tree[0], reverse)

    raise RuntimeError('unknown tree node'+kind)
2139
2140
2142 new_exit_node = fsa.new_state()
2143 fsa.insert(entry_node, terminal, new_exit_node)
2144
2145 return new_exit_node
2146
2147
2152
2153
2162
2163
2165 node1 = fsa.new_state()
2166 node2 = self._build_fsa(fsa, node1, tree, reverse)
2167 node3 = fsa.new_state()
2168 fsa.insert(node, epsilon, node1)
2169 fsa.insert(node, epsilon, node3)
2170 fsa.insert(node2, epsilon, node1)
2171 fsa.insert(node2, epsilon, node3)
2172 return node3
2173
2174
def _build_seq(self, fsa, node, tree0, tree1, reverse):
    """Chain the fragments for two subtrees, starting at ``node``.

    The subtrees are built in the order tree0, tree1, or tree1, tree0
    when ``reverse`` is true; the second fragment starts where the first
    one ends.  Returns the exit node of the second fragment.
    """
    if reverse:
        first, second = tree1, tree0
    else:
        first, second = tree0, tree1
    midpoint = self._build_fsa(fsa, node, first, reverse)
    return self._build_fsa(fsa, midpoint, second, reverse)
2182
def _build_or(self, fsa, node, tree0, tree1, reverse):
    """Add the alternation of tree0 and tree1 to ``fsa``, starting at ``node``.

    Each alternative is built by ``_build_fsa`` from its own fresh entry
    state.  ``node`` is linked to both entries, and both exits are linked
    to one new state, which is returned as the exit of the alternation.
    """
    # Fresh entry states, one per alternative.
    node0 = fsa.new_state()
    node1 = fsa.new_state()
    # Build both alternatives; node2 and node3 are their exit states.
    node2 = self._build_fsa(fsa, node0, tree0, reverse)
    node3 = self._build_fsa(fsa, node1, tree1, reverse)
    # Common exit state for the whole alternation.
    node4 = fsa.new_state()
    # NOTE(review): `epsilon` is not defined in this block; presumably it
    # comes from the star import of nltk_lite.contrib.fsa -- confirm.
    fsa.insert(node, epsilon, node0)
    fsa.insert(node, epsilon, node1)
    fsa.insert(node2, epsilon, node4)
    fsa.insert(node3, epsilon, node4)
    return node4
2194
2195
def __init__(self, name, pair_description, state_descriptions):
    """Build a table-driven rule from its column pairs and state rows.

    name               -- rule name, stored as given
    pair_description   -- text parsed by ``parse_pair_sequence`` in 'FSA'
                          mode to obtain the column pairs
    state_descriptions -- sequence of (index, is_final, next_states) rows

    Fills ``transitions`` and ``is_final``, both keyed by state index.
    Raises ValueError when a row's final flag is neither True nor False
    or when its transition list does not have one entry per pair.
    """
    self._name = name
    self._pairs = parse_pair_sequence(pair_description, 'FSA')
    self.transitions = {}
    self.is_final = {}
    self._state_descriptions = state_descriptions

    for row in state_descriptions:
        (index, is_final, next_state_array) = row

        if is_final not in (True, False):
            raise ValueError('each state description must take the form (index, True/False, [next_state_indices...]')

        width = len(next_state_array)
        if width != len(self.pairs()):
            raise ValueError('transition array of wrong size '+ str(width) + ' ' + str(len(self.pairs())))

        self.transitions[index] = next_state_array
        self.is_final[index] = is_final
2212
def name(self):
    """Return the rule name."""
    return self._name

def pairs(self):
    """Return the column pairs of the transition table."""
    return self._pairs

def start(self):
    """Return the index of the first state description."""
    first_row = self._state_descriptions[0]
    return first_row[0]
2217
2218
2220 for i in indices:
2221 if self.is_final[i]: return True
2222 return False
2223
2224
2226
2227
2228
2229
2230 sorted_with_index = []
2231 for idx, pair in enumerate(self.pairs()):
2232
2233
2234 size1 = 1
2235 size2 = 1
2236 if pair.input() in subsets: size1 = len(subsets[pair.input()])
2237 if pair.output() in subsets: size2 = len(subsets[pair.output()])
2238
2239 sorted_with_index.append([idx,pair,size1,size2])
2240
2241 sorted_with_index.sort(lambda x,y: self.mycompare(x[2],y[2],x[3],y[3]) )
2242 return sorted_with_index
2243
2244
2245
2247 if x1 == y1: return x2-y2
2248 else: return x1-y1
2249
2250 - def right_advance(self, current_states, input, output, subsets):
2251
2252 next_states = []
2253 contains_halt_state = False
2254 for index in current_states:
2255
2256
2257
2258 any_next_state = ''
2259 next_state_isset = 0
2260 any_next_states_ary = []
2261
2262 for i, pair, size1, size2 in self.sorted_pairs(subsets):
2263
2264
2265
2266 if pair.matches(input, output, subsets):
2267
2268
2269
2270
2271
2272
2273
2274
2275
2276
2277
2278
2279
2280
2281
2282 if 0:
2283
2284
2285
2286 if self.transitions.has_key(self.transitions[index][i]): ft = self.is_final[self.transitions[index][i]]
2287 else : ft = ''
2288 any_next_states_ary.append([ i, self.transitions[index][i], ft, pair.__repr__() ] )
2289 if not any_next_state:
2290 any_next_state = self.transitions[index][i]
2291
2292
2293 else:
2294
2295
2296
2297
2298 next_state_isset = 1
2299 next_state = self.transitions[index][i]
2300 if self.transitions.has_key(next_state):
2301 if not(next_state in next_states):
2302 next_states.append(next_state)
2303
2304 if self.is_final[next_state]: contains_halt_state = True
2305 break
2306
2307 return (next_states, contains_halt_state)
2308
2309
2311 return '<KimmoFSARule %s>' % (self.name(), )
2312
2313
2315 - def __init__(self, letters, gloss, next_alternation=None):
2319
2320
2322 return '<KimmoWord %s: %s>' % (self.letters(), self.gloss())
2323
2324
def letters(self):
    """Return the stored letters of this word."""
    return self._letters

def gloss(self):
    """Return the stored gloss of this word."""
    return self._gloss
2328
2329
2335
2336
2338 return '<KimmoLexicon ' + self.name() + '>'
2339
2340
def name(self):
    """Return the lexicon name."""
    return self._name

def words(self):
    """Return the stored words of this lexicon."""
    return self._words

def trie(self):
    """Return the stored trie of this lexicon."""
    return self._trie
2344
2345
2347 if len(words) == 0: return ([], [])
2348 first_chars = {}
2349 for w in words:
2350 if len(w.letters()) <= word_position: continue
2351 fc = w.letters()[word_position]
2352 if first_chars.has_key(fc):
2353 first_chars[fc].append(w)
2354 else:
2355 first_chars[fc] = [ w ]
2356 sub_tries = []
2357 for c, sub_words in first_chars.items():
2358 sub_tries.append( (c, self.build_trie(sub_words, word_position+1)) )
2359 return ( [w for w in words if len(w.letters()) == word_position], sub_tries )
2360
2361
2363 - def __init__(self, name, lexicon_names):
2366
2369
2370
def name(self):
    """Return the stored name."""
    stored = self._name
    return stored
2373
2374
def __init__(self, alternations, lexicons, start='Begin'):
    """Index the given alternations and lexicons by their names.

    alternations -- objects with a name() method, stored in
                    ``self.alternations`` under that name
    lexicons     -- objects with a name() method, stored in
                    ``self.lexicons`` under that name
    start        -- stored unchanged as ``self._start``

    When two entries share a name the later one replaces the earlier.
    """
    self.alternations = {}
    self.lexicons = {}
    self._start = start

    for alternation in alternations:
        key = alternation.name()
        self.alternations[key] = alternation

    for lexicon in lexicons:
        key = lexicon.name()
        self.lexicons[key] = lexicon
2382
2384 self.boundary = boundary_char
2385
2388
2389
2394
2395
2396
2397
2408
2410
2411 if name == None:
2412 return []
2413 elif self.alternations.has_key(name):
2414 result = []
2415 for ln in self.alternations[name].lexicon_names():
2416 result.extend(self._collect(ln))
2417 return result
2418 elif self.lexicons.has_key(name):
2419 return [ self.lexicons[name] ]
2420 else:
2421
2422 return []
2423
2425 result = []
2426
2427
2428 for s in state:
2429 if isinstance(s, KimmoLexicon):
2430
2431 (words, sub_tries) = s.trie()
2432 else:
2433 (words, sub_tries) = s
2434 for w in words:
2435 for v in self._advance_through_word(w, char):
2436 yield v
2437 for c, sub_trie in sub_tries:
2438 if c == char: result.append(sub_trie)
2439 if len(result) > 0:
2440 yield (result, [])
2441
2442
2443
2444
2452
2454 - def __init__(self, subsets, defaults, rules, null='0'):
2473
def rules(self):
    """Return the stored rules."""
    return self._rules

def subsets(self):
    """Return the stored subsets."""
    return self._subsets

def null(self):
    """Return the stored null character."""
    return self._null
2480
2481
def _evaluate_rule_left_context(self, rule, input, output):
    """Check a rule's left context against the pairs consumed so far.

    ``input`` and ``output`` are parallel sequences; they are walked from
    the last position back to the first, feeding each pair to
    ``rule.advance`` on the rule's left automaton.

    Returns True when the rule has no left automaton or as soon as a halt
    state is reported; False when no state survives a step or when the
    sequences run out first.
    """
    fsa = rule.leftFSA()
    if fsa == None:
        return True

    frontier = [fsa.start()]
    for position in range(len(input) - 1, -1, -1):
        (reached, contains_halt_state) = rule.advance(
            fsa, frontier, input[position], output[position], self.subsets())
        if contains_halt_state:
            return True

        # carry the reached states forward without duplicates
        frontier = []
        for state in reached:
            if state not in frontier:
                frontier.append(state)
        if not frontier:
            return False

    return False
2497
2528
2529
2530
2531
2532
2533
2534
2535
2536
2537
2538 - def _generate(self, input_tokens, position, rule_states, morphological_state, input, output, result_str, result_words,
2539 invert=False):
2540
2541
2542
2543
2544
2545
2546
2547
2548
2549 if ((position >= len(input_tokens)) ):
2550
2551 if (self.debug) : print ' AT END OF WORD'
2552
2553
2554
2555
2556
2557
2558 if morphological_state:
2559
2560
2561 possible_next_input_chars = invert.possible_next_characters(morphological_state)
2562
2563
2564
2565 if ('0' in possible_next_input_chars) or ('#' in possible_next_input_chars):
2566 if '0' in possible_next_input_chars: boundary = '0'
2567 elif '#' in possible_next_input_chars: boundary = '#'
2568
2569
2570
2571
2572
2573
2574 for next_morphological_state, new_words in invert.advance(morphological_state, boundary):
2575
2576
2577
2578
2579
2580
2581
2582
2583 yield result_str, result_words + new_words
2584
2585
2586
2587 else:
2588
2589
2590 self._debug_print_input_and_output(position, rule_states, morphological_state, input, output, None, None, invert)
2591 for (start, rule, fsa_states, required_truth_value) in rule_states:
2592 if isinstance(rule, KimmoArrowRule):
2593 truth_value = False
2594 elif isinstance(rule, KimmoFSARule):
2595 truth_value = rule.contains_final(fsa_states)
2596
2597 if (required_truth_value != truth_value):
2598 if (self.debug):
2599 print ' BLOCKED by rule {%d %s %s}' % (start, rule, required_truth_value)
2600 print fsa_states
2601 break
2602 else:
2603 if 0:
2604 print ' passed rule {%d %s %s}' % (start, rule, required_truth_value)
2605
2606 else:
2607 if (self.debug):
2608 print ' SUCCESS!'
2609 yield result_str, result_words
2610 else:
2611 if morphological_state:
2612
2613 possible_next_input_chars = invert.possible_next_characters(morphological_state)
2614
2615
2616
2617
2618
2619 for pair_input, pair_output in self._pair_alphabet:
2620
2621 if (pair_input != self.null() and morphological_state):
2622
2623
2624 if not(pair_input in possible_next_input_chars):
2625 continue
2626
2627 if invert:
2628
2629 compare_token = pair_output
2630 else:
2631 compare_token = pair_input
2632
2633 if not(compare_token == self.null() or compare_token == input_tokens[position]): continue
2634
2635
2636 self._debug_print_input_and_output(position, rule_states, morphological_state,
2637 input, output, pair_input, pair_output, invert)
2638
2639
2640 fail = None
2641 next_rule_states = []
2642
2643
2644
2645 rule_state_debug = ' '
2646 for s in rule_states:
2647
2648
2649 (start, rule, fsa_state_set, required_truth_value) = s
2650
2651 current_state_str = '['
2652 for x in fsa_state_set: current_state_str += str(x)
2653 rule_state_debug += current_state_str
2654
2655 (next_fsa_state_set, contains_halt_state) = rule.right_advance(fsa_state_set, pair_input, pair_output,
2656 self.subsets())
2657
2658 current_state_str = ''
2659 for x in next_fsa_state_set: current_state_str += str(x)
2660 if not current_state_str: current_state_str = '0 (FAIL)'
2661 rule_state_debug += ('->' + current_state_str + '] ')
2662
2663 if (contains_halt_state == True and isinstance(rule, KimmoArrowRule)):
2664 if (required_truth_value == False):
2665 fail = s
2666 break
2667 else:
2668 if (0):
2669 print ' passed rule {%d %s %s}' % (start, rule, required_truth_value)
2670 elif (len(next_fsa_state_set) == 0):
2671
2672
2673
2674 if (required_truth_value == True):
2675 fail = s
2676 break
2677 else:
2678 if (0):
2679 print ' passed rule {%d %s %s}' % (start, rule, required_truth_value)
2680 else:
2681 next_rule_states.append( (start, rule, next_fsa_state_set, required_truth_value) )
2682
2683 if (self.debug) : print rule_state_debug
2684
2685 if (fail):
2686 if (self.debug):
2687 print ' BLOCKED by rule %s' % (fail,)
2688 continue
2689
2690
2691
2692 for rule in self.rules():
2693 if not(isinstance(rule, KimmoArrowRule)): continue
2694
2695 required_truth_value = rule.matches(pair_input, pair_output, self.subsets())
2696 if required_truth_value == None: continue
2697 left_value = self._evaluate_rule_left_context(rule, input, output)
2698 if (left_value == False):
2699 if (required_truth_value == True):
2700 fail = rule
2701 continue
2702
2703
2704 if (rule.rightFSA()):
2705 if (self.debug):
2706 print ' adding rule {%d %s %s}' % (position, rule, required_truth_value)
2707 next_rule_states.append( (position, rule, [ rule.rightFSA().start() ], required_truth_value) )
2708 else:
2709 if (required_truth_value == False):
2710 fail = rule
2711 continue
2712 else:
2713 if (0):
2714 print ' passed rule ' + str(rule)
2715
2716
2717 if (fail == None):
2718 new_position = position
2719 new_input = input + [pair_input]
2720 new_output = output + [pair_output]
2721 new_result_str = result_str
2722
2723 if (pair_input != self.null()):
2724 if invert:
2725 new_result_str = result_str + pair_input
2726 else:
2727 new_position = position + 1
2728 if (pair_output != self.null()):
2729 if invert:
2730 new_position = position + 1
2731 else:
2732 new_result_str = result_str + pair_output
2733
2734
2735
2736
2737 if morphological_state and pair_input != self.null():
2738 for next_morphological_state, new_words in invert.advance(morphological_state, pair_input):
2739
2740 for o in self._generate(input_tokens, new_position, next_rule_states, next_morphological_state,
2741 new_input, new_output, new_result_str,
2742 result_words + new_words,
2743 invert):
2744 yield o
2745 else:
2746 for o in self._generate(input_tokens, new_position, next_rule_states, morphological_state,
2747 new_input, new_output, new_result_str, result_words, invert):
2748 yield o
2749 else:
2750 if (self.debug):
2751 print ' BLOCKED by rule ' + str(fail)
2752
2755
2757 """Generator: yields output strings"""
2758 for o, w in self._generate(input_tokens, 0, self._initial_rule_states(), None, [], [], '', None):
2759 yield o
2760
2761
2762 - def recognize(self, input_tokens, morphology=None):
2763 """Recognizer: yields (input_string, input_words)"""
2764 morphology_state = None
2765 output_words = None
2766 invert = True
2767 if morphology:
2768 morphology_state = morphology.initial_state()
2769 output_words = []
2770 invert = morphology
2771
2772
2773 if not morphology_state:
2774 print "Bad Morphological State, failing recognition"
2775 return
2776 if (self.debug) : print 'recognize: ' + input_tokens
2777
2778 for o in self._generate(input_tokens, 0, self._initial_rule_states(), morphology_state, [], [], '',
2779 output_words, invert):
2780 yield o
2781
2782
2784 resultlist = '%s -> ' % (input,),
2785 padlevel = len(input) + 4
2786 padstring = ''
2787
2788
2789 tmplist = '%s' % ('***NONE***'),
2790 for o in s.generate(input):
2791 tmplist = '%s%s\n' % (padstring,o,),
2792 resultlist = resultlist + tmplist
2793 padstring = ''
2794 for x in range(padlevel): padstring = padstring + ' '
2795 tmplist = '%s' % (''),
2796 resultlist = resultlist + tmplist
2797
2798 return resultlist
2799
2800
2802 resultlist = '%s <- ' % (input,),
2803 padlevel = len(input) + 4
2804 padstring = ''
2805
2806
2807 tmplist = '%s' % ('***NONE***'),
2808 for o, w in s.recognize(input, morphology):
2809 if w:
2810
2811 tmplist = '\n %s %s \n' % (o, w),
2812 resultlist = resultlist + tmplist
2813 else:
2814 tmplist = '%s%s \n' % (padstring,o,),
2815 resultlist = resultlist + tmplist
2816
2817 padstring = ''
2818 for x in range(padlevel): padstring = padstring + ' '
2819 tmplist = '%s' % (''),
2820
2821
2822
2823 resultlist = resultlist + tmplist
2824
2825 return resultlist
2826
2844
2845
2846
2847
if __name__ == '__main__':
    # Command-line entry point.
    #
    # Arguments are classified by content, in any order:
    #   -r:<string>      string to recognize
    #   -g:<string>      string to generate from
    #   *.lex*           lexicon file
    #   *.rul*           rules file
    #   *.batch*         batch test file
    #   debug            turn on console debugging
    #
    # When a rules file plus at least one action (-r:, -g: or a batch file)
    # is given, the work is done on the console; otherwise the GUI is started.
    #
    # Single-argument print(...) is used throughout: it prints the same text
    # under the Python 2 print statement this file is written for.
    filename_lex = ''
    filename_rul = ''
    filename_batch_test = ''
    recognize_string = ''
    generate_string = ''
    console_debug = 0

    # Skip sys.argv[0]: the script's own path is not an argument and must not
    # be mistaken for an input file if it happens to contain ".lex"/".rul".
    for arg in sys.argv[1:]:
        # Option prefixes are tested before the filename patterns so that the
        # string to analyse (e.g. "-r:a.lex") is never taken for a filename.
        if arg.startswith("-r:"):
            recognize_string = arg[3:]
        elif arg.startswith("-g:"):
            generate_string = arg[3:]
        elif ".lex" in arg:
            filename_lex = arg
        elif ".rul" in arg:
            filename_rul = arg
        elif ".batch" in arg:
            filename_batch_test = arg
        elif arg == "debug":
            console_debug = 1

    # Usage banner, printed on every start-up (one print per entry; entries
    # ending in "\n" therefore produce a trailing blank line).
    usage_lines = (
        'Tips:',
        'kimmo.cfg is loaded by default, so if you name your project that, ',
        "it will be loaded at startup\n",
        'For commandline operation:',
        '	(for instance if you want to use a different editor)',
        "To Recognize:",
        "	% python kimmo.py english.lex english.rul -r:cats",
        "To Generate:",
        "	% python kimmo.py english.lex english.rul -g:cat+s",
        "To Batch Test:",
        "	% python kimmo.py english.lex english.rul english.batch_test",
        "With Debug and Tracing:",
        "	% python kimmo.py english.lex english.rul -r:cats debug\n",
    )
    for line in usage_lines:
        print(line)

    if (recognize_string or generate_string or filename_batch_test) and filename_rul:
        kimmoinstance = KimmoControl("", "", filename_lex, filename_rul, console_debug)

        # Loading failed: report why and signal the failure to the caller
        # with a non-zero exit status.
        if not kimmoinstance.ok:
            print(kimmoinstance.errors)
            sys.exit(1)

        if recognize_string:
            recognize_results = kimmoinstance.recognize(recognize_string)
            print(recognize_results)

        if generate_string:
            generate_results = kimmoinstance.generate(generate_string)
            print(generate_results)

        if filename_batch_test:
            kimmoinstance.batch(filename_batch_test)

    else:
        # No console job requested: open the interactive interface.
        KimmoGUI(None, None)
2914
2915
2916
2917
2918
2919
2920
2921
2922
2923
2924
2925
2926
2927
2928