1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24 from xml.dom.ext.reader import Sax2
25 from paradigmquery import ParadigmQuery
26 import re, os
27
29 """
30 Paradigm visualisation class
31
32 *Usage*
33
34 Simple usage of the system would be:
35 >>> from paradigm import Paradigm
36 >>> p = Paradigm('german.xml')
37 >>> p.show('table(case, gender/number, content)')
38
39 Here, a table is generated in HTML format and sent to the file ``output.html``.
40 The table can be viewed in a browser, and is updated for every new query.
41
42 A more advanced usage of the system is shown below.
43 The user simply creates a paradigm p, changes the output format and location,
44 and calls a dedicated prompt to enter the query:
45 >>> from paradigm import Paradigm
46 >>> p = Paradigm('german.xml')
47 >>> p.setFormat('html')
48 >>> p.setOutput('test.html')
49 >>> p.setCSS('simple.css')
50 >>> p.prompt()
51 > table(case, gender/number, content)
52
53 Please note, however, that plain text tables have not yet been implemented.
54 """
55
57 """
58 Load the given paradigm
59 p_filename is a string representing the filename of a paradigm xml file
60 """
61
62 self.loadParadigm(p_filename)
63
64 self.format = "html"
65 self.output = "output.html"
66 self.css = "simple.css"
67
69 """
70 Changes to a dedicated prompt
71 Type 'exit' or 'quit' to exit
72 """
73 s = ""
74 while s != "exit":
75 s = "exit"
76 try: s = raw_input(">")
77 except EOFError:
78 print s
79 if s == "exit":
80 return
81 if s == "quit":
82 return
83 if s:
84 while s[-1] in "!.": s = s[:-1]
85 self.show(s)
86
def show(self, p_string):
    """
    Process and display the given query

    p_string is the query text, e.g. 'table(case, gender/number, content)'

    The query is parsed, evaluated against this paradigm and rendered
    according to self.format ("html", anything else means plain text).
    The rendering is printed when self.output is "term", otherwise it is
    written to the file named by self.output.

    Nothing is returned; failures are reported on the terminal.
    """

    # Step 1: parse the query.
    # Only ordinary errors are caught, so that Ctrl-C / SystemExit still
    # get through (this method is called from an interactive prompt).
    try:
        parse = ParadigmQuery(p_string)
    except Exception:
        print("Could not parse query.")
        return

    # Step 2: evaluate the parse tree against the paradigm.
    # (A constructor call can never yield None, so no extra check is needed.)
    try:
        result = Sentence(self, parse.getTree())
    except Exception:
        print("Sorry, no result can be returned")
        return

    # Step 3: render the result in the requested format.
    try:
        if self.format == "html":
            pieces = ['<html>\n']
            if self.css is not None:
                pieces.append('<link rel="stylesheet" href="')
                pieces.append(self.css)
                pieces.append('" type="text/css" media="screen" />\n')
            pieces.append('<body>')
            pieces.append("<table cellspacing=\"0\" cellpadding=\"0\">")
            pieces.append(result.getHTML())
            pieces.append("</table>\n")
            pieces.append('</body></html>\n')
            output = "".join(pieces)
        else:
            output = result.getText()
    except Exception:
        output = None

    # Some operations cannot be rendered in every format and hand back
    # None; treat that like a rendering failure instead of writing it out.
    if output is None:
        print("--no output--")
        return

    # Step 4: deliver the rendering.
    if self.output == "term":
        print(output)
        return

    f = open(self.output, 'w')
    try:
        f.write(output)
    finally:
        # Always release the file handle, even if the write fails.
        f.close()
    print("Output written to file: %s" % self.output)
140
158
def setCSS(self, p_string=None):
    """
    Set the file location for a Cascading Stylesheet: None or filename
    This allows for simple formatting

    p_string is the stylesheet filename, or None to use no stylesheet
    """
    if p_string is not None:
        print("Using CSS file: %s" % p_string)
    # Store the stylesheet in self.css, which is the attribute show() reads
    # when it builds the <link> tag. Writing it to self.output would
    # redirect the generated table into the stylesheet file instead.
    self.css = p_string
167
169 """
170 Set the output location: "term" or filename
171 """
172
173 if p_string == None:
174 p_string = "term"
175
176 if p_string == "term":
177 print "Directing output to terminal"
178 else:
179 print "Directing output to file:", p_string
180 self.output = p_string
181
182
184 """
185 Load the given paradigm (XML file)
186 Attributes are stored in self.attributes
187 Data are stored in self.data
188
189 They can be accessed as follows:
190 self.attributes['gender'] # list of genders
191 self.data[6]['gender'] # gender for the sixth data object
192 self.data[6]['content'] # content for the sixth data object
193 """
194
195 from nltk_lite.corpora import get_basedir
196 basedir = get_basedir()
197
198
199 try_filename = os.path.join(get_basedir(), "paradigms", p_filename)
200 try:
201 f = open(try_filename)
202 p_filename = try_filename
203 except IOError:
204 print "Cannot find file"
205 return None
206 f.close()
207
208
209 self.attributes = {}
210 self.data = []
211
212
213 reader = Sax2.Reader()
214 doc = reader.fromStream(p_filename)
215
216
217
218 attributes = doc.getElementsByTagName('attributes')[0]
219 for name in attributes.getElementsByTagName('name'):
220
221
222 tmp_list = []
223
224
225 for value in name.getElementsByTagName('value'):
226 tmp_list.append(value.getAttribute('value'))
227
228
229 self.attributes[name.getAttribute('name')] = tmp_list
230
231
232
233
234 forms = doc.getElementsByTagName('paradigm')[0]
235 for form in forms.getElementsByTagName('form'):
236
237 tmp_dict = {}
238 for value in form.getElementsByTagName('attribute'):
239 tmp_dict[value.getAttribute('name')] = value.getAttribute('value')
240
241 self.data.append(tmp_dict)
242
243
244 print "Paradigm information successfully loaded from file:", p_filename
245
246 print " "*4 + str(len(self.attributes)) + " attributes imported:",
247 for att in self.attributes:
248 print att,
249 print
250
251 print " "*4 + str(len(self.data)) + " paradigm objects imported."
252
253 return
254
256 """
257 Manages any operation
258 Passes request onto other handlers if necessary
259 """
260
261 - def __init__(self, p_paradigm, p_tree):
280
282 """
283 Returns values in the form of a list
284 """
285 if self.tree == None:
286 return None
287 return self.item.getList()
288
290 """
291 Returns values in html (table) form
292 """
293 return self.item.getHTML()
294
296 """
297 Returns values in horizontal html (table) form
298 """
299 return self.item.getHorizontalHTML(p_parentSpan)
300
302 """
303 Returns values in plain text form
304 """
305 return self.item.getText()
306
308 """
309 Return a list of conditions for each combination (cell)
310 """
311 return self.item.getConditions()
312
314 """
315 Returns the width in number of characters
316 """
317 return self.item.getMaxWidth()
318
320 """
321 Returns the span (required for "rowspan" and "colspan" HTML attributes)
322 """
323 return self.item.getSpan()
324
326 """
327 Get the depth
328 """
329 return self.item.getDepth()
330
332 """
333 Determine the type of the current node of the tree
334 This need not be overridden
335 """
336 if p_tree == None:
337 p_tree = self.tree
338
339 return str(p_tree)[1:2]
340
341 -class Domain(Sentence):
342 """
343 Manages a domain operation
344
345 Provides: Domain(paradigm,tree)
346 """
def __init__(self, p_paradigm, p_tree):
    """
    Set up a domain over a single attribute of the paradigm

    p_paradigm is the paradigm object (its attributes and data)
    p_tree is the query tree for this domain; its first child is the
    attribute name

    If the attribute is unknown to the paradigm, a message is stored in
    self.error and printed; otherwise self.error stays None.
    """
    self.paradigm = p_paradigm

    # This node must be a domain node
    assert self.getType(p_tree) == 'D'

    self.attribute = p_tree[0]
    self.error = None

    # Report (but do not raise on) attributes the paradigm does not define
    if self.attribute not in self.paradigm.attributes:
        self.error = "I couldn't find this attribute: " + self.attribute
        print(self.error)
364
def __getitem__(self, p_index):
    """
    Return the value at position p_index among this domain's attribute values
    """
    values = self.paradigm.attributes[self.attribute]
    return values[p_index]
367
369 """
370 Return the domain in list form
371 """
372 return self.paradigm.attributes[self.attribute]
373
375 """
376 Return html for this domain
377 """
378 ret_string = ""
379 for item in self.getList():
380 ret_string += "<tr><td>" + item + "</td></tr>"
381 return ret_string
382
def getHorizontalHTML(self,p_parentSpan=1):
    """
    Render this domain as one html table row

    Each value becomes a <td> cell; the run of cells is repeated
    p_parentSpan times inside a single <tr>.
    """
    cells = "".join(["<td>" + value + "</td>" for value in self.getList()])
    return "<tr>" + cells * p_parentSpan + "</tr>"
391
392
394 """
395 Return text for this domain
396 """
397 ret_string = ""
398 for item in self.getList():
399 ret_string += item + "\n"
400 return ret_string
401
def getConditions(self):
    """
    Build the list of conditions, one per value of this domain

    Each condition is a fresh one-entry dictionary mapping this domain's
    attribute name to one of its values, in the order given by getList().
    """
    return [{self.attribute: value} for value in self.getList()]
412
def getMaxWidth(self):
    """
    Report the length (in characters) of the longest value in this domain

    Used for display purposes; an empty domain has width 0.
    """
    widths = [len(value) for value in self.getList()]
    # The leading 0 keeps max() defined when the domain has no values
    return max([0] + widths)
422
424 """
425 Get the span of this domain (number of elements)
426 """
427 return len(self.getList())
428
def getDepth(self):
    """
    Report how many levels this domain occupies: a domain is a single level
    """
    depth = 1
    return depth
434
436 """
437 Manages a hierarchy operation
438
439 Provides: Hierarchy(paradigm,tree)
440 """
def __init__(self, p_paradigm, p_tree):
    """
    Set up a hierarchy from its query tree

    p_paradigm is the paradigm object (its attributes and data)
    p_tree is the tree representation of this part of the query (Tree);
    its first child becomes the root domain and its second child the leaf
    """
    self.paradigm = p_paradigm
    self.error = None
    self.tree = p_tree

    # This node must be a hierarchy node ...
    assert self.getType(p_tree) == 'H'

    # ... and its first child must be a domain node
    root_tree = p_tree[0]
    assert self.getType(root_tree) == 'D'

    self.root = Domain(p_paradigm, root_tree)
    self.leaf = Sentence(p_paradigm, p_tree[1])
457
458
460 """
461 Return the hierarchy in list form
462 """
463
464 rootList = self.root.getList()
465 leafList = self.leaf.getList()
466
467
468 ret_val = []
469 for item_root in rootList:
470 for item_leaf in leafList:
471 ret_val.append([item_root,item_leaf])
472
473 return ret_val
474
476 """
477 Return a html table for this hierarchy
478 """
479 ret_string = ""
480 for index in range(len(self.root.getList())):
481 leafCells = self.leaf.getHTML()[4:]
482 ret_string += "<tr><td rowspan=\"" + str(self.leaf.getSpan()) + "\">" + self.root[index] \
483 + "</td>" + leafCells
484 return ret_string
485
487 """
488 Return a horizontal html table
489 """
490 ret_string = ""
491
492 for index in range(len(self.root.getList())):
493 ret_string += "<td colspan=\"" + str(self.leaf.getSpan()) + "\">" \
494 + self.root[index] + "</td>"
495
496 leafCells = self.leaf.getHorizontalHTML(p_parentSpan*len(self.root.getList()))
497
498 return "<tr>" + ret_string*p_parentSpan + "</tr>" + leafCells
499
501 """
502 Return text for this hierarchy
503 """
504 ret_string = ""
505
506 max_width_root = self.root.getMaxWidth()
507 max_width_leaf = self.leaf.getMaxWidth()
508
509
510 for index in range(len(self.root.getList())):
511 ret_string += self.root[index].ljust(max_width_root) + " " \
512 + self.leaf.getText().ljust(max_width_leaf).replace('\n',"\n" \
513 + " "*(max_width_root+1)) + "\n"
514
515 re_blank = re.compile('\n[ ]+\n')
516 return re_blank.sub('\n',ret_string)
517
519 """
520 Return a list of conditions for each combination (cell)
521 """
522 ret_conds = []
523
524 for item_r in self.root.getList():
525
526 for cond_l in self.leaf.getConditions():
527
528 cond_l[self.root.attribute] = item_r
529
530 ret_conds.append(cond_l)
531
532 return ret_conds
533
539
541 """
542 Get the depth of this hierarchy
543 """
544 return 1 + self.leaf.getDepth()
545
547 """
548 Get the span (for HTML tables) of this hierarchy
549 """
550 return self.root.getSpan() * self.leaf.getSpan()
551
553 """
554 Manages a table operation
555
556 Provides: Table(paradigm,tree)
557 """
def __init__(self, p_paradigm, p_tree):
    """
    Set up a table from its query tree

    p_paradigm is the paradigm object (its attributes and data)
    p_tree is the tree representation of this part of the query (Tree);
    its three children give, in order, the horizontal part, the vertical
    part and the cells
    """
    self.paradigm = p_paradigm
    self.error = None
    self.tree = p_tree

    # This node must be a table node
    assert self.getType(p_tree) == 'T'

    # Wrap each child of the tree, in order, under its role name
    for position, role in enumerate(("horizontal", "vertical", "cells")):
        setattr(self, role, Sentence(p_paradigm, p_tree[position]))
573
574
576 """
577 Return the table (cells) in list form
578 """
579 ret_val = []
580 return ret_val
581
583 """
584 Return a html table for this table operation
585 """
586
587 dead_cell = "<tr><td colspan=\"" + str(self.vertical.getDepth()) \
588 + "\" rowspan=\"" + str(self.horizontal.getDepth()) \
589 + "\"></td>"
590
591 horizontal_header = self.horizontal.getHorizontalHTML()[4:].replace('td','th')
592
593
594 vertical_header = self.vertical.getHTML().replace('td','th')
595 str_cells = ""
596
597 conditions = {}
598
599 conditions_v = self.vertical.getConditions()
600
601 for cond_v in conditions_v:
602 str_cells += "<tr>"
603
604 conditions_h = self.horizontal.getConditions()
605
606 for cond_h in conditions_h:
607
608 cell_data = self.getData(self.cells.tree, dictJoin(cond_v,cond_h))
609
610 str_cells += "<td>" + cell_data + "</td>"
611
612 str_cells += "</tr>"
613
614
615
616 vertical_header_rows = vertical_header.split('</tr>')
617 cell_rows = str_cells.replace('<tr>','').split('</tr>')
618
619 zipped = zip(vertical_header_rows, cell_rows)
620 str_zipped = ""
621 for (header,cells) in zipped:
622 if header <> '':
623 str_zipped += header + cells + "</tr>\n"
624
625
626 return dead_cell + horizontal_header + str_zipped
627
629 """
630 Return a horizontal html table (?)
631 """
632 print "?: getHorizontalHTML() called on a table."
633 return None
634
636 """
637 Return text for this table (?)
638 """
639 print "?: getText() for a table? HAHAHAHAHA"
640 print "call setFormat('html') if you want to run queries like that"
641 return
642
644 """
645 Return conditions for this table (?)
646 """
647 print "?: getConditions() called on a table. I don't think so."
648 return None
649
651 """
652 Return the maximum width this table could take up.
653 ... I hope you're not trying to nest tables ...
654 """
655 return self.cells.getMaxWidth() + self.vertical.getMaxWidth() + 1
656
658 """
659 Return span for this table (?)
660 """
661 print "WTF: getSpan() called on a table."
662 return None
663
def getData(self, p_return, p_attDict):
    """
    Retrieve data that matches the given list of attributes
    Returns (an HTML) string of values that match.

    p_return is a tree pointing to the key of the value to include in the return
    p_attDict is a dictionary of conditions.

    A data object matches when every key of p_attDict is present in it
    with an equal value.  Data objects that lack one of the requested
    attributes, or that lack the value to return, are skipped instead of
    aborting the lookup with a KeyError.

    Exactly one match is returned as is; any other number of matches
    (including none) is returned as a nested <table>, one row per match.
    """
    return_key = p_return.leaves()[0]

    matches = []
    for datum in self.paradigm.data:
        # Nothing to show for a data object without the requested value
        if return_key not in datum:
            continue

        for att in p_attDict:
            if att not in datum or datum[att] != p_attDict[att]:
                break
        else:
            # The loop finished without a break: every condition held
            matches.append(datum[return_key])

    if len(matches) == 1:
        return matches[0]

    rows = ["<tr><td>" + item + "</td></tr>" for item in matches]
    return "<table>" + "".join(rows) + "</table>"
699
700
702 """
703 A handy function to join two dictionaries
704 If there is any key overlap, dict1 wins!
705 (just make sure this doesn't happen)
706 """
707 for key in dict1.keys():
708 dict2[key] = dict1[key]
709 return dict2
710
712
713
714 print """
715 ================================================================================
716 Load: Paradigm(file)
717 ================================================================================
718 """
719 print
720 print ">>> a = Paradigm('german.xml')"
721 print
722 a = Paradigm('german.xml')
723 print
724 print ">>> a.setOutput('term')"
725 print
726 a.setOutput('term')
727 print
728 print ">>> a.setFormat('text')"
729 print
730 a.setFormat('text')
731
732
733 print """
734 ================================================================================
735 Domain: case
736 ================================================================================
737 """
738 print
739 print ">>> a.show('case')"
740 print
741 a.show('case')
742
743
744 print """
745 ================================================================================
746 Hierarchy: case/gender
747 ================================================================================
748 """
749 print
750 print ">>> a.show('case/gender')"
751 print
752 a.show('case/gender')
753
754
755 print """
756 ================================================================================
757 Table: table(case/number,gender,content)
758 ================================================================================
759 """
760 print
761 print ">>> a.setOutput('demo.html')"
762 print
763 a.setOutput('demo.html')
764 print
765 print ">>> a.setFormat('html')"
766 print
767 a.setFormat('html')
768 print
769 print ">>> a.show('table(case/number,gender,content)')"
770 print
771 a.show('table(case/number,gender,content)')
772
773
774 print
775
776 if __name__ == '__main__':
777 demo()
778