Package Pyblio :: Package Parsers :: Package Syntax :: Module XMLEndNote
[hide private]
[frames | no frames]

Source Code for Module Pyblio.Parsers.Syntax.XMLEndNote

  1  # This file is part of pybliographer 
  2  #  
  3  # Copyright (C) 1998-2006 Frederic GOBRY 
  4  # Email : gobry@pybliographer.org 
  5  #           
  6  # This program is free software; you can redistribute it and/or 
  7  # modify it under the terms of the GNU General Public License 
  8  # as published by the Free Software Foundation; either version 2  
  9  # of the License, or (at your option) any later version. 
 10  #    
 11  # This program is distributed in the hope that it will be useful, 
 12  # but WITHOUT ANY WARRANTY; without even the implied warranty of 
 13  # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the 
 14  # GNU General Public License for more details.  
 15  #  
 16  # You should have received a copy of the GNU General Public License 
 17  # along with this program; if not, write to the Free Software 
 18  # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA. 
 19  #  
 20   
 21  import string, re, StringIO, sys, logging 
 22   
 23  from xml import sax 
 24  from xml.sax.saxutils import escape, quoteattr 
 25   
 26  from Pyblio import Attribute, Store, Exceptions, Tools, Compat 
 27   
 28  from gettext import gettext as _ 
 29   
 30  # Unofficial mapping from EndNote type codes to type names 
 31  typemap = [ 
 32      (0  , "Journal Article"), 
 33      (1  , "Book"), 
 34      (2  , "Thesis"), 
 35      (3  , "Conference Proceedings"), 
 36      (4  , "Personal Communication"), 
 37      (5  , "Newspaper Article"), 
 38      (6  , "Computer Program"), 
 39      (7  , "Book Section"), 
 40      (8  , "Magazine Article"), 
 41      (9  , "Edited Book"), 
 42      (10 , "Report"), 
 43      (11 , "Map"), 
 44      (12 , "Audiovisual Material"), 
 45      (13 , "Artwork"), 
 46      (15 , "Patent"), 
 47      (16 , "Electronic Source"), 
 48      (17 , "Bill"), 
 49      (18 , "Case"), 
 50      (19 , "Hearing"), 
 51      (20 , "Manuscript"), 
 52      (21 , "Film or Broadcast"), 
 53      (22 , "Statute"), 
 54      (25 , "Figure"), 
 55      (26 , "Chart or Table"), 
 56      (27 , "Equation"), 
 57      (31 , "Generic"), 
 58  ] 
 59   
 60           
class Reader(object):
    """
    Base reader for EndNote XML files.

    L{parse} walks every C{RECORD} / C{record} element of the input
    and, for each of its direct children, calls the method named
    C{do_<tag>} (tag lower-cased, dashes turned into underscores).
    Tags without a matching method go to L{do_default}, which does
    nothing here.  No C{do_<tag>} handler is defined in this class;
    the C{*_add} methods are helpers for such handlers.
    """

    # The official channel in which messages must be sent
    log = logging.getLogger('pyblio.import.xmlendnote')

    # EndNote numeric reference-type code -> type name
    id2type = dict(typemap)

    def clean_tag(self, tag):
        """
        Turn an XML tag into the suffix of a handler method name.
        """
        # A dash cannot appear in a method name: convert it to an
        # underscore, and assume case doesn't matter.
        return tag.lower().replace('-', '_')

    def process_children(self, elem):
        """
        Dispatch every direct child of C{elem} to its C{do_<tag>}
        handler, falling back on L{do_default}.
        """
        # Iterate the element directly, as parse () does, instead of
        # going through the deprecated getchildren ().
        for child in elem:
            tag = self.clean_tag(child.tag)
            getattr(self, 'do_' + tag, self.do_default)(child)

    def style_genocide(self, elem):
        """
        Recursively append the text of every C{style} child to the
        text of its parent element, and return C{elem}.

        The C{style} children themselves are left in place; only
        their text is copied up.
        """
        for child in list(elem):
            if child.tag == "style":
                # Either side may have no text at all (None): treat
                # that as an empty string rather than failing on the
                # concatenation.
                elem.text = (elem.text or "") + (child.text or "")
            else:
                self.style_genocide(child)
        return elem

    def record_begin(self):
        """
        Hook called by L{parse} right after a fresh record has been
        stored in C{self.record}.  Does nothing here.
        """
        pass

    def record_end(self):
        """
        Hook called by L{parse} once all the fields of a record have
        been dispatched.  Does nothing here.
        """
        pass

    def do_default(self, elem):
        """
        Handler used for tags without a C{do_<tag>} method: the
        element is ignored.
        """
        pass

    def add(self, field, value):
        """
        Use this function to add anything to your record.

        The attribute type is looked up in the database schema for
        C{field}.  For Txo fields, C{value} is first resolved through
        C{self.db.txo [field].byname ()}.
        """
        attr_type = self.db.schema[field].type

        if attr_type == Attribute.Txo:
            value = self.db.txo[field].byname(value)

        self.record.add(field, value, attr_type)

    def id_add(self, field, value):
        """
        Deprecated: use L{add} instead.
        """
        self.record.add(field, value, Attribute.ID)

    def text_add(self, field, value):
        """
        Deprecated: use L{add} instead.

        Note that C{value} is an element: its C{text} is what gets
        stored.
        """
        self.record.add(field, value.text, Attribute.Text)

    def url_add(self, field, value):
        """
        Deprecated: use L{add} instead.
        """
        self.record.add(field, value, Attribute.URL)

    def person_add(self, field, value):
        """
        Append one L{Attribute.Person} per child of C{value} to the
        C{field} of the current record.

        A text of the form C{"Last, First"} (exactly one comma) is
        split into last and first names; any other text is used as
        the last name as a whole.  Children without any text are
        skipped.
        """
        def mkauthor(txt):
            parts = [part.strip() for part in txt.text.split(',')]
            if len(parts) == 2:
                return Attribute.Person(last=parts[0], first=parts[1])
            return Attribute.Person(last=txt.text.strip())

        persons = self.record.get(field, [])

        # An empty author element has text None (or blank): there is
        # no name to build a Person from, so it is ignored instead of
        # raising on None.split ().
        persons += [mkauthor(x) for x in value
                    if x.text and x.text.strip()]

        self.record[field] = persons
        return

    def parse(self, fd, db):
        """
        Import every record found in C{fd} into C{db}.

        @param fd: the file object containing the XML to parse
        @param db: the database in which the records are added
        @return: the result set, created by this call, that holds
            the keys of all the records that have been added
        """
        self.db = db

        rs = db.rs.add(True)
        rs.name = _('Imported from XML EndNote')

        for event, elem in Compat.ElementTree.iterparse(fd, events=('end',)):
            if elem.tag not in ('RECORD', 'record'):
                continue

            self.record = Store.Record()
            self.record_begin()

            for field in elem:
                tag = self.clean_tag(field.tag)
                # style_genocide () works in place.  Its result must
                # not be bound to 'elem', otherwise the clear () below
                # would hit the last field instead of the record.
                self.style_genocide(field)
                getattr(self, 'do_' + tag, self.do_default)(field)

            self.record_end()

            # NOTE(review): this check suggests that a handler or
            # hook may set self.record to None to drop the record --
            # confirm against the subclasses.
            if self.record is not None:
                k = self.db.add(self.record)
                rs.add(k)

            # Release the content of the record that has just been
            # processed, so that the tree does not grow with the file.
            elem.clear()

        return rs
class Writer(object):
    """
    Base writer for EndNote XML files.

    L{write} calls L{record_parse} for each record, with C{self.fd}
    set to a temporary buffer.  Whatever has been written in that
    buffer (typically through the C{*_add} helpers) is then wrapped
    in a C{<RECORD>} element.  L{record_parse} does nothing here.
    """

    # The official channel in which messages must be sent
    log = logging.getLogger('pyblio.export.xmlendnote')

    # Type name -> EndNote numeric reference-type code
    type2id = dict([(name, code) for code, name in typemap])

    # A decimal character reference, as generated by the
    # 'xmlcharrefreplace' error handler.
    _charref = re.compile(r'&#(\d+);')

    def _encode(self, txt):
        """
        Return C{txt} as pure ASCII XML content: markup characters
        are escaped, non-ASCII characters become hexadecimal
        character references, and newlines are replaced too.
        """
        txt = escape(txt).encode('ascii', 'xmlcharrefreplace')

        # Turn every decimal reference (&#233;) into its hexadecimal
        # form (&#xE9;).  A single substitution pass handles all of
        # them, including those located after a newline in the text.
        txt = self._charref.sub(
            lambda ref: '&#x%X;' % int(ref.group(1)), txt)

        # NOTE(review): the replacement literal was garbled into a raw
        # line break in the listing this code was recovered from;
        # '&#13;' is the assumed original -- confirm against upstream.
        return txt.replace('\n', '&#13;')

    def text_add(self, text, tag):
        """
        Write the sequence of strings C{text}, joined with newlines,
        as a single C{<tag>} element.  Nothing is written when the
        result is empty.
        """
        text = self._encode('\n'.join(text))

        if not text:
            return

        self.fd.write('<%s>%s</%s>' % (tag, text, tag))
        return

    def keywords_add(self, keywords):
        """
        Write the non-empty C{keywords} as C{<KEYWORD>} elements
        inside a C{<KEYWORDS>} element.  Nothing is written when
        there is no keyword left.
        """
        txts = []
        for k in keywords:
            k = self._encode(k)
            if k:
                txts.append('<KEYWORD>%s</KEYWORD>' % k)

        if not txts:
            return

        self.fd.write('<KEYWORDS>')
        for txt in txts:
            self.fd.write(txt)
        self.fd.write('</KEYWORDS>')
        return

    def person_add(self, persons, tag='AUTHOR'):
        """
        Write C{persons} as C{<tag>} elements inside a C{<tagS>}
        element (C{<AUTHORS>} by default).

        Each person is written as C{"last, first"}, or as the last
        name alone when there is no first name.  Persons that yield
        no text are skipped, and nothing is written when none is
        left.
        """
        txts = []

        for person in persons:
            if person.first:
                txt = '%s, %s' % (person.last, person.first)
            else:
                txt = person.last

            if txt:
                txts.append('<%s>%s</%s>' % (tag, self._encode(txt), tag))

        if not txts:
            return

        self.fd.write('<%sS>' % tag)
        for txt in txts:
            self.fd.write(txt)
        self.fd.write('</%sS>' % tag)
        return

    def header_add(self, key, reftype):
        """
        Write the C{<REFERENCE_TYPE>} and C{<REFNUM>} elements of a
        record.  Both values are formatted as integers.
        """
        self.fd.write('<REFERENCE_TYPE>%d</REFERENCE_TYPE>' % reftype)
        self.fd.write('<REFNUM>%d</REFNUM>' % key)
        return

    def record_parse(self, record):
        """
        Hook called by L{write} for each record, in charge of writing
        its content in C{self.fd}.  Does nothing here.
        """
        pass

    def write(self, fd, rs, db):
        """
        Export the records of C{rs} in C{fd}.

        @param fd: the file object in which the XML is written
        @param rs: the set of records to export; its C{itervalues ()}
            must provide the records
        @param db: the database, kept in C{self.db} for the duration
            of the export
        """
        self.db = db

        fd.write('<XML><RECORDS>')

        for r in rs.itervalues():
            # Each record is first generated in its own buffer, so
            # that records without any content can be left out.
            self.fd = StringIO.StringIO()
            try:
                self.record_parse(r)
                record = self.fd.getvalue().strip()
            finally:
                # Also release the buffer when record_parse () fails.
                self.fd.close()

            if not record:
                continue

            fd.write('<RECORD>%s</RECORD>' % record)

        fd.write('</RECORDS></XML>\n')
        return