Package Pyblio :: Module Schema
[hide private]
[frames] | no frames]

Source Code for Module Pyblio.Schema

  1  # This file is part of pybliographer 
  2  #  
  3  # Copyright (C) 1998-2006 Frederic GOBRY 
  4  # Email : gobry@pybliographer.org 
  5  #           
  6  # This program is free software; you can redistribute it and/or 
  7  # modify it under the terms of the GNU General Public License 
  8  # as published by the Free Software Foundation; either version 2  
  9  # of the License, or (at your option) any later version. 
 10  #    
 11  # This program is distributed in the hope that it will be useful, 
 12  # but WITHOUT ANY WARRANTY; without even the implied warranty of 
 13  # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the 
 14  # GNU General Public License for more details.  
 15  #  
 16  # You should have received a copy of the GNU General Public License 
 17  # along with this program; if not, write to the Free Software 
 18  # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA. 
 19  #  
 20   
 21  """ Schema definition for a pyblio database. When a database is 
 22  created, the schema is instantiated from a template. The user can then 
 23  customize it. 
 24   
 25  At the moment, a schema contains a dictionary of known document 
 26  types. For each document, it is possible to know the mandatory and 
 27  optional fields that describe the document. These fields are typed. 
 28   
 29  """ 
 30   
 31  from gettext import gettext as _ 
 32   
 33  from xml.sax.saxutils import escape 
 34   
 35  from Pyblio.Attribute import N_to_C, C_to_N, Txo 
 36  from Pyblio import I18n, Compat 
 37   
class SchemaError(Exception):
    """Raised when a schema description is invalid.

    In this module it signals an unknown attribute type, a duplicate
    attribute or qualifier, or a duplicate taxonomy item name.
    """

class Schema(dict):
    """Schema of a pyblio database, as a mapping of attribute id to
    L{Attribute}.

    Besides the mapping itself, a schema carries:

      - C{id}: the optional identifier read from the root element,
      - C{names}: the schema names, keyed by language code ('' when no
        language is given),
      - C{txo}: the taxonomy groups, keyed by group id.
    """

    def __init__(self, file=None):
        """Create an empty schema, or load one from an XML source.

        @param file: optional source handed to
            C{Compat.ElementTree.ElementTree(file=...)}. When it is
            false the schema is left empty.
        """
        self.id = None
        self.names = {}
        self.txo = {}

        if file:
            tree = Compat.ElementTree.ElementTree(file=file)
            self.xmlread(tree.getroot())

    def _name_get(self):
        # Delegates to I18n.lz.trn; presumably this selects the name that
        # matches the current locale (to be confirmed in Pyblio.I18n).
        return I18n.lz.trn(self.names)

    name = property(_name_get)

    def _parse_attribute(self, attr):
        """Build an L{Attribute} (or L{TxoAttribute}) from an XML element.

        @param attr: an C{<attribute>} element.
        @return: the attribute; its qualifiers are not parsed here.
        @raise SchemaError: if the C{type} of the element cannot be
            resolved through C{N_to_C}.
        """
        aid = attr.attrib['id']

        try:
            atype = N_to_C[attr.attrib['type']]
        except KeyError:
            raise SchemaError('attribute %s has an unknown type' % repr(aid))

        if atype is Txo:
            parsed = TxoAttribute(aid)
        else:
            parsed = Attribute(aid)

        parsed.type = atype
        parsed.indexed = attr.attrib.get('indexed', '0') == '1'

        # Without a "max" attribute the default range of the attribute
        # is kept.
        upper = attr.attrib.get('max')
        if upper is not None:
            parsed.range = (1, int(upper))

        for name in attr.findall('name'):
            parsed.names[name.attrib.get('lang', '')] = name.text

        # Let the attribute class extract its own specific data.
        parsed.xmlread(self, attr)
        return parsed

    def xmlread(self, tree):
        """Fill the schema from the root element of its XML description.

        @param tree: the root element, providing C{attrib} and
            C{findall} as ElementTree elements do.
        @raise SchemaError: on an unknown attribute type, a duplicate
            attribute, or a duplicate qualifier.
        """
        self.id = tree.attrib.get('id', None)

        for name in tree.findall('./name'):
            self.names[name.attrib.get('lang', '')] = name.text

        for node in tree.findall('./attribute'):
            attribute = self._parse_attribute(node)

            if attribute.id in self:
                raise SchemaError(
                    'duplicate attribute %s' % repr(attribute.id))

            for qnode in node.findall('./qualifiers/attribute'):
                qualifier = self._parse_attribute(qnode)
                if qualifier.id in attribute.q:
                    raise SchemaError(
                        'duplicate qualifier %s for attribute %s' % (
                            repr(qualifier.id), repr(attribute.id)))

                attribute.q[qualifier.id] = qualifier

            self[attribute.id] = attribute

        # Read the Txo groups predefined in the schema itself
        for node in tree.findall('./txo-group'):
            group = TxoGroup()
            group.xmlread(node)

            self.txo[group.group] = group

    def xmlwrite(self, fd, embedded=False):
        """Serialize the schema as XML.

        Names, attributes and taxonomy groups are each written in sorted
        key order.

        @param fd: object with a C{write} method receiving the output.
        @param embedded: when true, the XML declaration is omitted.
        """
        # escape() alone leaves double quotes untouched, which is not
        # enough inside a double-quoted attribute value.
        quote = {'"': '&quot;'}

        if not embedded:
            fd.write('<?xml version="1.0" encoding="utf-8"?>\n\n')

        fd.write('<pyblio-schema')
        if self.id:
            fd.write(' id="%s"' % escape(self.id, quote))
        fd.write('>\n')

        languages = sorted(self.names.keys())

        for code in languages:
            text = self.names[code]
            if code:
                lang = ' lang="%s"' % escape(code, quote)
            else:
                lang = ''

            fd.write(' <name%s>%s</name>\n' % (
                lang, escape(text.encode('utf-8'))))

        if languages:
            fd.write('\n')

        for key in sorted(self.keys()):
            self[key].xmlwrite(fd)
            fd.write('\n')

        for key in sorted(self.txo.keys()):
            self.txo[key].xmlwrite(fd)

        fd.write('</pyblio-schema>\n')

class Attribute(object):
    """Description of one typed attribute of a schema.

    Instance attributes:

      - C{id}: identifier of the attribute,
      - C{type}: attribute class, or None until it is assigned,
      - C{range}: C{(1, max)} tuple, where a max of None writes no
        C{max} XML attribute,
      - C{indexed}: whether C{indexed="1"} is written,
      - C{names}: names keyed by language code,
      - C{q}: qualifiers, as a dict of id to L{Attribute}.
    """

    def __init__(self, id):
        """Create an attribute with no type, names or qualifiers.

        @param id: identifier of the attribute.
        """
        self.id = id
        self.type = None
        self.range = (1, None)
        # _xmlopen reads this flag, so it needs a default for attributes
        # that are built by hand instead of being parsed from XML.
        self.indexed = False
        self.names = {}
        self.q = {}

    def __repr__(self):
        return 'Attribute (%s, %s, %s)' % (
            repr(self.id), repr(self.type), repr(self.q))

    def _name_get(self):
        # Delegates to I18n.lz.trn; presumably this selects the name that
        # matches the current locale (to be confirmed in Pyblio.I18n).
        return I18n.lz.trn(self.names)

    name = property(_name_get)

    def _xmlopen(self, fd, offset, **extra):
        """Write the opening tag, the names and the qualifiers.

        The closing C{</attribute>} tag is left to the caller.

        @param fd: object with a C{write} method receiving the output.
        @param offset: indentation level of the opening tag.
        @param extra: additional XML attributes for the opening tag,
            written in sorted key order.
        """
        ws = ' ' * offset
        # escape() alone leaves double quotes untouched, which is not
        # enough inside a double-quoted attribute value.
        quote = {'"': '&quot;'}

        if self.indexed:
            idx = ' indexed="1"'
        else:
            idx = ''

        if self.range[1] is None:
            card = ''
        else:
            card = ' max="%d"' % self.range[1]

        # '%s' % (value,) keeps the tolerance of the plain %-formatting
        # for values that are not strings.
        more = ''.join([
            ' %s="%s"' % (key, escape('%s' % (value,), quote))
            for key, value in sorted(extra.items())])

        fd.write('%s<attribute id="%s" type="%s"%s%s%s>\n' % (
            ws, escape(self.id, quote), C_to_N[self.type],
            card, idx, more))

        for code in sorted(self.names.keys()):
            text = escape(self.names[code].encode('utf-8'))
            if code:
                lang = ' lang="%s"' % escape(code, quote)
            else:
                lang = ''
            fd.write('%s <name%s>%s</name>\n' % (ws, lang, text))

        if self.q:
            fd.write('\n')
            fd.write('%s <qualifiers>\n' % ws)
            for key in sorted(self.q.keys()):
                self.q[key].xmlwrite(fd, offset=offset + 2)
            fd.write('%s </qualifiers>\n' % ws)

    def xmlread(self, schema, attr):
        """Hook called once the common XML fields have been parsed.

        @param schema: the schema being read.
        @param attr: the C{<attribute>} element.
        """
        # We do not need to extract additional data from here
        return

    def xmlwrite(self, fd, offset=1):
        """Write the complete C{<attribute>} element.

        @param fd: object with a C{write} method receiving the output.
        @param offset: indentation level of the element.
        """
        self._xmlopen(fd, offset)
        fd.write('%s</attribute>\n' % (' ' * offset))

class TxoAttribute(Attribute):
    """Attribute whose values belong to a taxonomy group.

    C{group} holds the id of that group; it is None until L{xmlread}
    (or the caller) sets it.
    """

    def __init__(self, id):
        """Create a taxonomy attribute with no group yet.

        @param id: identifier of the attribute.
        """
        Attribute.__init__(self, id)
        # __repr__ and xmlwrite read this field, so it must exist even
        # before xmlread has been called.
        self.group = None

    def __repr__(self):
        return 'TxoAttribute (%s, %s, %s, %s)' % (
            repr(self.id), repr(self.type), repr(self.group),
            repr(self.q))

    def xmlread(self, schema, attr):
        """Read the taxonomy group id and register it in the schema.

        @param schema: the schema being read; an empty group is added
            to C{schema.txo} unless this id is already present.
        @param attr: the C{<attribute>} element, which must carry a
            C{group} XML attribute.
        """
        # fetch the possible txo-items
        self.group = attr.attrib['group']

        placeholder = TxoGroup()
        placeholder.group = self.group

        schema.txo.setdefault(self.group, placeholder)

    def xmlwrite(self, fd, offset=1):
        """Write the complete C{<attribute>} element, including its group.

        @param fd: object with a C{write} method receiving the output.
        @param offset: indentation level of the element.
        @raise SchemaError: if no group has been set.
        """
        if self.group is None:
            # Writing group="None" would produce a schema whose group
            # reference is meaningless.
            raise SchemaError(
                'txo attribute %s has no group' % repr(self.id))

        self._xmlopen(fd, offset, group=self.group)
        fd.write('%s</attribute>\n' % (' ' * offset))

class TxoItem(object):

    """ Definition of a taxonomy item.

    This item can then be reused as the argument for L{Attribute.Txo}
    creation. A taxonomy item can be seen as a value in an enumeration
    of possible values. Compared to a I{simple} enumeration, it has
    the additional property of being hierarchical. For instance, you
    could define a taxonomy of document types::

      - publication
         - article
            - peer-reviewed
            - not peer-reviewed
         - conference paper
      - unpublished
         - report

    ...and use this taxonomy to fill an attribute of your records. If
    you use L{Pyblio.Query} to search for the item I{article}, you
    will retrieve all the records which contain one of I{article},
    I{peer-reviewed} or I{not peer-reviewed}.
    """

    def __init__(self):
        """Create an item with no id, group, parent or names."""
        self.id = None
        self.group = None
        self.parent = None

        self.names = {}

    def _name_get(self):
        # Delegates to I18n.lz.trn; presumably this selects the name that
        # matches the current locale (to be confirmed in Pyblio.I18n).
        return I18n.lz.trn(self.names)

    name = property(_name_get)

    def xmlwrite(self, fd, space=''):
        """Write one C{<name>} element per language, in sorted order.

        Only the names are written; the enclosing C{<txo-item>} tags are
        left to the caller.

        @param fd: object with a C{write} method receiving the output.
        @param space: indentation prefix inserted before each element.
        """
        for code in sorted(self.names.keys()):
            text = self.names[code]
            if code:
                # escape() alone leaves double quotes untouched, which
                # is not enough inside a double-quoted attribute value.
                lang = ' lang="%s"' % escape(code, {'"': '&quot;'})
            else:
                lang = ''

            fd.write(' %s<name%s>%s</name>\n' % (
                space, lang, escape(text.encode('utf-8'))))

    def __repr__(self):
        return 'TxoItem(%s, %s)' % (repr(self.group), repr(self.id))

class TxoGroup(dict):
    """Taxonomy group, as a mapping of item id to L{TxoItem}.

    C{group} holds the id of the group. Items whose names include the
    'C' language are also reachable through L{byname}.
    """

    def __init__(self):
        """Create an empty group with no id."""
        dict.__init__(self)

        self.group = None

        # the cache for searching by name
        self._byname = {}

    def __repr__(self):
        return 'TxoGroup (%s)' % (
            repr(self.group))

    def byname(self, name):
        """Return the item whose 'C' name is C{name}.

        @raise KeyError: if no item was registered under that name.
        """
        return self._byname[name]

    def _read_items(self, tree, parent):
        """Read the C{<txo-item>} children of C{tree}, recursively.

        @param tree: the element whose direct children are read.
        @param parent: id of the enclosing item, or None at top level.
        @raise SchemaError: if a 'C' name is used by more than one item.
        """
        for node in tree.findall('./txo-item'):
            item = TxoItem()

            item.id = int(node.attrib['id'])
            item.parent = parent
            item.group = self.group

            for name in node.findall('./name'):
                item.names[name.attrib.get('lang', '')] = name.text

            if 'C' in item.names:
                cname = item.names['C']
                if cname in self._byname:
                    raise SchemaError(
                        'name %r appears more than once' % cname)

                self._byname[cname] = item

            self[item.id] = item

            self._read_items(node, item.id)

    def xmlread(self, attr):
        """Fill the group from a C{<txo-group>} element.

        @param attr: the element; its C{id} XML attribute becomes the
            id of the group.
        @raise SchemaError: if a 'C' name is used by more than one item.
        """
        # fetch the possible txo-items
        self.group = attr.attrib['id']

        self._read_items(attr, None)

    def _reverse(self):
        """ Create the reversed taxonomy tree """

        children = {None: []}

        for key in self.keys():
            children[key] = []

        for item in self.values():
            children[item.parent].append(item.id)

        return children

    def expand(self, k):
        """ Return a txo and all its children """

        # Build the reversed tree once, instead of once per visited
        # node as a recursive call to expand() would.
        children = self._reverse()
        full = []

        def collect(node):
            # Descendants come first, the node itself last.
            for child in children[node]:
                collect(child)
            full.append(node)

        collect(k)
        return full

    def xmlwrite(self, fd, offset=1):
        """Write the group and its nested items as XML.

        Nothing is written for an empty group.

        @param fd: object with a C{write} method receiving the output.
        @param offset: indentation level of the C{<txo-group>} element.
        """
        ws = ' ' * offset

        if not self.keys():
            return

        # escape() alone leaves double quotes untouched, which is not
        # enough inside a double-quoted attribute value.
        fd.write('%s<txo-group id="%s">\n' % (
            ws, escape('%s' % (self.group,), {'"': '&quot;'})))

        children = self._reverse()

        def subwrite(node, depth=0):
            child = self[node]

            space = ' ' * (offset + depth)

            fd.write(' %s<txo-item id="%d">\n' % (
                space, child.id))

            child.xmlwrite(fd, space)

            for sub in children[node]:
                subwrite(sub, depth + 1)

            fd.write(' %s</txo-item>\n' % space)

        for top in children[None]:
            subwrite(top)

        fd.write('%s</txo-group>\n\n' % ws)