Package Pyblio :: Module Store
[hide private]
[frames | no frames]

Source Code for Module Pyblio.Store

  1  # This file is part of pybliographer 
  2  #  
  3  # Copyright (C) 1998-2006 Frederic GOBRY 
  4  # Email : gobry@pybliographer.org 
  5  #           
  6  # This program is free software; you can redistribute it and/or 
  7  # modify it under the terms of the GNU General Public License 
  8  # as published by the Free Software Foundation; either version 2  
  9  # of the License, or (at your option) any later version. 
 10  #    
 11  # This program is distributed in the hope that it will be useful, 
 12  # but WITHOUT ANY WARRANTY; without even the implied warranty of 
 13  # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the 
 14  # GNU General Public License for more details.  
 15  #  
 16  # You should have received a copy of the GNU General Public License 
 17  # along with this program; if not, write to the Free Software 
 18  # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA. 
 19  #  
 20   
 21   
 22  ''' 
 23  Overview 
 24  ======== 
 25   
 26    Contains the base classes and interfaces used to define a database of records. 
 27     
 28    The databases can be managed in different L{physical stores 
 29    <Pyblio.Stores>}.  To create a new database, get a specific store 
 30    implementation with the L{get <Pyblio.Store.get>} function, and call 
 31    the provided L{dbcreate <Pyblio.Stores.filestore.dbcreate>} function: 
 32     
 33      >>> db = get ('file').dbcreate (path, schema) 
 34     
 35    Once this is done, the database is ready to accept L{records 
 36    <Pyblio.Store.Record>}: 
 37   
 38      >>> record = Store.Record() 
 39      >>> record.add('title', u'my title', Attribute.Text) 
 40      >>> key = db.add(record) 
 41   
 42    @see: the L{Database} class to know what operations can be performed 
 43    on databases. 
 44  ''' 
 45   
 46  import os, string, copy, logging, warnings 
 47   
 48  from xml import sax 
 49  from xml.sax.saxutils import escape, quoteattr 
 50   
 51  from gettext import gettext as _ 
 52   
 53  from Pyblio import Schema, Attribute, Exceptions, I18n, Compat 
 54   
 55   
class StoreError(Exception):
    """Generic error occurring while accessing a database storage."""
60 61
class Key(int):
    '''A key that uniquely identifies a record in a database.

    It is a plain C{int} subclass and adds no behaviour of its own.

    @note: this class is shared by all backend stores.
    '''
70 71
class Record(dict):
    """
    A database record.

    It behaves like a dictionary, which returns a B{list} of
    attributes for each key. The attributes types depend on the
    database L{Schema <Pyblio.Schema>}.

    As a convenience, it is possible to use L{Record.add} to build up
    a Record, instead of setting its fields manually.

    @ivar key: the key of the record, unique over the whole
      database. It is generated by the actual storage layer. This key
      has only an internal meaning. Do not expose it.

    @type key: instance of L{Key <Pyblio.Store.Key>}

    @note: this class is shared by all stores
    """

    def __init__(self):
        # A fresh record has no key until a store assigns one.
        self.key = None

    def get(self, key, default=None):
        """ Get a field, understanding the dotted notation of the
        L{add} method.

        @param key: either a plain field name, or 'field.qualifier'
        @param default: value returned when the lookup fails

        @return: for a plain name, whatever is stored under it (or
          default). For 'a.b', the 'b' entry of the qualifiers of the
          B{first} attribute stored under 'a' (or default when the
          field, its first value or the qualifier is missing).
        """
        if '.' not in key:
            return dict.get(self, key, default)

        # Exactly one dot is expected: more than one makes the
        # unpacking below raise ValueError.
        main, sub = key.split('.')
        try:
            return self[main][0].q[sub]
        except (KeyError, IndexError):
            return default

    def xmlwrite(self, fd, offset=1):
        """ Export as XML.

        Writes the content of the record as an XML fragment, with the
        attributes emitted in sorted field order.

        @param fd: file-like object to write to (only write() is used).
        @param offset: number of leading spaces used for the <entry>
          element; values are written with offset + 2.
        """
        ws = ' ' * offset

        fd.write(ws + '<entry id=%s>\n' % quoteattr(str(self.key)))

        # sorted() works whether keys() is a list or a view, unlike
        # sorting the result of keys() in place.
        for name in sorted(self):
            fd.write(ws + ' <attribute id=%s>\n' % quoteattr(name))

            for attribute in self[name]:
                attribute.xmlwrite(fd, offset + 2)
                fd.write('\n')

            fd.write(ws + ' </attribute>\n')

        fd.write(ws + '</entry>\n')

    def add(self, field, value, constructor=None):
        """
        Adds a new value to a field of this record.

        This function allows you to add an item to a record. It
        converts the specified 'value' by calling 'constructor' on it,
        and appends the resulting attribute to the record.

        If you specify something like 'a.b' in fields, the 'b'
        qualifier for field 'a' is set, for the last 'a' added. It is
        possible, if you know that you will only have B{one} 'a', to
        set 'a.b' before 'a'.

        Example:

          >>> rec.add ('title', u'My title', Attribute.Text)
          >>> rec.add ('title.subtitle', u'My subtitle', Attribute.Text)

          >>> rec.add ('author', definition, author_parser)

        @param field: the field we want to add in the record
        @type field: a string, possibly containing a '.' in the case
          of structured attributes

        @param value: the 'source' value to set in the record. This
          value has not yet been converted into an
          L{Pyblio.Attribute} instance. A value of None is silently
          ignored.

        @param constructor: a function that will turn a 'value' into a
          proper attribute.
        """
        if value is None:
            return

        def generate(value, typ):
            """
            Constructs type with value. A dict is expanded into
            keyword arguments; an existing attribute is passed through.
            """
            if isinstance(value, Attribute._Qualified):
                # already an attribute instance, nothing to convert
                return value
            if type(value) is dict:
                return typ(**value)
            return typ(value)

        if '.' not in field:
            current = self.get(field, [])

            # A placeholder left by an earlier 'a.b' call gets replaced
            # by the real value, keeping the qualifiers it collected.
            if current and type(current[-1]) == Attribute.UnknownContent:
                qualifiers = current[-1].q
                current[-1] = generate(value, constructor)
                current[-1].q = qualifiers
                return

            current.append(generate(value, constructor))
            self[field] = current
            return

        main, sub = field.split('.')

        current = self.get(main, None)
        if not current:
            # Qualifier set before its main field: create a placeholder
            # that a later add() of the main field will replace.
            self[main] = [Attribute.UnknownContent()]
            current = self[main]

        target = current[-1]
        qualified = target.q.get(sub, [])
        qualified.append(generate(value, constructor))
        target.q[sub] = qualified

    def deep_equal(self, other):
        """ Compare two records field by field.

        @param other: the object to compare with

        @return: True when other is a Record holding exactly the same
          field names, with the same number of values for each, and
          every pair of values reports deep_equal(). The key attribute
          is not compared.
        """
        if not isinstance(other, Record):
            return False

        for name in self:
            if name not in other:
                return False

            mine, theirs = self[name], other[name]
            if len(mine) != len(theirs):
                return False

            for x, y in zip(mine, theirs):
                if not x.deep_equal(y):
                    return False

        # Fields present only in the other record also make them differ.
        for name in other:
            if name not in self:
                return False

        return True
230 231 # -------------------------------------------------- 232
class View(object):
    """ A view of a Result Set represents the Result Set sorted
    according to a specific criterion.

    Every method of this base class raises NotImplementedError.

    DERIVED BY ALL STORES
    """

    # NotImplemented is a comparison sentinel and is not callable;
    # the exception meant for abstract methods is NotImplementedError.

    def __iter__(self):
        raise NotImplementedError('please override')

    def itervalues(self):
        raise NotImplementedError('please override')

    def iterkeys(self):
        raise NotImplementedError('please override')

    def iteritems(self):
        raise NotImplementedError('please override')

    def __len__(self):
        raise NotImplementedError('please override')

    def __getitem__(self, idx):
        raise NotImplementedError('please override')

    def index(self, key):
        raise NotImplementedError('please override')
261 262 263 # -------------------------------------------------- 264
class ResultSet(object):
    """ A set of keys from the database.

    These sets can be manually managed by the user or be the result of
    a query. They can be made persistent, and are then stored along
    with the database.

    Apart from L{xmlwrite}, every method of this base class raises
    NotImplementedError.

    @note: this class is usually derived by every backend store.
    """

    # NotImplemented is a comparison sentinel and is not callable;
    # the exception meant for abstract methods is NotImplementedError.

    def add(self, k):
        """ Add a new item in the set.

        @param k: the key to add to the set
        @type k: instance of L{Key}
        """
        raise NotImplementedError('please override')

    def __delitem__(self, k):
        """ Remove an item from the set.

        @param k: the key to remove from the set
        @type k: instance of L{Key}
        """
        raise NotImplementedError('please override')

    def __iter__(self):
        raise NotImplementedError('please override')

    def itervalues(self):
        raise NotImplementedError('please override')

    def iterkeys(self):
        raise NotImplementedError('please override')

    def iteritems(self):
        raise NotImplementedError('please override')

    def __len__(self):
        raise NotImplementedError('please override')

    def destroy(self, k):
        """ Delete B{all the records} contained in the result set."""
        # NOTE(review): the role of 'k' is not visible from this base
        # class -- confirm against the concrete stores.
        raise NotImplementedError('please override')

    def has_key(self):
        # NOTE(review): no key parameter here, unlike Database.has_key;
        # confirm the signature the concrete stores actually expose.
        raise NotImplementedError('please override')

    def view(self, criterion):
        raise NotImplementedError('please override')

    def xmlwrite(self, fd):
        """ Write the result set as an XML fragment.

        Emits a <resultset> element holding one <ref> per item obtained
        by iterating over the set. Reads self.id and self.name, which
        this base class does not define itself.

        @param fd: file-like object to write to (only write() is used).
        """
        if self.name:
            name = ' name=%s' % quoteattr(self.name.encode('utf-8'))
        else:
            name = ''

        fd.write(' <resultset id="%d"%s>\n' % (self.id, name))

        for ref in self:
            fd.write(' <ref id="%d"/>\n' % ref)

        fd.write(' </resultset>\n')
332 333
class ResultSetStore(object):
    """ Interface to the stored result sets.

    Apart from the deprecated L{add} wrapper, every method of this
    base class raises NotImplementedError.

    DERIVED BY ALL STORES
    """

    # NotImplemented is a comparison sentinel and is not callable;
    # the exception meant for abstract methods is NotImplementedError.

    def __getitem__(self, k):
        raise NotImplementedError('please override')

    def __delitem__(self, k):
        raise NotImplementedError('please override')

    def __iter__(self):
        raise NotImplementedError('please override')

    def new(self, rsid=None):
        raise NotImplementedError('please override')

    def _add_warn(self):
        """ Emit the deprecation warning for L{add}, once per process. """
        warnings.warn('db.rs.add() is deprecated. please use db.rs.new()',
                      DeprecationWarning, stacklevel=3)
        # ensure we get called only once: the class attribute is
        # replaced by a no-op for every later call
        ResultSetStore._add_warn = lambda self: None

    def add(self, permanent=False, rsid=None):
        """ Deprecated alias of L{new}.

        @param permanent: accepted for compatibility, not used.
        @param rsid: passed on to L{new}.
        """
        self._add_warn()
        return self.new(rsid)

    def update(self, result_set):
        """Use this to permanently store a ResultSet() in the database.

        Note: when a ResultSet object is modified, it is necessary to
        call db.rs.update(result_set) to store the updated version.
        """
        raise NotImplementedError('please override')
370 371 # -------------------------------------------------- 372
class Database(object):
    ''' A bibliographic database.

    A database behaves like a dictionary, linking a L{key
    <Pyblio.Store.Key>} with a L{record <Pyblio.Store.Record>}. The
    records are B{typed}, and must follow the specifications of a
    L{Schema <Pyblio.Schema>}.

    Adding a new record
    ===================

    To add a new record r to a database db:

      >>> record = Record ()
      >>> record ['title'] = Attribute.Text ('my title')
      >>> # ...
      >>> key = db.add (record)

    When the record is added, a L{key <Pyblio.Store.Key>} is generated
    which uniquely references the record.

    Accessing a record
    ==================

    It is possible to use the database as a dictionary. So, given a key k:

      >>> r = db [k]

    Alternatively, one can access all the records in a database in random
    order:

      >>> for key, record in db.entries.iteritems ():
      >>>     # do something with the record...

    Updating a record
    =================

    Simply store the record back once it is updated:

      >>> record = db [key]
      >>> ... # update the record
      >>> db [key] = record


    @see: L{queries <Pyblio.Query>}

    @attention: getting a record from the database returns a I{new copy}
    at each access. Updating this copy I{does not} change the stored
    value.

    @cvar entries: a L{resultset <Pyblio.Store.ResultSet>} containing
    all the records of the database.

    @cvar txo: B{DEPRECATED}, use L{schema.txo} instead. A L{TxoGroup}
    instance, containing all the taxonomy definitions in the
    database. See L{TxoItem <Pyblio.Schema.TxoItem>}.

    @cvar rs: a L{ResultSetStore} instance, containing all the result
    sets defined on this database.
    '''

    # NotImplemented is a comparison sentinel and is not callable;
    # the exception meant for abstract methods is NotImplementedError.

    def __init__(self):
        raise NotImplementedError('please override')

    def _txo_warn(self):
        """ Emit the deprecation warning for L{txo}, once per process. """
        # stacklevel=3 skips _txo_warn and _txo_get so that the warning
        # is attributed to the code reading db.txo.
        warnings.warn('db.txo is deprecated. please use db.schema.txo',
                      DeprecationWarning, stacklevel=3)
        # ensure we get called only once
        Database._txo_warn = lambda self: None

    def _txo_get(self):
        self._txo_warn()
        return self.schema.txo

    txo = property(_txo_get, None)

    def _entries_get(self):
        """ Return the result set that contains _all_ the entries. """
        raise NotImplementedError('please override')

    entries = property(_entries_get, None)

    def add(self, record, key=None):
        """ Insert a new entry in the database.

        New entries B{MUST} be added with this method, not via an
        update with a hand-made Key.

        @param record: the new record to add
        @type record: a L{Record <Pyblio.Store.Record>}

        @param key: only useful for importing an existing database, by
          I{proposing} a key choice.
        @type key: a L{Key <Pyblio.Store.Key>}
        """
        raise NotImplementedError('please override')

    def __setitem__(self, key, record):
        """ Update a record.

        Updates a record with a new value.

        @param key: the record's key
        @type key: a L{Key <Pyblio.Store.Key>}

        @param record: the new value of the record
        @type record: a L{Record <Pyblio.Store.Record>}
        """
        raise NotImplementedError('please override')

    def __getitem__(self, key):
        """ Get a record by key.

        @param key: the key of the requested record
        @type key: a L{Key <Pyblio.Store.Key>}
        """
        raise NotImplementedError('please override')

    def has_key(self, key):
        """ Check for the existence of a key.

        @param key: the key to check for
        @type key: a L{Key <Pyblio.Store.Key>}
        """
        raise NotImplementedError('please override')

    def query(self, query, permanent=False):
        raise NotImplementedError('please override')

    def collate(self, rs, field):
        """ Partition the result set in a list of sets for every value
        taken by the specified field.

        Only the B{first} value of the field is considered. Records
        lacking the field, or holding an empty list for it, are grouped
        under None.

        @param rs: the result set to partition (its iteritems() is used)
        @param field: name of the field to group by

        @return: a dictionary mapping each value to a result set
          obtained from self.rs.new() and filled with the matching keys.
        """
        sets = {}

        for k, rec in rs.iteritems():
            try:
                value = rec[field][0]
            except (KeyError, IndexError):
                value = None

            # Create the bucket on first sight of a value. Looking it up
            # explicitly keeps a KeyError raised by add() from being
            # mistaken for a missing bucket.
            if value not in sets:
                sets[value] = self.rs.new()

            sets[value].add(k)

        return sets

    def save(self):
        raise NotImplementedError('please override')

    def validate(self, entry):
        """ Check an entry for conformance against the Schema. This
        method may modify the entry to normalize certain fields.

        Normalization performed in place: single values are wrapped in
        a list, None values are dropped, fields and qualifiers left
        empty are removed, and txo items made redundant by a more
        specific item are removed.

        @param entry: the record to check
        @type entry: a L{Record <Pyblio.Store.Record>}

        @return: the (possibly modified) entry itself

        @raise Exceptions.SchemaError: on an unknown attribute, a value
          or qualifier of the wrong type, a wrong number of values, or
          an invalid txo item.
        """
        # Iterate over a snapshot of the field names: empty fields are
        # deleted from the entry inside the loop.
        for k in list(entry.keys()):

            vals = entry[k]

            if type(vals) not in (list, tuple):
                vals = [vals]

            entry[k] = vals = [x for x in vals if x is not None]

            if len(vals) == 0:
                del entry[k]
                continue

            for v in vals:
                # Snapshot again: empty qualifiers are deleted below.
                for qk, qs in list(v.q.items()):
                    if type(qs) not in (list, tuple):
                        qs = [qs]

                    v.q[qk] = qs = [x for x in qs if x is not None]

                    if len(qs) == 0:
                        del v.q[qk]

            # check type and arity
            try:
                s = self.schema[k]

            except KeyError:
                raise Exceptions.SchemaError(
                    _('unknown attribute %s') % repr(k))

            for v in vals:
                if not isinstance(v, s.type):
                    raise Exceptions.SchemaError(
                        _('%s: attribute %s has an incorrect type (should be %s but is %s)') % (
                            entry.key, repr(k), repr(s.type), repr(v)))

                for qk, qs in v.q.items():
                    for q in qs:
                        if not isinstance(q, s.q[qk].type):
                            raise Exceptions.SchemaError(
                                _('%s: qualifier %s in attribute %s has an incorrect type (should be %s but is %s)') % (
                                    entry.key, repr(qk), repr(k),
                                    repr(s.q[qk].type), repr(q)))

            count = len(vals)
            lb, ub = s.range

            if (lb is not None and count < lb) or \
               (ub is not None and count > ub):
                raise Exceptions.SchemaError(
                    _('attribute %s should have %s - %s values, not %d') % (
                        k, str(lb), str(ub), count))

            # additional special checks
            if s.type is Attribute.Txo:

                for v in vals:

                    # check if the enum is in the group defined in the schema
                    if v.group != s.group:
                        raise Exceptions.SchemaError(
                            _('txo item %s/%d should be in %s') % (
                                v.group, v.id, s.group))

                    # check for the enum existence
                    try:
                        self.schema.txo[v.group][v.id]

                    except KeyError:
                        raise Exceptions.SchemaError(
                            _('invalid txo item %s/%d') % (
                                v.group, v.id))

                # Remove unnecessary txo items (for instance when a
                # more specific item is also present, there is no need
                # to keep the parent)
                g = self.schema.txo[s.group]

                # A real list: it is scanned once per value below.
                ids = [x.id for x in vals]

                # Loop over a copy, as vals shrinks during the loop.
                for v in list(vals):

                    # exp is the list of children of the current txo item
                    exp = g.expand(v.id)
                    exp.remove(v.id)

                    # If another txo is a child of the current txo,
                    # the current one can be removed.
                    for i in ids:
                        if i in exp:
                            vals.remove(v)
                            break

        return entry

    def xmlwrite(self, fd):
        """ Output a database in XML format.

        Writes, in order: the XML declaration, the schema, the header
        (when self.header is set), every entry and every result set.

        @param fd: file-like object to write to.
        """
        fd.write('<?xml version="1.0" encoding="utf-8"?>\n\n')
        fd.write('<pyblio-db>\n')

        self.schema.xmlwrite(fd, embedded=True)

        if self.header:
            fd.write('<header>%s</header>\n' % escape(self.header))

        for v in self.entries.itervalues():
            v.xmlwrite(fd)

        for rs in self.rs:
            rs.xmlwrite(fd)

        fd.write('</pyblio-db>\n')

    def xmlread(self, fd):
        """ Load the content of an XML database into this database.

        Elements are handled as soon as their closing tag is parsed:
        the schema replaces self.schema, the header is stored in
        self.header, each entry is turned into a L{Record} and added
        under its original key, and each result set is recreated with
        its original id and stored with self.rs.update().

        @param fd: the XML source, as accepted by ElementTree.iterparse

        @raise StoreError: when an entry uses an attribute or a
          qualifier that the schema does not define.
        """
        for event, elem in Compat.ElementTree.iterparse(fd, events=('end',)):
            t = elem.tag

            if t == 'entry':
                k = elem.attrib['id']
                r = Record()

                for att in elem.findall('./attribute'):
                    aid = att.attrib['id']

                    try:
                        tp = self.schema[aid]
                    except KeyError:
                        raise StoreError(_("invalid attribute '%s'") % aid)

                    for sub in att:
                        a = tp.type.xmlread(sub)

                        # check for possible qualifiers
                        for q in sub.findall('./attribute'):
                            qid = q.attrib['id']

                            try:
                                stp = self.schema[aid].q[qid]
                            except KeyError:
                                raise StoreError(
                                    _("invalid attribute qualifier '%s'") % qid)

                            for subsub in q:
                                qv = stp.type.xmlread(subsub)

                                try:
                                    a.q[qid].append(qv)
                                except KeyError:
                                    a.q[qid] = [qv]

                        r.setdefault(aid, []).append(a)

                self.add(r, key=Key(k))

                # release the parsed subtree once it has been consumed
                elem.clear()

            elif t == 'resultset':
                rsid = int(elem.attrib['id'])
                rs = self.rs.new(rsid=rsid)

                try:
                    rs.name = elem.attrib['name']
                except KeyError:
                    pass

                for ref in elem.findall('./ref'):
                    rs.add(Key(ref.attrib['id']))

                self.rs.update(rs)
                elem.clear()

            elif t == 'pyblio-schema':
                self.schema = Schema.Schema()
                self.schema.xmlread(elem)

            elif t == 'header':
                self.header = elem.text


# --------------------------------------------------

def _scan_stores(directory):
    """ Map store names to the module or package implementing them.

    A store is either a package directory (holding an __init__.py) or
    a .py file whose name ends with 'store'. The store name is that
    name, lowercased, without the 'store' suffix.

    @param directory: the directory to scan

    @return: a dictionary {store name: module name}. It is empty, and
      a RuntimeWarning is issued, when the directory cannot be listed.
    """
    found = {}

    try:
        names = os.listdir(directory)
    except OSError:
        # Keep the base classes importable even when the stores cannot
        # be enumerated; get() will then fail for every store name.
        warnings.warn('cannot list the store directory %r' % directory,
                      RuntimeWarning)
        return found

    for name in names:
        full = os.path.join(directory, name)

        if os.path.isdir(full) and \
           name.endswith('store') and \
           os.path.exists(os.path.join(full, '__init__.py')):

            found[name.lower()[:-5]] = name
            continue

        base, ext = os.path.splitext(name)
        if ext == '.py' and base.endswith('store'):
            found[base.lower()[:-5]] = base

    return found


_dir = os.path.normpath(os.path.join(
    os.path.dirname(__file__), 'Stores'))

# store name -> name of the implementing module in Pyblio.Stores
_modules = _scan_stores(_dir)

# store name -> imported module, or None when its import failed
_cache = {}
def get(fmt):
    """ Return the methods provided by a specific storage layer.

    For instance:

      >>> fmt = get ('file')
      >>> db = fmt.dbopen (...)

    The methods are:

      - dbcreate (file, schema): create a new database

      - dbopen (file): open a database in the specific store

      - dbimport (file): import an XML database into the specific store

      - dbdestroy (file): destroy a database

    For more information, consult the documentation for the specific
    backends, L{Pyblio.Stores.filestore}, L{Pyblio.Stores.bsddbstore}
    and L{Pyblio.Stores.memorystore}.

    The result of the first import attempt, success or failure, is
    cached and reused by later calls.

    @param fmt: the name of the store, as listed by L{modules}

    @return: the imported store module

    @raise KeyError: when fmt is not the name of a known store
    @raise ImportError: when the store module cannot be imported, now
      or during a previous call
    """
    try:
        module = _cache[fmt]
    except KeyError:
        # first request for this store: import it below
        pass
    else:
        if module is None:
            # a previous import attempt failed; do not retry
            raise ImportError("store '%s' is not available" % fmt)
        return module

    parts = ('Pyblio', 'Stores', _modules[fmt])

    try:
        module = __import__('.'.join(parts))
    except ImportError:
        # remember the failure before letting the error propagate
        _cache[fmt] = None
        raise

    # __import__ returns the top-level package: walk down to the store
    for comp in parts[1:]:
        module = getattr(module, comp)

    _cache[fmt] = module
    return module
795
def modules():
    """ Return the list of the store names known to L{get}. """
    return list(_modules)
799