Package Pyblio :: Package Parsers :: Package Semantic :: Module WOK
[hide private]
[frames] | no frames]

Source Code for Module Pyblio.Parsers.Semantic.WOK

  1  # This file is part of pybliographer 
  2  #  
  3  # Copyright (C) 1998-2006 Frederic GOBRY 
  4  # Email : gobry@pybliographer.org 
  5  #           
  6  # This program is free software; you can redistribute it and/or 
  7  # modify it under the terms of the GNU General Public License 
  8  # as published by the Free Software Foundation; either version 2  
  9  # of the License, or (at your option) any later version. 
 10  #    
 11  # This program is distributed in the hope that it will be useful, 
 12  # but WITHOUT ANY WARRANTY; without even the implied warranty of 
 13  # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the 
 14  # GNU General Public License for more details.  
 15  #  
 16  # You should have received a copy of the GNU General Public License 
 17  # along with this program; if not, write to the Free Software 
 18  # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA. 
 19   
 20  """ 
 21  Parser for the XML format returned by Web of Knowledge queries. 
 22  """ 
 23   
 24  import logging 
 25   
 26  from gettext import gettext as _ 
 27   
 28  from Pyblio import Attribute, Store 
 29  from Pyblio.Exceptions import ParserError 
 30   
 31   
class Reader(object):
    """Parse records as returned by Web of Science's web service.

    Every child element of a record is dispatched to the method named
    ``do_<tag>``; elements without a dedicated method are passed to
    ``do_default``.  Handlers store what they extract in ``self.record``.
    """

    log = logging.getLogger('pyblio.import.wok')

    # Supported fields

    def do_default(self, node):
        """Called when no specific handler exists for ``node``."""
        # Logger.warn() is only a deprecated alias of warning(); the
        # arguments are passed lazily and yield the same message text.
        self.log.warning('%s: unhandled attribute %s',
                         self.uid(), repr(node.tag))

    def do_ut(self, node):
        """Store the text of ``node`` as the 'ut' identifier."""
        self.record.add('ut', node.text, Attribute.ID)

    def do_authors(self, node):
        """Add one 'author' attribute per author listed under ``node``."""
        # authors can be either listed as <primaryauthor> and <author>
        # fields (with text concatenated), or split into finer tags
        # under <fullauthorname>. As <fullauthorname> simply follows
        # the simpler tags, there is no way to know in advance if it
        # is provided or not...
        def single(author):
            # An element without text carries no name at all: report
            # that with None instead of failing on None.split().
            if author is None or not author.strip():
                return None
            try:
                last, first = [part.strip() for part in author.split(',')]
            except ValueError:
                # Not exactly "Last, First": keep the whole string as
                # the last name.
                return Attribute.Person(last=author.strip())
            return Attribute.Person(last=last, first=first)

        # Author parsed from a simple tag, kept pending until we know
        # whether a <fullauthorname> follows and supersedes it.
        simple = None
        for author in node:
            if author.tag in ('primaryauthor', 'author'):
                if simple is not None:
                    self.record.add('author', simple)
                simple = single(author.text)
            elif author.tag == 'fullauthorname':
                auth = Attribute.Person(
                    last=author.findtext('./AuLastName'),
                    first=author.findtext('./AuFirstName'))
                self.record.add('author', auth)
                simple = None
        if simple is not None:
            self.record.add('author', simple)

    def do_corp_authors(self, node):
        """Add each corporate author under ``node`` as an 'author'."""
        def single(author):
            return Attribute.Person(last=author)

        for author in node:
            self.record.add('author', author.text, single)

    def do_refs(self, node):
        """Add the text of each child of ``node`` as a 'ref' identifier."""
        for ref in node:
            self.record.add('ref', ref.text, Attribute.ID)

    def do_keywords(self, node):
        """Add the text of each child of ``node`` as a 'keyword'."""
        for ref in node:
            self.record.add('keyword', ref.text, Attribute.Text)

    def do_keywords_plus(self, node):
        """Add the text of each child of ``node`` as a 'keyword-plus'."""
        for ref in node:
            self.record.add('keyword-plus', ref.text, Attribute.Text)

    def do_abstract(self, node):
        """Store the <p> children of ``node`` as the 'abstract' text.

        Nothing is stored unless the ``avail`` attribute is 'y'
        (case-insensitive); a missing attribute counts as unavailable.
        """
        if (node.get('avail') or '').lower() != 'y':
            return
        # An empty <p/> has text None, which str.join() would reject.
        paras = [n.text or '' for n in node.findall('p')]
        self.record.add('abstract', '\n'.join(paras), Attribute.Text)

    def do_doctype(self, node):
        """Store the 'doctype' looked up from the ``code`` attribute."""
        tp = self._type(node.get('code'))

        self.record.add('doctype', tp, Attribute.Txo)

    def do_source_title(self, node):
        """Store the text of ``node`` as 'source'."""
        self.record.add('source', node.text, Attribute.Text)

    def do_item_title(self, node):
        """Store the text of ``node`` as 'title'."""
        self.record.add('title', node.text, Attribute.Text)

    def do_source_series(self, node):
        """Store the text of ``node`` as 'source.series'."""
        self.record.add('source.series', node.text, Attribute.Text)

    def do_source_abbrev(self, node):
        """Store the text of ``node`` as 'source.abbrev'."""
        self.record.add('source.abbrev', node.text, Attribute.Text)

    def do_article_nos(self, node):
        """Store entries starting with 'DOI ' as 'doi' identifiers.

        Entries of any other form are reported with a warning; entries
        without text are skipped.
        """
        for no in node:
            t = (no.text or '').strip()
            if not t:
                # Empty element: nothing to store, nothing to report.
                continue
            if t.startswith('DOI '):
                self.record.add('doi', t[4:], Attribute.ID)
                continue

            self.log.warning('%s: unhandled article_no %s',
                             self.uid(), repr(t))

    def do_bib_pages(self, node):
        """Store the text of ``node`` as 'source.pages'."""
        self.record.add('source.pages', node.text, Attribute.Text)

    def do_bib_issue(self, node):
        """Copy the ``vol`` and ``year`` attributes, when non-empty."""
        for s, d in (('vol', 'source.volume'),
                     ('year', 'source.year')):

            v = node.get(s)
            if v:
                self.record.add(d, v, Attribute.Text)

    # Fields I either don't need or don't know. Feel free to improve.

    def do_i_ckey(self, node): pass
    def do_i_cid(self, node): pass
    def do_sq(self, node): pass
    def do_emails(self, node): pass
    def do_reprint(self, node): pass
    def do_research_addrs(self, node): pass
    def do_languages(self, node): pass
    def do_bib_id(self, node): pass
    def do_editions(self, node): pass

    # Parsing logic and hooks

    def record_begin(self):
        """Hook called by parse() right after a new record is created."""
        pass

    def record_end(self):
        """Hook called by parse() just before a record is added to the db."""
        pass

    def uid(self):
        """ Generate the display name of a record.

        Used when outputting a warning for instance."""
        try:
            return 'ISI:' + self.record['ut'][0]
        except KeyError:
            # No 'ut' stored (yet): fall back on the record key.
            return repr(self.record.key)

    def parse(self, fd, db, rs=None):
        """Import every record found under './REC/item' in ``fd``.

        ``fd`` is searched with ``findall``, so it is expected to be an
        already parsed XML tree.  Each record is added to ``db`` and its
        key to ``rs``; when ``rs`` is None a new one is created with
        ``db.rs.add(True)`` and given a default name.

        Returns the result set holding the imported keys.
        """
        if rs is None:
            rs = db.rs.add(True)
            rs.name = _('Imported from Web of Knowledge')

        self.db = db
        # Lookup used by do_doctype() to resolve a document type code.
        self._type = self.db.schema.txo['doctype'].byname

        for item in fd.findall('./REC/item'):
            self.record = Store.Record()
            self.record_begin()

            for child in item:
                fn = getattr(self, 'do_' + child.tag,
                             self.do_default)
                fn(child)

            self.record_end()

            k = db.add(self.record)
            rs.add(k)

        return rs