1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20 """
21 Parser for the XML format returned by Web of Knowledge queries.
22 """
23
24 import logging
25
26 from gettext import gettext as _
27
28 from Pyblio import Attribute, Store
29 from Pyblio.Exceptions import ParserError
30
31
33 """ Parse records as returned by Web of Science's web service."""
34
35 log = logging.getLogger('pyblio.import.wok')
36
37
38
40 """ Called when no specific handler exist."""
41
42 self.log.warn('%s: unhandled attribute %s' % (
43 self.uid(), repr(node.tag)))
44 return
45
48
50
51
52
53
54
def single(author):
    """Build an ``Attribute.Person`` from a ``'Last, First'`` string.

    When *author* does not split into exactly two comma-separated
    parts, the whole stripped string is used as the last name.
    """
    pieces = author.split(',')
    try:
        # Unpacking raises ValueError unless there are exactly two parts.
        family, given = (piece.strip() for piece in pieces)
        person = Attribute.Person(last=family, first=given)
    except ValueError:
        person = Attribute.Person(last=author.strip())
    return person
61
62 simple = None
63 for author in node:
64 if author.tag in ('primaryauthor', 'author'):
65 if simple is not None:
66 self.record.add('author', simple)
67 simple = single(author.text)
68 elif author.tag == 'fullauthorname':
69 auth = Attribute.Person(
70 last=author.findtext('./AuLastName'),
71 first=author.findtext('./AuFirstName'))
72 self.record.add('author', auth)
73 simple = None
74 if simple is not None:
75 self.record.add('author', simple)
76 return
77
81
82 for author in node:
83 self.record.add('author', author.text, single)
84
86 for ref in node:
87 self.record.add('ref', ref.text, Attribute.ID)
88 return
89
91 for ref in node:
92 self.record.add('keyword', ref.text, Attribute.Text)
93 return
94
96 for ref in node:
97 self.record.add('keyword-plus', ref.text, Attribute.Text)
98 return
99
101 if node.attrib.get("avail").lower() != 'y':
102 return
103 paras = [n.text for n in node.findall('p')]
104 self.record.add('abstract', '\n'.join(paras), Attribute.Text)
105 return
106
108 tp = self._type(node.get('code'))
109
110 self.record.add('doctype', tp, Attribute.Txo)
111 return
112
115
118
121
124
def do_article_nos(self, node):
    """Record the identifiers listed below an ``article_nos`` element.

    Each child of *node* is inspected in turn.  A child whose stripped
    text starts with ``'DOI '`` is stored in the ``doi`` attribute of
    the current record, with that prefix removed.  A child without any
    text is skipped, and every other value is reported as a warning.
    """
    for entry in node:
        # An empty element has its ``text`` set to None: there is nothing
        # to record, and calling .strip() on it would raise.
        if entry.text is None:
            continue

        value = entry.text.strip()
        if value.startswith('DOI '):
            self.record.add('doi', value[4:], Attribute.ID)
            continue

        # Logger.warn() is a deprecated alias of Logger.warning(); the
        # arguments are passed separately so formatting stays lazy.
        self.log.warning('%s: unhandled article_no %s',
                         self.uid(), repr(value))
136
def do_bib_pages(self, node):
    """Store the text of *node* as the ``source.pages`` attribute.

    The value is added to the current record as an ``Attribute.Text``.
    """
    pages = node.text
    self.record.add('source.pages', pages, Attribute.Text)
139
141 for s, d in (('vol', 'source.volume'),
142 ('year', 'source.year')):
143
144 v = node.get(s)
145 if v:
146 self.record.add(d, v, Attribute.Text)
147 return
148
149
150
151
152
def do_sq(self, node):
    """Handler for ``sq`` elements: deliberately does nothing."""
    return None
162
163
164
165
168
171
173 """ Generate the display name of a record.
174
175 Used when outputting a warning for instance."""
176 try:
177 return 'ISI:' + self.record['ut'][0]
178 except KeyError:
179 return repr(self.record.key)
180
181
def parse(self, fd, db, rs=None):
    """Import every ``./REC/item`` element of *fd* into *db*.

    fd -- parsed tree, searched with ``findall('./REC/item')``
    db -- database receiving the records; also gives access to the
          ``doctype`` taxonomy of its schema
    rs -- result set collecting the keys of the imported records; when
          omitted, a new one is created through ``db.rs.add(True)``
          and given a default name

    Each child of an item is dispatched to the ``do_<tag>`` method of
    this object, or to ``do_default`` when no such method exists.

    Returns the result set holding the keys of the added records.
    """
    result_set = rs
    if result_set is None:
        result_set = db.rs.add(True)
        result_set.name = _('Imported from Web of Knowledge')

    self.db = db
    self._type = db.schema.txo['doctype'].byname

    for item in fd.findall('./REC/item'):
        self.record = Store.Record()
        self.record_begin()

        for element in item:
            handler = getattr(self, 'do_' + element.tag, self.do_default)
            handler(element)

        self.record_end()

        # Store the finished record and remember its key.
        result_set.add(db.add(self.record))

    return result_set
206