1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20 """
21 Parser for the XML format returned by PubMed's Web API
22 """
23
24 import logging
25
26 from gettext import gettext as _
27
28 from Pyblio import Attribute, Store, Compat
29
30 _DEBUG = False
31
33 """ Parse records as returned by PubMed's web service."""
34
35 log = logging.getLogger('pyblio.import.pubmed')
36
38 """ Generate the display name of a record.
39
40 Used when outputting a warning for instance."""
41 try:
42 return 'PMID:' + self.record['pmid'][0]
43 except KeyError:
44 return repr(self.record.key)
45
46
48 """ Called when no specific handler exists."""
49
50 self.log.warn('%s: unhandled attribute %s' % (
51 self.uid(), repr(node.tag)))
52 return
53
55
56 self._fallback_journal = node.findtext('./MedlineTA')
57
def do_Article(self, node):
    """ Hand every child of an <Article> element to its handler.

    A child whose tag is ``X`` is passed to the method named
    ``do_Article_X``; when the reader has no such method the child
    is passed to ``do_default`` instead."""
    for element in node:
        handler_name = 'do_Article_' + element.tag
        handler = getattr(self, handler_name, self.do_default)
        handler(element)
64
66 self.record.add('title', node.text, Attribute.Text)
67
def do_Article_Abstract(self, node):
    """ Store the abstract text in the record's 'abstract' field.

    Only the text of the first <AbstractText> child of *node* is
    read.  When that child is missing or carries no text, nothing is
    stored: previously a missing child raised AttributeError and an
    empty one stored a None value."""
    # findtext() yields None when the element is absent and '' when it
    # is present but empty; both mean "no abstract to record".
    text = node.findtext('./AbstractText')
    if not text:
        return
    self.record.add('abstract', text, Attribute.Text)
71
def do_Article_Journal(self, node):
    """ Copy the journal description below *node* into the record.

    Each entry of the table maps a record field to the path of the
    element holding its value and to the attribute type used to store
    it.  Elements that are not present are skipped."""
    fields = (
        ('journal', 'Title', Attribute.Text),
        ('journal.issn', 'ISSN', Attribute.ID),
        ('journal.volume', 'JournalIssue/Volume', Attribute.Text),
        ('journal.issue', 'JournalIssue/Issue', Attribute.Text),
        ('journal.year', 'JournalIssue/PubDate/Year', Attribute.Text),
        ('journal.month', 'JournalIssue/PubDate/Month', Attribute.Text),
    )
    for field, path, kind in fields:
        element = node.find(path)
        if element is None:
            continue
        self.record.add(field, element.text, kind)
87
89 def v(n, k):
90 l = n.find(k)
91 if l is not None:
92 return l.text
93 return None
94
95
96
97 for au in node.findall('./Author'):
98 person = Attribute.Person(
99 last=v(au, './LastName'),
100 first=v(au, './ForeName') or v(au, './FirstName'))
101 self.record.add('author', person)
102
def do_Article_Pagination(self, node):
    """ Record the page range of the article as 'journal.pages'.

    The text of <MedlinePgn> is stored unchanged, except when it is
    an abbreviated numeric range such as '1234-45', whose right bound
    is smaller than the left one.  In that case the right bound is
    completed with the leading digits of the left bound, giving
    '1234-1245'.  Nothing is stored when <MedlinePgn> is missing or
    has no text."""
    element = node.find('./MedlinePgn')
    if element is None or not element.text:
        return

    pages = element.text
    parts = pages.split('-')
    try:
        bounds = list(map(int, parts))
    except ValueError:
        # Not a purely numeric range: keep the text untouched.
        bounds = []

    if len(bounds) == 2 and bounds[1] < bounds[0]:
        first, last = parts
        # Borrow from the left bound the digits the right one lacks.
        completed = first[:len(first) - len(last)] + last
        # Only accept the completion when it yields an increasing range.
        if int(completed) > bounds[0]:
            pages = '%s-%s' % (first, completed)

    self.record.add('journal.pages', pages, Attribute.Text)
124
127
128
129
132
134
135
136 j = self.record.get('journal')
137 if j and not j[0].is_complete() and self._fallback_journal:
138 self.record.add('journal', self._fallback_journal, Attribute.Text)
139
def parse(self, fd, db, rs=None):
    """ Import the citations found in *fd* into the database *db*.

    *fd* is queried with findall() for its
    ``PubmedArticle/MedlineCitation`` elements; each of them becomes
    one new record.  Every child of a citation tagged ``X`` is passed
    to the method ``do_X``, or to ``do_default`` when no such method
    exists.

    The keys of the added records are collected in *rs*.  When *rs*
    is None a new result set is created from ``db.rs`` and named
    'Imported from PubMed'.  The result set is returned."""
    if rs is None:
        rs = db.rs.new()
        rs.name = _('Imported from PubMed')

    self.db = db

    citations = fd.findall('./PubmedArticle/MedlineCitation')
    for citation in citations:
        self.record = Store.Record()
        self.record_begin()

        if _DEBUG:
            Compat.ElementTree.dump(citation)

        for element in citation:
            handler = getattr(self, 'do_' + element.tag, self.do_default)
            handler(element)

        self.record_end()

        rs.add(db.add(self.record))

    return rs
165