Package Pyblio :: Package Parsers :: Package Syntax :: Module RIS
[hide private]
[frames | no frames]

Source Code for Module Pyblio.Parsers.Syntax.RIS

  1  from Pyblio.Parsers.Syntax import Tagged 
  2  from Pyblio import Attribute 
  3   
  4  from gettext import gettext as _ 
  5   
  6  import re, string 
  7   
  8  start_re = re.compile (r'^(\w\w)\s\s-\s(.*?)\r?$')  # tag line: two-character tag, two whitespace chars, '-', one whitespace char, then the data (a trailing CR is not captured)
  9  contd_re = re.compile (r'^\s{6,6}(.*?)\r?$')  # continuation line: six leading whitespace chars, then the data (a trailing CR is not captured)
 10   
 11   
class RISParser(Tagged.Parser):
    """Split RIS records into tagged fields.

    Each input line is classified with the module-level ``start_re`` and
    ``contd_re`` patterns and forwarded to the record/field callbacks
    (``record_start``, ``record_end``, ``field_start``, ``field_data``,
    ``field_end``), which are not defined in this class and are presumably
    provided by ``Tagged.Parser``.
    """

    def line_handler(self, line, count):
        """Process one line of RIS input.

        line  -- the raw text of the line
        count -- the line number, used for field positions and error
                 messages

        Blank lines are ignored.  A tag line closes the field currently
        being read (unless it is a 'TY' line, which starts a new record
        instead) and then either ends the record ('ER') or opens a new
        field holding the line's data.  A continuation line appends its
        text, prefixed with a single space, to the current field.

        Raises SyntaxError for a line that is neither blank, a tag line
        nor a continuation line.
        """
        if line.strip() == '':
            return

        m = start_re.match(line)
        if m:
            # group(1, 2) returns the (tag, data) pair explicitly.  The
            # previous groups((1, 2)) call passed the tuple as the
            # *default* for unmatched groups, which only worked because
            # both groups always participate in a match.
            tag, data = m.group(1, 2)

            if tag == 'TY':
                self.record_start()
            elif self.state == self.ST_IN_FIELD:
                self.field_end()

            if tag == 'ER':
                self.record_end()
                return

            self.field_start(tag, count)
            self.field_data(data)
            return

        m = contd_re.match(line)
        if m:
            self.field_data(' ' + m.group(1))
            return

        raise SyntaxError(_('line %d: unexpected data') % count)
45 46
47 -def _mkyear (y):
48 if not y: return None 49 return int (y)
50
class Reader(Tagged.Reader):
    """Importer that maps RIS fields onto the 'standard' pyblio model.

    ``self.mapping`` associates each RIS tag with either

      * a ``(handler, field)`` pair: ``handler (field, data)`` is called
        by ``do_default`` to store the value in the current record, or
      * a plain placeholder string, for tags that are recognised but not
        supported yet (``do_default`` emits a warning for those).

    The ``do_<TAG>`` methods handle specific tags; presumably the base
    class dispatches to them by name and falls back to ``do_default``
    (the dispatch itself is not part of this class -- confirm in
    ``Tagged.Reader``).
    """

    Parser = RISParser

    def __init__(self):
        """Initialise the base reader and build the tag mapping."""
        Tagged.Reader.__init__(self)

        # text_add, id_add and url_add are not defined in this class;
        # they are expected to come from Tagged.Reader.
        self.mapping = {
            # NOTE(review): 'AB' is stored as 'note' while 'N2' is stored
            # as 'abstract' -- kept as is, confirm this is intended.
            'T1': (self.text_add, 'title'),
            'TI': (self.text_add, 'title'),
            'CT': (self.text_add, 'title'),
            'BT': (self.text_add, 'title'),
            'N1': (self.text_add, 'note'),
            'AB': (self.text_add, 'note'),
            'JF': (self.text_add, 'journal'),
            'JO': (self.text_add, 'journal'),
            'JA': (self.text_add, 'journal'),
            'J1': (self.text_add, 'journal'),
            'J2': (self.text_add, 'journal'),
            'VL': (self.text_add, 'volume'),
            'IS': (self.text_add, 'issue'),
            'CP': (self.text_add, 'issue'),
            'CY': (self.text_add, 'city'),
            'PB': (self.text_add, 'publisher'),
            'N2': (self.text_add, 'abstract'),
            'SN': (self.text_add, 'issn'),
            'AV': (self.text_add, 'availability'),
            'AD': (self.text_add, 'address'),

            'ID': (self.id_add, 'id'),

            'UR': (self.url_add, 'url'),

            'A1': (self.person_add, 'author'),
            'AU': (self.person_add, 'author'),

            'Y1': (self.date_add, 'date'),
            'PY': (self.date_add, 'date'),

            # Recognised but unsupported tags: the value is a placeholder
            # string instead of a (handler, field) pair.
            'L1': '? pdf ?',
            'L2': '? fulltext ?',

            'TY': '? type ?',

            'KW': '? keyword ?',

            'SP': '? start page ?',
            'EP': '? end page ?',

            'RP': '? reprint ?',

            'T2': '? title secondary ?',
            'A2': '? author secondary ?',
            'ED': '? author secondary ?',

            'T3': '? title series ?',
            'A3': '? author series ?',

            'Y2': '? date secondary ?',

            'U1': '? user defined ?',
            'U2': '? user defined ?',
            'U3': '? user defined ?',
            'U4': '? user defined ?',
            'U5': '? user defined ?',

            'M1': '? misc ?',
            'M2': '? misc ?',
            'M3': '? misc ?',

            'L3': '? related ?',
            'L4': '? images ?'
        }

    def person_add(self, field, value):
        """Parse a person name in RIS format and append it to the record.

        field -- key of the current record receiving the person
        value -- comma-separated 'last, first, lineage' text

        Each part is stripped of surrounding whitespace.  Missing
        trailing parts become None and parts beyond the third are
        ignored.
        """
        # A list comprehension replaces map (string.strip, ...): the
        # string.strip function only exists in Python 2, and map () only
        # returns a list (which can be concatenated below) in Python 2.
        parts = [part.strip() for part in value.split(',')]
        last, first, lineage = (parts + [None, None])[:3]

        a = self.record.get(field, [])
        a.append(Attribute.Person(last=last,
                                  first=first,
                                  lineage=lineage))

        self.record[field] = a

    def date_add(self, field, value):
        """Parse a date in RIS format and append it to the record.

        field -- key of the current record receiving the date
        value -- '/'-separated 'year/month/day[/other]' text

        Missing or empty components become None.  Only the first three
        components are interpreted; anything after a third '/' is
        ignored.

        Raises ValueError when one of the first three components is
        present but not an integer.
        """
        # Truncate *before* converting: an RIS date may carry a free-text
        # fourth component ('1999/12/25/Winter') which must not reach
        # int ().  Components are stripped so that a whitespace-only one
        # counts as missing.
        parts = [_mkyear(part.strip()) for part in value.split('/')[:3]]
        year, month, day = (parts + [None, None])[:3]

        a = self.record.get(field, [])
        a.append(Attribute.Date(year=year, month=month, day=day))

        self.record[field] = a

    def do_TY(self, line, tag, data):
        """Ignore the record type tag."""
        pass

    def do_SP(self, line, tag, data):
        """Remember the start page, stripped, in self._sp."""
        self._sp = data.strip()

    def do_EP(self, line, tag, data):
        """Remember the end page, stripped, in self._ep."""
        self._ep = data.strip()

    def do_KW(self, line, tag, data):
        """Ignore keyword tags."""
        pass

    def do_RP(self, line, tag, data):
        """Ignore reprint status tags."""
        pass

    def do_default(self, line, tag, data):
        """Store a field through the handler registered in self.mapping.

        line -- line number of the field, used in messages
        tag  -- the RIS tag
        data -- the field content

        Raises SyntaxError when the tag is absent from the mapping.  A
        tag mapped to a placeholder string only triggers a 'warning'
        emission and is otherwise skipped.
        """
        try:
            meth, field = self.mapping[tag]

        except KeyError:
            raise SyntaxError(_("line %s: unknown tag '%s'") % (line, tag))

        except ValueError:
            # Unpacking a placeholder string into two names fails with
            # ValueError (no placeholder is exactly two characters
            # long): this is how unsupported tags are detected.
            self.emit('warning',
                      (_("line %s: unsupported tag '%s'") % (line, tag)))
            return

        meth(field, data)
207