Package Pyblio :: Package Parsers :: Package Semantic :: Module ISI
[hide private]
[frames | no frames]

Source Code for Module Pyblio.Parsers.Semantic.ISI

  1  # This file is part of pybliographer 
  2  #  
  3  # Copyright (C) 1998-2006 Frederic GOBRY 
  4  # Email : gobry@pybliographer.org 
  5  #           
  6  # This program is free software; you can redistribute it and/or 
  7  # modify it under the terms of the GNU General Public License 
  8  # as published by the Free Software Foundation; either version 2  
  9  # of the License, or (at your option) any later version. 
 10  #    
 11  # This program is distributed in the hope that it will be useful, 
 12  # but WITHOUT ANY WARRANTY; without even the implied warranty of 
 13  # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the 
 14  # GNU General Public License for more details.  
 15  #  
 16  # You should have received a copy of the GNU General Public License 
 17  # along with this program; if not, write to the Free Software 
 18  # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA. 
 19   
 20  """ 
 21  Parser for the ISI format returned by the Web of Knowledge. 
 22  """ 
 23   
 24  import logging 
 25   
 26  from Pyblio.Parsers.Syntax import ISI 
 27  from Pyblio import Attribute 
 28   
class Reader(ISI.Reader):
    """Semantic layer on top of the syntactic ISI reader.

    Every ``do_XX`` method receives the content of the ISI field tagged
    ``XX`` and stores it in ``self.record``.  The dispatching itself is not
    done in this class (presumably by the base ``ISI.Reader`` -- confirm
    there).
    """

    # mapping between document types declared in the ISI file format,
    # and doctypes declared in the XML Web of Knowledge format... Not
    # very readable, but would it be more sensible to add an
    # additional notation in this mess?
    # See Pyblio/RIS/wok.sip for detailed explanations on the document types.
    doctype_mapping = {
        'J': '@',
    }

    log = logging.getLogger('pyblio.import.isi')

    def record_begin(self):
        """Forget the start page remembered from a previous BP field."""
        self._page_start = None

    def do_PT(self, line, tag, data):
        """Publication type: store the matching ``doctype`` taxonomy item.

        A type that is absent from ``doctype_mapping`` is reported through
        the logger and skipped, instead of aborting the whole import with
        a KeyError.
        """
        try:
            type_name = self.doctype_mapping[data]
        except KeyError:
            self.log.warning(
                'unknown publication type %r: doctype not set', data)
            return

        doctype = self.db.schema.txo['doctype'].byname(type_name)
        self.record.add('doctype', doctype, Attribute.Txo)

    def do_DT(self, line, tag, data):
        """Document type: deliberately ignored."""
        # document type? difference with PT?
        pass

    def do_ID(self, line, tag, data):
        """Keywords: one ``keyword`` attribute per ';'-separated item."""
        for raw in data.split(';'):
            keyword = raw.strip()
            # A trailing or doubled ';' yields an empty item: skip it
            # rather than recording an empty keyword.
            if keyword:
                self.record.add('keyword', keyword, Attribute.Text)

    def do_AU(self, line, tag, data):
        """Author."""
        self.person_add('author', data)

    def do_TI(self, line, tag, data):
        """Title."""
        self.record.add('title', data, Attribute.Text)

    def do_UT(self, line, tag, data):
        """Identifier, expected as ``<source>:<uid>``.

        Only identifiers whose source is ``ISI`` are stored (as ``ut``);
        anything else is silently ignored.
        """
        if ':' not in data:
            return

        # Split on the first colon only, so that a uid which itself
        # contains ':' does not break the two-name unpacking.
        source, uid = data.split(':', 1)
        if source == 'ISI':
            self.record.add('ut', uid, Attribute.ID)

    def do_AB(self, line, tag, data):
        """Abstract."""
        self.record.add('abstract', data, Attribute.Text)

    def do_SO(self, line, tag, data):
        """Source."""
        self.record.add('source', data, Attribute.Text)

    def do_JI(self, line, tag, data):
        """Abbreviated source name, stored as ``source.abbrev``."""
        self.record.add('source.abbrev', data, Attribute.Text)

    def do_J9(self, line, tag, data):
        """Abbreviated source name, stored as ``source.abbrev`` (like JI)."""
        self.record.add('source.abbrev', data, Attribute.Text)

    def do_PY(self, line, tag, data):
        """Publication year."""
        self.record.add('source.year', data, Attribute.Text)

    def do_SE(self, line, tag, data):
        """Series."""
        self.record.add('source.series', data, Attribute.Text)

    def do_VL(self, line, tag, data):
        """Volume."""
        self.record.add('source.volume', data, Attribute.Text)

    def do_IS(self, line, tag, data):
        """Number (issue)."""
        self.record.add('source.number', data, Attribute.Text)

    def do_SN(self, line, tag, data):
        """ISSN."""
        self.record.add('source.issn', data, Attribute.ID)

    def do_C1(self, line, tag, data):
        """Authors' addresses: ignored."""
        pass

    def do_RP(self, line, tag, data):
        """Authors' addresses: ignored."""
        pass

    def do_BP(self, line, tag, data):
        """Start page: remembered until the matching EP field is seen."""
        self._page_start = data

    def do_EP(self, line, tag, data):
        """End page: store ``source.pages`` from the remembered start page.

        The value is the single page when start and end are equal, and
        ``<start>-<end>`` otherwise.  Nothing is stored when no BP field
        was seen for the current record.
        """
        # NOTE(review): a BP field that is never followed by an EP field
        # is not recorded at all -- confirm whether that is intended.
        start = self._page_start
        if start is None:
            return

        if start == data:
            page_range = data
        else:
            page_range = start + '-' + data
        self.record.add('source.pages', page_range, Attribute.Text)

    def do_default(self, line, tag, data):
        """Delegate to the base reader, downgrading parse errors to warnings."""
        try:
            ISI.Reader.do_default(self, line, tag, data)
        except ISI.ParserError as msg:
            self.log.warning(str(msg))
129