Package Pyblio :: Package Parsers :: Package Syntax :: Package BibTeX :: Module Coding
[hide private]
[frames | no frames]

Source Code for Module Pyblio.Parsers.Syntax.BibTeX.Coding

  1  # -*- coding: utf-8 -*- 
  2  # This file is part of pybliographer 
  3  #  
  4  # Copyright (C) 1998-2006 Frederic GOBRY 
  5  # Email : gobry@pybliographer.org 
  6  #           
  7  # This program is free software; you can redistribute it and/or 
  8  # modify it under the terms of the GNU General Public License 
  9  # as published by the Free Software Foundation; either version 2  
 10  # of the License, or (at your option) any later version. 
 11  #    
 12  # This program is distributed in the hope that it will be useful, 
 13  # but WITHOUT ANY WARRANTY; without even the implied warranty of 
 14  # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the 
 15  # GNU General Public License for more details.  
 16  #  
 17  # You should have received a copy of the GNU General Public License 
 18  # along with this program; if not, write to the Free Software 
 19  # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA. 
 20  #  
 21  #  
 22   
 23  """ 
 24  Handles coding and decoding of LaTeX-escaped characters. 
 25   
 26  Coding and decoding try to be as reversible as possible (though 
 27  certain encodings are ambiguous). 
 28  """ 
 29   
 30  # this map is for composing letters with diacritics, like in \'e 
 31  basemap = { 
 32      ".": { 
 33      'C': u"Ċ",      'E': u"Ė", 'G': u"Ġ", 'I': u"İ", 'Z': u"Ż", 
 34      'c': u"\u010b", 'e': u"Ė", 'g': u"ġ",            'z': u"ż", }, 
 35       
 36      "'": { 
 37      'A': u"Á", 'E': u"É", 'I': u"Í", 'O': u"Ó", 'U': u"Ú", 'Y': u"Ý", 'C': u"Ć", 'Z': u"Ź", 'N': u"Ń", 
 38      'a': u"á", 'e': u"é", 'i': u"í", 'o': u"ó", 'u': u"ú", 'y': u"ý", 'c': u"ć", 'z': u"ź", 'n': u"ń", 
 39      }, 
 40       
 41      "`": { 
 42      'A': u"À", 'E': u"È", 'I': u"Ì", 'O': u"Ò", 'U': u"Ù", 
 43      'a': u"à", 'e': u"è", 'i': u"ì", 'o': u"ò", 'u': u"ù", 
 44      }, 
 45       
 46      "^": { 
 47      'A': u"Â", 'E': u"Ê", 'I': u"Î", 'O': u"Ô", 'U': u"Û", 
 48      'a': u"â", 'e': u"ê", 'i': u"î", 'o': u"ô", 'u': u"û", 
 49      }, 
 50   
 51      '"': { 
 52      'A': u"Ä", 'E': u"Ë", 'I': u"Ï", 'O': u"Ö", 'U': u"Ü", 
 53      'a': u"ä", 'e': u"ë", 'i': u"ï", 'o': u"ö", 'u': u"ü", 'y': u"ÿ", 
 54      }, 
 55   
 56      "c": { 
 57      'C': u"Ç", 'c': u"ç", 
 58      }, 
 59   
 60      "~": { 
 61      'A': u"Ã", 'O': u"Õ", 'N': u"Ñ", 
 62      'a': u"ã", 'o': u"õ", 'n': u"ñ",  
 63      }, 
 64  } 
 65   
 66  staticmap = { 
 67      'ss': (u'ß', 0), 
 68      'ae': (u'æ', 0), 'AE': (u'Æ', 0), 
 69      'oe': (u'œ', 0), 'OE': (u'Œ', 0), 
 70      'aa': (u'å', 0), 'AA': (u'Å', 0), 
 71      'o' : (u'ø', 0), 'O' : (u'Ø', 0), 
 72   
 73      'copyright': (u'©', 0), 
 74  } 
 75   
 76   
 77  _reversemap = {} 
 78   
 79  # construct a simple map that goes from the unicode character to the 
 80  # BibTeX representation 
 81  for cmd, sub in basemap.iteritems(): 
 82      for letter, symbol in sub.iteritems(): 
 83          if letter == 'i': 
 84              letter = '{\\i}' 
 85          _reversemap[symbol] = '\\%s%s' % (cmd, letter) 
 86   
 87  for cmd, (symbol, count) in staticmap.iteritems(): 
 88      _reversemap[symbol] = '\\%s{}' % cmd 
 89   
90 -def _encodeone(char):
91 o = ord(char) 92 if o >= 32 and o <= 127 and char not in '{}%\\$': 93 return char 94 95 try: 96 return _reversemap[char] 97 except KeyError: 98 return '\\char%d' % o
99
def encode(text):
    """ Encode a unicode string into a valid BibTeX string.

    Each character of text is converted on its own by _encodeone and
    the results are concatenated in their original order.
    """
    pieces = []
    for character in text:
        pieces.append(_encodeone(character))
    return u''.join(pieces)
103