Package openid :: Module urinorm
[frames | no frames]

Source Code for Module openid.urinorm

  1  import re 
  2   
  # URI-splitting expression from appendix B of RFC 3986
  # (http://www.ietf.org/rfc/rfc3986.txt), written one component per line.
  # The adjacent raw-string pieces concatenate to the single-line pattern.
  uri_pattern = (
      r'^(([^:/?#]+):)?'  # group 1: "scheme:"      group 2: scheme
      r'(//([^/?#]*))?'   # group 3: "//authority"  group 4: authority
      r'([^?#]*)'         # group 5: path
      r'(\?([^#]*))?'     # group 6: "?query"       group 7: query
      r'(#(.*))?'         # group 8: "#fragment"    group 9: fragment
  )
  uri_re = re.compile(uri_pattern)

  # Splits an authority component into its three optional/required parts.
  authority_pattern = (
      r'^([^@]*@)?'       # group 1: userinfo, including the trailing "@"
      r'([^:]*)'          # group 2: host
      r'(:.*)?'           # group 3: port, including the leading ":"
  )
  authority_re = re.compile(authority_pattern)

  # A single percent-encoded octet; group 1 holds its two hex digits.
  pct_encoded_pattern = r'%([0-9A-Fa-f]{2})'
  pct_encoded_re = re.compile(pct_encoded_pattern)
 14   
 # Inclusive (first, last) code point ranges that urinorm percent-escapes in
 # unicode input.  The ranges below fit in the Basic Multilingual Plane and
 # are used on every build.
 UCSCHAR = [
     (0xA0, 0xD7FF),
     (0xF900, 0xFDCF),
     (0xFDF0, 0xFFEF),
     ]

 IPRIVATE = [
     (0xE000, 0xF8FF),
     ]

 try:
     unichr(0x10000)
 except ValueError:
     # narrow python build: code points above 0xFFFF cannot be expressed,
     # so the BMP-only tables above are complete.
     pass
 else:
     # Wide build: add planes 1 through 13 (0xN0000-0xNFFFD each).  A
     # generator expression is used so no loop variable is left behind at
     # module level.
     UCSCHAR.extend(
         (plane << 16, (plane << 16) | 0xFFFD) for plane in range(0x1, 0xE))
     # Plane 14 is special-cased: its range starts at 0xE1000, not 0xE0000.
     UCSCHAR.append((0xE1000, 0xEFFFD))

     IPRIVATE.extend([
         (0xF0000, 0xFFFFD),
         (0x100000, 0x10FFFD),
         ])
 54   
 55   
 # Lookup table indexed by octet value: True for the characters that are
 # flagged as unreserved here (ASCII letters, digits, "-", ".", "_", "~").
 _unreserved = [False] * 256
 for _ in map(ord, 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
                   '0123456789'
                   'abcdefghijklmnopqrstuvwxyz'):
     _unreserved[_] = True
 _unreserved[ord('-')] = _unreserved[ord('.')] = True
 _unreserved[ord('_')] = _unreserved[ord('~')] = True
 64   
 65   
 # One character class containing a "first-last" range for every
 # (first, last) pair listed in UCSCHAR and IPRIVATE.
 _escapeme_re = re.compile('[%s]' % (''.join(
     u'%s-%s' % (unichr(first), unichr(last))
     for first, last in UCSCHAR + IPRIVATE),))
 69   
 70   
def _pct_escape_unicode(char_match):
    """Percent-encode the UTF-8 octets of the text matched by a regex.

    Intended as a replacement callback for ``re.sub``.

    Args:
        char_match: match object; its whole match (``group()``) is the
            text to escape.

    Returns:
        A string of ``%XX`` triplets, one per UTF-8 octet of the matched
        text, using upper-case hexadecimal digits.
    """
    # bytearray yields each octet as an integer, so the formatting below
    # does not depend on ord() of 1-character strings.
    octets = bytearray(char_match.group().encode('utf-8'))
    # %02X rather than %X: a percent-encoded octet is always "%" followed
    # by exactly two hex digits, so values below 0x10 need a leading zero.
    return ''.join(['%%%02X' % (octet,) for octet in octets])
74 75
def _pct_encoded_replace_unreserved(mo):
    """Normalize one percent-encoded octet matched by a regex.

    Intended as a replacement callback for ``re.sub``.

    Args:
        mo: match object whose group 1 holds the hexadecimal digits of the
            octet.

    Returns:
        The literal character when the octet is flagged in ``_unreserved``;
        otherwise the whole match converted to upper case.  When group 1 is
        not valid hexadecimal the whole match is returned unchanged.
    """
    try:
        octet = int(mo.group(1), 16)
    except ValueError:
        return mo.group()

    if _unreserved[octet]:
        return chr(octet)
    return mo.group().upper()
86 87
def _pct_encoded_replace(mo):
    """Decode one percent-encoded octet matched by a regex.

    Intended as a replacement callback for ``re.sub``.

    Args:
        mo: match object whose group 1 holds the hexadecimal digits of the
            octet.

    Returns:
        ``chr()`` of the decoded value, or the whole match unchanged when
        the conversion raises ValueError.
    """
    hex_digits = mo.group(1)
    try:
        octet = int(hex_digits, 16)
        return chr(octet)
    except ValueError:
        return mo.group()
93 94
def remove_dot_segments(path):
    """Remove "." and ".." segments from a URI path.

    The path is consumed from the left.  Each step either discards a dot
    segment (a ".." segment also discards the most recently kept segment,
    if there is one) or moves the first path segment to the output.

    Args:
        path: the path string to clean up.

    Returns:
        The path with its dot segments removed.
    """
    kept = []

    while path:
        # A leading "../" or "./" is simply dropped.
        if path.startswith('../'):
            path = path[3:]
            continue
        if path.startswith('./'):
            path = path[2:]
            continue

        # A path that is nothing but "." or ".." is dropped entirely.
        if path in ('.', '..'):
            path = ''
            continue

        # "/./" (or a trailing "/.") collapses to "/".
        if path.startswith('/./'):
            path = path[2:]
            continue
        if path == '/.':
            path = '/'
            continue

        # "/../" (or a trailing "/..") collapses to "/" and backs out of
        # the last kept segment.
        if path.startswith('/../') or path == '/..':
            path = '/' + path[4:]
            if kept:
                kept.pop()
            continue

        # Otherwise move the first segment, with its leading "/" if it has
        # one, to the output.  Searching from index 1 skips a leading "/"
        # and is harmless when the first character is not a slash.
        cut = path.find('/', 1)
        if cut == -1:
            cut = len(path)
        kept.append(path[:cut])
        path = path[cut:]

    return ''.join(kept)
128 129
def urinorm(uri):
    """Normalize an absolute HTTP or HTTPS URI.

    The scheme and host are lower-cased; a host containing percent-escapes
    is decoded and then IDNA-encoded; an empty port or the scheme's default
    port (":80" for http, ":443" for https) is dropped; in the path,
    percent-escapes of unreserved characters are decoded and the remaining
    escapes are upper-cased, dot segments are removed, and an empty path
    becomes "/".  Userinfo, query and fragment are copied through as they
    are.

    Args:
        uri: the URI to normalize.  A ``unicode`` value first has every
            character matched by ``_escapeme_re`` percent-encoded and is
            then encoded to ASCII.

    Returns:
        The normalized URI string.

    Raises:
        ValueError: if the URI has no scheme, has a scheme other than http
            or https, or has no authority component.
    """
    if isinstance(uri, unicode):
        uri = _escapeme_re.sub(_pct_escape_unicode, uri).encode('ascii')

    parts = uri_re.match(uri)

    # --- scheme -----------------------------------------------------------
    scheme = parts.group(2)
    if scheme is None:
        raise ValueError('No scheme specified')

    scheme = scheme.lower()
    if scheme not in ('http', 'https'):
        raise ValueError('Not an absolute HTTP or HTTPS URI: %r' % (uri,))

    # --- authority --------------------------------------------------------
    authority = parts.group(4)
    if authority is None:
        raise ValueError('Not an absolute URI: %r' % (uri,))

    authority_parts = authority_re.match(authority)
    if authority_parts is None:
        raise ValueError('URI does not have a valid authority: %r' % (uri,))

    userinfo, host, port = authority_parts.groups()
    userinfo = userinfo or ''

    # Lower-casing never adds or removes "%", so it can be done up front.
    host = host.lower()
    if '%' in host:
        host = pct_encoded_re.sub(_pct_encoded_replace, host)
        host = unicode(host, 'utf-8').encode('idna')

    # The scheme was validated above, so this lookup cannot fail.
    default_port = {'http': ':80', 'https': ':443'}[scheme]
    if not port or port in (':', default_port):
        port = ''

    authority = userinfo + host + port

    # --- path -------------------------------------------------------------
    path = pct_encoded_re.sub(_pct_encoded_replace_unreserved, parts.group(5))
    path = remove_dot_segments(path) or '/'

    # --- query and fragment (groups keep their leading "?" / "#") ---------
    query = parts.group(6) or ''
    fragment = parts.group(8) or ''

    return ''.join([scheme, '://', authority, path, query, fragment])
189