Package dpkt :: Module http
[hide private]
[frames | no frames]

Source Code for Module dpkt.http

  1  # $Id: http.py 86 2013-03-05 19:25:19Z andrewflnr@gmail.com $ 
  2  # -*- coding: utf-8 -*- 
  3  """Hypertext Transfer Protocol.""" 
  4  from __future__ import print_function 
  5  from __future__ import absolute_import 
  6  try: 
  7      from collections import OrderedDict 
  8  except ImportError: 
  9      # Python 2.6 
 10      OrderedDict = dict 
 11   
 12  from . import dpkt 
 13  from .compat import BytesIO, iteritems 
 14   
 15   
def parse_headers(f):
    """Return a dict of HTTP headers parsed from a file object.

    Lines are consumed from ``f`` until a blank line or the end of input is
    reached. Each line is stripped and decoded as ASCII (undecodable bytes
    are dropped). Header names are lower-cased. A name that appears more
    than once maps to a list holding its values in order of appearance.

    Args:
        f: binary file-like object positioned at the first header line.

    Returns:
        An ``OrderedDict`` (plain ``dict`` where ``OrderedDict`` is not
        available) mapping lower-cased header names to a string value, or
        to a list of strings for repeated headers. A line without ':' is
        stored with an empty string as its value.

    Raises:
        dpkt.UnpackError: if the text before the first ':' is not exactly
            one whitespace-delimited token.
    """
    headers = OrderedDict()
    while True:
        # One emptiness check covers both ways the header section can end:
        # 1) a well-formed section ends with b'\r\n', which strip() reduces
        #    to an empty string;
        # 2) a truncated section simply runs out of data, in which case
        #    readline() returns b''.
        line = f.readline().strip().decode("ascii", "ignore")
        if not line:
            break
        name, _, value = line.partition(':')
        if len(name.split()) != 1:
            raise dpkt.UnpackError('invalid header: %r' % line)
        key = name.lower()
        value = value.lstrip()
        if key in headers:
            # Promote a repeated header to a list on its second occurrence.
            if not isinstance(headers[key], list):
                headers[key] = [headers[key]]
            headers[key].append(value)
        else:
            headers[key] = value
    return headers
42 43
def parse_body(f, headers):
    """Return the HTTP body parsed from a file object, given the header dict.

    The body framing is chosen from ``headers`` in this order:

    1. ``transfer-encoding: chunked`` - chunks are read and concatenated
       until the zero-sized terminating chunk. Chunk extensions following a
       ';' on the size line are ignored.
    2. ``content-length`` - exactly that many bytes are read.
    3. ``content-type`` present - everything left in ``f`` is the body.
    4. otherwise the body is empty.

    Args:
        f: binary file-like object positioned just after the header section.
        headers: dict of lower-cased header names as produced by
            ``parse_headers``.

    Returns:
        The body as bytes.

    Raises:
        dpkt.UnpackError: if a chunk size line is missing or is not valid
            hexadecimal, or if the content-length value is not an integer.
        dpkt.NeedData: if the chunked body ends before the terminating
            chunk, or fewer than content-length bytes are available.
    """
    if headers.get('transfer-encoding', '').lower() == 'chunked':
        chunks = []
        found_end = False
        while True:
            try:
                sz = f.readline().split(None, 1)[0]
            except IndexError:
                raise dpkt.UnpackError('missing chunk size')
            # A size may be followed by ";name=value" chunk extensions,
            # which a recipient is expected to ignore.
            sz = sz.split(b';', 1)[0]
            try:
                n = int(sz, 16)
            except ValueError:
                raise dpkt.UnpackError('invalid chunk size')
            if n == 0:
                found_end = True
            buf = f.read(n)
            # The chunk data must be followed by a bare line terminator.
            if f.readline().strip():
                break
            if n and len(buf) == n:
                chunks.append(buf)
            else:
                break
        if not found_end:
            raise dpkt.NeedData('premature end of chunked body')
        body = b''.join(chunks)
    elif 'content-length' in headers:
        try:
            n = int(headers['content-length'])
        except (TypeError, ValueError):
            # Non-numeric value, or a repeated header stored as a list.
            raise dpkt.UnpackError(
                'invalid content-length: %r' % (headers['content-length'],))
        body = f.read(n)
        if len(body) != n:
            raise dpkt.NeedData('short body (missing %d bytes)' % (n - len(body)))
    elif 'content-type' in headers:
        body = f.read()
    else:
        # XXX - need to handle HTTP/0.9
        body = b''
    return body
78 79
class Message(dpkt.Packet):
    """Hypertext Transfer Protocol headers + body.

    Common base of ``Request`` and ``Response``: it parses and serializes
    the header section and the body, while subclasses deal with the first
    line of the message.

    Attributes:
        __hdr_defaults__: mapping of attribute name to default value, applied
            to instances that are constructed without a buffer.
        headers: dict of lower-cased header names to values, as produced by
            ``parse_headers`` (repeated headers hold a list of values).
        body: message body as bytes.
        data: bytes left in the buffer after the body was parsed.
    """

    __metaclass__ = type
    __hdr_defaults__ = {}
    headers = None
    body = None

    def __init__(self, *args, **kwargs):
        """Parse ``args[0]`` if given, otherwise build an empty message.

        Without positional arguments the message starts with empty headers,
        body and data, then receives ``__hdr_defaults__`` followed by any
        keyword arguments as attributes. Keyword arguments are ignored when
        a buffer is supplied.
        """
        if args:
            self.unpack(args[0])
        else:
            self.headers = OrderedDict()
            self.body = b''
            self.data = b''
            # NOTE: changing this to iteritems breaks py3 compatibility
            for k, v in self.__hdr_defaults__.items():
                setattr(self, k, v)
            for k, v in iteritems(kwargs):
                setattr(self, k, v)

    def unpack(self, buf, is_body_allowed=True):
        """Parse headers, body and trailing data out of ``buf``.

        Args:
            buf: bytes starting at the first header line.
            is_body_allowed: when false the body is not parsed and is set to
                empty bytes; everything after the headers ends up in ``data``.
        """
        f = BytesIO(buf)
        # Parse headers
        self.headers = parse_headers(f)
        # Parse body
        if is_body_allowed:
            self.body = parse_body(f, self.headers)
        else:
            # Keep body a bytes object so that str()/bytes() keep working on
            # messages that must not carry a body.
            self.body = b''
        # Save the rest
        self.data = f.read()

    def pack_hdr(self):
        """Return the header section as text, one 'name: value' line each.

        A header whose value is a list (a repeated header) is written as one
        line per list element.
        """
        lines = []
        for name, value in iteritems(self.headers):
            values = value if isinstance(value, list) else [value]
            for item in values:
                lines.append('%s: %s\r\n' % (name, item))
        return ''.join(lines)

    def __len__(self):
        return len(str(self))

    def __str__(self):
        """Return headers, a blank line and the body decoded as UTF-8."""
        body = self.body or b''
        return '%s\r\n%s' % (self.pack_hdr(), body.decode("utf8", "ignore"))

    def __bytes__(self):
        """Return headers and body as bytes.

        NOTE: when the body is empty no blank line is emitted after the
        headers; the separator is only written in front of a non-empty body.
        """
        # Not using byte interpolation to preserve Python 3.4 compatibility. The extra
        # \r\n doesn't get trimmed from the bytes, so it's necessary to omit the spacing
        # one when building the output if there's no body
        if self.body:
            return self.pack_hdr().encode("ascii", "ignore") + b'\r\n' + self.body
        else:
            return self.pack_hdr().encode("ascii", "ignore")
135 136
class Request(Message):
    """Hypertext Transfer Protocol Request.

    Parses and serializes the request line ('METHOD URI HTTP/VERSION') and
    leaves headers and body to ``Message``.

    Attributes:
        method: request method, e.g. 'GET'.
        uri: request target as it appears on the request line.
        version: protocol version string, e.g. '1.0'; '0.9' when the request
            line carries no version.
    """

    __hdr_defaults__ = {
        'method': 'GET',
        'uri': '/',
        'version': '1.0',
    }
    __methods = dict.fromkeys((
        'GET', 'PUT', 'ICY',
        'COPY', 'HEAD', 'LOCK', 'MOVE', 'POLL', 'POST',
        'BCOPY', 'BMOVE', 'MKCOL', 'TRACE', 'LABEL', 'MERGE',
        'DELETE', 'SEARCH', 'UNLOCK', 'REPORT', 'UPDATE', 'NOTIFY',
        'BDELETE', 'CONNECT', 'OPTIONS', 'CHECKIN',
        'PROPFIND', 'CHECKOUT', 'CCM_POST',
        'SUBSCRIBE', 'PROPPATCH', 'BPROPFIND',
        'BPROPPATCH', 'UNCHECKOUT', 'MKACTIVITY',
        'MKWORKSPACE', 'UNSUBSCRIBE', 'RPC_CONNECT',
        'VERSION-CONTROL',
        'BASELINE-CONTROL'
    ))
    __proto = 'HTTP'

    def unpack(self, buf):
        """Parse a request from ``buf``.

        Raises:
            dpkt.UnpackError: if the request line has fewer than two fields,
                the method is not a known one, or the third field does not
                start with 'HTTP'.
        """
        f = BytesIO(buf)
        line = f.readline().decode("ascii", "ignore")
        fields = line.strip().split()
        if len(fields) < 2:
            raise dpkt.UnpackError('invalid request: %r' % line)
        if fields[0] not in self.__methods:
            raise dpkt.UnpackError('invalid http method: %r' % fields[0])
        if len(fields) == 2:
            # HTTP/0.9 does not specify a version in the request line
            self.version = '0.9'
        else:
            if not fields[2].startswith(self.__proto):
                raise dpkt.UnpackError('invalid http version: %r' % fields[2])
            # Skip over 'HTTP' and the '/' that follows it.
            self.version = fields[2][len(self.__proto) + 1:]
        self.method = fields[0]
        self.uri = fields[1]
        Message.unpack(self, f.read())

    def __str__(self):
        return '%s %s %s/%s\r\n' % (self.method, self.uri, self.__proto,
                                    self.version) + Message.__str__(self)

    def __bytes__(self):
        """Return the request line, headers, a blank line and the body.

        The blank line ending the header section is always written, so the
        output agrees with ``__str__`` for requests with an empty body.
        """
        request_line = '%s %s %s/%s\r\n' % (self.method, self.uri, self.__proto,
                                            self.version)
        return (request_line.encode("ascii", "ignore") +
                self.pack_hdr().encode("ascii", "ignore") +
                b'\r\n' + (self.body or b''))
194 195
class Response(Message):
    """Hypertext Transfer Protocol Response.

    Parses and serializes the status line ('HTTP/VERSION STATUS REASON') and
    leaves headers and body to ``Message``.

    Attributes:
        version: protocol version string, e.g. '1.0'.
        status: status code as a string of digits, e.g. '200'.
        reason: reason phrase; empty string when the status line has none.
    """

    __hdr_defaults__ = {
        'version': '1.0',
        'status': '200',
        'reason': 'OK'
    }
    __proto = 'HTTP'

    def unpack(self, buf):
        """Parse a response from ``buf``.

        Raises:
            dpkt.UnpackError: if the status line has fewer than two fields,
                does not start with 'HTTP', or the status is not numeric.
        """
        f = BytesIO(buf)
        line = f.readline()
        fields = line.strip().decode("ascii", "ignore").split(None, 2)
        if (len(fields) < 2 or not fields[0].startswith(self.__proto)
                or not fields[1].isdigit()):
            raise dpkt.UnpackError('invalid response: %r' % line)
        # Skip over 'HTTP' and the '/' that follows it.
        self.version = fields[0][len(self.__proto) + 1:]
        self.status = fields[1]
        self.reason = fields[2] if len(fields) > 2 else ''
        # RFC Sec 4.3.
        # http://www.w3.org/Protocols/rfc2616/rfc2616-sec4.html#sec4.3.
        # For response messages, whether or not a message-body is included with
        # a message is dependent on both the request method and the response
        # status code (section 6.1.1). All responses to the HEAD request method
        # MUST NOT include a message-body, even though the presence of entity-
        # header fields might lead one to believe they do. All 1xx
        # (informational), 204 (no content), and 304 (not modified) responses
        # MUST NOT include a message-body. All other responses do include a
        # message-body, although it MAY be of zero length.
        code = int(self.status)
        is_body_allowed = code >= 200 and code not in (204, 304)
        Message.unpack(self, f.read(), is_body_allowed)

    def __str__(self):
        return '%s/%s %s %s\r\n' % (self.__proto, self.version, self.status,
                                    self.reason) + Message.__str__(self)

    def __bytes__(self):
        """Return the status line, headers, a blank line and the body.

        Headers and body are written exactly once, and the blank line ending
        the header section is always present.
        """
        status_line = '%s/%s %s %s\r\n' % (self.__proto, self.version,
                                           self.status, self.reason)
        return (status_line.encode("ascii", "ignore") +
                self.pack_hdr().encode("ascii", "ignore") +
                b'\r\n' + (self.body or b''))
243 244
def test_parse_request():
    """Parse a complete POST request, then the same request cut mid-header."""
    s = (
        b'POST /main/redirect/ab/1,295,,00.html HTTP/1.0\r\n'
        b'Referer: http://www.email.com/login/snap/login.jhtml\r\n'
        b'Connection: Keep-Alive\r\n'
        b'User-Agent: Mozilla/4.75 [en] (X11; U; OpenBSD 2.8 i386; Nav)\r\n'
        b'Host: ltd.snap.com\r\n'
        b'Accept: image/gif, image/x-xbitmap, image/jpeg, image/pjpeg, image/png, */*\r\n'
        b'Accept-Encoding: gzip\r\n'
        b'Accept-Language: en\r\n'
        b'Accept-Charset: iso-8859-1,*,utf-8\r\n'
        b'Content-type: application/x-www-form-urlencoded\r\n'
        b'Content-length: 61\r\n\r\n'
        b'sn=em&mn=dtest4&pw=this+is+atest&fr=true&login=Sign+in&od=www'
    )
    r = Request(s)
    assert r.method == 'POST'
    assert r.uri == '/main/redirect/ab/1,295,,00.html'
    assert r.body == b'sn=em&mn=dtest4&pw=this+is+atest&fr=true&login=Sign+in&od=www'
    assert r.headers['content-type'] == 'application/x-www-form-urlencoded'

    # A request cut off in the middle of its headers is parsed leniently:
    # the header section simply ends at the end of the data, and with no
    # content-length or content-type seen the body is empty.
    truncated = Request(s[:60])
    assert truncated.method == 'POST'
    assert truncated.uri == '/main/redirect/ab/1,295,,00.html'
    assert truncated.body == b''
257 258
def test_format_request():
    """Serialize a request as text and as bytes, then round-trip the bytes."""
    req = Request()
    assert str(req) == 'GET / HTTP/1.0\r\n\r\n'

    req.method = 'POST'
    req.uri = '/foo/bar/baz.html'
    req.headers['content-type'] = 'text/plain'
    req.headers['content-length'] = '5'
    req.body = b'hello'

    # Text form.
    text = str(req)
    assert text.startswith('POST /foo/bar/baz.html HTTP/1.0\r\n')
    assert text.endswith('\r\n\r\nhello')
    assert '\r\ncontent-length: 5\r\n' in text
    assert '\r\ncontent-type: text/plain\r\n' in text

    # Bytes form.
    raw = bytes(req)
    assert raw.startswith(b'POST /foo/bar/baz.html HTTP/1.0\r\n')
    assert raw.endswith(b'\r\n\r\nhello')
    assert b'\r\ncontent-length: 5\r\n' in raw
    assert b'\r\ncontent-type: text/plain\r\n' in raw

    # Parsing the serialized request must reproduce the same bytes.
    reparsed = Request(bytes(req))
    assert bytes(reparsed) == raw
279 280
def test_chunked_response():
    """Parse a response whose gzip body uses chunked transfer encoding."""
    header = (
        b'HTTP/1.1 200 OK\r\n'
        b'Cache-control: no-cache\r\n'
        b'Pragma: no-cache\r\n'
        b'Content-Type: text/javascript; charset=utf-8\r\n'
        b'Content-Encoding: gzip\r\n'
        b'Transfer-Encoding: chunked\r\n'
        b'Set-Cookie: S=gmail=agg:gmail_yj=v2s:gmproxy=JkU; Domain=.google.com; Path=/\r\n'
        b'Server: GFE/1.3\r\n'
        b'Date: Mon, 12 Dec 2005 22:33:23 GMT\r\n'
        b'\r\n'
    )
    # Chunk size lines are 'a', '152' and the terminating '0'.
    chunks = b"""a\r\n\x1f\x8b\x08\x00\x00\x00\x00\x00\x00\x00\r\n152\r\nm\x91MO\xc4 \x10\x86\xef\xfe\n\x82\xc9\x9eXJK\xe9\xb6\xee\xc1\xe8\x1e6\x9e4\xf1\xe0a5\x86R\xda\x12Yh\x80\xba\xfa\xef\x85\xee\x1a/\xf21\x99\x0c\xef0<\xc3\x81\xa0\xc3\x01\xe6\x10\xc1<\xa7eYT5\xa1\xa4\xac\xe1\xdb\x15:\xa4\x9d\x0c\xfa5K\x00\xf6.\xaa\xeb\x86\xd5y\xcdHY\x954\x8e\xbc*h\x8c\x8e!L7Y\xe6\'\xeb\x82WZ\xcf>8\x1ed\x87\x851X\xd8c\xe6\xbc\x17Z\x89\x8f\xac \x84e\xde\n!]\x96\x17i\xb5\x02{{\xc2z0\x1e\x0f#7\x9cw3v\x992\x9d\xfc\xc2c8\xea[/EP\xd6\xbc\xce\x84\xd0\xce\xab\xf7`\'\x1f\xacS\xd2\xc7\xd2\xfb\x94\x02N\xdc\x04\x0f\xee\xba\x19X\x03TtW\xd7\xb4\xd9\x92\n\xbcX\xa7;\xb0\x9b\'\x10$?F\xfd\xf3CzPt\x8aU\xef\xb8\xc8\x8b-\x18\xed\xec<\xe0\x83\x85\x08!\xf8"[\xb0\xd3j\x82h\x93\xb8\xcf\xd8\x9b\xba\xda\xd0\x92\x14\xa4a\rc\reM\xfd\x87=X;h\xd9j;\xe0db\x17\xc2\x02\xbd\xb0F\xc2in#\xfb:\xb6\xc4x\x15\xd6\x9f\x8a\xaf\xcf)\x0b^\xbc\xe7i\x11\x80\x8b\x00D\x01\xd8/\x82x\xf6\xd8\xf7J(\xae/\x11p\x1f+\xc4p\t:\xfe\xfd\xdf\xa3Y\xfa\xae4\x7f\x00\xc5\xa5\x95\xa1\xe2\x01\x00\x00\r\n0\r\n\r\n"""
    resp = Response(header + chunks)
    assert resp.version == '1.1'
    assert resp.status == '200'
    assert resp.reason == 'OK'
287 288
def test_multicookie_response():
    """A header that occurs twice must be collected into a two-item list."""
    raw = (
        b'HTTP/1.x 200 OK\r\n'
        b'Set-Cookie: first_cookie=cookie1; path=/; domain=.example.com\r\n'
        b'Set-Cookie: second_cookie=cookie2; path=/; domain=.example.com\r\n'
        b'Content-Length: 0\r\n'
        b'\r\n'
    )
    cookies = Response(raw).headers['set-cookie']
    assert isinstance(cookies, list)
    assert len(cookies) == 2
294 295
def test_noreason_response():
    """A status line without a reason phrase parses and round-trips."""
    raw = b'HTTP/1.1 200 \r\n\r\n'
    resp = Response(raw)
    assert resp.reason == ''
    assert bytes(resp) == raw
301 302
def test_body_forbidden_response():
    """A 304 response has no body, so the message after it stays separate."""
    remaining = (
        b'HTTP/1.1 304 Not Modified\r\n'
        b'Content-Type: text/css\r\n'
        b'Last-Modified: Wed, 14 Jan 2009 16:42:11 GMT\r\n'
        b'ETag: "3a7-496e15e3"\r\n'
        b'Cache-Control: private, max-age=414295\r\n'
        b'Date: Wed, 22 Sep 2010 17:55:54 GMT\r\n'
        b'Connection: keep-alive\r\n'
        b'Vary: Accept-Encoding\r\n\r\n'
        b'HTTP/1.1 200 OK\r\n'
        b'Server: Sun-ONE-Web-Server/6.1\r\n'
        b'ntCoent-length: 257\r\n'
        b'Content-Type: application/x-javascript\r\n'
        b'Last-Modified: Wed, 06 Jan 2010 19:34:06 GMT\r\n'
        b'ETag: "101-4b44e5ae"\r\n'
        b'Accept-Ranges: bytes\r\n'
        b'Content-Encoding: gzip\r\n'
        b'Cache-Control: private, max-age=439726\r\n'
        b'Date: Wed, 22 Sep 2010 17:55:54 GMT\r\n'
        b'Connection: keep-alive\r\n'
        b'Vary: Accept-Encoding\r\n'
    )
    responses = []
    # Keep parsing whatever each response leaves behind in .data.
    while remaining:
        parsed = Response(remaining)
        remaining = parsed.data
        responses.append(parsed)

    # the second HTTP response should be a standalone message
    assert len(responses) == 2
332 333
def test_request_version():
    """Check version parsing for HTTP/1.0, HTTP/0.9 and a bogus protocol."""
    s = b"""GET / HTTP/1.0\r\n\r\n"""
    r = Request(s)
    assert r.method == 'GET'
    assert r.uri == '/'
    assert r.version == '1.0'

    # No version on the request line means HTTP/0.9.
    s = b"""GET /\r\n\r\n"""
    r = Request(s)
    assert r.method == 'GET'
    assert r.uri == '/'
    assert r.version == '0.9'

    # A protocol other than HTTP must be rejected with UnpackError; any
    # other outcome (success or a different exception) fails the test.
    s = b"""GET / CHEESE/1.0\r\n\r\n"""
    try:
        Request(s)
    except dpkt.UnpackError:
        pass
    else:
        assert False, 'invalid protocol version parsed!'
353 354
def test_invalid_header():
    """Parse a valid request, a truncated one, and a request that is garbage."""
    # valid header.
    s = b'POST /main/redirect/ab/1,295,,00.html HTTP/1.0\r\n' \
        b'Referer: http://www.email.com/login/snap/login.jhtml\r\n' \
        b'Connection: Keep-Alive\r\n' \
        b'User-Agent: Mozilla/4.75 [en] (X11; U; OpenBSD 2.8 i386; Nav)\r\n' \
        b'Host: ltd.snap.com\r\n' \
        b'Accept: image/gif, image/x-xbitmap, image/jpeg, image/pjpeg, image/png, */*\r\n' \
        b'Accept-Encoding: gzip\r\n' \
        b'Accept-Language: en\r\n' \
        b'Accept-Charset: iso-8859-1,*,utf-8\r\n' \
        b'Content-type: application/x-www-form-urlencoded\r\n' \
        b'Content-length: 61\r\n\r\n' \
        b'sn=em&mn=dtest4&pw=this+is+atest&fr=true&login=Sign+in&od=www'
    r = Request(s)
    assert r.method == 'POST'
    assert r.uri == '/main/redirect/ab/1,295,,00.html'
    assert r.body == b'sn=em&mn=dtest4&pw=this+is+atest&fr=true&login=Sign+in&od=www'
    assert r.headers['content-type'] == 'application/x-www-form-urlencoded'

    # invalid header: the data stops before the blank line that should end
    # the header section, yet the request must still be parsed.
    s_weird_end = b'POST /main/redirect/ab/1,295,,00.html HTTP/1.0\r\n' \
        b'Referer: http://www.email.com/login/snap/login.jhtml\r\n' \
        b'Connection: Keep-Alive\r\n' \
        b'User-Agent: Mozilla/4.75 [en] (X11; U; OpenBSD 2.8 i386; Nav)\r\n' \
        b'Host: ltd.snap.com\r\n' \
        b'Accept: image/gif, image/x-xbitmap, image/jpeg, image/pjpeg, image/png, */*\r\n' \
        b'Accept-Encoding: gzip\r\n' \
        b'Accept-Language: en\r\n' \
        b'Accept-Charset: iso-8859-1,*,utf-8\r\n' \
        b'Content-type: application/x-www-form-urlencoded\r\n' \
        b'Cookie: TrackID=1PWdcr3MO_C611BGW'
    r = Request(s_weird_end)
    assert r.method == 'POST'
    assert r.uri == '/main/redirect/ab/1,295,,00.html'
    assert r.headers['content-type'] == 'application/x-www-form-urlencoded'

    # messy header: must be rejected with UnpackError. Any other exception
    # propagates and fails the test with its own traceback.
    s_messy_header = b'aaaaaaaaa\r\nbbbbbbbbb'
    try:
        Request(s_messy_header)
    except dpkt.UnpackError:
        pass
    else:
        assert False, 'messy header parsed without error'
404 405
def test_gzip_response():
    """Parse a gzip-encoded response and decompress its body."""
    import zlib
    # valid response, compressed using gzip
    raw = (
        b'HTTP/1.0 200 OK\r\n'
        b'Server: SimpleHTTP/0.6 Python/2.7.12\r\n'
        b'Date: Fri, 10 Mar 2017 20:43:08 GMT\r\n'
        b'Content-type: text/plain\r\n'
        b'Content-Encoding: gzip\r\n'
        b'Content-Length: 68\r\n'
        b'Last-Modified: Fri, 10 Mar 2017 20:40:43 GMT\r\n\r\n'
        b'\x1f\x8b\x08\x00\x00\x00\x00\x00\x02\x03\x0b\xc9\xc8,V\x00\xa2D'
        b'\x85\xb2\xd4\xa2J\x85\xe2\xdc\xc4\x9c\x1c\x85\xb4\xcc\x9cT\x85\x92'
        b'|\x85\x92\xd4\xe2\x12\x85\xf4\xaa\xcc\x02\x85\xa2\xd4\xe2\x82\xfc'
        b'\xbc\xe2\xd4b=.\x00\x01(m\xad2\x00\x00\x00'
    )
    resp = Response(raw)
    assert resp.version == '1.0'
    assert resp.status == '200'
    assert resp.reason == 'OK'
    # 16 + MAX_WBITS makes zlib expect a gzip header and trailer.
    gunzip = zlib.decompressobj(16 + zlib.MAX_WBITS)
    plain = gunzip.decompress(resp.body)
    assert plain.startswith(b'This is a very small file')
if __name__ == '__main__':
    # Run every test defined in this file.
    test_parse_request()
    test_format_request()
    test_chunked_response()
    test_multicookie_response()
    test_noreason_response()
    test_request_version()
    test_invalid_header()
    test_body_forbidden_response()
    test_gzip_response()
    print('Tests Successful...')