Package dpkt :: Module http
[hide private]
[frames | no frames]

Source Code for Module dpkt.http

  1  # $Id: http.py 59 2010-03-24 15:31:17Z jon.oberheide $ 
  2   
  3  """Hypertext Transfer Protocol.""" 
  4   
  5  import cStringIO 
  6  import dpkt 
  7   
def parse_headers(f):
    """Return dict of HTTP headers parsed from a file object.

    Lines are read from ``f`` up to and including the blank line that ends
    the header section.  Header names are lower-cased.  A header seen once
    maps to its value string; a header seen several times maps to a list of
    its values in the order they were read.

    Raises dpkt.NeedData if ``f`` runs out before the blank line is found,
    and dpkt.UnpackError for a line that is not of the form ``name: value``.
    """
    d = {}
    while 1:
        line = f.readline()
        if not line:
            raise dpkt.NeedData('premature end of headers')
        line = line.strip()
        if not line:
            break
        # Split on the first colon, not on whitespace: the space after the
        # colon is optional, so 'Host:example.com' is a valid header line.
        parts = line.split(':', 1)
        name = parts[0]
        # name.split() != [name] is true for an empty name and for a name
        # that contains whitespace; both are rejected.
        if len(parts) != 2 or name.split() != [name]:
            raise dpkt.UnpackError('invalid header: %r' % line)
        k = name.lower()
        v = parts[1].strip()
        if k in d:
            # Repeated header: promote the stored value to a list.
            if not isinstance(d[k], list):
                d[k] = [d[k]]
            d[k].append(v)
        else:
            d[k] = v
    return d
30
def parse_body(f, headers):
    """Return HTTP body parsed from a file object, given HTTP header dict.

    ``headers`` is expected to use lower-case keys, as produced by
    parse_headers().  The body is located as follows:

    * ``transfer-encoding: chunked`` -- chunks are read and concatenated
      until the zero-size chunk;
    * ``content-length`` -- exactly that many bytes are read;
    * ``content-type`` only -- everything left in ``f`` is the body;
    * otherwise -- the body is empty.

    Raises dpkt.UnpackError for a missing or malformed chunk size or a
    malformed content-length, and dpkt.NeedData when ``f`` ends before the
    announced amount of data has been read.
    """
    if headers.get('transfer-encoding', '').lower() == 'chunked':
        chunks = []
        found_end = False
        while 1:
            try:
                sz = f.readline().split(None, 1)[0]
            except IndexError:
                raise dpkt.UnpackError('missing chunk size')
            try:
                # A chunk size may carry ';name=value' extensions; only the
                # part before the first ';' is the hexadecimal size.
                n = int(sz.split(';', 1)[0], 16)
            except ValueError:
                raise dpkt.UnpackError('invalid chunk size: %r' % sz)
            if n < 0:
                # read() with a negative size would consume the whole stream.
                raise dpkt.UnpackError('invalid chunk size: %r' % sz)
            if n == 0:
                found_end = True
            buf = f.read(n)
            # The chunk data must be followed by an empty line; anything
            # else ends parsing.
            if f.readline().strip():
                break
            if n and len(buf) == n:
                chunks.append(buf)
            else:
                # Either the terminating zero-size chunk or a short read.
                break
        if not found_end:
            raise dpkt.NeedData('premature end of chunked body')
        body = ''.join(chunks)
    elif 'content-length' in headers:
        try:
            n = int(headers['content-length'])
        except (TypeError, ValueError):
            raise dpkt.UnpackError(
                'invalid content-length: %r' % (headers['content-length'],))
        if n < 0:
            raise dpkt.UnpackError(
                'invalid content-length: %r' % (headers['content-length'],))
        body = f.read(n)
        if len(body) != n:
            raise dpkt.NeedData('short body (missing %d bytes)' % (n - len(body)))
    elif 'content-type' in headers:
        body = f.read()
    else:
        # XXX - need to handle HTTP/0.9
        body = ''
    return body
65
class Message(dpkt.Packet):
    """Hypertext Transfer Protocol headers + body.

    ``headers`` is a dict keyed by lower-cased header name, as returned by
    parse_headers(); a header that occurred more than once holds a list of
    values.  ``body`` is the message body string.
    """
    # Python 2 metaclass hook: this class is built with plain ``type``.
    # NOTE(review): presumably this bypasses the metaclass that dpkt.Packet
    # installs -- confirm against dpkt.Packet.
    __metaclass__ = type
    # Attribute name -> default value, applied by __init__ when no buffer
    # is given.  Subclasses override this.
    __hdr_defaults__ = {}
    headers = None
    body = None

    def __init__(self, *args, **kwargs):
        """Parse ``args[0]`` if given, otherwise build an empty message.

        Without positional arguments the instance gets empty headers and
        body, then the class defaults, then any keyword arguments, each set
        as an attribute.
        """
        if args:
            self.unpack(args[0])
        else:
            self.headers = {}
            self.body = ''
            for k, v in self.__hdr_defaults__.items():
                setattr(self, k, v)
            for k, v in kwargs.items():
                setattr(self, k, v)

    def unpack(self, buf):
        """Parse headers and body from ``buf``; leftover input goes to ``data``."""
        f = cStringIO.StringIO(buf)
        # Parse headers
        self.headers = parse_headers(f)
        # Parse body
        self.body = parse_body(f, self.headers)
        # Save the rest
        self.data = f.read()

    def pack_hdr(self):
        """Return the header section as ``name: value`` lines ending in CRLF.

        A header whose value is a list (a header that was repeated in the
        parsed message) is written as one line per value, so that a parsed
        message serialises back to valid header lines.
        """
        lines = []
        for name, value in self.headers.items():
            if isinstance(value, list):
                values = value
            else:
                values = [value]
            for v in values:
                lines.append('%s: %s\r\n' % (name, v))
        return ''.join(lines)

    def __len__(self):
        """Return the length of the serialised message."""
        return len(str(self))

    def __str__(self):
        """Return headers, a blank line, then the body."""
        return '%s\r\n%s' % (self.pack_hdr(), self.body)
101
class Request(Message):
    """Hypertext Transfer Protocol Request."""
    # Attribute defaults used when a request is built without a buffer.
    __hdr_defaults__ = {
        'method':'GET',
        'uri':'/',
        'version':'1.0',
        }
    # Request methods accepted by unpack(); only the keys are used.
    __methods = dict.fromkeys((
        'GET', 'PUT', 'ICY',
        'COPY', 'HEAD', 'LOCK', 'MOVE', 'POLL', 'POST',
        'BCOPY', 'BMOVE', 'MKCOL', 'TRACE', 'LABEL', 'MERGE',
        'DELETE', 'SEARCH', 'UNLOCK', 'REPORT', 'UPDATE', 'NOTIFY',
        'BDELETE', 'CONNECT', 'OPTIONS', 'CHECKIN',
        'PROPFIND', 'CHECKOUT', 'CCM_POST',
        'SUBSCRIBE', 'PROPPATCH', 'BPROPFIND',
        'BPROPPATCH', 'UNCHECKOUT', 'MKACTIVITY',
        'MKWORKSPACE', 'UNSUBSCRIBE', 'RPC_CONNECT',
        'VERSION-CONTROL',
        'BASELINE-CONTROL'
        ))
    __proto = 'HTTP'

    def unpack(self, buf):
        """Parse the request line, then hand the remainder to Message.unpack.

        Raises dpkt.UnpackError unless the first line has exactly three
        fields, a known method, and a third field starting with the
        protocol name.
        """
        stream = cStringIO.StringIO(buf)
        request_line = stream.readline()
        fields = request_line.strip().split()
        well_formed = (len(fields) == 3
                       and fields[0] in self.__methods
                       and fields[2].startswith(self.__proto))
        if not well_formed:
            raise dpkt.UnpackError('invalid request: %r' % request_line)
        method, uri, proto_version = fields
        self.method = method
        self.uri = uri
        # Skip the protocol name and the separator character after it.
        self.version = proto_version[len(self.__proto) + 1:]
        Message.unpack(self, stream.read())

    def __str__(self):
        """Return the request line followed by the serialised message."""
        request_line = '%s %s %s/%s\r\n' % (
            self.method, self.uri, self.__proto, self.version)
        return request_line + Message.__str__(self)
139
class Response(Message):
    """Hypertext Transfer Protocol Response."""
    # Attribute defaults used when a response is built without a buffer.
    __hdr_defaults__ = {
        'version':'1.0',
        'status':'200',
        'reason':'OK'
        }
    __proto = 'HTTP'

    def unpack(self, buf):
        """Parse the status line, then hand the remainder to Message.unpack.

        The status line must have at least a protocol/version field and an
        all-digit status code; the reason phrase is optional and defaults to
        the empty string.  Raises dpkt.UnpackError for anything else.
        """
        f = cStringIO.StringIO(buf)
        line = f.readline()
        # At most three fields: the reason phrase may itself contain spaces.
        l = line.strip().split(None, 2)
        if len(l) < 2 or not l[0].startswith(self.__proto) or not l[1].isdigit():
            raise dpkt.UnpackError('invalid response: %r' % line)
        # Skip the protocol name and the separator character after it.
        self.version = l[0][len(self.__proto)+1:]
        self.status = l[1]
        # A status line without a reason phrase passes the check above, so
        # l[2] cannot be read unconditionally.
        if len(l) > 2:
            self.reason = l[2]
        else:
            self.reason = ''
        Message.unpack(self, f.read())

    def __str__(self):
        """Return the status line followed by the serialised message."""
        return '%s/%s %s %s\r\n' % (self.__proto, self.version, self.status,
                                    self.reason) + Message.__str__(self)
if __name__ == '__main__':
    import unittest

    class HTTPTest(unittest.TestCase):
        """Self-tests for Request and Response parsing and formatting."""

        def test_parse_request(self):
            """A full POST request parses; a truncated one is rejected."""
            s = """POST /main/redirect/ab/1,295,,00.html HTTP/1.0\r\nReferer: http://www.email.com/login/snap/login.jhtml\r\nConnection: Keep-Alive\r\nUser-Agent: Mozilla/4.75 [en] (X11; U; OpenBSD 2.8 i386; Nav)\r\nHost: ltd.snap.com\r\nAccept: image/gif, image/x-xbitmap, image/jpeg, image/pjpeg, image/png, */*\r\nAccept-Encoding: gzip\r\nAccept-Language: en\r\nAccept-Charset: iso-8859-1,*,utf-8\r\nContent-type: application/x-www-form-urlencoded\r\nContent-length: 61\r\n\r\nsn=em&mn=dtest4&pw=this+is+atest&fr=true&login=Sign+in&od=www"""
            r = Request(s)
            assert r.method == 'POST'
            assert r.uri == '/main/redirect/ab/1,295,,00.html'
            assert r.body == 'sn=em&mn=dtest4&pw=this+is+atest&fr=true&login=Sign+in&od=www'
            assert r.headers['content-type'] == 'application/x-www-form-urlencoded'
            # Asserting a non-empty string always passes, so use assertRaises
            # to make the test fail when the truncated request is accepted.
            self.assertRaises(dpkt.UnpackError, Request, s[:60])

        def test_format_request(self):
            """A request serialises as expected and survives a round trip."""
            r = Request()
            assert str(r) == 'GET / HTTP/1.0\r\n\r\n'
            r.method = 'POST'
            r.uri = '/foo/bar/baz.html'
            r.headers['content-type'] = 'text/plain'
            r.headers['content-length'] = '5'
            r.body = 'hello'
            assert str(r) == 'POST /foo/bar/baz.html HTTP/1.0\r\ncontent-length: 5\r\ncontent-type: text/plain\r\n\r\nhello'
            r = Request(str(r))
            assert str(r) == 'POST /foo/bar/baz.html HTTP/1.0\r\ncontent-length: 5\r\ncontent-type: text/plain\r\n\r\nhello'

        def test_chunked_response(self):
            """The status line of a chunked, gzip-encoded response parses."""
            s = """HTTP/1.1 200 OK\r\nCache-control: no-cache\r\nPragma: no-cache\r\nContent-Type: text/javascript; charset=utf-8\r\nContent-Encoding: gzip\r\nTransfer-Encoding: chunked\r\nSet-Cookie: S=gmail=agg:gmail_yj=v2s:gmproxy=JkU; Domain=.google.com; Path=/\r\nServer: GFE/1.3\r\nDate: Mon, 12 Dec 2005 22:33:23 GMT\r\n\r\na\r\n\x1f\x8b\x08\x00\x00\x00\x00\x00\x00\x00\r\n152\r\nm\x91MO\xc4 \x10\x86\xef\xfe\n\x82\xc9\x9eXJK\xe9\xb6\xee\xc1\xe8\x1e6\x9e4\xf1\xe0a5\x86R\xda\x12Yh\x80\xba\xfa\xef\x85\xee\x1a/\xf21\x99\x0c\xef0<\xc3\x81\xa0\xc3\x01\xe6\x10\xc1<\xa7eYT5\xa1\xa4\xac\xe1\xdb\x15:\xa4\x9d\x0c\xfa5K\x00\xf6.\xaa\xeb\x86\xd5y\xcdHY\x954\x8e\xbc*h\x8c\x8e!L7Y\xe6\'\xeb\x82WZ\xcf>8\x1ed\x87\x851X\xd8c\xe6\xbc\x17Z\x89\x8f\xac \x84e\xde\n!]\x96\x17i\xb5\x02{{\xc2z0\x1e\x0f#7\x9cw3v\x992\x9d\xfc\xc2c8\xea[/EP\xd6\xbc\xce\x84\xd0\xce\xab\xf7`\'\x1f\xacS\xd2\xc7\xd2\xfb\x94\x02N\xdc\x04\x0f\xee\xba\x19X\x03TtW\xd7\xb4\xd9\x92\n\xbcX\xa7;\xb0\x9b\'\x10$?F\xfd\xf3CzPt\x8aU\xef\xb8\xc8\x8b-\x18\xed\xec<\xe0\x83\x85\x08!\xf8"[\xb0\xd3j\x82h\x93\xb8\xcf\xd8\x9b\xba\xda\xd0\x92\x14\xa4a\rc\reM\xfd\x87=X;h\xd9j;\xe0db\x17\xc2\x02\xbd\xb0F\xc2in#\xfb:\xb6\xc4x\x15\xd6\x9f\x8a\xaf\xcf)\x0b^\xbc\xe7i\x11\x80\x8b\x00D\x01\xd8/\x82x\xf6\xd8\xf7J(\xae/\x11p\x1f+\xc4p\t:\xfe\xfd\xdf\xa3Y\xfa\xae4\x7f\x00\xc5\xa5\x95\xa1\xe2\x01\x00\x00\r\n0\r\n\r\n"""
            r = Response(s)
            assert r.version == '1.1'
            assert r.status == '200'
            assert r.reason == 'OK'

        def test_multicookie_response(self):
            """Repeated Set-Cookie headers are collected into a list."""
            s = """HTTP/1.x 200 OK\r\nSet-Cookie: first_cookie=cookie1; path=/; domain=.example.com\r\nSet-Cookie: second_cookie=cookie2; path=/; domain=.example.com\r\nContent-Length: 0\r\n\r\n"""
            r = Response(s)
            assert type(r.headers['set-cookie']) is list
            assert len(r.headers['set-cookie']) == 2

    unittest.main()