00001
00002
00003
00004
00005
00006
00007
00008
00009
00010 #include <cstdlib>
00011 #include <boost/regex.hpp>
00012 #include <boost/logic/tribool.hpp>
00013 #include <pion/net/HTTPParser.hpp>
00014 #include <pion/net/HTTPRequest.hpp>
00015 #include <pion/net/HTTPResponse.hpp>
00016 #include <pion/net/HTTPMessage.hpp>
00017
00018
00019 namespace pion {
00020 namespace net {
00021
00022
00023
00024
00025 const boost::uint32_t HTTPParser::STATUS_MESSAGE_MAX = 1024;
00026 const boost::uint32_t HTTPParser::METHOD_MAX = 1024;
00027 const boost::uint32_t HTTPParser::RESOURCE_MAX = 256 * 1024;
00028 const boost::uint32_t HTTPParser::QUERY_STRING_MAX = 1024 * 1024;
00029 const boost::uint32_t HTTPParser::HEADER_NAME_MAX = 1024;
00030 const boost::uint32_t HTTPParser::HEADER_VALUE_MAX = 1024 * 1024;
00031 const boost::uint32_t HTTPParser::QUERY_NAME_MAX = 1024;
00032 const boost::uint32_t HTTPParser::QUERY_VALUE_MAX = 1024 * 1024;
00033 const boost::uint32_t HTTPParser::COOKIE_NAME_MAX = 1024;
00034 const boost::uint32_t HTTPParser::COOKIE_VALUE_MAX = 1024 * 1024;
00035 const std::size_t HTTPParser::DEFAULT_CONTENT_MAX = 1024 * 1024;
00036 HTTPParser::ErrorCategory * HTTPParser::m_error_category_ptr = NULL;
00037 boost::once_flag HTTPParser::m_instance_flag = BOOST_ONCE_INIT;
00038
00039
00040
00041
00042 boost::tribool HTTPParser::parse(HTTPMessage& http_msg,
00043 boost::system::error_code& ec)
00044 {
00045 PION_ASSERT(! eof() );
00046
00047 boost::tribool rc = boost::indeterminate;
00048 std::size_t total_bytes_parsed = 0;
00049
00050 if(http_msg.hasMissingPackets()) {
00051 http_msg.setDataAfterMissingPacket(true);
00052 }
00053
00054 do {
00055 switch (m_message_parse_state) {
00056
00057 case PARSE_START:
00058 m_message_parse_state = PARSE_HEADERS;
00059
00060
00061
00062 case PARSE_HEADERS:
00063 rc = parseHeaders(http_msg, ec);
00064 total_bytes_parsed += m_bytes_last_read;
00065
00066 if (rc == true) {
00067
00068 rc = finishHeaderParsing(http_msg, ec);
00069 }
00070 break;
00071
00072
00073 case PARSE_CHUNKS:
00074 rc = parseChunks(http_msg.getChunkCache(), ec);
00075 total_bytes_parsed += m_bytes_last_read;
00076
00077 if (rc == true) {
00078 http_msg.concatenateChunks();
00079 }
00080 break;
00081
00082
00083 case PARSE_CONTENT:
00084 rc = consumeContent(http_msg, ec);
00085 total_bytes_parsed += m_bytes_last_read;
00086 break;
00087
00088
00089 case PARSE_CONTENT_NO_LENGTH:
00090 consumeContentAsNextChunk(http_msg.getChunkCache());
00091 total_bytes_parsed += m_bytes_last_read;
00092 break;
00093
00094
00095 case PARSE_END:
00096 rc = true;
00097 break;
00098 }
00099 } while ( boost::indeterminate(rc) && ! eof() );
00100
00101
00102 if (rc == true) {
00103 m_message_parse_state = PARSE_END;
00104 finish(http_msg);
00105 } else if(rc == false) {
00106 computeMsgStatus(http_msg, false);
00107 }
00108
00109
00110 m_bytes_last_read = total_bytes_parsed;
00111
00112 return rc;
00113 }
00114
00115 boost::tribool HTTPParser::parseMissingData(HTTPMessage& http_msg,
00116 std::size_t len, boost::system::error_code& ec)
00117 {
00118 static const char MISSING_DATA_CHAR = 'X';
00119 boost::tribool rc = boost::indeterminate;
00120
00121 http_msg.setMissingPackets(true);
00122
00123 switch (m_message_parse_state) {
00124
00125
00126 case PARSE_START:
00127 case PARSE_HEADERS:
00128 setError(ec, ERROR_MISSING_HEADER_DATA);
00129 rc = false;
00130 break;
00131
00132
00133 case PARSE_CHUNKS:
00134
00135 if (m_chunked_content_parse_state == PARSE_CHUNK
00136 && m_bytes_read_in_current_chunk < m_size_of_current_chunk
00137 && (m_size_of_current_chunk - m_bytes_read_in_current_chunk) >= len)
00138 {
00139
00140 for (std::size_t n = 0; n < len && http_msg.getChunkCache().size() < m_max_content_length; ++n)
00141 http_msg.getChunkCache().push_back(MISSING_DATA_CHAR);
00142
00143 m_bytes_read_in_current_chunk += len;
00144 m_bytes_last_read = len;
00145 m_bytes_total_read += len;
00146 m_bytes_content_read += len;
00147
00148 if (m_bytes_read_in_current_chunk == m_size_of_current_chunk) {
00149 m_chunked_content_parse_state = PARSE_EXPECTING_CR_AFTER_CHUNK;
00150 }
00151 } else {
00152
00153 setError(ec, ERROR_MISSING_CHUNK_DATA);
00154 rc = false;
00155 }
00156 break;
00157
00158
00159 case PARSE_CONTENT:
00160
00161 if (m_bytes_content_remaining == 0) {
00162
00163 rc = true;
00164 } else if (m_bytes_content_remaining < len) {
00165
00166 setError(ec, ERROR_MISSING_TOO_MUCH_CONTENT);
00167 rc = false;
00168 } else {
00169
00170
00171 if ( (m_bytes_content_read+len) <= m_max_content_length) {
00172
00173 for (std::size_t n = 0; n < len; ++n)
00174 http_msg.getContent()[m_bytes_content_read++] = MISSING_DATA_CHAR;
00175 } else {
00176 m_bytes_content_read += len;
00177 }
00178
00179 m_bytes_content_remaining -= len;
00180 m_bytes_total_read += len;
00181 m_bytes_last_read = len;
00182
00183 if (m_bytes_content_remaining == 0)
00184 rc = true;
00185 }
00186 break;
00187
00188
00189 case PARSE_CONTENT_NO_LENGTH:
00190
00191 for (std::size_t n = 0; n < len && http_msg.getChunkCache().size() < m_max_content_length; ++n)
00192 http_msg.getChunkCache().push_back(MISSING_DATA_CHAR);
00193 m_bytes_last_read = len;
00194 m_bytes_total_read += len;
00195 m_bytes_content_read += len;
00196 break;
00197
00198
00199 case PARSE_END:
00200 rc = true;
00201 break;
00202 }
00203
00204
00205 if (rc == true) {
00206 m_message_parse_state = PARSE_END;
00207 finish(http_msg);
00208 } else if(rc == false) {
00209 computeMsgStatus(http_msg, false);
00210 }
00211
00212 return rc;
00213 }
00214
00215 boost::tribool HTTPParser::parseHeaders(HTTPMessage& http_msg,
00216 boost::system::error_code& ec)
00217 {
00218
00219
00220
00221
00222
00223
00224
00225 const char *read_start_ptr = m_read_ptr;
00226 m_bytes_last_read = 0;
00227 while (m_read_ptr < m_read_end_ptr) {
00228
00229 if (m_save_raw_headers)
00230 m_raw_headers += *m_read_ptr;
00231
00232 switch (m_headers_parse_state) {
00233 case PARSE_METHOD_START:
00234
00235 if (*m_read_ptr != ' ' && *m_read_ptr!='\r' && *m_read_ptr!='\n') {
00236 if (!isChar(*m_read_ptr) || isControl(*m_read_ptr) || isSpecial(*m_read_ptr)) {
00237 setError(ec, ERROR_METHOD_CHAR);
00238 return false;
00239 }
00240 m_headers_parse_state = PARSE_METHOD;
00241 m_method.erase();
00242 m_method.push_back(*m_read_ptr);
00243 }
00244 break;
00245
00246 case PARSE_METHOD:
00247
00248 if (*m_read_ptr == ' ') {
00249 m_resource.erase();
00250 m_headers_parse_state = PARSE_URI_STEM;
00251 } else if (!isChar(*m_read_ptr) || isControl(*m_read_ptr) || isSpecial(*m_read_ptr)) {
00252 setError(ec, ERROR_METHOD_CHAR);
00253 return false;
00254 } else if (m_method.size() >= METHOD_MAX) {
00255 setError(ec, ERROR_METHOD_SIZE);
00256 return false;
00257 } else {
00258 m_method.push_back(*m_read_ptr);
00259 }
00260 break;
00261
00262 case PARSE_URI_STEM:
00263
00264 if (*m_read_ptr == ' ') {
00265 m_headers_parse_state = PARSE_HTTP_VERSION_H;
00266 } else if (*m_read_ptr == '?') {
00267 m_query_string.erase();
00268 m_headers_parse_state = PARSE_URI_QUERY;
00269 } else if (*m_read_ptr == '\r') {
00270 http_msg.setVersionMajor(0);
00271 http_msg.setVersionMinor(0);
00272 m_headers_parse_state = PARSE_EXPECTING_NEWLINE;
00273 } else if (*m_read_ptr == '\n') {
00274 http_msg.setVersionMajor(0);
00275 http_msg.setVersionMinor(0);
00276 m_headers_parse_state = PARSE_EXPECTING_CR;
00277 } else if (isControl(*m_read_ptr)) {
00278 setError(ec, ERROR_URI_CHAR);
00279 return false;
00280 } else if (m_resource.size() >= RESOURCE_MAX) {
00281 setError(ec, ERROR_URI_SIZE);
00282 return false;
00283 } else {
00284 m_resource.push_back(*m_read_ptr);
00285 }
00286 break;
00287
00288 case PARSE_URI_QUERY:
00289
00290 if (*m_read_ptr == ' ') {
00291 m_headers_parse_state = PARSE_HTTP_VERSION_H;
00292 } else if (isControl(*m_read_ptr)) {
00293 setError(ec, ERROR_QUERY_CHAR);
00294 return false;
00295 } else if (m_query_string.size() >= QUERY_STRING_MAX) {
00296 setError(ec, ERROR_QUERY_SIZE);
00297 return false;
00298 } else {
00299 m_query_string.push_back(*m_read_ptr);
00300 }
00301 break;
00302
00303 case PARSE_HTTP_VERSION_H:
00304
00305 if (*m_read_ptr == '\r') {
00306
00307 if (! m_is_request) {
00308 setError(ec, ERROR_VERSION_EMPTY);
00309 return false;
00310 }
00311 http_msg.setVersionMajor(0);
00312 http_msg.setVersionMinor(0);
00313 m_headers_parse_state = PARSE_EXPECTING_NEWLINE;
00314 } else if (*m_read_ptr == '\n') {
00315
00316 if (! m_is_request) {
00317 setError(ec, ERROR_VERSION_EMPTY);
00318 return false;
00319 }
00320 http_msg.setVersionMajor(0);
00321 http_msg.setVersionMinor(0);
00322 m_headers_parse_state = PARSE_EXPECTING_CR;
00323 } else if (*m_read_ptr != 'H') {
00324 setError(ec, ERROR_VERSION_CHAR);
00325 return false;
00326 }
00327 m_headers_parse_state = PARSE_HTTP_VERSION_T_1;
00328 break;
00329
00330 case PARSE_HTTP_VERSION_T_1:
00331
00332 if (*m_read_ptr != 'T') {
00333 setError(ec, ERROR_VERSION_CHAR);
00334 return false;
00335 }
00336 m_headers_parse_state = PARSE_HTTP_VERSION_T_2;
00337 break;
00338
00339 case PARSE_HTTP_VERSION_T_2:
00340
00341 if (*m_read_ptr != 'T') {
00342 setError(ec, ERROR_VERSION_CHAR);
00343 return false;
00344 }
00345 m_headers_parse_state = PARSE_HTTP_VERSION_P;
00346 break;
00347
00348 case PARSE_HTTP_VERSION_P:
00349
00350 if (*m_read_ptr != 'P') {
00351 setError(ec, ERROR_VERSION_CHAR);
00352 return false;
00353 }
00354 m_headers_parse_state = PARSE_HTTP_VERSION_SLASH;
00355 break;
00356
00357 case PARSE_HTTP_VERSION_SLASH:
00358
00359 if (*m_read_ptr != '/') {
00360 setError(ec, ERROR_VERSION_CHAR);
00361 return false;
00362 }
00363 m_headers_parse_state = PARSE_HTTP_VERSION_MAJOR_START;
00364 break;
00365
00366 case PARSE_HTTP_VERSION_MAJOR_START:
00367
00368 if (!isDigit(*m_read_ptr)) {
00369 setError(ec, ERROR_VERSION_CHAR);
00370 return false;
00371 }
00372 http_msg.setVersionMajor(*m_read_ptr - '0');
00373 m_headers_parse_state = PARSE_HTTP_VERSION_MAJOR;
00374 break;
00375
00376 case PARSE_HTTP_VERSION_MAJOR:
00377
00378 if (*m_read_ptr == '.') {
00379 m_headers_parse_state = PARSE_HTTP_VERSION_MINOR_START;
00380 } else if (isDigit(*m_read_ptr)) {
00381 http_msg.setVersionMajor( (http_msg.getVersionMajor() * 10)
00382 + (*m_read_ptr - '0') );
00383 } else {
00384 setError(ec, ERROR_VERSION_CHAR);
00385 return false;
00386 }
00387 break;
00388
00389 case PARSE_HTTP_VERSION_MINOR_START:
00390
00391 if (!isDigit(*m_read_ptr)) {
00392 setError(ec, ERROR_VERSION_CHAR);
00393 return false;
00394 }
00395 http_msg.setVersionMinor(*m_read_ptr - '0');
00396 m_headers_parse_state = PARSE_HTTP_VERSION_MINOR;
00397 break;
00398
00399 case PARSE_HTTP_VERSION_MINOR:
00400
00401 if (*m_read_ptr == ' ') {
00402
00403 if (! m_is_request) {
00404 m_headers_parse_state = PARSE_STATUS_CODE_START;
00405 }
00406 } else if (*m_read_ptr == '\r') {
00407
00408 if (! m_is_request) {
00409 setError(ec, ERROR_STATUS_EMPTY);
00410 return false;
00411 }
00412 m_headers_parse_state = PARSE_EXPECTING_NEWLINE;
00413 } else if (*m_read_ptr == '\n') {
00414
00415 if (! m_is_request) {
00416 setError(ec, ERROR_STATUS_EMPTY);
00417 return false;
00418 }
00419 m_headers_parse_state = PARSE_EXPECTING_CR;
00420 } else if (isDigit(*m_read_ptr)) {
00421 http_msg.setVersionMinor( (http_msg.getVersionMinor() * 10)
00422 + (*m_read_ptr - '0') );
00423 } else {
00424 setError(ec, ERROR_VERSION_CHAR);
00425 return false;
00426 }
00427 break;
00428
00429 case PARSE_STATUS_CODE_START:
00430
00431 if (!isDigit(*m_read_ptr)) {
00432 setError(ec, ERROR_STATUS_CHAR);
00433 return false;
00434 }
00435 m_status_code = (*m_read_ptr - '0');
00436 m_headers_parse_state = PARSE_STATUS_CODE;
00437 break;
00438
00439 case PARSE_STATUS_CODE:
00440
00441 if (*m_read_ptr == ' ') {
00442 m_status_message.erase();
00443 m_headers_parse_state = PARSE_STATUS_MESSAGE;
00444 } else if (isDigit(*m_read_ptr)) {
00445 m_status_code = ( (m_status_code * 10) + (*m_read_ptr - '0') );
00446 } else if (*m_read_ptr == '\r') {
00447
00448 m_status_message.erase();
00449 m_headers_parse_state = PARSE_EXPECTING_NEWLINE;
00450 } else if (*m_read_ptr == '\n') {
00451
00452 m_status_message.erase();
00453 m_headers_parse_state = PARSE_EXPECTING_CR;
00454 } else {
00455 setError(ec, ERROR_STATUS_CHAR);
00456 return false;
00457 }
00458 break;
00459
00460 case PARSE_STATUS_MESSAGE:
00461
00462 if (*m_read_ptr == '\r') {
00463 m_headers_parse_state = PARSE_EXPECTING_NEWLINE;
00464 } else if (*m_read_ptr == '\n') {
00465 m_headers_parse_state = PARSE_EXPECTING_CR;
00466 } else if (isControl(*m_read_ptr)) {
00467 setError(ec, ERROR_STATUS_CHAR);
00468 return false;
00469 } else if (m_status_message.size() >= STATUS_MESSAGE_MAX) {
00470 setError(ec, ERROR_STATUS_CHAR);
00471 return false;
00472 } else {
00473 m_status_message.push_back(*m_read_ptr);
00474 }
00475 break;
00476
00477 case PARSE_EXPECTING_NEWLINE:
00478
00479 if (*m_read_ptr == '\n') {
00480 m_headers_parse_state = PARSE_HEADER_START;
00481 } else if (*m_read_ptr == '\r') {
00482
00483
00484
00485 ++m_read_ptr;
00486 m_bytes_last_read = (m_read_ptr - read_start_ptr);
00487 m_bytes_total_read += m_bytes_last_read;
00488 return true;
00489 } else if (*m_read_ptr == '\t' || *m_read_ptr == ' ') {
00490 m_headers_parse_state = PARSE_HEADER_WHITESPACE;
00491 } else if (!isChar(*m_read_ptr) || isControl(*m_read_ptr) || isSpecial(*m_read_ptr)) {
00492 setError(ec, ERROR_HEADER_CHAR);
00493 return false;
00494 } else {
00495
00496 m_header_name.erase();
00497 m_header_name.push_back(*m_read_ptr);
00498 m_headers_parse_state = PARSE_HEADER_NAME;
00499 }
00500 break;
00501
00502 case PARSE_EXPECTING_CR:
00503
00504 if (*m_read_ptr == '\r') {
00505 m_headers_parse_state = PARSE_HEADER_START;
00506 } else if (*m_read_ptr == '\n') {
00507
00508
00509
00510 ++m_read_ptr;
00511 m_bytes_last_read = (m_read_ptr - read_start_ptr);
00512 m_bytes_total_read += m_bytes_last_read;
00513 return true;
00514 } else if (*m_read_ptr == '\t' || *m_read_ptr == ' ') {
00515 m_headers_parse_state = PARSE_HEADER_WHITESPACE;
00516 } else if (!isChar(*m_read_ptr) || isControl(*m_read_ptr) || isSpecial(*m_read_ptr)) {
00517 setError(ec, ERROR_HEADER_CHAR);
00518 return false;
00519 } else {
00520
00521 m_header_name.erase();
00522 m_header_name.push_back(*m_read_ptr);
00523 m_headers_parse_state = PARSE_HEADER_NAME;
00524 }
00525 break;
00526
00527 case PARSE_HEADER_WHITESPACE:
00528
00529 if (*m_read_ptr == '\r') {
00530 m_headers_parse_state = PARSE_EXPECTING_NEWLINE;
00531 } else if (*m_read_ptr == '\n') {
00532 m_headers_parse_state = PARSE_EXPECTING_CR;
00533 } else if (*m_read_ptr != '\t' && *m_read_ptr != ' ') {
00534 if (!isChar(*m_read_ptr) || isControl(*m_read_ptr) || isSpecial(*m_read_ptr))
00535 setError(ec, ERROR_HEADER_CHAR);
00536 return false;
00537
00538 m_header_name.erase();
00539 m_header_name.push_back(*m_read_ptr);
00540 m_headers_parse_state = PARSE_HEADER_NAME;
00541 }
00542 break;
00543
00544 case PARSE_HEADER_START:
00545
00546 if (*m_read_ptr == '\r') {
00547 m_headers_parse_state = PARSE_EXPECTING_FINAL_NEWLINE;
00548 } else if (*m_read_ptr == '\n') {
00549 m_headers_parse_state = PARSE_EXPECTING_FINAL_CR;
00550 } else if (*m_read_ptr == '\t' || *m_read_ptr == ' ') {
00551 m_headers_parse_state = PARSE_HEADER_WHITESPACE;
00552 } else if (!isChar(*m_read_ptr) || isControl(*m_read_ptr) || isSpecial(*m_read_ptr)) {
00553 setError(ec, ERROR_HEADER_CHAR);
00554 return false;
00555 } else {
00556
00557 m_header_name.erase();
00558 m_header_name.push_back(*m_read_ptr);
00559 m_headers_parse_state = PARSE_HEADER_NAME;
00560 }
00561 break;
00562
00563 case PARSE_HEADER_NAME:
00564
00565 if (*m_read_ptr == ':') {
00566 m_header_value.erase();
00567 m_headers_parse_state = PARSE_SPACE_BEFORE_HEADER_VALUE;
00568 } else if (!isChar(*m_read_ptr) || isControl(*m_read_ptr) || isSpecial(*m_read_ptr)) {
00569 setError(ec, ERROR_HEADER_CHAR);
00570 return false;
00571 } else if (m_header_name.size() >= HEADER_NAME_MAX) {
00572 setError(ec, ERROR_HEADER_NAME_SIZE);
00573 return false;
00574 } else {
00575
00576 m_header_name.push_back(*m_read_ptr);
00577 }
00578 break;
00579
00580 case PARSE_SPACE_BEFORE_HEADER_VALUE:
00581
00582 if (*m_read_ptr == ' ') {
00583 m_headers_parse_state = PARSE_HEADER_VALUE;
00584 } else if (*m_read_ptr == '\r') {
00585 http_msg.addHeader(m_header_name, m_header_value);
00586 m_headers_parse_state = PARSE_EXPECTING_NEWLINE;
00587 } else if (*m_read_ptr == '\n') {
00588 http_msg.addHeader(m_header_name, m_header_value);
00589 m_headers_parse_state = PARSE_EXPECTING_CR;
00590 } else if (!isChar(*m_read_ptr) || isControl(*m_read_ptr) || isSpecial(*m_read_ptr)) {
00591 setError(ec, ERROR_HEADER_CHAR);
00592 return false;
00593 } else {
00594
00595 m_header_value.push_back(*m_read_ptr);
00596 m_headers_parse_state = PARSE_HEADER_VALUE;
00597 }
00598 break;
00599
00600 case PARSE_HEADER_VALUE:
00601
00602 if (*m_read_ptr == '\r') {
00603 http_msg.addHeader(m_header_name, m_header_value);
00604 m_headers_parse_state = PARSE_EXPECTING_NEWLINE;
00605 } else if (*m_read_ptr == '\n') {
00606 http_msg.addHeader(m_header_name, m_header_value);
00607 m_headers_parse_state = PARSE_EXPECTING_CR;
00608 } else if (isControl(*m_read_ptr)) {
00609 setError(ec, ERROR_HEADER_CHAR);
00610 return false;
00611 } else if (m_header_value.size() >= HEADER_VALUE_MAX) {
00612 setError(ec, ERROR_HEADER_VALUE_SIZE);
00613 return false;
00614 } else {
00615
00616 m_header_value.push_back(*m_read_ptr);
00617 }
00618 break;
00619
00620 case PARSE_EXPECTING_FINAL_NEWLINE:
00621 if (*m_read_ptr == '\n') ++m_read_ptr;
00622 m_bytes_last_read = (m_read_ptr - read_start_ptr);
00623 m_bytes_total_read += m_bytes_last_read;
00624 return true;
00625
00626 case PARSE_EXPECTING_FINAL_CR:
00627 if (*m_read_ptr == '\r') ++m_read_ptr;
00628 m_bytes_last_read = (m_read_ptr - read_start_ptr);
00629 m_bytes_total_read += m_bytes_last_read;
00630 return true;
00631 }
00632
00633 ++m_read_ptr;
00634 }
00635
00636 m_bytes_last_read = (m_read_ptr - read_start_ptr);
00637 m_bytes_total_read += m_bytes_last_read;
00638 return boost::indeterminate;
00639 }
00640
00641 void HTTPParser::updateMessageWithHeaderData(HTTPMessage& http_msg) const
00642 {
00643 if (isParsingRequest()) {
00644
00645
00646
00647 HTTPRequest& http_request(dynamic_cast<HTTPRequest&>(http_msg));
00648 http_request.setMethod(m_method);
00649 http_request.setResource(m_resource);
00650 http_request.setQueryString(m_query_string);
00651
00652
00653 if (! m_query_string.empty()) {
00654 if (! parseURLEncoded(http_request.getQueryParams(),
00655 m_query_string.c_str(),
00656 m_query_string.size()))
00657 PION_LOG_WARN(m_logger, "Request query string parsing failed (URI)");
00658 }
00659
00660
00661 std::pair<HTTPTypes::Headers::const_iterator, HTTPTypes::Headers::const_iterator>
00662 cookie_pair = http_request.getHeaders().equal_range(HTTPTypes::HEADER_COOKIE);
00663 for (HTTPTypes::Headers::const_iterator cookie_iterator = cookie_pair.first;
00664 cookie_iterator != http_request.getHeaders().end()
00665 && cookie_iterator != cookie_pair.second; ++cookie_iterator)
00666 {
00667 if (! parseCookieHeader(http_request.getCookieParams(),
00668 cookie_iterator->second, false) )
00669 PION_LOG_WARN(m_logger, "Cookie header parsing failed");
00670 }
00671
00672 } else {
00673
00674
00675
00676 HTTPResponse& http_response(dynamic_cast<HTTPResponse&>(http_msg));
00677 http_response.setStatusCode(m_status_code);
00678 http_response.setStatusMessage(m_status_message);
00679
00680
00681 std::pair<HTTPTypes::Headers::const_iterator, HTTPTypes::Headers::const_iterator>
00682 cookie_pair = http_response.getHeaders().equal_range(HTTPTypes::HEADER_SET_COOKIE);
00683 for (HTTPTypes::Headers::const_iterator cookie_iterator = cookie_pair.first;
00684 cookie_iterator != http_response.getHeaders().end()
00685 && cookie_iterator != cookie_pair.second; ++cookie_iterator)
00686 {
00687 if (! parseCookieHeader(http_response.getCookieParams(),
00688 cookie_iterator->second, true) )
00689 PION_LOG_WARN(m_logger, "Set-Cookie header parsing failed");
00690 }
00691
00692 }
00693 }
00694
00695 boost::tribool HTTPParser::finishHeaderParsing(HTTPMessage& http_msg,
00696 boost::system::error_code& ec)
00697 {
00698 boost::tribool rc = boost::indeterminate;
00699
00700 m_bytes_content_remaining = m_bytes_content_read = 0;
00701 http_msg.setContentLength(0);
00702 http_msg.updateTransferCodingUsingHeader();
00703 updateMessageWithHeaderData(http_msg);
00704
00705 if (http_msg.isChunked()) {
00706
00707
00708 m_message_parse_state = PARSE_CHUNKS;
00709
00710
00711 if (m_parse_headers_only)
00712 rc = true;
00713
00714 } else if (http_msg.isContentLengthImplied()) {
00715
00716
00717 m_message_parse_state = PARSE_END;
00718 rc = true;
00719
00720 } else {
00721
00722
00723 if (http_msg.hasHeader(HTTPTypes::HEADER_CONTENT_LENGTH)) {
00724
00725
00726 try {
00727 http_msg.updateContentLengthUsingHeader();
00728 } catch (...) {
00729 PION_LOG_ERROR(m_logger, "Unable to update content length");
00730 setError(ec, ERROR_INVALID_CONTENT_LENGTH);
00731 return false;
00732 }
00733
00734
00735 if (http_msg.getContentLength() == 0) {
00736 m_message_parse_state = PARSE_END;
00737 rc = true;
00738 } else {
00739 m_message_parse_state = PARSE_CONTENT;
00740 m_bytes_content_remaining = http_msg.getContentLength();
00741
00742
00743 if (m_bytes_content_remaining > m_max_content_length)
00744 http_msg.setContentLength(m_max_content_length);
00745
00746
00747 if (m_parse_headers_only)
00748 rc = true;
00749 }
00750
00751 } else {
00752
00753
00754
00755
00756 if (! m_is_request) {
00757
00758 http_msg.getChunkCache().clear();
00759
00760
00761 m_message_parse_state = PARSE_CONTENT_NO_LENGTH;
00762
00763
00764 if (m_parse_headers_only)
00765 rc = true;
00766 } else {
00767 m_message_parse_state = PARSE_END;
00768 rc = true;
00769 }
00770 }
00771 }
00772
00773
00774 http_msg.createContentBuffer();
00775
00776 return rc;
00777 }
00778
00779 bool HTTPParser::parseURLEncoded(HTTPTypes::QueryParams& dict,
00780 const char *ptr, const size_t len)
00781 {
00782
00783 enum QueryParseState {
00784 QUERY_PARSE_NAME, QUERY_PARSE_VALUE
00785 } parse_state = QUERY_PARSE_NAME;
00786
00787
00788 const char * const end = ptr + len;
00789 std::string query_name;
00790 std::string query_value;
00791
00792
00793 while (ptr < end) {
00794 switch (parse_state) {
00795
00796 case QUERY_PARSE_NAME:
00797
00798 if (*ptr == '=') {
00799
00800 parse_state = QUERY_PARSE_VALUE;
00801 } else if (*ptr == '&') {
00802
00803 if (! query_name.empty()) {
00804
00805 dict.insert( std::make_pair(query_name, query_value) );
00806 query_name.erase();
00807 }
00808 } else if (*ptr == '\r' || *ptr == '\n' || *ptr == '\t') {
00809
00810 } else if (isControl(*ptr) || query_name.size() >= QUERY_NAME_MAX) {
00811
00812 return false;
00813 } else {
00814
00815 query_name.push_back(*ptr);
00816 }
00817 break;
00818
00819 case QUERY_PARSE_VALUE:
00820
00821 if (*ptr == '&') {
00822
00823 if (! query_name.empty()) {
00824 dict.insert( std::make_pair(query_name, query_value) );
00825 query_name.erase();
00826 }
00827 query_value.erase();
00828 parse_state = QUERY_PARSE_NAME;
00829 } else if (*ptr == '\r' || *ptr == '\n' || *ptr == '\t') {
00830
00831 } else if (isControl(*ptr) || query_value.size() >= QUERY_VALUE_MAX) {
00832
00833 return false;
00834 } else {
00835
00836 query_value.push_back(*ptr);
00837 }
00838 break;
00839 }
00840
00841 ++ptr;
00842 }
00843
00844
00845 if (! query_name.empty())
00846 dict.insert( std::make_pair(query_name, query_value) );
00847
00848 return true;
00849 }
00850
00851 bool HTTPParser::parseCookieHeader(HTTPTypes::CookieParams& dict,
00852 const char *ptr, const size_t len,
00853 bool set_cookie_header)
00854 {
00855
00856
00857
00858
00859
00860
00861
00862 enum CookieParseState {
00863 COOKIE_PARSE_NAME, COOKIE_PARSE_VALUE, COOKIE_PARSE_IGNORE
00864 } parse_state = COOKIE_PARSE_NAME;
00865
00866
00867 const char * const end = ptr + len;
00868 std::string cookie_name;
00869 std::string cookie_value;
00870 char value_quote_character = '\0';
00871
00872
00873 while (ptr < end) {
00874 switch (parse_state) {
00875
00876 case COOKIE_PARSE_NAME:
00877
00878 if (*ptr == '=') {
00879
00880 value_quote_character = '\0';
00881 parse_state = COOKIE_PARSE_VALUE;
00882 } else if (*ptr == ';' || *ptr == ',') {
00883
00884
00885 if (! cookie_name.empty()) {
00886
00887 if (! isCookieAttribute(cookie_name, set_cookie_header))
00888 dict.insert( std::make_pair(cookie_name, cookie_value) );
00889 cookie_name.erase();
00890 }
00891 } else if (*ptr != ' ') {
00892
00893 if (isControl(*ptr) || cookie_name.size() >= COOKIE_NAME_MAX)
00894 return false;
00895
00896 cookie_name.push_back(*ptr);
00897 }
00898 break;
00899
00900 case COOKIE_PARSE_VALUE:
00901
00902 if (value_quote_character == '\0') {
00903
00904 if (*ptr == ';' || *ptr == ',') {
00905
00906 if (! isCookieAttribute(cookie_name, set_cookie_header))
00907 dict.insert( std::make_pair(cookie_name, cookie_value) );
00908 cookie_name.erase();
00909 cookie_value.erase();
00910 parse_state = COOKIE_PARSE_NAME;
00911 } else if (*ptr == '\'' || *ptr == '"') {
00912 if (cookie_value.empty()) {
00913
00914 value_quote_character = *ptr;
00915 } else if (cookie_value.size() >= COOKIE_VALUE_MAX) {
00916
00917 return false;
00918 } else {
00919
00920 cookie_value.push_back(*ptr);
00921 }
00922 } else if (*ptr != ' ' || !cookie_value.empty()) {
00923
00924 if (isControl(*ptr) || cookie_value.size() >= COOKIE_VALUE_MAX)
00925 return false;
00926
00927 cookie_value.push_back(*ptr);
00928 }
00929 } else {
00930
00931 if (*ptr == value_quote_character) {
00932
00933 if (! isCookieAttribute(cookie_name, set_cookie_header))
00934 dict.insert( std::make_pair(cookie_name, cookie_value) );
00935 cookie_name.erase();
00936 cookie_value.erase();
00937 parse_state = COOKIE_PARSE_IGNORE;
00938 } else if (cookie_value.size() >= COOKIE_VALUE_MAX) {
00939
00940 return false;
00941 } else {
00942
00943 cookie_value.push_back(*ptr);
00944 }
00945 }
00946 break;
00947
00948 case COOKIE_PARSE_IGNORE:
00949
00950 if (*ptr == ';' || *ptr == ',')
00951 parse_state = COOKIE_PARSE_NAME;
00952 break;
00953 }
00954
00955 ++ptr;
00956 }
00957
00958
00959 if (! isCookieAttribute(cookie_name, set_cookie_header))
00960 dict.insert( std::make_pair(cookie_name, cookie_value) );
00961
00962 return true;
00963 }
00964
00965 boost::tribool HTTPParser::parseChunks(HTTPMessage::ChunkCache& chunk_cache,
00966 boost::system::error_code& ec)
00967 {
00968
00969
00970
00971
00972
00973
00974
00975 const char *read_start_ptr = m_read_ptr;
00976 m_bytes_last_read = 0;
00977 while (m_read_ptr < m_read_end_ptr) {
00978
00979 switch (m_chunked_content_parse_state) {
00980 case PARSE_CHUNK_SIZE_START:
00981
00982 if (isHexDigit(*m_read_ptr)) {
00983 m_chunk_size_str.erase();
00984 m_chunk_size_str.push_back(*m_read_ptr);
00985 m_chunked_content_parse_state = PARSE_CHUNK_SIZE;
00986 } else if (*m_read_ptr == ' ' || *m_read_ptr == '\x09' || *m_read_ptr == '\x0D' || *m_read_ptr == '\x0A') {
00987
00988
00989 break;
00990 } else {
00991 setError(ec, ERROR_CHUNK_CHAR);
00992 return false;
00993 }
00994 break;
00995
00996 case PARSE_CHUNK_SIZE:
00997 if (isHexDigit(*m_read_ptr)) {
00998 m_chunk_size_str.push_back(*m_read_ptr);
00999 } else if (*m_read_ptr == '\x0D') {
01000 m_chunked_content_parse_state = PARSE_EXPECTING_LF_AFTER_CHUNK_SIZE;
01001 } else if (*m_read_ptr == ' ' || *m_read_ptr == '\x09') {
01002
01003
01004 m_chunked_content_parse_state = PARSE_EXPECTING_CR_AFTER_CHUNK_SIZE;
01005 } else {
01006 setError(ec, ERROR_CHUNK_CHAR);
01007 return false;
01008 }
01009 break;
01010
01011 case PARSE_EXPECTING_CR_AFTER_CHUNK_SIZE:
01012 if (*m_read_ptr == '\x0D') {
01013 m_chunked_content_parse_state = PARSE_EXPECTING_LF_AFTER_CHUNK_SIZE;
01014 } else if (*m_read_ptr == ' ' || *m_read_ptr == '\x09') {
01015
01016
01017 break;
01018 } else {
01019 setError(ec, ERROR_CHUNK_CHAR);
01020 return false;
01021 }
01022 break;
01023
01024 case PARSE_EXPECTING_LF_AFTER_CHUNK_SIZE:
01025
01026
01027 if (*m_read_ptr == '\x0A') {
01028 m_bytes_read_in_current_chunk = 0;
01029 m_size_of_current_chunk = strtol(m_chunk_size_str.c_str(), 0, 16);
01030 if (m_size_of_current_chunk == 0) {
01031 m_chunked_content_parse_state = PARSE_EXPECTING_FINAL_CR_AFTER_LAST_CHUNK;
01032 } else {
01033 m_chunked_content_parse_state = PARSE_CHUNK;
01034 }
01035 } else {
01036 setError(ec, ERROR_CHUNK_CHAR);
01037 return false;
01038 }
01039 break;
01040
01041 case PARSE_CHUNK:
01042 if (m_bytes_read_in_current_chunk < m_size_of_current_chunk) {
01043 if (chunk_cache.size() < m_max_content_length)
01044 chunk_cache.push_back(*m_read_ptr);
01045 m_bytes_read_in_current_chunk++;
01046 }
01047 if (m_bytes_read_in_current_chunk == m_size_of_current_chunk) {
01048 m_chunked_content_parse_state = PARSE_EXPECTING_CR_AFTER_CHUNK;
01049 }
01050 break;
01051
01052 case PARSE_EXPECTING_CR_AFTER_CHUNK:
01053
01054 if (*m_read_ptr == '\x0D') {
01055 m_chunked_content_parse_state = PARSE_EXPECTING_LF_AFTER_CHUNK;
01056 } else {
01057 setError(ec, ERROR_CHUNK_CHAR);
01058 return false;
01059 }
01060 break;
01061
01062 case PARSE_EXPECTING_LF_AFTER_CHUNK:
01063
01064 if (*m_read_ptr == '\x0A') {
01065 m_chunked_content_parse_state = PARSE_CHUNK_SIZE_START;
01066 } else {
01067 setError(ec, ERROR_CHUNK_CHAR);
01068 return false;
01069 }
01070 break;
01071
01072 case PARSE_EXPECTING_FINAL_CR_AFTER_LAST_CHUNK:
01073
01074 if (*m_read_ptr == '\x0D') {
01075 m_chunked_content_parse_state = PARSE_EXPECTING_FINAL_LF_AFTER_LAST_CHUNK;
01076 } else {
01077 setError(ec, ERROR_CHUNK_CHAR);
01078 return false;
01079 }
01080 break;
01081
01082 case PARSE_EXPECTING_FINAL_LF_AFTER_LAST_CHUNK:
01083
01084 if (*m_read_ptr == '\x0A') {
01085 ++m_read_ptr;
01086 m_bytes_last_read = (m_read_ptr - read_start_ptr);
01087 m_bytes_total_read += m_bytes_last_read;
01088 m_bytes_content_read += m_bytes_last_read;
01089 PION_LOG_DEBUG(m_logger, "Parsed " << m_bytes_last_read << " chunked payload content bytes; chunked content complete.");
01090 return true;
01091 } else {
01092 setError(ec, ERROR_CHUNK_CHAR);
01093 return false;
01094 }
01095 }
01096
01097 ++m_read_ptr;
01098 }
01099
01100 m_bytes_last_read = (m_read_ptr - read_start_ptr);
01101 m_bytes_total_read += m_bytes_last_read;
01102 m_bytes_content_read += m_bytes_last_read;
01103 return boost::indeterminate;
01104 }
01105
01106 boost::tribool HTTPParser::consumeContent(HTTPMessage& http_msg,
01107 boost::system::error_code& ec)
01108 {
01109 size_t content_bytes_to_read;
01110 size_t content_bytes_available = bytes_available();
01111 boost::tribool rc = boost::indeterminate;
01112
01113 if (m_bytes_content_remaining == 0) {
01114
01115 return true;
01116 } else {
01117 if (content_bytes_available >= m_bytes_content_remaining) {
01118
01119 rc = true;
01120 content_bytes_to_read = m_bytes_content_remaining;
01121 } else {
01122
01123 content_bytes_to_read = content_bytes_available;
01124 }
01125 m_bytes_content_remaining -= content_bytes_to_read;
01126 }
01127
01128
01129 if (m_bytes_content_read < m_max_content_length) {
01130 if (m_bytes_content_read + content_bytes_to_read > m_max_content_length) {
01131
01132
01133 memcpy(http_msg.getContent() + m_bytes_content_read, m_read_ptr,
01134 m_max_content_length - m_bytes_content_read);
01135 } else {
01136
01137 memcpy(http_msg.getContent() + m_bytes_content_read, m_read_ptr, content_bytes_to_read);
01138 }
01139 }
01140
01141 m_read_ptr += content_bytes_to_read;
01142 m_bytes_content_read += content_bytes_to_read;
01143 m_bytes_total_read += content_bytes_to_read;
01144 m_bytes_last_read = content_bytes_to_read;
01145
01146 return rc;
01147 }
01148
01149 std::size_t HTTPParser::consumeContentAsNextChunk(HTTPMessage::ChunkCache& chunk_cache)
01150 {
01151 if (bytes_available() == 0) {
01152 m_bytes_last_read = 0;
01153 } else {
01154 m_bytes_last_read = (m_read_end_ptr - m_read_ptr);
01155 while (m_read_ptr < m_read_end_ptr) {
01156 if (chunk_cache.size() < m_max_content_length)
01157 chunk_cache.push_back(*m_read_ptr);
01158 ++m_read_ptr;
01159 }
01160 m_bytes_total_read += m_bytes_last_read;
01161 m_bytes_content_read += m_bytes_last_read;
01162 }
01163 return m_bytes_last_read;
01164 }
01165
01166 void HTTPParser::finish(HTTPMessage& http_msg) const
01167 {
01168 switch (m_message_parse_state) {
01169 case PARSE_START:
01170 http_msg.setIsValid(false);
01171 http_msg.setContentLength(0);
01172 http_msg.createContentBuffer();
01173 return;
01174 case PARSE_END:
01175 http_msg.setIsValid(true);
01176 break;
01177 case PARSE_HEADERS:
01178 http_msg.setIsValid(false);
01179 updateMessageWithHeaderData(http_msg);
01180 http_msg.setContentLength(0);
01181 http_msg.createContentBuffer();
01182 break;
01183 case PARSE_CONTENT:
01184 http_msg.setIsValid(false);
01185 http_msg.setContentLength(getContentBytesRead());
01186 break;
01187 case PARSE_CHUNKS:
01188 http_msg.setIsValid(m_chunked_content_parse_state==PARSE_CHUNK_SIZE_START);
01189 http_msg.concatenateChunks();
01190 break;
01191 case PARSE_CONTENT_NO_LENGTH:
01192 http_msg.setIsValid(true);
01193 http_msg.concatenateChunks();
01194 break;
01195 }
01196
01197 computeMsgStatus(http_msg, http_msg.isValid());
01198
01199 if (isParsingRequest()) {
01200
01201
01202
01203 HTTPRequest& http_request(dynamic_cast<HTTPRequest&>(http_msg));
01204 const std::string& content_type_header = http_request.getHeader(HTTPTypes::HEADER_CONTENT_TYPE);
01205 if (content_type_header.compare(0, HTTPTypes::CONTENT_TYPE_URLENCODED.length(),
01206 HTTPTypes::CONTENT_TYPE_URLENCODED) == 0)
01207 {
01208 if (! parseURLEncoded(http_request.getQueryParams(),
01209 http_request.getContent(),
01210 http_request.getContentLength()))
01211 PION_LOG_WARN(m_logger, "Request query string parsing failed (POST content)");
01212 }
01213 }
01214 }
01215
01216 void HTTPParser::computeMsgStatus(HTTPMessage& http_msg, bool msg_parsed_ok )
01217 {
01218 HTTPMessage::DataStatus st = HTTPMessage::STATUS_NONE;
01219
01220 if(http_msg.hasMissingPackets()) {
01221 st = http_msg.hasDataAfterMissingPackets() ?
01222 HTTPMessage::STATUS_PARTIAL : HTTPMessage::STATUS_TRUNCATED;
01223 } else {
01224 st = msg_parsed_ok ? HTTPMessage::STATUS_OK : HTTPMessage::STATUS_TRUNCATED;
01225 }
01226
01227 http_msg.setStatus(st);
01228 }
01229
01230 void HTTPParser::createErrorCategory(void)
01231 {
01232 static ErrorCategory UNIQUE_ERROR_CATEGORY;
01233 m_error_category_ptr = &UNIQUE_ERROR_CATEGORY;
01234 }
01235
01236 bool HTTPParser::parseForwardedFor(const std::string& header, std::string& public_ip)
01237 {
01238
01239 static const boost::regex IPV4_ADDR_RX("[0-9]{1,3}\\.[0-9]{1,3}\\.[0-9]{1,3}\\.[0-9]{1,3}");
01240
01246 static const boost::regex PRIVATE_NET_RX("(10\\.[0-9]{1,3}|127\\.[0-9]{1,3}|192\\.168|172\\.1[6-9]|172\\.2[0-9]|172\\.3[0-1])\\.[0-9]{1,3}\\.[0-9]{1,3}");
01247
01248
01249 if (header.empty())
01250 return false;
01251
01252
01253 boost::match_results<std::string::const_iterator> m;
01254 std::string::const_iterator start_it = header.begin();
01255
01256
01257 while (boost::regex_search(start_it, header.end(), m, IPV4_ADDR_RX)) {
01258
01259 std::string ip_str(m[0].first, m[0].second);
01260
01261 if (! boost::regex_match(ip_str, PRIVATE_NET_RX) ) {
01262
01263 public_ip = ip_str;
01264 return true;
01265 }
01266
01267 start_it = m[0].second;
01268 }
01269
01270
01271 return false;
01272 }
01273
01274 }
01275 }
01276