libdap++ Updated for version 3.8.2
|
00001 00002 // -*- mode: c++; c-basic-offset:4 -*- 00003 00004 // This file is part of libdap, A C++ implementation of the OPeNDAP Data 00005 // Access Protocol. 00006 00007 // Copyright (c) 2002,2003 OPeNDAP, Inc. 00008 // Author: James Gallagher <jgallagher@opendap.org> 00009 // 00010 // This library is free software; you can redistribute it and/or 00011 // modify it under the terms of the GNU Lesser General Public 00012 // License as published by the Free Software Foundation; either 00013 // version 2.1 of the License, or (at your option) any later version. 00014 // 00015 // This library is distributed in the hope that it will be useful, 00016 // but WITHOUT ANY WARRANTY; without even the implied warranty of 00017 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 00018 // Lesser General Public License for more details. 00019 // 00020 // You should have received a copy of the GNU Lesser General Public 00021 // License along with this library; if not, write to the Free Software 00022 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 00023 // 00024 // You can contact OPeNDAP, Inc. at PO Box 112, Saunderstown, RI. 02874-0112. 00025 00026 00027 #include "config.h" 00028 00029 static char rcsid[] not_used = 00030 { "$Id: HTTPConnect.cc 19864 2008-11-14 17:01:35Z jimg $" 00031 }; 00032 00033 #ifdef HAVE_UNISTD_H 00034 #include <unistd.h> 00035 #endif 00036 00037 #ifdef WIN32 00038 #include <io.h> 00039 #endif 00040 00041 #include <string> 00042 #include <vector> 00043 #include <functional> 00044 #include <algorithm> 00045 #include <sstream> 00046 #include <iterator> 00047 #include <cstdlib> 00048 #include <cstring> 00049 00050 //#define DODS_DEBUG 00051 //#define DODS_DEBUG2 00052 00053 #include "debug.h" 00054 #include "GNURegex.h" 00055 #include "HTTPCache.h" 00056 #include "HTTPConnect.h" 00057 #include "RCReader.h" 00058 #include "HTTPResponse.h" 00059 #include "HTTPCacheResponse.h" 00060 00061 using namespace std; 00062 00063 namespace libdap { 00064 00065 // These global variables are not MT-Safe, but I'm leaving them as is because 00066 // they are used only for debugging (set them in a debugger like gdb or ddd). 00067 // They are not static because I *believe* that many debuggers cannot access 00068 // static variables. 08/07/02 jhrg 00069 00070 // Set this to 1 to turn on libcurl's verbose mode (for debugging). 00071 int www_trace = 0; 00072 00073 // Keep the temporary files; useful for debugging. 00074 int dods_keep_temps = 0; 00075 00076 #define CLIENT_ERR_MIN 400 00077 #define CLIENT_ERR_MAX 417 00078 static char *http_client_errors[CLIENT_ERR_MAX - CLIENT_ERR_MIN +1] = 00079 { 00080 "Bad Request:", 00081 "Unauthorized: Contact the server administrator.", 00082 "Payment Required.", 00083 "Forbidden: Contact the server administrator.", 00084 "Not Found: The data source or server could not be found.\n\ 00085 Often this means that the OPeNDAP server is missing or needs attention;\n\ 00086 Please contact the server administrator.", 00087 "Method Not Allowed.", 00088 "Not Acceptable.", 00089 "Proxy Authentication Required.", 00090 "Request Time-out.", 00091 "Conflict.", 00092 "Gone:.", 00093 "Length Required.", 00094 "Precondition Failed.", 00095 "Request Entity Too Large.", 00096 "Request URI Too Large.", 00097 "Unsupported Media Type.", 00098 "Requested Range Not Satisfiable.", 00099 "Expectation Failed." 00100 }; 00101 00102 #define SERVER_ERR_MIN 500 00103 #define SERVER_ERR_MAX 505 00104 static char *http_server_errors[SERVER_ERR_MAX - SERVER_ERR_MIN +1] = 00105 { 00106 "Internal Server Error.", 00107 "Not Implemented.", 00108 "Bad Gateway.", 00109 "Service Unavailable.", 00110 "Gateway Time-out.", 00111 "HTTP Version Not Supported." 00112 }; 00113 00120 ObjectType 00121 get_type(const string &value) 00122 { 00123 if (value == "dods_das" | value == "dods-das") 00124 return dods_das; 00125 else if (value == "dods_dds" | value == "dods-dds") 00126 return dods_dds; 00127 else if (value == "dods_data" | value == "dods-data") 00128 return dods_data; 00129 else if (value == "dods_error" | value == "dods-error") 00130 return dods_error; 00131 else if (value == "web_error" | value == "web-error") 00132 return web_error; 00133 else if (value == "dap4_ddx" | value == "dap4-ddx") 00134 return dap4_ddx; 00135 else if (value == "dap4_datax" | value == "dap4-datax") 00136 return dap4_datax; 00137 else if (value == "dap4_errorx" | value == "dap4-errorx") 00138 return dap4_errorx; 00139 else 00140 return unknown_type; 00141 } 00142 00148 ObjectType 00149 get_description_type(const string &value) 00150 { 00151 if (value == "dods_das" | value == "dods-das") 00152 return dods_das; 00153 else if (value == "dods_dds" | value == "dods-dds") 00154 return dods_dds; 00155 else if (value == "dods_data" | value == "dods-data") 00156 return dods_data; 00157 else if (value == "dods_error" | value == "dods-error") 00158 return dods_error; 00159 else if (value == "web_error" | value == "web-error") 00160 return web_error; 00161 else if (value == "dap4_ddx" | value == "dap4-ddx") 00162 return dap4_ddx; 00163 else if (value == "dap4_datax" | value == "dap4-datax") 00164 return dap4_datax; 00165 else if (value == "dap4_errorx" | value == "dap4-errorx") 00166 return dap4_errorx; 00167 else 00168 return unknown_type; 00169 } 00170 00173 static string 00174 http_status_to_string(int status) 00175 { 00176 if (status >= CLIENT_ERR_MIN && status <= CLIENT_ERR_MAX) 00177 return string(http_client_errors[status - CLIENT_ERR_MIN]); 00178 else if (status >= SERVER_ERR_MIN && status <= SERVER_ERR_MAX) 00179 return string(http_server_errors[status - SERVER_ERR_MIN]); 00180 else 00181 return string("Unknown Error: This indicates a problem with libdap++.\nPlease report this to support@opendap.org."); 00182 } 00183 00188 class ParseHeader : public unary_function<const string &, void> 00189 { 00190 ObjectType type; // What type of object is in the stream? 00191 string server; // Server's version string. 00192 string protocol; // Server's protocol version. 00193 string location; // Url returned by server 00194 00195 public: 00196 ParseHeader() : type(unknown_type), server("dods/0.0"), protocol("2.0") 00197 { } 00198 00199 void operator()(const string &header) 00200 { 00201 std::istringstream line(header); 00202 00203 string name; 00204 line >> name; 00205 downcase(name); 00206 if (name == "content-description:") { 00207 string value; 00208 line >> value; 00209 downcase(value); 00210 DBG2(cout << name << ": " << value << endl); 00211 type = get_description_type(value); 00212 } 00213 // The second test (== "dods/0.0") tests if xopendap-server has already 00214 // been seen. If so, use that header in preference to the old 00215 // XDODS-Server header. jhrg 2/7/06 00216 else if (name == "xdods-server:" && server == "dods/0.0") { 00217 string value; 00218 line >> value; 00219 downcase(value); 00220 DBG2(cout << name << ": " << value << endl); 00221 server = value; 00222 } 00223 else if (name == "xopendap-server:") { 00224 string value; 00225 line >> value; 00226 downcase(value); 00227 DBG2(cout << name << ": " << value << endl); 00228 server = value; 00229 } 00230 else if (name == "xdap:") { 00231 string value; 00232 line >> value; 00233 downcase(value); 00234 DBG2(cout << name << ": " << value << endl); 00235 protocol = value; 00236 } 00237 else if (server == "dods/0.0" && name == "server:") { 00238 string value; 00239 line >> value; 00240 downcase(value); 00241 DBG2(cout << name << ": " << value << endl); 00242 server = value; 00243 } 00244 else if (name == "location:") { 00245 string value; 00246 line >> value; 00247 DBG2(cout << name << ": " << value << endl); 00248 location = value; 00249 } 00250 else if (type == unknown_type && name == "content-type:" 00251 && line.str().find("text/html") != string::npos) { 00252 DBG2(cout << name << ": text/html..." << endl); 00253 type = web_error; 00254 } 00255 } 00256 00257 ObjectType get_object_type() 00258 { 00259 return type; 00260 } 00261 00262 string get_server() 00263 { 00264 return server; 00265 } 00266 00267 string get_protocol() 00268 { 00269 return protocol; 00270 } 00271 00272 string get_location() { 00273 return location; 00274 } 00275 }; 00276 00293 static size_t 00294 save_raw_http_headers(void *ptr, size_t size, size_t nmemb, void *resp_hdrs) 00295 { 00296 DBG2(cerr << "Inside the header parser." << endl); 00297 vector<string> *hdrs = static_cast<vector<string> * >(resp_hdrs); 00298 00299 // Grab the header, minus the trailing newline. Or \r\n pair. 00300 string complete_line; 00301 if (nmemb > 1 && *(static_cast<char*>(ptr) + size * (nmemb - 2)) == '\r') 00302 complete_line.assign(static_cast<char *>(ptr), size * (nmemb - 2)); 00303 else 00304 complete_line.assign(static_cast<char *>(ptr), size * (nmemb - 1)); 00305 00306 // Store all non-empty headers that are not HTTP status codes 00307 if (complete_line != "" && complete_line.find("HTTP") == string::npos) { 00308 DBG(cerr << "Header line: " << complete_line << endl); 00309 hdrs->push_back(complete_line); 00310 } 00311 00312 return size * nmemb; 00313 } 00314 00316 static int 00317 curl_debug(CURL *, curl_infotype info, char *msg, size_t size, void *) 00318 { 00319 string message(msg, size); 00320 00321 switch (info) { 00322 case CURLINFO_TEXT: 00323 cerr << "Text: " << message; break; 00324 case CURLINFO_HEADER_IN: 00325 cerr << "Header in: " << message; break; 00326 case CURLINFO_HEADER_OUT: 00327 cerr << "Header out: " << message; break; 00328 case CURLINFO_DATA_IN: 00329 cerr << "Data in: " << message; break; 00330 case CURLINFO_DATA_OUT: 00331 cerr << "Data out: " << message; break; 00332 case CURLINFO_END: 00333 cerr << "End: " << message; break; 00334 #ifdef CURLINFO_SSL_DATA_IN 00335 case CURLINFO_SSL_DATA_IN: 00336 cerr << "SSL Data in: " << message; break; 00337 #endif 00338 #ifdef CURLINFO_SSL_DATA_OUT 00339 case CURLINFO_SSL_DATA_OUT: 00340 cerr << "SSL Data out: " << message; break; 00341 #endif 00342 default: 00343 cerr << "Curl info: " << message; break; 00344 } 00345 return 0; 00346 } 00347 00351 void 00352 HTTPConnect::www_lib_init() 00353 { 00354 d_curl = curl_easy_init(); 00355 if (!d_curl) 00356 throw InternalErr(__FILE__, __LINE__, "Could not initialize libcurl."); 00357 00358 // Now set options that will remain constant for the duration of this 00359 // CURL object. 00360 00361 // Set the proxy host. 00362 if (!d_rcr->get_proxy_server_host().empty()) { 00363 DBG(cerr << "Setting up a proxy server." << endl); 00364 DBG(cerr << "Proxy host: " << d_rcr->get_proxy_server_host() 00365 << endl); 00366 DBG(cerr << "Proxy port: " << d_rcr->get_proxy_server_port() 00367 << endl); 00368 DBG(cerr << "Proxy pwd : " << d_rcr->get_proxy_server_userpw() 00369 << endl); 00370 curl_easy_setopt(d_curl, CURLOPT_PROXY, 00371 d_rcr->get_proxy_server_host().c_str()); 00372 curl_easy_setopt(d_curl, CURLOPT_PROXYPORT, 00373 d_rcr->get_proxy_server_port()); 00374 00375 // As of 4/21/08 only NTLM, Digest and Basic work. 00376 #ifdef CURLOPT_PROXYAUTH 00377 curl_easy_setopt(d_curl, CURLOPT_PROXYAUTH, (long)CURLAUTH_ANY); 00378 #endif 00379 00380 // Password might not be required. 06/21/04 jhrg 00381 if (!d_rcr->get_proxy_server_userpw().empty()) 00382 curl_easy_setopt(d_curl, CURLOPT_PROXYUSERPWD, 00383 d_rcr->get_proxy_server_userpw().c_str()); 00384 } 00385 00386 curl_easy_setopt(d_curl, CURLOPT_ERRORBUFFER, d_error_buffer); 00387 // We have to set FailOnError to false for any of the non-Basic 00388 // authentication schemes to work. 07/28/03 jhrg 00389 curl_easy_setopt(d_curl, CURLOPT_FAILONERROR, 0); 00390 00391 // This means libcurl will use Basic, Digest, GSS Negotiate, or NTLM, 00392 // choosing the the 'safest' one supported by the server. 00393 // This requires curl 7.10.6 which is still in pre-release. 07/25/03 jhrg 00394 curl_easy_setopt(d_curl, CURLOPT_HTTPAUTH, (long)CURLAUTH_ANY); 00395 00396 curl_easy_setopt(d_curl, CURLOPT_NOPROGRESS, 1); 00397 curl_easy_setopt(d_curl, CURLOPT_NOSIGNAL, 1); 00398 curl_easy_setopt(d_curl, CURLOPT_HEADERFUNCTION, save_raw_http_headers); 00399 // In read_url a call to CURLOPT_WRITEHEADER is used to set the fourth 00400 // param of save_raw_http_headers to a vector<string> object. 00401 00402 // Follow 302 (redirect) responses 00403 curl_easy_setopt(d_curl, CURLOPT_FOLLOWLOCATION, 1); 00404 curl_easy_setopt(d_curl, CURLOPT_MAXREDIRS, 5); 00405 00406 // If the user turns off SSL validation... 00407 if (!d_rcr->get_validate_ssl() == 0) { 00408 curl_easy_setopt(d_curl, CURLOPT_SSL_VERIFYPEER, 0); 00409 curl_easy_setopt(d_curl, CURLOPT_SSL_VERIFYHOST, 0); 00410 } 00411 00412 // Look to see if cookies are turned on in the .dodsrc file. If so, 00413 // activate here. We honor 'session cookies' (cookies without an 00414 // expiration date) here so that session-base SSO systems will work as 00415 // expected. 00416 if (!d_cookie_jar.empty()) { 00417 DBG(cerr << "Setting the cookie jar to: " << d_cookie_jar << endl); 00418 curl_easy_setopt(d_curl, CURLOPT_COOKIEJAR, d_cookie_jar.c_str()); 00419 curl_easy_setopt(d_curl, CURLOPT_COOKIESESSION, 1); 00420 } 00421 00422 if (www_trace) { 00423 cerr << "Curl version: " << curl_version() << endl; 00424 curl_easy_setopt(d_curl, CURLOPT_VERBOSE, 1); 00425 curl_easy_setopt(d_curl, CURLOPT_DEBUGFUNCTION, curl_debug); 00426 } 00427 } 00428 00432 class BuildHeaders : public unary_function<const string &, void> 00433 { 00434 struct curl_slist *d_cl; 00435 00436 public: 00437 BuildHeaders() : d_cl(0) 00438 {} 00439 00440 void operator()(const string &header) 00441 { 00442 DBG(cerr << "Adding '" << header.c_str() << "' to the header list." 00443 << endl); 00444 d_cl = curl_slist_append(d_cl, header.c_str()); 00445 } 00446 00447 struct curl_slist *get_headers() 00448 { 00449 return d_cl; 00450 } 00451 }; 00452 00467 long 00468 HTTPConnect::read_url(const string &url, FILE *stream, 00469 vector<string> *resp_hdrs, 00470 const vector<string> *headers) 00471 { 00472 curl_easy_setopt(d_curl, CURLOPT_URL, url.c_str()); 00473 00474 #ifdef WIN32 00475 // See the curl documentation for CURLOPT_FILE (nka CURLOPT_WRITEDATA) 00476 // and the CURLOPT_WRITEFUNCTION option. Quote: "If you are using libcurl as 00477 // a win32 DLL, you MUST use the CURLOPT_WRITEFUNCTION option if you set the 00478 // CURLOPT_WRITEDATA option or you will experience crashes". At the root of 00479 // this issue is that one should not pass a FILE * to a windows DLL. Close 00480 // inspection of libcurl yields that their default write function when using 00481 // the CURLOPT_WRITEDATA is just "fwrite". 00482 curl_easy_setopt(d_curl, CURLOPT_FILE, stream); 00483 curl_easy_setopt(d_curl, CURLOPT_WRITEFUNCTION, &fwrite); 00484 #else 00485 curl_easy_setopt(d_curl, CURLOPT_FILE, stream); 00486 #endif 00487 00488 DBG(copy(d_request_headers.begin(), d_request_headers.end(), 00489 ostream_iterator<string>(cerr, "\n"))); 00490 00491 BuildHeaders req_hdrs; 00492 req_hdrs = for_each(d_request_headers.begin(), d_request_headers.end(), 00493 req_hdrs); 00494 if (headers) 00495 req_hdrs = for_each(headers->begin(), headers->end(), req_hdrs); 00496 curl_easy_setopt(d_curl, CURLOPT_HTTPHEADER, req_hdrs.get_headers()); 00497 00498 if (d_accept_deflate) 00499 curl_easy_setopt(d_curl, CURLOPT_ENCODING, "deflate"); 00500 00501 // Turn off the proxy for this URL? 00502 bool temporary_proxy = false; 00503 if ((temporary_proxy = url_uses_no_proxy_for(url))) { 00504 DBG(cerr << "Suppress proxy for url: " << url << endl); 00505 curl_easy_setopt(d_curl, CURLOPT_PROXY, 0); 00506 } 00507 00508 string::size_type at_sign = url.find('@'); 00509 // Assume username:password present *and* assume it's an HTTP URL; it *is* 00510 // HTTPConnect, after all. 7 is position after "http://"; the second arg 00511 // to substr() is the sub string length. 00512 if (at_sign != url.npos) 00513 d_upstring = url.substr(7, at_sign - 7); 00514 00515 if (!d_upstring.empty()) 00516 curl_easy_setopt(d_curl, CURLOPT_USERPWD, d_upstring.c_str()); 00517 00518 // Pass save_raw_http_headers() a pointer to the vector<string> where the 00519 // response headers may be stored. Callers can use the resp_hdrs 00520 // value/result parameter to get the raw response header information . 00521 curl_easy_setopt(d_curl, CURLOPT_WRITEHEADER, resp_hdrs); 00522 00523 CURLcode res = curl_easy_perform(d_curl); 00524 00525 // Free the header list and null the value in d_curl. 00526 curl_slist_free_all(req_hdrs.get_headers()); 00527 curl_easy_setopt(d_curl, CURLOPT_HTTPHEADER, 0); 00528 00529 // Reset the proxy? 00530 if (temporary_proxy && !d_rcr->get_proxy_server_host().empty()) 00531 curl_easy_setopt(d_curl, CURLOPT_PROXY, 00532 d_rcr->get_proxy_server_host().c_str()); 00533 00534 if (res != 0) 00535 throw Error(d_error_buffer); 00536 00537 long status; 00538 res = curl_easy_getinfo(d_curl, CURLINFO_HTTP_CODE, &status); 00539 if (res != 0) 00540 throw Error(d_error_buffer); 00541 00542 return status; 00543 } 00544 00548 bool 00549 HTTPConnect::url_uses_proxy_for(const string &url) throw() 00550 { 00551 if (d_rcr->is_proxy_for_used()) { 00552 Regex host_regex(d_rcr->get_proxy_for_regexp().c_str()); 00553 int index = 0, matchlen; 00554 return host_regex.search(url.c_str(), url.size(), matchlen, index) 00555 != -1; 00556 } 00557 00558 return false; 00559 } 00560 00564 bool 00565 HTTPConnect::url_uses_no_proxy_for(const string &url) throw() 00566 { 00567 return d_rcr->is_no_proxy_for_used() 00568 && url.find(d_rcr->get_no_proxy_for_host()) != string::npos; 00569 } 00570 00571 // Public methods. Mostly... 00572 00579 HTTPConnect::HTTPConnect(RCReader *rcr) : d_username(""), d_password(""), 00580 d_cookie_jar(""), 00581 d_dap_client_protocol_major(2), 00582 d_dap_client_protocol_minor(0) 00583 00584 { 00585 d_accept_deflate = rcr->get_deflate(); 00586 d_rcr = rcr; 00587 00588 // Load in the default headers to send with a request. The empty Pragma 00589 // headers overrides libcurl's default Pragma: no-cache header (which 00590 // will disable caching by Squid, et c.). The User-Agent header helps 00591 // make server logs more readable. 05/05/03 jhrg 00592 d_request_headers.push_back(string("Pragma:")); 00593 string user_agent = string("User-Agent: ") + string(CNAME) 00594 + string("/") + string(CVER); 00595 d_request_headers.push_back(user_agent); 00596 if (d_accept_deflate) 00597 d_request_headers.push_back(string("Accept-Encoding: deflate, gzip, compress")); 00598 00599 // HTTPCache::instance returns a valid ptr or 0. 00600 if (d_rcr->get_use_cache()) 00601 d_http_cache = HTTPCache::instance(d_rcr->get_dods_cache_root(), 00602 true); 00603 else 00604 d_http_cache = 0; 00605 00606 DBG2(cerr << "Cache object created (" << hex << d_http_cache << dec 00607 << ")" << endl); 00608 00609 if (d_http_cache) { 00610 d_http_cache->set_cache_enabled(d_rcr->get_use_cache()); 00611 d_http_cache->set_expire_ignored(d_rcr->get_ignore_expires() != 0); 00612 d_http_cache->set_max_size(d_rcr->get_max_cache_size()); 00613 d_http_cache->set_max_entry_size(d_rcr->get_max_cached_obj()); 00614 d_http_cache->set_default_expiration(d_rcr->get_default_expires()); 00615 d_http_cache->set_always_validate(d_rcr->get_always_validate() != 0); 00616 } 00617 00618 d_cookie_jar = rcr->get_cookie_jar(); 00619 00620 www_lib_init(); // This may throw either Error or InternalErr 00621 } 00622 00623 HTTPConnect::~HTTPConnect() 00624 { 00625 DBG2(cerr << "Entering the HTTPConnect dtor" << endl); 00626 00627 curl_easy_cleanup(d_curl); 00628 00629 DBG2(cerr << "Leaving the HTTPConnect dtor" << endl); 00630 } 00631 00644 HTTPResponse * 00645 HTTPConnect::fetch_url(const string &url) 00646 { 00647 #ifdef HTTP_TRACE 00648 cout << "GET " << url << " HTTP/1.0" << endl; 00649 #endif 00650 00651 HTTPResponse *stream; 00652 00653 if (d_http_cache && d_http_cache->is_cache_enabled()) { 00654 stream = caching_fetch_url(url); 00655 } 00656 else { 00657 stream = plain_fetch_url(url); 00658 } 00659 00660 #ifdef HTTP_TRACE 00661 stringstream ss; 00662 ss << "HTTP/1.0 " << stream->get_status() << " -" << endl; 00663 for (size_t i = 0; i < stream->get_headers()->size(); i++) { 00664 ss << stream->get_headers()->at(i) << endl; 00665 } 00666 cout << ss.str(); 00667 #endif 00668 00669 ParseHeader parser; 00670 00671 parser = for_each(stream->get_headers()->begin(), 00672 stream->get_headers()->end(), ParseHeader()); 00673 00674 #ifdef HTTP_TRACE 00675 cout << endl << endl; 00676 #endif 00677 00678 // handle redirection case (2007-04-27, gaffigan@sfos.uaf.edu) 00679 if (parser.get_location() != "" && 00680 url.substr(0,url.find("?",0)).compare(parser.get_location().substr(0,url.find("?",0))) != 0) { 00681 return fetch_url(parser.get_location()); 00682 } 00683 00684 stream->set_type(parser.get_object_type()); 00685 stream->set_version(parser.get_server()); 00686 stream->set_protocol(parser.get_protocol()); 00687 00688 return stream; 00689 } 00690 00691 // Look around for a reasonable place to put a temporary file. Check first 00692 // the value of the TMPDIR env var. If that does not yeild a path that's 00693 // writable (as defined by access(..., W_OK|R_OK)) then look at P_tmpdir (as 00694 // defined in stdio.h. If both come up empty, then use `./'. 00695 // 00696 // This function allocates storage using new. The caller must delete the char 00697 // array. 00698 00699 // Change this to a version that either returns a string or an open file 00700 // descriptor. Use information from https://buildsecurityin.us-cert.gov/ 00701 // (see open()) to make it more secure. Ideal solution: get deserialize() 00702 // methods to read from a stream returned by libcurl, not from a temporary 00703 // file. 9/21/07 jhrg 00704 static char * 00705 get_tempfile_template(char *file_template) 00706 { 00707 char *c; 00708 00709 #ifdef WIN32 00710 // whitelist for a WIN32 directory 00711 Regex directory("[-a-zA-Z0-9_\\]*"); 00712 00713 c = getenv("TEMP"); 00714 if (c && directory.match(c, strlen(c)) && (access(getenv("TEMP"), 6) == 0)) 00715 goto valid_temp_directory; 00716 00717 c= getenv("TMP"); 00718 if (c && directory.match(c, strlen(c)) && (access(getenv("TEMP"), 6) == 0)) 00719 goto valid_temp_directory; 00720 #else 00721 // whitelist for a directory 00722 Regex directory("[-a-zA-Z0-9_/]*"); 00723 00724 c = getenv("TMPDIR"); 00725 if (c && directory.match(c, strlen(c)) && (access(c, W_OK | R_OK) == 0)) 00726 goto valid_temp_directory; 00727 00728 #ifdef P_tmpdir 00729 if (access(P_tmpdir, W_OK | R_OK) == 0) { 00730 c = P_tmpdir; 00731 goto valid_temp_directory; 00732 } 00733 #endif 00734 00735 #endif // WIN32 00736 00737 c = "."; 00738 00739 valid_temp_directory: 00740 // Sanitize allocation 00741 int size = strlen(c) + strlen(file_template) + 2; 00742 if (!size_ok(1, size)) 00743 throw Error("Bad temporary file name."); 00744 00745 char *temp = new char[size]; 00746 strncpy(temp, c, size-2); 00747 strcat(temp, "/"); 00748 00749 strcat(temp, file_template); 00750 00751 return temp; 00752 } 00753 00772 string 00773 get_temp_file(FILE *&stream) throw(InternalErr) 00774 { 00775 // get_tempfile_template() uses new, must call delete 00776 char *dods_temp = get_tempfile_template("dodsXXXXXX"); 00777 00778 // Open truncated for update. NB: mkstemp() returns a file descriptor. 00779 #if defined(WIN32) || defined(TEST_WIN32_TEMPS) 00780 stream = fopen(_mktemp(dods_temp), "w+b"); 00781 #else 00782 stream = fdopen(mkstemp(dods_temp), "w+"); 00783 #endif 00784 00785 if (!stream) 00786 throw InternalErr("I/O Error: Failed to open a temporary file for the data values."); 00787 00788 string dods_temp_s = dods_temp; 00789 delete[] dods_temp; dods_temp = 0; 00790 00791 return dods_temp_s; 00792 } 00793 00795 void 00796 close_temp(FILE *s, const string &name) 00797 { 00798 int res = fclose(s); 00799 if (res) 00800 DBG(cerr << "Failed to close " << (void *)s << endl); 00801 00802 unlink(name.c_str()); 00803 } 00804 00826 HTTPResponse * 00827 HTTPConnect::caching_fetch_url(const string &url) 00828 { 00829 DBG(cerr << "Is this URL (" << url << ") in the cache?... "); 00830 00831 vector<string> *headers = new vector<string> ; 00832 FILE *s = d_http_cache->get_cached_response(url, *headers); 00833 if (!s) { 00834 // url not in cache; get it and cache it 00835 DBGN(cerr << "no; getting response and caching." << endl); 00836 time_t now = time(0); 00837 HTTPResponse *rs = plain_fetch_url(url); 00838 d_http_cache->cache_response(url, now, *(rs->get_headers()), 00839 rs->get_stream()); 00840 00841 return rs; 00842 } 00843 else { // url in cache 00844 DBGN(cerr << "yes... "); 00845 00846 if (d_http_cache->is_url_valid(url)) { // url in cache and valid 00847 DBGN(cerr << "and it's valid; using cached response." << endl); 00848 HTTPCacheResponse *crs = 00849 new HTTPCacheResponse(s, 200, headers, d_http_cache); 00850 return crs; 00851 } 00852 else { // url in cache but not valid; validate 00853 DBGN(cerr << "but it's not valid; validating... "); 00854 00855 d_http_cache->release_cached_response(s); 00856 00857 vector<string> *resp_hdrs = new vector<string> ; 00858 vector<string> cond_hdrs = 00859 d_http_cache->get_conditional_request_headers(url); 00860 FILE *body = 0; 00861 string dods_temp = get_temp_file(body); 00862 time_t now = time(0); // When was the request made (now). 00863 long http_status; 00864 00865 try { 00866 http_status = read_url(url, body, resp_hdrs, &cond_hdrs); 00867 rewind(body); 00868 } 00869 catch (Error &e) { 00870 close_temp(body, dods_temp); 00871 throw ; 00872 } 00873 00874 switch (http_status) { 00875 case 200: { // New headers and new body 00876 DBGN(cerr << "read a new response; caching." << endl); 00877 00878 d_http_cache->cache_response(url, now, *resp_hdrs, body); 00879 HTTPResponse *rs = new HTTPResponse(body, http_status, resp_hdrs, 00880 dods_temp); 00881 00882 return rs; 00883 } 00884 00885 case 304: { // Just new headers, use cached body 00886 DBGN(cerr << "cached response valid; updating." << endl); 00887 00888 close_temp(body, dods_temp); 00889 d_http_cache->update_response(url, now, *resp_hdrs); 00890 00891 vector<string> *headers = new vector<string>; 00892 FILE *hs = d_http_cache->get_cached_response(url, *headers); 00893 HTTPCacheResponse *crs = new HTTPCacheResponse(hs, 304, headers, d_http_cache); 00894 return crs; 00895 } 00896 00897 default: { // Oops. 00898 close_temp(body, dods_temp); 00899 if (http_status >= 400) { 00900 string msg = "Error while reading the URL: "; 00901 msg += url; 00902 msg 00903 += ".\nThe OPeNDAP server returned the following message:\n"; 00904 msg += http_status_to_string(http_status); 00905 throw Error(msg); 00906 } 00907 else { 00908 throw InternalErr(__FILE__, __LINE__, 00909 "Bad response from the HTTP server: " + long_to_string(http_status)); 00910 } 00911 } 00912 } 00913 } 00914 } 00915 00916 throw InternalErr(__FILE__, __LINE__, "Should never get here"); 00917 } 00918 00930 HTTPResponse * 00931 HTTPConnect::plain_fetch_url(const string &url) 00932 { 00933 DBG(cerr << "Getting URL: " << url << endl); 00934 FILE *stream = 0; 00935 string dods_temp = get_temp_file(stream); 00936 vector<string> *resp_hdrs = new vector<string>; 00937 00938 int status = -1; 00939 try { 00940 status = read_url(url, stream, resp_hdrs); // Throws Error. 00941 if (status >= 400) { 00942 string msg = "Error while reading the URL: "; 00943 msg += url; 00944 msg += ".\nThe OPeNDAP server returned the following message:\n"; 00945 msg += http_status_to_string(status); 00946 throw Error(msg); 00947 } 00948 } 00949 00950 catch (Error &e) { 00951 close_temp(stream, dods_temp); 00952 throw e; 00953 } 00954 00955 rewind(stream); 00956 00957 return new HTTPResponse(stream, status, resp_hdrs, dods_temp); 00958 } 00959 00971 void 00972 HTTPConnect::set_accept_deflate(bool deflate) 00973 { 00974 d_accept_deflate = deflate; 00975 00976 if (d_accept_deflate) { 00977 if (find(d_request_headers.begin(), d_request_headers.end(), 00978 "Accept-Encoding: deflate, gzip, compress") == d_request_headers.end()) 00979 d_request_headers.push_back(string("Accept-Encoding: deflate, gzip, compress")); 00980 DBG(copy(d_request_headers.begin(), d_request_headers.end(), 00981 ostream_iterator<string>(cerr, "\n"))); 00982 } 00983 else { 00984 vector<string>::iterator i; 00985 i = remove_if(d_request_headers.begin(), d_request_headers.end(), 00986 bind2nd(equal_to<string>(), 00987 string("Accept-Encoding: deflate, gzip, compress"))); 00988 d_request_headers.erase(i, d_request_headers.end()); 00989 } 00990 } 00991 00993 class HeaderMatch : public unary_function<const string &, bool> { 00994 const string &d_header; 00995 public: 00996 HeaderMatch(const string &header) : d_header(header) {} 00997 bool operator()(const string &arg) { return arg.find(d_header) == 0; } 00998 }; 00999 01008 void 01009 HTTPConnect::set_xdap_protocol(int major, int minor) 01010 { 01011 // Look for, and remove if one exists, an XDAP-Accept header 01012 vector<string>::iterator i; 01013 i = find_if(d_request_headers.begin(), d_request_headers.end(), 01014 HeaderMatch("XDAP-Accept:")); 01015 if (i != d_request_headers.end()) 01016 d_request_headers.erase(i); 01017 01018 // Record and add the new header value 01019 d_dap_client_protocol_major = major; 01020 d_dap_client_protocol_minor = minor; 01021 ostringstream xdap_accept; 01022 xdap_accept << "XDAP-Accept: " << major << "." << minor; 01023 01024 d_request_headers.push_back(xdap_accept.str()); 01025 01026 DBG(copy(d_request_headers.begin(), d_request_headers.end(), 01027 ostream_iterator<string>(cerr, "\n"))); 01028 } 01029 01045 void 01046 HTTPConnect::set_credentials(const string &u, const string &p) 01047 { 01048 if (u.empty()) 01049 return; 01050 01051 // Store the credentials locally. 01052 d_username = u; 01053 d_password = p; 01054 01055 d_upstring = u + ":" + p; 01056 } 01057 01058 } // namespace libdap