// libdap++ -- Updated for version 3.8.2
// -*- mode: c++; c-basic-offset:4 -*-

// This file is part of libdap, A C++ implementation of the OPeNDAP Data
// Access Protocol.

// Copyright (c) 2002,2003 OPeNDAP, Inc.
// Author: James Gallagher <jgallagher@opendap.org>
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
// Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License along with this library; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
//
// You can contact OPeNDAP, Inc. at PO Box 112, Saunderstown, RI. 02874-0112.
00025 00026 00027 #include "config.h" 00028 00029 static char rcsid[] not_used = 00030 { "$Id: HTTPConnect.cc 25101 2011-12-19 22:03:29Z jimg $" 00031 }; 00032 00033 #ifdef HAVE_UNISTD_H 00034 #include <unistd.h> 00035 #endif 00036 00037 #include <sys/stat.h> 00038 00039 #ifdef WIN32 00040 #include <io.h> 00041 #endif 00042 00043 #include <string> 00044 #include <vector> 00045 #include <functional> 00046 #include <algorithm> 00047 #include <sstream> 00048 #include <iterator> 00049 #include <cstdlib> 00050 #include <cstring> 00051 00052 //#define DODS_DEBUG2 00053 //#define HTTP_TRACE 00054 //#define DODS_DEBUG 00055 00056 #undef USE_GETENV 00057 00058 00059 #include "debug.h" 00060 #include "mime_util.h" 00061 #include "GNURegex.h" 00062 #include "HTTPCache.h" 00063 #include "HTTPConnect.h" 00064 #include "RCReader.h" 00065 #include "HTTPResponse.h" 00066 #include "HTTPCacheResponse.h" 00067 00068 using namespace std; 00069 00070 namespace libdap { 00071 00072 // These global variables are not MT-Safe, but I'm leaving them as is because 00073 // they are used only for debugging (set them in a debugger like gdb or ddd). 00074 // They are not static because I think that many debuggers cannot access 00075 // static variables. 08/07/02 jhrg 00076 00077 // Set this to 1 to turn on libcurl's verbose mode (for debugging). 00078 int www_trace = 0; 00079 00080 // Keep the temporary files; useful for debugging. 
00081 int dods_keep_temps = 0; 00082 00083 #define CLIENT_ERR_MIN 400 00084 #define CLIENT_ERR_MAX 417 00085 static const char *http_client_errors[CLIENT_ERR_MAX - CLIENT_ERR_MIN +1] = 00086 { 00087 "Bad Request:", 00088 "Unauthorized: Contact the server administrator.", 00089 "Payment Required.", 00090 "Forbidden: Contact the server administrator.", 00091 "Not Found: The data source or server could not be found.\n\ 00092 Often this means that the OPeNDAP server is missing or needs attention;\n\ 00093 Please contact the server administrator.", 00094 "Method Not Allowed.", 00095 "Not Acceptable.", 00096 "Proxy Authentication Required.", 00097 "Request Time-out.", 00098 "Conflict.", 00099 "Gone:.", 00100 "Length Required.", 00101 "Precondition Failed.", 00102 "Request Entity Too Large.", 00103 "Request URI Too Large.", 00104 "Unsupported Media Type.", 00105 "Requested Range Not Satisfiable.", 00106 "Expectation Failed." 00107 }; 00108 00109 #define SERVER_ERR_MIN 500 00110 #define SERVER_ERR_MAX 505 00111 static const char *http_server_errors[SERVER_ERR_MAX - SERVER_ERR_MIN + 1] = 00112 { 00113 "Internal Server Error.", 00114 "Not Implemented.", 00115 "Bad Gateway.", 00116 "Service Unavailable.", 00117 "Gateway Time-out.", 00118 "HTTP Version Not Supported." 00119 }; 00120 00123 static string 00124 http_status_to_string(int status) 00125 { 00126 if (status >= CLIENT_ERR_MIN && status <= CLIENT_ERR_MAX) 00127 return string(http_client_errors[status - CLIENT_ERR_MIN]); 00128 else if (status >= SERVER_ERR_MIN && status <= SERVER_ERR_MAX) 00129 return string(http_server_errors[status - SERVER_ERR_MIN]); 00130 else 00131 return string("Unknown Error: This indicates a problem with libdap++.\nPlease report this to support@opendap.org."); 00132 } 00133 00138 class ParseHeader : public unary_function<const string &, void> 00139 { 00140 ObjectType type; // What type of object is in the stream? 00141 string server; // Server's version string. 
00142 string protocol; // Server's protocol version. 00143 string location; // Url returned by server 00144 00145 public: 00146 ParseHeader() : type(unknown_type), server("dods/0.0"), protocol("2.0") 00147 { } 00148 00149 void operator()(const string &line) 00150 { 00151 string name, value; 00152 parse_mime_header(line, name, value); 00153 if (name == "content-description") { 00154 DBG2(cerr << name << ": " << value << endl); 00155 type = get_description_type(value); 00156 } 00157 // The second test (== "dods/0.0") tests if xopendap-server has already 00158 // been seen. If so, use that header in preference to the old 00159 // XDODS-Server header. jhrg 2/7/06 00160 else if (name == "xdods-server" && server == "dods/0.0") { 00161 DBG2(cerr << name << ": " << value << endl); 00162 server = value; 00163 } 00164 else if (name == "xopendap-server") { 00165 DBG2(cerr << name << ": " << value << endl); 00166 server = value; 00167 } 00168 else if (name == "xdap") { 00169 DBG2(cerr << name << ": " << value << endl); 00170 protocol = value; 00171 } 00172 else if (server == "dods/0.0" && name == "server") { 00173 DBG2(cerr << name << ": " << value << endl); 00174 server = value; 00175 } 00176 else if (name == "location") { 00177 DBG2(cerr << name << ": " << value << endl); 00178 location = value; 00179 } 00180 else if (type == unknown_type && name == "content-type" 00181 && line.find("text/html") != string::npos) { 00182 DBG2(cerr << name << ": text/html..." << endl); 00183 type = web_error; 00184 } 00185 } 00186 00187 ObjectType get_object_type() 00188 { 00189 return type; 00190 } 00191 00192 string get_server() 00193 { 00194 return server; 00195 } 00196 00197 string get_protocol() 00198 { 00199 return protocol; 00200 } 00201 00202 string get_location() { 00203 return location; 00204 } 00205 }; 00206 00223 static size_t 00224 save_raw_http_headers(void *ptr, size_t size, size_t nmemb, void *resp_hdrs) 00225 { 00226 DBG2(cerr << "Inside the header parser." 
<< endl); 00227 vector<string> *hdrs = static_cast<vector<string> * >(resp_hdrs); 00228 00229 // Grab the header, minus the trailing newline. Or \r\n pair. 00230 string complete_line; 00231 if (nmemb > 1 && *(static_cast<char*>(ptr) + size * (nmemb - 2)) == '\r') 00232 complete_line.assign(static_cast<char *>(ptr), size * (nmemb - 2)); 00233 else 00234 complete_line.assign(static_cast<char *>(ptr), size * (nmemb - 1)); 00235 00236 // Store all non-empty headers that are not HTTP status codes 00237 if (complete_line != "" && complete_line.find("HTTP") == string::npos) { 00238 DBG(cerr << "Header line: " << complete_line << endl); 00239 hdrs->push_back(complete_line); 00240 } 00241 00242 return size * nmemb; 00243 } 00244 00246 static int 00247 curl_debug(CURL *, curl_infotype info, char *msg, size_t size, void *) 00248 { 00249 string message(msg, size); 00250 00251 switch (info) { 00252 case CURLINFO_TEXT: 00253 cerr << "Text: " << message; break; 00254 case CURLINFO_HEADER_IN: 00255 cerr << "Header in: " << message; break; 00256 case CURLINFO_HEADER_OUT: 00257 cerr << "Header out: " << message; break; 00258 case CURLINFO_DATA_IN: 00259 cerr << "Data in: " << message; break; 00260 case CURLINFO_DATA_OUT: 00261 cerr << "Data out: " << message; break; 00262 case CURLINFO_END: 00263 cerr << "End: " << message; break; 00264 #ifdef CURLINFO_SSL_DATA_IN 00265 case CURLINFO_SSL_DATA_IN: 00266 cerr << "SSL Data in: " << message; break; 00267 #endif 00268 #ifdef CURLINFO_SSL_DATA_OUT 00269 case CURLINFO_SSL_DATA_OUT: 00270 cerr << "SSL Data out: " << message; break; 00271 #endif 00272 default: 00273 cerr << "Curl info: " << message; break; 00274 } 00275 return 0; 00276 } 00277 00281 void 00282 HTTPConnect::www_lib_init() 00283 { 00284 d_curl = curl_easy_init(); 00285 if (!d_curl) 00286 throw InternalErr(__FILE__, __LINE__, "Could not initialize libcurl."); 00287 00288 // Now set options that will remain constant for the duration of this 00289 // CURL object. 
00290 00291 // Set the proxy host. 00292 if (!d_rcr->get_proxy_server_host().empty()) { 00293 DBG(cerr << "Setting up a proxy server." << endl); 00294 DBG(cerr << "Proxy host: " << d_rcr->get_proxy_server_host() 00295 << endl); 00296 DBG(cerr << "Proxy port: " << d_rcr->get_proxy_server_port() 00297 << endl); 00298 DBG(cerr << "Proxy pwd : " << d_rcr->get_proxy_server_userpw() 00299 << endl); 00300 curl_easy_setopt(d_curl, CURLOPT_PROXY, 00301 d_rcr->get_proxy_server_host().c_str()); 00302 curl_easy_setopt(d_curl, CURLOPT_PROXYPORT, 00303 d_rcr->get_proxy_server_port()); 00304 00305 // As of 4/21/08 only NTLM, Digest and Basic work. 00306 #ifdef CURLOPT_PROXYAUTH 00307 curl_easy_setopt(d_curl, CURLOPT_PROXYAUTH, (long)CURLAUTH_ANY); 00308 #endif 00309 00310 // Password might not be required. 06/21/04 jhrg 00311 if (!d_rcr->get_proxy_server_userpw().empty()) 00312 curl_easy_setopt(d_curl, CURLOPT_PROXYUSERPWD, 00313 d_rcr->get_proxy_server_userpw().c_str()); 00314 } 00315 00316 curl_easy_setopt(d_curl, CURLOPT_ERRORBUFFER, d_error_buffer); 00317 // We have to set FailOnError to false for any of the non-Basic 00318 // authentication schemes to work. 07/28/03 jhrg 00319 curl_easy_setopt(d_curl, CURLOPT_FAILONERROR, 0); 00320 00321 // This means libcurl will use Basic, Digest, GSS Negotiate, or NTLM, 00322 // choosing the the 'safest' one supported by the server. 00323 // This requires curl 7.10.6 which is still in pre-release. 07/25/03 jhrg 00324 curl_easy_setopt(d_curl, CURLOPT_HTTPAUTH, (long)CURLAUTH_ANY); 00325 00326 curl_easy_setopt(d_curl, CURLOPT_NOPROGRESS, 1); 00327 curl_easy_setopt(d_curl, CURLOPT_NOSIGNAL, 1); 00328 curl_easy_setopt(d_curl, CURLOPT_HEADERFUNCTION, save_raw_http_headers); 00329 // In read_url a call to CURLOPT_WRITEHEADER is used to set the fourth 00330 // param of save_raw_http_headers to a vector<string> object. 
00331 00332 // Follow 302 (redirect) responses 00333 curl_easy_setopt(d_curl, CURLOPT_FOLLOWLOCATION, 1); 00334 curl_easy_setopt(d_curl, CURLOPT_MAXREDIRS, 5); 00335 00336 // If the user turns off SSL validation... 00337 if (!d_rcr->get_validate_ssl() == 0) { 00338 curl_easy_setopt(d_curl, CURLOPT_SSL_VERIFYPEER, 0); 00339 curl_easy_setopt(d_curl, CURLOPT_SSL_VERIFYHOST, 0); 00340 } 00341 00342 // Look to see if cookies are turned on in the .dodsrc file. If so, 00343 // activate here. We honor 'session cookies' (cookies without an 00344 // expiration date) here so that session-base SSO systems will work as 00345 // expected. 00346 if (!d_cookie_jar.empty()) { 00347 DBG(cerr << "Setting the cookie jar to: " << d_cookie_jar << endl); 00348 curl_easy_setopt(d_curl, CURLOPT_COOKIEJAR, d_cookie_jar.c_str()); 00349 curl_easy_setopt(d_curl, CURLOPT_COOKIESESSION, 1); 00350 } 00351 00352 if (www_trace) { 00353 cerr << "Curl version: " << curl_version() << endl; 00354 curl_easy_setopt(d_curl, CURLOPT_VERBOSE, 1); 00355 curl_easy_setopt(d_curl, CURLOPT_DEBUGFUNCTION, curl_debug); 00356 } 00357 } 00358 00362 class BuildHeaders : public unary_function<const string &, void> 00363 { 00364 struct curl_slist *d_cl; 00365 00366 public: 00367 BuildHeaders() : d_cl(0) 00368 {} 00369 00370 void operator()(const string &header) 00371 { 00372 DBG(cerr << "Adding '" << header.c_str() << "' to the header list." 00373 << endl); 00374 d_cl = curl_slist_append(d_cl, header.c_str()); 00375 } 00376 00377 struct curl_slist *get_headers() 00378 { 00379 return d_cl; 00380 } 00381 }; 00382 00397 long 00398 HTTPConnect::read_url(const string &url, FILE *stream, 00399 vector<string> *resp_hdrs, 00400 const vector<string> *headers) 00401 { 00402 curl_easy_setopt(d_curl, CURLOPT_URL, url.c_str()); 00403 00404 #ifdef WIN32 00405 // See the curl documentation for CURLOPT_FILE (aka CURLOPT_WRITEDATA) 00406 // and the CURLOPT_WRITEFUNCTION option. 
Quote: "If you are using libcurl as 00407 // a win32 DLL, you MUST use the CURLOPT_WRITEFUNCTION option if you set the 00408 // CURLOPT_WRITEDATA option or you will experience crashes". At the root of 00409 // this issue is that one should not pass a FILE * to a windows DLL. Close 00410 // inspection of libcurl yields that their default write function when using 00411 // the CURLOPT_WRITEDATA is just "fwrite". 00412 curl_easy_setopt(d_curl, CURLOPT_FILE, stream); 00413 curl_easy_setopt(d_curl, CURLOPT_WRITEFUNCTION, &fwrite); 00414 #else 00415 curl_easy_setopt(d_curl, CURLOPT_FILE, stream); 00416 #endif 00417 00418 DBG(copy(d_request_headers.begin(), d_request_headers.end(), 00419 ostream_iterator<string>(cerr, "\n"))); 00420 00421 BuildHeaders req_hdrs; 00422 req_hdrs = for_each(d_request_headers.begin(), d_request_headers.end(), 00423 req_hdrs); 00424 if (headers) 00425 req_hdrs = for_each(headers->begin(), headers->end(), req_hdrs); 00426 curl_easy_setopt(d_curl, CURLOPT_HTTPHEADER, req_hdrs.get_headers()); 00427 00428 // Turn off the proxy for this URL? 00429 bool temporary_proxy = false; 00430 if ((temporary_proxy = url_uses_no_proxy_for(url))) { 00431 DBG(cerr << "Suppress proxy for url: " << url << endl); 00432 curl_easy_setopt(d_curl, CURLOPT_PROXY, 0); 00433 } 00434 00435 string::size_type at_sign = url.find('@'); 00436 // Assume username:password present *and* assume it's an HTTP URL; it *is* 00437 // HTTPConnect, after all. 7 is position after "http://"; the second arg 00438 // to substr() is the sub string length. 00439 if (at_sign != url.npos) 00440 d_upstring = url.substr(7, at_sign - 7); 00441 00442 if (!d_upstring.empty()) 00443 curl_easy_setopt(d_curl, CURLOPT_USERPWD, d_upstring.c_str()); 00444 00445 // Pass save_raw_http_headers() a pointer to the vector<string> where the 00446 // response headers may be stored. Callers can use the resp_hdrs 00447 // value/result parameter to get the raw response header information . 
00448 curl_easy_setopt(d_curl, CURLOPT_WRITEHEADER, resp_hdrs); 00449 00450 CURLcode res = curl_easy_perform(d_curl); 00451 00452 // Free the header list and null the value in d_curl. 00453 curl_slist_free_all(req_hdrs.get_headers()); 00454 curl_easy_setopt(d_curl, CURLOPT_HTTPHEADER, 0); 00455 00456 // Reset the proxy? 00457 if (temporary_proxy && !d_rcr->get_proxy_server_host().empty()) 00458 curl_easy_setopt(d_curl, CURLOPT_PROXY, 00459 d_rcr->get_proxy_server_host().c_str()); 00460 00461 if (res != 0) 00462 throw Error(d_error_buffer); 00463 00464 long status; 00465 res = curl_easy_getinfo(d_curl, CURLINFO_HTTP_CODE, &status); 00466 if (res != 0) 00467 throw Error(d_error_buffer); 00468 00469 return status; 00470 } 00471 00475 bool 00476 HTTPConnect::url_uses_proxy_for(const string &url) throw() 00477 { 00478 if (d_rcr->is_proxy_for_used()) { 00479 Regex host_regex(d_rcr->get_proxy_for_regexp().c_str()); 00480 int index = 0, matchlen; 00481 return host_regex.search(url.c_str(), url.size(), matchlen, index) != -1; 00482 } 00483 00484 return false; 00485 } 00486 00490 bool 00491 HTTPConnect::url_uses_no_proxy_for(const string &url) throw() 00492 { 00493 return d_rcr->is_no_proxy_for_used() 00494 && url.find(d_rcr->get_no_proxy_for_host()) != string::npos; 00495 } 00496 00497 // Public methods. Mostly... 00498 00505 HTTPConnect::HTTPConnect(RCReader *rcr) : d_username(""), d_password(""), 00506 d_cookie_jar(""), 00507 d_dap_client_protocol_major(2), 00508 d_dap_client_protocol_minor(0) 00509 00510 { 00511 d_accept_deflate = rcr->get_deflate(); 00512 d_rcr = rcr; 00513 00514 // Load in the default headers to send with a request. The empty Pragma 00515 // headers overrides libcurl's default Pragma: no-cache header (which 00516 // will disable caching by Squid, et c.). The User-Agent header helps 00517 // make server logs more readable. 
05/05/03 jhrg 00518 d_request_headers.push_back(string("Pragma:")); 00519 string user_agent = string("User-Agent: ") + string(CNAME) 00520 + string("/") + string(CVER); 00521 d_request_headers.push_back(user_agent); 00522 if (d_accept_deflate) 00523 d_request_headers.push_back(string("Accept-Encoding: deflate, gzip, compress")); 00524 00525 // HTTPCache::instance returns a valid ptr or 0. 00526 if (d_rcr->get_use_cache()) 00527 d_http_cache = HTTPCache::instance(d_rcr->get_dods_cache_root(), 00528 true); 00529 else 00530 d_http_cache = 0; 00531 00532 DBG2(cerr << "Cache object created (" << hex << d_http_cache << dec 00533 << ")" << endl); 00534 00535 if (d_http_cache) { 00536 d_http_cache->set_cache_enabled(d_rcr->get_use_cache()); 00537 d_http_cache->set_expire_ignored(d_rcr->get_ignore_expires() != 0); 00538 d_http_cache->set_max_size(d_rcr->get_max_cache_size()); 00539 d_http_cache->set_max_entry_size(d_rcr->get_max_cached_obj()); 00540 d_http_cache->set_default_expiration(d_rcr->get_default_expires()); 00541 d_http_cache->set_always_validate(d_rcr->get_always_validate() != 0); 00542 } 00543 00544 d_cookie_jar = rcr->get_cookie_jar(); 00545 00546 www_lib_init(); // This may throw either Error or InternalErr 00547 } 00548 00549 HTTPConnect::~HTTPConnect() 00550 { 00551 DBG2(cerr << "Entering the HTTPConnect dtor" << endl); 00552 00553 curl_easy_cleanup(d_curl); 00554 00555 DBG2(cerr << "Leaving the HTTPConnect dtor" << endl); 00556 } 00557 00570 HTTPResponse * 00571 HTTPConnect::fetch_url(const string &url) 00572 { 00573 #ifdef HTTP_TRACE 00574 cout << "GET " << url << " HTTP/1.0" << endl; 00575 #endif 00576 00577 HTTPResponse *stream; 00578 00579 if (d_http_cache && d_http_cache->is_cache_enabled()) { 00580 stream = caching_fetch_url(url); 00581 } 00582 else { 00583 stream = plain_fetch_url(url); 00584 } 00585 00586 #ifdef HTTP_TRACE 00587 stringstream ss; 00588 ss << "HTTP/1.0 " << stream->get_status() << " -" << endl; 00589 for (size_t i = 0; i < 
stream->get_headers()->size(); i++) { 00590 ss << stream->get_headers()->at(i) << endl; 00591 } 00592 cout << ss.str(); 00593 #endif 00594 00595 ParseHeader parser; 00596 00597 parser = for_each(stream->get_headers()->begin(), 00598 stream->get_headers()->end(), ParseHeader()); 00599 00600 #ifdef HTTP_TRACE 00601 cout << endl << endl; 00602 #endif 00603 00604 // handle redirection case (2007-04-27, gaffigan@sfos.uaf.edu) 00605 if (parser.get_location() != "" && 00606 url.substr(0,url.find("?",0)).compare(parser.get_location().substr(0,url.find("?",0))) != 0) { 00607 delete stream; 00608 return fetch_url(parser.get_location()); 00609 } 00610 00611 stream->set_type(parser.get_object_type()); 00612 stream->set_version(parser.get_server()); 00613 stream->set_protocol(parser.get_protocol()); 00614 00615 return stream; 00616 } 00617 00618 // Look around for a reasonable place to put a temporary file. Check first 00619 // the value of the TMPDIR env var. If that does not yeild a path that's 00620 // writable (as defined by access(..., W_OK|R_OK)) then look at P_tmpdir (as 00621 // defined in stdio.h. If both come up empty, then use `./'. 00622 00623 // Change this to a version that either returns a string or an open file 00624 // descriptor. Use information from https://buildsecurityin.us-cert.gov/ 00625 // (see open()) to make it more secure. Ideal solution: get deserialize() 00626 // methods to read from a stream returned by libcurl, not from a temporary 00627 // file. 9/21/07 jhrg Updated to use strings, so other misc changes. 3/22/11 00628 static string 00629 get_tempfile_template(const string &file_template) 00630 { 00631 string c; 00632 00633 // Windows has one idea of the standard name(s) for a temporary files dir 00634 #ifdef WIN32 00635 // white list for a WIN32 directory 00636 Regex directory("[-a-zA-Z0-9_:\\]*"); 00637 00638 // If we're OK to use getenv(), try it. 
00639 #ifdef USE_GETENV 00640 c = getenv("TEMP"); 00641 if (c && directory.match(c.c_str(), c.length()) && (access(c.c_str(), 6) == 0)) 00642 goto valid_temp_directory; 00643 00644 c= getenv("TMP"); 00645 if (c && directory.match(c.c_str(), c.length()) && (access(c.c_str(), 6) == 0)) 00646 goto valid_temp_directory; 00647 #endif // USE_GETENV 00648 00649 // The windows default 00650 c = "c:\tmp"; 00651 if (c && directory.match(c.c_str(), c.length()) && (access(c.c_str(), 6) == 0)) 00652 goto valid_temp_directory; 00653 00654 #else // Unix/Linux/OSX has another... 00655 // white list for a directory 00656 Regex directory("[-a-zA-Z0-9_/]*"); 00657 #ifdef USE_GETENV 00658 c = getenv("TMPDIR"); 00659 if (directory.match(c.c_str(), c.length()) && (access(c.c_str(), W_OK | R_OK) == 0)) 00660 goto valid_temp_directory; 00661 #endif // USE_GETENV 00662 00663 // Unix defines this sometimes - if present, use it. 00664 #ifdef P_tmpdir 00665 if (access(P_tmpdir, W_OK | R_OK) == 0) { 00666 c = P_tmpdir; 00667 goto valid_temp_directory; 00668 } 00669 #endif 00670 00671 // The Unix default 00672 c = "/tmp"; 00673 if (directory.match(c.c_str(), c.length()) && (access(c.c_str(), W_OK | R_OK) == 0)) 00674 goto valid_temp_directory; 00675 00676 #endif // WIN32 00677 00678 // If we found nothing useful, use the current directory 00679 c = "."; 00680 00681 valid_temp_directory: 00682 00683 #ifdef WIN32 00684 c += "\\" + file_template; 00685 #else 00686 c += "/" + file_template; 00687 #endif 00688 00689 return c; 00690 } 00691 00710 string 00711 get_temp_file(FILE *&stream) throw(InternalErr) 00712 { 00713 string dods_temp = get_tempfile_template((string)"dodsXXXXXX"); 00714 00715 vector<char> pathname(dods_temp.length() + 1); 00716 00717 strncpy(&pathname[0], dods_temp.c_str(), dods_temp.length()); 00718 00719 DBG(cerr << "pathanme: " << &pathname[0] << " (" << dods_temp.length() + 1 << ")" << endl); 00720 00721 // Open truncated for update. NB: mkstemp() returns a file descriptor. 
00722 #if defined(WIN32) || defined(TEST_WIN32_TEMPS) 00723 stream = fopen(_mktemp(&pathname[0]), "w+b"); 00724 #else 00725 // Make sure that temp files are accessible only by the owner. 00726 umask(077); 00727 stream = fdopen(mkstemp(&pathname[0]), "w+"); 00728 #endif 00729 00730 if (!stream) { 00731 throw InternalErr(__FILE__, __LINE__, 00732 "Failed to open a temporary file for the data values (" 00733 + dods_temp + ")"); 00734 } 00735 00736 dods_temp = &pathname[0]; 00737 return dods_temp; 00738 } 00739 00741 void 00742 close_temp(FILE *s, const string &name) 00743 { 00744 int res = fclose(s); 00745 if (res) 00746 throw InternalErr(__FILE__, __LINE__, "!FAIL! " + long_to_string(res)); 00747 00748 res = unlink(name.c_str()); 00749 if (res != 0) 00750 throw InternalErr(__FILE__, __LINE__, "!FAIL! " + long_to_string(res)); 00751 } 00752 00774 HTTPResponse * 00775 HTTPConnect::caching_fetch_url(const string &url) 00776 { 00777 DBG(cerr << "Is this URL (" << url << ") in the cache?... "); 00778 00779 vector<string> *headers = new vector<string>; 00780 string file_name; 00781 FILE *s = d_http_cache->get_cached_response(url, *headers, file_name); 00782 if (!s) { 00783 // url not in cache; get it and cache it 00784 DBGN(cerr << "no; getting response and caching." << endl); 00785 delete headers; headers = 0; 00786 time_t now = time(0); 00787 HTTPResponse *rs = plain_fetch_url(url); 00788 d_http_cache->cache_response(url, now, *(rs->get_headers()), rs->get_stream()); 00789 00790 return rs; 00791 } 00792 else { // url in cache 00793 DBGN(cerr << "yes... "); 00794 00795 if (d_http_cache->is_url_valid(url)) { // url in cache and valid 00796 DBGN(cerr << "and it's valid; using cached response." << endl); 00797 HTTPCacheResponse *crs = new HTTPCacheResponse(s, 200, headers, file_name, d_http_cache); 00798 return crs; 00799 } 00800 else { // url in cache but not valid; validate 00801 DBGN(cerr << "but it's not valid; validating... 
"); 00802 00803 d_http_cache->release_cached_response(s); // This closes 's' 00804 headers->clear(); 00805 vector<string> cond_hdrs = d_http_cache->get_conditional_request_headers(url); 00806 FILE *body = 0; 00807 string dods_temp = get_temp_file(body); 00808 time_t now = time(0); // When was the request made (now). 00809 long http_status; 00810 00811 try { 00812 http_status = read_url(url, body, /*resp_hdrs*/headers, &cond_hdrs); 00813 rewind(body); 00814 } 00815 catch (Error &e) { 00816 close_temp(body, dods_temp); 00817 delete headers; 00818 throw ; 00819 } 00820 00821 switch (http_status) { 00822 case 200: { // New headers and new body 00823 DBGN(cerr << "read a new response; caching." << endl); 00824 00825 d_http_cache->cache_response(url, now, /* *resp_hdrs*/*headers, body); 00826 HTTPResponse *rs = new HTTPResponse(body, http_status, /*resp_hdrs*/headers, dods_temp); 00827 00828 return rs; 00829 } 00830 00831 case 304: { // Just new headers, use cached body 00832 DBGN(cerr << "cached response valid; updating." << endl); 00833 00834 close_temp(body, dods_temp); 00835 d_http_cache->update_response(url, now, /* *resp_hdrs*/ *headers); 00836 string file_name; 00837 FILE *hs = d_http_cache->get_cached_response(url, *headers, file_name); 00838 HTTPCacheResponse *crs = new HTTPCacheResponse(hs, 304, headers, file_name, d_http_cache); 00839 return crs; 00840 } 00841 00842 default: { // Oops. 
00843 close_temp(body, dods_temp); 00844 if (http_status >= 400) { 00845 delete headers; headers = 0; 00846 string msg = "Error while reading the URL: "; 00847 msg += url; 00848 msg 00849 += ".\nThe OPeNDAP server returned the following message:\n"; 00850 msg += http_status_to_string(http_status); 00851 throw Error(msg); 00852 } 00853 else { 00854 delete headers; headers = 0; 00855 throw InternalErr(__FILE__, __LINE__, 00856 "Bad response from the HTTP server: " + long_to_string(http_status)); 00857 } 00858 } 00859 } 00860 } 00861 } 00862 00863 throw InternalErr(__FILE__, __LINE__, "Should never get here"); 00864 } 00865 00877 HTTPResponse * 00878 HTTPConnect::plain_fetch_url(const string &url) 00879 { 00880 DBG(cerr << "Getting URL: " << url << endl); 00881 FILE *stream = 0; 00882 string dods_temp = get_temp_file(stream); 00883 vector<string> *resp_hdrs = new vector<string>; 00884 00885 int status = -1; 00886 try { 00887 status = read_url(url, stream, resp_hdrs); // Throws Error. 00888 if (status >= 400) { 00889 delete resp_hdrs; 00890 string msg = "Error while reading the URL: "; 00891 msg += url; 00892 msg += ".\nThe OPeNDAP server returned the following message:\n"; 00893 msg += http_status_to_string(status); 00894 throw Error(msg); 00895 } 00896 } 00897 00898 catch (Error &e) { 00899 delete resp_hdrs; 00900 close_temp(stream, dods_temp); 00901 throw; 00902 } 00903 00904 rewind(stream); 00905 00906 return new HTTPResponse(stream, status, resp_hdrs, dods_temp); 00907 } 00908 00920 void 00921 HTTPConnect::set_accept_deflate(bool deflate) 00922 { 00923 d_accept_deflate = deflate; 00924 00925 if (d_accept_deflate) { 00926 if (find(d_request_headers.begin(), d_request_headers.end(), 00927 "Accept-Encoding: deflate, gzip, compress") == d_request_headers.end()) 00928 d_request_headers.push_back(string("Accept-Encoding: deflate, gzip, compress")); 00929 DBG(copy(d_request_headers.begin(), d_request_headers.end(), 00930 ostream_iterator<string>(cerr, "\n"))); 00931 } 
00932 else { 00933 vector<string>::iterator i; 00934 i = remove_if(d_request_headers.begin(), d_request_headers.end(), 00935 bind2nd(equal_to<string>(), 00936 string("Accept-Encoding: deflate, gzip, compress"))); 00937 d_request_headers.erase(i, d_request_headers.end()); 00938 } 00939 } 00940 00942 class HeaderMatch : public unary_function<const string &, bool> { 00943 const string &d_header; 00944 public: 00945 HeaderMatch(const string &header) : d_header(header) {} 00946 bool operator()(const string &arg) { return arg.find(d_header) == 0; } 00947 }; 00948 00957 void 00958 HTTPConnect::set_xdap_protocol(int major, int minor) 00959 { 00960 // Look for, and remove if one exists, an XDAP-Accept header 00961 vector<string>::iterator i; 00962 i = find_if(d_request_headers.begin(), d_request_headers.end(), 00963 HeaderMatch("XDAP-Accept:")); 00964 if (i != d_request_headers.end()) 00965 d_request_headers.erase(i); 00966 00967 // Record and add the new header value 00968 d_dap_client_protocol_major = major; 00969 d_dap_client_protocol_minor = minor; 00970 ostringstream xdap_accept; 00971 xdap_accept << "XDAP-Accept: " << major << "." << minor; 00972 00973 d_request_headers.push_back(xdap_accept.str()); 00974 00975 DBG(copy(d_request_headers.begin(), d_request_headers.end(), 00976 ostream_iterator<string>(cerr, "\n"))); 00977 } 00978 00994 void 00995 HTTPConnect::set_credentials(const string &u, const string &p) 00996 { 00997 if (u.empty()) 00998 return; 00999 01000 // Store the credentials locally. 01001 d_username = u; 01002 d_password = p; 01003 01004 d_upstring = u + ":" + p; 01005 } 01006 01007 } // namespace libdap