libdap++ — Updated for version 3.8.2
// -*- mode: c++; c-basic-offset:4 -*-

// This file is part of libdap, A C++ implementation of the OPeNDAP Data
// Access Protocol.

// Copyright (c) 2002,2003 OPeNDAP, Inc.
// Author: James Gallagher <jgallagher@opendap.org>
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License along with this library; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
//
// You can contact OPeNDAP, Inc. at PO Box 112, Saunderstown, RI. 02874-0112.
00025 00026 #include "config.h" 00027 00028 // #define DODS_DEBUG 00029 // #define DODS_DEBUG2 00030 #undef USE_GETENV 00031 00032 #include <pthread.h> 00033 #include <limits.h> 00034 #include <unistd.h> // for stat 00035 #include <sys/types.h> // for stat and mkdir 00036 #include <sys/stat.h> 00037 00038 #include <cstring> 00039 #include <iostream> 00040 #include <sstream> 00041 #include <algorithm> 00042 #include <iterator> 00043 #include <set> 00044 00045 #include "Error.h" 00046 #include "InternalErr.h" 00047 #include "ResponseTooBigErr.h" 00048 #ifndef WIN32 00049 #include "SignalHandler.h" 00050 #endif 00051 #include "HTTPCacheInterruptHandler.h" 00052 #include "HTTPCacheTable.h" 00053 #include "HTTPCache.h" 00054 #include "HTTPCacheMacros.h" 00055 00056 #include "util_mit.h" 00057 #include "debug.h" 00058 00059 using namespace std; 00060 00061 namespace libdap { 00062 00063 HTTPCache *HTTPCache::_instance = 0; 00064 00065 // instance_mutex is used to ensure that only one instance is created. 00066 // That is, it protects the body of the HTTPCache::instance() method. This 00067 // mutex is initialized from within the static function once_init_routine() 00068 // and the call to that takes place using pthread_once_init() where the mutex 00069 // once_block is used to protect that call. All of this ensures that no matter 00070 // how many threads call the instance() method, only one instance is ever 00071 // made. 00072 static pthread_mutex_t instance_mutex; 00073 static pthread_once_t once_block = PTHREAD_ONCE_INIT; 00074 00075 00076 #define NO_LM_EXPIRATION 24*3600 // 24 hours 00077 00078 #define DUMP_FREQUENCY 10 // Dump index every x loads 00079 00080 #define MEGA 0x100000L 00081 #define CACHE_TOTAL_SIZE 20 // Default cache size is 20M 00082 #define CACHE_FOLDER_PCT 10 // 10% of cache size for metainfo etc. 
00083 #define CACHE_GC_PCT 10 // 10% of cache size free after GC 00084 #define MIN_CACHE_TOTAL_SIZE 5 // 5M Min cache size 00085 #define MAX_CACHE_ENTRY_SIZE 3 // 3M Max size of single cached entry 00086 00087 static void 00088 once_init_routine() 00089 { 00090 int status; 00091 status = INIT(&instance_mutex); 00092 00093 if (status != 0) 00094 throw InternalErr(__FILE__, __LINE__, "Could not initialize the HTTP Cache mutex. Exiting."); 00095 } 00096 00125 HTTPCache * 00126 HTTPCache::instance(const string &cache_root, bool force) 00127 { 00128 int status = pthread_once(&once_block, once_init_routine); 00129 if (status != 0) 00130 throw InternalErr(__FILE__, __LINE__, "Could not initialize the HTTP Cache mutex. Exiting."); 00131 00132 LOCK(&instance_mutex); 00133 00134 DBG(cerr << "Entering instance(); (" << hex << _instance << dec << ")" 00135 << "... "); 00136 00137 try { 00138 if (!_instance) { 00139 _instance = new HTTPCache(cache_root, force); 00140 00141 DBG(cerr << "New instance: " << _instance << ", cache root: " 00142 << _instance->d_cache_root << endl); 00143 00144 atexit(delete_instance); 00145 00146 #ifndef WIN32 00147 // Register the interrupt handler. If we've already registered 00148 // one, barf. If this becomes a problem, hack SignalHandler so 00149 // that we can chain these handlers... 02/10/04 jhrg 00150 // 00151 // Technically we're leaking memory here. However, since this 00152 // class is a singleton, we know that only three objects will 00153 // ever be created and they will all exist until the process 00154 // exits. We can let this slide... 
02/12/04 jhrg 00155 EventHandler *old_eh = SignalHandler::instance()->register_handler 00156 (SIGINT, new HTTPCacheInterruptHandler); 00157 if (old_eh) { 00158 SignalHandler::instance()->register_handler(SIGINT, old_eh); 00159 throw SignalHandlerRegisteredErr( 00160 "Could not register event handler for SIGINT without superseding an existing one."); 00161 } 00162 00163 old_eh = SignalHandler::instance()->register_handler 00164 (SIGPIPE, new HTTPCacheInterruptHandler); 00165 if (old_eh) { 00166 SignalHandler::instance()->register_handler(SIGPIPE, old_eh); 00167 throw SignalHandlerRegisteredErr( 00168 "Could not register event handler for SIGPIPE without superseding an existing one."); 00169 } 00170 00171 old_eh = SignalHandler::instance()->register_handler 00172 (SIGTERM, new HTTPCacheInterruptHandler); 00173 if (old_eh) { 00174 SignalHandler::instance()->register_handler(SIGTERM, old_eh); 00175 throw SignalHandlerRegisteredErr( 00176 "Could not register event handler for SIGTERM without superseding an existing one."); 00177 } 00178 #endif 00179 } 00180 } 00181 catch (...) { 00182 DBG2(cerr << "The constructor threw an Error!" << endl); 00183 UNLOCK(&instance_mutex); 00184 throw; 00185 } 00186 00187 UNLOCK(&instance_mutex); 00188 DBGN(cerr << "returning " << hex << _instance << dec << endl); 00189 00190 return _instance; 00191 } 00192 00196 void 00197 HTTPCache::delete_instance() 00198 { 00199 DBG(cerr << "Entering delete_instance()..." 
<< endl); 00200 if (HTTPCache::_instance) { 00201 DBG(cerr << "Deleting the cache: " << HTTPCache::_instance << endl); 00202 delete HTTPCache::_instance; 00203 HTTPCache::_instance = 0; 00204 } 00205 00206 DBG(cerr << "Exiting delete_instance()" << endl); 00207 } 00208 00223 HTTPCache::HTTPCache(string cache_root, bool force) : 00224 d_locked_open_file(0), 00225 d_cache_enabled(false), 00226 d_cache_protected(false), 00227 d_expire_ignored(false), 00228 d_always_validate(false), 00229 d_total_size(CACHE_TOTAL_SIZE * MEGA), 00230 d_folder_size(CACHE_TOTAL_SIZE / CACHE_FOLDER_PCT), 00231 d_gc_buffer(CACHE_TOTAL_SIZE / CACHE_GC_PCT), 00232 d_max_entry_size(MAX_CACHE_ENTRY_SIZE * MEGA), 00233 d_default_expiration(NO_LM_EXPIRATION), 00234 d_max_age(-1), 00235 d_max_stale(-1), 00236 d_min_fresh(-1), 00237 d_http_cache_table(0) 00238 { 00239 DBG(cerr << "Entering the constructor for " << this << "... "); 00240 #if 0 00241 int status = pthread_once(&once_block, once_init_routine); 00242 if (status != 0) 00243 throw InternalErr(__FILE__, __LINE__, "Could not initialize the HTTP Cache mutex. Exiting."); 00244 #endif 00245 INIT(&d_cache_mutex); 00246 00247 // This used to throw an Error object if we could not get the 00248 // single user lock. However, that results in an invalid object. It's 00249 // better to have an instance that has default values. If we cannot get 00250 // the lock, make sure to set the cache as *disabled*. 03/12/03 jhrg 00251 // 00252 // I fixed this block so that the cache root is set before we try to get 00253 // the single user lock. That was the fix for bug #661. To make that 00254 // work, I had to move the call to create_cache_root out of 00255 // set_cache_root(). 09/08/03 jhrg 00256 00257 set_cache_root(cache_root); 00258 int block_size; 00259 00260 if (!get_single_user_lock(force)) 00261 throw Error("Could not get single user lock for the cache"); 00262 00263 #ifdef WIN32 00264 // Windows is unable to provide us this information. 
4096 appears 00265 // a best guess. It is likely to be in the range [2048, 8192] on 00266 // windows, but will the level of truth of that statement vary over 00267 // time ? 00268 block_size = 4096; 00269 #else 00270 struct stat s; 00271 if (stat(cache_root.c_str(), &s) == 0) 00272 block_size = s.st_blksize; 00273 else 00274 throw Error("Could not set file system block size."); 00275 #endif 00276 d_http_cache_table = new HTTPCacheTable(d_cache_root, block_size); 00277 d_cache_enabled = true; 00278 00279 DBGN(cerr << "exiting" << endl); 00280 } 00281 00294 HTTPCache::~HTTPCache() 00295 { 00296 DBG(cerr << "Entering the destructor for " << this << "... "); 00297 00298 try { 00299 if (startGC()) 00300 perform_garbage_collection(); 00301 00302 d_http_cache_table->cache_index_write(); 00303 } 00304 catch (Error &e) { 00305 // If the cache index cannot be written, we've got problems. However, 00306 // unless we're debugging, still free up the cache table in memory. 00307 // How should we let users know they cache index is not being 00308 // written?? 10/03/02 jhrg 00309 DBG(cerr << e.get_error_message() << endl); 00310 } 00311 00312 delete d_http_cache_table; 00313 00314 release_single_user_lock(); 00315 00316 DBGN(cerr << "exiting destructor." << endl); 00317 DESTROY(&d_cache_mutex); 00318 } 00319 00320 00324 00328 bool 00329 HTTPCache::stopGC() const 00330 { 00331 return (d_http_cache_table->get_current_size() + d_folder_size < d_total_size - d_gc_buffer); 00332 } 00333 00340 bool 00341 HTTPCache::startGC() const 00342 { 00343 DBG(cerr << "startGC, current_size: " << d_http_cache_table->get_current_size() << endl); 00344 return (d_http_cache_table->get_current_size() + d_folder_size > d_total_size); 00345 } 00346 00361 void 00362 HTTPCache::perform_garbage_collection() 00363 { 00364 DBG(cerr << "Performing garbage collection" << endl); 00365 00366 // Remove all the expired responses. 00367 expired_gc(); 00368 00369 // Remove entries larger than max_entry_size. 
00370 too_big_gc(); 00371 00372 // Remove entries starting with zero hits, 1, ..., until stopGC() 00373 // returns true. 00374 hits_gc(); 00375 } 00376 00382 void 00383 HTTPCache::expired_gc() 00384 { 00385 if (!d_expire_ignored) { 00386 d_http_cache_table->delete_expired_entries(); 00387 } 00388 } 00389 00406 void 00407 HTTPCache::hits_gc() 00408 { 00409 int hits = 0; 00410 00411 if (startGC()) { 00412 while (!stopGC()) { 00413 d_http_cache_table->delete_by_hits(hits); 00414 hits++; 00415 } 00416 } 00417 } 00418 00423 void HTTPCache::too_big_gc() { 00424 if (startGC()) 00425 d_http_cache_table->delete_by_size(d_max_entry_size); 00426 } 00427 00429 00440 bool HTTPCache::get_single_user_lock(bool force) 00441 { 00442 if (!d_locked_open_file) { 00443 FILE * fp = NULL; 00444 00445 try { 00446 // It's OK to call create_cache_root if the directory already 00447 // exists. 00448 create_cache_root(d_cache_root); 00449 } 00450 catch (Error &e) { 00451 // We need to catch and return false because this method is 00452 // called from a ctor and throwing at this point will result in a 00453 // partially constructed object. 01/22/04 jhrg 00454 DBG(cerr << "Failure to create the cache root" << endl); 00455 return false; 00456 } 00457 00458 // Try to read the lock file. If we can open for reading, it exists. 
00459 string lock = d_cache_root + CACHE_LOCK; 00460 if ((fp = fopen(lock.c_str(), "r")) != NULL) { 00461 int res = fclose(fp); 00462 if (res) { 00463 DBG(cerr << "Failed to close " << (void *)fp << endl); 00464 } 00465 if (force) 00466 REMOVE(lock.c_str()); 00467 else 00468 return false; 00469 } 00470 00471 if ((fp = fopen(lock.c_str(), "w")) == NULL) { 00472 DBG(cerr << "Could not open for write access" << endl); 00473 return false; 00474 } 00475 00476 d_locked_open_file = fp; 00477 return true; 00478 } 00479 00480 cerr << "locked_open_file is true" << endl; 00481 return false; 00482 } 00483 00486 void 00487 HTTPCache::release_single_user_lock() 00488 { 00489 if (d_locked_open_file) { 00490 int res = fclose(d_locked_open_file); 00491 if (res) { 00492 DBG(cerr << "Failed to close " << (void *)d_locked_open_file << endl) ; 00493 } 00494 d_locked_open_file = 0; 00495 } 00496 00497 string lock = d_cache_root + CACHE_LOCK; 00498 REMOVE(lock.c_str()); 00499 } 00500 00503 00507 string 00508 HTTPCache::get_cache_root() const 00509 { 00510 return d_cache_root; 00511 } 00512 00513 00522 void 00523 HTTPCache::create_cache_root(const string &cache_root) 00524 { 00525 struct stat stat_info; 00526 string::size_type cur = 0; 00527 00528 #ifdef WIN32 00529 cur = cache_root[1] == ':' ? 3 : 1; 00530 typedef int mode_t; 00531 #else 00532 cur = 1; 00533 #endif 00534 while ((cur = cache_root.find(DIR_SEPARATOR_CHAR, cur)) != string::npos) { 00535 string dir = cache_root.substr(0, cur); 00536 if (stat(dir.c_str(), &stat_info) == -1) { 00537 DBG2(cerr << "Cache....... Creating " << dir << endl); 00538 mode_t mask = UMASK(0); 00539 if (MKDIR(dir.c_str(), 0777) < 0) { 00540 DBG2(cerr << "Error: can't create." << endl); 00541 UMASK(mask); 00542 throw Error(string("Could not create the directory for the cache. Failed when building path at ") + dir + string(".")); 00543 } 00544 UMASK(mask); 00545 } 00546 else { 00547 DBG2(cerr << "Cache....... 
Found " << dir << endl); 00548 } 00549 cur++; 00550 } 00551 } 00552 00567 void 00568 HTTPCache::set_cache_root(const string &root) 00569 { 00570 if (root != "") { 00571 d_cache_root = root; 00572 // cache root should end in /. 00573 if (d_cache_root[d_cache_root.size()-1] != DIR_SEPARATOR_CHAR) 00574 d_cache_root += DIR_SEPARATOR_CHAR; 00575 } 00576 else { 00577 // If no cache root has been indicated then look for a suitable 00578 // location. 00579 #ifdef USE_GETENV 00580 char * cr = (char *) getenv("DODS_CACHE"); 00581 if (!cr) cr = (char *) getenv("TMP"); 00582 if (!cr) cr = (char *) getenv("TEMP"); 00583 if (!cr) cr = (char*)CACHE_LOCATION; 00584 d_cache_root = cr; 00585 #else 00586 d_cache_root = CACHE_LOCATION; 00587 #endif 00588 00589 if (d_cache_root[d_cache_root.size()-1] != DIR_SEPARATOR_CHAR) 00590 d_cache_root += DIR_SEPARATOR_CHAR; 00591 00592 d_cache_root += CACHE_ROOT; 00593 } 00594 00595 // Test d_hhtp_cache_table because this method can be called before that 00596 // instance is created and also can be called later to cahnge the cache 00597 // root. 
jhrg 05.14.08 00598 if (d_http_cache_table) 00599 d_http_cache_table->set_cache_root(d_cache_root); 00600 } 00601 00613 void 00614 HTTPCache::set_cache_enabled(bool mode) 00615 { 00616 lock_cache_interface(); 00617 00618 d_cache_enabled = mode; 00619 00620 unlock_cache_interface(); 00621 } 00622 00625 bool 00626 HTTPCache::is_cache_enabled() const 00627 { 00628 DBG2(cerr << "In HTTPCache::is_cache_enabled: (" << d_cache_enabled << ")" 00629 << endl); 00630 return d_cache_enabled; 00631 } 00632 00643 void 00644 HTTPCache::set_cache_disconnected(CacheDisconnectedMode mode) 00645 { 00646 lock_cache_interface(); 00647 00648 d_cache_disconnected = mode; 00649 00650 unlock_cache_interface(); 00651 } 00652 00655 CacheDisconnectedMode 00656 HTTPCache::get_cache_disconnected() const 00657 { 00658 return d_cache_disconnected; 00659 } 00660 00669 void 00670 HTTPCache::set_expire_ignored(bool mode) 00671 { 00672 lock_cache_interface(); 00673 00674 d_expire_ignored = mode; 00675 00676 unlock_cache_interface(); 00677 } 00678 00679 /* Is the cache ignoring Expires headers returned with responses that have 00680 been cached? */ 00681 00682 bool 00683 HTTPCache::is_expire_ignored() const 00684 { 00685 return d_expire_ignored; 00686 } 00687 00703 void 00704 HTTPCache::set_max_size(unsigned long size) 00705 { 00706 lock_cache_interface(); 00707 00708 try { 00709 unsigned long new_size = size < MIN_CACHE_TOTAL_SIZE ? 00710 MIN_CACHE_TOTAL_SIZE * MEGA : 00711 (size > ULONG_MAX ? ULONG_MAX : size * MEGA); 00712 unsigned long old_size = d_total_size; 00713 d_total_size = new_size; 00714 d_folder_size = d_total_size / CACHE_FOLDER_PCT; 00715 d_gc_buffer = d_total_size / CACHE_GC_PCT; 00716 00717 if (new_size < old_size && startGC()) { 00718 perform_garbage_collection(); 00719 d_http_cache_table->cache_index_write(); 00720 } 00721 } 00722 catch (...) { 00723 unlock_cache_interface(); 00724 DBGN(cerr << "Unlocking interface." 
<< endl); 00725 throw; 00726 } 00727 00728 DBG2(cerr << "Cache....... Total cache size: " << d_total_size 00729 << " with " << d_folder_size 00730 << " bytes for meta information and folders and at least " 00731 << d_gc_buffer << " bytes free after every gc" << endl); 00732 00733 unlock_cache_interface(); 00734 } 00735 00738 unsigned long 00739 HTTPCache::get_max_size() const 00740 { 00741 return d_total_size / MEGA; 00742 } 00743 00752 void 00753 HTTPCache::set_max_entry_size(unsigned long size) 00754 { 00755 lock_cache_interface(); 00756 00757 try { 00758 unsigned long new_size = size * MEGA; 00759 if (new_size > 0 && new_size < d_total_size - d_folder_size) { 00760 unsigned long old_size = d_max_entry_size; 00761 d_max_entry_size = new_size; 00762 if (new_size < old_size && startGC()) { 00763 perform_garbage_collection(); 00764 d_http_cache_table->cache_index_write(); 00765 } 00766 } 00767 } 00768 catch (...) { 00769 unlock_cache_interface(); 00770 throw; 00771 } 00772 00773 DBG2(cerr << "Cache...... 
Max entry cache size is " 00774 << d_max_entry_size << endl); 00775 00776 unlock_cache_interface(); 00777 } 00778 00783 unsigned long 00784 HTTPCache::get_max_entry_size() const 00785 { 00786 return d_max_entry_size / MEGA; 00787 } 00788 00799 void 00800 HTTPCache::set_default_expiration(const int exp_time) 00801 { 00802 lock_cache_interface(); 00803 00804 d_default_expiration = exp_time; 00805 00806 unlock_cache_interface(); 00807 } 00808 00811 int 00812 HTTPCache::get_default_expiration() const 00813 { 00814 return d_default_expiration; 00815 } 00816 00821 void 00822 HTTPCache::set_always_validate(bool validate) 00823 { 00824 d_always_validate = validate; 00825 } 00826 00830 bool 00831 HTTPCache::get_always_validate() const 00832 { 00833 return d_always_validate; 00834 } 00835 00852 void 00853 HTTPCache::set_cache_control(const vector<string> &cc) 00854 { 00855 lock_cache_interface(); 00856 00857 try { 00858 d_cache_control = cc; 00859 00860 vector<string>::const_iterator i; 00861 for (i = cc.begin(); i != cc.end(); ++i) { 00862 string header = (*i).substr(0, (*i).find(':')); 00863 string value = (*i).substr((*i).find(": ") + 2); 00864 if (header != "Cache-Control") { 00865 throw InternalErr(__FILE__, __LINE__, "Expected cache control header not found."); 00866 } 00867 else { 00868 if (value == "no-cache" || value == "no-store") 00869 d_cache_enabled = false; 00870 else if (value.find("max-age") != string::npos) { 00871 string max_age = value.substr(value.find("=" + 1)); 00872 d_max_age = parse_time(max_age.c_str()); 00873 } 00874 else if (value == "max-stale") 00875 d_max_stale = 0; // indicates will take anything; 00876 else if (value.find("max-stale") != string::npos) { 00877 string max_stale = value.substr(value.find("=" + 1)); 00878 d_max_stale = parse_time(max_stale.c_str()); 00879 } 00880 else if (value.find("min-fresh") != string::npos) { 00881 string min_fresh = value.substr(value.find("=" + 1)); 00882 d_min_fresh = parse_time(min_fresh.c_str()); 00883 } 
00884 } 00885 } 00886 } 00887 catch (...) { 00888 unlock_cache_interface(); 00889 throw; 00890 } 00891 00892 unlock_cache_interface(); 00893 } 00894 00895 00900 vector<string> 00901 HTTPCache::get_cache_control() 00902 { 00903 return d_cache_control; 00904 } 00905 00907 00916 bool 00917 HTTPCache::is_url_in_cache(const string &url) 00918 { 00919 DBG(cerr << "Is this url in the cache? (" << url << ")" << endl); 00920 00921 HTTPCacheTable::CacheEntry *entry = d_http_cache_table->get_locked_entry_from_cache_table(url); 00922 bool status = entry != 0; 00923 if (entry) { 00924 entry->unlock_read_response(); 00925 } 00926 return status; 00927 } 00928 00934 bool 00935 is_hop_by_hop_header(const string &header) 00936 { 00937 return header.find("Connection") != string::npos 00938 || header.find("Keep-Alive") != string::npos 00939 || header.find("Proxy-Authenticate") != string::npos 00940 || header.find("Proxy-Authorization") != string::npos 00941 || header.find("Transfer-Encoding") != string::npos 00942 || header.find("Upgrade") != string::npos; 00943 } 00944 00956 void 00957 HTTPCache::write_metadata(const string &cachename, const vector<string> &headers) 00958 { 00959 string fname = cachename + CACHE_META; 00960 d_open_files.push_back(fname); 00961 00962 FILE *dest = fopen(fname.c_str(), "w"); 00963 if (!dest) { 00964 throw InternalErr(__FILE__, __LINE__, 00965 "Could not open named cache entry file."); 00966 } 00967 00968 vector<string>::const_iterator i; 00969 for (i = headers.begin(); i != headers.end(); ++i) { 00970 if (!is_hop_by_hop_header(*i)) { 00971 int s = fwrite((*i).c_str(), (*i).size(), 1, dest); 00972 if (s != 1) { 00973 fclose(dest); 00974 throw InternalErr(__FILE__, __LINE__, "could not write header: '" + (*i) + "' " + long_to_string(s)); 00975 } 00976 s = fwrite("\n", 1, 1, dest); 00977 if (s != 1) { 00978 fclose(dest); 00979 throw InternalErr(__FILE__, __LINE__, "could not write header: " + long_to_string(s)); 00980 } 00981 } 00982 } 00983 00984 int res 
= fclose(dest); 00985 if (res) { 00986 DBG(cerr << "HTTPCache::write_metadata - Failed to close " 00987 << dest << endl); 00988 } 00989 00990 d_open_files.pop_back(); 00991 } 00992 01003 void 01004 HTTPCache::read_metadata(const string &cachename, vector<string> &headers) 01005 { 01006 FILE *md = fopen(string(cachename + CACHE_META).c_str(), "r"); 01007 if (!md) { 01008 throw InternalErr(__FILE__, __LINE__, 01009 "Could not open named cache entry meta data file."); 01010 } 01011 01012 char line[1024]; 01013 while (!feof(md) && fgets(line, 1024, md)) { 01014 line[min(1024, static_cast<int>(strlen(line)))-1] = '\0'; // erase newline 01015 headers.push_back(string(line)); 01016 } 01017 01018 int res = fclose(md); 01019 if (res) { 01020 DBG(cerr << "HTTPCache::read_metadata - Failed to close " 01021 << md << endl); 01022 } 01023 } 01024 01046 int 01047 HTTPCache::write_body(const string &cachename, const FILE *src) 01048 { 01049 d_open_files.push_back(cachename); 01050 01051 FILE *dest = fopen(cachename.c_str(), "wb"); 01052 if (!dest) { 01053 throw InternalErr(__FILE__, __LINE__, 01054 "Could not open named cache entry file."); 01055 } 01056 01057 // Read and write in 1k blocks; an attempt at doing this efficiently. 
01058 // 09/30/02 jhrg 01059 char line[1024]; 01060 size_t n; 01061 int total = 0; 01062 while ((n = fread(line, 1, 1024, const_cast<FILE *>(src))) > 0) { 01063 total += fwrite(line, 1, n, dest); 01064 DBG2(sleep(3)); 01065 } 01066 01067 if (ferror(const_cast<FILE *>(src)) || ferror(dest)) { 01068 int res = fclose(dest); 01069 res = res & unlink(cachename.c_str()); 01070 if (res) { 01071 DBG(cerr << "HTTPCache::write_body - Failed to close/unlink " 01072 << dest << endl); 01073 } 01074 throw InternalErr(__FILE__, __LINE__, 01075 "I/O error transferring data to the cache."); 01076 } 01077 01078 rewind(const_cast<FILE *>(src)); 01079 01080 int res = fclose(dest); 01081 if (res) { 01082 DBG(cerr << "HTTPCache::write_body - Failed to close " 01083 << dest << endl); 01084 } 01085 01086 d_open_files.pop_back(); 01087 01088 return total; 01089 } 01090 01099 FILE * 01100 HTTPCache::open_body(const string &cachename) 01101 { 01102 DBG(cerr << "cachename: " << cachename << endl); 01103 01104 FILE *src = fopen(cachename.c_str(), "rb"); // Read only 01105 if (!src) 01106 throw InternalErr(__FILE__, __LINE__, "Could not open cache file."); 01107 01108 return src; 01109 } 01110 01136 bool 01137 HTTPCache::cache_response(const string &url, time_t request_time, 01138 const vector<string> &headers, const FILE *body) 01139 { 01140 lock_cache_interface(); 01141 01142 DBG(cerr << "Caching url: " << url << "." << endl); 01143 01144 try { 01145 // If this is not an http or https URL, don't cache. 01146 if (url.find("http:") == string::npos && 01147 url.find("https:") == string::npos) { 01148 unlock_cache_interface(); 01149 return false; 01150 } 01151 01152 // This does nothing if url is not already in the cache. It's 01153 // more efficient to do this than to first check and see if the entry 01154 // exists. 
10/10/02 jhrg 01155 d_http_cache_table->remove_entry_from_cache_table(url); 01156 01157 HTTPCacheTable::CacheEntry *entry = new HTTPCacheTable::CacheEntry(url); 01158 entry->lock_write_response(); 01159 01160 try { 01161 d_http_cache_table->parse_headers(entry, d_max_entry_size, headers); // etag, lm, date, age, expires, max_age. 01162 if (entry->is_no_cache()) { 01163 DBG(cerr << "Not cache-able; deleting HTTPCacheTable::CacheEntry: " << entry 01164 << "(" << url << ")" << endl); 01165 entry->unlock_write_response(); 01166 delete entry; entry = 0; 01167 unlock_cache_interface(); 01168 return false; 01169 } 01170 01171 // corrected_initial_age, freshness_lifetime, response_time. 01172 d_http_cache_table->calculate_time(entry, d_default_expiration, request_time); 01173 01174 d_http_cache_table->create_location(entry); // cachename, cache_body_fd 01175 // move these write function to cache table 01176 entry->set_size(write_body(entry->get_cachename(), body)); 01177 write_metadata(entry->get_cachename(), headers); 01178 d_http_cache_table->add_entry_to_cache_table(entry); 01179 entry->unlock_write_response(); 01180 } 01181 catch (ResponseTooBigErr &e) { 01182 // Oops. Bummer. Clean up and exit. 01183 DBG(cerr << e.get_error_message() << endl); 01184 REMOVE(entry->get_cachename().c_str()); 01185 REMOVE(string(entry->get_cachename() + CACHE_META).c_str()); 01186 DBG(cerr << "Too big; deleting HTTPCacheTable::CacheEntry: " << entry << "(" << url 01187 << ")" << endl); 01188 entry->unlock_write_response(); 01189 delete entry; entry = 0; 01190 unlock_cache_interface(); 01191 return false; 01192 } 01193 01194 if (d_http_cache_table->get_new_entries() > DUMP_FREQUENCY) { 01195 if (startGC()) 01196 perform_garbage_collection(); 01197 01198 d_http_cache_table->cache_index_write(); // resets new_entries 01199 } 01200 } 01201 catch (...) 
{ 01202 unlock_cache_interface(); 01203 throw; 01204 } 01205 01206 unlock_cache_interface(); 01207 01208 return true; 01209 } 01210 01229 vector<string> 01230 HTTPCache::get_conditional_request_headers(const string &url) 01231 { 01232 lock_cache_interface(); 01233 01234 HTTPCacheTable::CacheEntry *entry = 0; 01235 vector<string> headers; 01236 01237 DBG(cerr << "Getting conditional request headers for " << url << endl); 01238 01239 try { 01240 entry = d_http_cache_table->get_locked_entry_from_cache_table(url); 01241 if (!entry) 01242 throw Error("There is no cache entry for the URL: " + url); 01243 01244 if (entry->get_etag() != "") 01245 headers.push_back(string("If-None-Match: ") + entry->get_etag()); 01246 01247 if (entry->get_lm() > 0) { 01248 time_t lm = entry->get_lm(); 01249 headers.push_back(string("If-Modified-Since: ") 01250 + date_time_str(&lm)); 01251 } 01252 else if (entry->get_max_age() > 0) { 01253 time_t max_age = entry->get_max_age(); 01254 headers.push_back(string("If-Modified-Since: ") 01255 + date_time_str(&max_age)); 01256 } 01257 else if (entry->get_expires() > 0) { 01258 time_t expires = entry->get_expires(); 01259 headers.push_back(string("If-Modified-Since: ") 01260 + date_time_str(&expires)); 01261 } 01262 entry->unlock_read_response(); 01263 unlock_cache_interface(); 01264 } 01265 catch (...) 
{ 01266 unlock_cache_interface(); 01267 if (entry) { 01268 entry->unlock_read_response(); 01269 } 01270 throw; 01271 } 01272 01273 return headers; 01274 } 01275 01279 struct HeaderLess: binary_function<const string&, const string&, bool> 01280 { 01281 bool operator()(const string &s1, const string &s2) const { 01282 return s1.substr(0, s1.find(':')) < s2.substr(0, s2.find(':')); 01283 } 01284 }; 01285 01299 void 01300 HTTPCache::update_response(const string &url, time_t request_time, 01301 const vector<string> &headers) 01302 { 01303 lock_cache_interface(); 01304 01305 HTTPCacheTable::CacheEntry *entry = 0; 01306 DBG(cerr << "Updating the response headers for: " << url << endl); 01307 01308 try { 01309 entry = d_http_cache_table->get_write_locked_entry_from_cache_table(url); 01310 if (!entry) 01311 throw Error("There is no cache entry for the URL: " + url); 01312 01313 // Merge the new headers with the exiting HTTPCacheTable::CacheEntry object. 01314 d_http_cache_table->parse_headers(entry, d_max_entry_size, headers); 01315 01316 // Update corrected_initial_age, freshness_lifetime, response_time. 01317 d_http_cache_table->calculate_time(entry, d_default_expiration, request_time); 01318 01319 // Merge the new headers with those in the persistent store. How: 01320 // Load the new headers into a set, then merge the old headers. Since 01321 // set<> ignores duplicates, old headers with the same name as a new 01322 // header will got into the bit bucket. Define a special compare 01323 // functor to make sure that headers are compared using only their 01324 // name and not their value too. 01325 set<string, HeaderLess> merged_headers; 01326 01327 // Load in the new headers 01328 copy(headers.begin(), headers.end(), 01329 inserter(merged_headers, merged_headers.begin())); 01330 01331 // Get the old headers and load them in. 
01332 vector<string> old_headers; 01333 read_metadata(entry->get_cachename(), old_headers); 01334 copy(old_headers.begin(), old_headers.end(), 01335 inserter(merged_headers, merged_headers.begin())); 01336 01337 // Read the values back out. Use reverse iterators with back_inserter 01338 // to preserve header order. NB: vector<> does not support push_front 01339 // so we can't use front_inserter(). 01/09/03 jhrg 01340 vector<string> result; 01341 copy(merged_headers.rbegin(), merged_headers.rend(), 01342 back_inserter(result)); 01343 01344 write_metadata(entry->get_cachename(), result); 01345 entry->unlock_write_response(); 01346 unlock_cache_interface(); 01347 } 01348 catch (...) { 01349 if (entry) { 01350 entry->unlock_read_response(); 01351 } 01352 unlock_cache_interface(); 01353 throw; 01354 } 01355 } 01356 01368 bool 01369 HTTPCache::is_url_valid(const string &url) 01370 { 01371 lock_cache_interface(); 01372 01373 bool freshness; 01374 HTTPCacheTable::CacheEntry *entry = 0; 01375 01376 DBG(cerr << "Is this URL valid? (" << url << ")" << endl); 01377 01378 try { 01379 if (d_always_validate) { 01380 unlock_cache_interface(); 01381 return false; // force re-validation. 01382 } 01383 01384 entry = d_http_cache_table->get_locked_entry_from_cache_table(url); 01385 if (!entry) 01386 throw Error("There is no cache entry for the URL: " + url); 01387 01388 // If we supported range requests, we'd need code here to check if 01389 // there was only a partial response in the cache. 10/02/02 jhrg 01390 01391 // In case this entry is of type "must-revalidate" then we consider it 01392 // invalid. 
01393 if (entry->get_must_revalidate()) { 01394 entry->unlock_read_response(); 01395 unlock_cache_interface(); 01396 return false; 01397 } 01398 01399 time_t resident_time = time(NULL) - entry->get_response_time(); 01400 time_t current_age = entry->get_corrected_initial_age() + resident_time; 01401 01402 // Check that the max-age, max-stale, and min-fresh directives 01403 // given in the request cache control header is followed. 01404 if (d_max_age >= 0 && current_age > d_max_age) { 01405 DBG(cerr << "Cache....... Max-age validation" << endl); 01406 entry->unlock_read_response(); 01407 unlock_cache_interface(); 01408 return false; 01409 } 01410 if (d_min_fresh >= 0 01411 && entry->get_freshness_lifetime() < current_age + d_min_fresh) { 01412 DBG(cerr << "Cache....... Min-fresh validation" << endl); 01413 entry->unlock_read_response(); 01414 unlock_cache_interface(); 01415 return false; 01416 } 01417 01418 freshness = (entry->get_freshness_lifetime() 01419 + (d_max_stale >= 0 ? d_max_stale : 0) > current_age); 01420 entry->unlock_read_response(); 01421 unlock_cache_interface(); 01422 } 01423 catch (...) 
{ 01424 if (entry) { 01425 entry->unlock_read_response(); 01426 } 01427 unlock_cache_interface(); 01428 throw; 01429 } 01430 01431 return freshness; 01432 } 01433 01461 FILE * HTTPCache::get_cached_response(const string &url, 01462 vector<string> &headers, string &cacheName) { 01463 lock_cache_interface(); 01464 01465 FILE *body = 0; 01466 HTTPCacheTable::CacheEntry *entry = 0; 01467 01468 DBG(cerr << "Getting the cached response for " << url << endl); 01469 01470 try { 01471 entry = d_http_cache_table->get_locked_entry_from_cache_table(url); 01472 if (!entry) { 01473 unlock_cache_interface(); 01474 return 0; 01475 } 01476 01477 cacheName = entry->get_cachename(); 01478 read_metadata(entry->get_cachename(), headers); 01479 01480 DBG(cerr << "Headers just read from cache: " << endl); 01481 DBGN(copy(headers.begin(), headers.end(), ostream_iterator<string>(cerr, "\n"))); 01482 01483 body = open_body(entry->get_cachename()); 01484 01485 DBG(cerr << "Returning: " << url << " from the cache." << endl); 01486 01487 d_http_cache_table->bind_entry_to_data(entry, body); 01488 } 01489 catch (...) { 01490 // Why make this unlock operation conditional on entry? 01491 if (entry) 01492 unlock_cache_interface(); 01493 if (body != 0) 01494 fclose(body); 01495 throw; 01496 } 01497 01498 unlock_cache_interface(); 01499 01500 return body; 01501 } 01502 01514 FILE * 01515 HTTPCache::get_cached_response(const string &url, vector<string> &headers) 01516 { 01517 string discard_name; 01518 return get_cached_response(url, headers, discard_name); 01519 } 01520 01531 FILE * 01532 HTTPCache::get_cached_response(const string &url) 01533 { 01534 string discard_name; 01535 vector<string> discard_headers; 01536 return get_cached_response(url, discard_headers, discard_name); 01537 } 01538 01551 void 01552 HTTPCache::release_cached_response(FILE *body) 01553 { 01554 lock_cache_interface(); 01555 01556 try { 01557 d_http_cache_table->uncouple_entry_from_data(body); 01558 } 01559 catch (...) 
{ 01560 unlock_cache_interface(); 01561 throw; 01562 } 01563 01564 unlock_cache_interface(); 01565 } 01566 01579 void 01580 HTTPCache::purge_cache() 01581 { 01582 lock_cache_interface(); 01583 01584 try { 01585 if (d_http_cache_table->is_locked_read_responses()) 01586 throw Error("Attempt to purge the cache with entries in use."); 01587 01588 d_http_cache_table->delete_all_entries(); 01589 } 01590 catch (...) { 01591 unlock_cache_interface(); 01592 throw; 01593 } 01594 01595 unlock_cache_interface(); 01596 } 01597 01598 } // namespace libdap