libdap++ Updated for version 3.8.2
|

// -*- mode: c++; c-basic-offset:4 -*-

// This file is part of libdap, A C++ implementation of the OPeNDAP Data
// Access Protocol.

// Copyright (c) 2002,2003 OPeNDAP, Inc.
// Author: James Gallagher <jgallagher@opendap.org>
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
// Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License along with this library; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
//
// You can contact OPeNDAP, Inc. at PO Box 112, Saunderstown, RI. 02874-0112.
00025 00026 #include "config.h" 00027 00028 //#define DODS_DEBUG 00029 //#define DODS_DEBUG2 00030 00031 #include <pthread.h> 00032 #include <limits.h> 00033 #include <unistd.h> // for stat 00034 #include <sys/types.h> // for stat and mkdir 00035 #include <sys/stat.h> 00036 00037 #include <cstring> 00038 #include <iostream> 00039 #include <sstream> 00040 #include <algorithm> 00041 #include <iterator> 00042 #include <set> 00043 00044 #include "Error.h" 00045 #include "InternalErr.h" 00046 #include "ResponseTooBigErr.h" 00047 #ifndef WIN32 00048 #include "SignalHandler.h" 00049 #endif 00050 #include "HTTPCacheInterruptHandler.h" 00051 #include "HTTPCacheTable.h" 00052 #include "HTTPCache.h" 00053 00054 #include "util_mit.h" 00055 #include "debug.h" 00056 00057 using namespace std; 00058 00059 namespace libdap { 00060 00061 HTTPCache *HTTPCache::_instance = 0; 00062 00063 // instance_mutex is used to ensure that only one instance is created. 00064 // That is, it protects the body of the HTTPCache::instance() method. This 00065 // mutex is initialized from within the static function once_init_routine() 00066 // and the call to that takes place using pthread_once_init() where the mutex 00067 // once_block is used to protect that call. All of this ensures that no matter 00068 // how many threads call the instance() method, only one instance is ever 00069 // made. 
00070 static pthread_mutex_t instance_mutex; 00071 static pthread_once_t once_block = PTHREAD_ONCE_INIT; 00072 00073 #ifdef WIN32 00074 #include <direct.h> 00075 #include <time.h> 00076 #include <fcntl.h> 00077 #define MKDIR(a,b) _mkdir((a)) 00078 #define UMASK(a) _umask((a)) 00079 #define REMOVE(a) remove((a)) 00080 #define MKSTEMP(a) _open(_mktemp((a)),_O_CREAT,_S_IREAD|_S_IWRITE) 00081 #define DIR_SEPARATOR_CHAR '\\' 00082 #define DIR_SEPARATOR_STR "\\" 00083 #else 00084 #define MKDIR(a,b) mkdir((a), (b)) 00085 #define UMASK(a) umask((a)) 00086 #define REMOVE(a) remove((a)) 00087 #define MKSTEMP(a) mkstemp((a)) 00088 #define DIR_SEPARATOR_CHAR '/' 00089 #define DIR_SEPARATOR_STR "/" 00090 #endif 00091 00092 #ifdef WIN32 00093 #define CACHE_LOC "\\tmp\\" 00094 #define CACHE_ROOT "dods-cache\\" 00095 #else 00096 #define CACHE_LOC "/tmp/" 00097 #define CACHE_ROOT "dods-cache/" 00098 #endif 00099 #define CACHE_INDEX ".index" 00100 #define CACHE_LOCK ".lock" 00101 #define CACHE_META ".meta" 00102 //#define CACHE_EMPTY_ETAG "@cache@" 00103 00104 #define NO_LM_EXPIRATION 24*3600 // 24 hours 00105 00106 #define DUMP_FREQUENCY 10 // Dump index every x loads 00107 00108 #define MEGA 0x100000L 00109 #define CACHE_TOTAL_SIZE 20 // Default cache size is 20M 00110 #define CACHE_FOLDER_PCT 10 // 10% of cache size for metainfo etc. 00111 #define CACHE_GC_PCT 10 // 10% of cache size free after GC 00112 #define MIN_CACHE_TOTAL_SIZE 5 // 5M Min cache size 00113 #define MAX_CACHE_ENTRY_SIZE 3 // 3M Max size of single cached entry 00114 00115 static void 00116 once_init_routine() 00117 { 00118 int status; 00119 status = INIT(&instance_mutex); 00120 00121 if (status != 0) 00122 throw InternalErr(__FILE__, __LINE__, "Could not initialize the HTTP Cache mutex. 
Exiting."); 00123 } 00124 00153 HTTPCache * 00154 HTTPCache::instance(const string &cache_root, bool force) 00155 { 00156 LOCK(&instance_mutex); 00157 DBG(cerr << "Entering instance(); (" << hex << _instance << dec << ")" 00158 << "... "); 00159 00160 try { 00161 if (!_instance) { 00162 _instance = new HTTPCache(cache_root, force); 00163 00164 DBG(cerr << "New instance: " << _instance << ", cache root: " 00165 << _instance->d_cache_root << endl); 00166 00167 atexit(delete_instance); 00168 00169 #ifndef WIN32 00170 // Register the interrupt handler. If we've already registered 00171 // one, barf. If this becomes a problem, hack SignalHandler so 00172 // that we can chain these handlers... 02/10/04 jhrg 00173 // 00174 // Technically we're leaking memory here. However, since this 00175 // class is a singleton, we know that only three objects will 00176 // ever be created and they will all exist until the process 00177 // exits. We can let this slide... 02/12/04 jhrg 00178 EventHandler *old_eh = SignalHandler::instance()->register_handler 00179 (SIGINT, new HTTPCacheInterruptHandler); 00180 if (old_eh) { 00181 SignalHandler::instance()->register_handler(SIGINT, old_eh); 00182 throw SignalHandlerRegisteredErr( 00183 "Could not register event handler for SIGINT without superseding an existing one."); 00184 } 00185 00186 old_eh = SignalHandler::instance()->register_handler 00187 (SIGPIPE, new HTTPCacheInterruptHandler); 00188 if (old_eh) { 00189 SignalHandler::instance()->register_handler(SIGPIPE, old_eh); 00190 throw SignalHandlerRegisteredErr( 00191 "Could not register event handler for SIGPIPE without superseding an existing one."); 00192 } 00193 00194 old_eh = SignalHandler::instance()->register_handler 00195 (SIGTERM, new HTTPCacheInterruptHandler); 00196 if (old_eh) { 00197 SignalHandler::instance()->register_handler(SIGTERM, old_eh); 00198 throw SignalHandlerRegisteredErr( 00199 "Could not register event handler for SIGTERM without superseding an existing one."); 
00200 } 00201 #endif 00202 } 00203 } 00204 catch (...) { 00205 DBG2(cerr << "The constructor threw an Error!" << endl); 00206 UNLOCK(&instance_mutex); 00207 throw; 00208 } 00209 00210 UNLOCK(&instance_mutex); 00211 DBGN(cerr << "returning " << hex << _instance << dec << endl); 00212 00213 return _instance; 00214 } 00215 00219 void 00220 HTTPCache::delete_instance() 00221 { 00222 DBG(cerr << "Entering delete_instance()..." << endl); 00223 if (HTTPCache::_instance) { 00224 DBG(cerr << "Deleting the cache: " << HTTPCache::_instance << endl); 00225 delete HTTPCache::_instance; 00226 HTTPCache::_instance = 0; 00227 } 00228 00229 DBG(cerr << "Exiting delete_instance()" << endl); 00230 } 00231 00246 HTTPCache::HTTPCache(string cache_root, bool force) : 00247 d_locked_open_file(0), 00248 d_cache_enabled(false), 00249 d_cache_protected(false), 00250 d_expire_ignored(false), 00251 d_always_validate(false), 00252 d_total_size(CACHE_TOTAL_SIZE * MEGA), 00253 d_folder_size(CACHE_TOTAL_SIZE / CACHE_FOLDER_PCT), 00254 d_gc_buffer(CACHE_TOTAL_SIZE / CACHE_GC_PCT), 00255 d_max_entry_size(MAX_CACHE_ENTRY_SIZE * MEGA), 00256 d_default_expiration(NO_LM_EXPIRATION), 00257 d_max_age(-1), 00258 d_max_stale(-1), 00259 d_min_fresh(-1), 00260 d_http_cache_table(0) 00261 { 00262 DBG(cerr << "Entering the constructor for " << this << "... "); 00263 00264 int status = pthread_once(&once_block, once_init_routine); 00265 if (status != 0) 00266 throw InternalErr(__FILE__, __LINE__, "Could not initialize the HTTP Cache mutex. Exiting."); 00267 00268 INIT(&d_cache_mutex); 00269 00270 // This used to throw an Error object if we could not get the 00271 // single user lock. However, that results in an invalid object. It's 00272 // better to have an instance that has default values. If we cannot get 00273 // the lock, make sure to set the cache as *disabled*. 03/12/03 jhrg 00274 // 00275 // I fixed this block so that the cache root is set before we try to get 00276 // the single user lock. 
That was the fix for bug #661. To make that 00277 // work, I had to move the call to create_cache_root out of 00278 // set_cache_root(). 09/08/03 jhrg 00279 00280 set_cache_root(cache_root); 00281 int block_size; 00282 00283 if (!get_single_user_lock(force)) 00284 throw Error("Could not get single user lock for the cache"); 00285 00286 #ifdef WIN32 00287 // Windows is unable to provide us this information. 4096 appears 00288 // a best guess. It is likely to be in the range [2048, 8192] on 00289 // windows, but will the level of truth of that statement vary over 00290 // time ? 00291 block_size = 4096; 00292 #else 00293 struct stat s; 00294 if (stat(cache_root.c_str(), &s) == 0) 00295 block_size = s.st_blksize; 00296 else 00297 throw Error("Could not set file system block size."); 00298 #endif 00299 d_http_cache_table = new HTTPCacheTable(d_cache_root, block_size); 00300 d_cache_enabled = true; 00301 00302 DBGN(cerr << "exiting" << endl); 00303 } 00304 00317 HTTPCache::~HTTPCache() 00318 { 00319 DBG(cerr << "Entering the destructor for " << this << "... "); 00320 00321 try { 00322 if (startGC()) 00323 perform_garbage_collection(); 00324 00325 d_http_cache_table->cache_index_write(); 00326 } 00327 catch (Error &e) { 00328 // If the cache index cannot be written, we've got problems. However, 00329 // unless we're debugging, still free up the cache table in memory. 00330 // How should we let users know they cache index is not being 00331 // written?? 10/03/02 jhrg 00332 DBG(cerr << e.get_error_message() << endl); 00333 } 00334 00335 delete d_http_cache_table; 00336 00337 release_single_user_lock(); 00338 00339 DBGN(cerr << "exiting destructor." 
<< endl); 00340 DESTROY(&d_cache_mutex); 00341 } 00342 00343 00347 00351 bool 00352 HTTPCache::stopGC() const 00353 { 00354 return (d_http_cache_table->get_current_size() + d_folder_size < d_total_size - d_gc_buffer); 00355 } 00356 00363 bool 00364 HTTPCache::startGC() const 00365 { 00366 DBG(cerr << "startGC, current_size: " << d_http_cache_table->get_current_size() << endl); 00367 return (d_http_cache_table->get_current_size() + d_folder_size > d_total_size); 00368 } 00369 00384 void 00385 HTTPCache::perform_garbage_collection() 00386 { 00387 DBG(cerr << "Performing garbage collection" << endl); 00388 00389 // Remove all the expired responses. 00390 expired_gc(); 00391 00392 // Remove entries larger than max_entry_size. 00393 too_big_gc(); 00394 00395 // Remove entries starting with zero hits, 1, ..., until stopGC() 00396 // returns true. 00397 hits_gc(); 00398 } 00399 00405 void 00406 HTTPCache::expired_gc() 00407 { 00408 if (!d_expire_ignored) { 00409 d_http_cache_table->delete_expired_entries(); 00410 } 00411 } 00412 00429 void 00430 HTTPCache::hits_gc() 00431 { 00432 int hits = 0; 00433 00434 if (startGC()) { 00435 while (!stopGC()) { 00436 d_http_cache_table->delete_by_hits(hits); 00437 hits++; 00438 } 00439 } 00440 } 00441 00446 void HTTPCache::too_big_gc() { 00447 if (startGC()) 00448 d_http_cache_table->delete_by_size(d_max_entry_size); 00449 } 00450 00452 00463 bool HTTPCache::get_single_user_lock(bool force) { 00464 if (!d_locked_open_file) { 00465 FILE * fp = NULL; 00466 00467 try { 00468 // It's OK to call create_cache_root if the directory already 00469 // exists. 00470 create_cache_root(d_cache_root); 00471 } 00472 catch (Error &e) { 00473 // We need to catch and return false because this method is 00474 // called from a ctor and throwing at this point will result in a 00475 // partially constructed object. 01/22/04 jhrg 00476 return false; 00477 } 00478 00479 // Try to read the lock file. If we can open for reading, it exists. 
00480 string lock = d_cache_root + CACHE_LOCK; 00481 if ((fp = fopen(lock.c_str(), "r")) != NULL) { 00482 int res = fclose(fp); 00483 if (res) { 00484 DBG(cerr << "Failed to close " << (void *)fp << endl); 00485 } 00486 if (force) 00487 REMOVE(lock.c_str()); 00488 else 00489 return false; 00490 } 00491 00492 if ((fp = fopen(lock.c_str(), "w")) == NULL) 00493 return false; 00494 00495 d_locked_open_file = fp; 00496 return true; 00497 } 00498 00499 return false; 00500 } 00501 00504 void 00505 HTTPCache::release_single_user_lock() 00506 { 00507 if (d_locked_open_file) { 00508 int res = fclose(d_locked_open_file); 00509 if (res) { 00510 DBG(cerr << "Failed to close " << (void *)d_locked_open_file << endl) ; 00511 } 00512 d_locked_open_file = 0; 00513 } 00514 00515 string lock = d_cache_root + CACHE_LOCK; 00516 REMOVE(lock.c_str()); 00517 } 00518 00521 00525 string 00526 HTTPCache::get_cache_root() const 00527 { 00528 return d_cache_root; 00529 } 00530 00531 00540 void 00541 HTTPCache::create_cache_root(const string &cache_root) 00542 { 00543 struct stat stat_info; 00544 string::size_type cur = 0; 00545 00546 #ifdef WIN32 00547 cur = cache_root[1] == ':' ? 3 : 1; 00548 #else 00549 cur = 1; 00550 #endif 00551 while ((cur = cache_root.find(DIR_SEPARATOR_CHAR, cur)) != string::npos) { 00552 string dir = cache_root.substr(0, cur); 00553 if (stat(dir.c_str(), &stat_info) == -1) { 00554 DBG2(cerr << "Cache....... Creating " << dir << endl); 00555 mode_t mask = UMASK(0); 00556 if (MKDIR(dir.c_str(), 0777) < 0) { 00557 DBG2(cerr << "Error: can't create." << endl); 00558 UMASK(mask); 00559 throw Error(string("Could not create the directory for the cache. Failed when building path at ") + dir + string(".")); 00560 } 00561 UMASK(mask); 00562 } 00563 else { 00564 DBG2(cerr << "Cache....... 
Found " << dir << endl); 00565 } 00566 cur++; 00567 } 00568 } 00569 00584 void 00585 HTTPCache::set_cache_root(const string &root) 00586 { 00587 if (root != "") { 00588 d_cache_root = root; 00589 // cache root should end in /. 00590 if (d_cache_root[d_cache_root.size()-1] != DIR_SEPARATOR_CHAR) 00591 d_cache_root += DIR_SEPARATOR_CHAR; 00592 } 00593 else { 00594 // If no cache root has been indicated then look for a suitable 00595 // location. 00596 char * cr = (char *) getenv("DODS_CACHE"); 00597 if (!cr) cr = (char *) getenv("TMP"); 00598 if (!cr) cr = (char *) getenv("TEMP"); 00599 if (!cr) cr = CACHE_LOC; 00600 00601 d_cache_root = cr; 00602 if (d_cache_root[d_cache_root.size()-1] != DIR_SEPARATOR_CHAR) 00603 d_cache_root += DIR_SEPARATOR_CHAR; 00604 00605 d_cache_root += CACHE_ROOT; 00606 } 00607 00608 // Test d_hhtp_cache_table because this method can be called before that 00609 // instance is created and also can be called later to cahnge the cache 00610 // root. jhrg 05.14.08 00611 if (d_http_cache_table) 00612 d_http_cache_table->set_cache_root(d_cache_root); 00613 } 00614 00626 void 00627 HTTPCache::set_cache_enabled(bool mode) 00628 { 00629 lock_cache_interface(); 00630 00631 d_cache_enabled = mode; 00632 00633 unlock_cache_interface(); 00634 } 00635 00638 bool 00639 HTTPCache::is_cache_enabled() const 00640 { 00641 DBG2(cerr << "In HTTPCache::is_cache_enabled: (" << d_cache_enabled << ")" 00642 << endl); 00643 return d_cache_enabled; 00644 } 00645 00656 void 00657 HTTPCache::set_cache_disconnected(CacheDisconnectedMode mode) 00658 { 00659 lock_cache_interface(); 00660 00661 d_cache_disconnected = mode; 00662 00663 unlock_cache_interface(); 00664 } 00665 00668 CacheDisconnectedMode 00669 HTTPCache::get_cache_disconnected() const 00670 { 00671 return d_cache_disconnected; 00672 } 00673 00682 void 00683 HTTPCache::set_expire_ignored(bool mode) 00684 { 00685 lock_cache_interface(); 00686 00687 d_expire_ignored = mode; 00688 00689 unlock_cache_interface(); 
00690 } 00691 00692 /* Is the cache ignoring Expires headers returned with responses that have 00693 been cached? */ 00694 00695 bool 00696 HTTPCache::is_expire_ignored() const 00697 { 00698 return d_expire_ignored; 00699 } 00700 00716 void 00717 HTTPCache::set_max_size(unsigned long size) 00718 { 00719 lock_cache_interface(); 00720 00721 try { 00722 unsigned long new_size = size < MIN_CACHE_TOTAL_SIZE ? 00723 MIN_CACHE_TOTAL_SIZE * MEGA : 00724 (size > ULONG_MAX ? ULONG_MAX : size * MEGA); 00725 unsigned long old_size = d_total_size; 00726 d_total_size = new_size; 00727 d_folder_size = d_total_size / CACHE_FOLDER_PCT; 00728 d_gc_buffer = d_total_size / CACHE_GC_PCT; 00729 00730 if (new_size < old_size && startGC()) { 00731 perform_garbage_collection(); 00732 d_http_cache_table->cache_index_write(); 00733 } 00734 } 00735 catch (...) { 00736 unlock_cache_interface(); 00737 DBGN(cerr << "Unlocking interface." << endl); 00738 throw; 00739 } 00740 00741 DBG2(cerr << "Cache....... Total cache size: " << d_total_size 00742 << " with " << d_folder_size 00743 << " bytes for meta information and folders and at least " 00744 << d_gc_buffer << " bytes free after every gc" << endl); 00745 00746 unlock_cache_interface(); 00747 } 00748 00751 unsigned long 00752 HTTPCache::get_max_size() const 00753 { 00754 return d_total_size / MEGA; 00755 } 00756 00765 void 00766 HTTPCache::set_max_entry_size(unsigned long size) 00767 { 00768 lock_cache_interface(); 00769 00770 try { 00771 unsigned long new_size = size * MEGA; 00772 if (new_size > 0 && new_size < d_total_size - d_folder_size) { 00773 unsigned long old_size = d_max_entry_size; 00774 d_max_entry_size = new_size; 00775 if (new_size < old_size && startGC()) { 00776 perform_garbage_collection(); 00777 d_http_cache_table->cache_index_write(); 00778 } 00779 } 00780 } 00781 catch (...) { 00782 unlock_cache_interface(); 00783 throw; 00784 } 00785 00786 DBG2(cerr << "Cache...... 
Max entry cache size is " 00787 << d_max_entry_size << endl); 00788 00789 unlock_cache_interface(); 00790 } 00791 00796 unsigned long 00797 HTTPCache::get_max_entry_size() const 00798 { 00799 return d_max_entry_size / MEGA; 00800 } 00801 00812 void 00813 HTTPCache::set_default_expiration(const int exp_time) 00814 { 00815 lock_cache_interface(); 00816 00817 d_default_expiration = exp_time; 00818 00819 unlock_cache_interface(); 00820 } 00821 00824 int 00825 HTTPCache::get_default_expiration() const 00826 { 00827 return d_default_expiration; 00828 } 00829 00834 void 00835 HTTPCache::set_always_validate(bool validate) 00836 { 00837 d_always_validate = validate; 00838 } 00839 00843 bool 00844 HTTPCache::get_always_validate() const 00845 { 00846 return d_always_validate; 00847 } 00848 00865 void 00866 HTTPCache::set_cache_control(const vector<string> &cc) 00867 { 00868 lock_cache_interface(); 00869 00870 try { 00871 d_cache_control = cc; 00872 00873 vector<string>::const_iterator i; 00874 for (i = cc.begin(); i != cc.end(); ++i) { 00875 string header = (*i).substr(0, (*i).find(':')); 00876 string value = (*i).substr((*i).find(": ") + 2); 00877 if (header != "Cache-Control") { 00878 throw InternalErr(__FILE__, __LINE__, "Expected cache control header not found."); 00879 } 00880 else { 00881 if (value == "no-cache" || value == "no-store") 00882 d_cache_enabled = false; 00883 else if (value.find("max-age") != string::npos) { 00884 string max_age = value.substr(value.find("=" + 1)); 00885 d_max_age = parse_time(max_age.c_str()); 00886 } 00887 else if (value == "max-stale") 00888 d_max_stale = 0; // indicates will take anything; 00889 else if (value.find("max-stale") != string::npos) { 00890 string max_stale = value.substr(value.find("=" + 1)); 00891 d_max_stale = parse_time(max_stale.c_str()); 00892 } 00893 else if (value.find("min-fresh") != string::npos) { 00894 string min_fresh = value.substr(value.find("=" + 1)); 00895 d_min_fresh = parse_time(min_fresh.c_str()); 00896 } 
00897 } 00898 } 00899 } 00900 catch (...) { 00901 unlock_cache_interface(); 00902 throw; 00903 } 00904 00905 unlock_cache_interface(); 00906 } 00907 00908 00913 vector<string> 00914 HTTPCache::get_cache_control() 00915 { 00916 return d_cache_control; 00917 } 00918 00920 00929 bool 00930 HTTPCache::is_url_in_cache(const string &url) 00931 { 00932 DBG(cerr << "Is this url in the cache? (" << url << ")" << endl); 00933 00934 HTTPCacheTable::CacheEntry *entry = d_http_cache_table->get_locked_entry_from_cache_table(url); 00935 bool status = entry != 0; 00936 if (entry) { 00937 #if 0 00938 entry->unlock(); 00939 #endif 00940 entry->unlock_read_response(); 00941 } 00942 return status; 00943 } 00944 00950 bool 00951 is_hop_by_hop_header(const string &header) 00952 { 00953 return header.find("Connection") != string::npos 00954 || header.find("Keep-Alive") != string::npos 00955 || header.find("Proxy-Authenticate") != string::npos 00956 || header.find("Proxy-Authorization") != string::npos 00957 || header.find("Transfer-Encoding") != string::npos 00958 || header.find("Upgrade") != string::npos; 00959 } 00960 00972 void 00973 HTTPCache::write_metadata(const string &cachename, const vector<string> &headers) 00974 { 00975 string fname = cachename + CACHE_META; 00976 d_open_files.push_back(fname); 00977 00978 FILE *dest = fopen(fname.c_str(), "w"); 00979 if (!dest) { 00980 throw InternalErr(__FILE__, __LINE__, 00981 "Could not open named cache entry file."); 00982 } 00983 00984 vector<string>::const_iterator i; 00985 for (i = headers.begin(); i != headers.end(); ++i) { 00986 if (!is_hop_by_hop_header(*i)) { 00987 fwrite((*i).c_str(), (*i).size(), 1, dest); 00988 fwrite("\n", 1, 1, dest); 00989 } 00990 } 00991 00992 int res = fclose(dest); 00993 if (res) { 00994 DBG(cerr << "HTTPCache::write_metadata - Failed to close " 00995 << dest << endl); 00996 } 00997 00998 d_open_files.pop_back(); 00999 } 01000 01011 void 01012 HTTPCache::read_metadata(const string &cachename, 
vector<string> &headers) 01013 { 01014 FILE *md = fopen(string(cachename + CACHE_META).c_str(), "r"); 01015 if (!md) { 01016 throw InternalErr(__FILE__, __LINE__, 01017 "Could not open named cache entry meta data file."); 01018 } 01019 01020 char line[1024]; 01021 while (!feof(md) && fgets(line, 1024, md)) { 01022 line[min(1024, static_cast<int>(strlen(line)))-1] = '\0'; // erase newline 01023 headers.push_back(string(line)); 01024 } 01025 01026 int res = fclose(md); 01027 if (res) { 01028 DBG(cerr << "HTTPCache::read_metadata - Failed to close " 01029 << md << endl); 01030 } 01031 } 01032 01054 int 01055 HTTPCache::write_body(const string &cachename, const FILE *src) 01056 { 01057 d_open_files.push_back(cachename); 01058 01059 FILE *dest = fopen(cachename.c_str(), "wb"); 01060 if (!dest) { 01061 throw InternalErr(__FILE__, __LINE__, 01062 "Could not open named cache entry file."); 01063 } 01064 01065 // Read and write in 1k blocks; an attempt at doing this efficiently. 01066 // 09/30/02 jhrg 01067 char line[1024]; 01068 size_t n; 01069 int total = 0; 01070 while ((n = fread(line, 1, 1024, const_cast<FILE *>(src))) > 0) { 01071 total += fwrite(line, 1, n, dest); 01072 DBG2(sleep(3)); 01073 } 01074 01075 if (ferror(const_cast<FILE *>(src)) || ferror(dest)) { 01076 int res = fclose(dest); 01077 res = res & unlink(cachename.c_str()); 01078 if (res) { 01079 DBG(cerr << "HTTPCache::write_body - Failed to close/unlink " 01080 << dest << endl); 01081 } 01082 throw InternalErr(__FILE__, __LINE__, 01083 "I/O error transferring data to the cache."); 01084 } 01085 01086 rewind(const_cast<FILE *>(src)); 01087 01088 int res = fclose(dest); 01089 if (res) { 01090 DBG(cerr << "HTTPCache::write_body - Failed to close " 01091 << dest << endl); 01092 } 01093 01094 d_open_files.pop_back(); 01095 01096 return total; 01097 } 01098 01107 FILE * 01108 HTTPCache::open_body(const string &cachename) 01109 { 01110 FILE *src = fopen(cachename.c_str(), "rb"); // Read only 01111 if (!src) 01112 
throw InternalErr(__FILE__, __LINE__, "Could not open cache file."); 01113 01114 return src; 01115 } 01116 01142 bool 01143 HTTPCache::cache_response(const string &url, time_t request_time, 01144 const vector<string> &headers, const FILE *body) 01145 { 01146 lock_cache_interface(); 01147 01148 DBG(cerr << "Caching url: " << url << "." << endl); 01149 01150 try { 01151 // If this is not an http or https URL, don't cache. 01152 if (url.find("http:") == string::npos && 01153 url.find("https:") == string::npos) { 01154 unlock_cache_interface(); 01155 return false; 01156 } 01157 01158 // This does nothing if url is not already in the cache. It's 01159 // more efficient to do this than to first check and see if the entry 01160 // exists. 10/10/02 jhrg 01161 d_http_cache_table->remove_entry_from_cache_table(url); 01162 01163 HTTPCacheTable::CacheEntry *entry = new HTTPCacheTable::CacheEntry(url); 01164 entry->lock_write_response(); 01165 01166 try { 01167 d_http_cache_table->parse_headers(entry, d_max_entry_size, headers); // etag, lm, date, age, expires, max_age. 01168 if (entry->is_no_cache()) { 01169 DBG(cerr << "Not cache-able; deleting HTTPCacheTable::CacheEntry: " << entry 01170 << "(" << url << ")" << endl); 01171 entry->unlock_write_response(); 01172 delete entry; entry = 0; 01173 unlock_cache_interface(); 01174 return false; 01175 } 01176 01177 // corrected_initial_age, freshness_lifetime, response_time. 01178 d_http_cache_table->calculate_time(entry, d_default_expiration, request_time); 01179 01180 d_http_cache_table->create_location(entry); // cachename, cache_body_fd 01181 // move these write function to cache table 01182 entry->set_size(write_body(entry->get_cachename(), body)); 01183 write_metadata(entry->get_cachename(), headers); 01184 d_http_cache_table->add_entry_to_cache_table(entry); 01185 entry->unlock_write_response(); 01186 } 01187 catch (ResponseTooBigErr &e) { 01188 // Oops. Bummer. Clean up and exit. 
01189 DBG(cerr << e.get_error_message() << endl); 01190 REMOVE(entry->get_cachename().c_str()); 01191 REMOVE(string(entry->get_cachename() + CACHE_META).c_str()); 01192 DBG(cerr << "Too big; deleting HTTPCacheTable::CacheEntry: " << entry << "(" << url 01193 << ")" << endl); 01194 entry->unlock_write_response(); 01195 delete entry; entry = 0; 01196 unlock_cache_interface(); 01197 return false; 01198 } 01199 01200 if (d_http_cache_table->get_new_entries() > DUMP_FREQUENCY) { 01201 if (startGC()) 01202 perform_garbage_collection(); 01203 01204 d_http_cache_table->cache_index_write(); // resets new_entries 01205 } 01206 } 01207 catch (...) { 01208 unlock_cache_interface(); 01209 throw; 01210 } 01211 01212 unlock_cache_interface(); 01213 01214 return true; 01215 } 01216 01235 vector<string> 01236 HTTPCache::get_conditional_request_headers(const string &url) 01237 { 01238 lock_cache_interface(); 01239 01240 HTTPCacheTable::CacheEntry *entry = 0; 01241 vector<string> headers; 01242 01243 DBG(cerr << "Getting conditional request headers for " << url << endl); 01244 01245 try { 01246 entry = d_http_cache_table->get_locked_entry_from_cache_table(url); 01247 if (!entry) 01248 throw Error("There is no cache entry for the URL: " + url); 01249 01250 if (entry->get_etag() != "") 01251 headers.push_back(string("If-None-Match: ") + entry->get_etag()); 01252 01253 if (entry->get_lm() > 0) { 01254 time_t lm = entry->get_lm(); 01255 headers.push_back(string("If-Modified-Since: ") 01256 + date_time_str(&lm)); 01257 } 01258 else if (entry->get_max_age() > 0) { 01259 time_t max_age = entry->get_max_age(); 01260 headers.push_back(string("If-Modified-Since: ") 01261 + date_time_str(&max_age)); 01262 } 01263 else if (entry->get_expires() > 0) { 01264 time_t expires = entry->get_expires(); 01265 headers.push_back(string("If-Modified-Since: ") 01266 + date_time_str(&expires)); 01267 } 01268 #if 0 01269 entry->unlock(); 01270 #endif 01271 entry->unlock_read_response(); 01272 
unlock_cache_interface(); 01273 } 01274 catch (...) { 01275 unlock_cache_interface(); 01276 if (entry) { 01277 #if 0 01278 entry->unlock(); 01279 #endif 01280 entry->unlock_read_response(); 01281 } 01282 throw; 01283 } 01284 01285 return headers; 01286 } 01287 01291 struct HeaderLess: binary_function<const string&, const string&, bool> 01292 { 01293 bool operator()(const string &s1, const string &s2) const { 01294 return s1.substr(0, s1.find(':')) < s2.substr(0, s2.find(':')); 01295 } 01296 }; 01297 01311 void 01312 HTTPCache::update_response(const string &url, time_t request_time, 01313 const vector<string> &headers) 01314 { 01315 lock_cache_interface(); 01316 01317 HTTPCacheTable::CacheEntry *entry = 0; 01318 DBG(cerr << "Updating the response headers for: " << url << endl); 01319 01320 try { 01321 entry = d_http_cache_table->get_write_locked_entry_from_cache_table(url); 01322 if (!entry) 01323 throw Error("There is no cache entry for the URL: " + url); 01324 01325 // Merge the new headers with the exiting HTTPCacheTable::CacheEntry object. 01326 d_http_cache_table->parse_headers(entry, d_max_entry_size, headers); 01327 01328 // Update corrected_initial_age, freshness_lifetime, response_time. 01329 d_http_cache_table->calculate_time(entry, d_default_expiration, request_time); 01330 01331 // Merge the new headers with those in the persistent store. How: 01332 // Load the new headers into a set, then merge the old headers. Since 01333 // set<> ignores duplicates, old headers with the same name as a new 01334 // header will got into the bit bucket. Define a special compare 01335 // functor to make sure that headers are compared using only their 01336 // name and not their value too. 01337 set<string, HeaderLess> merged_headers; 01338 01339 // Load in the new headers 01340 copy(headers.begin(), headers.end(), 01341 inserter(merged_headers, merged_headers.begin())); 01342 01343 // Get the old headers and load them in. 
01344 vector<string> old_headers; 01345 read_metadata(entry->get_cachename(), old_headers); 01346 copy(old_headers.begin(), old_headers.end(), 01347 inserter(merged_headers, merged_headers.begin())); 01348 01349 // Read the values back out. Use reverse iterators with back_inserter 01350 // to preserve header order. NB: vector<> does not support push_front 01351 // so we can't use front_inserter(). 01/09/03 jhrg 01352 vector<string> result; 01353 copy(merged_headers.rbegin(), merged_headers.rend(), 01354 back_inserter(result)); 01355 01356 write_metadata(entry->get_cachename(), result); 01357 #if 0 01358 entry->unlock(); 01359 #endif 01360 entry->unlock_write_response(); 01361 unlock_cache_interface(); 01362 } 01363 catch (...) { 01364 if (entry) { 01365 #if 0 01366 entry->unlock(); 01367 #endif 01368 entry->unlock_read_response(); 01369 } 01370 unlock_cache_interface(); 01371 throw; 01372 } 01373 } 01374 01386 bool 01387 HTTPCache::is_url_valid(const string &url) 01388 { 01389 lock_cache_interface(); 01390 01391 bool freshness; 01392 HTTPCacheTable::CacheEntry *entry = 0; 01393 01394 DBG(cerr << "Is this URL valid? (" << url << ")" << endl); 01395 01396 try { 01397 if (d_always_validate) { 01398 unlock_cache_interface(); 01399 return false; // force re-validation. 01400 } 01401 01402 entry = d_http_cache_table->get_locked_entry_from_cache_table(url); 01403 if (!entry) 01404 throw Error("There is no cache entry for the URL: " + url); 01405 01406 // If we supported range requests, we'd need code here to check if 01407 // there was only a partial response in the cache. 10/02/02 jhrg 01408 01409 // In case this entry is of type "must-revalidate" then we consider it 01410 // invalid. 
01411 if (entry->get_must_revalidate()) { 01412 #if 0 01413 entry->unlock(); 01414 #endif 01415 entry->unlock_read_response(); 01416 unlock_cache_interface(); 01417 return false; 01418 } 01419 01420 time_t resident_time = time(NULL) - entry->get_response_time(); 01421 time_t current_age = entry->get_corrected_initial_age() + resident_time; 01422 01423 // Check that the max-age, max-stale, and min-fresh directives 01424 // given in the request cache control header is followed. 01425 if (d_max_age >= 0 && current_age > d_max_age) { 01426 DBG(cerr << "Cache....... Max-age validation" << endl); 01427 #if 0 01428 entry->unlock(); 01429 #endif 01430 entry->unlock_read_response(); 01431 unlock_cache_interface(); 01432 return false; 01433 } 01434 if (d_min_fresh >= 0 01435 && entry->get_freshness_lifetime() < current_age + d_min_fresh) { 01436 DBG(cerr << "Cache....... Min-fresh validation" << endl); 01437 #if 0 01438 entry->unlock(); 01439 #endif 01440 entry->unlock_read_response(); 01441 unlock_cache_interface(); 01442 return false; 01443 } 01444 01445 freshness = (entry->get_freshness_lifetime() 01446 + (d_max_stale >= 0 ? d_max_stale : 0) > current_age); 01447 #if 0 01448 entry->unlock(); 01449 #endif 01450 entry->unlock_read_response(); 01451 unlock_cache_interface(); 01452 } 01453 catch (...) 
{ 01454 if (entry) { 01455 #if 0 01456 entry->unlock(); 01457 #endif 01458 entry->unlock_read_response(); 01459 } 01460 unlock_cache_interface(); 01461 throw; 01462 } 01463 01464 return freshness; 01465 } 01466 01494 FILE * HTTPCache::get_cached_response(const string &url, 01495 vector<string> &headers, string &cacheName) { 01496 lock_cache_interface(); 01497 01498 FILE *body; 01499 HTTPCacheTable::CacheEntry *entry = 0; 01500 01501 DBG(cerr << "Getting the cached response for " << url << endl); 01502 01503 try { 01504 entry = d_http_cache_table->get_locked_entry_from_cache_table(url); 01505 if (!entry) { 01506 unlock_cache_interface(); 01507 return 0; 01508 } 01509 01510 cacheName = entry->get_cachename(); 01511 read_metadata(entry->get_cachename(), headers); 01512 01513 DBG(cerr << "Headers just read from cache: " << endl); 01514 DBGN(copy(headers.begin(), headers.end(), ostream_iterator<string>(cerr, "\n"))); 01515 01516 body = open_body(entry->get_cachename()); 01517 01518 DBG(cerr << "Returning: " << url << " from the cache." << endl); 01519 01520 d_http_cache_table->bind_entry_to_data(entry, body); 01521 } 01522 catch (...) { 01523 if (entry) 01524 #if 0 01525 entry->unlock(); 01526 #endif 01527 unlock_cache_interface(); 01528 throw; 01529 } 01530 01531 unlock_cache_interface(); 01532 01533 return body; 01534 } 01546 FILE * 01547 HTTPCache::get_cached_response(const string &url, vector<string> &headers) 01548 { 01549 string discard_name; 01550 return get_cached_response(url, headers, discard_name); 01551 } 01552 01563 FILE * 01564 HTTPCache::get_cached_response(const string &url) 01565 { 01566 string discard_name; 01567 vector<string> discard_headers; 01568 return get_cached_response(url, discard_headers, discard_name); 01569 } 01570 01583 void 01584 HTTPCache::release_cached_response(FILE *body) 01585 { 01586 lock_cache_interface(); 01587 01588 try { 01589 d_http_cache_table->uncouple_entry_from_data(body); 01590 } 01591 catch (...) 
{ 01592 unlock_cache_interface(); 01593 throw; 01594 } 01595 01596 unlock_cache_interface(); 01597 } 01598 01611 void 01612 HTTPCache::purge_cache() 01613 { 01614 lock_cache_interface(); 01615 01616 try { 01617 if (d_http_cache_table->is_locked_read_responses()) 01618 throw Error("Attempt to purge the cache with entries in use."); 01619 01620 d_http_cache_table->delete_all_entries(); 01621 } 01622 catch (...) { 01623 unlock_cache_interface(); 01624 throw; 01625 } 01626 01627 unlock_cache_interface(); 01628 } 01629 01630 } // namespace libdap