libdap++  Updated for version 3.8.2
HTTPCache.cc
00001 
00002 // -*- mode: c++; c-basic-offset:4 -*-
00003 
00004 // This file is part of libdap, A C++ implementation of the OPeNDAP Data
00005 // Access Protocol.
00006 
00007 // Copyright (c) 2002,2003 OPeNDAP, Inc.
00008 // Author: James Gallagher <jgallagher@opendap.org>
00009 //
00010 // This library is free software; you can redistribute it and/or
00011 // modify it under the terms of the GNU Lesser General Public
00012 // License as published by the Free Software Foundation; either
00013 // version 2.1 of the License, or (at your option) any later version.
00014 //
00015 // This library is distributed in the hope that it will be useful,
00016 // but WITHOUT ANY WARRANTY; without even the implied warranty of
00017 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00018 // Lesser General Public License for more details.
00019 //
00020 // You should have received a copy of the GNU Lesser General Public
00021 // License along with this library; if not, write to the Free Software
00022 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
00023 //
00024 // You can contact OPeNDAP, Inc. at PO Box 112, Saunderstown, RI. 02874-0112.
00025 
00026 #include "config.h"
00027 
00028 // #define DODS_DEBUG
00029 // #define DODS_DEBUG2
00030 #undef USE_GETENV
00031 
00032 #include <pthread.h>
00033 #include <limits.h>
00034 #include <unistd.h>   // for stat
00035 #include <sys/types.h>  // for stat and mkdir
00036 #include <sys/stat.h>
00037 
00038 #include <cstring>
00039 #include <iostream>
00040 #include <sstream>
00041 #include <algorithm>
00042 #include <iterator>
00043 #include <set>
00044 
00045 #include "Error.h"
00046 #include "InternalErr.h"
00047 #include "ResponseTooBigErr.h"
00048 #ifndef WIN32
00049 #include "SignalHandler.h"
00050 #endif
00051 #include "HTTPCacheInterruptHandler.h"
00052 #include "HTTPCacheTable.h"
00053 #include "HTTPCache.h"
00054 #include "HTTPCacheMacros.h"
00055 
00056 #include "util_mit.h"
00057 #include "debug.h"
00058 
00059 using namespace std;
00060 
00061 namespace libdap {
00062 
00063 HTTPCache *HTTPCache::_instance = 0;
00064 
00066 // instance_mutex is used to ensure that only one instance is created.
00067 // That is, it protects the body of the HTTPCache::instance() method. This
00068 // mutex is initialized from within the static function once_init_routine(),
00069 // and that call is made through pthread_once(), with the pthread_once_t
00070 // control variable once_block guaranteeing it runs exactly once. All of this
00071 // ensures that no matter how many threads call the instance() method, only
00072 // one instance is ever made.
00072 static pthread_mutex_t instance_mutex;
00073 static pthread_once_t once_block = PTHREAD_ONCE_INIT;
00074 
00075 
00076 #define NO_LM_EXPIRATION 24*3600 // 24 hours
00077 
00078 #define DUMP_FREQUENCY 10 // Dump index every x loads
00079 
00080 #define MEGA 0x100000L
00081 #define CACHE_TOTAL_SIZE 20 // Default cache size is 20M
00082 #define CACHE_FOLDER_PCT 10 // 10% of cache size for metainfo etc.
00083 #define CACHE_GC_PCT 10  // 10% of cache size free after GC
00084 #define MIN_CACHE_TOTAL_SIZE 5 // 5M Min cache size
00085 #define MAX_CACHE_ENTRY_SIZE 3 // 3M Max size of single cached entry
00086 
00087 static void
00088 once_init_routine()
00089 {
00090     int status;
00091     status = INIT(&instance_mutex);
00092 
00093     if (status != 0)
00094         throw InternalErr(__FILE__, __LINE__, "Could not initialize the HTTP Cache mutex. Exiting.");
00095 }
00096 
00125 HTTPCache *
00126 HTTPCache::instance(const string &cache_root, bool force)
00127 {
00128     int status = pthread_once(&once_block, once_init_routine);
00129     if (status != 0)
00130         throw InternalErr(__FILE__, __LINE__, "Could not initialize the HTTP Cache mutex. Exiting.");
00131 
00132     LOCK(&instance_mutex);
00133 
00134     DBG(cerr << "Entering instance(); (" << hex << _instance << dec << ")"
00135             << "... ");
00136 
00137     try {
00138         if (!_instance) {
00139             _instance = new HTTPCache(cache_root, force);
00140 
00141             DBG(cerr << "New instance: " << _instance << ", cache root: "
00142                 << _instance->d_cache_root << endl);
00143 
00144             atexit(delete_instance);
00145 
00146 #ifndef WIN32
00147             // Register the interrupt handler. If we've already registered
00148             // one, barf. If this becomes a problem, hack SignalHandler so
00149             // that we can chain these handlers... 02/10/04 jhrg
00150             //
00151             // Technically we're leaking memory here. However, since this
00152             // class is a singleton, we know that only three objects will
00153             // ever be created and they will all exist until the process
00154             // exits. We can let this slide... 02/12/04 jhrg
00155             EventHandler *old_eh = SignalHandler::instance()->register_handler
00156                                    (SIGINT, new HTTPCacheInterruptHandler);
00157             if (old_eh) {
00158                 SignalHandler::instance()->register_handler(SIGINT, old_eh);
00159                 throw SignalHandlerRegisteredErr(
00160                     "Could not register event handler for SIGINT without superseding an existing one.");
00161             }
00162 
00163             old_eh = SignalHandler::instance()->register_handler
00164                      (SIGPIPE, new HTTPCacheInterruptHandler);
00165             if (old_eh) {
00166                 SignalHandler::instance()->register_handler(SIGPIPE, old_eh);
00167                 throw SignalHandlerRegisteredErr(
00168                     "Could not register event handler for SIGPIPE without superseding an existing one.");
00169             }
00170 
00171             old_eh = SignalHandler::instance()->register_handler
00172                      (SIGTERM, new HTTPCacheInterruptHandler);
00173             if (old_eh) {
00174                 SignalHandler::instance()->register_handler(SIGTERM, old_eh);
00175                 throw SignalHandlerRegisteredErr(
00176                     "Could not register event handler for SIGTERM without superseding an existing one.");
00177             }
00178 #endif
00179         }
00180     }
00181     catch (...) {
00182         DBG2(cerr << "The constructor threw an Error!" << endl);
00183         UNLOCK(&instance_mutex);
00184         throw;
00185     }
00186 
00187     UNLOCK(&instance_mutex);
00188     DBGN(cerr << "returning " << hex << _instance << dec << endl);
00189 
00190     return _instance;
00191 }
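
// Typical use (an illustrative sketch, not part of the original source; the
// cache-root path below is hypothetical): callers obtain the singleton via
// instance() and never delete it themselves, since delete_instance() is
// registered with atexit() above.
//
//     HTTPCache *cache = HTTPCache::instance("/tmp/dods-cache", true);
//     if (cache && cache->is_cache_enabled()) {
//         // ... use the caching methods defined below ...
//     }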
00192 
00196 void
00197 HTTPCache::delete_instance()
00198 {
00199     DBG(cerr << "Entering delete_instance()..." << endl);
00200     if (HTTPCache::_instance) {
00201         DBG(cerr << "Deleting the cache: " << HTTPCache::_instance << endl);
00202         delete HTTPCache::_instance;
00203         HTTPCache::_instance = 0;
00204     }
00205 
00206     DBG(cerr << "Exiting delete_instance()" << endl);
00207 }
00208 
00223 HTTPCache::HTTPCache(string cache_root, bool force) :
00224         d_locked_open_file(0),
00225         d_cache_enabled(false),
00226         d_cache_protected(false),
00227         d_expire_ignored(false),
00228         d_always_validate(false),
00229         d_total_size(CACHE_TOTAL_SIZE * MEGA),
00230         d_folder_size(CACHE_TOTAL_SIZE * MEGA / CACHE_FOLDER_PCT),
00231         d_gc_buffer(CACHE_TOTAL_SIZE * MEGA / CACHE_GC_PCT),
00232         d_max_entry_size(MAX_CACHE_ENTRY_SIZE * MEGA),
00233         d_default_expiration(NO_LM_EXPIRATION),
00234         d_max_age(-1),
00235         d_max_stale(-1),
00236         d_min_fresh(-1),
00237         d_http_cache_table(0)
00238 {
00239     DBG(cerr << "Entering the constructor for " << this << "... ");
00240 #if 0
00241         int status = pthread_once(&once_block, once_init_routine);
00242         if (status != 0)
00243                 throw InternalErr(__FILE__, __LINE__, "Could not initialize the HTTP Cache mutex. Exiting.");
00244 #endif
00245         INIT(&d_cache_mutex);
00246 
00247         // This used to throw an Error object if we could not get the
00248         // single user lock. However, that results in an invalid object. It's
00249         // better to have an instance that has default values. If we cannot get
00250         // the lock, make sure to set the cache as *disabled*. 03/12/03 jhrg
00251         //
00252         // I fixed this block so that the cache root is set before we try to get
00253         // the single user lock. That was the fix for bug #661. To make that
00254         // work, I had to move the call to create_cache_root out of
00255         // set_cache_root(). 09/08/03 jhrg
00256 
00257         set_cache_root(cache_root);
00258         int block_size;
00259 
00260         if (!get_single_user_lock(force))
00261             throw Error("Could not get single user lock for the cache");
00262 
00263 #ifdef WIN32
00264         //  Windows is unable to provide this information.  4096 appears
00265         //  to be a reasonable guess; the true value is likely in the
00266         //  range [2048, 8192] on Windows, though whether that remains
00267         //  accurate may change over time.
00268         block_size = 4096;
00269 #else
00270         struct stat s;
00271         if (stat(cache_root.c_str(), &s) == 0)
00272                 block_size = s.st_blksize;
00273         else
00274                 throw Error("Could not set file system block size.");
00275 #endif
00276         d_http_cache_table = new HTTPCacheTable(d_cache_root, block_size);
00277         d_cache_enabled = true;
00278 
00279         DBGN(cerr << "exiting" << endl);
00280 }
00281 
00294 HTTPCache::~HTTPCache()
00295 {
00296     DBG(cerr << "Entering the destructor for " << this << "... ");
00297 
00298     try {
00299         if (startGC())
00300             perform_garbage_collection();
00301 
00302         d_http_cache_table->cache_index_write();
00303     }
00304     catch (Error &e) {
00305         // If the cache index cannot be written, we've got problems. However,
00306         // unless we're debugging, still free up the cache table in memory.
00307         // How should we let users know the cache index is not being
00308         // written?? 10/03/02 jhrg
00309         DBG(cerr << e.get_error_message() << endl);
00310     }
00311 
00312     delete d_http_cache_table;
00313 
00314     release_single_user_lock();
00315 
00316     DBGN(cerr << "exiting destructor." << endl);
00317     DESTROY(&d_cache_mutex);
00318 }
00319 
00320 
00324 
00328 bool
00329 HTTPCache::stopGC() const
00330 {
00331     return (d_http_cache_table->get_current_size() + d_folder_size < d_total_size - d_gc_buffer);
00332 }
00333 
00340 bool
00341 HTTPCache::startGC() const
00342 {
00343     DBG(cerr << "startGC, current_size: " << d_http_cache_table->get_current_size() << endl);
00344     return (d_http_cache_table->get_current_size() + d_folder_size > d_total_size);
00345 }
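
// Worked example of the two thresholds above (illustration only), using the
// values set_max_size() derives from the defaults: d_total_size is 20 * MEGA
// bytes and d_folder_size and d_gc_buffer are each 2 * MEGA bytes. startGC()
// then returns true once the cached entries exceed 18 * MEGA bytes, and
// stopGC() keeps collection running until they fall below 16 * MEGA bytes,
// so every pass frees at least the d_gc_buffer margin.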
00346 
00361 void
00362 HTTPCache::perform_garbage_collection()
00363 {
00364     DBG(cerr << "Performing garbage collection" << endl);
00365 
00366     // Remove all the expired responses.
00367     expired_gc();
00368 
00369     // Remove entries larger than max_entry_size.
00370     too_big_gc();
00371 
00372     // Remove entries starting with zero hits, 1, ..., until stopGC()
00373     // returns true.
00374     hits_gc();
00375 }
00376 
00382 void
00383 HTTPCache::expired_gc()
00384 {
00385     if (!d_expire_ignored) {
00386         d_http_cache_table->delete_expired_entries();
00387     }
00388 }
00389 
00406 void
00407 HTTPCache::hits_gc()
00408 {
00409     int hits = 0;
00410 
00411     if (startGC()) {
00412                 while (!stopGC()) {
00413                         d_http_cache_table->delete_by_hits(hits);
00414                         hits++;
00415                 }
00416         }
00417 }
00418 
00423 void HTTPCache::too_big_gc() {
00424         if (startGC())
00425                 d_http_cache_table->delete_by_size(d_max_entry_size);
00426 }
00427 
00429 
00440 bool HTTPCache::get_single_user_lock(bool force) 
00441 {
00442     if (!d_locked_open_file) {
00443         FILE * fp = NULL;
00444 
00445         try {
00446             // It's OK to call create_cache_root if the directory already
00447             // exists.
00448             create_cache_root(d_cache_root);
00449         }
00450         catch (Error &e) {
00451             // We need to catch and return false because this method is
00452             // called from a ctor and throwing at this point will result in a
00453             // partially constructed object. 01/22/04 jhrg
00454             DBG(cerr << "Failure to create the cache root" << endl);
00455             return false;
00456         }
00457 
00458         // Try to read the lock file. If we can open for reading, it exists.
00459         string lock = d_cache_root + CACHE_LOCK;
00460         if ((fp = fopen(lock.c_str(), "r")) != NULL) {
00461             int res = fclose(fp);
00462             if (res) {
00463                 DBG(cerr << "Failed to close " << (void *)fp << endl);
00464             }
00465             if (force)
00466                 REMOVE(lock.c_str());
00467             else
00468                 return false;
00469         }
00470 
00471         if ((fp = fopen(lock.c_str(), "w")) == NULL) {
00472             DBG(cerr << "Could not open for write access" << endl);
00473             return false;
00474         }
00475 
00476         d_locked_open_file = fp;
00477         return true;
00478     }
00479 
00480     cerr << "locked_open_file is true" << endl;
00481     return false;
00482 }
00483 
00486 void
00487 HTTPCache::release_single_user_lock()
00488 {
00489     if (d_locked_open_file) {
00490         int res = fclose(d_locked_open_file);
00491         if (res) {
00492             DBG(cerr << "Failed to close " << (void *)d_locked_open_file << endl) ;
00493         }
00494         d_locked_open_file = 0;
00495     }
00496 
00497     string lock = d_cache_root + CACHE_LOCK;
00498     REMOVE(lock.c_str());
00499 }
00500 
00503 
00507 string
00508 HTTPCache::get_cache_root() const
00509 {
00510     return d_cache_root;
00511 }
00512 
00513 
00522 void
00523 HTTPCache::create_cache_root(const string &cache_root)
00524 {
00525     struct stat stat_info;
00526     string::size_type cur = 0;
00527 
00528 #ifdef WIN32
00529     cur = cache_root[1] == ':' ? 3 : 1;
00530     typedef int mode_t;
00531 #else
00532     cur = 1;
00533 #endif
00534     while ((cur = cache_root.find(DIR_SEPARATOR_CHAR, cur)) != string::npos) {
00535         string dir = cache_root.substr(0, cur);
00536         if (stat(dir.c_str(), &stat_info) == -1) {
00537             DBG2(cerr << "Cache....... Creating " << dir << endl);
00538             mode_t mask = UMASK(0);
00539             if (MKDIR(dir.c_str(), 0777) < 0) {
00540                 DBG2(cerr << "Error: can't create." << endl);
00541                 UMASK(mask);
00542                 throw Error(string("Could not create the directory for the cache. Failed when building path at ") + dir + string("."));
00543             }
00544             UMASK(mask);
00545         }
00546         else {
00547             DBG2(cerr << "Cache....... Found " << dir << endl);
00548         }
00549         cur++;
00550     }
00551 }
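
// Illustration (the path is hypothetical): with cache_root set to
// "/usr/tmp/dods-cache/", the loop above finds each DIR_SEPARATOR_CHAR in
// turn and stats, then creates if needed, "/usr", "/usr/tmp" and
// "/usr/tmp/dods-cache". The trailing separator appended by set_cache_root()
// is what makes the final component part of the walk.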
00552 
00567 void
00568 HTTPCache::set_cache_root(const string &root)
00569 {
00570     if (root != "") {
00571         d_cache_root = root;
00572         // cache root should end in /.
00573         if (d_cache_root[d_cache_root.size()-1] != DIR_SEPARATOR_CHAR)
00574             d_cache_root += DIR_SEPARATOR_CHAR;
00575     }
00576     else {
00577         // If no cache root has been indicated then look for a suitable
00578         // location.
00579 #ifdef USE_GETENV
00580         char * cr = (char *) getenv("DODS_CACHE");
00581         if (!cr) cr = (char *) getenv("TMP");
00582         if (!cr) cr = (char *) getenv("TEMP");
00583         if (!cr) cr = (char*)CACHE_LOCATION;
00584         d_cache_root = cr;
00585 #else
00586         d_cache_root = CACHE_LOCATION;
00587 #endif
00588 
00589         if (d_cache_root[d_cache_root.size()-1] != DIR_SEPARATOR_CHAR)
00590             d_cache_root += DIR_SEPARATOR_CHAR;
00591 
00592         d_cache_root += CACHE_ROOT;
00593     }
00594 
00595     // Test d_http_cache_table because this method can be called before that
00596     // instance is created and also can be called later to change the cache
00597     // root. jhrg 05.14.08
00598     if (d_http_cache_table)
00599         d_http_cache_table->set_cache_root(d_cache_root);
00600 }
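
// Example of the fallback chain above (illustration only): when the library
// is built with USE_GETENV and DODS_CACHE is unset but TMP is "/var/tmp",
// d_cache_root becomes "/var/tmp/" followed by CACHE_ROOT; without
// USE_GETENV it is always CACHE_LOCATION (plus a separator) followed by
// CACHE_ROOT.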
00601 
00613 void
00614 HTTPCache::set_cache_enabled(bool mode)
00615 {
00616     lock_cache_interface();
00617 
00618     d_cache_enabled = mode;
00619 
00620     unlock_cache_interface();
00621 }
00622 
00625 bool
00626 HTTPCache::is_cache_enabled() const
00627 {
00628     DBG2(cerr << "In HTTPCache::is_cache_enabled: (" << d_cache_enabled << ")"
00629          << endl);
00630     return d_cache_enabled;
00631 }
00632 
00643 void
00644 HTTPCache::set_cache_disconnected(CacheDisconnectedMode mode)
00645 {
00646     lock_cache_interface();
00647 
00648     d_cache_disconnected = mode;
00649 
00650     unlock_cache_interface();
00651 }
00652 
00655 CacheDisconnectedMode
00656 HTTPCache::get_cache_disconnected() const
00657 {
00658     return d_cache_disconnected;
00659 }
00660 
00669 void
00670 HTTPCache::set_expire_ignored(bool mode)
00671 {
00672     lock_cache_interface();
00673 
00674     d_expire_ignored = mode;
00675 
00676     unlock_cache_interface();
00677 }
00678 
00679 /* Is the cache ignoring Expires headers returned with responses that have
00680    been cached? */
00681 
00682 bool
00683 HTTPCache::is_expire_ignored() const
00684 {
00685     return d_expire_ignored;
00686 }
00687 
00703 void
00704 HTTPCache::set_max_size(unsigned long size)
00705 {
00706     lock_cache_interface();
00707 
00708     try {
00709         unsigned long new_size = size < MIN_CACHE_TOTAL_SIZE ?
00710                                  MIN_CACHE_TOTAL_SIZE * MEGA :
00711                                  (size > ULONG_MAX / MEGA ? ULONG_MAX : size * MEGA);
00712         unsigned long old_size = d_total_size;
00713         d_total_size = new_size;
00714         d_folder_size = d_total_size / CACHE_FOLDER_PCT;
00715         d_gc_buffer = d_total_size / CACHE_GC_PCT;
00716 
00717         if (new_size < old_size && startGC()) {
00718             perform_garbage_collection();
00719             d_http_cache_table->cache_index_write();
00720         }
00721     }
00722     catch (...) {
00723         unlock_cache_interface();
00724         DBGN(cerr << "Unlocking interface." << endl);
00725         throw;
00726     }
00727 
00728     DBG2(cerr << "Cache....... Total cache size: " << d_total_size
00729          << " with " << d_folder_size
00730          << " bytes for meta information and folders and at least "
00731          << d_gc_buffer << " bytes free after every gc" << endl);
00732 
00733     unlock_cache_interface();
00734 }
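
// Worked example (illustration only): set_max_size(1) is clamped up to the
// minimum, giving d_total_size = 5 * MEGA bytes, while set_max_size(50)
// gives 50 * MEGA bytes. In both cases d_folder_size and d_gc_buffer become
// one tenth of d_total_size, and shrinking the cache below its current size
// can trigger an immediate garbage-collection pass, as coded above.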
00735 
00738 unsigned long
00739 HTTPCache::get_max_size() const
00740 {
00741     return d_total_size / MEGA;
00742 }
00743 
00752 void
00753 HTTPCache::set_max_entry_size(unsigned long size)
00754 {
00755     lock_cache_interface();
00756 
00757     try {
00758         unsigned long new_size = size * MEGA;
00759         if (new_size > 0 && new_size < d_total_size - d_folder_size) {
00760             unsigned long old_size = d_max_entry_size;
00761             d_max_entry_size = new_size;
00762             if (new_size < old_size && startGC()) {
00763                 perform_garbage_collection();
00764                 d_http_cache_table->cache_index_write();
00765             }
00766         }
00767     }
00768     catch (...) {
00769         unlock_cache_interface();
00770         throw;
00771     }
00772 
00773     DBG2(cerr << "Cache...... Max entry cache size is "
00774          << d_max_entry_size << endl);
00775 
00776     unlock_cache_interface();
00777 }
00778 
00783 unsigned long
00784 HTTPCache::get_max_entry_size() const
00785 {
00786     return d_max_entry_size / MEGA;
00787 }
00788 
00799 void
00800 HTTPCache::set_default_expiration(const int exp_time)
00801 {
00802     lock_cache_interface();
00803 
00804     d_default_expiration = exp_time;
00805 
00806     unlock_cache_interface();
00807 }
00808 
00811 int
00812 HTTPCache::get_default_expiration() const
00813 {
00814     return d_default_expiration;
00815 }
00816 
00821 void
00822 HTTPCache::set_always_validate(bool validate)
00823 {
00824     d_always_validate = validate;
00825 }
00826 
00830 bool
00831 HTTPCache::get_always_validate() const
00832 {
00833     return d_always_validate;
00834 }
00835 
00852 void
00853 HTTPCache::set_cache_control(const vector<string> &cc)
00854 {
00855     lock_cache_interface();
00856 
00857     try {
00858         d_cache_control = cc;
00859 
00860         vector<string>::const_iterator i;
00861         for (i = cc.begin(); i != cc.end(); ++i) {
00862             string header = (*i).substr(0, (*i).find(':'));
00863             string value = (*i).substr((*i).find(": ") + 2);
00864             if (header != "Cache-Control") {
00865                 throw InternalErr(__FILE__, __LINE__, "Expected cache control header not found.");
00866             }
00867             else {
00868                 if (value == "no-cache" || value == "no-store")
00869                     d_cache_enabled = false;
00870                 else if (value.find("max-age") != string::npos) {
00871                     string max_age = value.substr(value.find('=') + 1);
00872                     d_max_age = parse_time(max_age.c_str());
00873                 }
00874                 else if (value == "max-stale")
00875                     d_max_stale = 0; // zero means accept a stale response of any age
00876                 else if (value.find("max-stale") != string::npos) {
00877                     string max_stale = value.substr(value.find('=') + 1);
00878                     d_max_stale = parse_time(max_stale.c_str());
00879                 }
00880                 else if (value.find("min-fresh") != string::npos) {
00881                     string min_fresh = value.substr(value.find('=') + 1);
00882                     d_min_fresh = parse_time(min_fresh.c_str());
00883                 }
00884             }
00885         }
00886     }
00887     catch (...) {
00888         unlock_cache_interface();
00889         throw;
00890     }
00891 
00892     unlock_cache_interface();
00893 }
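
// A minimal usage sketch (not part of the original source; 'cache' stands
// for a previously obtained HTTPCache instance). The parser above expects
// each string in the "Header: value" form shown here:
//
//     vector<string> cc;
//     cc.push_back("Cache-Control: max-age=3600"); // stale after one hour
//     cc.push_back("Cache-Control: min-fresh=60"); // demand a minute of remaining freshness
//     cache->set_cache_control(cc);
//
// Passing "Cache-Control: no-cache" or "Cache-Control: no-store" disables
// the cache instead.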
00894 
00895 
00900 vector<string>
00901 HTTPCache::get_cache_control()
00902 {
00903     return d_cache_control;
00904 }
00905 
00907 
00916 bool
00917 HTTPCache::is_url_in_cache(const string &url)
00918 {
00919     DBG(cerr << "Is this url in the cache? (" << url << ")" << endl);
00920 
00921     HTTPCacheTable::CacheEntry *entry = d_http_cache_table->get_locked_entry_from_cache_table(url);
00922     bool status = entry != 0;
00923     if (entry) {
00924         entry->unlock_read_response();
00925     }
00926     return  status;
00927 }
00928 
00934 bool
00935 is_hop_by_hop_header(const string &header)
00936 {
00937     return header.find("Connection") != string::npos
00938            || header.find("Keep-Alive") != string::npos
00939            || header.find("Proxy-Authenticate") != string::npos
00940            || header.find("Proxy-Authorization") != string::npos
00941            || header.find("Transfer-Encoding") != string::npos
00942            || header.find("Upgrade") != string::npos;
00943 }
00944 
00956 void
00957 HTTPCache::write_metadata(const string &cachename, const vector<string> &headers)
00958 {
00959     string fname = cachename + CACHE_META;
00960     d_open_files.push_back(fname);
00961 
00962     FILE *dest = fopen(fname.c_str(), "w");
00963     if (!dest) {
00964         throw InternalErr(__FILE__, __LINE__,
00965                           "Could not open named cache entry file.");
00966     }
00967 
00968     vector<string>::const_iterator i;
00969     for (i = headers.begin(); i != headers.end(); ++i) {
00970         if (!is_hop_by_hop_header(*i)) {
00971             int s = fwrite((*i).c_str(), (*i).size(), 1, dest);
00972             if (s != 1) {
00973                 fclose(dest);
00974                 throw InternalErr(__FILE__, __LINE__, "could not write header: '" + (*i) + "' " + long_to_string(s));
00975             }
00976             s = fwrite("\n", 1, 1, dest);
00977             if (s != 1) {
00978                 fclose(dest);
00979                 throw InternalErr(__FILE__, __LINE__, "could not write header: " + long_to_string(s));
00980             }
00981         }
00982     }
00983 
00984     int res = fclose(dest);
00985     if (res) {
00986         DBG(cerr << "HTTPCache::write_metadata - Failed to close "
00987             << dest << endl);
00988     }
00989 
00990     d_open_files.pop_back();
00991 }
00992 
01003 void
01004 HTTPCache::read_metadata(const string &cachename, vector<string> &headers)
01005 {
01006     FILE *md = fopen(string(cachename + CACHE_META).c_str(), "r");
01007     if (!md) {
01008         throw InternalErr(__FILE__, __LINE__,
01009                           "Could not open named cache entry meta data file.");
01010     }
01011 
01012     char line[1024];
01013     while (!feof(md) && fgets(line, 1024, md)) {
01014         line[min(1024, static_cast<int>(strlen(line)))-1] = '\0'; // erase newline
01015         headers.push_back(string(line));
01016     }
01017 
01018     int res = fclose(md);
01019     if (res) {
01020         DBG(cerr << "HTTPCache::read_metadata - Failed to close "
01021             << md << endl);
01022     }
01023 }
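
// Taken together, write_metadata() and read_metadata() define the format of
// the CACHE_META side file: one end-to-end header per line, with hop-by-hop
// headers filtered out when the file is written. For example (values are
// hypothetical), a cached response might leave a meta file containing:
//
//     ETag: "abc123"
//     Last-Modified: Sat, 01 Jan 2005 00:00:00 GMT
//     Content-Type: text/plain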
01024 
01046 int
01047 HTTPCache::write_body(const string &cachename, const FILE *src)
01048 {
01049     d_open_files.push_back(cachename);
01050 
01051     FILE *dest = fopen(cachename.c_str(), "wb");
01052     if (!dest) {
01053         throw InternalErr(__FILE__, __LINE__,
01054                           "Could not open named cache entry file.");
01055     }
01056 
01057     // Read and write in 1k blocks; an attempt at doing this efficiently.
01058     // 09/30/02 jhrg
01059     char line[1024];
01060     size_t n;
01061     int total = 0;
01062     while ((n = fread(line, 1, 1024, const_cast<FILE *>(src))) > 0) {
01063         total += fwrite(line, 1, n, dest);
01064         DBG2(sleep(3));
01065     }
01066 
01067     if (ferror(const_cast<FILE *>(src)) || ferror(dest)) {
01068         int res = fclose(dest);
01069         res = res | unlink(cachename.c_str());
01070         if (res) {
01071             DBG(cerr << "HTTPCache::write_body - Failed to close/unlink "
01072                 << dest << endl);
01073         }
01074         throw InternalErr(__FILE__, __LINE__,
01075                           "I/O error transferring data to the cache.");
01076     }
01077 
01078     rewind(const_cast<FILE *>(src));
01079 
01080     int res = fclose(dest);
01081     if (res) {
01082         DBG(cerr << "HTTPCache::write_body - Failed to close "
01083             << dest << endl);
01084     }
01085 
01086     d_open_files.pop_back();
01087 
01088     return total;
01089 }
01090 
01099 FILE *
01100 HTTPCache::open_body(const string &cachename)
01101 {
01102     DBG(cerr << "cachename: " << cachename << endl);
01103 
01104     FILE *src = fopen(cachename.c_str(), "rb"); // Read only
01105     if (!src)
01106         throw InternalErr(__FILE__, __LINE__, "Could not open cache file.");
01107 
01108     return src;
01109 }
01110 
01136 bool
01137 HTTPCache::cache_response(const string &url, time_t request_time,
01138                           const vector<string> &headers, const FILE *body)
01139 {
01140     lock_cache_interface();
01141 
01142     DBG(cerr << "Caching url: " << url << "." << endl);
01143 
01144     try {
01145         // If this is not an http or https URL, don't cache.
01146         if (url.find("http:") == string::npos &&
01147             url.find("https:") == string::npos) {
01148             unlock_cache_interface();
01149             return false;
01150         }
01151 
01152         // This does nothing if url is not already in the cache. It's
01153         // more efficient to do this than to first check and see if the entry
01154         // exists. 10/10/02 jhrg
01155         d_http_cache_table->remove_entry_from_cache_table(url);
01156 
01157         HTTPCacheTable::CacheEntry *entry = new HTTPCacheTable::CacheEntry(url);
01158         entry->lock_write_response();
01159 
01160         try {
01161             d_http_cache_table->parse_headers(entry, d_max_entry_size, headers); // etag, lm, date, age, expires, max_age.
01162             if (entry->is_no_cache()) {
01163                 DBG(cerr << "Not cache-able; deleting HTTPCacheTable::CacheEntry: " << entry
01164                     << "(" << url << ")" << endl);
01165                 entry->unlock_write_response();
01166                 delete entry; entry = 0;
01167                 unlock_cache_interface();
01168                 return false;
01169             }
01170 
01171             // corrected_initial_age, freshness_lifetime, response_time.
01172             d_http_cache_table->calculate_time(entry, d_default_expiration, request_time);
01173 
01174             d_http_cache_table->create_location(entry); // cachename, cache_body_fd
01175             // TODO: move these write functions into the cache table
01176             entry->set_size(write_body(entry->get_cachename(), body));
01177             write_metadata(entry->get_cachename(), headers);
01178             d_http_cache_table->add_entry_to_cache_table(entry);
01179             entry->unlock_write_response();
01180         }
01181         catch (ResponseTooBigErr &e) {
01182             // Oops. Bummer. Clean up and exit.
01183             DBG(cerr << e.get_error_message() << endl);
01184             REMOVE(entry->get_cachename().c_str());
01185             REMOVE(string(entry->get_cachename() + CACHE_META).c_str());
01186             DBG(cerr << "Too big; deleting HTTPCacheTable::CacheEntry: " << entry << "(" << url
01187                 << ")" << endl);
01188             entry->unlock_write_response();
01189             delete entry; entry = 0;
01190             unlock_cache_interface();
01191             return false;
01192         }
01193 
01194         if (d_http_cache_table->get_new_entries() > DUMP_FREQUENCY) {
01195             if (startGC())
01196                 perform_garbage_collection();
01197 
01198             d_http_cache_table->cache_index_write(); // resets new_entries
01199         }
01200     }
01201     catch (...) {
01202         unlock_cache_interface();
01203         throw;
01204     }
01205 
01206     unlock_cache_interface();
01207 
01208     return true;
01209 }
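
// A caller's sketch (not part of the original source; the URL and variables
// are hypothetical and would normally come from the HTTP client that just
// read the response):
//
//     time_t request_time = time(0);   // when the request was sent
//     vector<string> resp_headers;     // response headers as "Name: value" strings
//     FILE *resp_body = ...;           // open FILE* positioned at the body
//     if (cache->cache_response("http://example.org/data.nc", request_time,
//                               resp_headers, resp_body))
//         DBG(cerr << "Response cached" << endl);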
01210 
01229 vector<string>
01230 HTTPCache::get_conditional_request_headers(const string &url)
01231 {
01232     lock_cache_interface();
01233 
01234     HTTPCacheTable::CacheEntry *entry = 0;
01235     vector<string> headers;
01236 
01237     DBG(cerr << "Getting conditional request headers for " << url << endl);
01238 
01239     try {
01240         entry = d_http_cache_table->get_locked_entry_from_cache_table(url);
01241         if (!entry)
01242             throw Error("There is no cache entry for the URL: " + url);
01243 
01244         if (entry->get_etag() != "")
01245             headers.push_back(string("If-None-Match: ") + entry->get_etag());
01246 
01247         if (entry->get_lm() > 0) {
01248                 time_t lm = entry->get_lm();
01249             headers.push_back(string("If-Modified-Since: ")
01250                               + date_time_str(&lm));
01251         }
01252         else if (entry->get_max_age() > 0) {
01253                 time_t max_age = entry->get_max_age();
01254             headers.push_back(string("If-Modified-Since: ")
01255                               + date_time_str(&max_age));
01256         }
01257         else if (entry->get_expires() > 0) {
01258                 time_t expires = entry->get_expires();
01259             headers.push_back(string("If-Modified-Since: ")
01260                               + date_time_str(&expires));
01261         }
01262         entry->unlock_read_response();
01263         unlock_cache_interface();
01264     }
01265     catch (...) {
01266         unlock_cache_interface();
01267         if (entry) {
01268             entry->unlock_read_response();
01269         }
01270         throw;
01271     }
01272 
01273     return headers;
01274 }
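
// For a cached entry that carried an ETag and a Last-Modified header, the
// vector returned above would hold strings such as (values hypothetical):
//
//     If-None-Match: "abc123"
//     If-Modified-Since: Sat, 01 Jan 2005 00:00:00 GMT
//
// The caller adds these to the follow-up request so the origin server can
// reply 304 Not Modified while the cached copy is still usable.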
01275 
01279 struct HeaderLess: binary_function<const string&, const string&, bool>
01280 {
01281     bool operator()(const string &s1, const string &s2) const {
01282         return s1.substr(0, s1.find(':')) < s2.substr(0, s2.find(':'));
01283     }
01284 };
01285 
01299 void
01300 HTTPCache::update_response(const string &url, time_t request_time,
01301                            const vector<string> &headers)
01302 {
01303     lock_cache_interface();
01304 
01305     HTTPCacheTable::CacheEntry *entry = 0;
01306     DBG(cerr << "Updating the response headers for: " << url << endl);
01307 
01308     try {
01309         entry = d_http_cache_table->get_write_locked_entry_from_cache_table(url);
01310         if (!entry)
01311             throw Error("There is no cache entry for the URL: " + url);
01312 
01313         // Merge the new headers with the existing HTTPCacheTable::CacheEntry object.
01314         d_http_cache_table->parse_headers(entry, d_max_entry_size, headers);
01315 
01316         // Update corrected_initial_age, freshness_lifetime, response_time.
01317         d_http_cache_table->calculate_time(entry, d_default_expiration, request_time);
01318 
01319         // Merge the new headers with those in the persistent store. How:
01320         // Load the new headers into a set, then merge the old headers. Since
01321         // set<> ignores duplicates, old headers with the same name as a new
01322         // header will go into the bit bucket. Define a special compare
01323         // functor to make sure that headers are compared using only their
01324         // name and not their value too.
01325         set<string, HeaderLess> merged_headers;
01326 
01327         // Load in the new headers
01328         copy(headers.begin(), headers.end(),
01329              inserter(merged_headers, merged_headers.begin()));
01330 
01331         // Get the old headers and load them in.
01332         vector<string> old_headers;
01333         read_metadata(entry->get_cachename(), old_headers);
01334         copy(old_headers.begin(), old_headers.end(),
01335              inserter(merged_headers, merged_headers.begin()));
01336 
01337         // Read the values back out. Use reverse iterators with back_inserter
01338         // to preserve header order. NB: vector<> does not support push_front
01339         // so we can't use front_inserter(). 01/09/03 jhrg
01340         vector<string> result;
01341         copy(merged_headers.rbegin(), merged_headers.rend(),
01342              back_inserter(result));
01343 
01344         write_metadata(entry->get_cachename(), result);
01345         entry->unlock_write_response();
01346         unlock_cache_interface();
01347     }
01348     catch (...) {
01349         if (entry) {
01350             entry->unlock_write_response();
01351         }
01352         unlock_cache_interface();
01353         throw;
01354     }
01355 }
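
// Merge example (illustration only): if the new headers contain ETag: "v2"
// and the stored metadata contains ETag: "v1" and Content-Type: text/plain,
// the set above keeps the new ETag (HeaderLess compares only the text before
// the ':') and retains the old Content-Type, so the rewritten metadata
// reflects the revalidated response without dropping headers the server did
// not resend.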
01356 
01368 bool
01369 HTTPCache::is_url_valid(const string &url)
01370 {
01371     lock_cache_interface();
01372 
01373     bool freshness;
01374     HTTPCacheTable::CacheEntry *entry = 0;
01375 
01376     DBG(cerr << "Is this URL valid? (" << url << ")" << endl);
01377 
01378     try {
01379         if (d_always_validate) {
01380             unlock_cache_interface();
01381             return false;  // force re-validation.
01382         }
01383 
01384         entry = d_http_cache_table->get_locked_entry_from_cache_table(url);
01385         if (!entry)
01386             throw Error("There is no cache entry for the URL: " + url);
01387 
01388         // If we supported range requests, we'd need code here to check if
01389         // there was only a partial response in the cache. 10/02/02 jhrg
01390 
01391         // In case this entry is of type "must-revalidate" then we consider it
01392         // invalid.
01393         if (entry->get_must_revalidate()) {
01394             entry->unlock_read_response();
01395             unlock_cache_interface();
01396             return false;
01397         }
01398 
01399         time_t resident_time = time(NULL) - entry->get_response_time();
01400         time_t current_age = entry->get_corrected_initial_age() + resident_time;
01401 
01402         // Check that the max-age, max-stale, and min-fresh directives
01403         // given in the request cache control header is followed.
01404         if (d_max_age >= 0 && current_age > d_max_age) {
01405             DBG(cerr << "Cache....... Max-age validation" << endl);
01406             entry->unlock_read_response();
01407             unlock_cache_interface();
01408             return false;
01409         }
01410         if (d_min_fresh >= 0
01411             && entry->get_freshness_lifetime() < current_age + d_min_fresh) {
01412             DBG(cerr << "Cache....... Min-fresh validation" << endl);
01413             entry->unlock_read_response();
01414             unlock_cache_interface();
01415             return false;
01416         }
01417 
01418         freshness = (entry->get_freshness_lifetime()
01419                      + (d_max_stale >= 0 ? d_max_stale : 0) > current_age);
01420         entry->unlock_read_response();
01421         unlock_cache_interface();
01422     }
01423     catch (...) {
01424         if (entry) {
01425             entry->unlock_read_response();
01426         }
01427         unlock_cache_interface();
01428         throw;
01429     }
01430 
01431     return freshness;
01432 }
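
// Worked example of the freshness test above (numbers are hypothetical): for
// an entry with corrected_initial_age of 100 s, resident for 500 s, and a
// freshness_lifetime of 3600 s, current_age is 600 s. With no max-stale
// directive the entry is reported fresh because 3600 > 600; a request-side
// "max-age=300" would instead force revalidation because 600 > 300.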
01433 
01461 FILE * HTTPCache::get_cached_response(const string &url,
01462                 vector<string> &headers, string &cacheName) {
01463     lock_cache_interface();
01464 
01465     FILE *body = 0;
01466     HTTPCacheTable::CacheEntry *entry = 0;
01467 
01468     DBG(cerr << "Getting the cached response for " << url << endl);
01469 
01470     try {
01471         entry = d_http_cache_table->get_locked_entry_from_cache_table(url);
01472         if (!entry) {
01473                 unlock_cache_interface();
01474                 return 0;
01475         }
01476 
01477         cacheName = entry->get_cachename();
01478         read_metadata(entry->get_cachename(), headers);
01479 
01480         DBG(cerr << "Headers just read from cache: " << endl);
01481         DBGN(copy(headers.begin(), headers.end(), ostream_iterator<string>(cerr, "\n")));
01482 
01483         body = open_body(entry->get_cachename());
01484 
01485         DBG(cerr << "Returning: " << url << " from the cache." << endl);
01486 
01487         d_http_cache_table->bind_entry_to_data(entry, body);
01488     }
01489     catch (...) {
01490         // Release the entry's read lock (if any); always unlock the interface.
01491         if (entry)
01492             entry->unlock_read_response();
01493         unlock_cache_interface();
01494         if (body != 0) fclose(body);
01495         throw;
01496     }
01497 
01498     unlock_cache_interface();
01499 
01500     return body;
01501 }
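
// A read-side sketch (not part of the original source; the URL is
// hypothetical). Every FILE* handed out here must be returned through
// release_cached_response() so the entry's read lock is dropped:
//
//     vector<string> cached_headers;
//     string cache_file;
//     FILE *b = cache->get_cached_response("http://example.org/data.nc",
//                                          cached_headers, cache_file);
//     if (b) {
//         // ... read the response body from b ...
//         cache->release_cached_response(b);
//     }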
01502 
01514 FILE *
01515 HTTPCache::get_cached_response(const string &url, vector<string> &headers)
01516 {
01517         string discard_name;
01518         return get_cached_response(url, headers, discard_name);
01519 }
01520 
01531 FILE *
01532 HTTPCache::get_cached_response(const string &url)
01533 {
01534         string discard_name;
01535         vector<string> discard_headers;
01536         return get_cached_response(url, discard_headers, discard_name);
01537 }
01538 
01551 void
01552 HTTPCache::release_cached_response(FILE *body)
01553 {
01554     lock_cache_interface();
01555 
01556     try {
01557         d_http_cache_table->uncouple_entry_from_data(body);
01558     }
01559     catch (...) {
01560         unlock_cache_interface();
01561         throw;
01562     }
01563 
01564     unlock_cache_interface();
01565 }
01566 
01579 void
01580 HTTPCache::purge_cache()
01581 {
01582     lock_cache_interface();
01583 
01584     try {
01585         if (d_http_cache_table->is_locked_read_responses())
01586             throw Error("Attempt to purge the cache with entries in use.");
01587 
01588         d_http_cache_table->delete_all_entries();
01589     }
01590     catch (...) {
01591         unlock_cache_interface();
01592         throw;
01593     }
01594 
01595     unlock_cache_interface();
01596 }
01597 
01598 } // namespace libdap