OPeNDAP Hyrax Back End Server (BES)
Updated for version 3.8.3
|
// BESCache.cc

// This file is part of bes, A C++ back-end server implementation framework
// for the OPeNDAP Data Access Protocol.

// Copyright (c) 2004-2009 University Corporation for Atmospheric Research
// Author: Patrick West <pwest@ucar.edu> and Jose Garcia <jgarcia@ucar.edu>
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
// Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License along with this library; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
//
// You can contact University Corporation for Atmospheric Research at
// 3080 Center Green Drive, Boulder, CO 80301

// (c) COPYRIGHT University Corporation for Atmospheric Research 2004-2005
// Please read the full copyright statement in the file COPYRIGHT_UCAR.
00028 // 00029 // Authors: 00030 // pwest Patrick West <pwest@ucar.edu> 00031 // jgarcia Jose Garcia <jgarcia@ucar.edu> 00032 00033 #include "config.h" 00034 00035 #include <unistd.h> // for unlink 00036 #include <sys/types.h> 00037 #include <sys/stat.h> 00038 #include <dirent.h> 00039 #include <fcntl.h> 00040 00041 #include <cstring> 00042 #include <cerrno> 00043 #include <iostream> 00044 #include <sstream> 00045 00046 #include "BESCache.h" 00047 #include "TheBESKeys.h" 00048 #include "BESSyntaxUserError.h" 00049 #include "BESInternalError.h" 00050 #include "BESDebug.h" 00051 00052 using std::string; 00053 using std::multimap ; 00054 using std::pair ; 00055 using std::greater ; 00056 using std::endl ; 00057 00058 // conversion factor 00059 static const unsigned long long BYTES_PER_MEG = 1048576ULL; 00060 00061 // Max cache size in megs, so we can check the user input and warn. 00062 // 2^64 / 2^20 == 2^44 00063 static const unsigned long long MAX_CACHE_SIZE_IN_MEGABYTES = (1ULL << 44); 00064 00065 void 00066 BESCache::check_ctor_params() 00067 { 00068 if( _cache_dir.empty() ) 00069 { 00070 string err = "The cache directory was not specified, must be non-empty"; 00071 throw BESSyntaxUserError( err, __FILE__, __LINE__ ) ; 00072 } 00073 00074 struct stat buf; 00075 int statret = stat( _cache_dir.c_str(), &buf ) ; 00076 if( statret != 0 || ! 
S_ISDIR(buf.st_mode) ) 00077 { 00078 string err = "The cache directory " + _cache_dir + " does not exist" ; 00079 throw BESSyntaxUserError( err, __FILE__, __LINE__ ) ; 00080 } 00081 00082 if( _prefix.empty() ) 00083 { 00084 string err = "The cache file prefix was not specified, must be non-empty" ; 00085 throw BESSyntaxUserError( err, __FILE__, __LINE__ ) ; 00086 } 00087 00088 if( _cache_size_in_megs <= 0 ) 00089 { 00090 string err = "The cache size was not specified, must be non-zero" ; 00091 throw BESSyntaxUserError( err, __FILE__, __LINE__ ) ; 00092 } 00093 00094 // If the user specifies a cache that is too large, 00095 // it is a user exception and we should tell them. 00096 // Actually, this may not work since by this 00097 // time we may have already overflowed the variable... 00098 if( _cache_size_in_megs > MAX_CACHE_SIZE_IN_MEGABYTES ) 00099 { 00100 _cache_size_in_megs = MAX_CACHE_SIZE_IN_MEGABYTES ; 00101 std::ostringstream msg; 00102 msg << "The specified cache size was larger than the max cache size of: " 00103 << MAX_CACHE_SIZE_IN_MEGABYTES; 00104 throw BESSyntaxUserError(msg.str(), __FILE__, __LINE__); 00105 } 00106 00107 BESDEBUG( "bes", "BES Cache: directory " << _cache_dir 00108 << ", prefix " << _prefix 00109 << ", max size " << _cache_size_in_megs << endl ) ; 00110 } 00111 00121 BESCache::BESCache( const string &cache_dir, 00122 const string &prefix, 00123 unsigned long long sizeInMegs ) 00124 : _cache_dir( cache_dir ), 00125 _prefix( prefix ), 00126 _cache_size_in_megs( sizeInMegs ), 00127 _lock_fd( -1 ) 00128 { 00129 check_ctor_params(); // Throws BESSyntaxUserError on error. 
00130 } 00131 00146 BESCache::BESCache( BESKeys &keys, 00147 const string &cache_dir_key, 00148 const string &prefix_key, 00149 const string &size_key ) 00150 : _cache_size_in_megs( 0 ), 00151 _lock_fd( -1 ) 00152 { 00153 bool found = false ; 00154 keys.get_value( cache_dir_key, _cache_dir, found ) ; 00155 if( !found ) 00156 { 00157 string err = "The cache directory key " + cache_dir_key 00158 + " was not found in the BES configuration file" ; 00159 throw BESSyntaxUserError( err, __FILE__, __LINE__ ) ; 00160 } 00161 00162 found = false ; 00163 keys.get_value( prefix_key, _prefix, found ) ; 00164 if( !found ) 00165 { 00166 string err = "The prefix key " + prefix_key 00167 + " was not found in the BES configuration file" ; 00168 throw BESSyntaxUserError( err, __FILE__, __LINE__ ) ; 00169 } 00170 00171 found = false ; 00172 string cache_size_str ; 00173 keys.get_value( size_key, cache_size_str, found ) ; 00174 if( !found ) 00175 { 00176 string err = "The size key " + size_key 00177 + " was not found in the BES configuration file" ; 00178 throw BESInternalError( err, __FILE__, __LINE__ ) ; 00179 } 00180 00181 std::istringstream is( cache_size_str ) ; 00182 is >> _cache_size_in_megs ; 00183 00184 check_ctor_params(); // Throws BESSyntaxUserError on error. 
00185 } 00186 00193 bool 00194 BESCache::lock( unsigned int retry, unsigned int num_tries ) 00195 { 00196 // make sure we aren't retrying too many times 00197 if( num_tries > MAX_LOCK_TRIES ) 00198 num_tries = MAX_LOCK_TRIES ; 00199 if( retry > MAX_LOCK_RETRY_MS ) 00200 retry = MAX_LOCK_RETRY_MS ; 00201 00202 bool got_lock = true ; 00203 if( _lock_fd == -1 ) 00204 { 00205 string lock_file = _cache_dir + "/lock" ; 00206 unsigned int tries = 0 ; 00207 _lock_fd = open( lock_file.c_str(), 00208 O_CREAT | O_EXCL, 00209 S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH ) ; 00210 while( _lock_fd < 0 && got_lock ) 00211 { 00212 tries ++ ; 00213 if( tries > num_tries ) 00214 { 00215 _lock_fd = -1 ; 00216 got_lock = false ; 00217 } 00218 else 00219 { 00220 usleep( retry ) ; 00221 _lock_fd = open( lock_file.c_str(), 00222 O_CREAT | O_EXCL, 00223 S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH ) ; 00224 } 00225 } 00226 } 00227 else 00228 { 00229 // This would be a programming error, or we've gotten into a 00230 // situation where the lock is lost. Lock has been called on the 00231 // same cache object twice in a row without an unlock being called. 00232 string err = "The cache dir " + _cache_dir + " is already locked" ; 00233 throw BESInternalError( err, __FILE__, __LINE__ ) ; 00234 } 00235 00236 return got_lock ; 00237 } 00238 00245 bool 00246 BESCache::unlock() 00247 { 00248 // if we call unlock twice in a row, does it matter? I say no, just say 00249 // that it is unlocked. 
00250 bool unlocked = true ; 00251 if( _lock_fd != -1 ) 00252 { 00253 string lock_file = _cache_dir + "/lock" ; 00254 close( _lock_fd ) ; 00255 (void)unlink( lock_file.c_str() ) ; 00256 } 00257 00258 _lock_fd = -1 ; 00259 00260 return unlocked ; 00261 } 00262 00276 bool 00277 BESCache::is_cached( const string &src, string &target ) 00278 { 00279 bool is_it = true ; 00280 string tmp_target = src ; 00281 00282 // Create the file that would be created in the cache directory 00283 //echo ${infile} | sed 's/^\///' | sed 's/\//#/g' | sed 's/\(.*\)\..*$/\1/g' 00284 if( tmp_target.at(0) == '/' ) 00285 { 00286 tmp_target = src.substr( 1, tmp_target.length() - 1 ) ; 00287 } 00288 string::size_type slash = 0 ; 00289 while( ( slash = tmp_target.find( '/' ) ) != string::npos ) 00290 { 00291 tmp_target.replace( slash, 1, 1, BES_CACHE_CHAR ) ; 00292 } 00293 string::size_type last_dot = tmp_target.rfind( '.' ) ; 00294 if( last_dot != string::npos ) 00295 { 00296 tmp_target = tmp_target.substr( 0, last_dot ) ; 00297 } 00298 00299 target = _cache_dir + "/" + _prefix + BES_CACHE_CHAR + tmp_target ; 00300 00301 // Determine if the target file is already in the cache or not 00302 struct stat buf; 00303 int statret = stat( target.c_str(), &buf ) ; 00304 if( statret != 0 ) 00305 { 00306 is_it = false ; 00307 } 00308 00309 return is_it ; 00310 } 00311 00312 00313 00314 00323 void 00324 BESCache::purge( ) 00325 { 00326 // Fill in contents and get the info 00327 CacheDirInfo cd_info; 00328 collect_cache_dir_info(cd_info); 00329 unsigned long long avg_size = cd_info.get_avg_size(); 00330 00331 // These are references in the refactor, probably would make 00332 // sense to add these calls below to the info, but... 
00333 unsigned long long& size = cd_info._total_cache_files_size; 00334 unsigned long long& num_files_in_cache = cd_info._num_files_in_cache; 00335 BESCache::CacheFilesByAgeMap& contents = cd_info._contents; 00336 00337 BESDEBUG( "bes", "cache size = " << size << endl ) ; 00338 BESDEBUG( "bes", "avg size = " << avg_size << endl ) ; 00339 BESDEBUG( "bes", "num files in cache = " 00340 << num_files_in_cache << endl ) ; 00341 if( BESISDEBUG( "bes" ) ) 00342 { 00343 BESDEBUG( "bes", endl << "BEFORE" << endl ) ; 00344 CacheFilesByAgeMap::iterator ti = contents.begin() ; 00345 CacheFilesByAgeMap::iterator te = contents.end() ; 00346 for( ; ti != te; ti++ ) 00347 { 00348 BESDEBUG( "bes", (*ti).first << ": " << (*ti).second.name << ": size " << (*ti).second.size << endl ) ; 00349 } 00350 BESDEBUG( "bes", endl ) ; 00351 } 00352 00353 00354 // if the size of files is greater than max allowed then we need to 00355 // purge the cache directory. Keep going until the size is less than 00356 // the max. 00357 // [Maybe change this to size + (fraction of max_size) > max_size? 00358 // jhrg 5/9/07] 00359 unsigned long long max_size_in_bytes = _cache_size_in_megs * BYTES_PER_MEG ; // Bytes/Meg 00360 while( (size+avg_size) > max_size_in_bytes ) 00361 { 00362 // Grab the first which is the oldest 00363 // in terms of access time. 
00364 CacheFilesByAgeMap::iterator i = contents.begin() ; 00365 00366 // if we've deleted all entries, exit the loop 00367 if( i == contents.end() ) 00368 { 00369 break; 00370 } 00371 00372 // Otherwise, remove the file with unlink 00373 BESDEBUG( "bes", "BESCache::purge - removing " 00374 << (*i).second.name << endl ) ; 00375 // unlink rather than remove in case the file is in use 00376 // by a forked BES process 00377 if( unlink( (*i).second.name.c_str() ) != 0 ) 00378 { 00379 char *s_err = strerror( errno ) ; 00380 string err = "Unable to remove the file " 00381 + (*i).second.name 00382 + " from the cache: " ; 00383 if( s_err ) 00384 { 00385 err.append( s_err ) ; 00386 } 00387 else 00388 { 00389 err.append( "Unknown error" ) ; 00390 } 00391 throw BESInternalError( err, __FILE__, __LINE__ ) ; 00392 } 00393 00394 size -= (*i).second.size ; 00395 contents.erase( i ) ; 00396 } 00397 00398 if( BESISDEBUG( "bes" ) ) 00399 { 00400 BESDEBUG( "bes", endl << "AFTER" << endl ) ; 00401 CacheFilesByAgeMap::iterator ti = contents.begin() ; 00402 CacheFilesByAgeMap::iterator te = contents.end() ; 00403 for( ; ti != te; ti++ ) 00404 { 00405 BESDEBUG( "bes", (*ti).first << ": " << (*ti).second.name << ": size " << (*ti).second.size << endl ) ; 00406 } 00407 } 00408 } 00409 00410 // Local RAII helper class to be sure the DIR 00411 // is closed in the face of exceptions using RAII 00412 struct DIR_Wrapper 00413 { 00414 DIR_Wrapper(const std::string& dir_name) 00415 { 00416 _dip = opendir(dir_name.c_str()); 00417 } 00418 00419 ~DIR_Wrapper() 00420 { 00421 close(); 00422 } 00423 00424 DIR* get() const { return _dip; } 00425 00426 void close() 00427 { 00428 if (_dip) 00429 { 00430 closedir(_dip); 00431 _dip = NULL; 00432 } 00433 } 00434 00435 // data rep 00436 DIR* _dip; 00437 }; 00438 00439 void 00440 BESCache::collect_cache_dir_info( 00441 BESCache::CacheDirInfo& cd_info // output 00442 ) const 00443 { 00444 // start fresh 00445 cd_info.clear(); 00446 00447 time_t curr_time = time( 
NULL ) ; // grab the current time so we can 00448 // determine the oldest file 00449 00450 DIR_Wrapper dip = DIR_Wrapper( _cache_dir ); 00451 if (! (dip.get()) ) 00452 { 00453 string err = "Unable to open cache directory " + _cache_dir ; 00454 throw BESInternalError( err, __FILE__, __LINE__ ) ; 00455 } 00456 else // got a dir entry so count up the cached files 00457 { 00458 struct stat buf; 00459 struct dirent *dit; 00460 // go through the cache directory and collect all of the files that 00461 // start with the matching prefix 00462 while( ( dit = readdir( dip.get() ) ) != NULL ) 00463 { 00464 string dirEntry = dit->d_name ; 00465 if( dirEntry.compare( 0, _prefix.length(), _prefix ) == 0) 00466 { 00467 // Now that we have found a match we want to get the size of 00468 // the file and the last access time from the file. 00469 string fullPath = _cache_dir + "/" + dirEntry ; 00470 int statret = stat( fullPath.c_str(), &buf ) ; 00471 if( statret == 0 ) 00472 { 00473 cd_info._total_cache_files_size += buf.st_size ; 00474 00475 // Find out how old the file is 00476 time_t file_time = buf.st_atime ; 00477 00478 // I think we can use the access time without the diff, 00479 // since it's the relative ages that determine when to 00480 // delete a file. Good idea to use the access time so 00481 // recently used (read) files will linger. 
jhrg 5/9/07 00482 double time_diff = difftime( curr_time, file_time ) ; 00483 cache_entry entry ; 00484 entry.name = fullPath ; 00485 entry.size = buf.st_size ; 00486 cd_info._contents.insert( pair<double, cache_entry>( time_diff, entry ) ); 00487 } 00488 cd_info._num_files_in_cache++ ; 00489 } 00490 } 00491 } 00492 00493 dip.close(); 00494 } 00495 00503 void 00504 BESCache::dump( ostream &strm ) const 00505 { 00506 strm << BESIndent::LMarg << "BESCache::dump - (" 00507 << (void *)this << ")" << endl ; 00508 BESIndent::Indent() ; 00509 strm << BESIndent::LMarg << "cache dir: " << _cache_dir << endl ; 00510 strm << BESIndent::LMarg << "prefix: " << _prefix << endl ; 00511 strm << BESIndent::LMarg << "size (mb): " << _cache_size_in_megs << endl ; 00512 BESIndent::UnIndent() ; 00513 } 00514