libdap++
Updated for version 3.8.2
|
00001 00002 // -*- mode: c++; c-basic-offset:4 -*- 00003 00004 // This file is part of libdap, A C++ implementation of the OPeNDAP Data 00005 // Access Protocol. 00006 00007 // Copyright (c) 2002,2003 OPeNDAP, Inc. 00008 // Author: James Gallagher <jgallagher@opendap.org> 00009 // 00010 // This library is free software; you can redistribute it and/or 00011 // modify it under the terms of the GNU Lesser General Public 00012 // License as published by the Free Software Foundation; either 00013 // version 2.1 of the License, or (at your option) any later version. 00014 // 00015 // This library is distributed in the hope that it will be useful, 00016 // but WITHOUT ANY WARRANTY; without even the implied warranty of 00017 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 00018 // Lesser General Public License for more details. 00019 // 00020 // You should have received a copy of the GNU Lesser General Public 00021 // License along with this library; if not, write to the Free Software 00022 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 00023 // 00024 // You can contact OPeNDAP, Inc. at PO Box 112, Saunderstown, RI. 02874-0112. 00025 00026 // (c) COPYRIGHT URI/MIT 1994-1999 00027 // Please read the full copyright statement in the file COPYRIGHT_URI. 00028 // 00029 // Authors: 00030 // jhrg,jimg James Gallagher <jgallagher@gso.uri.edu> 00031 00032 // Utility functions used by the api. 
00033 // 00034 // jhrg 9/21/94 00035 00036 #include "config.h" 00037 00038 static char rcsid[] not_used = 00039 {"$Id: util.cc 25112 2011-12-29 21:44:54Z jimg $" 00040 }; 00041 00042 #include <cassert> 00043 #include <cstring> 00044 00045 #include <ctype.h> 00046 #ifndef TM_IN_SYS_TIME 00047 #include <time.h> 00048 #else 00049 #include <sys/time.h> 00050 #endif 00051 00052 #ifndef WIN32 00053 #include <unistd.h> // for stat 00054 #else 00055 #include <io.h> 00056 #include <fcntl.h> 00057 #include <process.h> 00058 #endif 00059 00060 #include <sys/types.h> 00061 #include <sys/stat.h> 00062 00063 #include <string> 00064 #include <sstream> 00065 #include <vector> 00066 #include <algorithm> 00067 #include <stdexcept> 00068 00069 #include "BaseType.h" 00070 #include "Str.h" 00071 #include "Url.h" 00072 #include "Sequence.h" 00073 #include "Error.h" 00074 #include "parser.h" 00075 #include "util.h" 00076 #include "GNURegex.h" 00077 #include "debug.h" 00078 00079 using namespace std; 00080 00081 namespace libdap { 00082 00083 // Remove spaces from the start of a URL and from the start of any constraint 00084 // expression it contains. 4/7/98 jhrg 00085 00094 string 00095 prune_spaces(const string &name) 00096 { 00097 // If the URL does not even have white space return. 00098 if (name.find_first_of(' ') == name.npos) 00099 return name; 00100 else { 00101 // Strip leading spaces from http://... 00102 unsigned int i = name.find_first_not_of(' '); 00103 string tmp_name = name.substr(i); 00104 00105 // Strip leading spaces from constraint part (following `?'). 00106 unsigned int j = tmp_name.find('?') + 1; 00107 i = tmp_name.find_first_not_of(' ', j); 00108 tmp_name.erase(j, i - j); 00109 00110 return tmp_name; 00111 } 00112 } 00113 00114 // Compare elements in a list of (BaseType *)s and return true if there are 00115 // no duplicate elements, otherwise return false. 
00116 00117 bool 00118 unique_names(vector<BaseType *> l, const string &var_name, 00119 const string &type_name, string &msg) 00120 { 00121 // copy the identifier names to a vector 00122 vector<string> names(l.size()); 00123 00124 int nelem = 0; 00125 typedef std::vector<BaseType *>::const_iterator citer ; 00126 for (citer i = l.begin(); i != l.end(); i++) { 00127 assert(*i); 00128 names[nelem++] = (*i)->name(); 00129 DBG(cerr << "NAMES[" << nelem - 1 << "]=" << names[nelem-1] << endl); 00130 } 00131 00132 // sort the array of names 00133 sort(names.begin(), names.end()); 00134 00135 #ifdef DODS_DEBUG2 00136 cout << "unique:" << endl; 00137 for (int ii = 0; ii < nelem; ++ii) 00138 cout << "NAMES[" << ii << "]=" << names[ii] << endl; 00139 #endif 00140 00141 // sort the array of names 00142 sort(names.begin(), names.end()); 00143 00144 #ifdef DODS_DEBUG2 00145 cout << "unique:" << endl; 00146 for (int ii = 0; ii < nelem; ++ii) 00147 cout << "NAMES[" << ii << "]=" << names[ii] << endl; 00148 #endif 00149 00150 // look for any instance of consecutive names that are == 00151 for (int j = 1; j < nelem; ++j) { 00152 if (names[j-1] == names[j]) { 00153 ostringstream oss; 00154 oss << "The variable `" << names[j] 00155 << "' is used more than once in " << type_name << " `" 00156 << var_name << "'"; 00157 msg = oss.str(); 00158 00159 return false; 00160 } 00161 } 00162 00163 return true; 00164 } 00165 00166 const char * 00167 libdap_root() 00168 { 00169 return LIBDAP_ROOT; 00170 } 00171 00172 extern "C" 00173 const char * 00174 libdap_version() 00175 { 00176 return PACKAGE_VERSION; 00177 } 00178 00179 extern "C" 00180 const char * 00181 libdap_name() 00182 { 00183 return PACKAGE_NAME; 00184 } 00185 00186 // Since Server4 can get compressed responses using Tomcat, bail on this 00187 // software (which complicates building under Win32). It can be turned on 00188 // for use with Server3 in configure.ac. 

#if COMPRESSION_FOR_SERVER3

// Return true if the program deflate exists and is executable by user, group
// and world. If this returns false the caller should assume that server
// filter programs won't be able to find the deflate program and thus won't
// be able to compress the return document.
// NB: this works because this function uses the same rules as compressor()
// (which follows) to look for deflate. 2/11/98 jhrg

/**
 * Test whether the deflate program can be found and executed.
 *
 * The program is looked for first under libdap_root() (in `sbin', or `bin'
 * on Win32) and then in the current working directory.
 *
 * @return True when one of those files exists and, on POSIX systems, has
 * the user, group and other execute bits all set (on Win32: has _S_IEXEC
 * set); false otherwise.
 */
bool
deflate_exists()
{
    DBG(cerr << "Entering deflate_exists...");

    struct stat buf;

#ifdef WIN32
    string deflate = (string)libdap_root() + "\\bin\\deflate";
    const char *local_deflate = ".\\deflate";
#else
    string deflate = (string)libdap_root() + "/sbin/deflate";
    const char *local_deflate = "./deflate";
#endif

    // Check that the file exists...
    // First look under libdap_root() and, if that fails, in the CWD, which
    // finds the program when it is in the same directory as the dispatch
    // script and other server components. 2/11/98 jhrg
    const bool found = (stat(deflate.c_str(), &buf) == 0)
                       || (stat(local_deflate, &buf) == 0);

    // ...and that it can be executed. Only look at buf when stat() filled
    // it in. The original `status &= st_mode & mask' and-ed the mode with 1,
    // which tested nothing but the lowest mode bit.
    bool status = false;
    if (found) {
#ifdef WIN32
        status = (buf.st_mode & _S_IEXEC) != 0;
#else
        const mode_t all_exec = S_IXUSR | S_IXGRP | S_IXOTH;
        status = (buf.st_mode & all_exec) == all_exec;
#endif
    }

    DBG(cerr << " returning " << (status ? "true." : "false.") << endl);
    return status;
}

/**
 * Start a deflate child process and return a stream that feeds it.
 *
 * Data written to the returned stream is the child's standard input; the
 * child's standard output is redirected to \c output.
 *
 * @param output The stream that receives the child's output.
 * @param childpid Value-result parameter; set to the process id of the
 * child once it has been started.
 * @return An unbuffered stream open for writing, or NULL if the pipe, the
 * child process or the stream could not be created.
 */
FILE *
compressor(FILE *output, int &childpid)
{
#ifdef WIN32
    // There is no such thing as a "fork" under win32. This makes it so that
    // we have to juggle handles more aggressively. This code hasn't been
    // tested and shown to work as of 07/2000.
    // NOTE(review): the error returns below do not restore stdin/stdout or
    // close the pipe handles. Left unchanged because this branch is untested.
    int pid, data[2];
    int hStdIn, hStdOut;

    if (_pipe(data, 512, O_BINARY | O_NOINHERIT) < 0) {
        cerr << "Could not create IPC channel for compressor process"
             << endl;
        return NULL;
    }


    // This sets up for the child process, but it has to be reversed for the
    // parent after the spawn takes place.

    // Store stdin, stdout so we have something to restore to
    hStdIn  = _dup(_fileno(stdin));
    hStdOut = _dup(_fileno(stdout));

    // Child is to read from read end of pipe
    if (_dup2(data[0], _fileno(stdin)) != 0) {
        cerr << "dup of child stdin failed" << endl;
        return NULL;
    }
    // Child is to write its stdout to file
    if (_dup2(_fileno(output), _fileno(stdout)) != 0) {
        cerr << "dup of child stdout failed" << endl;
        return NULL;
    }

    // Spawn child process
    string deflate = "deflate.exe";
    if ((pid = _spawnlp(_P_NOWAIT, deflate.c_str(), deflate.c_str(),
                        "-c", "5", "-s", NULL)) < 0) {
        cerr << "Could not spawn to create compressor process" << endl;
        return NULL;
    }

    // Restore stdin, stdout for parent and close duplicate copies
    if (_dup2(hStdIn, _fileno(stdin)) != 0) {
        cerr << "dup of stdin failed" << endl;
        return NULL;
    }
    if (_dup2(hStdOut, _fileno(stdout)) != 0) {
        cerr << "dup of stdout failed" << endl;
        return NULL;
    }
    close(hStdIn);
    close(hStdOut);

    // The parent writes to the end of the pipe opposite the one the child
    // reads from.
    close(data[0]);
    FILE *input = fdopen(data[1], "w");
    setbuf(input, 0);
    childpid = pid;
    return input;

#else
    int data[2];

    if (pipe(data) < 0) {
        cerr << "Could not create IPC channel for compressor process"
             << endl;
        return NULL;
    }

    const int pid = fork();
    if (pid < 0) {
        cerr << "Could not fork to create compressor process" << endl;
        // Don't leak the pipe when no child was created.
        close(data[0]);
        close(data[1]);
        return NULL;
    }

    // The parent process closes the read end of the pipe, and creates a
    // FILE * using fdopen(). The FILE * is used by the calling program to
    // write to the pipe; the child reads those bytes on its stdin.

    if (pid > 0) {  // Parent, pid is that of the child
        close(data[0]);
        childpid = pid;

        FILE *ret_file = fdopen(data[1], "w");
        if (!ret_file) {
            cerr << "Could not open the pipe to the compressor process"
                 << endl;
            // Closing the write end lets the child see end-of-file.
            close(data[1]);
            return NULL;
        }

        setbuf(ret_file, 0);
        return ret_file;
    }

    // Child
    close(data[1]);
    dup2(data[0], 0);         // Read from the pipe...
    dup2(fileno(output), 1);  // Write to the FILE *output.

    DBG(cerr << "Opening compression stream." << endl);

    // First try to run deflate from libdap_root()/sbin. If that fails, try
    // the CWD. The argument list of execl() must end with a null *pointer*;
    // a bare NULL may be passed through the ellipsis as an int.
    string deflate = (string)libdap_root() + "/sbin/deflate";
    (void) execl(deflate.c_str(), "deflate", "-c", "5", "-s",
                 static_cast<char *>(NULL));
    (void) execl("./deflate", "deflate", "-c", "5", "-s",
                 static_cast<char *>(NULL));
    cerr << "Warning: Could not start compressor!" << endl;
    cerr << "deflate should be in DODS_ROOT/sbin or in the CWD!"
         << endl;
    _exit(127);   // Only here if an error occurred.

    return NULL;  // Not reached.
#endif
}

#endif // COMPRESSION_FOR_SERVER3

// This function returns the system time formatted for an httpd log file.
/**
 * Get the current time as text.
 *
 * @return The string produced by ctime() for the current time, without its
 * trailing newline, or "time() error" when the time cannot be obtained.
 */
string
systime()
{
    time_t TimBin;

    if (time(&TimBin) == (time_t) - 1)
        return string("time() error");

    const char *text = ctime(&TimBin);
    if (!text)
        return string("time() error");

    // ctime() ends its result with exactly one '\n'. Dropping two characters,
    // as the original did, also removed the last digit of the year.
    string TimStr = text;
    if (!TimStr.empty() && TimStr[TimStr.size() - 1] == '\n')
        TimStr.erase(TimStr.size() - 1);

    return TimStr;
}

/**
 * Convert a string to lower case in place.
 *
 * @param s The string to modify.
 */
void
downcase(string &s)
{
    // tolower() is only defined for EOF and values representable as an
    // unsigned char, so go through unsigned char before calling it.
    for (string::size_type i = 0; i < s.length(); ++i)
        s[i] = static_cast<char>(tolower(static_cast<unsigned char>(s[i])));
}

/**
 * Test whether a string both starts and ends with a double quote.
 *
 * @note A string that consists of a single double quote satisfies the test.
 * @param s The string to test.
 * @return True for a non-empty string whose first and last characters are
 * double quotes.
 */
bool
is_quoted(const string &s)
{
    return (!s.empty() && s[0] == '\"' && s[s.length()-1] == '\"');
}

/**
 * Remove the enclosing double quotes from a string.
 *
 * @param s The string to process.
 * @return \c s without its first and last characters when is_quoted(s) is
 * true (a lone double quote yields the empty string); otherwise a copy of
 * \c s.
 */
string
remove_quotes(const string &s)
{
    if (is_quoted(s))
        return s.substr(1, s.length() - 2);
    else
        return s;
}

#ifdef WIN32
// Sometimes need to buffer within an iostream under win32 when
// we want the output to go to a FILE *. This is because
// it's not possible to associate an ofstream with a FILE *
// under the Standard ANSI C++ Library spec. Unix systems
// don't follow the spec in this regard.
//
// Copies what can be read from ios to out in 512-byte pieces until a read
// yields no characters.
// NOTE(review): ios is taken by value here although standard iostreams are
// not copyable; the signature has to agree with the declaration in the
// header, so it is left as it is.
void flush_stream(iostream ios, FILE *out)
{
    int nbytes;
    char buffer[512];

    ios.get(buffer, 512, NULL);
    while ((nbytes = ios.gcount()) > 0) {
        fwrite(buffer, 1, nbytes, out);
        ios.get(buffer, 512, NULL);
    }

    return;
}
#endif

// Jose Garcia
/**
 * Append the text form of an integer to a string.
 *
 * Digits above 9 are written as the upper-case letters A-Z and a negative
 * value is preceded by `-'.
 *
 * @param val The value to convert.
 * @param base The radix; must be in the range [2, 36].
 * @param str_val The string to append to. Not modified when \c base is
 * invalid.
 * @exception std::invalid_argument Thrown when \c base is out of range.
 */
void
append_long_to_string(long val, int base, string &str_val)
{
    // The array digits contains 36 elements which are the
    // possible valid digits for our bases in the range
    // [2,36]
    static const char digits[] = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ";

    if (base > 36 || base < 2) {
        // no conversion if wrong base
        std::invalid_argument ex("The parameter base has an invalid value.");
        throw ex;
    }

    // Take the magnitude in unsigned arithmetic: labs(LONG_MIN) is
    // undefined, while 0 - (unsigned long)LONG_MIN is well defined.
    unsigned long magnitude = static_cast<unsigned long>(val);
    if (val < 0) {
        str_val += '-';
        magnitude = 0UL - magnitude;
    }

    // Digits come out least significant first; collect, then append them
    // in reverse.
    const unsigned long radix = static_cast<unsigned long>(base);
    string reversed;
    do {
        reversed += digits[magnitude % radix];
        magnitude /= radix;
    } while (magnitude != 0);

    str_val.append(reversed.rbegin(), reversed.rend());
}

// base defaults to 10
/**
 * Convert an integer to its text form.
 *
 * @param val The value to convert.
 * @param base The radix; must be in the range [2, 36].
 * @return The text form of \c val.
 * @exception std::invalid_argument Thrown when \c base is out of range.
 * @see append_long_to_string()
 */
string
long_to_string(long val, int base)
{
    string s;
    append_long_to_string(val, base, s);
    return s;
}

// Jose Garcia
/**
 * Append the text form of a double to a string.
 *
 * The value is formatted by an ostringstream whose precision is set to 9.
 *
 * @param num The value to convert.
 * @param str The string to append to.
 */
void append_double_to_string(const double &num, string &str)
{
    // I want to banish all instances of sprintf. 10/5/2001 jhrg
    ostringstream oss;
    oss.precision(9);
    oss << num;
    str += oss.str();
}

/**
 * Convert a double to its text form.
 *
 * @param num The value to convert.
 * @return The text form of \c num.
 * @see append_double_to_string()
 */
string
double_to_string(const double &num)
{
    string s;
    append_double_to_string(num, s);
    return s;
}

// Get the version number of the core software. Defining this means that
// clients of the DAP don't have to rely on config.h for the version
// number.
00468 string 00469 dap_version() 00470 { 00471 return (string)"OPeNDAP DAP/" + libdap_version() + ": compiled on " + __DATE__ + ":" + __TIME__ ; 00472 } 00473 00474 // Given a pathname, return the file at the end of the path. This is used 00475 // when reporting errors (maybe other times, too) to keep the server from 00476 // revealing too much about its organization when sending error responses 00477 // back to clients. 10/11/2000 jhrg 00478 // MT-safe. 08/05/02 jhrg 00479 00480 #ifdef WIN32 00481 static const char path_sep[] = 00482 {"\\" 00483 }; 00484 #else 00485 static const char path_sep[] = 00486 {"/" 00487 }; 00488 #endif 00489 00490 string 00491 path_to_filename(string path) 00492 { 00493 string::size_type pos = path.rfind(path_sep); 00494 00495 return (pos == string::npos) ? path : path.substr(++pos); 00496 } 00497 00502 string 00503 file_to_string(FILE *fp) 00504 { 00505 rewind(fp); 00506 ostringstream oss; 00507 char c; 00508 while (fread(&c, 1, 1, fp)) 00509 oss << c; 00510 return oss.str(); 00511 } 00512 00513 int 00514 wildcmp(const char *wild, const char *string) 00515 { 00516 // Written by Jack Handy - jakkhandy@hotmail.com 00517 00518 if (!wild || !string) 00519 return 0; 00520 00521 const char *cp = NULL, *mp = NULL; 00522 00523 while ((*string) && (*wild != '*')) { 00524 if ((*wild != *string) && (*wild != '?')) { 00525 return 0; 00526 } 00527 wild++; 00528 string++; 00529 } 00530 00531 while (*string) { 00532 if (*wild == '*') { 00533 if (!*++wild) { 00534 return 1; 00535 } 00536 mp = wild; 00537 cp = string+1; 00538 } else if ((*wild == *string) || (*wild == '?')) { 00539 wild++; 00540 string++; 00541 } else { 00542 wild = mp; 00543 string = cp++; 00544 } 00545 } 00546 00547 while (*wild == '*') { 00548 wild++; 00549 } 00550 return !*wild; 00551 } 00552 00553 #define CHECK_BIT( tab, bit ) ( tab[ (bit)/8 ] & (1<<( (bit)%8 )) ) 00554 #define BITLISTSIZE 16 /* bytes used for [chars] in compiled expr */ 00555 00556 static void globchars( const 
char *s, const char *e, char *b ); 00557 00558 /* 00559 * glob: match a string against a simple pattern 00560 * 00561 * Understands the following patterns: 00562 * 00563 * * any number of characters 00564 * ? any single character 00565 * [a-z] any single character in the range a-z 00566 * [^a-z] any single character not in the range a-z 00567 * \x match x 00568 * 00569 * @param c The pattern 00570 * @param s The string 00571 * @return 0 on success, -1 if the pattern is exhausted but there are 00572 * characters remaining in the string and 1 if the pattern does not match 00573 */ 00574 00575 int 00576 glob(const char *c, const char *s) 00577 { 00578 if (!c || !s) 00579 return 1; 00580 00581 char bitlist[BITLISTSIZE]; 00582 int i = 0; 00583 for (;;) { 00584 ++i; 00585 switch (*c++) { 00586 case '\0': 00587 return *s ? -1 : 0; 00588 00589 case '?': 00590 if (!*s++) 00591 return i/*1*/; 00592 break; 00593 00594 case '[': { 00595 /* scan for matching ] */ 00596 00597 const char *here = c; 00598 do { 00599 if (!*c++) 00600 return i/*1*/; 00601 } while (here == c || *c != ']'); 00602 c++; 00603 00604 /* build character class bitlist */ 00605 00606 globchars(here, c, bitlist); 00607 00608 if (!CHECK_BIT( bitlist, *(unsigned char *)s )) 00609 return i/*1*/; 00610 s++; 00611 break; 00612 } 00613 00614 case '*': { 00615 const char *here = s; 00616 00617 while (*s) 00618 s++; 00619 00620 /* Try to match the rest of the pattern in a recursive */ 00621 /* call. If the match fails we'll back up chars, retrying. */ 00622 00623 while (s != here) { 00624 int r; 00625 00626 /* A fast path for the last token in a pattern */ 00627 00628 r = *c ? glob(c, s) : *s ? -1 : 0; 00629 00630 if (!r) 00631 return 0; 00632 else if (r < 0) 00633 return i/*1*/; 00634 00635 --s; 00636 } 00637 break; 00638 } 00639 00640 case '\\': 00641 /* Force literal match of next char. 
*/ 00642 00643 if (!*c || *s++ != *c++) 00644 return i/*1*/; 00645 break; 00646 00647 default: 00648 if (*s++ != c[-1]) 00649 return i/*1*/; 00650 break; 00651 } 00652 } 00653 } 00654 00655 /* 00656 * globchars() - build a bitlist to check for character group match 00657 */ 00658 00659 static void globchars(const char *s, const char *e, char *b) { 00660 int neg = 0; 00661 00662 memset(b, '\0', BITLISTSIZE); 00663 00664 if (*s == '^') 00665 neg++, s++; 00666 00667 while (s < e) { 00668 int c; 00669 00670 if (s + 2 < e && s[1] == '-') { 00671 for (c = s[0]; c <= s[2]; c++) 00672 b[c / 8] |= (1 << (c % 8)); 00673 s += 3; 00674 } 00675 else { 00676 c = *s++; 00677 b[c / 8] |= (1 << (c % 8)); 00678 } 00679 } 00680 00681 if (neg) { 00682 int i; 00683 for (i = 0; i < BITLISTSIZE; i++) 00684 b[i] ^= 0377; 00685 } 00686 00687 /* Don't include \0 in either $[chars] or $[^chars] */ 00688 00689 b[0] &= 0376; 00690 } 00691 00692 int wmatch(const char *pat, const char *s) 00693 { 00694 if (!pat || !s) 00695 return 0; 00696 00697 switch (*pat) { 00698 case '\0': return (*s == '\0'); 00699 case '?': return (*s != '\0') && wmatch(pat+1, s+1); 00700 case '*': return wmatch(pat+1, s) || (*s != '\0' && wmatch(pat, s+1)); 00701 default: return (*s == *pat) && wmatch(pat+1, s+1); 00702 } 00703 } 00704 00707 00713 bool 00714 size_ok(unsigned int sz, unsigned int nelem) 00715 { 00716 return (sz > 0 && nelem < UINT_MAX / sz); 00717 } 00718 00735 bool 00736 pathname_ok(const string &path, bool strict) 00737 { 00738 if (path.length() > 255) 00739 return false; 00740 00741 Regex name("[-0-9A-z_./]+"); 00742 if (!strict) 00743 name = "[:print:]+"; 00744 00745 string::size_type len = path.length(); 00746 int result = name.match(path.c_str(), len); 00747 // Protect against casting too big an uint to int 00748 // if LEN is bigger than the max int32, the second test can't work 00749 if (len > INT_MAX || result != static_cast<int>(len)) 00750 return false; 00751 00752 return true; 00753 } 00754 
00756 00757 } // namespace libdap 00758