libdap++ Updated for version 3.8.2
|
00001 00002 // -*- mode: c++; c-basic-offset:4 -*- 00003 00004 // This file is part of libdap, A C++ implementation of the OPeNDAP Data 00005 // Access Protocol. 00006 00007 // Copyright (c) 2002,2003 OPeNDAP, Inc. 00008 // Author: James Gallagher <jgallagher@opendap.org> 00009 // 00010 // This library is free software; you can redistribute it and/or 00011 // modify it under the terms of the GNU Lesser General Public 00012 // License as published by the Free Software Foundation; either 00013 // version 2.1 of the License, or (at your option) any later version. 00014 // 00015 // This library is distributed in the hope that it will be useful, 00016 // but WITHOUT ANY WARRANTY; without even the implied warranty of 00017 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 00018 // Lesser General Public License for more details. 00019 // 00020 // You should have received a copy of the GNU Lesser General Public 00021 // License along with this library; if not, write to the Free Software 00022 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 00023 // 00024 // You can contact OPeNDAP, Inc. at PO Box 112, Saunderstown, RI. 02874-0112. 00025 00026 // (c) COPYRIGHT URI/MIT 1994-1999 00027 // Please read the full copyright statement in the file COPYRIGHT_URI. 00028 // 00029 // Authors: 00030 // jhrg,jimg James Gallagher <jgallagher@gso.uri.edu> 00031 00032 // Utility functions used by the api. 
00033 // 00034 // jhrg 9/21/94 00035 00036 #include "config.h" 00037 00038 static char rcsid[] not_used = 00039 {"$Id: util.cc 20518 2009-03-05 23:39:46Z jimg $" 00040 }; 00041 00042 #include <cassert> 00043 #include <cstring> 00044 00045 #include <ctype.h> 00046 #ifndef TM_IN_SYS_TIME 00047 #include <time.h> 00048 #else 00049 #include <sys/time.h> 00050 #endif 00051 00052 #ifndef WIN32 00053 #include <unistd.h> // for stat 00054 #else 00055 #include <io.h> 00056 #include <fcntl.h> 00057 #include <process.h> 00058 #endif 00059 00060 #include <sys/types.h> 00061 #include <sys/stat.h> 00062 00063 #include <string> 00064 #include <sstream> 00065 #include <vector> 00066 #include <algorithm> 00067 #include <stdexcept> 00068 00069 #include "BaseType.h" 00070 #include "Str.h" 00071 #include "Url.h" 00072 #include "Sequence.h" 00073 #include "Error.h" 00074 #include "parser.h" 00075 #include "util.h" 00076 #include "GNURegex.h" 00077 #include "debug.h" 00078 00079 00080 using namespace std; 00081 00082 namespace libdap { 00083 00084 // Remove spaces from the start of a URL and from the start of any constraint 00085 // expression it contains. 4/7/98 jhrg 00086 00095 string 00096 prune_spaces(const string &name) 00097 { 00098 // If the URL does not even have white space return. 00099 if (name.find_first_of(' ') == name.npos) 00100 return name; 00101 else { 00102 // Strip leading spaces from http://... 00103 unsigned int i = name.find_first_not_of(' '); 00104 string tmp_name = name.substr(i); 00105 00106 // Strip leading spaces from constraint part (following `?'). 00107 unsigned int j = tmp_name.find('?') + 1; 00108 i = tmp_name.find_first_not_of(' ', j); 00109 tmp_name.erase(j, i - j); 00110 00111 return tmp_name; 00112 } 00113 } 00114 00115 // Compare elements in a list of (BaseType *)s and return true if there are 00116 // no duplicate elements, otherwise return false. 
00117 00118 bool 00119 unique_names(vector<BaseType *> l, const string &var_name, 00120 const string &type_name, string &msg) 00121 { 00122 // copy the identifier names to a vector 00123 vector<string> names(l.size()); 00124 00125 int nelem = 0; 00126 typedef std::vector<BaseType *>::const_iterator citer ; 00127 for (citer i = l.begin(); i != l.end(); i++) { 00128 assert(*i); 00129 names[nelem++] = (*i)->name(); 00130 DBG(cerr << "NAMES[" << nelem - 1 << "]=" << names[nelem-1] << endl); 00131 } 00132 00133 // sort the array of names 00134 sort(names.begin(), names.end()); 00135 00136 #ifdef DODS_DEBUG2 00137 cout << "unique:" << endl; 00138 for (int ii = 0; ii < nelem; ++ii) 00139 cout << "NAMES[" << ii << "]=" << names[ii] << endl; 00140 #endif 00141 00142 // sort the array of names 00143 sort(names.begin(), names.end()); 00144 00145 #ifdef DODS_DEBUG2 00146 cout << "unique:" << endl; 00147 for (int ii = 0; ii < nelem; ++ii) 00148 cout << "NAMES[" << ii << "]=" << names[ii] << endl; 00149 #endif 00150 00151 // look for any instance of consecutive names that are == 00152 for (int j = 1; j < nelem; ++j) { 00153 if (names[j-1] == names[j]) { 00154 ostringstream oss; 00155 oss << "The variable `" << names[j] 00156 << "' is used more than once in " << type_name << " `" 00157 << var_name << "'"; 00158 msg = oss.str(); 00159 00160 return false; 00161 } 00162 } 00163 00164 return true; 00165 } 00166 00167 const char * 00168 libdap_root() 00169 { 00170 return LIBDAP_ROOT; 00171 #if 0 00172 // I've changed this because this could be used to get the library to 00173 // use a different compression function when it builds compressed 00174 // responses. The use of 'deflate' to compress responses should be 00175 // removed since Hyrax now uses Tomcat to perform this function. 00176 char *libdap_root = 0; 00177 return ((libdap_root = getenv("LIBDAP_ROOT")) ? 
libdap_root : LIBDAP_ROOT); 00178 #endif 00179 } 00180 00181 extern "C" 00182 const char * 00183 libdap_version() 00184 { 00185 return PACKAGE_VERSION; 00186 } 00187 00188 extern "C" 00189 const char * 00190 libdap_name() 00191 { 00192 return PACKAGE_NAME; 00193 } 00194 00195 // Since Server4 can get compressed responses using Tomcat, bail on this 00196 // software (which complicates building under Win32). It can be turned on 00197 // for use with Server3 in configure.ac. 00198 00199 #if COMPRESSION_FOR_SERVER3 00200 00201 // Return true if the program deflate exists and is executable by user, group 00202 // and world. If this returns false the caller should assume that server 00203 // filter programs won't be able to find the deflate program and thus won't 00204 // be able to compress the return document. 00205 // NB: this works because this function uses the same rules as compressor() 00206 // (which follows) to look for deflate. 2/11/98 jhrg 00207 00208 bool 00209 deflate_exists() 00210 { 00211 DBG(cerr << "Entering deflate_exists..."); 00212 00213 int status = false; 00214 struct stat buf; 00215 00216 #ifdef WIN32 00217 string deflate = (string)libdap_root() + "\\bin\\deflate"; 00218 #else 00219 string deflate = (string)libdap_root() + "/sbin/deflate"; 00220 #endif 00221 00222 // Check that the file exists... 00223 // First look for deflate using DODS_ROOT (compile-time constant subsumed 00224 // by an environment variable) and if that fails in the CWD which finds 00225 // the program when it is in the same directory as the dispatch script 00226 // and other server components. 2/11/98 jhrg 00227 status = (stat(deflate.c_str(), &buf) == 0) 00228 #ifdef WIN32 00229 || (stat(".\\deflate", &buf) == 0); 00230 #else 00231 || (stat("./deflate", &buf) == 0); 00232 #endif 00233 00234 // and that it can be executed. 
00235 #ifdef WIN32 00236 status &= (buf.st_mode & _S_IEXEC); 00237 #else 00238 status &= buf.st_mode & (S_IXUSR | S_IXGRP | S_IXOTH); 00239 #endif 00240 DBG(cerr << " returning " << (status ? "true." : "false.") << endl); 00241 return (status != 0); 00242 } 00243 00244 FILE * 00245 compressor(FILE *output, int &childpid) 00246 { 00247 #ifdef WIN32 00248 // There is no such thing as a "fork" under win32. This makes it so that 00249 // we have to juggle handles more aggressively. This code hasn't been 00250 // tested and shown to work as of 07/2000. 00251 int pid, data[2]; 00252 int hStdIn, hStdOut; 00253 00254 if (_pipe(data, 512, O_BINARY | O_NOINHERIT) < 0) { 00255 cerr << "Could not create IPC channel for compressor process" 00256 << endl; 00257 return NULL; 00258 } 00259 00260 00261 // This sets up for the child process, but it has to be reversed for the 00262 // parent after the spawn takes place. 00263 00264 // Store stdin, stdout so we have something to restore to 00265 hStdIn = _dup(_fileno(stdin)); 00266 hStdOut = _dup(_fileno(stdout)); 00267 00268 // Child is to read from read end of pipe 00269 if (_dup2(data[0], _fileno(stdin)) != 0) { 00270 cerr << "dup of child stdin failed" << endl; 00271 return NULL; 00272 } 00273 // Child is to write its's stdout to file 00274 if (_dup2(_fileno(output), _fileno(stdout)) != 0) { 00275 cerr << "dup of child stdout failed" << endl; 00276 return NULL; 00277 } 00278 00279 // Spawn child process 00280 string deflate = "deflate.exe"; 00281 if ((pid = _spawnlp(_P_NOWAIT, deflate.c_str(), deflate.c_str(), 00282 "-c", "5", "-s", NULL)) < 0) { 00283 cerr << "Could not spawn to create compressor process" << endl; 00284 return NULL; 00285 } 00286 00287 // Restore stdin, stdout for parent and close duplicate copies 00288 if (_dup2(hStdIn, _fileno(stdin)) != 0) { 00289 cerr << "dup of stdin failed" << endl; 00290 return NULL; 00291 } 00292 if (_dup2(hStdOut, _fileno(stdout)) != 0) { 00293 cerr << "dup of stdout failed" << endl; 
00294 return NULL; 00295 } 00296 close(hStdIn); 00297 close(hStdOut); 00298 00299 // Tell the parent that it reads from the opposite end of the 00300 // place where the child writes. 00301 close(data[0]); 00302 FILE *input = fdopen(data[1], "w"); 00303 setbuf(input, 0); 00304 childpid = pid; 00305 return input; 00306 00307 #else 00308 FILE *ret_file = NULL ; 00309 00310 int pid, data[2]; 00311 00312 if (pipe(data) < 0) { 00313 cerr << "Could not create IPC channel for compressor process" 00314 << endl; 00315 return NULL; 00316 } 00317 00318 if ((pid = fork()) < 0) { 00319 cerr << "Could not fork to create compressor process" << endl; 00320 return NULL; 00321 } 00322 00323 // The parent process closes the write end of the Pipe, and creates a 00324 // FILE * using fdopen(). The FILE * is used by the calling program to 00325 // access the read end of the Pipe. 00326 00327 if (pid > 0) { // Parent, pid is that of the child 00328 close(data[0]); 00329 ret_file = fdopen(data[1], "w"); 00330 setbuf(ret_file, 0); 00331 childpid = pid; 00332 } 00333 else { // Child 00334 close(data[1]); 00335 dup2(data[0], 0); // Read from the pipe... 00336 dup2(fileno(output), 1); // Write to the FILE *output. 00337 00338 DBG(cerr << "Opening compression stream." << endl); 00339 00340 // First try to run deflate using DODS_ROOT (the value read from the 00341 // DODS_ROOT environment variable takes precedence over the value set 00342 // at build time. If that fails, try the CWD. 00343 string deflate = (string)libdap_root() + "/sbin/deflate"; 00344 (void) execl(deflate.c_str(), "deflate", "-c", "5", "-s", NULL); 00345 (void) execl("./deflate", "deflate", "-c", "5", "-s", NULL); 00346 cerr << "Warning: Could not start compressor!" << endl; 00347 cerr << "defalte should be in DODS_ROOT/etc or in the CWD!" 00348 << endl; 00349 _exit(127); // Only here if an error occurred. 
00350 } 00351 00352 return ret_file ; 00353 #endif 00354 } 00355 00356 #endif // COMPRESSION_FOR_SERVER3 00357 00358 // This function returns a pointer to the system time formated for an httpd 00359 // log file. 00360 00361 string 00362 systime() 00363 { 00364 time_t TimBin; 00365 00366 if (time(&TimBin) == (time_t) - 1) 00367 return string("time() error"); 00368 else { 00369 string TimStr = ctime(&TimBin); 00370 return TimStr.substr(0, TimStr.size() - 2); // remove the \n 00371 } 00372 } 00373 00374 void 00375 downcase(string &s) 00376 { 00377 for (unsigned int i = 0; i < s.length(); i++) 00378 s[i] = tolower(s[i]); 00379 } 00380 00381 bool 00382 is_quoted(const string &s) 00383 { 00384 return (!s.empty() && s[0] == '\"' && s[s.length()-1] == '\"'); 00385 } 00386 00387 string 00388 remove_quotes(const string &s) 00389 { 00390 if (is_quoted(s)) 00391 return s.substr(1, s.length() - 2); 00392 else 00393 return s; 00394 } 00395 00396 #ifdef WIN32 00397 // Sometimes need to buffer within an iostream under win32 when 00398 // we want the output to go to a FILE *. This is because 00399 // it's not possible to associate an ofstream with a FILE * 00400 // under the Standard ANSI C++ Library spec. Unix systems 00401 // don't follow the spec in this regard. 
// Copy everything that can be read from ios to out, at most 511 characters
// at a time.
// NOTE(review): ios is taken by value and NULL is passed where get() takes
// a delimiter character; both are kept as they are because this WIN32-only
// code cannot be checked here.
void flush_stream(iostream ios, FILE *out)
{
    int nbytes;
    char buffer[512];

    ios.get(buffer, 512, NULL);
    while ((nbytes = ios.gcount()) > 0) {
        fwrite(buffer, 1, nbytes, out);
        ios.get(buffer, 512, NULL);
    }

    return;
}
#endif

// Jose Garcia
/**
 * Append the textual form of an integer, in a given base, to a string.
 *
 * Digits above 9 are written as upper-case letters. A negative value is
 * written as `-' followed by the digits of its magnitude.
 *
 * @param val The value to convert.
 * @param base The base to use; must lie in [2, 36].
 * @param str_val The string to append to.
 * @exception std::invalid_argument when @p base is outside [2, 36]. Nothing
 * has been appended to @p str_val in that case.
 */
void
append_long_to_string(long val, int base, string &str_val)
{
    // The array digits contains 36 elements which are the
    // possible valid digits for our bases in the range
    // [2,36]
    static const char digits[] = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ";

    if (base > 36 || base < 2) {
        // no conversion if wrong base
        std::invalid_argument ex("The parameter base has an invalid value.");
        throw ex;
    }

    // Work on the magnitude as an unsigned value. Negating the most negative
    // long as a signed value overflows, whereas unsigned negation is well
    // defined and yields the correct magnitude.
    unsigned long magnitude = static_cast<unsigned long>(val);
    if (val < 0) {
        str_val += '-';
        magnitude = 0UL - magnitude;
    }

    // Digits come out least significant first; collect them and append them
    // in reverse. The do/while makes zero produce a single "0".
    const unsigned long radix = static_cast<unsigned long>(base);
    string reversed;
    do {
        reversed += digits[magnitude % radix];
        magnitude /= radix;
    } while (magnitude != 0);

    str_val.append(reversed.rbegin(), reversed.rend());
}

// base defaults to 10
/**
 * Return the textual form of an integer in a given base.
 *
 * @param val The value to convert.
 * @param base The base to use; must lie in [2, 36].
 * @return The converted value.
 * @exception std::invalid_argument when @p base is outside [2, 36].
 */
string
long_to_string(long val, int base)
{
    string text;
    append_long_to_string(val, base, text);
    return text;
}

// Jose Garcia
/**
 * Append the textual form of a double to a string.
 *
 * The number is formatted by an ostringstream whose precision is set to 9.
 *
 * @param num The value to convert.
 * @param str The string to append to.
 */
void append_double_to_string(const double &num, string &str)
{
    // I want to banish all instances of sprintf. 10/5/2001 jhrg
    ostringstream oss;
    oss.precision(9);
    oss << num;
    str += oss.str();
}

/**
 * Return the textual form of a double.
 *
 * @param num The value to convert.
 * @return The text that append_double_to_string() produces for @p num.
 */
string
double_to_string(const double &num)
{
    string text;
    append_double_to_string(num, text);
    return text;
}

// Get the version number of the core software. Defining this means that
// clients of the DAP don't have to rely on config.h for the version
// number.
00477 string 00478 dap_version() 00479 { 00480 return (string)"OPeNDAP DAP/" + libdap_version() + ": compiled on " + __DATE__ + ":" + __TIME__ ; 00481 } 00482 00483 // Given a pathname, return the file at the end of the path. This is used 00484 // when reporting errors (maybe other times, too) to keep the server from 00485 // revealing too much about its organization when sending error responses 00486 // back to clients. 10/11/2000 jhrg 00487 // MT-safe. 08/05/02 jhrg 00488 00489 #ifdef WIN32 00490 static const char path_sep[] = 00491 {"\\" 00492 }; 00493 #else 00494 static const char path_sep[] = 00495 {"/" 00496 }; 00497 #endif 00498 00499 string 00500 path_to_filename(string path) 00501 { 00502 string::size_type pos = path.rfind(path_sep); 00503 00504 return (pos == string::npos) ? path : path.substr(++pos); 00505 } 00506 00507 #if 0 00508 // Look around for a reasonable place to put a temporary file. Check first 00509 // the value of the TMPDIR env var. If that does not yeild a path that's 00510 // writable (as defined by access(..., W_OK|R_OK)) then look at P_tmpdir (as 00511 // defined in stdio.h. If both come up empty, then use `./'. 00512 // 00513 // This function allocates storage using new. The caller must delete the char 00514 // array. 00515 00516 // Change this to a version that either returns a string or an open file 00517 // descriptor. Use information from https://buildsecurityin.us-cert.gov/ 00518 // (see open()) to make it more secure. Ideal solution: get deserialize() 00519 // methods to read from a stream returned by libcurl, not from a temporary 00520 // file. 
9/21/07 jhrg 00521 char * 00522 get_tempfile_template(char *file_template) 00523 { 00524 char *c; 00525 00526 #ifdef WIN32 00527 // whitelist for a WIN32 directory 00528 Regex directory("[-a-zA-Z0-9_\\]*"); 00529 00530 c = getenv("TEMP"); 00531 if (c && directory.match(c, strlen(c)) && (access(getenv("TEMP"), 6) == 0)) 00532 goto valid_temp_directory; 00533 00534 c= getenv("TMP"); 00535 if (c && directory.match(c, strlen(c)) && (access(getenv("TEMP"), 6) == 0)) 00536 goto valid_temp_directory; 00537 #else 00538 00539 c = getenv("TMPDIR"); 00540 // Changed this so that it uses the pathname_ok() method instead 00541 // of using its own regex. jhrg 2/4/08 00542 if (c) { 00543 string tmpdir = *c; 00544 if (pathname_ok(tmpdir) && (access(c, W_OK | R_OK) == 0)) 00545 goto valid_temp_directory; 00546 } 00547 00548 #ifdef P_tmpdir 00549 if (access(P_tmpdir, W_OK | R_OK) == 0) { 00550 c = P_tmpdir; 00551 goto valid_temp_directory; 00552 } 00553 #endif 00554 00555 #endif // WIN32 00556 00557 c = "."; 00558 00559 valid_temp_directory: 00560 // Sanitize allocation 00561 int size = strlen(c) + strlen(file_template) + 2; 00562 if (!size_ok(1, size)) 00563 throw Error("Bad temporary file name."); 00564 00565 char *temp = new char[size]; 00566 strncpy(temp, c, size-2); 00567 strcat(temp, "/"); 00568 00569 strcat(temp, file_template); 00570 00571 return temp; 00572 } 00573 #endif 00574 #if 0 00575 00580 #ifndef WIN32 00581 FILE * 00582 get_temp_file(char *temp) 00583 { 00584 int fd = mkstemp(temp); 00585 if (fd < 0) 00586 return 0; 00587 FILE *tmp = fdopen(fd, "a+"); 00588 return tmp; 00589 } 00590 #endif 00591 #endif 00592 00596 string 00597 file_to_string(FILE *fp) 00598 { 00599 rewind(fp); 00600 ostringstream oss; 00601 char c; 00602 while (fread(&c, 1, 1, fp)) 00603 oss << c; 00604 return oss.str(); 00605 } 00606 00609 00615 bool 00616 size_ok(uint sz, uint nelem) 00617 { 00618 return (sz > 0 && nelem < UINT_MAX / sz); 00619 } 00620 00637 bool 00638 pathname_ok(const string 
&path, bool strict) 00639 { 00640 if (path.length() > 255) 00641 return false; 00642 00643 Regex name("[-0-9A-z_./]+"); 00644 if (!strict) 00645 name = "[:print:]+"; 00646 00647 string::size_type len = path.length(); 00648 int result = name.match(path.c_str(), len); 00649 // Protect against casting too big an uint to int 00650 // if LEN is bigger than the max int32, the second test can't work 00651 if (len > INT_MAX || result != static_cast<int>(len)) 00652 return false; 00653 00654 return true; 00655 } 00656 00658 00659 } // namespace libdap 00660