libdap++  Updated for version 3.8.2
util.cc
Go to the documentation of this file.
00001 
00002 // -*- mode: c++; c-basic-offset:4 -*-
00003 
00004 // This file is part of libdap, A C++ implementation of the OPeNDAP Data
00005 // Access Protocol.
00006 
00007 // Copyright (c) 2002,2003 OPeNDAP, Inc.
00008 // Author: James Gallagher <jgallagher@opendap.org>
00009 //
00010 // This library is free software; you can redistribute it and/or
00011 // modify it under the terms of the GNU Lesser General Public
00012 // License as published by the Free Software Foundation; either
00013 // version 2.1 of the License, or (at your option) any later version.
00014 //
00015 // This library is distributed in the hope that it will be useful,
00016 // but WITHOUT ANY WARRANTY; without even the implied warranty of
00017 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00018 // Lesser General Public License for more details.
00019 //
00020 // You should have received a copy of the GNU Lesser General Public
00021 // License along with this library; if not, write to the Free Software
00022 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
00023 //
00024 // You can contact OPeNDAP, Inc. at PO Box 112, Saunderstown, RI. 02874-0112.
00025 
00026 // (c) COPYRIGHT URI/MIT 1994-1999
00027 // Please read the full copyright statement in the file COPYRIGHT_URI.
00028 //
00029 // Authors:
00030 //      jhrg,jimg       James Gallagher <jgallagher@gso.uri.edu>
00031 
00032 // Utility functions used by the api.
00033 //
00034 // jhrg 9/21/94
00035 
00036 #include "config.h"
00037 
00038 static char rcsid[] not_used =
00039     {"$Id: util.cc 25112 2011-12-29 21:44:54Z jimg $"
00040     };
00041 
00042 #include <cassert>
00043 #include <cstring>
00044 
00045 #include <ctype.h>
00046 #ifndef TM_IN_SYS_TIME
00047 #include <time.h>
00048 #else
00049 #include <sys/time.h>
00050 #endif
00051 
00052 #ifndef WIN32
00053 #include <unistd.h>    // for stat
00054 #else
00055 #include <io.h>
00056 #include <fcntl.h>
00057 #include <process.h>
00058 #endif
00059 
00060 #include <sys/types.h>
00061 #include <sys/stat.h>
00062 
00063 #include <string>
00064 #include <sstream>
00065 #include <vector>
00066 #include <algorithm>
00067 #include <stdexcept>
00068 
00069 #include "BaseType.h"
00070 #include "Str.h"
00071 #include "Url.h"
00072 #include "Sequence.h"
00073 #include "Error.h"
00074 #include "parser.h"
00075 #include "util.h"
00076 #include "GNURegex.h"
00077 #include "debug.h"
00078 
00079 using namespace std;
00080 
00081 namespace libdap {
00082 
/**
 * Remove spaces from the start of a URL and from the start of any constraint
 * expression it contains. 4/7/98 jhrg
 *
 * Only two runs of spaces are removed: the one that precedes the URL and the
 * one that immediately follows the first `?'. Spaces anywhere else are kept.
 *
 * @param name The URL, optionally followed by `?' and a constraint expression.
 * @return A copy of @c name with those leading spaces removed. A string made
 * up entirely of spaces yields the empty string.
 */
string
prune_spaces(const string &name)
{
    // If the URL does not even have white space there is nothing to do.
    if (name.find(' ') == string::npos)
        return name;

    // Strip leading spaces from http://...
    // Positions are kept as string::size_type so npos is never truncated.
    const string::size_type url_start = name.find_first_not_of(' ');
    if (url_start == string::npos)
        return string();        // nothing but spaces

    string tmp_name = name.substr(url_start);

    // Strip leading spaces from the constraint part (following `?').
    const string::size_type query = tmp_name.find('?');
    if (query != string::npos) {
        const string::size_type ce_start = query + 1;
        const string::size_type ce_text = tmp_name.find_first_not_of(' ', ce_start);

        if (ce_text == string::npos)
            tmp_name.erase(ce_start);       // only spaces follow the `?'
        else
            tmp_name.erase(ce_start, ce_text - ce_start);
    }

    return tmp_name;
}
00113 
00114 // Compare elements in a list of (BaseType *)s and return true if there are
00115 // no duplicate elements, otherwise return false.
00116 
00117 bool
00118 unique_names(vector<BaseType *> l, const string &var_name,
00119              const string &type_name, string &msg)
00120 {
00121     // copy the identifier names to a vector
00122     vector<string> names(l.size());
00123 
00124     int nelem = 0;
00125     typedef std::vector<BaseType *>::const_iterator citer ;
00126     for (citer i = l.begin(); i != l.end(); i++) {
00127         assert(*i);
00128         names[nelem++] = (*i)->name();
00129         DBG(cerr << "NAMES[" << nelem - 1 << "]=" << names[nelem-1] << endl);
00130     }
00131 
00132     // sort the array of names
00133     sort(names.begin(), names.end());
00134 
00135 #ifdef DODS_DEBUG2
00136     cout << "unique:" << endl;
00137     for (int ii = 0; ii < nelem; ++ii)
00138         cout << "NAMES[" << ii << "]=" << names[ii] << endl;
00139 #endif
00140 
00141     // sort the array of names
00142     sort(names.begin(), names.end());
00143 
00144 #ifdef DODS_DEBUG2
00145     cout << "unique:" << endl;
00146     for (int ii = 0; ii < nelem; ++ii)
00147         cout << "NAMES[" << ii << "]=" << names[ii] << endl;
00148 #endif
00149 
00150     // look for any instance of consecutive names that are ==
00151     for (int j = 1; j < nelem; ++j) {
00152         if (names[j-1] == names[j]) {
00153             ostringstream oss;
00154             oss << "The variable `" << names[j]
00155             << "' is used more than once in " << type_name << " `"
00156             << var_name << "'";
00157             msg = oss.str();
00158 
00159             return false;
00160         }
00161     }
00162 
00163     return true;
00164 }
00165 
00166 const char *
00167 libdap_root()
00168 {
00169     return LIBDAP_ROOT;
00170 }
00171 
00172 extern "C"
00173     const char *
00174     libdap_version()
00175 {
00176     return PACKAGE_VERSION;
00177 }
00178 
00179 extern "C"
00180     const char *
00181     libdap_name()
00182 {
00183     return PACKAGE_NAME;
00184 }
00185 
00186 // Since Server4 can get compressed responses using Tomcat, bail on this
00187 // software (which complicates building under Win32). It can be turned on
00188 // for use with Server3 in configure.ac.
00189 
00190 #if COMPRESSION_FOR_SERVER3
00191 
00192 // Return true if the program deflate exists and is executable by user, group
00193 // and world. If this returns false the caller should assume that server
00194 // filter programs won't be able to find the deflate program and thus won't
00195 // be able to compress the return document.
00196 // NB: this works because this function uses the same rules as compressor()
00197 // (which follows) to look for deflate. 2/11/98 jhrg
00198 
00199 bool
00200 deflate_exists()
00201 {
00202     DBG(cerr << "Entering deflate_exists...");
00203 
00204     int status = false;
00205     struct stat buf;
00206 
00207 #ifdef WIN32
00208     string deflate = (string)libdap_root() + "\\bin\\deflate";
00209 #else
00210     string deflate = (string)libdap_root() + "/sbin/deflate";
00211 #endif
00212 
00213     // Check that the file exists...
00214     // First look for deflate using DODS_ROOT (compile-time constant subsumed
00215     // by an environment variable) and if that fails in the CWD which finds
00216     // the program when it is in the same directory as the dispatch script
00217     // and other server components. 2/11/98 jhrg
00218     status = (stat(deflate.c_str(), &buf) == 0)
00219 #ifdef WIN32
00220              || (stat(".\\deflate", &buf) == 0);
00221 #else
00222              || (stat("./deflate", &buf) == 0);
00223 #endif
00224 
00225     // and that it can be executed.
00226 #ifdef WIN32
00227     status &= (buf.st_mode & _S_IEXEC);
00228 #else
00229     status &= buf.st_mode & (S_IXUSR | S_IXGRP | S_IXOTH);
00230 #endif
00231     DBG(cerr << " returning " << (status ? "true." : "false.") << endl);
00232     return (status != 0);
00233 }
00234 
00235 FILE *
00236 compressor(FILE *output, int &childpid)
00237 {
00238 #ifdef WIN32
00239     //  There is no such thing as a "fork" under win32. This makes it so that
00240     //  we have to juggle handles more aggressively. This code hasn't been
00241     //  tested and shown to work as of 07/2000.
00242     int pid, data[2];
00243     int hStdIn, hStdOut;
00244 
00245     if (_pipe(data, 512, O_BINARY | O_NOINHERIT) < 0) {
00246         cerr << "Could not create IPC channel for compressor process"
00247         << endl;
00248         return NULL;
00249     }
00250 
00251 
00252     // This sets up for the child process, but it has to be reversed for the
00253     // parent after the spawn takes place.
00254 
00255     // Store stdin, stdout so we have something to restore to
00256     hStdIn  = _dup(_fileno(stdin));
00257     hStdOut = _dup(_fileno(stdout));
00258 
00259     // Child is to read from read end of pipe
00260     if (_dup2(data[0], _fileno(stdin)) != 0) {
00261         cerr << "dup of child stdin failed" << endl;
00262         return NULL;
00263     }
00264     // Child is to write its's stdout to file
00265     if (_dup2(_fileno(output), _fileno(stdout)) != 0) {
00266         cerr << "dup of child stdout failed" << endl;
00267         return NULL;
00268     }
00269 
00270     // Spawn child process
00271     string deflate = "deflate.exe";
00272     if ((pid = _spawnlp(_P_NOWAIT, deflate.c_str(), deflate.c_str(),
00273                         "-c", "5", "-s", NULL)) < 0) {
00274         cerr << "Could not spawn to create compressor process" << endl;
00275         return NULL;
00276     }
00277 
00278     // Restore stdin, stdout for parent and close duplicate copies
00279     if (_dup2(hStdIn, _fileno(stdin)) != 0) {
00280         cerr << "dup of stdin failed" << endl;
00281         return NULL;
00282     }
00283     if (_dup2(hStdOut, _fileno(stdout)) != 0) {
00284         cerr << "dup of stdout failed" << endl;
00285         return NULL;
00286     }
00287     close(hStdIn);
00288     close(hStdOut);
00289 
00290     // Tell the parent that it reads from the opposite end of the
00291     // place where the child writes.
00292     close(data[0]);
00293     FILE *input = fdopen(data[1], "w");
00294     setbuf(input, 0);
00295     childpid = pid;
00296     return input;
00297 
00298 #else
00299     FILE *ret_file = NULL ;
00300 
00301     int pid, data[2];
00302 
00303     if (pipe(data) < 0) {
00304         cerr << "Could not create IPC channel for compressor process"
00305         << endl;
00306         return NULL;
00307     }
00308 
00309     if ((pid = fork()) < 0) {
00310         cerr << "Could not fork to create compressor process" << endl;
00311         return NULL;
00312     }
00313 
00314     // The parent process closes the write end of the Pipe, and creates a
00315     // FILE * using fdopen(). The FILE * is used by the calling program to
00316     // access the read end of the Pipe.
00317 
00318     if (pid > 0) {   // Parent, pid is that of the child
00319         close(data[0]);
00320         ret_file = fdopen(data[1], "w");
00321         setbuf(ret_file, 0);
00322         childpid = pid;
00323     }
00324     else {   // Child
00325         close(data[1]);
00326         dup2(data[0], 0); // Read from the pipe...
00327         dup2(fileno(output), 1); // Write to the FILE *output.
00328 
00329         DBG(cerr << "Opening compression stream." << endl);
00330 
00331         // First try to run deflate using DODS_ROOT (the value read from the
00332         // DODS_ROOT environment variable takes precedence over the value set
00333         // at build time. If that fails, try the CWD.
00334         string deflate = (string)libdap_root() + "/sbin/deflate";
00335         (void) execl(deflate.c_str(), "deflate", "-c",  "5", "-s", NULL);
00336         (void) execl("./deflate", "deflate", "-c",  "5", "-s", NULL);
00337         cerr << "Warning: Could not start compressor!" << endl;
00338         cerr << "defalte should be in DODS_ROOT/etc or in the CWD!"
00339         << endl;
00340         _exit(127);  // Only here if an error occurred.
00341     }
00342 
00343     return ret_file ;
00344 #endif
00345 }
00346 
00347 #endif // COMPRESSION_FOR_SERVER3
00348 
/**
 * Return the current system time formatted for an httpd log file.
 *
 * The text is what ctime() produces ("Www Mmm dd hh:mm:ss yyyy") without the
 * trailing newline. Earlier code dropped two characters and so also lost the
 * last digit of the year.
 *
 * @return The formatted time, or the string "time() error" when the time
 * cannot be read or formatted.
 */
string
systime()
{
    time_t now;

    if (time(&now) == (time_t) - 1)
        return string("time() error");

    // ctime() may return a null pointer; never build a string from that.
    const char *text = ctime(&now);
    if (!text)
        return string("time() error");

    string stamp(text);
    if (!stamp.empty() && stamp[stamp.size() - 1] == '\n')
        stamp.erase(stamp.size() - 1);      // remove the \n

    return stamp;
}
00364 
/**
 * Convert every character of @c s to lower case, in place.
 *
 * @param s The string to modify.
 */
void
downcase(string &s)
{
    for (string::size_type i = 0; i < s.length(); ++i) {
        // tolower() is only defined for values representable as unsigned
        // char (or EOF), so a possibly negative char is converted first.
        s[i] = static_cast<char>(tolower(static_cast<unsigned char>(s[i])));
    }
}
00371 
/**
 * Test whether @c s both starts and ends with a double quote.
 *
 * A string consisting of one double quote counts as quoted, because its
 * first and last character are the same character.
 *
 * @param s The string to examine.
 * @return True for a non-empty string whose first and last characters are
 * both `"'.
 */
bool
is_quoted(const string &s)
{
    if (s.empty())
        return false;

    const char quote = '\"';
    return *s.begin() == quote && *s.rbegin() == quote;
}
00377 
00378 string
00379 remove_quotes(const string &s)
00380 {
00381     if (is_quoted(s))
00382         return s.substr(1, s.length() - 2);
00383     else
00384         return s;
00385 }
00386 
00387 #ifdef WIN32
//  Under win32 output sometimes has to be buffered in an iostream when it
//  should really go to a FILE *, because the Standard ANSI C++ Library spec
//  gives no way to associate an ofstream with a FILE *.  Unix systems don't
//  follow the spec in this regard.
//
//  Copy whatever can be read from ios to out, at most 511 characters per
//  read, stopping at the first read that extracts nothing.
void flush_stream(iostream ios, FILE *out)
{
    char buffer[512];

    for (;;) {
        ios.get(buffer, 512, NULL);

        const int nbytes = ios.gcount();
        if (nbytes <= 0)
            break;

        fwrite(buffer, 1, nbytes, out);
    }
}
00406 #endif
00407 
// Jose Garcia
/**
 * Append the textual representation of @c val in the given base to
 * @c str_val.
 *
 * Negative values are written as `-' followed by the magnitude. The
 * magnitude is computed in unsigned arithmetic so that the most negative
 * long is converted correctly too.
 *
 * @param val The value to convert.
 * @param base The radix, in the range [2,36]; digits above 9 are written as
 * upper-case letters.
 * @param str_val The string the digits are appended to.
 * @exception std::invalid_argument if @c base is outside [2,36]; @c str_val
 * is not modified in that case.
 */
void
append_long_to_string(long val, int base, string &str_val)
{
    // The array digits contains 36 elements which are the
    // possible valid digits for our bases in the range
    // [2,36]
    static const char digits[] = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ";

    if (base > 36 || base < 2) {
        // no conversion if wrong base
        throw std::invalid_argument("The parameter base has an invalid value.");
    }

    unsigned long magnitude = static_cast<unsigned long>(val);
    if (val < 0) {
        str_val += '-';
        // Negate after the conversion: well defined even for LONG_MIN.
        magnitude = 0UL - magnitude;
    }

    // Digits come out least significant first; collect, then append reversed.
    const unsigned long radix = static_cast<unsigned long>(base);
    string reversed;
    do {
        reversed += digits[magnitude % radix];
        magnitude /= radix;
    } while (magnitude != 0);

    str_val.append(reversed.rbegin(), reversed.rend());
}
00436 
00437 // base defaults to 10
00438 string
00439 long_to_string(long val, int base)
00440 {
00441     string s;
00442     append_long_to_string(val, base, s);
00443     return s;
00444 }
00445 
// Jose Garcia
/**
 * Append the textual representation of @c num to @c str.
 *
 * The value is formatted by a string stream with a precision of 9, which
 * avoids sprintf and its fixed-size buffer. 10/5/2001 jhrg
 *
 * @param num The value to convert.
 * @param str The string the text is appended to.
 */
void append_double_to_string(const double &num, string &str)
{
    ostringstream formatter;
    formatter.precision(9);
    formatter << num;

    str.append(formatter.str());
}
00456 
00457 string
00458 double_to_string(const double &num)
00459 {
00460     string s;
00461     append_double_to_string(num, s);
00462     return s;
00463 }
00464 
00465 // Get the version number of the core software. Defining this means that
00466 // clients of the DAP don't have to rely on config.h for the version
00467 // number.
00468 string
00469 dap_version()
00470 {
00471     return (string)"OPeNDAP DAP/" + libdap_version() + ": compiled on " + __DATE__ + ":" + __TIME__ ;
00472 }
00473 
// Given a pathname, return the file at the end of the path. This is used
// when reporting errors (maybe other times, too) to keep the server from
// revealing too much about its organization when sending error responses
// back to clients. 10/11/2000 jhrg
// MT-safe. 08/05/02 jhrg

// The platform's directory separator, as a C string.
#ifdef WIN32
static const char path_sep[] = { "\\" };
#else
static const char path_sep[] = { "/" };
#endif

/**
 * @param path A pathname.
 * @return Everything after the last path separator in @c path, or all of
 * @c path when it contains no separator.
 */
string
path_to_filename(string path)
{
    const string::size_type last_sep = path.rfind(path_sep);
    if (last_sep == string::npos)
        return path;

    return path.substr(last_sep + 1);
}
00497 
/**
 * Read the entire contents of an open file into a string.
 *
 * The stream is rewound first, so the whole file is returned regardless of
 * the current position. Data is read in blocks rather than one byte per
 * fread() call.
 *
 * @param fp An open, readable stream. A null pointer yields an empty string.
 * @return The bytes read from the start of the file up to end-of-file or the
 * first read error.
 */
string
file_to_string(FILE *fp)
{
    string contents;
    if (!fp)
        return contents;

    rewind(fp);

    char chunk[4096];
    size_t got;
    while ((got = fread(chunk, 1, sizeof chunk, fp)) > 0)
        contents.append(chunk, got);

    return contents;
}
00512 
/**
 * Match a string against a wildcard pattern in which `*' stands for any run
 * of characters (including none) and `?' for exactly one character.
 *
 * Written by Jack Handy - jakkhandy@hotmail.com
 *
 * @param wild The pattern.
 * @param string The text to test.
 * @return 1 if the whole of @c string matches @c wild, 0 if it does not or
 * if either argument is null.
 */
int
wildcmp(const char *wild, const char *string)
{
  if (wild == NULL || string == NULL)
      return 0;

  // Everything in front of the first `*' has to match position by position.
  for (; *string != '\0' && *wild != '*'; ++wild, ++string) {
    if (*wild != *string && *wild != '?')
      return 0;
  }

  // Where to resume after a mismatch: the pattern just behind the most
  // recent `*', and the text position that `*' should try to absorb next.
  const char *retry_pattern = NULL;
  const char *retry_text = NULL;

  while (*string != '\0') {
    if (*wild == '*') {
      ++wild;
      if (*wild == '\0')
        return 1;               // a trailing `*' matches all that is left

      retry_pattern = wild;
      retry_text = string + 1;
    }
    else if (*wild == *string || *wild == '?') {
      ++wild;
      ++string;
    }
    else {
      // Let the last `*' swallow one more character and try again.
      wild = retry_pattern;
      string = retry_text;
      ++retry_text;
    }
  }

  // The text is used up; only `*'s may remain in the pattern.
  while (*wild == '*')
    ++wild;

  return (*wild == '\0') ? 1 : 0;
}
00552 
/* The character-class bitmap holds one bit per possible unsigned char value,
 * i.e. 256 bits in 32 bytes. (It used to be 16 bytes, so any character above
 * 127 indexed past the end of the array.) */
#define CHECK_BIT( tab, bit ) ( tab[ (bit)/8 ] & (1<<( (bit)%8 )) )
#define BITLISTSIZE 32 /* bytes used for [chars] in compiled expr */

static void globchars( const char *s, const char *e, char *b );

/*
 * glob:  match a string against a simple pattern
 *
 * Understands the following patterns:
 *
 *  *   any number of characters
 *  ?   any single character
 *  [a-z]   any single character in the range a-z
 *  [^a-z]  any single character not in the range a-z
 *  \x  match x
 *
 * The first character after `[' (or after `[^' is not handled specially:
 * only a `]' directly after `[') is always taken literally, so `[]a]'
 * matches `]' or `a'. The closing `]' itself is not a member of the class.
 *
 * @param c The pattern
 * @param s The string
 * @return 0 on success, -1 if the pattern is exhausted but there are
 * characters remaining in the string, and a positive value if the pattern
 * does not match (the 1-based count of the pattern token being processed at
 * this level when the mismatch was found). A null argument yields 1.
 */

int
glob(const char *c, const char *s)
{
    if (!c || !s)
        return 1;

    char bitlist[BITLISTSIZE];
    int i = 0;
    for (;;) {
        ++i;
        switch (*c++) {
        case '\0':
            return *s ? -1 : 0;

        case '?':
            if (!*s++)
                return i/*1*/;
            break;

        case '[': {
            /* scan for matching ]; the do-while always consumes the first */
            /* character, so a leading ] does not end the class. An */
            /* unterminated class is a mismatch. */

            const char *here = c;
            do {
                if (!*c++)
                    return i/*1*/;
            } while (*c != ']');

            /* build character class bitlist from [here, c): c still */
            /* points at the closing ], which must not join the class */

            globchars(here, c, bitlist);
            c++;

            /* \0 is never in the class, so this also stops at the end of s */
            if (!CHECK_BIT( bitlist, *reinterpret_cast<const unsigned char *>(s) ))
                return i/*1*/;
            s++;
            break;
        }

        case '*': {
            const char *here = s;

            while (*s)
                s++;

            /* Try to match the rest of the pattern in a recursive */
            /* call.  If the match fails we'll back up chars, retrying. */

            while (s != here) {
                int r;

                /* A fast path for the last token in a pattern */

                r = *c ? glob(c, s) : *s ? -1 : 0;

                if (!r)
                    return 0;
                else if (r < 0)
                    return i/*1*/;

                --s;
            }
            break;
        }

        case '\\':
            /* Force literal match of next char. */

            if (!*c || *s++ != *c++)
                return i/*1*/;
            break;

        default:
            if (*s++ != c[-1])
                return i/*1*/;
            break;
        }
    }
}

/*
 * globchars() - build a bitlist to check for character group match
 *
 * s points at the first character of the class text, e one past its last
 * character, and b at a bitmap of BITLISTSIZE bytes that is overwritten.
 * A leading ^ negates the class. Characters are handled as unsigned char so
 * that values above 127 select a valid bit instead of a negative index.
 */

static void globchars(const char *s, const char *e, char *b) {
    int neg = 0;

    memset(b, '\0', BITLISTSIZE);

    if (*s == '^')
        neg++, s++;

    while (s < e) {
        int c;

        if (s + 2 < e && s[1] == '-') {
            /* a range such as a-z; an inverted range adds nothing */
            const int last = static_cast<unsigned char>(s[2]);
            for (c = static_cast<unsigned char>(s[0]); c <= last; c++)
                b[c / 8] |= (1 << (c % 8));
            s += 3;
        }
        else {
            c = static_cast<unsigned char>(*s++);
            b[c / 8] |= (1 << (c % 8));
        }
    }

    if (neg) {
        int i;
        for (i = 0; i < BITLISTSIZE; i++)
            b[i] ^= 0377;
    }

    /* Don't include \0 in either $[chars] or $[^chars] */

    b[0] &= 0376;
}
00691 
/**
 * Recursively match a string against a pattern in which `*' stands for any
 * run of characters (including none) and `?' for exactly one character.
 *
 * @param pat The pattern.
 * @param s The text to test.
 * @return 1 if the whole of @c s matches @c pat, 0 if it does not or if
 * either argument is null.
 */
int wmatch(const char *pat, const char *s)
{
    if (!pat || !s)
        return 0;

    const char token = *pat;

    // End of pattern: a match only if the text is used up as well.
    if (token == '\0')
        return *s == '\0';

    // `*': either it matches nothing, or it absorbs one more character.
    if (token == '*')
        return wmatch(pat + 1, s) || (*s != '\0' && wmatch(pat, s + 1));

    // `?' and literal characters both need one character of text.
    if (*s == '\0')
        return 0;
    if (token != '?' && token != *s)
        return 0;

    return wmatch(pat + 1, s + 1) != 0;
}
00704 
00707 
/**
 * Check an element size and count against the range of unsigned int.
 *
 * @param sz Size of one element; zero is rejected.
 * @param nelem Number of elements.
 * @return True when @c sz is non-zero and @c nelem is strictly less than
 * UINT_MAX / sz.
 */
bool
size_ok(unsigned int sz, unsigned int nelem)
{
    if (sz == 0)
        return false;

    const unsigned int limit = UINT_MAX / sz;
    return nelem < limit;
}
00718 
00735 bool
00736 pathname_ok(const string &path, bool strict)
00737 {
00738     if (path.length() > 255)
00739         return false;
00740 
00741     Regex name("[-0-9A-z_./]+");
00742     if (!strict)
00743         name = "[:print:]+";
00744 
00745     string::size_type len = path.length();
00746     int result = name.match(path.c_str(), len);
00747     // Protect against casting too big an uint to int
00748     // if LEN is bigger than the max int32, the second test can't work
00749     if (len > INT_MAX || result != static_cast<int>(len))
00750         return false;
00751 
00752     return true;
00753 }
00754 
00756 
00757 } // namespace libdap
00758