WvStreams
wvurl.cc
00001 /*
00002  * Worldvisions Weaver Software:
00003  *   Copyright (C) 1997-2002 Net Integration Technologies, Inc.
00004  * 
00005  * WvUrl is a simple URL-parsing class with built-in (though still somewhat
00006  * inconvenient) DNS resolution.
00007  * 
00008  * See wvurl.h.
00009  */ 
00010 #include "wvurl.h"
00011 #include "strutils.h"
00012 
// One row of the table describing the protocols WvUrl understands.
struct DefaultPort
{
    // Scheme name as it appears before the ':' in a URL (e.g. "http").
    // A NULL pointer here marks the end of the table.
    const char *proto;

    // Port number to use when the URL does not name one explicitly.
    int port;

    // true if the scheme is followed by "://" (http), false if it is
    // followed by a bare ":" (sip).
    bool uses_slashes;
};
00020 
00021 // The protocols must be arranged from longest to shortest because they're
00022 // compared with strncmp, so "https://" will also match http.
00023 static DefaultPort portmap[] = {
00024     { "exchangeits", 7070, false },
00025     { "exchangeit", 6969, false },
00026     { "https", 443, true },
00027     { "http", 80, true },
00028     { "file", 0, true },
00029     { "sip", 5060, false },
00030     { "ftp", 21, true },
00031     { NULL, 0 }
00032 };
00033 
00034 // Look up the protocol and return the default port.
00035 static int get_default_port(WvString proto)
00036 {
00037     DefaultPort *p = portmap;
00038     for (p = portmap; p->proto != NULL; p++)
00039     {
00040         if (strncmp(p->proto, proto, strlen(p->proto)) == 0)
00041             return p->port;
00042     }
00043     return -1;
00044 }
00045 
00046 // Look up the protocol and decide whether it uses slashes (http) or not (sip)
00047 // A check of rfc2396 shows that the URI standard actually distinguishes
00048 // these: 'hierarchical' vs. 'opaque'.
00049 static bool protocol_uses_slashes(WvString proto)
00050 {
00051     DefaultPort *p = portmap;
00052     for (p = portmap; p->proto != NULL; p++)
00053     {
00054         if (strncmp(p->proto, proto, strlen(p->proto)) == 0)
00055             return p->uses_slashes;
00056     }
00057     return false;
00058 }
00059 
00060 // Split up the URL into a hostname, a port, and the rest of it.
00061 WvUrl::WvUrl(WvStringParm url) : err("No error")
00062 {
00063     WvString work(url);
00064     char *cptr, *wptr = work.edit();
00065     
00066     port = 0; // error condition by default
00067     addr = NULL;
00068     resolving = true;
00069     
00070     // deal with extra whitespace.
00071     wptr = trim_string(wptr);
00072     cptr = wptr + strcspn(wptr, " \t\r\n");
00073     *cptr = 0;
00074 
00075     // if it's not one of these easy prefixes, give up.  Our URL parser is
00076     // pretty dumb.
00077     if (get_default_port(wptr) < 0)
00078     {
00079         err = "WvUrl cannot handle the given protocol.";
00080         return;
00081     }
00082 
00083     cptr = strchr(wptr, ':');
00084     if (!cptr)
00085     {
00086         err = "No colon after the protocol.";
00087         return;
00088     }
00089     *cptr = 0;
00090     proto = wptr;
00091 
00092     bool use_slashes = protocol_uses_slashes(proto);
00093     wptr = cptr + (use_slashes ? 3 : 1);
00094 
00095     cptr = strchr(wptr, '@');
00096     if (!cptr) // no user given
00097     {
00098         user = "";
00099         password = "";
00100     }
00101     else
00102     {
00103         *cptr = 0;
00104         char *cptr2 = strchr(wptr, ':');
00105         if (cptr2 && (*(cptr2+1) != 0))
00106         {
00107             *cptr2 = 0;
00108             password = cptr2 + 1;
00109         }
00110         else
00111             password = ""; 
00112         user = wptr;
00113         wptr = cptr + 1;
00114     }
00115     
00116     cptr = strchr(wptr, '/');
00117     if (!cptr) // no path given
00118         file = use_slashes ? "/" : "";
00119     else
00120     {
00121         file = cptr;
00122         *cptr = 0;
00123     }
00124     
00125     cptr = strchr(wptr, ':');
00126     if (!cptr)
00127         port = get_default_port(proto);
00128     else
00129     {
00130         port = atoi(cptr+1);
00131         *cptr = 0;
00132     }
00133 
00134     hostname = wptr;
00135 
00136     resolve();
00137 }
00138 
00139 
00140 WvUrl::WvUrl(const WvUrl &url) : err("No error")
00141 {
00142     addr = NULL;
00143     resolving = true;
00144     
00145     proto = url.proto;
00146     user = url.user;
00147     password = url.password;
00148     hostname = url.hostname;
00149     file = url.file;
00150     port = url.port;
00151 
00152     resolve();
00153 }
00154 
00155 
00156 WvUrl::~WvUrl()
00157 {
00158     if (addr) delete addr;
00159 }
00160 
00161 
00162 bool WvUrl::resolve()
00163 {
00164     const WvIPAddr *ip;
00165     int numaddrs;
00166     
00167     numaddrs = dns.findaddr(0, hostname, &ip);
00168     if (!numaddrs) // error condition
00169     {
00170         err = WvString("Host '%s' could not be found.", hostname);
00171         resolving = false;
00172         return false;
00173     }
00174     else if (numaddrs < 0) // still waiting
00175     {
00176         resolving = true;
00177         return false;
00178     }
00179     else // got at least one address
00180     {
00181         resolving = false;
00182         if (addr) delete addr;
00183         addr = new WvIPPortAddr(*ip, port);
00184         return true;
00185     }
00186 }
00187 
00188 
00189 // Print out the URL, using the port name (if it's not 80), and either the 
00190 // hostname (if we know it) or the address (if we know that instead.)
00191 WvUrl::operator WvString () const
00192 {
00193     if (!isok())
00194         return WvString("(Invalid URL: %s)", err);
00195 
00196     WvString protostr;
00197     if (protocol_uses_slashes(proto))
00198         protostr = WvString("%s://", proto);
00199     else
00200         protostr = WvString("%s:", proto);
00201     WvString userstr("");
00202     if (user && user.len() != 0)
00203     {
00204         userstr = WvString("%s", user);
00205         if (password && password.len() != 0)
00206             userstr.append(WvString(":%s@", password));
00207         else
00208             userstr.append("@");
00209     }
00210     WvString portstr("");
00211     if (port && port != get_default_port(proto))
00212         portstr = WvString(":%s", port);
00213     if (hostname)
00214         return WvString("%s%s%s%s%s", protostr, userstr, hostname, portstr, file);
00215     else if (addr)
00216         return WvString("%s%s%s%s%s", protostr, userstr, *addr, portstr, file);
00217     else
00218     {
00219         assert(0);
00220         return WvString("(Invalid URL)");
00221     }
00222 }
00223 
00224