WvStreams
wvhttppool.h
00001 /* -*- Mode: C++ -*-
00002  * Worldvisions Weaver Software:
00003  *   Copyright (C) 1997-2002 Net Integration Technologies, Inc.
00004  *
00005  * A fast, easy-to-use, parallelizing, pipelining HTTP/1.1 file retriever.
00006  * 
00007  * Just create a WvHttpPool object, add it to your list, and use pool.addurl()
00008  * to get a WvStream* that gives you the file you requested.
00009  */ 
00010 #ifndef __WVHTTPPOOL_H
00011 #define __WVHTTPPOOL_H
00012 
00013 #include "ftpparse.h"
00014 #include "wvurl.h"
00015 #include "wvistreamlist.h"
00016 #include "wvstreamclone.h"
00017 #include "wvlog.h"
00018 #include "wvhashtable.h"
00019 #include "wvbufstream.h"
00020 #include "wvbuf.h"
00021 #include "wvcont.h"
00022 #include "wvtcp.h"
00023 
00024 class WvBufUrlStream;
00025 class WvUrlStream;
00026 class WvHttpStream;
00027 
00028 static const WvString DEFAULT_ANON_PW("weasels@");
00029 
00030 struct WvHTTPHeader
00031 {
00032     WvString name, value;
00033     
00034     WvHTTPHeader(WvStringParm _name, WvStringParm _value)
00035         : name(_name), value(_value) 
00036                 {}
00037 };
00038 
00039 
00040 DeclareWvDict(WvHTTPHeader, WvString, name);
00041 
00042 
00043 class WvUrlRequest
00044 {
00045 public:
00046     WvUrl url;
00047     WvString headers;
00048     WvUrlStream *instream;
00049     WvBufUrlStream *outstream;
00050     WvStream *putstream;
00051 
00052     bool pipeline_test;
00053     bool inuse;
00054     bool is_dir;
00055     bool create_dirs;
00056     WvString method;
00057     
00058     WvUrlRequest(WvStringParm _url, WvStringParm _method, WvStringParm _headers,
00059                  WvStream *content_source, bool _create_dirs, bool _pipeline_test);
00060     ~WvUrlRequest();
00061     
00062     void done();
00063 };
00064 
00065 DeclareWvList(WvUrlRequest);
00066 
00067 
00068 struct WvUrlLink
00069 {
00070     WvString linkname;
00071     WvUrl url;
00072 
00073     WvUrlLink(WvStringParm _linkname, WvStringParm _url)
00074         : linkname(_linkname), url(_url)
00075     {}
00076 };
00077 DeclareWvList(WvUrlLink);
00078 
00079 
00080 class WvBufUrlStream : public WvBufStream
00081 {
00082 public:
00083     WvString url;
00084     WvString proto;
00085     WvUrlLinkList links;  // HTML links or FTP directory listing
00086 
00087     // HTTP stuff...
00088     WvString version;
00089     int status;
00090     WvHTTPHeaderDict headers; 
00091 
00092     WvBufUrlStream() : status(0), headers(10)
00093         {}
00094     virtual ~WvBufUrlStream()
00095         {}
00096 
00097 public:
00098     const char *wstype() const { return "WvBufUrlStream"; }
00099 };
00100 
00101 DeclareWvTable(WvIPPortAddr);
00102 
00103 
00104 class WvUrlStream : public WvStreamClone
00105 {
00106 public:
00107     class Target
00108     {
00109     public:
00110         WvIPPortAddr remaddr;
00111         WvString username;
00112 
00113         Target(const WvIPPortAddr &_remaddr, WvStringParm _username)
00114             : remaddr(_remaddr), username(_username) {}
00115 
00116         ~Target() {}
00117 
00118         bool operator== (const Target &n2) const
00119         { return (username == n2.username && remaddr == n2.remaddr); }
00120     };
00121     Target target;
00122     static int max_requests;
00123 
00124 protected:
00125     WvLog log;
00126     WvUrlRequestList urls, waiting_urls;
00127     int request_count;
00128     WvUrlRequest *curl; // current url
00129     virtual void doneurl() = 0;
00130     virtual void request_next() = 0;
00131 
00132 public:
00133     WvUrlStream(const WvIPPortAddr &_remaddr, WvStringParm _username, 
00134         WvStringParm logname)
00135         : WvStreamClone(new WvTCPConn(_remaddr)), target(_remaddr, _username),
00136           log(logname, WvLog::Debug)
00137     {
00138         request_count = 0;
00139         curl = NULL;
00140     }
00141 
00142     virtual ~WvUrlStream() {};
00143 
00144     virtual void close() = 0;
00145     void addurl(WvUrlRequest *url);
00146     void delurl(WvUrlRequest *url);
00147     // only implemented in WvHttpStream
00148     virtual size_t remaining()
00149     { return 0; }
00150     
00151     virtual void execute() = 0;
00152     
00153 public:
00154     const char *wstype() const { return "WvUrlStream"; }
00155 };
00156 
00157 unsigned WvHash(const WvUrlStream::Target &n);
00158 
00159 DeclareWvDict(WvUrlStream, WvUrlStream::Target, target);
00160 
00161 
00162 class WvHttpStream : public WvUrlStream
00163 {
00164 public:
00165     static bool global_enable_pipelining;
00166     bool enable_pipelining;
00167     
00168 private:
00169     int pipeline_test_count;
00170     bool ssl;
00171     bool sent_url_request;      // Have we sent a request to the server yet?
00172     WvIPPortAddrTable &pipeline_incompatible;
00173     WvString http_response, pipeline_test_response;
00174     WvDynBuf putstream_data;
00175     
00176     enum { Unknown, Chunked, ContentLength, Infinity,
00177            PostHeadInfinity, PostHeadChunked, PostHeadStream,
00178            ChuckInfinity, ChuckChunked, ChuckStream } encoding;
00179     size_t bytes_remaining;
00180     bool in_chunk_trailer, last_was_pipeline_test, in_doneurl;
00181 
00182     virtual void doneurl();
00183     virtual void request_next();
00184     void start_pipeline_test(WvUrl *url);
00185     WvString request_str(WvUrlRequest *url, bool keep_alive);
00186     void send_request(WvUrlRequest *url);
00187     void pipelining_is_broken(int why);
00188     
00189 public:
00190     WvHttpStream(const WvIPPortAddr &_remaddr, WvStringParm _username,
00191          bool ssl, WvIPPortAddrTable &_pipeline_incompatible);
00192     virtual ~WvHttpStream();
00193 
00194     virtual void close();
00195     virtual void pre_select(SelectInfo &si);
00196     virtual bool post_select(SelectInfo &si);
00197     virtual void execute();
00198     virtual size_t remaining()
00199     { return bytes_remaining; }
00200     
00201 public:
00202     const char *wstype() const { return "WvHttpStream"; }
00203 };
00204 
00205 
00206 class WvFtpStream : public WvUrlStream
00207 {
00208     bool logged_in, pasv_acked;
00209     WvString password;
00210     WvTCPConn *data;
00211     time_t last_request_time;
00212     bool sure;
00213 
00214     virtual void doneurl();
00215     virtual void request_next();
00216 
00217     // Disregard all lines that are of the form "xxx-", meaning that another
00218     // line follows.  Only the last line is important for us.
00219     char *get_important_line();
00220 
00221     // Parse response to "PASV" command and returns a pointer to the address
00222     // of the data port (or NULL if it can't parse the response)..
00223     // This mucks about with line.
00224     WvIPPortAddr *parse_pasv_response(char *line);
00225 
00226     WvString parse_for_links(char *line);
00227 
00228     WvCont cont;
00229     void* real_execute(void*);
00230 
00231 public:
00232     WvFtpStream(const WvIPPortAddr &_remaddr, WvStringParm _username,
00233                 WvStringParm _password);
00234 
00235     virtual void pre_select(SelectInfo &si);
00236     virtual bool post_select(SelectInfo &si);
00237     virtual void close();
00238     virtual void execute();
00239     
00240 public:
00241     const char *wstype() const { return "WvFtpStream"; }
00242 };
00243 
00244 
00245 // FIXME: Rename this to WvUrlPool someday.
00246 class WvHttpPool : public WvIStreamList
00247 {
00248     WvLog log;
00249     WvResolver dns;
00250     WvUrlStreamDict conns;
00251     WvUrlRequestList urls;
00252     int num_streams_created;
00253     bool sure;
00254     
00255     WvIPPortAddrTable pipeline_incompatible;
00256     
00257 public:
00258     WvHttpPool();
00259     virtual ~WvHttpPool();
00260     
00261     virtual void pre_select(SelectInfo &si);
00262     virtual bool post_select(SelectInfo &si);
00263     virtual void execute();
00264     
00265     WvBufUrlStream *addurl(WvStringParm _url, WvStringParm _method = "GET",
00266                             WvStringParm _headers = "",
00267                             WvStream *content_source = NULL,
00268                             bool create_dirs = false);
00269 
00270     // For URL uploads.  create_dirs should be true if you want all
00271     // non-existent directories in _url to be created.
00272 //    WvBufUrlStream *addputurl(WvStringParm _url, WvStringParm _headers,
00273 //                            WvStream *s, bool create_dirs = false);
00274 private:
00275     void unconnect(WvUrlStream *s);
00276     
00277 public:
00278     bool idle() const 
00279         { return !urls.count(); }
00280     
00281 public:
00282     const char *wstype() const { return "WvHttpPool"; }
00283 };
00284 
00285 
00286 #endif // __WVHTTPPOOL_H