WvStreams
wvftpstream.cc
00001 /*
00002  * Worldvisions Weaver Software:
00003  *   Copyright (C) 1997-2002 Net Integration Technologies, Inc.
00004  * 
00005  * FTP stream support for the fast, easy-to-use, parallelizing, pipelining HTTP/1.1 file retriever.
00006  * 
00007  * See wvhttppool.h.
00008  */
00009 
00010 #ifdef __GNUC__
00011 # define alloca __builtin_alloca
00012 #else
00013 # ifdef _MSC_VER
00014 #  include <malloc.h>
00015 #  define alloca _alloca
00016 # else
00017 #  if HAVE_ALLOCA_H
00018 #   include <alloca.h>
00019 #  else
00020 #   ifdef _AIX
00021 #pragma alloca
00022 #   else
00023 #    ifndef alloca /* predefined by HP cc +Olibcalls */
00024 char *alloca ();
00025 #    endif
00026 #   endif
00027 #  endif
00028 # endif
00029 #endif
00030 
00031 #include <ctype.h>
00032 #include <time.h>
00033 #include "wvhttppool.h"
00034 #include "wvbufstream.h"
00035 #include "wvtcp.h"
00036 #include "wvsslstream.h"
00037 #include "strutils.h"
00038 #include <stdlib.h> // for alloca()... FIXME: which we shouldn't be using!
00039 
00040 WvFtpStream::WvFtpStream(const WvIPPortAddr &_remaddr, WvStringParm _username,
00041                 WvStringParm _password)
00042     : WvUrlStream(_remaddr, _username, WvString("FTP %s", _remaddr)),
00043       cont(wv::bind(&WvFtpStream::real_execute, this, _1))
00044 {
00045     data = NULL;
00046     logged_in = false;
00047     password = _password;
00048     last_request_time = time(0);
00049     alarm(60000); // timeout if no connection, or something goes wrong
00050 }
00051 
00052 
00053 void WvFtpStream::doneurl()
00054 {
00055     log("Done URL: %s\n", curl->url);
00056 
00057     curl->done();
00058     curl = NULL;
00059     WVRELEASE(data);
00060     urls.unlink_first();
00061     last_request_time = time(0);
00062     alarm(60000);
00063     request_next();
00064     // We just processed the last url in the queue,
00065     // so go away.
00066     if (urls.isempty() && waiting_urls.isempty())
00067         close();
00068 }
00069 
00070 
00071 void WvFtpStream::request_next()
00072 {
00073     // don't do a request if we've done too many already or we have none
00074     // waiting.
00075     if (request_count >= max_requests || waiting_urls.isempty())
00076         return;
00077 
00078     if (!urls.isempty())
00079         return;
00080 
00081     // okay then, we really do want to send a new request.
00082     WvUrlRequest *url = waiting_urls.first();
00083 
00084     waiting_urls.unlink_first();
00085 
00086     request_count++;
00087     log("Request #%s: %s\n", request_count, url->url);
00088     urls.append(url, false, "request_url");
00089     alarm(0);
00090 }
00091 
00092 
00093 void WvFtpStream::close()
00094 {
00095     if (isok())
00096         log("Closing.\n");
00097     WvStreamClone::close();
00098 
00099     if (geterr())
00100     {
00101         // if there was an error, count the first URL as done.  This prevents
00102         // retrying indefinitely.
00103         if (!curl && !urls.isempty())
00104             curl = urls.first();
00105         if (!curl && !waiting_urls.isempty())
00106             curl = waiting_urls.first();
00107         if (curl)
00108             log("URL '%s' is FAILED\n", curl->url);
00109         if (curl) 
00110             curl->done();
00111     }
00112 
00113     if (curl)
00114         curl->done();
00115 }
00116 
00117 
00118 char *WvFtpStream::get_important_line()
00119 {
00120     char *line;
00121     do
00122     {
00123         line = blocking_getline(-1);
00124         if (!line)
00125             return NULL;
00126     }
00127     while (line[3] == '-');
00128     log(WvLog::Debug5, ">> %s\n", line);
00129     return line;
00130 }
00131 
00132 
00133 void WvFtpStream::pre_select(SelectInfo &si)
00134 {
00135     SelectRequest oldwant = si.wants;
00136 
00137     WvUrlStream::pre_select(si);
00138 
00139     if (data)
00140         data->pre_select(si);
00141 
00142     if (curl && curl->putstream) 
00143         curl->putstream->pre_select(si);
00144 
00145     si.wants = oldwant;
00146 }
00147 
00148 
00149 bool WvFtpStream::post_select(SelectInfo &si)
00150 {
00151     SelectRequest oldwant = si.wants;
00152 
00153     if (WvUrlStream::post_select(si))
00154         return true;
00155 
00156     if (data && data->post_select(si))
00157         return true;
00158 
00159     if (curl && curl->putstream && curl->putstream->post_select(si))
00160         return true;
00161 
00162     si.wants = oldwant;
00163 
00164     return false;
00165 }
00166 
00167 
00168 void *WvFtpStream::real_execute(void*)
00169 {
00170     WvString line;
00171     WvStreamClone::execute();
00172 
00173     if (alarm_was_ticking && ((last_request_time + 60) <= time(0)))
00174     {
00175         log(WvLog::Debug4, "urls count: %s\n", urls.count());
00176         if (urls.isempty())
00177             close(); // timed out, but not really an error
00178 
00179         return 0;
00180     }
00181 
00182     if (!logged_in)
00183     {
00184         line = get_important_line();
00185         if (!line)
00186         {
00187             seterr("Server not reachable: %s\n",strerror(errno));
00188             return 0;
00189         }
00190             
00191         if (strncmp(line, "220", 3))
00192         {
00193             log("Server rejected connection: %s\n", line);
00194             seterr("server rejected connection");
00195             return 0;
00196         }
00197         print("USER %s\r\n", !target.username ? WvString("anonymous") :
00198                     target.username);
00199         line = get_important_line();
00200         if (!line)
00201             return 0;
00202 
00203         if (!strncmp(line, "230", 3))
00204         {
00205             log(WvLog::Info, "Server doesn't need password.\n");
00206             logged_in = true;        // No password needed;
00207         }
00208         else if (!strncmp(line, "33", 2))
00209         {
00210             print("PASS %s\r\n", !password ? DEFAULT_ANON_PW : password);
00211             
00212             line = get_important_line();
00213             if (!line)
00214                 return 0;
00215 
00216             if (line[0] == '2')
00217             {
00218                 log(WvLog::Info, "Authenticated.\n");
00219                 logged_in = true;
00220             }
00221             else
00222             {
00223                 log("Strange response to PASS command: %s\n", line);
00224                 seterr("strange response to PASS command");
00225                 return 0;
00226             }
00227         }
00228         else
00229         {
00230             log("Strange response to USER command: %s\n", line);
00231             seterr("strange response to USER command");
00232             return 0;
00233         }
00234 
00235         print("TYPE I\r\n");
00236         log(WvLog::Debug5, "<< TYPE I\n");
00237         line = get_important_line();
00238         if (!line)
00239             return 0;
00240         
00241         if (strncmp(line, "200", 3))
00242         {
00243             log("Strange response to TYPE I command: %s\n", line);
00244             seterr("strange response to TYPE I command");
00245             return 0;
00246         }
00247     }
00248 
00249     if (!curl && !urls.isempty())
00250     {
00251         curl = urls.first();
00252 
00253         print("CWD %s\r\n", curl->url.getfile());
00254         line = get_important_line();
00255         if (!line)
00256             return 0;
00257 
00258         if (!strncmp(line, "250", 3))
00259         {
00260             log(WvLog::Debug5, "This is a directory.\n");
00261             curl->is_dir = true;
00262         }
00263 
00264         print("PASV\r\n");
00265         line = get_important_line();
00266         if (!line)
00267             return 0;
00268         WvIPPortAddr *dataip = parse_pasv_response(line.edit());
00269 
00270         if (!dataip)
00271             return 0;
00272 
00273         log(WvLog::Debug4, "Data port is %s.\n", *dataip);
00274         // Open data connection.
00275         data = new WvTCPConn(*dataip);
00276         if (!data)
00277         {
00278             log("Can't open data connection.\n");
00279             seterr("can't open data connection");
00280             return 0;
00281         }
00282 
00283         if (curl->is_dir)
00284         {
00285             if (!curl->putstream)
00286             {
00287                 print("LIST %s\r\n", curl->url.getfile());
00288                 if (curl->outstream)
00289                 {
00290                     WvString url_no_pw("ftp://%s%s%s%s", curl->url.getuser(),
00291                             !!curl->url.getuser() ? "@" : "", 
00292                             curl->url.gethost(),
00293                             curl->url.getfile());
00294                     curl->outstream->print("<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML "
00295                                            "4.01//EN\">\n"
00296                                            "<html>\n<head>\n<title>%s</title>\n"
00297                                            "<meta http-equiv=\"Content-Type\" "
00298                                            "content=\"text/html; "
00299                                            "charset=ISO-8859-1\">\n"
00300                                            "<base href=\"%s\"/>\n</head>\n"
00301                                            "<style type=\"text/css\">\n"
00302                                            "img { border: 0; padding: 0 2px; vertical-align: "
00303                                            "text-bottom; }\n"
00304                                            "td  { font-family: monospace; padding: 2px 3px; "
00305                                            "text-align: right; vertical-align: bottom; }\n"
00306                                            "td:first-child { text-align: left; padding: "
00307                                            "2px 10px 2px 3px; }\n"
00308                                            "table { border: 0; }\n"
00309                                            "a.symlink { font-style: italic; }\n"
00310                                            "</style>\n<body>\n"
00311                                            "<h1>Index of %s</h1>\n"
00312                                            "<hr/><table>\n", url_no_pw, curl->url, url_no_pw 
00313                                            );
00314                 }
00315             }
00316             else
00317             {
00318                 log("Target is a directory.\n");
00319                 seterr("target is a directory");
00320                 doneurl();
00321                 return 0;
00322             }
00323         }
00324         else if (!curl->putstream)
00325             print("RETR %s\r\n", curl->url.getfile());
00326         else
00327         {
00328             if (curl->create_dirs)
00329             {
00330                 print("CWD %s\r\n", getdirname(curl->url.getfile()));
00331                 line = get_important_line();
00332                 if (!line)
00333                     return 0;
00334                 if (strncmp(line, "250", 3))
00335                 {
00336                     log("Path doesn't exist; creating directories...\n");
00337                     // create missing directories.
00338                     WvString current_dir("");
00339                     WvStringList dirs;
00340                     dirs.split(getdirname(curl->url.getfile()), "/");
00341                     WvStringList::Iter i(dirs);
00342                     for (i.rewind(); i.next(); )
00343                     {
00344                         current_dir.append(WvString("/%s", i()));
00345                         print("MKD %s\r\n", current_dir);
00346                         line = get_important_line();
00347                         if (!line)
00348                             return 0;
00349                     }
00350                 }
00351             }
00352             print("STOR %s\r\n", curl->url.getfile());
00353         }
00354 
00355         log(WvLog::Debug5, "Waiting for response to %s\n", curl->putstream ? "STOR" : 
00356             curl->is_dir ? "LIST" : "RETR");
00357         line = get_important_line();
00358 
00359         if (!line)
00360             doneurl();
00361         else if (strncmp(line, "150", 3))
00362         {
00363             log("Strange response to %s command: %s\n", 
00364                     curl->putstream ? "STOR" : "RETR", line);
00365             seterr(WvString("strange response to %s command",
00366                         curl->putstream ? "STOR" : "RETR"));
00367             doneurl();
00368         }
00369 
00370     }
00371 
00372     if (curl)
00373     {
00374         if (curl->is_dir)
00375         {
00376             line = data->blocking_getline(-1);
00377             if (line && curl->outstream)
00378             {
00379                 WvString output_line(parse_for_links(line.edit()));
00380                 if (!!output_line)
00381                     curl->outstream->write(output_line);
00382                 else
00383                     curl->outstream->write("Unknown format of LIST "
00384                             "response\n");
00385             }
00386         }
00387         else
00388         {
00389             char buf[1024];
00390             
00391             if (curl->putstream)
00392             {
00393                 while (curl->putstream->isreadable())
00394                 {
00395                     int len = curl->putstream->read(buf, sizeof(buf));
00396                     log(WvLog::Debug5, "Read %s bytes.\n%s\n", len, hexdump_buffer(buf, len));
00397 
00398                     if (len)
00399                     {
00400                         int wrote = data->write(buf, len);
00401                         log(WvLog::Debug5,"Wrote %s bytes\n", wrote);
00402                         data->flush(0);
00403                     }
00404                 }
00405                 curl->putstream->close();
00406             }
00407             else
00408             {
00409                 while (data->isreadable() && curl->outstream->isok())
00410                 {
00411                     int len = data->read(buf, sizeof(buf));
00412                     log(WvLog::Debug5, "Read %s bytes from remote.\n", len);
00413                     
00414                     if (len && curl->outstream)
00415                     {
00416                         int wrote = curl->outstream->write(buf, len);
00417                         log(WvLog::Debug5, "Wrote %s bytes to local.\n", wrote);
00418                     }
00419                 }
00420             }
00421         }
00422 
00423         if (!data->isok() || (curl->putstream && !curl->putstream->isok()))
00424         {
00425             log("OK, we should have finished writing!\n");
00426             if (curl->putstream && data->isok())
00427                 data->close();
00428             line = get_important_line();
00429             if (!line)
00430             {
00431                 doneurl();
00432                 return 0;
00433             }
00434 
00435             if (strncmp(line, "226", 3))
00436                 log("Unexpected message: %s\n", line);
00437 
00438             if (curl->is_dir)
00439             {
00440                 if (curl->outstream)
00441                     curl->outstream->write("</table><hr/></body>\n"
00442                             "</html>\n");
00443                 write("CWD /\r\n");
00444                 log(WvLog::Debug5, "Waiting for response to CWD /\n");
00445                 line = get_important_line();
00446                 if (!line)
00447                     return 0;
00448 
00449                 if (strncmp(line, "250", 3))
00450                     log("Strange resonse to \"CWD /\": %s\n", line);
00451                 // Don't bother failing here.
00452             }
00453             doneurl();
00454         }
00455         else
00456         {
00457             log("Why are we here??\n");
00458         }
00459     }
00460 
00461     return 0;
00462 }
00463 
00464 
00465 void WvFtpStream::execute()
00466 {
00467     real_execute(0);
00468 }
00469 
00470 
00471 WvString WvFtpStream::parse_for_links(char *line)
00472 {
00473     WvString output_line("");
00474     trim_string(line);
00475 
00476     if (curl->is_dir && curl->outstream)
00477     {
00478         struct ftpparse fp;
00479         int res = ftpparse(&fp, line, strlen(line));
00480         if (res)
00481         {
00482             char *linkname = (char *)alloca(fp.namelen+1);
00483             int i;
00484             for (i = 0; i < fp.namelen; i++)
00485             {
00486                 if (fp.name[i] >= 32)
00487                     linkname[i] = fp.name[i];
00488                 else
00489                 {
00490                     linkname[i] = '?';
00491                 }
00492             }
00493             linkname[i] = 0;
00494 
00495             WvString linkurl(curl->url);
00496             if (linkurl.cstr()[linkurl.len()-1] != '/')
00497                 linkurl.append("/");
00498             linkurl.append(linkname);
00499             WvUrlLink *link = new WvUrlLink(linkname, linkurl);
00500             curl->outstream->links.append(link, true);
00501 
00502             output_line.append("<tr>\n");
00503 
00504             output_line.append(WvString(" <td>%s%s</td>\n", linkname,
00505                         fp.flagtrycwd ? "/" : ""));
00506 
00507             if (fp.flagtryretr)
00508             {
00509                 if (!fp.sizetype)
00510                     output_line.append(" <td>? bytes</td>\n");
00511                 else
00512                     output_line.append(WvString(" <td>%s bytes</td>\n",
00513                                 fp.size));
00514                 if (fp.mtimetype > 0)
00515                     output_line.append(WvString(" <td>%s</td>\n", (fp.mtime)));
00516                 else
00517                     output_line.append(" <td>?</td>\n");
00518             }
00519             else
00520                 output_line.append(" <td></td>\n");
00521 
00522             output_line.append("</tr>\n");
00523         }
00524     }
00525     return output_line;
00526 }
00527 
00528 
00529 WvIPPortAddr *WvFtpStream::parse_pasv_response(char *line)
00530 {
00531     if (strncmp(line, "227 ", 4))
00532     {
00533         log("Strange response to PASV command: %s\n", line);
00534         seterr("strange response to PASV command");
00535         return NULL;
00536     }
00537 
00538     char *p = &line[3];
00539     while (!isdigit(*p))
00540     {
00541         if (*p == '\0' || *p == '\r' || *p == '\n')
00542         {
00543             log("Couldn't parse PASV response: %s\n", line);
00544             seterr("couldn't parse response to PASV command");
00545             return NULL;
00546         }
00547         p++;
00548     }
00549     char *ipstart = p;
00550 
00551     for (int i = 0; i < 4; i++)
00552     {
00553         p = strchr(p, ',');
00554         if (!p)
00555         {
00556             log("Couldn't parse PASV IP: %s\n", line);
00557             seterr("couldn't parse PASV IP");
00558             return NULL;
00559         }
00560         *p = '.';
00561     }
00562     *p = '\0';
00563     WvString pasvip(ipstart);
00564     p++;
00565     int pasvport;
00566     pasvport = atoi(p)*256;
00567     p = strchr(p, ',');
00568     if (!p)
00569     {
00570         log("Couldn't parse PASV IP port: %s\n", line);
00571         seterr("couldn't parse PASV IP port");
00572         return NULL;
00573     }
00574     pasvport += atoi(++p);
00575 
00576     WvIPPortAddr *res = new WvIPPortAddr(pasvip.cstr(), pasvport);
00577 
00578     return res;
00579 }