WvStreams
ftpparse.cc
00001 /* ftpparse.c, ftpparse.h: library for parsing FTP LIST responses
00002 20001223
00003 D. J. Bernstein, djb@cr.yp.to
00004 http://cr.yp.to/ftpparse.html
00005 
00006 Commercial use is fine, if you let me know what programs you're using this in.
00007 
00008 Currently covered formats:
00009 EPLF.
00010 UNIX ls, with or without gid.
00011 Microsoft FTP Service.
00012 Windows NT FTP Server.
00013 VMS.
00014 WFTPD.
00015 NetPresenz (Mac).
00016 NetWare.
00017 MSDOS.
00018 
00019 Definitely not covered: 
00020 Long VMS filenames, with information split across two lines.
00021 NCSA Telnet FTP server. Has LIST = NLST (and bad NLST for directories).
00022 */
00023 
00024 #include <time.h>
00025 #include "ftpparse.h"
00026 
/*
 * Convert a calendar date to seconds on a scale where 1970-01-01 maps to 0.
 *
 * year:  full year, e.g. 1996
 * month: 0-based month (0 = January), as in struct tm's tm_mon
 * mday:  1-based day of the month
 *
 * The year is treated as starting in March so that the leap day is the
 * last day of the year; days are then accumulated over 4-year, 100-year
 * and 400-year cycles.
 */
static long totai(long year,long month,long mday)
{
  enum {
    DAYS_PER_YEAR = 365,
    DAYS_PER_4_YEARS = 1461,
    DAYS_PER_CENTURY = 36524,
    DAYS_PER_400_YEARS = 146097,
    EPOCH_DAY_OFFSET = 11017,
    SECONDS_PER_DAY = 86400
  };
  long days;

  /* January and February belong to the previous March-based year. */
  if (month < 2) {
    month += 10;
    --year;
  } else {
    month -= 2;
  }

  /* Day index within the March-based year. */
  days = (mday - 1) * 10 + 5 + 306 * month;
  days /= 10;

  /* Index 365 only exists in a leap year (e.g. Feb 29): it is the last */
  /* day of its 4-year cycle. */
  if (days == DAYS_PER_YEAR) {
    year -= 3;
    days = DAYS_PER_4_YEARS - 1;
  } else {
    days += DAYS_PER_YEAR * (year % 4);
  }
  year /= 4;

  days += DAYS_PER_4_YEARS * (year % 25);
  year /= 25;

  /* Likewise, the last day of a 400-year cycle. */
  if (days == DAYS_PER_CENTURY) {
    year -= 3;
    days = DAYS_PER_400_YEARS - 1;
  } else {
    days += DAYS_PER_CENTURY * (year % 4);
  }
  year /= 4;

  days += DAYS_PER_400_YEARS * (year - 5);
  days += EPOCH_DAY_OFFSET;

  return days * SECONDS_PER_DAY;
}
00046 
00047 static int flagneedbase = 1; /* nonzero until initbase() has computed base */
00048 static time_t base; /* time() value on this OS at the beginning of 1970 TAI */
00049 static long now; /* current time, as time() minus base; refreshed by every initnow() call */
00050 static int flagneedcurrentyear = 1; /* nonzero until initnow() has computed currentyear */
00051 static long currentyear; /* approximation to current year; computed once, on the first initnow() call */
00052 
00053 static void initbase(void)
00054 {
00055   struct tm *t;
00056   if (!flagneedbase) return;
00057 
00058   base = 0;
00059   t = gmtime(&base);
00060   base = -(totai(t->tm_year + 1900,t->tm_mon,t->tm_mday) + t->tm_hour * 3600 + t->tm_min * 60 + t->tm_sec);
00061   /* assumes the right time_t, counting seconds. */
00062   /* base may be slightly off if time_t counts non-leap seconds. */
00063   flagneedbase = 0;
00064 }
00065 
00066 static void initnow(void)
00067 {
00068   long day;
00069   long year;
00070 
00071   initbase();
00072   now = time((time_t *) 0) - base;
00073 
00074   if (flagneedcurrentyear) {
00075     day = now / 86400;
00076     if ((now % 86400) < 0) --day;
00077     day -= 11017;
00078     year = 5 + day / 146097;
00079     day = day % 146097;
00080     if (day < 0) { day += 146097; --year; }
00081     year *= 4;
00082     if (day == 146096) { year += 3; day = 36524; }
00083     else { year += day / 36524; day %= 36524; }
00084     year *= 25;
00085     year += day / 1461;
00086     day %= 1461;
00087     year *= 4;
00088     if (day == 1460) { year += 3; day = 365; }
00089     else { year += day / 365; day %= 365; }
00090     day *= 10;
00091     if ((day + 5) / 306 >= 10) ++year;
00092     currentyear = year;
00093     flagneedcurrentyear = 0;
00094   }
00095 }
00096 
00097 /* UNIX ls does not show the year for dates in the last six months. */
00098 /* So we have to guess the year. */
00099 /* Apparently NetWare uses ``twelve months'' instead of ``six months''; ugh. */
00100 /* Some versions of ls also fail to show the year for future dates. */
00101 static long guesstai(long month,long mday)
00102 {
00103   long year;
00104   long t;
00105 
00106   initnow();
00107 
00108   for (year = currentyear - 1;year < currentyear + 100;++year) {
00109     t = totai(year,month,mday);
00110     if (now - t < 350 * 86400)
00111       return t;
00112   }
00113   return 0;  /* shouldn't happen, but this gets rid of compiler warnings */
00114 }
00115 
/*
 * Compare the first three characters of buf against a lowercase month
 * abbreviation. Each character matches if it equals the corresponding
 * monthname character or that character minus 32 (its uppercase form in
 * ASCII). Returns 1 on a match, 0 otherwise.
 */
static int check(char *buf, const char *monthname)
{
  int k;

  for (k = 0; k < 3; ++k) {
    if (buf[k] == monthname[k])
      continue;
    if (buf[k] == monthname[k] - 32)
      continue;
    return 0;
  }
  return 1;
}
00123 
/* Lowercase month abbreviations, indexed by 0-based month number. */
static const char *months[12] = {
  "jan","feb","mar","apr",
  "may","jun","jul","aug",
  "sep","oct","nov","dec"
} ;

/*
 * Map a three-character month abbreviation (any mix of upper and lower
 * case) to its 0-based month number. Returns -1 if len is not 3 or the
 * token matches no month.
 */
static int getmonth(char *buf, int len)
{
  int idx;

  if (len != 3)
    return -1;

  for (idx = 0; idx < 12; ++idx) {
    if (check(buf,months[idx]))
      return idx;
  }
  return -1;
}
00136 
/*
 * Interpret the first len characters of buf as a decimal number.
 *
 * No validation is done: every character contributes (c - '0'), so a
 * non-numeric token yields a meaningless value. The listing parser relies
 * on this, since it calls getlong() on tokens that later turn out not to
 * be the size. A len of 0 or less yields 0.
 *
 * The value is accumulated in unsigned arithmetic so that long or
 * non-numeric tokens wrap around instead of overflowing a signed long,
 * which would be undefined behaviour. Results for values that fit in a
 * long are unchanged.
 */
static long getlong(char *buf,int len)
{
  unsigned long acc = 0;
  int k;

  for (k = 0; k < len; ++k)
    acc = acc * 10 + (unsigned long) (long) (buf[k] - '0');

  return (long) acc;
}
00144 
/*
 * Parse one line of an FTP LIST response.
 *
 * fp:  result record; every field is reset to its "unknown"/zero value
 *      before parsing starts.
 * buf: the line, len characters long. On success fp->name (and fp->id for
 *      EPLF) point into buf; nothing is copied, so use namelen/idlen
 *      rather than expecting a terminator.
 * Returns 1 if a file name was extracted, 0 otherwise (line shorter than
 * 2 characters, unrecognised, or cut off in the middle of a field).
 *
 * Formats are tried in this order: EPLF (line starts with '+'), UNIX-style
 * ls (starts with one of b c d l p s -), VMS/MultiNet (line contains ';'),
 * MSDOS (line starts with a digit).
 */
00145 int ftpparse(struct ftpparse *fp,char *buf,int len)
00146 {
00147   int i;
00148   int j;
00149   int state;
00150   long size = 0;
00151   long year;
00152   long month = 0;
00153   long mday = 0;
00154   long hour;
00155   long minute;
00156 
00157   fp->name = 0;
00158   fp->namelen = 0;
00159   fp->flagtrycwd = 0;
00160   fp->flagtryretr = 0;
00161   fp->sizetype = FTPPARSE_SIZE_UNKNOWN;
00162   fp->size = 0;
00163   fp->mtimetype = FTPPARSE_MTIME_UNKNOWN;
00164   fp->mtime = 0;
00165   fp->idtype = FTPPARSE_ID_UNKNOWN;
00166   fp->id = 0;
00167   fp->idlen = 0;
00168 
00169   if (len < 2) /* an empty name in EPLF, with no info, could be 2 chars */
00170     return 0;
00171 
00172   switch(*buf) {
00173     /* see http://pobox.com/~djb/proto/eplf.txt */
00174     /* "+i8388621.29609,m824255902,/,\tdev" */
00175     /* "+i8388621.44468,m839956783,r,s10376,\tRFCEPLF" */
00176     case '+':
00177       i = 1; /* i: start of the current fact; j scans ahead */
00178       for (j = 1;j < len;++j) {
00179         if (buf[j] == 9) { /* tab: end of facts, the rest of the line is the name */
00180           fp->name = buf + j + 1;
00181           fp->namelen = len - j - 1;
00182           return 1;
00183         }
00184         if (buf[j] == ',') { /* comma ends the fact buf[i..j) */
00185           switch(buf[i]) { /* first character selects the fact type */
00186             case '/':
00187               fp->flagtrycwd = 1;
00188               break;
00189             case 'r':
00190               fp->flagtryretr = 1;
00191               break;
00192             case 's':
00193               fp->sizetype = FTPPARSE_SIZE_BINARY;
00194               fp->size = getlong(buf + i + 1,j - i - 1);
00195               break;
00196             case 'm':
00197               fp->mtimetype = FTPPARSE_MTIME_LOCAL;
00198               initbase();
00199               fp->mtime = base + getlong(buf + i + 1,j - i - 1);
00200               break;
00201             case 'i':
00202               fp->idtype = FTPPARSE_ID_FULL;
00203               fp->id = buf + i + 1;
00204               fp->idlen = j - i - 1; /* last case: no break needed; other fact types are ignored */
00205           }
00206           i = j + 1;
00207         }
00208       }
00209       return 0; /* no tab found, so no name */
00210     
00211     /* UNIX-style listing, without inum and without blocks */
00212     /* "-rw-r--r--   1 root     other        531 Jan 29 03:26 README" */
00213     /* "dr-xr-xr-x   2 root     other        512 Apr  8  1994 etc" */
00214     /* "dr-xr-xr-x   2 root     512 Apr  8  1994 etc" */
00215     /* "lrwxrwxrwx   1 root     other          7 Jan 25 00:17 bin -> usr/bin" */
00216     /* Also produced by Microsoft's FTP servers for Windows: */
00217     /* "----------   1 owner    group         1803128 Jul 10 10:18 ls-lR.Z" */
00218     /* "d---------   1 owner    group               0 May  9 19:45 Softlib" */
00219     /* Also WFTPD for MSDOS: */
00220     /* "-rwxrwxrwx   1 noone    nogroup      322 Aug 19  1996 message.ftp" */
00221     /* Also NetWare: */
00222     /* "d [R----F--] supervisor            512       Jan 16 18:53    login" */
00223     /* "- [R----F--] rhesus             214059       Oct 20 15:27    cx.exe" */
00224     /* Also NetPresenz for the Mac: */
00225     /* "-------r--         326  1391972  1392298 Nov 22  1995 MegaPhone.sit" */
00226     /* "drwxrwxr-x               folder        2 May 10  1996 network" */
00227     case 'b':
00228     case 'c':
00229     case 'd':
00230     case 'l':
00231     case 'p':
00232     case 's':
00233     case '-':
00234 
00235       if (*buf == 'd') fp->flagtrycwd = 1;
00236       if (*buf == '-') fp->flagtryretr = 1;
00237       if (*buf == 'l') fp->flagtrycwd = fp->flagtryretr = 1; /* a link may point to either a directory or a file */
00238 
00239       state = 1;
00240       i = 0; /* i: start of the current token */
00241       for (j = 1;j < len;++j)
00242         if ((buf[j] == ' ') && (buf[j - 1] != ' ')) { /* first space after a token: buf[i..j) is that token */
00243           switch(state) {
00244             case 1: /* skipping perm */
00245               state = 2;
00246               break;
00247             case 2: /* skipping nlink */
00248               state = 3;
00249               if ((j - i == 6) && (buf[i] == 'f')) /* for NetPresenz */
00250                 state = 4; /* a 6-character token starting with 'f' (e.g. "folder"): go straight to the size */
00251               break;
00252             case 3: /* skipping uid */
00253               state = 4;
00254               break;
00255             case 4: /* getting tentative size */
00256               size = getlong(buf + i,j - i);
00257               state = 5;
00258               break;
00259             case 5: /* searching for month, otherwise getting tentative size */
00260               month = getmonth(buf + i,j - i);
00261               if (month >= 0)
00262                 state = 6;
00263               else
00264                 size = getlong(buf + i,j - i); /* not a month: the latest such token wins as the size */
00265               break;
00266             case 6: /* have size and month */
00267               mday = getlong(buf + i,j - i);
00268               state = 7;
00269               break;
00270             case 7: /* have size, month, mday */
00271               if ((j - i == 4) && (buf[i + 1] == ':')) { /* "H:MM": no year given, so guess it */
00272                 hour = getlong(buf + i,1);
00273                 minute = getlong(buf + i + 2,2);
00274                 fp->mtimetype = FTPPARSE_MTIME_REMOTEMINUTE;
00275                 initbase();
00276                 fp->mtime = base + guesstai(month,mday) + hour * 3600 + minute * 60;
00277               } else if ((j - i == 5) && (buf[i + 2] == ':')) { /* "HH:MM": no year given, so guess it */
00278                 hour = getlong(buf + i,2);
00279                 minute = getlong(buf + i + 3,2);
00280                 fp->mtimetype = FTPPARSE_MTIME_REMOTEMINUTE;
00281                 initbase();
00282                 fp->mtime = base + guesstai(month,mday) + hour * 3600 + minute * 60;
00283               }
00284               else if (j - i >= 4) { /* otherwise a token of 4 or more characters is taken as the year */
00285                 year = getlong(buf + i,j - i);
00286                 fp->mtimetype = FTPPARSE_MTIME_REMOTEDAY;
00287                 initbase();
00288                 fp->mtime = base + totai(year,month,mday);
00289               }
00290               else
00291                 return 0;
00292               fp->name = buf + j + 1; /* the name is everything after the single space that ended this token */
00293               fp->namelen = len - j - 1;
00294               state = 8;
00295               break;
00296             case 8: /* twiddling thumbs */
00297               break;
00298           }
00299           i = j + 1;
00300           while ((i < len) && (buf[i] == ' ')) ++i; /* move i to the start of the next token */
00301         }
00302 
00303       if (state != 8) /* the line ended before a name was reached */
00304         return 0;
00305 
00306       fp->size = size;
00307       fp->sizetype = FTPPARSE_SIZE_BINARY;
00308 
00309       if (*buf == 'l') /* symbolic link: cut the name at the first " -> " */
00310         for (i = 0;i + 3 < fp->namelen;++i)
00311           if (fp->name[i] == ' ')
00312             if (fp->name[i + 1] == '-')
00313               if (fp->name[i + 2] == '>')
00314                 if (fp->name[i + 3] == ' ') {
00315                   fp->namelen = i;
00316                   break;
00317                 }
00318 
00319       /* eliminate extra NetWare spaces */
00320       if ((buf[1] == ' ') || (buf[1] == '['))
00321         if (fp->namelen > 3)
00322           if (fp->name[0] == ' ')
00323             if (fp->name[1] == ' ')
00324               if (fp->name[2] == ' ') {
00325                 fp->name += 3;
00326                 fp->namelen -= 3;
00327               }
00328 
00329       return 1;
00330   }
00331 
00332   /* MultiNet (some spaces removed from examples) */
00333   /* "00README.TXT;1      2 30-DEC-1996 17:44 [SYSTEM] (RWED,RWED,RE,RE)" */
00334   /* "CORE.DIR;1          1  8-SEP-1996 16:09 [SYSTEM] (RWE,RWE,RE,RE)" */
00335   /* and non-MultiNet VMS: */
00336   /* "CII-MANUAL.TEX;1  213/216  29-JAN-1996 03:33:12  [ANONYMOU,ANONYMOUS]   (RWED,RWED,,)" */
00337   for (i = 0;i < len;++i)
00338     if (buf[i] == ';')
00339       break;
00340   if (i < len) { /* found ';': the name is everything before it */
00341     fp->name = buf;
00342     fp->namelen = i;
00343     if (i > 4) /* a ".DIR" suffix marks a directory and is dropped from the name */
00344       if (buf[i - 4] == '.')
00345         if (buf[i - 3] == 'D')
00346           if (buf[i - 2] == 'I')
00347             if (buf[i - 1] == 'R') {
00348               fp->namelen -= 4;
00349               fp->flagtrycwd = 1;
00350             }
00351     if (!fp->flagtrycwd)
00352       fp->flagtryretr = 1;
00353     while (buf[i] != ' ') if (++i == len) return 0; /* skip the rest of the ";version" token */
00354     while (buf[i] == ' ') if (++i == len) return 0;
00355     while (buf[i] != ' ') if (++i == len) return 0; /* skip the next column (a block count in the examples) */
00356     while (buf[i] == ' ') if (++i == len) return 0;
00357     j = i;
00358     while (buf[j] != '-') if (++j == len) return 0; /* day of month, up to the first '-' */
00359     mday = getlong(buf + i,j - i);
00360     while (buf[j] == '-') if (++j == len) return 0;
00361     i = j;
00362     while (buf[j] != '-') if (++j == len) return 0; /* month abbreviation, up to the second '-' */
00363     month = getmonth(buf + i,j - i);
00364     if (month < 0) return 0;
00365     while (buf[j] == '-') if (++j == len) return 0;
00366     i = j;
00367     while (buf[j] != ' ') if (++j == len) return 0; /* year, up to the next space */
00368     year = getlong(buf + i,j - i);
00369     while (buf[j] == ' ') if (++j == len) return 0;
00370     i = j;
00371     while (buf[j] != ':') if (++j == len) return 0; /* hour, up to ':' */
00372     hour = getlong(buf + i,j - i);
00373     while (buf[j] == ':') if (++j == len) return 0;
00374     i = j;
00375     while ((buf[j] != ':') && (buf[j] != ' ')) if (++j == len) return 0; /* minute; any seconds field is ignored */
00376     minute = getlong(buf + i,j - i);
00377 
00378     fp->mtimetype = FTPPARSE_MTIME_REMOTEMINUTE;
00379     initbase();
00380     fp->mtime = base + totai(year,month,mday) + hour * 3600 + minute * 60;
00381 
00382     return 1;
00383   }
00384 
00385   /* MSDOS format */
00386   /* 04-27-00  09:09PM       <DIR>          licensed */
00387   /* 07-18-00  10:16AM       <DIR>          pub */
00388   /* 04-14-00  03:47PM                  589 readme.htm */
00389   if ((*buf >= '0') && (*buf <= '9')) {
00390     i = 0;
00391     j = 0;
00392     while (buf[j] != '-') if (++j == len) return 0;
00393     month = getlong(buf + i,j - i) - 1; /* the listing's month is 1-based; totai() wants 0-based */
00394     while (buf[j] == '-') if (++j == len) return 0;
00395     i = j;
00396     while (buf[j] != '-') if (++j == len) return 0;
00397     mday = getlong(buf + i,j - i);
00398     while (buf[j] == '-') if (++j == len) return 0;
00399     i = j;
00400     while (buf[j] != ' ') if (++j == len) return 0;
00401     year = getlong(buf + i,j - i);
00402     if (year < 50) year += 2000; /* two-digit years: 00-49 become 2000-2049 */
00403     if (year < 1000) year += 1900; /* and 50-999 get 1900 added */
00404     while (buf[j] == ' ') if (++j == len) return 0;
00405     i = j;
00406     while (buf[j] != ':') if (++j == len) return 0;
00407     hour = getlong(buf + i,j - i);
00408     while (buf[j] == ':') if (++j == len) return 0;
00409     i = j;
00410     while ((buf[j] != 'A') && (buf[j] != 'P')) if (++j == len) return 0;
00411     minute = getlong(buf + i,j - i);
00412     if (hour == 12) hour = 0; /* 12-hour clock: 12 counts as 0, then PM adds 12 below */
00413     if (buf[j] == 'A') if (++j == len) return 0;
00414     if (buf[j] == 'P') { hour += 12; if (++j == len) return 0; }
00415     if (buf[j] == 'M') if (++j == len) return 0;
00416 
00417     while (buf[j] == ' ') if (++j == len) return 0;
00418     if (buf[j] == '<') { /* a token starting with '<' (as in "<DIR>") marks a directory */
00419       fp->flagtrycwd = 1;
00420       while (buf[j] != ' ') if (++j == len) return 0;
00421     }
00422     else { /* anything else is taken as the file size */
00423       i = j;
00424       while (buf[j] != ' ') if (++j == len) return 0;
00425       fp->size = getlong(buf + i,j - i);
00426       fp->sizetype = FTPPARSE_SIZE_BINARY;
00427       fp->flagtryretr = 1;
00428     }
00429     while (buf[j] == ' ') if (++j == len) return 0;
00430 
00431     fp->name = buf + j; /* the name is the rest of the line */
00432     fp->namelen = len - j;
00433 
00434     fp->mtimetype = FTPPARSE_MTIME_REMOTEMINUTE;
00435     initbase();
00436     fp->mtime = base + totai(year,month,mday) + hour * 3600 + minute * 60;
00437 
00438     return 1;
00439   }
00440 
00441   /* Some useless lines, safely ignored: */
00442   /* "Total of 11 Files, 10966 Blocks." (VMS) */
00443   /* "total 14786" (UNIX) */
00444   /* "DISK$ANONFTP:[ANONYMOUS]" (VMS) */
00445   /* "Directory DISK$PCSA:[ANONYM]" (VMS) */
00446 
00447   return 0;
00448 }