WvStreams
wvbackslash.cc
00001 /*
00002  * Worldvisions Weaver Software:
00003  *   Copyright (C) 2002 Net Integration Technologies, Inc.
00004  * 
00005  * Performs C-style backslash escaping and unescaping of strings.
00006  */
00007 #include <ctype.h>
00008 #include "wvbackslash.h"
00009 
// Parallel escape tables, matched by position: escapeout[i] is the letter
// that follows the backslash in the escaped form of control character
// escapein[i] (e.g. index 3 pairs 'n' with '\n').
static const char *escapeout = "abfnrtv";
static const char *escapein = "\a\b\f\n\r\t\v";
00012 
// Converts a value in the range 0..15 to its hexadecimal digit character.
// Values below 10 map onto '0'..'9'; larger values are offset from
// alphabase, which defaults to ('a' - 10) so that 10 becomes 'a'.
// Pass ('A' - 10) to obtain uppercase letters instead.
static inline char tohex(int digit, char alphabase = ('a' - 10))
{
    const char base = (digit >= 10) ? alphabase : '0';
    return char(base + digit);
}
00017 
// Converts one hexadecimal digit character to its numeric value.
// Accepts '0'..'9', 'A'..'F' and 'a'..'f'; returns -1 for anything else.
//
// Uses explicit range checks rather than isdigit(): the argument is a
// plain char, which may be negative for bytes above 127 when char is
// signed, and passing a negative value (other than EOF) to the <ctype.h>
// functions is undefined behaviour.
static inline int fromhex(char digit)
{
    if (digit >= '0' && digit <= '9')
        return digit - '0';
    if (digit >= 'A' && digit <= 'F')
        return digit - 'A' + 10;
    if (digit >= 'a' && digit <= 'f')
        return digit - 'a' + 10;
    return -1;
}
00028 
// Converts one octal digit character ('0'..'7') to its numeric value.
// Returns -1 when the character is not an octal digit.
static inline int fromoctal(char digit)
{
    const bool isoctal = ('0' <= digit && digit <= '7');
    return isoctal ? digit - '0' : -1;
}
00035 
00036 
00037 /***** WvBackslashEncoder *****/
00038 
00039 WvBackslashEncoder::WvBackslashEncoder(WvStringParm _nasties) :
00040     nasties(_nasties)
00041 {
00042 }
00043 
00044 
00045 bool WvBackslashEncoder::_encode(WvBuf &inbuf, WvBuf &outbuf,
00046     bool flush)
00047 {
00048     size_t avail = outbuf.free();
00049     size_t len;
00050     while ((len = inbuf.optgettable()) != 0)
00051     {
00052         const unsigned char *datain = inbuf.get(len);
00053         for (size_t i = 0; i < len; ++i)
00054         {
00055             int c = datain[i];
00056             
00057             // handle 1 character escape sequences
00058             if (avail < 1)
00059                 { outbuf.unget(len - i); return ! flush; }
00060             const char *foundnasty = NULL;
00061             const char *foundspecial = NULL;
00062             if (c != '\0')
00063             {
00064                 foundnasty = strchr(nasties.cstr(), c);
00065                 if (! foundnasty)
00066                 {
00067                     foundspecial = strchr(escapein, c);
00068                     if (! foundspecial && isprint(c))
00069                     {
00070                         outbuf.putch(c);
00071                         avail -= 1;
00072                         continue;
00073                     }
00074                 }
00075             }
00076             
00077             // handle 2 character escape sequences
00078             if (avail < 2)
00079                 { outbuf.unget(len - i); return ! flush; }
00080             if (foundnasty != NULL)
00081             {
00082                 outbuf.putch('\\');
00083                 outbuf.putch(c);
00084                 avail -= 2;
00085                 continue;
00086             }
00087             if (foundspecial != NULL)
00088             {
00089                 outbuf.putch('\\');
00090                 outbuf.putch(escapeout[foundspecial - escapein]);
00091                 avail -= 2;
00092                 continue;
00093             }
00094 
00095             // handle 4 character escape sequences
00096             if (avail < 4)
00097                 { outbuf.unget(len - i); return ! flush; }
00098             outbuf.put("\\x", 2);
00099             outbuf.putch(tohex(c >> 4));
00100             outbuf.putch(tohex(c & 15));
00101             avail -= 4;
00102         }
00103     }
00104     return true;
00105 }
00106 
00107 
00108 bool WvBackslashEncoder::_reset()
00109 {
00110     return true;
00111 }
00112 
00113 
00114 /***** WvBackslashDecoder *****/
00115 
00116 WvBackslashDecoder::WvBackslashDecoder() : tmpbuf(4)
00117 {
00118     _reset();
00119 }
00120 
00121 
00122 bool WvBackslashDecoder::_encode(WvBuf &inbuf, WvBuf &outbuf,
00123     bool flush)
00124 {
00125     if (outbuf.free() == 0)
00126         return inbuf.used() == 0;
00127     if (! flushtmpbuf(outbuf))
00128         return false;
00129 
00130     size_t len;
00131     while ((len = inbuf.optgettable()) != 0)
00132     {
00133         const unsigned char *datain = inbuf.get(len);
00134         for (size_t i = 0; i < len; ++i)
00135         {
00136             int c = datain[i];
00137 
00138             switch (state)
00139             {
00140                 case Initial:
00141                     if (c == '\\')
00142                         state = Escape;
00143                     tmpbuf.putch(c);
00144                     break;
00145                 
00146                 case Escape:
00147                     if (c >= '0' && c <= '3')
00148                     {
00149                         tmpbuf.unalloc(1);
00150                         value = c - '0';
00151                         state = Octal1;
00152                     }
00153                     else if (c == 'x')
00154                     {
00155                         tmpbuf.putch(c);
00156                         state = Hex1;
00157                     }
00158                     else if (c == '\n')
00159                     {
00160                         // line continuation sequence
00161                         tmpbuf.unalloc(1);
00162                         tmpbuf.putch('\n');
00163                         state = Initial;
00164                     }
00165                     else
00166                     {
00167                         const char *found = strchr(escapeout, c);
00168                         tmpbuf.unalloc(1);
00169                         if (found != NULL)
00170                             c = escapein[found - escapeout];
00171                         // else we just drop the backslash
00172                         tmpbuf.putch(c);
00173                         state = Initial;
00174                     }
00175                     break;
00176 
00177                 case Hex2:
00178                 case Hex1: {
00179                     int digit = fromhex(c);
00180                     if (digit >= 0)
00181                     {
00182                         if (state == Hex1)
00183                         {
00184                             tmpbuf.unalloc(2);
00185                             value = digit;
00186                             state = Hex2;
00187                         }
00188                         else
00189                         {
00190                             value = (value << 4) | digit;
00191                             state = Initial;
00192                         }
00193                     }
00194                     else
00195                     {
00196                         i -= 1;
00197                         state = Initial;
00198                     }
00199                     break;
00200                 }
00201 
00202                 case Octal3:
00203                 case Octal2:
00204                 case Octal1: {
00205                     int digit = fromoctal(c);
00206                     if (digit >= 0)
00207                     {
00208                         value = (value << 3) | digit;
00209                         if (state != Octal3)
00210                             state = State(state + 1);
00211                         else
00212                             state = Initial;
00213                     }
00214                     else
00215                     {
00216                         i -= 1;
00217                         state = Initial;
00218                     }
00219                     break;
00220                 }
00221             }
00222 
00223             flushtmpbuf(outbuf);
00224             if (outbuf.free() == 0)
00225             {
00226                 inbuf.unget(len - i);
00227                 break;
00228             }
00229         }
00230     }
00231     if (flush)
00232     {
00233         if (inbuf.used() != 0)
00234             return false;
00235         state = Initial;
00236         return flushtmpbuf(outbuf);
00237     }
00238     return true;
00239 
00240 }
00241 
00242 
00243 bool WvBackslashDecoder::_reset()
00244 {
00245     state = Initial;
00246     value = -1;
00247     tmpbuf.zap();
00248     return true;
00249 }
00250 
00251 
00252 bool WvBackslashDecoder::flushtmpbuf(WvBuf &outbuf)
00253 {
00254     if (state != Initial)
00255         return true;
00256         
00257     if (value != -1)
00258     {
00259         tmpbuf.putch(value);
00260         value = -1;
00261     }
00262     
00263     size_t len = tmpbuf.used();
00264     if (len == 0)
00265         return true;
00266     size_t avail = outbuf.free();
00267     if (avail > len)
00268         avail = len;
00269     outbuf.merge(tmpbuf, avail);
00270     len -= avail;
00271     if (len == 0)
00272     {
00273         tmpbuf.zap();
00274         return true;
00275     }
00276     return false;
00277 }