WvStreams
|
00001 /* 00002 * Worldvisions Weaver Software: 00003 * Copyright (C) 2002 Net Integration Technologies, Inc. 00004 * 00005 * Performs C-style backslash escaping and unescaping of strings. 00006 */ 00007 #include <ctype.h> 00008 #include "wvbackslash.h" 00009 00010 static const char *escapein = "\a\b\f\n\r\t\v"; 00011 static const char *escapeout = "abfnrtv"; 00012 00013 static inline char tohex(int digit, char alphabase = ('a' - 10)) 00014 { 00015 return (digit < 10 ? '0' : alphabase) + digit; 00016 } 00017 00018 static inline int fromhex(char digit) 00019 { 00020 if (isdigit(digit)) 00021 return digit - '0'; 00022 if (digit >= 'A' && digit <= 'F') 00023 return digit - 'A' + 10; 00024 if (digit >= 'a' && digit <= 'f') 00025 return digit - 'a' + 10; 00026 return -1; 00027 } 00028 00029 static inline int fromoctal(char digit) 00030 { 00031 if (digit >= '0' && digit <= '7') 00032 return digit - '0'; 00033 return -1; 00034 } 00035 00036 00037 /***** WvBackslashEncoder *****/ 00038 00039 WvBackslashEncoder::WvBackslashEncoder(WvStringParm _nasties) : 00040 nasties(_nasties) 00041 { 00042 } 00043 00044 00045 bool WvBackslashEncoder::_encode(WvBuf &inbuf, WvBuf &outbuf, 00046 bool flush) 00047 { 00048 size_t avail = outbuf.free(); 00049 size_t len; 00050 while ((len = inbuf.optgettable()) != 0) 00051 { 00052 const unsigned char *datain = inbuf.get(len); 00053 for (size_t i = 0; i < len; ++i) 00054 { 00055 int c = datain[i]; 00056 00057 // handle 1 character escape sequences 00058 if (avail < 1) 00059 { outbuf.unget(len - i); return ! flush; } 00060 const char *foundnasty = NULL; 00061 const char *foundspecial = NULL; 00062 if (c != '\0') 00063 { 00064 foundnasty = strchr(nasties.cstr(), c); 00065 if (! foundnasty) 00066 { 00067 foundspecial = strchr(escapein, c); 00068 if (! foundspecial && isprint(c)) 00069 { 00070 outbuf.putch(c); 00071 avail -= 1; 00072 continue; 00073 } 00074 } 00075 } 00076 00077 // handle 2 character escape sequences 00078 if (avail < 2) 00079 { outbuf.unget(len - i); return ! flush; } 00080 if (foundnasty != NULL) 00081 { 00082 outbuf.putch('\\'); 00083 outbuf.putch(c); 00084 avail -= 2; 00085 continue; 00086 } 00087 if (foundspecial != NULL) 00088 { 00089 outbuf.putch('\\'); 00090 outbuf.putch(escapeout[foundspecial - escapein]); 00091 avail -= 2; 00092 continue; 00093 } 00094 00095 // handle 4 character escape sequences 00096 if (avail < 4) 00097 { outbuf.unget(len - i); return ! flush; } 00098 outbuf.put("\\x", 2); 00099 outbuf.putch(tohex(c >> 4)); 00100 outbuf.putch(tohex(c & 15)); 00101 avail -= 4; 00102 } 00103 } 00104 return true; 00105 } 00106 00107 00108 bool WvBackslashEncoder::_reset() 00109 { 00110 return true; 00111 } 00112 00113 00114 /***** WvBackslashDecoder *****/ 00115 00116 WvBackslashDecoder::WvBackslashDecoder() : tmpbuf(4) 00117 { 00118 _reset(); 00119 } 00120 00121 00122 bool WvBackslashDecoder::_encode(WvBuf &inbuf, WvBuf &outbuf, 00123 bool flush) 00124 { 00125 if (outbuf.free() == 0) 00126 return inbuf.used() == 0; 00127 if (! flushtmpbuf(outbuf)) 00128 return false; 00129 00130 size_t len; 00131 while ((len = inbuf.optgettable()) != 0) 00132 { 00133 const unsigned char *datain = inbuf.get(len); 00134 for (size_t i = 0; i < len; ++i) 00135 { 00136 int c = datain[i]; 00137 00138 switch (state) 00139 { 00140 case Initial: 00141 if (c == '\\') 00142 state = Escape; 00143 tmpbuf.putch(c); 00144 break; 00145 00146 case Escape: 00147 if (c >= '0' && c <= '3') 00148 { 00149 tmpbuf.unalloc(1); 00150 value = c - '0'; 00151 state = Octal1; 00152 } 00153 else if (c == 'x') 00154 { 00155 tmpbuf.putch(c); 00156 state = Hex1; 00157 } 00158 else if (c == '\n') 00159 { 00160 // line continuation sequence 00161 tmpbuf.unalloc(1); 00162 tmpbuf.putch('\n'); 00163 state = Initial; 00164 } 00165 else 00166 { 00167 const char *found = strchr(escapeout, c); 00168 tmpbuf.unalloc(1); 00169 if (found != NULL) 00170 c = escapein[found - escapeout]; 00171 // else we just drop the backslash 00172 tmpbuf.putch(c); 00173 state = Initial; 00174 } 00175 break; 00176 00177 case Hex2: 00178 case Hex1: { 00179 int digit = fromhex(c); 00180 if (digit >= 0) 00181 { 00182 if (state == Hex1) 00183 { 00184 tmpbuf.unalloc(2); 00185 value = digit; 00186 state = Hex2; 00187 } 00188 else 00189 { 00190 value = (value << 4) | digit; 00191 state = Initial; 00192 } 00193 } 00194 else 00195 { 00196 i -= 1; 00197 state = Initial; 00198 } 00199 break; 00200 } 00201 00202 case Octal3: 00203 case Octal2: 00204 case Octal1: { 00205 int digit = fromoctal(c); 00206 if (digit >= 0) 00207 { 00208 value = (value << 3) | digit; 00209 if (state != Octal3) 00210 state = State(state + 1); 00211 else 00212 state = Initial; 00213 } 00214 else 00215 { 00216 i -= 1; 00217 state = Initial; 00218 } 00219 break; 00220 } 00221 } 00222 00223 flushtmpbuf(outbuf); 00224 if (outbuf.free() == 0) 00225 { 00226 inbuf.unget(len - i); 00227 break; 00228 } 00229 } 00230 } 00231 if (flush) 00232 { 00233 if (inbuf.used() != 0) 00234 return false; 00235 state = Initial; 00236 return flushtmpbuf(outbuf); 00237 } 00238 return true; 00239 00240 } 00241 00242 00243 bool WvBackslashDecoder::_reset() 00244 { 00245 state = Initial; 00246 value = -1; 00247 tmpbuf.zap(); 00248 return true; 00249 } 00250 00251 00252 bool WvBackslashDecoder::flushtmpbuf(WvBuf &outbuf) 00253 { 00254 if (state != Initial) 00255 return true; 00256 00257 if (value != -1) 00258 { 00259 tmpbuf.putch(value); 00260 value = -1; 00261 } 00262 00263 size_t len = tmpbuf.used(); 00264 if (len == 0) 00265 return true; 00266 size_t avail = outbuf.free(); 00267 if (avail > len) 00268 avail = len; 00269 outbuf.merge(tmpbuf, avail); 00270 len -= avail; 00271 if (len == 0) 00272 { 00273 tmpbuf.zap(); 00274 return true; 00275 } 00276 return false; 00277 }