WvStreams
|
00001 /* 00002 * Worldvisions Weaver Software: 00003 * Copyright (C) 1997-2002 Net Integration Technologies, Inc. 00004 */ 00005 #include "wvbackslash.h" 00006 #include "wvbuf.h" 00007 #include "wvstream.h" 00008 #include "wvstring.h" 00009 #include "wvstringmask.h" 00010 #include "wvtclstring.h" 00011 #include <climits> 00012 00013 const WvStringMask WVTCL_NASTY_SPACES(WVTCL_NASTY_SPACES_STR); 00014 const WvStringMask WVTCL_NASTY_NEWLINES(WVTCL_NASTY_NEWLINES_STR); 00015 const WvStringMask WVTCL_SPLITCHARS(WVTCL_SPLITCHARS_STR); 00016 00017 static size_t wvtcl_escape(char *dst, const char *s, size_t s_len, 00018 const WvStringMask &nasties, bool *verbatim = NULL) 00019 { 00020 if (verbatim) *verbatim = false; 00021 00022 // NULL strings remain such 00023 if (s == NULL) 00024 return 0; 00025 // empty strings are just {} 00026 if (s_len == 0) 00027 { 00028 if (dst) 00029 { 00030 dst[0] = '{'; 00031 dst[1] = '}'; 00032 } 00033 return 2; 00034 } 00035 00036 bool backslashify = false, inescape = false; 00037 int len = 0, unprintables = 0, bracecount = 0; 00038 const char *cptr, *cptr_end = s + s_len; 00039 00040 // figure out which method we need to use: backslashify or embrace. 00041 // also count the number of unprintable characters we'll need to 00042 // backslashify, if it turns out that's necessary. 00043 for (cptr = s; cptr != cptr_end; cptr++) 00044 { 00045 // Assume we do nothing 00046 if (dst) dst[len] = *cptr; 00047 ++len; 00048 00049 if (!inescape && *cptr == '{') 00050 bracecount++; 00051 else if (!inescape && *cptr == '}') 00052 bracecount--; 00053 if (bracecount < 0) 00054 backslashify = true; 00055 00056 bool doit = false; 00057 switch (*cptr) 00058 { 00059 case WVTCL_ALWAYS_NASTY_CASE: 00060 doit = true; 00061 break; 00062 default: 00063 if (nasties[*cptr]) 00064 doit = true; 00065 } 00066 if (doit) 00067 unprintables++; 00068 00069 if (*cptr == '\\') 00070 inescape = !inescape; 00071 else 00072 inescape = false; 00073 } 00074 00075 // if the braces aren't balanced, backslashify 00076 if (bracecount != 0 || inescape) 00077 backslashify = true; 00078 00079 if (!backslashify && !unprintables) 00080 { 00081 if (verbatim) *verbatim = true; 00082 return len; // no work needed! 00083 } 00084 00085 if (backslashify) 00086 { 00087 if (dst) 00088 { 00089 len = 0; 00090 for (cptr = s; cptr != cptr_end; ++cptr) 00091 { 00092 bool doit = false; 00093 switch (*cptr) 00094 { 00095 case WVTCL_ALWAYS_NASTY_CASE: 00096 doit = true; 00097 break; 00098 default: 00099 if (nasties[*cptr]) 00100 doit = true; 00101 } 00102 if (doit) 00103 dst[len++] = '\\'; 00104 00105 dst[len++] = *cptr; 00106 } 00107 return len; 00108 } 00109 else return len+unprintables; 00110 } 00111 else 00112 { 00113 // the embrace method: just take the string and put braces around it 00114 if (dst) 00115 { 00116 len = 0; 00117 dst[len++] = '{'; 00118 for (cptr = s; cptr != cptr_end; ++cptr) 00119 dst[len++] = *cptr; 00120 dst[len++] = '}'; 00121 return len; 00122 } 00123 else return len+2; 00124 } 00125 } 00126 00127 00128 WvString wvtcl_escape(WvStringParm s, const WvStringMask &nasties) 00129 { 00130 size_t s_len = s.len(); 00131 00132 bool verbatim; 00133 size_t len = wvtcl_escape(NULL, s, s_len, nasties, &verbatim); 00134 if (verbatim) return s; 00135 00136 WvString result; 00137 result.setsize(len); 00138 char *e = result.edit(); 00139 e += wvtcl_escape(e, s, s_len, nasties); 00140 *e = '\0'; 00141 return result; 00142 } 00143 00144 00145 static size_t wvtcl_unescape(char *dst, const char *s, size_t s_len, 00146 bool *verbatim = NULL) 00147 { 00148 //printf(" unescape '%s'\n", (const char *)s); 00149 00150 // empty or NULL strings remain themselves 00151 if (!s) 00152 { 00153 if (verbatim) *verbatim = true; 00154 return 0; 00155 } 00156 00157 if (verbatim) *verbatim = false; 00158 00159 // deal with embraced strings by simply removing the braces 00160 if (s[0] == '{' && s[s_len-1] == '}') 00161 { 00162 if (dst) memcpy(dst, &s[1], s_len-2); 00163 return s_len - 2; 00164 } 00165 00166 bool skipquotes = false; 00167 // deal with quoted strings by ignoring the quotes _and_ unbackslashifying. 00168 if (s[0] == '"' && s[s_len-1] == '"') 00169 skipquotes = true; 00170 00171 // otherwise, unbackslashify it. 00172 const char *start = s, *end = &s[s_len]; 00173 if (skipquotes) 00174 { 00175 ++start; 00176 --end; 00177 } 00178 size_t len = 0; 00179 bool inescape = false; 00180 for (; start != end; ++start) 00181 { 00182 if (*start == '\\') 00183 { 00184 if (inescape) 00185 { 00186 if (dst) dst[len] = *start; 00187 len++; 00188 inescape = false; 00189 } 00190 else 00191 inescape = true; 00192 } 00193 else 00194 { 00195 inescape = false; 00196 if (dst) dst[len] = *start; 00197 len++; 00198 } 00199 } 00200 return len; 00201 } 00202 00203 00204 WvString wvtcl_unescape(WvStringParm s) 00205 { 00206 size_t s_len = s.len(); 00207 00208 bool verbatim; 00209 size_t len = wvtcl_unescape(NULL, s, s_len, &verbatim); 00210 if (verbatim) return s; 00211 00212 WvString result; 00213 result.setsize(len+1); 00214 char *e = result.edit(); 00215 e += wvtcl_unescape(e, s, s_len); 00216 *e = '\0'; 00217 return result; 00218 } 00219 00220 00221 WvString wvtcl_encode(WvList<WvString> &l, const WvStringMask &nasties, 00222 const WvStringMask &splitchars) 00223 { 00224 int size = 0; 00225 00226 WvList<WvString>::Iter i(l); 00227 int count = 0; 00228 for (i.rewind(); i.next(); ) 00229 { 00230 size += wvtcl_escape(NULL, *i, i->len(), nasties); 00231 ++count; 00232 } 00233 00234 WvString result; 00235 result.setsize(size+(count-1)+1); 00236 00237 char *p = result.edit(); 00238 int j; 00239 for (i.rewind(), j=0; i.next(); ++j) 00240 { 00241 p += wvtcl_escape(p, *i, i->len(), nasties); 00242 if (j < count - 1) 00243 *p++ = splitchars.first(); 00244 } 00245 *p = '\0'; 00246 00247 return result; 00248 } 00249 00250 const size_t WVTCL_GETWORD_NONE (UINT_MAX); 00251 00252 static size_t wvtcl_getword(char *dst, const char *s, size_t s_len, 00253 const WvStringMask &splitchars, 00254 bool do_unescape, size_t *end = NULL) 00255 { 00256 //printf(" used=%d\n", origsize); 00257 if (!s_len) return WVTCL_GETWORD_NONE; 00258 00259 bool inescape = false, inquote = false, incontinuation = false; 00260 int bracecount = 0; 00261 const char *origend = s + s_len; 00262 const char *sptr, *eptr; 00263 00264 // skip leading separators 00265 for (sptr = s; sptr != origend; sptr++) 00266 { 00267 if (!splitchars[*sptr]) 00268 break; 00269 } 00270 00271 if (sptr == origend) // nothing left 00272 return WVTCL_GETWORD_NONE; 00273 00274 // detect initial quote 00275 if (*sptr == '"') 00276 { 00277 inquote = true; 00278 eptr = sptr+1; 00279 } 00280 else 00281 eptr = sptr; 00282 00283 // loop over string until something satisfactory is found 00284 for (; eptr != origend; eptr++) 00285 { 00286 char ch = *eptr; 00287 00288 incontinuation = false; 00289 00290 if (inescape) 00291 { 00292 if (ch == '\n') 00293 { 00294 // technically we've finished the line-continuation 00295 // sequence, but we require at least one more character 00296 // in order to prove that there's a next line somewhere 00297 // in the buffer. Otherwise we might stop parsing before 00298 // we're "really" done if we're given input line-by-line. 00299 // 00300 // A better way to do this would be for getword() to *never* 00301 // return a string unless it contains a separator character; 00302 // then we wouldn't need this weird special case. But it 00303 // don't work like that; we'll return the last word in the 00304 // buffer even if it *doesn't* end in a separator character. 00305 incontinuation = true; 00306 } 00307 inescape = false; 00308 } 00309 else if (ch == '\\') 00310 { 00311 inescape = true; 00312 // now we need a character to complete the escape 00313 } 00314 else // not an escape sequence 00315 { 00316 // detect end of a quoted/unquoted string 00317 if (bracecount == 0) 00318 { 00319 if (inquote) 00320 { 00321 if (ch == '"') 00322 { 00323 eptr++; 00324 break; 00325 } 00326 } 00327 else if (splitchars[ch]) 00328 break; 00329 } 00330 00331 // match braces 00332 if (!inquote) 00333 { 00334 if (ch == '{') 00335 bracecount++; 00336 else if (bracecount > 0 && ch == '}') 00337 bracecount--; 00338 } 00339 } 00340 } 00341 00342 if (bracecount || sptr==eptr || inquote || inescape || incontinuation) 00343 // not there yet... 00344 return WVTCL_GETWORD_NONE; 00345 00346 //printf("len=%d, unget=%d\n", eptr - sptr, origend - eptr); 00347 if (end) *end = eptr - s; 00348 00349 if (do_unescape) 00350 return wvtcl_unescape(dst, sptr, eptr-sptr); 00351 else 00352 { 00353 if (dst) memcpy(dst, sptr, eptr-sptr); 00354 return eptr - sptr; 00355 } 00356 } 00357 00358 00359 WvString wvtcl_getword(WvBuf &buf, const WvStringMask &splitchars, 00360 bool do_unescape) 00361 { 00362 int origsize = buf.used(); 00363 const char *origptr = (const char *)buf.get(origsize); 00364 00365 size_t end; 00366 size_t len = wvtcl_getword(NULL, origptr, origsize, 00367 splitchars, do_unescape, &end); 00368 if (len == WVTCL_GETWORD_NONE) 00369 { 00370 buf.unget(origsize); 00371 return WvString::null; 00372 } 00373 00374 WvString result; 00375 result.setsize(len+1); 00376 char *e = result.edit(); 00377 e += wvtcl_getword(e, origptr, origsize, splitchars, do_unescape); 00378 *e = '\0'; 00379 00380 buf.unget(origsize - end); 00381 00382 return result; 00383 } 00384 00385 00386 void wvtcl_decode(WvList<WvString> &l, WvStringParm _s, 00387 const WvStringMask &splitchars, bool do_unescape) 00388 { 00389 const char *s = _s; 00390 size_t s_len = _s.len(); 00391 for (;;) 00392 { 00393 size_t end; 00394 size_t len = wvtcl_getword(NULL, s, s_len, 00395 splitchars, do_unescape, &end); 00396 if (len == WVTCL_GETWORD_NONE) 00397 break; 00398 00399 WvString *word = new WvString(); 00400 word->setsize(len+1); 00401 00402 char *e = word->edit(); 00403 e += wvtcl_getword(e, s, s_len, splitchars, do_unescape); 00404 *e = '\0'; 00405 l.append(word, true); 00406 00407 s += end; 00408 s_len -= end; 00409 } 00410 }