00001
00002
00003
00004
00005 #include "wvbackslash.h"
00006 #include "wvbuf.h"
00007 #include "wvstream.h"
00008 #include "wvstring.h"
00009 #include "wvstringmask.h"
00010 #include "wvtclstring.h"
00011 #include <climits>
00012
// Global character masks, each constructed from the correspondingly named
// WVTCL_*_STR macro. The macros are defined outside this file
// (presumably in wvtclstring.h -- confirm).
// NOTE(review): judging by the names these look like the stock "nasties" /
// "splitchars" arguments for the functions below -- verify against the header.
00013 const WvStringMask WVTCL_NASTY_SPACES(WVTCL_NASTY_SPACES_STR);
00014 const WvStringMask WVTCL_NASTY_NEWLINES(WVTCL_NASTY_NEWLINES_STR);
00015 const WvStringMask WVTCL_SPLITCHARS(WVTCL_SPLITCHARS_STR);
00016
00017 static size_t wvtcl_escape(char *dst, const char *s, size_t s_len,
00018 const WvStringMask &nasties, bool *verbatim = NULL)
00019 {
00020 if (verbatim) *verbatim = false;
00021
00022
00023 if (s == NULL)
00024 return 0;
00025
00026 if (s_len == 0)
00027 {
00028 if (dst)
00029 {
00030 dst[0] = '{';
00031 dst[1] = '}';
00032 }
00033 return 2;
00034 }
00035
00036 bool backslashify = false, inescape = false;
00037 int len = 0, unprintables = 0, bracecount = 0;
00038 const char *cptr, *cptr_end = s + s_len;
00039
00040
00041
00042
00043 for (cptr = s; cptr != cptr_end; cptr++)
00044 {
00045
00046 if (dst) dst[len] = *cptr;
00047 ++len;
00048
00049 if (!inescape && *cptr == '{')
00050 bracecount++;
00051 else if (!inescape && *cptr == '}')
00052 bracecount--;
00053 if (bracecount < 0)
00054 backslashify = true;
00055
00056 bool doit = false;
00057 switch (*cptr)
00058 {
00059 case WVTCL_ALWAYS_NASTY_CASE:
00060 doit = true;
00061 break;
00062 default:
00063 if (nasties[*cptr])
00064 doit = true;
00065 }
00066 if (doit)
00067 unprintables++;
00068
00069 if (*cptr == '\\')
00070 inescape = !inescape;
00071 else
00072 inescape = false;
00073 }
00074
00075
00076 if (bracecount != 0 || inescape)
00077 backslashify = true;
00078
00079 if (!backslashify && !unprintables)
00080 {
00081 if (verbatim) *verbatim = true;
00082 return len;
00083 }
00084
00085 if (backslashify)
00086 {
00087 if (dst)
00088 {
00089 len = 0;
00090 for (cptr = s; cptr != cptr_end; ++cptr)
00091 {
00092 bool doit = false;
00093 switch (*cptr)
00094 {
00095 case WVTCL_ALWAYS_NASTY_CASE:
00096 doit = true;
00097 break;
00098 default:
00099 if (nasties[*cptr])
00100 doit = true;
00101 }
00102 if (doit)
00103 dst[len++] = '\\';
00104
00105 dst[len++] = *cptr;
00106 }
00107 return len;
00108 }
00109 else return len+unprintables;
00110 }
00111 else
00112 {
00113
00114 if (dst)
00115 {
00116 len = 0;
00117 dst[len++] = '{';
00118 for (cptr = s; cptr != cptr_end; ++cptr)
00119 dst[len++] = *cptr;
00120 dst[len++] = '}';
00121 return len;
00122 }
00123 else return len+2;
00124 }
00125 }
00126
00127
00128 WvString wvtcl_escape(WvStringParm s, const WvStringMask &nasties)
00129 {
00130 size_t s_len = s.len();
00131
00132 bool verbatim;
00133 size_t len = wvtcl_escape(NULL, s, s_len, nasties, &verbatim);
00134 if (verbatim) return s;
00135
00136 WvString result;
00137 result.setsize(len);
00138 char *e = result.edit();
00139 e += wvtcl_escape(e, s, s_len, nasties);
00140 *e = '\0';
00141 return result;
00142 }
00143
00144
/**
 * Reverses wvtcl_escape() on the s_len bytes at 's' and returns the number
 * of bytes in the unescaped form.  No terminating NUL is written or counted.
 *
 * If 'dst' is NULL nothing is written and only the size is computed.
 *
 * Rules, in order:
 *   - s == NULL: returns 0 and sets *verbatim to true.
 *   - input starts with '{' and ends with '}': the enclosing braces are
 *     dropped and the interior is copied untouched.
 *   - input starts and ends with '"': the quotes are dropped, then
 *     backslashes are processed as below.
 *   - otherwise: each unpaired backslash is removed and the character after
 *     it is kept literally ("\\\\" yields one backslash).
 *
 * Delimiters are only stripped when the input is at least two bytes long;
 * a lone '"' or a zero-length input is therefore handled by the plain
 * backslash pass instead of indexing outside [s, s + s_len).
 *
 * 'verbatim', when non-NULL, is set to false for every non-NULL input.
 */
static size_t wvtcl_unescape(char *dst, const char *s, size_t s_len,
                             bool *verbatim = NULL)
{
    if (!s)
    {
        if (verbatim) *verbatim = true;
        return 0;
    }

    if (verbatim) *verbatim = false;

    // An opening and a closing delimiter need two distinct bytes.  Without
    // this check a single '"' would match as both, leaving start > end below.
    const bool can_strip = s_len >= 2;

    // brace-quoted: contents are taken literally
    if (can_strip && s[0] == '{' && s[s_len-1] == '}')
    {
        if (dst) memcpy(dst, &s[1], s_len-2);
        return s_len - 2;
    }

    const char *start = s, *end = s + s_len;

    // double-quoted: drop the quotes, still process backslashes
    if (can_strip && s[0] == '"' && s[s_len-1] == '"')
    {
        ++start;
        --end;
    }

    size_t len = 0;
    bool inescape = false;
    for (; start != end; ++start)
    {
        if (*start == '\\' && !inescape)
        {
            // swallow the backslash; the next byte is copied as-is
            inescape = true;
            continue;
        }

        inescape = false;
        if (dst) dst[len] = *start;
        len++;
    }
    return len;
}
00202
00203
00204 WvString wvtcl_unescape(WvStringParm s)
00205 {
00206 size_t s_len = s.len();
00207
00208 bool verbatim;
00209 size_t len = wvtcl_unescape(NULL, s, s_len, &verbatim);
00210 if (verbatim) return s;
00211
00212 WvString result;
00213 result.setsize(len+1);
00214 char *e = result.edit();
00215 e += wvtcl_unescape(e, s, s_len);
00216 *e = '\0';
00217 return result;
00218 }
00219
00220
00221 WvString wvtcl_encode(WvList<WvString> &l, const WvStringMask &nasties,
00222 const WvStringMask &splitchars)
00223 {
00224 int size = 0;
00225
00226 WvList<WvString>::Iter i(l);
00227 int count = 0;
00228 for (i.rewind(); i.next(); )
00229 {
00230 size += wvtcl_escape(NULL, *i, i->len(), nasties);
00231 ++count;
00232 }
00233
00234 WvString result;
00235 result.setsize(size+(count-1)+1);
00236
00237 char *p = result.edit();
00238 int j;
00239 for (i.rewind(), j=0; i.next(); ++j)
00240 {
00241 p += wvtcl_escape(p, *i, i->len(), nasties);
00242 if (j < count - 1)
00243 *p++ = splitchars.first();
00244 }
00245 *p = '\0';
00246
00247 return result;
00248 }
00249
// Sentinel returned by the file-local wvtcl_getword() helper when no
// complete word can be extracted from its input; callers compare the
// returned length against it.  The value is UINT_MAX converted to size_t.
00250 const size_t WVTCL_GETWORD_NONE (UINT_MAX);
00251
00252 static size_t wvtcl_getword(char *dst, const char *s, size_t s_len,
00253 const WvStringMask &splitchars,
00254 bool do_unescape, size_t *end = NULL)
00255 {
00256
00257 if (!s_len) return WVTCL_GETWORD_NONE;
00258
00259 bool inescape = false, inquote = false, incontinuation = false;
00260 int bracecount = 0;
00261 const char *origend = s + s_len;
00262 const char *sptr, *eptr;
00263
00264
00265 for (sptr = s; sptr != origend; sptr++)
00266 {
00267 if (!splitchars[*sptr])
00268 break;
00269 }
00270
00271 if (sptr == origend)
00272 return WVTCL_GETWORD_NONE;
00273
00274
00275 if (*sptr == '"')
00276 {
00277 inquote = true;
00278 eptr = sptr+1;
00279 }
00280 else
00281 eptr = sptr;
00282
00283
00284 for (; eptr != origend; eptr++)
00285 {
00286 char ch = *eptr;
00287
00288 incontinuation = false;
00289
00290 if (inescape)
00291 {
00292 if (ch == '\n')
00293 {
00294
00295
00296
00297
00298
00299
00300
00301
00302
00303
00304
00305 incontinuation = true;
00306 }
00307 inescape = false;
00308 }
00309 else if (ch == '\\')
00310 {
00311 inescape = true;
00312
00313 }
00314 else
00315 {
00316
00317 if (bracecount == 0)
00318 {
00319 if (inquote)
00320 {
00321 if (ch == '"')
00322 {
00323 eptr++;
00324 break;
00325 }
00326 }
00327 else if (splitchars[ch])
00328 break;
00329 }
00330
00331
00332 if (!inquote)
00333 {
00334 if (ch == '{')
00335 bracecount++;
00336 else if (bracecount > 0 && ch == '}')
00337 bracecount--;
00338 }
00339 }
00340 }
00341
00342 if (bracecount || sptr==eptr || inquote || inescape || incontinuation)
00343
00344 return WVTCL_GETWORD_NONE;
00345
00346
00347 if (end) *end = eptr - s;
00348
00349 if (do_unescape)
00350 return wvtcl_unescape(dst, sptr, eptr-sptr);
00351 else
00352 {
00353 if (dst) memcpy(dst, sptr, eptr-sptr);
00354 return eptr - sptr;
00355 }
00356 }
00357
00358
00359 WvString wvtcl_getword(WvBuf &buf, const WvStringMask &splitchars,
00360 bool do_unescape)
00361 {
00362 int origsize = buf.used();
00363 const char *origptr = (const char *)buf.get(origsize);
00364
00365 size_t end;
00366 size_t len = wvtcl_getword(NULL, origptr, origsize,
00367 splitchars, do_unescape, &end);
00368 if (len == WVTCL_GETWORD_NONE)
00369 {
00370 buf.unget(origsize);
00371 return WvString::null;
00372 }
00373
00374 WvString result;
00375 result.setsize(len+1);
00376 char *e = result.edit();
00377 e += wvtcl_getword(e, origptr, origsize, splitchars, do_unescape);
00378 *e = '\0';
00379
00380 buf.unget(origsize - end);
00381
00382 return result;
00383 }
00384
00385
00386 void wvtcl_decode(WvList<WvString> &l, WvStringParm _s,
00387 const WvStringMask &splitchars, bool do_unescape)
00388 {
00389 const char *s = _s;
00390 size_t s_len = _s.len();
00391 for (;;)
00392 {
00393 size_t end;
00394 size_t len = wvtcl_getword(NULL, s, s_len,
00395 splitchars, do_unescape, &end);
00396 if (len == WVTCL_GETWORD_NONE)
00397 break;
00398
00399 WvString *word = new WvString();
00400 word->setsize(len+1);
00401
00402 char *e = word->edit();
00403 e += wvtcl_getword(e, s, s_len, splitchars, do_unescape);
00404 *e = '\0';
00405 l.append(word, true);
00406
00407 s += end;
00408 s_len -= end;
00409 }
00410 }