WvStreams
|
00001 /* -*- Mode: C++ -*- 00002 * Worldvisions Weaver Software: 00003 * Copyright (C) 1997-2004 Net Integration Technologies, Inc. 00004 * 00005 * Regular expression support though libc 00006 */ 00007 #ifndef __WVREGEX_H 00008 #define __WVREGEX_H 00009 00010 #include "wverror.h" 00011 #include "wvstring.h" 00012 #include <sys/types.h> 00013 #include <regex.h> 00014 00015 #define __WVRE_REG(n) __wvre_r##n 00016 #define __WVRE_DECL_FORM(n) WvString &__WVRE_REG(n) = WvRegex::__wvre_null_reg 00017 #define WVREGEX_REGS_DECL \ 00018 __WVRE_DECL_FORM( 0), __WVRE_DECL_FORM( 1), \ 00019 __WVRE_DECL_FORM( 2), __WVRE_DECL_FORM( 3), \ 00020 __WVRE_DECL_FORM( 4), __WVRE_DECL_FORM( 5), \ 00021 __WVRE_DECL_FORM( 6), __WVRE_DECL_FORM( 7), \ 00022 __WVRE_DECL_FORM( 8), __WVRE_DECL_FORM( 9), \ 00023 __WVRE_DECL_FORM(10), __WVRE_DECL_FORM(11), \ 00024 __WVRE_DECL_FORM(12), __WVRE_DECL_FORM(13), \ 00025 __WVRE_DECL_FORM(14), __WVRE_DECL_FORM(15), \ 00026 __WVRE_DECL_FORM(16), __WVRE_DECL_FORM(17), \ 00027 __WVRE_DECL_FORM(18), __WVRE_DECL_FORM(19) 00028 #define __WVRE_CALL_FORM(n) __WVRE_REG(n) 00029 #define WVREGEX_REGS_CALL \ 00030 __WVRE_CALL_FORM( 0), __WVRE_CALL_FORM( 1), \ 00031 __WVRE_CALL_FORM( 2), __WVRE_CALL_FORM( 3), \ 00032 __WVRE_CALL_FORM( 4), __WVRE_CALL_FORM( 5), \ 00033 __WVRE_CALL_FORM( 6), __WVRE_CALL_FORM( 7), \ 00034 __WVRE_CALL_FORM( 8), __WVRE_CALL_FORM( 9), \ 00035 __WVRE_CALL_FORM(10), __WVRE_CALL_FORM(11), \ 00036 __WVRE_CALL_FORM(12), __WVRE_CALL_FORM(13), \ 00037 __WVRE_CALL_FORM(14), __WVRE_CALL_FORM(15), \ 00038 __WVRE_CALL_FORM(16), __WVRE_CALL_FORM(17), \ 00039 __WVRE_CALL_FORM(18), __WVRE_CALL_FORM(19) 00040 00047 class WvRegex: public WvErrorBase 00048 { 00049 private: 00050 bool have_preg; 00051 regex_t preg; 00052 00053 bool match(WvStringParm string, int eflags, 00054 size_t nmatch, regmatch_t pmatch[]) const; 00055 00056 virtual void seterr(int _errnum); 00057 00058 bool _match(WvStringParm string, int eflags, 00059 int &match_start, int &match_end, WVREGEX_REGS_DECL) const 00060 { 00061 regmatch_t pmatch[21]; 00062 int nmatch = 1; 00063 00064 #define __WVRE_COUNT_REGS(n) \ 00065 if ( &__WVRE_REG(n) != &__wvre_null_reg) ++nmatch 00066 00067 __WVRE_COUNT_REGS( 0); __WVRE_COUNT_REGS( 1); 00068 __WVRE_COUNT_REGS( 2); __WVRE_COUNT_REGS( 3); 00069 __WVRE_COUNT_REGS( 4); __WVRE_COUNT_REGS( 5); 00070 __WVRE_COUNT_REGS( 6); __WVRE_COUNT_REGS( 7); 00071 __WVRE_COUNT_REGS( 8); __WVRE_COUNT_REGS( 9); 00072 __WVRE_COUNT_REGS(10); __WVRE_COUNT_REGS(11); 00073 __WVRE_COUNT_REGS(12); __WVRE_COUNT_REGS(13); 00074 __WVRE_COUNT_REGS(14); __WVRE_COUNT_REGS(15); 00075 __WVRE_COUNT_REGS(16); __WVRE_COUNT_REGS(17); 00076 __WVRE_COUNT_REGS(18); __WVRE_COUNT_REGS(19); 00077 00078 if (!match(string, eflags, nmatch, pmatch)) return false; 00079 00080 match_start = pmatch[0].rm_so; 00081 match_end = pmatch[0].rm_eo; 00082 00083 #define __WVRE_STORE_REGS(n) \ 00084 if (&__WVRE_REG(n) != &__wvre_null_reg \ 00085 && pmatch[n+1].rm_so != -1 && pmatch[n+1].rm_eo != -1) \ 00086 { \ 00087 int len = pmatch[n+1].rm_eo-pmatch[n+1].rm_so; \ 00088 __WVRE_REG(n).setsize(len+1); \ 00089 memcpy(__WVRE_REG(n).edit(), &string[pmatch[n+1].rm_so], len); \ 00090 __WVRE_REG(n).edit()[len] = '\0'; \ 00091 } 00092 00093 __WVRE_STORE_REGS( 0); __WVRE_STORE_REGS( 1); 00094 __WVRE_STORE_REGS( 2); __WVRE_STORE_REGS( 3); 00095 __WVRE_STORE_REGS( 4); __WVRE_STORE_REGS( 5); 00096 __WVRE_STORE_REGS( 6); __WVRE_STORE_REGS( 7); 00097 __WVRE_STORE_REGS( 8); __WVRE_STORE_REGS( 9); 00098 __WVRE_STORE_REGS(10); __WVRE_STORE_REGS(11); 00099 __WVRE_STORE_REGS(12); __WVRE_STORE_REGS(13); 00100 __WVRE_STORE_REGS(14); __WVRE_STORE_REGS(15); 00101 __WVRE_STORE_REGS(16); __WVRE_STORE_REGS(17); 00102 __WVRE_STORE_REGS(18); __WVRE_STORE_REGS(19); 00103 00104 return true; 00105 } 00106 00107 public: 00112 enum CFlags { 00113 // Use (obsolete) basic regex syntax (like grep). See regex(7). 00114 BASIC = 0, 00115 // Use extended regex syntax (like egrep). See regex(7). 00116 EXTENDED = REG_EXTENDED, 00117 // Case insensitive 00118 ICASE = REG_ICASE, 00119 // Do not collect match start and end or registers; faster 00120 NOSUB = REG_NOSUB, 00121 // Match-any-character operators don't match a newline. See regex(3) 00122 NEWLINE = REG_NEWLINE 00123 }; 00124 static const int default_cflags; 00125 00130 enum EFlags 00131 { 00132 // Matching begining of line always fails (unless NEWLINE cflag is set) 00133 NOTBOL = REG_NOTBOL, 00134 // Matching end of line always fails (unless NEWLINE cflag is set) 00135 NOTEOL = REG_NOTEOL 00136 }; 00137 static const int default_eflags; 00138 00142 static WvString __wvre_null_reg; 00143 00148 WvRegex() : have_preg(false) {} 00156 WvRegex(WvStringParm regex, int cflags = default_cflags) : have_preg(false) 00157 { set(regex, cflags); } 00158 ~WvRegex(); 00159 00167 bool set(WvStringParm regex, int cflags = default_cflags); 00168 00183 bool match(WvStringParm string, WVREGEX_REGS_DECL) const 00184 { 00185 int match_start, match_end; 00186 return _match(string, default_eflags, 00187 match_start, match_end, WVREGEX_REGS_CALL); 00188 } 00197 bool match(WvStringParm string, int eflags, WVREGEX_REGS_DECL) const 00198 { 00199 int match_start, match_end; 00200 return _match(string, eflags, 00201 match_start, match_end, WVREGEX_REGS_CALL); 00202 } 00203 00230 bool continuable_match(WvStringParm string, 00231 int &match_start, int &match_end, 00232 WVREGEX_REGS_DECL) const 00233 { 00234 return _match(string, default_eflags, 00235 match_start, match_end, WVREGEX_REGS_CALL); 00236 } 00250 bool continuable_match(WvStringParm string, int eflags, 00251 int &match_start, int &match_end, 00252 WVREGEX_REGS_DECL) const 00253 { 00254 return _match(string, eflags, 00255 match_start, match_end, WVREGEX_REGS_CALL); 00256 } 00257 }; 00258 00259 #endif // __WVREGEX_H