WvStreams
wvregex.h
00001 /* -*- Mode: C++ -*-
00002  * Worldvisions Weaver Software:
00003  *   Copyright (C) 1997-2004 Net Integration Technologies, Inc.
00004  * 
00005  * Regular expression support through libc
00006  */ 
00007 #ifndef __WVREGEX_H
00008 #define __WVREGEX_H
00009 
00010 #include "wverror.h"
00011 #include "wvstring.h"
00012 #include <sys/types.h>
00013 #include <regex.h>
00014 
/*
 * Helper macros for the 20 optional "register" parameters taken by the
 * WvRegex matching methods.
 *
 *   __WVRE_REG(n)        name of register parameter n (__wvre_r<n>)
 *   __WVRE_DECL_FORM(n)  parameter declaration for register n: a WvString
 *                        reference defaulting to WvRegex::__wvre_null_reg
 *   __WVRE_CALL_FORM(n)  register n as written when forwarding it in a call
 */
#define __WVRE_REG(n) __wvre_r##n
#define __WVRE_DECL_FORM(n) WvString &__WVRE_REG(n) = WvRegex::__wvre_null_reg
#define __WVRE_CALL_FORM(n) __WVRE_REG(n)

/*
 * Applies `form` to each register index 0..19 and joins the results with
 * commas.  `form` must be the name of a function-like macro taking the
 * register index.
 */
#define __WVRE_EACH_REG(form) \
                form( 0), form( 1), form( 2), form( 3), \
                form( 4), form( 5), form( 6), form( 7), \
                form( 8), form( 9), form(10), form(11), \
                form(12), form(13), form(14), form(15), \
                form(16), form(17), form(18), form(19)

/* The 20 register parameters, as a parameter-declaration list. */
#define WVREGEX_REGS_DECL __WVRE_EACH_REG(__WVRE_DECL_FORM)

/* The same 20 registers, as an argument list for forwarding. */
#define WVREGEX_REGS_CALL __WVRE_EACH_REG(__WVRE_CALL_FORM)
00040 
00047 class WvRegex: public WvErrorBase
00048 {
00049 private:
00050     bool have_preg;
00051     regex_t preg;
00052     
00053     bool match(WvStringParm string, int eflags,
00054             size_t nmatch, regmatch_t pmatch[]) const;
00055 
00056     virtual void seterr(int _errnum);
00057 
00058     bool _match(WvStringParm string, int eflags,
00059             int &match_start, int &match_end, WVREGEX_REGS_DECL) const
00060     {
00061         regmatch_t pmatch[21];
00062         int nmatch = 1;
00063 
00064 #define __WVRE_COUNT_REGS(n) \
00065         if ( &__WVRE_REG(n) != &__wvre_null_reg) ++nmatch
00066 
00067         __WVRE_COUNT_REGS( 0); __WVRE_COUNT_REGS( 1);
00068         __WVRE_COUNT_REGS( 2); __WVRE_COUNT_REGS( 3);
00069         __WVRE_COUNT_REGS( 4); __WVRE_COUNT_REGS( 5);
00070         __WVRE_COUNT_REGS( 6); __WVRE_COUNT_REGS( 7);
00071         __WVRE_COUNT_REGS( 8); __WVRE_COUNT_REGS( 9);
00072         __WVRE_COUNT_REGS(10); __WVRE_COUNT_REGS(11);
00073         __WVRE_COUNT_REGS(12); __WVRE_COUNT_REGS(13);
00074         __WVRE_COUNT_REGS(14); __WVRE_COUNT_REGS(15);
00075         __WVRE_COUNT_REGS(16); __WVRE_COUNT_REGS(17);
00076         __WVRE_COUNT_REGS(18); __WVRE_COUNT_REGS(19);
00077 
00078         if (!match(string, eflags, nmatch, pmatch)) return false;
00079 
00080         match_start = pmatch[0].rm_so;
00081         match_end = pmatch[0].rm_eo;
00082 
00083 #define __WVRE_STORE_REGS(n) \
00084         if (&__WVRE_REG(n) != &__wvre_null_reg \
00085                 && pmatch[n+1].rm_so != -1 && pmatch[n+1].rm_eo != -1) \
00086         { \
00087             int len = pmatch[n+1].rm_eo-pmatch[n+1].rm_so; \
00088             __WVRE_REG(n).setsize(len+1); \
00089             memcpy(__WVRE_REG(n).edit(), &string[pmatch[n+1].rm_so], len); \
00090             __WVRE_REG(n).edit()[len] = '\0'; \
00091         }
00092 
00093         __WVRE_STORE_REGS( 0); __WVRE_STORE_REGS( 1);
00094         __WVRE_STORE_REGS( 2); __WVRE_STORE_REGS( 3);
00095         __WVRE_STORE_REGS( 4); __WVRE_STORE_REGS( 5);
00096         __WVRE_STORE_REGS( 6); __WVRE_STORE_REGS( 7);
00097         __WVRE_STORE_REGS( 8); __WVRE_STORE_REGS( 9);
00098         __WVRE_STORE_REGS(10); __WVRE_STORE_REGS(11);
00099         __WVRE_STORE_REGS(12); __WVRE_STORE_REGS(13);
00100         __WVRE_STORE_REGS(14); __WVRE_STORE_REGS(15);
00101         __WVRE_STORE_REGS(16); __WVRE_STORE_REGS(17);
00102         __WVRE_STORE_REGS(18); __WVRE_STORE_REGS(19);
00103                 
00104         return true;
00105     }
00106 
00107 public:
00112     enum CFlags {
00113         // Use (obsolete) basic regex syntax (like grep).  See regex(7).
00114         BASIC = 0,
00115         // Use extended regex syntax (like egrep).  See regex(7).
00116         EXTENDED = REG_EXTENDED,
00117         // Case insensitive
00118         ICASE = REG_ICASE,
00119         // Do not collect match start and end or registers; faster
00120         NOSUB = REG_NOSUB,
00121         // Match-any-character operators don't match a newline.  See regex(3)
00122         NEWLINE = REG_NEWLINE
00123     };
00124     static const int default_cflags;
00125 
00130     enum EFlags
00131     {
00132         // Matching begining of line always fails (unless NEWLINE cflag is set)
00133         NOTBOL = REG_NOTBOL,
00134         // Matching end of line always fails (unless NEWLINE cflag is set)
00135         NOTEOL = REG_NOTEOL
00136     };
00137     static const int default_eflags;
00138 
00142     static WvString __wvre_null_reg;
00143 
00148     WvRegex() : have_preg(false) {}
00156     WvRegex(WvStringParm regex, int cflags = default_cflags) : have_preg(false)
00157         { set(regex, cflags); }
00158     ~WvRegex();
00159     
00167     bool set(WvStringParm regex, int cflags = default_cflags);
00168     
00183     bool match(WvStringParm string, WVREGEX_REGS_DECL) const
00184     {
00185         int match_start, match_end;
00186         return _match(string, default_eflags,
00187                 match_start, match_end, WVREGEX_REGS_CALL); 
00188     }
00197     bool match(WvStringParm string, int eflags, WVREGEX_REGS_DECL) const
00198     {
00199         int match_start, match_end;
00200         return _match(string, eflags,
00201                 match_start, match_end, WVREGEX_REGS_CALL); 
00202     }
00203     
00230     bool continuable_match(WvStringParm string,
00231             int &match_start, int &match_end,
00232             WVREGEX_REGS_DECL) const
00233     {
00234         return _match(string, default_eflags,
00235                 match_start, match_end, WVREGEX_REGS_CALL); 
00236     }
00250     bool continuable_match(WvStringParm string, int eflags,
00251             int &match_start, int &match_end,
00252             WVREGEX_REGS_DECL) const
00253     {
00254         return _match(string, eflags,
00255                 match_start, match_end, WVREGEX_REGS_CALL); 
00256     }
00257 };
00258 
00259 #endif // __WVREGEX_H