00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031 #include "regparse.h"
00032
00033 #define WARN_BUFSIZE 256
00034
00035 #define CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS
00036
00037
00038 const OnigSyntaxType OnigSyntaxRuby = {
00039 (( SYN_GNU_REGEX_OP | ONIG_SYN_OP_QMARK_NON_GREEDY |
00040 ONIG_SYN_OP_ESC_OCTAL3 | ONIG_SYN_OP_ESC_X_HEX2 |
00041 ONIG_SYN_OP_ESC_X_BRACE_HEX8 | ONIG_SYN_OP_ESC_CONTROL_CHARS |
00042 ONIG_SYN_OP_ESC_C_CONTROL )
00043 & ~ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END )
00044 , ( ONIG_SYN_OP2_QMARK_GROUP_EFFECT |
00045 ONIG_SYN_OP2_OPTION_RUBY |
00046 ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP | ONIG_SYN_OP2_ESC_K_NAMED_BACKREF |
00047 ONIG_SYN_OP2_ESC_G_SUBEXP_CALL |
00048 ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY |
00049 ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT |
00050 ONIG_SYN_OP2_PLUS_POSSESSIVE_REPEAT |
00051 ONIG_SYN_OP2_CCLASS_SET_OP | ONIG_SYN_OP2_ESC_CAPITAL_C_BAR_CONTROL |
00052 ONIG_SYN_OP2_ESC_CAPITAL_M_BAR_META | ONIG_SYN_OP2_ESC_V_VTAB |
00053 ONIG_SYN_OP2_ESC_H_XDIGIT )
00054 , ( SYN_GNU_REGEX_BV |
00055 ONIG_SYN_ALLOW_INTERVAL_LOW_ABBREV |
00056 ONIG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND |
00057 ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP |
00058 ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME |
00059 ONIG_SYN_FIXED_INTERVAL_IS_GREEDY_ONLY |
00060 ONIG_SYN_WARN_CC_OP_NOT_ESCAPED |
00061 ONIG_SYN_WARN_CC_DUP |
00062 ONIG_SYN_WARN_REDUNDANT_NESTED_REPEAT )
00063 , ONIG_OPTION_NONE
00064 ,
00065 {
00066 (OnigCodePoint )'\\'
00067 , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR
00068 , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR
00069 , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR
00070 , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR
00071 , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR
00072 }
00073 };
00074
00075 const OnigSyntaxType* OnigDefaultSyntax = ONIG_SYNTAX_RUBY;
00076
00077 extern void onig_null_warn(const char* s ARG_UNUSED) { }
00078
00079 #ifdef DEFAULT_WARN_FUNCTION
00080 static OnigWarnFunc onig_warn = (OnigWarnFunc )DEFAULT_WARN_FUNCTION;
00081 #else
00082 static OnigWarnFunc onig_warn = onig_null_warn;
00083 #endif
00084
00085 #ifdef DEFAULT_VERB_WARN_FUNCTION
00086 static OnigWarnFunc onig_verb_warn = (OnigWarnFunc )DEFAULT_VERB_WARN_FUNCTION;
00087 #else
00088 static OnigWarnFunc onig_verb_warn = onig_null_warn;
00089 #endif
00090
00091 extern void onig_set_warn_func(OnigWarnFunc f)
00092 {
00093 onig_warn = f;
00094 }
00095
00096 extern void onig_set_verb_warn_func(OnigWarnFunc f)
00097 {
00098 onig_verb_warn = f;
00099 }
00100
00101 static void CC_DUP_WARN(ScanEnv *env);
00102
00103 static void
00104 bbuf_free(BBuf* bbuf)
00105 {
00106 if (IS_NOT_NULL(bbuf)) {
00107 if (IS_NOT_NULL(bbuf->p)) xfree(bbuf->p);
00108 xfree(bbuf);
00109 }
00110 }
00111
00112 static int
00113 bbuf_clone(BBuf** rto, BBuf* from)
00114 {
00115 int r;
00116 BBuf *to;
00117
00118 *rto = to = (BBuf* )xmalloc(sizeof(BBuf));
00119 CHECK_NULL_RETURN_MEMERR(to);
00120 r = BBUF_INIT(to, from->alloc);
00121 if (r != 0) return r;
00122 to->used = from->used;
00123 xmemcpy(to->p, from->p, from->used);
00124 return 0;
00125 }
00126
/* Maps a relative group number onto an absolute one:
   (env)->num_mem + 1 + rel_no. */
#define BACKREF_REL_TO_ABS(rel_no, env)  ((env)->num_mem + 1 + (rel_no))
00129
/* Clears the bits f in v when `negative` is non-zero, sets them otherwise.
   The whole expansion is parenthesized so the macro stays a single
   operand when it appears inside a larger expression. */
#define ONOFF(v,f,negative)  ((negative) ? ((v) &= ~(f)) : ((v) |= (f)))
00131
/* Start code point used by SET_ALL_MULTI_BYTE_RANGE: 0 when the minimum
   character length of the encoding exceeds one byte, 0x80 otherwise. */
#define MBCODE_START_POS(enc) \
  (OnigCodePoint )(ONIGENC_MBC_MINLEN(enc) > 1 ? 0 : 0x80)

/* Adds the range [MBCODE_START_POS(enc), ~0] to pbuf.
   Uses a variable named `env` from the expansion site. */
#define SET_ALL_MULTI_BYTE_RANGE(enc, pbuf) \
  add_code_range_to_buf(pbuf, env, MBCODE_START_POS(enc), ~((OnigCodePoint )0))

/* For encodings that are not single-byte, adds the full range to mbuf.
   Assigns to a variable `r` at the expansion site and returns it from
   the enclosing function when it is non-zero. */
#define ADD_ALL_MULTI_BYTE_RANGE(enc, mbuf) do {\
  if (! ONIGENC_IS_SINGLEBYTE(enc)) {\
    r = SET_ALL_MULTI_BYTE_RANGE(enc, &(mbuf));\
    if (r) return r;\
  }\
} while (0)
00144
00145
/* Sets bit `pos` in bs; when the bit is already set, CC_DUP_WARN is called
   first with the `env` variable found at the expansion site. */
#define BITSET_SET_BIT_CHKDUP(bs, pos) do { \
  if (BITSET_AT(bs, pos)) CC_DUP_WARN(env); \
  BS_ROOM(bs, pos) |= BS_BIT(pos); \
} while (0)

/* Stores 1 in `empty` when every word of bs is zero, otherwise 0. */
#define BITSET_IS_EMPTY(bs,empty) do {\
  int i;\
  empty = 1;\
  for (i = 0; i < (int )BITSET_SIZE; i++) {\
    if ((bs)[i] != 0) {\
      empty = 0; break;\
    }\
  }\
} while (0)
00160
00161 static void
00162 bitset_set_range(ScanEnv *env, BitSetRef bs, int from, int to)
00163 {
00164 int i;
00165 for (i = from; i <= to && i < SINGLE_BYTE_SIZE; i++) {
00166 BITSET_SET_BIT_CHKDUP(bs, i);
00167 }
00168 }
00169
00170 #if 0
00171 static void
00172 bitset_set_all(BitSetRef bs)
00173 {
00174 int i;
00175 for (i = 0; i < BITSET_SIZE; i++) { bs[i] = ~((Bits )0); }
00176 }
00177 #endif
00178
00179 static void
00180 bitset_invert(BitSetRef bs)
00181 {
00182 int i;
00183 for (i = 0; i < (int )BITSET_SIZE; i++) { bs[i] = ~(bs[i]); }
00184 }
00185
00186 static void
00187 bitset_invert_to(BitSetRef from, BitSetRef to)
00188 {
00189 int i;
00190 for (i = 0; i < (int )BITSET_SIZE; i++) { to[i] = ~(from[i]); }
00191 }
00192
00193 static void
00194 bitset_and(BitSetRef dest, BitSetRef bs)
00195 {
00196 int i;
00197 for (i = 0; i < (int )BITSET_SIZE; i++) { dest[i] &= bs[i]; }
00198 }
00199
00200 static void
00201 bitset_or(BitSetRef dest, BitSetRef bs)
00202 {
00203 int i;
00204 for (i = 0; i < (int )BITSET_SIZE; i++) { dest[i] |= bs[i]; }
00205 }
00206
00207 static void
00208 bitset_copy(BitSetRef dest, BitSetRef bs)
00209 {
00210 int i;
00211 for (i = 0; i < (int )BITSET_SIZE; i++) { dest[i] = bs[i]; }
00212 }
00213
00214 extern int
00215 onig_strncmp(const UChar* s1, const UChar* s2, int n)
00216 {
00217 int x;
00218
00219 while (n-- > 0) {
00220 x = *s2++ - *s1++;
00221 if (x) return x;
00222 }
00223 return 0;
00224 }
00225
00226 extern void
00227 onig_strcpy(UChar* dest, const UChar* src, const UChar* end)
00228 {
00229 ptrdiff_t len = end - src;
00230 if (len > 0) {
00231 xmemcpy(dest, src, len);
00232 dest[len] = (UChar )0;
00233 }
00234 }
00235
00236 #ifdef USE_NAMED_GROUP
00237 static UChar*
00238 strdup_with_null(OnigEncoding enc, UChar* s, UChar* end)
00239 {
00240 ptrdiff_t slen;
00241 int term_len, i;
00242 UChar *r;
00243
00244 slen = end - s;
00245 term_len = ONIGENC_MBC_MINLEN(enc);
00246
00247 r = (UChar* )xmalloc(slen + term_len);
00248 CHECK_NULL_RETURN(r);
00249 xmemcpy(r, s, slen);
00250
00251 for (i = 0; i < term_len; i++)
00252 r[slen + i] = (UChar )0;
00253
00254 return r;
00255 }
00256 #endif
00257
00258
/*
 * Pattern cursor helpers.  They operate on variables named `p`, `end`
 * and `enc` at the expansion site; PFETCH_READY declares `pfetch_prev`,
 * which PINC/PFETCH update and PUNFETCH restores.
 */

/* value PPEEK yields when the cursor is at the end */
#define PEND_VALUE   0

#define PFETCH_READY  UChar* pfetch_prev
/* 1 when the cursor has reached `end`, 0 otherwise */
#define PEND         (p < end ?  0 : 1)
/* step back to the position recorded by the last PINC/PFETCH */
#define PUNFETCH     p = pfetch_prev
/* advance by one character without reading it */
#define PINC       do { \
  pfetch_prev = p; \
  p += enclen(enc, p, end); \
} while (0)
/* read the current character into c, then advance */
#define PFETCH(c)  do { \
  c = ((enc->max_enc_len == 1) ? *p : ONIGENC_MBC_TO_CODE(enc, p, end)); \
  pfetch_prev = p; \
  p += enclen(enc, p, end); \
} while (0)

/* current character without advancing (PEND_VALUE at the end) */
#define PPEEK        (p < end ? ONIGENC_MBC_TO_CODE(enc, p, end) : PEND_VALUE)
/* argument parenthesized so the cast applies to the whole expression */
#define PPEEK_IS(c)  (PPEEK == (OnigCodePoint )(c))
00276
00277 static UChar*
00278 strcat_capa(UChar* dest, UChar* dest_end, const UChar* src, const UChar* src_end,
00279 int capa)
00280 {
00281 UChar* r;
00282
00283 if (dest)
00284 r = (UChar* )xrealloc(dest, capa + 1);
00285 else
00286 r = (UChar* )xmalloc(capa + 1);
00287
00288 CHECK_NULL_RETURN(r);
00289 onig_strcpy(r + (dest_end - dest), src, src_end);
00290 return r;
00291 }
00292
00293
00294 static UChar*
00295 strcat_capa_from_static(UChar* dest, UChar* dest_end,
00296 const UChar* src, const UChar* src_end, int capa)
00297 {
00298 UChar* r;
00299
00300 r = (UChar* )xmalloc(capa + 1);
00301 CHECK_NULL_RETURN(r);
00302 onig_strcpy(r, dest, dest_end);
00303 onig_strcpy(r + (dest_end - dest), src, src_end);
00304 return r;
00305 }
00306
00307
00308 #ifdef USE_ST_LIBRARY
00309
00310 #include "ruby/st.h"
00311
00312 typedef struct {
00313 const UChar* s;
00314 const UChar* end;
00315 } st_str_end_key;
00316
00317 static int
00318 str_end_cmp(st_data_t xp, st_data_t yp)
00319 {
00320 const st_str_end_key *x, *y;
00321 const UChar *p, *q;
00322 int c;
00323
00324 x = (const st_str_end_key *)xp;
00325 y = (const st_str_end_key *)yp;
00326 if ((x->end - x->s) != (y->end - y->s))
00327 return 1;
00328
00329 p = x->s;
00330 q = y->s;
00331 while (p < x->end) {
00332 c = (int )*p - (int )*q;
00333 if (c != 0) return c;
00334
00335 p++; q++;
00336 }
00337
00338 return 0;
00339 }
00340
00341 static st_index_t
00342 str_end_hash(st_data_t xp)
00343 {
00344 const st_str_end_key *x = (const st_str_end_key *)xp;
00345 const UChar *p;
00346 st_index_t val = 0;
00347
00348 p = x->s;
00349 while (p < x->end) {
00350 val = val * 997 + (int )*p++;
00351 }
00352
00353 return val + (val >> 5);
00354 }
00355
00356 extern hash_table_type*
00357 onig_st_init_strend_table_with_size(st_index_t size)
00358 {
00359 static const struct st_hash_type hashType = {
00360 str_end_cmp,
00361 str_end_hash,
00362 };
00363
00364 return (hash_table_type* )
00365 onig_st_init_table_with_size(&hashType, size);
00366 }
00367
00368 extern int
00369 onig_st_lookup_strend(hash_table_type* table, const UChar* str_key,
00370 const UChar* end_key, hash_data_type *value)
00371 {
00372 st_str_end_key key;
00373
00374 key.s = (UChar* )str_key;
00375 key.end = (UChar* )end_key;
00376
00377 return onig_st_lookup(table, (st_data_t )(&key), value);
00378 }
00379
00380 extern int
00381 onig_st_insert_strend(hash_table_type* table, const UChar* str_key,
00382 const UChar* end_key, hash_data_type value)
00383 {
00384 st_str_end_key* key;
00385 int result;
00386
00387 key = (st_str_end_key* )xmalloc(sizeof(st_str_end_key));
00388 key->s = (UChar* )str_key;
00389 key->end = (UChar* )end_key;
00390 result = onig_st_insert(table, (st_data_t )key, value);
00391 if (result) {
00392 xfree(key);
00393 }
00394 return result;
00395 }
00396
00397 #endif
00398
00399
00400 #ifdef USE_NAMED_GROUP
00401
00402 #define INIT_NAME_BACKREFS_ALLOC_NUM 8
00403
00404 typedef struct {
00405 UChar* name;
00406 size_t name_len;
00407 int back_num;
00408 int back_alloc;
00409 int back_ref1;
00410 int* back_refs;
00411 } NameEntry;
00412
00413 #ifdef USE_ST_LIBRARY
00414
00415 typedef st_table NameTable;
00416 typedef st_data_t HashDataType;
00417
00418 #define NAMEBUF_SIZE 24
00419 #define NAMEBUF_SIZE_1 25
00420
00421 #ifdef ONIG_DEBUG
00422 static int
00423 i_print_name_entry(UChar* key, NameEntry* e, void* arg)
00424 {
00425 int i;
00426 FILE* fp = (FILE* )arg;
00427
00428 fprintf(fp, "%s: ", e->name);
00429 if (e->back_num == 0)
00430 fputs("-", fp);
00431 else if (e->back_num == 1)
00432 fprintf(fp, "%d", e->back_ref1);
00433 else {
00434 for (i = 0; i < e->back_num; i++) {
00435 if (i > 0) fprintf(fp, ", ");
00436 fprintf(fp, "%d", e->back_refs[i]);
00437 }
00438 }
00439 fputs("\n", fp);
00440 return ST_CONTINUE;
00441 }
00442
00443 extern int
00444 onig_print_names(FILE* fp, regex_t* reg)
00445 {
00446 NameTable* t = (NameTable* )reg->name_table;
00447
00448 if (IS_NOT_NULL(t)) {
00449 fprintf(fp, "name table\n");
00450 onig_st_foreach(t, i_print_name_entry, (HashDataType )fp);
00451 fputs("\n", fp);
00452 }
00453 return 0;
00454 }
00455 #endif
00456
00457 static int
00458 i_free_name_entry(UChar* key, NameEntry* e, void* arg ARG_UNUSED)
00459 {
00460 xfree(e->name);
00461 if (IS_NOT_NULL(e->back_refs)) xfree(e->back_refs);
00462 xfree(key);
00463 xfree(e);
00464 return ST_DELETE;
00465 }
00466
00467 static int
00468 names_clear(regex_t* reg)
00469 {
00470 NameTable* t = (NameTable* )reg->name_table;
00471
00472 if (IS_NOT_NULL(t)) {
00473 onig_st_foreach(t, i_free_name_entry, 0);
00474 }
00475 return 0;
00476 }
00477
00478 extern int
00479 onig_names_free(regex_t* reg)
00480 {
00481 int r;
00482 NameTable* t;
00483
00484 r = names_clear(reg);
00485 if (r) return r;
00486
00487 t = (NameTable* )reg->name_table;
00488 if (IS_NOT_NULL(t)) onig_st_free_table(t);
00489 reg->name_table = (void* )NULL;
00490 return 0;
00491 }
00492
00493 static NameEntry*
00494 name_find(regex_t* reg, const UChar* name, const UChar* name_end)
00495 {
00496 NameEntry* e;
00497 NameTable* t = (NameTable* )reg->name_table;
00498
00499 e = (NameEntry* )NULL;
00500 if (IS_NOT_NULL(t)) {
00501 onig_st_lookup_strend(t, name, name_end, (HashDataType* )((void* )(&e)));
00502 }
00503 return e;
00504 }
00505
00506 typedef struct {
00507 int (*func)(const UChar*, const UChar*,int,int*,regex_t*,void*);
00508 regex_t* reg;
00509 void* arg;
00510 int ret;
00511 OnigEncoding enc;
00512 } INamesArg;
00513
00514 static int
00515 i_names(UChar* key ARG_UNUSED, NameEntry* e, INamesArg* arg)
00516 {
00517 int r = (*(arg->func))(e->name,
00518 e->name + e->name_len,
00519 e->back_num,
00520 (e->back_num > 1 ? e->back_refs : &(e->back_ref1)),
00521 arg->reg, arg->arg);
00522 if (r != 0) {
00523 arg->ret = r;
00524 return ST_STOP;
00525 }
00526 return ST_CONTINUE;
00527 }
00528
00529 extern int
00530 onig_foreach_name(regex_t* reg,
00531 int (*func)(const UChar*, const UChar*,int,int*,regex_t*,void*), void* arg)
00532 {
00533 INamesArg narg;
00534 NameTable* t = (NameTable* )reg->name_table;
00535
00536 narg.ret = 0;
00537 if (IS_NOT_NULL(t)) {
00538 narg.func = func;
00539 narg.reg = reg;
00540 narg.arg = arg;
00541 narg.enc = reg->enc;
00542 onig_st_foreach(t, i_names, (HashDataType )&narg);
00543 }
00544 return narg.ret;
00545 }
00546
00547 static int
00548 i_renumber_name(UChar* key ARG_UNUSED, NameEntry* e, GroupNumRemap* map)
00549 {
00550 int i;
00551
00552 if (e->back_num > 1) {
00553 for (i = 0; i < e->back_num; i++) {
00554 e->back_refs[i] = map[e->back_refs[i]].new_val;
00555 }
00556 }
00557 else if (e->back_num == 1) {
00558 e->back_ref1 = map[e->back_ref1].new_val;
00559 }
00560
00561 return ST_CONTINUE;
00562 }
00563
00564 extern int
00565 onig_renumber_name_table(regex_t* reg, GroupNumRemap* map)
00566 {
00567 NameTable* t = (NameTable* )reg->name_table;
00568
00569 if (IS_NOT_NULL(t)) {
00570 onig_st_foreach(t, i_renumber_name, (HashDataType )map);
00571 }
00572 return 0;
00573 }
00574
00575
00576 extern int
00577 onig_number_of_names(regex_t* reg)
00578 {
00579 NameTable* t = (NameTable* )reg->name_table;
00580
00581 if (IS_NOT_NULL(t))
00582 return t->num_entries;
00583 else
00584 return 0;
00585 }
00586
00587 #else
00588
00589 #define INIT_NAMES_ALLOC_NUM 8
00590
00591 typedef struct {
00592 NameEntry* e;
00593 int num;
00594 int alloc;
00595 } NameTable;
00596
00597 #ifdef ONIG_DEBUG
00598 extern int
00599 onig_print_names(FILE* fp, regex_t* reg)
00600 {
00601 int i, j;
00602 NameEntry* e;
00603 NameTable* t = (NameTable* )reg->name_table;
00604
00605 if (IS_NOT_NULL(t) && t->num > 0) {
00606 fprintf(fp, "name table\n");
00607 for (i = 0; i < t->num; i++) {
00608 e = &(t->e[i]);
00609 fprintf(fp, "%s: ", e->name);
00610 if (e->back_num == 0) {
00611 fputs("-", fp);
00612 }
00613 else if (e->back_num == 1) {
00614 fprintf(fp, "%d", e->back_ref1);
00615 }
00616 else {
00617 for (j = 0; j < e->back_num; j++) {
00618 if (j > 0) fprintf(fp, ", ");
00619 fprintf(fp, "%d", e->back_refs[j]);
00620 }
00621 }
00622 fputs("\n", fp);
00623 }
00624 fputs("\n", fp);
00625 }
00626 return 0;
00627 }
00628 #endif
00629
00630 static int
00631 names_clear(regex_t* reg)
00632 {
00633 int i;
00634 NameEntry* e;
00635 NameTable* t = (NameTable* )reg->name_table;
00636
00637 if (IS_NOT_NULL(t)) {
00638 for (i = 0; i < t->num; i++) {
00639 e = &(t->e[i]);
00640 if (IS_NOT_NULL(e->name)) {
00641 xfree(e->name);
00642 e->name = NULL;
00643 e->name_len = 0;
00644 e->back_num = 0;
00645 e->back_alloc = 0;
00646 if (IS_NOT_NULL(e->back_refs)) xfree(e->back_refs);
00647 e->back_refs = (int* )NULL;
00648 }
00649 }
00650 if (IS_NOT_NULL(t->e)) {
00651 xfree(t->e);
00652 t->e = NULL;
00653 }
00654 t->num = 0;
00655 }
00656 return 0;
00657 }
00658
00659 extern int
00660 onig_names_free(regex_t* reg)
00661 {
00662 int r;
00663 NameTable* t;
00664
00665 r = names_clear(reg);
00666 if (r) return r;
00667
00668 t = (NameTable* )reg->name_table;
00669 if (IS_NOT_NULL(t)) xfree(t);
00670 reg->name_table = NULL;
00671 return 0;
00672 }
00673
00674 static NameEntry*
00675 name_find(regex_t* reg, UChar* name, UChar* name_end)
00676 {
00677 int i, len;
00678 NameEntry* e;
00679 NameTable* t = (NameTable* )reg->name_table;
00680
00681 if (IS_NOT_NULL(t)) {
00682 len = name_end - name;
00683 for (i = 0; i < t->num; i++) {
00684 e = &(t->e[i]);
00685 if (len == e->name_len && onig_strncmp(name, e->name, len) == 0)
00686 return e;
00687 }
00688 }
00689 return (NameEntry* )NULL;
00690 }
00691
00692 extern int
00693 onig_foreach_name(regex_t* reg,
00694 int (*func)(const UChar*, const UChar*,int,int*,regex_t*,void*), void* arg)
00695 {
00696 int i, r;
00697 NameEntry* e;
00698 NameTable* t = (NameTable* )reg->name_table;
00699
00700 if (IS_NOT_NULL(t)) {
00701 for (i = 0; i < t->num; i++) {
00702 e = &(t->e[i]);
00703 r = (*func)(e->name, e->name + e->name_len, e->back_num,
00704 (e->back_num > 1 ? e->back_refs : &(e->back_ref1)),
00705 reg, arg);
00706 if (r != 0) return r;
00707 }
00708 }
00709 return 0;
00710 }
00711
00712 extern int
00713 onig_number_of_names(regex_t* reg)
00714 {
00715 NameTable* t = (NameTable* )reg->name_table;
00716
00717 if (IS_NOT_NULL(t))
00718 return t->num;
00719 else
00720 return 0;
00721 }
00722
00723 #endif
00724
00725 static int
00726 name_add(regex_t* reg, UChar* name, UChar* name_end, int backref, ScanEnv* env)
00727 {
00728 int alloc;
00729 NameEntry* e;
00730 NameTable* t = (NameTable* )reg->name_table;
00731
00732 if (name_end - name <= 0)
00733 return ONIGERR_EMPTY_GROUP_NAME;
00734
00735 e = name_find(reg, name, name_end);
00736 if (IS_NULL(e)) {
00737 #ifdef USE_ST_LIBRARY
00738 if (IS_NULL(t)) {
00739 t = onig_st_init_strend_table_with_size(5);
00740 reg->name_table = (void* )t;
00741 }
00742 e = (NameEntry* )xmalloc(sizeof(NameEntry));
00743 CHECK_NULL_RETURN_MEMERR(e);
00744
00745 e->name = strdup_with_null(reg->enc, name, name_end);
00746 if (IS_NULL(e->name)) {
00747 xfree(e);
00748 return ONIGERR_MEMORY;
00749 }
00750 onig_st_insert_strend(t, e->name, (e->name + (name_end - name)),
00751 (HashDataType )e);
00752
00753 e->name_len = name_end - name;
00754 e->back_num = 0;
00755 e->back_alloc = 0;
00756 e->back_refs = (int* )NULL;
00757
00758 #else
00759
00760 if (IS_NULL(t)) {
00761 alloc = INIT_NAMES_ALLOC_NUM;
00762 t = (NameTable* )xmalloc(sizeof(NameTable));
00763 CHECK_NULL_RETURN_MEMERR(t);
00764 t->e = NULL;
00765 t->alloc = 0;
00766 t->num = 0;
00767
00768 t->e = (NameEntry* )xmalloc(sizeof(NameEntry) * alloc);
00769 if (IS_NULL(t->e)) {
00770 xfree(t);
00771 return ONIGERR_MEMORY;
00772 }
00773 t->alloc = alloc;
00774 reg->name_table = t;
00775 goto clear;
00776 }
00777 else if (t->num == t->alloc) {
00778 int i;
00779
00780 alloc = t->alloc * 2;
00781 t->e = (NameEntry* )xrealloc(t->e, sizeof(NameEntry) * alloc);
00782 CHECK_NULL_RETURN_MEMERR(t->e);
00783 t->alloc = alloc;
00784
00785 clear:
00786 for (i = t->num; i < t->alloc; i++) {
00787 t->e[i].name = NULL;
00788 t->e[i].name_len = 0;
00789 t->e[i].back_num = 0;
00790 t->e[i].back_alloc = 0;
00791 t->e[i].back_refs = (int* )NULL;
00792 }
00793 }
00794 e = &(t->e[t->num]);
00795 t->num++;
00796 e->name = strdup_with_null(reg->enc, name, name_end);
00797 if (IS_NULL(e->name)) return ONIGERR_MEMORY;
00798 e->name_len = name_end - name;
00799 #endif
00800 }
00801
00802 if (e->back_num >= 1 &&
00803 ! IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME)) {
00804 onig_scan_env_set_error_string(env, ONIGERR_MULTIPLEX_DEFINED_NAME,
00805 name, name_end);
00806 return ONIGERR_MULTIPLEX_DEFINED_NAME;
00807 }
00808
00809 e->back_num++;
00810 if (e->back_num == 1) {
00811 e->back_ref1 = backref;
00812 }
00813 else {
00814 if (e->back_num == 2) {
00815 alloc = INIT_NAME_BACKREFS_ALLOC_NUM;
00816 e->back_refs = (int* )xmalloc(sizeof(int) * alloc);
00817 CHECK_NULL_RETURN_MEMERR(e->back_refs);
00818 e->back_alloc = alloc;
00819 e->back_refs[0] = e->back_ref1;
00820 e->back_refs[1] = backref;
00821 }
00822 else {
00823 if (e->back_num > e->back_alloc) {
00824 alloc = e->back_alloc * 2;
00825 e->back_refs = (int* )xrealloc(e->back_refs, sizeof(int) * alloc);
00826 CHECK_NULL_RETURN_MEMERR(e->back_refs);
00827 e->back_alloc = alloc;
00828 }
00829 e->back_refs[e->back_num - 1] = backref;
00830 }
00831 }
00832
00833 return 0;
00834 }
00835
00836 extern int
00837 onig_name_to_group_numbers(regex_t* reg, const UChar* name,
00838 const UChar* name_end, int** nums)
00839 {
00840 NameEntry* e = name_find(reg, name, name_end);
00841
00842 if (IS_NULL(e)) return ONIGERR_UNDEFINED_NAME_REFERENCE;
00843
00844 switch (e->back_num) {
00845 case 0:
00846 *nums = 0;
00847 break;
00848 case 1:
00849 *nums = &(e->back_ref1);
00850 break;
00851 default:
00852 *nums = e->back_refs;
00853 break;
00854 }
00855 return e->back_num;
00856 }
00857
00858 extern int
00859 onig_name_to_backref_number(regex_t* reg, const UChar* name,
00860 const UChar* name_end, OnigRegion *region)
00861 {
00862 int i, n, *nums;
00863
00864 n = onig_name_to_group_numbers(reg, name, name_end, &nums);
00865 if (n < 0)
00866 return n;
00867 else if (n == 0)
00868 return ONIGERR_PARSER_BUG;
00869 else if (n == 1)
00870 return nums[0];
00871 else {
00872 if (IS_NOT_NULL(region)) {
00873 for (i = n - 1; i >= 0; i--) {
00874 if (region->beg[nums[i]] != ONIG_REGION_NOTPOS)
00875 return nums[i];
00876 }
00877 }
00878 return nums[n - 1];
00879 }
00880 }
00881
00882 #else
00883
00884 extern int
00885 onig_name_to_group_numbers(regex_t* reg, const UChar* name,
00886 const UChar* name_end, int** nums)
00887 {
00888 return ONIG_NO_SUPPORT_CONFIG;
00889 }
00890
00891 extern int
00892 onig_name_to_backref_number(regex_t* reg, const UChar* name,
00893 const UChar* name_end, OnigRegion* region)
00894 {
00895 return ONIG_NO_SUPPORT_CONFIG;
00896 }
00897
00898 extern int
00899 onig_foreach_name(regex_t* reg,
00900 int (*func)(const UChar*, const UChar*,int,int*,regex_t*,void*), void* arg)
00901 {
00902 return ONIG_NO_SUPPORT_CONFIG;
00903 }
00904
00905 extern int
00906 onig_number_of_names(regex_t* reg)
00907 {
00908 return 0;
00909 }
00910 #endif
00911
00912 extern int
00913 onig_noname_group_capture_is_active(regex_t* reg)
00914 {
00915 if (ONIG_IS_OPTION_ON(reg->options, ONIG_OPTION_DONT_CAPTURE_GROUP))
00916 return 0;
00917
00918 #ifdef USE_NAMED_GROUP
00919 if (onig_number_of_names(reg) > 0 &&
00920 IS_SYNTAX_BV(reg->syntax, ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP) &&
00921 !ONIG_IS_OPTION_ON(reg->options, ONIG_OPTION_CAPTURE_GROUP)) {
00922 return 0;
00923 }
00924 #endif
00925
00926 return 1;
00927 }
00928
00929
00930 #define INIT_SCANENV_MEMNODES_ALLOC_SIZE 16
00931
00932 static void
00933 scan_env_clear(ScanEnv* env)
00934 {
00935 int i;
00936
00937 BIT_STATUS_CLEAR(env->capture_history);
00938 BIT_STATUS_CLEAR(env->bt_mem_start);
00939 BIT_STATUS_CLEAR(env->bt_mem_end);
00940 BIT_STATUS_CLEAR(env->backrefed_mem);
00941 env->error = (UChar* )NULL;
00942 env->error_end = (UChar* )NULL;
00943 env->num_call = 0;
00944 env->num_mem = 0;
00945 #ifdef USE_NAMED_GROUP
00946 env->num_named = 0;
00947 #endif
00948 env->mem_alloc = 0;
00949 env->mem_nodes_dynamic = (Node** )NULL;
00950
00951 for (i = 0; i < SCANENV_MEMNODES_SIZE; i++)
00952 env->mem_nodes_static[i] = NULL_NODE;
00953
00954 #ifdef USE_COMBINATION_EXPLOSION_CHECK
00955 env->num_comb_exp_check = 0;
00956 env->comb_exp_max_regnum = 0;
00957 env->curr_max_regnum = 0;
00958 env->has_recursion = 0;
00959 #endif
00960 env->warnings_flag = 0;
00961 }
00962
00963 static int
00964 scan_env_add_mem_entry(ScanEnv* env)
00965 {
00966 int i, need, alloc;
00967 Node** p;
00968
00969 need = env->num_mem + 1;
00970 if (need >= SCANENV_MEMNODES_SIZE) {
00971 if (env->mem_alloc <= need) {
00972 if (IS_NULL(env->mem_nodes_dynamic)) {
00973 alloc = INIT_SCANENV_MEMNODES_ALLOC_SIZE;
00974 p = (Node** )xmalloc(sizeof(Node*) * alloc);
00975 xmemcpy(p, env->mem_nodes_static,
00976 sizeof(Node*) * SCANENV_MEMNODES_SIZE);
00977 }
00978 else {
00979 alloc = env->mem_alloc * 2;
00980 p = (Node** )xrealloc(env->mem_nodes_dynamic, sizeof(Node*) * alloc);
00981 }
00982 CHECK_NULL_RETURN_MEMERR(p);
00983
00984 for (i = env->num_mem + 1; i < alloc; i++)
00985 p[i] = NULL_NODE;
00986
00987 env->mem_nodes_dynamic = p;
00988 env->mem_alloc = alloc;
00989 }
00990 }
00991
00992 env->num_mem++;
00993 return env->num_mem;
00994 }
00995
00996 static int
00997 scan_env_set_mem_node(ScanEnv* env, int num, Node* node)
00998 {
00999 if (env->num_mem >= num)
01000 SCANENV_MEM_NODES(env)[num] = node;
01001 else
01002 return ONIGERR_PARSER_BUG;
01003 return 0;
01004 }
01005
01006
01007 #ifdef USE_PARSE_TREE_NODE_RECYCLE
01008 typedef struct _FreeNode {
01009 struct _FreeNode* next;
01010 } FreeNode;
01011
01012 static FreeNode* FreeNodeList = (FreeNode* )NULL;
01013 #endif
01014
01015 extern void
01016 onig_node_free(Node* node)
01017 {
01018 start:
01019 if (IS_NULL(node)) return ;
01020
01021 switch (NTYPE(node)) {
01022 case NT_STR:
01023 if (NSTR(node)->capa != 0 &&
01024 IS_NOT_NULL(NSTR(node)->s) && NSTR(node)->s != NSTR(node)->buf) {
01025 xfree(NSTR(node)->s);
01026 }
01027 break;
01028
01029 case NT_LIST:
01030 case NT_ALT:
01031 onig_node_free(NCAR(node));
01032 {
01033 Node* next_node = NCDR(node);
01034
01035 #ifdef USE_PARSE_TREE_NODE_RECYCLE
01036 {
01037 FreeNode* n = (FreeNode* )node;
01038
01039 THREAD_ATOMIC_START;
01040 n->next = FreeNodeList;
01041 FreeNodeList = n;
01042 THREAD_ATOMIC_END;
01043 }
01044 #else
01045 xfree(node);
01046 #endif
01047 node = next_node;
01048 goto start;
01049 }
01050 break;
01051
01052 case NT_CCLASS:
01053 {
01054 CClassNode* cc = NCCLASS(node);
01055
01056 if (IS_NCCLASS_SHARE(cc)) return ;
01057 if (cc->mbuf)
01058 bbuf_free(cc->mbuf);
01059 }
01060 break;
01061
01062 case NT_QTFR:
01063 if (NQTFR(node)->target)
01064 onig_node_free(NQTFR(node)->target);
01065 break;
01066
01067 case NT_ENCLOSE:
01068 if (NENCLOSE(node)->target)
01069 onig_node_free(NENCLOSE(node)->target);
01070 break;
01071
01072 case NT_BREF:
01073 if (IS_NOT_NULL(NBREF(node)->back_dynamic))
01074 xfree(NBREF(node)->back_dynamic);
01075 break;
01076
01077 case NT_ANCHOR:
01078 if (NANCHOR(node)->target)
01079 onig_node_free(NANCHOR(node)->target);
01080 break;
01081 }
01082
01083 #ifdef USE_PARSE_TREE_NODE_RECYCLE
01084 {
01085 FreeNode* n = (FreeNode* )node;
01086
01087 THREAD_ATOMIC_START;
01088 n->next = FreeNodeList;
01089 FreeNodeList = n;
01090 THREAD_ATOMIC_END;
01091 }
01092 #else
01093 xfree(node);
01094 #endif
01095 }
01096
01097 #ifdef USE_PARSE_TREE_NODE_RECYCLE
01098 extern int
01099 onig_free_node_list(void)
01100 {
01101 FreeNode* n;
01102
01103
01104 while (IS_NOT_NULL(FreeNodeList)) {
01105 n = FreeNodeList;
01106 FreeNodeList = FreeNodeList->next;
01107 xfree(n);
01108 }
01109
01110 return 0;
01111 }
01112 #endif
01113
01114 static Node*
01115 node_new(void)
01116 {
01117 Node* node;
01118
01119 #ifdef USE_PARSE_TREE_NODE_RECYCLE
01120 THREAD_ATOMIC_START;
01121 if (IS_NOT_NULL(FreeNodeList)) {
01122 node = (Node* )FreeNodeList;
01123 FreeNodeList = FreeNodeList->next;
01124 THREAD_ATOMIC_END;
01125 return node;
01126 }
01127 THREAD_ATOMIC_END;
01128 #endif
01129
01130 node = (Node* )xmalloc(sizeof(Node));
01131
01132 return node;
01133 }
01134
01135
01136 static void
01137 initialize_cclass(CClassNode* cc)
01138 {
01139 BITSET_CLEAR(cc->bs);
01140
01141 cc->flags = 0;
01142 cc->mbuf = NULL;
01143 }
01144
01145 static Node*
01146 node_new_cclass(void)
01147 {
01148 Node* node = node_new();
01149 CHECK_NULL_RETURN(node);
01150
01151 SET_NTYPE(node, NT_CCLASS);
01152 initialize_cclass(NCCLASS(node));
01153 return node;
01154 }
01155
01156 static Node*
01157 node_new_cclass_by_codepoint_range(int not, OnigCodePoint sb_out,
01158 const OnigCodePoint ranges[])
01159 {
01160 int n, i;
01161 CClassNode* cc;
01162 OnigCodePoint j;
01163
01164 Node* node = node_new_cclass();
01165 CHECK_NULL_RETURN(node);
01166
01167 cc = NCCLASS(node);
01168 if (not != 0) NCCLASS_SET_NOT(cc);
01169
01170 BITSET_CLEAR(cc->bs);
01171 if (sb_out > 0 && IS_NOT_NULL(ranges)) {
01172 n = ONIGENC_CODE_RANGE_NUM(ranges);
01173 for (i = 0; i < n; i++) {
01174 for (j = ONIGENC_CODE_RANGE_FROM(ranges, i);
01175 j <= (OnigCodePoint )ONIGENC_CODE_RANGE_TO(ranges, i); j++) {
01176 if (j >= sb_out) goto sb_end;
01177
01178 BITSET_SET_BIT(cc->bs, j);
01179 }
01180 }
01181 }
01182
01183 sb_end:
01184 if (IS_NULL(ranges)) {
01185 is_null:
01186 cc->mbuf = NULL;
01187 }
01188 else {
01189 BBuf* bbuf;
01190
01191 n = ONIGENC_CODE_RANGE_NUM(ranges);
01192 if (n == 0) goto is_null;
01193
01194 bbuf = (BBuf* )xmalloc(sizeof(BBuf));
01195 CHECK_NULL_RETURN(bbuf);
01196 bbuf->alloc = n + 1;
01197 bbuf->used = n + 1;
01198 bbuf->p = (UChar* )((void* )ranges);
01199
01200 cc->mbuf = bbuf;
01201 }
01202
01203 return node;
01204 }
01205
01206 static Node*
01207 node_new_ctype(int type, int not)
01208 {
01209 Node* node = node_new();
01210 CHECK_NULL_RETURN(node);
01211
01212 SET_NTYPE(node, NT_CTYPE);
01213 NCTYPE(node)->ctype = type;
01214 NCTYPE(node)->not = not;
01215 return node;
01216 }
01217
01218 static Node*
01219 node_new_anychar(void)
01220 {
01221 Node* node = node_new();
01222 CHECK_NULL_RETURN(node);
01223
01224 SET_NTYPE(node, NT_CANY);
01225 return node;
01226 }
01227
01228 static Node*
01229 node_new_list(Node* left, Node* right)
01230 {
01231 Node* node = node_new();
01232 CHECK_NULL_RETURN(node);
01233
01234 SET_NTYPE(node, NT_LIST);
01235 NCAR(node) = left;
01236 NCDR(node) = right;
01237 return node;
01238 }
01239
01240 extern Node*
01241 onig_node_new_list(Node* left, Node* right)
01242 {
01243 return node_new_list(left, right);
01244 }
01245
01246 extern Node*
01247 onig_node_list_add(Node* list, Node* x)
01248 {
01249 Node *n;
01250
01251 n = onig_node_new_list(x, NULL);
01252 if (IS_NULL(n)) return NULL_NODE;
01253
01254 if (IS_NOT_NULL(list)) {
01255 while (IS_NOT_NULL(NCDR(list)))
01256 list = NCDR(list);
01257
01258 NCDR(list) = n;
01259 }
01260
01261 return n;
01262 }
01263
01264 extern Node*
01265 onig_node_new_alt(Node* left, Node* right)
01266 {
01267 Node* node = node_new();
01268 CHECK_NULL_RETURN(node);
01269
01270 SET_NTYPE(node, NT_ALT);
01271 NCAR(node) = left;
01272 NCDR(node) = right;
01273 return node;
01274 }
01275
01276 extern Node*
01277 onig_node_new_anchor(int type)
01278 {
01279 Node* node = node_new();
01280 CHECK_NULL_RETURN(node);
01281
01282 SET_NTYPE(node, NT_ANCHOR);
01283 NANCHOR(node)->type = type;
01284 NANCHOR(node)->target = NULL;
01285 NANCHOR(node)->char_len = -1;
01286 return node;
01287 }
01288
01289 static Node*
01290 node_new_backref(int back_num, int* backrefs, int by_name,
01291 #ifdef USE_BACKREF_WITH_LEVEL
01292 int exist_level, int nest_level,
01293 #endif
01294 ScanEnv* env)
01295 {
01296 int i;
01297 Node* node = node_new();
01298
01299 CHECK_NULL_RETURN(node);
01300
01301 SET_NTYPE(node, NT_BREF);
01302 NBREF(node)->state = 0;
01303 NBREF(node)->back_num = back_num;
01304 NBREF(node)->back_dynamic = (int* )NULL;
01305 if (by_name != 0)
01306 NBREF(node)->state |= NST_NAME_REF;
01307
01308 #ifdef USE_BACKREF_WITH_LEVEL
01309 if (exist_level != 0) {
01310 NBREF(node)->state |= NST_NEST_LEVEL;
01311 NBREF(node)->nest_level = nest_level;
01312 }
01313 #endif
01314
01315 for (i = 0; i < back_num; i++) {
01316 if (backrefs[i] <= env->num_mem &&
01317 IS_NULL(SCANENV_MEM_NODES(env)[backrefs[i]])) {
01318 NBREF(node)->state |= NST_RECURSION;
01319 break;
01320 }
01321 }
01322
01323 if (back_num <= NODE_BACKREFS_SIZE) {
01324 for (i = 0; i < back_num; i++)
01325 NBREF(node)->back_static[i] = backrefs[i];
01326 }
01327 else {
01328 int* p = (int* )xmalloc(sizeof(int) * back_num);
01329 if (IS_NULL(p)) {
01330 onig_node_free(node);
01331 return NULL;
01332 }
01333 NBREF(node)->back_dynamic = p;
01334 for (i = 0; i < back_num; i++)
01335 p[i] = backrefs[i];
01336 }
01337 return node;
01338 }
01339
01340 #ifdef USE_SUBEXP_CALL
01341 static Node*
01342 node_new_call(UChar* name, UChar* name_end, int gnum)
01343 {
01344 Node* node = node_new();
01345 CHECK_NULL_RETURN(node);
01346
01347 SET_NTYPE(node, NT_CALL);
01348 NCALL(node)->state = 0;
01349 NCALL(node)->target = NULL_NODE;
01350 NCALL(node)->name = name;
01351 NCALL(node)->name_end = name_end;
01352 NCALL(node)->group_num = gnum;
01353 return node;
01354 }
01355 #endif
01356
01357 static Node*
01358 node_new_quantifier(int lower, int upper, int by_number)
01359 {
01360 Node* node = node_new();
01361 CHECK_NULL_RETURN(node);
01362
01363 SET_NTYPE(node, NT_QTFR);
01364 NQTFR(node)->state = 0;
01365 NQTFR(node)->target = NULL;
01366 NQTFR(node)->lower = lower;
01367 NQTFR(node)->upper = upper;
01368 NQTFR(node)->greedy = 1;
01369 NQTFR(node)->target_empty_info = NQ_TARGET_ISNOT_EMPTY;
01370 NQTFR(node)->head_exact = NULL_NODE;
01371 NQTFR(node)->next_head_exact = NULL_NODE;
01372 NQTFR(node)->is_refered = 0;
01373 if (by_number != 0)
01374 NQTFR(node)->state |= NST_BY_NUMBER;
01375
01376 #ifdef USE_COMBINATION_EXPLOSION_CHECK
01377 NQTFR(node)->comb_exp_check_num = 0;
01378 #endif
01379
01380 return node;
01381 }
01382
01383 static Node*
01384 node_new_enclose(int type)
01385 {
01386 Node* node = node_new();
01387 CHECK_NULL_RETURN(node);
01388
01389 SET_NTYPE(node, NT_ENCLOSE);
01390 NENCLOSE(node)->type = type;
01391 NENCLOSE(node)->state = 0;
01392 NENCLOSE(node)->regnum = 0;
01393 NENCLOSE(node)->option = 0;
01394 NENCLOSE(node)->target = NULL;
01395 NENCLOSE(node)->call_addr = -1;
01396 NENCLOSE(node)->opt_count = 0;
01397 return node;
01398 }
01399
01400 extern Node*
01401 onig_node_new_enclose(int type)
01402 {
01403 return node_new_enclose(type);
01404 }
01405
01406 static Node*
01407 node_new_enclose_memory(OnigOptionType option, int is_named)
01408 {
01409 Node* node = node_new_enclose(ENCLOSE_MEMORY);
01410 CHECK_NULL_RETURN(node);
01411 if (is_named != 0)
01412 SET_ENCLOSE_STATUS(node, NST_NAMED_GROUP);
01413
01414 #ifdef USE_SUBEXP_CALL
01415 NENCLOSE(node)->option = option;
01416 #endif
01417 return node;
01418 }
01419
01420 static Node*
01421 node_new_option(OnigOptionType option)
01422 {
01423 Node* node = node_new_enclose(ENCLOSE_OPTION);
01424 CHECK_NULL_RETURN(node);
01425 NENCLOSE(node)->option = option;
01426 return node;
01427 }
01428
01429 extern int
01430 onig_node_str_cat(Node* node, const UChar* s, const UChar* end)
01431 {
01432 ptrdiff_t addlen = end - s;
01433
01434 if (addlen > 0) {
01435 ptrdiff_t len = NSTR(node)->end - NSTR(node)->s;
01436
01437 if (NSTR(node)->capa > 0 || (len + addlen > NODE_STR_BUF_SIZE - 1)) {
01438 UChar* p;
01439 ptrdiff_t capa = len + addlen + NODE_STR_MARGIN;
01440
01441 if (capa <= NSTR(node)->capa) {
01442 onig_strcpy(NSTR(node)->s + len, s, end);
01443 }
01444 else {
01445 if (NSTR(node)->s == NSTR(node)->buf)
01446 p = strcat_capa_from_static(NSTR(node)->s, NSTR(node)->end,
01447 s, end, capa);
01448 else
01449 p = strcat_capa(NSTR(node)->s, NSTR(node)->end, s, end, capa);
01450
01451 CHECK_NULL_RETURN_MEMERR(p);
01452 NSTR(node)->s = p;
01453 NSTR(node)->capa = capa;
01454 }
01455 }
01456 else {
01457 onig_strcpy(NSTR(node)->s + len, s, end);
01458 }
01459 NSTR(node)->end = NSTR(node)->s + len + addlen;
01460 }
01461
01462 return 0;
01463 }
01464
01465 extern int
01466 onig_node_str_set(Node* node, const UChar* s, const UChar* end)
01467 {
01468 onig_node_str_clear(node);
01469 return onig_node_str_cat(node, s, end);
01470 }
01471
01472 static int
01473 node_str_cat_char(Node* node, UChar c)
01474 {
01475 UChar s[1];
01476
01477 s[0] = c;
01478 return onig_node_str_cat(node, s, s + 1);
01479 }
01480
01481 extern void
01482 onig_node_conv_to_str_node(Node* node, int flag)
01483 {
01484 SET_NTYPE(node, NT_STR);
01485 NSTR(node)->flag = flag;
01486 NSTR(node)->capa = 0;
01487 NSTR(node)->s = NSTR(node)->buf;
01488 NSTR(node)->end = NSTR(node)->buf;
01489 }
01490
01491 extern void
01492 onig_node_str_clear(Node* node)
01493 {
01494 if (NSTR(node)->capa != 0 &&
01495 IS_NOT_NULL(NSTR(node)->s) && NSTR(node)->s != NSTR(node)->buf) {
01496 xfree(NSTR(node)->s);
01497 }
01498
01499 NSTR(node)->capa = 0;
01500 NSTR(node)->flag = 0;
01501 NSTR(node)->s = NSTR(node)->buf;
01502 NSTR(node)->end = NSTR(node)->buf;
01503 }
01504
01505 static Node*
01506 node_new_str(const UChar* s, const UChar* end)
01507 {
01508 Node* node = node_new();
01509 CHECK_NULL_RETURN(node);
01510
01511 SET_NTYPE(node, NT_STR);
01512 NSTR(node)->capa = 0;
01513 NSTR(node)->flag = 0;
01514 NSTR(node)->s = NSTR(node)->buf;
01515 NSTR(node)->end = NSTR(node)->buf;
01516 if (onig_node_str_cat(node, s, end)) {
01517 onig_node_free(node);
01518 return NULL;
01519 }
01520 return node;
01521 }
01522
01523 extern Node*
01524 onig_node_new_str(const UChar* s, const UChar* end)
01525 {
01526 return node_new_str(s, end);
01527 }
01528
01529 static Node*
01530 node_new_str_raw(UChar* s, UChar* end)
01531 {
01532 Node* node = node_new_str(s, end);
01533 NSTRING_SET_RAW(node);
01534 return node;
01535 }
01536
01537 static Node*
01538 node_new_empty(void)
01539 {
01540 return node_new_str(NULL, NULL);
01541 }
01542
01543 static Node*
01544 node_new_str_raw_char(UChar c)
01545 {
01546 UChar p[1];
01547
01548 p[0] = c;
01549 return node_new_str_raw(p, p + 1);
01550 }
01551
01552 static Node*
01553 str_node_split_last_char(StrNode* sn, OnigEncoding enc)
01554 {
01555 const UChar *p;
01556 Node* n = NULL_NODE;
01557
01558 if (sn->end > sn->s) {
01559 p = onigenc_get_prev_char_head(enc, sn->s, sn->end, sn->end);
01560 if (p && p > sn->s) {
01561 n = node_new_str(p, sn->end);
01562 if ((sn->flag & NSTR_RAW) != 0)
01563 NSTRING_SET_RAW(n);
01564 sn->end = (UChar* )p;
01565 }
01566 }
01567 return n;
01568 }
01569
01570 static int
01571 str_node_can_be_split(StrNode* sn, OnigEncoding enc)
01572 {
01573 if (sn->end > sn->s) {
01574 return ((enclen(enc, sn->s, sn->end) < sn->end - sn->s) ? 1 : 0);
01575 }
01576 return 0;
01577 }
01578
01579 #ifdef USE_PAD_TO_SHORT_BYTE_CHAR
01580 static int
01581 node_str_head_pad(StrNode* sn, int num, UChar val)
01582 {
01583 UChar buf[NODE_STR_BUF_SIZE];
01584 int i, len;
01585
01586 len = sn->end - sn->s;
01587 onig_strcpy(buf, sn->s, sn->end);
01588 onig_strcpy(&(sn->s[num]), buf, buf + len);
01589 sn->end += num;
01590
01591 for (i = 0; i < num; i++) {
01592 sn->s[i] = val;
01593 }
01594 }
01595 #endif
01596
01597 extern int
01598 onig_scan_unsigned_number(UChar** src, const UChar* end, OnigEncoding enc)
01599 {
01600 unsigned int num, val;
01601 OnigCodePoint c;
01602 UChar* p = *src;
01603 PFETCH_READY;
01604
01605 num = 0;
01606 while (!PEND) {
01607 PFETCH(c);
01608 if (ONIGENC_IS_CODE_DIGIT(enc, c)) {
01609 val = (unsigned int )DIGITVAL(c);
01610 if ((INT_MAX_LIMIT - val) / 10UL < num)
01611 return -1;
01612
01613 num = num * 10 + val;
01614 }
01615 else {
01616 PUNFETCH;
01617 break;
01618 }
01619 }
01620 *src = p;
01621 return num;
01622 }
01623
01624 static int
01625 scan_unsigned_hexadecimal_number(UChar** src, UChar* end, int maxlen,
01626 OnigEncoding enc)
01627 {
01628 OnigCodePoint c;
01629 unsigned int num, val;
01630 UChar* p = *src;
01631 PFETCH_READY;
01632
01633 num = 0;
01634 while (!PEND && maxlen-- != 0) {
01635 PFETCH(c);
01636 if (ONIGENC_IS_CODE_XDIGIT(enc, c)) {
01637 val = (unsigned int )XDIGITVAL(enc,c);
01638 if ((INT_MAX_LIMIT - val) / 16UL < num)
01639 return -1;
01640
01641 num = (num << 4) + XDIGITVAL(enc,c);
01642 }
01643 else {
01644 PUNFETCH;
01645 break;
01646 }
01647 }
01648 *src = p;
01649 return num;
01650 }
01651
01652 static int
01653 scan_unsigned_octal_number(UChar** src, UChar* end, int maxlen,
01654 OnigEncoding enc)
01655 {
01656 OnigCodePoint c;
01657 unsigned int num, val;
01658 UChar* p = *src;
01659 PFETCH_READY;
01660
01661 num = 0;
01662 while (!PEND && maxlen-- != 0) {
01663 PFETCH(c);
01664 if (ONIGENC_IS_CODE_DIGIT(enc, c) && c < '8') {
01665 val = ODIGITVAL(c);
01666 if ((INT_MAX_LIMIT - val) / 8UL < num)
01667 return -1;
01668
01669 num = (num << 3) + val;
01670 }
01671 else {
01672 PUNFETCH;
01673 break;
01674 }
01675 }
01676 *src = p;
01677 return num;
01678 }
01679
01680
01681 #define BBUF_WRITE_CODE_POINT(bbuf,pos,code) \
01682 BBUF_WRITE(bbuf, pos, &(code), SIZE_CODE_POINT)
01683
01684
01685
01686
01687
01688 static int
01689 new_code_range(BBuf** pbuf)
01690 {
01691 #define INIT_MULTI_BYTE_RANGE_SIZE (SIZE_CODE_POINT * 5)
01692 int r;
01693 OnigCodePoint n;
01694 BBuf* bbuf;
01695
01696 bbuf = *pbuf = (BBuf* )xmalloc(sizeof(BBuf));
01697 CHECK_NULL_RETURN_MEMERR(*pbuf);
01698 r = BBUF_INIT(*pbuf, INIT_MULTI_BYTE_RANGE_SIZE);
01699 if (r) return r;
01700
01701 n = 0;
01702 BBUF_WRITE_CODE_POINT(bbuf, 0, n);
01703 return 0;
01704 }
01705
01706 static int
01707 add_code_range_to_buf0(BBuf** pbuf, ScanEnv* env, OnigCodePoint from, OnigCodePoint to,
01708 int checkdup)
01709 {
01710 int r, inc_n, pos;
01711 int low, high, bound, x;
01712 OnigCodePoint n, *data;
01713 BBuf* bbuf;
01714
01715 if (from > to) {
01716 n = from; from = to; to = n;
01717 }
01718
01719 if (IS_NULL(*pbuf)) {
01720 r = new_code_range(pbuf);
01721 if (r) return r;
01722 bbuf = *pbuf;
01723 n = 0;
01724 }
01725 else {
01726 bbuf = *pbuf;
01727 GET_CODE_POINT(n, bbuf->p);
01728 }
01729 data = (OnigCodePoint* )(bbuf->p);
01730 data++;
01731
01732 for (low = 0, bound = n; low < bound; ) {
01733 x = (low + bound) >> 1;
01734 if (from > data[x*2 + 1])
01735 low = x + 1;
01736 else
01737 bound = x;
01738 }
01739
01740 for (high = low, bound = n; high < bound; ) {
01741 x = (high + bound) >> 1;
01742 if (to >= data[x*2] - 1)
01743 high = x + 1;
01744 else
01745 bound = x;
01746 }
01747
01748 inc_n = low + 1 - high;
01749 if (n + inc_n > ONIG_MAX_MULTI_BYTE_RANGES_NUM)
01750 return ONIGERR_TOO_MANY_MULTI_BYTE_RANGES;
01751
01752 if (inc_n != 1) {
01753 if (checkdup && to >= data[low*2]) CC_DUP_WARN(env);
01754 if (from > data[low*2])
01755 from = data[low*2];
01756 if (to < data[(high - 1)*2 + 1])
01757 to = data[(high - 1)*2 + 1];
01758 }
01759
01760 if (inc_n != 0 && (OnigCodePoint )high < n) {
01761 int from_pos = SIZE_CODE_POINT * (1 + high * 2);
01762 int to_pos = SIZE_CODE_POINT * (1 + (low + 1) * 2);
01763 int size = (n - high) * 2 * SIZE_CODE_POINT;
01764
01765 if (inc_n > 0) {
01766 BBUF_MOVE_RIGHT(bbuf, from_pos, to_pos, size);
01767 }
01768 else {
01769 BBUF_MOVE_LEFT_REDUCE(bbuf, from_pos, to_pos);
01770 }
01771 }
01772
01773 pos = SIZE_CODE_POINT * (1 + low * 2);
01774 BBUF_ENSURE_SIZE(bbuf, pos + SIZE_CODE_POINT * 2);
01775 BBUF_WRITE_CODE_POINT(bbuf, pos, from);
01776 BBUF_WRITE_CODE_POINT(bbuf, pos + SIZE_CODE_POINT, to);
01777 n += inc_n;
01778 BBUF_WRITE_CODE_POINT(bbuf, 0, n);
01779
01780 return 0;
01781 }
01782
01783 static int
01784 add_code_range_to_buf(BBuf** pbuf, ScanEnv* env, OnigCodePoint from, OnigCodePoint to)
01785 {
01786 return add_code_range_to_buf0(pbuf, env, from, to, 1);
01787 }
01788
01789 static int
01790 add_code_range0(BBuf** pbuf, ScanEnv* env, OnigCodePoint from, OnigCodePoint to, int checkdup)
01791 {
01792 if (from > to) {
01793 if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC))
01794 return 0;
01795 else
01796 return ONIGERR_EMPTY_RANGE_IN_CHAR_CLASS;
01797 }
01798
01799 return add_code_range_to_buf0(pbuf, env, from, to, checkdup);
01800 }
01801
01802 static int
01803 add_code_range(BBuf** pbuf, ScanEnv* env, OnigCodePoint from, OnigCodePoint to)
01804 {
01805 return add_code_range0(pbuf, env, from, to, 1);
01806 }
01807
01808 static int
01809 not_code_range_buf(OnigEncoding enc, BBuf* bbuf, BBuf** pbuf, ScanEnv* env)
01810 {
01811 int r, i, n;
01812 OnigCodePoint pre, from, *data, to = 0;
01813
01814 *pbuf = (BBuf* )NULL;
01815 if (IS_NULL(bbuf)) {
01816 set_all:
01817 return SET_ALL_MULTI_BYTE_RANGE(enc, pbuf);
01818 }
01819
01820 data = (OnigCodePoint* )(bbuf->p);
01821 GET_CODE_POINT(n, data);
01822 data++;
01823 if (n <= 0) goto set_all;
01824
01825 r = 0;
01826 pre = MBCODE_START_POS(enc);
01827 for (i = 0; i < n; i++) {
01828 from = data[i*2];
01829 to = data[i*2+1];
01830 if (pre <= from - 1) {
01831 r = add_code_range_to_buf(pbuf, env, pre, from - 1);
01832 if (r != 0) return r;
01833 }
01834 if (to == ~((OnigCodePoint )0)) break;
01835 pre = to + 1;
01836 }
01837 if (to < ~((OnigCodePoint )0)) {
01838 r = add_code_range_to_buf(pbuf, env, to + 1, ~((OnigCodePoint )0));
01839 }
01840 return r;
01841 }
01842
/* Exchange (bbuf1, not1) with (bbuf2, not2).  All four arguments must be
 * assignable lvalues. */
#define SWAP_BBUF_NOT(bbuf1, not1, bbuf2, not2) do {\
  int   swap_not_ = not1; \
  BBuf* swap_buf_ = bbuf1; \
  not1  = not2;  not2  = swap_not_; \
  bbuf1 = bbuf2; bbuf2 = swap_buf_; \
} while (0)
01849
01850 static int
01851 or_code_range_buf(OnigEncoding enc, BBuf* bbuf1, int not1,
01852 BBuf* bbuf2, int not2, BBuf** pbuf, ScanEnv* env)
01853 {
01854 int r;
01855 OnigCodePoint i, n1, *data1;
01856 OnigCodePoint from, to;
01857
01858 *pbuf = (BBuf* )NULL;
01859 if (IS_NULL(bbuf1) && IS_NULL(bbuf2)) {
01860 if (not1 != 0 || not2 != 0)
01861 return SET_ALL_MULTI_BYTE_RANGE(enc, pbuf);
01862 return 0;
01863 }
01864
01865 r = 0;
01866 if (IS_NULL(bbuf2))
01867 SWAP_BBUF_NOT(bbuf1, not1, bbuf2, not2);
01868
01869 if (IS_NULL(bbuf1)) {
01870 if (not1 != 0) {
01871 return SET_ALL_MULTI_BYTE_RANGE(enc, pbuf);
01872 }
01873 else {
01874 if (not2 == 0) {
01875 return bbuf_clone(pbuf, bbuf2);
01876 }
01877 else {
01878 return not_code_range_buf(enc, bbuf2, pbuf, env);
01879 }
01880 }
01881 }
01882
01883 if (not1 != 0)
01884 SWAP_BBUF_NOT(bbuf1, not1, bbuf2, not2);
01885
01886 data1 = (OnigCodePoint* )(bbuf1->p);
01887 GET_CODE_POINT(n1, data1);
01888 data1++;
01889
01890 if (not2 == 0 && not1 == 0) {
01891 r = bbuf_clone(pbuf, bbuf2);
01892 }
01893 else if (not1 == 0) {
01894 r = not_code_range_buf(enc, bbuf2, pbuf, env);
01895 }
01896 if (r != 0) return r;
01897
01898 for (i = 0; i < n1; i++) {
01899 from = data1[i*2];
01900 to = data1[i*2+1];
01901 r = add_code_range_to_buf(pbuf, env, from, to);
01902 if (r != 0) return r;
01903 }
01904 return 0;
01905 }
01906
01907 static int
01908 and_code_range1(BBuf** pbuf, ScanEnv* env, OnigCodePoint from1, OnigCodePoint to1,
01909 OnigCodePoint* data, int n)
01910 {
01911 int i, r;
01912 OnigCodePoint from2, to2;
01913
01914 for (i = 0; i < n; i++) {
01915 from2 = data[i*2];
01916 to2 = data[i*2+1];
01917 if (from2 < from1) {
01918 if (to2 < from1) continue;
01919 else {
01920 from1 = to2 + 1;
01921 }
01922 }
01923 else if (from2 <= to1) {
01924 if (to2 < to1) {
01925 if (from1 <= from2 - 1) {
01926 r = add_code_range_to_buf(pbuf, env, from1, from2-1);
01927 if (r != 0) return r;
01928 }
01929 from1 = to2 + 1;
01930 }
01931 else {
01932 to1 = from2 - 1;
01933 }
01934 }
01935 else {
01936 from1 = from2;
01937 }
01938 if (from1 > to1) break;
01939 }
01940 if (from1 <= to1) {
01941 r = add_code_range_to_buf(pbuf, env, from1, to1);
01942 if (r != 0) return r;
01943 }
01944 return 0;
01945 }
01946
01947 static int
01948 and_code_range_buf(BBuf* bbuf1, int not1, BBuf* bbuf2, int not2, BBuf** pbuf, ScanEnv* env)
01949 {
01950 int r;
01951 OnigCodePoint i, j, n1, n2, *data1, *data2;
01952 OnigCodePoint from, to, from1, to1, from2, to2;
01953
01954 *pbuf = (BBuf* )NULL;
01955 if (IS_NULL(bbuf1)) {
01956 if (not1 != 0 && IS_NOT_NULL(bbuf2))
01957 return bbuf_clone(pbuf, bbuf2);
01958 return 0;
01959 }
01960 else if (IS_NULL(bbuf2)) {
01961 if (not2 != 0)
01962 return bbuf_clone(pbuf, bbuf1);
01963 return 0;
01964 }
01965
01966 if (not1 != 0)
01967 SWAP_BBUF_NOT(bbuf1, not1, bbuf2, not2);
01968
01969 data1 = (OnigCodePoint* )(bbuf1->p);
01970 data2 = (OnigCodePoint* )(bbuf2->p);
01971 GET_CODE_POINT(n1, data1);
01972 GET_CODE_POINT(n2, data2);
01973 data1++;
01974 data2++;
01975
01976 if (not2 == 0 && not1 == 0) {
01977 for (i = 0; i < n1; i++) {
01978 from1 = data1[i*2];
01979 to1 = data1[i*2+1];
01980 for (j = 0; j < n2; j++) {
01981 from2 = data2[j*2];
01982 to2 = data2[j*2+1];
01983 if (from2 > to1) break;
01984 if (to2 < from1) continue;
01985 from = MAX(from1, from2);
01986 to = MIN(to1, to2);
01987 r = add_code_range_to_buf(pbuf, env, from, to);
01988 if (r != 0) return r;
01989 }
01990 }
01991 }
01992 else if (not1 == 0) {
01993 for (i = 0; i < n1; i++) {
01994 from1 = data1[i*2];
01995 to1 = data1[i*2+1];
01996 r = and_code_range1(pbuf, env, from1, to1, data2, n2);
01997 if (r != 0) return r;
01998 }
01999 }
02000
02001 return 0;
02002 }
02003
02004 static int
02005 and_cclass(CClassNode* dest, CClassNode* cc, ScanEnv* env)
02006 {
02007 OnigEncoding enc = env->enc;
02008 int r, not1, not2;
02009 BBuf *buf1, *buf2, *pbuf = 0;
02010 BitSetRef bsr1, bsr2;
02011 BitSet bs1, bs2;
02012
02013 not1 = IS_NCCLASS_NOT(dest);
02014 bsr1 = dest->bs;
02015 buf1 = dest->mbuf;
02016 not2 = IS_NCCLASS_NOT(cc);
02017 bsr2 = cc->bs;
02018 buf2 = cc->mbuf;
02019
02020 if (not1 != 0) {
02021 bitset_invert_to(bsr1, bs1);
02022 bsr1 = bs1;
02023 }
02024 if (not2 != 0) {
02025 bitset_invert_to(bsr2, bs2);
02026 bsr2 = bs2;
02027 }
02028 bitset_and(bsr1, bsr2);
02029 if (bsr1 != dest->bs) {
02030 bitset_copy(dest->bs, bsr1);
02031 bsr1 = dest->bs;
02032 }
02033 if (not1 != 0) {
02034 bitset_invert(dest->bs);
02035 }
02036
02037 if (! ONIGENC_IS_SINGLEBYTE(enc)) {
02038 if (not1 != 0 && not2 != 0) {
02039 r = or_code_range_buf(enc, buf1, 0, buf2, 0, &pbuf, env);
02040 }
02041 else {
02042 r = and_code_range_buf(buf1, not1, buf2, not2, &pbuf, env);
02043 if (r == 0 && not1 != 0) {
02044 BBuf *tbuf = 0;
02045 r = not_code_range_buf(enc, pbuf, &tbuf, env);
02046 bbuf_free(pbuf);
02047 pbuf = tbuf;
02048 }
02049 }
02050 if (r != 0) {
02051 bbuf_free(pbuf);
02052 return r;
02053 }
02054
02055 dest->mbuf = pbuf;
02056 bbuf_free(buf1);
02057 return r;
02058 }
02059 return 0;
02060 }
02061
02062 static int
02063 or_cclass(CClassNode* dest, CClassNode* cc, ScanEnv* env)
02064 {
02065 OnigEncoding enc = env->enc;
02066 int r, not1, not2;
02067 BBuf *buf1, *buf2, *pbuf = 0;
02068 BitSetRef bsr1, bsr2;
02069 BitSet bs1, bs2;
02070
02071 not1 = IS_NCCLASS_NOT(dest);
02072 bsr1 = dest->bs;
02073 buf1 = dest->mbuf;
02074 not2 = IS_NCCLASS_NOT(cc);
02075 bsr2 = cc->bs;
02076 buf2 = cc->mbuf;
02077
02078 if (not1 != 0) {
02079 bitset_invert_to(bsr1, bs1);
02080 bsr1 = bs1;
02081 }
02082 if (not2 != 0) {
02083 bitset_invert_to(bsr2, bs2);
02084 bsr2 = bs2;
02085 }
02086 bitset_or(bsr1, bsr2);
02087 if (bsr1 != dest->bs) {
02088 bitset_copy(dest->bs, bsr1);
02089 bsr1 = dest->bs;
02090 }
02091 if (not1 != 0) {
02092 bitset_invert(dest->bs);
02093 }
02094
02095 if (! ONIGENC_IS_SINGLEBYTE(enc)) {
02096 if (not1 != 0 && not2 != 0) {
02097 r = and_code_range_buf(buf1, 0, buf2, 0, &pbuf, env);
02098 }
02099 else {
02100 r = or_code_range_buf(enc, buf1, not1, buf2, not2, &pbuf, env);
02101 if (r == 0 && not1 != 0) {
02102 BBuf *tbuf = 0;
02103 r = not_code_range_buf(enc, pbuf, &tbuf, env);
02104 bbuf_free(pbuf);
02105 pbuf = tbuf;
02106 }
02107 }
02108 if (r != 0) {
02109 bbuf_free(pbuf);
02110 return r;
02111 }
02112
02113 dest->mbuf = pbuf;
02114 bbuf_free(buf1);
02115 return r;
02116 }
02117 else
02118 return 0;
02119 }
02120
02121 static void UNKNOWN_ESC_WARN(ScanEnv *env, int c);
02122
02123 static int
02124 conv_backslash_value(int c, ScanEnv* env)
02125 {
02126 if (IS_SYNTAX_OP(env->syntax, ONIG_SYN_OP_ESC_CONTROL_CHARS)) {
02127 switch (c) {
02128 case 'n': return '\n';
02129 case 't': return '\t';
02130 case 'r': return '\r';
02131 case 'f': return '\f';
02132 case 'a': return '\007';
02133 case 'b': return '\010';
02134 case 'e': return '\033';
02135 case 'v':
02136 if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_ESC_V_VTAB))
02137 return '\v';
02138 break;
02139
02140 default:
02141 if (('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z'))
02142 UNKNOWN_ESC_WARN(env, c);
02143 break;
02144 }
02145 }
02146 return c;
02147 }
02148
/* The function form is compiled out; the macro below makes every node an
 * acceptable quantifier target. */
#if 0
static int
is_invalid_quantifier_target(Node* node)
{
  switch (NTYPE(node)) {
  case NT_ANCHOR:
    return 1;

  case NT_LIST:
    do {
      if (! is_invalid_quantifier_target(NCAR(node))) return 0;
    } while (IS_NOT_NULL(node = NCDR(node)));
    return 0;

  case NT_ALT:
    do {
      if (is_invalid_quantifier_target(NCAR(node))) return 1;
    } while (IS_NOT_NULL(node = NCDR(node)));
    break;

  case NT_ENCLOSE:
  default:
    break;
  }

  return 0;
}
#else
#define is_invalid_quantifier_target(node) 0
#endif
02184
02185
02186 static int
02187 popular_quantifier_num(QtfrNode* q)
02188 {
02189 if (q->greedy) {
02190 if (q->lower == 0) {
02191 if (q->upper == 1) return 0;
02192 else if (IS_REPEAT_INFINITE(q->upper)) return 1;
02193 }
02194 else if (q->lower == 1) {
02195 if (IS_REPEAT_INFINITE(q->upper)) return 2;
02196 }
02197 }
02198 else {
02199 if (q->lower == 0) {
02200 if (q->upper == 1) return 3;
02201 else if (IS_REPEAT_INFINITE(q->upper)) return 4;
02202 }
02203 else if (q->lower == 1) {
02204 if (IS_REPEAT_INFINITE(q->upper)) return 5;
02205 }
02206 }
02207 return -1;
02208 }
02209
02210
/* Action taken by onig_reduce_nested_quantifier() for a pair of nested
 * quantifiers. */
enum ReduceType {
  RQ_ASIS = 0,  /* keep both; the child simply becomes the parent's target */
  RQ_DEL  = 1,  /* parent is overwritten by the child */
  RQ_A    = 2,  /* parent becomes greedy {0,inf} on the child's target */
  RQ_AQ   = 3,  /* parent becomes lazy   {0,inf} on the child's target */
  RQ_QQ   = 4,  /* parent becomes lazy   {0,1}   on the child's target */
  RQ_P_QQ = 5,  /* parent: lazy {0,1};   child: greedy {1,inf} */
  RQ_PQ_Q = 6   /* parent: greedy {0,1}; child: lazy {1,inf} */
};

/* Indexed as [child][parent] with the values of popular_quantifier_num():
 * 0..2 are the greedy forms {0,1}, {0,inf}, {1,inf}; 3..5 the lazy ones. */
static enum ReduceType const ReduceTypeTable[6][6] = {
  /* child 0 */ { RQ_DEL,  RQ_A,    RQ_A,   RQ_QQ,   RQ_AQ,   RQ_ASIS },
  /* child 1 */ { RQ_DEL,  RQ_DEL,  RQ_DEL, RQ_P_QQ, RQ_P_QQ, RQ_DEL  },
  /* child 2 */ { RQ_A,    RQ_A,    RQ_DEL, RQ_ASIS, RQ_P_QQ, RQ_DEL  },
  /* child 3 */ { RQ_DEL,  RQ_AQ,   RQ_AQ,  RQ_DEL,  RQ_AQ,   RQ_AQ   },
  /* child 4 */ { RQ_DEL,  RQ_DEL,  RQ_DEL, RQ_DEL,  RQ_DEL,  RQ_DEL  },
  /* child 5 */ { RQ_ASIS, RQ_PQ_Q, RQ_DEL, RQ_AQ,   RQ_AQ,   RQ_DEL  }
};
02229
02230 extern void
02231 onig_reduce_nested_quantifier(Node* pnode, Node* cnode)
02232 {
02233 int pnum, cnum;
02234 QtfrNode *p, *c;
02235
02236 p = NQTFR(pnode);
02237 c = NQTFR(cnode);
02238 pnum = popular_quantifier_num(p);
02239 cnum = popular_quantifier_num(c);
02240 if (pnum < 0 || cnum < 0) return ;
02241
02242 switch(ReduceTypeTable[cnum][pnum]) {
02243 case RQ_DEL:
02244 *pnode = *cnode;
02245 break;
02246 case RQ_A:
02247 p->target = c->target;
02248 p->lower = 0; p->upper = REPEAT_INFINITE; p->greedy = 1;
02249 break;
02250 case RQ_AQ:
02251 p->target = c->target;
02252 p->lower = 0; p->upper = REPEAT_INFINITE; p->greedy = 0;
02253 break;
02254 case RQ_QQ:
02255 p->target = c->target;
02256 p->lower = 0; p->upper = 1; p->greedy = 0;
02257 break;
02258 case RQ_P_QQ:
02259 p->target = cnode;
02260 p->lower = 0; p->upper = 1; p->greedy = 0;
02261 c->lower = 1; c->upper = REPEAT_INFINITE; c->greedy = 1;
02262 return ;
02263 break;
02264 case RQ_PQ_Q:
02265 p->target = cnode;
02266 p->lower = 0; p->upper = 1; p->greedy = 1;
02267 c->lower = 1; c->upper = REPEAT_INFINITE; c->greedy = 0;
02268 return ;
02269 break;
02270 case RQ_ASIS:
02271 p->target = cnode;
02272 return ;
02273 break;
02274 }
02275
02276 c->target = NULL_NODE;
02277 onig_node_free(cnode);
02278 }
02279
02280
/* Token kinds stored in OnigToken.type. */
enum TokenSyms {
  TK_EOT              = 0,
  TK_RAW_BYTE         = 1,
  TK_CHAR             = 2,
  TK_STRING           = 3,
  TK_CODE_POINT       = 4,
  TK_ANYCHAR          = 5,
  TK_CHAR_TYPE        = 6,
  TK_BACKREF          = 7,
  TK_CALL             = 8,
  TK_ANCHOR           = 9,
  TK_OP_REPEAT        = 10,
  TK_INTERVAL         = 11,
  TK_ANYCHAR_ANYTIME  = 12,
  TK_ALT              = 13,
  TK_SUBEXP_OPEN      = 14,
  TK_SUBEXP_CLOSE     = 15,
  TK_CC_OPEN          = 16,
  TK_QUOTE_OPEN       = 17,
  TK_CHAR_PROPERTY    = 18,

  /* second group, named after character-class (CC) constructs */
  TK_CC_CLOSE           = 19,
  TK_CC_RANGE           = 20,
  TK_POSIX_BRACKET_OPEN = 21,
  TK_CC_AND             = 22,
  TK_CC_CC_OPEN         = 23
};
02308
02309 typedef struct {
02310 enum TokenSyms type;
02311 int escaped;
02312 int base;
02313 UChar* backp;
02314 union {
02315 UChar* s;
02316 int c;
02317 OnigCodePoint code;
02318 int anchor;
02319 int subtype;
02320 struct {
02321 int lower;
02322 int upper;
02323 int greedy;
02324 int possessive;
02325 } repeat;
02326 struct {
02327 int num;
02328 int ref1;
02329 int* refs;
02330 int by_name;
02331 #ifdef USE_BACKREF_WITH_LEVEL
02332 int exist_level;
02333 int level;
02334 #endif
02335 } backref;
02336 struct {
02337 UChar* name;
02338 UChar* name_end;
02339 int gnum;
02340 } call;
02341 struct {
02342 int ctype;
02343 int not;
02344 } prop;
02345 } u;
02346 } OnigToken;
02347
02348
02349 static int
02350 fetch_range_quantifier(UChar** src, UChar* end, OnigToken* tok, ScanEnv* env)
02351 {
02352 int low, up, syn_allow, non_low = 0;
02353 int r = 0;
02354 OnigCodePoint c;
02355 OnigEncoding enc = env->enc;
02356 UChar* p = *src;
02357 PFETCH_READY;
02358
02359 syn_allow = IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_INVALID_INTERVAL);
02360
02361 if (PEND) {
02362 if (syn_allow)
02363 return 1;
02364 else
02365 return ONIGERR_END_PATTERN_AT_LEFT_BRACE;
02366 }
02367
02368 if (! syn_allow) {
02369 c = PPEEK;
02370 if (c == ')' || c == '(' || c == '|') {
02371 return ONIGERR_END_PATTERN_AT_LEFT_BRACE;
02372 }
02373 }
02374
02375 low = onig_scan_unsigned_number(&p, end, env->enc);
02376 if (low < 0) return ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE;
02377 if (low > ONIG_MAX_REPEAT_NUM)
02378 return ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE;
02379
02380 if (p == *src) {
02381 if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_INTERVAL_LOW_ABBREV)) {
02382
02383 low = 0;
02384 non_low = 1;
02385 }
02386 else
02387 goto invalid;
02388 }
02389
02390 if (PEND) goto invalid;
02391 PFETCH(c);
02392 if (c == ',') {
02393 UChar* prev = p;
02394 up = onig_scan_unsigned_number(&p, end, env->enc);
02395 if (up < 0) return ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE;
02396 if (up > ONIG_MAX_REPEAT_NUM)
02397 return ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE;
02398
02399 if (p == prev) {
02400 if (non_low != 0)
02401 goto invalid;
02402 up = REPEAT_INFINITE;
02403 }
02404 }
02405 else {
02406 if (non_low != 0)
02407 goto invalid;
02408
02409 PUNFETCH;
02410 up = low;
02411 r = 2;
02412 }
02413
02414 if (PEND) goto invalid;
02415 PFETCH(c);
02416 if (IS_SYNTAX_OP(env->syntax, ONIG_SYN_OP_ESC_BRACE_INTERVAL)) {
02417 if (c != MC_ESC(env->syntax)) goto invalid;
02418 PFETCH(c);
02419 }
02420 if (c != '}') goto invalid;
02421
02422 if (!IS_REPEAT_INFINITE(up) && low > up) {
02423 return ONIGERR_UPPER_SMALLER_THAN_LOWER_IN_REPEAT_RANGE;
02424 }
02425
02426 tok->type = TK_INTERVAL;
02427 tok->u.repeat.lower = low;
02428 tok->u.repeat.upper = up;
02429 *src = p;
02430 return r;
02431
02432 invalid:
02433 if (syn_allow)
02434 return 1;
02435 else
02436 return ONIGERR_INVALID_REPEAT_RANGE_PATTERN;
02437 }
02438
02439
02440 static int
02441 fetch_escaped_value(UChar** src, UChar* end, ScanEnv* env)
02442 {
02443 int v;
02444 OnigCodePoint c;
02445 OnigEncoding enc = env->enc;
02446 UChar* p = *src;
02447 PFETCH_READY;
02448
02449 if (PEND) return ONIGERR_END_PATTERN_AT_ESCAPE;
02450
02451 PFETCH(c);
02452 switch (c) {
02453 case 'M':
02454 if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_ESC_CAPITAL_M_BAR_META)) {
02455 if (PEND) return ONIGERR_END_PATTERN_AT_META;
02456 PFETCH(c);
02457 if (c != '-') return ONIGERR_META_CODE_SYNTAX;
02458 if (PEND) return ONIGERR_END_PATTERN_AT_META;
02459 PFETCH(c);
02460 if (c == MC_ESC(env->syntax)) {
02461 v = fetch_escaped_value(&p, end, env);
02462 if (v < 0) return v;
02463 c = (OnigCodePoint )v;
02464 }
02465 c = ((c & 0xff) | 0x80);
02466 }
02467 else
02468 goto backslash;
02469 break;
02470
02471 case 'C':
02472 if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_ESC_CAPITAL_C_BAR_CONTROL)) {
02473 if (PEND) return ONIGERR_END_PATTERN_AT_CONTROL;
02474 PFETCH(c);
02475 if (c != '-') return ONIGERR_CONTROL_CODE_SYNTAX;
02476 goto control;
02477 }
02478 else
02479 goto backslash;
02480
02481 case 'c':
02482 if (IS_SYNTAX_OP(env->syntax, ONIG_SYN_OP_ESC_C_CONTROL)) {
02483 control:
02484 if (PEND) return ONIGERR_END_PATTERN_AT_CONTROL;
02485 PFETCH(c);
02486 if (c == '?') {
02487 c = 0177;
02488 }
02489 else {
02490 if (c == MC_ESC(env->syntax)) {
02491 v = fetch_escaped_value(&p, end, env);
02492 if (v < 0) return v;
02493 c = (OnigCodePoint )v;
02494 }
02495 c &= 0x9f;
02496 }
02497 break;
02498 }
02499
02500
02501 default:
02502 {
02503 backslash:
02504 c = conv_backslash_value(c, env);
02505 }
02506 break;
02507 }
02508
02509 *src = p;
02510 return c;
02511 }
02512
02513 static int fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env);
02514
02515 static OnigCodePoint
02516 get_name_end_code_point(OnigCodePoint start)
02517 {
02518 switch (start) {
02519 case '<': return (OnigCodePoint )'>'; break;
02520 case '\'': return (OnigCodePoint )'\''; break;
02521 default:
02522 break;
02523 }
02524
02525 return (OnigCodePoint )0;
02526 }
02527
02528 #ifdef USE_NAMED_GROUP
02529 #ifdef USE_BACKREF_WITH_LEVEL
02530
02531
02532
02533
02534
02535 static int
02536 fetch_name_with_level(OnigCodePoint start_code, UChar** src, UChar* end,
02537 UChar** rname_end, ScanEnv* env,
02538 int* rback_num, int* rlevel)
02539 {
02540 int r, sign, is_num, exist_level;
02541 OnigCodePoint end_code;
02542 OnigCodePoint c = 0;
02543 OnigEncoding enc = env->enc;
02544 UChar *name_end;
02545 UChar *pnum_head;
02546 UChar *p = *src;
02547 PFETCH_READY;
02548
02549 *rback_num = 0;
02550 is_num = exist_level = 0;
02551 sign = 1;
02552 pnum_head = *src;
02553
02554 end_code = get_name_end_code_point(start_code);
02555
02556 name_end = end;
02557 r = 0;
02558 if (PEND) {
02559 return ONIGERR_EMPTY_GROUP_NAME;
02560 }
02561 else {
02562 PFETCH(c);
02563 if (c == end_code)
02564 return ONIGERR_EMPTY_GROUP_NAME;
02565
02566 if (ONIGENC_IS_CODE_DIGIT(enc, c)) {
02567 is_num = 1;
02568 }
02569 else if (c == '-') {
02570 is_num = 2;
02571 sign = -1;
02572 pnum_head = p;
02573 }
02574 else if (!ONIGENC_IS_CODE_WORD(enc, c)) {
02575 r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME;
02576 }
02577 }
02578
02579 while (!PEND) {
02580 name_end = p;
02581 PFETCH(c);
02582 if (c == end_code || c == ')' || c == '+' || c == '-') {
02583 if (is_num == 2) r = ONIGERR_INVALID_GROUP_NAME;
02584 break;
02585 }
02586
02587 if (is_num != 0) {
02588 if (ONIGENC_IS_CODE_DIGIT(enc, c)) {
02589 is_num = 1;
02590 }
02591 else {
02592 r = ONIGERR_INVALID_GROUP_NAME;
02593 is_num = 0;
02594 }
02595 }
02596 else if (!ONIGENC_IS_CODE_WORD(enc, c)) {
02597 r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME;
02598 }
02599 }
02600
02601 if (r == 0 && c != end_code) {
02602 if (c == '+' || c == '-') {
02603 int level;
02604 int flag = (c == '-' ? -1 : 1);
02605
02606 PFETCH(c);
02607 if (! ONIGENC_IS_CODE_DIGIT(enc, c)) goto err;
02608 PUNFETCH;
02609 level = onig_scan_unsigned_number(&p, end, enc);
02610 if (level < 0) return ONIGERR_TOO_BIG_NUMBER;
02611 *rlevel = (level * flag);
02612 exist_level = 1;
02613
02614 PFETCH(c);
02615 if (c == end_code)
02616 goto end;
02617 }
02618
02619 err:
02620 r = ONIGERR_INVALID_GROUP_NAME;
02621 name_end = end;
02622 }
02623
02624 end:
02625 if (r == 0) {
02626 if (is_num != 0) {
02627 *rback_num = onig_scan_unsigned_number(&pnum_head, name_end, enc);
02628 if (*rback_num < 0) return ONIGERR_TOO_BIG_NUMBER;
02629 else if (*rback_num == 0) goto err;
02630
02631 *rback_num *= sign;
02632 }
02633
02634 *rname_end = name_end;
02635 *src = p;
02636 return (exist_level ? 1 : 0);
02637 }
02638 else {
02639 onig_scan_env_set_error_string(env, r, *src, name_end);
02640 return r;
02641 }
02642 }
02643 #endif
02644
02645
02646
02647
02648
02649 static int
02650 fetch_name(OnigCodePoint start_code, UChar** src, UChar* end,
02651 UChar** rname_end, ScanEnv* env, int* rback_num, int ref)
02652 {
02653 int r, is_num, sign;
02654 OnigCodePoint end_code;
02655 OnigCodePoint c = 0;
02656 OnigEncoding enc = env->enc;
02657 UChar *name_end;
02658 UChar *pnum_head;
02659 UChar *p = *src;
02660 PFETCH_READY;
02661
02662 *rback_num = 0;
02663
02664 end_code = get_name_end_code_point(start_code);
02665
02666 name_end = end;
02667 pnum_head = *src;
02668 r = 0;
02669 is_num = 0;
02670 sign = 1;
02671 if (PEND) {
02672 return ONIGERR_EMPTY_GROUP_NAME;
02673 }
02674 else {
02675 PFETCH(c);
02676 if (c == end_code)
02677 return ONIGERR_EMPTY_GROUP_NAME;
02678
02679 if (ONIGENC_IS_CODE_DIGIT(enc, c)) {
02680 if (ref == 1)
02681 is_num = 1;
02682 else {
02683 r = ONIGERR_INVALID_GROUP_NAME;
02684 is_num = 0;
02685 }
02686 }
02687 else if (c == '-') {
02688 if (ref == 1) {
02689 is_num = 2;
02690 sign = -1;
02691 pnum_head = p;
02692 }
02693 else {
02694 r = ONIGERR_INVALID_GROUP_NAME;
02695 is_num = 0;
02696 }
02697 }
02698 else if (!ONIGENC_IS_CODE_WORD(enc, c)) {
02699 r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME;
02700 }
02701 }
02702
02703 if (r == 0) {
02704 while (!PEND) {
02705 name_end = p;
02706 PFETCH(c);
02707 if (c == end_code || c == ')') {
02708 if (is_num == 2) r = ONIGERR_INVALID_GROUP_NAME;
02709 break;
02710 }
02711
02712 if (is_num != 0) {
02713 if (ONIGENC_IS_CODE_DIGIT(enc, c)) {
02714 is_num = 1;
02715 }
02716 else {
02717 if (!ONIGENC_IS_CODE_WORD(enc, c))
02718 r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME;
02719 else
02720 r = ONIGERR_INVALID_GROUP_NAME;
02721
02722 is_num = 0;
02723 }
02724 }
02725 else {
02726 if (!ONIGENC_IS_CODE_WORD(enc, c)) {
02727 r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME;
02728 }
02729 }
02730 }
02731
02732 if (c != end_code) {
02733 r = ONIGERR_INVALID_GROUP_NAME;
02734 name_end = end;
02735 }
02736
02737 if (is_num != 0) {
02738 *rback_num = onig_scan_unsigned_number(&pnum_head, name_end, enc);
02739 if (*rback_num < 0) return ONIGERR_TOO_BIG_NUMBER;
02740 else if (*rback_num == 0) {
02741 r = ONIGERR_INVALID_GROUP_NAME;
02742 goto err;
02743 }
02744
02745 *rback_num *= sign;
02746 }
02747
02748 *rname_end = name_end;
02749 *src = p;
02750 return 0;
02751 }
02752 else {
02753 while (!PEND) {
02754 name_end = p;
02755 PFETCH(c);
02756 if (c == end_code || c == ')')
02757 break;
02758 }
02759 if (PEND)
02760 name_end = end;
02761
02762 err:
02763 onig_scan_env_set_error_string(env, r, *src, name_end);
02764 return r;
02765 }
02766 }
02767 #else
02768 static int
02769 fetch_name(OnigCodePoint start_code, UChar** src, UChar* end,
02770 UChar** rname_end, ScanEnv* env, int* rback_num, int ref)
02771 {
02772 int r, is_num, sign;
02773 OnigCodePoint end_code;
02774 OnigCodePoint c = 0;
02775 UChar *name_end;
02776 OnigEncoding enc = env->enc;
02777 UChar *pnum_head;
02778 UChar *p = *src;
02779 PFETCH_READY;
02780
02781 *rback_num = 0;
02782
02783 end_code = get_name_end_code_point(start_code);
02784
02785 *rname_end = name_end = end;
02786 r = 0;
02787 pnum_head = *src;
02788 is_num = 0;
02789 sign = 1;
02790
02791 if (PEND) {
02792 return ONIGERR_EMPTY_GROUP_NAME;
02793 }
02794 else {
02795 PFETCH(c);
02796 if (c == end_code)
02797 return ONIGERR_EMPTY_GROUP_NAME;
02798
02799 if (ONIGENC_IS_CODE_DIGIT(enc, c)) {
02800 is_num = 1;
02801 }
02802 else if (c == '-') {
02803 is_num = 2;
02804 sign = -1;
02805 pnum_head = p;
02806 }
02807 else {
02808 r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME;
02809 }
02810 }
02811
02812 while (!PEND) {
02813 name_end = p;
02814
02815 PFETCH(c);
02816 if (c == end_code || c == ')') break;
02817 if (! ONIGENC_IS_CODE_DIGIT(enc, c))
02818 r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME;
02819 }
02820 if (r == 0 && c != end_code) {
02821 r = ONIGERR_INVALID_GROUP_NAME;
02822 name_end = end;
02823 }
02824
02825 if (r == 0) {
02826 *rback_num = onig_scan_unsigned_number(&pnum_head, name_end, enc);
02827 if (*rback_num < 0) return ONIGERR_TOO_BIG_NUMBER;
02828 else if (*rback_num == 0) {
02829 r = ONIGERR_INVALID_GROUP_NAME;
02830 goto err;
02831 }
02832 *rback_num *= sign;
02833
02834 *rname_end = name_end;
02835 *src = p;
02836 return 0;
02837 }
02838 else {
02839 err:
02840 onig_scan_env_set_error_string(env, r, *src, name_end);
02841 return r;
02842 }
02843 }
02844 #endif
02845
02846 void onig_vsnprintf_with_pattern(UChar buf[], int bufsize, OnigEncoding enc,
02847 UChar* pat, UChar* pat_end, const UChar *fmt, va_list args);
02848
02849 static void
02850 onig_syntax_warn(ScanEnv *env, const char *fmt, ...)
02851 {
02852 va_list args;
02853 UChar buf[WARN_BUFSIZE];
02854 va_start(args, fmt);
02855 onig_vsnprintf_with_pattern(buf, WARN_BUFSIZE, env->enc,
02856 env->pattern, env->pattern_end,
02857 (const UChar *)fmt, args);
02858 va_end(args);
02859 if (env->sourcefile == NULL)
02860 rb_warn("%s", (char *)buf);
02861 else
02862 rb_compile_warn(env->sourcefile, env->sourceline, "%s", (char *)buf);
02863 }
02864
02865 static void
02866 CC_ESC_WARN(ScanEnv *env, UChar *c)
02867 {
02868 if (onig_warn == onig_null_warn) return ;
02869
02870 if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_WARN_CC_OP_NOT_ESCAPED) &&
02871 IS_SYNTAX_BV(env->syntax, ONIG_SYN_BACKSLASH_ESCAPE_IN_CC)) {
02872 onig_syntax_warn(env, "character class has '%s' without escape", c);
02873 }
02874 }
02875
02876 static void
02877 CLOSE_BRACKET_WITHOUT_ESC_WARN(ScanEnv* env, UChar* c)
02878 {
02879 if (onig_warn == onig_null_warn) return ;
02880
02881 if (IS_SYNTAX_BV((env)->syntax, ONIG_SYN_WARN_CC_OP_NOT_ESCAPED)) {
02882 onig_syntax_warn(env, "regular expression has '%s' without escape", c);
02883 }
02884 }
02885
02886 static void
02887 CC_DUP_WARN(ScanEnv *env)
02888 {
02889 if (onig_warn == onig_null_warn || !RTEST(ruby_verbose)) return ;
02890
02891 if (IS_SYNTAX_BV((env)->syntax, ONIG_SYN_WARN_CC_DUP) &&
02892 !((env)->warnings_flag & ONIG_SYN_WARN_CC_DUP)) {
02893 (env)->warnings_flag |= ONIG_SYN_WARN_CC_DUP;
02894 onig_syntax_warn(env, "character class has duplicated range");
02895 }
02896 }
02897
02898 static void
02899 UNKNOWN_ESC_WARN(ScanEnv *env, int c)
02900 {
02901 if (onig_warn == onig_null_warn || !RTEST(ruby_verbose)) return ;
02902 onig_syntax_warn(env, "Unknown escape \\%c is ignored", c);
02903 }
02904
02905 static UChar*
02906 find_str_position(OnigCodePoint s[], int n, UChar* from, UChar* to,
02907 UChar **next, OnigEncoding enc)
02908 {
02909 int i;
02910 OnigCodePoint x;
02911 UChar *q;
02912 UChar *p = from;
02913
02914 while (p < to) {
02915 x = ONIGENC_MBC_TO_CODE(enc, p, to);
02916 q = p + enclen(enc, p, to);
02917 if (x == s[0]) {
02918 for (i = 1; i < n && q < to; i++) {
02919 x = ONIGENC_MBC_TO_CODE(enc, q, to);
02920 if (x != s[i]) break;
02921 q += enclen(enc, q, to);
02922 }
02923 if (i >= n) {
02924 if (IS_NOT_NULL(next))
02925 *next = q;
02926 return p;
02927 }
02928 }
02929 p = q;
02930 }
02931 return NULL_UCHARP;
02932 }
02933
02934 static int
02935 str_exist_check_with_esc(OnigCodePoint s[], int n, UChar* from, UChar* to,
02936 OnigCodePoint bad, OnigEncoding enc, const OnigSyntaxType* syn)
02937 {
02938 int i, in_esc;
02939 OnigCodePoint x;
02940 UChar *q;
02941 UChar *p = from;
02942
02943 in_esc = 0;
02944 while (p < to) {
02945 if (in_esc) {
02946 in_esc = 0;
02947 p += enclen(enc, p, to);
02948 }
02949 else {
02950 x = ONIGENC_MBC_TO_CODE(enc, p, to);
02951 q = p + enclen(enc, p, to);
02952 if (x == s[0]) {
02953 for (i = 1; i < n && q < to; i++) {
02954 x = ONIGENC_MBC_TO_CODE(enc, q, to);
02955 if (x != s[i]) break;
02956 q += enclen(enc, q, to);
02957 }
02958 if (i >= n) return 1;
02959 p += enclen(enc, p, to);
02960 }
02961 else {
02962 x = ONIGENC_MBC_TO_CODE(enc, p, to);
02963 if (x == bad) return 0;
02964 else if (x == MC_ESC(syn)) in_esc = 1;
02965 p = q;
02966 }
02967 }
02968 }
02969 return 0;
02970 }
02971
02972 static int
02973 fetch_token_in_cc(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)
02974 {
02975 int num;
02976 OnigCodePoint c, c2;
02977 const OnigSyntaxType* syn = env->syntax;
02978 OnigEncoding enc = env->enc;
02979 UChar* prev;
02980 UChar* p = *src;
02981 PFETCH_READY;
02982
02983 if (PEND) {
02984 tok->type = TK_EOT;
02985 return tok->type;
02986 }
02987
02988 PFETCH(c);
02989 tok->type = TK_CHAR;
02990 tok->base = 0;
02991 tok->u.c = c;
02992 tok->escaped = 0;
02993
02994 if (c == ']') {
02995 tok->type = TK_CC_CLOSE;
02996 }
02997 else if (c == '-') {
02998 tok->type = TK_CC_RANGE;
02999 }
03000 else if (c == MC_ESC(syn)) {
03001 if (! IS_SYNTAX_BV(syn, ONIG_SYN_BACKSLASH_ESCAPE_IN_CC))
03002 goto end;
03003
03004 if (PEND) return ONIGERR_END_PATTERN_AT_ESCAPE;
03005
03006 PFETCH(c);
03007 tok->escaped = 1;
03008 tok->u.c = c;
03009 switch (c) {
03010 case 'w':
03011 tok->type = TK_CHAR_TYPE;
03012 tok->u.prop.ctype = ONIGENC_CTYPE_W;
03013 tok->u.prop.not = 0;
03014 break;
03015 case 'W':
03016 tok->type = TK_CHAR_TYPE;
03017 tok->u.prop.ctype = ONIGENC_CTYPE_W;
03018 tok->u.prop.not = 1;
03019 break;
03020 case 'd':
03021 tok->type = TK_CHAR_TYPE;
03022 tok->u.prop.ctype = ONIGENC_CTYPE_D;
03023 tok->u.prop.not = 0;
03024 break;
03025 case 'D':
03026 tok->type = TK_CHAR_TYPE;
03027 tok->u.prop.ctype = ONIGENC_CTYPE_D;
03028 tok->u.prop.not = 1;
03029 break;
03030 case 's':
03031 tok->type = TK_CHAR_TYPE;
03032 tok->u.prop.ctype = ONIGENC_CTYPE_S;
03033 tok->u.prop.not = 0;
03034 break;
03035 case 'S':
03036 tok->type = TK_CHAR_TYPE;
03037 tok->u.prop.ctype = ONIGENC_CTYPE_S;
03038 tok->u.prop.not = 1;
03039 break;
03040 case 'h':
03041 if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_H_XDIGIT)) break;
03042 tok->type = TK_CHAR_TYPE;
03043 tok->u.prop.ctype = ONIGENC_CTYPE_XDIGIT;
03044 tok->u.prop.not = 0;
03045 break;
03046 case 'H':
03047 if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_H_XDIGIT)) break;
03048 tok->type = TK_CHAR_TYPE;
03049 tok->u.prop.ctype = ONIGENC_CTYPE_XDIGIT;
03050 tok->u.prop.not = 1;
03051 break;
03052
03053 case 'p':
03054 case 'P':
03055 c2 = PPEEK;
03056 if (c2 == '{' &&
03057 IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY)) {
03058 PINC;
03059 tok->type = TK_CHAR_PROPERTY;
03060 tok->u.prop.not = (c == 'P' ? 1 : 0);
03061
03062 if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT)) {
03063 PFETCH(c2);
03064 if (c2 == '^') {
03065 tok->u.prop.not = (tok->u.prop.not == 0 ? 1 : 0);
03066 }
03067 else
03068 PUNFETCH;
03069 }
03070 }
03071 else {
03072 onig_syntax_warn(env, "invalid Unicode Property \\%c", c);
03073 }
03074 break;
03075
03076 case 'x':
03077 if (PEND) break;
03078
03079 prev = p;
03080 if (PPEEK_IS('{') && IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_X_BRACE_HEX8)) {
03081 PINC;
03082 num = scan_unsigned_hexadecimal_number(&p, end, 8, enc);
03083 if (num < 0) return ONIGERR_TOO_BIG_WIDE_CHAR_VALUE;
03084 if (!PEND) {
03085 c2 = PPEEK;
03086 if (ONIGENC_IS_CODE_XDIGIT(enc, c2))
03087 return ONIGERR_TOO_LONG_WIDE_CHAR_VALUE;
03088 }
03089
03090 if (p > prev + enclen(enc, prev, end) && !PEND && (PPEEK_IS('}'))) {
03091 PINC;
03092 tok->type = TK_CODE_POINT;
03093 tok->base = 16;
03094 tok->u.code = (OnigCodePoint )num;
03095 }
03096 else {
03097
03098 p = prev;
03099 }
03100 }
03101 else if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_X_HEX2)) {
03102 num = scan_unsigned_hexadecimal_number(&p, end, 2, enc);
03103 if (num < 0) return ONIGERR_TOO_BIG_NUMBER;
03104 if (p == prev) {
03105 num = 0;
03106 }
03107 tok->type = TK_RAW_BYTE;
03108 tok->base = 16;
03109 tok->u.c = num;
03110 }
03111 break;
03112
03113 case 'u':
03114 if (PEND) break;
03115
03116 prev = p;
03117 if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_U_HEX4)) {
03118 num = scan_unsigned_hexadecimal_number(&p, end, 4, enc);
03119 if (num < 0) return ONIGERR_TOO_BIG_NUMBER;
03120 if (p == prev) {
03121 num = 0;
03122 }
03123 tok->type = TK_CODE_POINT;
03124 tok->base = 16;
03125 tok->u.code = (OnigCodePoint )num;
03126 }
03127 break;
03128
03129 case '0':
03130 case '1': case '2': case '3': case '4': case '5': case '6': case '7':
03131 if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_OCTAL3)) {
03132 PUNFETCH;
03133 prev = p;
03134 num = scan_unsigned_octal_number(&p, end, 3, enc);
03135 if (num < 0) return ONIGERR_TOO_BIG_NUMBER;
03136 if (p == prev) {
03137 num = 0;
03138 }
03139 tok->type = TK_RAW_BYTE;
03140 tok->base = 8;
03141 tok->u.c = num;
03142 }
03143 break;
03144
03145 default:
03146 PUNFETCH;
03147 num = fetch_escaped_value(&p, end, env);
03148 if (num < 0) return num;
03149 if (tok->u.c != num) {
03150 tok->u.code = (OnigCodePoint )num;
03151 tok->type = TK_CODE_POINT;
03152 }
03153 break;
03154 }
03155 }
03156 else if (c == '[') {
03157 if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_POSIX_BRACKET) && (PPEEK_IS(':'))) {
03158 OnigCodePoint send[] = { (OnigCodePoint )':', (OnigCodePoint )']' };
03159 tok->backp = p;
03160 PINC;
03161 if (str_exist_check_with_esc(send, 2, p, end,
03162 (OnigCodePoint )']', enc, syn)) {
03163 tok->type = TK_POSIX_BRACKET_OPEN;
03164 }
03165 else {
03166 PUNFETCH;
03167 goto cc_in_cc;
03168 }
03169 }
03170 else {
03171 cc_in_cc:
03172 if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_CCLASS_SET_OP)) {
03173 tok->type = TK_CC_CC_OPEN;
03174 }
03175 else {
03176 CC_ESC_WARN(env, (UChar* )"[");
03177 }
03178 }
03179 }
03180 else if (c == '&') {
03181 if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_CCLASS_SET_OP) &&
03182 !PEND && (PPEEK_IS('&'))) {
03183 PINC;
03184 tok->type = TK_CC_AND;
03185 }
03186 }
03187
03188 end:
03189 *src = p;
03190 return tok->type;
03191 }
03192
03193 static int
03194 fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)
03195 {
03196 int r, num;
03197 OnigCodePoint c;
03198 OnigEncoding enc = env->enc;
03199 const OnigSyntaxType* syn = env->syntax;
03200 UChar* prev;
03201 UChar* p = *src;
03202 PFETCH_READY;
03203
03204 start:
03205 if (PEND) {
03206 tok->type = TK_EOT;
03207 return tok->type;
03208 }
03209
03210 tok->type = TK_STRING;
03211 tok->base = 0;
03212 tok->backp = p;
03213
03214 PFETCH(c);
03215 if (IS_MC_ESC_CODE(c, syn)) {
03216 if (PEND) return ONIGERR_END_PATTERN_AT_ESCAPE;
03217
03218 tok->backp = p;
03219 PFETCH(c);
03220
03221 tok->u.c = c;
03222 tok->escaped = 1;
03223 switch (c) {
03224 case '*':
03225 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_ASTERISK_ZERO_INF)) break;
03226 tok->type = TK_OP_REPEAT;
03227 tok->u.repeat.lower = 0;
03228 tok->u.repeat.upper = REPEAT_INFINITE;
03229 goto greedy_check;
03230 break;
03231
03232 case '+':
03233 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_PLUS_ONE_INF)) break;
03234 tok->type = TK_OP_REPEAT;
03235 tok->u.repeat.lower = 1;
03236 tok->u.repeat.upper = REPEAT_INFINITE;
03237 goto greedy_check;
03238 break;
03239
03240 case '?':
03241 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_QMARK_ZERO_ONE)) break;
03242 tok->type = TK_OP_REPEAT;
03243 tok->u.repeat.lower = 0;
03244 tok->u.repeat.upper = 1;
03245 greedy_check:
03246 if (!PEND && PPEEK_IS('?') &&
03247 IS_SYNTAX_OP(syn, ONIG_SYN_OP_QMARK_NON_GREEDY)) {
03248 PFETCH(c);
03249 tok->u.repeat.greedy = 0;
03250 tok->u.repeat.possessive = 0;
03251 }
03252 else {
03253 possessive_check:
03254 if (!PEND && PPEEK_IS('+') &&
03255 ((IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_PLUS_POSSESSIVE_REPEAT) &&
03256 tok->type != TK_INTERVAL) ||
03257 (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_PLUS_POSSESSIVE_INTERVAL) &&
03258 tok->type == TK_INTERVAL))) {
03259 PFETCH(c);
03260 tok->u.repeat.greedy = 1;
03261 tok->u.repeat.possessive = 1;
03262 }
03263 else {
03264 tok->u.repeat.greedy = 1;
03265 tok->u.repeat.possessive = 0;
03266 }
03267 }
03268 break;
03269
03270 case '{':
03271 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_BRACE_INTERVAL)) break;
03272 r = fetch_range_quantifier(&p, end, tok, env);
03273 if (r < 0) return r;
03274 if (r == 0) goto greedy_check;
03275 else if (r == 2) {
03276 if (IS_SYNTAX_BV(syn, ONIG_SYN_FIXED_INTERVAL_IS_GREEDY_ONLY))
03277 goto possessive_check;
03278
03279 goto greedy_check;
03280 }
03281
03282 break;
03283
03284 case '|':
03285 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_VBAR_ALT)) break;
03286 tok->type = TK_ALT;
03287 break;
03288
03289 case '(':
03290 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_LPAREN_SUBEXP)) break;
03291 tok->type = TK_SUBEXP_OPEN;
03292 break;
03293
03294 case ')':
03295 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_LPAREN_SUBEXP)) break;
03296 tok->type = TK_SUBEXP_CLOSE;
03297 break;
03298
03299 case 'w':
03300 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_W_WORD)) break;
03301 tok->type = TK_CHAR_TYPE;
03302 tok->u.prop.ctype = ONIGENC_CTYPE_W;
03303 tok->u.prop.not = 0;
03304 break;
03305
03306 case 'W':
03307 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_W_WORD)) break;
03308 tok->type = TK_CHAR_TYPE;
03309 tok->u.prop.ctype = ONIGENC_CTYPE_W;
03310 tok->u.prop.not = 1;
03311 break;
03312
03313 case 'b':
03314 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_B_WORD_BOUND)) break;
03315 tok->type = TK_ANCHOR;
03316 tok->u.anchor = ANCHOR_WORD_BOUND;
03317 break;
03318
03319 case 'B':
03320 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_B_WORD_BOUND)) break;
03321 tok->type = TK_ANCHOR;
03322 tok->u.anchor = ANCHOR_NOT_WORD_BOUND;
03323 break;
03324
03325 #ifdef USE_WORD_BEGIN_END
03326 case '<':
03327 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END)) break;
03328 tok->type = TK_ANCHOR;
03329 tok->u.anchor = ANCHOR_WORD_BEGIN;
03330 break;
03331
03332 case '>':
03333 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END)) break;
03334 tok->type = TK_ANCHOR;
03335 tok->u.anchor = ANCHOR_WORD_END;
03336 break;
03337 #endif
03338
03339 case 's':
03340 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_S_WHITE_SPACE)) break;
03341 tok->type = TK_CHAR_TYPE;
03342 tok->u.prop.ctype = ONIGENC_CTYPE_S;
03343 tok->u.prop.not = 0;
03344 break;
03345
03346 case 'S':
03347 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_S_WHITE_SPACE)) break;
03348 tok->type = TK_CHAR_TYPE;
03349 tok->u.prop.ctype = ONIGENC_CTYPE_S;
03350 tok->u.prop.not = 1;
03351 break;
03352
03353 case 'd':
03354 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_D_DIGIT)) break;
03355 tok->type = TK_CHAR_TYPE;
03356 tok->u.prop.ctype = ONIGENC_CTYPE_D;
03357 tok->u.prop.not = 0;
03358 break;
03359
03360 case 'D':
03361 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_D_DIGIT)) break;
03362 tok->type = TK_CHAR_TYPE;
03363 tok->u.prop.ctype = ONIGENC_CTYPE_D;
03364 tok->u.prop.not = 1;
03365 break;
03366
03367 case 'h':
03368 if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_H_XDIGIT)) break;
03369 tok->type = TK_CHAR_TYPE;
03370 tok->u.prop.ctype = ONIGENC_CTYPE_XDIGIT;
03371 tok->u.prop.not = 0;
03372 break;
03373
03374 case 'H':
03375 if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_H_XDIGIT)) break;
03376 tok->type = TK_CHAR_TYPE;
03377 tok->u.prop.ctype = ONIGENC_CTYPE_XDIGIT;
03378 tok->u.prop.not = 1;
03379 break;
03380
03381 case 'A':
03382 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_AZ_BUF_ANCHOR)) break;
03383 begin_buf:
03384 tok->type = TK_ANCHOR;
03385 tok->u.subtype = ANCHOR_BEGIN_BUF;
03386 break;
03387
03388 case 'Z':
03389 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_AZ_BUF_ANCHOR)) break;
03390 tok->type = TK_ANCHOR;
03391 tok->u.subtype = ANCHOR_SEMI_END_BUF;
03392 break;
03393
03394 case 'z':
03395 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_AZ_BUF_ANCHOR)) break;
03396 end_buf:
03397 tok->type = TK_ANCHOR;
03398 tok->u.subtype = ANCHOR_END_BUF;
03399 break;
03400
03401 case 'G':
03402 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_CAPITAL_G_BEGIN_ANCHOR)) break;
03403 tok->type = TK_ANCHOR;
03404 tok->u.subtype = ANCHOR_BEGIN_POSITION;
03405 break;
03406
03407 case '`':
03408 if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_GNU_BUF_ANCHOR)) break;
03409 goto begin_buf;
03410 break;
03411
03412 case '\'':
03413 if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_GNU_BUF_ANCHOR)) break;
03414 goto end_buf;
03415 break;
03416
03417 case 'x':
03418 if (PEND) break;
03419
03420 prev = p;
03421 if (PPEEK_IS('{') && IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_X_BRACE_HEX8)) {
03422 PINC;
03423 num = scan_unsigned_hexadecimal_number(&p, end, 8, enc);
03424 if (num < 0) return ONIGERR_TOO_BIG_WIDE_CHAR_VALUE;
03425 if (!PEND) {
03426 if (ONIGENC_IS_CODE_XDIGIT(enc, PPEEK))
03427 return ONIGERR_TOO_LONG_WIDE_CHAR_VALUE;
03428 }
03429
03430 if ((p > prev + enclen(enc, prev, end)) && !PEND && PPEEK_IS('}')) {
03431 PINC;
03432 tok->type = TK_CODE_POINT;
03433 tok->u.code = (OnigCodePoint )num;
03434 }
03435 else {
03436
03437 p = prev;
03438 }
03439 }
03440 else if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_X_HEX2)) {
03441 num = scan_unsigned_hexadecimal_number(&p, end, 2, enc);
03442 if (num < 0) return ONIGERR_TOO_BIG_NUMBER;
03443 if (p == prev) {
03444 num = 0;
03445 }
03446 tok->type = TK_RAW_BYTE;
03447 tok->base = 16;
03448 tok->u.c = num;
03449 }
03450 break;
03451
03452 case 'u':
03453 if (PEND) break;
03454
03455 prev = p;
03456 if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_U_HEX4)) {
03457 num = scan_unsigned_hexadecimal_number(&p, end, 4, enc);
03458 if (num < 0) return ONIGERR_TOO_BIG_NUMBER;
03459 if (p == prev) {
03460 num = 0;
03461 }
03462 tok->type = TK_CODE_POINT;
03463 tok->base = 16;
03464 tok->u.code = (OnigCodePoint )num;
03465 }
03466 break;
03467
03468 case '1': case '2': case '3': case '4':
03469 case '5': case '6': case '7': case '8': case '9':
03470 PUNFETCH;
03471 prev = p;
03472 num = onig_scan_unsigned_number(&p, end, enc);
03473 if (num < 0 || num > ONIG_MAX_BACKREF_NUM) {
03474 goto skip_backref;
03475 }
03476
03477 if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_DECIMAL_BACKREF) &&
03478 (num <= env->num_mem || num <= 9)) {
03479 if (IS_SYNTAX_BV(syn, ONIG_SYN_STRICT_CHECK_BACKREF)) {
03480 if (num > env->num_mem || IS_NULL(SCANENV_MEM_NODES(env)[num]))
03481 return ONIGERR_INVALID_BACKREF;
03482 }
03483
03484 tok->type = TK_BACKREF;
03485 tok->u.backref.num = 1;
03486 tok->u.backref.ref1 = num;
03487 tok->u.backref.by_name = 0;
03488 #ifdef USE_BACKREF_WITH_LEVEL
03489 tok->u.backref.exist_level = 0;
03490 #endif
03491 break;
03492 }
03493
03494 skip_backref:
03495 if (c == '8' || c == '9') {
03496
03497 p = prev; PINC;
03498 break;
03499 }
03500
03501 p = prev;
03502
03503 case '0':
03504 if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_OCTAL3)) {
03505 prev = p;
03506 num = scan_unsigned_octal_number(&p, end, (c == '0' ? 2:3), enc);
03507 if (num < 0) return ONIGERR_TOO_BIG_NUMBER;
03508 if (p == prev) {
03509 num = 0;
03510 }
03511 tok->type = TK_RAW_BYTE;
03512 tok->base = 8;
03513 tok->u.c = num;
03514 }
03515 else if (c != '0') {
03516 PINC;
03517 }
03518 break;
03519
03520 #ifdef USE_NAMED_GROUP
03521 case 'k':
03522 if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_K_NAMED_BACKREF)) {
03523 PFETCH(c);
03524 if (c == '<' || c == '\'') {
03525 UChar* name_end;
03526 int* backs;
03527 int back_num;
03528
03529 prev = p;
03530
03531 #ifdef USE_BACKREF_WITH_LEVEL
03532 name_end = NULL_UCHARP;
03533 r = fetch_name_with_level((OnigCodePoint )c, &p, end, &name_end,
03534 env, &back_num, &tok->u.backref.level);
03535 if (r == 1) tok->u.backref.exist_level = 1;
03536 else tok->u.backref.exist_level = 0;
03537 #else
03538 r = fetch_name(&p, end, &name_end, env, &back_num, 1);
03539 #endif
03540 if (r < 0) return r;
03541
03542 if (back_num != 0) {
03543 if (back_num < 0) {
03544 back_num = BACKREF_REL_TO_ABS(back_num, env);
03545 if (back_num <= 0)
03546 return ONIGERR_INVALID_BACKREF;
03547 }
03548
03549 if (IS_SYNTAX_BV(syn, ONIG_SYN_STRICT_CHECK_BACKREF)) {
03550 if (back_num > env->num_mem ||
03551 IS_NULL(SCANENV_MEM_NODES(env)[back_num]))
03552 return ONIGERR_INVALID_BACKREF;
03553 }
03554 tok->type = TK_BACKREF;
03555 tok->u.backref.by_name = 0;
03556 tok->u.backref.num = 1;
03557 tok->u.backref.ref1 = back_num;
03558 }
03559 else {
03560 num = onig_name_to_group_numbers(env->reg, prev, name_end, &backs);
03561 if (num <= 0) {
03562 onig_scan_env_set_error_string(env,
03563 ONIGERR_UNDEFINED_NAME_REFERENCE, prev, name_end);
03564 return ONIGERR_UNDEFINED_NAME_REFERENCE;
03565 }
03566 if (IS_SYNTAX_BV(syn, ONIG_SYN_STRICT_CHECK_BACKREF)) {
03567 int i;
03568 for (i = 0; i < num; i++) {
03569 if (backs[i] > env->num_mem ||
03570 IS_NULL(SCANENV_MEM_NODES(env)[backs[i]]))
03571 return ONIGERR_INVALID_BACKREF;
03572 }
03573 }
03574
03575 tok->type = TK_BACKREF;
03576 tok->u.backref.by_name = 1;
03577 if (num == 1) {
03578 tok->u.backref.num = 1;
03579 tok->u.backref.ref1 = backs[0];
03580 }
03581 else {
03582 tok->u.backref.num = num;
03583 tok->u.backref.refs = backs;
03584 }
03585 }
03586 }
03587 else {
03588 PUNFETCH;
03589 onig_syntax_warn(env, "invalid back reference");
03590 }
03591 }
03592 break;
03593 #endif
03594
03595 #ifdef USE_SUBEXP_CALL
03596 case 'g':
03597 if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_G_SUBEXP_CALL)) {
03598 PFETCH(c);
03599 if (c == '<' || c == '\'') {
03600 int gnum;
03601 UChar* name_end;
03602
03603 prev = p;
03604 r = fetch_name((OnigCodePoint )c, &p, end, &name_end, env, &gnum, 1);
03605 if (r < 0) return r;
03606
03607 tok->type = TK_CALL;
03608 tok->u.call.name = prev;
03609 tok->u.call.name_end = name_end;
03610 tok->u.call.gnum = gnum;
03611 }
03612 else {
03613 onig_syntax_warn(env, "invalid subexp call");
03614 PUNFETCH;
03615 }
03616 }
03617 break;
03618 #endif
03619
03620 case 'Q':
03621 if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_CAPITAL_Q_QUOTE)) {
03622 tok->type = TK_QUOTE_OPEN;
03623 }
03624 break;
03625
03626 case 'p':
03627 case 'P':
03628 if (PPEEK_IS('{') &&
03629 IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY)) {
03630 PINC;
03631 tok->type = TK_CHAR_PROPERTY;
03632 tok->u.prop.not = (c == 'P' ? 1 : 0);
03633
03634 if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT)) {
03635 PFETCH(c);
03636 if (c == '^') {
03637 tok->u.prop.not = (tok->u.prop.not == 0 ? 1 : 0);
03638 }
03639 else
03640 PUNFETCH;
03641 }
03642 }
03643 else {
03644 onig_syntax_warn(env, "invalid Unicode Property \\%c", c);
03645 }
03646 break;
03647
03648 default:
03649 PUNFETCH;
03650 num = fetch_escaped_value(&p, end, env);
03651 if (num < 0) return num;
03652
03653 if (tok->u.c != num) {
03654 tok->type = TK_CODE_POINT;
03655 tok->u.code = (OnigCodePoint )num;
03656 }
03657 else {
03658 p = tok->backp + enclen(enc, tok->backp, end);
03659 }
03660 break;
03661 }
03662 }
03663 else {
03664 tok->u.c = c;
03665 tok->escaped = 0;
03666
03667 #ifdef USE_VARIABLE_META_CHARS
03668 if ((c != ONIG_INEFFECTIVE_META_CHAR) &&
03669 IS_SYNTAX_OP(syn, ONIG_SYN_OP_VARIABLE_META_CHARACTERS)) {
03670 if (c == MC_ANYCHAR(syn))
03671 goto any_char;
03672 else if (c == MC_ANYTIME(syn))
03673 goto anytime;
03674 else if (c == MC_ZERO_OR_ONE_TIME(syn))
03675 goto zero_or_one_time;
03676 else if (c == MC_ONE_OR_MORE_TIME(syn))
03677 goto one_or_more_time;
03678 else if (c == MC_ANYCHAR_ANYTIME(syn)) {
03679 tok->type = TK_ANYCHAR_ANYTIME;
03680 goto out;
03681 }
03682 }
03683 #endif
03684
03685 switch (c) {
03686 case '.':
03687 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_DOT_ANYCHAR)) break;
03688 #ifdef USE_VARIABLE_META_CHARS
03689 any_char:
03690 #endif
03691 tok->type = TK_ANYCHAR;
03692 break;
03693
03694 case '*':
03695 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ASTERISK_ZERO_INF)) break;
03696 #ifdef USE_VARIABLE_META_CHARS
03697 anytime:
03698 #endif
03699 tok->type = TK_OP_REPEAT;
03700 tok->u.repeat.lower = 0;
03701 tok->u.repeat.upper = REPEAT_INFINITE;
03702 goto greedy_check;
03703 break;
03704
03705 case '+':
03706 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_PLUS_ONE_INF)) break;
03707 #ifdef USE_VARIABLE_META_CHARS
03708 one_or_more_time:
03709 #endif
03710 tok->type = TK_OP_REPEAT;
03711 tok->u.repeat.lower = 1;
03712 tok->u.repeat.upper = REPEAT_INFINITE;
03713 goto greedy_check;
03714 break;
03715
03716 case '?':
03717 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_QMARK_ZERO_ONE)) break;
03718 #ifdef USE_VARIABLE_META_CHARS
03719 zero_or_one_time:
03720 #endif
03721 tok->type = TK_OP_REPEAT;
03722 tok->u.repeat.lower = 0;
03723 tok->u.repeat.upper = 1;
03724 goto greedy_check;
03725 break;
03726
03727 case '{':
03728 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_BRACE_INTERVAL)) break;
03729 r = fetch_range_quantifier(&p, end, tok, env);
03730 if (r < 0) return r;
03731 if (r == 0) goto greedy_check;
03732 else if (r == 2) {
03733 if (IS_SYNTAX_BV(syn, ONIG_SYN_FIXED_INTERVAL_IS_GREEDY_ONLY))
03734 goto possessive_check;
03735
03736 goto greedy_check;
03737 }
03738
03739 break;
03740
03741 case '|':
03742 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_VBAR_ALT)) break;
03743 tok->type = TK_ALT;
03744 break;
03745
03746 case '(':
03747 if (PPEEK_IS('?') &&
03748 IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_QMARK_GROUP_EFFECT)) {
03749 PINC;
03750 if (PPEEK_IS('#')) {
03751 PFETCH(c);
03752 while (1) {
03753 if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;
03754 PFETCH(c);
03755 if (c == MC_ESC(syn)) {
03756 if (!PEND) PFETCH(c);
03757 }
03758 else {
03759 if (c == ')') break;
03760 }
03761 }
03762 goto start;
03763 }
03764 PUNFETCH;
03765 }
03766
03767 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_LPAREN_SUBEXP)) break;
03768 tok->type = TK_SUBEXP_OPEN;
03769 break;
03770
03771 case ')':
03772 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_LPAREN_SUBEXP)) break;
03773 tok->type = TK_SUBEXP_CLOSE;
03774 break;
03775
03776 case '^':
03777 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_LINE_ANCHOR)) break;
03778 tok->type = TK_ANCHOR;
03779 tok->u.subtype = (IS_SINGLELINE(env->option)
03780 ? ANCHOR_BEGIN_BUF : ANCHOR_BEGIN_LINE);
03781 break;
03782
03783 case '$':
03784 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_LINE_ANCHOR)) break;
03785 tok->type = TK_ANCHOR;
03786 tok->u.subtype = (IS_SINGLELINE(env->option)
03787 ? ANCHOR_SEMI_END_BUF : ANCHOR_END_LINE);
03788 break;
03789
03790 case '[':
03791 if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_BRACKET_CC)) break;
03792 tok->type = TK_CC_OPEN;
03793 break;
03794
03795 case ']':
03796 if (*src > env->pattern)
03797 CLOSE_BRACKET_WITHOUT_ESC_WARN(env, (UChar* )"]");
03798 break;
03799
03800 case '#':
03801 if (IS_EXTEND(env->option)) {
03802 while (!PEND) {
03803 PFETCH(c);
03804 if (ONIGENC_IS_CODE_NEWLINE(enc, c))
03805 break;
03806 }
03807 goto start;
03808 break;
03809 }
03810 break;
03811
03812 case ' ': case '\t': case '\n': case '\r': case '\f':
03813 if (IS_EXTEND(env->option))
03814 goto start;
03815 break;
03816
03817 default:
03818
03819 break;
03820 }
03821 }
03822
03823 #ifdef USE_VARIABLE_META_CHARS
03824 out:
03825 #endif
03826 *src = p;
03827 return tok->type;
03828 }
03829
03830 static int
03831 add_ctype_to_cc_by_range(CClassNode* cc, int ctype ARG_UNUSED, int not,
03832 ScanEnv* env,
03833 OnigCodePoint sb_out, const OnigCodePoint mbr[])
03834 {
03835 int i, r;
03836 OnigCodePoint j;
03837
03838 int n = ONIGENC_CODE_RANGE_NUM(mbr);
03839
03840 if (not == 0) {
03841 for (i = 0; i < n; i++) {
03842 for (j = ONIGENC_CODE_RANGE_FROM(mbr, i);
03843 j <= ONIGENC_CODE_RANGE_TO(mbr, i); j++) {
03844 if (j >= sb_out) {
03845 if (j > ONIGENC_CODE_RANGE_FROM(mbr, i)) {
03846 r = add_code_range_to_buf(&(cc->mbuf), env, j,
03847 ONIGENC_CODE_RANGE_TO(mbr, i));
03848 if (r != 0) return r;
03849 i++;
03850 }
03851
03852 goto sb_end;
03853 }
03854 BITSET_SET_BIT_CHKDUP(cc->bs, j);
03855 }
03856 }
03857
03858 sb_end:
03859 for ( ; i < n; i++) {
03860 r = add_code_range_to_buf(&(cc->mbuf), env,
03861 ONIGENC_CODE_RANGE_FROM(mbr, i),
03862 ONIGENC_CODE_RANGE_TO(mbr, i));
03863 if (r != 0) return r;
03864 }
03865 }
03866 else {
03867 OnigCodePoint prev = 0;
03868
03869 for (i = 0; i < n; i++) {
03870 for (j = prev;
03871 j < ONIGENC_CODE_RANGE_FROM(mbr, i); j++) {
03872 if (j >= sb_out) {
03873 goto sb_end2;
03874 }
03875 BITSET_SET_BIT_CHKDUP(cc->bs, j);
03876 }
03877 prev = ONIGENC_CODE_RANGE_TO(mbr, i) + 1;
03878 }
03879 for (j = prev; j < sb_out; j++) {
03880 BITSET_SET_BIT_CHKDUP(cc->bs, j);
03881 }
03882
03883 sb_end2:
03884 prev = sb_out;
03885
03886 for (i = 0; i < n; i++) {
03887 if (prev < ONIGENC_CODE_RANGE_FROM(mbr, i)) {
03888 r = add_code_range_to_buf(&(cc->mbuf), env, prev,
03889 ONIGENC_CODE_RANGE_FROM(mbr, i) - 1);
03890 if (r != 0) return r;
03891 }
03892 prev = ONIGENC_CODE_RANGE_TO(mbr, i) + 1;
03893 }
03894 if (prev < 0x7fffffff) {
03895 r = add_code_range_to_buf(&(cc->mbuf), env, prev, 0x7fffffff);
03896 if (r != 0) return r;
03897 }
03898 }
03899
03900 return 0;
03901 }
03902
03903 static int
03904 add_ctype_to_cc(CClassNode* cc, int ctype, int not, ScanEnv* env)
03905 {
03906 int c, r;
03907 const OnigCodePoint *ranges;
03908 OnigCodePoint sb_out;
03909 OnigEncoding enc = env->enc;
03910
03911 switch (ctype) {
03912 case ONIGENC_CTYPE_D:
03913 case ONIGENC_CTYPE_S:
03914 case ONIGENC_CTYPE_W:
03915 ctype ^= ONIGENC_CTYPE_SPECIAL_MASK;
03916 if (not != 0) {
03917 for (c = 0; c < SINGLE_BYTE_SIZE; c++) {
03918 if (! ONIGENC_IS_ASCII_CODE_CTYPE((OnigCodePoint )c, ctype))
03919 BITSET_SET_BIT_CHKDUP(cc->bs, c);
03920 }
03921 ADD_ALL_MULTI_BYTE_RANGE(enc, cc->mbuf);
03922 }
03923 else {
03924 for (c = 0; c < SINGLE_BYTE_SIZE; c++) {
03925 if (ONIGENC_IS_ASCII_CODE_CTYPE((OnigCodePoint )c, ctype))
03926 BITSET_SET_BIT_CHKDUP(cc->bs, c);
03927 }
03928 }
03929 return 0;
03930 break;
03931 }
03932
03933 r = ONIGENC_GET_CTYPE_CODE_RANGE(enc, ctype, &sb_out, &ranges);
03934 if (r == 0) {
03935 return add_ctype_to_cc_by_range(cc, ctype, not, env, sb_out, ranges);
03936 }
03937 else if (r != ONIG_NO_SUPPORT_CONFIG) {
03938 return r;
03939 }
03940
03941 r = 0;
03942 switch (ctype) {
03943 case ONIGENC_CTYPE_ALPHA:
03944 case ONIGENC_CTYPE_BLANK:
03945 case ONIGENC_CTYPE_CNTRL:
03946 case ONIGENC_CTYPE_DIGIT:
03947 case ONIGENC_CTYPE_LOWER:
03948 case ONIGENC_CTYPE_PUNCT:
03949 case ONIGENC_CTYPE_SPACE:
03950 case ONIGENC_CTYPE_UPPER:
03951 case ONIGENC_CTYPE_XDIGIT:
03952 case ONIGENC_CTYPE_ASCII:
03953 case ONIGENC_CTYPE_ALNUM:
03954 if (not != 0) {
03955 for (c = 0; c < SINGLE_BYTE_SIZE; c++) {
03956 if (! ONIGENC_IS_CODE_CTYPE(enc, (OnigCodePoint )c, ctype))
03957 BITSET_SET_BIT_CHKDUP(cc->bs, c);
03958 }
03959 ADD_ALL_MULTI_BYTE_RANGE(enc, cc->mbuf);
03960 }
03961 else {
03962 for (c = 0; c < SINGLE_BYTE_SIZE; c++) {
03963 if (ONIGENC_IS_CODE_CTYPE(enc, (OnigCodePoint )c, ctype))
03964 BITSET_SET_BIT_CHKDUP(cc->bs, c);
03965 }
03966 }
03967 break;
03968
03969 case ONIGENC_CTYPE_GRAPH:
03970 case ONIGENC_CTYPE_PRINT:
03971 if (not != 0) {
03972 for (c = 0; c < SINGLE_BYTE_SIZE; c++) {
03973 if (! ONIGENC_IS_CODE_CTYPE(enc, (OnigCodePoint )c, ctype))
03974 BITSET_SET_BIT_CHKDUP(cc->bs, c);
03975 }
03976 }
03977 else {
03978 for (c = 0; c < SINGLE_BYTE_SIZE; c++) {
03979 if (ONIGENC_IS_CODE_CTYPE(enc, (OnigCodePoint )c, ctype))
03980 BITSET_SET_BIT_CHKDUP(cc->bs, c);
03981 }
03982 ADD_ALL_MULTI_BYTE_RANGE(enc, cc->mbuf);
03983 }
03984 break;
03985
03986 case ONIGENC_CTYPE_WORD:
03987 if (not == 0) {
03988 for (c = 0; c < SINGLE_BYTE_SIZE; c++) {
03989 if (IS_CODE_SB_WORD(enc, c)) BITSET_SET_BIT_CHKDUP(cc->bs, c);
03990 }
03991 ADD_ALL_MULTI_BYTE_RANGE(enc, cc->mbuf);
03992 }
03993 else {
03994 for (c = 0; c < SINGLE_BYTE_SIZE; c++) {
03995 if ((ONIGENC_CODE_TO_MBCLEN(enc, c) > 0)
03996 && ! ONIGENC_IS_CODE_WORD(enc, c))
03997 BITSET_SET_BIT_CHKDUP(cc->bs, c);
03998 }
03999 }
04000 break;
04001
04002 default:
04003 return ONIGERR_PARSER_BUG;
04004 break;
04005 }
04006
04007 return r;
04008 }
04009
04010 static int
04011 parse_posix_bracket(CClassNode* cc, UChar** src, UChar* end, ScanEnv* env)
04012 {
04013 #define POSIX_BRACKET_CHECK_LIMIT_LENGTH 20
04014 #define POSIX_BRACKET_NAME_MIN_LEN 4
04015
04016 static const PosixBracketEntryType PBS[] = {
04017 { (UChar* )"alnum", ONIGENC_CTYPE_ALNUM, 5 },
04018 { (UChar* )"alpha", ONIGENC_CTYPE_ALPHA, 5 },
04019 { (UChar* )"blank", ONIGENC_CTYPE_BLANK, 5 },
04020 { (UChar* )"cntrl", ONIGENC_CTYPE_CNTRL, 5 },
04021 { (UChar* )"digit", ONIGENC_CTYPE_DIGIT, 5 },
04022 { (UChar* )"graph", ONIGENC_CTYPE_GRAPH, 5 },
04023 { (UChar* )"lower", ONIGENC_CTYPE_LOWER, 5 },
04024 { (UChar* )"print", ONIGENC_CTYPE_PRINT, 5 },
04025 { (UChar* )"punct", ONIGENC_CTYPE_PUNCT, 5 },
04026 { (UChar* )"space", ONIGENC_CTYPE_SPACE, 5 },
04027 { (UChar* )"upper", ONIGENC_CTYPE_UPPER, 5 },
04028 { (UChar* )"xdigit", ONIGENC_CTYPE_XDIGIT, 6 },
04029 { (UChar* )"ascii", ONIGENC_CTYPE_ASCII, 5 },
04030 { (UChar* )"word", ONIGENC_CTYPE_WORD, 4 },
04031 { (UChar* )NULL, -1, 0 }
04032 };
04033
04034 const PosixBracketEntryType *pb;
04035 int not, i, r;
04036 OnigCodePoint c;
04037 OnigEncoding enc = env->enc;
04038 UChar *p = *src;
04039 PFETCH_READY;
04040
04041 if (PPEEK_IS('^')) {
04042 PINC;
04043 not = 1;
04044 }
04045 else
04046 not = 0;
04047
04048 if (onigenc_strlen(enc, p, end) < POSIX_BRACKET_NAME_MIN_LEN + 3)
04049 goto not_posix_bracket;
04050
04051 for (pb = PBS; IS_NOT_NULL(pb->name); pb++) {
04052 if (onigenc_with_ascii_strncmp(enc, p, end, pb->name, pb->len) == 0) {
04053 p = (UChar* )onigenc_step(enc, p, end, pb->len);
04054 if (onigenc_with_ascii_strncmp(enc, p, end, (UChar* )":]", 2) != 0)
04055 return ONIGERR_INVALID_POSIX_BRACKET_TYPE;
04056
04057 r = add_ctype_to_cc(cc, pb->ctype, not, env);
04058 if (r != 0) return r;
04059
04060 PINC; PINC;
04061 *src = p;
04062 return 0;
04063 }
04064 }
04065
04066 not_posix_bracket:
04067 c = 0;
04068 i = 0;
04069 while (!PEND && ((c = PPEEK) != ':') && c != ']') {
04070 PINC;
04071 if (++i > POSIX_BRACKET_CHECK_LIMIT_LENGTH) break;
04072 }
04073 if (c == ':' && ! PEND) {
04074 PINC;
04075 if (! PEND) {
04076 PFETCH(c);
04077 if (c == ']')
04078 return ONIGERR_INVALID_POSIX_BRACKET_TYPE;
04079 }
04080 }
04081
04082 return 1;
04083 }
04084
04085 static int
04086 fetch_char_property_to_ctype(UChar** src, UChar* end, ScanEnv* env)
04087 {
04088 int r;
04089 OnigCodePoint c;
04090 OnigEncoding enc = env->enc;
04091 UChar *prev, *start, *p = *src;
04092 PFETCH_READY;
04093
04094 r = 0;
04095 start = prev = p;
04096
04097 while (!PEND) {
04098 prev = p;
04099 PFETCH(c);
04100 if (c == '}') {
04101 r = ONIGENC_PROPERTY_NAME_TO_CTYPE(enc, start, prev);
04102 if (r < 0) break;
04103
04104 *src = p;
04105 return r;
04106 }
04107 else if (c == '(' || c == ')' || c == '{' || c == '|') {
04108 r = ONIGERR_INVALID_CHAR_PROPERTY_NAME;
04109 break;
04110 }
04111 }
04112
04113 onig_scan_env_set_error_string(env, r, *src, prev);
04114 return r;
04115 }
04116
04117 static int
04118 parse_char_property(Node** np, OnigToken* tok, UChar** src, UChar* end,
04119 ScanEnv* env)
04120 {
04121 int r, ctype;
04122 CClassNode* cc;
04123
04124 ctype = fetch_char_property_to_ctype(src, end, env);
04125 if (ctype < 0) return ctype;
04126
04127 *np = node_new_cclass();
04128 CHECK_NULL_RETURN_MEMERR(*np);
04129 cc = NCCLASS(*np);
04130 r = add_ctype_to_cc(cc, ctype, 0, env);
04131 if (r != 0) return r;
04132 if (tok->u.prop.not != 0) NCCLASS_SET_NOT(cc);
04133
04134 return 0;
04135 }
04136
04137 /* Scanner state used while parsing the items of a character class (see next_state_val / next_state_class). */
04138 enum CCSTATE {
04139 CCS_VALUE, /* one value is pending; it is stored into the class when the next item arrives */
04140 CCS_RANGE, /* a range operator followed a pending value; the next value closes the range */
04141 CCS_COMPLETE, /* a range has just been stored */
04142 CCS_START /* nothing pending: initial state, also set again after the intersection operator */
04143 };
04144 /* Kind of the most recently seen character-class item. */
04145 enum CCVALTYPE {
04146 CCV_SB, /* single-byte value; stored in the class bitset */
04147 CCV_CODE_POINT, /* code point stored in the multi-byte range buffer */
04148 CCV_CLASS /* a whole class (ctype, POSIX bracket or property); set by next_state_class */
04149 };
04150
04151 static int
04152 next_state_class(CClassNode* cc, OnigCodePoint* vs, enum CCVALTYPE* type,
04153 enum CCSTATE* state, ScanEnv* env)
04154 {
04155 int r;
04156
04157 if (*state == CCS_RANGE)
04158 return ONIGERR_CHAR_CLASS_VALUE_AT_END_OF_RANGE;
04159
04160 if (*state == CCS_VALUE && *type != CCV_CLASS) {
04161 if (*type == CCV_SB)
04162 BITSET_SET_BIT_CHKDUP(cc->bs, (int )(*vs));
04163 else if (*type == CCV_CODE_POINT) {
04164 r = add_code_range(&(cc->mbuf), env, *vs, *vs);
04165 if (r < 0) return r;
04166 }
04167 }
04168
04169 *state = CCS_VALUE;
04170 *type = CCV_CLASS;
04171 return 0;
04172 }
04173
04174 static int
04175 next_state_val(CClassNode* cc, OnigCodePoint *vs, OnigCodePoint v,
04176 int* vs_israw, int v_israw,
04177 enum CCVALTYPE intype, enum CCVALTYPE* type,
04178 enum CCSTATE* state, ScanEnv* env)
04179 {
04180 int r;
04181
04182 switch (*state) {
04183 case CCS_VALUE:
04184 if (*type == CCV_SB)
04185 BITSET_SET_BIT_CHKDUP(cc->bs, (int )(*vs));
04186 else if (*type == CCV_CODE_POINT) {
04187 r = add_code_range(&(cc->mbuf), env, *vs, *vs);
04188 if (r < 0) return r;
04189 }
04190 break;
04191
04192 case CCS_RANGE:
04193 if (intype == *type) {
04194 if (intype == CCV_SB) {
04195 if (*vs > 0xff || v > 0xff)
04196 return ONIGERR_INVALID_CODE_POINT_VALUE;
04197
04198 if (*vs > v) {
04199 if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC))
04200 goto ccs_range_end;
04201 else
04202 return ONIGERR_EMPTY_RANGE_IN_CHAR_CLASS;
04203 }
04204 bitset_set_range(env, cc->bs, (int )*vs, (int )v);
04205 }
04206 else {
04207 r = add_code_range(&(cc->mbuf), env, *vs, v);
04208 if (r < 0) return r;
04209 }
04210 }
04211 else {
04212 #if 0
04213 if (intype == CCV_CODE_POINT && *type == CCV_SB) {
04214 #endif
04215 if (*vs > v) {
04216 if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC))
04217 goto ccs_range_end;
04218 else
04219 return ONIGERR_EMPTY_RANGE_IN_CHAR_CLASS;
04220 }
04221 bitset_set_range(env, cc->bs, (int )*vs, (int )(v < 0xff ? v : 0xff));
04222 r = add_code_range(&(cc->mbuf), env, (OnigCodePoint )*vs, v);
04223 if (r < 0) return r;
04224 #if 0
04225 }
04226 else
04227 return ONIGERR_MISMATCH_CODE_LENGTH_IN_CLASS_RANGE;
04228 #endif
04229 }
04230 ccs_range_end:
04231 *state = CCS_COMPLETE;
04232 break;
04233
04234 case CCS_COMPLETE:
04235 case CCS_START:
04236 *state = CCS_VALUE;
04237 break;
04238
04239 default:
04240 break;
04241 }
04242
04243 *vs_israw = v_israw;
04244 *vs = v;
04245 *type = intype;
04246 return 0;
04247 }
04248
04249 static int
04250 code_exist_check(OnigCodePoint c, UChar* from, UChar* end, int ignore_escaped,
04251 ScanEnv* env)
04252 {
04253 int in_esc;
04254 OnigCodePoint code;
04255 OnigEncoding enc = env->enc;
04256 UChar* p = from;
04257 PFETCH_READY;
04258
04259 in_esc = 0;
04260 while (! PEND) {
04261 if (ignore_escaped && in_esc) {
04262 in_esc = 0;
04263 }
04264 else {
04265 PFETCH(code);
04266 if (code == c) return 1;
04267 if (code == MC_ESC(env->syntax)) in_esc = 1;
04268 }
04269 }
04270 return 0;
04271 }
04272
04273 static int
04274 parse_char_class(Node** np, OnigToken* tok, UChar** src, UChar* end,
04275 ScanEnv* env)
04276 {
04277 int r, neg, len, fetched, and_start;
04278 OnigCodePoint v, vs;
04279 UChar *p;
04280 Node* node;
04281 CClassNode *cc, *prev_cc;
04282 CClassNode work_cc;
04283
04284 enum CCSTATE state;
04285 enum CCVALTYPE val_type, in_type;
04286 int val_israw, in_israw;
04287
04288 prev_cc = (CClassNode* )NULL;
04289 *np = NULL_NODE;
04290 r = fetch_token_in_cc(tok, src, end, env);
04291 if (r == TK_CHAR && tok->u.c == '^' && tok->escaped == 0) {
04292 neg = 1;
04293 r = fetch_token_in_cc(tok, src, end, env);
04294 }
04295 else {
04296 neg = 0;
04297 }
04298
04299 if (r < 0) return r;
04300 if (r == TK_CC_CLOSE) {
04301 if (! code_exist_check((OnigCodePoint )']',
04302 *src, env->pattern_end, 1, env))
04303 return ONIGERR_EMPTY_CHAR_CLASS;
04304
04305 CC_ESC_WARN(env, (UChar* )"]");
04306 r = tok->type = TK_CHAR;
04307 }
04308
04309 *np = node = node_new_cclass();
04310 CHECK_NULL_RETURN_MEMERR(node);
04311 cc = NCCLASS(node);
04312
04313 and_start = 0;
04314 state = CCS_START;
04315 p = *src;
04316 while (r != TK_CC_CLOSE) {
04317 fetched = 0;
04318 switch (r) {
04319 case TK_CHAR:
04320 if ((tok->u.code >= SINGLE_BYTE_SIZE) ||
04321 (len = ONIGENC_CODE_TO_MBCLEN(env->enc, tok->u.c)) > 1) {
04322 in_type = CCV_CODE_POINT;
04323 }
04324 else if (len < 0) {
04325 r = len;
04326 goto err;
04327 }
04328 else {
04329 sb_char:
04330 in_type = CCV_SB;
04331 }
04332 v = (OnigCodePoint )tok->u.c;
04333 in_israw = 0;
04334 goto val_entry2;
04335 break;
04336
04337 case TK_RAW_BYTE:
04338
04339 if (! ONIGENC_IS_SINGLEBYTE(env->enc) && tok->base != 0) {
04340 UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN];
04341 UChar* bufe = buf + ONIGENC_CODE_TO_MBC_MAXLEN;
04342 UChar* psave = p;
04343 int i, base = tok->base;
04344
04345 buf[0] = tok->u.c;
04346 for (i = 1; i < ONIGENC_MBC_MAXLEN(env->enc); i++) {
04347 r = fetch_token_in_cc(tok, &p, end, env);
04348 if (r < 0) goto err;
04349 if (r != TK_RAW_BYTE || tok->base != base) {
04350 fetched = 1;
04351 break;
04352 }
04353 buf[i] = tok->u.c;
04354 }
04355
04356 if (i < ONIGENC_MBC_MINLEN(env->enc)) {
04357 r = ONIGERR_TOO_SHORT_MULTI_BYTE_STRING;
04358 goto err;
04359 }
04360
04361 len = enclen(env->enc, buf, buf+i);
04362 if (i < len) {
04363 r = ONIGERR_TOO_SHORT_MULTI_BYTE_STRING;
04364 goto err;
04365 }
04366 else if (i > len) {
04367 p = psave;
04368 for (i = 1; i < len; i++) {
04369 r = fetch_token_in_cc(tok, &p, end, env);
04370 }
04371 fetched = 0;
04372 }
04373
04374 if (i == 1) {
04375 v = (OnigCodePoint )buf[0];
04376 goto raw_single;
04377 }
04378 else {
04379 v = ONIGENC_MBC_TO_CODE(env->enc, buf, bufe);
04380 in_type = CCV_CODE_POINT;
04381 }
04382 }
04383 else {
04384 v = (OnigCodePoint )tok->u.c;
04385 raw_single:
04386 in_type = CCV_SB;
04387 }
04388 in_israw = 1;
04389 goto val_entry2;
04390 break;
04391
04392 case TK_CODE_POINT:
04393 v = tok->u.code;
04394 in_israw = 1;
04395 val_entry:
04396 len = ONIGENC_CODE_TO_MBCLEN(env->enc, v);
04397 if (len < 0) {
04398 r = len;
04399 goto err;
04400 }
04401 in_type = (len == 1 ? CCV_SB : CCV_CODE_POINT);
04402 val_entry2:
04403 r = next_state_val(cc, &vs, v, &val_israw, in_israw, in_type, &val_type,
04404 &state, env);
04405 if (r != 0) goto err;
04406 break;
04407
04408 case TK_POSIX_BRACKET_OPEN:
04409 r = parse_posix_bracket(cc, &p, end, env);
04410 if (r < 0) goto err;
04411 if (r == 1) {
04412 CC_ESC_WARN(env, (UChar* )"[");
04413 p = tok->backp;
04414 v = (OnigCodePoint )tok->u.c;
04415 in_israw = 0;
04416 goto val_entry;
04417 }
04418 goto next_class;
04419 break;
04420
04421 case TK_CHAR_TYPE:
04422 r = add_ctype_to_cc(cc, tok->u.prop.ctype, tok->u.prop.not, env);
04423 if (r != 0) return r;
04424
04425 next_class:
04426 r = next_state_class(cc, &vs, &val_type, &state, env);
04427 if (r != 0) goto err;
04428 break;
04429
04430 case TK_CHAR_PROPERTY:
04431 {
04432 int ctype;
04433
04434 ctype = fetch_char_property_to_ctype(&p, end, env);
04435 if (ctype < 0) return ctype;
04436 r = add_ctype_to_cc(cc, ctype, tok->u.prop.not, env);
04437 if (r != 0) return r;
04438 goto next_class;
04439 }
04440 break;
04441
04442 case TK_CC_RANGE:
04443 if (state == CCS_VALUE) {
04444 r = fetch_token_in_cc(tok, &p, end, env);
04445 if (r < 0) goto err;
04446 fetched = 1;
04447 if (r == TK_CC_CLOSE) {
04448 range_end_val:
04449 v = (OnigCodePoint )'-';
04450 in_israw = 0;
04451 goto val_entry;
04452 }
04453 else if (r == TK_CC_AND) {
04454 CC_ESC_WARN(env, (UChar* )"-");
04455 goto range_end_val;
04456 }
04457 state = CCS_RANGE;
04458 }
04459 else if (state == CCS_START) {
04460
04461 v = (OnigCodePoint )tok->u.c;
04462 in_israw = 0;
04463
04464 r = fetch_token_in_cc(tok, &p, end, env);
04465 if (r < 0) goto err;
04466 fetched = 1;
04467
04468 if (r == TK_CC_RANGE || and_start != 0)
04469 CC_ESC_WARN(env, (UChar* )"-");
04470
04471 goto val_entry;
04472 }
04473 else if (state == CCS_RANGE) {
04474 CC_ESC_WARN(env, (UChar* )"-");
04475 goto sb_char;
04476 }
04477 else {
04478 r = fetch_token_in_cc(tok, &p, end, env);
04479 if (r < 0) goto err;
04480 fetched = 1;
04481 if (r == TK_CC_CLOSE) goto range_end_val;
04482 else if (r == TK_CC_AND) {
04483 CC_ESC_WARN(env, (UChar* )"-");
04484 goto range_end_val;
04485 }
04486
04487 if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_DOUBLE_RANGE_OP_IN_CC)) {
04488 CC_ESC_WARN(env, (UChar* )"-");
04489 goto sb_char;
04490 }
04491 r = ONIGERR_UNMATCHED_RANGE_SPECIFIER_IN_CHAR_CLASS;
04492 goto err;
04493 }
04494 break;
04495
04496 case TK_CC_CC_OPEN:
04497 {
04498 Node *anode;
04499 CClassNode* acc;
04500
04501 r = parse_char_class(&anode, tok, &p, end, env);
04502 if (r == 0) {
04503 acc = NCCLASS(anode);
04504 r = or_cclass(cc, acc, env);
04505 }
04506 onig_node_free(anode);
04507 if (r != 0) goto err;
04508 }
04509 break;
04510
04511 case TK_CC_AND:
04512 {
04513 if (state == CCS_VALUE) {
04514 r = next_state_val(cc, &vs, 0, &val_israw, 0, val_type,
04515 &val_type, &state, env);
04516 if (r != 0) goto err;
04517 }
04518
04519 and_start = 1;
04520 state = CCS_START;
04521
04522 if (IS_NOT_NULL(prev_cc)) {
04523 r = and_cclass(prev_cc, cc, env);
04524 if (r != 0) goto err;
04525 bbuf_free(cc->mbuf);
04526 }
04527 else {
04528 prev_cc = cc;
04529 cc = &work_cc;
04530 }
04531 initialize_cclass(cc);
04532 }
04533 break;
04534
04535 case TK_EOT:
04536 r = ONIGERR_PREMATURE_END_OF_CHAR_CLASS;
04537 goto err;
04538 break;
04539 default:
04540 r = ONIGERR_PARSER_BUG;
04541 goto err;
04542 break;
04543 }
04544
04545 if (fetched)
04546 r = tok->type;
04547 else {
04548 r = fetch_token_in_cc(tok, &p, end, env);
04549 if (r < 0) goto err;
04550 }
04551 }
04552
04553 if (state == CCS_VALUE) {
04554 r = next_state_val(cc, &vs, 0, &val_israw, 0, val_type,
04555 &val_type, &state, env);
04556 if (r != 0) goto err;
04557 }
04558
04559 if (IS_NOT_NULL(prev_cc)) {
04560 r = and_cclass(prev_cc, cc, env);
04561 if (r != 0) goto err;
04562 bbuf_free(cc->mbuf);
04563 cc = prev_cc;
04564 }
04565
04566 if (neg != 0)
04567 NCCLASS_SET_NOT(cc);
04568 else
04569 NCCLASS_CLEAR_NOT(cc);
04570 if (IS_NCCLASS_NOT(cc) &&
04571 IS_SYNTAX_BV(env->syntax, ONIG_SYN_NOT_NEWLINE_IN_NEGATIVE_CC)) {
04572 int is_empty;
04573
04574 is_empty = (IS_NULL(cc->mbuf) ? 1 : 0);
04575 if (is_empty != 0)
04576 BITSET_IS_EMPTY(cc->bs, is_empty);
04577
04578 if (is_empty == 0) {
04579 #define NEWLINE_CODE 0x0a
04580
04581 if (ONIGENC_IS_CODE_NEWLINE(env->enc, NEWLINE_CODE)) {
04582 if (ONIGENC_CODE_TO_MBCLEN(env->enc, NEWLINE_CODE) == 1)
04583 BITSET_SET_BIT_CHKDUP(cc->bs, NEWLINE_CODE);
04584 else
04585 add_code_range(&(cc->mbuf), env, NEWLINE_CODE, NEWLINE_CODE);
04586 }
04587 }
04588 }
04589 *src = p;
04590 return 0;
04591
04592 err:
04593 if (cc != NCCLASS(*np))
04594 bbuf_free(cc->mbuf);
04595 return r;
04596 }
04597 /* Forward declaration: parse_enclose() and parse_exp() below call parse_subexp(), which is defined elsewhere in this translation unit. */
04598 static int parse_subexp(Node** top, OnigToken* tok, int term,
04599 UChar** src, UChar* end, ScanEnv* env);
04600
04601 static int
04602 parse_enclose(Node** np, OnigToken* tok, int term, UChar** src, UChar* end,
04603 ScanEnv* env)
04604 {
04605 int r, num;
04606 Node *target;
04607 OnigOptionType option;
04608 OnigCodePoint c;
04609 OnigEncoding enc = env->enc;
04610
04611 #ifdef USE_NAMED_GROUP
04612 int list_capture;
04613 #endif
04614
04615 UChar* p = *src;
04616 PFETCH_READY;
04617
04618 *np = NULL;
04619 if (PEND) return ONIGERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS;
04620
04621 option = env->option;
04622 if (PPEEK_IS('?') &&
04623 IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_GROUP_EFFECT)) {
04624 PINC;
04625 if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;
04626
04627 PFETCH(c);
04628 switch (c) {
04629 case ':':
04630 group:
04631 r = fetch_token(tok, &p, end, env);
04632 if (r < 0) return r;
04633 r = parse_subexp(np, tok, term, &p, end, env);
04634 if (r < 0) return r;
04635 *src = p;
04636 return 1;
04637 break;
04638
04639 case '=':
04640 *np = onig_node_new_anchor(ANCHOR_PREC_READ);
04641 break;
04642 case '!':
04643 *np = onig_node_new_anchor(ANCHOR_PREC_READ_NOT);
04644 break;
04645 case '>':
04646 *np = node_new_enclose(ENCLOSE_STOP_BACKTRACK);
04647 break;
04648
04649 #ifdef USE_NAMED_GROUP
04650 case '\'':
04651 if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP)) {
04652 goto named_group1;
04653 }
04654 else
04655 return ONIGERR_UNDEFINED_GROUP_OPTION;
04656 break;
04657 #endif
04658
04659 case '<':
04660 PFETCH(c);
04661 if (c == '=')
04662 *np = onig_node_new_anchor(ANCHOR_LOOK_BEHIND);
04663 else if (c == '!')
04664 *np = onig_node_new_anchor(ANCHOR_LOOK_BEHIND_NOT);
04665 #ifdef USE_NAMED_GROUP
04666 else {
04667 if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP)) {
04668 UChar *name;
04669 UChar *name_end;
04670
04671 PUNFETCH;
04672 c = '<';
04673
04674 named_group1:
04675 list_capture = 0;
04676
04677 named_group2:
04678 name = p;
04679 r = fetch_name((OnigCodePoint )c, &p, end, &name_end, env, &num, 0);
04680 if (r < 0) return r;
04681
04682 num = scan_env_add_mem_entry(env);
04683 if (num < 0) return num;
04684 if (list_capture != 0 && num >= (int )BIT_STATUS_BITS_NUM)
04685 return ONIGERR_GROUP_NUMBER_OVER_FOR_CAPTURE_HISTORY;
04686
04687 r = name_add(env->reg, name, name_end, num, env);
04688 if (r != 0) return r;
04689 *np = node_new_enclose_memory(env->option, 1);
04690 CHECK_NULL_RETURN_MEMERR(*np);
04691 NENCLOSE(*np)->regnum = num;
04692 if (list_capture != 0)
04693 BIT_STATUS_ON_AT_SIMPLE(env->capture_history, num);
04694 env->num_named++;
04695 }
04696 else {
04697 return ONIGERR_UNDEFINED_GROUP_OPTION;
04698 }
04699 }
04700 #else
04701 else {
04702 return ONIGERR_UNDEFINED_GROUP_OPTION;
04703 }
04704 #endif
04705 break;
04706
04707 case '@':
04708 if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_ATMARK_CAPTURE_HISTORY)) {
04709 #ifdef USE_NAMED_GROUP
04710 if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP)) {
04711 PFETCH(c);
04712 if (c == '<' || c == '\'') {
04713 list_capture = 1;
04714 goto named_group2;
04715 }
04716 PUNFETCH;
04717 }
04718 #endif
04719 *np = node_new_enclose_memory(env->option, 0);
04720 CHECK_NULL_RETURN_MEMERR(*np);
04721 num = scan_env_add_mem_entry(env);
04722 if (num < 0) {
04723 onig_node_free(*np);
04724 return num;
04725 }
04726 else if (num >= (int )BIT_STATUS_BITS_NUM) {
04727 onig_node_free(*np);
04728 return ONIGERR_GROUP_NUMBER_OVER_FOR_CAPTURE_HISTORY;
04729 }
04730 NENCLOSE(*np)->regnum = num;
04731 BIT_STATUS_ON_AT_SIMPLE(env->capture_history, num);
04732 }
04733 else {
04734 return ONIGERR_UNDEFINED_GROUP_OPTION;
04735 }
04736 break;
04737
04738 #ifdef USE_POSIXLINE_OPTION
04739 case 'p':
04740 #endif
04741 case '-': case 'i': case 'm': case 's': case 'x':
04742 {
04743 int neg = 0;
04744
04745 while (1) {
04746 switch (c) {
04747 case ':':
04748 case ')':
04749 break;
04750
04751 case '-': neg = 1; break;
04752 case 'x': ONOFF(option, ONIG_OPTION_EXTEND, neg); break;
04753 case 'i': ONOFF(option, ONIG_OPTION_IGNORECASE, neg); break;
04754 case 's':
04755 if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_OPTION_PERL)) {
04756 ONOFF(option, ONIG_OPTION_MULTILINE, neg);
04757 }
04758 else
04759 return ONIGERR_UNDEFINED_GROUP_OPTION;
04760 break;
04761
04762 case 'm':
04763 if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_OPTION_PERL)) {
04764 ONOFF(option, ONIG_OPTION_SINGLELINE, (neg == 0 ? 1 : 0));
04765 }
04766 else if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_OPTION_RUBY)) {
04767 ONOFF(option, ONIG_OPTION_MULTILINE, neg);
04768 }
04769 else
04770 return ONIGERR_UNDEFINED_GROUP_OPTION;
04771 break;
04772 #ifdef USE_POSIXLINE_OPTION
04773 case 'p':
04774 ONOFF(option, ONIG_OPTION_MULTILINE|ONIG_OPTION_SINGLELINE, neg);
04775 break;
04776 #endif
04777 default:
04778 return ONIGERR_UNDEFINED_GROUP_OPTION;
04779 }
04780
04781 if (c == ')') {
04782 *np = node_new_option(option);
04783 CHECK_NULL_RETURN_MEMERR(*np);
04784 *src = p;
04785 return 2;
04786 }
04787 else if (c == ':') {
04788 OnigOptionType prev = env->option;
04789
04790 env->option = option;
04791 r = fetch_token(tok, &p, end, env);
04792 if (r < 0) return r;
04793 r = parse_subexp(&target, tok, term, &p, end, env);
04794 env->option = prev;
04795 if (r < 0) return r;
04796 *np = node_new_option(option);
04797 CHECK_NULL_RETURN_MEMERR(*np);
04798 NENCLOSE(*np)->target = target;
04799 *src = p;
04800 return 0;
04801 }
04802
04803 if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;
04804 PFETCH(c);
04805 }
04806 }
04807 break;
04808
04809 default:
04810 return ONIGERR_UNDEFINED_GROUP_OPTION;
04811 }
04812 }
04813 else {
04814 if (ONIG_IS_OPTION_ON(env->option, ONIG_OPTION_DONT_CAPTURE_GROUP))
04815 goto group;
04816
04817 *np = node_new_enclose_memory(env->option, 0);
04818 CHECK_NULL_RETURN_MEMERR(*np);
04819 num = scan_env_add_mem_entry(env);
04820 if (num < 0) return num;
04821 NENCLOSE(*np)->regnum = num;
04822 }
04823
04824 CHECK_NULL_RETURN_MEMERR(*np);
04825 r = fetch_token(tok, &p, end, env);
04826 if (r < 0) return r;
04827 r = parse_subexp(&target, tok, term, &p, end, env);
04828 if (r < 0) {
04829 onig_node_free(target);
04830 return r;
04831 }
04832
04833 if (NTYPE(*np) == NT_ANCHOR)
04834 NANCHOR(*np)->target = target;
04835 else {
04836 NENCLOSE(*np)->target = target;
04837 if (NENCLOSE(*np)->type == ENCLOSE_MEMORY) {
04838
04839 r = scan_env_set_mem_node(env, NENCLOSE(*np)->regnum, *np);
04840 if (r != 0) return r;
04841 }
04842 }
04843
04844 *src = p;
04845 return 0;
04846 }
04847 /* Quantifier spellings for the nested-repeat warning, indexed by the result of popular_quantifier_num(). */
04848 static const char* const PopularQStr[] = {
04849 "?", "*", "+", "??", "*?", "+?"
04850 };
04851 /* Replacement spellings for the nested-repeat warning, indexed by a ReduceTypeTable entry. */
04852 static const char* const ReduceQStr[] = {
04853 "", "", "*", "*?", "??", "+ and ??", "+? and ?"
04854 };
04855
04856 static int
04857 set_quantifier(Node* qnode, Node* target, int group, ScanEnv* env)
04858 {
04859 QtfrNode* qn;
04860
04861 qn = NQTFR(qnode);
04862 if (qn->lower == 1 && qn->upper == 1) {
04863 return 1;
04864 }
04865
04866 switch (NTYPE(target)) {
04867 case NT_STR:
04868 if (! group) {
04869 StrNode* sn = NSTR(target);
04870 if (str_node_can_be_split(sn, env->enc)) {
04871 Node* n = str_node_split_last_char(sn, env->enc);
04872 if (IS_NOT_NULL(n)) {
04873 qn->target = n;
04874 return 2;
04875 }
04876 }
04877 }
04878 break;
04879
04880 case NT_QTFR:
04881 {
04882
04883 QtfrNode* qnt = NQTFR(target);
04884 int nestq_num = popular_quantifier_num(qn);
04885 int targetq_num = popular_quantifier_num(qnt);
04886
04887 #ifdef USE_WARNING_REDUNDANT_NESTED_REPEAT_OPERATOR
04888 if (!IS_QUANTIFIER_BY_NUMBER(qn) && !IS_QUANTIFIER_BY_NUMBER(qnt) &&
04889 IS_SYNTAX_BV(env->syntax, ONIG_SYN_WARN_REDUNDANT_NESTED_REPEAT)) {
04890 UChar buf[WARN_BUFSIZE];
04891
04892 switch(ReduceTypeTable[targetq_num][nestq_num]) {
04893 case RQ_ASIS:
04894 break;
04895
04896 case RQ_DEL:
04897 if (onig_verb_warn != onig_null_warn) {
04898 onig_snprintf_with_pattern(buf, WARN_BUFSIZE, env->enc,
04899 env->pattern, env->pattern_end,
04900 (UChar* )"redundant nested repeat operator");
04901 (*onig_verb_warn)((char* )buf);
04902 }
04903 goto warn_exit;
04904 break;
04905
04906 default:
04907 if (onig_verb_warn != onig_null_warn) {
04908 onig_snprintf_with_pattern(buf, WARN_BUFSIZE, env->enc,
04909 env->pattern, env->pattern_end,
04910 (UChar* )"nested repeat operator %s and %s was replaced with '%s'",
04911 PopularQStr[targetq_num], PopularQStr[nestq_num],
04912 ReduceQStr[ReduceTypeTable[targetq_num][nestq_num]]);
04913 (*onig_verb_warn)((char* )buf);
04914 }
04915 goto warn_exit;
04916 break;
04917 }
04918 }
04919
04920 warn_exit:
04921 #endif
04922 if (targetq_num >= 0) {
04923 if (nestq_num >= 0) {
04924 onig_reduce_nested_quantifier(qnode, target);
04925 goto q_exit;
04926 }
04927 else if (targetq_num == 1 || targetq_num == 2) {
04928
04929 if (! IS_REPEAT_INFINITE(qn->upper) && qn->upper > 1 && qn->greedy) {
04930 qn->upper = (qn->lower == 0 ? 1 : qn->lower);
04931 }
04932 }
04933 }
04934 }
04935 break;
04936
04937 default:
04938 break;
04939 }
04940
04941 qn->target = target;
04942 q_exit:
04943 return 0;
04944 }
04945
04946
04947 #ifdef USE_SHARED_CCLASS_TABLE
04948
04949 #define THRESHOLD_RANGE_NUM_FOR_SHARE_CCLASS 8
04950 /* NOTE(review): the threshold above is not referenced in this part of the file; presumably a minimum range count for sharing a class - confirm at its use site. */
04951
04952 /* Key of the shared character-class table: one entry per (encoding, negation flag, character type). */
04953 typedef struct {
04954 OnigEncoding enc; /* encoding the class was built for */
04955 int not; /* negation flag; compared and hashed as part of the key */
04956 int type; /* character type value */
04957 } type_cclass_key;
04958
04959 static int type_cclass_cmp(type_cclass_key* x, type_cclass_key* y)
04960 {
04961 if (x->type != y->type) return 1;
04962 if (x->enc != y->enc) return 1;
04963 if (x->not != y->not) return 1;
04964 return 0;
04965 }
04966
04967 static st_index_t type_cclass_hash(type_cclass_key* key)
04968 {
04969 int i, val;
04970 UChar *p;
04971
04972 val = 0;
04973
04974 p = (UChar* )&(key->enc);
04975 for (i = 0; i < (int )sizeof(key->enc); i++) {
04976 val = val * 997 + (int )*p++;
04977 }
04978
04979 p = (UChar* )(&key->type);
04980 for (i = 0; i < (int )sizeof(key->type); i++) {
04981 val = val * 997 + (int )*p++;
04982 }
04983
04984 val += key->not;
04985 return val + (val >> 5);
04986 }
04987 /* Comparison and hash callbacks for tables keyed by type_cclass_key. */
04988 static const struct st_hash_type type_type_cclass_hash = {
04989 type_cclass_cmp,
04990 type_cclass_hash,
04991 };
04992 /* Shared character-class table; released and reset to NULL by onig_free_shared_cclass_table(). */
04993 static st_table* OnigTypeCClassTable;
04994
04995
04996 static int
04997 i_free_shared_class(type_cclass_key* key, Node* node, void* arg ARG_UNUSED)
04998 {
04999 if (IS_NOT_NULL(node)) {
05000 CClassNode* cc = NCCLASS(node);
05001 if (IS_NOT_NULL(cc->mbuf)) xfree(cc->mbuf);
05002 xfree(node);
05003 }
05004
05005 if (IS_NOT_NULL(key)) xfree(key);
05006 return ST_DELETE;
05007 }
05008
05009 extern int
05010 onig_free_shared_cclass_table(void)
05011 {
05012 THREAD_ATOMIC_START;
05013 if (IS_NOT_NULL(OnigTypeCClassTable)) {
05014 onig_st_foreach(OnigTypeCClassTable, i_free_shared_class, 0);
05015 onig_st_free_table(OnigTypeCClassTable);
05016 OnigTypeCClassTable = NULL;
05017 }
05018 THREAD_ATOMIC_END;
05019
05020 return 0;
05021 }
05022
05023 #endif
05024
05025
05026 #ifndef CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS
05027 static int
05028 clear_not_flag_cclass(CClassNode* cc, OnigEncoding enc)
05029 {
05030 BBuf *tbuf;
05031 int r;
05032
05033 if (IS_NCCLASS_NOT(cc)) {
05034 bitset_invert(cc->bs);
05035
05036 if (! ONIGENC_IS_SINGLEBYTE(enc)) {
05037 r = not_code_range_buf(enc, cc->mbuf, &tbuf);
05038 if (r != 0) return r;
05039
05040 bbuf_free(cc->mbuf);
05041 cc->mbuf = tbuf;
05042 }
05043
05044 NCCLASS_CLEAR_NOT(cc);
05045 }
05046
05047 return 0;
05048 }
05049 #endif
05050 /* Context handed to i_apply_case_fold() through its void* argument. */
05051 typedef struct {
05052 ScanEnv* env; /* scan environment; supplies the encoding */
05053 CClassNode* cc; /* class that single-code-point folds are added to */
05054 Node* alt_root; /* NOTE(review): presumably the head of the alternative list built through ptail - confirm with the caller */
05055 Node** ptail; /* slot that receives the next alternative node for a multi-code-point fold */
05056 } IApplyCaseFoldArg;
05057
05058 static int
05059 i_apply_case_fold(OnigCodePoint from, OnigCodePoint to[],
05060 int to_len, void* arg)
05061 {
05062 IApplyCaseFoldArg* iarg;
05063 ScanEnv* env;
05064 CClassNode* cc;
05065 BitSetRef bs;
05066
05067 iarg = (IApplyCaseFoldArg* )arg;
05068 env = iarg->env;
05069 cc = iarg->cc;
05070 bs = cc->bs;
05071
05072 if (to_len == 1) {
05073 int is_in = onig_is_code_in_cc(env->enc, from, cc);
05074 #ifdef CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS
05075 if ((is_in != 0 && !IS_NCCLASS_NOT(cc)) ||
05076 (is_in == 0 && IS_NCCLASS_NOT(cc))) {
05077 if (ONIGENC_MBC_MINLEN(env->enc) > 1 || *to >= SINGLE_BYTE_SIZE) {
05078 add_code_range0(&(cc->mbuf), env, *to, *to, 0);
05079 }
05080 else {
05081 BITSET_SET_BIT(bs, *to);
05082 }
05083 }
05084 #else
05085 if (is_in != 0) {
05086 if (ONIGENC_MBC_MINLEN(env->enc) > 1 || *to >= SINGLE_BYTE_SIZE) {
05087 if (IS_NCCLASS_NOT(cc)) clear_not_flag_cclass(cc, env->enc);
05088 add_code_range0(&(cc->mbuf), env, *to, *to, 0);
05089 }
05090 else {
05091 if (IS_NCCLASS_NOT(cc)) {
05092 BITSET_CLEAR_BIT(bs, *to);
05093 }
05094 else
05095 BITSET_SET_BIT(bs, *to);
05096 }
05097 }
05098 #endif
05099 }
05100 else {
05101 int r, i, len;
05102 UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN];
05103 Node *snode = NULL_NODE;
05104
05105 if (onig_is_code_in_cc(env->enc, from, cc)
05106 #ifdef CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS
05107 && !IS_NCCLASS_NOT(cc)
05108 #endif
05109 ) {
05110 for (i = 0; i < to_len; i++) {
05111 len = ONIGENC_CODE_TO_MBC(env->enc, to[i], buf);
05112 if (i == 0) {
05113 snode = onig_node_new_str(buf, buf + len);
05114 CHECK_NULL_RETURN_MEMERR(snode);
05115
05116
05117
05118 NSTRING_SET_AMBIG(snode);
05119 }
05120 else {
05121 r = onig_node_str_cat(snode, buf, buf + len);
05122 if (r < 0) {
05123 onig_node_free(snode);
05124 return r;
05125 }
05126 }
05127 }
05128
05129 *(iarg->ptail) = onig_node_new_alt(snode, NULL_NODE);
05130 CHECK_NULL_RETURN_MEMERR(*(iarg->ptail));
05131 iarg->ptail = &(NCDR((*(iarg->ptail))));
05132 }
05133 }
05134
05135 return 0;
05136 }
05137
05138 static int
05139 parse_exp(Node** np, OnigToken* tok, int term,
05140 UChar** src, UChar* end, ScanEnv* env)
05141 {
05142 int r, len, group = 0;
05143 Node* qn;
05144 Node** targetp;
05145
05146 *np = NULL;
05147 if (tok->type == (enum TokenSyms )term)
05148 goto end_of_token;
05149
05150 switch (tok->type) {
05151 case TK_ALT:
05152 case TK_EOT:
05153 end_of_token:
05154 *np = node_new_empty();
05155 return tok->type;
05156
05157 case TK_SUBEXP_OPEN:
05158 r = parse_enclose(np, tok, TK_SUBEXP_CLOSE, src, end, env);
05159 if (r < 0) return r;
05160 if (r == 1) group = 1;
05161 else if (r == 2) {
05162 Node* target;
05163 OnigOptionType prev = env->option;
05164
05165 env->option = NENCLOSE(*np)->option;
05166 r = fetch_token(tok, src, end, env);
05167 if (r < 0) return r;
05168 r = parse_subexp(&target, tok, term, src, end, env);
05169 env->option = prev;
05170 if (r < 0) {
05171 onig_node_free(target);
05172 return r;
05173 }
05174 NENCLOSE(*np)->target = target;
05175 return tok->type;
05176 }
05177 break;
05178
05179 case TK_SUBEXP_CLOSE:
05180 if (! IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_UNMATCHED_CLOSE_SUBEXP))
05181 return ONIGERR_UNMATCHED_CLOSE_PARENTHESIS;
05182
05183 if (tok->escaped) goto tk_raw_byte;
05184 else goto tk_byte;
05185 break;
05186
05187 case TK_STRING:
05188 tk_byte:
05189 {
05190 *np = node_new_str(tok->backp, *src);
05191 CHECK_NULL_RETURN_MEMERR(*np);
05192
05193 while (1) {
05194 r = fetch_token(tok, src, end, env);
05195 if (r < 0) return r;
05196 if (r != TK_STRING) break;
05197
05198 r = onig_node_str_cat(*np, tok->backp, *src);
05199 if (r < 0) return r;
05200 }
05201
05202 string_end:
05203 targetp = np;
05204 goto repeat;
05205 }
05206 break;
05207
05208 case TK_RAW_BYTE:
05209 tk_raw_byte:
05210 {
05211 *np = node_new_str_raw_char((UChar )tok->u.c);
05212 CHECK_NULL_RETURN_MEMERR(*np);
05213 len = 1;
05214 while (1) {
05215 if (len >= ONIGENC_MBC_MINLEN(env->enc)) {
05216 if (len == enclen(env->enc, NSTR(*np)->s, NSTR(*np)->end)) {
05217 r = fetch_token(tok, src, end, env);
05218 NSTRING_CLEAR_RAW(*np);
05219 goto string_end;
05220 }
05221 }
05222
05223 r = fetch_token(tok, src, end, env);
05224 if (r < 0) return r;
05225 if (r != TK_RAW_BYTE) {
05226
05227 #ifdef USE_PAD_TO_SHORT_BYTE_CHAR
05228 int rem;
05229 if (len < ONIGENC_MBC_MINLEN(env->enc)) {
05230 rem = ONIGENC_MBC_MINLEN(env->enc) - len;
05231 (void )node_str_head_pad(NSTR(*np), rem, (UChar )0);
05232 if (len + rem == enclen(env->enc, NSTR(*np)->s)) {
05233 NSTRING_CLEAR_RAW(*np);
05234 goto string_end;
05235 }
05236 }
05237 #endif
05238 return ONIGERR_TOO_SHORT_MULTI_BYTE_STRING;
05239 }
05240
05241 r = node_str_cat_char(*np, (UChar )tok->u.c);
05242 if (r < 0) return r;
05243
05244 len++;
05245 }
05246 }
05247 break;
05248
05249 case TK_CODE_POINT:
05250 {
05251 UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN];
05252 int num = ONIGENC_CODE_TO_MBC(env->enc, tok->u.code, buf);
05253 if (num < 0) return num;
05254 #ifdef NUMBERED_CHAR_IS_NOT_CASE_AMBIG
05255 *np = node_new_str_raw(buf, buf + num);
05256 #else
05257 *np = node_new_str(buf, buf + num);
05258 #endif
05259 CHECK_NULL_RETURN_MEMERR(*np);
05260 }
05261 break;
05262
05263 case TK_QUOTE_OPEN:
05264 {
05265 OnigCodePoint end_op[2];
05266 UChar *qstart, *qend, *nextp;
05267
05268 end_op[0] = (OnigCodePoint )MC_ESC(env->syntax);
05269 end_op[1] = (OnigCodePoint )'E';
05270 qstart = *src;
05271 qend = find_str_position(end_op, 2, qstart, end, &nextp, env->enc);
05272 if (IS_NULL(qend)) {
05273 nextp = qend = end;
05274 }
05275 *np = node_new_str(qstart, qend);
05276 CHECK_NULL_RETURN_MEMERR(*np);
05277 *src = nextp;
05278 }
05279 break;
05280
05281 case TK_CHAR_TYPE:
05282 {
05283 switch (tok->u.prop.ctype) {
05284 case ONIGENC_CTYPE_D:
05285 case ONIGENC_CTYPE_S:
05286 case ONIGENC_CTYPE_W:
05287 {
05288 CClassNode* cc;
05289 *np = node_new_cclass();
05290 CHECK_NULL_RETURN_MEMERR(*np);
05291 cc = NCCLASS(*np);
05292 add_ctype_to_cc(cc, tok->u.prop.ctype, 0, env);
05293 if (tok->u.prop.not != 0) NCCLASS_SET_NOT(cc);
05294 }
05295 break;
05296
05297 case ONIGENC_CTYPE_WORD:
05298 *np = node_new_ctype(tok->u.prop.ctype, tok->u.prop.not);
05299 CHECK_NULL_RETURN_MEMERR(*np);
05300 break;
05301
05302 case ONIGENC_CTYPE_SPACE:
05303 case ONIGENC_CTYPE_DIGIT:
05304 case ONIGENC_CTYPE_XDIGIT:
05305 {
05306 CClassNode* cc;
05307
05308 #ifdef USE_SHARED_CCLASS_TABLE
05309 const OnigCodePoint *mbr;
05310 OnigCodePoint sb_out;
05311
05312 r = ONIGENC_GET_CTYPE_CODE_RANGE(env->enc, tok->u.prop.ctype,
05313 &sb_out, &mbr);
05314 if (r == 0 &&
05315 ONIGENC_CODE_RANGE_NUM(mbr)
05316 >= THRESHOLD_RANGE_NUM_FOR_SHARE_CCLASS) {
05317 type_cclass_key key;
05318 type_cclass_key* new_key;
05319
05320 key.enc = env->enc;
05321 key.not = tok->u.prop.not;
05322 key.type = tok->u.prop.ctype;
05323
05324 THREAD_ATOMIC_START;
05325
05326 if (IS_NULL(OnigTypeCClassTable)) {
05327 OnigTypeCClassTable
05328 = onig_st_init_table_with_size(&type_type_cclass_hash, 10);
05329 if (IS_NULL(OnigTypeCClassTable)) {
05330 THREAD_ATOMIC_END;
05331 return ONIGERR_MEMORY;
05332 }
05333 }
05334 else {
05335 if (onig_st_lookup(OnigTypeCClassTable, (st_data_t )&key,
05336 (st_data_t* )np)) {
05337 THREAD_ATOMIC_END;
05338 break;
05339 }
05340 }
05341
05342 *np = node_new_cclass_by_codepoint_range(tok->u.prop.not,
05343 sb_out, mbr);
05344 if (IS_NULL(*np)) {
05345 THREAD_ATOMIC_END;
05346 return ONIGERR_MEMORY;
05347 }
05348
05349 cc = NCCLASS(*np);
05350 NCCLASS_SET_SHARE(cc);
05351 new_key = (type_cclass_key* )xmalloc(sizeof(type_cclass_key));
05352 xmemcpy(new_key, &key, sizeof(type_cclass_key));
05353 onig_st_add_direct(OnigTypeCClassTable, (st_data_t )new_key,
05354 (st_data_t )*np);
05355
05356 THREAD_ATOMIC_END;
05357 }
05358 else {
05359 #endif
05360 *np = node_new_cclass();
05361 CHECK_NULL_RETURN_MEMERR(*np);
05362 cc = NCCLASS(*np);
05363 add_ctype_to_cc(cc, tok->u.prop.ctype, 0, env);
05364 if (tok->u.prop.not != 0) NCCLASS_SET_NOT(cc);
05365 #ifdef USE_SHARED_CCLASS_TABLE
05366 }
05367 #endif
05368 }
05369 break;
05370
05371 default:
05372 return ONIGERR_PARSER_BUG;
05373 break;
05374 }
05375 }
05376 break;
05377
05378 case TK_CHAR_PROPERTY:
05379 r = parse_char_property(np, tok, src, end, env);
05380 if (r != 0) return r;
05381 break;
05382
05383 case TK_CC_OPEN:
05384 {
05385 CClassNode* cc;
05386
05387 r = parse_char_class(np, tok, src, end, env);
05388 if (r != 0) return r;
05389
05390 cc = NCCLASS(*np);
05391 if (IS_IGNORECASE(env->option)) {
05392 IApplyCaseFoldArg iarg;
05393
05394 iarg.env = env;
05395 iarg.cc = cc;
05396 iarg.alt_root = NULL_NODE;
05397 iarg.ptail = &(iarg.alt_root);
05398
05399 r = ONIGENC_APPLY_ALL_CASE_FOLD(env->enc, env->case_fold_flag,
05400 i_apply_case_fold, &iarg);
05401 if (r != 0) {
05402 onig_node_free(iarg.alt_root);
05403 return r;
05404 }
05405 if (IS_NOT_NULL(iarg.alt_root)) {
05406 Node* work = onig_node_new_alt(*np, iarg.alt_root);
05407 if (IS_NULL(work)) {
05408 onig_node_free(iarg.alt_root);
05409 return ONIGERR_MEMORY;
05410 }
05411 *np = work;
05412 }
05413 }
05414 }
05415 break;
05416
05417 case TK_ANYCHAR:
05418 *np = node_new_anychar();
05419 CHECK_NULL_RETURN_MEMERR(*np);
05420 break;
05421
05422 case TK_ANYCHAR_ANYTIME:
05423 *np = node_new_anychar();
05424 CHECK_NULL_RETURN_MEMERR(*np);
05425 qn = node_new_quantifier(0, REPEAT_INFINITE, 0);
05426 CHECK_NULL_RETURN_MEMERR(qn);
05427 NQTFR(qn)->target = *np;
05428 *np = qn;
05429 break;
05430
05431 case TK_BACKREF:
05432 len = tok->u.backref.num;
05433 *np = node_new_backref(len,
05434 (len > 1 ? tok->u.backref.refs : &(tok->u.backref.ref1)),
05435 tok->u.backref.by_name,
05436 #ifdef USE_BACKREF_WITH_LEVEL
05437 tok->u.backref.exist_level,
05438 tok->u.backref.level,
05439 #endif
05440 env);
05441 CHECK_NULL_RETURN_MEMERR(*np);
05442 break;
05443
05444 #ifdef USE_SUBEXP_CALL
05445 case TK_CALL:
05446 {
05447 int gnum = tok->u.call.gnum;
05448
05449 if (gnum < 0) {
05450 gnum = BACKREF_REL_TO_ABS(gnum, env);
05451 if (gnum <= 0)
05452 return ONIGERR_INVALID_BACKREF;
05453 }
05454 *np = node_new_call(tok->u.call.name, tok->u.call.name_end, gnum);
05455 CHECK_NULL_RETURN_MEMERR(*np);
05456 env->num_call++;
05457 }
05458 break;
05459 #endif
05460
05461 case TK_ANCHOR:
05462 *np = onig_node_new_anchor(tok->u.anchor);
05463 break;
05464
05465 case TK_OP_REPEAT:
05466 case TK_INTERVAL:
05467 if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_CONTEXT_INDEP_REPEAT_OPS)) {
05468 if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_CONTEXT_INVALID_REPEAT_OPS))
05469 return ONIGERR_TARGET_OF_REPEAT_OPERATOR_NOT_SPECIFIED;
05470 else
05471 *np = node_new_empty();
05472 }
05473 else {
05474 goto tk_byte;
05475 }
05476 break;
05477
05478 default:
05479 return ONIGERR_PARSER_BUG;
05480 break;
05481 }
05482
05483 {
05484 targetp = np;
05485
05486 re_entry:
05487 r = fetch_token(tok, src, end, env);
05488 if (r < 0) return r;
05489
05490 repeat:
05491 if (r == TK_OP_REPEAT || r == TK_INTERVAL) {
05492 if (is_invalid_quantifier_target(*targetp))
05493 return ONIGERR_TARGET_OF_REPEAT_OPERATOR_INVALID;
05494
05495 qn = node_new_quantifier(tok->u.repeat.lower, tok->u.repeat.upper,
05496 (r == TK_INTERVAL ? 1 : 0));
05497 CHECK_NULL_RETURN_MEMERR(qn);
05498 NQTFR(qn)->greedy = tok->u.repeat.greedy;
05499 r = set_quantifier(qn, *targetp, group, env);
05500 if (r < 0) {
05501 onig_node_free(qn);
05502 return r;
05503 }
05504
05505 if (tok->u.repeat.possessive != 0) {
05506 Node* en;
05507 en = node_new_enclose(ENCLOSE_STOP_BACKTRACK);
05508 if (IS_NULL(en)) {
05509 onig_node_free(qn);
05510 return ONIGERR_MEMORY;
05511 }
05512 NENCLOSE(en)->target = qn;
05513 qn = en;
05514 }
05515
05516 if (r == 0) {
05517 *targetp = qn;
05518 }
05519 else if (r == 1) {
05520 onig_node_free(qn);
05521 }
05522 else if (r == 2) {
05523 Node *tmp;
05524
05525 *targetp = node_new_list(*targetp, NULL);
05526 if (IS_NULL(*targetp)) {
05527 onig_node_free(qn);
05528 return ONIGERR_MEMORY;
05529 }
05530 tmp = NCDR(*targetp) = node_new_list(qn, NULL);
05531 if (IS_NULL(tmp)) {
05532 onig_node_free(qn);
05533 return ONIGERR_MEMORY;
05534 }
05535 targetp = &(NCAR(tmp));
05536 }
05537 goto re_entry;
05538 }
05539 }
05540
05541 return r;
05542 }
05543
05544 static int
05545 parse_branch(Node** top, OnigToken* tok, int term,
05546 UChar** src, UChar* end, ScanEnv* env)
05547 {
05548 int r;
05549 Node *node, **headp;
05550
05551 *top = NULL;
05552 r = parse_exp(&node, tok, term, src, end, env);
05553 if (r < 0) {
05554 onig_node_free(node);
05555 return r;
05556 }
05557
05558 if (r == TK_EOT || r == term || r == TK_ALT) {
05559 *top = node;
05560 }
05561 else {
05562 *top = node_new_list(node, NULL);
05563 headp = &(NCDR(*top));
05564 while (r != TK_EOT && r != term && r != TK_ALT) {
05565 r = parse_exp(&node, tok, term, src, end, env);
05566 if (r < 0) {
05567 onig_node_free(node);
05568 return r;
05569 }
05570
05571 if (NTYPE(node) == NT_LIST) {
05572 *headp = node;
05573 while (IS_NOT_NULL(NCDR(node))) node = NCDR(node);
05574 headp = &(NCDR(node));
05575 }
05576 else {
05577 *headp = node_new_list(node, NULL);
05578 headp = &(NCDR(*headp));
05579 }
05580 }
05581 }
05582
05583 return r;
05584 }
05585
05586
05587 static int
05588 parse_subexp(Node** top, OnigToken* tok, int term,
05589 UChar** src, UChar* end, ScanEnv* env)
05590 {
05591 int r;
05592 Node *node, **headp;
05593
05594 *top = NULL;
05595 r = parse_branch(&node, tok, term, src, end, env);
05596 if (r < 0) {
05597 onig_node_free(node);
05598 return r;
05599 }
05600
05601 if (r == term) {
05602 *top = node;
05603 }
05604 else if (r == TK_ALT) {
05605 *top = onig_node_new_alt(node, NULL);
05606 headp = &(NCDR(*top));
05607 while (r == TK_ALT) {
05608 r = fetch_token(tok, src, end, env);
05609 if (r < 0) return r;
05610 r = parse_branch(&node, tok, term, src, end, env);
05611 if (r < 0) {
05612 onig_node_free(node);
05613 return r;
05614 }
05615
05616 *headp = onig_node_new_alt(node, NULL);
05617 headp = &(NCDR(*headp));
05618 }
05619
05620 if (tok->type != (enum TokenSyms )term)
05621 goto err;
05622 }
05623 else {
05624 onig_node_free(node);
05625 err:
05626 if (term == TK_SUBEXP_CLOSE)
05627 return ONIGERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS;
05628 else
05629 return ONIGERR_PARSER_BUG;
05630 }
05631
05632 return r;
05633 }
05634
05635 static int
05636 parse_regexp(Node** top, UChar** src, UChar* end, ScanEnv* env)
05637 {
05638 int r;
05639 OnigToken tok;
05640
05641 r = fetch_token(&tok, src, end, env);
05642 if (r < 0) return r;
05643 r = parse_subexp(top, &tok, TK_EOT, src, end, env);
05644 if (r < 0) return r;
05645 return 0;
05646 }
05647
05648 extern int
05649 onig_parse_make_tree(Node** root, const UChar* pattern, const UChar* end,
05650 regex_t* reg, ScanEnv* env)
05651 {
05652 int r;
05653 UChar* p;
05654
05655 #ifdef USE_NAMED_GROUP
05656 names_clear(reg);
05657 #endif
05658
05659 scan_env_clear(env);
05660 env->option = reg->options;
05661 env->case_fold_flag = reg->case_fold_flag;
05662 env->enc = reg->enc;
05663 env->syntax = reg->syntax;
05664 env->pattern = (UChar* )pattern;
05665 env->pattern_end = (UChar* )end;
05666 env->reg = reg;
05667
05668 *root = NULL;
05669 p = (UChar* )pattern;
05670 r = parse_regexp(root, &p, (UChar* )end, env);
05671 reg->num_mem = env->num_mem;
05672 return r;
05673 }
05674
05675 extern void
05676 onig_scan_env_set_error_string(ScanEnv* env, int ecode ARG_UNUSED,
05677 UChar* arg, UChar* arg_end)
05678 {
05679 env->error = arg;
05680 env->error_end = arg_end;
05681 }
05682