00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030 #include "regparse.h"
00031
00032 OnigCaseFoldType OnigDefaultCaseFoldFlag = ONIGENC_CASE_FOLD_MIN;
00033
00034 extern OnigCaseFoldType
00035 onig_get_default_case_fold_flag(void)
00036 {
00037 return OnigDefaultCaseFoldFlag;
00038 }
00039
00040 extern int
00041 onig_set_default_case_fold_flag(OnigCaseFoldType case_fold_flag)
00042 {
00043 OnigDefaultCaseFoldFlag = case_fold_flag;
00044 return 0;
00045 }
00046
00047
00048 #ifndef PLATFORM_UNALIGNED_WORD_ACCESS
00049 static unsigned char PadBuf[WORD_ALIGNMENT_SIZE];
00050 #endif
00051
00052 static UChar*
00053 str_dup(UChar* s, UChar* end)
00054 {
00055 ptrdiff_t len = end - s;
00056
00057 if (len > 0) {
00058 UChar* r = (UChar* )xmalloc(len + 1);
00059 CHECK_NULL_RETURN(r);
00060 xmemcpy(r, s, len);
00061 r[len] = (UChar )0;
00062 return r;
00063 }
00064 else return NULL;
00065 }
00066
00067 static void
00068 swap_node(Node* a, Node* b)
00069 {
00070 Node c;
00071 c = *a; *a = *b; *b = c;
00072
00073 if (NTYPE(a) == NT_STR) {
00074 StrNode* sn = NSTR(a);
00075 if (sn->capa == 0) {
00076 size_t len = sn->end - sn->s;
00077 sn->s = sn->buf;
00078 sn->end = sn->s + len;
00079 }
00080 }
00081
00082 if (NTYPE(b) == NT_STR) {
00083 StrNode* sn = NSTR(b);
00084 if (sn->capa == 0) {
00085 size_t len = sn->end - sn->s;
00086 sn->s = sn->buf;
00087 sn->end = sn->s + len;
00088 }
00089 }
00090 }
00091
00092 static OnigDistance
00093 distance_add(OnigDistance d1, OnigDistance d2)
00094 {
00095 if (d1 == ONIG_INFINITE_DISTANCE || d2 == ONIG_INFINITE_DISTANCE)
00096 return ONIG_INFINITE_DISTANCE;
00097 else {
00098 if (d1 <= ONIG_INFINITE_DISTANCE - d2) return d1 + d2;
00099 else return ONIG_INFINITE_DISTANCE;
00100 }
00101 }
00102
00103 static OnigDistance
00104 distance_multiply(OnigDistance d, int m)
00105 {
00106 if (m == 0) return 0;
00107
00108 if (d < ONIG_INFINITE_DISTANCE / m)
00109 return d * m;
00110 else
00111 return ONIG_INFINITE_DISTANCE;
00112 }
00113
00114 static int
00115 bitset_is_empty(BitSetRef bs)
00116 {
00117 int i;
00118 for (i = 0; i < (int )BITSET_SIZE; i++) {
00119 if (bs[i] != 0) return 0;
00120 }
00121 return 1;
00122 }
00123
00124 #ifdef ONIG_DEBUG
00125 static int
00126 bitset_on_num(BitSetRef bs)
00127 {
00128 int i, n;
00129
00130 n = 0;
00131 for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
00132 if (BITSET_AT(bs, i)) n++;
00133 }
00134 return n;
00135 }
00136 #endif
00137
00138 extern int
00139 onig_bbuf_init(BBuf* buf, int size)
00140 {
00141 if (size <= 0) {
00142 size = 0;
00143 buf->p = NULL;
00144 }
00145 else {
00146 buf->p = (UChar* )xmalloc(size);
00147 if (IS_NULL(buf->p)) return(ONIGERR_MEMORY);
00148 }
00149
00150 buf->alloc = size;
00151 buf->used = 0;
00152 return 0;
00153 }
00154
00155
00156 #ifdef USE_SUBEXP_CALL
00157
00158 static int
00159 unset_addr_list_init(UnsetAddrList* uslist, int size)
00160 {
00161 UnsetAddr* p;
00162
00163 p = (UnsetAddr* )xmalloc(sizeof(UnsetAddr)* size);
00164 CHECK_NULL_RETURN_MEMERR(p);
00165 uslist->num = 0;
00166 uslist->alloc = size;
00167 uslist->us = p;
00168 return 0;
00169 }
00170
00171 static void
00172 unset_addr_list_end(UnsetAddrList* uslist)
00173 {
00174 if (IS_NOT_NULL(uslist->us))
00175 xfree(uslist->us);
00176 }
00177
00178 static int
00179 unset_addr_list_add(UnsetAddrList* uslist, int offset, struct _Node* node)
00180 {
00181 UnsetAddr* p;
00182 int size;
00183
00184 if (uslist->num >= uslist->alloc) {
00185 size = uslist->alloc * 2;
00186 p = (UnsetAddr* )xrealloc(uslist->us, sizeof(UnsetAddr) * size);
00187 CHECK_NULL_RETURN_MEMERR(p);
00188 uslist->alloc = size;
00189 uslist->us = p;
00190 }
00191
00192 uslist->us[uslist->num].offset = offset;
00193 uslist->us[uslist->num].target = node;
00194 uslist->num++;
00195 return 0;
00196 }
00197 #endif
00198
00199
00200 static int
00201 add_opcode(regex_t* reg, int opcode)
00202 {
00203 BBUF_ADD1(reg, opcode);
00204 return 0;
00205 }
00206
00207 #ifdef USE_COMBINATION_EXPLOSION_CHECK
00208 static int
00209 add_state_check_num(regex_t* reg, int num)
00210 {
00211 StateCheckNumType n = (StateCheckNumType )num;
00212
00213 BBUF_ADD(reg, &n, SIZE_STATE_CHECK_NUM);
00214 return 0;
00215 }
00216 #endif
00217
00218 static int
00219 add_rel_addr(regex_t* reg, int addr)
00220 {
00221 RelAddrType ra = (RelAddrType )addr;
00222
00223 BBUF_ADD(reg, &ra, SIZE_RELADDR);
00224 return 0;
00225 }
00226
00227 static int
00228 add_abs_addr(regex_t* reg, int addr)
00229 {
00230 AbsAddrType ra = (AbsAddrType )addr;
00231
00232 BBUF_ADD(reg, &ra, SIZE_ABSADDR);
00233 return 0;
00234 }
00235
00236 static int
00237 add_length(regex_t* reg, int len)
00238 {
00239 LengthType l = (LengthType )len;
00240
00241 BBUF_ADD(reg, &l, SIZE_LENGTH);
00242 return 0;
00243 }
00244
00245 static int
00246 add_mem_num(regex_t* reg, int num)
00247 {
00248 MemNumType n = (MemNumType )num;
00249
00250 BBUF_ADD(reg, &n, SIZE_MEMNUM);
00251 return 0;
00252 }
00253
00254 static int
00255 add_pointer(regex_t* reg, void* addr)
00256 {
00257 PointerType ptr = (PointerType )addr;
00258
00259 BBUF_ADD(reg, &ptr, SIZE_POINTER);
00260 return 0;
00261 }
00262
00263 static int
00264 add_option(regex_t* reg, OnigOptionType option)
00265 {
00266 BBUF_ADD(reg, &option, SIZE_OPTION);
00267 return 0;
00268 }
00269
00270 static int
00271 add_opcode_rel_addr(regex_t* reg, int opcode, int addr)
00272 {
00273 int r;
00274
00275 r = add_opcode(reg, opcode);
00276 if (r) return r;
00277 r = add_rel_addr(reg, addr);
00278 return r;
00279 }
00280
00281 static int
00282 add_bytes(regex_t* reg, UChar* bytes, int len)
00283 {
00284 BBUF_ADD(reg, bytes, len);
00285 return 0;
00286 }
00287
00288 static int
00289 add_bitset(regex_t* reg, BitSetRef bs)
00290 {
00291 BBUF_ADD(reg, bs, SIZE_BITSET);
00292 return 0;
00293 }
00294
00295 static int
00296 add_opcode_option(regex_t* reg, int opcode, OnigOptionType option)
00297 {
00298 int r;
00299
00300 r = add_opcode(reg, opcode);
00301 if (r) return r;
00302 r = add_option(reg, option);
00303 return r;
00304 }
00305
00306 static int compile_length_tree(Node* node, regex_t* reg);
00307 static int compile_tree(Node* node, regex_t* reg);
00308
00309
00310 #define IS_NEED_STR_LEN_OP_EXACT(op) \
00311 ((op) == OP_EXACTN || (op) == OP_EXACTMB2N ||\
00312 (op) == OP_EXACTMB3N || (op) == OP_EXACTMBN || (op) == OP_EXACTN_IC)
00313
00314 static int
00315 select_str_opcode(int mb_len, int str_len, int ignore_case)
00316 {
00317 int op;
00318
00319 if (ignore_case) {
00320 switch (str_len) {
00321 case 1: op = OP_EXACT1_IC; break;
00322 default: op = OP_EXACTN_IC; break;
00323 }
00324 }
00325 else {
00326 switch (mb_len) {
00327 case 1:
00328 switch (str_len) {
00329 case 1: op = OP_EXACT1; break;
00330 case 2: op = OP_EXACT2; break;
00331 case 3: op = OP_EXACT3; break;
00332 case 4: op = OP_EXACT4; break;
00333 case 5: op = OP_EXACT5; break;
00334 default: op = OP_EXACTN; break;
00335 }
00336 break;
00337
00338 case 2:
00339 switch (str_len) {
00340 case 1: op = OP_EXACTMB2N1; break;
00341 case 2: op = OP_EXACTMB2N2; break;
00342 case 3: op = OP_EXACTMB2N3; break;
00343 default: op = OP_EXACTMB2N; break;
00344 }
00345 break;
00346
00347 case 3:
00348 op = OP_EXACTMB3N;
00349 break;
00350
00351 default:
00352 op = OP_EXACTMBN;
00353 break;
00354 }
00355 }
00356 return op;
00357 }
00358
00359 static int
00360 compile_tree_empty_check(Node* node, regex_t* reg, int empty_info)
00361 {
00362 int r;
00363 int saved_num_null_check = reg->num_null_check;
00364
00365 if (empty_info != 0) {
00366 r = add_opcode(reg, OP_NULL_CHECK_START);
00367 if (r) return r;
00368 r = add_mem_num(reg, reg->num_null_check);
00369 if (r) return r;
00370 reg->num_null_check++;
00371 }
00372
00373 r = compile_tree(node, reg);
00374 if (r) return r;
00375
00376 if (empty_info != 0) {
00377 if (empty_info == NQ_TARGET_IS_EMPTY)
00378 r = add_opcode(reg, OP_NULL_CHECK_END);
00379 else if (empty_info == NQ_TARGET_IS_EMPTY_MEM)
00380 r = add_opcode(reg, OP_NULL_CHECK_END_MEMST);
00381 else if (empty_info == NQ_TARGET_IS_EMPTY_REC)
00382 r = add_opcode(reg, OP_NULL_CHECK_END_MEMST_PUSH);
00383
00384 if (r) return r;
00385 r = add_mem_num(reg, saved_num_null_check);
00386 }
00387 return r;
00388 }
00389
00390 #ifdef USE_SUBEXP_CALL
00391 static int
00392 compile_call(CallNode* node, regex_t* reg)
00393 {
00394 int r;
00395
00396 r = add_opcode(reg, OP_CALL);
00397 if (r) return r;
00398 r = unset_addr_list_add(node->unset_addr_list, BBUF_GET_OFFSET_POS(reg),
00399 node->target);
00400 if (r) return r;
00401 r = add_abs_addr(reg, 0 );
00402 return r;
00403 }
00404 #endif
00405
00406 static int
00407 compile_tree_n_times(Node* node, int n, regex_t* reg)
00408 {
00409 int i, r;
00410
00411 for (i = 0; i < n; i++) {
00412 r = compile_tree(node, reg);
00413 if (r) return r;
00414 }
00415 return 0;
00416 }
00417
00418 static int
00419 add_compile_string_length(UChar* s ARG_UNUSED, int mb_len, OnigDistance str_len,
00420 regex_t* reg ARG_UNUSED, int ignore_case)
00421 {
00422 int len;
00423 int op = select_str_opcode(mb_len, str_len, ignore_case);
00424
00425 len = SIZE_OPCODE;
00426
00427 if (op == OP_EXACTMBN) len += SIZE_LENGTH;
00428 if (IS_NEED_STR_LEN_OP_EXACT(op))
00429 len += SIZE_LENGTH;
00430
00431 len += mb_len * str_len;
00432 return len;
00433 }
00434
00435 static int
00436 add_compile_string(UChar* s, int mb_len, int str_len,
00437 regex_t* reg, int ignore_case)
00438 {
00439 int op = select_str_opcode(mb_len, str_len, ignore_case);
00440 add_opcode(reg, op);
00441
00442 if (op == OP_EXACTMBN)
00443 add_length(reg, mb_len);
00444
00445 if (IS_NEED_STR_LEN_OP_EXACT(op)) {
00446 if (op == OP_EXACTN_IC)
00447 add_length(reg, mb_len * str_len);
00448 else
00449 add_length(reg, str_len);
00450 }
00451
00452 add_bytes(reg, s, mb_len * str_len);
00453 return 0;
00454 }
00455
00456
00457 static int
00458 compile_length_string_node(Node* node, regex_t* reg)
00459 {
00460 int rlen, r, len, prev_len, slen, ambig;
00461 OnigEncoding enc = reg->enc;
00462 UChar *p, *prev;
00463 StrNode* sn;
00464
00465 sn = NSTR(node);
00466 if (sn->end <= sn->s)
00467 return 0;
00468
00469 ambig = NSTRING_IS_AMBIG(node);
00470
00471 p = prev = sn->s;
00472 prev_len = enclen(enc, p, sn->end);
00473 p += prev_len;
00474 slen = 1;
00475 rlen = 0;
00476
00477 for (; p < sn->end; ) {
00478 len = enclen(enc, p, sn->end);
00479 if (len == prev_len) {
00480 slen++;
00481 }
00482 else {
00483 r = add_compile_string_length(prev, prev_len, slen, reg, ambig);
00484 rlen += r;
00485 prev = p;
00486 slen = 1;
00487 prev_len = len;
00488 }
00489 p += len;
00490 }
00491 r = add_compile_string_length(prev, prev_len, slen, reg, ambig);
00492 rlen += r;
00493 return rlen;
00494 }
00495
00496 static int
00497 compile_length_string_raw_node(StrNode* sn, regex_t* reg)
00498 {
00499 if (sn->end <= sn->s)
00500 return 0;
00501
00502 return add_compile_string_length(sn->s, 1 , sn->end - sn->s, reg, 0);
00503 }
00504
00505 static int
00506 compile_string_node(Node* node, regex_t* reg)
00507 {
00508 int r, len, prev_len, slen, ambig;
00509 OnigEncoding enc = reg->enc;
00510 UChar *p, *prev, *end;
00511 StrNode* sn;
00512
00513 sn = NSTR(node);
00514 if (sn->end <= sn->s)
00515 return 0;
00516
00517 end = sn->end;
00518 ambig = NSTRING_IS_AMBIG(node);
00519
00520 p = prev = sn->s;
00521 prev_len = enclen(enc, p, end);
00522 p += prev_len;
00523 slen = 1;
00524
00525 for (; p < end; ) {
00526 len = enclen(enc, p, end);
00527 if (len == prev_len) {
00528 slen++;
00529 }
00530 else {
00531 r = add_compile_string(prev, prev_len, slen, reg, ambig);
00532 if (r) return r;
00533
00534 prev = p;
00535 slen = 1;
00536 prev_len = len;
00537 }
00538
00539 p += len;
00540 }
00541 return add_compile_string(prev, prev_len, slen, reg, ambig);
00542 }
00543
00544 static int
00545 compile_string_raw_node(StrNode* sn, regex_t* reg)
00546 {
00547 if (sn->end <= sn->s)
00548 return 0;
00549
00550 return add_compile_string(sn->s, 1 , sn->end - sn->s, reg, 0);
00551 }
00552
00553 static int
00554 add_multi_byte_cclass(BBuf* mbuf, regex_t* reg)
00555 {
00556 #ifdef PLATFORM_UNALIGNED_WORD_ACCESS
00557 add_length(reg, mbuf->used);
00558 return add_bytes(reg, mbuf->p, mbuf->used);
00559 #else
00560 int r, pad_size;
00561 UChar* p = BBUF_GET_ADD_ADDRESS(reg) + SIZE_LENGTH;
00562
00563 GET_ALIGNMENT_PAD_SIZE(p, pad_size);
00564 add_length(reg, mbuf->used + (WORD_ALIGNMENT_SIZE - 1));
00565 if (pad_size != 0) add_bytes(reg, PadBuf, pad_size);
00566
00567 r = add_bytes(reg, mbuf->p, mbuf->used);
00568
00569
00570 pad_size = (WORD_ALIGNMENT_SIZE - 1) - pad_size;
00571 if (pad_size != 0) add_bytes(reg, PadBuf, pad_size);
00572 return r;
00573 #endif
00574 }
00575
00576 static int
00577 compile_length_cclass_node(CClassNode* cc, regex_t* reg)
00578 {
00579 int len;
00580
00581 if (IS_NCCLASS_SHARE(cc)) {
00582 len = SIZE_OPCODE + SIZE_POINTER;
00583 return len;
00584 }
00585
00586 if (IS_NULL(cc->mbuf)) {
00587 len = SIZE_OPCODE + SIZE_BITSET;
00588 }
00589 else {
00590 if (ONIGENC_MBC_MINLEN(reg->enc) > 1 || bitset_is_empty(cc->bs)) {
00591 len = SIZE_OPCODE;
00592 }
00593 else {
00594 len = SIZE_OPCODE + SIZE_BITSET;
00595 }
00596 #ifdef PLATFORM_UNALIGNED_WORD_ACCESS
00597 len += SIZE_LENGTH + cc->mbuf->used;
00598 #else
00599 len += SIZE_LENGTH + cc->mbuf->used + (WORD_ALIGNMENT_SIZE - 1);
00600 #endif
00601 }
00602
00603 return len;
00604 }
00605
00606 static int
00607 compile_cclass_node(CClassNode* cc, regex_t* reg)
00608 {
00609 int r;
00610
00611 if (IS_NCCLASS_SHARE(cc)) {
00612 add_opcode(reg, OP_CCLASS_NODE);
00613 r = add_pointer(reg, cc);
00614 return r;
00615 }
00616
00617 if (IS_NULL(cc->mbuf)) {
00618 if (IS_NCCLASS_NOT(cc))
00619 add_opcode(reg, OP_CCLASS_NOT);
00620 else
00621 add_opcode(reg, OP_CCLASS);
00622
00623 r = add_bitset(reg, cc->bs);
00624 }
00625 else {
00626 if (ONIGENC_MBC_MINLEN(reg->enc) > 1 || bitset_is_empty(cc->bs)) {
00627 if (IS_NCCLASS_NOT(cc))
00628 add_opcode(reg, OP_CCLASS_MB_NOT);
00629 else
00630 add_opcode(reg, OP_CCLASS_MB);
00631
00632 r = add_multi_byte_cclass(cc->mbuf, reg);
00633 }
00634 else {
00635 if (IS_NCCLASS_NOT(cc))
00636 add_opcode(reg, OP_CCLASS_MIX_NOT);
00637 else
00638 add_opcode(reg, OP_CCLASS_MIX);
00639
00640 r = add_bitset(reg, cc->bs);
00641 if (r) return r;
00642 r = add_multi_byte_cclass(cc->mbuf, reg);
00643 }
00644 }
00645
00646 return r;
00647 }
00648
00649 static int
00650 entry_repeat_range(regex_t* reg, int id, int lower, int upper)
00651 {
00652 #define REPEAT_RANGE_ALLOC 4
00653
00654 OnigRepeatRange* p;
00655
00656 if (reg->repeat_range_alloc == 0) {
00657 p = (OnigRepeatRange* )xmalloc(sizeof(OnigRepeatRange) * REPEAT_RANGE_ALLOC);
00658 CHECK_NULL_RETURN_MEMERR(p);
00659 reg->repeat_range = p;
00660 reg->repeat_range_alloc = REPEAT_RANGE_ALLOC;
00661 }
00662 else if (reg->repeat_range_alloc <= id) {
00663 int n;
00664 n = reg->repeat_range_alloc + REPEAT_RANGE_ALLOC;
00665 p = (OnigRepeatRange* )xrealloc(reg->repeat_range,
00666 sizeof(OnigRepeatRange) * n);
00667 CHECK_NULL_RETURN_MEMERR(p);
00668 reg->repeat_range = p;
00669 reg->repeat_range_alloc = n;
00670 }
00671 else {
00672 p = reg->repeat_range;
00673 }
00674
00675 p[id].lower = lower;
00676 p[id].upper = (IS_REPEAT_INFINITE(upper) ? 0x7fffffff : upper);
00677 return 0;
00678 }
00679
00680 static int
00681 compile_range_repeat_node(QtfrNode* qn, int target_len, int empty_info,
00682 regex_t* reg)
00683 {
00684 int r;
00685 int num_repeat = reg->num_repeat;
00686
00687 r = add_opcode(reg, qn->greedy ? OP_REPEAT : OP_REPEAT_NG);
00688 if (r) return r;
00689 r = add_mem_num(reg, num_repeat);
00690 reg->num_repeat++;
00691 if (r) return r;
00692 r = add_rel_addr(reg, target_len + SIZE_OP_REPEAT_INC);
00693 if (r) return r;
00694
00695 r = entry_repeat_range(reg, num_repeat, qn->lower, qn->upper);
00696 if (r) return r;
00697
00698 r = compile_tree_empty_check(qn->target, reg, empty_info);
00699 if (r) return r;
00700
00701 if (
00702 #ifdef USE_SUBEXP_CALL
00703 reg->num_call > 0 ||
00704 #endif
00705 IS_QUANTIFIER_IN_REPEAT(qn)) {
00706 r = add_opcode(reg, qn->greedy ? OP_REPEAT_INC_SG : OP_REPEAT_INC_NG_SG);
00707 }
00708 else {
00709 r = add_opcode(reg, qn->greedy ? OP_REPEAT_INC : OP_REPEAT_INC_NG);
00710 }
00711 if (r) return r;
00712 r = add_mem_num(reg, num_repeat);
00713 return r;
00714 }
00715
00716 static int
00717 is_anychar_star_quantifier(QtfrNode* qn)
00718 {
00719 if (qn->greedy && IS_REPEAT_INFINITE(qn->upper) &&
00720 NTYPE(qn->target) == NT_CANY)
00721 return 1;
00722 else
00723 return 0;
00724 }
00725
00726 #define QUANTIFIER_EXPAND_LIMIT_SIZE 50
00727 #define CKN_ON (ckn > 0)
00728
00729 #ifdef USE_COMBINATION_EXPLOSION_CHECK
00730
00731 static int
00732 compile_length_quantifier_node(QtfrNode* qn, regex_t* reg)
00733 {
00734 int len, mod_tlen, cklen;
00735 int ckn;
00736 int infinite = IS_REPEAT_INFINITE(qn->upper);
00737 int empty_info = qn->target_empty_info;
00738 int tlen = compile_length_tree(qn->target, reg);
00739
00740 if (tlen < 0) return tlen;
00741
00742 ckn = ((reg->num_comb_exp_check > 0) ? qn->comb_exp_check_num : 0);
00743
00744 cklen = (CKN_ON ? SIZE_STATE_CHECK_NUM: 0);
00745
00746
00747 if (NTYPE(qn->target) == NT_CANY) {
00748 if (qn->greedy && infinite) {
00749 if (IS_NOT_NULL(qn->next_head_exact) && !CKN_ON)
00750 return SIZE_OP_ANYCHAR_STAR_PEEK_NEXT + tlen * qn->lower + cklen;
00751 else
00752 return SIZE_OP_ANYCHAR_STAR + tlen * qn->lower + cklen;
00753 }
00754 }
00755
00756 if (empty_info != 0)
00757 mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END);
00758 else
00759 mod_tlen = tlen;
00760
00761 if (infinite && qn->lower <= 1) {
00762 if (qn->greedy) {
00763 if (qn->lower == 1)
00764 len = SIZE_OP_JUMP;
00765 else
00766 len = 0;
00767
00768 len += SIZE_OP_PUSH + cklen + mod_tlen + SIZE_OP_JUMP;
00769 }
00770 else {
00771 if (qn->lower == 0)
00772 len = SIZE_OP_JUMP;
00773 else
00774 len = 0;
00775
00776 len += mod_tlen + SIZE_OP_PUSH + cklen;
00777 }
00778 }
00779 else if (qn->upper == 0) {
00780 if (qn->is_refered != 0)
00781 len = SIZE_OP_JUMP + tlen;
00782 else
00783 len = 0;
00784 }
00785 else if (qn->upper == 1 && qn->greedy) {
00786 if (qn->lower == 0) {
00787 if (CKN_ON) {
00788 len = SIZE_OP_STATE_CHECK_PUSH + tlen;
00789 }
00790 else {
00791 len = SIZE_OP_PUSH + tlen;
00792 }
00793 }
00794 else {
00795 len = tlen;
00796 }
00797 }
00798 else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) {
00799 len = SIZE_OP_PUSH + cklen + SIZE_OP_JUMP + tlen;
00800 }
00801 else {
00802 len = SIZE_OP_REPEAT_INC
00803 + mod_tlen + SIZE_OPCODE + SIZE_RELADDR + SIZE_MEMNUM;
00804 if (CKN_ON)
00805 len += SIZE_OP_STATE_CHECK;
00806 }
00807
00808 return len;
00809 }
00810
00811 static int
00812 compile_quantifier_node(QtfrNode* qn, regex_t* reg)
00813 {
00814 int r, mod_tlen;
00815 int ckn;
00816 int infinite = IS_REPEAT_INFINITE(qn->upper);
00817 int empty_info = qn->target_empty_info;
00818 int tlen = compile_length_tree(qn->target, reg);
00819
00820 if (tlen < 0) return tlen;
00821
00822 ckn = ((reg->num_comb_exp_check > 0) ? qn->comb_exp_check_num : 0);
00823
00824 if (is_anychar_star_quantifier(qn)) {
00825 r = compile_tree_n_times(qn->target, qn->lower, reg);
00826 if (r) return r;
00827 if (IS_NOT_NULL(qn->next_head_exact) && !CKN_ON) {
00828 if (IS_MULTILINE(reg->options))
00829 r = add_opcode(reg, OP_ANYCHAR_ML_STAR_PEEK_NEXT);
00830 else
00831 r = add_opcode(reg, OP_ANYCHAR_STAR_PEEK_NEXT);
00832 if (r) return r;
00833 if (CKN_ON) {
00834 r = add_state_check_num(reg, ckn);
00835 if (r) return r;
00836 }
00837
00838 return add_bytes(reg, NSTR(qn->next_head_exact)->s, 1);
00839 }
00840 else {
00841 if (IS_MULTILINE(reg->options)) {
00842 r = add_opcode(reg, (CKN_ON ?
00843 OP_STATE_CHECK_ANYCHAR_ML_STAR
00844 : OP_ANYCHAR_ML_STAR));
00845 }
00846 else {
00847 r = add_opcode(reg, (CKN_ON ?
00848 OP_STATE_CHECK_ANYCHAR_STAR
00849 : OP_ANYCHAR_STAR));
00850 }
00851 if (r) return r;
00852 if (CKN_ON)
00853 r = add_state_check_num(reg, ckn);
00854
00855 return r;
00856 }
00857 }
00858
00859 if (empty_info != 0)
00860 mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END);
00861 else
00862 mod_tlen = tlen;
00863
00864 if (infinite && qn->lower <= 1) {
00865 if (qn->greedy) {
00866 if (qn->lower == 1) {
00867 r = add_opcode_rel_addr(reg, OP_JUMP,
00868 (CKN_ON ? SIZE_OP_STATE_CHECK_PUSH : SIZE_OP_PUSH));
00869 if (r) return r;
00870 }
00871
00872 if (CKN_ON) {
00873 r = add_opcode(reg, OP_STATE_CHECK_PUSH);
00874 if (r) return r;
00875 r = add_state_check_num(reg, ckn);
00876 if (r) return r;
00877 r = add_rel_addr(reg, mod_tlen + SIZE_OP_JUMP);
00878 }
00879 else {
00880 r = add_opcode_rel_addr(reg, OP_PUSH, mod_tlen + SIZE_OP_JUMP);
00881 }
00882 if (r) return r;
00883 r = compile_tree_empty_check(qn->target, reg, empty_info);
00884 if (r) return r;
00885 r = add_opcode_rel_addr(reg, OP_JUMP,
00886 -(mod_tlen + (int )SIZE_OP_JUMP
00887 + (int )(CKN_ON ? SIZE_OP_STATE_CHECK_PUSH : SIZE_OP_PUSH)));
00888 }
00889 else {
00890 if (qn->lower == 0) {
00891 r = add_opcode_rel_addr(reg, OP_JUMP, mod_tlen);
00892 if (r) return r;
00893 }
00894 r = compile_tree_empty_check(qn->target, reg, empty_info);
00895 if (r) return r;
00896 if (CKN_ON) {
00897 r = add_opcode(reg, OP_STATE_CHECK_PUSH_OR_JUMP);
00898 if (r) return r;
00899 r = add_state_check_num(reg, ckn);
00900 if (r) return r;
00901 r = add_rel_addr(reg,
00902 -(mod_tlen + (int )SIZE_OP_STATE_CHECK_PUSH_OR_JUMP));
00903 }
00904 else
00905 r = add_opcode_rel_addr(reg, OP_PUSH, -(mod_tlen + (int )SIZE_OP_PUSH));
00906 }
00907 }
00908 else if (qn->upper == 0) {
00909 if (qn->is_refered != 0) {
00910 r = add_opcode_rel_addr(reg, OP_JUMP, tlen);
00911 if (r) return r;
00912 r = compile_tree(qn->target, reg);
00913 }
00914 else
00915 r = 0;
00916 }
00917 else if (qn->upper == 1 && qn->greedy) {
00918 if (qn->lower == 0) {
00919 if (CKN_ON) {
00920 r = add_opcode(reg, OP_STATE_CHECK_PUSH);
00921 if (r) return r;
00922 r = add_state_check_num(reg, ckn);
00923 if (r) return r;
00924 r = add_rel_addr(reg, tlen);
00925 }
00926 else {
00927 r = add_opcode_rel_addr(reg, OP_PUSH, tlen);
00928 }
00929 if (r) return r;
00930 }
00931
00932 r = compile_tree(qn->target, reg);
00933 }
00934 else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) {
00935 if (CKN_ON) {
00936 r = add_opcode(reg, OP_STATE_CHECK_PUSH);
00937 if (r) return r;
00938 r = add_state_check_num(reg, ckn);
00939 if (r) return r;
00940 r = add_rel_addr(reg, SIZE_OP_JUMP);
00941 }
00942 else {
00943 r = add_opcode_rel_addr(reg, OP_PUSH, SIZE_OP_JUMP);
00944 }
00945
00946 if (r) return r;
00947 r = add_opcode_rel_addr(reg, OP_JUMP, tlen);
00948 if (r) return r;
00949 r = compile_tree(qn->target, reg);
00950 }
00951 else {
00952 r = compile_range_repeat_node(qn, mod_tlen, empty_info, reg);
00953 if (CKN_ON) {
00954 if (r) return r;
00955 r = add_opcode(reg, OP_STATE_CHECK);
00956 if (r) return r;
00957 r = add_state_check_num(reg, ckn);
00958 }
00959 }
00960 return r;
00961 }
00962
00963 #else
00964
00965 static int
00966 compile_length_quantifier_node(QtfrNode* qn, regex_t* reg)
00967 {
00968 int len, mod_tlen;
00969 int infinite = IS_REPEAT_INFINITE(qn->upper);
00970 int empty_info = qn->target_empty_info;
00971 int tlen = compile_length_tree(qn->target, reg);
00972
00973 if (tlen < 0) return tlen;
00974
00975
00976 if (NTYPE(qn->target) == NT_CANY) {
00977 if (qn->greedy && infinite) {
00978 if (IS_NOT_NULL(qn->next_head_exact))
00979 return SIZE_OP_ANYCHAR_STAR_PEEK_NEXT + tlen * qn->lower;
00980 else
00981 return SIZE_OP_ANYCHAR_STAR + tlen * qn->lower;
00982 }
00983 }
00984
00985 if (empty_info != 0)
00986 mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END);
00987 else
00988 mod_tlen = tlen;
00989
00990 if (infinite &&
00991 (qn->lower <= 1 || tlen * qn->lower <= QUANTIFIER_EXPAND_LIMIT_SIZE)) {
00992 if (qn->lower == 1 && tlen > QUANTIFIER_EXPAND_LIMIT_SIZE) {
00993 len = SIZE_OP_JUMP;
00994 }
00995 else {
00996 len = tlen * qn->lower;
00997 }
00998
00999 if (qn->greedy) {
01000 if (IS_NOT_NULL(qn->head_exact))
01001 len += SIZE_OP_PUSH_OR_JUMP_EXACT1 + mod_tlen + SIZE_OP_JUMP;
01002 else if (IS_NOT_NULL(qn->next_head_exact))
01003 len += SIZE_OP_PUSH_IF_PEEK_NEXT + mod_tlen + SIZE_OP_JUMP;
01004 else
01005 len += SIZE_OP_PUSH + mod_tlen + SIZE_OP_JUMP;
01006 }
01007 else
01008 len += SIZE_OP_JUMP + mod_tlen + SIZE_OP_PUSH;
01009 }
01010 else if (qn->upper == 0 && qn->is_refered != 0) {
01011 len = SIZE_OP_JUMP + tlen;
01012 }
01013 else if (!infinite && qn->greedy &&
01014 (qn->upper == 1 || (tlen + SIZE_OP_PUSH) * qn->upper
01015 <= QUANTIFIER_EXPAND_LIMIT_SIZE)) {
01016 len = tlen * qn->lower;
01017 len += (SIZE_OP_PUSH + tlen) * (qn->upper - qn->lower);
01018 }
01019 else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) {
01020 len = SIZE_OP_PUSH + SIZE_OP_JUMP + tlen;
01021 }
01022 else {
01023 len = SIZE_OP_REPEAT_INC
01024 + mod_tlen + SIZE_OPCODE + SIZE_RELADDR + SIZE_MEMNUM;
01025 }
01026
01027 return len;
01028 }
01029
01030 static int
01031 compile_quantifier_node(QtfrNode* qn, regex_t* reg)
01032 {
01033 int i, r, mod_tlen;
01034 int infinite = IS_REPEAT_INFINITE(qn->upper);
01035 int empty_info = qn->target_empty_info;
01036 int tlen = compile_length_tree(qn->target, reg);
01037
01038 if (tlen < 0) return tlen;
01039
01040 if (is_anychar_star_quantifier(qn)) {
01041 r = compile_tree_n_times(qn->target, qn->lower, reg);
01042 if (r) return r;
01043 if (IS_NOT_NULL(qn->next_head_exact)) {
01044 if (IS_MULTILINE(reg->options))
01045 r = add_opcode(reg, OP_ANYCHAR_ML_STAR_PEEK_NEXT);
01046 else
01047 r = add_opcode(reg, OP_ANYCHAR_STAR_PEEK_NEXT);
01048 if (r) return r;
01049 return add_bytes(reg, NSTR(qn->next_head_exact)->s, 1);
01050 }
01051 else {
01052 if (IS_MULTILINE(reg->options))
01053 return add_opcode(reg, OP_ANYCHAR_ML_STAR);
01054 else
01055 return add_opcode(reg, OP_ANYCHAR_STAR);
01056 }
01057 }
01058
01059 if (empty_info != 0)
01060 mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END);
01061 else
01062 mod_tlen = tlen;
01063
01064 if (infinite &&
01065 (qn->lower <= 1 || tlen * qn->lower <= QUANTIFIER_EXPAND_LIMIT_SIZE)) {
01066 if (qn->lower == 1 && tlen > QUANTIFIER_EXPAND_LIMIT_SIZE) {
01067 if (qn->greedy) {
01068 if (IS_NOT_NULL(qn->head_exact))
01069 r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_PUSH_OR_JUMP_EXACT1);
01070 else if (IS_NOT_NULL(qn->next_head_exact))
01071 r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_PUSH_IF_PEEK_NEXT);
01072 else
01073 r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_PUSH);
01074 }
01075 else {
01076 r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_JUMP);
01077 }
01078 if (r) return r;
01079 }
01080 else {
01081 r = compile_tree_n_times(qn->target, qn->lower, reg);
01082 if (r) return r;
01083 }
01084
01085 if (qn->greedy) {
01086 if (IS_NOT_NULL(qn->head_exact)) {
01087 r = add_opcode_rel_addr(reg, OP_PUSH_OR_JUMP_EXACT1,
01088 mod_tlen + SIZE_OP_JUMP);
01089 if (r) return r;
01090 add_bytes(reg, NSTR(qn->head_exact)->s, 1);
01091 r = compile_tree_empty_check(qn->target, reg, empty_info);
01092 if (r) return r;
01093 r = add_opcode_rel_addr(reg, OP_JUMP,
01094 -(mod_tlen + (int )SIZE_OP_JUMP + (int )SIZE_OP_PUSH_OR_JUMP_EXACT1));
01095 }
01096 else if (IS_NOT_NULL(qn->next_head_exact)) {
01097 r = add_opcode_rel_addr(reg, OP_PUSH_IF_PEEK_NEXT,
01098 mod_tlen + SIZE_OP_JUMP);
01099 if (r) return r;
01100 add_bytes(reg, NSTR(qn->next_head_exact)->s, 1);
01101 r = compile_tree_empty_check(qn->target, reg, empty_info);
01102 if (r) return r;
01103 r = add_opcode_rel_addr(reg, OP_JUMP,
01104 -(mod_tlen + (int )SIZE_OP_JUMP + (int )SIZE_OP_PUSH_IF_PEEK_NEXT));
01105 }
01106 else {
01107 r = add_opcode_rel_addr(reg, OP_PUSH, mod_tlen + SIZE_OP_JUMP);
01108 if (r) return r;
01109 r = compile_tree_empty_check(qn->target, reg, empty_info);
01110 if (r) return r;
01111 r = add_opcode_rel_addr(reg, OP_JUMP,
01112 -(mod_tlen + (int )SIZE_OP_JUMP + (int )SIZE_OP_PUSH));
01113 }
01114 }
01115 else {
01116 r = add_opcode_rel_addr(reg, OP_JUMP, mod_tlen);
01117 if (r) return r;
01118 r = compile_tree_empty_check(qn->target, reg, empty_info);
01119 if (r) return r;
01120 r = add_opcode_rel_addr(reg, OP_PUSH, -(mod_tlen + (int )SIZE_OP_PUSH));
01121 }
01122 }
01123 else if (qn->upper == 0 && qn->is_refered != 0) {
01124 r = add_opcode_rel_addr(reg, OP_JUMP, tlen);
01125 if (r) return r;
01126 r = compile_tree(qn->target, reg);
01127 }
01128 else if (!infinite && qn->greedy &&
01129 (qn->upper == 1 || (tlen + SIZE_OP_PUSH) * qn->upper
01130 <= QUANTIFIER_EXPAND_LIMIT_SIZE)) {
01131 int n = qn->upper - qn->lower;
01132
01133 r = compile_tree_n_times(qn->target, qn->lower, reg);
01134 if (r) return r;
01135
01136 for (i = 0; i < n; i++) {
01137 r = add_opcode_rel_addr(reg, OP_PUSH,
01138 (n - i) * tlen + (n - i - 1) * SIZE_OP_PUSH);
01139 if (r) return r;
01140 r = compile_tree(qn->target, reg);
01141 if (r) return r;
01142 }
01143 }
01144 else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) {
01145 r = add_opcode_rel_addr(reg, OP_PUSH, SIZE_OP_JUMP);
01146 if (r) return r;
01147 r = add_opcode_rel_addr(reg, OP_JUMP, tlen);
01148 if (r) return r;
01149 r = compile_tree(qn->target, reg);
01150 }
01151 else {
01152 r = compile_range_repeat_node(qn, mod_tlen, empty_info, reg);
01153 }
01154 return r;
01155 }
01156 #endif
01157
01158 static int
01159 compile_length_option_node(EncloseNode* node, regex_t* reg)
01160 {
01161 int tlen;
01162 OnigOptionType prev = reg->options;
01163
01164 reg->options = node->option;
01165 tlen = compile_length_tree(node->target, reg);
01166 reg->options = prev;
01167
01168 if (tlen < 0) return tlen;
01169
01170 if (IS_DYNAMIC_OPTION(prev ^ node->option)) {
01171 return SIZE_OP_SET_OPTION_PUSH + SIZE_OP_SET_OPTION + SIZE_OP_FAIL
01172 + tlen + SIZE_OP_SET_OPTION;
01173 }
01174 else
01175 return tlen;
01176 }
01177
01178 static int
01179 compile_option_node(EncloseNode* node, regex_t* reg)
01180 {
01181 int r;
01182 OnigOptionType prev = reg->options;
01183
01184 if (IS_DYNAMIC_OPTION(prev ^ node->option)) {
01185 r = add_opcode_option(reg, OP_SET_OPTION_PUSH, node->option);
01186 if (r) return r;
01187 r = add_opcode_option(reg, OP_SET_OPTION, prev);
01188 if (r) return r;
01189 r = add_opcode(reg, OP_FAIL);
01190 if (r) return r;
01191 }
01192
01193 reg->options = node->option;
01194 r = compile_tree(node->target, reg);
01195 reg->options = prev;
01196
01197 if (IS_DYNAMIC_OPTION(prev ^ node->option)) {
01198 if (r) return r;
01199 r = add_opcode_option(reg, OP_SET_OPTION, prev);
01200 }
01201 return r;
01202 }
01203
01204 static int
01205 compile_length_enclose_node(EncloseNode* node, regex_t* reg)
01206 {
01207 int len;
01208 int tlen;
01209
01210 if (node->type == ENCLOSE_OPTION)
01211 return compile_length_option_node(node, reg);
01212
01213 if (node->target) {
01214 tlen = compile_length_tree(node->target, reg);
01215 if (tlen < 0) return tlen;
01216 }
01217 else
01218 tlen = 0;
01219
01220 switch (node->type) {
01221 case ENCLOSE_MEMORY:
01222 #ifdef USE_SUBEXP_CALL
01223 if (IS_ENCLOSE_CALLED(node)) {
01224 len = SIZE_OP_MEMORY_START_PUSH + tlen
01225 + SIZE_OP_CALL + SIZE_OP_JUMP + SIZE_OP_RETURN;
01226 if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum))
01227 len += (IS_ENCLOSE_RECURSION(node)
01228 ? SIZE_OP_MEMORY_END_PUSH_REC : SIZE_OP_MEMORY_END_PUSH);
01229 else
01230 len += (IS_ENCLOSE_RECURSION(node)
01231 ? SIZE_OP_MEMORY_END_REC : SIZE_OP_MEMORY_END);
01232 }
01233 else
01234 #endif
01235 {
01236 if (BIT_STATUS_AT(reg->bt_mem_start, node->regnum))
01237 len = SIZE_OP_MEMORY_START_PUSH;
01238 else
01239 len = SIZE_OP_MEMORY_START;
01240
01241 len += tlen + (BIT_STATUS_AT(reg->bt_mem_end, node->regnum)
01242 ? SIZE_OP_MEMORY_END_PUSH : SIZE_OP_MEMORY_END);
01243 }
01244 break;
01245
01246 case ENCLOSE_STOP_BACKTRACK:
01247 if (IS_ENCLOSE_STOP_BT_SIMPLE_REPEAT(node)) {
01248 QtfrNode* qn = NQTFR(node->target);
01249 tlen = compile_length_tree(qn->target, reg);
01250 if (tlen < 0) return tlen;
01251
01252 len = tlen * qn->lower
01253 + SIZE_OP_PUSH + tlen + SIZE_OP_POP + SIZE_OP_JUMP;
01254 }
01255 else {
01256 len = SIZE_OP_PUSH_STOP_BT + tlen + SIZE_OP_POP_STOP_BT;
01257 }
01258 break;
01259
01260 default:
01261 return ONIGERR_TYPE_BUG;
01262 break;
01263 }
01264
01265 return len;
01266 }
01267
01268 static int get_char_length_tree(Node* node, regex_t* reg, int* len);
01269
01270 static int
01271 compile_enclose_node(EncloseNode* node, regex_t* reg)
01272 {
01273 int r, len;
01274
01275 if (node->type == ENCLOSE_OPTION)
01276 return compile_option_node(node, reg);
01277
01278 switch (node->type) {
01279 case ENCLOSE_MEMORY:
01280 #ifdef USE_SUBEXP_CALL
01281 if (IS_ENCLOSE_CALLED(node)) {
01282 r = add_opcode(reg, OP_CALL);
01283 if (r) return r;
01284 node->call_addr = BBUF_GET_OFFSET_POS(reg) + SIZE_ABSADDR + SIZE_OP_JUMP;
01285 node->state |= NST_ADDR_FIXED;
01286 r = add_abs_addr(reg, (int )node->call_addr);
01287 if (r) return r;
01288 len = compile_length_tree(node->target, reg);
01289 len += (SIZE_OP_MEMORY_START_PUSH + SIZE_OP_RETURN);
01290 if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum))
01291 len += (IS_ENCLOSE_RECURSION(node)
01292 ? SIZE_OP_MEMORY_END_PUSH_REC : SIZE_OP_MEMORY_END_PUSH);
01293 else
01294 len += (IS_ENCLOSE_RECURSION(node)
01295 ? SIZE_OP_MEMORY_END_REC : SIZE_OP_MEMORY_END);
01296
01297 r = add_opcode_rel_addr(reg, OP_JUMP, len);
01298 if (r) return r;
01299 }
01300 #endif
01301 if (BIT_STATUS_AT(reg->bt_mem_start, node->regnum))
01302 r = add_opcode(reg, OP_MEMORY_START_PUSH);
01303 else
01304 r = add_opcode(reg, OP_MEMORY_START);
01305 if (r) return r;
01306 r = add_mem_num(reg, node->regnum);
01307 if (r) return r;
01308 r = compile_tree(node->target, reg);
01309 if (r) return r;
01310 #ifdef USE_SUBEXP_CALL
01311 if (IS_ENCLOSE_CALLED(node)) {
01312 if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum))
01313 r = add_opcode(reg, (IS_ENCLOSE_RECURSION(node)
01314 ? OP_MEMORY_END_PUSH_REC : OP_MEMORY_END_PUSH));
01315 else
01316 r = add_opcode(reg, (IS_ENCLOSE_RECURSION(node)
01317 ? OP_MEMORY_END_REC : OP_MEMORY_END));
01318
01319 if (r) return r;
01320 r = add_mem_num(reg, node->regnum);
01321 if (r) return r;
01322 r = add_opcode(reg, OP_RETURN);
01323 }
01324 else
01325 #endif
01326 {
01327 if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum))
01328 r = add_opcode(reg, OP_MEMORY_END_PUSH);
01329 else
01330 r = add_opcode(reg, OP_MEMORY_END);
01331 if (r) return r;
01332 r = add_mem_num(reg, node->regnum);
01333 }
01334 break;
01335
01336 case ENCLOSE_STOP_BACKTRACK:
01337 if (IS_ENCLOSE_STOP_BT_SIMPLE_REPEAT(node)) {
01338 QtfrNode* qn = NQTFR(node->target);
01339 r = compile_tree_n_times(qn->target, qn->lower, reg);
01340 if (r) return r;
01341
01342 len = compile_length_tree(qn->target, reg);
01343 if (len < 0) return len;
01344
01345 r = add_opcode_rel_addr(reg, OP_PUSH, len + SIZE_OP_POP + SIZE_OP_JUMP);
01346 if (r) return r;
01347 r = compile_tree(qn->target, reg);
01348 if (r) return r;
01349 r = add_opcode(reg, OP_POP);
01350 if (r) return r;
01351 r = add_opcode_rel_addr(reg, OP_JUMP,
01352 -((int )SIZE_OP_PUSH + len + (int )SIZE_OP_POP + (int )SIZE_OP_JUMP));
01353 }
01354 else {
01355 r = add_opcode(reg, OP_PUSH_STOP_BT);
01356 if (r) return r;
01357 r = compile_tree(node->target, reg);
01358 if (r) return r;
01359 r = add_opcode(reg, OP_POP_STOP_BT);
01360 }
01361 break;
01362
01363 default:
01364 return ONIGERR_TYPE_BUG;
01365 break;
01366 }
01367
01368 return r;
01369 }
01370
01371 static int
01372 compile_length_anchor_node(AnchorNode* node, regex_t* reg)
01373 {
01374 int len;
01375 int tlen = 0;
01376
01377 if (node->target) {
01378 tlen = compile_length_tree(node->target, reg);
01379 if (tlen < 0) return tlen;
01380 }
01381
01382 switch (node->type) {
01383 case ANCHOR_PREC_READ:
01384 len = SIZE_OP_PUSH_POS + tlen + SIZE_OP_POP_POS;
01385 break;
01386 case ANCHOR_PREC_READ_NOT:
01387 len = SIZE_OP_PUSH_POS_NOT + tlen + SIZE_OP_FAIL_POS;
01388 break;
01389 case ANCHOR_LOOK_BEHIND:
01390 len = SIZE_OP_LOOK_BEHIND + tlen;
01391 break;
01392 case ANCHOR_LOOK_BEHIND_NOT:
01393 len = SIZE_OP_PUSH_LOOK_BEHIND_NOT + tlen + SIZE_OP_FAIL_LOOK_BEHIND_NOT;
01394 break;
01395
01396 default:
01397 len = SIZE_OPCODE;
01398 break;
01399 }
01400
01401 return len;
01402 }
01403
01404 static int
01405 compile_anchor_node(AnchorNode* node, regex_t* reg)
01406 {
01407 int r, len;
01408
01409 switch (node->type) {
01410 case ANCHOR_BEGIN_BUF: r = add_opcode(reg, OP_BEGIN_BUF); break;
01411 case ANCHOR_END_BUF: r = add_opcode(reg, OP_END_BUF); break;
01412 case ANCHOR_BEGIN_LINE: r = add_opcode(reg, OP_BEGIN_LINE); break;
01413 case ANCHOR_END_LINE: r = add_opcode(reg, OP_END_LINE); break;
01414 case ANCHOR_SEMI_END_BUF: r = add_opcode(reg, OP_SEMI_END_BUF); break;
01415 case ANCHOR_BEGIN_POSITION: r = add_opcode(reg, OP_BEGIN_POSITION); break;
01416
01417 case ANCHOR_WORD_BOUND: r = add_opcode(reg, OP_WORD_BOUND); break;
01418 case ANCHOR_NOT_WORD_BOUND: r = add_opcode(reg, OP_NOT_WORD_BOUND); break;
01419 #ifdef USE_WORD_BEGIN_END
01420 case ANCHOR_WORD_BEGIN: r = add_opcode(reg, OP_WORD_BEGIN); break;
01421 case ANCHOR_WORD_END: r = add_opcode(reg, OP_WORD_END); break;
01422 #endif
01423
01424 case ANCHOR_PREC_READ:
01425 r = add_opcode(reg, OP_PUSH_POS);
01426 if (r) return r;
01427 r = compile_tree(node->target, reg);
01428 if (r) return r;
01429 r = add_opcode(reg, OP_POP_POS);
01430 break;
01431
01432 case ANCHOR_PREC_READ_NOT:
01433 len = compile_length_tree(node->target, reg);
01434 if (len < 0) return len;
01435 r = add_opcode_rel_addr(reg, OP_PUSH_POS_NOT, len + SIZE_OP_FAIL_POS);
01436 if (r) return r;
01437 r = compile_tree(node->target, reg);
01438 if (r) return r;
01439 r = add_opcode(reg, OP_FAIL_POS);
01440 break;
01441
01442 case ANCHOR_LOOK_BEHIND:
01443 {
01444 int n;
01445 r = add_opcode(reg, OP_LOOK_BEHIND);
01446 if (r) return r;
01447 if (node->char_len < 0) {
01448 r = get_char_length_tree(node->target, reg, &n);
01449 if (r) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
01450 }
01451 else
01452 n = node->char_len;
01453 r = add_length(reg, n);
01454 if (r) return r;
01455 r = compile_tree(node->target, reg);
01456 }
01457 break;
01458
01459 case ANCHOR_LOOK_BEHIND_NOT:
01460 {
01461 int n;
01462 len = compile_length_tree(node->target, reg);
01463 r = add_opcode_rel_addr(reg, OP_PUSH_LOOK_BEHIND_NOT,
01464 len + SIZE_OP_FAIL_LOOK_BEHIND_NOT);
01465 if (r) return r;
01466 if (node->char_len < 0) {
01467 r = get_char_length_tree(node->target, reg, &n);
01468 if (r) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
01469 }
01470 else
01471 n = node->char_len;
01472 r = add_length(reg, n);
01473 if (r) return r;
01474 r = compile_tree(node->target, reg);
01475 if (r) return r;
01476 r = add_opcode(reg, OP_FAIL_LOOK_BEHIND_NOT);
01477 }
01478 break;
01479
01480 default:
01481 return ONIGERR_TYPE_BUG;
01482 break;
01483 }
01484
01485 return r;
01486 }
01487
01488 static int
01489 compile_length_tree(Node* node, regex_t* reg)
01490 {
01491 int len, type, r;
01492
01493 type = NTYPE(node);
01494 switch (type) {
01495 case NT_LIST:
01496 len = 0;
01497 do {
01498 r = compile_length_tree(NCAR(node), reg);
01499 if (r < 0) return r;
01500 len += r;
01501 } while (IS_NOT_NULL(node = NCDR(node)));
01502 r = len;
01503 break;
01504
01505 case NT_ALT:
01506 {
01507 int n;
01508
01509 n = r = 0;
01510 do {
01511 r += compile_length_tree(NCAR(node), reg);
01512 n++;
01513 } while (IS_NOT_NULL(node = NCDR(node)));
01514 r += (SIZE_OP_PUSH + SIZE_OP_JUMP) * (n - 1);
01515 }
01516 break;
01517
01518 case NT_STR:
01519 if (NSTRING_IS_RAW(node))
01520 r = compile_length_string_raw_node(NSTR(node), reg);
01521 else
01522 r = compile_length_string_node(node, reg);
01523 break;
01524
01525 case NT_CCLASS:
01526 r = compile_length_cclass_node(NCCLASS(node), reg);
01527 break;
01528
01529 case NT_CTYPE:
01530 case NT_CANY:
01531 r = SIZE_OPCODE;
01532 break;
01533
01534 case NT_BREF:
01535 {
01536 BRefNode* br = NBREF(node);
01537
01538 #ifdef USE_BACKREF_WITH_LEVEL
01539 if (IS_BACKREF_NEST_LEVEL(br)) {
01540 r = SIZE_OPCODE + SIZE_OPTION + SIZE_LENGTH +
01541 SIZE_LENGTH + (SIZE_MEMNUM * br->back_num);
01542 }
01543 else
01544 #endif
01545 if (br->back_num == 1) {
01546 r = ((!IS_IGNORECASE(reg->options) && br->back_static[0] <= 2)
01547 ? SIZE_OPCODE : (SIZE_OPCODE + SIZE_MEMNUM));
01548 }
01549 else {
01550 r = SIZE_OPCODE + SIZE_LENGTH + (SIZE_MEMNUM * br->back_num);
01551 }
01552 }
01553 break;
01554
01555 #ifdef USE_SUBEXP_CALL
01556 case NT_CALL:
01557 r = SIZE_OP_CALL;
01558 break;
01559 #endif
01560
01561 case NT_QTFR:
01562 r = compile_length_quantifier_node(NQTFR(node), reg);
01563 break;
01564
01565 case NT_ENCLOSE:
01566 r = compile_length_enclose_node(NENCLOSE(node), reg);
01567 break;
01568
01569 case NT_ANCHOR:
01570 r = compile_length_anchor_node(NANCHOR(node), reg);
01571 break;
01572
01573 default:
01574 return ONIGERR_TYPE_BUG;
01575 break;
01576 }
01577
01578 return r;
01579 }
01580
01581 static int
01582 compile_tree(Node* node, regex_t* reg)
01583 {
01584 int n, type, len, pos, r = 0;
01585
01586 type = NTYPE(node);
01587 switch (type) {
01588 case NT_LIST:
01589 do {
01590 r = compile_tree(NCAR(node), reg);
01591 } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
01592 break;
01593
01594 case NT_ALT:
01595 {
01596 Node* x = node;
01597 len = 0;
01598 do {
01599 len += compile_length_tree(NCAR(x), reg);
01600 if (NCDR(x) != NULL) {
01601 len += SIZE_OP_PUSH + SIZE_OP_JUMP;
01602 }
01603 } while (IS_NOT_NULL(x = NCDR(x)));
01604 pos = reg->used + len;
01605
01606 do {
01607 len = compile_length_tree(NCAR(node), reg);
01608 if (IS_NOT_NULL(NCDR(node))) {
01609 r = add_opcode_rel_addr(reg, OP_PUSH, len + SIZE_OP_JUMP);
01610 if (r) break;
01611 }
01612 r = compile_tree(NCAR(node), reg);
01613 if (r) break;
01614 if (IS_NOT_NULL(NCDR(node))) {
01615 len = pos - (reg->used + SIZE_OP_JUMP);
01616 r = add_opcode_rel_addr(reg, OP_JUMP, len);
01617 if (r) break;
01618 }
01619 } while (IS_NOT_NULL(node = NCDR(node)));
01620 }
01621 break;
01622
01623 case NT_STR:
01624 if (NSTRING_IS_RAW(node))
01625 r = compile_string_raw_node(NSTR(node), reg);
01626 else
01627 r = compile_string_node(node, reg);
01628 break;
01629
01630 case NT_CCLASS:
01631 r = compile_cclass_node(NCCLASS(node), reg);
01632 break;
01633
01634 case NT_CTYPE:
01635 {
01636 int op;
01637
01638 switch (NCTYPE(node)->ctype) {
01639 case ONIGENC_CTYPE_WORD:
01640 if (NCTYPE(node)->not != 0) op = OP_NOT_WORD;
01641 else op = OP_WORD;
01642 break;
01643 default:
01644 return ONIGERR_TYPE_BUG;
01645 break;
01646 }
01647 r = add_opcode(reg, op);
01648 }
01649 break;
01650
01651 case NT_CANY:
01652 if (IS_MULTILINE(reg->options))
01653 r = add_opcode(reg, OP_ANYCHAR_ML);
01654 else
01655 r = add_opcode(reg, OP_ANYCHAR);
01656 break;
01657
01658 case NT_BREF:
01659 {
01660 BRefNode* br = NBREF(node);
01661
01662 #ifdef USE_BACKREF_WITH_LEVEL
01663 if (IS_BACKREF_NEST_LEVEL(br)) {
01664 r = add_opcode(reg, OP_BACKREF_WITH_LEVEL);
01665 if (r) return r;
01666 r = add_option(reg, (reg->options & ONIG_OPTION_IGNORECASE));
01667 if (r) return r;
01668 r = add_length(reg, br->nest_level);
01669 if (r) return r;
01670
01671 goto add_bacref_mems;
01672 }
01673 else
01674 #endif
01675 if (br->back_num == 1) {
01676 n = br->back_static[0];
01677 if (IS_IGNORECASE(reg->options)) {
01678 r = add_opcode(reg, OP_BACKREFN_IC);
01679 if (r) return r;
01680 r = add_mem_num(reg, n);
01681 }
01682 else {
01683 switch (n) {
01684 case 1: r = add_opcode(reg, OP_BACKREF1); break;
01685 case 2: r = add_opcode(reg, OP_BACKREF2); break;
01686 default:
01687 r = add_opcode(reg, OP_BACKREFN);
01688 if (r) return r;
01689 r = add_mem_num(reg, n);
01690 break;
01691 }
01692 }
01693 }
01694 else {
01695 int i;
01696 int* p;
01697
01698 if (IS_IGNORECASE(reg->options)) {
01699 r = add_opcode(reg, OP_BACKREF_MULTI_IC);
01700 }
01701 else {
01702 r = add_opcode(reg, OP_BACKREF_MULTI);
01703 }
01704 if (r) return r;
01705
01706 #ifdef USE_BACKREF_WITH_LEVEL
01707 add_bacref_mems:
01708 #endif
01709 r = add_length(reg, br->back_num);
01710 if (r) return r;
01711 p = BACKREFS_P(br);
01712 for (i = br->back_num - 1; i >= 0; i--) {
01713 r = add_mem_num(reg, p[i]);
01714 if (r) return r;
01715 }
01716 }
01717 }
01718 break;
01719
01720 #ifdef USE_SUBEXP_CALL
01721 case NT_CALL:
01722 r = compile_call(NCALL(node), reg);
01723 break;
01724 #endif
01725
01726 case NT_QTFR:
01727 r = compile_quantifier_node(NQTFR(node), reg);
01728 break;
01729
01730 case NT_ENCLOSE:
01731 r = compile_enclose_node(NENCLOSE(node), reg);
01732 break;
01733
01734 case NT_ANCHOR:
01735 r = compile_anchor_node(NANCHOR(node), reg);
01736 break;
01737
01738 default:
01739 #ifdef ONIG_DEBUG
01740 fprintf(stderr, "compile_tree: undefined node type %d\n", NTYPE(node));
01741 #endif
01742 break;
01743 }
01744
01745 return r;
01746 }
01747
01748 #ifdef USE_NAMED_GROUP
01749
01750 static int
01751 noname_disable_map(Node** plink, GroupNumRemap* map, int* counter)
01752 {
01753 int r = 0;
01754 Node* node = *plink;
01755
01756 switch (NTYPE(node)) {
01757 case NT_LIST:
01758 case NT_ALT:
01759 do {
01760 r = noname_disable_map(&(NCAR(node)), map, counter);
01761 } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
01762 break;
01763
01764 case NT_QTFR:
01765 {
01766 Node** ptarget = &(NQTFR(node)->target);
01767 Node* old = *ptarget;
01768 r = noname_disable_map(ptarget, map, counter);
01769 if (*ptarget != old && NTYPE(*ptarget) == NT_QTFR) {
01770 onig_reduce_nested_quantifier(node, *ptarget);
01771 }
01772 }
01773 break;
01774
01775 case NT_ENCLOSE:
01776 {
01777 EncloseNode* en = NENCLOSE(node);
01778 if (en->type == ENCLOSE_MEMORY) {
01779 if (IS_ENCLOSE_NAMED_GROUP(en)) {
01780 (*counter)++;
01781 map[en->regnum].new_val = *counter;
01782 en->regnum = *counter;
01783 r = noname_disable_map(&(en->target), map, counter);
01784 }
01785 else {
01786 *plink = en->target;
01787 en->target = NULL_NODE;
01788 onig_node_free(node);
01789 r = noname_disable_map(plink, map, counter);
01790 }
01791 }
01792 else
01793 r = noname_disable_map(&(en->target), map, counter);
01794 }
01795 break;
01796
01797 case NT_ANCHOR:
01798 {
01799 AnchorNode* an = NANCHOR(node);
01800 switch (an->type) {
01801 case ANCHOR_PREC_READ:
01802 case ANCHOR_PREC_READ_NOT:
01803 case ANCHOR_LOOK_BEHIND:
01804 case ANCHOR_LOOK_BEHIND_NOT:
01805 r = noname_disable_map(&(an->target), map, counter);
01806 break;
01807 }
01808 }
01809 break;
01810
01811 default:
01812 break;
01813 }
01814
01815 return r;
01816 }
01817
01818 static int
01819 renumber_node_backref(Node* node, GroupNumRemap* map)
01820 {
01821 int i, pos, n, old_num;
01822 int *backs;
01823 BRefNode* bn = NBREF(node);
01824
01825 if (! IS_BACKREF_NAME_REF(bn))
01826 return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED;
01827
01828 old_num = bn->back_num;
01829 if (IS_NULL(bn->back_dynamic))
01830 backs = bn->back_static;
01831 else
01832 backs = bn->back_dynamic;
01833
01834 for (i = 0, pos = 0; i < old_num; i++) {
01835 n = map[backs[i]].new_val;
01836 if (n > 0) {
01837 backs[pos] = n;
01838 pos++;
01839 }
01840 }
01841
01842 bn->back_num = pos;
01843 return 0;
01844 }
01845
01846 static int
01847 renumber_by_map(Node* node, GroupNumRemap* map)
01848 {
01849 int r = 0;
01850
01851 switch (NTYPE(node)) {
01852 case NT_LIST:
01853 case NT_ALT:
01854 do {
01855 r = renumber_by_map(NCAR(node), map);
01856 } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
01857 break;
01858 case NT_QTFR:
01859 r = renumber_by_map(NQTFR(node)->target, map);
01860 break;
01861 case NT_ENCLOSE:
01862 r = renumber_by_map(NENCLOSE(node)->target, map);
01863 break;
01864
01865 case NT_BREF:
01866 r = renumber_node_backref(node, map);
01867 break;
01868
01869 case NT_ANCHOR:
01870 {
01871 AnchorNode* an = NANCHOR(node);
01872 switch (an->type) {
01873 case ANCHOR_PREC_READ:
01874 case ANCHOR_PREC_READ_NOT:
01875 case ANCHOR_LOOK_BEHIND:
01876 case ANCHOR_LOOK_BEHIND_NOT:
01877 r = renumber_by_map(an->target, map);
01878 break;
01879 }
01880 }
01881 break;
01882
01883 default:
01884 break;
01885 }
01886
01887 return r;
01888 }
01889
01890 static int
01891 numbered_ref_check(Node* node)
01892 {
01893 int r = 0;
01894
01895 switch (NTYPE(node)) {
01896 case NT_LIST:
01897 case NT_ALT:
01898 do {
01899 r = numbered_ref_check(NCAR(node));
01900 } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
01901 break;
01902 case NT_QTFR:
01903 r = numbered_ref_check(NQTFR(node)->target);
01904 break;
01905 case NT_ENCLOSE:
01906 r = numbered_ref_check(NENCLOSE(node)->target);
01907 break;
01908
01909 case NT_BREF:
01910 if (! IS_BACKREF_NAME_REF(NBREF(node)))
01911 return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED;
01912 break;
01913
01914 default:
01915 break;
01916 }
01917
01918 return r;
01919 }
01920
01921 static int
01922 disable_noname_group_capture(Node** root, regex_t* reg, ScanEnv* env)
01923 {
01924 int r, i, pos, counter;
01925 BitStatusType loc;
01926 GroupNumRemap* map;
01927
01928 map = (GroupNumRemap* )xalloca(sizeof(GroupNumRemap) * (env->num_mem + 1));
01929 CHECK_NULL_RETURN_MEMERR(map);
01930 for (i = 1; i <= env->num_mem; i++) {
01931 map[i].new_val = 0;
01932 }
01933 counter = 0;
01934 r = noname_disable_map(root, map, &counter);
01935 if (r != 0) return r;
01936
01937 r = renumber_by_map(*root, map);
01938 if (r != 0) return r;
01939
01940 for (i = 1, pos = 1; i <= env->num_mem; i++) {
01941 if (map[i].new_val > 0) {
01942 SCANENV_MEM_NODES(env)[pos] = SCANENV_MEM_NODES(env)[i];
01943 pos++;
01944 }
01945 }
01946
01947 loc = env->capture_history;
01948 BIT_STATUS_CLEAR(env->capture_history);
01949 for (i = 1; i <= ONIG_MAX_CAPTURE_HISTORY_GROUP; i++) {
01950 if (BIT_STATUS_AT(loc, i)) {
01951 BIT_STATUS_ON_AT_SIMPLE(env->capture_history, map[i].new_val);
01952 }
01953 }
01954
01955 env->num_mem = env->num_named;
01956 reg->num_mem = env->num_named;
01957
01958 return onig_renumber_name_table(reg, map);
01959 }
01960 #endif
01961
01962 #ifdef USE_SUBEXP_CALL
01963 static int
01964 unset_addr_list_fix(UnsetAddrList* uslist, regex_t* reg)
01965 {
01966 int i, offset;
01967 EncloseNode* en;
01968 AbsAddrType addr;
01969
01970 for (i = 0; i < uslist->num; i++) {
01971 en = NENCLOSE(uslist->us[i].target);
01972 if (! IS_ENCLOSE_ADDR_FIXED(en)) return ONIGERR_PARSER_BUG;
01973 addr = en->call_addr;
01974 offset = uslist->us[i].offset;
01975
01976 BBUF_WRITE(reg, offset, &addr, SIZE_ABSADDR);
01977 }
01978 return 0;
01979 }
01980 #endif
01981
01982 #ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT
01983 static int
01984 quantifiers_memory_node_info(Node* node)
01985 {
01986 int r = 0;
01987
01988 switch (NTYPE(node)) {
01989 case NT_LIST:
01990 case NT_ALT:
01991 {
01992 int v;
01993 do {
01994 v = quantifiers_memory_node_info(NCAR(node));
01995 if (v > r) r = v;
01996 } while (v >= 0 && IS_NOT_NULL(node = NCDR(node)));
01997 }
01998 break;
01999
02000 #ifdef USE_SUBEXP_CALL
02001 case NT_CALL:
02002 if (IS_CALL_RECURSION(NCALL(node))) {
02003 return NQ_TARGET_IS_EMPTY_REC;
02004 }
02005 else
02006 r = quantifiers_memory_node_info(NCALL(node)->target);
02007 break;
02008 #endif
02009
02010 case NT_QTFR:
02011 {
02012 QtfrNode* qn = NQTFR(node);
02013 if (qn->upper != 0) {
02014 r = quantifiers_memory_node_info(qn->target);
02015 }
02016 }
02017 break;
02018
02019 case NT_ENCLOSE:
02020 {
02021 EncloseNode* en = NENCLOSE(node);
02022 switch (en->type) {
02023 case ENCLOSE_MEMORY:
02024 return NQ_TARGET_IS_EMPTY_MEM;
02025 break;
02026
02027 case ENCLOSE_OPTION:
02028 case ENCLOSE_STOP_BACKTRACK:
02029 r = quantifiers_memory_node_info(en->target);
02030 break;
02031 default:
02032 break;
02033 }
02034 }
02035 break;
02036
02037 case NT_BREF:
02038 case NT_STR:
02039 case NT_CTYPE:
02040 case NT_CCLASS:
02041 case NT_CANY:
02042 case NT_ANCHOR:
02043 default:
02044 break;
02045 }
02046
02047 return r;
02048 }
02049 #endif
02050
02051 static int
02052 get_min_match_length(Node* node, OnigDistance *min, ScanEnv* env)
02053 {
02054 OnigDistance tmin;
02055 int r = 0;
02056
02057 *min = 0;
02058 switch (NTYPE(node)) {
02059 case NT_BREF:
02060 {
02061 int i;
02062 int* backs;
02063 Node** nodes = SCANENV_MEM_NODES(env);
02064 BRefNode* br = NBREF(node);
02065 if (br->state & NST_RECURSION) break;
02066
02067 backs = BACKREFS_P(br);
02068 if (backs[0] > env->num_mem) return ONIGERR_INVALID_BACKREF;
02069 r = get_min_match_length(nodes[backs[0]], min, env);
02070 if (r != 0) break;
02071 for (i = 1; i < br->back_num; i++) {
02072 if (backs[i] > env->num_mem) return ONIGERR_INVALID_BACKREF;
02073 r = get_min_match_length(nodes[backs[i]], &tmin, env);
02074 if (r != 0) break;
02075 if (*min > tmin) *min = tmin;
02076 }
02077 }
02078 break;
02079
02080 #ifdef USE_SUBEXP_CALL
02081 case NT_CALL:
02082 if (IS_CALL_RECURSION(NCALL(node))) {
02083 EncloseNode* en = NENCLOSE(NCALL(node)->target);
02084 if (IS_ENCLOSE_MIN_FIXED(en))
02085 *min = en->min_len;
02086 }
02087 else
02088 r = get_min_match_length(NCALL(node)->target, min, env);
02089 break;
02090 #endif
02091
02092 case NT_LIST:
02093 do {
02094 r = get_min_match_length(NCAR(node), &tmin, env);
02095 if (r == 0) *min += tmin;
02096 } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
02097 break;
02098
02099 case NT_ALT:
02100 {
02101 Node *x, *y;
02102 y = node;
02103 do {
02104 x = NCAR(y);
02105 r = get_min_match_length(x, &tmin, env);
02106 if (r != 0) break;
02107 if (y == node) *min = tmin;
02108 else if (*min > tmin) *min = tmin;
02109 } while (r == 0 && IS_NOT_NULL(y = NCDR(y)));
02110 }
02111 break;
02112
02113 case NT_STR:
02114 {
02115 StrNode* sn = NSTR(node);
02116 *min = sn->end - sn->s;
02117 }
02118 break;
02119
02120 case NT_CTYPE:
02121 *min = 1;
02122 break;
02123
02124 case NT_CCLASS:
02125 case NT_CANY:
02126 *min = 1;
02127 break;
02128
02129 case NT_QTFR:
02130 {
02131 QtfrNode* qn = NQTFR(node);
02132
02133 if (qn->lower > 0) {
02134 r = get_min_match_length(qn->target, min, env);
02135 if (r == 0)
02136 *min = distance_multiply(*min, qn->lower);
02137 }
02138 }
02139 break;
02140
02141 case NT_ENCLOSE:
02142 {
02143 EncloseNode* en = NENCLOSE(node);
02144 switch (en->type) {
02145 case ENCLOSE_MEMORY:
02146 #ifdef USE_SUBEXP_CALL
02147 if (IS_ENCLOSE_MIN_FIXED(en))
02148 *min = en->min_len;
02149 else {
02150 r = get_min_match_length(en->target, min, env);
02151 if (r == 0) {
02152 en->min_len = *min;
02153 SET_ENCLOSE_STATUS(node, NST_MIN_FIXED);
02154 }
02155 }
02156 break;
02157 #endif
02158 case ENCLOSE_OPTION:
02159 case ENCLOSE_STOP_BACKTRACK:
02160 r = get_min_match_length(en->target, min, env);
02161 break;
02162 }
02163 }
02164 break;
02165
02166 case NT_ANCHOR:
02167 default:
02168 break;
02169 }
02170
02171 return r;
02172 }
02173
02174 static int
02175 get_max_match_length(Node* node, OnigDistance *max, ScanEnv* env)
02176 {
02177 OnigDistance tmax;
02178 int r = 0;
02179
02180 *max = 0;
02181 switch (NTYPE(node)) {
02182 case NT_LIST:
02183 do {
02184 r = get_max_match_length(NCAR(node), &tmax, env);
02185 if (r == 0)
02186 *max = distance_add(*max, tmax);
02187 } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
02188 break;
02189
02190 case NT_ALT:
02191 do {
02192 r = get_max_match_length(NCAR(node), &tmax, env);
02193 if (r == 0 && *max < tmax) *max = tmax;
02194 } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
02195 break;
02196
02197 case NT_STR:
02198 {
02199 StrNode* sn = NSTR(node);
02200 *max = sn->end - sn->s;
02201 }
02202 break;
02203
02204 case NT_CTYPE:
02205 *max = ONIGENC_MBC_MAXLEN_DIST(env->enc);
02206 break;
02207
02208 case NT_CCLASS:
02209 case NT_CANY:
02210 *max = ONIGENC_MBC_MAXLEN_DIST(env->enc);
02211 break;
02212
02213 case NT_BREF:
02214 {
02215 int i;
02216 int* backs;
02217 Node** nodes = SCANENV_MEM_NODES(env);
02218 BRefNode* br = NBREF(node);
02219 if (br->state & NST_RECURSION) {
02220 *max = ONIG_INFINITE_DISTANCE;
02221 break;
02222 }
02223 backs = BACKREFS_P(br);
02224 for (i = 0; i < br->back_num; i++) {
02225 if (backs[i] > env->num_mem) return ONIGERR_INVALID_BACKREF;
02226 r = get_max_match_length(nodes[backs[i]], &tmax, env);
02227 if (r != 0) break;
02228 if (*max < tmax) *max = tmax;
02229 }
02230 }
02231 break;
02232
02233 #ifdef USE_SUBEXP_CALL
02234 case NT_CALL:
02235 if (! IS_CALL_RECURSION(NCALL(node)))
02236 r = get_max_match_length(NCALL(node)->target, max, env);
02237 else
02238 *max = ONIG_INFINITE_DISTANCE;
02239 break;
02240 #endif
02241
02242 case NT_QTFR:
02243 {
02244 QtfrNode* qn = NQTFR(node);
02245
02246 if (qn->upper != 0) {
02247 r = get_max_match_length(qn->target, max, env);
02248 if (r == 0 && *max != 0) {
02249 if (! IS_REPEAT_INFINITE(qn->upper))
02250 *max = distance_multiply(*max, qn->upper);
02251 else
02252 *max = ONIG_INFINITE_DISTANCE;
02253 }
02254 }
02255 }
02256 break;
02257
02258 case NT_ENCLOSE:
02259 {
02260 EncloseNode* en = NENCLOSE(node);
02261 switch (en->type) {
02262 case ENCLOSE_MEMORY:
02263 #ifdef USE_SUBEXP_CALL
02264 if (IS_ENCLOSE_MAX_FIXED(en))
02265 *max = en->max_len;
02266 else {
02267 r = get_max_match_length(en->target, max, env);
02268 if (r == 0) {
02269 en->max_len = *max;
02270 SET_ENCLOSE_STATUS(node, NST_MAX_FIXED);
02271 }
02272 }
02273 break;
02274 #endif
02275 case ENCLOSE_OPTION:
02276 case ENCLOSE_STOP_BACKTRACK:
02277 r = get_max_match_length(en->target, max, env);
02278 break;
02279 }
02280 }
02281 break;
02282
02283 case NT_ANCHOR:
02284 default:
02285 break;
02286 }
02287
02288 return r;
02289 }
02290
02291 #define GET_CHAR_LEN_VARLEN -1
02292 #define GET_CHAR_LEN_TOP_ALT_VARLEN -2
02293
02294
02295 static int
02296 get_char_length_tree1(Node* node, regex_t* reg, int* len, int level)
02297 {
02298 int tlen;
02299 int r = 0;
02300
02301 level++;
02302 *len = 0;
02303 switch (NTYPE(node)) {
02304 case NT_LIST:
02305 do {
02306 r = get_char_length_tree1(NCAR(node), reg, &tlen, level);
02307 if (r == 0)
02308 *len = distance_add(*len, tlen);
02309 } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
02310 break;
02311
02312 case NT_ALT:
02313 {
02314 int tlen2;
02315 int varlen = 0;
02316
02317 r = get_char_length_tree1(NCAR(node), reg, &tlen, level);
02318 while (r == 0 && IS_NOT_NULL(node = NCDR(node))) {
02319 r = get_char_length_tree1(NCAR(node), reg, &tlen2, level);
02320 if (r == 0) {
02321 if (tlen != tlen2)
02322 varlen = 1;
02323 }
02324 }
02325 if (r == 0) {
02326 if (varlen != 0) {
02327 if (level == 1)
02328 r = GET_CHAR_LEN_TOP_ALT_VARLEN;
02329 else
02330 r = GET_CHAR_LEN_VARLEN;
02331 }
02332 else
02333 *len = tlen;
02334 }
02335 }
02336 break;
02337
02338 case NT_STR:
02339 {
02340 StrNode* sn = NSTR(node);
02341 UChar *s = sn->s;
02342 while (s < sn->end) {
02343 s += enclen(reg->enc, s, sn->end);
02344 (*len)++;
02345 }
02346 }
02347 break;
02348
02349 case NT_QTFR:
02350 {
02351 QtfrNode* qn = NQTFR(node);
02352 if (qn->lower == qn->upper) {
02353 r = get_char_length_tree1(qn->target, reg, &tlen, level);
02354 if (r == 0)
02355 *len = distance_multiply(tlen, qn->lower);
02356 }
02357 else
02358 r = GET_CHAR_LEN_VARLEN;
02359 }
02360 break;
02361
02362 #ifdef USE_SUBEXP_CALL
02363 case NT_CALL:
02364 if (! IS_CALL_RECURSION(NCALL(node)))
02365 r = get_char_length_tree1(NCALL(node)->target, reg, len, level);
02366 else
02367 r = GET_CHAR_LEN_VARLEN;
02368 break;
02369 #endif
02370
02371 case NT_CTYPE:
02372 *len = 1;
02373 break;
02374
02375 case NT_CCLASS:
02376 case NT_CANY:
02377 *len = 1;
02378 break;
02379
02380 case NT_ENCLOSE:
02381 {
02382 EncloseNode* en = NENCLOSE(node);
02383 switch (en->type) {
02384 case ENCLOSE_MEMORY:
02385 #ifdef USE_SUBEXP_CALL
02386 if (IS_ENCLOSE_CLEN_FIXED(en))
02387 *len = en->char_len;
02388 else {
02389 r = get_char_length_tree1(en->target, reg, len, level);
02390 if (r == 0) {
02391 en->char_len = *len;
02392 SET_ENCLOSE_STATUS(node, NST_CLEN_FIXED);
02393 }
02394 }
02395 break;
02396 #endif
02397 case ENCLOSE_OPTION:
02398 case ENCLOSE_STOP_BACKTRACK:
02399 r = get_char_length_tree1(en->target, reg, len, level);
02400 break;
02401 default:
02402 break;
02403 }
02404 }
02405 break;
02406
02407 case NT_ANCHOR:
02408 break;
02409
02410 default:
02411 r = GET_CHAR_LEN_VARLEN;
02412 break;
02413 }
02414
02415 return r;
02416 }
02417
02418 static int
02419 get_char_length_tree(Node* node, regex_t* reg, int* len)
02420 {
02421 return get_char_length_tree1(node, reg, len, 0);
02422 }
02423
02424
02425 static int
02426 is_not_included(Node* x, Node* y, regex_t* reg)
02427 {
02428 int i, len;
02429 OnigCodePoint code;
02430 UChar *p, c;
02431 int ytype;
02432
02433 retry:
02434 ytype = NTYPE(y);
02435 switch (NTYPE(x)) {
02436 case NT_CTYPE:
02437 {
02438 switch (ytype) {
02439 case NT_CTYPE:
02440 if (NCTYPE(y)->ctype == NCTYPE(x)->ctype &&
02441 NCTYPE(y)->not != NCTYPE(x)->not)
02442 return 1;
02443 else
02444 return 0;
02445 break;
02446
02447 case NT_CCLASS:
02448 swap:
02449 {
02450 Node* tmp;
02451 tmp = x; x = y; y = tmp;
02452 goto retry;
02453 }
02454 break;
02455
02456 case NT_STR:
02457 goto swap;
02458 break;
02459
02460 default:
02461 break;
02462 }
02463 }
02464 break;
02465
02466 case NT_CCLASS:
02467 {
02468 CClassNode* xc = NCCLASS(x);
02469 switch (ytype) {
02470 case NT_CTYPE:
02471 switch (NCTYPE(y)->ctype) {
02472 case ONIGENC_CTYPE_WORD:
02473 if (NCTYPE(y)->not == 0) {
02474 if (IS_NULL(xc->mbuf) && !IS_NCCLASS_NOT(xc)) {
02475 for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
02476 if (BITSET_AT(xc->bs, i)) {
02477 if (IS_CODE_SB_WORD(reg->enc, i)) return 0;
02478 }
02479 }
02480 return 1;
02481 }
02482 return 0;
02483 }
02484 else {
02485 for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
02486 if (! IS_CODE_SB_WORD(reg->enc, i)) {
02487 if (!IS_NCCLASS_NOT(xc)) {
02488 if (BITSET_AT(xc->bs, i))
02489 return 0;
02490 }
02491 else {
02492 if (! BITSET_AT(xc->bs, i))
02493 return 0;
02494 }
02495 }
02496 }
02497 return 1;
02498 }
02499 break;
02500
02501 default:
02502 break;
02503 }
02504 break;
02505
02506 case NT_CCLASS:
02507 {
02508 int v;
02509 CClassNode* yc = NCCLASS(y);
02510
02511 for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
02512 v = BITSET_AT(xc->bs, i);
02513 if ((v != 0 && !IS_NCCLASS_NOT(xc)) ||
02514 (v == 0 && IS_NCCLASS_NOT(xc))) {
02515 v = BITSET_AT(yc->bs, i);
02516 if ((v != 0 && !IS_NCCLASS_NOT(yc)) ||
02517 (v == 0 && IS_NCCLASS_NOT(yc)))
02518 return 0;
02519 }
02520 }
02521 if ((IS_NULL(xc->mbuf) && !IS_NCCLASS_NOT(xc)) ||
02522 (IS_NULL(yc->mbuf) && !IS_NCCLASS_NOT(yc)))
02523 return 1;
02524 return 0;
02525 }
02526 break;
02527
02528 case NT_STR:
02529 goto swap;
02530 break;
02531
02532 default:
02533 break;
02534 }
02535 }
02536 break;
02537
02538 case NT_STR:
02539 {
02540 StrNode* xs = NSTR(x);
02541 if (NSTRING_LEN(x) == 0)
02542 break;
02543
02544 c = *(xs->s);
02545 switch (ytype) {
02546 case NT_CTYPE:
02547 switch (NCTYPE(y)->ctype) {
02548 case ONIGENC_CTYPE_WORD:
02549 if (ONIGENC_IS_MBC_WORD(reg->enc, xs->s, xs->end))
02550 return NCTYPE(y)->not;
02551 else
02552 return !(NCTYPE(y)->not);
02553 break;
02554 default:
02555 break;
02556 }
02557 break;
02558
02559 case NT_CCLASS:
02560 {
02561 CClassNode* cc = NCCLASS(y);
02562
02563 code = ONIGENC_MBC_TO_CODE(reg->enc, xs->s,
02564 xs->s + ONIGENC_MBC_MAXLEN(reg->enc));
02565 return (onig_is_code_in_cc(reg->enc, code, cc) != 0 ? 0 : 1);
02566 }
02567 break;
02568
02569 case NT_STR:
02570 {
02571 UChar *q;
02572 StrNode* ys = NSTR(y);
02573 len = NSTRING_LEN(x);
02574 if (len > NSTRING_LEN(y)) len = NSTRING_LEN(y);
02575 if (NSTRING_IS_AMBIG(x) || NSTRING_IS_AMBIG(y)) {
02576
02577 return 0;
02578 }
02579 else {
02580 for (i = 0, p = ys->s, q = xs->s; i < len; i++, p++, q++) {
02581 if (*p != *q) return 1;
02582 }
02583 }
02584 }
02585 break;
02586
02587 default:
02588 break;
02589 }
02590 }
02591 break;
02592
02593 default:
02594 break;
02595 }
02596
02597 return 0;
02598 }
02599
02600 static Node*
02601 get_head_value_node(Node* node, int exact, regex_t* reg)
02602 {
02603 Node* n = NULL_NODE;
02604
02605 switch (NTYPE(node)) {
02606 case NT_BREF:
02607 case NT_ALT:
02608 case NT_CANY:
02609 #ifdef USE_SUBEXP_CALL
02610 case NT_CALL:
02611 #endif
02612 break;
02613
02614 case NT_CTYPE:
02615 case NT_CCLASS:
02616 if (exact == 0) {
02617 n = node;
02618 }
02619 break;
02620
02621 case NT_LIST:
02622 n = get_head_value_node(NCAR(node), exact, reg);
02623 break;
02624
02625 case NT_STR:
02626 {
02627 StrNode* sn = NSTR(node);
02628
02629 if (sn->end <= sn->s)
02630 break;
02631
02632 if (exact != 0 &&
02633 !NSTRING_IS_RAW(node) && IS_IGNORECASE(reg->options)) {
02634 }
02635 else {
02636 n = node;
02637 }
02638 }
02639 break;
02640
02641 case NT_QTFR:
02642 {
02643 QtfrNode* qn = NQTFR(node);
02644 if (qn->lower > 0) {
02645 if (IS_NOT_NULL(qn->head_exact))
02646 n = qn->head_exact;
02647 else
02648 n = get_head_value_node(qn->target, exact, reg);
02649 }
02650 }
02651 break;
02652
02653 case NT_ENCLOSE:
02654 {
02655 EncloseNode* en = NENCLOSE(node);
02656 switch (en->type) {
02657 case ENCLOSE_OPTION:
02658 {
02659 OnigOptionType options = reg->options;
02660
02661 reg->options = NENCLOSE(node)->option;
02662 n = get_head_value_node(NENCLOSE(node)->target, exact, reg);
02663 reg->options = options;
02664 }
02665 break;
02666
02667 case ENCLOSE_MEMORY:
02668 case ENCLOSE_STOP_BACKTRACK:
02669 n = get_head_value_node(en->target, exact, reg);
02670 break;
02671 }
02672 }
02673 break;
02674
02675 case NT_ANCHOR:
02676 if (NANCHOR(node)->type == ANCHOR_PREC_READ)
02677 n = get_head_value_node(NANCHOR(node)->target, exact, reg);
02678 break;
02679
02680 default:
02681 break;
02682 }
02683
02684 return n;
02685 }
02686
02687 static int
02688 check_type_tree(Node* node, int type_mask, int enclose_mask, int anchor_mask)
02689 {
02690 int type, r = 0;
02691
02692 type = NTYPE(node);
02693 if ((NTYPE2BIT(type) & type_mask) == 0)
02694 return 1;
02695
02696 switch (type) {
02697 case NT_LIST:
02698 case NT_ALT:
02699 do {
02700 r = check_type_tree(NCAR(node), type_mask, enclose_mask,
02701 anchor_mask);
02702 } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
02703 break;
02704
02705 case NT_QTFR:
02706 r = check_type_tree(NQTFR(node)->target, type_mask, enclose_mask,
02707 anchor_mask);
02708 break;
02709
02710 case NT_ENCLOSE:
02711 {
02712 EncloseNode* en = NENCLOSE(node);
02713 if ((en->type & enclose_mask) == 0)
02714 return 1;
02715
02716 r = check_type_tree(en->target, type_mask, enclose_mask, anchor_mask);
02717 }
02718 break;
02719
02720 case NT_ANCHOR:
02721 type = NANCHOR(node)->type;
02722 if ((type & anchor_mask) == 0)
02723 return 1;
02724
02725 if (NANCHOR(node)->target)
02726 r = check_type_tree(NANCHOR(node)->target,
02727 type_mask, enclose_mask, anchor_mask);
02728 break;
02729
02730 default:
02731 break;
02732 }
02733 return r;
02734 }
02735
02736 #ifdef USE_SUBEXP_CALL
02737
02738 #define RECURSION_EXIST 1
02739 #define RECURSION_INFINITE 2
02740
02741 static int
02742 subexp_inf_recursive_check(Node* node, ScanEnv* env, int head)
02743 {
02744 int type;
02745 int r = 0;
02746
02747 type = NTYPE(node);
02748 switch (type) {
02749 case NT_LIST:
02750 {
02751 Node *x;
02752 OnigDistance min;
02753 int ret;
02754
02755 x = node;
02756 do {
02757 ret = subexp_inf_recursive_check(NCAR(x), env, head);
02758 if (ret < 0 || ret == RECURSION_INFINITE) return ret;
02759 r |= ret;
02760 if (head) {
02761 ret = get_min_match_length(NCAR(x), &min, env);
02762 if (ret != 0) return ret;
02763 if (min != 0) head = 0;
02764 }
02765 } while (IS_NOT_NULL(x = NCDR(x)));
02766 }
02767 break;
02768
02769 case NT_ALT:
02770 {
02771 int ret;
02772 r = RECURSION_EXIST;
02773 do {
02774 ret = subexp_inf_recursive_check(NCAR(node), env, head);
02775 if (ret < 0 || ret == RECURSION_INFINITE) return ret;
02776 r &= ret;
02777 } while (IS_NOT_NULL(node = NCDR(node)));
02778 }
02779 break;
02780
02781 case NT_QTFR:
02782 r = subexp_inf_recursive_check(NQTFR(node)->target, env, head);
02783 if (r == RECURSION_EXIST) {
02784 if (NQTFR(node)->lower == 0) r = 0;
02785 }
02786 break;
02787
02788 case NT_ANCHOR:
02789 {
02790 AnchorNode* an = NANCHOR(node);
02791 switch (an->type) {
02792 case ANCHOR_PREC_READ:
02793 case ANCHOR_PREC_READ_NOT:
02794 case ANCHOR_LOOK_BEHIND:
02795 case ANCHOR_LOOK_BEHIND_NOT:
02796 r = subexp_inf_recursive_check(an->target, env, head);
02797 break;
02798 }
02799 }
02800 break;
02801
02802 case NT_CALL:
02803 r = subexp_inf_recursive_check(NCALL(node)->target, env, head);
02804 break;
02805
02806 case NT_ENCLOSE:
02807 if (IS_ENCLOSE_MARK2(NENCLOSE(node)))
02808 return 0;
02809 else if (IS_ENCLOSE_MARK1(NENCLOSE(node)))
02810 return (head == 0 ? RECURSION_EXIST : RECURSION_INFINITE);
02811 else {
02812 SET_ENCLOSE_STATUS(node, NST_MARK2);
02813 r = subexp_inf_recursive_check(NENCLOSE(node)->target, env, head);
02814 CLEAR_ENCLOSE_STATUS(node, NST_MARK2);
02815 }
02816 break;
02817
02818 default:
02819 break;
02820 }
02821
02822 return r;
02823 }
02824
02825 static int
02826 subexp_inf_recursive_check_trav(Node* node, ScanEnv* env)
02827 {
02828 int type;
02829 int r = 0;
02830
02831 type = NTYPE(node);
02832 switch (type) {
02833 case NT_LIST:
02834 case NT_ALT:
02835 do {
02836 r = subexp_inf_recursive_check_trav(NCAR(node), env);
02837 } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
02838 break;
02839
02840 case NT_QTFR:
02841 r = subexp_inf_recursive_check_trav(NQTFR(node)->target, env);
02842 break;
02843
02844 case NT_ANCHOR:
02845 {
02846 AnchorNode* an = NANCHOR(node);
02847 switch (an->type) {
02848 case ANCHOR_PREC_READ:
02849 case ANCHOR_PREC_READ_NOT:
02850 case ANCHOR_LOOK_BEHIND:
02851 case ANCHOR_LOOK_BEHIND_NOT:
02852 r = subexp_inf_recursive_check_trav(an->target, env);
02853 break;
02854 }
02855 }
02856 break;
02857
02858 case NT_ENCLOSE:
02859 {
02860 EncloseNode* en = NENCLOSE(node);
02861
02862 if (IS_ENCLOSE_RECURSION(en)) {
02863 SET_ENCLOSE_STATUS(node, NST_MARK1);
02864 r = subexp_inf_recursive_check(en->target, env, 1);
02865 if (r > 0) return ONIGERR_NEVER_ENDING_RECURSION;
02866 CLEAR_ENCLOSE_STATUS(node, NST_MARK1);
02867 }
02868 r = subexp_inf_recursive_check_trav(en->target, env);
02869 }
02870
02871 break;
02872
02873 default:
02874 break;
02875 }
02876
02877 return r;
02878 }
02879
02880 static int
02881 subexp_recursive_check(Node* node)
02882 {
02883 int r = 0;
02884
02885 switch (NTYPE(node)) {
02886 case NT_LIST:
02887 case NT_ALT:
02888 do {
02889 r |= subexp_recursive_check(NCAR(node));
02890 } while (IS_NOT_NULL(node = NCDR(node)));
02891 break;
02892
02893 case NT_QTFR:
02894 r = subexp_recursive_check(NQTFR(node)->target);
02895 break;
02896
02897 case NT_ANCHOR:
02898 {
02899 AnchorNode* an = NANCHOR(node);
02900 switch (an->type) {
02901 case ANCHOR_PREC_READ:
02902 case ANCHOR_PREC_READ_NOT:
02903 case ANCHOR_LOOK_BEHIND:
02904 case ANCHOR_LOOK_BEHIND_NOT:
02905 r = subexp_recursive_check(an->target);
02906 break;
02907 }
02908 }
02909 break;
02910
02911 case NT_CALL:
02912 r = subexp_recursive_check(NCALL(node)->target);
02913 if (r != 0) SET_CALL_RECURSION(node);
02914 break;
02915
02916 case NT_ENCLOSE:
02917 if (IS_ENCLOSE_MARK2(NENCLOSE(node)))
02918 return 0;
02919 else if (IS_ENCLOSE_MARK1(NENCLOSE(node)))
02920 return 1;
02921 else {
02922 SET_ENCLOSE_STATUS(node, NST_MARK2);
02923 r = subexp_recursive_check(NENCLOSE(node)->target);
02924 CLEAR_ENCLOSE_STATUS(node, NST_MARK2);
02925 }
02926 break;
02927
02928 default:
02929 break;
02930 }
02931
02932 return r;
02933 }
02934
02935
02936 static int
02937 subexp_recursive_check_trav(Node* node, ScanEnv* env)
02938 {
02939 #define FOUND_CALLED_NODE 1
02940
02941 int type;
02942 int r = 0;
02943
02944 type = NTYPE(node);
02945 switch (type) {
02946 case NT_LIST:
02947 case NT_ALT:
02948 {
02949 int ret;
02950 do {
02951 ret = subexp_recursive_check_trav(NCAR(node), env);
02952 if (ret == FOUND_CALLED_NODE) r = FOUND_CALLED_NODE;
02953 else if (ret < 0) return ret;
02954 } while (IS_NOT_NULL(node = NCDR(node)));
02955 }
02956 break;
02957
02958 case NT_QTFR:
02959 r = subexp_recursive_check_trav(NQTFR(node)->target, env);
02960 if (NQTFR(node)->upper == 0) {
02961 if (r == FOUND_CALLED_NODE)
02962 NQTFR(node)->is_refered = 1;
02963 }
02964 break;
02965
02966 case NT_ANCHOR:
02967 {
02968 AnchorNode* an = NANCHOR(node);
02969 switch (an->type) {
02970 case ANCHOR_PREC_READ:
02971 case ANCHOR_PREC_READ_NOT:
02972 case ANCHOR_LOOK_BEHIND:
02973 case ANCHOR_LOOK_BEHIND_NOT:
02974 r = subexp_recursive_check_trav(an->target, env);
02975 break;
02976 }
02977 }
02978 break;
02979
02980 case NT_ENCLOSE:
02981 {
02982 EncloseNode* en = NENCLOSE(node);
02983
02984 if (! IS_ENCLOSE_RECURSION(en)) {
02985 if (IS_ENCLOSE_CALLED(en)) {
02986 SET_ENCLOSE_STATUS(node, NST_MARK1);
02987 r = subexp_recursive_check(en->target);
02988 if (r != 0) SET_ENCLOSE_STATUS(node, NST_RECURSION);
02989 CLEAR_ENCLOSE_STATUS(node, NST_MARK1);
02990 }
02991 }
02992 r = subexp_recursive_check_trav(en->target, env);
02993 if (IS_ENCLOSE_CALLED(en))
02994 r |= FOUND_CALLED_NODE;
02995 }
02996 break;
02997
02998 default:
02999 break;
03000 }
03001
03002 return r;
03003 }
03004
03005 static int
03006 setup_subexp_call(Node* node, ScanEnv* env)
03007 {
03008 int type;
03009 int r = 0;
03010
03011 type = NTYPE(node);
03012 switch (type) {
03013 case NT_LIST:
03014 do {
03015 r = setup_subexp_call(NCAR(node), env);
03016 } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
03017 break;
03018
03019 case NT_ALT:
03020 do {
03021 r = setup_subexp_call(NCAR(node), env);
03022 } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
03023 break;
03024
03025 case NT_QTFR:
03026 r = setup_subexp_call(NQTFR(node)->target, env);
03027 break;
03028 case NT_ENCLOSE:
03029 r = setup_subexp_call(NENCLOSE(node)->target, env);
03030 break;
03031
03032 case NT_CALL:
03033 {
03034 CallNode* cn = NCALL(node);
03035 Node** nodes = SCANENV_MEM_NODES(env);
03036
03037 if (cn->group_num != 0) {
03038 int gnum = cn->group_num;
03039
03040 #ifdef USE_NAMED_GROUP
03041 if (env->num_named > 0 &&
03042 IS_SYNTAX_BV(env->syntax, ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP) &&
03043 !ONIG_IS_OPTION_ON(env->option, ONIG_OPTION_CAPTURE_GROUP)) {
03044 return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED;
03045 }
03046 #endif
03047 if (gnum > env->num_mem) {
03048 onig_scan_env_set_error_string(env,
03049 ONIGERR_UNDEFINED_GROUP_REFERENCE, cn->name, cn->name_end);
03050 return ONIGERR_UNDEFINED_GROUP_REFERENCE;
03051 }
03052
03053 #ifdef USE_NAMED_GROUP
03054 set_call_attr:
03055 #endif
03056 cn->target = nodes[cn->group_num];
03057 if (IS_NULL(cn->target)) {
03058 onig_scan_env_set_error_string(env,
03059 ONIGERR_UNDEFINED_NAME_REFERENCE, cn->name, cn->name_end);
03060 return ONIGERR_UNDEFINED_NAME_REFERENCE;
03061 }
03062 SET_ENCLOSE_STATUS(cn->target, NST_CALLED);
03063 BIT_STATUS_ON_AT(env->bt_mem_start, cn->group_num);
03064 cn->unset_addr_list = env->unset_addr_list;
03065 }
03066 #ifdef USE_NAMED_GROUP
03067 else {
03068 int *refs;
03069
03070 int n = onig_name_to_group_numbers(env->reg, cn->name, cn->name_end,
03071 &refs);
03072 if (n <= 0) {
03073 onig_scan_env_set_error_string(env,
03074 ONIGERR_UNDEFINED_NAME_REFERENCE, cn->name, cn->name_end);
03075 return ONIGERR_UNDEFINED_NAME_REFERENCE;
03076 }
03077 else if (n > 1) {
03078 onig_scan_env_set_error_string(env,
03079 ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL, cn->name, cn->name_end);
03080 return ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL;
03081 }
03082 else {
03083 cn->group_num = refs[0];
03084 goto set_call_attr;
03085 }
03086 }
03087 #endif
03088 }
03089 break;
03090
03091 case NT_ANCHOR:
03092 {
03093 AnchorNode* an = NANCHOR(node);
03094
03095 switch (an->type) {
03096 case ANCHOR_PREC_READ:
03097 case ANCHOR_PREC_READ_NOT:
03098 case ANCHOR_LOOK_BEHIND:
03099 case ANCHOR_LOOK_BEHIND_NOT:
03100 r = setup_subexp_call(an->target, env);
03101 break;
03102 }
03103 }
03104 break;
03105
03106 default:
03107 break;
03108 }
03109
03110 return r;
03111 }
03112 #endif
03113
03114
03115
03116
03117
03118 static int
03119 divide_look_behind_alternatives(Node* node)
03120 {
03121 Node *head, *np, *insert_node;
03122 AnchorNode* an = NANCHOR(node);
03123 int anc_type = an->type;
03124
03125 head = an->target;
03126 np = NCAR(head);
03127 swap_node(node, head);
03128 NCAR(node) = head;
03129 NANCHOR(head)->target = np;
03130
03131 np = node;
03132 while ((np = NCDR(np)) != NULL_NODE) {
03133 insert_node = onig_node_new_anchor(anc_type);
03134 CHECK_NULL_RETURN_MEMERR(insert_node);
03135 NANCHOR(insert_node)->target = NCAR(np);
03136 NCAR(np) = insert_node;
03137 }
03138
03139 if (anc_type == ANCHOR_LOOK_BEHIND_NOT) {
03140 np = node;
03141 do {
03142 SET_NTYPE(np, NT_LIST);
03143 } while ((np = NCDR(np)) != NULL_NODE);
03144 }
03145 return 0;
03146 }
03147
03148 static int
03149 setup_look_behind(Node* node, regex_t* reg, ScanEnv* env)
03150 {
03151 int r, len;
03152 AnchorNode* an = NANCHOR(node);
03153
03154 r = get_char_length_tree(an->target, reg, &len);
03155 if (r == 0)
03156 an->char_len = len;
03157 else if (r == GET_CHAR_LEN_VARLEN)
03158 r = ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
03159 else if (r == GET_CHAR_LEN_TOP_ALT_VARLEN) {
03160 if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND))
03161 r = divide_look_behind_alternatives(node);
03162 else
03163 r = ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
03164 }
03165
03166 return r;
03167 }
03168
03169 static int
03170 next_setup(Node* node, Node* next_node, regex_t* reg)
03171 {
03172 int type;
03173
03174 retry:
03175 type = NTYPE(node);
03176 if (type == NT_QTFR) {
03177 QtfrNode* qn = NQTFR(node);
03178 if (qn->greedy && IS_REPEAT_INFINITE(qn->upper)) {
03179 #ifdef USE_QTFR_PEEK_NEXT
03180 Node* n = get_head_value_node(next_node, 1, reg);
03181
03182 if (IS_NOT_NULL(n) && NSTR(n)->s[0] != '\0') {
03183 qn->next_head_exact = n;
03184 }
03185 #endif
03186
03187 if (qn->lower <= 1) {
03188 int ttype = NTYPE(qn->target);
03189 if (IS_NODE_TYPE_SIMPLE(ttype)) {
03190 Node *x, *y;
03191 x = get_head_value_node(qn->target, 0, reg);
03192 if (IS_NOT_NULL(x)) {
03193 y = get_head_value_node(next_node, 0, reg);
03194 if (IS_NOT_NULL(y) && is_not_included(x, y, reg)) {
03195 Node* en = onig_node_new_enclose(ENCLOSE_STOP_BACKTRACK);
03196 CHECK_NULL_RETURN_MEMERR(en);
03197 SET_ENCLOSE_STATUS(en, NST_STOP_BT_SIMPLE_REPEAT);
03198 swap_node(node, en);
03199 NENCLOSE(node)->target = en;
03200 }
03201 }
03202 }
03203 }
03204 }
03205 }
03206 else if (type == NT_ENCLOSE) {
03207 EncloseNode* en = NENCLOSE(node);
03208 if (en->type == ENCLOSE_MEMORY) {
03209 node = en->target;
03210 goto retry;
03211 }
03212 }
03213 return 0;
03214 }
03215
03216
03217 static int
03218 update_string_node_case_fold(regex_t* reg, Node *node)
03219 {
03220 UChar *p, *q, *end, buf[ONIGENC_MBC_CASE_FOLD_MAXLEN];
03221 UChar *sbuf, *ebuf, *sp;
03222 int r, i, len, sbuf_size;
03223 StrNode* sn = NSTR(node);
03224
03225 end = sn->end;
03226 sbuf_size = (end - sn->s) * 2;
03227 sbuf = (UChar* )xmalloc(sbuf_size);
03228 CHECK_NULL_RETURN_MEMERR(sbuf);
03229 ebuf = sbuf + sbuf_size;
03230
03231 sp = sbuf;
03232 p = sn->s;
03233 while (p < end) {
03234 len = ONIGENC_MBC_CASE_FOLD(reg->enc, reg->case_fold_flag, &p, end, buf);
03235 q = buf;
03236 for (i = 0; i < len; i++) {
03237 if (sp >= ebuf) {
03238 sbuf = (UChar* )xrealloc(sbuf, sbuf_size * 2);
03239 CHECK_NULL_RETURN_MEMERR(sbuf);
03240 sp = sbuf + sbuf_size;
03241 sbuf_size *= 2;
03242 ebuf = sbuf + sbuf_size;
03243 }
03244
03245 *sp++ = buf[i];
03246 }
03247 }
03248
03249 r = onig_node_str_set(node, sbuf, sp);
03250 if (r != 0) {
03251 xfree(sbuf);
03252 return r;
03253 }
03254
03255 xfree(sbuf);
03256 return 0;
03257 }
03258
03259 static int
03260 expand_case_fold_make_rem_string(Node** rnode, UChar *s, UChar *end,
03261 regex_t* reg)
03262 {
03263 int r;
03264 Node *node;
03265
03266 node = onig_node_new_str(s, end);
03267 if (IS_NULL(node)) return ONIGERR_MEMORY;
03268
03269 r = update_string_node_case_fold(reg, node);
03270 if (r != 0) {
03271 onig_node_free(node);
03272 return r;
03273 }
03274
03275 NSTRING_SET_AMBIG(node);
03276 NSTRING_SET_DONT_GET_OPT_INFO(node);
03277 *rnode = node;
03278 return 0;
03279 }
03280
03281 static int
03282 expand_case_fold_string_alt(int item_num, OnigCaseFoldCodeItem items[],
03283 UChar *p, int slen, UChar *end,
03284 regex_t* reg, Node **rnode)
03285 {
03286 int r, i, j, len, varlen;
03287 Node *anode, *var_anode, *snode, *xnode, *an;
03288 UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN];
03289
03290 *rnode = var_anode = NULL_NODE;
03291
03292 varlen = 0;
03293 for (i = 0; i < item_num; i++) {
03294 if (items[i].byte_len != slen) {
03295 varlen = 1;
03296 break;
03297 }
03298 }
03299
03300 if (varlen != 0) {
03301 *rnode = var_anode = onig_node_new_alt(NULL_NODE, NULL_NODE);
03302 if (IS_NULL(var_anode)) return ONIGERR_MEMORY;
03303
03304 xnode = onig_node_new_list(NULL, NULL);
03305 if (IS_NULL(xnode)) goto mem_err;
03306 NCAR(var_anode) = xnode;
03307
03308 anode = onig_node_new_alt(NULL_NODE, NULL_NODE);
03309 if (IS_NULL(anode)) goto mem_err;
03310 NCAR(xnode) = anode;
03311 }
03312 else {
03313 *rnode = anode = onig_node_new_alt(NULL_NODE, NULL_NODE);
03314 if (IS_NULL(anode)) return ONIGERR_MEMORY;
03315 }
03316
03317 snode = onig_node_new_str(p, p + slen);
03318 if (IS_NULL(snode)) goto mem_err;
03319
03320 NCAR(anode) = snode;
03321
03322 for (i = 0; i < item_num; i++) {
03323 snode = onig_node_new_str(NULL, NULL);
03324 if (IS_NULL(snode)) goto mem_err;
03325
03326 for (j = 0; j < items[i].code_len; j++) {
03327 len = ONIGENC_CODE_TO_MBC(reg->enc, items[i].code[j], buf);
03328 if (len < 0) {
03329 r = len;
03330 goto mem_err2;
03331 }
03332
03333 r = onig_node_str_cat(snode, buf, buf + len);
03334 if (r != 0) goto mem_err2;
03335 }
03336
03337 an = onig_node_new_alt(NULL_NODE, NULL_NODE);
03338 if (IS_NULL(an)) {
03339 goto mem_err2;
03340 }
03341
03342 if (items[i].byte_len != slen) {
03343 Node *rem;
03344 UChar *q = p + items[i].byte_len;
03345
03346 if (q < end) {
03347 r = expand_case_fold_make_rem_string(&rem, q, end, reg);
03348 if (r != 0) {
03349 onig_node_free(an);
03350 goto mem_err2;
03351 }
03352
03353 xnode = onig_node_list_add(NULL_NODE, snode);
03354 if (IS_NULL(xnode)) {
03355 onig_node_free(an);
03356 onig_node_free(rem);
03357 goto mem_err2;
03358 }
03359 if (IS_NULL(onig_node_list_add(xnode, rem))) {
03360 onig_node_free(an);
03361 onig_node_free(xnode);
03362 onig_node_free(rem);
03363 goto mem_err;
03364 }
03365
03366 NCAR(an) = xnode;
03367 }
03368 else {
03369 NCAR(an) = snode;
03370 }
03371
03372 NCDR(var_anode) = an;
03373 var_anode = an;
03374 }
03375 else {
03376 NCAR(an) = snode;
03377 NCDR(anode) = an;
03378 anode = an;
03379 }
03380 }
03381
03382 return varlen;
03383
03384 mem_err2:
03385 onig_node_free(snode);
03386
03387 mem_err:
03388 onig_node_free(*rnode);
03389
03390 return ONIGERR_MEMORY;
03391 }
03392
03393 static int
03394 expand_case_fold_string(Node* node, regex_t* reg)
03395 {
03396 #define THRESHOLD_CASE_FOLD_ALT_FOR_EXPANSION 8
03397
03398 int r, n, len, alt_num;
03399 UChar *start, *end, *p;
03400 Node *top_root, *root, *snode, *prev_node;
03401 OnigCaseFoldCodeItem items[ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM];
03402 StrNode* sn = NSTR(node);
03403
03404 if (NSTRING_IS_AMBIG(node)) return 0;
03405
03406 start = sn->s;
03407 end = sn->end;
03408 if (start >= end) return 0;
03409
03410 r = 0;
03411 top_root = root = prev_node = snode = NULL_NODE;
03412 alt_num = 1;
03413 p = start;
03414 while (p < end) {
03415 n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(reg->enc, reg->case_fold_flag,
03416 p, end, items);
03417 if (n < 0) {
03418 r = n;
03419 goto err;
03420 }
03421
03422 len = enclen(reg->enc, p, end);
03423
03424 if (n == 0) {
03425 if (IS_NULL(snode)) {
03426 if (IS_NULL(root) && IS_NOT_NULL(prev_node)) {
03427 top_root = root = onig_node_list_add(NULL_NODE, prev_node);
03428 if (IS_NULL(root)) {
03429 onig_node_free(prev_node);
03430 goto mem_err;
03431 }
03432 }
03433
03434 prev_node = snode = onig_node_new_str(NULL, NULL);
03435 if (IS_NULL(snode)) goto mem_err;
03436 if (IS_NOT_NULL(root)) {
03437 if (IS_NULL(onig_node_list_add(root, snode))) {
03438 onig_node_free(snode);
03439 goto mem_err;
03440 }
03441 }
03442 }
03443
03444 r = onig_node_str_cat(snode, p, p + len);
03445 if (r != 0) goto err;
03446 }
03447 else {
03448 alt_num *= (n + 1);
03449 if (alt_num > THRESHOLD_CASE_FOLD_ALT_FOR_EXPANSION) break;
03450
03451 if (IS_NULL(root) && IS_NOT_NULL(prev_node)) {
03452 top_root = root = onig_node_list_add(NULL_NODE, prev_node);
03453 if (IS_NULL(root)) {
03454 onig_node_free(prev_node);
03455 goto mem_err;
03456 }
03457 }
03458
03459 r = expand_case_fold_string_alt(n, items, p, len, end, reg, &prev_node);
03460 if (r < 0) goto mem_err;
03461 if (r == 1) {
03462 if (IS_NULL(root)) {
03463 top_root = prev_node;
03464 }
03465 else {
03466 if (IS_NULL(onig_node_list_add(root, prev_node))) {
03467 onig_node_free(prev_node);
03468 goto mem_err;
03469 }
03470 }
03471
03472 root = NCAR(prev_node);
03473 }
03474 else {
03475 if (IS_NOT_NULL(root)) {
03476 if (IS_NULL(onig_node_list_add(root, prev_node))) {
03477 onig_node_free(prev_node);
03478 goto mem_err;
03479 }
03480 }
03481 }
03482
03483 snode = NULL_NODE;
03484 }
03485
03486 p += len;
03487 }
03488
03489 if (p < end) {
03490 Node *srem;
03491
03492 r = expand_case_fold_make_rem_string(&srem, p, end, reg);
03493 if (r != 0) goto mem_err;
03494
03495 if (IS_NOT_NULL(prev_node) && IS_NULL(root)) {
03496 top_root = root = onig_node_list_add(NULL_NODE, prev_node);
03497 if (IS_NULL(root)) {
03498 onig_node_free(srem);
03499 onig_node_free(prev_node);
03500 goto mem_err;
03501 }
03502 }
03503
03504 if (IS_NULL(root)) {
03505 prev_node = srem;
03506 }
03507 else {
03508 if (IS_NULL(onig_node_list_add(root, srem))) {
03509 onig_node_free(srem);
03510 goto mem_err;
03511 }
03512 }
03513 }
03514
03515
03516 top_root = (IS_NOT_NULL(top_root) ? top_root : prev_node);
03517 swap_node(node, top_root);
03518 onig_node_free(top_root);
03519 return 0;
03520
03521 mem_err:
03522 r = ONIGERR_MEMORY;
03523
03524 err:
03525 onig_node_free(top_root);
03526 return r;
03527 }
03528
03529
03530 #ifdef USE_COMBINATION_EXPLOSION_CHECK
03531
03532 #define CEC_THRES_NUM_BIG_REPEAT 512
03533 #define CEC_INFINITE_NUM 0x7fffffff
03534
03535 #define CEC_IN_INFINITE_REPEAT (1<<0)
03536 #define CEC_IN_FINITE_REPEAT (1<<1)
03537 #define CEC_CONT_BIG_REPEAT (1<<2)
03538
03539 static int
03540 setup_comb_exp_check(Node* node, int state, ScanEnv* env)
03541 {
03542 int type;
03543 int r = state;
03544
03545 type = NTYPE(node);
03546 switch (type) {
03547 case NT_LIST:
03548 {
03549 Node* prev = NULL_NODE;
03550 do {
03551 r = setup_comb_exp_check(NCAR(node), r, env);
03552 prev = NCAR(node);
03553 } while (r >= 0 && IS_NOT_NULL(node = NCDR(node)));
03554 }
03555 break;
03556
03557 case NT_ALT:
03558 {
03559 int ret;
03560 do {
03561 ret = setup_comb_exp_check(NCAR(node), state, env);
03562 r |= ret;
03563 } while (ret >= 0 && IS_NOT_NULL(node = NCDR(node)));
03564 }
03565 break;
03566
03567 case NT_QTFR:
03568 {
03569 int child_state = state;
03570 int add_state = 0;
03571 QtfrNode* qn = NQTFR(node);
03572 Node* target = qn->target;
03573 int var_num;
03574
03575 if (! IS_REPEAT_INFINITE(qn->upper)) {
03576 if (qn->upper > 1) {
03577
03578 child_state |= CEC_IN_FINITE_REPEAT;
03579
03580
03581 if (env->backrefed_mem == 0) {
03582 if (NTYPE(qn->target) == NT_ENCLOSE) {
03583 EncloseNode* en = NENCLOSE(qn->target);
03584 if (en->type == ENCLOSE_MEMORY) {
03585 if (NTYPE(en->target) == NT_QTFR) {
03586 QtfrNode* q = NQTFR(en->target);
03587 if (IS_REPEAT_INFINITE(q->upper)
03588 && q->greedy == qn->greedy) {
03589 qn->upper = (qn->lower == 0 ? 1 : qn->lower);
03590 if (qn->upper == 1)
03591 child_state = state;
03592 }
03593 }
03594 }
03595 }
03596 }
03597 }
03598 }
03599
03600 if (state & CEC_IN_FINITE_REPEAT) {
03601 qn->comb_exp_check_num = -1;
03602 }
03603 else {
03604 if (IS_REPEAT_INFINITE(qn->upper)) {
03605 var_num = CEC_INFINITE_NUM;
03606 child_state |= CEC_IN_INFINITE_REPEAT;
03607 }
03608 else {
03609 var_num = qn->upper - qn->lower;
03610 }
03611
03612 if (var_num >= CEC_THRES_NUM_BIG_REPEAT)
03613 add_state |= CEC_CONT_BIG_REPEAT;
03614
03615 if (((state & CEC_IN_INFINITE_REPEAT) != 0 && var_num != 0) ||
03616 ((state & CEC_CONT_BIG_REPEAT) != 0 &&
03617 var_num >= CEC_THRES_NUM_BIG_REPEAT)) {
03618 if (qn->comb_exp_check_num == 0) {
03619 env->num_comb_exp_check++;
03620 qn->comb_exp_check_num = env->num_comb_exp_check;
03621 if (env->curr_max_regnum > env->comb_exp_max_regnum)
03622 env->comb_exp_max_regnum = env->curr_max_regnum;
03623 }
03624 }
03625 }
03626
03627 r = setup_comb_exp_check(target, child_state, env);
03628 r |= add_state;
03629 }
03630 break;
03631
03632 case NT_ENCLOSE:
03633 {
03634 EncloseNode* en = NENCLOSE(node);
03635
03636 switch (en->type) {
03637 case ENCLOSE_MEMORY:
03638 {
03639 if (env->curr_max_regnum < en->regnum)
03640 env->curr_max_regnum = en->regnum;
03641
03642 r = setup_comb_exp_check(en->target, state, env);
03643 }
03644 break;
03645
03646 default:
03647 r = setup_comb_exp_check(en->target, state, env);
03648 break;
03649 }
03650 }
03651 break;
03652
03653 #ifdef USE_SUBEXP_CALL
03654 case NT_CALL:
03655 if (IS_CALL_RECURSION(NCALL(node)))
03656 env->has_recursion = 1;
03657 else
03658 r = setup_comb_exp_check(NCALL(node)->target, state, env);
03659 break;
03660 #endif
03661
03662 default:
03663 break;
03664 }
03665
03666 return r;
03667 }
03668 #endif
03669
03670 #define IN_ALT (1<<0)
03671 #define IN_NOT (1<<1)
03672 #define IN_REPEAT (1<<2)
03673 #define IN_VAR_REPEAT (1<<3)
03674
03675
03676
03677
03678
03679
03680
03681
03682
03683 static int
03684 setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env)
03685 {
03686 int type;
03687 int r = 0;
03688
03689 type = NTYPE(node);
03690 switch (type) {
03691 case NT_LIST:
03692 {
03693 Node* prev = NULL_NODE;
03694 do {
03695 r = setup_tree(NCAR(node), reg, state, env);
03696 if (IS_NOT_NULL(prev) && r == 0) {
03697 r = next_setup(prev, NCAR(node), reg);
03698 }
03699 prev = NCAR(node);
03700 } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
03701 }
03702 break;
03703
03704 case NT_ALT:
03705 do {
03706 r = setup_tree(NCAR(node), reg, (state | IN_ALT), env);
03707 } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
03708 break;
03709
03710 case NT_CCLASS:
03711 break;
03712
03713 case NT_STR:
03714 if (IS_IGNORECASE(reg->options) && !NSTRING_IS_RAW(node)) {
03715 r = expand_case_fold_string(node, reg);
03716 }
03717 break;
03718
03719 case NT_CTYPE:
03720 case NT_CANY:
03721 break;
03722
03723 #ifdef USE_SUBEXP_CALL
03724 case NT_CALL:
03725 break;
03726 #endif
03727
03728 case NT_BREF:
03729 {
03730 int i;
03731 int* p;
03732 Node** nodes = SCANENV_MEM_NODES(env);
03733 BRefNode* br = NBREF(node);
03734 p = BACKREFS_P(br);
03735 for (i = 0; i < br->back_num; i++) {
03736 if (p[i] > env->num_mem) return ONIGERR_INVALID_BACKREF;
03737 BIT_STATUS_ON_AT(env->backrefed_mem, p[i]);
03738 BIT_STATUS_ON_AT(env->bt_mem_start, p[i]);
03739 #ifdef USE_BACKREF_WITH_LEVEL
03740 if (IS_BACKREF_NEST_LEVEL(br)) {
03741 BIT_STATUS_ON_AT(env->bt_mem_end, p[i]);
03742 }
03743 #endif
03744 SET_ENCLOSE_STATUS(nodes[p[i]], NST_MEM_BACKREFED);
03745 }
03746 }
03747 break;
03748
03749 case NT_QTFR:
03750 {
03751 OnigDistance d;
03752 QtfrNode* qn = NQTFR(node);
03753 Node* target = qn->target;
03754
03755 if ((state & IN_REPEAT) != 0) {
03756 qn->state |= NST_IN_REPEAT;
03757 }
03758
03759 if (IS_REPEAT_INFINITE(qn->upper) || qn->upper >= 1) {
03760 r = get_min_match_length(target, &d, env);
03761 if (r) break;
03762 if (d == 0) {
03763 qn->target_empty_info = NQ_TARGET_IS_EMPTY;
03764 #ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT
03765 r = quantifiers_memory_node_info(target);
03766 if (r < 0) break;
03767 if (r > 0) {
03768 qn->target_empty_info = r;
03769 }
03770 #endif
03771 #if 0
03772 r = get_max_match_length(target, &d, env);
03773 if (r == 0 && d == 0) {
03774
03775 qn->upper = 1;
03776 if (qn->lower > 1) qn->lower = 1;
03777 if (NTYPE(target) == NT_STR) {
03778 qn->upper = qn->lower = 0;
03779 }
03780 }
03781 #endif
03782 }
03783 }
03784
03785 state |= IN_REPEAT;
03786 if (qn->lower != qn->upper)
03787 state |= IN_VAR_REPEAT;
03788 r = setup_tree(target, reg, state, env);
03789 if (r) break;
03790
03791
03792 #define EXPAND_STRING_MAX_LENGTH 100
03793 if (NTYPE(target) == NT_STR) {
03794 if (!IS_REPEAT_INFINITE(qn->lower) && qn->lower == qn->upper &&
03795 qn->lower > 1 && qn->lower <= EXPAND_STRING_MAX_LENGTH) {
03796 int len = NSTRING_LEN(target);
03797 StrNode* sn = NSTR(target);
03798
03799 if (len * qn->lower <= EXPAND_STRING_MAX_LENGTH) {
03800 int i, n = qn->lower;
03801 onig_node_conv_to_str_node(node, NSTR(target)->flag);
03802 for (i = 0; i < n; i++) {
03803 r = onig_node_str_cat(node, sn->s, sn->end);
03804 if (r) break;
03805 }
03806 onig_node_free(target);
03807 break;
03808 }
03809 }
03810 }
03811
03812 #ifdef USE_OP_PUSH_OR_JUMP_EXACT
03813 if (qn->greedy && (qn->target_empty_info != 0)) {
03814 if (NTYPE(target) == NT_QTFR) {
03815 QtfrNode* tqn = NQTFR(target);
03816 if (IS_NOT_NULL(tqn->head_exact)) {
03817 qn->head_exact = tqn->head_exact;
03818 tqn->head_exact = NULL;
03819 }
03820 }
03821 else {
03822 qn->head_exact = get_head_value_node(qn->target, 1, reg);
03823 }
03824 }
03825 #endif
03826 }
03827 break;
03828
03829 case NT_ENCLOSE:
03830 {
03831 EncloseNode* en = NENCLOSE(node);
03832
03833 switch (en->type) {
03834 case ENCLOSE_OPTION:
03835 {
03836 OnigOptionType options = reg->options;
03837 reg->options = NENCLOSE(node)->option;
03838 r = setup_tree(NENCLOSE(node)->target, reg, state, env);
03839 reg->options = options;
03840 }
03841 break;
03842
03843 case ENCLOSE_MEMORY:
03844 if ((state & (IN_ALT | IN_NOT | IN_VAR_REPEAT)) != 0) {
03845 BIT_STATUS_ON_AT(env->bt_mem_start, en->regnum);
03846
03847 }
03848 r = setup_tree(en->target, reg, state, env);
03849 break;
03850
03851 case ENCLOSE_STOP_BACKTRACK:
03852 {
03853 Node* target = en->target;
03854 r = setup_tree(target, reg, state, env);
03855 if (NTYPE(target) == NT_QTFR) {
03856 QtfrNode* tqn = NQTFR(target);
03857 if (IS_REPEAT_INFINITE(tqn->upper) && tqn->lower <= 1 &&
03858 tqn->greedy != 0) {
03859 int qtype = NTYPE(tqn->target);
03860 if (IS_NODE_TYPE_SIMPLE(qtype))
03861 SET_ENCLOSE_STATUS(node, NST_STOP_BT_SIMPLE_REPEAT);
03862 }
03863 }
03864 }
03865 break;
03866 }
03867 }
03868 break;
03869
03870 case NT_ANCHOR:
03871 {
03872 AnchorNode* an = NANCHOR(node);
03873
03874 switch (an->type) {
03875 case ANCHOR_PREC_READ:
03876 r = setup_tree(an->target, reg, state, env);
03877 break;
03878 case ANCHOR_PREC_READ_NOT:
03879 r = setup_tree(an->target, reg, (state | IN_NOT), env);
03880 break;
03881
03882
03883 #define ALLOWED_TYPE_IN_LB \
03884 ( BIT_NT_LIST | BIT_NT_ALT | BIT_NT_STR | BIT_NT_CCLASS | BIT_NT_CTYPE | \
03885 BIT_NT_CANY | BIT_NT_ANCHOR | BIT_NT_ENCLOSE | BIT_NT_QTFR | BIT_NT_CALL )
03886
03887 #define ALLOWED_ENCLOSE_IN_LB ( ENCLOSE_MEMORY )
03888 #define ALLOWED_ENCLOSE_IN_LB_NOT 0
03889
03890 #define ALLOWED_ANCHOR_IN_LB \
03891 ( ANCHOR_LOOK_BEHIND | ANCHOR_BEGIN_LINE | ANCHOR_END_LINE | ANCHOR_BEGIN_BUF | ANCHOR_BEGIN_POSITION )
03892 #define ALLOWED_ANCHOR_IN_LB_NOT \
03893 ( ANCHOR_LOOK_BEHIND | ANCHOR_LOOK_BEHIND_NOT | ANCHOR_BEGIN_LINE | ANCHOR_END_LINE | ANCHOR_BEGIN_BUF | ANCHOR_BEGIN_POSITION )
03894
03895 case ANCHOR_LOOK_BEHIND:
03896 {
03897 r = check_type_tree(an->target, ALLOWED_TYPE_IN_LB,
03898 ALLOWED_ENCLOSE_IN_LB, ALLOWED_ANCHOR_IN_LB);
03899 if (r < 0) return r;
03900 if (r > 0) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
03901 r = setup_look_behind(node, reg, env);
03902 if (r != 0) return r;
03903 r = setup_tree(an->target, reg, state, env);
03904 }
03905 break;
03906
03907 case ANCHOR_LOOK_BEHIND_NOT:
03908 {
03909 r = check_type_tree(an->target, ALLOWED_TYPE_IN_LB,
03910 ALLOWED_ENCLOSE_IN_LB_NOT, ALLOWED_ANCHOR_IN_LB_NOT);
03911 if (r < 0) return r;
03912 if (r > 0) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
03913 r = setup_look_behind(node, reg, env);
03914 if (r != 0) return r;
03915 r = setup_tree(an->target, reg, (state | IN_NOT), env);
03916 }
03917 break;
03918 }
03919 }
03920 break;
03921
03922 default:
03923 break;
03924 }
03925
03926 return r;
03927 }
03928
03929
03930 static int
03931 set_bm_skip(UChar* s, UChar* end, OnigEncoding enc ARG_UNUSED,
03932 UChar skip[], int** int_skip)
03933 {
03934 int i, len;
03935
03936 len = end - s;
03937 if (len < ONIG_CHAR_TABLE_SIZE) {
03938 for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) skip[i] = len;
03939
03940 for (i = 0; i < len - 1; i++)
03941 skip[s[i]] = len - 1 - i;
03942 }
03943 else {
03944 if (IS_NULL(*int_skip)) {
03945 *int_skip = (int* )xmalloc(sizeof(int) * ONIG_CHAR_TABLE_SIZE);
03946 if (IS_NULL(*int_skip)) return ONIGERR_MEMORY;
03947 }
03948 for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) (*int_skip)[i] = len;
03949
03950 for (i = 0; i < len - 1; i++)
03951 (*int_skip)[s[i]] = len - 1 - i;
03952 }
03953 return 0;
03954 }
03955
03956 #define OPT_EXACT_MAXLEN 24
03957
03958 typedef struct {
03959 OnigDistance min;
03960 OnigDistance max;
03961 } MinMaxLen;
03962
03963 typedef struct {
03964 MinMaxLen mmd;
03965 OnigEncoding enc;
03966 OnigOptionType options;
03967 OnigCaseFoldType case_fold_flag;
03968 ScanEnv* scan_env;
03969 } OptEnv;
03970
03971 typedef struct {
03972 int left_anchor;
03973 int right_anchor;
03974 } OptAncInfo;
03975
03976 typedef struct {
03977 MinMaxLen mmd;
03978 OptAncInfo anc;
03979
03980 int reach_end;
03981 int ignore_case;
03982 int len;
03983 UChar s[OPT_EXACT_MAXLEN];
03984 } OptExactInfo;
03985
03986 typedef struct {
03987 MinMaxLen mmd;
03988 OptAncInfo anc;
03989
03990 int value;
03991 UChar map[ONIG_CHAR_TABLE_SIZE];
03992 } OptMapInfo;
03993
03994 typedef struct {
03995 MinMaxLen len;
03996
03997 OptAncInfo anc;
03998 OptExactInfo exb;
03999 OptExactInfo exm;
04000 OptExactInfo expr;
04001
04002 OptMapInfo map;
04003 } NodeOptInfo;
04004
04005
04006 static int
04007 map_position_value(OnigEncoding enc, int i)
04008 {
04009 static const short int ByteValTable[] = {
04010 5, 1, 1, 1, 1, 1, 1, 1, 1, 10, 10, 1, 1, 10, 1, 1,
04011 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
04012 12, 4, 7, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5,
04013 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5, 5, 5,
04014 5, 6, 6, 6, 6, 7, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
04015 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 6, 5, 5, 5,
04016 5, 6, 6, 6, 6, 7, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
04017 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5, 1
04018 };
04019
04020 if (i < (int )(sizeof(ByteValTable)/sizeof(ByteValTable[0]))) {
04021 if (i == 0 && ONIGENC_MBC_MINLEN(enc) > 1)
04022 return 20;
04023 else
04024 return (int )ByteValTable[i];
04025 }
04026 else
04027 return 4;
04028 }
04029
04030 static int
04031 distance_value(MinMaxLen* mm)
04032 {
04033
04034 static const short int dist_vals[] = {
04035 1000, 500, 333, 250, 200, 167, 143, 125, 111, 100,
04036 91, 83, 77, 71, 67, 63, 59, 56, 53, 50,
04037 48, 45, 43, 42, 40, 38, 37, 36, 34, 33,
04038 32, 31, 30, 29, 29, 28, 27, 26, 26, 25,
04039 24, 24, 23, 23, 22, 22, 21, 21, 20, 20,
04040 20, 19, 19, 19, 18, 18, 18, 17, 17, 17,
04041 16, 16, 16, 16, 15, 15, 15, 15, 14, 14,
04042 14, 14, 14, 14, 13, 13, 13, 13, 13, 13,
04043 12, 12, 12, 12, 12, 12, 11, 11, 11, 11,
04044 11, 11, 11, 11, 11, 10, 10, 10, 10, 10
04045 };
04046
04047 int d;
04048
04049 if (mm->max == ONIG_INFINITE_DISTANCE) return 0;
04050
04051 d = mm->max - mm->min;
04052 if (d < (int )(sizeof(dist_vals)/sizeof(dist_vals[0])))
04053
04054 return (int )dist_vals[d];
04055 else
04056 return 1;
04057 }
04058
04059 static int
04060 comp_distance_value(MinMaxLen* d1, MinMaxLen* d2, int v1, int v2)
04061 {
04062 if (v2 <= 0) return -1;
04063 if (v1 <= 0) return 1;
04064
04065 v1 *= distance_value(d1);
04066 v2 *= distance_value(d2);
04067
04068 if (v2 > v1) return 1;
04069 if (v2 < v1) return -1;
04070
04071 if (d2->min < d1->min) return 1;
04072 if (d2->min > d1->min) return -1;
04073 return 0;
04074 }
04075
04076 static int
04077 is_equal_mml(MinMaxLen* a, MinMaxLen* b)
04078 {
04079 return (a->min == b->min && a->max == b->max) ? 1 : 0;
04080 }
04081
04082
04083 static void
04084 set_mml(MinMaxLen* mml, OnigDistance min, OnigDistance max)
04085 {
04086 mml->min = min;
04087 mml->max = max;
04088 }
04089
04090 static void
04091 clear_mml(MinMaxLen* mml)
04092 {
04093 mml->min = mml->max = 0;
04094 }
04095
04096 static void
04097 copy_mml(MinMaxLen* to, MinMaxLen* from)
04098 {
04099 to->min = from->min;
04100 to->max = from->max;
04101 }
04102
04103 static void
04104 add_mml(MinMaxLen* to, MinMaxLen* from)
04105 {
04106 to->min = distance_add(to->min, from->min);
04107 to->max = distance_add(to->max, from->max);
04108 }
04109
04110 #if 0
04111 static void
04112 add_len_mml(MinMaxLen* to, OnigDistance len)
04113 {
04114 to->min = distance_add(to->min, len);
04115 to->max = distance_add(to->max, len);
04116 }
04117 #endif
04118
04119 static void
04120 alt_merge_mml(MinMaxLen* to, MinMaxLen* from)
04121 {
04122 if (to->min > from->min) to->min = from->min;
04123 if (to->max < from->max) to->max = from->max;
04124 }
04125
04126 static void
04127 copy_opt_env(OptEnv* to, OptEnv* from)
04128 {
04129 *to = *from;
04130 }
04131
04132 static void
04133 clear_opt_anc_info(OptAncInfo* anc)
04134 {
04135 anc->left_anchor = 0;
04136 anc->right_anchor = 0;
04137 }
04138
04139 static void
04140 copy_opt_anc_info(OptAncInfo* to, OptAncInfo* from)
04141 {
04142 *to = *from;
04143 }
04144
04145 static void
04146 concat_opt_anc_info(OptAncInfo* to, OptAncInfo* left, OptAncInfo* right,
04147 OnigDistance left_len, OnigDistance right_len)
04148 {
04149 clear_opt_anc_info(to);
04150
04151 to->left_anchor = left->left_anchor;
04152 if (left_len == 0) {
04153 to->left_anchor |= right->left_anchor;
04154 }
04155
04156 to->right_anchor = right->right_anchor;
04157 if (right_len == 0) {
04158 to->right_anchor |= left->right_anchor;
04159 }
04160 }
04161
04162 static int
04163 is_left_anchor(int anc)
04164 {
04165 if (anc == ANCHOR_END_BUF || anc == ANCHOR_SEMI_END_BUF ||
04166 anc == ANCHOR_END_LINE || anc == ANCHOR_PREC_READ ||
04167 anc == ANCHOR_PREC_READ_NOT)
04168 return 0;
04169
04170 return 1;
04171 }
04172
04173 static int
04174 is_set_opt_anc_info(OptAncInfo* to, int anc)
04175 {
04176 if ((to->left_anchor & anc) != 0) return 1;
04177
04178 return ((to->right_anchor & anc) != 0 ? 1 : 0);
04179 }
04180
04181 static void
04182 add_opt_anc_info(OptAncInfo* to, int anc)
04183 {
04184 if (is_left_anchor(anc))
04185 to->left_anchor |= anc;
04186 else
04187 to->right_anchor |= anc;
04188 }
04189
04190 static void
04191 remove_opt_anc_info(OptAncInfo* to, int anc)
04192 {
04193 if (is_left_anchor(anc))
04194 to->left_anchor &= ~anc;
04195 else
04196 to->right_anchor &= ~anc;
04197 }
04198
04199 static void
04200 alt_merge_opt_anc_info(OptAncInfo* to, OptAncInfo* add)
04201 {
04202 to->left_anchor &= add->left_anchor;
04203 to->right_anchor &= add->right_anchor;
04204 }
04205
04206 static int
04207 is_full_opt_exact_info(OptExactInfo* ex)
04208 {
04209 return (ex->len >= OPT_EXACT_MAXLEN ? 1 : 0);
04210 }
04211
04212 static void
04213 clear_opt_exact_info(OptExactInfo* ex)
04214 {
04215 clear_mml(&ex->mmd);
04216 clear_opt_anc_info(&ex->anc);
04217 ex->reach_end = 0;
04218 ex->ignore_case = 0;
04219 ex->len = 0;
04220 ex->s[0] = '\0';
04221 }
04222
04223 static void
04224 copy_opt_exact_info(OptExactInfo* to, OptExactInfo* from)
04225 {
04226 *to = *from;
04227 }
04228
04229 static void
04230 concat_opt_exact_info(OptExactInfo* to, OptExactInfo* add, OnigEncoding enc)
04231 {
04232 int i, j, len;
04233 UChar *p, *end;
04234 OptAncInfo tanc;
04235
04236 if (! to->ignore_case && add->ignore_case) {
04237 if (to->len >= add->len) return ;
04238
04239 to->ignore_case = 1;
04240 }
04241
04242 p = add->s;
04243 end = p + add->len;
04244 for (i = to->len; p < end; ) {
04245 len = enclen(enc, p, end);
04246 if (i + len > OPT_EXACT_MAXLEN) break;
04247 for (j = 0; j < len && p < end; j++)
04248 to->s[i++] = *p++;
04249 }
04250
04251 to->len = i;
04252 to->reach_end = (p == end ? add->reach_end : 0);
04253
04254 concat_opt_anc_info(&tanc, &to->anc, &add->anc, 1, 1);
04255 if (! to->reach_end) tanc.right_anchor = 0;
04256 copy_opt_anc_info(&to->anc, &tanc);
04257 }
04258
04259 static void
04260 concat_opt_exact_info_str(OptExactInfo* to, UChar* s, UChar* end,
04261 int raw ARG_UNUSED, OnigEncoding enc)
04262 {
04263 int i, j, len;
04264 UChar *p;
04265
04266 for (i = to->len, p = s; p < end && i < OPT_EXACT_MAXLEN; ) {
04267 len = enclen(enc, p, end);
04268 if (i + len > OPT_EXACT_MAXLEN) break;
04269 for (j = 0; j < len && p < end; j++)
04270 to->s[i++] = *p++;
04271 }
04272
04273 to->len = i;
04274 }
04275
04276 static void
04277 alt_merge_opt_exact_info(OptExactInfo* to, OptExactInfo* add, OptEnv* env)
04278 {
04279 int i, j, len;
04280
04281 if (add->len == 0 || to->len == 0) {
04282 clear_opt_exact_info(to);
04283 return ;
04284 }
04285
04286 if (! is_equal_mml(&to->mmd, &add->mmd)) {
04287 clear_opt_exact_info(to);
04288 return ;
04289 }
04290
04291 for (i = 0; i < to->len && i < add->len; ) {
04292 if (to->s[i] != add->s[i]) break;
04293 len = enclen(env->enc, to->s + i, to->s + to->len);
04294
04295 for (j = 1; j < len; j++) {
04296 if (to->s[i+j] != add->s[i+j]) break;
04297 }
04298 if (j < len) break;
04299 i += len;
04300 }
04301
04302 if (! add->reach_end || i < add->len || i < to->len) {
04303 to->reach_end = 0;
04304 }
04305 to->len = i;
04306 to->ignore_case |= add->ignore_case;
04307
04308 alt_merge_opt_anc_info(&to->anc, &add->anc);
04309 if (! to->reach_end) to->anc.right_anchor = 0;
04310 }
04311
04312 static void
04313 select_opt_exact_info(OnigEncoding enc, OptExactInfo* now, OptExactInfo* alt)
04314 {
04315 int v1, v2;
04316
04317 v1 = now->len;
04318 v2 = alt->len;
04319
04320 if (v2 == 0) {
04321 return ;
04322 }
04323 else if (v1 == 0) {
04324 copy_opt_exact_info(now, alt);
04325 return ;
04326 }
04327 else if (v1 <= 2 && v2 <= 2) {
04328
04329 v2 = map_position_value(enc, now->s[0]);
04330 v1 = map_position_value(enc, alt->s[0]);
04331
04332 if (now->len > 1) v1 += 5;
04333 if (alt->len > 1) v2 += 5;
04334 }
04335
04336 if (now->ignore_case == 0) v1 *= 2;
04337 if (alt->ignore_case == 0) v2 *= 2;
04338
04339 if (comp_distance_value(&now->mmd, &alt->mmd, v1, v2) > 0)
04340 copy_opt_exact_info(now, alt);
04341 }
04342
04343 static void
04344 clear_opt_map_info(OptMapInfo* map)
04345 {
04346 static const OptMapInfo clean_info = {
04347 {0, 0}, {0, 0}, 0,
04348 {
04349 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
04350 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
04351 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
04352 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
04353 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
04354 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
04355 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
04356 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
04357 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
04358 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
04359 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
04360 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
04361 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
04362 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
04363 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
04364 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
04365 }
04366 };
04367
04368 xmemcpy(map, &clean_info, sizeof(OptMapInfo));
04369 }
04370
04371 static void
04372 copy_opt_map_info(OptMapInfo* to, OptMapInfo* from)
04373 {
04374 *to = *from;
04375 }
04376
04377 static void
04378 add_char_opt_map_info(OptMapInfo* map, UChar c, OnigEncoding enc)
04379 {
04380 if (map->map[c] == 0) {
04381 map->map[c] = 1;
04382 map->value += map_position_value(enc, c);
04383 }
04384 }
04385
04386 static int
04387 add_char_amb_opt_map_info(OptMapInfo* map, UChar* p, UChar* end,
04388 OnigEncoding enc, OnigCaseFoldType case_fold_flag)
04389 {
04390 OnigCaseFoldCodeItem items[ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM];
04391 UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN];
04392 int i, n;
04393
04394 add_char_opt_map_info(map, p[0], enc);
04395
04396 case_fold_flag = DISABLE_CASE_FOLD_MULTI_CHAR(case_fold_flag);
04397 n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc, case_fold_flag, p, end, items);
04398 if (n < 0) return n;
04399
04400 for (i = 0; i < n; i++) {
04401 ONIGENC_CODE_TO_MBC(enc, items[i].code[0], buf);
04402 add_char_opt_map_info(map, buf[0], enc);
04403 }
04404
04405 return 0;
04406 }
04407
04408 static void
04409 select_opt_map_info(OptMapInfo* now, OptMapInfo* alt)
04410 {
04411 const int z = 1<<15;
04412
04413 int v1, v2;
04414
04415 if (alt->value == 0) return ;
04416 if (now->value == 0) {
04417 copy_opt_map_info(now, alt);
04418 return ;
04419 }
04420
04421 v1 = z / now->value;
04422 v2 = z / alt->value;
04423 if (comp_distance_value(&now->mmd, &alt->mmd, v1, v2) > 0)
04424 copy_opt_map_info(now, alt);
04425 }
04426
04427 static int
04428 comp_opt_exact_or_map_info(OptExactInfo* e, OptMapInfo* m)
04429 {
04430 #define COMP_EM_BASE 20
04431 int ve, vm;
04432
04433 if (m->value <= 0) return -1;
04434
04435 ve = COMP_EM_BASE * e->len * (e->ignore_case ? 1 : 2);
04436 vm = COMP_EM_BASE * 5 * 2 / m->value;
04437 return comp_distance_value(&e->mmd, &m->mmd, ve, vm);
04438 }
04439
04440 static void
04441 alt_merge_opt_map_info(OnigEncoding enc, OptMapInfo* to, OptMapInfo* add)
04442 {
04443 int i, val;
04444
04445
04446 if (to->value == 0) return ;
04447 if (add->value == 0 || to->mmd.max < add->mmd.min) {
04448 clear_opt_map_info(to);
04449 return ;
04450 }
04451
04452 alt_merge_mml(&to->mmd, &add->mmd);
04453
04454 val = 0;
04455 for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) {
04456 if (add->map[i])
04457 to->map[i] = 1;
04458
04459 if (to->map[i])
04460 val += map_position_value(enc, i);
04461 }
04462 to->value = val;
04463
04464 alt_merge_opt_anc_info(&to->anc, &add->anc);
04465 }
04466
04467 static void
04468 set_bound_node_opt_info(NodeOptInfo* opt, MinMaxLen* mmd)
04469 {
04470 copy_mml(&(opt->exb.mmd), mmd);
04471 copy_mml(&(opt->expr.mmd), mmd);
04472 copy_mml(&(opt->map.mmd), mmd);
04473 }
04474
04475 static void
04476 clear_node_opt_info(NodeOptInfo* opt)
04477 {
04478 clear_mml(&opt->len);
04479 clear_opt_anc_info(&opt->anc);
04480 clear_opt_exact_info(&opt->exb);
04481 clear_opt_exact_info(&opt->exm);
04482 clear_opt_exact_info(&opt->expr);
04483 clear_opt_map_info(&opt->map);
04484 }
04485
04486 static void
04487 copy_node_opt_info(NodeOptInfo* to, NodeOptInfo* from)
04488 {
04489 *to = *from;
04490 }
04491
04492 static void
04493 concat_left_node_opt_info(OnigEncoding enc, NodeOptInfo* to, NodeOptInfo* add)
04494 {
04495 int exb_reach, exm_reach;
04496 OptAncInfo tanc;
04497
04498 concat_opt_anc_info(&tanc, &to->anc, &add->anc, to->len.max, add->len.max);
04499 copy_opt_anc_info(&to->anc, &tanc);
04500
04501 if (add->exb.len > 0 && to->len.max == 0) {
04502 concat_opt_anc_info(&tanc, &to->anc, &add->exb.anc,
04503 to->len.max, add->len.max);
04504 copy_opt_anc_info(&add->exb.anc, &tanc);
04505 }
04506
04507 if (add->map.value > 0 && to->len.max == 0) {
04508 if (add->map.mmd.max == 0)
04509 add->map.anc.left_anchor |= to->anc.left_anchor;
04510 }
04511
04512 exb_reach = to->exb.reach_end;
04513 exm_reach = to->exm.reach_end;
04514
04515 if (add->len.max != 0)
04516 to->exb.reach_end = to->exm.reach_end = 0;
04517
04518 if (add->exb.len > 0) {
04519 if (exb_reach) {
04520 concat_opt_exact_info(&to->exb, &add->exb, enc);
04521 clear_opt_exact_info(&add->exb);
04522 }
04523 else if (exm_reach) {
04524 concat_opt_exact_info(&to->exm, &add->exb, enc);
04525 clear_opt_exact_info(&add->exb);
04526 }
04527 }
04528 select_opt_exact_info(enc, &to->exm, &add->exb);
04529 select_opt_exact_info(enc, &to->exm, &add->exm);
04530
04531 if (to->expr.len > 0) {
04532 if (add->len.max > 0) {
04533 if (to->expr.len > (int )add->len.max)
04534 to->expr.len = add->len.max;
04535
04536 if (to->expr.mmd.max == 0)
04537 select_opt_exact_info(enc, &to->exb, &to->expr);
04538 else
04539 select_opt_exact_info(enc, &to->exm, &to->expr);
04540 }
04541 }
04542 else if (add->expr.len > 0) {
04543 copy_opt_exact_info(&to->expr, &add->expr);
04544 }
04545
04546 select_opt_map_info(&to->map, &add->map);
04547
04548 add_mml(&to->len, &add->len);
04549 }
04550
04551 static void
04552 alt_merge_node_opt_info(NodeOptInfo* to, NodeOptInfo* add, OptEnv* env)
04553 {
04554 alt_merge_opt_anc_info (&to->anc, &add->anc);
04555 alt_merge_opt_exact_info(&to->exb, &add->exb, env);
04556 alt_merge_opt_exact_info(&to->exm, &add->exm, env);
04557 alt_merge_opt_exact_info(&to->expr, &add->expr, env);
04558 alt_merge_opt_map_info(env->enc, &to->map, &add->map);
04559
04560 alt_merge_mml(&to->len, &add->len);
04561 }
04562
04563
04564 #define MAX_NODE_OPT_INFO_REF_COUNT 5
04565
04566 static int
04567 optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env)
04568 {
04569 int type;
04570 int r = 0;
04571
04572 clear_node_opt_info(opt);
04573 set_bound_node_opt_info(opt, &env->mmd);
04574
04575 type = NTYPE(node);
04576 switch (type) {
04577 case NT_LIST:
04578 {
04579 OptEnv nenv;
04580 NodeOptInfo nopt;
04581 Node* nd = node;
04582
04583 copy_opt_env(&nenv, env);
04584 do {
04585 r = optimize_node_left(NCAR(nd), &nopt, &nenv);
04586 if (r == 0) {
04587 add_mml(&nenv.mmd, &nopt.len);
04588 concat_left_node_opt_info(env->enc, opt, &nopt);
04589 }
04590 } while (r == 0 && IS_NOT_NULL(nd = NCDR(nd)));
04591 }
04592 break;
04593
04594 case NT_ALT:
04595 {
04596 NodeOptInfo nopt;
04597 Node* nd = node;
04598
04599 do {
04600 r = optimize_node_left(NCAR(nd), &nopt, env);
04601 if (r == 0) {
04602 if (nd == node) copy_node_opt_info(opt, &nopt);
04603 else alt_merge_node_opt_info(opt, &nopt, env);
04604 }
04605 } while ((r == 0) && IS_NOT_NULL(nd = NCDR(nd)));
04606 }
04607 break;
04608
04609 case NT_STR:
04610 {
04611 StrNode* sn = NSTR(node);
04612 int slen = sn->end - sn->s;
04613 int is_raw = NSTRING_IS_RAW(node);
04614
04615 if (! NSTRING_IS_AMBIG(node)) {
04616 concat_opt_exact_info_str(&opt->exb, sn->s, sn->end,
04617 NSTRING_IS_RAW(node), env->enc);
04618 if (slen > 0) {
04619 add_char_opt_map_info(&opt->map, *(sn->s), env->enc);
04620 }
04621 set_mml(&opt->len, slen, slen);
04622 }
04623 else {
04624 int max;
04625
04626 if (NSTRING_IS_DONT_GET_OPT_INFO(node)) {
04627 int n = onigenc_strlen(env->enc, sn->s, sn->end);
04628 max = ONIGENC_MBC_MAXLEN_DIST(env->enc) * n;
04629 }
04630 else {
04631 concat_opt_exact_info_str(&opt->exb, sn->s, sn->end,
04632 is_raw, env->enc);
04633 opt->exb.ignore_case = 1;
04634
04635 if (slen > 0) {
04636 r = add_char_amb_opt_map_info(&opt->map, sn->s, sn->end,
04637 env->enc, env->case_fold_flag);
04638 if (r != 0) break;
04639 }
04640
04641 max = slen;
04642 }
04643
04644 set_mml(&opt->len, slen, max);
04645 }
04646
04647 if (opt->exb.len == slen)
04648 opt->exb.reach_end = 1;
04649 }
04650 break;
04651
04652 case NT_CCLASS:
04653 {
04654 int i, z;
04655 CClassNode* cc = NCCLASS(node);
04656
04657
04658
04659 if (IS_NOT_NULL(cc->mbuf) || IS_NCCLASS_NOT(cc)) {
04660 OnigDistance min = ONIGENC_MBC_MINLEN(env->enc);
04661 OnigDistance max = ONIGENC_MBC_MAXLEN_DIST(env->enc);
04662
04663 set_mml(&opt->len, min, max);
04664 }
04665 else {
04666 for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
04667 z = BITSET_AT(cc->bs, i);
04668 if ((z && !IS_NCCLASS_NOT(cc)) || (!z && IS_NCCLASS_NOT(cc))) {
04669 add_char_opt_map_info(&opt->map, (UChar )i, env->enc);
04670 }
04671 }
04672 set_mml(&opt->len, 1, 1);
04673 }
04674 }
04675 break;
04676
04677 case NT_CTYPE:
04678 {
04679 int i, min, max;
04680
04681 max = ONIGENC_MBC_MAXLEN_DIST(env->enc);
04682
04683 if (max == 1) {
04684 min = 1;
04685
04686 switch (NCTYPE(node)->ctype) {
04687 case ONIGENC_CTYPE_WORD:
04688 if (NCTYPE(node)->not != 0) {
04689 for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
04690 if (! ONIGENC_IS_CODE_WORD(env->enc, i)) {
04691 add_char_opt_map_info(&opt->map, (UChar )i, env->enc);
04692 }
04693 }
04694 }
04695 else {
04696 for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
04697 if (ONIGENC_IS_CODE_WORD(env->enc, i)) {
04698 add_char_opt_map_info(&opt->map, (UChar )i, env->enc);
04699 }
04700 }
04701 }
04702 break;
04703 }
04704 }
04705 else {
04706 min = ONIGENC_MBC_MINLEN(env->enc);
04707 }
04708 set_mml(&opt->len, min, max);
04709 }
04710 break;
04711
04712 case NT_CANY:
04713 {
04714 OnigDistance min = ONIGENC_MBC_MINLEN(env->enc);
04715 OnigDistance max = ONIGENC_MBC_MAXLEN_DIST(env->enc);
04716 set_mml(&opt->len, min, max);
04717 }
04718 break;
04719
04720 case NT_ANCHOR:
04721 switch (NANCHOR(node)->type) {
04722 case ANCHOR_BEGIN_BUF:
04723 case ANCHOR_BEGIN_POSITION:
04724 case ANCHOR_BEGIN_LINE:
04725 case ANCHOR_END_BUF:
04726 case ANCHOR_SEMI_END_BUF:
04727 case ANCHOR_END_LINE:
04728 add_opt_anc_info(&opt->anc, NANCHOR(node)->type);
04729 break;
04730
04731 case ANCHOR_PREC_READ:
04732 {
04733 NodeOptInfo nopt;
04734
04735 r = optimize_node_left(NANCHOR(node)->target, &nopt, env);
04736 if (r == 0) {
04737 if (nopt.exb.len > 0)
04738 copy_opt_exact_info(&opt->expr, &nopt.exb);
04739 else if (nopt.exm.len > 0)
04740 copy_opt_exact_info(&opt->expr, &nopt.exm);
04741
04742 opt->expr.reach_end = 0;
04743
04744 if (nopt.map.value > 0)
04745 copy_opt_map_info(&opt->map, &nopt.map);
04746 }
04747 }
04748 break;
04749
04750 case ANCHOR_PREC_READ_NOT:
04751 case ANCHOR_LOOK_BEHIND:
04752 case ANCHOR_LOOK_BEHIND_NOT:
04753 break;
04754 }
04755 break;
04756
04757 case NT_BREF:
04758 {
04759 int i;
04760 int* backs;
04761 OnigDistance min, max, tmin, tmax;
04762 Node** nodes = SCANENV_MEM_NODES(env->scan_env);
04763 BRefNode* br = NBREF(node);
04764
04765 if (br->state & NST_RECURSION) {
04766 set_mml(&opt->len, 0, ONIG_INFINITE_DISTANCE);
04767 break;
04768 }
04769 backs = BACKREFS_P(br);
04770 r = get_min_match_length(nodes[backs[0]], &min, env->scan_env);
04771 if (r != 0) break;
04772 r = get_max_match_length(nodes[backs[0]], &max, env->scan_env);
04773 if (r != 0) break;
04774 for (i = 1; i < br->back_num; i++) {
04775 r = get_min_match_length(nodes[backs[i]], &tmin, env->scan_env);
04776 if (r != 0) break;
04777 r = get_max_match_length(nodes[backs[i]], &tmax, env->scan_env);
04778 if (r != 0) break;
04779 if (min > tmin) min = tmin;
04780 if (max < tmax) max = tmax;
04781 }
04782 if (r == 0) set_mml(&opt->len, min, max);
04783 }
04784 break;
04785
04786 #ifdef USE_SUBEXP_CALL
04787 case NT_CALL:
04788 if (IS_CALL_RECURSION(NCALL(node)))
04789 set_mml(&opt->len, 0, ONIG_INFINITE_DISTANCE);
04790 else {
04791 OnigOptionType save = env->options;
04792 env->options = NENCLOSE(NCALL(node)->target)->option;
04793 r = optimize_node_left(NCALL(node)->target, opt, env);
04794 env->options = save;
04795 }
04796 break;
04797 #endif
04798
04799 case NT_QTFR:
04800 {
04801 int i;
04802 OnigDistance min, max;
04803 NodeOptInfo nopt;
04804 QtfrNode* qn = NQTFR(node);
04805
04806 r = optimize_node_left(qn->target, &nopt, env);
04807 if (r) break;
04808
04809 if (qn->lower == 0 && IS_REPEAT_INFINITE(qn->upper)) {
04810 if (env->mmd.max == 0 &&
04811 NTYPE(qn->target) == NT_CANY && qn->greedy) {
04812 if (IS_MULTILINE(env->options))
04813 add_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR_ML);
04814 else
04815 add_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR);
04816 }
04817 }
04818 else {
04819 if (qn->lower > 0) {
04820 copy_node_opt_info(opt, &nopt);
04821 if (nopt.exb.len > 0) {
04822 if (nopt.exb.reach_end) {
04823 for (i = 2; i <= qn->lower &&
04824 ! is_full_opt_exact_info(&opt->exb); i++) {
04825 concat_opt_exact_info(&opt->exb, &nopt.exb, env->enc);
04826 }
04827 if (i < qn->lower) {
04828 opt->exb.reach_end = 0;
04829 }
04830 }
04831 }
04832
04833 if (qn->lower != qn->upper) {
04834 opt->exb.reach_end = 0;
04835 opt->exm.reach_end = 0;
04836 }
04837 if (qn->lower > 1)
04838 opt->exm.reach_end = 0;
04839 }
04840 }
04841
04842 min = distance_multiply(nopt.len.min, qn->lower);
04843 if (IS_REPEAT_INFINITE(qn->upper))
04844 max = (nopt.len.max > 0 ? ONIG_INFINITE_DISTANCE : 0);
04845 else
04846 max = distance_multiply(nopt.len.max, qn->upper);
04847
04848 set_mml(&opt->len, min, max);
04849 }
04850 break;
04851
04852 case NT_ENCLOSE:
04853 {
04854 EncloseNode* en = NENCLOSE(node);
04855
04856 switch (en->type) {
04857 case ENCLOSE_OPTION:
04858 {
04859 OnigOptionType save = env->options;
04860
04861 env->options = en->option;
04862 r = optimize_node_left(en->target, opt, env);
04863 env->options = save;
04864 }
04865 break;
04866
04867 case ENCLOSE_MEMORY:
04868 #ifdef USE_SUBEXP_CALL
04869 en->opt_count++;
04870 if (en->opt_count > MAX_NODE_OPT_INFO_REF_COUNT) {
04871 OnigDistance min, max;
04872
04873 min = 0;
04874 max = ONIG_INFINITE_DISTANCE;
04875 if (IS_ENCLOSE_MIN_FIXED(en)) min = en->min_len;
04876 if (IS_ENCLOSE_MAX_FIXED(en)) max = en->max_len;
04877 set_mml(&opt->len, min, max);
04878 }
04879 else
04880 #endif
04881 {
04882 r = optimize_node_left(en->target, opt, env);
04883
04884 if (is_set_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR_MASK)) {
04885 if (BIT_STATUS_AT(env->scan_env->backrefed_mem, en->regnum))
04886 remove_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR_MASK);
04887 }
04888 }
04889 break;
04890
04891 case ENCLOSE_STOP_BACKTRACK:
04892 r = optimize_node_left(en->target, opt, env);
04893 break;
04894 }
04895 }
04896 break;
04897
04898 default:
04899 #ifdef ONIG_DEBUG
04900 fprintf(stderr, "optimize_node_left: undefined node type %d\n",
04901 NTYPE(node));
04902 #endif
04903 r = ONIGERR_TYPE_BUG;
04904 break;
04905 }
04906
04907 return r;
04908 }
04909
04910 static int
04911 set_optimize_exact_info(regex_t* reg, OptExactInfo* e)
04912 {
04913 int r;
04914
04915 if (e->len == 0) return 0;
04916
04917 if (e->ignore_case) {
04918 reg->exact = (UChar* )xmalloc(e->len);
04919 CHECK_NULL_RETURN_MEMERR(reg->exact);
04920 xmemcpy(reg->exact, e->s, e->len);
04921 reg->exact_end = reg->exact + e->len;
04922 reg->optimize = ONIG_OPTIMIZE_EXACT_IC;
04923 }
04924 else {
04925 int allow_reverse;
04926
04927 reg->exact = str_dup(e->s, e->s + e->len);
04928 CHECK_NULL_RETURN_MEMERR(reg->exact);
04929 reg->exact_end = reg->exact + e->len;
04930
04931 allow_reverse =
04932 ONIGENC_IS_ALLOWED_REVERSE_MATCH(reg->enc, reg->exact, reg->exact_end);
04933
04934 if (e->len >= 3 || (e->len >= 2 && allow_reverse)) {
04935 r = set_bm_skip(reg->exact, reg->exact_end, reg->enc,
04936 reg->map, &(reg->int_map));
04937 if (r) return r;
04938
04939 reg->optimize = (allow_reverse != 0
04940 ? ONIG_OPTIMIZE_EXACT_BM : ONIG_OPTIMIZE_EXACT_BM_NOT_REV);
04941 }
04942 else {
04943 reg->optimize = ONIG_OPTIMIZE_EXACT;
04944 }
04945 }
04946
04947 reg->dmin = e->mmd.min;
04948 reg->dmax = e->mmd.max;
04949
04950 if (reg->dmin != ONIG_INFINITE_DISTANCE) {
04951 reg->threshold_len = reg->dmin + (reg->exact_end - reg->exact);
04952 }
04953
04954 return 0;
04955 }
04956
04957 static void
04958 set_optimize_map_info(regex_t* reg, OptMapInfo* m)
04959 {
04960 int i;
04961
04962 for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++)
04963 reg->map[i] = m->map[i];
04964
04965 reg->optimize = ONIG_OPTIMIZE_MAP;
04966 reg->dmin = m->mmd.min;
04967 reg->dmax = m->mmd.max;
04968
04969 if (reg->dmin != ONIG_INFINITE_DISTANCE) {
04970 reg->threshold_len = reg->dmin + 1;
04971 }
04972 }
04973
04974 static void
04975 set_sub_anchor(regex_t* reg, OptAncInfo* anc)
04976 {
04977 reg->sub_anchor |= anc->left_anchor & ANCHOR_BEGIN_LINE;
04978 reg->sub_anchor |= anc->right_anchor & ANCHOR_END_LINE;
04979 }
04980
04981 #ifdef ONIG_DEBUG
04982 static void print_optimize_info(FILE* f, regex_t* reg);
04983 #endif
04984
04985 static int
04986 set_optimize_info_from_tree(Node* node, regex_t* reg, ScanEnv* scan_env)
04987 {
04988
04989 int r;
04990 NodeOptInfo opt;
04991 OptEnv env;
04992
04993 env.enc = reg->enc;
04994 env.options = reg->options;
04995 env.case_fold_flag = reg->case_fold_flag;
04996 env.scan_env = scan_env;
04997 clear_mml(&env.mmd);
04998
04999 r = optimize_node_left(node, &opt, &env);
05000 if (r) return r;
05001
05002 reg->anchor = opt.anc.left_anchor & (ANCHOR_BEGIN_BUF |
05003 ANCHOR_BEGIN_POSITION | ANCHOR_ANYCHAR_STAR | ANCHOR_ANYCHAR_STAR_ML);
05004
05005 reg->anchor |= opt.anc.right_anchor & (ANCHOR_END_BUF | ANCHOR_SEMI_END_BUF);
05006
05007 if (reg->anchor & (ANCHOR_END_BUF | ANCHOR_SEMI_END_BUF)) {
05008 reg->anchor_dmin = opt.len.min;
05009 reg->anchor_dmax = opt.len.max;
05010 }
05011
05012 if (opt.exb.len > 0 || opt.exm.len > 0) {
05013 select_opt_exact_info(reg->enc, &opt.exb, &opt.exm);
05014 if (opt.map.value > 0 &&
05015 comp_opt_exact_or_map_info(&opt.exb, &opt.map) > 0) {
05016 goto set_map;
05017 }
05018 else {
05019 r = set_optimize_exact_info(reg, &opt.exb);
05020 set_sub_anchor(reg, &opt.exb.anc);
05021 }
05022 }
05023 else if (opt.map.value > 0) {
05024 set_map:
05025 set_optimize_map_info(reg, &opt.map);
05026 set_sub_anchor(reg, &opt.map.anc);
05027 }
05028 else {
05029 reg->sub_anchor |= opt.anc.left_anchor & ANCHOR_BEGIN_LINE;
05030 if (opt.len.max == 0)
05031 reg->sub_anchor |= opt.anc.right_anchor & ANCHOR_END_LINE;
05032 }
05033
05034 #if defined(ONIG_DEBUG_COMPILE) || defined(ONIG_DEBUG_MATCH)
05035 print_optimize_info(stderr, reg);
05036 #endif
05037 return r;
05038 }
05039
05040 static void
05041 clear_optimize_info(regex_t* reg)
05042 {
05043 reg->optimize = ONIG_OPTIMIZE_NONE;
05044 reg->anchor = 0;
05045 reg->anchor_dmin = 0;
05046 reg->anchor_dmax = 0;
05047 reg->sub_anchor = 0;
05048 reg->exact_end = (UChar* )NULL;
05049 reg->threshold_len = 0;
05050 if (IS_NOT_NULL(reg->exact)) {
05051 xfree(reg->exact);
05052 reg->exact = (UChar* )NULL;
05053 }
05054 }
05055
05056 #ifdef ONIG_DEBUG
05057
05058 static void print_enc_string(FILE* fp, OnigEncoding enc,
05059 const UChar *s, const UChar *end)
05060 {
05061 fprintf(fp, "\nPATTERN: /");
05062
05063 if (ONIGENC_MBC_MINLEN(enc) > 1) {
05064 const UChar *p;
05065 OnigCodePoint code;
05066
05067 p = s;
05068 while (p < end) {
05069 code = ONIGENC_MBC_TO_CODE(enc, p, end);
05070 if (code >= 0x80) {
05071 fprintf(fp, " 0x%04x ", (int )code);
05072 }
05073 else {
05074 fputc((int )code, fp);
05075 }
05076
05077 p += enclen(enc, p, end);
05078 }
05079 }
05080 else {
05081 while (s < end) {
05082 fputc((int )*s, fp);
05083 s++;
05084 }
05085 }
05086
05087 fprintf(fp, "/\n");
05088 }
05089
05090 static void
05091 print_distance_range(FILE* f, OnigDistance a, OnigDistance b)
05092 {
05093 if (a == ONIG_INFINITE_DISTANCE)
05094 fputs("inf", f);
05095 else
05096 fprintf(f, "(%u)", a);
05097
05098 fputs("-", f);
05099
05100 if (b == ONIG_INFINITE_DISTANCE)
05101 fputs("inf", f);
05102 else
05103 fprintf(f, "(%u)", b);
05104 }
05105
05106 static void
05107 print_anchor(FILE* f, int anchor)
05108 {
05109 int q = 0;
05110
05111 fprintf(f, "[");
05112
05113 if (anchor & ANCHOR_BEGIN_BUF) {
05114 fprintf(f, "begin-buf");
05115 q = 1;
05116 }
05117 if (anchor & ANCHOR_BEGIN_LINE) {
05118 if (q) fprintf(f, ", ");
05119 q = 1;
05120 fprintf(f, "begin-line");
05121 }
05122 if (anchor & ANCHOR_BEGIN_POSITION) {
05123 if (q) fprintf(f, ", ");
05124 q = 1;
05125 fprintf(f, "begin-pos");
05126 }
05127 if (anchor & ANCHOR_END_BUF) {
05128 if (q) fprintf(f, ", ");
05129 q = 1;
05130 fprintf(f, "end-buf");
05131 }
05132 if (anchor & ANCHOR_SEMI_END_BUF) {
05133 if (q) fprintf(f, ", ");
05134 q = 1;
05135 fprintf(f, "semi-end-buf");
05136 }
05137 if (anchor & ANCHOR_END_LINE) {
05138 if (q) fprintf(f, ", ");
05139 q = 1;
05140 fprintf(f, "end-line");
05141 }
05142 if (anchor & ANCHOR_ANYCHAR_STAR) {
05143 if (q) fprintf(f, ", ");
05144 q = 1;
05145 fprintf(f, "anychar-star");
05146 }
05147 if (anchor & ANCHOR_ANYCHAR_STAR_ML) {
05148 if (q) fprintf(f, ", ");
05149 fprintf(f, "anychar-star-pl");
05150 }
05151
05152 fprintf(f, "]");
05153 }
05154
05155 static void
05156 print_optimize_info(FILE* f, regex_t* reg)
05157 {
05158 static const char* on[] = { "NONE", "EXACT", "EXACT_BM", "EXACT_BM_NOT_REV",
05159 "EXACT_IC", "MAP" };
05160
05161 fprintf(f, "optimize: %s\n", on[reg->optimize]);
05162 fprintf(f, " anchor: "); print_anchor(f, reg->anchor);
05163 if ((reg->anchor & ANCHOR_END_BUF_MASK) != 0)
05164 print_distance_range(f, reg->anchor_dmin, reg->anchor_dmax);
05165 fprintf(f, "\n");
05166
05167 if (reg->optimize) {
05168 fprintf(f, " sub anchor: "); print_anchor(f, reg->sub_anchor);
05169 fprintf(f, "\n");
05170 }
05171 fprintf(f, "\n");
05172
05173 if (reg->exact) {
05174 UChar *p;
05175 fprintf(f, "exact: [");
05176 for (p = reg->exact; p < reg->exact_end; p++) {
05177 fputc(*p, f);
05178 }
05179 fprintf(f, "]: length: %d\n", (reg->exact_end - reg->exact));
05180 }
05181 else if (reg->optimize & ONIG_OPTIMIZE_MAP) {
05182 int c, i, n = 0;
05183
05184 for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++)
05185 if (reg->map[i]) n++;
05186
05187 fprintf(f, "map: n=%d\n", n);
05188 if (n > 0) {
05189 c = 0;
05190 fputc('[', f);
05191 for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) {
05192 if (reg->map[i] != 0) {
05193 if (c > 0) fputs(", ", f);
05194 c++;
05195 if (ONIGENC_MBC_MAXLEN(reg->enc) == 1 &&
05196 ONIGENC_IS_CODE_PRINT(reg->enc, (OnigCodePoint )i))
05197 fputc(i, f);
05198 else
05199 fprintf(f, "%d", i);
05200 }
05201 }
05202 fprintf(f, "]\n");
05203 }
05204 }
05205 }
05206 #endif
05207
05208
05209 extern void
05210 onig_free_body(regex_t* reg)
05211 {
05212 if (IS_NOT_NULL(reg)) {
05213 if (IS_NOT_NULL(reg->p)) xfree(reg->p);
05214 if (IS_NOT_NULL(reg->exact)) xfree(reg->exact);
05215 if (IS_NOT_NULL(reg->int_map)) xfree(reg->int_map);
05216 if (IS_NOT_NULL(reg->int_map_backward)) xfree(reg->int_map_backward);
05217 if (IS_NOT_NULL(reg->repeat_range)) xfree(reg->repeat_range);
05218 if (IS_NOT_NULL(reg->chain)) onig_free(reg->chain);
05219
05220 #ifdef USE_NAMED_GROUP
05221 onig_names_free(reg);
05222 #endif
05223 }
05224 }
05225
05226 extern void
05227 onig_free(regex_t* reg)
05228 {
05229 if (IS_NOT_NULL(reg)) {
05230 onig_free_body(reg);
05231 xfree(reg);
05232 }
05233 }
05234
05235 size_t
05236 onig_memsize(regex_t *reg)
05237 {
05238 size_t size = sizeof(regex_t);
05239 if (IS_NOT_NULL(reg->p)) size += reg->alloc;
05240 if (IS_NOT_NULL(reg->exact)) size += reg->exact_end - reg->exact;
05241 if (IS_NOT_NULL(reg->int_map)) size += sizeof(int) * ONIG_CHAR_TABLE_SIZE;
05242 if (IS_NOT_NULL(reg->int_map_backward)) size += sizeof(int) * ONIG_CHAR_TABLE_SIZE;
05243 if (IS_NOT_NULL(reg->repeat_range)) size += reg->repeat_range_alloc * sizeof(OnigRepeatRange);
05244 if (IS_NOT_NULL(reg->chain)) size += onig_memsize(reg->chain);
05245
05246 return size;
05247 }
05248
05249 #define REGEX_TRANSFER(to,from) do {\
05250 (to)->state = ONIG_STATE_MODIFY;\
05251 onig_free_body(to);\
05252 xmemcpy(to, from, sizeof(regex_t));\
05253 xfree(from);\
05254 } while (0)
05255
05256 extern void
05257 onig_transfer(regex_t* to, regex_t* from)
05258 {
05259 THREAD_ATOMIC_START;
05260 REGEX_TRANSFER(to, from);
05261 THREAD_ATOMIC_END;
05262 }
05263
05264 #define REGEX_CHAIN_HEAD(reg) do {\
05265 while (IS_NOT_NULL((reg)->chain)) {\
05266 (reg) = (reg)->chain;\
05267 }\
05268 } while (0)
05269
05270 extern void
05271 onig_chain_link_add(regex_t* to, regex_t* add)
05272 {
05273 THREAD_ATOMIC_START;
05274 REGEX_CHAIN_HEAD(to);
05275 to->chain = add;
05276 THREAD_ATOMIC_END;
05277 }
05278
05279 extern void
05280 onig_chain_reduce(regex_t* reg)
05281 {
05282 regex_t *head, *prev;
05283
05284 prev = reg;
05285 head = prev->chain;
05286 if (IS_NOT_NULL(head)) {
05287 reg->state = ONIG_STATE_MODIFY;
05288 while (IS_NOT_NULL(head->chain)) {
05289 prev = head;
05290 head = head->chain;
05291 }
05292 prev->chain = (regex_t* )NULL;
05293 REGEX_TRANSFER(reg, head);
05294 }
05295 }
05296
05297 #ifdef ONIG_DEBUG
05298 static void print_compiled_byte_code_list P_((FILE* f, regex_t* reg));
05299 #endif
05300 #ifdef ONIG_DEBUG_PARSE_TREE
05301 static void print_tree P_((FILE* f, Node* node));
05302 #endif
05303
05304 extern int
05305 onig_compile(regex_t* reg, const UChar* pattern, const UChar* pattern_end,
05306 OnigErrorInfo* einfo, const char *sourcefile, int sourceline)
05307 {
05308 #define COMPILE_INIT_SIZE 20
05309
05310 int r, init_size;
05311 Node* root;
05312 ScanEnv scan_env = {0};
05313 #ifdef USE_SUBEXP_CALL
05314 UnsetAddrList uslist;
05315 #endif
05316
05317 if (IS_NOT_NULL(einfo)) einfo->par = (UChar* )NULL;
05318
05319 scan_env.sourcefile = sourcefile;
05320 scan_env.sourceline = sourceline;
05321 reg->state = ONIG_STATE_COMPILING;
05322
05323 #ifdef ONIG_DEBUG
05324 print_enc_string(stderr, reg->enc, pattern, pattern_end);
05325 #endif
05326
05327 if (reg->alloc == 0) {
05328 init_size = (pattern_end - pattern) * 2;
05329 if (init_size <= 0) init_size = COMPILE_INIT_SIZE;
05330 r = BBUF_INIT(reg, init_size);
05331 if (r != 0) goto end;
05332 }
05333 else
05334 reg->used = 0;
05335
05336 reg->num_mem = 0;
05337 reg->num_repeat = 0;
05338 reg->num_null_check = 0;
05339 reg->repeat_range_alloc = 0;
05340 reg->repeat_range = (OnigRepeatRange* )NULL;
05341 #ifdef USE_COMBINATION_EXPLOSION_CHECK
05342 reg->num_comb_exp_check = 0;
05343 #endif
05344
05345 r = onig_parse_make_tree(&root, pattern, pattern_end, reg, &scan_env);
05346 if (r != 0) goto err;
05347
05348 #ifdef USE_NAMED_GROUP
05349
05350 if (scan_env.num_named > 0 &&
05351 IS_SYNTAX_BV(scan_env.syntax, ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP) &&
05352 !ONIG_IS_OPTION_ON(reg->options, ONIG_OPTION_CAPTURE_GROUP)) {
05353 if (scan_env.num_named != scan_env.num_mem)
05354 r = disable_noname_group_capture(&root, reg, &scan_env);
05355 else
05356 r = numbered_ref_check(root);
05357
05358 if (r != 0) goto err;
05359 }
05360 #endif
05361
05362 #ifdef USE_SUBEXP_CALL
05363 if (scan_env.num_call > 0) {
05364 r = unset_addr_list_init(&uslist, scan_env.num_call);
05365 if (r != 0) goto err;
05366 scan_env.unset_addr_list = &uslist;
05367 r = setup_subexp_call(root, &scan_env);
05368 if (r != 0) goto err_unset;
05369 r = subexp_recursive_check_trav(root, &scan_env);
05370 if (r < 0) goto err_unset;
05371 r = subexp_inf_recursive_check_trav(root, &scan_env);
05372 if (r != 0) goto err_unset;
05373
05374 reg->num_call = scan_env.num_call;
05375 }
05376 else
05377 reg->num_call = 0;
05378 #endif
05379
05380 r = setup_tree(root, reg, 0, &scan_env);
05381 if (r != 0) goto err_unset;
05382
05383 #ifdef ONIG_DEBUG_PARSE_TREE
05384 print_tree(stderr, root);
05385 #endif
05386
05387 reg->capture_history = scan_env.capture_history;
05388 reg->bt_mem_start = scan_env.bt_mem_start;
05389 reg->bt_mem_start |= reg->capture_history;
05390 if (IS_FIND_CONDITION(reg->options))
05391 BIT_STATUS_ON_ALL(reg->bt_mem_end);
05392 else {
05393 reg->bt_mem_end = scan_env.bt_mem_end;
05394 reg->bt_mem_end |= reg->capture_history;
05395 }
05396
05397 #ifdef USE_COMBINATION_EXPLOSION_CHECK
05398 if (scan_env.backrefed_mem == 0
05399 #ifdef USE_SUBEXP_CALL
05400 || scan_env.num_call == 0
05401 #endif
05402 ) {
05403 setup_comb_exp_check(root, 0, &scan_env);
05404 #ifdef USE_SUBEXP_CALL
05405 if (scan_env.has_recursion != 0) {
05406 scan_env.num_comb_exp_check = 0;
05407 }
05408 else
05409 #endif
05410 if (scan_env.comb_exp_max_regnum > 0) {
05411 int i;
05412 for (i = 1; i <= scan_env.comb_exp_max_regnum; i++) {
05413 if (BIT_STATUS_AT(scan_env.backrefed_mem, i) != 0) {
05414 scan_env.num_comb_exp_check = 0;
05415 break;
05416 }
05417 }
05418 }
05419 }
05420
05421 reg->num_comb_exp_check = scan_env.num_comb_exp_check;
05422 #endif
05423
05424 clear_optimize_info(reg);
05425 #ifndef ONIG_DONT_OPTIMIZE
05426 r = set_optimize_info_from_tree(root, reg, &scan_env);
05427 if (r != 0) goto err_unset;
05428 #endif
05429
05430 if (IS_NOT_NULL(scan_env.mem_nodes_dynamic)) {
05431 xfree(scan_env.mem_nodes_dynamic);
05432 scan_env.mem_nodes_dynamic = (Node** )NULL;
05433 }
05434
05435 r = compile_tree(root, reg);
05436 if (r == 0) {
05437 r = add_opcode(reg, OP_END);
05438 #ifdef USE_SUBEXP_CALL
05439 if (scan_env.num_call > 0) {
05440 r = unset_addr_list_fix(&uslist, reg);
05441 unset_addr_list_end(&uslist);
05442 if (r) goto err;
05443 }
05444 #endif
05445
05446 if ((reg->num_repeat != 0) || (reg->bt_mem_end != 0))
05447 reg->stack_pop_level = STACK_POP_LEVEL_ALL;
05448 else {
05449 if (reg->bt_mem_start != 0)
05450 reg->stack_pop_level = STACK_POP_LEVEL_MEM_START;
05451 else
05452 reg->stack_pop_level = STACK_POP_LEVEL_FREE;
05453 }
05454 }
05455 #ifdef USE_SUBEXP_CALL
05456 else if (scan_env.num_call > 0) {
05457 unset_addr_list_end(&uslist);
05458 }
05459 #endif
05460 onig_node_free(root);
05461
05462 #ifdef ONIG_DEBUG_COMPILE
05463 #ifdef USE_NAMED_GROUP
05464 onig_print_names(stderr, reg);
05465 #endif
05466 print_compiled_byte_code_list(stderr, reg);
05467 #endif
05468
05469 end:
05470 reg->state = ONIG_STATE_NORMAL;
05471 return r;
05472
05473 err_unset:
05474 #ifdef USE_SUBEXP_CALL
05475 if (scan_env.num_call > 0) {
05476 unset_addr_list_end(&uslist);
05477 }
05478 #endif
05479 err:
05480 if (IS_NOT_NULL(scan_env.error)) {
05481 if (IS_NOT_NULL(einfo)) {
05482 einfo->enc = scan_env.enc;
05483 einfo->par = scan_env.error;
05484 einfo->par_end = scan_env.error_end;
05485 }
05486 }
05487
05488 onig_node_free(root);
05489 if (IS_NOT_NULL(scan_env.mem_nodes_dynamic))
05490 xfree(scan_env.mem_nodes_dynamic);
05491 return r;
05492 }
05493
05494 #ifdef USE_RECOMPILE_API
05495 extern int
05496 onig_recompile(regex_t* reg, const UChar* pattern, const UChar* pattern_end,
05497 OnigOptionType option, OnigEncoding enc, OnigSyntaxType* syntax,
05498 OnigErrorInfo* einfo)
05499 {
05500 int r;
05501 regex_t *new_reg;
05502
05503 r = onig_new(&new_reg, pattern, pattern_end, option, enc, syntax, einfo);
05504 if (r) return r;
05505 if (ONIG_STATE(reg) == ONIG_STATE_NORMAL) {
05506 onig_transfer(reg, new_reg);
05507 }
05508 else {
05509 onig_chain_link_add(reg, new_reg);
05510 }
05511 return 0;
05512 }
05513 #endif
05514
05515 static int onig_inited = 0;
05516
05517 extern int
05518 onig_reg_init(regex_t* reg, OnigOptionType option,
05519 OnigCaseFoldType case_fold_flag,
05520 OnigEncoding enc, const OnigSyntaxType* syntax)
05521 {
05522 if (! onig_inited)
05523 onig_init();
05524
05525 if (IS_NULL(reg))
05526 return ONIGERR_INVALID_ARGUMENT;
05527
05528 if (ONIGENC_IS_UNDEF(enc))
05529 return ONIGERR_DEFAULT_ENCODING_IS_NOT_SETTED;
05530
05531 if ((option & (ONIG_OPTION_DONT_CAPTURE_GROUP|ONIG_OPTION_CAPTURE_GROUP))
05532 == (ONIG_OPTION_DONT_CAPTURE_GROUP|ONIG_OPTION_CAPTURE_GROUP)) {
05533 return ONIGERR_INVALID_COMBINATION_OF_OPTIONS;
05534 }
05535
05536 (reg)->state = ONIG_STATE_MODIFY;
05537
05538 if ((option & ONIG_OPTION_NEGATE_SINGLELINE) != 0) {
05539 option |= syntax->options;
05540 option &= ~ONIG_OPTION_SINGLELINE;
05541 }
05542 else
05543 option |= syntax->options;
05544
05545 (reg)->enc = enc;
05546 (reg)->options = option;
05547 (reg)->syntax = syntax;
05548 (reg)->optimize = 0;
05549 (reg)->exact = (UChar* )NULL;
05550 (reg)->int_map = (int* )NULL;
05551 (reg)->int_map_backward = (int* )NULL;
05552 (reg)->chain = (regex_t* )NULL;
05553
05554 (reg)->p = (UChar* )NULL;
05555 (reg)->alloc = 0;
05556 (reg)->used = 0;
05557 (reg)->name_table = (void* )NULL;
05558
05559 (reg)->case_fold_flag = case_fold_flag;
05560 return 0;
05561 }
05562
05563 extern int
05564 onig_new_without_alloc(regex_t* reg, const UChar* pattern,
05565 const UChar* pattern_end, OnigOptionType option, OnigEncoding enc,
05566 OnigSyntaxType* syntax, OnigErrorInfo* einfo)
05567 {
05568 int r;
05569
05570 r = onig_reg_init(reg, option, ONIGENC_CASE_FOLD_DEFAULT, enc, syntax);
05571 if (r) return r;
05572
05573 r = onig_compile(reg, pattern, pattern_end, einfo, NULL, 0);
05574 return r;
05575 }
05576
05577 extern int
05578 onig_new(regex_t** reg, const UChar* pattern, const UChar* pattern_end,
05579 OnigOptionType option, OnigEncoding enc, const OnigSyntaxType* syntax,
05580 OnigErrorInfo* einfo)
05581 {
05582 int r;
05583
05584 *reg = (regex_t* )xmalloc(sizeof(regex_t));
05585 if (IS_NULL(*reg)) return ONIGERR_MEMORY;
05586
05587 r = onig_reg_init(*reg, option, ONIGENC_CASE_FOLD_DEFAULT, enc, syntax);
05588 if (r) goto err;
05589
05590 r = onig_compile(*reg, pattern, pattern_end, einfo, NULL, 0);
05591 if (r) {
05592 err:
05593 onig_free(*reg);
05594 *reg = NULL;
05595 }
05596 return r;
05597 }
05598
05599
05600 extern int
05601 onig_init(void)
05602 {
05603 if (onig_inited != 0)
05604 return 0;
05605
05606 THREAD_SYSTEM_INIT;
05607 THREAD_ATOMIC_START;
05608
05609 onig_inited = 1;
05610
05611 onigenc_init();
05612
05613
05614 #ifdef ONIG_DEBUG_STATISTICS
05615 onig_statistics_init();
05616 #endif
05617
05618 THREAD_ATOMIC_END;
05619 return 0;
05620 }
05621
05622
05623 extern int
05624 onig_end(void)
05625 {
05626 THREAD_ATOMIC_START;
05627
05628 #ifdef ONIG_DEBUG_STATISTICS
05629 onig_print_statistics(stderr);
05630 #endif
05631
05632 #ifdef USE_SHARED_CCLASS_TABLE
05633 onig_free_shared_cclass_table();
05634 #endif
05635
05636 #ifdef USE_PARSE_TREE_NODE_RECYCLE
05637 onig_free_node_list();
05638 #endif
05639
05640 onig_inited = 0;
05641
05642 THREAD_ATOMIC_END;
05643 THREAD_SYSTEM_END;
05644 return 0;
05645 }
05646
05647 extern int
05648 onig_is_in_code_range(const UChar* p, OnigCodePoint code)
05649 {
05650 OnigCodePoint n, *data;
05651 OnigCodePoint low, high, x;
05652
05653 GET_CODE_POINT(n, p);
05654 data = (OnigCodePoint* )p;
05655 data++;
05656
05657 for (low = 0, high = n; low < high; ) {
05658 x = (low + high) >> 1;
05659 if (code > data[x * 2 + 1])
05660 low = x + 1;
05661 else
05662 high = x;
05663 }
05664
05665 return ((low < n && code >= data[low * 2]) ? 1 : 0);
05666 }
05667
05668 extern int
05669 onig_is_code_in_cc_len(int elen, OnigCodePoint code, CClassNode* cc)
05670 {
05671 int found;
05672
05673 if (elen > 1 || (code >= SINGLE_BYTE_SIZE)) {
05674 if (IS_NULL(cc->mbuf)) {
05675 found = 0;
05676 }
05677 else {
05678 found = (onig_is_in_code_range(cc->mbuf->p, code) != 0 ? 1 : 0);
05679 }
05680 }
05681 else {
05682 found = (BITSET_AT(cc->bs, code) == 0 ? 0 : 1);
05683 }
05684
05685 if (IS_NCCLASS_NOT(cc))
05686 return !found;
05687 else
05688 return found;
05689 }
05690
05691 extern int
05692 onig_is_code_in_cc(OnigEncoding enc, OnigCodePoint code, CClassNode* cc)
05693 {
05694 int len;
05695
05696 if (ONIGENC_MBC_MINLEN(enc) > 1) {
05697 len = 2;
05698 }
05699 else {
05700 len = ONIGENC_CODE_TO_MBCLEN(enc, code);
05701 }
05702 return onig_is_code_in_cc_len(len, code, cc);
05703 }
05704
05705
05706 #ifdef ONIG_DEBUG
05707
05708
05709 #define ARG_SPECIAL -1
05710 #define ARG_NON 0
05711 #define ARG_RELADDR 1
05712 #define ARG_ABSADDR 2
05713 #define ARG_LENGTH 3
05714 #define ARG_MEMNUM 4
05715 #define ARG_OPTION 5
05716 #define ARG_STATE_CHECK 6
05717
05718 OnigOpInfoType OnigOpInfo[] = {
05719 { OP_FINISH, "finish", ARG_NON },
05720 { OP_END, "end", ARG_NON },
05721 { OP_EXACT1, "exact1", ARG_SPECIAL },
05722 { OP_EXACT2, "exact2", ARG_SPECIAL },
05723 { OP_EXACT3, "exact3", ARG_SPECIAL },
05724 { OP_EXACT4, "exact4", ARG_SPECIAL },
05725 { OP_EXACT5, "exact5", ARG_SPECIAL },
05726 { OP_EXACTN, "exactn", ARG_SPECIAL },
05727 { OP_EXACTMB2N1, "exactmb2-n1", ARG_SPECIAL },
05728 { OP_EXACTMB2N2, "exactmb2-n2", ARG_SPECIAL },
05729 { OP_EXACTMB2N3, "exactmb2-n3", ARG_SPECIAL },
05730 { OP_EXACTMB2N, "exactmb2-n", ARG_SPECIAL },
05731 { OP_EXACTMB3N, "exactmb3n" , ARG_SPECIAL },
05732 { OP_EXACTMBN, "exactmbn", ARG_SPECIAL },
05733 { OP_EXACT1_IC, "exact1-ic", ARG_SPECIAL },
05734 { OP_EXACTN_IC, "exactn-ic", ARG_SPECIAL },
05735 { OP_CCLASS, "cclass", ARG_SPECIAL },
05736 { OP_CCLASS_MB, "cclass-mb", ARG_SPECIAL },
05737 { OP_CCLASS_MIX, "cclass-mix", ARG_SPECIAL },
05738 { OP_CCLASS_NOT, "cclass-not", ARG_SPECIAL },
05739 { OP_CCLASS_MB_NOT, "cclass-mb-not", ARG_SPECIAL },
05740 { OP_CCLASS_MIX_NOT, "cclass-mix-not", ARG_SPECIAL },
05741 { OP_CCLASS_NODE, "cclass-node", ARG_SPECIAL },
05742 { OP_ANYCHAR, "anychar", ARG_NON },
05743 { OP_ANYCHAR_ML, "anychar-ml", ARG_NON },
05744 { OP_ANYCHAR_STAR, "anychar*", ARG_NON },
05745 { OP_ANYCHAR_ML_STAR, "anychar-ml*", ARG_NON },
05746 { OP_ANYCHAR_STAR_PEEK_NEXT, "anychar*-peek-next", ARG_SPECIAL },
05747 { OP_ANYCHAR_ML_STAR_PEEK_NEXT, "anychar-ml*-peek-next", ARG_SPECIAL },
05748 { OP_WORD, "word", ARG_NON },
05749 { OP_NOT_WORD, "not-word", ARG_NON },
05750 { OP_WORD_BOUND, "word-bound", ARG_NON },
05751 { OP_NOT_WORD_BOUND, "not-word-bound", ARG_NON },
05752 { OP_WORD_BEGIN, "word-begin", ARG_NON },
05753 { OP_WORD_END, "word-end", ARG_NON },
05754 { OP_BEGIN_BUF, "begin-buf", ARG_NON },
05755 { OP_END_BUF, "end-buf", ARG_NON },
05756 { OP_BEGIN_LINE, "begin-line", ARG_NON },
05757 { OP_END_LINE, "end-line", ARG_NON },
05758 { OP_SEMI_END_BUF, "semi-end-buf", ARG_NON },
05759 { OP_BEGIN_POSITION, "begin-position", ARG_NON },
05760 { OP_BACKREF1, "backref1", ARG_NON },
05761 { OP_BACKREF2, "backref2", ARG_NON },
05762 { OP_BACKREFN, "backrefn", ARG_MEMNUM },
05763 { OP_BACKREFN_IC, "backrefn-ic", ARG_SPECIAL },
05764 { OP_BACKREF_MULTI, "backref_multi", ARG_SPECIAL },
05765 { OP_BACKREF_MULTI_IC, "backref_multi-ic", ARG_SPECIAL },
05766 { OP_BACKREF_WITH_LEVEL, "backref_at_level", ARG_SPECIAL },
05767 { OP_MEMORY_START_PUSH, "mem-start-push", ARG_MEMNUM },
05768 { OP_MEMORY_START, "mem-start", ARG_MEMNUM },
05769 { OP_MEMORY_END_PUSH, "mem-end-push", ARG_MEMNUM },
05770 { OP_MEMORY_END_PUSH_REC, "mem-end-push-rec", ARG_MEMNUM },
05771 { OP_MEMORY_END, "mem-end", ARG_MEMNUM },
05772 { OP_MEMORY_END_REC, "mem-end-rec", ARG_MEMNUM },
05773 { OP_SET_OPTION_PUSH, "set-option-push", ARG_OPTION },
05774 { OP_SET_OPTION, "set-option", ARG_OPTION },
05775 { OP_FAIL, "fail", ARG_NON },
05776 { OP_JUMP, "jump", ARG_RELADDR },
05777 { OP_PUSH, "push", ARG_RELADDR },
05778 { OP_POP, "pop", ARG_NON },
05779 { OP_PUSH_OR_JUMP_EXACT1, "push-or-jump-e1", ARG_SPECIAL },
05780 { OP_PUSH_IF_PEEK_NEXT, "push-if-peek-next", ARG_SPECIAL },
05781 { OP_REPEAT, "repeat", ARG_SPECIAL },
05782 { OP_REPEAT_NG, "repeat-ng", ARG_SPECIAL },
05783 { OP_REPEAT_INC, "repeat-inc", ARG_MEMNUM },
05784 { OP_REPEAT_INC_NG, "repeat-inc-ng", ARG_MEMNUM },
05785 { OP_REPEAT_INC_SG, "repeat-inc-sg", ARG_MEMNUM },
05786 { OP_REPEAT_INC_NG_SG, "repeat-inc-ng-sg", ARG_MEMNUM },
05787 { OP_NULL_CHECK_START, "null-check-start", ARG_MEMNUM },
05788 { OP_NULL_CHECK_END, "null-check-end", ARG_MEMNUM },
05789 { OP_NULL_CHECK_END_MEMST,"null-check-end-memst", ARG_MEMNUM },
05790 { OP_NULL_CHECK_END_MEMST_PUSH,"null-check-end-memst-push", ARG_MEMNUM },
05791 { OP_PUSH_POS, "push-pos", ARG_NON },
05792 { OP_POP_POS, "pop-pos", ARG_NON },
05793 { OP_PUSH_POS_NOT, "push-pos-not", ARG_RELADDR },
05794 { OP_FAIL_POS, "fail-pos", ARG_NON },
05795 { OP_PUSH_STOP_BT, "push-stop-bt", ARG_NON },
05796 { OP_POP_STOP_BT, "pop-stop-bt", ARG_NON },
05797 { OP_LOOK_BEHIND, "look-behind", ARG_SPECIAL },
05798 { OP_PUSH_LOOK_BEHIND_NOT, "push-look-behind-not", ARG_SPECIAL },
05799 { OP_FAIL_LOOK_BEHIND_NOT, "fail-look-behind-not", ARG_NON },
05800 { OP_CALL, "call", ARG_ABSADDR },
05801 { OP_RETURN, "return", ARG_NON },
05802 { OP_STATE_CHECK_PUSH, "state-check-push", ARG_SPECIAL },
05803 { OP_STATE_CHECK_PUSH_OR_JUMP, "state-check-push-or-jump", ARG_SPECIAL },
05804 { OP_STATE_CHECK, "state-check", ARG_STATE_CHECK },
05805 { OP_STATE_CHECK_ANYCHAR_STAR, "state-check-anychar*", ARG_STATE_CHECK },
05806 { OP_STATE_CHECK_ANYCHAR_ML_STAR,
05807 "state-check-anychar-ml*", ARG_STATE_CHECK },
05808 { -1, "", ARG_NON }
05809 };
05810
05811 static char*
05812 op2name(int opcode)
05813 {
05814 int i;
05815
05816 for (i = 0; OnigOpInfo[i].opcode >= 0; i++) {
05817 if (opcode == OnigOpInfo[i].opcode)
05818 return OnigOpInfo[i].name;
05819 }
05820 return "";
05821 }
05822
05823 static int
05824 op2arg_type(int opcode)
05825 {
05826 int i;
05827
05828 for (i = 0; OnigOpInfo[i].opcode >= 0; i++) {
05829 if (opcode == OnigOpInfo[i].opcode)
05830 return OnigOpInfo[i].arg_type;
05831 }
05832 return ARG_SPECIAL;
05833 }
05834
05835 static void
05836 Indent(FILE* f, int indent)
05837 {
05838 int i;
05839 for (i = 0; i < indent; i++) putc(' ', f);
05840 }
05841
05842 static void
05843 p_string(FILE* f, int len, UChar* s)
05844 {
05845 fputs(":", f);
05846 while (len-- > 0) { fputc(*s++, f); }
05847 }
05848
05849 static void
05850 p_len_string(FILE* f, LengthType len, int mb_len, UChar* s)
05851 {
05852 int x = len * mb_len;
05853
05854 fprintf(f, ":%d:", len);
05855 while (x-- > 0) { fputc(*s++, f); }
05856 }
05857
05858 extern void
05859 onig_print_compiled_byte_code(FILE* f, UChar* bp, UChar* bpend, UChar** nextp,
05860 OnigEncoding enc)
05861 {
05862 int i, n, arg_type;
05863 RelAddrType addr;
05864 LengthType len;
05865 MemNumType mem;
05866 StateCheckNumType scn;
05867 OnigCodePoint code;
05868 UChar *q;
05869
05870 fprintf(f, "[%s", op2name(*bp));
05871 arg_type = op2arg_type(*bp);
05872 if (arg_type != ARG_SPECIAL) {
05873 bp++;
05874 switch (arg_type) {
05875 case ARG_NON:
05876 break;
05877 case ARG_RELADDR:
05878 GET_RELADDR_INC(addr, bp);
05879 fprintf(f, ":(%d)", addr);
05880 break;
05881 case ARG_ABSADDR:
05882 GET_ABSADDR_INC(addr, bp);
05883 fprintf(f, ":(%d)", addr);
05884 break;
05885 case ARG_LENGTH:
05886 GET_LENGTH_INC(len, bp);
05887 fprintf(f, ":%d", len);
05888 break;
05889 case ARG_MEMNUM:
05890 mem = *((MemNumType* )bp);
05891 bp += SIZE_MEMNUM;
05892 fprintf(f, ":%d", mem);
05893 break;
05894 case ARG_OPTION:
05895 {
05896 OnigOptionType option = *((OnigOptionType* )bp);
05897 bp += SIZE_OPTION;
05898 fprintf(f, ":%d", option);
05899 }
05900 break;
05901
05902 case ARG_STATE_CHECK:
05903 scn = *((StateCheckNumType* )bp);
05904 bp += SIZE_STATE_CHECK_NUM;
05905 fprintf(f, ":%d", scn);
05906 break;
05907 }
05908 }
05909 else {
05910 switch (*bp++) {
05911 case OP_EXACT1:
05912 case OP_ANYCHAR_STAR_PEEK_NEXT:
05913 case OP_ANYCHAR_ML_STAR_PEEK_NEXT:
05914 p_string(f, 1, bp++); break;
05915 case OP_EXACT2:
05916 p_string(f, 2, bp); bp += 2; break;
05917 case OP_EXACT3:
05918 p_string(f, 3, bp); bp += 3; break;
05919 case OP_EXACT4:
05920 p_string(f, 4, bp); bp += 4; break;
05921 case OP_EXACT5:
05922 p_string(f, 5, bp); bp += 5; break;
05923 case OP_EXACTN:
05924 GET_LENGTH_INC(len, bp);
05925 p_len_string(f, len, 1, bp);
05926 bp += len;
05927 break;
05928
05929 case OP_EXACTMB2N1:
05930 p_string(f, 2, bp); bp += 2; break;
05931 case OP_EXACTMB2N2:
05932 p_string(f, 4, bp); bp += 4; break;
05933 case OP_EXACTMB2N3:
05934 p_string(f, 6, bp); bp += 6; break;
05935 case OP_EXACTMB2N:
05936 GET_LENGTH_INC(len, bp);
05937 p_len_string(f, len, 2, bp);
05938 bp += len * 2;
05939 break;
05940 case OP_EXACTMB3N:
05941 GET_LENGTH_INC(len, bp);
05942 p_len_string(f, len, 3, bp);
05943 bp += len * 3;
05944 break;
05945 case OP_EXACTMBN:
05946 {
05947 int mb_len;
05948
05949 GET_LENGTH_INC(mb_len, bp);
05950 GET_LENGTH_INC(len, bp);
05951 fprintf(f, ":%d:%d:", mb_len, len);
05952 n = len * mb_len;
05953 while (n-- > 0) { fputc(*bp++, f); }
05954 }
05955 break;
05956
05957 case OP_EXACT1_IC:
05958 len = enclen(enc, bp, bpend);
05959 p_string(f, len, bp);
05960 bp += len;
05961 break;
05962 case OP_EXACTN_IC:
05963 GET_LENGTH_INC(len, bp);
05964 p_len_string(f, len, 1, bp);
05965 bp += len;
05966 break;
05967
05968 case OP_CCLASS:
05969 n = bitset_on_num((BitSetRef )bp);
05970 bp += SIZE_BITSET;
05971 fprintf(f, ":%d", n);
05972 break;
05973
05974 case OP_CCLASS_NOT:
05975 n = bitset_on_num((BitSetRef )bp);
05976 bp += SIZE_BITSET;
05977 fprintf(f, ":%d", n);
05978 break;
05979
05980 case OP_CCLASS_MB:
05981 case OP_CCLASS_MB_NOT:
05982 GET_LENGTH_INC(len, bp);
05983 q = bp;
05984 #ifndef PLATFORM_UNALIGNED_WORD_ACCESS
05985 ALIGNMENT_RIGHT(q);
05986 #endif
05987 GET_CODE_POINT(code, q);
05988 bp += len;
05989 fprintf(f, ":%d:%d", (int )code, len);
05990 break;
05991
05992 case OP_CCLASS_MIX:
05993 case OP_CCLASS_MIX_NOT:
05994 n = bitset_on_num((BitSetRef )bp);
05995 bp += SIZE_BITSET;
05996 GET_LENGTH_INC(len, bp);
05997 q = bp;
05998 #ifndef PLATFORM_UNALIGNED_WORD_ACCESS
05999 ALIGNMENT_RIGHT(q);
06000 #endif
06001 GET_CODE_POINT(code, q);
06002 bp += len;
06003 fprintf(f, ":%d:%d:%d", n, (int )code, len);
06004 break;
06005
06006 case OP_CCLASS_NODE:
06007 {
06008 CClassNode *cc;
06009
06010 GET_POINTER_INC(cc, bp);
06011 n = bitset_on_num(cc->bs);
06012 fprintf(f, ":%u:%d", (unsigned int )cc, n);
06013 }
06014 break;
06015
06016 case OP_BACKREFN_IC:
06017 mem = *((MemNumType* )bp);
06018 bp += SIZE_MEMNUM;
06019 fprintf(f, ":%d", mem);
06020 break;
06021
06022 case OP_BACKREF_MULTI_IC:
06023 case OP_BACKREF_MULTI:
06024 fputs(" ", f);
06025 GET_LENGTH_INC(len, bp);
06026 for (i = 0; i < len; i++) {
06027 GET_MEMNUM_INC(mem, bp);
06028 if (i > 0) fputs(", ", f);
06029 fprintf(f, "%d", mem);
06030 }
06031 break;
06032
06033 case OP_BACKREF_WITH_LEVEL:
06034 {
06035 OnigOptionType option;
06036 LengthType level;
06037
06038 GET_OPTION_INC(option, bp);
06039 fprintf(f, ":%d", option);
06040 GET_LENGTH_INC(level, bp);
06041 fprintf(f, ":%d", level);
06042
06043 fputs(" ", f);
06044 GET_LENGTH_INC(len, bp);
06045 for (i = 0; i < len; i++) {
06046 GET_MEMNUM_INC(mem, bp);
06047 if (i > 0) fputs(", ", f);
06048 fprintf(f, "%d", mem);
06049 }
06050 }
06051 break;
06052
06053 case OP_REPEAT:
06054 case OP_REPEAT_NG:
06055 {
06056 mem = *((MemNumType* )bp);
06057 bp += SIZE_MEMNUM;
06058 addr = *((RelAddrType* )bp);
06059 bp += SIZE_RELADDR;
06060 fprintf(f, ":%d:%d", mem, addr);
06061 }
06062 break;
06063
06064 case OP_PUSH_OR_JUMP_EXACT1:
06065 case OP_PUSH_IF_PEEK_NEXT:
06066 addr = *((RelAddrType* )bp);
06067 bp += SIZE_RELADDR;
06068 fprintf(f, ":(%d)", addr);
06069 p_string(f, 1, bp);
06070 bp += 1;
06071 break;
06072
06073 case OP_LOOK_BEHIND:
06074 GET_LENGTH_INC(len, bp);
06075 fprintf(f, ":%d", len);
06076 break;
06077
06078 case OP_PUSH_LOOK_BEHIND_NOT:
06079 GET_RELADDR_INC(addr, bp);
06080 GET_LENGTH_INC(len, bp);
06081 fprintf(f, ":%d:(%d)", len, addr);
06082 break;
06083
06084 case OP_STATE_CHECK_PUSH:
06085 case OP_STATE_CHECK_PUSH_OR_JUMP:
06086 scn = *((StateCheckNumType* )bp);
06087 bp += SIZE_STATE_CHECK_NUM;
06088 addr = *((RelAddrType* )bp);
06089 bp += SIZE_RELADDR;
06090 fprintf(f, ":%d:(%d)", scn, addr);
06091 break;
06092
06093 default:
06094 fprintf(stderr, "onig_print_compiled_byte_code: undefined code %d\n",
06095 *--bp);
06096 }
06097 }
06098 fputs("]", f);
06099 if (nextp) *nextp = bp;
06100 }
06101
06102 static void
06103 print_compiled_byte_code_list(FILE* f, regex_t* reg)
06104 {
06105 int ncode;
06106 UChar* bp = reg->p;
06107 UChar* end = reg->p + reg->used;
06108
06109 fprintf(f, "code length: %d\n", reg->used);
06110
06111 ncode = 0;
06112 while (bp < end) {
06113 ncode++;
06114 if (bp > reg->p) {
06115 if (ncode % 5 == 0)
06116 fprintf(f, "\n");
06117 else
06118 fputs(" ", f);
06119 }
06120 onig_print_compiled_byte_code(f, bp, end, &bp, reg->enc);
06121 }
06122
06123 fprintf(f, "\n");
06124 }
06125
06126 static void
06127 print_indent_tree(FILE* f, Node* node, int indent)
06128 {
06129 int i, type;
06130 int add = 3;
06131 UChar* p;
06132
06133 Indent(f, indent);
06134 if (IS_NULL(node)) {
06135 fprintf(f, "ERROR: null node!!!\n");
06136 exit (0);
06137 }
06138
06139 type = NTYPE(node);
06140 switch (type) {
06141 case NT_LIST:
06142 case NT_ALT:
06143 if (NTYPE(node) == NT_LIST)
06144 fprintf(f, "<list:%x>\n", (int )node);
06145 else
06146 fprintf(f, "<alt:%x>\n", (int )node);
06147
06148 print_indent_tree(f, NCAR(node), indent + add);
06149 while (IS_NOT_NULL(node = NCDR(node))) {
06150 if (NTYPE(node) != type) {
06151 fprintf(f, "ERROR: list/alt right is not a cons. %d\n", NTYPE(node));
06152 exit(0);
06153 }
06154 print_indent_tree(f, NCAR(node), indent + add);
06155 }
06156 break;
06157
06158 case NT_STR:
06159 fprintf(f, "<string%s:%x>",
06160 (NSTRING_IS_RAW(node) ? "-raw" : ""), (int )node);
06161 for (p = NSTR(node)->s; p < NSTR(node)->end; p++) {
06162 if (*p >= 0x20 && *p < 0x7f)
06163 fputc(*p, f);
06164 else {
06165 fprintf(f, " 0x%02x", *p);
06166 }
06167 }
06168 break;
06169
06170 case NT_CCLASS:
06171 fprintf(f, "<cclass:%x>", (int )node);
06172 if (IS_NCCLASS_NOT(NCCLASS(node))) fputs(" not", f);
06173 if (NCCLASS(node)->mbuf) {
06174 BBuf* bbuf = NCCLASS(node)->mbuf;
06175 for (i = 0; i < bbuf->used; i++) {
06176 if (i > 0) fprintf(f, ",");
06177 fprintf(f, "%0x", bbuf->p[i]);
06178 }
06179 }
06180 break;
06181
06182 case NT_CTYPE:
06183 fprintf(f, "<ctype:%x> ", (int )node);
06184 switch (NCTYPE(node)->ctype) {
06185 case ONIGENC_CTYPE_WORD:
06186 if (NCTYPE(node)->not != 0)
06187 fputs("not word", f);
06188 else
06189 fputs("word", f);
06190 break;
06191
06192 default:
06193 fprintf(f, "ERROR: undefined ctype.\n");
06194 exit(0);
06195 }
06196 break;
06197
06198 case NT_CANY:
06199 fprintf(f, "<anychar:%x>", (int )node);
06200 break;
06201
06202 case NT_ANCHOR:
06203 fprintf(f, "<anchor:%x> ", (int )node);
06204 switch (NANCHOR(node)->type) {
06205 case ANCHOR_BEGIN_BUF: fputs("begin buf", f); break;
06206 case ANCHOR_END_BUF: fputs("end buf", f); break;
06207 case ANCHOR_BEGIN_LINE: fputs("begin line", f); break;
06208 case ANCHOR_END_LINE: fputs("end line", f); break;
06209 case ANCHOR_SEMI_END_BUF: fputs("semi end buf", f); break;
06210 case ANCHOR_BEGIN_POSITION: fputs("begin position", f); break;
06211
06212 case ANCHOR_WORD_BOUND: fputs("word bound", f); break;
06213 case ANCHOR_NOT_WORD_BOUND: fputs("not word bound", f); break;
06214 #ifdef USE_WORD_BEGIN_END
06215 case ANCHOR_WORD_BEGIN: fputs("word begin", f); break;
06216 case ANCHOR_WORD_END: fputs("word end", f); break;
06217 #endif
06218 case ANCHOR_PREC_READ: fputs("prec read", f); break;
06219 case ANCHOR_PREC_READ_NOT: fputs("prec read not", f); break;
06220 case ANCHOR_LOOK_BEHIND: fputs("look_behind", f); break;
06221 case ANCHOR_LOOK_BEHIND_NOT: fputs("look_behind_not",f); break;
06222
06223 default:
06224 fprintf(f, "ERROR: undefined anchor type.\n");
06225 break;
06226 }
06227 break;
06228
06229 case NT_BREF:
06230 {
06231 int* p;
06232 BRefNode* br = NBREF(node);
06233 p = BACKREFS_P(br);
06234 fprintf(f, "<backref:%x>", (int )node);
06235 for (i = 0; i < br->back_num; i++) {
06236 if (i > 0) fputs(", ", f);
06237 fprintf(f, "%d", p[i]);
06238 }
06239 }
06240 break;
06241
06242 #ifdef USE_SUBEXP_CALL
06243 case NT_CALL:
06244 {
06245 CallNode* cn = NCALL(node);
06246 fprintf(f, "<call:%x>", (int )node);
06247 p_string(f, cn->name_end - cn->name, cn->name);
06248 }
06249 break;
06250 #endif
06251
06252 case NT_QTFR:
06253 fprintf(f, "<quantifier:%x>{%d,%d}%s\n", (int )node,
06254 NQTFR(node)->lower, NQTFR(node)->upper,
06255 (NQTFR(node)->greedy ? "" : "?"));
06256 print_indent_tree(f, NQTFR(node)->target, indent + add);
06257 break;
06258
06259 case NT_ENCLOSE:
06260 fprintf(f, "<enclose:%x> ", (int )node);
06261 switch (NENCLOSE(node)->type) {
06262 case ENCLOSE_OPTION:
06263 fprintf(f, "option:%d\n", NENCLOSE(node)->option);
06264 print_indent_tree(f, NENCLOSE(node)->target, indent + add);
06265 break;
06266 case ENCLOSE_MEMORY:
06267 fprintf(f, "memory:%d", NENCLOSE(node)->regnum);
06268 break;
06269 case ENCLOSE_STOP_BACKTRACK:
06270 fprintf(f, "stop-bt");
06271 break;
06272
06273 default:
06274 break;
06275 }
06276 fprintf(f, "\n");
06277 print_indent_tree(f, NENCLOSE(node)->target, indent + add);
06278 break;
06279
06280 default:
06281 fprintf(f, "print_indent_tree: undefined node type %d\n", NTYPE(node));
06282 break;
06283 }
06284
06285 if (type != NT_LIST && type != NT_ALT && type != NT_QTFR &&
06286 type != NT_ENCLOSE)
06287 fprintf(f, "\n");
06288 fflush(f);
06289 }
06290 #endif
06291
06292 #ifdef ONIG_DEBUG_PARSE_TREE
06293 static void
06294 print_tree(FILE* f, Node* node)
06295 {
06296 print_indent_tree(f, node, 0);
06297 }
06298 #endif
06299