• Main Page
  • Modules
  • Data Structures
  • Files
  • File List
  • Globals

regcomp.c

Go to the documentation of this file.
00001 /**********************************************************************
00002   regcomp.c -  Oniguruma (regular expression library)
00003 **********************************************************************/
00004 /*-
00005  * Copyright (c) 2002-2008  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp>
00006  * All rights reserved.
00007  *
00008  * Redistribution and use in source and binary forms, with or without
00009  * modification, are permitted provided that the following conditions
00010  * are met:
00011  * 1. Redistributions of source code must retain the above copyright
00012  *    notice, this list of conditions and the following disclaimer.
00013  * 2. Redistributions in binary form must reproduce the above copyright
00014  *    notice, this list of conditions and the following disclaimer in the
00015  *    documentation and/or other materials provided with the distribution.
00016  *
00017  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
00018  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
00019  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
00020  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
00021  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
00022  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
00023  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
00024  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
00025  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
00026  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
00027  * SUCH DAMAGE.
00028  */
00029 
00030 #include "regparse.h"
00031 
00032 OnigCaseFoldType OnigDefaultCaseFoldFlag = ONIGENC_CASE_FOLD_MIN;
00033 
00034 extern OnigCaseFoldType
00035 onig_get_default_case_fold_flag(void)
00036 {
00037   return OnigDefaultCaseFoldFlag;
00038 }
00039 
00040 extern int
00041 onig_set_default_case_fold_flag(OnigCaseFoldType case_fold_flag)
00042 {
00043   OnigDefaultCaseFoldFlag = case_fold_flag;
00044   return 0;
00045 }
00046 
00047 
00048 #ifndef PLATFORM_UNALIGNED_WORD_ACCESS
00049 static unsigned char PadBuf[WORD_ALIGNMENT_SIZE];
00050 #endif
00051 
00052 static UChar*
00053 str_dup(UChar* s, UChar* end)
00054 {
00055   ptrdiff_t len = end - s;
00056 
00057   if (len > 0) {
00058     UChar* r = (UChar* )xmalloc(len + 1);
00059     CHECK_NULL_RETURN(r);
00060     xmemcpy(r, s, len);
00061     r[len] = (UChar )0;
00062     return r;
00063   }
00064   else return NULL;
00065 }
00066 
00067 static void
00068 swap_node(Node* a, Node* b)
00069 {
00070   Node c;
00071   c = *a; *a = *b; *b = c;
00072 
00073   if (NTYPE(a) == NT_STR) {
00074     StrNode* sn = NSTR(a);
00075     if (sn->capa == 0) {
00076       size_t len = sn->end - sn->s;
00077       sn->s   = sn->buf;
00078       sn->end = sn->s + len;
00079     }
00080   }
00081 
00082   if (NTYPE(b) == NT_STR) {
00083     StrNode* sn = NSTR(b);
00084     if (sn->capa == 0) {
00085       size_t len = sn->end - sn->s;
00086       sn->s   = sn->buf;
00087       sn->end = sn->s + len;
00088     }
00089   }
00090 }
00091 
00092 static OnigDistance
00093 distance_add(OnigDistance d1, OnigDistance d2)
00094 {
00095   if (d1 == ONIG_INFINITE_DISTANCE || d2 == ONIG_INFINITE_DISTANCE)
00096     return ONIG_INFINITE_DISTANCE;
00097   else {
00098     if (d1 <= ONIG_INFINITE_DISTANCE - d2) return d1 + d2;
00099     else return ONIG_INFINITE_DISTANCE;
00100   }
00101 }
00102 
00103 static OnigDistance
00104 distance_multiply(OnigDistance d, int m)
00105 {
00106   if (m == 0) return 0;
00107 
00108   if (d < ONIG_INFINITE_DISTANCE / m)
00109     return d * m;
00110   else
00111     return ONIG_INFINITE_DISTANCE;
00112 }
00113 
00114 static int
00115 bitset_is_empty(BitSetRef bs)
00116 {
00117   int i;
00118   for (i = 0; i < (int )BITSET_SIZE; i++) {
00119     if (bs[i] != 0) return 0;
00120   }
00121   return 1;
00122 }
00123 
00124 #ifdef ONIG_DEBUG
00125 static int
00126 bitset_on_num(BitSetRef bs)
00127 {
00128   int i, n;
00129 
00130   n = 0;
00131   for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
00132     if (BITSET_AT(bs, i)) n++;
00133   }
00134   return n;
00135 }
00136 #endif
00137 
00138 extern int
00139 onig_bbuf_init(BBuf* buf, int size)
00140 {
00141   if (size <= 0) {
00142     size   = 0;
00143     buf->p = NULL;
00144   }
00145   else {
00146     buf->p = (UChar* )xmalloc(size);
00147     if (IS_NULL(buf->p)) return(ONIGERR_MEMORY);
00148   }
00149 
00150   buf->alloc = size;
00151   buf->used  = 0;
00152   return 0;
00153 }
00154 
00155 
00156 #ifdef USE_SUBEXP_CALL
00157 
00158 static int
00159 unset_addr_list_init(UnsetAddrList* uslist, int size)
00160 {
00161   UnsetAddr* p;
00162 
00163   p = (UnsetAddr* )xmalloc(sizeof(UnsetAddr)* size);
00164   CHECK_NULL_RETURN_MEMERR(p);
00165   uslist->num   = 0;
00166   uslist->alloc = size;
00167   uslist->us    = p;
00168   return 0;
00169 }
00170 
00171 static void
00172 unset_addr_list_end(UnsetAddrList* uslist)
00173 {
00174   if (IS_NOT_NULL(uslist->us))
00175     xfree(uslist->us);
00176 }
00177 
00178 static int
00179 unset_addr_list_add(UnsetAddrList* uslist, int offset, struct _Node* node)
00180 {
00181   UnsetAddr* p;
00182   int size;
00183 
00184   if (uslist->num >= uslist->alloc) {
00185     size = uslist->alloc * 2;
00186     p = (UnsetAddr* )xrealloc(uslist->us, sizeof(UnsetAddr) * size);
00187     CHECK_NULL_RETURN_MEMERR(p);
00188     uslist->alloc = size;
00189     uslist->us    = p;
00190   }
00191 
00192   uslist->us[uslist->num].offset = offset;
00193   uslist->us[uslist->num].target = node;
00194   uslist->num++;
00195   return 0;
00196 }
00197 #endif /* USE_SUBEXP_CALL */
00198 
00199 
00200 static int
00201 add_opcode(regex_t* reg, int opcode)
00202 {
00203   BBUF_ADD1(reg, opcode);
00204   return 0;
00205 }
00206 
00207 #ifdef USE_COMBINATION_EXPLOSION_CHECK
00208 static int
00209 add_state_check_num(regex_t* reg, int num)
00210 {
00211   StateCheckNumType n = (StateCheckNumType )num;
00212 
00213   BBUF_ADD(reg, &n, SIZE_STATE_CHECK_NUM);
00214   return 0;
00215 }
00216 #endif
00217 
00218 static int
00219 add_rel_addr(regex_t* reg, int addr)
00220 {
00221   RelAddrType ra = (RelAddrType )addr;
00222 
00223   BBUF_ADD(reg, &ra, SIZE_RELADDR);
00224   return 0;
00225 }
00226 
00227 static int
00228 add_abs_addr(regex_t* reg, int addr)
00229 {
00230   AbsAddrType ra = (AbsAddrType )addr;
00231 
00232   BBUF_ADD(reg, &ra, SIZE_ABSADDR);
00233   return 0;
00234 }
00235 
00236 static int
00237 add_length(regex_t* reg, int len)
00238 {
00239   LengthType l = (LengthType )len;
00240 
00241   BBUF_ADD(reg, &l, SIZE_LENGTH);
00242   return 0;
00243 }
00244 
00245 static int
00246 add_mem_num(regex_t* reg, int num)
00247 {
00248   MemNumType n = (MemNumType )num;
00249 
00250   BBUF_ADD(reg, &n, SIZE_MEMNUM);
00251   return 0;
00252 }
00253 
00254 static int
00255 add_pointer(regex_t* reg, void* addr)
00256 {
00257   PointerType ptr = (PointerType )addr;
00258 
00259   BBUF_ADD(reg, &ptr, SIZE_POINTER);
00260   return 0;
00261 }
00262 
00263 static int
00264 add_option(regex_t* reg, OnigOptionType option)
00265 {
00266   BBUF_ADD(reg, &option, SIZE_OPTION);
00267   return 0;
00268 }
00269 
00270 static int
00271 add_opcode_rel_addr(regex_t* reg, int opcode, int addr)
00272 {
00273   int r;
00274 
00275   r = add_opcode(reg, opcode);
00276   if (r) return r;
00277   r = add_rel_addr(reg, addr);
00278   return r;
00279 }
00280 
00281 static int
00282 add_bytes(regex_t* reg, UChar* bytes, int len)
00283 {
00284   BBUF_ADD(reg, bytes, len);
00285   return 0;
00286 }
00287 
00288 static int
00289 add_bitset(regex_t* reg, BitSetRef bs)
00290 {
00291   BBUF_ADD(reg, bs, SIZE_BITSET);
00292   return 0;
00293 }
00294 
00295 static int
00296 add_opcode_option(regex_t* reg, int opcode, OnigOptionType option)
00297 {
00298   int r;
00299 
00300   r = add_opcode(reg, opcode);
00301   if (r) return r;
00302   r = add_option(reg, option);
00303   return r;
00304 }
00305 
00306 static int compile_length_tree(Node* node, regex_t* reg);
00307 static int compile_tree(Node* node, regex_t* reg);
00308 
00309 
00310 #define IS_NEED_STR_LEN_OP_EXACT(op) \
00311    ((op) == OP_EXACTN    || (op) == OP_EXACTMB2N ||\
00312     (op) == OP_EXACTMB3N || (op) == OP_EXACTMBN  || (op) == OP_EXACTN_IC)
00313 
00314 static int
00315 select_str_opcode(int mb_len, int str_len, int ignore_case)
00316 {
00317   int op;
00318 
00319   if (ignore_case) {
00320     switch (str_len) {
00321     case 1:  op = OP_EXACT1_IC; break;
00322     default: op = OP_EXACTN_IC; break;
00323     }
00324   }
00325   else {
00326     switch (mb_len) {
00327     case 1:
00328       switch (str_len) {
00329       case 1:  op = OP_EXACT1; break;
00330       case 2:  op = OP_EXACT2; break;
00331       case 3:  op = OP_EXACT3; break;
00332       case 4:  op = OP_EXACT4; break;
00333       case 5:  op = OP_EXACT5; break;
00334       default: op = OP_EXACTN; break;
00335       }
00336       break;
00337 
00338     case 2:
00339       switch (str_len) {
00340       case 1:  op = OP_EXACTMB2N1; break;
00341       case 2:  op = OP_EXACTMB2N2; break;
00342       case 3:  op = OP_EXACTMB2N3; break;
00343       default: op = OP_EXACTMB2N;  break;
00344       }
00345       break;
00346 
00347     case 3:
00348       op = OP_EXACTMB3N;
00349       break;
00350 
00351     default:
00352       op = OP_EXACTMBN;
00353       break;
00354     }
00355   }
00356   return op;
00357 }
00358 
00359 static int
00360 compile_tree_empty_check(Node* node, regex_t* reg, int empty_info)
00361 {
00362   int r;
00363   int saved_num_null_check = reg->num_null_check;
00364 
00365   if (empty_info != 0) {
00366     r = add_opcode(reg, OP_NULL_CHECK_START);
00367     if (r) return r;
00368     r = add_mem_num(reg, reg->num_null_check); /* NULL CHECK ID */
00369     if (r) return r;
00370     reg->num_null_check++;
00371   }
00372 
00373   r = compile_tree(node, reg);
00374   if (r) return r;
00375 
00376   if (empty_info != 0) {
00377     if (empty_info == NQ_TARGET_IS_EMPTY)
00378       r = add_opcode(reg, OP_NULL_CHECK_END);
00379     else if (empty_info == NQ_TARGET_IS_EMPTY_MEM)
00380       r = add_opcode(reg, OP_NULL_CHECK_END_MEMST);
00381     else if (empty_info == NQ_TARGET_IS_EMPTY_REC)
00382       r = add_opcode(reg, OP_NULL_CHECK_END_MEMST_PUSH);
00383 
00384     if (r) return r;
00385     r = add_mem_num(reg, saved_num_null_check); /* NULL CHECK ID */
00386   }
00387   return r;
00388 }
00389 
00390 #ifdef USE_SUBEXP_CALL
00391 static int
00392 compile_call(CallNode* node, regex_t* reg)
00393 {
00394   int r;
00395 
00396   r = add_opcode(reg, OP_CALL);
00397   if (r) return r;
00398   r = unset_addr_list_add(node->unset_addr_list, BBUF_GET_OFFSET_POS(reg),
00399                           node->target);
00400   if (r) return r;
00401   r = add_abs_addr(reg, 0 /*dummy addr.*/);
00402   return r;
00403 }
00404 #endif
00405 
00406 static int
00407 compile_tree_n_times(Node* node, int n, regex_t* reg)
00408 {
00409   int i, r;
00410 
00411   for (i = 0; i < n; i++) {
00412     r = compile_tree(node, reg);
00413     if (r) return r;
00414   }
00415   return 0;
00416 }
00417 
00418 static int
00419 add_compile_string_length(UChar* s ARG_UNUSED, int mb_len, OnigDistance str_len,
00420                           regex_t* reg ARG_UNUSED, int ignore_case)
00421 {
00422   int len;
00423   int op = select_str_opcode(mb_len, str_len, ignore_case);
00424 
00425   len = SIZE_OPCODE;
00426 
00427   if (op == OP_EXACTMBN)  len += SIZE_LENGTH;
00428   if (IS_NEED_STR_LEN_OP_EXACT(op))
00429     len += SIZE_LENGTH;
00430 
00431   len += mb_len * str_len;
00432   return len;
00433 }
00434 
00435 static int
00436 add_compile_string(UChar* s, int mb_len, int str_len,
00437                    regex_t* reg, int ignore_case)
00438 {
00439   int op = select_str_opcode(mb_len, str_len, ignore_case);
00440   add_opcode(reg, op);
00441 
00442   if (op == OP_EXACTMBN)
00443     add_length(reg, mb_len);
00444 
00445   if (IS_NEED_STR_LEN_OP_EXACT(op)) {
00446     if (op == OP_EXACTN_IC)
00447       add_length(reg, mb_len * str_len);
00448     else
00449       add_length(reg, str_len);
00450   }
00451 
00452   add_bytes(reg, s, mb_len * str_len);
00453   return 0;
00454 }
00455 
00456 
00457 static int
00458 compile_length_string_node(Node* node, regex_t* reg)
00459 {
00460   int rlen, r, len, prev_len, slen, ambig;
00461   OnigEncoding enc = reg->enc;
00462   UChar *p, *prev;
00463   StrNode* sn;
00464 
00465   sn = NSTR(node);
00466   if (sn->end <= sn->s)
00467     return 0;
00468 
00469   ambig = NSTRING_IS_AMBIG(node);
00470 
00471   p = prev = sn->s;
00472   prev_len = enclen(enc, p, sn->end);
00473   p += prev_len;
00474   slen = 1;
00475   rlen = 0;
00476 
00477   for (; p < sn->end; ) {
00478     len = enclen(enc, p, sn->end);
00479     if (len == prev_len) {
00480       slen++;
00481     }
00482     else {
00483       r = add_compile_string_length(prev, prev_len, slen, reg, ambig);
00484       rlen += r;
00485       prev = p;
00486       slen = 1;
00487       prev_len = len;
00488     }
00489     p += len;
00490   }
00491   r = add_compile_string_length(prev, prev_len, slen, reg, ambig);
00492   rlen += r;
00493   return rlen;
00494 }
00495 
00496 static int
00497 compile_length_string_raw_node(StrNode* sn, regex_t* reg)
00498 {
00499   if (sn->end <= sn->s)
00500     return 0;
00501 
00502   return add_compile_string_length(sn->s, 1 /* sb */, sn->end - sn->s, reg, 0);
00503 }
00504 
00505 static int
00506 compile_string_node(Node* node, regex_t* reg)
00507 {
00508   int r, len, prev_len, slen, ambig;
00509   OnigEncoding enc = reg->enc;
00510   UChar *p, *prev, *end;
00511   StrNode* sn;
00512 
00513   sn = NSTR(node);
00514   if (sn->end <= sn->s)
00515     return 0;
00516 
00517   end = sn->end;
00518   ambig = NSTRING_IS_AMBIG(node);
00519 
00520   p = prev = sn->s;
00521   prev_len = enclen(enc, p, end);
00522   p += prev_len;
00523   slen = 1;
00524 
00525   for (; p < end; ) {
00526     len = enclen(enc, p, end);
00527     if (len == prev_len) {
00528       slen++;
00529     }
00530     else {
00531       r = add_compile_string(prev, prev_len, slen, reg, ambig);
00532       if (r) return r;
00533 
00534       prev  = p;
00535       slen  = 1;
00536       prev_len = len;
00537     }
00538 
00539     p += len;
00540   }
00541   return add_compile_string(prev, prev_len, slen, reg, ambig);
00542 }
00543 
00544 static int
00545 compile_string_raw_node(StrNode* sn, regex_t* reg)
00546 {
00547   if (sn->end <= sn->s)
00548     return 0;
00549 
00550   return add_compile_string(sn->s, 1 /* sb */, sn->end - sn->s, reg, 0);
00551 }
00552 
00553 static int
00554 add_multi_byte_cclass(BBuf* mbuf, regex_t* reg)
00555 {
00556 #ifdef PLATFORM_UNALIGNED_WORD_ACCESS
00557   add_length(reg, mbuf->used);
00558   return add_bytes(reg, mbuf->p, mbuf->used);
00559 #else
00560   int r, pad_size;
00561   UChar* p = BBUF_GET_ADD_ADDRESS(reg) + SIZE_LENGTH;
00562 
00563   GET_ALIGNMENT_PAD_SIZE(p, pad_size);
00564   add_length(reg, mbuf->used + (WORD_ALIGNMENT_SIZE - 1));
00565   if (pad_size != 0) add_bytes(reg, PadBuf, pad_size);
00566 
00567   r = add_bytes(reg, mbuf->p, mbuf->used);
00568 
00569   /* padding for return value from compile_length_cclass_node() to be fix. */
00570   pad_size = (WORD_ALIGNMENT_SIZE - 1) - pad_size;
00571   if (pad_size != 0) add_bytes(reg, PadBuf, pad_size);
00572   return r;
00573 #endif
00574 }
00575 
00576 static int
00577 compile_length_cclass_node(CClassNode* cc, regex_t* reg)
00578 {
00579   int len;
00580 
00581   if (IS_NCCLASS_SHARE(cc)) {
00582     len = SIZE_OPCODE + SIZE_POINTER;
00583     return len;
00584   }
00585 
00586   if (IS_NULL(cc->mbuf)) {
00587     len = SIZE_OPCODE + SIZE_BITSET;
00588   }
00589   else {
00590     if (ONIGENC_MBC_MINLEN(reg->enc) > 1 || bitset_is_empty(cc->bs)) {
00591       len = SIZE_OPCODE;
00592     }
00593     else {
00594       len = SIZE_OPCODE + SIZE_BITSET;
00595     }
00596 #ifdef PLATFORM_UNALIGNED_WORD_ACCESS
00597     len += SIZE_LENGTH + cc->mbuf->used;
00598 #else
00599     len += SIZE_LENGTH + cc->mbuf->used + (WORD_ALIGNMENT_SIZE - 1);
00600 #endif
00601   }
00602 
00603   return len;
00604 }
00605 
00606 static int
00607 compile_cclass_node(CClassNode* cc, regex_t* reg)
00608 {
00609   int r;
00610 
00611   if (IS_NCCLASS_SHARE(cc)) {
00612     add_opcode(reg, OP_CCLASS_NODE);
00613     r = add_pointer(reg, cc);
00614     return r;
00615   }
00616 
00617   if (IS_NULL(cc->mbuf)) {
00618     if (IS_NCCLASS_NOT(cc))
00619       add_opcode(reg, OP_CCLASS_NOT);
00620     else
00621       add_opcode(reg, OP_CCLASS);
00622 
00623     r = add_bitset(reg, cc->bs);
00624   }
00625   else {
00626     if (ONIGENC_MBC_MINLEN(reg->enc) > 1 || bitset_is_empty(cc->bs)) {
00627       if (IS_NCCLASS_NOT(cc))
00628         add_opcode(reg, OP_CCLASS_MB_NOT);
00629       else
00630         add_opcode(reg, OP_CCLASS_MB);
00631 
00632       r = add_multi_byte_cclass(cc->mbuf, reg);
00633     }
00634     else {
00635       if (IS_NCCLASS_NOT(cc))
00636         add_opcode(reg, OP_CCLASS_MIX_NOT);
00637       else
00638         add_opcode(reg, OP_CCLASS_MIX);
00639 
00640       r = add_bitset(reg, cc->bs);
00641       if (r) return r;
00642       r = add_multi_byte_cclass(cc->mbuf, reg);
00643     }
00644   }
00645 
00646   return r;
00647 }
00648 
00649 static int
00650 entry_repeat_range(regex_t* reg, int id, int lower, int upper)
00651 {
00652 #define REPEAT_RANGE_ALLOC  4
00653 
00654   OnigRepeatRange* p;
00655 
00656   if (reg->repeat_range_alloc == 0) {
00657     p = (OnigRepeatRange* )xmalloc(sizeof(OnigRepeatRange) * REPEAT_RANGE_ALLOC);
00658     CHECK_NULL_RETURN_MEMERR(p);
00659     reg->repeat_range = p;
00660     reg->repeat_range_alloc = REPEAT_RANGE_ALLOC;
00661   }
00662   else if (reg->repeat_range_alloc <= id) {
00663     int n;
00664     n = reg->repeat_range_alloc + REPEAT_RANGE_ALLOC;
00665     p = (OnigRepeatRange* )xrealloc(reg->repeat_range,
00666                                     sizeof(OnigRepeatRange) * n);
00667     CHECK_NULL_RETURN_MEMERR(p);
00668     reg->repeat_range = p;
00669     reg->repeat_range_alloc = n;
00670   }
00671   else {
00672     p = reg->repeat_range;
00673   }
00674 
00675   p[id].lower = lower;
00676   p[id].upper = (IS_REPEAT_INFINITE(upper) ? 0x7fffffff : upper);
00677   return 0;
00678 }
00679 
00680 static int
00681 compile_range_repeat_node(QtfrNode* qn, int target_len, int empty_info,
00682                           regex_t* reg)
00683 {
00684   int r;
00685   int num_repeat = reg->num_repeat;
00686 
00687   r = add_opcode(reg, qn->greedy ? OP_REPEAT : OP_REPEAT_NG);
00688   if (r) return r;
00689   r = add_mem_num(reg, num_repeat); /* OP_REPEAT ID */
00690   reg->num_repeat++;
00691   if (r) return r;
00692   r = add_rel_addr(reg, target_len + SIZE_OP_REPEAT_INC);
00693   if (r) return r;
00694 
00695   r = entry_repeat_range(reg, num_repeat, qn->lower, qn->upper);
00696   if (r) return r;
00697 
00698   r = compile_tree_empty_check(qn->target, reg, empty_info);
00699   if (r) return r;
00700 
00701   if (
00702 #ifdef USE_SUBEXP_CALL
00703       reg->num_call > 0 ||
00704 #endif
00705       IS_QUANTIFIER_IN_REPEAT(qn)) {
00706     r = add_opcode(reg, qn->greedy ? OP_REPEAT_INC_SG : OP_REPEAT_INC_NG_SG);
00707   }
00708   else {
00709     r = add_opcode(reg, qn->greedy ? OP_REPEAT_INC : OP_REPEAT_INC_NG);
00710   }
00711   if (r) return r;
00712   r = add_mem_num(reg, num_repeat); /* OP_REPEAT ID */
00713   return r;
00714 }
00715 
00716 static int
00717 is_anychar_star_quantifier(QtfrNode* qn)
00718 {
00719   if (qn->greedy && IS_REPEAT_INFINITE(qn->upper) &&
00720       NTYPE(qn->target) == NT_CANY)
00721     return 1;
00722   else
00723     return 0;
00724 }
00725 
00726 #define QUANTIFIER_EXPAND_LIMIT_SIZE   50
00727 #define CKN_ON   (ckn > 0)
00728 
00729 #ifdef USE_COMBINATION_EXPLOSION_CHECK
00730 
00731 static int
00732 compile_length_quantifier_node(QtfrNode* qn, regex_t* reg)
00733 {
00734   int len, mod_tlen, cklen;
00735   int ckn;
00736   int infinite = IS_REPEAT_INFINITE(qn->upper);
00737   int empty_info = qn->target_empty_info;
00738   int tlen = compile_length_tree(qn->target, reg);
00739 
00740   if (tlen < 0) return tlen;
00741 
00742   ckn = ((reg->num_comb_exp_check > 0) ? qn->comb_exp_check_num : 0);
00743 
00744   cklen = (CKN_ON ? SIZE_STATE_CHECK_NUM: 0);
00745 
00746   /* anychar repeat */
00747   if (NTYPE(qn->target) == NT_CANY) {
00748     if (qn->greedy && infinite) {
00749       if (IS_NOT_NULL(qn->next_head_exact) && !CKN_ON)
00750         return SIZE_OP_ANYCHAR_STAR_PEEK_NEXT + tlen * qn->lower + cklen;
00751       else
00752         return SIZE_OP_ANYCHAR_STAR + tlen * qn->lower + cklen;
00753     }
00754   }
00755 
00756   if (empty_info != 0)
00757     mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END);
00758   else
00759     mod_tlen = tlen;
00760 
00761   if (infinite && qn->lower <= 1) {
00762     if (qn->greedy) {
00763       if (qn->lower == 1)
00764         len = SIZE_OP_JUMP;
00765       else
00766         len = 0;
00767 
00768       len += SIZE_OP_PUSH + cklen + mod_tlen + SIZE_OP_JUMP;
00769     }
00770     else {
00771       if (qn->lower == 0)
00772         len = SIZE_OP_JUMP;
00773       else
00774         len = 0;
00775 
00776       len += mod_tlen + SIZE_OP_PUSH + cklen;
00777     }
00778   }
00779   else if (qn->upper == 0) {
00780     if (qn->is_refered != 0) /* /(?<n>..){0}/ */
00781       len = SIZE_OP_JUMP + tlen;
00782     else
00783       len = 0;
00784   }
00785   else if (qn->upper == 1 && qn->greedy) {
00786     if (qn->lower == 0) {
00787       if (CKN_ON) {
00788         len = SIZE_OP_STATE_CHECK_PUSH + tlen;
00789       }
00790       else {
00791         len = SIZE_OP_PUSH + tlen;
00792       }
00793     }
00794     else {
00795       len = tlen;
00796     }
00797   }
00798   else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' */
00799     len = SIZE_OP_PUSH + cklen + SIZE_OP_JUMP + tlen;
00800   }
00801   else {
00802     len = SIZE_OP_REPEAT_INC
00803         + mod_tlen + SIZE_OPCODE + SIZE_RELADDR + SIZE_MEMNUM;
00804     if (CKN_ON)
00805       len += SIZE_OP_STATE_CHECK;
00806   }
00807 
00808   return len;
00809 }
00810 
00811 static int
00812 compile_quantifier_node(QtfrNode* qn, regex_t* reg)
00813 {
00814   int r, mod_tlen;
00815   int ckn;
00816   int infinite = IS_REPEAT_INFINITE(qn->upper);
00817   int empty_info = qn->target_empty_info;
00818   int tlen = compile_length_tree(qn->target, reg);
00819 
00820   if (tlen < 0) return tlen;
00821 
00822   ckn = ((reg->num_comb_exp_check > 0) ? qn->comb_exp_check_num : 0);
00823 
00824   if (is_anychar_star_quantifier(qn)) {
00825     r = compile_tree_n_times(qn->target, qn->lower, reg);
00826     if (r) return r;
00827     if (IS_NOT_NULL(qn->next_head_exact) && !CKN_ON) {
00828       if (IS_MULTILINE(reg->options))
00829         r = add_opcode(reg, OP_ANYCHAR_ML_STAR_PEEK_NEXT);
00830       else
00831         r = add_opcode(reg, OP_ANYCHAR_STAR_PEEK_NEXT);
00832       if (r) return r;
00833       if (CKN_ON) {
00834         r = add_state_check_num(reg, ckn);
00835         if (r) return r;
00836       }
00837 
00838       return add_bytes(reg, NSTR(qn->next_head_exact)->s, 1);
00839     }
00840     else {
00841       if (IS_MULTILINE(reg->options)) {
00842         r = add_opcode(reg, (CKN_ON ?
00843                                OP_STATE_CHECK_ANYCHAR_ML_STAR
00844                              : OP_ANYCHAR_ML_STAR));
00845       }
00846       else {
00847         r = add_opcode(reg, (CKN_ON ?
00848                                OP_STATE_CHECK_ANYCHAR_STAR
00849                              : OP_ANYCHAR_STAR));
00850       }
00851       if (r) return r;
00852       if (CKN_ON)
00853         r = add_state_check_num(reg, ckn);
00854 
00855       return r;
00856     }
00857   }
00858 
00859   if (empty_info != 0)
00860     mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END);
00861   else
00862     mod_tlen = tlen;
00863 
00864   if (infinite && qn->lower <= 1) {
00865     if (qn->greedy) {
00866       if (qn->lower == 1) {
00867         r = add_opcode_rel_addr(reg, OP_JUMP,
00868                         (CKN_ON ? SIZE_OP_STATE_CHECK_PUSH : SIZE_OP_PUSH));
00869         if (r) return r;
00870       }
00871 
00872       if (CKN_ON) {
00873         r = add_opcode(reg, OP_STATE_CHECK_PUSH);
00874         if (r) return r;
00875         r = add_state_check_num(reg, ckn);
00876         if (r) return r;
00877         r = add_rel_addr(reg, mod_tlen + SIZE_OP_JUMP);
00878       }
00879       else {
00880         r = add_opcode_rel_addr(reg, OP_PUSH, mod_tlen + SIZE_OP_JUMP);
00881       }
00882       if (r) return r;
00883       r = compile_tree_empty_check(qn->target, reg, empty_info);
00884       if (r) return r;
00885       r = add_opcode_rel_addr(reg, OP_JUMP,
00886               -(mod_tlen + (int )SIZE_OP_JUMP
00887                 + (int )(CKN_ON ? SIZE_OP_STATE_CHECK_PUSH : SIZE_OP_PUSH)));
00888     }
00889     else {
00890       if (qn->lower == 0) {
00891         r = add_opcode_rel_addr(reg, OP_JUMP, mod_tlen);
00892         if (r) return r;
00893       }
00894       r = compile_tree_empty_check(qn->target, reg, empty_info);
00895       if (r) return r;
00896       if (CKN_ON) {
00897         r = add_opcode(reg, OP_STATE_CHECK_PUSH_OR_JUMP);
00898         if (r) return r;
00899         r = add_state_check_num(reg, ckn);
00900         if (r) return r;
00901         r = add_rel_addr(reg,
00902                  -(mod_tlen + (int )SIZE_OP_STATE_CHECK_PUSH_OR_JUMP));
00903       }
00904       else
00905         r = add_opcode_rel_addr(reg, OP_PUSH, -(mod_tlen + (int )SIZE_OP_PUSH));
00906     }
00907   }
00908   else if (qn->upper == 0) {
00909     if (qn->is_refered != 0) { /* /(?<n>..){0}/ */
00910       r = add_opcode_rel_addr(reg, OP_JUMP, tlen);
00911       if (r) return r;
00912       r = compile_tree(qn->target, reg);
00913     }
00914     else
00915       r = 0;
00916   }
00917   else if (qn->upper == 1 && qn->greedy) {
00918     if (qn->lower == 0) {
00919       if (CKN_ON) {
00920         r = add_opcode(reg, OP_STATE_CHECK_PUSH);
00921         if (r) return r;
00922         r = add_state_check_num(reg, ckn);
00923         if (r) return r;
00924         r = add_rel_addr(reg, tlen);
00925       }
00926       else {
00927         r = add_opcode_rel_addr(reg, OP_PUSH, tlen);
00928       }
00929       if (r) return r;
00930     }
00931 
00932     r = compile_tree(qn->target, reg);
00933   }
00934   else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' */
00935     if (CKN_ON) {
00936       r = add_opcode(reg, OP_STATE_CHECK_PUSH);
00937       if (r) return r;
00938       r = add_state_check_num(reg, ckn);
00939       if (r) return r;
00940       r = add_rel_addr(reg, SIZE_OP_JUMP);
00941     }
00942     else {
00943       r = add_opcode_rel_addr(reg, OP_PUSH, SIZE_OP_JUMP);
00944     }
00945 
00946     if (r) return r;
00947     r = add_opcode_rel_addr(reg, OP_JUMP, tlen);
00948     if (r) return r;
00949     r = compile_tree(qn->target, reg);
00950   }
00951   else {
00952     r = compile_range_repeat_node(qn, mod_tlen, empty_info, reg);
00953     if (CKN_ON) {
00954       if (r) return r;
00955       r = add_opcode(reg, OP_STATE_CHECK);
00956       if (r) return r;
00957       r = add_state_check_num(reg, ckn);
00958     }
00959   }
00960   return r;
00961 }
00962 
00963 #else /* USE_COMBINATION_EXPLOSION_CHECK */
00964 
00965 static int
00966 compile_length_quantifier_node(QtfrNode* qn, regex_t* reg)
00967 {
00968   int len, mod_tlen;
00969   int infinite = IS_REPEAT_INFINITE(qn->upper);
00970   int empty_info = qn->target_empty_info;
00971   int tlen = compile_length_tree(qn->target, reg);
00972 
00973   if (tlen < 0) return tlen;
00974 
00975   /* anychar repeat */
00976   if (NTYPE(qn->target) == NT_CANY) {
00977     if (qn->greedy && infinite) {
00978       if (IS_NOT_NULL(qn->next_head_exact))
00979         return SIZE_OP_ANYCHAR_STAR_PEEK_NEXT + tlen * qn->lower;
00980       else
00981         return SIZE_OP_ANYCHAR_STAR + tlen * qn->lower;
00982     }
00983   }
00984 
00985   if (empty_info != 0)
00986     mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END);
00987   else
00988     mod_tlen = tlen;
00989 
00990   if (infinite &&
00991       (qn->lower <= 1 || tlen * qn->lower <= QUANTIFIER_EXPAND_LIMIT_SIZE)) {
00992     if (qn->lower == 1 && tlen > QUANTIFIER_EXPAND_LIMIT_SIZE) {
00993       len = SIZE_OP_JUMP;
00994     }
00995     else {
00996       len = tlen * qn->lower;
00997     }
00998 
00999     if (qn->greedy) {
01000       if (IS_NOT_NULL(qn->head_exact))
01001         len += SIZE_OP_PUSH_OR_JUMP_EXACT1 + mod_tlen + SIZE_OP_JUMP;
01002       else if (IS_NOT_NULL(qn->next_head_exact))
01003         len += SIZE_OP_PUSH_IF_PEEK_NEXT + mod_tlen + SIZE_OP_JUMP;
01004       else
01005         len += SIZE_OP_PUSH + mod_tlen + SIZE_OP_JUMP;
01006     }
01007     else
01008       len += SIZE_OP_JUMP + mod_tlen + SIZE_OP_PUSH;
01009   }
01010   else if (qn->upper == 0 && qn->is_refered != 0) { /* /(?<n>..){0}/ */
01011     len = SIZE_OP_JUMP + tlen;
01012   }
01013   else if (!infinite && qn->greedy &&
01014            (qn->upper == 1 || (tlen + SIZE_OP_PUSH) * qn->upper
01015                                       <= QUANTIFIER_EXPAND_LIMIT_SIZE)) {
01016     len = tlen * qn->lower;
01017     len += (SIZE_OP_PUSH + tlen) * (qn->upper - qn->lower);
01018   }
01019   else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' */
01020     len = SIZE_OP_PUSH + SIZE_OP_JUMP + tlen;
01021   }
01022   else {
01023     len = SIZE_OP_REPEAT_INC
01024         + mod_tlen + SIZE_OPCODE + SIZE_RELADDR + SIZE_MEMNUM;
01025   }
01026 
01027   return len;
01028 }
01029 
01030 static int
01031 compile_quantifier_node(QtfrNode* qn, regex_t* reg)
01032 {
01033   int i, r, mod_tlen;
01034   int infinite = IS_REPEAT_INFINITE(qn->upper);
01035   int empty_info = qn->target_empty_info;
01036   int tlen = compile_length_tree(qn->target, reg);
01037 
01038   if (tlen < 0) return tlen;
01039 
01040   if (is_anychar_star_quantifier(qn)) {
01041     r = compile_tree_n_times(qn->target, qn->lower, reg);
01042     if (r) return r;
01043     if (IS_NOT_NULL(qn->next_head_exact)) {
01044       if (IS_MULTILINE(reg->options))
01045         r = add_opcode(reg, OP_ANYCHAR_ML_STAR_PEEK_NEXT);
01046       else
01047         r = add_opcode(reg, OP_ANYCHAR_STAR_PEEK_NEXT);
01048       if (r) return r;
01049       return add_bytes(reg, NSTR(qn->next_head_exact)->s, 1);
01050     }
01051     else {
01052       if (IS_MULTILINE(reg->options))
01053         return add_opcode(reg, OP_ANYCHAR_ML_STAR);
01054       else
01055         return add_opcode(reg, OP_ANYCHAR_STAR);
01056     }
01057   }
01058 
01059   if (empty_info != 0)
01060     mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END);
01061   else
01062     mod_tlen = tlen;
01063 
01064   if (infinite &&
01065       (qn->lower <= 1 || tlen * qn->lower <= QUANTIFIER_EXPAND_LIMIT_SIZE)) {
01066     if (qn->lower == 1 && tlen > QUANTIFIER_EXPAND_LIMIT_SIZE) {
01067       if (qn->greedy) {
01068         if (IS_NOT_NULL(qn->head_exact))
01069           r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_PUSH_OR_JUMP_EXACT1);
01070         else if (IS_NOT_NULL(qn->next_head_exact))
01071           r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_PUSH_IF_PEEK_NEXT);
01072         else
01073           r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_PUSH);
01074       }
01075       else {
01076         r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_JUMP);
01077       }
01078       if (r) return r;
01079     }
01080     else {
01081       r = compile_tree_n_times(qn->target, qn->lower, reg);
01082       if (r) return r;
01083     }
01084 
01085     if (qn->greedy) {
01086       if (IS_NOT_NULL(qn->head_exact)) {
01087         r = add_opcode_rel_addr(reg, OP_PUSH_OR_JUMP_EXACT1,
01088                              mod_tlen + SIZE_OP_JUMP);
01089         if (r) return r;
01090         add_bytes(reg, NSTR(qn->head_exact)->s, 1);
01091         r = compile_tree_empty_check(qn->target, reg, empty_info);
01092         if (r) return r;
01093         r = add_opcode_rel_addr(reg, OP_JUMP,
01094         -(mod_tlen + (int )SIZE_OP_JUMP + (int )SIZE_OP_PUSH_OR_JUMP_EXACT1));
01095       }
01096       else if (IS_NOT_NULL(qn->next_head_exact)) {
01097         r = add_opcode_rel_addr(reg, OP_PUSH_IF_PEEK_NEXT,
01098                                 mod_tlen + SIZE_OP_JUMP);
01099         if (r) return r;
01100         add_bytes(reg, NSTR(qn->next_head_exact)->s, 1);
01101         r = compile_tree_empty_check(qn->target, reg, empty_info);
01102         if (r) return r;
01103         r = add_opcode_rel_addr(reg, OP_JUMP,
01104           -(mod_tlen + (int )SIZE_OP_JUMP + (int )SIZE_OP_PUSH_IF_PEEK_NEXT));
01105       }
01106       else {
01107         r = add_opcode_rel_addr(reg, OP_PUSH, mod_tlen + SIZE_OP_JUMP);
01108         if (r) return r;
01109         r = compile_tree_empty_check(qn->target, reg, empty_info);
01110         if (r) return r;
01111         r = add_opcode_rel_addr(reg, OP_JUMP,
01112                      -(mod_tlen + (int )SIZE_OP_JUMP + (int )SIZE_OP_PUSH));
01113       }
01114     }
01115     else {
01116       r = add_opcode_rel_addr(reg, OP_JUMP, mod_tlen);
01117       if (r) return r;
01118       r = compile_tree_empty_check(qn->target, reg, empty_info);
01119       if (r) return r;
01120       r = add_opcode_rel_addr(reg, OP_PUSH, -(mod_tlen + (int )SIZE_OP_PUSH));
01121     }
01122   }
01123   else if (qn->upper == 0 && qn->is_refered != 0) { /* /(?<n>..){0}/ */
01124     r = add_opcode_rel_addr(reg, OP_JUMP, tlen);
01125     if (r) return r;
01126     r = compile_tree(qn->target, reg);
01127   }
01128   else if (!infinite && qn->greedy &&
01129            (qn->upper == 1 || (tlen + SIZE_OP_PUSH) * qn->upper
01130                                   <= QUANTIFIER_EXPAND_LIMIT_SIZE)) {
01131     int n = qn->upper - qn->lower;
01132 
01133     r = compile_tree_n_times(qn->target, qn->lower, reg);
01134     if (r) return r;
01135 
01136     for (i = 0; i < n; i++) {
01137       r = add_opcode_rel_addr(reg, OP_PUSH,
01138                            (n - i) * tlen + (n - i - 1) * SIZE_OP_PUSH);
01139       if (r) return r;
01140       r = compile_tree(qn->target, reg);
01141       if (r) return r;
01142     }
01143   }
01144   else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' */
01145     r = add_opcode_rel_addr(reg, OP_PUSH, SIZE_OP_JUMP);
01146     if (r) return r;
01147     r = add_opcode_rel_addr(reg, OP_JUMP, tlen);
01148     if (r) return r;
01149     r = compile_tree(qn->target, reg);
01150   }
01151   else {
01152     r = compile_range_repeat_node(qn, mod_tlen, empty_info, reg);
01153   }
01154   return r;
01155 }
01156 #endif /* USE_COMBINATION_EXPLOSION_CHECK */
01157 
01158 static int
01159 compile_length_option_node(EncloseNode* node, regex_t* reg)
01160 {
01161   int tlen;
01162   OnigOptionType prev = reg->options;
01163 
01164   reg->options = node->option;
01165   tlen = compile_length_tree(node->target, reg);
01166   reg->options = prev;
01167 
01168   if (tlen < 0) return tlen;
01169 
01170   if (IS_DYNAMIC_OPTION(prev ^ node->option)) {
01171     return SIZE_OP_SET_OPTION_PUSH + SIZE_OP_SET_OPTION + SIZE_OP_FAIL
01172            + tlen + SIZE_OP_SET_OPTION;
01173   }
01174   else
01175     return tlen;
01176 }
01177 
01178 static int
01179 compile_option_node(EncloseNode* node, regex_t* reg)
01180 {
01181   int r;
01182   OnigOptionType prev = reg->options;
01183 
01184   if (IS_DYNAMIC_OPTION(prev ^ node->option)) {
01185     r = add_opcode_option(reg, OP_SET_OPTION_PUSH, node->option);
01186     if (r) return r;
01187     r = add_opcode_option(reg, OP_SET_OPTION, prev);
01188     if (r) return r;
01189     r = add_opcode(reg, OP_FAIL);
01190     if (r) return r;
01191   }
01192 
01193   reg->options = node->option;
01194   r = compile_tree(node->target, reg);
01195   reg->options = prev;
01196 
01197   if (IS_DYNAMIC_OPTION(prev ^ node->option)) {
01198     if (r) return r;
01199     r = add_opcode_option(reg, OP_SET_OPTION, prev);
01200   }
01201   return r;
01202 }
01203 
01204 static int
01205 compile_length_enclose_node(EncloseNode* node, regex_t* reg)
01206 {
01207   int len;
01208   int tlen;
01209 
01210   if (node->type == ENCLOSE_OPTION)
01211     return compile_length_option_node(node, reg);
01212 
01213   if (node->target) {
01214     tlen = compile_length_tree(node->target, reg);
01215     if (tlen < 0) return tlen;
01216   }
01217   else
01218     tlen = 0;
01219 
01220   switch (node->type) {
01221   case ENCLOSE_MEMORY:
01222 #ifdef USE_SUBEXP_CALL
01223     if (IS_ENCLOSE_CALLED(node)) {
01224       len = SIZE_OP_MEMORY_START_PUSH + tlen
01225           + SIZE_OP_CALL + SIZE_OP_JUMP + SIZE_OP_RETURN;
01226       if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum))
01227         len += (IS_ENCLOSE_RECURSION(node)
01228                 ? SIZE_OP_MEMORY_END_PUSH_REC : SIZE_OP_MEMORY_END_PUSH);
01229       else
01230         len += (IS_ENCLOSE_RECURSION(node)
01231                 ? SIZE_OP_MEMORY_END_REC : SIZE_OP_MEMORY_END);
01232     }
01233     else
01234 #endif
01235     {
01236       if (BIT_STATUS_AT(reg->bt_mem_start, node->regnum))
01237         len = SIZE_OP_MEMORY_START_PUSH;
01238       else
01239         len = SIZE_OP_MEMORY_START;
01240 
01241       len += tlen + (BIT_STATUS_AT(reg->bt_mem_end, node->regnum)
01242                      ? SIZE_OP_MEMORY_END_PUSH : SIZE_OP_MEMORY_END);
01243     }
01244     break;
01245 
01246   case ENCLOSE_STOP_BACKTRACK:
01247     if (IS_ENCLOSE_STOP_BT_SIMPLE_REPEAT(node)) {
01248       QtfrNode* qn = NQTFR(node->target);
01249       tlen = compile_length_tree(qn->target, reg);
01250       if (tlen < 0) return tlen;
01251 
01252       len = tlen * qn->lower
01253           + SIZE_OP_PUSH + tlen + SIZE_OP_POP + SIZE_OP_JUMP;
01254     }
01255     else {
01256       len = SIZE_OP_PUSH_STOP_BT + tlen + SIZE_OP_POP_STOP_BT;
01257     }
01258     break;
01259 
01260   default:
01261     return ONIGERR_TYPE_BUG;
01262     break;
01263   }
01264 
01265   return len;
01266 }
01267 
01268 static int get_char_length_tree(Node* node, regex_t* reg, int* len);
01269 
01270 static int
01271 compile_enclose_node(EncloseNode* node, regex_t* reg)
01272 {
01273   int r, len;
01274 
01275   if (node->type == ENCLOSE_OPTION)
01276     return compile_option_node(node, reg);
01277 
01278   switch (node->type) {
01279   case ENCLOSE_MEMORY:
01280 #ifdef USE_SUBEXP_CALL
01281     if (IS_ENCLOSE_CALLED(node)) {
01282       r = add_opcode(reg, OP_CALL);
01283       if (r) return r;
01284       node->call_addr = BBUF_GET_OFFSET_POS(reg) + SIZE_ABSADDR + SIZE_OP_JUMP;
01285       node->state |= NST_ADDR_FIXED;
01286       r = add_abs_addr(reg, (int )node->call_addr);
01287       if (r) return r;
01288       len = compile_length_tree(node->target, reg);
01289       len += (SIZE_OP_MEMORY_START_PUSH + SIZE_OP_RETURN);
01290       if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum))
01291         len += (IS_ENCLOSE_RECURSION(node)
01292                 ? SIZE_OP_MEMORY_END_PUSH_REC : SIZE_OP_MEMORY_END_PUSH);
01293       else
01294         len += (IS_ENCLOSE_RECURSION(node)
01295                 ? SIZE_OP_MEMORY_END_REC : SIZE_OP_MEMORY_END);
01296 
01297       r = add_opcode_rel_addr(reg, OP_JUMP, len);
01298       if (r) return r;
01299     }
01300 #endif
01301     if (BIT_STATUS_AT(reg->bt_mem_start, node->regnum))
01302       r = add_opcode(reg, OP_MEMORY_START_PUSH);
01303     else
01304       r = add_opcode(reg, OP_MEMORY_START);
01305     if (r) return r;
01306     r = add_mem_num(reg, node->regnum);
01307     if (r) return r;
01308     r = compile_tree(node->target, reg);
01309     if (r) return r;
01310 #ifdef USE_SUBEXP_CALL
01311     if (IS_ENCLOSE_CALLED(node)) {
01312       if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum))
01313         r = add_opcode(reg, (IS_ENCLOSE_RECURSION(node)
01314                              ? OP_MEMORY_END_PUSH_REC : OP_MEMORY_END_PUSH));
01315       else
01316         r = add_opcode(reg, (IS_ENCLOSE_RECURSION(node)
01317                              ? OP_MEMORY_END_REC : OP_MEMORY_END));
01318 
01319       if (r) return r;
01320       r = add_mem_num(reg, node->regnum);
01321       if (r) return r;
01322       r = add_opcode(reg, OP_RETURN);
01323     }
01324     else
01325 #endif
01326     {
01327       if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum))
01328         r = add_opcode(reg, OP_MEMORY_END_PUSH);
01329       else
01330         r = add_opcode(reg, OP_MEMORY_END);
01331       if (r) return r;
01332       r = add_mem_num(reg, node->regnum);
01333     }
01334     break;
01335 
01336   case ENCLOSE_STOP_BACKTRACK:
01337     if (IS_ENCLOSE_STOP_BT_SIMPLE_REPEAT(node)) {
01338       QtfrNode* qn = NQTFR(node->target);
01339       r = compile_tree_n_times(qn->target, qn->lower, reg);
01340       if (r) return r;
01341 
01342       len = compile_length_tree(qn->target, reg);
01343       if (len < 0) return len;
01344 
01345       r = add_opcode_rel_addr(reg, OP_PUSH, len + SIZE_OP_POP + SIZE_OP_JUMP);
01346       if (r) return r;
01347       r = compile_tree(qn->target, reg);
01348       if (r) return r;
01349       r = add_opcode(reg, OP_POP);
01350       if (r) return r;
01351       r = add_opcode_rel_addr(reg, OP_JUMP,
01352          -((int )SIZE_OP_PUSH + len + (int )SIZE_OP_POP + (int )SIZE_OP_JUMP));
01353     }
01354     else {
01355       r = add_opcode(reg, OP_PUSH_STOP_BT);
01356       if (r) return r;
01357       r = compile_tree(node->target, reg);
01358       if (r) return r;
01359       r = add_opcode(reg, OP_POP_STOP_BT);
01360     }
01361     break;
01362 
01363   default:
01364     return ONIGERR_TYPE_BUG;
01365     break;
01366   }
01367 
01368   return r;
01369 }
01370 
01371 static int
01372 compile_length_anchor_node(AnchorNode* node, regex_t* reg)
01373 {
01374   int len;
01375   int tlen = 0;
01376 
01377   if (node->target) {
01378     tlen = compile_length_tree(node->target, reg);
01379     if (tlen < 0) return tlen;
01380   }
01381 
01382   switch (node->type) {
01383   case ANCHOR_PREC_READ:
01384     len = SIZE_OP_PUSH_POS + tlen + SIZE_OP_POP_POS;
01385     break;
01386   case ANCHOR_PREC_READ_NOT:
01387     len = SIZE_OP_PUSH_POS_NOT + tlen + SIZE_OP_FAIL_POS;
01388     break;
01389   case ANCHOR_LOOK_BEHIND:
01390     len = SIZE_OP_LOOK_BEHIND + tlen;
01391     break;
01392   case ANCHOR_LOOK_BEHIND_NOT:
01393     len = SIZE_OP_PUSH_LOOK_BEHIND_NOT + tlen + SIZE_OP_FAIL_LOOK_BEHIND_NOT;
01394     break;
01395 
01396   default:
01397     len = SIZE_OPCODE;
01398     break;
01399   }
01400 
01401   return len;
01402 }
01403 
01404 static int
01405 compile_anchor_node(AnchorNode* node, regex_t* reg)
01406 {
01407   int r, len;
01408 
01409   switch (node->type) {
01410   case ANCHOR_BEGIN_BUF:      r = add_opcode(reg, OP_BEGIN_BUF);      break;
01411   case ANCHOR_END_BUF:        r = add_opcode(reg, OP_END_BUF);        break;
01412   case ANCHOR_BEGIN_LINE:     r = add_opcode(reg, OP_BEGIN_LINE);     break;
01413   case ANCHOR_END_LINE:       r = add_opcode(reg, OP_END_LINE);       break;
01414   case ANCHOR_SEMI_END_BUF:   r = add_opcode(reg, OP_SEMI_END_BUF);   break;
01415   case ANCHOR_BEGIN_POSITION: r = add_opcode(reg, OP_BEGIN_POSITION); break;
01416 
01417   case ANCHOR_WORD_BOUND:     r = add_opcode(reg, OP_WORD_BOUND);     break;
01418   case ANCHOR_NOT_WORD_BOUND: r = add_opcode(reg, OP_NOT_WORD_BOUND); break;
01419 #ifdef USE_WORD_BEGIN_END
01420   case ANCHOR_WORD_BEGIN:     r = add_opcode(reg, OP_WORD_BEGIN);     break;
01421   case ANCHOR_WORD_END:       r = add_opcode(reg, OP_WORD_END);       break;
01422 #endif
01423 
01424   case ANCHOR_PREC_READ:
01425     r = add_opcode(reg, OP_PUSH_POS);
01426     if (r) return r;
01427     r = compile_tree(node->target, reg);
01428     if (r) return r;
01429     r = add_opcode(reg, OP_POP_POS);
01430     break;
01431 
01432   case ANCHOR_PREC_READ_NOT:
01433     len = compile_length_tree(node->target, reg);
01434     if (len < 0) return len;
01435     r = add_opcode_rel_addr(reg, OP_PUSH_POS_NOT, len + SIZE_OP_FAIL_POS);
01436     if (r) return r;
01437     r = compile_tree(node->target, reg);
01438     if (r) return r;
01439     r = add_opcode(reg, OP_FAIL_POS);
01440     break;
01441 
01442   case ANCHOR_LOOK_BEHIND:
01443     {
01444       int n;
01445       r = add_opcode(reg, OP_LOOK_BEHIND);
01446       if (r) return r;
01447       if (node->char_len < 0) {
01448         r = get_char_length_tree(node->target, reg, &n);
01449         if (r) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
01450       }
01451       else
01452         n = node->char_len;
01453       r = add_length(reg, n);
01454       if (r) return r;
01455       r = compile_tree(node->target, reg);
01456     }
01457     break;
01458 
01459   case ANCHOR_LOOK_BEHIND_NOT:
01460     {
01461       int n;
01462       len = compile_length_tree(node->target, reg);
01463       r = add_opcode_rel_addr(reg, OP_PUSH_LOOK_BEHIND_NOT,
01464                            len + SIZE_OP_FAIL_LOOK_BEHIND_NOT);
01465       if (r) return r;
01466       if (node->char_len < 0) {
01467         r = get_char_length_tree(node->target, reg, &n);
01468         if (r) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
01469       }
01470       else
01471         n = node->char_len;
01472       r = add_length(reg, n);
01473       if (r) return r;
01474       r = compile_tree(node->target, reg);
01475       if (r) return r;
01476       r = add_opcode(reg, OP_FAIL_LOOK_BEHIND_NOT);
01477     }
01478     break;
01479 
01480   default:
01481     return ONIGERR_TYPE_BUG;
01482     break;
01483   }
01484 
01485   return r;
01486 }
01487 
01488 static int
01489 compile_length_tree(Node* node, regex_t* reg)
01490 {
01491   int len, type, r;
01492 
01493   type = NTYPE(node);
01494   switch (type) {
01495   case NT_LIST:
01496     len = 0;
01497     do {
01498       r = compile_length_tree(NCAR(node), reg);
01499       if (r < 0) return r;
01500       len += r;
01501     } while (IS_NOT_NULL(node = NCDR(node)));
01502     r = len;
01503     break;
01504 
01505   case NT_ALT:
01506     {
01507       int n;
01508 
01509       n = r = 0;
01510       do {
01511         r += compile_length_tree(NCAR(node), reg);
01512         n++;
01513       } while (IS_NOT_NULL(node = NCDR(node)));
01514       r += (SIZE_OP_PUSH + SIZE_OP_JUMP) * (n - 1);
01515     }
01516     break;
01517 
01518   case NT_STR:
01519     if (NSTRING_IS_RAW(node))
01520       r = compile_length_string_raw_node(NSTR(node), reg);
01521     else
01522       r = compile_length_string_node(node, reg);
01523     break;
01524 
01525   case NT_CCLASS:
01526     r = compile_length_cclass_node(NCCLASS(node), reg);
01527     break;
01528 
01529   case NT_CTYPE:
01530   case NT_CANY:
01531     r = SIZE_OPCODE;
01532     break;
01533 
01534   case NT_BREF:
01535     {
01536       BRefNode* br = NBREF(node);
01537 
01538 #ifdef USE_BACKREF_WITH_LEVEL
01539       if (IS_BACKREF_NEST_LEVEL(br)) {
01540         r = SIZE_OPCODE + SIZE_OPTION + SIZE_LENGTH +
01541             SIZE_LENGTH + (SIZE_MEMNUM * br->back_num);
01542       }
01543       else
01544 #endif
01545       if (br->back_num == 1) {
01546         r = ((!IS_IGNORECASE(reg->options) && br->back_static[0] <= 2)
01547              ? SIZE_OPCODE : (SIZE_OPCODE + SIZE_MEMNUM));
01548       }
01549       else {
01550         r = SIZE_OPCODE + SIZE_LENGTH + (SIZE_MEMNUM * br->back_num);
01551       }
01552     }
01553     break;
01554 
01555 #ifdef USE_SUBEXP_CALL
01556   case NT_CALL:
01557     r = SIZE_OP_CALL;
01558     break;
01559 #endif
01560 
01561   case NT_QTFR:
01562     r = compile_length_quantifier_node(NQTFR(node), reg);
01563     break;
01564 
01565   case NT_ENCLOSE:
01566     r = compile_length_enclose_node(NENCLOSE(node), reg);
01567     break;
01568 
01569   case NT_ANCHOR:
01570     r = compile_length_anchor_node(NANCHOR(node), reg);
01571     break;
01572 
01573   default:
01574     return ONIGERR_TYPE_BUG;
01575     break;
01576   }
01577 
01578   return r;
01579 }
01580 
01581 static int
01582 compile_tree(Node* node, regex_t* reg)
01583 {
01584   int n, type, len, pos, r = 0;
01585 
01586   type = NTYPE(node);
01587   switch (type) {
01588   case NT_LIST:
01589     do {
01590       r = compile_tree(NCAR(node), reg);
01591     } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
01592     break;
01593 
01594   case NT_ALT:
01595     {
01596       Node* x = node;
01597       len = 0;
01598       do {
01599         len += compile_length_tree(NCAR(x), reg);
01600         if (NCDR(x) != NULL) {
01601           len += SIZE_OP_PUSH + SIZE_OP_JUMP;
01602         }
01603       } while (IS_NOT_NULL(x = NCDR(x)));
01604       pos = reg->used + len;  /* goal position */
01605 
01606       do {
01607         len = compile_length_tree(NCAR(node), reg);
01608         if (IS_NOT_NULL(NCDR(node))) {
01609           r = add_opcode_rel_addr(reg, OP_PUSH, len + SIZE_OP_JUMP);
01610           if (r) break;
01611         }
01612         r = compile_tree(NCAR(node), reg);
01613         if (r) break;
01614         if (IS_NOT_NULL(NCDR(node))) {
01615           len = pos - (reg->used + SIZE_OP_JUMP);
01616           r = add_opcode_rel_addr(reg, OP_JUMP, len);
01617           if (r) break;
01618         }
01619       } while (IS_NOT_NULL(node = NCDR(node)));
01620     }
01621     break;
01622 
01623   case NT_STR:
01624     if (NSTRING_IS_RAW(node))
01625       r = compile_string_raw_node(NSTR(node), reg);
01626     else
01627       r = compile_string_node(node, reg);
01628     break;
01629 
01630   case NT_CCLASS:
01631     r = compile_cclass_node(NCCLASS(node), reg);
01632     break;
01633 
01634   case NT_CTYPE:
01635     {
01636       int op;
01637 
01638       switch (NCTYPE(node)->ctype) {
01639       case ONIGENC_CTYPE_WORD:
01640         if (NCTYPE(node)->not != 0)  op = OP_NOT_WORD;
01641         else                         op = OP_WORD;
01642         break;
01643       default:
01644         return ONIGERR_TYPE_BUG;
01645         break;
01646       }
01647       r = add_opcode(reg, op);
01648     }
01649     break;
01650 
01651   case NT_CANY:
01652     if (IS_MULTILINE(reg->options))
01653       r = add_opcode(reg, OP_ANYCHAR_ML);
01654     else
01655       r = add_opcode(reg, OP_ANYCHAR);
01656     break;
01657 
01658   case NT_BREF:
01659     {
01660       BRefNode* br = NBREF(node);
01661 
01662 #ifdef USE_BACKREF_WITH_LEVEL
01663       if (IS_BACKREF_NEST_LEVEL(br)) {
01664         r = add_opcode(reg, OP_BACKREF_WITH_LEVEL);
01665         if (r) return r;
01666         r = add_option(reg, (reg->options & ONIG_OPTION_IGNORECASE));
01667         if (r) return r;
01668         r = add_length(reg, br->nest_level);
01669         if (r) return r;
01670 
01671         goto add_bacref_mems;
01672       }
01673       else
01674 #endif
01675       if (br->back_num == 1) {
01676         n = br->back_static[0];
01677         if (IS_IGNORECASE(reg->options)) {
01678           r = add_opcode(reg, OP_BACKREFN_IC);
01679           if (r) return r;
01680           r = add_mem_num(reg, n);
01681         }
01682         else {
01683           switch (n) {
01684           case 1:  r = add_opcode(reg, OP_BACKREF1); break;
01685           case 2:  r = add_opcode(reg, OP_BACKREF2); break;
01686           default:
01687             r = add_opcode(reg, OP_BACKREFN);
01688             if (r) return r;
01689             r = add_mem_num(reg, n);
01690             break;
01691           }
01692         }
01693       }
01694       else {
01695         int i;
01696         int* p;
01697 
01698         if (IS_IGNORECASE(reg->options)) {
01699           r = add_opcode(reg, OP_BACKREF_MULTI_IC);
01700         }
01701         else {
01702           r = add_opcode(reg, OP_BACKREF_MULTI);
01703         }
01704         if (r) return r;
01705 
01706 #ifdef USE_BACKREF_WITH_LEVEL
01707       add_bacref_mems:
01708 #endif
01709         r = add_length(reg, br->back_num);
01710         if (r) return r;
01711         p = BACKREFS_P(br);
01712         for (i = br->back_num - 1; i >= 0; i--) {
01713           r = add_mem_num(reg, p[i]);
01714           if (r) return r;
01715         }
01716       }
01717     }
01718     break;
01719 
01720 #ifdef USE_SUBEXP_CALL
01721   case NT_CALL:
01722     r = compile_call(NCALL(node), reg);
01723     break;
01724 #endif
01725 
01726   case NT_QTFR:
01727     r = compile_quantifier_node(NQTFR(node), reg);
01728     break;
01729 
01730   case NT_ENCLOSE:
01731     r = compile_enclose_node(NENCLOSE(node), reg);
01732     break;
01733 
01734   case NT_ANCHOR:
01735     r = compile_anchor_node(NANCHOR(node), reg);
01736     break;
01737 
01738   default:
01739 #ifdef ONIG_DEBUG
01740     fprintf(stderr, "compile_tree: undefined node type %d\n", NTYPE(node));
01741 #endif
01742     break;
01743   }
01744 
01745   return r;
01746 }
01747 
01748 #ifdef USE_NAMED_GROUP
01749 
01750 static int
01751 noname_disable_map(Node** plink, GroupNumRemap* map, int* counter)
01752 {
01753   int r = 0;
01754   Node* node = *plink;
01755 
01756   switch (NTYPE(node)) {
01757   case NT_LIST:
01758   case NT_ALT:
01759     do {
01760       r = noname_disable_map(&(NCAR(node)), map, counter);
01761     } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
01762     break;
01763 
01764   case NT_QTFR:
01765     {
01766       Node** ptarget = &(NQTFR(node)->target);
01767       Node*  old = *ptarget;
01768       r = noname_disable_map(ptarget, map, counter);
01769       if (*ptarget != old && NTYPE(*ptarget) == NT_QTFR) {
01770         onig_reduce_nested_quantifier(node, *ptarget);
01771       }
01772     }
01773     break;
01774 
01775   case NT_ENCLOSE:
01776     {
01777       EncloseNode* en = NENCLOSE(node);
01778       if (en->type == ENCLOSE_MEMORY) {
01779         if (IS_ENCLOSE_NAMED_GROUP(en)) {
01780           (*counter)++;
01781           map[en->regnum].new_val = *counter;
01782           en->regnum = *counter;
01783           r = noname_disable_map(&(en->target), map, counter);
01784         }
01785         else {
01786           *plink = en->target;
01787           en->target = NULL_NODE;
01788           onig_node_free(node);
01789           r = noname_disable_map(plink, map, counter);
01790         }
01791       }
01792       else
01793         r = noname_disable_map(&(en->target), map, counter);
01794     }
01795     break;
01796 
01797   case NT_ANCHOR:
01798     {
01799       AnchorNode* an = NANCHOR(node);
01800       switch (an->type) {
01801       case ANCHOR_PREC_READ:
01802       case ANCHOR_PREC_READ_NOT:
01803       case ANCHOR_LOOK_BEHIND:
01804       case ANCHOR_LOOK_BEHIND_NOT:
01805         r = noname_disable_map(&(an->target), map, counter);
01806         break;
01807       }
01808     }
01809     break;
01810 
01811   default:
01812     break;
01813   }
01814 
01815   return r;
01816 }
01817 
01818 static int
01819 renumber_node_backref(Node* node, GroupNumRemap* map)
01820 {
01821   int i, pos, n, old_num;
01822   int *backs;
01823   BRefNode* bn = NBREF(node);
01824 
01825   if (! IS_BACKREF_NAME_REF(bn))
01826     return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED;
01827 
01828   old_num = bn->back_num;
01829   if (IS_NULL(bn->back_dynamic))
01830     backs = bn->back_static;
01831   else
01832     backs = bn->back_dynamic;
01833 
01834   for (i = 0, pos = 0; i < old_num; i++) {
01835     n = map[backs[i]].new_val;
01836     if (n > 0) {
01837       backs[pos] = n;
01838       pos++;
01839     }
01840   }
01841 
01842   bn->back_num = pos;
01843   return 0;
01844 }
01845 
01846 static int
01847 renumber_by_map(Node* node, GroupNumRemap* map)
01848 {
01849   int r = 0;
01850 
01851   switch (NTYPE(node)) {
01852   case NT_LIST:
01853   case NT_ALT:
01854     do {
01855       r = renumber_by_map(NCAR(node), map);
01856     } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
01857     break;
01858   case NT_QTFR:
01859     r = renumber_by_map(NQTFR(node)->target, map);
01860     break;
01861   case NT_ENCLOSE:
01862     r = renumber_by_map(NENCLOSE(node)->target, map);
01863     break;
01864 
01865   case NT_BREF:
01866     r = renumber_node_backref(node, map);
01867     break;
01868 
01869   case NT_ANCHOR:
01870     {
01871       AnchorNode* an = NANCHOR(node);
01872       switch (an->type) {
01873       case ANCHOR_PREC_READ:
01874       case ANCHOR_PREC_READ_NOT:
01875       case ANCHOR_LOOK_BEHIND:
01876       case ANCHOR_LOOK_BEHIND_NOT:
01877         r = renumber_by_map(an->target, map);
01878         break;
01879       }
01880     }
01881     break;
01882 
01883   default:
01884     break;
01885   }
01886 
01887   return r;
01888 }
01889 
01890 static int
01891 numbered_ref_check(Node* node)
01892 {
01893   int r = 0;
01894 
01895   switch (NTYPE(node)) {
01896   case NT_LIST:
01897   case NT_ALT:
01898     do {
01899       r = numbered_ref_check(NCAR(node));
01900     } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
01901     break;
01902   case NT_QTFR:
01903     r = numbered_ref_check(NQTFR(node)->target);
01904     break;
01905   case NT_ENCLOSE:
01906     r = numbered_ref_check(NENCLOSE(node)->target);
01907     break;
01908 
01909   case NT_BREF:
01910     if (! IS_BACKREF_NAME_REF(NBREF(node)))
01911       return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED;
01912     break;
01913 
01914   default:
01915     break;
01916   }
01917 
01918   return r;
01919 }
01920 
01921 static int
01922 disable_noname_group_capture(Node** root, regex_t* reg, ScanEnv* env)
01923 {
01924   int r, i, pos, counter;
01925   BitStatusType loc;
01926   GroupNumRemap* map;
01927 
01928   map = (GroupNumRemap* )xalloca(sizeof(GroupNumRemap) * (env->num_mem + 1));
01929   CHECK_NULL_RETURN_MEMERR(map);
01930   for (i = 1; i <= env->num_mem; i++) {
01931     map[i].new_val = 0;
01932   }
01933   counter = 0;
01934   r = noname_disable_map(root, map, &counter);
01935   if (r != 0) return r;
01936 
01937   r = renumber_by_map(*root, map);
01938   if (r != 0) return r;
01939 
01940   for (i = 1, pos = 1; i <= env->num_mem; i++) {
01941     if (map[i].new_val > 0) {
01942       SCANENV_MEM_NODES(env)[pos] = SCANENV_MEM_NODES(env)[i];
01943       pos++;
01944     }
01945   }
01946 
01947   loc = env->capture_history;
01948   BIT_STATUS_CLEAR(env->capture_history);
01949   for (i = 1; i <= ONIG_MAX_CAPTURE_HISTORY_GROUP; i++) {
01950     if (BIT_STATUS_AT(loc, i)) {
01951       BIT_STATUS_ON_AT_SIMPLE(env->capture_history, map[i].new_val);
01952     }
01953   }
01954 
01955   env->num_mem = env->num_named;
01956   reg->num_mem = env->num_named;
01957 
01958   return onig_renumber_name_table(reg, map);
01959 }
01960 #endif /* USE_NAMED_GROUP */
01961 
01962 #ifdef USE_SUBEXP_CALL
01963 static int
01964 unset_addr_list_fix(UnsetAddrList* uslist, regex_t* reg)
01965 {
01966   int i, offset;
01967   EncloseNode* en;
01968   AbsAddrType addr;
01969 
01970   for (i = 0; i < uslist->num; i++) {
01971     en = NENCLOSE(uslist->us[i].target);
01972     if (! IS_ENCLOSE_ADDR_FIXED(en)) return ONIGERR_PARSER_BUG;
01973     addr = en->call_addr;
01974     offset = uslist->us[i].offset;
01975 
01976     BBUF_WRITE(reg, offset, &addr, SIZE_ABSADDR);
01977   }
01978   return 0;
01979 }
01980 #endif
01981 
01982 #ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT
01983 static int
01984 quantifiers_memory_node_info(Node* node)
01985 {
01986   int r = 0;
01987 
01988   switch (NTYPE(node)) {
01989   case NT_LIST:
01990   case NT_ALT:
01991     {
01992       int v;
01993       do {
01994         v = quantifiers_memory_node_info(NCAR(node));
01995         if (v > r) r = v;
01996       } while (v >= 0 && IS_NOT_NULL(node = NCDR(node)));
01997     }
01998     break;
01999 
02000 #ifdef USE_SUBEXP_CALL
02001   case NT_CALL:
02002     if (IS_CALL_RECURSION(NCALL(node))) {
02003       return NQ_TARGET_IS_EMPTY_REC; /* tiny version */
02004     }
02005     else
02006       r = quantifiers_memory_node_info(NCALL(node)->target);
02007     break;
02008 #endif
02009 
02010   case NT_QTFR:
02011     {
02012       QtfrNode* qn = NQTFR(node);
02013       if (qn->upper != 0) {
02014         r = quantifiers_memory_node_info(qn->target);
02015       }
02016     }
02017     break;
02018 
02019   case NT_ENCLOSE:
02020     {
02021       EncloseNode* en = NENCLOSE(node);
02022       switch (en->type) {
02023       case ENCLOSE_MEMORY:
02024         return NQ_TARGET_IS_EMPTY_MEM;
02025         break;
02026 
02027       case ENCLOSE_OPTION:
02028       case ENCLOSE_STOP_BACKTRACK:
02029         r = quantifiers_memory_node_info(en->target);
02030         break;
02031       default:
02032         break;
02033       }
02034     }
02035     break;
02036 
02037   case NT_BREF:
02038   case NT_STR:
02039   case NT_CTYPE:
02040   case NT_CCLASS:
02041   case NT_CANY:
02042   case NT_ANCHOR:
02043   default:
02044     break;
02045   }
02046 
02047   return r;
02048 }
02049 #endif /* USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT */
02050 
02051 static int
02052 get_min_match_length(Node* node, OnigDistance *min, ScanEnv* env)
02053 {
02054   OnigDistance tmin;
02055   int r = 0;
02056 
02057   *min = 0;
02058   switch (NTYPE(node)) {
02059   case NT_BREF:
02060     {
02061       int i;
02062       int* backs;
02063       Node** nodes = SCANENV_MEM_NODES(env);
02064       BRefNode* br = NBREF(node);
02065       if (br->state & NST_RECURSION) break;
02066 
02067       backs = BACKREFS_P(br);
02068       if (backs[0] > env->num_mem)  return ONIGERR_INVALID_BACKREF;
02069       r = get_min_match_length(nodes[backs[0]], min, env);
02070       if (r != 0) break;
02071       for (i = 1; i < br->back_num; i++) {
02072         if (backs[i] > env->num_mem)  return ONIGERR_INVALID_BACKREF;
02073         r = get_min_match_length(nodes[backs[i]], &tmin, env);
02074         if (r != 0) break;
02075         if (*min > tmin) *min = tmin;
02076       }
02077     }
02078     break;
02079 
02080 #ifdef USE_SUBEXP_CALL
02081   case NT_CALL:
02082     if (IS_CALL_RECURSION(NCALL(node))) {
02083       EncloseNode* en = NENCLOSE(NCALL(node)->target);
02084       if (IS_ENCLOSE_MIN_FIXED(en))
02085         *min = en->min_len;
02086     }
02087     else
02088       r = get_min_match_length(NCALL(node)->target, min, env);
02089     break;
02090 #endif
02091 
02092   case NT_LIST:
02093     do {
02094       r = get_min_match_length(NCAR(node), &tmin, env);
02095       if (r == 0) *min += tmin;
02096     } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
02097     break;
02098 
02099   case NT_ALT:
02100     {
02101       Node *x, *y;
02102       y = node;
02103       do {
02104         x = NCAR(y);
02105         r = get_min_match_length(x, &tmin, env);
02106         if (r != 0) break;
02107         if (y == node) *min = tmin;
02108         else if (*min > tmin) *min = tmin;
02109       } while (r == 0 && IS_NOT_NULL(y = NCDR(y)));
02110     }
02111     break;
02112 
02113   case NT_STR:
02114     {
02115       StrNode* sn = NSTR(node);
02116       *min = sn->end - sn->s;
02117     }
02118     break;
02119 
02120   case NT_CTYPE:
02121     *min = 1;
02122     break;
02123 
02124   case NT_CCLASS:
02125   case NT_CANY:
02126     *min = 1;
02127     break;
02128 
02129   case NT_QTFR:
02130     {
02131       QtfrNode* qn = NQTFR(node);
02132 
02133       if (qn->lower > 0) {
02134         r = get_min_match_length(qn->target, min, env);
02135         if (r == 0)
02136           *min = distance_multiply(*min, qn->lower);
02137       }
02138     }
02139     break;
02140 
02141   case NT_ENCLOSE:
02142     {
02143       EncloseNode* en = NENCLOSE(node);
02144       switch (en->type) {
02145       case ENCLOSE_MEMORY:
02146 #ifdef USE_SUBEXP_CALL
02147         if (IS_ENCLOSE_MIN_FIXED(en))
02148           *min = en->min_len;
02149         else {
02150           r = get_min_match_length(en->target, min, env);
02151           if (r == 0) {
02152             en->min_len = *min;
02153             SET_ENCLOSE_STATUS(node, NST_MIN_FIXED);
02154           }
02155         }
02156         break;
02157 #endif
02158       case ENCLOSE_OPTION:
02159       case ENCLOSE_STOP_BACKTRACK:
02160         r = get_min_match_length(en->target, min, env);
02161         break;
02162       }
02163     }
02164     break;
02165 
02166   case NT_ANCHOR:
02167   default:
02168     break;
02169   }
02170 
02171   return r;
02172 }
02173 
02174 static int
02175 get_max_match_length(Node* node, OnigDistance *max, ScanEnv* env)
02176 {
02177   OnigDistance tmax;
02178   int r = 0;
02179 
02180   *max = 0;
02181   switch (NTYPE(node)) {
02182   case NT_LIST:
02183     do {
02184       r = get_max_match_length(NCAR(node), &tmax, env);
02185       if (r == 0)
02186         *max = distance_add(*max, tmax);
02187     } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
02188     break;
02189 
02190   case NT_ALT:
02191     do {
02192       r = get_max_match_length(NCAR(node), &tmax, env);
02193       if (r == 0 && *max < tmax) *max = tmax;
02194     } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
02195     break;
02196 
02197   case NT_STR:
02198     {
02199       StrNode* sn = NSTR(node);
02200       *max = sn->end - sn->s;
02201     }
02202     break;
02203 
02204   case NT_CTYPE:
02205     *max = ONIGENC_MBC_MAXLEN_DIST(env->enc);
02206     break;
02207 
02208   case NT_CCLASS:
02209   case NT_CANY:
02210     *max = ONIGENC_MBC_MAXLEN_DIST(env->enc);
02211     break;
02212 
02213   case NT_BREF:
02214     {
02215       int i;
02216       int* backs;
02217       Node** nodes = SCANENV_MEM_NODES(env);
02218       BRefNode* br = NBREF(node);
02219       if (br->state & NST_RECURSION) {
02220         *max = ONIG_INFINITE_DISTANCE;
02221         break;
02222       }
02223       backs = BACKREFS_P(br);
02224       for (i = 0; i < br->back_num; i++) {
02225         if (backs[i] > env->num_mem)  return ONIGERR_INVALID_BACKREF;
02226         r = get_max_match_length(nodes[backs[i]], &tmax, env);
02227         if (r != 0) break;
02228         if (*max < tmax) *max = tmax;
02229       }
02230     }
02231     break;
02232 
02233 #ifdef USE_SUBEXP_CALL
02234   case NT_CALL:
02235     if (! IS_CALL_RECURSION(NCALL(node)))
02236       r = get_max_match_length(NCALL(node)->target, max, env);
02237     else
02238       *max = ONIG_INFINITE_DISTANCE;
02239     break;
02240 #endif
02241 
02242   case NT_QTFR:
02243     {
02244       QtfrNode* qn = NQTFR(node);
02245 
02246       if (qn->upper != 0) {
02247         r = get_max_match_length(qn->target, max, env);
02248         if (r == 0 && *max != 0) {
02249           if (! IS_REPEAT_INFINITE(qn->upper))
02250             *max = distance_multiply(*max, qn->upper);
02251           else
02252             *max = ONIG_INFINITE_DISTANCE;
02253         }
02254       }
02255     }
02256     break;
02257 
02258   case NT_ENCLOSE:
02259     {
02260       EncloseNode* en = NENCLOSE(node);
02261       switch (en->type) {
02262       case ENCLOSE_MEMORY:
02263 #ifdef USE_SUBEXP_CALL
02264         if (IS_ENCLOSE_MAX_FIXED(en))
02265           *max = en->max_len;
02266         else {
02267           r = get_max_match_length(en->target, max, env);
02268           if (r == 0) {
02269             en->max_len = *max;
02270             SET_ENCLOSE_STATUS(node, NST_MAX_FIXED);
02271           }
02272         }
02273         break;
02274 #endif
02275       case ENCLOSE_OPTION:
02276       case ENCLOSE_STOP_BACKTRACK:
02277         r = get_max_match_length(en->target, max, env);
02278         break;
02279       }
02280     }
02281     break;
02282 
02283   case NT_ANCHOR:
02284   default:
02285     break;
02286   }
02287 
02288   return r;
02289 }
02290 
02291 #define GET_CHAR_LEN_VARLEN           -1
02292 #define GET_CHAR_LEN_TOP_ALT_VARLEN   -2
02293 
02294 /* fixed size pattern node only */
02295 static int
02296 get_char_length_tree1(Node* node, regex_t* reg, int* len, int level)
02297 {
02298   int tlen;
02299   int r = 0;
02300 
02301   level++;
02302   *len = 0;
02303   switch (NTYPE(node)) {
02304   case NT_LIST:
02305     do {
02306       r = get_char_length_tree1(NCAR(node), reg, &tlen, level);
02307       if (r == 0)
02308         *len = distance_add(*len, tlen);
02309     } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
02310     break;
02311 
02312   case NT_ALT:
02313     {
02314       int tlen2;
02315       int varlen = 0;
02316 
02317       r = get_char_length_tree1(NCAR(node), reg, &tlen, level);
02318       while (r == 0 && IS_NOT_NULL(node = NCDR(node))) {
02319         r = get_char_length_tree1(NCAR(node), reg, &tlen2, level);
02320         if (r == 0) {
02321           if (tlen != tlen2)
02322             varlen = 1;
02323         }
02324       }
02325       if (r == 0) {
02326         if (varlen != 0) {
02327           if (level == 1)
02328             r = GET_CHAR_LEN_TOP_ALT_VARLEN;
02329           else
02330             r = GET_CHAR_LEN_VARLEN;
02331         }
02332         else
02333           *len = tlen;
02334       }
02335     }
02336     break;
02337 
02338   case NT_STR:
02339     {
02340       StrNode* sn = NSTR(node);
02341       UChar *s = sn->s;
02342       while (s < sn->end) {
02343         s += enclen(reg->enc, s, sn->end);
02344         (*len)++;
02345       }
02346     }
02347     break;
02348 
02349   case NT_QTFR:
02350     {
02351       QtfrNode* qn = NQTFR(node);
02352       if (qn->lower == qn->upper) {
02353         r = get_char_length_tree1(qn->target, reg, &tlen, level);
02354         if (r == 0)
02355           *len = distance_multiply(tlen, qn->lower);
02356       }
02357       else
02358         r = GET_CHAR_LEN_VARLEN;
02359     }
02360     break;
02361 
02362 #ifdef USE_SUBEXP_CALL
02363   case NT_CALL:
02364     if (! IS_CALL_RECURSION(NCALL(node)))
02365       r = get_char_length_tree1(NCALL(node)->target, reg, len, level);
02366     else
02367       r = GET_CHAR_LEN_VARLEN;
02368     break;
02369 #endif
02370 
02371   case NT_CTYPE:
02372     *len = 1;
02373     break;
02374 
02375   case NT_CCLASS:
02376   case NT_CANY:
02377     *len = 1;
02378     break;
02379 
02380   case NT_ENCLOSE:
02381     {
02382       EncloseNode* en = NENCLOSE(node);
02383       switch (en->type) {
02384       case ENCLOSE_MEMORY:
02385 #ifdef USE_SUBEXP_CALL
02386         if (IS_ENCLOSE_CLEN_FIXED(en))
02387           *len = en->char_len;
02388         else {
02389           r = get_char_length_tree1(en->target, reg, len, level);
02390           if (r == 0) {
02391             en->char_len = *len;
02392             SET_ENCLOSE_STATUS(node, NST_CLEN_FIXED);
02393           }
02394         }
02395         break;
02396 #endif
02397       case ENCLOSE_OPTION:
02398       case ENCLOSE_STOP_BACKTRACK:
02399         r = get_char_length_tree1(en->target, reg, len, level);
02400         break;
02401       default:
02402         break;
02403       }
02404     }
02405     break;
02406 
02407   case NT_ANCHOR:
02408     break;
02409 
02410   default:
02411     r = GET_CHAR_LEN_VARLEN;
02412     break;
02413   }
02414 
02415   return r;
02416 }
02417 
02418 static int
02419 get_char_length_tree(Node* node, regex_t* reg, int* len)
02420 {
02421   return get_char_length_tree1(node, reg, len, 0);
02422 }
02423 
02424 /* x is not included y ==>  1 : 0 */
02425 static int
02426 is_not_included(Node* x, Node* y, regex_t* reg)
02427 {
02428   int i, len;
02429   OnigCodePoint code;
02430   UChar *p, c;
02431   int ytype;
02432 
02433  retry:
02434   ytype = NTYPE(y);
02435   switch (NTYPE(x)) {
02436   case NT_CTYPE:
02437     {
02438       switch (ytype) {
02439       case NT_CTYPE:
02440         if (NCTYPE(y)->ctype == NCTYPE(x)->ctype &&
02441             NCTYPE(y)->not   != NCTYPE(x)->not)
02442           return 1;
02443         else
02444           return 0;
02445         break;
02446 
02447       case NT_CCLASS:
02448       swap:
02449         {
02450           Node* tmp;
02451           tmp = x; x = y; y = tmp;
02452           goto retry;
02453         }
02454         break;
02455 
02456       case NT_STR:
02457         goto swap;
02458         break;
02459 
02460       default:
02461         break;
02462       }
02463     }
02464     break;
02465 
02466   case NT_CCLASS:
02467     {
02468       CClassNode* xc = NCCLASS(x);
02469       switch (ytype) {
02470       case NT_CTYPE:
02471         switch (NCTYPE(y)->ctype) {
02472         case ONIGENC_CTYPE_WORD:
02473           if (NCTYPE(y)->not == 0) {
02474             if (IS_NULL(xc->mbuf) && !IS_NCCLASS_NOT(xc)) {
02475               for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
02476                 if (BITSET_AT(xc->bs, i)) {
02477                   if (IS_CODE_SB_WORD(reg->enc, i)) return 0;
02478                 }
02479               }
02480               return 1;
02481             }
02482             return 0;
02483           }
02484           else {
02485             for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
02486               if (! IS_CODE_SB_WORD(reg->enc, i)) {
02487                 if (!IS_NCCLASS_NOT(xc)) {
02488                   if (BITSET_AT(xc->bs, i))
02489                     return 0;
02490                 }
02491                 else {
02492                   if (! BITSET_AT(xc->bs, i))
02493                     return 0;
02494                 }
02495               }
02496             }
02497             return 1;
02498           }
02499           break;
02500 
02501         default:
02502           break;
02503         }
02504         break;
02505 
02506       case NT_CCLASS:
02507         {
02508           int v;
02509           CClassNode* yc = NCCLASS(y);
02510 
02511           for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
02512             v = BITSET_AT(xc->bs, i);
02513             if ((v != 0 && !IS_NCCLASS_NOT(xc)) ||
02514                 (v == 0 && IS_NCCLASS_NOT(xc))) {
02515               v = BITSET_AT(yc->bs, i);
02516               if ((v != 0 && !IS_NCCLASS_NOT(yc)) ||
02517                   (v == 0 && IS_NCCLASS_NOT(yc)))
02518                 return 0;
02519             }
02520           }
02521           if ((IS_NULL(xc->mbuf) && !IS_NCCLASS_NOT(xc)) ||
02522               (IS_NULL(yc->mbuf) && !IS_NCCLASS_NOT(yc)))
02523             return 1;
02524           return 0;
02525         }
02526         break;
02527 
02528       case NT_STR:
02529         goto swap;
02530         break;
02531 
02532       default:
02533         break;
02534       }
02535     }
02536     break;
02537 
02538   case NT_STR:
02539     {
02540       StrNode* xs = NSTR(x);
02541       if (NSTRING_LEN(x) == 0)
02542         break;
02543 
02544       c = *(xs->s);
02545       switch (ytype) {
02546       case NT_CTYPE:
02547         switch (NCTYPE(y)->ctype) {
02548         case ONIGENC_CTYPE_WORD:
02549           if (ONIGENC_IS_MBC_WORD(reg->enc, xs->s, xs->end))
02550             return NCTYPE(y)->not;
02551           else
02552             return !(NCTYPE(y)->not);
02553           break;
02554         default:
02555           break;
02556         }
02557         break;
02558 
02559       case NT_CCLASS:
02560         {
02561           CClassNode* cc = NCCLASS(y);
02562 
02563           code = ONIGENC_MBC_TO_CODE(reg->enc, xs->s,
02564                                      xs->s + ONIGENC_MBC_MAXLEN(reg->enc));
02565           return (onig_is_code_in_cc(reg->enc, code, cc) != 0 ? 0 : 1);
02566         }
02567         break;
02568 
02569       case NT_STR:
02570         {
02571           UChar *q;
02572           StrNode* ys = NSTR(y);
02573           len = NSTRING_LEN(x);
02574           if (len > NSTRING_LEN(y)) len = NSTRING_LEN(y);
02575           if (NSTRING_IS_AMBIG(x) || NSTRING_IS_AMBIG(y)) {
02576             /* tiny version */
02577             return 0;
02578           }
02579           else {
02580             for (i = 0, p = ys->s, q = xs->s; i < len; i++, p++, q++) {
02581               if (*p != *q) return 1;
02582             }
02583           }
02584         }
02585         break;
02586 
02587       default:
02588         break;
02589       }
02590     }
02591     break;
02592 
02593   default:
02594     break;
02595   }
02596 
02597   return 0;
02598 }
02599 
02600 static Node*
02601 get_head_value_node(Node* node, int exact, regex_t* reg)
02602 {
02603   Node* n = NULL_NODE;
02604 
02605   switch (NTYPE(node)) {
02606   case NT_BREF:
02607   case NT_ALT:
02608   case NT_CANY:
02609 #ifdef USE_SUBEXP_CALL
02610   case NT_CALL:
02611 #endif
02612     break;
02613 
02614   case NT_CTYPE:
02615   case NT_CCLASS:
02616     if (exact == 0) {
02617       n = node;
02618     }
02619     break;
02620 
02621   case NT_LIST:
02622     n = get_head_value_node(NCAR(node), exact, reg);
02623     break;
02624 
02625   case NT_STR:
02626     {
02627       StrNode* sn = NSTR(node);
02628 
02629       if (sn->end <= sn->s)
02630         break;
02631 
02632       if (exact != 0 &&
02633           !NSTRING_IS_RAW(node) && IS_IGNORECASE(reg->options)) {
02634       }
02635       else {
02636         n = node;
02637       }
02638     }
02639     break;
02640 
02641   case NT_QTFR:
02642     {
02643       QtfrNode* qn = NQTFR(node);
02644       if (qn->lower > 0) {
02645         if (IS_NOT_NULL(qn->head_exact))
02646           n = qn->head_exact;
02647         else
02648           n = get_head_value_node(qn->target, exact, reg);
02649       }
02650     }
02651     break;
02652 
02653   case NT_ENCLOSE:
02654     {
02655       EncloseNode* en = NENCLOSE(node);
02656       switch (en->type) {
02657       case ENCLOSE_OPTION:
02658         {
02659           OnigOptionType options = reg->options;
02660 
02661           reg->options = NENCLOSE(node)->option;
02662           n = get_head_value_node(NENCLOSE(node)->target, exact, reg);
02663           reg->options = options;
02664         }
02665         break;
02666 
02667       case ENCLOSE_MEMORY:
02668       case ENCLOSE_STOP_BACKTRACK:
02669         n = get_head_value_node(en->target, exact, reg);
02670         break;
02671       }
02672     }
02673     break;
02674 
02675   case NT_ANCHOR:
02676     if (NANCHOR(node)->type == ANCHOR_PREC_READ)
02677       n = get_head_value_node(NANCHOR(node)->target, exact, reg);
02678     break;
02679 
02680   default:
02681     break;
02682   }
02683 
02684   return n;
02685 }
02686 
02687 static int
02688 check_type_tree(Node* node, int type_mask, int enclose_mask, int anchor_mask)
02689 {
02690   int type, r = 0;
02691 
02692   type = NTYPE(node);
02693   if ((NTYPE2BIT(type) & type_mask) == 0)
02694     return 1;
02695 
02696   switch (type) {
02697   case NT_LIST:
02698   case NT_ALT:
02699     do {
02700       r = check_type_tree(NCAR(node), type_mask, enclose_mask,
02701                           anchor_mask);
02702     } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
02703     break;
02704 
02705   case NT_QTFR:
02706     r = check_type_tree(NQTFR(node)->target, type_mask, enclose_mask,
02707                         anchor_mask);
02708     break;
02709 
02710   case NT_ENCLOSE:
02711     {
02712       EncloseNode* en = NENCLOSE(node);
02713       if ((en->type & enclose_mask) == 0)
02714         return 1;
02715 
02716       r = check_type_tree(en->target, type_mask, enclose_mask, anchor_mask);
02717     }
02718     break;
02719 
02720   case NT_ANCHOR:
02721     type = NANCHOR(node)->type;
02722     if ((type & anchor_mask) == 0)
02723       return 1;
02724 
02725     if (NANCHOR(node)->target)
02726       r = check_type_tree(NANCHOR(node)->target,
02727                           type_mask, enclose_mask, anchor_mask);
02728     break;
02729 
02730   default:
02731     break;
02732   }
02733   return r;
02734 }
02735 
02736 #ifdef USE_SUBEXP_CALL
02737 
02738 #define RECURSION_EXIST       1
02739 #define RECURSION_INFINITE    2
02740 
02741 static int
02742 subexp_inf_recursive_check(Node* node, ScanEnv* env, int head)
02743 {
02744   int type;
02745   int r = 0;
02746 
02747   type = NTYPE(node);
02748   switch (type) {
02749   case NT_LIST:
02750     {
02751       Node *x;
02752       OnigDistance min;
02753       int ret;
02754 
02755       x = node;
02756       do {
02757         ret = subexp_inf_recursive_check(NCAR(x), env, head);
02758         if (ret < 0 || ret == RECURSION_INFINITE) return ret;
02759         r |= ret;
02760         if (head) {
02761           ret = get_min_match_length(NCAR(x), &min, env);
02762           if (ret != 0) return ret;
02763           if (min != 0) head = 0;
02764         }
02765       } while (IS_NOT_NULL(x = NCDR(x)));
02766     }
02767     break;
02768 
02769   case NT_ALT:
02770     {
02771       int ret;
02772       r = RECURSION_EXIST;
02773       do {
02774         ret = subexp_inf_recursive_check(NCAR(node), env, head);
02775         if (ret < 0 || ret == RECURSION_INFINITE) return ret;
02776         r &= ret;
02777       } while (IS_NOT_NULL(node = NCDR(node)));
02778     }
02779     break;
02780 
02781   case NT_QTFR:
02782     r = subexp_inf_recursive_check(NQTFR(node)->target, env, head);
02783     if (r == RECURSION_EXIST) {
02784       if (NQTFR(node)->lower == 0) r = 0;
02785     }
02786     break;
02787 
02788   case NT_ANCHOR:
02789     {
02790       AnchorNode* an = NANCHOR(node);
02791       switch (an->type) {
02792       case ANCHOR_PREC_READ:
02793       case ANCHOR_PREC_READ_NOT:
02794       case ANCHOR_LOOK_BEHIND:
02795       case ANCHOR_LOOK_BEHIND_NOT:
02796         r = subexp_inf_recursive_check(an->target, env, head);
02797         break;
02798       }
02799     }
02800     break;
02801 
02802   case NT_CALL:
02803     r = subexp_inf_recursive_check(NCALL(node)->target, env, head);
02804     break;
02805 
02806   case NT_ENCLOSE:
02807     if (IS_ENCLOSE_MARK2(NENCLOSE(node)))
02808       return 0;
02809     else if (IS_ENCLOSE_MARK1(NENCLOSE(node)))
02810       return (head == 0 ? RECURSION_EXIST : RECURSION_INFINITE);
02811     else {
02812       SET_ENCLOSE_STATUS(node, NST_MARK2);
02813       r = subexp_inf_recursive_check(NENCLOSE(node)->target, env, head);
02814       CLEAR_ENCLOSE_STATUS(node, NST_MARK2);
02815     }
02816     break;
02817 
02818   default:
02819     break;
02820   }
02821 
02822   return r;
02823 }
02824 
02825 static int
02826 subexp_inf_recursive_check_trav(Node* node, ScanEnv* env)
02827 {
02828   int type;
02829   int r = 0;
02830 
02831   type = NTYPE(node);
02832   switch (type) {
02833   case NT_LIST:
02834   case NT_ALT:
02835     do {
02836       r = subexp_inf_recursive_check_trav(NCAR(node), env);
02837     } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
02838     break;
02839 
02840   case NT_QTFR:
02841     r = subexp_inf_recursive_check_trav(NQTFR(node)->target, env);
02842     break;
02843 
02844   case NT_ANCHOR:
02845     {
02846       AnchorNode* an = NANCHOR(node);
02847       switch (an->type) {
02848       case ANCHOR_PREC_READ:
02849       case ANCHOR_PREC_READ_NOT:
02850       case ANCHOR_LOOK_BEHIND:
02851       case ANCHOR_LOOK_BEHIND_NOT:
02852         r = subexp_inf_recursive_check_trav(an->target, env);
02853         break;
02854       }
02855     }
02856     break;
02857 
02858   case NT_ENCLOSE:
02859     {
02860       EncloseNode* en = NENCLOSE(node);
02861 
02862       if (IS_ENCLOSE_RECURSION(en)) {
02863         SET_ENCLOSE_STATUS(node, NST_MARK1);
02864         r = subexp_inf_recursive_check(en->target, env, 1);
02865         if (r > 0) return ONIGERR_NEVER_ENDING_RECURSION;
02866         CLEAR_ENCLOSE_STATUS(node, NST_MARK1);
02867       }
02868       r = subexp_inf_recursive_check_trav(en->target, env);
02869     }
02870 
02871     break;
02872 
02873   default:
02874     break;
02875   }
02876 
02877   return r;
02878 }
02879 
02880 static int
02881 subexp_recursive_check(Node* node)
02882 {
02883   int r = 0;
02884 
02885   switch (NTYPE(node)) {
02886   case NT_LIST:
02887   case NT_ALT:
02888     do {
02889       r |= subexp_recursive_check(NCAR(node));
02890     } while (IS_NOT_NULL(node = NCDR(node)));
02891     break;
02892 
02893   case NT_QTFR:
02894     r = subexp_recursive_check(NQTFR(node)->target);
02895     break;
02896 
02897   case NT_ANCHOR:
02898     {
02899       AnchorNode* an = NANCHOR(node);
02900       switch (an->type) {
02901       case ANCHOR_PREC_READ:
02902       case ANCHOR_PREC_READ_NOT:
02903       case ANCHOR_LOOK_BEHIND:
02904       case ANCHOR_LOOK_BEHIND_NOT:
02905         r = subexp_recursive_check(an->target);
02906         break;
02907       }
02908     }
02909     break;
02910 
02911   case NT_CALL:
02912     r = subexp_recursive_check(NCALL(node)->target);
02913     if (r != 0) SET_CALL_RECURSION(node);
02914     break;
02915 
02916   case NT_ENCLOSE:
02917     if (IS_ENCLOSE_MARK2(NENCLOSE(node)))
02918       return 0;
02919     else if (IS_ENCLOSE_MARK1(NENCLOSE(node)))
02920       return 1; /* recursion */
02921     else {
02922       SET_ENCLOSE_STATUS(node, NST_MARK2);
02923       r = subexp_recursive_check(NENCLOSE(node)->target);
02924       CLEAR_ENCLOSE_STATUS(node, NST_MARK2);
02925     }
02926     break;
02927 
02928   default:
02929     break;
02930   }
02931 
02932   return r;
02933 }
02934 
02935 
02936 static int
02937 subexp_recursive_check_trav(Node* node, ScanEnv* env)
02938 {
02939 #define FOUND_CALLED_NODE    1
02940 
02941   int type;
02942   int r = 0;
02943 
02944   type = NTYPE(node);
02945   switch (type) {
02946   case NT_LIST:
02947   case NT_ALT:
02948     {
02949       int ret;
02950       do {
02951         ret = subexp_recursive_check_trav(NCAR(node), env);
02952         if (ret == FOUND_CALLED_NODE) r = FOUND_CALLED_NODE;
02953         else if (ret < 0) return ret;
02954       } while (IS_NOT_NULL(node = NCDR(node)));
02955     }
02956     break;
02957 
02958   case NT_QTFR:
02959     r = subexp_recursive_check_trav(NQTFR(node)->target, env);
02960     if (NQTFR(node)->upper == 0) {
02961       if (r == FOUND_CALLED_NODE)
02962         NQTFR(node)->is_refered = 1;
02963     }
02964     break;
02965 
02966   case NT_ANCHOR:
02967     {
02968       AnchorNode* an = NANCHOR(node);
02969       switch (an->type) {
02970       case ANCHOR_PREC_READ:
02971       case ANCHOR_PREC_READ_NOT:
02972       case ANCHOR_LOOK_BEHIND:
02973       case ANCHOR_LOOK_BEHIND_NOT:
02974         r = subexp_recursive_check_trav(an->target, env);
02975         break;
02976       }
02977     }
02978     break;
02979 
02980   case NT_ENCLOSE:
02981     {
02982       EncloseNode* en = NENCLOSE(node);
02983 
02984       if (! IS_ENCLOSE_RECURSION(en)) {
02985         if (IS_ENCLOSE_CALLED(en)) {
02986           SET_ENCLOSE_STATUS(node, NST_MARK1);
02987           r = subexp_recursive_check(en->target);
02988           if (r != 0) SET_ENCLOSE_STATUS(node, NST_RECURSION);
02989           CLEAR_ENCLOSE_STATUS(node, NST_MARK1);
02990         }
02991       }
02992       r = subexp_recursive_check_trav(en->target, env);
02993       if (IS_ENCLOSE_CALLED(en))
02994         r |= FOUND_CALLED_NODE;
02995     }
02996     break;
02997 
02998   default:
02999     break;
03000   }
03001 
03002   return r;
03003 }
03004 
03005 static int
03006 setup_subexp_call(Node* node, ScanEnv* env)
03007 {
03008   int type;
03009   int r = 0;
03010 
03011   type = NTYPE(node);
03012   switch (type) {
03013   case NT_LIST:
03014     do {
03015       r = setup_subexp_call(NCAR(node), env);
03016     } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
03017     break;
03018 
03019   case NT_ALT:
03020     do {
03021       r = setup_subexp_call(NCAR(node), env);
03022     } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
03023     break;
03024 
03025   case NT_QTFR:
03026     r = setup_subexp_call(NQTFR(node)->target, env);
03027     break;
03028   case NT_ENCLOSE:
03029     r = setup_subexp_call(NENCLOSE(node)->target, env);
03030     break;
03031 
03032   case NT_CALL:
03033     {
03034       CallNode* cn = NCALL(node);
03035       Node** nodes = SCANENV_MEM_NODES(env);
03036 
03037       if (cn->group_num != 0) {
03038         int gnum = cn->group_num;
03039 
03040 #ifdef USE_NAMED_GROUP
03041         if (env->num_named > 0 &&
03042             IS_SYNTAX_BV(env->syntax, ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP) &&
03043             !ONIG_IS_OPTION_ON(env->option, ONIG_OPTION_CAPTURE_GROUP)) {
03044           return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED;
03045         }
03046 #endif
03047         if (gnum > env->num_mem) {
03048           onig_scan_env_set_error_string(env,
03049                  ONIGERR_UNDEFINED_GROUP_REFERENCE, cn->name, cn->name_end);
03050           return ONIGERR_UNDEFINED_GROUP_REFERENCE;
03051         }
03052 
03053 #ifdef USE_NAMED_GROUP
03054       set_call_attr:
03055 #endif
03056         cn->target = nodes[cn->group_num];
03057         if (IS_NULL(cn->target)) {
03058           onig_scan_env_set_error_string(env,
03059                  ONIGERR_UNDEFINED_NAME_REFERENCE, cn->name, cn->name_end);
03060           return ONIGERR_UNDEFINED_NAME_REFERENCE;
03061         }
03062         SET_ENCLOSE_STATUS(cn->target, NST_CALLED);
03063         BIT_STATUS_ON_AT(env->bt_mem_start, cn->group_num);
03064         cn->unset_addr_list = env->unset_addr_list;
03065       }
03066 #ifdef USE_NAMED_GROUP
03067       else {
03068         int *refs;
03069 
03070         int n = onig_name_to_group_numbers(env->reg, cn->name, cn->name_end,
03071                                            &refs);
03072         if (n <= 0) {
03073           onig_scan_env_set_error_string(env,
03074                  ONIGERR_UNDEFINED_NAME_REFERENCE, cn->name, cn->name_end);
03075           return ONIGERR_UNDEFINED_NAME_REFERENCE;
03076         }
03077         else if (n > 1) {
03078           onig_scan_env_set_error_string(env,
03079             ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL, cn->name, cn->name_end);
03080           return ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL;
03081         }
03082         else {
03083           cn->group_num = refs[0];
03084           goto set_call_attr;
03085         }
03086       }
03087 #endif
03088     }
03089     break;
03090 
03091   case NT_ANCHOR:
03092     {
03093       AnchorNode* an = NANCHOR(node);
03094 
03095       switch (an->type) {
03096       case ANCHOR_PREC_READ:
03097       case ANCHOR_PREC_READ_NOT:
03098       case ANCHOR_LOOK_BEHIND:
03099       case ANCHOR_LOOK_BEHIND_NOT:
03100         r = setup_subexp_call(an->target, env);
03101         break;
03102       }
03103     }
03104     break;
03105 
03106   default:
03107     break;
03108   }
03109 
03110   return r;
03111 }
03112 #endif
03113 
03114 /* divide different length alternatives in look-behind.
03115   (?<=A|B) ==> (?<=A)|(?<=B)
03116   (?<!A|B) ==> (?<!A)(?<!B)
03117 */
03118 static int
03119 divide_look_behind_alternatives(Node* node)
03120 {
03121   Node *head, *np, *insert_node;
03122   AnchorNode* an = NANCHOR(node);
03123   int anc_type = an->type;
03124 
03125   head = an->target;
03126   np = NCAR(head);
03127   swap_node(node, head);
03128   NCAR(node) = head;
03129   NANCHOR(head)->target = np;
03130 
03131   np = node;
03132   while ((np = NCDR(np)) != NULL_NODE) {
03133     insert_node = onig_node_new_anchor(anc_type);
03134     CHECK_NULL_RETURN_MEMERR(insert_node);
03135     NANCHOR(insert_node)->target = NCAR(np);
03136     NCAR(np) = insert_node;
03137   }
03138 
03139   if (anc_type == ANCHOR_LOOK_BEHIND_NOT) {
03140     np = node;
03141     do {
03142       SET_NTYPE(np, NT_LIST);  /* alt -> list */
03143     } while ((np = NCDR(np)) != NULL_NODE);
03144   }
03145   return 0;
03146 }
03147 
03148 static int
03149 setup_look_behind(Node* node, regex_t* reg, ScanEnv* env)
03150 {
03151   int r, len;
03152   AnchorNode* an = NANCHOR(node);
03153 
03154   r = get_char_length_tree(an->target, reg, &len);
03155   if (r == 0)
03156     an->char_len = len;
03157   else if (r == GET_CHAR_LEN_VARLEN)
03158     r = ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
03159   else if (r == GET_CHAR_LEN_TOP_ALT_VARLEN) {
03160     if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND))
03161       r = divide_look_behind_alternatives(node);
03162     else
03163       r = ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
03164   }
03165 
03166   return r;
03167 }
03168 
03169 static int
03170 next_setup(Node* node, Node* next_node, regex_t* reg)
03171 {
03172   int type;
03173 
03174  retry:
03175   type = NTYPE(node);
03176   if (type == NT_QTFR) {
03177     QtfrNode* qn = NQTFR(node);
03178     if (qn->greedy && IS_REPEAT_INFINITE(qn->upper)) {
03179 #ifdef USE_QTFR_PEEK_NEXT
03180       Node* n = get_head_value_node(next_node, 1, reg);
03181       /* '\0': for UTF-16BE etc... */
03182       if (IS_NOT_NULL(n) && NSTR(n)->s[0] != '\0') {
03183         qn->next_head_exact = n;
03184       }
03185 #endif
03186       /* automatic posseivation a*b ==> (?>a*)b */
03187       if (qn->lower <= 1) {
03188         int ttype = NTYPE(qn->target);
03189         if (IS_NODE_TYPE_SIMPLE(ttype)) {
03190           Node *x, *y;
03191           x = get_head_value_node(qn->target, 0, reg);
03192           if (IS_NOT_NULL(x)) {
03193             y = get_head_value_node(next_node,  0, reg);
03194             if (IS_NOT_NULL(y) && is_not_included(x, y, reg)) {
03195               Node* en = onig_node_new_enclose(ENCLOSE_STOP_BACKTRACK);
03196               CHECK_NULL_RETURN_MEMERR(en);
03197               SET_ENCLOSE_STATUS(en, NST_STOP_BT_SIMPLE_REPEAT);
03198               swap_node(node, en);
03199               NENCLOSE(node)->target = en;
03200             }
03201           }
03202         }
03203       }
03204     }
03205   }
03206   else if (type == NT_ENCLOSE) {
03207     EncloseNode* en = NENCLOSE(node);
03208     if (en->type == ENCLOSE_MEMORY) {
03209       node = en->target;
03210       goto retry;
03211     }
03212   }
03213   return 0;
03214 }
03215 
03216 
03217 static int
03218 update_string_node_case_fold(regex_t* reg, Node *node)
03219 {
03220   UChar *p, *q, *end, buf[ONIGENC_MBC_CASE_FOLD_MAXLEN];
03221   UChar *sbuf, *ebuf, *sp;
03222   int r, i, len, sbuf_size;
03223   StrNode* sn = NSTR(node);
03224 
03225   end = sn->end;
03226   sbuf_size = (end - sn->s) * 2;
03227   sbuf = (UChar* )xmalloc(sbuf_size);
03228   CHECK_NULL_RETURN_MEMERR(sbuf);
03229   ebuf = sbuf + sbuf_size;
03230 
03231   sp = sbuf;
03232   p = sn->s;
03233   while (p < end) {
03234     len = ONIGENC_MBC_CASE_FOLD(reg->enc, reg->case_fold_flag, &p, end, buf);
03235     q = buf;
03236     for (i = 0; i < len; i++) {
03237       if (sp >= ebuf) {
03238         sbuf = (UChar* )xrealloc(sbuf, sbuf_size * 2);
03239         CHECK_NULL_RETURN_MEMERR(sbuf);
03240         sp = sbuf + sbuf_size;
03241         sbuf_size *= 2;
03242         ebuf = sbuf + sbuf_size;
03243       }
03244 
03245       *sp++ = buf[i];
03246     }
03247   }
03248 
03249   r = onig_node_str_set(node, sbuf, sp);
03250   if (r != 0) {
03251     xfree(sbuf);
03252     return r;
03253   }
03254 
03255   xfree(sbuf);
03256   return 0;
03257 }
03258 
03259 static int
03260 expand_case_fold_make_rem_string(Node** rnode, UChar *s, UChar *end,
03261                                  regex_t* reg)
03262 {
03263   int r;
03264   Node *node;
03265 
03266   node = onig_node_new_str(s, end);
03267   if (IS_NULL(node)) return ONIGERR_MEMORY;
03268 
03269   r = update_string_node_case_fold(reg, node);
03270   if (r != 0) {
03271     onig_node_free(node);
03272     return r;
03273   }
03274 
03275   NSTRING_SET_AMBIG(node);
03276   NSTRING_SET_DONT_GET_OPT_INFO(node);
03277   *rnode = node;
03278   return 0;
03279 }
03280 
03281 static int
03282 expand_case_fold_string_alt(int item_num, OnigCaseFoldCodeItem items[],
03283                             UChar *p, int slen, UChar *end,
03284                             regex_t* reg, Node **rnode)
03285 {
03286   int r, i, j, len, varlen;
03287   Node *anode, *var_anode, *snode, *xnode, *an;
03288   UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN];
03289 
03290   *rnode = var_anode = NULL_NODE;
03291 
03292   varlen = 0;
03293   for (i = 0; i < item_num; i++) {
03294     if (items[i].byte_len != slen) {
03295       varlen = 1;
03296       break;
03297     }
03298   }
03299 
03300   if (varlen != 0) {
03301     *rnode = var_anode = onig_node_new_alt(NULL_NODE, NULL_NODE);
03302     if (IS_NULL(var_anode)) return ONIGERR_MEMORY;
03303 
03304     xnode = onig_node_new_list(NULL, NULL);
03305     if (IS_NULL(xnode)) goto mem_err;
03306     NCAR(var_anode) = xnode;
03307 
03308     anode = onig_node_new_alt(NULL_NODE, NULL_NODE);
03309     if (IS_NULL(anode)) goto mem_err;
03310     NCAR(xnode) = anode;
03311   }
03312   else {
03313     *rnode = anode = onig_node_new_alt(NULL_NODE, NULL_NODE);
03314     if (IS_NULL(anode)) return ONIGERR_MEMORY;
03315   }
03316 
03317   snode = onig_node_new_str(p, p + slen);
03318   if (IS_NULL(snode)) goto mem_err;
03319 
03320   NCAR(anode) = snode;
03321 
03322   for (i = 0; i < item_num; i++) {
03323     snode = onig_node_new_str(NULL, NULL);
03324     if (IS_NULL(snode)) goto mem_err;
03325 
03326     for (j = 0; j < items[i].code_len; j++) {
03327       len = ONIGENC_CODE_TO_MBC(reg->enc, items[i].code[j], buf);
03328       if (len < 0) {
03329         r = len;
03330         goto mem_err2;
03331       }
03332 
03333       r = onig_node_str_cat(snode, buf, buf + len);
03334       if (r != 0) goto mem_err2;
03335     }
03336 
03337     an = onig_node_new_alt(NULL_NODE, NULL_NODE);
03338     if (IS_NULL(an)) {
03339       goto mem_err2;
03340     }
03341 
03342     if (items[i].byte_len != slen) {
03343       Node *rem;
03344       UChar *q = p + items[i].byte_len;
03345 
03346       if (q < end) {
03347         r = expand_case_fold_make_rem_string(&rem, q, end, reg);
03348         if (r != 0) {
03349           onig_node_free(an);
03350           goto mem_err2;
03351         }
03352 
03353         xnode = onig_node_list_add(NULL_NODE, snode);
03354         if (IS_NULL(xnode)) {
03355           onig_node_free(an);
03356           onig_node_free(rem);
03357           goto mem_err2;
03358         }
03359         if (IS_NULL(onig_node_list_add(xnode, rem))) {
03360           onig_node_free(an);
03361           onig_node_free(xnode);
03362           onig_node_free(rem);
03363           goto mem_err;
03364         }
03365 
03366         NCAR(an) = xnode;
03367       }
03368       else {
03369         NCAR(an) = snode;
03370       }
03371 
03372       NCDR(var_anode) = an;
03373       var_anode = an;
03374     }
03375     else {
03376       NCAR(an)     = snode;
03377       NCDR(anode) = an;
03378       anode = an;
03379     }
03380   }
03381 
03382   return varlen;
03383 
03384  mem_err2:
03385   onig_node_free(snode);
03386 
03387  mem_err:
03388   onig_node_free(*rnode);
03389 
03390   return ONIGERR_MEMORY;
03391 }
03392 
03393 static int
03394 expand_case_fold_string(Node* node, regex_t* reg)
03395 {
03396 #define THRESHOLD_CASE_FOLD_ALT_FOR_EXPANSION  8
03397 
03398   int r, n, len, alt_num;
03399   UChar *start, *end, *p;
03400   Node *top_root, *root, *snode, *prev_node;
03401   OnigCaseFoldCodeItem items[ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM];
03402   StrNode* sn = NSTR(node);
03403 
03404   if (NSTRING_IS_AMBIG(node)) return 0;
03405 
03406   start = sn->s;
03407   end   = sn->end;
03408   if (start >= end) return 0;
03409 
03410   r = 0;
03411   top_root = root = prev_node = snode = NULL_NODE;
03412   alt_num = 1;
03413   p = start;
03414   while (p < end) {
03415     n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(reg->enc, reg->case_fold_flag,
03416                                            p, end, items);
03417     if (n < 0) {
03418       r = n;
03419       goto err;
03420     }
03421 
03422     len = enclen(reg->enc, p, end);
03423 
03424     if (n == 0) {
03425       if (IS_NULL(snode)) {
03426         if (IS_NULL(root) && IS_NOT_NULL(prev_node)) {
03427           top_root = root = onig_node_list_add(NULL_NODE, prev_node);
03428           if (IS_NULL(root)) {
03429             onig_node_free(prev_node);
03430             goto mem_err;
03431           }
03432         }
03433 
03434         prev_node = snode = onig_node_new_str(NULL, NULL);
03435         if (IS_NULL(snode)) goto mem_err;
03436         if (IS_NOT_NULL(root)) {
03437           if (IS_NULL(onig_node_list_add(root, snode))) {
03438             onig_node_free(snode);
03439             goto mem_err;
03440           }
03441         }
03442       }
03443 
03444       r = onig_node_str_cat(snode, p, p + len);
03445       if (r != 0) goto err;
03446     }
03447     else {
03448       alt_num *= (n + 1);
03449       if (alt_num > THRESHOLD_CASE_FOLD_ALT_FOR_EXPANSION) break;
03450 
03451       if (IS_NULL(root) && IS_NOT_NULL(prev_node)) {
03452         top_root = root = onig_node_list_add(NULL_NODE, prev_node);
03453         if (IS_NULL(root)) {
03454           onig_node_free(prev_node);
03455           goto mem_err;
03456         }
03457       }
03458 
03459       r = expand_case_fold_string_alt(n, items, p, len, end, reg, &prev_node);
03460       if (r < 0) goto mem_err;
03461       if (r == 1) {
03462         if (IS_NULL(root)) {
03463           top_root = prev_node;
03464         }
03465         else {
03466           if (IS_NULL(onig_node_list_add(root, prev_node))) {
03467             onig_node_free(prev_node);
03468             goto mem_err;
03469           }
03470         }
03471 
03472         root = NCAR(prev_node);
03473       }
03474       else { /* r == 0 */
03475         if (IS_NOT_NULL(root)) {
03476           if (IS_NULL(onig_node_list_add(root, prev_node))) {
03477             onig_node_free(prev_node);
03478             goto mem_err;
03479           }
03480         }
03481       }
03482 
03483       snode = NULL_NODE;
03484     }
03485 
03486     p += len;
03487   }
03488 
03489   if (p < end) {
03490     Node *srem;
03491 
03492     r = expand_case_fold_make_rem_string(&srem, p, end, reg);
03493     if (r != 0) goto mem_err;
03494 
03495     if (IS_NOT_NULL(prev_node) && IS_NULL(root)) {
03496       top_root = root = onig_node_list_add(NULL_NODE, prev_node);
03497       if (IS_NULL(root)) {
03498         onig_node_free(srem);
03499         onig_node_free(prev_node);
03500         goto mem_err;
03501       }
03502     }
03503 
03504     if (IS_NULL(root)) {
03505       prev_node = srem;
03506     }
03507     else {
03508       if (IS_NULL(onig_node_list_add(root, srem))) {
03509         onig_node_free(srem);
03510         goto mem_err;
03511       }
03512     }
03513   }
03514 
03515   /* ending */
03516   top_root = (IS_NOT_NULL(top_root) ? top_root : prev_node);
03517   swap_node(node, top_root);
03518   onig_node_free(top_root);
03519   return 0;
03520 
03521  mem_err:
03522   r = ONIGERR_MEMORY;
03523 
03524  err:
03525   onig_node_free(top_root);
03526   return r;
03527 }
03528 
03529 
03530 #ifdef USE_COMBINATION_EXPLOSION_CHECK
03531 
03532 #define CEC_THRES_NUM_BIG_REPEAT         512
03533 #define CEC_INFINITE_NUM          0x7fffffff
03534 
03535 #define CEC_IN_INFINITE_REPEAT    (1<<0)
03536 #define CEC_IN_FINITE_REPEAT      (1<<1)
03537 #define CEC_CONT_BIG_REPEAT       (1<<2)
03538 
03539 static int
03540 setup_comb_exp_check(Node* node, int state, ScanEnv* env)
03541 {
03542   int type;
03543   int r = state;
03544 
03545   type = NTYPE(node);
03546   switch (type) {
03547   case NT_LIST:
03548     {
03549       Node* prev = NULL_NODE;
03550       do {
03551         r = setup_comb_exp_check(NCAR(node), r, env);
03552         prev = NCAR(node);
03553       } while (r >= 0 && IS_NOT_NULL(node = NCDR(node)));
03554     }
03555     break;
03556 
03557   case NT_ALT:
03558     {
03559       int ret;
03560       do {
03561         ret = setup_comb_exp_check(NCAR(node), state, env);
03562         r |= ret;
03563       } while (ret >= 0 && IS_NOT_NULL(node = NCDR(node)));
03564     }
03565     break;
03566 
03567   case NT_QTFR:
03568     {
03569       int child_state = state;
03570       int add_state = 0;
03571       QtfrNode* qn = NQTFR(node);
03572       Node* target = qn->target;
03573       int var_num;
03574 
03575       if (! IS_REPEAT_INFINITE(qn->upper)) {
03576         if (qn->upper > 1) {
03577           /* {0,1}, {1,1} are allowed */
03578           child_state |= CEC_IN_FINITE_REPEAT;
03579 
03580           /* check (a*){n,m}, (a+){n,m} => (a*){n,n}, (a+){n,n} */
03581           if (env->backrefed_mem == 0) {
03582             if (NTYPE(qn->target) == NT_ENCLOSE) {
03583               EncloseNode* en = NENCLOSE(qn->target);
03584               if (en->type == ENCLOSE_MEMORY) {
03585                 if (NTYPE(en->target) == NT_QTFR) {
03586                   QtfrNode* q = NQTFR(en->target);
03587                   if (IS_REPEAT_INFINITE(q->upper)
03588                       && q->greedy == qn->greedy) {
03589                     qn->upper = (qn->lower == 0 ? 1 : qn->lower);
03590                     if (qn->upper == 1)
03591                       child_state = state;
03592                   }
03593                 }
03594               }
03595             }
03596           }
03597         }
03598       }
03599 
03600       if (state & CEC_IN_FINITE_REPEAT) {
03601         qn->comb_exp_check_num = -1;
03602       }
03603       else {
03604         if (IS_REPEAT_INFINITE(qn->upper)) {
03605           var_num = CEC_INFINITE_NUM;
03606           child_state |= CEC_IN_INFINITE_REPEAT;
03607         }
03608         else {
03609           var_num = qn->upper - qn->lower;
03610         }
03611 
03612         if (var_num >= CEC_THRES_NUM_BIG_REPEAT)
03613           add_state |= CEC_CONT_BIG_REPEAT;
03614 
03615         if (((state & CEC_IN_INFINITE_REPEAT) != 0 && var_num != 0) ||
03616             ((state & CEC_CONT_BIG_REPEAT) != 0 &&
03617              var_num >= CEC_THRES_NUM_BIG_REPEAT)) {
03618           if (qn->comb_exp_check_num == 0) {
03619             env->num_comb_exp_check++;
03620             qn->comb_exp_check_num = env->num_comb_exp_check;
03621             if (env->curr_max_regnum > env->comb_exp_max_regnum)
03622               env->comb_exp_max_regnum = env->curr_max_regnum;
03623           }
03624         }
03625       }
03626 
03627       r = setup_comb_exp_check(target, child_state, env);
03628       r |= add_state;
03629     }
03630     break;
03631 
03632   case NT_ENCLOSE:
03633     {
03634       EncloseNode* en = NENCLOSE(node);
03635 
03636       switch (en->type) {
03637       case ENCLOSE_MEMORY:
03638         {
03639           if (env->curr_max_regnum < en->regnum)
03640             env->curr_max_regnum = en->regnum;
03641 
03642           r = setup_comb_exp_check(en->target, state, env);
03643         }
03644         break;
03645 
03646       default:
03647         r = setup_comb_exp_check(en->target, state, env);
03648         break;
03649       }
03650     }
03651     break;
03652 
03653 #ifdef USE_SUBEXP_CALL
03654   case NT_CALL:
03655     if (IS_CALL_RECURSION(NCALL(node)))
03656       env->has_recursion = 1;
03657     else
03658       r = setup_comb_exp_check(NCALL(node)->target, state, env);
03659     break;
03660 #endif
03661 
03662   default:
03663     break;
03664   }
03665 
03666   return r;
03667 }
03668 #endif
03669 
03670 #define IN_ALT        (1<<0)
03671 #define IN_NOT        (1<<1)
03672 #define IN_REPEAT     (1<<2)
03673 #define IN_VAR_REPEAT (1<<3)
03674 
03675 /* setup_tree does the following work.
03676  1. check empty loop. (set qn->target_empty_info)
03677  2. expand ignore-case in char class.
03678  3. set memory status bit flags. (reg->mem_stats)
03679  4. set qn->head_exact for [push, exact] -> [push_or_jump_exact1, exact].
03680  5. find invalid patterns in look-behind.
03681  6. expand repeated string.
03682  */
03683 static int
03684 setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env)
03685 {
03686   int type;
03687   int r = 0;
03688 
03689   type = NTYPE(node);
03690   switch (type) {
03691   case NT_LIST:
03692     {
03693       Node* prev = NULL_NODE;
03694       do {
03695         r = setup_tree(NCAR(node), reg, state, env);
03696         if (IS_NOT_NULL(prev) && r == 0) {
03697           r = next_setup(prev, NCAR(node), reg);
03698         }
03699         prev = NCAR(node);
03700       } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
03701     }
03702     break;
03703 
03704   case NT_ALT:
03705     do {
03706       r = setup_tree(NCAR(node), reg, (state | IN_ALT), env);
03707     } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
03708     break;
03709 
03710   case NT_CCLASS:
03711     break;
03712 
03713   case NT_STR:
03714     if (IS_IGNORECASE(reg->options) && !NSTRING_IS_RAW(node)) {
03715       r = expand_case_fold_string(node, reg);
03716     }
03717     break;
03718 
03719   case NT_CTYPE:
03720   case NT_CANY:
03721     break;
03722 
03723 #ifdef USE_SUBEXP_CALL
03724   case NT_CALL:
03725     break;
03726 #endif
03727 
03728   case NT_BREF:
03729     {
03730       int i;
03731       int* p;
03732       Node** nodes = SCANENV_MEM_NODES(env);
03733       BRefNode* br = NBREF(node);
03734       p = BACKREFS_P(br);
03735       for (i = 0; i < br->back_num; i++) {
03736         if (p[i] > env->num_mem)  return ONIGERR_INVALID_BACKREF;
03737         BIT_STATUS_ON_AT(env->backrefed_mem, p[i]);
03738         BIT_STATUS_ON_AT(env->bt_mem_start, p[i]);
03739 #ifdef USE_BACKREF_WITH_LEVEL
03740         if (IS_BACKREF_NEST_LEVEL(br)) {
03741           BIT_STATUS_ON_AT(env->bt_mem_end, p[i]);
03742         }
03743 #endif
03744         SET_ENCLOSE_STATUS(nodes[p[i]], NST_MEM_BACKREFED);
03745       }
03746     }
03747     break;
03748 
03749   case NT_QTFR:
03750     {
03751       OnigDistance d;
03752       QtfrNode* qn = NQTFR(node);
03753       Node* target = qn->target;
03754 
03755       if ((state & IN_REPEAT) != 0) {
03756         qn->state |= NST_IN_REPEAT;
03757       }
03758 
03759       if (IS_REPEAT_INFINITE(qn->upper) || qn->upper >= 1) {
03760         r = get_min_match_length(target, &d, env);
03761         if (r) break;
03762         if (d == 0) {
03763           qn->target_empty_info = NQ_TARGET_IS_EMPTY;
03764 #ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT
03765           r = quantifiers_memory_node_info(target);
03766           if (r < 0) break;
03767           if (r > 0) {
03768             qn->target_empty_info = r;
03769           }
03770 #endif
03771 #if 0
03772           r = get_max_match_length(target, &d, env);
03773           if (r == 0 && d == 0) {
03774             /*  ()* ==> ()?, ()+ ==> ()  */
03775             qn->upper = 1;
03776             if (qn->lower > 1) qn->lower = 1;
03777             if (NTYPE(target) == NT_STR) {
03778               qn->upper = qn->lower = 0;  /* /(?:)+/ ==> // */
03779             }
03780           }
03781 #endif
03782         }
03783       }
03784 
03785       state |= IN_REPEAT;
03786       if (qn->lower != qn->upper)
03787         state |= IN_VAR_REPEAT;
03788       r = setup_tree(target, reg, state, env);
03789       if (r) break;
03790 
03791       /* expand string */
03792 #define EXPAND_STRING_MAX_LENGTH  100
03793       if (NTYPE(target) == NT_STR) {
03794         if (!IS_REPEAT_INFINITE(qn->lower) && qn->lower == qn->upper &&
03795             qn->lower > 1 && qn->lower <= EXPAND_STRING_MAX_LENGTH) {
03796           int len = NSTRING_LEN(target);
03797           StrNode* sn = NSTR(target);
03798 
03799           if (len * qn->lower <= EXPAND_STRING_MAX_LENGTH) {
03800             int i, n = qn->lower;
03801             onig_node_conv_to_str_node(node, NSTR(target)->flag);
03802             for (i = 0; i < n; i++) {
03803               r = onig_node_str_cat(node, sn->s, sn->end);
03804               if (r) break;
03805             }
03806             onig_node_free(target);
03807             break; /* break case NT_QTFR: */
03808           }
03809         }
03810       }
03811 
03812 #ifdef USE_OP_PUSH_OR_JUMP_EXACT
03813       if (qn->greedy && (qn->target_empty_info != 0)) {
03814         if (NTYPE(target) == NT_QTFR) {
03815           QtfrNode* tqn = NQTFR(target);
03816           if (IS_NOT_NULL(tqn->head_exact)) {
03817             qn->head_exact  = tqn->head_exact;
03818             tqn->head_exact = NULL;
03819           }
03820         }
03821         else {
03822           qn->head_exact = get_head_value_node(qn->target, 1, reg);
03823         }
03824       }
03825 #endif
03826     }
03827     break;
03828 
03829   case NT_ENCLOSE:
03830     {
03831       EncloseNode* en = NENCLOSE(node);
03832 
03833       switch (en->type) {
03834       case ENCLOSE_OPTION:
03835         {
03836           OnigOptionType options = reg->options;
03837           reg->options = NENCLOSE(node)->option;
03838           r = setup_tree(NENCLOSE(node)->target, reg, state, env);
03839           reg->options = options;
03840         }
03841         break;
03842 
03843       case ENCLOSE_MEMORY:
03844         if ((state & (IN_ALT | IN_NOT | IN_VAR_REPEAT)) != 0) {
03845           BIT_STATUS_ON_AT(env->bt_mem_start, en->regnum);
03846           /* SET_ENCLOSE_STATUS(node, NST_MEM_IN_ALT_NOT); */
03847         }
03848         r = setup_tree(en->target, reg, state, env);
03849         break;
03850 
03851       case ENCLOSE_STOP_BACKTRACK:
03852         {
03853           Node* target = en->target;
03854           r = setup_tree(target, reg, state, env);
03855           if (NTYPE(target) == NT_QTFR) {
03856             QtfrNode* tqn = NQTFR(target);
03857             if (IS_REPEAT_INFINITE(tqn->upper) && tqn->lower <= 1 &&
03858                 tqn->greedy != 0) {  /* (?>a*), a*+ etc... */
03859               int qtype = NTYPE(tqn->target);
03860               if (IS_NODE_TYPE_SIMPLE(qtype))
03861                 SET_ENCLOSE_STATUS(node, NST_STOP_BT_SIMPLE_REPEAT);
03862             }
03863           }
03864         }
03865         break;
03866       }
03867     }
03868     break;
03869 
03870   case NT_ANCHOR:
03871     {
03872       AnchorNode* an = NANCHOR(node);
03873 
03874       switch (an->type) {
03875       case ANCHOR_PREC_READ:
03876         r = setup_tree(an->target, reg, state, env);
03877         break;
03878       case ANCHOR_PREC_READ_NOT:
03879         r = setup_tree(an->target, reg, (state | IN_NOT), env);
03880         break;
03881 
03882 /* allowed node types in look-behind */
03883 #define ALLOWED_TYPE_IN_LB  \
03884   ( BIT_NT_LIST | BIT_NT_ALT | BIT_NT_STR | BIT_NT_CCLASS | BIT_NT_CTYPE | \
03885     BIT_NT_CANY | BIT_NT_ANCHOR | BIT_NT_ENCLOSE | BIT_NT_QTFR | BIT_NT_CALL )
03886 
03887 #define ALLOWED_ENCLOSE_IN_LB       ( ENCLOSE_MEMORY )
03888 #define ALLOWED_ENCLOSE_IN_LB_NOT   0
03889 
03890 #define ALLOWED_ANCHOR_IN_LB \
03891 ( ANCHOR_LOOK_BEHIND | ANCHOR_BEGIN_LINE | ANCHOR_END_LINE | ANCHOR_BEGIN_BUF | ANCHOR_BEGIN_POSITION )
03892 #define ALLOWED_ANCHOR_IN_LB_NOT \
03893 ( ANCHOR_LOOK_BEHIND | ANCHOR_LOOK_BEHIND_NOT | ANCHOR_BEGIN_LINE | ANCHOR_END_LINE | ANCHOR_BEGIN_BUF | ANCHOR_BEGIN_POSITION )
03894 
03895       case ANCHOR_LOOK_BEHIND:
03896         {
03897           r = check_type_tree(an->target, ALLOWED_TYPE_IN_LB,
03898                               ALLOWED_ENCLOSE_IN_LB, ALLOWED_ANCHOR_IN_LB);
03899           if (r < 0) return r;
03900           if (r > 0) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
03901           r = setup_look_behind(node, reg, env);
03902           if (r != 0) return r;
03903           r = setup_tree(an->target, reg, state, env);
03904         }
03905         break;
03906 
03907       case ANCHOR_LOOK_BEHIND_NOT:
03908         {
03909           r = check_type_tree(an->target, ALLOWED_TYPE_IN_LB,
03910                       ALLOWED_ENCLOSE_IN_LB_NOT, ALLOWED_ANCHOR_IN_LB_NOT);
03911           if (r < 0) return r;
03912           if (r > 0) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
03913           r = setup_look_behind(node, reg, env);
03914           if (r != 0) return r;
03915           r = setup_tree(an->target, reg, (state | IN_NOT), env);
03916         }
03917         break;
03918       }
03919     }
03920     break;
03921 
03922   default:
03923     break;
03924   }
03925 
03926   return r;
03927 }
03928 
03929 /* set skip map for Boyer-Moor search */
03930 static int
03931 set_bm_skip(UChar* s, UChar* end, OnigEncoding enc ARG_UNUSED,
03932             UChar skip[], int** int_skip)
03933 {
03934   int i, len;
03935 
03936   len = end - s;
03937   if (len < ONIG_CHAR_TABLE_SIZE) {
03938     for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) skip[i] = len;
03939 
03940     for (i = 0; i < len - 1; i++)
03941       skip[s[i]] = len - 1 - i;
03942   }
03943   else {
03944     if (IS_NULL(*int_skip)) {
03945       *int_skip = (int* )xmalloc(sizeof(int) * ONIG_CHAR_TABLE_SIZE);
03946       if (IS_NULL(*int_skip)) return ONIGERR_MEMORY;
03947     }
03948     for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) (*int_skip)[i] = len;
03949 
03950     for (i = 0; i < len - 1; i++)
03951       (*int_skip)[s[i]] = len - 1 - i;
03952   }
03953   return 0;
03954 }
03955 
03956 #define OPT_EXACT_MAXLEN   24
03957 
03958 typedef struct {
03959   OnigDistance min;  /* min byte length */
03960   OnigDistance max;  /* max byte length */
03961 } MinMaxLen;
03962 
03963 typedef struct {
03964   MinMaxLen        mmd;
03965   OnigEncoding     enc;
03966   OnigOptionType   options;
03967   OnigCaseFoldType case_fold_flag;
03968   ScanEnv*         scan_env;
03969 } OptEnv;
03970 
03971 typedef struct {
03972   int left_anchor;
03973   int right_anchor;
03974 } OptAncInfo;
03975 
03976 typedef struct {
03977   MinMaxLen  mmd; /* info position */
03978   OptAncInfo anc;
03979 
03980   int   reach_end;
03981   int   ignore_case;
03982   int   len;
03983   UChar s[OPT_EXACT_MAXLEN];
03984 } OptExactInfo;
03985 
03986 typedef struct {
03987   MinMaxLen mmd; /* info position */
03988   OptAncInfo anc;
03989 
03990   int   value;      /* weighted value */
03991   UChar map[ONIG_CHAR_TABLE_SIZE];
03992 } OptMapInfo;
03993 
03994 typedef struct {
03995   MinMaxLen    len;
03996 
03997   OptAncInfo   anc;
03998   OptExactInfo exb;    /* boundary */
03999   OptExactInfo exm;    /* middle */
04000   OptExactInfo expr;   /* prec read (?=...) */
04001 
04002   OptMapInfo   map;   /* boundary */
04003 } NodeOptInfo;
04004 
04005 
04006 static int
04007 map_position_value(OnigEncoding enc, int i)
04008 {
04009   static const short int ByteValTable[] = {
04010      5,  1,  1,  1,  1,  1,  1,  1,  1, 10, 10,  1,  1, 10,  1,  1,
04011      1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
04012     12,  4,  7,  4,  4,  4,  4,  4,  4,  5,  5,  5,  5,  5,  5,  5,
04013      6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  5,  5,  5,  5,  5,  5,
04014      5,  6,  6,  6,  6,  7,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,
04015      6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  5,  6,  5,  5,  5,
04016      5,  6,  6,  6,  6,  7,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,
04017      6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  5,  5,  5,  5,  1
04018   };
04019 
04020   if (i < (int )(sizeof(ByteValTable)/sizeof(ByteValTable[0]))) {
04021     if (i == 0 && ONIGENC_MBC_MINLEN(enc) > 1)
04022       return 20;
04023     else
04024       return (int )ByteValTable[i];
04025   }
04026   else
04027     return 4;   /* Take it easy. */
04028 }
04029 
04030 static int
04031 distance_value(MinMaxLen* mm)
04032 {
04033   /* 1000 / (min-max-dist + 1) */
04034   static const short int dist_vals[] = {
04035     1000,  500,  333,  250,  200,  167,  143,  125,  111,  100,
04036       91,   83,   77,   71,   67,   63,   59,   56,   53,   50,
04037       48,   45,   43,   42,   40,   38,   37,   36,   34,   33,
04038       32,   31,   30,   29,   29,   28,   27,   26,   26,   25,
04039       24,   24,   23,   23,   22,   22,   21,   21,   20,   20,
04040       20,   19,   19,   19,   18,   18,   18,   17,   17,   17,
04041       16,   16,   16,   16,   15,   15,   15,   15,   14,   14,
04042       14,   14,   14,   14,   13,   13,   13,   13,   13,   13,
04043       12,   12,   12,   12,   12,   12,   11,   11,   11,   11,
04044       11,   11,   11,   11,   11,   10,   10,   10,   10,   10
04045   };
04046 
04047   int d;
04048 
04049   if (mm->max == ONIG_INFINITE_DISTANCE) return 0;
04050 
04051   d = mm->max - mm->min;
04052   if (d < (int )(sizeof(dist_vals)/sizeof(dist_vals[0])))
04053     /* return dist_vals[d] * 16 / (mm->min + 12); */
04054     return (int )dist_vals[d];
04055   else
04056     return 1;
04057 }
04058 
04059 static int
04060 comp_distance_value(MinMaxLen* d1, MinMaxLen* d2, int v1, int v2)
04061 {
04062   if (v2 <= 0) return -1;
04063   if (v1 <= 0) return  1;
04064 
04065   v1 *= distance_value(d1);
04066   v2 *= distance_value(d2);
04067 
04068   if (v2 > v1) return  1;
04069   if (v2 < v1) return -1;
04070 
04071   if (d2->min < d1->min) return  1;
04072   if (d2->min > d1->min) return -1;
04073   return 0;
04074 }
04075 
04076 static int
04077 is_equal_mml(MinMaxLen* a, MinMaxLen* b)
04078 {
04079   return (a->min == b->min && a->max == b->max) ? 1 : 0;
04080 }
04081 
04082 
04083 static void
04084 set_mml(MinMaxLen* mml, OnigDistance min, OnigDistance max)
04085 {
04086   mml->min = min;
04087   mml->max = max;
04088 }
04089 
04090 static void
04091 clear_mml(MinMaxLen* mml)
04092 {
04093   mml->min = mml->max = 0;
04094 }
04095 
04096 static void
04097 copy_mml(MinMaxLen* to, MinMaxLen* from)
04098 {
04099   to->min = from->min;
04100   to->max = from->max;
04101 }
04102 
04103 static void
04104 add_mml(MinMaxLen* to, MinMaxLen* from)
04105 {
04106   to->min = distance_add(to->min, from->min);
04107   to->max = distance_add(to->max, from->max);
04108 }
04109 
04110 #if 0
04111 static void
04112 add_len_mml(MinMaxLen* to, OnigDistance len)
04113 {
04114   to->min = distance_add(to->min, len);
04115   to->max = distance_add(to->max, len);
04116 }
04117 #endif
04118 
04119 static void
04120 alt_merge_mml(MinMaxLen* to, MinMaxLen* from)
04121 {
04122   if (to->min > from->min) to->min = from->min;
04123   if (to->max < from->max) to->max = from->max;
04124 }
04125 
04126 static void
04127 copy_opt_env(OptEnv* to, OptEnv* from)
04128 {
04129   *to = *from;
04130 }
04131 
04132 static void
04133 clear_opt_anc_info(OptAncInfo* anc)
04134 {
04135   anc->left_anchor  = 0;
04136   anc->right_anchor = 0;
04137 }
04138 
04139 static void
04140 copy_opt_anc_info(OptAncInfo* to, OptAncInfo* from)
04141 {
04142   *to = *from;
04143 }
04144 
04145 static void
04146 concat_opt_anc_info(OptAncInfo* to, OptAncInfo* left, OptAncInfo* right,
04147                     OnigDistance left_len, OnigDistance right_len)
04148 {
04149   clear_opt_anc_info(to);
04150 
04151   to->left_anchor = left->left_anchor;
04152   if (left_len == 0) {
04153     to->left_anchor |= right->left_anchor;
04154   }
04155 
04156   to->right_anchor = right->right_anchor;
04157   if (right_len == 0) {
04158     to->right_anchor |= left->right_anchor;
04159   }
04160 }
04161 
04162 static int
04163 is_left_anchor(int anc)
04164 {
04165   if (anc == ANCHOR_END_BUF || anc == ANCHOR_SEMI_END_BUF ||
04166       anc == ANCHOR_END_LINE || anc == ANCHOR_PREC_READ ||
04167       anc == ANCHOR_PREC_READ_NOT)
04168     return 0;
04169 
04170   return 1;
04171 }
04172 
04173 static int
04174 is_set_opt_anc_info(OptAncInfo* to, int anc)
04175 {
04176   if ((to->left_anchor & anc) != 0) return 1;
04177 
04178   return ((to->right_anchor & anc) != 0 ? 1 : 0);
04179 }
04180 
04181 static void
04182 add_opt_anc_info(OptAncInfo* to, int anc)
04183 {
04184   if (is_left_anchor(anc))
04185     to->left_anchor |= anc;
04186   else
04187     to->right_anchor |= anc;
04188 }
04189 
04190 static void
04191 remove_opt_anc_info(OptAncInfo* to, int anc)
04192 {
04193   if (is_left_anchor(anc))
04194     to->left_anchor &= ~anc;
04195   else
04196     to->right_anchor &= ~anc;
04197 }
04198 
04199 static void
04200 alt_merge_opt_anc_info(OptAncInfo* to, OptAncInfo* add)
04201 {
04202   to->left_anchor  &= add->left_anchor;
04203   to->right_anchor &= add->right_anchor;
04204 }
04205 
04206 static int
04207 is_full_opt_exact_info(OptExactInfo* ex)
04208 {
04209   return (ex->len >= OPT_EXACT_MAXLEN ? 1 : 0);
04210 }
04211 
04212 static void
04213 clear_opt_exact_info(OptExactInfo* ex)
04214 {
04215   clear_mml(&ex->mmd);
04216   clear_opt_anc_info(&ex->anc);
04217   ex->reach_end   = 0;
04218   ex->ignore_case = 0;
04219   ex->len         = 0;
04220   ex->s[0]        = '\0';
04221 }
04222 
04223 static void
04224 copy_opt_exact_info(OptExactInfo* to, OptExactInfo* from)
04225 {
04226   *to = *from;
04227 }
04228 
04229 static void
04230 concat_opt_exact_info(OptExactInfo* to, OptExactInfo* add, OnigEncoding enc)
04231 {
04232   int i, j, len;
04233   UChar *p, *end;
04234   OptAncInfo tanc;
04235 
04236   if (! to->ignore_case && add->ignore_case) {
04237     if (to->len >= add->len) return ;  /* avoid */
04238 
04239     to->ignore_case = 1;
04240   }
04241 
04242   p = add->s;
04243   end = p + add->len;
04244   for (i = to->len; p < end; ) {
04245     len = enclen(enc, p, end);
04246     if (i + len > OPT_EXACT_MAXLEN) break;
04247     for (j = 0; j < len && p < end; j++)
04248       to->s[i++] = *p++;
04249   }
04250 
04251   to->len = i;
04252   to->reach_end = (p == end ? add->reach_end : 0);
04253 
04254   concat_opt_anc_info(&tanc, &to->anc, &add->anc, 1, 1);
04255   if (! to->reach_end) tanc.right_anchor = 0;
04256   copy_opt_anc_info(&to->anc, &tanc);
04257 }
04258 
04259 static void
04260 concat_opt_exact_info_str(OptExactInfo* to, UChar* s, UChar* end,
04261                           int raw ARG_UNUSED, OnigEncoding enc)
04262 {
04263   int i, j, len;
04264   UChar *p;
04265 
04266   for (i = to->len, p = s; p < end && i < OPT_EXACT_MAXLEN; ) {
04267     len = enclen(enc, p, end);
04268     if (i + len > OPT_EXACT_MAXLEN) break;
04269     for (j = 0; j < len && p < end; j++)
04270       to->s[i++] = *p++;
04271   }
04272 
04273   to->len = i;
04274 }
04275 
04276 static void
04277 alt_merge_opt_exact_info(OptExactInfo* to, OptExactInfo* add, OptEnv* env)
04278 {
04279   int i, j, len;
04280 
04281   if (add->len == 0 || to->len == 0) {
04282     clear_opt_exact_info(to);
04283     return ;
04284   }
04285 
04286   if (! is_equal_mml(&to->mmd, &add->mmd)) {
04287     clear_opt_exact_info(to);
04288     return ;
04289   }
04290 
04291   for (i = 0; i < to->len && i < add->len; ) {
04292     if (to->s[i] != add->s[i]) break;
04293     len = enclen(env->enc, to->s + i, to->s + to->len);
04294 
04295     for (j = 1; j < len; j++) {
04296       if (to->s[i+j] != add->s[i+j]) break;
04297     }
04298     if (j < len) break;
04299     i += len;
04300   }
04301 
04302   if (! add->reach_end || i < add->len || i < to->len) {
04303     to->reach_end = 0;
04304   }
04305   to->len = i;
04306   to->ignore_case |= add->ignore_case;
04307 
04308   alt_merge_opt_anc_info(&to->anc, &add->anc);
04309   if (! to->reach_end) to->anc.right_anchor = 0;
04310 }
04311 
04312 static void
04313 select_opt_exact_info(OnigEncoding enc, OptExactInfo* now, OptExactInfo* alt)
04314 {
04315   int v1, v2;
04316 
04317   v1 = now->len;
04318   v2 = alt->len;
04319 
04320   if (v2 == 0) {
04321     return ;
04322   }
04323   else if (v1 == 0) {
04324     copy_opt_exact_info(now, alt);
04325     return ;
04326   }
04327   else if (v1 <= 2 && v2 <= 2) {
04328     /* ByteValTable[x] is big value --> low price */
04329     v2 = map_position_value(enc, now->s[0]);
04330     v1 = map_position_value(enc, alt->s[0]);
04331 
04332     if (now->len > 1) v1 += 5;
04333     if (alt->len > 1) v2 += 5;
04334   }
04335 
04336   if (now->ignore_case == 0) v1 *= 2;
04337   if (alt->ignore_case == 0) v2 *= 2;
04338 
04339   if (comp_distance_value(&now->mmd, &alt->mmd, v1, v2) > 0)
04340     copy_opt_exact_info(now, alt);
04341 }
04342 
04343 static void
04344 clear_opt_map_info(OptMapInfo* map)
04345 {
04346   static const OptMapInfo clean_info = {
04347     {0, 0}, {0, 0}, 0,
04348     {
04349       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
04350       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
04351       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
04352       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
04353       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
04354       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
04355       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
04356       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
04357       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
04358       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
04359       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
04360       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
04361       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
04362       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
04363       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
04364       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
04365     }
04366   };
04367 
04368   xmemcpy(map, &clean_info, sizeof(OptMapInfo));
04369 }
04370 
04371 static void
04372 copy_opt_map_info(OptMapInfo* to, OptMapInfo* from)
04373 {
04374   *to = *from;
04375 }
04376 
04377 static void
04378 add_char_opt_map_info(OptMapInfo* map, UChar c, OnigEncoding enc)
04379 {
04380   if (map->map[c] == 0) {
04381     map->map[c] = 1;
04382     map->value += map_position_value(enc, c);
04383   }
04384 }
04385 
04386 static int
04387 add_char_amb_opt_map_info(OptMapInfo* map, UChar* p, UChar* end,
04388                           OnigEncoding enc, OnigCaseFoldType case_fold_flag)
04389 {
04390   OnigCaseFoldCodeItem items[ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM];
04391   UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN];
04392   int i, n;
04393 
04394   add_char_opt_map_info(map, p[0], enc);
04395 
04396   case_fold_flag = DISABLE_CASE_FOLD_MULTI_CHAR(case_fold_flag);
04397   n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc, case_fold_flag, p, end, items);
04398   if (n < 0) return n;
04399 
04400   for (i = 0; i < n; i++) {
04401     ONIGENC_CODE_TO_MBC(enc, items[i].code[0], buf);
04402     add_char_opt_map_info(map, buf[0], enc);
04403   }
04404 
04405   return 0;
04406 }
04407 
04408 static void
04409 select_opt_map_info(OptMapInfo* now, OptMapInfo* alt)
04410 {
04411   const int z = 1<<15; /* 32768: something big value */
04412 
04413   int v1, v2;
04414 
04415   if (alt->value == 0) return ;
04416   if (now->value == 0) {
04417     copy_opt_map_info(now, alt);
04418     return ;
04419   }
04420 
04421   v1 = z / now->value;
04422   v2 = z / alt->value;
04423   if (comp_distance_value(&now->mmd, &alt->mmd, v1, v2) > 0)
04424     copy_opt_map_info(now, alt);
04425 }
04426 
04427 static int
04428 comp_opt_exact_or_map_info(OptExactInfo* e, OptMapInfo* m)
04429 {
04430 #define COMP_EM_BASE  20
04431   int ve, vm;
04432 
04433   if (m->value <= 0) return -1;
04434 
04435   ve = COMP_EM_BASE * e->len * (e->ignore_case ? 1 : 2);
04436   vm = COMP_EM_BASE * 5 * 2 / m->value;
04437   return comp_distance_value(&e->mmd, &m->mmd, ve, vm);
04438 }
04439 
04440 static void
04441 alt_merge_opt_map_info(OnigEncoding enc, OptMapInfo* to, OptMapInfo* add)
04442 {
04443   int i, val;
04444 
04445   /* if (! is_equal_mml(&to->mmd, &add->mmd)) return ; */
04446   if (to->value == 0) return ;
04447   if (add->value == 0 || to->mmd.max < add->mmd.min) {
04448     clear_opt_map_info(to);
04449     return ;
04450   }
04451 
04452   alt_merge_mml(&to->mmd, &add->mmd);
04453 
04454   val = 0;
04455   for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) {
04456     if (add->map[i])
04457       to->map[i] = 1;
04458 
04459     if (to->map[i])
04460       val += map_position_value(enc, i);
04461   }
04462   to->value = val;
04463 
04464   alt_merge_opt_anc_info(&to->anc, &add->anc);
04465 }
04466 
04467 static void
04468 set_bound_node_opt_info(NodeOptInfo* opt, MinMaxLen* mmd)
04469 {
04470   copy_mml(&(opt->exb.mmd),  mmd);
04471   copy_mml(&(opt->expr.mmd), mmd);
04472   copy_mml(&(opt->map.mmd),  mmd);
04473 }
04474 
04475 static void
04476 clear_node_opt_info(NodeOptInfo* opt)
04477 {
04478   clear_mml(&opt->len);
04479   clear_opt_anc_info(&opt->anc);
04480   clear_opt_exact_info(&opt->exb);
04481   clear_opt_exact_info(&opt->exm);
04482   clear_opt_exact_info(&opt->expr);
04483   clear_opt_map_info(&opt->map);
04484 }
04485 
04486 static void
04487 copy_node_opt_info(NodeOptInfo* to, NodeOptInfo* from)
04488 {
04489   *to = *from;
04490 }
04491 
04492 static void
04493 concat_left_node_opt_info(OnigEncoding enc, NodeOptInfo* to, NodeOptInfo* add)
04494 {
04495   int exb_reach, exm_reach;
04496   OptAncInfo tanc;
04497 
04498   concat_opt_anc_info(&tanc, &to->anc, &add->anc, to->len.max, add->len.max);
04499   copy_opt_anc_info(&to->anc, &tanc);
04500 
04501   if (add->exb.len > 0 && to->len.max == 0) {
04502     concat_opt_anc_info(&tanc, &to->anc, &add->exb.anc,
04503                         to->len.max, add->len.max);
04504     copy_opt_anc_info(&add->exb.anc, &tanc);
04505   }
04506 
04507   if (add->map.value > 0 && to->len.max == 0) {
04508     if (add->map.mmd.max == 0)
04509       add->map.anc.left_anchor |= to->anc.left_anchor;
04510   }
04511 
04512   exb_reach = to->exb.reach_end;
04513   exm_reach = to->exm.reach_end;
04514 
04515   if (add->len.max != 0)
04516     to->exb.reach_end = to->exm.reach_end = 0;
04517 
04518   if (add->exb.len > 0) {
04519     if (exb_reach) {
04520       concat_opt_exact_info(&to->exb, &add->exb, enc);
04521       clear_opt_exact_info(&add->exb);
04522     }
04523     else if (exm_reach) {
04524       concat_opt_exact_info(&to->exm, &add->exb, enc);
04525       clear_opt_exact_info(&add->exb);
04526     }
04527   }
04528   select_opt_exact_info(enc, &to->exm, &add->exb);
04529   select_opt_exact_info(enc, &to->exm, &add->exm);
04530 
04531   if (to->expr.len > 0) {
04532     if (add->len.max > 0) {
04533       if (to->expr.len > (int )add->len.max)
04534         to->expr.len = add->len.max;
04535 
04536       if (to->expr.mmd.max == 0)
04537         select_opt_exact_info(enc, &to->exb, &to->expr);
04538       else
04539         select_opt_exact_info(enc, &to->exm, &to->expr);
04540     }
04541   }
04542   else if (add->expr.len > 0) {
04543     copy_opt_exact_info(&to->expr, &add->expr);
04544   }
04545 
04546   select_opt_map_info(&to->map, &add->map);
04547 
04548   add_mml(&to->len, &add->len);
04549 }
04550 
04551 static void
04552 alt_merge_node_opt_info(NodeOptInfo* to, NodeOptInfo* add, OptEnv* env)
04553 {
04554   alt_merge_opt_anc_info  (&to->anc,  &add->anc);
04555   alt_merge_opt_exact_info(&to->exb,  &add->exb, env);
04556   alt_merge_opt_exact_info(&to->exm,  &add->exm, env);
04557   alt_merge_opt_exact_info(&to->expr, &add->expr, env);
04558   alt_merge_opt_map_info(env->enc, &to->map,  &add->map);
04559 
04560   alt_merge_mml(&to->len, &add->len);
04561 }
04562 
04563 
04564 #define MAX_NODE_OPT_INFO_REF_COUNT    5
04565 
04566 static int
04567 optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env)
04568 {
04569   int type;
04570   int r = 0;
04571 
04572   clear_node_opt_info(opt);
04573   set_bound_node_opt_info(opt, &env->mmd);
04574 
04575   type = NTYPE(node);
04576   switch (type) {
04577   case NT_LIST:
04578     {
04579       OptEnv nenv;
04580       NodeOptInfo nopt;
04581       Node* nd = node;
04582 
04583       copy_opt_env(&nenv, env);
04584       do {
04585         r = optimize_node_left(NCAR(nd), &nopt, &nenv);
04586         if (r == 0) {
04587           add_mml(&nenv.mmd, &nopt.len);
04588           concat_left_node_opt_info(env->enc, opt, &nopt);
04589         }
04590       } while (r == 0 && IS_NOT_NULL(nd = NCDR(nd)));
04591     }
04592     break;
04593 
04594   case NT_ALT:
04595     {
04596       NodeOptInfo nopt;
04597       Node* nd = node;
04598 
04599       do {
04600         r = optimize_node_left(NCAR(nd), &nopt, env);
04601         if (r == 0) {
04602           if (nd == node) copy_node_opt_info(opt, &nopt);
04603           else            alt_merge_node_opt_info(opt, &nopt, env);
04604         }
04605       } while ((r == 0) && IS_NOT_NULL(nd = NCDR(nd)));
04606     }
04607     break;
04608 
04609   case NT_STR:
04610     {
04611       StrNode* sn = NSTR(node);
04612       int slen = sn->end - sn->s;
04613       int is_raw = NSTRING_IS_RAW(node);
04614 
04615       if (! NSTRING_IS_AMBIG(node)) {
04616         concat_opt_exact_info_str(&opt->exb, sn->s, sn->end,
04617                                   NSTRING_IS_RAW(node), env->enc);
04618         if (slen > 0) {
04619           add_char_opt_map_info(&opt->map, *(sn->s), env->enc);
04620         }
04621         set_mml(&opt->len, slen, slen);
04622       }
04623       else {
04624         int max;
04625 
04626         if (NSTRING_IS_DONT_GET_OPT_INFO(node)) {
04627           int n = onigenc_strlen(env->enc, sn->s, sn->end);
04628           max = ONIGENC_MBC_MAXLEN_DIST(env->enc) * n;
04629         }
04630         else {
04631           concat_opt_exact_info_str(&opt->exb, sn->s, sn->end,
04632                                     is_raw, env->enc);
04633           opt->exb.ignore_case = 1;
04634 
04635           if (slen > 0) {
04636             r = add_char_amb_opt_map_info(&opt->map, sn->s, sn->end,
04637                                           env->enc, env->case_fold_flag);
04638             if (r != 0) break;
04639           }
04640 
04641           max = slen;
04642         }
04643 
04644         set_mml(&opt->len, slen, max);
04645       }
04646 
04647       if (opt->exb.len == slen)
04648         opt->exb.reach_end = 1;
04649     }
04650     break;
04651 
04652   case NT_CCLASS:
04653     {
04654       int i, z;
04655       CClassNode* cc = NCCLASS(node);
04656 
04657       /* no need to check ignore case. (setted in setup_tree()) */
04658 
04659       if (IS_NOT_NULL(cc->mbuf) || IS_NCCLASS_NOT(cc)) {
04660         OnigDistance min = ONIGENC_MBC_MINLEN(env->enc);
04661         OnigDistance max = ONIGENC_MBC_MAXLEN_DIST(env->enc);
04662 
04663         set_mml(&opt->len, min, max);
04664       }
04665       else {
04666         for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
04667           z = BITSET_AT(cc->bs, i);
04668           if ((z && !IS_NCCLASS_NOT(cc)) || (!z && IS_NCCLASS_NOT(cc))) {
04669             add_char_opt_map_info(&opt->map, (UChar )i, env->enc);
04670           }
04671         }
04672         set_mml(&opt->len, 1, 1);
04673       }
04674     }
04675     break;
04676 
04677   case NT_CTYPE:
04678     {
04679       int i, min, max;
04680 
04681       max = ONIGENC_MBC_MAXLEN_DIST(env->enc);
04682 
04683       if (max == 1) {
04684         min = 1;
04685 
04686         switch (NCTYPE(node)->ctype) {
04687         case ONIGENC_CTYPE_WORD:
04688           if (NCTYPE(node)->not != 0) {
04689             for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
04690               if (! ONIGENC_IS_CODE_WORD(env->enc, i)) {
04691                 add_char_opt_map_info(&opt->map, (UChar )i, env->enc);
04692               }
04693             }
04694           }
04695           else {
04696             for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
04697               if (ONIGENC_IS_CODE_WORD(env->enc, i)) {
04698                 add_char_opt_map_info(&opt->map, (UChar )i, env->enc);
04699               }
04700             }
04701           }
04702           break;
04703         }
04704       }
04705       else {
04706         min = ONIGENC_MBC_MINLEN(env->enc);
04707       }
04708       set_mml(&opt->len, min, max);
04709     }
04710     break;
04711 
04712   case NT_CANY:
04713     {
04714       OnigDistance min = ONIGENC_MBC_MINLEN(env->enc);
04715       OnigDistance max = ONIGENC_MBC_MAXLEN_DIST(env->enc);
04716       set_mml(&opt->len, min, max);
04717     }
04718     break;
04719 
04720   case NT_ANCHOR:
04721     switch (NANCHOR(node)->type) {
04722     case ANCHOR_BEGIN_BUF:
04723     case ANCHOR_BEGIN_POSITION:
04724     case ANCHOR_BEGIN_LINE:
04725     case ANCHOR_END_BUF:
04726     case ANCHOR_SEMI_END_BUF:
04727     case ANCHOR_END_LINE:
04728       add_opt_anc_info(&opt->anc, NANCHOR(node)->type);
04729       break;
04730 
04731     case ANCHOR_PREC_READ:
04732       {
04733         NodeOptInfo nopt;
04734 
04735         r = optimize_node_left(NANCHOR(node)->target, &nopt, env);
04736         if (r == 0) {
04737           if (nopt.exb.len > 0)
04738             copy_opt_exact_info(&opt->expr, &nopt.exb);
04739           else if (nopt.exm.len > 0)
04740             copy_opt_exact_info(&opt->expr, &nopt.exm);
04741 
04742           opt->expr.reach_end = 0;
04743 
04744           if (nopt.map.value > 0)
04745             copy_opt_map_info(&opt->map, &nopt.map);
04746         }
04747       }
04748       break;
04749 
04750     case ANCHOR_PREC_READ_NOT:
04751     case ANCHOR_LOOK_BEHIND: /* Sorry, I can't make use of it. */
04752     case ANCHOR_LOOK_BEHIND_NOT:
04753       break;
04754     }
04755     break;
04756 
04757   case NT_BREF:
04758     {
04759       int i;
04760       int* backs;
04761       OnigDistance min, max, tmin, tmax;
04762       Node** nodes = SCANENV_MEM_NODES(env->scan_env);
04763       BRefNode* br = NBREF(node);
04764 
04765       if (br->state & NST_RECURSION) {
04766         set_mml(&opt->len, 0, ONIG_INFINITE_DISTANCE);
04767         break;
04768       }
04769       backs = BACKREFS_P(br);
04770       r = get_min_match_length(nodes[backs[0]], &min, env->scan_env);
04771       if (r != 0) break;
04772       r = get_max_match_length(nodes[backs[0]], &max, env->scan_env);
04773       if (r != 0) break;
04774       for (i = 1; i < br->back_num; i++) {
04775         r = get_min_match_length(nodes[backs[i]], &tmin, env->scan_env);
04776         if (r != 0) break;
04777         r = get_max_match_length(nodes[backs[i]], &tmax, env->scan_env);
04778         if (r != 0) break;
04779         if (min > tmin) min = tmin;
04780         if (max < tmax) max = tmax;
04781       }
04782       if (r == 0) set_mml(&opt->len, min, max);
04783     }
04784     break;
04785 
04786 #ifdef USE_SUBEXP_CALL
04787   case NT_CALL:
04788     if (IS_CALL_RECURSION(NCALL(node)))
04789       set_mml(&opt->len, 0, ONIG_INFINITE_DISTANCE);
04790     else {
04791       OnigOptionType save = env->options;
04792       env->options = NENCLOSE(NCALL(node)->target)->option;
04793       r = optimize_node_left(NCALL(node)->target, opt, env);
04794       env->options = save;
04795     }
04796     break;
04797 #endif
04798 
04799   case NT_QTFR:
04800     {
04801       int i;
04802       OnigDistance min, max;
04803       NodeOptInfo nopt;
04804       QtfrNode* qn = NQTFR(node);
04805 
04806       r = optimize_node_left(qn->target, &nopt, env);
04807       if (r) break;
04808 
04809       if (qn->lower == 0 && IS_REPEAT_INFINITE(qn->upper)) {
04810         if (env->mmd.max == 0 &&
04811             NTYPE(qn->target) == NT_CANY && qn->greedy) {
04812           if (IS_MULTILINE(env->options))
04813             add_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR_ML);
04814           else
04815             add_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR);
04816         }
04817       }
04818       else {
04819         if (qn->lower > 0) {
04820           copy_node_opt_info(opt, &nopt);
04821           if (nopt.exb.len > 0) {
04822             if (nopt.exb.reach_end) {
04823               for (i = 2; i <= qn->lower &&
04824                           ! is_full_opt_exact_info(&opt->exb); i++) {
04825                 concat_opt_exact_info(&opt->exb, &nopt.exb, env->enc);
04826               }
04827               if (i < qn->lower) {
04828                 opt->exb.reach_end = 0;
04829               }
04830             }
04831           }
04832 
04833           if (qn->lower != qn->upper) {
04834             opt->exb.reach_end = 0;
04835             opt->exm.reach_end = 0;
04836           }
04837           if (qn->lower > 1)
04838             opt->exm.reach_end = 0;
04839         }
04840       }
04841 
04842       min = distance_multiply(nopt.len.min, qn->lower);
04843       if (IS_REPEAT_INFINITE(qn->upper))
04844         max = (nopt.len.max > 0 ? ONIG_INFINITE_DISTANCE : 0);
04845       else
04846         max = distance_multiply(nopt.len.max, qn->upper);
04847 
04848       set_mml(&opt->len, min, max);
04849     }
04850     break;
04851 
04852   case NT_ENCLOSE:
04853     {
04854       EncloseNode* en = NENCLOSE(node);
04855 
04856       switch (en->type) {
04857       case ENCLOSE_OPTION:
04858         {
04859           OnigOptionType save = env->options;
04860 
04861           env->options = en->option;
04862           r = optimize_node_left(en->target, opt, env);
04863           env->options = save;
04864         }
04865         break;
04866 
04867       case ENCLOSE_MEMORY:
04868 #ifdef USE_SUBEXP_CALL
04869         en->opt_count++;
04870         if (en->opt_count > MAX_NODE_OPT_INFO_REF_COUNT) {
04871           OnigDistance min, max;
04872 
04873           min = 0;
04874           max = ONIG_INFINITE_DISTANCE;
04875           if (IS_ENCLOSE_MIN_FIXED(en)) min = en->min_len;
04876           if (IS_ENCLOSE_MAX_FIXED(en)) max = en->max_len;
04877           set_mml(&opt->len, min, max);
04878         }
04879         else
04880 #endif
04881         {
04882           r = optimize_node_left(en->target, opt, env);
04883 
04884           if (is_set_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR_MASK)) {
04885             if (BIT_STATUS_AT(env->scan_env->backrefed_mem, en->regnum))
04886               remove_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR_MASK);
04887           }
04888         }
04889         break;
04890 
04891       case ENCLOSE_STOP_BACKTRACK:
04892         r = optimize_node_left(en->target, opt, env);
04893         break;
04894       }
04895     }
04896     break;
04897 
04898   default:
04899 #ifdef ONIG_DEBUG
04900     fprintf(stderr, "optimize_node_left: undefined node type %d\n",
04901             NTYPE(node));
04902 #endif
04903     r = ONIGERR_TYPE_BUG;
04904     break;
04905   }
04906 
04907   return r;
04908 }
04909 
04910 static int
04911 set_optimize_exact_info(regex_t* reg, OptExactInfo* e)
04912 {
04913   int r;
04914 
04915   if (e->len == 0) return 0;
04916 
04917   if (e->ignore_case) {
04918     reg->exact = (UChar* )xmalloc(e->len);
04919     CHECK_NULL_RETURN_MEMERR(reg->exact);
04920     xmemcpy(reg->exact, e->s, e->len);
04921     reg->exact_end = reg->exact + e->len;
04922     reg->optimize = ONIG_OPTIMIZE_EXACT_IC;
04923   }
04924   else {
04925     int allow_reverse;
04926 
04927     reg->exact = str_dup(e->s, e->s + e->len);
04928     CHECK_NULL_RETURN_MEMERR(reg->exact);
04929     reg->exact_end = reg->exact + e->len;
04930 
04931     allow_reverse =
04932         ONIGENC_IS_ALLOWED_REVERSE_MATCH(reg->enc, reg->exact, reg->exact_end);
04933 
04934     if (e->len >= 3 || (e->len >= 2 && allow_reverse)) {
04935       r = set_bm_skip(reg->exact, reg->exact_end, reg->enc,
04936                       reg->map, &(reg->int_map));
04937       if (r) return r;
04938 
04939       reg->optimize = (allow_reverse != 0
04940                        ? ONIG_OPTIMIZE_EXACT_BM : ONIG_OPTIMIZE_EXACT_BM_NOT_REV);
04941     }
04942     else {
04943       reg->optimize = ONIG_OPTIMIZE_EXACT;
04944     }
04945   }
04946 
04947   reg->dmin = e->mmd.min;
04948   reg->dmax = e->mmd.max;
04949 
04950   if (reg->dmin != ONIG_INFINITE_DISTANCE) {
04951     reg->threshold_len = reg->dmin + (reg->exact_end - reg->exact);
04952   }
04953 
04954   return 0;
04955 }
04956 
04957 static void
04958 set_optimize_map_info(regex_t* reg, OptMapInfo* m)
04959 {
04960   int i;
04961 
04962   for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++)
04963     reg->map[i] = m->map[i];
04964 
04965   reg->optimize   = ONIG_OPTIMIZE_MAP;
04966   reg->dmin       = m->mmd.min;
04967   reg->dmax       = m->mmd.max;
04968 
04969   if (reg->dmin != ONIG_INFINITE_DISTANCE) {
04970     reg->threshold_len = reg->dmin + 1;
04971   }
04972 }
04973 
04974 static void
04975 set_sub_anchor(regex_t* reg, OptAncInfo* anc)
04976 {
04977   reg->sub_anchor |= anc->left_anchor  & ANCHOR_BEGIN_LINE;
04978   reg->sub_anchor |= anc->right_anchor & ANCHOR_END_LINE;
04979 }
04980 
04981 #ifdef ONIG_DEBUG
04982 static void print_optimize_info(FILE* f, regex_t* reg);
04983 #endif
04984 
04985 static int
04986 set_optimize_info_from_tree(Node* node, regex_t* reg, ScanEnv* scan_env)
04987 {
04988 
04989   int r;
04990   NodeOptInfo opt;
04991   OptEnv env;
04992 
04993   env.enc            = reg->enc;
04994   env.options        = reg->options;
04995   env.case_fold_flag = reg->case_fold_flag;
04996   env.scan_env   = scan_env;
04997   clear_mml(&env.mmd);
04998 
04999   r = optimize_node_left(node, &opt, &env);
05000   if (r) return r;
05001 
05002   reg->anchor = opt.anc.left_anchor & (ANCHOR_BEGIN_BUF |
05003         ANCHOR_BEGIN_POSITION | ANCHOR_ANYCHAR_STAR | ANCHOR_ANYCHAR_STAR_ML);
05004 
05005   reg->anchor |= opt.anc.right_anchor & (ANCHOR_END_BUF | ANCHOR_SEMI_END_BUF);
05006 
05007   if (reg->anchor & (ANCHOR_END_BUF | ANCHOR_SEMI_END_BUF)) {
05008     reg->anchor_dmin = opt.len.min;
05009     reg->anchor_dmax = opt.len.max;
05010   }
05011 
05012   if (opt.exb.len > 0 || opt.exm.len > 0) {
05013     select_opt_exact_info(reg->enc, &opt.exb, &opt.exm);
05014     if (opt.map.value > 0 &&
05015         comp_opt_exact_or_map_info(&opt.exb, &opt.map) > 0) {
05016       goto set_map;
05017     }
05018     else {
05019       r = set_optimize_exact_info(reg, &opt.exb);
05020       set_sub_anchor(reg, &opt.exb.anc);
05021     }
05022   }
05023   else if (opt.map.value > 0) {
05024   set_map:
05025     set_optimize_map_info(reg, &opt.map);
05026     set_sub_anchor(reg, &opt.map.anc);
05027   }
05028   else {
05029     reg->sub_anchor |= opt.anc.left_anchor & ANCHOR_BEGIN_LINE;
05030     if (opt.len.max == 0)
05031       reg->sub_anchor |= opt.anc.right_anchor & ANCHOR_END_LINE;
05032   }
05033 
05034 #if defined(ONIG_DEBUG_COMPILE) || defined(ONIG_DEBUG_MATCH)
05035   print_optimize_info(stderr, reg);
05036 #endif
05037   return r;
05038 }
05039 
05040 static void
05041 clear_optimize_info(regex_t* reg)
05042 {
05043   reg->optimize      = ONIG_OPTIMIZE_NONE;
05044   reg->anchor        = 0;
05045   reg->anchor_dmin   = 0;
05046   reg->anchor_dmax   = 0;
05047   reg->sub_anchor    = 0;
05048   reg->exact_end     = (UChar* )NULL;
05049   reg->threshold_len = 0;
05050   if (IS_NOT_NULL(reg->exact)) {
05051     xfree(reg->exact);
05052     reg->exact = (UChar* )NULL;
05053   }
05054 }
05055 
05056 #ifdef ONIG_DEBUG
05057 
05058 static void print_enc_string(FILE* fp, OnigEncoding enc,
05059                              const UChar *s, const UChar *end)
05060 {
05061   fprintf(fp, "\nPATTERN: /");
05062 
05063   if (ONIGENC_MBC_MINLEN(enc) > 1) {
05064     const UChar *p;
05065     OnigCodePoint code;
05066 
05067     p = s;
05068     while (p < end) {
05069       code = ONIGENC_MBC_TO_CODE(enc, p, end);
05070       if (code >= 0x80) {
05071         fprintf(fp, " 0x%04x ", (int )code);
05072       }
05073       else {
05074         fputc((int )code, fp);
05075       }
05076 
05077       p += enclen(enc, p, end);
05078     }
05079   }
05080   else {
05081     while (s < end) {
05082       fputc((int )*s, fp);
05083       s++;
05084     }
05085   }
05086 
05087   fprintf(fp, "/\n");
05088 }
05089 
05090 static void
05091 print_distance_range(FILE* f, OnigDistance a, OnigDistance b)
05092 {
05093   if (a == ONIG_INFINITE_DISTANCE)
05094     fputs("inf", f);
05095   else
05096     fprintf(f, "(%u)", a);
05097 
05098   fputs("-", f);
05099 
05100   if (b == ONIG_INFINITE_DISTANCE)
05101     fputs("inf", f);
05102   else
05103     fprintf(f, "(%u)", b);
05104 }
05105 
05106 static void
05107 print_anchor(FILE* f, int anchor)
05108 {
05109   int q = 0;
05110 
05111   fprintf(f, "[");
05112 
05113   if (anchor & ANCHOR_BEGIN_BUF) {
05114     fprintf(f, "begin-buf");
05115     q = 1;
05116   }
05117   if (anchor & ANCHOR_BEGIN_LINE) {
05118     if (q) fprintf(f, ", ");
05119     q = 1;
05120     fprintf(f, "begin-line");
05121   }
05122   if (anchor & ANCHOR_BEGIN_POSITION) {
05123     if (q) fprintf(f, ", ");
05124     q = 1;
05125     fprintf(f, "begin-pos");
05126   }
05127   if (anchor & ANCHOR_END_BUF) {
05128     if (q) fprintf(f, ", ");
05129     q = 1;
05130     fprintf(f, "end-buf");
05131   }
05132   if (anchor & ANCHOR_SEMI_END_BUF) {
05133     if (q) fprintf(f, ", ");
05134     q = 1;
05135     fprintf(f, "semi-end-buf");
05136   }
05137   if (anchor & ANCHOR_END_LINE) {
05138     if (q) fprintf(f, ", ");
05139     q = 1;
05140     fprintf(f, "end-line");
05141   }
05142   if (anchor & ANCHOR_ANYCHAR_STAR) {
05143     if (q) fprintf(f, ", ");
05144     q = 1;
05145     fprintf(f, "anychar-star");
05146   }
05147   if (anchor & ANCHOR_ANYCHAR_STAR_ML) {
05148     if (q) fprintf(f, ", ");
05149     fprintf(f, "anychar-star-pl");
05150   }
05151 
05152   fprintf(f, "]");
05153 }
05154 
05155 static void
05156 print_optimize_info(FILE* f, regex_t* reg)
05157 {
05158   static const char* on[] = { "NONE", "EXACT", "EXACT_BM", "EXACT_BM_NOT_REV",
05159                               "EXACT_IC", "MAP" };
05160 
05161   fprintf(f, "optimize: %s\n", on[reg->optimize]);
05162   fprintf(f, "  anchor: "); print_anchor(f, reg->anchor);
05163   if ((reg->anchor & ANCHOR_END_BUF_MASK) != 0)
05164     print_distance_range(f, reg->anchor_dmin, reg->anchor_dmax);
05165   fprintf(f, "\n");
05166 
05167   if (reg->optimize) {
05168     fprintf(f, "  sub anchor: "); print_anchor(f, reg->sub_anchor);
05169     fprintf(f, "\n");
05170   }
05171   fprintf(f, "\n");
05172 
05173   if (reg->exact) {
05174     UChar *p;
05175     fprintf(f, "exact: [");
05176     for (p = reg->exact; p < reg->exact_end; p++) {
05177       fputc(*p, f);
05178     }
05179     fprintf(f, "]: length: %d\n", (reg->exact_end - reg->exact));
05180   }
05181   else if (reg->optimize & ONIG_OPTIMIZE_MAP) {
05182     int c, i, n = 0;
05183 
05184     for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++)
05185       if (reg->map[i]) n++;
05186 
05187     fprintf(f, "map: n=%d\n", n);
05188     if (n > 0) {
05189       c = 0;
05190       fputc('[', f);
05191       for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) {
05192         if (reg->map[i] != 0) {
05193           if (c > 0)  fputs(", ", f);
05194           c++;
05195           if (ONIGENC_MBC_MAXLEN(reg->enc) == 1 &&
05196               ONIGENC_IS_CODE_PRINT(reg->enc, (OnigCodePoint )i))
05197             fputc(i, f);
05198           else
05199             fprintf(f, "%d", i);
05200         }
05201       }
05202       fprintf(f, "]\n");
05203     }
05204   }
05205 }
05206 #endif /* ONIG_DEBUG */
05207 
05208 
05209 extern void
05210 onig_free_body(regex_t* reg)
05211 {
05212   if (IS_NOT_NULL(reg)) {
05213     if (IS_NOT_NULL(reg->p))                xfree(reg->p);
05214     if (IS_NOT_NULL(reg->exact))            xfree(reg->exact);
05215     if (IS_NOT_NULL(reg->int_map))          xfree(reg->int_map);
05216     if (IS_NOT_NULL(reg->int_map_backward)) xfree(reg->int_map_backward);
05217     if (IS_NOT_NULL(reg->repeat_range))     xfree(reg->repeat_range);
05218     if (IS_NOT_NULL(reg->chain))            onig_free(reg->chain);
05219 
05220 #ifdef USE_NAMED_GROUP
05221     onig_names_free(reg);
05222 #endif
05223   }
05224 }
05225 
05226 extern void
05227 onig_free(regex_t* reg)
05228 {
05229   if (IS_NOT_NULL(reg)) {
05230     onig_free_body(reg);
05231     xfree(reg);
05232   }
05233 }
05234 
05235 size_t
05236 onig_memsize(regex_t *reg)
05237 {
05238     size_t size = sizeof(regex_t);
05239     if (IS_NOT_NULL(reg->p))                size += reg->alloc;
05240     if (IS_NOT_NULL(reg->exact))            size += reg->exact_end - reg->exact;
05241     if (IS_NOT_NULL(reg->int_map))          size += sizeof(int) * ONIG_CHAR_TABLE_SIZE;
05242     if (IS_NOT_NULL(reg->int_map_backward)) size += sizeof(int) * ONIG_CHAR_TABLE_SIZE;
05243     if (IS_NOT_NULL(reg->repeat_range))     size += reg->repeat_range_alloc * sizeof(OnigRepeatRange);
05244     if (IS_NOT_NULL(reg->chain))            size += onig_memsize(reg->chain);
05245 
05246     return size;
05247 }
05248 
05249 #define REGEX_TRANSFER(to,from) do {\
05250   (to)->state = ONIG_STATE_MODIFY;\
05251   onig_free_body(to);\
05252   xmemcpy(to, from, sizeof(regex_t));\
05253   xfree(from);\
05254 } while (0)
05255 
05256 extern void
05257 onig_transfer(regex_t* to, regex_t* from)
05258 {
05259   THREAD_ATOMIC_START;
05260   REGEX_TRANSFER(to, from);
05261   THREAD_ATOMIC_END;
05262 }
05263 
05264 #define REGEX_CHAIN_HEAD(reg) do {\
05265   while (IS_NOT_NULL((reg)->chain)) {\
05266     (reg) = (reg)->chain;\
05267   }\
05268 } while (0)
05269 
05270 extern void
05271 onig_chain_link_add(regex_t* to, regex_t* add)
05272 {
05273   THREAD_ATOMIC_START;
05274   REGEX_CHAIN_HEAD(to);
05275   to->chain = add;
05276   THREAD_ATOMIC_END;
05277 }
05278 
05279 extern void
05280 onig_chain_reduce(regex_t* reg)
05281 {
05282   regex_t *head, *prev;
05283 
05284   prev = reg;
05285   head = prev->chain;
05286   if (IS_NOT_NULL(head)) {
05287     reg->state = ONIG_STATE_MODIFY;
05288     while (IS_NOT_NULL(head->chain)) {
05289       prev = head;
05290       head = head->chain;
05291     }
05292     prev->chain = (regex_t* )NULL;
05293     REGEX_TRANSFER(reg, head);
05294   }
05295 }
05296 
05297 #ifdef ONIG_DEBUG
05298 static void print_compiled_byte_code_list P_((FILE* f, regex_t* reg));
05299 #endif
05300 #ifdef ONIG_DEBUG_PARSE_TREE
05301 static void print_tree P_((FILE* f, Node* node));
05302 #endif
05303 
05304 extern int
05305 onig_compile(regex_t* reg, const UChar* pattern, const UChar* pattern_end,
05306               OnigErrorInfo* einfo, const char *sourcefile, int sourceline)
05307 {
05308 #define COMPILE_INIT_SIZE  20
05309 
05310   int r, init_size;
05311   Node*  root;
05312   ScanEnv  scan_env = {0};
05313 #ifdef USE_SUBEXP_CALL
05314   UnsetAddrList  uslist;
05315 #endif
05316 
05317   if (IS_NOT_NULL(einfo)) einfo->par = (UChar* )NULL;
05318 
05319   scan_env.sourcefile = sourcefile;
05320   scan_env.sourceline = sourceline;
05321   reg->state = ONIG_STATE_COMPILING;
05322 
05323 #ifdef ONIG_DEBUG
05324   print_enc_string(stderr, reg->enc, pattern, pattern_end);
05325 #endif
05326 
05327   if (reg->alloc == 0) {
05328     init_size = (pattern_end - pattern) * 2;
05329     if (init_size <= 0) init_size = COMPILE_INIT_SIZE;
05330     r = BBUF_INIT(reg, init_size);
05331     if (r != 0) goto end;
05332   }
05333   else
05334     reg->used = 0;
05335 
05336   reg->num_mem            = 0;
05337   reg->num_repeat         = 0;
05338   reg->num_null_check     = 0;
05339   reg->repeat_range_alloc = 0;
05340   reg->repeat_range       = (OnigRepeatRange* )NULL;
05341 #ifdef USE_COMBINATION_EXPLOSION_CHECK
05342   reg->num_comb_exp_check = 0;
05343 #endif
05344 
05345   r = onig_parse_make_tree(&root, pattern, pattern_end, reg, &scan_env);
05346   if (r != 0) goto err;
05347 
05348 #ifdef USE_NAMED_GROUP
05349   /* mixed use named group and no-named group */
05350   if (scan_env.num_named > 0 &&
05351       IS_SYNTAX_BV(scan_env.syntax, ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP) &&
05352       !ONIG_IS_OPTION_ON(reg->options, ONIG_OPTION_CAPTURE_GROUP)) {
05353     if (scan_env.num_named != scan_env.num_mem)
05354       r = disable_noname_group_capture(&root, reg, &scan_env);
05355     else
05356       r = numbered_ref_check(root);
05357 
05358     if (r != 0) goto err;
05359   }
05360 #endif
05361 
05362 #ifdef USE_SUBEXP_CALL
05363   if (scan_env.num_call > 0) {
05364     r = unset_addr_list_init(&uslist, scan_env.num_call);
05365     if (r != 0) goto err;
05366     scan_env.unset_addr_list = &uslist;
05367     r = setup_subexp_call(root, &scan_env);
05368     if (r != 0) goto err_unset;
05369     r = subexp_recursive_check_trav(root, &scan_env);
05370     if (r  < 0) goto err_unset;
05371     r = subexp_inf_recursive_check_trav(root, &scan_env);
05372     if (r != 0) goto err_unset;
05373 
05374     reg->num_call = scan_env.num_call;
05375   }
05376   else
05377     reg->num_call = 0;
05378 #endif
05379 
05380   r = setup_tree(root, reg, 0, &scan_env);
05381   if (r != 0) goto err_unset;
05382 
05383 #ifdef ONIG_DEBUG_PARSE_TREE
05384   print_tree(stderr, root);
05385 #endif
05386 
05387   reg->capture_history  = scan_env.capture_history;
05388   reg->bt_mem_start     = scan_env.bt_mem_start;
05389   reg->bt_mem_start    |= reg->capture_history;
05390   if (IS_FIND_CONDITION(reg->options))
05391     BIT_STATUS_ON_ALL(reg->bt_mem_end);
05392   else {
05393     reg->bt_mem_end  = scan_env.bt_mem_end;
05394     reg->bt_mem_end |= reg->capture_history;
05395   }
05396 
05397 #ifdef USE_COMBINATION_EXPLOSION_CHECK
05398   if (scan_env.backrefed_mem == 0
05399 #ifdef USE_SUBEXP_CALL
05400       || scan_env.num_call == 0
05401 #endif
05402       ) {
05403     setup_comb_exp_check(root, 0, &scan_env);
05404 #ifdef USE_SUBEXP_CALL
05405     if (scan_env.has_recursion != 0) {
05406       scan_env.num_comb_exp_check = 0;
05407     }
05408     else
05409 #endif
05410     if (scan_env.comb_exp_max_regnum > 0) {
05411       int i;
05412       for (i = 1; i <= scan_env.comb_exp_max_regnum; i++) {
05413         if (BIT_STATUS_AT(scan_env.backrefed_mem, i) != 0) {
05414           scan_env.num_comb_exp_check = 0;
05415           break;
05416         }
05417       }
05418     }
05419   }
05420 
05421   reg->num_comb_exp_check = scan_env.num_comb_exp_check;
05422 #endif
05423 
05424   clear_optimize_info(reg);
05425 #ifndef ONIG_DONT_OPTIMIZE
05426   r = set_optimize_info_from_tree(root, reg, &scan_env);
05427   if (r != 0) goto err_unset;
05428 #endif
05429 
05430   if (IS_NOT_NULL(scan_env.mem_nodes_dynamic)) {
05431     xfree(scan_env.mem_nodes_dynamic);
05432     scan_env.mem_nodes_dynamic = (Node** )NULL;
05433   }
05434 
05435   r = compile_tree(root, reg);
05436   if (r == 0) {
05437     r = add_opcode(reg, OP_END);
05438 #ifdef USE_SUBEXP_CALL
05439     if (scan_env.num_call > 0) {
05440       r = unset_addr_list_fix(&uslist, reg);
05441       unset_addr_list_end(&uslist);
05442       if (r) goto err;
05443     }
05444 #endif
05445 
05446     if ((reg->num_repeat != 0) || (reg->bt_mem_end != 0))
05447       reg->stack_pop_level = STACK_POP_LEVEL_ALL;
05448     else {
05449       if (reg->bt_mem_start != 0)
05450         reg->stack_pop_level = STACK_POP_LEVEL_MEM_START;
05451       else
05452         reg->stack_pop_level = STACK_POP_LEVEL_FREE;
05453     }
05454   }
05455 #ifdef USE_SUBEXP_CALL
05456   else if (scan_env.num_call > 0) {
05457     unset_addr_list_end(&uslist);
05458   }
05459 #endif
05460   onig_node_free(root);
05461 
05462 #ifdef ONIG_DEBUG_COMPILE
05463 #ifdef USE_NAMED_GROUP
05464   onig_print_names(stderr, reg);
05465 #endif
05466   print_compiled_byte_code_list(stderr, reg);
05467 #endif
05468 
05469  end:
05470   reg->state = ONIG_STATE_NORMAL;
05471   return r;
05472 
05473  err_unset:
05474 #ifdef USE_SUBEXP_CALL
05475   if (scan_env.num_call > 0) {
05476     unset_addr_list_end(&uslist);
05477   }
05478 #endif
05479  err:
05480   if (IS_NOT_NULL(scan_env.error)) {
05481     if (IS_NOT_NULL(einfo)) {
05482       einfo->enc     = scan_env.enc;
05483       einfo->par     = scan_env.error;
05484       einfo->par_end = scan_env.error_end;
05485     }
05486   }
05487 
05488   onig_node_free(root);
05489   if (IS_NOT_NULL(scan_env.mem_nodes_dynamic))
05490       xfree(scan_env.mem_nodes_dynamic);
05491   return r;
05492 }
05493 
05494 #ifdef USE_RECOMPILE_API
05495 extern int
05496 onig_recompile(regex_t* reg, const UChar* pattern, const UChar* pattern_end,
05497             OnigOptionType option, OnigEncoding enc, OnigSyntaxType* syntax,
05498             OnigErrorInfo* einfo)
05499 {
05500   int r;
05501   regex_t *new_reg;
05502 
05503   r = onig_new(&new_reg, pattern, pattern_end, option, enc, syntax, einfo);
05504   if (r) return r;
05505   if (ONIG_STATE(reg) == ONIG_STATE_NORMAL) {
05506     onig_transfer(reg, new_reg);
05507   }
05508   else {
05509     onig_chain_link_add(reg, new_reg);
05510   }
05511   return 0;
05512 }
05513 #endif
05514 
05515 static int onig_inited = 0;
05516 
05517 extern int
05518 onig_reg_init(regex_t* reg, OnigOptionType option,
05519               OnigCaseFoldType case_fold_flag,
05520               OnigEncoding enc, const OnigSyntaxType* syntax)
05521 {
05522   if (! onig_inited)
05523     onig_init();
05524 
05525   if (IS_NULL(reg))
05526     return ONIGERR_INVALID_ARGUMENT;
05527 
05528   if (ONIGENC_IS_UNDEF(enc))
05529     return ONIGERR_DEFAULT_ENCODING_IS_NOT_SETTED;
05530 
05531   if ((option & (ONIG_OPTION_DONT_CAPTURE_GROUP|ONIG_OPTION_CAPTURE_GROUP))
05532       == (ONIG_OPTION_DONT_CAPTURE_GROUP|ONIG_OPTION_CAPTURE_GROUP)) {
05533     return ONIGERR_INVALID_COMBINATION_OF_OPTIONS;
05534   }
05535 
05536   (reg)->state = ONIG_STATE_MODIFY;
05537 
05538   if ((option & ONIG_OPTION_NEGATE_SINGLELINE) != 0) {
05539     option |= syntax->options;
05540     option &= ~ONIG_OPTION_SINGLELINE;
05541   }
05542   else
05543     option |= syntax->options;
05544 
05545   (reg)->enc              = enc;
05546   (reg)->options          = option;
05547   (reg)->syntax           = syntax;
05548   (reg)->optimize         = 0;
05549   (reg)->exact            = (UChar* )NULL;
05550   (reg)->int_map          = (int* )NULL;
05551   (reg)->int_map_backward = (int* )NULL;
05552   (reg)->chain            = (regex_t* )NULL;
05553 
05554   (reg)->p                = (UChar* )NULL;
05555   (reg)->alloc            = 0;
05556   (reg)->used             = 0;
05557   (reg)->name_table       = (void* )NULL;
05558 
05559   (reg)->case_fold_flag   = case_fold_flag;
05560   return 0;
05561 }
05562 
05563 extern int
05564 onig_new_without_alloc(regex_t* reg, const UChar* pattern,
05565           const UChar* pattern_end, OnigOptionType option, OnigEncoding enc,
05566           OnigSyntaxType* syntax, OnigErrorInfo* einfo)
05567 {
05568   int r;
05569 
05570   r = onig_reg_init(reg, option, ONIGENC_CASE_FOLD_DEFAULT, enc, syntax);
05571   if (r) return r;
05572 
05573   r = onig_compile(reg, pattern, pattern_end, einfo, NULL, 0);
05574   return r;
05575 }
05576 
05577 extern int
05578 onig_new(regex_t** reg, const UChar* pattern, const UChar* pattern_end,
05579           OnigOptionType option, OnigEncoding enc, const OnigSyntaxType* syntax,
05580           OnigErrorInfo* einfo)
05581 {
05582   int r;
05583 
05584   *reg = (regex_t* )xmalloc(sizeof(regex_t));
05585   if (IS_NULL(*reg)) return ONIGERR_MEMORY;
05586 
05587   r = onig_reg_init(*reg, option, ONIGENC_CASE_FOLD_DEFAULT, enc, syntax);
05588   if (r) goto err;
05589 
05590   r = onig_compile(*reg, pattern, pattern_end, einfo, NULL, 0);
05591   if (r) {
05592   err:
05593     onig_free(*reg);
05594     *reg = NULL;
05595   }
05596   return r;
05597 }
05598 
05599 
05600 extern int
05601 onig_init(void)
05602 {
05603   if (onig_inited != 0)
05604     return 0;
05605 
05606   THREAD_SYSTEM_INIT;
05607   THREAD_ATOMIC_START;
05608 
05609   onig_inited = 1;
05610 
05611   onigenc_init();
05612   /* onigenc_set_default_caseconv_table((UChar* )0); */
05613 
05614 #ifdef ONIG_DEBUG_STATISTICS
05615   onig_statistics_init();
05616 #endif
05617 
05618   THREAD_ATOMIC_END;
05619   return 0;
05620 }
05621 
05622 
05623 extern int
05624 onig_end(void)
05625 {
05626   THREAD_ATOMIC_START;
05627 
05628 #ifdef ONIG_DEBUG_STATISTICS
05629   onig_print_statistics(stderr);
05630 #endif
05631 
05632 #ifdef USE_SHARED_CCLASS_TABLE
05633   onig_free_shared_cclass_table();
05634 #endif
05635 
05636 #ifdef USE_PARSE_TREE_NODE_RECYCLE
05637   onig_free_node_list();
05638 #endif
05639 
05640   onig_inited = 0;
05641 
05642   THREAD_ATOMIC_END;
05643   THREAD_SYSTEM_END;
05644   return 0;
05645 }
05646 
05647 extern int
05648 onig_is_in_code_range(const UChar* p, OnigCodePoint code)
05649 {
05650   OnigCodePoint n, *data;
05651   OnigCodePoint low, high, x;
05652 
05653   GET_CODE_POINT(n, p);
05654   data = (OnigCodePoint* )p;
05655   data++;
05656 
05657   for (low = 0, high = n; low < high; ) {
05658     x = (low + high) >> 1;
05659     if (code > data[x * 2 + 1])
05660       low = x + 1;
05661     else
05662       high = x;
05663   }
05664 
05665   return ((low < n && code >= data[low * 2]) ? 1 : 0);
05666 }
05667 
05668 extern int
05669 onig_is_code_in_cc_len(int elen, OnigCodePoint code, CClassNode* cc)
05670 {
05671   int found;
05672 
05673   if (elen > 1 || (code >= SINGLE_BYTE_SIZE)) {
05674     if (IS_NULL(cc->mbuf)) {
05675       found = 0;
05676     }
05677     else {
05678       found = (onig_is_in_code_range(cc->mbuf->p, code) != 0 ? 1 : 0);
05679     }
05680   }
05681   else {
05682     found = (BITSET_AT(cc->bs, code) == 0 ? 0 : 1);
05683   }
05684 
05685   if (IS_NCCLASS_NOT(cc))
05686     return !found;
05687   else
05688     return found;
05689 }
05690 
05691 extern int
05692 onig_is_code_in_cc(OnigEncoding enc, OnigCodePoint code, CClassNode* cc)
05693 {
05694   int len;
05695 
05696   if (ONIGENC_MBC_MINLEN(enc) > 1) {
05697     len = 2;
05698   }
05699   else {
05700     len = ONIGENC_CODE_TO_MBCLEN(enc, code);
05701   }
05702   return onig_is_code_in_cc_len(len, code, cc);
05703 }
05704 
05705 
05706 #ifdef ONIG_DEBUG
05707 
05708 /* arguments type */
05709 #define ARG_SPECIAL     -1
05710 #define ARG_NON          0
05711 #define ARG_RELADDR      1
05712 #define ARG_ABSADDR      2
05713 #define ARG_LENGTH       3
05714 #define ARG_MEMNUM       4
05715 #define ARG_OPTION       5
05716 #define ARG_STATE_CHECK  6
05717 
05718 OnigOpInfoType OnigOpInfo[] = {
05719   { OP_FINISH,            "finish",          ARG_NON },
05720   { OP_END,               "end",             ARG_NON },
05721   { OP_EXACT1,            "exact1",          ARG_SPECIAL },
05722   { OP_EXACT2,            "exact2",          ARG_SPECIAL },
05723   { OP_EXACT3,            "exact3",          ARG_SPECIAL },
05724   { OP_EXACT4,            "exact4",          ARG_SPECIAL },
05725   { OP_EXACT5,            "exact5",          ARG_SPECIAL },
05726   { OP_EXACTN,            "exactn",          ARG_SPECIAL },
05727   { OP_EXACTMB2N1,        "exactmb2-n1",     ARG_SPECIAL },
05728   { OP_EXACTMB2N2,        "exactmb2-n2",     ARG_SPECIAL },
05729   { OP_EXACTMB2N3,        "exactmb2-n3",     ARG_SPECIAL },
05730   { OP_EXACTMB2N,         "exactmb2-n",      ARG_SPECIAL },
05731   { OP_EXACTMB3N,         "exactmb3n"  ,     ARG_SPECIAL },
05732   { OP_EXACTMBN,          "exactmbn",        ARG_SPECIAL },
05733   { OP_EXACT1_IC,         "exact1-ic",       ARG_SPECIAL },
05734   { OP_EXACTN_IC,         "exactn-ic",       ARG_SPECIAL },
05735   { OP_CCLASS,            "cclass",          ARG_SPECIAL },
05736   { OP_CCLASS_MB,         "cclass-mb",       ARG_SPECIAL },
05737   { OP_CCLASS_MIX,        "cclass-mix",      ARG_SPECIAL },
05738   { OP_CCLASS_NOT,        "cclass-not",      ARG_SPECIAL },
05739   { OP_CCLASS_MB_NOT,     "cclass-mb-not",   ARG_SPECIAL },
05740   { OP_CCLASS_MIX_NOT,    "cclass-mix-not",  ARG_SPECIAL },
05741   { OP_CCLASS_NODE,       "cclass-node",     ARG_SPECIAL },
05742   { OP_ANYCHAR,           "anychar",         ARG_NON },
05743   { OP_ANYCHAR_ML,        "anychar-ml",      ARG_NON },
05744   { OP_ANYCHAR_STAR,      "anychar*",        ARG_NON },
05745   { OP_ANYCHAR_ML_STAR,   "anychar-ml*",     ARG_NON },
05746   { OP_ANYCHAR_STAR_PEEK_NEXT, "anychar*-peek-next", ARG_SPECIAL },
05747   { OP_ANYCHAR_ML_STAR_PEEK_NEXT, "anychar-ml*-peek-next", ARG_SPECIAL },
05748   { OP_WORD,                "word",            ARG_NON },
05749   { OP_NOT_WORD,            "not-word",        ARG_NON },
05750   { OP_WORD_BOUND,          "word-bound",      ARG_NON },
05751   { OP_NOT_WORD_BOUND,      "not-word-bound",  ARG_NON },
05752   { OP_WORD_BEGIN,          "word-begin",      ARG_NON },
05753   { OP_WORD_END,            "word-end",        ARG_NON },
05754   { OP_BEGIN_BUF,           "begin-buf",       ARG_NON },
05755   { OP_END_BUF,             "end-buf",         ARG_NON },
05756   { OP_BEGIN_LINE,          "begin-line",      ARG_NON },
05757   { OP_END_LINE,            "end-line",        ARG_NON },
05758   { OP_SEMI_END_BUF,        "semi-end-buf",    ARG_NON },
05759   { OP_BEGIN_POSITION,      "begin-position",  ARG_NON },
05760   { OP_BACKREF1,            "backref1",             ARG_NON },
05761   { OP_BACKREF2,            "backref2",             ARG_NON },
05762   { OP_BACKREFN,            "backrefn",             ARG_MEMNUM  },
05763   { OP_BACKREFN_IC,         "backrefn-ic",          ARG_SPECIAL },
05764   { OP_BACKREF_MULTI,       "backref_multi",        ARG_SPECIAL },
05765   { OP_BACKREF_MULTI_IC,    "backref_multi-ic",     ARG_SPECIAL },
05766   { OP_BACKREF_WITH_LEVEL,    "backref_at_level",     ARG_SPECIAL },
05767   { OP_MEMORY_START_PUSH,   "mem-start-push",       ARG_MEMNUM  },
05768   { OP_MEMORY_START,        "mem-start",            ARG_MEMNUM  },
05769   { OP_MEMORY_END_PUSH,     "mem-end-push",         ARG_MEMNUM  },
05770   { OP_MEMORY_END_PUSH_REC, "mem-end-push-rec",     ARG_MEMNUM  },
05771   { OP_MEMORY_END,          "mem-end",              ARG_MEMNUM  },
05772   { OP_MEMORY_END_REC,      "mem-end-rec",          ARG_MEMNUM  },
05773   { OP_SET_OPTION_PUSH,     "set-option-push",      ARG_OPTION  },
05774   { OP_SET_OPTION,          "set-option",           ARG_OPTION  },
05775   { OP_FAIL,                "fail",                 ARG_NON },
05776   { OP_JUMP,                "jump",                 ARG_RELADDR },
05777   { OP_PUSH,                "push",                 ARG_RELADDR },
05778   { OP_POP,                 "pop",                  ARG_NON },
05779   { OP_PUSH_OR_JUMP_EXACT1, "push-or-jump-e1",      ARG_SPECIAL },
05780   { OP_PUSH_IF_PEEK_NEXT,   "push-if-peek-next",    ARG_SPECIAL },
05781   { OP_REPEAT,              "repeat",               ARG_SPECIAL },
05782   { OP_REPEAT_NG,           "repeat-ng",            ARG_SPECIAL },
05783   { OP_REPEAT_INC,          "repeat-inc",           ARG_MEMNUM  },
05784   { OP_REPEAT_INC_NG,       "repeat-inc-ng",        ARG_MEMNUM  },
05785   { OP_REPEAT_INC_SG,       "repeat-inc-sg",        ARG_MEMNUM  },
05786   { OP_REPEAT_INC_NG_SG,    "repeat-inc-ng-sg",     ARG_MEMNUM  },
05787   { OP_NULL_CHECK_START,    "null-check-start",     ARG_MEMNUM  },
05788   { OP_NULL_CHECK_END,      "null-check-end",       ARG_MEMNUM  },
05789   { OP_NULL_CHECK_END_MEMST,"null-check-end-memst", ARG_MEMNUM  },
05790   { OP_NULL_CHECK_END_MEMST_PUSH,"null-check-end-memst-push", ARG_MEMNUM  },
05791   { OP_PUSH_POS,             "push-pos",             ARG_NON },
05792   { OP_POP_POS,              "pop-pos",              ARG_NON },
05793   { OP_PUSH_POS_NOT,         "push-pos-not",         ARG_RELADDR },
05794   { OP_FAIL_POS,             "fail-pos",             ARG_NON },
05795   { OP_PUSH_STOP_BT,         "push-stop-bt",         ARG_NON },
05796   { OP_POP_STOP_BT,          "pop-stop-bt",          ARG_NON },
05797   { OP_LOOK_BEHIND,          "look-behind",          ARG_SPECIAL },
05798   { OP_PUSH_LOOK_BEHIND_NOT, "push-look-behind-not", ARG_SPECIAL },
05799   { OP_FAIL_LOOK_BEHIND_NOT, "fail-look-behind-not", ARG_NON },
05800   { OP_CALL,                 "call",                 ARG_ABSADDR },
05801   { OP_RETURN,               "return",               ARG_NON },
05802   { OP_STATE_CHECK_PUSH,         "state-check-push",         ARG_SPECIAL },
05803   { OP_STATE_CHECK_PUSH_OR_JUMP, "state-check-push-or-jump", ARG_SPECIAL },
05804   { OP_STATE_CHECK,              "state-check",              ARG_STATE_CHECK },
05805   { OP_STATE_CHECK_ANYCHAR_STAR, "state-check-anychar*",     ARG_STATE_CHECK },
05806   { OP_STATE_CHECK_ANYCHAR_ML_STAR,
05807     "state-check-anychar-ml*", ARG_STATE_CHECK },
05808   { -1, "", ARG_NON }
05809 };
05810 
05811 static char*
05812 op2name(int opcode)
05813 {
05814   int i;
05815 
05816   for (i = 0; OnigOpInfo[i].opcode >= 0; i++) {
05817     if (opcode == OnigOpInfo[i].opcode)
05818       return OnigOpInfo[i].name;
05819   }
05820   return "";
05821 }
05822 
05823 static int
05824 op2arg_type(int opcode)
05825 {
05826   int i;
05827 
05828   for (i = 0; OnigOpInfo[i].opcode >= 0; i++) {
05829     if (opcode == OnigOpInfo[i].opcode)
05830       return OnigOpInfo[i].arg_type;
05831   }
05832   return ARG_SPECIAL;
05833 }
05834 
05835 static void
05836 Indent(FILE* f, int indent)
05837 {
05838   int i;
05839   for (i = 0; i < indent; i++) putc(' ', f);
05840 }
05841 
05842 static void
05843 p_string(FILE* f, int len, UChar* s)
05844 {
05845   fputs(":", f);
05846   while (len-- > 0) { fputc(*s++, f); }
05847 }
05848 
05849 static void
05850 p_len_string(FILE* f, LengthType len, int mb_len, UChar* s)
05851 {
05852   int x = len * mb_len;
05853 
05854   fprintf(f, ":%d:", len);
05855   while (x-- > 0) { fputc(*s++, f); }
05856 }
05857 
05858 extern void
05859 onig_print_compiled_byte_code(FILE* f, UChar* bp, UChar* bpend, UChar** nextp,
05860                               OnigEncoding enc)
05861 {
05862   int i, n, arg_type;
05863   RelAddrType addr;
05864   LengthType len;
05865   MemNumType mem;
05866   StateCheckNumType scn;
05867   OnigCodePoint code;
05868   UChar *q;
05869 
05870   fprintf(f, "[%s", op2name(*bp));
05871   arg_type = op2arg_type(*bp);
05872   if (arg_type != ARG_SPECIAL) {
05873     bp++;
05874     switch (arg_type) {
05875     case ARG_NON:
05876       break;
05877     case ARG_RELADDR:
05878       GET_RELADDR_INC(addr, bp);
05879       fprintf(f, ":(%d)", addr);
05880       break;
05881     case ARG_ABSADDR:
05882       GET_ABSADDR_INC(addr, bp);
05883       fprintf(f, ":(%d)", addr);
05884       break;
05885     case ARG_LENGTH:
05886       GET_LENGTH_INC(len, bp);
05887       fprintf(f, ":%d", len);
05888       break;
05889     case ARG_MEMNUM:
05890       mem = *((MemNumType* )bp);
05891       bp += SIZE_MEMNUM;
05892       fprintf(f, ":%d", mem);
05893       break;
05894     case ARG_OPTION:
05895       {
05896         OnigOptionType option = *((OnigOptionType* )bp);
05897         bp += SIZE_OPTION;
05898         fprintf(f, ":%d", option);
05899       }
05900       break;
05901 
05902     case ARG_STATE_CHECK:
05903       scn = *((StateCheckNumType* )bp);
05904       bp += SIZE_STATE_CHECK_NUM;
05905       fprintf(f, ":%d", scn);
05906       break;
05907     }
05908   }
05909   else {
05910     switch (*bp++) {
05911     case OP_EXACT1:
05912     case OP_ANYCHAR_STAR_PEEK_NEXT:
05913     case OP_ANYCHAR_ML_STAR_PEEK_NEXT:
05914       p_string(f, 1, bp++); break;
05915     case OP_EXACT2:
05916       p_string(f, 2, bp); bp += 2; break;
05917     case OP_EXACT3:
05918       p_string(f, 3, bp); bp += 3; break;
05919     case OP_EXACT4:
05920       p_string(f, 4, bp); bp += 4; break;
05921     case OP_EXACT5:
05922       p_string(f, 5, bp); bp += 5; break;
05923     case OP_EXACTN:
05924       GET_LENGTH_INC(len, bp);
05925       p_len_string(f, len, 1, bp);
05926       bp += len;
05927       break;
05928 
05929     case OP_EXACTMB2N1:
05930       p_string(f, 2, bp); bp += 2; break;
05931     case OP_EXACTMB2N2:
05932       p_string(f, 4, bp); bp += 4; break;
05933     case OP_EXACTMB2N3:
05934       p_string(f, 6, bp); bp += 6; break;
05935     case OP_EXACTMB2N:
05936       GET_LENGTH_INC(len, bp);
05937       p_len_string(f, len, 2, bp);
05938       bp += len * 2;
05939       break;
05940     case OP_EXACTMB3N:
05941       GET_LENGTH_INC(len, bp);
05942       p_len_string(f, len, 3, bp);
05943       bp += len * 3;
05944       break;
05945     case OP_EXACTMBN:
05946       {
05947         int mb_len;
05948 
05949         GET_LENGTH_INC(mb_len, bp);
05950         GET_LENGTH_INC(len, bp);
05951         fprintf(f, ":%d:%d:", mb_len, len);
05952         n = len * mb_len;
05953         while (n-- > 0) { fputc(*bp++, f); }
05954       }
05955       break;
05956 
05957     case OP_EXACT1_IC:
05958       len = enclen(enc, bp, bpend);
05959       p_string(f, len, bp);
05960       bp += len;
05961       break;
05962     case OP_EXACTN_IC:
05963       GET_LENGTH_INC(len, bp);
05964       p_len_string(f, len, 1, bp);
05965       bp += len;
05966       break;
05967 
05968     case OP_CCLASS:
05969       n = bitset_on_num((BitSetRef )bp);
05970       bp += SIZE_BITSET;
05971       fprintf(f, ":%d", n);
05972       break;
05973 
05974     case OP_CCLASS_NOT:
05975       n = bitset_on_num((BitSetRef )bp);
05976       bp += SIZE_BITSET;
05977       fprintf(f, ":%d", n);
05978       break;
05979 
05980     case OP_CCLASS_MB:
05981     case OP_CCLASS_MB_NOT:
05982       GET_LENGTH_INC(len, bp);
05983       q = bp;
05984 #ifndef PLATFORM_UNALIGNED_WORD_ACCESS
05985       ALIGNMENT_RIGHT(q);
05986 #endif
05987       GET_CODE_POINT(code, q);
05988       bp += len;
05989       fprintf(f, ":%d:%d", (int )code, len);
05990       break;
05991 
05992     case OP_CCLASS_MIX:
05993     case OP_CCLASS_MIX_NOT:
05994       n = bitset_on_num((BitSetRef )bp);
05995       bp += SIZE_BITSET;
05996       GET_LENGTH_INC(len, bp);
05997       q = bp;
05998 #ifndef PLATFORM_UNALIGNED_WORD_ACCESS
05999       ALIGNMENT_RIGHT(q);
06000 #endif
06001       GET_CODE_POINT(code, q);
06002       bp += len;
06003       fprintf(f, ":%d:%d:%d", n, (int )code, len);
06004       break;
06005 
06006     case OP_CCLASS_NODE:
06007       {
06008         CClassNode *cc;
06009 
06010         GET_POINTER_INC(cc, bp);
06011         n = bitset_on_num(cc->bs);
06012         fprintf(f, ":%u:%d", (unsigned int )cc, n);
06013       }
06014       break;
06015 
06016     case OP_BACKREFN_IC:
06017       mem = *((MemNumType* )bp);
06018       bp += SIZE_MEMNUM;
06019       fprintf(f, ":%d", mem);
06020       break;
06021 
06022     case OP_BACKREF_MULTI_IC:
06023     case OP_BACKREF_MULTI:
06024       fputs(" ", f);
06025       GET_LENGTH_INC(len, bp);
06026       for (i = 0; i < len; i++) {
06027         GET_MEMNUM_INC(mem, bp);
06028         if (i > 0) fputs(", ", f);
06029         fprintf(f, "%d", mem);
06030       }
06031       break;
06032 
06033     case OP_BACKREF_WITH_LEVEL:
06034       {
06035         OnigOptionType option;
06036         LengthType level;
06037 
06038         GET_OPTION_INC(option, bp);
06039         fprintf(f, ":%d", option);
06040         GET_LENGTH_INC(level, bp);
06041         fprintf(f, ":%d", level);
06042 
06043         fputs(" ", f);
06044         GET_LENGTH_INC(len, bp);
06045         for (i = 0; i < len; i++) {
06046           GET_MEMNUM_INC(mem, bp);
06047           if (i > 0) fputs(", ", f);
06048           fprintf(f, "%d", mem);
06049         }
06050       }
06051       break;
06052 
06053     case OP_REPEAT:
06054     case OP_REPEAT_NG:
06055       {
06056         mem = *((MemNumType* )bp);
06057         bp += SIZE_MEMNUM;
06058         addr = *((RelAddrType* )bp);
06059         bp += SIZE_RELADDR;
06060         fprintf(f, ":%d:%d", mem, addr);
06061       }
06062       break;
06063 
06064     case OP_PUSH_OR_JUMP_EXACT1:
06065     case OP_PUSH_IF_PEEK_NEXT:
06066       addr = *((RelAddrType* )bp);
06067       bp += SIZE_RELADDR;
06068       fprintf(f, ":(%d)", addr);
06069       p_string(f, 1, bp);
06070       bp += 1;
06071       break;
06072 
06073     case OP_LOOK_BEHIND:
06074       GET_LENGTH_INC(len, bp);
06075       fprintf(f, ":%d", len);
06076       break;
06077 
06078     case OP_PUSH_LOOK_BEHIND_NOT:
06079       GET_RELADDR_INC(addr, bp);
06080       GET_LENGTH_INC(len, bp);
06081       fprintf(f, ":%d:(%d)", len, addr);
06082       break;
06083 
06084     case OP_STATE_CHECK_PUSH:
06085     case OP_STATE_CHECK_PUSH_OR_JUMP:
06086       scn = *((StateCheckNumType* )bp);
06087       bp += SIZE_STATE_CHECK_NUM;
06088       addr = *((RelAddrType* )bp);
06089       bp += SIZE_RELADDR;
06090       fprintf(f, ":%d:(%d)", scn, addr);
06091       break;
06092 
06093     default:
06094       fprintf(stderr, "onig_print_compiled_byte_code: undefined code %d\n",
06095               *--bp);
06096     }
06097   }
06098   fputs("]", f);
06099   if (nextp) *nextp = bp;
06100 }
06101 
06102 static void
06103 print_compiled_byte_code_list(FILE* f, regex_t* reg)
06104 {
06105   int ncode;
06106   UChar* bp = reg->p;
06107   UChar* end = reg->p + reg->used;
06108 
06109   fprintf(f, "code length: %d\n", reg->used);
06110 
06111   ncode = 0;
06112   while (bp < end) {
06113     ncode++;
06114     if (bp > reg->p) {
06115       if (ncode % 5 == 0)
06116         fprintf(f, "\n");
06117       else
06118         fputs(" ", f);
06119     }
06120     onig_print_compiled_byte_code(f, bp, end, &bp, reg->enc);
06121   }
06122 
06123   fprintf(f, "\n");
06124 }
06125 
06126 static void
06127 print_indent_tree(FILE* f, Node* node, int indent)
06128 {
06129   int i, type;
06130   int add = 3;
06131   UChar* p;
06132 
06133   Indent(f, indent);
06134   if (IS_NULL(node)) {
06135     fprintf(f, "ERROR: null node!!!\n");
06136     exit (0);
06137   }
06138 
06139   type = NTYPE(node);
06140   switch (type) {
06141   case NT_LIST:
06142   case NT_ALT:
06143     if (NTYPE(node) == NT_LIST)
06144       fprintf(f, "<list:%x>\n", (int )node);
06145     else
06146       fprintf(f, "<alt:%x>\n", (int )node);
06147 
06148     print_indent_tree(f, NCAR(node), indent + add);
06149     while (IS_NOT_NULL(node = NCDR(node))) {
06150       if (NTYPE(node) != type) {
06151         fprintf(f, "ERROR: list/alt right is not a cons. %d\n", NTYPE(node));
06152         exit(0);
06153       }
06154       print_indent_tree(f, NCAR(node), indent + add);
06155     }
06156     break;
06157 
06158   case NT_STR:
06159     fprintf(f, "<string%s:%x>",
06160             (NSTRING_IS_RAW(node) ? "-raw" : ""), (int )node);
06161     for (p = NSTR(node)->s; p < NSTR(node)->end; p++) {
06162       if (*p >= 0x20 && *p < 0x7f)
06163         fputc(*p, f);
06164       else {
06165         fprintf(f, " 0x%02x", *p);
06166       }
06167     }
06168     break;
06169 
06170   case NT_CCLASS:
06171     fprintf(f, "<cclass:%x>", (int )node);
06172     if (IS_NCCLASS_NOT(NCCLASS(node))) fputs(" not", f);
06173     if (NCCLASS(node)->mbuf) {
06174       BBuf* bbuf = NCCLASS(node)->mbuf;
06175       for (i = 0; i < bbuf->used; i++) {
06176         if (i > 0) fprintf(f, ",");
06177         fprintf(f, "%0x", bbuf->p[i]);
06178       }
06179     }
06180     break;
06181 
06182   case NT_CTYPE:
06183     fprintf(f, "<ctype:%x> ", (int )node);
06184     switch (NCTYPE(node)->ctype) {
06185     case ONIGENC_CTYPE_WORD:
06186       if (NCTYPE(node)->not != 0)
06187         fputs("not word",       f);
06188       else
06189         fputs("word",           f);
06190       break;
06191 
06192     default:
06193       fprintf(f, "ERROR: undefined ctype.\n");
06194       exit(0);
06195     }
06196     break;
06197 
06198   case NT_CANY:
06199     fprintf(f, "<anychar:%x>", (int )node);
06200     break;
06201 
06202   case NT_ANCHOR:
06203     fprintf(f, "<anchor:%x> ", (int )node);
06204     switch (NANCHOR(node)->type) {
06205     case ANCHOR_BEGIN_BUF:      fputs("begin buf",      f); break;
06206     case ANCHOR_END_BUF:        fputs("end buf",        f); break;
06207     case ANCHOR_BEGIN_LINE:     fputs("begin line",     f); break;
06208     case ANCHOR_END_LINE:       fputs("end line",       f); break;
06209     case ANCHOR_SEMI_END_BUF:   fputs("semi end buf",   f); break;
06210     case ANCHOR_BEGIN_POSITION: fputs("begin position", f); break;
06211 
06212     case ANCHOR_WORD_BOUND:      fputs("word bound",     f); break;
06213     case ANCHOR_NOT_WORD_BOUND:  fputs("not word bound", f); break;
06214 #ifdef USE_WORD_BEGIN_END
06215     case ANCHOR_WORD_BEGIN:      fputs("word begin", f);     break;
06216     case ANCHOR_WORD_END:        fputs("word end", f);       break;
06217 #endif
06218     case ANCHOR_PREC_READ:       fputs("prec read",      f); break;
06219     case ANCHOR_PREC_READ_NOT:   fputs("prec read not",  f); break;
06220     case ANCHOR_LOOK_BEHIND:     fputs("look_behind",    f); break;
06221     case ANCHOR_LOOK_BEHIND_NOT: fputs("look_behind_not",f); break;
06222 
06223     default:
06224       fprintf(f, "ERROR: undefined anchor type.\n");
06225       break;
06226     }
06227     break;
06228 
06229   case NT_BREF:
06230     {
06231       int* p;
06232       BRefNode* br = NBREF(node);
06233       p = BACKREFS_P(br);
06234       fprintf(f, "<backref:%x>", (int )node);
06235       for (i = 0; i < br->back_num; i++) {
06236         if (i > 0) fputs(", ", f);
06237         fprintf(f, "%d", p[i]);
06238       }
06239     }
06240     break;
06241 
06242 #ifdef USE_SUBEXP_CALL
06243   case NT_CALL:
06244     {
06245       CallNode* cn = NCALL(node);
06246       fprintf(f, "<call:%x>", (int )node);
06247       p_string(f, cn->name_end - cn->name, cn->name);
06248     }
06249     break;
06250 #endif
06251 
06252   case NT_QTFR:
06253     fprintf(f, "<quantifier:%x>{%d,%d}%s\n", (int )node,
06254             NQTFR(node)->lower, NQTFR(node)->upper,
06255             (NQTFR(node)->greedy ? "" : "?"));
06256     print_indent_tree(f, NQTFR(node)->target, indent + add);
06257     break;
06258 
06259   case NT_ENCLOSE:
06260     fprintf(f, "<enclose:%x> ", (int )node);
06261     switch (NENCLOSE(node)->type) {
06262     case ENCLOSE_OPTION:
06263       fprintf(f, "option:%d\n", NENCLOSE(node)->option);
06264       print_indent_tree(f, NENCLOSE(node)->target, indent + add);
06265       break;
06266     case ENCLOSE_MEMORY:
06267       fprintf(f, "memory:%d", NENCLOSE(node)->regnum);
06268       break;
06269     case ENCLOSE_STOP_BACKTRACK:
06270       fprintf(f, "stop-bt");
06271       break;
06272 
06273     default:
06274       break;
06275     }
06276     fprintf(f, "\n");
06277     print_indent_tree(f, NENCLOSE(node)->target, indent + add);
06278     break;
06279 
06280   default:
06281     fprintf(f, "print_indent_tree: undefined node type %d\n", NTYPE(node));
06282     break;
06283   }
06284 
06285   if (type != NT_LIST && type != NT_ALT && type != NT_QTFR &&
06286       type != NT_ENCLOSE)
06287     fprintf(f, "\n");
06288   fflush(f);
06289 }
06290 #endif /* ONIG_DEBUG */
06291 
06292 #ifdef ONIG_DEBUG_PARSE_TREE
06293 static void
06294 print_tree(FILE* f, Node* node)
06295 {
06296   print_indent_tree(f, node, 0);
06297 }
06298 #endif
06299 

Generated on Sat Jul 7 2012 15:29:22 for Ruby by  doxygen 1.7.1