Ruby  2.0.0p645(2015-04-13revision50299)
regcomp.c
Go to the documentation of this file.
1 /**********************************************************************
2  regcomp.c - Onigmo (Oniguruma-mod) (regular expression library)
3 **********************************************************************/
4 /*-
5  * Copyright (c) 2002-2008 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
6  * Copyright (c) 2011-2013 K.Takata <kentkt AT csc DOT jp>
7  * All rights reserved.
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  * 1. Redistributions of source code must retain the above copyright
13  * notice, this list of conditions and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  * notice, this list of conditions and the following disclaimer in the
16  * documentation and/or other materials provided with the distribution.
17  *
18  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
19  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21  * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
22  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28  * SUCH DAMAGE.
29  */
30 
31 #include "regparse.h"
32 
34 
35 extern OnigCaseFoldType
37 {
39 }
40 
41 extern int
43 {
44  OnigDefaultCaseFoldFlag = case_fold_flag;
45  return 0;
46 }
47 
48 
49 #ifndef PLATFORM_UNALIGNED_WORD_ACCESS
50 static unsigned char PadBuf[WORD_ALIGNMENT_SIZE];
51 #endif
52 
53 #if 0
54 static UChar*
55 str_dup(UChar* s, UChar* end)
56 {
57  ptrdiff_t len = end - s;
58 
59  if (len > 0) {
60  UChar* r = (UChar* )xmalloc(len + 1);
62  xmemcpy(r, s, len);
63  r[len] = (UChar )0;
64  return r;
65  }
66  else return NULL;
67 }
68 #endif
69 
70 static void
72 {
73  Node c;
74  c = *a; *a = *b; *b = c;
75 
76  if (NTYPE(a) == NT_STR) {
77  StrNode* sn = NSTR(a);
78  if (sn->capa == 0) {
79  size_t len = sn->end - sn->s;
80  sn->s = sn->buf;
81  sn->end = sn->s + len;
82  }
83  }
84 
85  if (NTYPE(b) == NT_STR) {
86  StrNode* sn = NSTR(b);
87  if (sn->capa == 0) {
88  size_t len = sn->end - sn->s;
89  sn->s = sn->buf;
90  sn->end = sn->s + len;
91  }
92  }
93 }
94 
95 static OnigDistance
97 {
100  else {
101  if (d1 <= ONIG_INFINITE_DISTANCE - d2) return d1 + d2;
102  else return ONIG_INFINITE_DISTANCE;
103  }
104 }
105 
106 static OnigDistance
108 {
109  if (m == 0) return 0;
110 
111  if (d < ONIG_INFINITE_DISTANCE / m)
112  return d * m;
113  else
114  return ONIG_INFINITE_DISTANCE;
115 }
116 
117 static int
119 {
120  int i;
121  for (i = 0; i < BITSET_SIZE; i++) {
122  if (bs[i] != 0) return 0;
123  }
124  return 1;
125 }
126 
127 #ifdef ONIG_DEBUG
128 static int
129 onig_is_prelude(void)
130 {
131  return !rb_const_defined(rb_cThread, rb_intern_const("MUTEX_FOR_THREAD_EXCLUSIVE"));
132 }
133 
134 static int
135 bitset_on_num(BitSetRef bs)
136 {
137  int i, n;
138 
139  n = 0;
140  for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
141  if (BITSET_AT(bs, i)) n++;
142  }
143  return n;
144 }
145 #endif
146 
147 extern int
149 {
150  if (size <= 0) {
151  size = 0;
152  buf->p = NULL;
153  }
154  else {
155  buf->p = (UChar* )xmalloc(size);
156  if (IS_NULL(buf->p)) return(ONIGERR_MEMORY);
157  }
158 
159  buf->alloc = (unsigned int )size;
160  buf->used = 0;
161  return 0;
162 }
163 
164 
165 #ifdef USE_SUBEXP_CALL
166 
167 static int
169 {
170  UnsetAddr* p;
171 
172  p = (UnsetAddr* )xmalloc(sizeof(UnsetAddr)* size);
174  uslist->num = 0;
175  uslist->alloc = size;
176  uslist->us = p;
177  return 0;
178 }
179 
180 static void
182 {
183  if (IS_NOT_NULL(uslist->us))
184  xfree(uslist->us);
185 }
186 
187 static int
188 unset_addr_list_add(UnsetAddrList* uslist, int offset, struct _Node* node)
189 {
190  UnsetAddr* p;
191  int size;
192 
193  if (uslist->num >= uslist->alloc) {
194  size = uslist->alloc * 2;
195  p = (UnsetAddr* )xrealloc(uslist->us, sizeof(UnsetAddr) * size);
197  uslist->alloc = size;
198  uslist->us = p;
199  }
200 
201  uslist->us[uslist->num].offset = offset;
202  uslist->us[uslist->num].target = node;
203  uslist->num++;
204  return 0;
205 }
206 #endif /* USE_SUBEXP_CALL */
207 
208 
209 static int
210 add_opcode(regex_t* reg, int opcode)
211 {
212  BBUF_ADD1(reg, opcode);
213  return 0;
214 }
215 
216 #ifdef USE_COMBINATION_EXPLOSION_CHECK
217 static int
218 add_state_check_num(regex_t* reg, int num)
219 {
221 
222  BBUF_ADD(reg, &n, SIZE_STATE_CHECK_NUM);
223  return 0;
224 }
225 #endif
226 
227 static int
228 add_rel_addr(regex_t* reg, int addr)
229 {
230  RelAddrType ra = (RelAddrType )addr;
231 
232  BBUF_ADD(reg, &ra, SIZE_RELADDR);
233  return 0;
234 }
235 
236 static int
237 add_abs_addr(regex_t* reg, int addr)
238 {
239  AbsAddrType ra = (AbsAddrType )addr;
240 
241  BBUF_ADD(reg, &ra, SIZE_ABSADDR);
242  return 0;
243 }
244 
245 static int
247 {
248  LengthType l = (LengthType )len;
249 
250  BBUF_ADD(reg, &l, SIZE_LENGTH);
251  return 0;
252 }
253 
254 static int
255 add_mem_num(regex_t* reg, int num)
256 {
257  MemNumType n = (MemNumType )num;
258 
259  BBUF_ADD(reg, &n, SIZE_MEMNUM);
260  return 0;
261 }
262 
263 static int
264 add_pointer(regex_t* reg, void* addr)
265 {
266  PointerType ptr = (PointerType )addr;
267 
268  BBUF_ADD(reg, &ptr, SIZE_POINTER);
269  return 0;
270 }
271 
272 static int
274 {
275  BBUF_ADD(reg, &option, SIZE_OPTION);
276  return 0;
277 }
278 
279 static int
280 add_opcode_rel_addr(regex_t* reg, int opcode, int addr)
281 {
282  int r;
283 
284  r = add_opcode(reg, opcode);
285  if (r) return r;
286  r = add_rel_addr(reg, addr);
287  return r;
288 }
289 
290 static int
292 {
293  BBUF_ADD(reg, bytes, len);
294  return 0;
295 }
296 
297 static int
299 {
300  BBUF_ADD(reg, bs, SIZE_BITSET);
301  return 0;
302 }
303 
304 static int
305 add_opcode_option(regex_t* reg, int opcode, OnigOptionType option)
306 {
307  int r;
308 
309  r = add_opcode(reg, opcode);
310  if (r) return r;
311  r = add_option(reg, option);
312  return r;
313 }
314 
315 static int compile_length_tree(Node* node, regex_t* reg);
316 static int compile_tree(Node* node, regex_t* reg);
317 
318 
319 #define IS_NEED_STR_LEN_OP_EXACT(op) \
320  ((op) == OP_EXACTN || (op) == OP_EXACTMB2N ||\
321  (op) == OP_EXACTMB3N || (op) == OP_EXACTMBN || (op) == OP_EXACTN_IC)
322 
323 static int
324 select_str_opcode(int mb_len, OnigDistance str_len, int ignore_case)
325 {
326  int op;
327 
328  if (ignore_case) {
329  switch (str_len) {
330  case 1: op = OP_EXACT1_IC; break;
331  default: op = OP_EXACTN_IC; break;
332  }
333  }
334  else {
335  switch (mb_len) {
336  case 1:
337  switch (str_len) {
338  case 1: op = OP_EXACT1; break;
339  case 2: op = OP_EXACT2; break;
340  case 3: op = OP_EXACT3; break;
341  case 4: op = OP_EXACT4; break;
342  case 5: op = OP_EXACT5; break;
343  default: op = OP_EXACTN; break;
344  }
345  break;
346 
347  case 2:
348  switch (str_len) {
349  case 1: op = OP_EXACTMB2N1; break;
350  case 2: op = OP_EXACTMB2N2; break;
351  case 3: op = OP_EXACTMB2N3; break;
352  default: op = OP_EXACTMB2N; break;
353  }
354  break;
355 
356  case 3:
357  op = OP_EXACTMB3N;
358  break;
359 
360  default:
361  op = OP_EXACTMBN;
362  break;
363  }
364  }
365  return op;
366 }
367 
368 static int
369 compile_tree_empty_check(Node* node, regex_t* reg, int empty_info)
370 {
371  int r;
372  int saved_num_null_check = reg->num_null_check;
373 
374  if (empty_info != 0) {
376  if (r) return r;
377  r = add_mem_num(reg, reg->num_null_check); /* NULL CHECK ID */
378  if (r) return r;
379  reg->num_null_check++;
380  }
381 
382  r = compile_tree(node, reg);
383  if (r) return r;
384 
385  if (empty_info != 0) {
386  if (empty_info == NQ_TARGET_IS_EMPTY)
387  r = add_opcode(reg, OP_NULL_CHECK_END);
388  else if (empty_info == NQ_TARGET_IS_EMPTY_MEM)
390  else if (empty_info == NQ_TARGET_IS_EMPTY_REC)
392 
393  if (r) return r;
394  r = add_mem_num(reg, saved_num_null_check); /* NULL CHECK ID */
395  }
396  return r;
397 }
398 
399 #ifdef USE_SUBEXP_CALL
400 static int
402 {
403  int r;
404 
405  r = add_opcode(reg, OP_CALL);
406  if (r) return r;
408  node->target);
409  if (r) return r;
410  r = add_abs_addr(reg, 0 /*dummy addr.*/);
411  return r;
412 }
413 #endif
414 
415 static int
416 compile_tree_n_times(Node* node, int n, regex_t* reg)
417 {
418  int i, r;
419 
420  for (i = 0; i < n; i++) {
421  r = compile_tree(node, reg);
422  if (r) return r;
423  }
424  return 0;
425 }
426 
427 static int
429  regex_t* reg ARG_UNUSED, int ignore_case)
430 {
431  int len;
432  int op = select_str_opcode(mb_len, str_len, ignore_case);
433 
434  len = SIZE_OPCODE;
435 
436  if (op == OP_EXACTMBN) len += SIZE_LENGTH;
437  if (IS_NEED_STR_LEN_OP_EXACT(op))
438  len += SIZE_LENGTH;
439 
440  len += mb_len * (int )str_len;
441  return len;
442 }
443 
444 static int
445 add_compile_string(UChar* s, int mb_len, OnigDistance str_len,
446  regex_t* reg, int ignore_case)
447 {
448  int op = select_str_opcode(mb_len, str_len, ignore_case);
449  add_opcode(reg, op);
450 
451  if (op == OP_EXACTMBN)
452  add_length(reg, mb_len);
453 
454  if (IS_NEED_STR_LEN_OP_EXACT(op)) {
455  if (op == OP_EXACTN_IC)
456  add_length(reg, mb_len * str_len);
457  else
458  add_length(reg, str_len);
459  }
460 
461  add_bytes(reg, s, mb_len * str_len);
462  return 0;
463 }
464 
465 
466 static int
468 {
469  int rlen, r, len, prev_len, slen, ambig;
470  OnigEncoding enc = reg->enc;
471  UChar *p, *prev;
472  StrNode* sn;
473 
474  sn = NSTR(node);
475  if (sn->end <= sn->s)
476  return 0;
477 
478  ambig = NSTRING_IS_AMBIG(node);
479 
480  p = prev = sn->s;
481  prev_len = enclen(enc, p, sn->end);
482  p += prev_len;
483  slen = 1;
484  rlen = 0;
485 
486  for (; p < sn->end; ) {
487  len = enclen(enc, p, sn->end);
488  if (len == prev_len) {
489  slen++;
490  }
491  else {
492  r = add_compile_string_length(prev, prev_len, slen, reg, ambig);
493  rlen += r;
494  prev = p;
495  slen = 1;
496  prev_len = len;
497  }
498  p += len;
499  }
500  r = add_compile_string_length(prev, prev_len, slen, reg, ambig);
501  rlen += r;
502  return rlen;
503 }
504 
505 static int
507 {
508  if (sn->end <= sn->s)
509  return 0;
510 
511  return add_compile_string_length(sn->s, 1 /* sb */, sn->end - sn->s, reg, 0);
512 }
513 
514 static int
516 {
517  int r, len, prev_len, slen, ambig;
518  OnigEncoding enc = reg->enc;
519  UChar *p, *prev, *end;
520  StrNode* sn;
521 
522  sn = NSTR(node);
523  if (sn->end <= sn->s)
524  return 0;
525 
526  end = sn->end;
527  ambig = NSTRING_IS_AMBIG(node);
528 
529  p = prev = sn->s;
530  prev_len = enclen(enc, p, end);
531  p += prev_len;
532  slen = 1;
533 
534  for (; p < end; ) {
535  len = enclen(enc, p, end);
536  if (len == prev_len) {
537  slen++;
538  }
539  else {
540  r = add_compile_string(prev, prev_len, slen, reg, ambig);
541  if (r) return r;
542 
543  prev = p;
544  slen = 1;
545  prev_len = len;
546  }
547 
548  p += len;
549  }
550  return add_compile_string(prev, prev_len, slen, reg, ambig);
551 }
552 
553 static int
555 {
556  if (sn->end <= sn->s)
557  return 0;
558 
559  return add_compile_string(sn->s, 1 /* sb */, sn->end - sn->s, reg, 0);
560 }
561 
562 static int
564 {
565 #ifdef PLATFORM_UNALIGNED_WORD_ACCESS
566  add_length(reg, mbuf->used);
567  return add_bytes(reg, mbuf->p, mbuf->used);
568 #else
569  int r, pad_size;
571 
572  GET_ALIGNMENT_PAD_SIZE(p, pad_size);
573  add_length(reg, mbuf->used + (WORD_ALIGNMENT_SIZE - 1));
574  if (pad_size != 0) add_bytes(reg, PadBuf, pad_size);
575 
576  r = add_bytes(reg, mbuf->p, mbuf->used);
577 
578  /* padding for return value from compile_length_cclass_node() to be fix. */
579  pad_size = (WORD_ALIGNMENT_SIZE - 1) - pad_size;
580  if (pad_size != 0) add_bytes(reg, PadBuf, pad_size);
581  return r;
582 #endif
583 }
584 
585 static int
587 {
588  int len;
589 
590  if (IS_NCCLASS_SHARE(cc)) {
591  len = SIZE_OPCODE + SIZE_POINTER;
592  return len;
593  }
594 
595  if (IS_NULL(cc->mbuf)) {
596  len = SIZE_OPCODE + SIZE_BITSET;
597  }
598  else {
599  if (ONIGENC_MBC_MINLEN(reg->enc) > 1 || bitset_is_empty(cc->bs)) {
600  len = SIZE_OPCODE;
601  }
602  else {
603  len = SIZE_OPCODE + SIZE_BITSET;
604  }
605 #ifdef PLATFORM_UNALIGNED_WORD_ACCESS
606  len += SIZE_LENGTH + cc->mbuf->used;
607 #else
608  len += SIZE_LENGTH + cc->mbuf->used + (WORD_ALIGNMENT_SIZE - 1);
609 #endif
610  }
611 
612  return len;
613 }
614 
615 static int
617 {
618  int r;
619 
620  if (IS_NCCLASS_SHARE(cc)) {
622  r = add_pointer(reg, cc);
623  return r;
624  }
625 
626  if (IS_NULL(cc->mbuf)) {
627  if (IS_NCCLASS_NOT(cc))
629  else
630  add_opcode(reg, OP_CCLASS);
631 
632  r = add_bitset(reg, cc->bs);
633  }
634  else {
635  if (ONIGENC_MBC_MINLEN(reg->enc) > 1 || bitset_is_empty(cc->bs)) {
636  if (IS_NCCLASS_NOT(cc))
638  else
639  add_opcode(reg, OP_CCLASS_MB);
640 
641  r = add_multi_byte_cclass(cc->mbuf, reg);
642  }
643  else {
644  if (IS_NCCLASS_NOT(cc))
646  else
648 
649  r = add_bitset(reg, cc->bs);
650  if (r) return r;
651  r = add_multi_byte_cclass(cc->mbuf, reg);
652  }
653  }
654 
655  return r;
656 }
657 
658 static int
659 entry_repeat_range(regex_t* reg, int id, int lower, int upper)
660 {
661 #define REPEAT_RANGE_ALLOC 4
662 
664 
665  if (reg->repeat_range_alloc == 0) {
668  reg->repeat_range = p;
670  }
671  else if (reg->repeat_range_alloc <= id) {
672  int n;
675  sizeof(OnigRepeatRange) * n);
677  reg->repeat_range = p;
678  reg->repeat_range_alloc = n;
679  }
680  else {
681  p = reg->repeat_range;
682  }
683 
684  p[id].lower = lower;
685  p[id].upper = (IS_REPEAT_INFINITE(upper) ? 0x7fffffff : upper);
686  return 0;
687 }
688 
689 static int
690 compile_range_repeat_node(QtfrNode* qn, int target_len, int empty_info,
691  regex_t* reg)
692 {
693  int r;
694  int num_repeat = reg->num_repeat;
695 
696  r = add_opcode(reg, qn->greedy ? OP_REPEAT : OP_REPEAT_NG);
697  if (r) return r;
698  r = add_mem_num(reg, num_repeat); /* OP_REPEAT ID */
699  reg->num_repeat++;
700  if (r) return r;
701  r = add_rel_addr(reg, target_len + SIZE_OP_REPEAT_INC);
702  if (r) return r;
703 
704  r = entry_repeat_range(reg, num_repeat, qn->lower, qn->upper);
705  if (r) return r;
706 
707  r = compile_tree_empty_check(qn->target, reg, empty_info);
708  if (r) return r;
709 
710  if (
711 #ifdef USE_SUBEXP_CALL
712  reg->num_call > 0 ||
713 #endif
716  }
717  else {
719  }
720  if (r) return r;
721  r = add_mem_num(reg, num_repeat); /* OP_REPEAT ID */
722  return r;
723 }
724 
725 static int
727 {
728  if (qn->greedy && IS_REPEAT_INFINITE(qn->upper) &&
729  NTYPE(qn->target) == NT_CANY)
730  return 1;
731  else
732  return 0;
733 }
734 
735 #define QUANTIFIER_EXPAND_LIMIT_SIZE 50
736 #define CKN_ON (ckn > 0)
737 
738 #ifdef USE_COMBINATION_EXPLOSION_CHECK
739 
740 static int
742 {
743  int len, mod_tlen, cklen;
744  int ckn;
745  int infinite = IS_REPEAT_INFINITE(qn->upper);
746  int empty_info = qn->target_empty_info;
747  int tlen = compile_length_tree(qn->target, reg);
748 
749  if (tlen < 0) return tlen;
750 
751  ckn = ((reg->num_comb_exp_check > 0) ? qn->comb_exp_check_num : 0);
752 
753  cklen = (CKN_ON ? SIZE_STATE_CHECK_NUM: 0);
754 
755  /* anychar repeat */
756  if (NTYPE(qn->target) == NT_CANY) {
757  if (qn->greedy && infinite) {
758  if (IS_NOT_NULL(qn->next_head_exact) && !CKN_ON)
759  return SIZE_OP_ANYCHAR_STAR_PEEK_NEXT + tlen * qn->lower + cklen;
760  else
761  return SIZE_OP_ANYCHAR_STAR + tlen * qn->lower + cklen;
762  }
763  }
764 
765  if (empty_info != 0)
766  mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END);
767  else
768  mod_tlen = tlen;
769 
770  if (infinite && qn->lower <= 1) {
771  if (qn->greedy) {
772  if (qn->lower == 1)
773  len = SIZE_OP_JUMP;
774  else
775  len = 0;
776 
777  len += SIZE_OP_PUSH + cklen + mod_tlen + SIZE_OP_JUMP;
778  }
779  else {
780  if (qn->lower == 0)
781  len = SIZE_OP_JUMP;
782  else
783  len = 0;
784 
785  len += mod_tlen + SIZE_OP_PUSH + cklen;
786  }
787  }
788  else if (qn->upper == 0) {
789  if (qn->is_refered != 0) /* /(?<n>..){0}/ */
790  len = SIZE_OP_JUMP + tlen;
791  else
792  len = 0;
793  }
794  else if (qn->upper == 1 && qn->greedy) {
795  if (qn->lower == 0) {
796  if (CKN_ON) {
797  len = SIZE_OP_STATE_CHECK_PUSH + tlen;
798  }
799  else {
800  len = SIZE_OP_PUSH + tlen;
801  }
802  }
803  else {
804  len = tlen;
805  }
806  }
807  else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' */
808  len = SIZE_OP_PUSH + cklen + SIZE_OP_JUMP + tlen;
809  }
810  else {
811  len = SIZE_OP_REPEAT_INC
812  + mod_tlen + SIZE_OPCODE + SIZE_RELADDR + SIZE_MEMNUM;
813  if (CKN_ON)
814  len += SIZE_OP_STATE_CHECK;
815  }
816 
817  return len;
818 }
819 
820 static int
822 {
823  int r, mod_tlen;
824  int ckn;
825  int infinite = IS_REPEAT_INFINITE(qn->upper);
826  int empty_info = qn->target_empty_info;
827  int tlen = compile_length_tree(qn->target, reg);
828 
829  if (tlen < 0) return tlen;
830 
831  ckn = ((reg->num_comb_exp_check > 0) ? qn->comb_exp_check_num : 0);
832 
833  if (is_anychar_star_quantifier(qn)) {
834  r = compile_tree_n_times(qn->target, qn->lower, reg);
835  if (r) return r;
836  if (IS_NOT_NULL(qn->next_head_exact) && !CKN_ON) {
837  if (IS_MULTILINE(reg->options))
839  else
841  if (r) return r;
842  if (CKN_ON) {
843  r = add_state_check_num(reg, ckn);
844  if (r) return r;
845  }
846 
847  return add_bytes(reg, NSTR(qn->next_head_exact)->s, 1);
848  }
849  else {
850  if (IS_MULTILINE(reg->options)) {
851  r = add_opcode(reg, (CKN_ON ?
853  : OP_ANYCHAR_ML_STAR));
854  }
855  else {
856  r = add_opcode(reg, (CKN_ON ?
858  : OP_ANYCHAR_STAR));
859  }
860  if (r) return r;
861  if (CKN_ON)
862  r = add_state_check_num(reg, ckn);
863 
864  return r;
865  }
866  }
867 
868  if (empty_info != 0)
869  mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END);
870  else
871  mod_tlen = tlen;
872 
873  if (infinite && qn->lower <= 1) {
874  if (qn->greedy) {
875  if (qn->lower == 1) {
876  r = add_opcode_rel_addr(reg, OP_JUMP,
877  (CKN_ON ? SIZE_OP_STATE_CHECK_PUSH : SIZE_OP_PUSH));
878  if (r) return r;
879  }
880 
881  if (CKN_ON) {
883  if (r) return r;
884  r = add_state_check_num(reg, ckn);
885  if (r) return r;
886  r = add_rel_addr(reg, mod_tlen + SIZE_OP_JUMP);
887  }
888  else {
889  r = add_opcode_rel_addr(reg, OP_PUSH, mod_tlen + SIZE_OP_JUMP);
890  }
891  if (r) return r;
892  r = compile_tree_empty_check(qn->target, reg, empty_info);
893  if (r) return r;
894  r = add_opcode_rel_addr(reg, OP_JUMP,
895  -(mod_tlen + (int )SIZE_OP_JUMP
896  + (int )(CKN_ON ? SIZE_OP_STATE_CHECK_PUSH : SIZE_OP_PUSH)));
897  }
898  else {
899  if (qn->lower == 0) {
900  r = add_opcode_rel_addr(reg, OP_JUMP, mod_tlen);
901  if (r) return r;
902  }
903  r = compile_tree_empty_check(qn->target, reg, empty_info);
904  if (r) return r;
905  if (CKN_ON) {
907  if (r) return r;
908  r = add_state_check_num(reg, ckn);
909  if (r) return r;
910  r = add_rel_addr(reg,
911  -(mod_tlen + (int )SIZE_OP_STATE_CHECK_PUSH_OR_JUMP));
912  }
913  else
914  r = add_opcode_rel_addr(reg, OP_PUSH, -(mod_tlen + (int )SIZE_OP_PUSH));
915  }
916  }
917  else if (qn->upper == 0) {
918  if (qn->is_refered != 0) { /* /(?<n>..){0}/ */
919  r = add_opcode_rel_addr(reg, OP_JUMP, tlen);
920  if (r) return r;
921  r = compile_tree(qn->target, reg);
922  }
923  else
924  r = 0;
925  }
926  else if (qn->upper == 1 && qn->greedy) {
927  if (qn->lower == 0) {
928  if (CKN_ON) {
930  if (r) return r;
931  r = add_state_check_num(reg, ckn);
932  if (r) return r;
933  r = add_rel_addr(reg, tlen);
934  }
935  else {
936  r = add_opcode_rel_addr(reg, OP_PUSH, tlen);
937  }
938  if (r) return r;
939  }
940 
941  r = compile_tree(qn->target, reg);
942  }
943  else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' */
944  if (CKN_ON) {
946  if (r) return r;
947  r = add_state_check_num(reg, ckn);
948  if (r) return r;
949  r = add_rel_addr(reg, SIZE_OP_JUMP);
950  }
951  else {
953  }
954 
955  if (r) return r;
956  r = add_opcode_rel_addr(reg, OP_JUMP, tlen);
957  if (r) return r;
958  r = compile_tree(qn->target, reg);
959  }
960  else {
961  r = compile_range_repeat_node(qn, mod_tlen, empty_info, reg);
962  if (CKN_ON) {
963  if (r) return r;
964  r = add_opcode(reg, OP_STATE_CHECK);
965  if (r) return r;
966  r = add_state_check_num(reg, ckn);
967  }
968  }
969  return r;
970 }
971 
972 #else /* USE_COMBINATION_EXPLOSION_CHECK */
973 
974 static int
976 {
977  int len, mod_tlen;
978  int infinite = IS_REPEAT_INFINITE(qn->upper);
979  int empty_info = qn->target_empty_info;
980  int tlen = compile_length_tree(qn->target, reg);
981 
982  if (tlen < 0) return tlen;
983 
984  /* anychar repeat */
985  if (NTYPE(qn->target) == NT_CANY) {
986  if (qn->greedy && infinite) {
987  if (IS_NOT_NULL(qn->next_head_exact))
988  return SIZE_OP_ANYCHAR_STAR_PEEK_NEXT + tlen * qn->lower;
989  else
990  return SIZE_OP_ANYCHAR_STAR + tlen * qn->lower;
991  }
992  }
993 
994  if (empty_info != 0)
995  mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END);
996  else
997  mod_tlen = tlen;
998 
999  if (infinite &&
1000  (qn->lower <= 1 || tlen * qn->lower <= QUANTIFIER_EXPAND_LIMIT_SIZE)) {
1001  if (qn->lower == 1 && tlen > QUANTIFIER_EXPAND_LIMIT_SIZE) {
1002  len = SIZE_OP_JUMP;
1003  }
1004  else {
1005  len = tlen * qn->lower;
1006  }
1007 
1008  if (qn->greedy) {
1009  if (IS_NOT_NULL(qn->head_exact))
1010  len += SIZE_OP_PUSH_OR_JUMP_EXACT1 + mod_tlen + SIZE_OP_JUMP;
1011  else if (IS_NOT_NULL(qn->next_head_exact))
1012  len += SIZE_OP_PUSH_IF_PEEK_NEXT + mod_tlen + SIZE_OP_JUMP;
1013  else
1014  len += SIZE_OP_PUSH + mod_tlen + SIZE_OP_JUMP;
1015  }
1016  else
1017  len += SIZE_OP_JUMP + mod_tlen + SIZE_OP_PUSH;
1018  }
1019  else if (qn->upper == 0 && qn->is_refered != 0) { /* /(?<n>..){0}/ */
1020  len = SIZE_OP_JUMP + tlen;
1021  }
1022  else if (!infinite && qn->greedy &&
1023  (qn->upper == 1 || (tlen + SIZE_OP_PUSH) * qn->upper
1025  len = tlen * qn->lower;
1026  len += (SIZE_OP_PUSH + tlen) * (qn->upper - qn->lower);
1027  }
1028  else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' */
1029  len = SIZE_OP_PUSH + SIZE_OP_JUMP + tlen;
1030  }
1031  else {
1032  len = SIZE_OP_REPEAT_INC
1033  + mod_tlen + SIZE_OPCODE + SIZE_RELADDR + SIZE_MEMNUM;
1034  }
1035 
1036  return len;
1037 }
1038 
1039 static int
1041 {
1042  int i, r, mod_tlen;
1043  int infinite = IS_REPEAT_INFINITE(qn->upper);
1044  int empty_info = qn->target_empty_info;
1045  int tlen = compile_length_tree(qn->target, reg);
1046 
1047  if (tlen < 0) return tlen;
1048 
1049  if (is_anychar_star_quantifier(qn)) {
1050  r = compile_tree_n_times(qn->target, qn->lower, reg);
1051  if (r) return r;
1052  if (IS_NOT_NULL(qn->next_head_exact)) {
1053  if (IS_MULTILINE(reg->options))
1055  else
1057  if (r) return r;
1058  return add_bytes(reg, NSTR(qn->next_head_exact)->s, 1);
1059  }
1060  else {
1061  if (IS_MULTILINE(reg->options))
1062  return add_opcode(reg, OP_ANYCHAR_ML_STAR);
1063  else
1064  return add_opcode(reg, OP_ANYCHAR_STAR);
1065  }
1066  }
1067 
1068  if (empty_info != 0)
1069  mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END);
1070  else
1071  mod_tlen = tlen;
1072 
1073  if (infinite &&
1074  (qn->lower <= 1 || tlen * qn->lower <= QUANTIFIER_EXPAND_LIMIT_SIZE)) {
1075  if (qn->lower == 1 && tlen > QUANTIFIER_EXPAND_LIMIT_SIZE) {
1076  if (qn->greedy) {
1077  if (IS_NOT_NULL(qn->head_exact))
1079  else if (IS_NOT_NULL(qn->next_head_exact))
1081  else
1083  }
1084  else {
1086  }
1087  if (r) return r;
1088  }
1089  else {
1090  r = compile_tree_n_times(qn->target, qn->lower, reg);
1091  if (r) return r;
1092  }
1093 
1094  if (qn->greedy) {
1095  if (IS_NOT_NULL(qn->head_exact)) {
1097  mod_tlen + SIZE_OP_JUMP);
1098  if (r) return r;
1099  add_bytes(reg, NSTR(qn->head_exact)->s, 1);
1100  r = compile_tree_empty_check(qn->target, reg, empty_info);
1101  if (r) return r;
1102  r = add_opcode_rel_addr(reg, OP_JUMP,
1103  -(mod_tlen + (int )SIZE_OP_JUMP + (int )SIZE_OP_PUSH_OR_JUMP_EXACT1));
1104  }
1105  else if (IS_NOT_NULL(qn->next_head_exact)) {
1107  mod_tlen + SIZE_OP_JUMP);
1108  if (r) return r;
1109  add_bytes(reg, NSTR(qn->next_head_exact)->s, 1);
1110  r = compile_tree_empty_check(qn->target, reg, empty_info);
1111  if (r) return r;
1112  r = add_opcode_rel_addr(reg, OP_JUMP,
1113  -(mod_tlen + (int )SIZE_OP_JUMP + (int )SIZE_OP_PUSH_IF_PEEK_NEXT));
1114  }
1115  else {
1116  r = add_opcode_rel_addr(reg, OP_PUSH, mod_tlen + SIZE_OP_JUMP);
1117  if (r) return r;
1118  r = compile_tree_empty_check(qn->target, reg, empty_info);
1119  if (r) return r;
1120  r = add_opcode_rel_addr(reg, OP_JUMP,
1121  -(mod_tlen + (int )SIZE_OP_JUMP + (int )SIZE_OP_PUSH));
1122  }
1123  }
1124  else {
1125  r = add_opcode_rel_addr(reg, OP_JUMP, mod_tlen);
1126  if (r) return r;
1127  r = compile_tree_empty_check(qn->target, reg, empty_info);
1128  if (r) return r;
1129  r = add_opcode_rel_addr(reg, OP_PUSH, -(mod_tlen + (int )SIZE_OP_PUSH));
1130  }
1131  }
1132  else if (qn->upper == 0 && qn->is_refered != 0) { /* /(?<n>..){0}/ */
1133  r = add_opcode_rel_addr(reg, OP_JUMP, tlen);
1134  if (r) return r;
1135  r = compile_tree(qn->target, reg);
1136  }
1137  else if (!infinite && qn->greedy &&
1138  (qn->upper == 1 || (tlen + SIZE_OP_PUSH) * qn->upper
1140  int n = qn->upper - qn->lower;
1141 
1142  r = compile_tree_n_times(qn->target, qn->lower, reg);
1143  if (r) return r;
1144 
1145  for (i = 0; i < n; i++) {
1146  r = add_opcode_rel_addr(reg, OP_PUSH,
1147  (n - i) * tlen + (n - i - 1) * SIZE_OP_PUSH);
1148  if (r) return r;
1149  r = compile_tree(qn->target, reg);
1150  if (r) return r;
1151  }
1152  }
1153  else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' */
1155  if (r) return r;
1156  r = add_opcode_rel_addr(reg, OP_JUMP, tlen);
1157  if (r) return r;
1158  r = compile_tree(qn->target, reg);
1159  }
1160  else {
1161  r = compile_range_repeat_node(qn, mod_tlen, empty_info, reg);
1162  }
1163  return r;
1164 }
1165 #endif /* USE_COMBINATION_EXPLOSION_CHECK */
1166 
1167 static int
1169 {
1170  int tlen;
1171  OnigOptionType prev = reg->options;
1172 
1173  reg->options = node->option;
1174  tlen = compile_length_tree(node->target, reg);
1175  reg->options = prev;
1176 
1177  if (tlen < 0) return tlen;
1178 
1179  if (IS_DYNAMIC_OPTION(prev ^ node->option)) {
1181  + tlen + SIZE_OP_SET_OPTION;
1182  }
1183  else
1184  return tlen;
1185 }
1186 
1187 static int
1189 {
1190  int r;
1191  OnigOptionType prev = reg->options;
1192 
1193  if (IS_DYNAMIC_OPTION(prev ^ node->option)) {
1194  r = add_opcode_option(reg, OP_SET_OPTION_PUSH, node->option);
1195  if (r) return r;
1196  r = add_opcode_option(reg, OP_SET_OPTION, prev);
1197  if (r) return r;
1198  r = add_opcode(reg, OP_FAIL);
1199  if (r) return r;
1200  }
1201 
1202  reg->options = node->option;
1203  r = compile_tree(node->target, reg);
1204  reg->options = prev;
1205 
1206  if (IS_DYNAMIC_OPTION(prev ^ node->option)) {
1207  if (r) return r;
1208  r = add_opcode_option(reg, OP_SET_OPTION, prev);
1209  }
1210  return r;
1211 }
1212 
1213 static int
1215 {
1216  int len;
1217  int tlen;
1218 
1219  if (node->type == ENCLOSE_OPTION)
1220  return compile_length_option_node(node, reg);
1221 
1222  if (node->target) {
1223  tlen = compile_length_tree(node->target, reg);
1224  if (tlen < 0) return tlen;
1225  }
1226  else
1227  tlen = 0;
1228 
1229  switch (node->type) {
1230  case ENCLOSE_MEMORY:
1231 #ifdef USE_SUBEXP_CALL
1232  if (IS_ENCLOSE_CALLED(node)) {
1233  len = SIZE_OP_MEMORY_START_PUSH + tlen
1235  if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum))
1236  len += (IS_ENCLOSE_RECURSION(node)
1238  else
1239  len += (IS_ENCLOSE_RECURSION(node)
1241  }
1242  else
1243 #endif
1244  {
1245  if (BIT_STATUS_AT(reg->bt_mem_start, node->regnum))
1247  else
1248  len = SIZE_OP_MEMORY_START;
1249 
1250  len += tlen + (BIT_STATUS_AT(reg->bt_mem_end, node->regnum)
1252  }
1253  break;
1254 
1257  QtfrNode* qn = NQTFR(node->target);
1258  tlen = compile_length_tree(qn->target, reg);
1259  if (tlen < 0) return tlen;
1260 
1261  len = tlen * qn->lower
1262  + SIZE_OP_PUSH + tlen + SIZE_OP_POP + SIZE_OP_JUMP;
1263  }
1264  else {
1266  }
1267  break;
1268 
1269  case ENCLOSE_CONDITION:
1270  len = SIZE_OP_CONDITION;
1271  if (NTYPE(node->target) == NT_ALT) {
1272  Node* x = node->target;
1273 
1274  tlen = compile_length_tree(NCAR(x), reg); /* yes-node */
1275  if (tlen < 0) return tlen;
1276  len += tlen + SIZE_OP_JUMP;
1277  if (NCDR(x) == NULL) return ONIGERR_PARSER_BUG;
1278  x = NCDR(x);
1279  tlen = compile_length_tree(NCAR(x), reg); /* no-node */
1280  if (tlen < 0) return tlen;
1281  len += tlen;
1282  if (NCDR(x) != NULL) return ONIGERR_INVALID_CONDITION_PATTERN;
1283  }
1284  else {
1285  return ONIGERR_PARSER_BUG;
1286  }
1287  break;
1288 
1289  default:
1290  return ONIGERR_TYPE_BUG;
1291  break;
1292  }
1293 
1294  return len;
1295 }
1296 
1297 static int get_char_length_tree(Node* node, regex_t* reg, int* len);
1298 
1299 static int
1301 {
1302  int r, len;
1303 
1304  if (node->type == ENCLOSE_OPTION)
1305  return compile_option_node(node, reg);
1306 
1307  switch (node->type) {
1308  case ENCLOSE_MEMORY:
1309 #ifdef USE_SUBEXP_CALL
1310  if (IS_ENCLOSE_CALLED(node)) {
1311  r = add_opcode(reg, OP_CALL);
1312  if (r) return r;
1314  node->state |= NST_ADDR_FIXED;
1315  r = add_abs_addr(reg, (int )node->call_addr);
1316  if (r) return r;
1317  len = compile_length_tree(node->target, reg);
1319  if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum))
1320  len += (IS_ENCLOSE_RECURSION(node)
1322  else
1323  len += (IS_ENCLOSE_RECURSION(node)
1325 
1326  r = add_opcode_rel_addr(reg, OP_JUMP, len);
1327  if (r) return r;
1328  }
1329 #endif
1330  if (BIT_STATUS_AT(reg->bt_mem_start, node->regnum))
1331  r = add_opcode(reg, OP_MEMORY_START_PUSH);
1332  else
1333  r = add_opcode(reg, OP_MEMORY_START);
1334  if (r) return r;
1335  r = add_mem_num(reg, node->regnum);
1336  if (r) return r;
1337  r = compile_tree(node->target, reg);
1338  if (r) return r;
1339 #ifdef USE_SUBEXP_CALL
1340  if (IS_ENCLOSE_CALLED(node)) {
1341  if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum))
1342  r = add_opcode(reg, (IS_ENCLOSE_RECURSION(node)
1344  else
1345  r = add_opcode(reg, (IS_ENCLOSE_RECURSION(node)
1347 
1348  if (r) return r;
1349  r = add_mem_num(reg, node->regnum);
1350  if (r) return r;
1351  r = add_opcode(reg, OP_RETURN);
1352  }
1353  else
1354 #endif
1355  {
1356  if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum))
1357  r = add_opcode(reg, OP_MEMORY_END_PUSH);
1358  else
1359  r = add_opcode(reg, OP_MEMORY_END);
1360  if (r) return r;
1361  r = add_mem_num(reg, node->regnum);
1362  }
1363  break;
1364 
1367  QtfrNode* qn = NQTFR(node->target);
1368  r = compile_tree_n_times(qn->target, qn->lower, reg);
1369  if (r) return r;
1370 
1371  len = compile_length_tree(qn->target, reg);
1372  if (len < 0) return len;
1373 
1375  if (r) return r;
1376  r = compile_tree(qn->target, reg);
1377  if (r) return r;
1378  r = add_opcode(reg, OP_POP);
1379  if (r) return r;
1380  r = add_opcode_rel_addr(reg, OP_JUMP,
1381  -((int )SIZE_OP_PUSH + len + (int )SIZE_OP_POP + (int )SIZE_OP_JUMP));
1382  }
1383  else {
1384  r = add_opcode(reg, OP_PUSH_STOP_BT);
1385  if (r) return r;
1386  r = compile_tree(node->target, reg);
1387  if (r) return r;
1388  r = add_opcode(reg, OP_POP_STOP_BT);
1389  }
1390  break;
1391 
1392  case ENCLOSE_CONDITION:
1393  r = add_opcode(reg, OP_CONDITION);
1394  if (r) return r;
1395  r = add_mem_num(reg, node->regnum);
1396  if (r) return r;
1397 
1398  if (NTYPE(node->target) == NT_ALT) {
1399  Node* x = node->target;
1400  int len2;
1401 
1402  len = compile_length_tree(NCAR(x), reg); /* yes-node */
1403  if (len < 0) return len;
1404  if (NCDR(x) == NULL) return ONIGERR_PARSER_BUG;
1405  x = NCDR(x);
1406  len2 = compile_length_tree(NCAR(x), reg); /* no-node */
1407  if (len2 < 0) return len2;
1408  if (NCDR(x) != NULL) return ONIGERR_INVALID_CONDITION_PATTERN;
1409 
1410  x = node->target;
1411  r = add_rel_addr(reg, len + SIZE_OP_JUMP);
1412  if (r) return r;
1413  r = compile_tree(NCAR(x), reg); /* yes-node */
1414  if (r) return r;
1415  r = add_opcode_rel_addr(reg, OP_JUMP, len2);
1416  if (r) return r;
1417  x = NCDR(x);
1418  r = compile_tree(NCAR(x), reg); /* no-node */
1419  }
1420  else {
1421  return ONIGERR_PARSER_BUG;
1422  }
1423  break;
1424 
1425  default:
1426  return ONIGERR_TYPE_BUG;
1427  break;
1428  }
1429 
1430  return r;
1431 }
1432 
1433 static int
1435 {
1436  int len;
1437  int tlen = 0;
1438 
1439  if (node->target) {
1440  tlen = compile_length_tree(node->target, reg);
1441  if (tlen < 0) return tlen;
1442  }
1443 
1444  switch (node->type) {
1445  case ANCHOR_PREC_READ:
1446  len = SIZE_OP_PUSH_POS + tlen + SIZE_OP_POP_POS;
1447  break;
1448  case ANCHOR_PREC_READ_NOT:
1449  len = SIZE_OP_PUSH_POS_NOT + tlen + SIZE_OP_FAIL_POS;
1450  break;
1451  case ANCHOR_LOOK_BEHIND:
1452  len = SIZE_OP_LOOK_BEHIND + tlen;
1453  break;
1456  break;
1457 
1458  default:
1459  len = SIZE_OPCODE;
1460  break;
1461  }
1462 
1463  return len;
1464 }
1465 
1466 static int
1468 {
1469  int r, len;
1470 
1471  switch (node->type) {
1472  case ANCHOR_BEGIN_BUF: r = add_opcode(reg, OP_BEGIN_BUF); break;
1473  case ANCHOR_END_BUF: r = add_opcode(reg, OP_END_BUF); break;
1474  case ANCHOR_BEGIN_LINE: r = add_opcode(reg, OP_BEGIN_LINE); break;
1475  case ANCHOR_END_LINE: r = add_opcode(reg, OP_END_LINE); break;
1476  case ANCHOR_SEMI_END_BUF: r = add_opcode(reg, OP_SEMI_END_BUF); break;
1477  case ANCHOR_BEGIN_POSITION: r = add_opcode(reg, OP_BEGIN_POSITION); break;
1478 
1479  /* used for implicit anchor optimization: /.*a/ ==> /(?:^|\G).*a/ */
1480  case ANCHOR_ANYCHAR_STAR: r = add_opcode(reg, OP_BEGIN_POS_OR_LINE); break;
1481 
1482  case ANCHOR_WORD_BOUND:
1483  if (node->ascii_range) r = add_opcode(reg, OP_ASCII_WORD_BOUND);
1484  else r = add_opcode(reg, OP_WORD_BOUND);
1485  break;
1486  case ANCHOR_NOT_WORD_BOUND:
1487  if (node->ascii_range) r = add_opcode(reg, OP_NOT_ASCII_WORD_BOUND);
1488  else r = add_opcode(reg, OP_NOT_WORD_BOUND);
1489  break;
1490 #ifdef USE_WORD_BEGIN_END
1491  case ANCHOR_WORD_BEGIN:
1492  if (node->ascii_range) r = add_opcode(reg, OP_ASCII_WORD_BEGIN);
1493  else r = add_opcode(reg, OP_WORD_BEGIN);
1494  break;
1495  case ANCHOR_WORD_END:
1496  if (node->ascii_range) r = add_opcode(reg, OP_ASCII_WORD_END);
1497  else r = add_opcode(reg, OP_WORD_END);
1498  break;
1499 #endif
1500  case ANCHOR_KEEP: r = add_opcode(reg, OP_KEEP); break;
1501 
1502  case ANCHOR_PREC_READ:
1503  r = add_opcode(reg, OP_PUSH_POS);
1504  if (r) return r;
1505  r = compile_tree(node->target, reg);
1506  if (r) return r;
1507  r = add_opcode(reg, OP_POP_POS);
1508  break;
1509 
1510  case ANCHOR_PREC_READ_NOT:
1511  len = compile_length_tree(node->target, reg);
1512  if (len < 0) return len;
1514  if (r) return r;
1515  r = compile_tree(node->target, reg);
1516  if (r) return r;
1517  r = add_opcode(reg, OP_FAIL_POS);
1518  break;
1519 
1520  case ANCHOR_LOOK_BEHIND:
1521  {
1522  int n;
1523  r = add_opcode(reg, OP_LOOK_BEHIND);
1524  if (r) return r;
1525  if (node->char_len < 0) {
1526  r = get_char_length_tree(node->target, reg, &n);
1528  }
1529  else
1530  n = node->char_len;
1531  r = add_length(reg, n);
1532  if (r) return r;
1533  r = compile_tree(node->target, reg);
1534  }
1535  break;
1536 
1538  {
1539  int n;
1540  len = compile_length_tree(node->target, reg);
1543  if (r) return r;
1544  if (node->char_len < 0) {
1545  r = get_char_length_tree(node->target, reg, &n);
1547  }
1548  else
1549  n = node->char_len;
1550  r = add_length(reg, n);
1551  if (r) return r;
1552  r = compile_tree(node->target, reg);
1553  if (r) return r;
1555  }
1556  break;
1557 
1558  default:
1559  return ONIGERR_TYPE_BUG;
1560  break;
1561  }
1562 
1563  return r;
1564 }
1565 
1566 static int
1568 {
1569  int len, type, r;
1570 
1571  type = NTYPE(node);
1572  switch (type) {
1573  case NT_LIST:
1574  len = 0;
1575  do {
1576  r = compile_length_tree(NCAR(node), reg);
1577  if (r < 0) return r;
1578  len += r;
1579  } while (IS_NOT_NULL(node = NCDR(node)));
1580  r = len;
1581  break;
1582 
1583  case NT_ALT:
1584  {
1585  int n;
1586 
1587  n = r = 0;
1588  do {
1589  r += compile_length_tree(NCAR(node), reg);
1590  n++;
1591  } while (IS_NOT_NULL(node = NCDR(node)));
1592  r += (SIZE_OP_PUSH + SIZE_OP_JUMP) * (n - 1);
1593  }
1594  break;
1595 
1596  case NT_STR:
1597  if (NSTRING_IS_RAW(node))
1598  r = compile_length_string_raw_node(NSTR(node), reg);
1599  else
1600  r = compile_length_string_node(node, reg);
1601  break;
1602 
1603  case NT_CCLASS:
1604  r = compile_length_cclass_node(NCCLASS(node), reg);
1605  break;
1606 
1607  case NT_CTYPE:
1608  case NT_CANY:
1609  r = SIZE_OPCODE;
1610  break;
1611 
1612  case NT_BREF:
1613  {
1614  BRefNode* br = NBREF(node);
1615 
1616 #ifdef USE_BACKREF_WITH_LEVEL
1617  if (IS_BACKREF_NEST_LEVEL(br)) {
1619  SIZE_LENGTH + (SIZE_MEMNUM * br->back_num);
1620  }
1621  else
1622 #endif
1623  if (br->back_num == 1) {
1624  r = ((!IS_IGNORECASE(reg->options) && br->back_static[0] <= 2)
1626  }
1627  else {
1628  r = SIZE_OPCODE + SIZE_LENGTH + (SIZE_MEMNUM * br->back_num);
1629  }
1630  }
1631  break;
1632 
1633 #ifdef USE_SUBEXP_CALL
1634  case NT_CALL:
1635  r = SIZE_OP_CALL;
1636  break;
1637 #endif
1638 
1639  case NT_QTFR:
1640  r = compile_length_quantifier_node(NQTFR(node), reg);
1641  break;
1642 
1643  case NT_ENCLOSE:
1644  r = compile_length_enclose_node(NENCLOSE(node), reg);
1645  break;
1646 
1647  case NT_ANCHOR:
1648  r = compile_length_anchor_node(NANCHOR(node), reg);
1649  break;
1650 
1651  default:
1652  return ONIGERR_TYPE_BUG;
1653  break;
1654  }
1655 
1656  return r;
1657 }
1658 
1659 static int
1661 {
1662  int n, type, len, pos, r = 0;
1663 
1664  type = NTYPE(node);
1665  switch (type) {
1666  case NT_LIST:
1667  do {
1668  r = compile_tree(NCAR(node), reg);
1669  } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
1670  break;
1671 
1672  case NT_ALT:
1673  {
1674  Node* x = node;
1675  len = 0;
1676  do {
1677  len += compile_length_tree(NCAR(x), reg);
1678  if (NCDR(x) != NULL) {
1679  len += SIZE_OP_PUSH + SIZE_OP_JUMP;
1680  }
1681  } while (IS_NOT_NULL(x = NCDR(x)));
1682  pos = reg->used + len; /* goal position */
1683 
1684  do {
1685  len = compile_length_tree(NCAR(node), reg);
1686  if (IS_NOT_NULL(NCDR(node))) {
1687  r = add_opcode_rel_addr(reg, OP_PUSH, len + SIZE_OP_JUMP);
1688  if (r) break;
1689  }
1690  r = compile_tree(NCAR(node), reg);
1691  if (r) break;
1692  if (IS_NOT_NULL(NCDR(node))) {
1693  len = pos - (reg->used + SIZE_OP_JUMP);
1694  r = add_opcode_rel_addr(reg, OP_JUMP, len);
1695  if (r) break;
1696  }
1697  } while (IS_NOT_NULL(node = NCDR(node)));
1698  }
1699  break;
1700 
1701  case NT_STR:
1702  if (NSTRING_IS_RAW(node))
1703  r = compile_string_raw_node(NSTR(node), reg);
1704  else
1705  r = compile_string_node(node, reg);
1706  break;
1707 
1708  case NT_CCLASS:
1709  r = compile_cclass_node(NCCLASS(node), reg);
1710  break;
1711 
1712  case NT_CTYPE:
1713  {
1714  int op;
1715 
1716  switch (NCTYPE(node)->ctype) {
1717  case ONIGENC_CTYPE_WORD:
1718  if (NCTYPE(node)->ascii_range != 0) {
1719  if (NCTYPE(node)->not != 0) op = OP_NOT_ASCII_WORD;
1720  else op = OP_ASCII_WORD;
1721  }
1722  else {
1723  if (NCTYPE(node)->not != 0) op = OP_NOT_WORD;
1724  else op = OP_WORD;
1725  }
1726  break;
1727  default:
1728  return ONIGERR_TYPE_BUG;
1729  break;
1730  }
1731  r = add_opcode(reg, op);
1732  }
1733  break;
1734 
1735  case NT_CANY:
1736  if (IS_MULTILINE(reg->options))
1737  r = add_opcode(reg, OP_ANYCHAR_ML);
1738  else
1739  r = add_opcode(reg, OP_ANYCHAR);
1740  break;
1741 
1742  case NT_BREF:
1743  {
1744  BRefNode* br = NBREF(node);
1745 
1746 #ifdef USE_BACKREF_WITH_LEVEL
1747  if (IS_BACKREF_NEST_LEVEL(br)) {
1749  if (r) return r;
1750  r = add_option(reg, (reg->options & ONIG_OPTION_IGNORECASE));
1751  if (r) return r;
1752  r = add_length(reg, br->nest_level);
1753  if (r) return r;
1754 
1755  goto add_bacref_mems;
1756  }
1757  else
1758 #endif
1759  if (br->back_num == 1) {
1760  n = br->back_static[0];
1761  if (IS_IGNORECASE(reg->options)) {
1762  r = add_opcode(reg, OP_BACKREFN_IC);
1763  if (r) return r;
1764  r = add_mem_num(reg, n);
1765  }
1766  else {
1767  switch (n) {
1768  case 1: r = add_opcode(reg, OP_BACKREF1); break;
1769  case 2: r = add_opcode(reg, OP_BACKREF2); break;
1770  default:
1771  r = add_opcode(reg, OP_BACKREFN);
1772  if (r) return r;
1773  r = add_mem_num(reg, n);
1774  break;
1775  }
1776  }
1777  }
1778  else {
1779  int i;
1780  int* p;
1781 
1782  if (IS_IGNORECASE(reg->options)) {
1783  r = add_opcode(reg, OP_BACKREF_MULTI_IC);
1784  }
1785  else {
1786  r = add_opcode(reg, OP_BACKREF_MULTI);
1787  }
1788  if (r) return r;
1789 
1790 #ifdef USE_BACKREF_WITH_LEVEL
1791  add_bacref_mems:
1792 #endif
1793  r = add_length(reg, br->back_num);
1794  if (r) return r;
1795  p = BACKREFS_P(br);
1796  for (i = br->back_num - 1; i >= 0; i--) {
1797  r = add_mem_num(reg, p[i]);
1798  if (r) return r;
1799  }
1800  }
1801  }
1802  break;
1803 
1804 #ifdef USE_SUBEXP_CALL
1805  case NT_CALL:
1806  r = compile_call(NCALL(node), reg);
1807  break;
1808 #endif
1809 
1810  case NT_QTFR:
1811  r = compile_quantifier_node(NQTFR(node), reg);
1812  break;
1813 
1814  case NT_ENCLOSE:
1815  r = compile_enclose_node(NENCLOSE(node), reg);
1816  break;
1817 
1818  case NT_ANCHOR:
1819  r = compile_anchor_node(NANCHOR(node), reg);
1820  break;
1821 
1822  default:
1823 #ifdef ONIG_DEBUG
1824  fprintf(stderr, "compile_tree: undefined node type %d\n", NTYPE(node));
1825 #endif
1826  break;
1827  }
1828 
1829  return r;
1830 }
1831 
1832 #ifdef USE_NAMED_GROUP
1833 
1834 static int
1835 noname_disable_map(Node** plink, GroupNumRemap* map, int* counter)
1836 {
1837  int r = 0;
1838  Node* node = *plink;
1839 
1840  switch (NTYPE(node)) {
1841  case NT_LIST:
1842  case NT_ALT:
1843  do {
1844  r = noname_disable_map(&(NCAR(node)), map, counter);
1845  } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
1846  break;
1847 
1848  case NT_QTFR:
1849  {
1850  Node** ptarget = &(NQTFR(node)->target);
1851  Node* old = *ptarget;
1852  r = noname_disable_map(ptarget, map, counter);
1853  if (*ptarget != old && NTYPE(*ptarget) == NT_QTFR) {
1854  onig_reduce_nested_quantifier(node, *ptarget);
1855  }
1856  }
1857  break;
1858 
1859  case NT_ENCLOSE:
1860  {
1861  EncloseNode* en = NENCLOSE(node);
1862  if (en->type == ENCLOSE_MEMORY) {
1863  if (IS_ENCLOSE_NAMED_GROUP(en)) {
1864  (*counter)++;
1865  map[en->regnum].new_val = *counter;
1866  en->regnum = *counter;
1867  r = noname_disable_map(&(en->target), map, counter);
1868  }
1869  else {
1870  *plink = en->target;
1871  en->target = NULL_NODE;
1872  onig_node_free(node);
1873  r = noname_disable_map(plink, map, counter);
1874  }
1875  }
1876  else
1877  r = noname_disable_map(&(en->target), map, counter);
1878  }
1879  break;
1880 
1881  case NT_ANCHOR:
1882  {
1883  AnchorNode* an = NANCHOR(node);
1884  switch (an->type) {
1885  case ANCHOR_PREC_READ:
1886  case ANCHOR_PREC_READ_NOT:
1887  case ANCHOR_LOOK_BEHIND:
1889  r = noname_disable_map(&(an->target), map, counter);
1890  break;
1891  }
1892  }
1893  break;
1894 
1895  default:
1896  break;
1897  }
1898 
1899  return r;
1900 }
1901 
1902 static int
1904 {
1905  int i, pos, n, old_num;
1906  int *backs;
1907  BRefNode* bn = NBREF(node);
1908 
1909  if (! IS_BACKREF_NAME_REF(bn))
1911 
1912  old_num = bn->back_num;
1913  if (IS_NULL(bn->back_dynamic))
1914  backs = bn->back_static;
1915  else
1916  backs = bn->back_dynamic;
1917 
1918  for (i = 0, pos = 0; i < old_num; i++) {
1919  n = map[backs[i]].new_val;
1920  if (n > 0) {
1921  backs[pos] = n;
1922  pos++;
1923  }
1924  }
1925 
1926  bn->back_num = pos;
1927  return 0;
1928 }
1929 
1930 static int
1932 {
1933  int r = 0;
1934 
1935  switch (NTYPE(node)) {
1936  case NT_LIST:
1937  case NT_ALT:
1938  do {
1939  r = renumber_by_map(NCAR(node), map);
1940  } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
1941  break;
1942  case NT_QTFR:
1943  r = renumber_by_map(NQTFR(node)->target, map);
1944  break;
1945  case NT_ENCLOSE:
1946  {
1947  EncloseNode* en = NENCLOSE(node);
1948  if (en->type == ENCLOSE_CONDITION)
1949  en->regnum = map[en->regnum].new_val;
1950  r = renumber_by_map(en->target, map);
1951  }
1952  break;
1953 
1954  case NT_BREF:
1955  r = renumber_node_backref(node, map);
1956  break;
1957 
1958  case NT_ANCHOR:
1959  {
1960  AnchorNode* an = NANCHOR(node);
1961  switch (an->type) {
1962  case ANCHOR_PREC_READ:
1963  case ANCHOR_PREC_READ_NOT:
1964  case ANCHOR_LOOK_BEHIND:
1966  r = renumber_by_map(an->target, map);
1967  break;
1968  }
1969  }
1970  break;
1971 
1972  default:
1973  break;
1974  }
1975 
1976  return r;
1977 }
1978 
1979 static int
1981 {
1982  int r = 0;
1983 
1984  switch (NTYPE(node)) {
1985  case NT_LIST:
1986  case NT_ALT:
1987  do {
1988  r = numbered_ref_check(NCAR(node));
1989  } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
1990  break;
1991  case NT_QTFR:
1992  r = numbered_ref_check(NQTFR(node)->target);
1993  break;
1994  case NT_ENCLOSE:
1995  r = numbered_ref_check(NENCLOSE(node)->target);
1996  break;
1997 
1998  case NT_BREF:
1999  if (! IS_BACKREF_NAME_REF(NBREF(node)))
2001  break;
2002 
2003  default:
2004  break;
2005  }
2006 
2007  return r;
2008 }
2009 
2010 static int
2012 {
2013  int r, i, pos, counter;
2014  BitStatusType loc;
2015  GroupNumRemap* map;
2016 
2017  map = (GroupNumRemap* )xalloca(sizeof(GroupNumRemap) * (env->num_mem + 1));
2019  for (i = 1; i <= env->num_mem; i++) {
2020  map[i].new_val = 0;
2021  }
2022  counter = 0;
2023  r = noname_disable_map(root, map, &counter);
2024  if (r != 0) return r;
2025 
2026  r = renumber_by_map(*root, map);
2027  if (r != 0) return r;
2028 
2029  for (i = 1, pos = 1; i <= env->num_mem; i++) {
2030  if (map[i].new_val > 0) {
2031  SCANENV_MEM_NODES(env)[pos] = SCANENV_MEM_NODES(env)[i];
2032  pos++;
2033  }
2034  }
2035 
2036  loc = env->capture_history;
2038  for (i = 1; i <= ONIG_MAX_CAPTURE_HISTORY_GROUP; i++) {
2039  if (BIT_STATUS_AT(loc, i)) {
2041  }
2042  }
2043 
2044  env->num_mem = env->num_named;
2045  reg->num_mem = env->num_named;
2046 
2047  return onig_renumber_name_table(reg, map);
2048 }
2049 #endif /* USE_NAMED_GROUP */
2050 
2051 #ifdef USE_SUBEXP_CALL
2052 static int
2054 {
2055  int i, offset;
2056  EncloseNode* en;
2057  AbsAddrType addr;
2058 
2059  for (i = 0; i < uslist->num; i++) {
2060  en = NENCLOSE(uslist->us[i].target);
2061  if (! IS_ENCLOSE_ADDR_FIXED(en)) return ONIGERR_PARSER_BUG;
2062  addr = en->call_addr;
2063  offset = uslist->us[i].offset;
2064 
2065  BBUF_WRITE(reg, offset, &addr, SIZE_ABSADDR);
2066  }
2067  return 0;
2068 }
2069 #endif
2070 
2071 #ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT
2072 static int
2074 {
2075  int r = 0;
2076 
2077  switch (NTYPE(node)) {
2078  case NT_LIST:
2079  case NT_ALT:
2080  {
2081  int v;
2082  do {
2084  if (v > r) r = v;
2085  } while (v >= 0 && IS_NOT_NULL(node = NCDR(node)));
2086  }
2087  break;
2088 
2089 #ifdef USE_SUBEXP_CALL
2090  case NT_CALL:
2091  if (IS_CALL_RECURSION(NCALL(node))) {
2092  return NQ_TARGET_IS_EMPTY_REC; /* tiny version */
2093  }
2094  else
2095  r = quantifiers_memory_node_info(NCALL(node)->target);
2096  break;
2097 #endif
2098 
2099  case NT_QTFR:
2100  {
2101  QtfrNode* qn = NQTFR(node);
2102  if (qn->upper != 0) {
2104  }
2105  }
2106  break;
2107 
2108  case NT_ENCLOSE:
2109  {
2110  EncloseNode* en = NENCLOSE(node);
2111  switch (en->type) {
2112  case ENCLOSE_MEMORY:
2113  return NQ_TARGET_IS_EMPTY_MEM;
2114  break;
2115 
2116  case ENCLOSE_OPTION:
2118  case ENCLOSE_CONDITION:
2120  break;
2121  default:
2122  break;
2123  }
2124  }
2125  break;
2126 
2127  case NT_BREF:
2128  case NT_STR:
2129  case NT_CTYPE:
2130  case NT_CCLASS:
2131  case NT_CANY:
2132  case NT_ANCHOR:
2133  default:
2134  break;
2135  }
2136 
2137  return r;
2138 }
2139 #endif /* USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT */
2140 
2141 static int
2143 {
2144  OnigDistance tmin;
2145  int r = 0;
2146 
2147  *min = 0;
2148  switch (NTYPE(node)) {
2149  case NT_BREF:
2150  {
2151  int i;
2152  int* backs;
2153  Node** nodes = SCANENV_MEM_NODES(env);
2154  BRefNode* br = NBREF(node);
2155  if (br->state & NST_RECURSION) break;
2156 
2157  backs = BACKREFS_P(br);
2158  if (backs[0] > env->num_mem) return ONIGERR_INVALID_BACKREF;
2159  r = get_min_match_length(nodes[backs[0]], min, env);
2160  if (r != 0) break;
2161  for (i = 1; i < br->back_num; i++) {
2162  if (backs[i] > env->num_mem) return ONIGERR_INVALID_BACKREF;
2163  r = get_min_match_length(nodes[backs[i]], &tmin, env);
2164  if (r != 0) break;
2165  if (*min > tmin) *min = tmin;
2166  }
2167  }
2168  break;
2169 
2170 #ifdef USE_SUBEXP_CALL
2171  case NT_CALL:
2172  if (IS_CALL_RECURSION(NCALL(node))) {
2173  EncloseNode* en = NENCLOSE(NCALL(node)->target);
2174  if (IS_ENCLOSE_MIN_FIXED(en))
2175  *min = en->min_len;
2176  }
2177  else
2178  r = get_min_match_length(NCALL(node)->target, min, env);
2179  break;
2180 #endif
2181 
2182  case NT_LIST:
2183  do {
2184  r = get_min_match_length(NCAR(node), &tmin, env);
2185  if (r == 0) *min += tmin;
2186  } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
2187  break;
2188 
2189  case NT_ALT:
2190  {
2191  Node *x, *y;
2192  y = node;
2193  do {
2194  x = NCAR(y);
2195  r = get_min_match_length(x, &tmin, env);
2196  if (r != 0) break;
2197  if (y == node) *min = tmin;
2198  else if (*min > tmin) *min = tmin;
2199  } while (r == 0 && IS_NOT_NULL(y = NCDR(y)));
2200  }
2201  break;
2202 
2203  case NT_STR:
2204  {
2205  StrNode* sn = NSTR(node);
2206  *min = sn->end - sn->s;
2207  }
2208  break;
2209 
2210  case NT_CTYPE:
2211  *min = 1;
2212  break;
2213 
2214  case NT_CCLASS:
2215  case NT_CANY:
2216  *min = 1;
2217  break;
2218 
2219  case NT_QTFR:
2220  {
2221  QtfrNode* qn = NQTFR(node);
2222 
2223  if (qn->lower > 0) {
2224  r = get_min_match_length(qn->target, min, env);
2225  if (r == 0)
2226  *min = distance_multiply(*min, qn->lower);
2227  }
2228  }
2229  break;
2230 
2231  case NT_ENCLOSE:
2232  {
2233  EncloseNode* en = NENCLOSE(node);
2234  switch (en->type) {
2235  case ENCLOSE_MEMORY:
2236 #ifdef USE_SUBEXP_CALL
2237  if (IS_ENCLOSE_MIN_FIXED(en))
2238  *min = en->min_len;
2239  else {
2240  r = get_min_match_length(en->target, min, env);
2241  if (r == 0) {
2242  en->min_len = *min;
2244  }
2245  }
2246  break;
2247 #endif
2248  case ENCLOSE_OPTION:
2250  case ENCLOSE_CONDITION:
2251  r = get_min_match_length(en->target, min, env);
2252  break;
2253  }
2254  }
2255  break;
2256 
2257  case NT_ANCHOR:
2258  default:
2259  break;
2260  }
2261 
2262  return r;
2263 }
2264 
2265 static int
2267 {
2268  OnigDistance tmax;
2269  int r = 0;
2270 
2271  *max = 0;
2272  switch (NTYPE(node)) {
2273  case NT_LIST:
2274  do {
2275  r = get_max_match_length(NCAR(node), &tmax, env);
2276  if (r == 0)
2277  *max = distance_add(*max, tmax);
2278  } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
2279  break;
2280 
2281  case NT_ALT:
2282  do {
2283  r = get_max_match_length(NCAR(node), &tmax, env);
2284  if (r == 0 && *max < tmax) *max = tmax;
2285  } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
2286  break;
2287 
2288  case NT_STR:
2289  {
2290  StrNode* sn = NSTR(node);
2291  *max = sn->end - sn->s;
2292  }
2293  break;
2294 
2295  case NT_CTYPE:
2296  *max = ONIGENC_MBC_MAXLEN_DIST(env->enc);
2297  break;
2298 
2299  case NT_CCLASS:
2300  case NT_CANY:
2301  *max = ONIGENC_MBC_MAXLEN_DIST(env->enc);
2302  break;
2303 
2304  case NT_BREF:
2305  {
2306  int i;
2307  int* backs;
2308  Node** nodes = SCANENV_MEM_NODES(env);
2309  BRefNode* br = NBREF(node);
2310  if (br->state & NST_RECURSION) {
2311  *max = ONIG_INFINITE_DISTANCE;
2312  break;
2313  }
2314  backs = BACKREFS_P(br);
2315  for (i = 0; i < br->back_num; i++) {
2316  if (backs[i] > env->num_mem) return ONIGERR_INVALID_BACKREF;
2317  r = get_max_match_length(nodes[backs[i]], &tmax, env);
2318  if (r != 0) break;
2319  if (*max < tmax) *max = tmax;
2320  }
2321  }
2322  break;
2323 
2324 #ifdef USE_SUBEXP_CALL
2325  case NT_CALL:
2326  if (! IS_CALL_RECURSION(NCALL(node)))
2327  r = get_max_match_length(NCALL(node)->target, max, env);
2328  else
2329  *max = ONIG_INFINITE_DISTANCE;
2330  break;
2331 #endif
2332 
2333  case NT_QTFR:
2334  {
2335  QtfrNode* qn = NQTFR(node);
2336 
2337  if (qn->upper != 0) {
2338  r = get_max_match_length(qn->target, max, env);
2339  if (r == 0 && *max != 0) {
2340  if (! IS_REPEAT_INFINITE(qn->upper))
2341  *max = distance_multiply(*max, qn->upper);
2342  else
2343  *max = ONIG_INFINITE_DISTANCE;
2344  }
2345  }
2346  }
2347  break;
2348 
2349  case NT_ENCLOSE:
2350  {
2351  EncloseNode* en = NENCLOSE(node);
2352  switch (en->type) {
2353  case ENCLOSE_MEMORY:
2354 #ifdef USE_SUBEXP_CALL
2355  if (IS_ENCLOSE_MAX_FIXED(en))
2356  *max = en->max_len;
2357  else {
2358  r = get_max_match_length(en->target, max, env);
2359  if (r == 0) {
2360  en->max_len = *max;
2362  }
2363  }
2364  break;
2365 #endif
2366  case ENCLOSE_OPTION:
2368  case ENCLOSE_CONDITION:
2369  r = get_max_match_length(en->target, max, env);
2370  break;
2371  }
2372  }
2373  break;
2374 
2375  case NT_ANCHOR:
2376  default:
2377  break;
2378  }
2379 
2380  return r;
2381 }
2382 
2383 #define GET_CHAR_LEN_VARLEN -1
2384 #define GET_CHAR_LEN_TOP_ALT_VARLEN -2
2385 
2386 /* fixed size pattern node only */
2387 static int
2388 get_char_length_tree1(Node* node, regex_t* reg, int* len, int level)
2389 {
2390  int tlen;
2391  int r = 0;
2392 
2393  level++;
2394  *len = 0;
2395  switch (NTYPE(node)) {
2396  case NT_LIST:
2397  do {
2398  r = get_char_length_tree1(NCAR(node), reg, &tlen, level);
2399  if (r == 0)
2400  *len = (int )distance_add(*len, tlen);
2401  } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
2402  break;
2403 
2404  case NT_ALT:
2405  {
2406  int tlen2;
2407  int varlen = 0;
2408 
2409  r = get_char_length_tree1(NCAR(node), reg, &tlen, level);
2410  while (r == 0 && IS_NOT_NULL(node = NCDR(node))) {
2411  r = get_char_length_tree1(NCAR(node), reg, &tlen2, level);
2412  if (r == 0) {
2413  if (tlen != tlen2)
2414  varlen = 1;
2415  }
2416  }
2417  if (r == 0) {
2418  if (varlen != 0) {
2419  if (level == 1)
2421  else
2422  r = GET_CHAR_LEN_VARLEN;
2423  }
2424  else
2425  *len = tlen;
2426  }
2427  }
2428  break;
2429 
2430  case NT_STR:
2431  {
2432  StrNode* sn = NSTR(node);
2433  UChar *s = sn->s;
2434  while (s < sn->end) {
2435  s += enclen(reg->enc, s, sn->end);
2436  (*len)++;
2437  }
2438  }
2439  break;
2440 
2441  case NT_QTFR:
2442  {
2443  QtfrNode* qn = NQTFR(node);
2444  if (qn->lower == qn->upper) {
2445  r = get_char_length_tree1(qn->target, reg, &tlen, level);
2446  if (r == 0)
2447  *len = (int )distance_multiply(tlen, qn->lower);
2448  }
2449  else
2450  r = GET_CHAR_LEN_VARLEN;
2451  }
2452  break;
2453 
2454 #ifdef USE_SUBEXP_CALL
2455  case NT_CALL:
2456  if (! IS_CALL_RECURSION(NCALL(node)))
2457  r = get_char_length_tree1(NCALL(node)->target, reg, len, level);
2458  else
2459  r = GET_CHAR_LEN_VARLEN;
2460  break;
2461 #endif
2462 
2463  case NT_CTYPE:
2464  *len = 1;
2465  break;
2466 
2467  case NT_CCLASS:
2468  case NT_CANY:
2469  *len = 1;
2470  break;
2471 
2472  case NT_ENCLOSE:
2473  {
2474  EncloseNode* en = NENCLOSE(node);
2475  switch (en->type) {
2476  case ENCLOSE_MEMORY:
2477 #ifdef USE_SUBEXP_CALL
2478  if (IS_ENCLOSE_CLEN_FIXED(en))
2479  *len = en->char_len;
2480  else {
2481  r = get_char_length_tree1(en->target, reg, len, level);
2482  if (r == 0) {
2483  en->char_len = *len;
2485  }
2486  }
2487  break;
2488 #endif
2489  case ENCLOSE_OPTION:
2491  case ENCLOSE_CONDITION:
2492  r = get_char_length_tree1(en->target, reg, len, level);
2493  break;
2494  default:
2495  break;
2496  }
2497  }
2498  break;
2499 
2500  case NT_ANCHOR:
2501  break;
2502 
2503  default:
2504  r = GET_CHAR_LEN_VARLEN;
2505  break;
2506  }
2507 
2508  return r;
2509 }
2510 
2511 static int
2512 get_char_length_tree(Node* node, regex_t* reg, int* len)
2513 {
2514  return get_char_length_tree1(node, reg, len, 0);
2515 }
2516 
2517 /* x is not included y ==> 1 : 0 */
2518 static int
2520 {
2521  int i;
2522  OnigDistance len;
2523  OnigCodePoint code;
2524  UChar *p;
2525  int ytype;
2526 
2527  retry:
2528  ytype = NTYPE(y);
2529  switch (NTYPE(x)) {
2530  case NT_CTYPE:
2531  {
2532  switch (ytype) {
2533  case NT_CTYPE:
2534  if (NCTYPE(y)->ctype == NCTYPE(x)->ctype &&
2535  NCTYPE(y)->not != NCTYPE(x)->not &&
2536  NCTYPE(y)->ascii_range == NCTYPE(x)->ascii_range)
2537  return 1;
2538  else
2539  return 0;
2540  break;
2541 
2542  case NT_CCLASS:
2543  swap:
2544  {
2545  Node* tmp;
2546  tmp = x; x = y; y = tmp;
2547  goto retry;
2548  }
2549  break;
2550 
2551  case NT_STR:
2552  goto swap;
2553  break;
2554 
2555  default:
2556  break;
2557  }
2558  }
2559  break;
2560 
2561  case NT_CCLASS:
2562  {
2563  CClassNode* xc = NCCLASS(x);
2564  switch (ytype) {
2565  case NT_CTYPE:
2566  switch (NCTYPE(y)->ctype) {
2567  case ONIGENC_CTYPE_WORD:
2568  if (NCTYPE(y)->not == 0) {
2569  if (IS_NULL(xc->mbuf) && !IS_NCCLASS_NOT(xc)) {
2570  for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
2571  if (BITSET_AT(xc->bs, i)) {
2572  if (NCTYPE(y)->ascii_range) {
2573  if (IS_CODE_SB_WORD(reg->enc, i)) return 0;
2574  }
2575  else {
2576  if (ONIGENC_IS_CODE_WORD(reg->enc, i)) return 0;
2577  }
2578  }
2579  }
2580  return 1;
2581  }
2582  return 0;
2583  }
2584  else {
2585  for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
2586  int is_word;
2587  if (NCTYPE(y)->ascii_range)
2588  is_word = IS_CODE_SB_WORD(reg->enc, i);
2589  else
2590  is_word = ONIGENC_IS_CODE_WORD(reg->enc, i);
2591  if (! is_word) {
2592  if (!IS_NCCLASS_NOT(xc)) {
2593  if (BITSET_AT(xc->bs, i))
2594  return 0;
2595  }
2596  else {
2597  if (! BITSET_AT(xc->bs, i))
2598  return 0;
2599  }
2600  }
2601  }
2602  return 1;
2603  }
2604  break;
2605 
2606  default:
2607  break;
2608  }
2609  break;
2610 
2611  case NT_CCLASS:
2612  {
2613  int v;
2614  CClassNode* yc = NCCLASS(y);
2615 
2616  for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
2617  v = BITSET_AT(xc->bs, i);
2618  if ((v != 0 && !IS_NCCLASS_NOT(xc)) ||
2619  (v == 0 && IS_NCCLASS_NOT(xc))) {
2620  v = BITSET_AT(yc->bs, i);
2621  if ((v != 0 && !IS_NCCLASS_NOT(yc)) ||
2622  (v == 0 && IS_NCCLASS_NOT(yc)))
2623  return 0;
2624  }
2625  }
2626  if ((IS_NULL(xc->mbuf) && !IS_NCCLASS_NOT(xc)) ||
2627  (IS_NULL(yc->mbuf) && !IS_NCCLASS_NOT(yc)))
2628  return 1;
2629  return 0;
2630  }
2631  break;
2632 
2633  case NT_STR:
2634  goto swap;
2635  break;
2636 
2637  default:
2638  break;
2639  }
2640  }
2641  break;
2642 
2643  case NT_STR:
2644  {
2645  StrNode* xs = NSTR(x);
2646  if (NSTRING_LEN(x) == 0)
2647  break;
2648 
2649  switch (ytype) {
2650  case NT_CTYPE:
2651  switch (NCTYPE(y)->ctype) {
2652  case ONIGENC_CTYPE_WORD:
2653  if (NCTYPE(y)->ascii_range) {
2654  if (ONIGENC_IS_MBC_ASCII_WORD(reg->enc, xs->s, xs->end))
2655  return NCTYPE(y)->not;
2656  else
2657  return !(NCTYPE(y)->not);
2658  }
2659  else {
2660  if (ONIGENC_IS_MBC_WORD(reg->enc, xs->s, xs->end))
2661  return NCTYPE(y)->not;
2662  else
2663  return !(NCTYPE(y)->not);
2664  }
2665  break;
2666  default:
2667  break;
2668  }
2669  break;
2670 
2671  case NT_CCLASS:
2672  {
2673  CClassNode* cc = NCCLASS(y);
2674 
2675  code = ONIGENC_MBC_TO_CODE(reg->enc, xs->s,
2676  xs->s + ONIGENC_MBC_MAXLEN(reg->enc));
2677  return (onig_is_code_in_cc(reg->enc, code, cc) != 0 ? 0 : 1);
2678  }
2679  break;
2680 
2681  case NT_STR:
2682  {
2683  UChar *q;
2684  StrNode* ys = NSTR(y);
2685  len = NSTRING_LEN(x);
2686  if (len > NSTRING_LEN(y)) len = NSTRING_LEN(y);
2687  if (NSTRING_IS_AMBIG(x) || NSTRING_IS_AMBIG(y)) {
2688  /* tiny version */
2689  return 0;
2690  }
2691  else {
2692  for (i = 0, p = ys->s, q = xs->s; (OnigDistance )i < len; i++, p++, q++) {
2693  if (*p != *q) return 1;
2694  }
2695  }
2696  }
2697  break;
2698 
2699  default:
2700  break;
2701  }
2702  }
2703  break;
2704 
2705  default:
2706  break;
2707  }
2708 
2709  return 0;
2710 }
2711 
2712 static Node*
2713 get_head_value_node(Node* node, int exact, regex_t* reg)
2714 {
2715  Node* n = NULL_NODE;
2716 
2717  switch (NTYPE(node)) {
2718  case NT_BREF:
2719  case NT_ALT:
2720  case NT_CANY:
2721 #ifdef USE_SUBEXP_CALL
2722  case NT_CALL:
2723 #endif
2724  break;
2725 
2726  case NT_CTYPE:
2727  case NT_CCLASS:
2728  if (exact == 0) {
2729  n = node;
2730  }
2731  break;
2732 
2733  case NT_LIST:
2734  n = get_head_value_node(NCAR(node), exact, reg);
2735  break;
2736 
2737  case NT_STR:
2738  {
2739  StrNode* sn = NSTR(node);
2740 
2741  if (sn->end <= sn->s)
2742  break;
2743 
2744  if (exact != 0 &&
2745  !NSTRING_IS_RAW(node) && IS_IGNORECASE(reg->options)) {
2746  }
2747  else {
2748  n = node;
2749  }
2750  }
2751  break;
2752 
2753  case NT_QTFR:
2754  {
2755  QtfrNode* qn = NQTFR(node);
2756  if (qn->lower > 0) {
2757  if (IS_NOT_NULL(qn->head_exact))
2758  n = qn->head_exact;
2759  else
2760  n = get_head_value_node(qn->target, exact, reg);
2761  }
2762  }
2763  break;
2764 
2765  case NT_ENCLOSE:
2766  {
2767  EncloseNode* en = NENCLOSE(node);
2768  switch (en->type) {
2769  case ENCLOSE_OPTION:
2770  {
2772 
2773  reg->options = NENCLOSE(node)->option;
2774  n = get_head_value_node(NENCLOSE(node)->target, exact, reg);
2775  reg->options = options;
2776  }
2777  break;
2778 
2779  case ENCLOSE_MEMORY:
2781  case ENCLOSE_CONDITION:
2782  n = get_head_value_node(en->target, exact, reg);
2783  break;
2784  }
2785  }
2786  break;
2787 
2788  case NT_ANCHOR:
2789  if (NANCHOR(node)->type == ANCHOR_PREC_READ)
2790  n = get_head_value_node(NANCHOR(node)->target, exact, reg);
2791  break;
2792 
2793  default:
2794  break;
2795  }
2796 
2797  return n;
2798 }
2799 
2800 static int
2801 check_type_tree(Node* node, int type_mask, int enclose_mask, int anchor_mask)
2802 {
2803  int type, r = 0;
2804 
2805  type = NTYPE(node);
2806  if ((NTYPE2BIT(type) & type_mask) == 0)
2807  return 1;
2808 
2809  switch (type) {
2810  case NT_LIST:
2811  case NT_ALT:
2812  do {
2813  r = check_type_tree(NCAR(node), type_mask, enclose_mask,
2814  anchor_mask);
2815  } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
2816  break;
2817 
2818  case NT_QTFR:
2819  r = check_type_tree(NQTFR(node)->target, type_mask, enclose_mask,
2820  anchor_mask);
2821  break;
2822 
2823  case NT_ENCLOSE:
2824  {
2825  EncloseNode* en = NENCLOSE(node);
2826  if ((en->type & enclose_mask) == 0)
2827  return 1;
2828 
2829  r = check_type_tree(en->target, type_mask, enclose_mask, anchor_mask);
2830  }
2831  break;
2832 
2833  case NT_ANCHOR:
2834  type = NANCHOR(node)->type;
2835  if ((type & anchor_mask) == 0)
2836  return 1;
2837 
2838  if (NANCHOR(node)->target)
2839  r = check_type_tree(NANCHOR(node)->target,
2840  type_mask, enclose_mask, anchor_mask);
2841  break;
2842 
2843  default:
2844  break;
2845  }
2846  return r;
2847 }
2848 
2849 #ifdef USE_SUBEXP_CALL
2850 
2851 #define RECURSION_EXIST 1
2852 #define RECURSION_INFINITE 2
2853 
2854 static int
2856 {
2857  int type;
2858  int r = 0;
2859 
2860  type = NTYPE(node);
2861  switch (type) {
2862  case NT_LIST:
2863  {
2864  Node *x;
2865  OnigDistance min;
2866  int ret;
2867 
2868  x = node;
2869  do {
2870  ret = subexp_inf_recursive_check(NCAR(x), env, head);
2871  if (ret < 0 || ret == RECURSION_INFINITE) return ret;
2872  r |= ret;
2873  if (head) {
2874  ret = get_min_match_length(NCAR(x), &min, env);
2875  if (ret != 0) return ret;
2876  if (min != 0) head = 0;
2877  }
2878  } while (IS_NOT_NULL(x = NCDR(x)));
2879  }
2880  break;
2881 
2882  case NT_ALT:
2883  {
2884  int ret;
2885  r = RECURSION_EXIST;
2886  do {
2887  ret = subexp_inf_recursive_check(NCAR(node), env, head);
2888  if (ret < 0 || ret == RECURSION_INFINITE) return ret;
2889  r &= ret;
2890  } while (IS_NOT_NULL(node = NCDR(node)));
2891  }
2892  break;
2893 
2894  case NT_QTFR:
2895  r = subexp_inf_recursive_check(NQTFR(node)->target, env, head);
2896  if (r == RECURSION_EXIST) {
2897  if (NQTFR(node)->lower == 0) r = 0;
2898  }
2899  break;
2900 
2901  case NT_ANCHOR:
2902  {
2903  AnchorNode* an = NANCHOR(node);
2904  switch (an->type) {
2905  case ANCHOR_PREC_READ:
2906  case ANCHOR_PREC_READ_NOT:
2907  case ANCHOR_LOOK_BEHIND:
2909  r = subexp_inf_recursive_check(an->target, env, head);
2910  break;
2911  }
2912  }
2913  break;
2914 
2915  case NT_CALL:
2916  r = subexp_inf_recursive_check(NCALL(node)->target, env, head);
2917  break;
2918 
2919  case NT_ENCLOSE:
2920  if (IS_ENCLOSE_MARK2(NENCLOSE(node)))
2921  return 0;
2922  else if (IS_ENCLOSE_MARK1(NENCLOSE(node)))
2923  return (head == 0 ? RECURSION_EXIST : RECURSION_INFINITE);
2924  else {
2926  r = subexp_inf_recursive_check(NENCLOSE(node)->target, env, head);
2928  }
2929  break;
2930 
2931  default:
2932  break;
2933  }
2934 
2935  return r;
2936 }
2937 
2938 static int
2940 {
2941  int type;
2942  int r = 0;
2943 
2944  type = NTYPE(node);
2945  switch (type) {
2946  case NT_LIST:
2947  case NT_ALT:
2948  do {
2949  r = subexp_inf_recursive_check_trav(NCAR(node), env);
2950  } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
2951  break;
2952 
2953  case NT_QTFR:
2954  r = subexp_inf_recursive_check_trav(NQTFR(node)->target, env);
2955  break;
2956 
2957  case NT_ANCHOR:
2958  {
2959  AnchorNode* an = NANCHOR(node);
2960  switch (an->type) {
2961  case ANCHOR_PREC_READ:
2962  case ANCHOR_PREC_READ_NOT:
2963  case ANCHOR_LOOK_BEHIND:
2966  break;
2967  }
2968  }
2969  break;
2970 
2971  case NT_ENCLOSE:
2972  {
2973  EncloseNode* en = NENCLOSE(node);
2974 
2975  if (IS_ENCLOSE_RECURSION(en)) {
2977  r = subexp_inf_recursive_check(en->target, env, 1);
2978  if (r > 0) return ONIGERR_NEVER_ENDING_RECURSION;
2980  }
2982  }
2983 
2984  break;
2985 
2986  default:
2987  break;
2988  }
2989 
2990  return r;
2991 }
2992 
2993 static int
2995 {
2996  int r = 0;
2997 
2998  switch (NTYPE(node)) {
2999  case NT_LIST:
3000  case NT_ALT:
3001  do {
3002  r |= subexp_recursive_check(NCAR(node));
3003  } while (IS_NOT_NULL(node = NCDR(node)));
3004  break;
3005 
3006  case NT_QTFR:
3007  r = subexp_recursive_check(NQTFR(node)->target);
3008  break;
3009 
3010  case NT_ANCHOR:
3011  {
3012  AnchorNode* an = NANCHOR(node);
3013  switch (an->type) {
3014  case ANCHOR_PREC_READ:
3015  case ANCHOR_PREC_READ_NOT:
3016  case ANCHOR_LOOK_BEHIND:
3018  r = subexp_recursive_check(an->target);
3019  break;
3020  }
3021  }
3022  break;
3023 
3024  case NT_CALL:
3025  r = subexp_recursive_check(NCALL(node)->target);
3026  if (r != 0) SET_CALL_RECURSION(node);
3027  break;
3028 
3029  case NT_ENCLOSE:
3030  if (IS_ENCLOSE_MARK2(NENCLOSE(node)))
3031  return 0;
3032  else if (IS_ENCLOSE_MARK1(NENCLOSE(node)))
3033  return 1; /* recursion */
3034  else {
3036  r = subexp_recursive_check(NENCLOSE(node)->target);
3038  }
3039  break;
3040 
3041  default:
3042  break;
3043  }
3044 
3045  return r;
3046 }
3047 
3048 
3049 static int
3051 {
3052 #define FOUND_CALLED_NODE 1
3053 
3054  int type;
3055  int r = 0;
3056 
3057  type = NTYPE(node);
3058  switch (type) {
3059  case NT_LIST:
3060  case NT_ALT:
3061  {
3062  int ret;
3063  do {
3064  ret = subexp_recursive_check_trav(NCAR(node), env);
3065  if (ret == FOUND_CALLED_NODE) r = FOUND_CALLED_NODE;
3066  else if (ret < 0) return ret;
3067  } while (IS_NOT_NULL(node = NCDR(node)));
3068  }
3069  break;
3070 
3071  case NT_QTFR:
3072  r = subexp_recursive_check_trav(NQTFR(node)->target, env);
3073  if (NQTFR(node)->upper == 0) {
3074  if (r == FOUND_CALLED_NODE)
3075  NQTFR(node)->is_refered = 1;
3076  }
3077  break;
3078 
3079  case NT_ANCHOR:
3080  {
3081  AnchorNode* an = NANCHOR(node);
3082  switch (an->type) {
3083  case ANCHOR_PREC_READ:
3084  case ANCHOR_PREC_READ_NOT:
3085  case ANCHOR_LOOK_BEHIND:
3087  r = subexp_recursive_check_trav(an->target, env);
3088  break;
3089  }
3090  }
3091  break;
3092 
3093  case NT_ENCLOSE:
3094  {
3095  EncloseNode* en = NENCLOSE(node);
3096 
3097  if (! IS_ENCLOSE_RECURSION(en)) {
3098  if (IS_ENCLOSE_CALLED(en)) {
3100  r = subexp_recursive_check(en->target);
3101  if (r != 0) SET_ENCLOSE_STATUS(node, NST_RECURSION);
3103  }
3104  }
3105  r = subexp_recursive_check_trav(en->target, env);
3106  if (IS_ENCLOSE_CALLED(en))
3107  r |= FOUND_CALLED_NODE;
3108  }
3109  break;
3110 
3111  default:
3112  break;
3113  }
3114 
3115  return r;
3116 }
3117 
3118 static int
3120 {
3121  int type;
3122  int r = 0;
3123 
3124  type = NTYPE(node);
3125  switch (type) {
3126  case NT_LIST:
3127  do {
3128  r = setup_subexp_call(NCAR(node), env);
3129  } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
3130  break;
3131 
3132  case NT_ALT:
3133  do {
3134  r = setup_subexp_call(NCAR(node), env);
3135  } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
3136  break;
3137 
3138  case NT_QTFR:
3139  r = setup_subexp_call(NQTFR(node)->target, env);
3140  break;
3141  case NT_ENCLOSE:
3142  r = setup_subexp_call(NENCLOSE(node)->target, env);
3143  break;
3144 
3145  case NT_CALL:
3146  {
3147  CallNode* cn = NCALL(node);
3148  Node** nodes = SCANENV_MEM_NODES(env);
3149 
3150  if (cn->group_num != 0) {
3151  int gnum = cn->group_num;
3152 
3153 #ifdef USE_NAMED_GROUP
3154  if (env->num_named > 0 &&
3158  }
3159 #endif
3160  if (gnum > env->num_mem) {
3164  }
3165 
3166 #ifdef USE_NAMED_GROUP
3167  set_call_attr:
3168 #endif
3169  cn->target = nodes[cn->group_num];
3170  if (IS_NULL(cn->target)) {
3174  }
3177  cn->unset_addr_list = env->unset_addr_list;
3178  }
3179 #ifdef USE_NAMED_GROUP
3180 #ifdef USE_PERL_SUBEXP_CALL
3181  else if (cn->name == cn->name_end) {
3182  goto set_call_attr;
3183  }
3184 #endif
3185  else {
3186  int *refs;
3187 
3188  int n = onig_name_to_group_numbers(env->reg, cn->name, cn->name_end,
3189  &refs);
3190  if (n <= 0) {
3194  }
3195  else if (n > 1 &&
3200  }
3201  else {
3202  cn->group_num = refs[0];
3203  goto set_call_attr;
3204  }
3205  }
3206 #endif
3207  }
3208  break;
3209 
3210  case NT_ANCHOR:
3211  {
3212  AnchorNode* an = NANCHOR(node);
3213 
3214  switch (an->type) {
3215  case ANCHOR_PREC_READ:
3216  case ANCHOR_PREC_READ_NOT:
3217  case ANCHOR_LOOK_BEHIND:
3219  r = setup_subexp_call(an->target, env);
3220  break;
3221  }
3222  }
3223  break;
3224 
3225  default:
3226  break;
3227  }
3228 
3229  return r;
3230 }
3231 #endif
3232 
3233 /* divide different length alternatives in look-behind.
3234  (?<=A|B) ==> (?<=A)|(?<=B)
3235  (?<!A|B) ==> (?<!A)(?<!B)
3236 */
3237 static int
3239 {
3240  Node *head, *np, *insert_node;
3241  AnchorNode* an = NANCHOR(node);
3242  int anc_type = an->type;
3243 
3244  head = an->target;
3245  np = NCAR(head);
3246  swap_node(node, head);
3247  NCAR(node) = head;
3248  NANCHOR(head)->target = np;
3249 
3250  np = node;
3251  while ((np = NCDR(np)) != NULL_NODE) {
3252  insert_node = onig_node_new_anchor(anc_type);
3253  CHECK_NULL_RETURN_MEMERR(insert_node);
3254  NANCHOR(insert_node)->target = NCAR(np);
3255  NCAR(np) = insert_node;
3256  }
3257 
3258  if (anc_type == ANCHOR_LOOK_BEHIND_NOT) {
3259  np = node;
3260  do {
3261  SET_NTYPE(np, NT_LIST); /* alt -> list */
3262  } while ((np = NCDR(np)) != NULL_NODE);
3263  }
3264  return 0;
3265 }
3266 
3267 static int
3269 {
3270  int r, len;
3271  AnchorNode* an = NANCHOR(node);
3272 
3273  r = get_char_length_tree(an->target, reg, &len);
3274  if (r == 0)
3275  an->char_len = len;
3276  else if (r == GET_CHAR_LEN_VARLEN)
3278  else if (r == GET_CHAR_LEN_TOP_ALT_VARLEN) {
3281  else
3283  }
3284 
3285  return r;
3286 }
3287 
3288 static int
3289 next_setup(Node* node, Node* next_node, int in_root, regex_t* reg)
3290 {
3291  int type;
3292 
3293  retry:
3294  type = NTYPE(node);
3295  if (type == NT_QTFR) {
3296  QtfrNode* qn = NQTFR(node);
3297  if (qn->greedy && IS_REPEAT_INFINITE(qn->upper)) {
3298 #ifdef USE_QTFR_PEEK_NEXT
3299  Node* n = get_head_value_node(next_node, 1, reg);
3300  /* '\0': for UTF-16BE etc... */
3301  if (IS_NOT_NULL(n) && NSTR(n)->s[0] != '\0') {
3302  qn->next_head_exact = n;
3303  }
3304 #endif
3305  /* automatic possessivation a*b ==> (?>a*)b */
3306  if (qn->lower <= 1) {
3307  int ttype = NTYPE(qn->target);
3308  if (IS_NODE_TYPE_SIMPLE(ttype)) {
3309  Node *x, *y;
3310  x = get_head_value_node(qn->target, 0, reg);
3311  if (IS_NOT_NULL(x)) {
3312  y = get_head_value_node(next_node, 0, reg);
3313  if (IS_NOT_NULL(y) && is_not_included(x, y, reg)) {
3317  swap_node(node, en);
3318  NENCLOSE(node)->target = en;
3319  }
3320  }
3321  }
3322  }
3323 
3324 #ifndef ONIG_DONT_OPTIMIZE
3325  if (NTYPE(node) == NT_QTFR && /* the type may be changed by above block */
3326  in_root && /* qn->lower == 0 && */
3327  NTYPE(qn->target) == NT_CANY &&
3328  ! IS_MULTILINE(reg->options)) {
3329  /* implicit anchor: /.*a/ ==> /(?:^|\G).*a/ */
3330  Node *np;
3333  swap_node(node, np);
3334  NCDR(node) = onig_node_new_list(np, NULL_NODE);
3335  if (IS_NULL(NCDR(node))) {
3336  onig_node_free(np);
3337  return ONIGERR_MEMORY;
3338  }
3339  np = onig_node_new_anchor(ANCHOR_ANYCHAR_STAR); /* (?:^|\G) */
3341  NCAR(node) = np;
3342  }
3343 #endif
3344  }
3345  }
3346  else if (type == NT_ENCLOSE) {
3347  EncloseNode* en = NENCLOSE(node);
3348  in_root = 0;
3349  if (en->type == ENCLOSE_MEMORY) {
3350  node = en->target;
3351  goto retry;
3352  }
3353  }
3354  return 0;
3355 }
3356 
3357 
3358 static int
3360 {
3362  UChar *sbuf, *ebuf, *sp;
3363  int r, i, len;
3364  OnigDistance sbuf_size;
3365  StrNode* sn = NSTR(node);
3366 
3367  end = sn->end;
3368  sbuf_size = (end - sn->s) * 2;
3369  sbuf = (UChar* )xmalloc(sbuf_size);
3371  ebuf = sbuf + sbuf_size;
3372 
3373  sp = sbuf;
3374  p = sn->s;
3375  while (p < end) {
3376  len = ONIGENC_MBC_CASE_FOLD(reg->enc, reg->case_fold_flag, &p, end, buf);
3377  for (i = 0; i < len; i++) {
3378  if (sp >= ebuf) {
3379  UChar* p = (UChar* )xrealloc(sbuf, sbuf_size * 2);
3380  if (IS_NULL(p)) {
3381  xfree(sbuf);
3382  return ONIGERR_MEMORY;
3383  }
3384  sbuf = p;
3385  sp = sbuf + sbuf_size;
3386  sbuf_size *= 2;
3387  ebuf = sbuf + sbuf_size;
3388  }
3389 
3390  *sp++ = buf[i];
3391  }
3392  }
3393 
3394  r = onig_node_str_set(node, sbuf, sp);
3395  if (r != 0) {
3396  xfree(sbuf);
3397  return r;
3398  }
3399 
3400  xfree(sbuf);
3401  return 0;
3402 }
3403 
3404 static int
3406  regex_t* reg)
3407 {
3408  int r;
3409  Node *node;
3410 
3411  node = onig_node_new_str(s, end);
3412  if (IS_NULL(node)) return ONIGERR_MEMORY;
3413 
3414  r = update_string_node_case_fold(reg, node);
3415  if (r != 0) {
3416  onig_node_free(node);
3417  return r;
3418  }
3419 
3420  NSTRING_SET_AMBIG(node);
3422  *rnode = node;
3423  return 0;
3424 }
3425 
3426 static int
3428  UChar *p, int slen, UChar *end,
3429  regex_t* reg, Node **rnode)
3430 {
3431  int r, i, j, len, varlen, varclen;
3432  Node *anode, *var_anode, *snode, *xnode, *an;
3434 
3435  *rnode = var_anode = NULL_NODE;
3436 
3437  varlen = 0;
3438  varclen = 0;
3439  for (i = 0; i < item_num; i++) {
3440  if (items[i].byte_len != slen) {
3441  varlen = 1;
3442  break;
3443  }
3444  if (items[i].code_len != 1) {
3445  varclen = 1;
3446  }
3447  }
3448 
3449  if (varlen != 0) {
3450  *rnode = var_anode = onig_node_new_alt(NULL_NODE, NULL_NODE);
3451  if (IS_NULL(var_anode)) return ONIGERR_MEMORY;
3452 
3453  xnode = onig_node_new_list(NULL, NULL);
3454  if (IS_NULL(xnode)) goto mem_err;
3455  NCAR(var_anode) = xnode;
3456 
3458  if (IS_NULL(anode)) goto mem_err;
3459  NCAR(xnode) = anode;
3460  }
3461  else {
3462  *rnode = anode = onig_node_new_alt(NULL_NODE, NULL_NODE);
3463  if (IS_NULL(anode)) return ONIGERR_MEMORY;
3464  }
3465 
3466  snode = onig_node_new_str(p, p + slen);
3467  if (IS_NULL(snode)) goto mem_err;
3468 
3469  NCAR(anode) = snode;
3470 
3471  for (i = 0; i < item_num; i++) {
3472  snode = onig_node_new_str(NULL, NULL);
3473  if (IS_NULL(snode)) goto mem_err;
3474 
3475  for (j = 0; j < items[i].code_len; j++) {
3476  len = ONIGENC_CODE_TO_MBC(reg->enc, items[i].code[j], buf);
3477  if (len < 0) {
3478  r = len;
3479  goto mem_err2;
3480  }
3481 
3482  r = onig_node_str_cat(snode, buf, buf + len);
3483  if (r != 0) goto mem_err2;
3484  }
3485 
3487  if (IS_NULL(an)) {
3488  goto mem_err2;
3489  }
3490 
3491  if (items[i].byte_len != slen) {
3492  Node *rem;
3493  UChar *q = p + items[i].byte_len;
3494 
3495  if (q < end) {
3496  r = expand_case_fold_make_rem_string(&rem, q, end, reg);
3497  if (r != 0) {
3498  onig_node_free(an);
3499  goto mem_err2;
3500  }
3501 
3502  xnode = onig_node_list_add(NULL_NODE, snode);
3503  if (IS_NULL(xnode)) {
3504  onig_node_free(an);
3505  onig_node_free(rem);
3506  goto mem_err2;
3507  }
3508  if (IS_NULL(onig_node_list_add(xnode, rem))) {
3509  onig_node_free(an);
3510  onig_node_free(xnode);
3511  onig_node_free(rem);
3512  goto mem_err;
3513  }
3514 
3515  NCAR(an) = xnode;
3516  }
3517  else {
3518  NCAR(an) = snode;
3519  }
3520 
3521  NCDR(var_anode) = an;
3522  var_anode = an;
3523  }
3524  else {
3525  NCAR(an) = snode;
3526  NCDR(anode) = an;
3527  anode = an;
3528  }
3529  }
3530 
3531  if (varclen && !varlen)
3532  return 2;
3533  return varlen;
3534 
3535  mem_err2:
3536  onig_node_free(snode);
3537 
3538  mem_err:
3539  onig_node_free(*rnode);
3540 
3541  return ONIGERR_MEMORY;
3542 }
3543 
3544 static int
3546 {
3547 #define THRESHOLD_CASE_FOLD_ALT_FOR_EXPANSION 8
3548 
3549  int r, n, len, alt_num;
3550  int varlen = 0;
3551  UChar *start, *end, *p;
3552  Node *top_root, *root, *snode, *prev_node;
3554  StrNode* sn = NSTR(node);
3555 
3556  if (NSTRING_IS_AMBIG(node)) return 0;
3557 
3558  start = sn->s;
3559  end = sn->end;
3560  if (start >= end) return 0;
3561 
3562  r = 0;
3563  top_root = root = prev_node = snode = NULL_NODE;
3564  alt_num = 1;
3565  p = start;
3566  while (p < end) {
3568  p, end, items);
3569  if (n < 0) {
3570  r = n;
3571  goto err;
3572  }
3573 
3574  len = enclen(reg->enc, p, end);
3575 
3576  if (n == 0) {
3577  if (IS_NULL(snode)) {
3578  if (IS_NULL(root) && IS_NOT_NULL(prev_node)) {
3579  top_root = root = onig_node_list_add(NULL_NODE, prev_node);
3580  if (IS_NULL(root)) {
3581  onig_node_free(prev_node);
3582  goto mem_err;
3583  }
3584  }
3585 
3586  prev_node = snode = onig_node_new_str(NULL, NULL);
3587  if (IS_NULL(snode)) goto mem_err;
3588  if (IS_NOT_NULL(root)) {
3589  if (IS_NULL(onig_node_list_add(root, snode))) {
3590  onig_node_free(snode);
3591  goto mem_err;
3592  }
3593  }
3594  }
3595 
3596  r = onig_node_str_cat(snode, p, p + len);
3597  if (r != 0) goto err;
3598  }
3599  else {
3600  alt_num *= (n + 1);
3601  if (alt_num > THRESHOLD_CASE_FOLD_ALT_FOR_EXPANSION) break;
3602 
3603  if (IS_NULL(root) && IS_NOT_NULL(prev_node)) {
3604  top_root = root = onig_node_list_add(NULL_NODE, prev_node);
3605  if (IS_NULL(root)) {
3606  onig_node_free(prev_node);
3607  goto mem_err;
3608  }
3609  }
3610 
3611  r = expand_case_fold_string_alt(n, items, p, len, end, reg, &prev_node);
3612  if (r < 0) goto mem_err;
3613  if (r > 0) varlen = 1;
3614  if (r == 1) {
3615  if (IS_NULL(root)) {
3616  top_root = prev_node;
3617  }
3618  else {
3619  if (IS_NULL(onig_node_list_add(root, prev_node))) {
3620  onig_node_free(prev_node);
3621  goto mem_err;
3622  }
3623  }
3624 
3625  root = NCAR(prev_node);
3626  }
3627  else { /* r == 0 || r == 2 */
3628  if (IS_NOT_NULL(root)) {
3629  if (IS_NULL(onig_node_list_add(root, prev_node))) {
3630  onig_node_free(prev_node);
3631  goto mem_err;
3632  }
3633  }
3634  }
3635 
3636  snode = NULL_NODE;
3637  }
3638 
3639  p += len;
3640  }
3641 
3642  if (p < end) {
3643  Node *srem;
3644 
3645  r = expand_case_fold_make_rem_string(&srem, p, end, reg);
3646  if (r != 0) goto mem_err;
3647 
3648  if (IS_NOT_NULL(prev_node) && IS_NULL(root)) {
3649  top_root = root = onig_node_list_add(NULL_NODE, prev_node);
3650  if (IS_NULL(root)) {
3651  onig_node_free(srem);
3652  onig_node_free(prev_node);
3653  goto mem_err;
3654  }
3655  }
3656 
3657  if (IS_NULL(root)) {
3658  prev_node = srem;
3659  }
3660  else {
3661  if (IS_NULL(onig_node_list_add(root, srem))) {
3662  onig_node_free(srem);
3663  goto mem_err;
3664  }
3665  }
3666  }
3667 
3668  /* ending */
3669  top_root = (IS_NOT_NULL(top_root) ? top_root : prev_node);
3670  if (!varlen) {
3671  /* When all expanded strings are same length, case-insensitive
3672  BM search will be used. */
3673  r = update_string_node_case_fold(reg, node);
3674  if (r == 0) {
3675  NSTRING_SET_AMBIG(node);
3676  }
3677  }
3678  else {
3679  swap_node(node, top_root);
3680  r = 0;
3681  }
3682  onig_node_free(top_root);
3683  return r;
3684 
3685  mem_err:
3686  r = ONIGERR_MEMORY;
3687 
3688  err:
3689  onig_node_free(top_root);
3690  return r;
3691 }
3692 
3693 
3694 #ifdef USE_COMBINATION_EXPLOSION_CHECK
3695 
3696 #define CEC_THRES_NUM_BIG_REPEAT 512
3697 #define CEC_INFINITE_NUM 0x7fffffff
3698 
3699 #define CEC_IN_INFINITE_REPEAT (1<<0)
3700 #define CEC_IN_FINITE_REPEAT (1<<1)
3701 #define CEC_CONT_BIG_REPEAT (1<<2)
3702 
3703 static int
3704 setup_comb_exp_check(Node* node, int state, ScanEnv* env)
3705 {
3706  int type;
3707  int r = state;
3708 
3709  type = NTYPE(node);
3710  switch (type) {
3711  case NT_LIST:
3712  {
3713  Node* prev = NULL_NODE;
3714  do {
3715  r = setup_comb_exp_check(NCAR(node), r, env);
3716  prev = NCAR(node);
3717  } while (r >= 0 && IS_NOT_NULL(node = NCDR(node)));
3718  }
3719  break;
3720 
3721  case NT_ALT:
3722  {
3723  int ret;
3724  do {
3725  ret = setup_comb_exp_check(NCAR(node), state, env);
3726  r |= ret;
3727  } while (ret >= 0 && IS_NOT_NULL(node = NCDR(node)));
3728  }
3729  break;
3730 
3731  case NT_QTFR:
3732  {
3733  int child_state = state;
3734  int add_state = 0;
3735  QtfrNode* qn = NQTFR(node);
3736  Node* target = qn->target;
3737  int var_num;
3738 
3739  if (! IS_REPEAT_INFINITE(qn->upper)) {
3740  if (qn->upper > 1) {
3741  /* {0,1}, {1,1} are allowed */
3742  child_state |= CEC_IN_FINITE_REPEAT;
3743 
3744  /* check (a*){n,m}, (a+){n,m} => (a*){n,n}, (a+){n,n} */
3745  if (env->backrefed_mem == 0) {
3746  if (NTYPE(qn->target) == NT_ENCLOSE) {
3747  EncloseNode* en = NENCLOSE(qn->target);
3748  if (en->type == ENCLOSE_MEMORY) {
3749  if (NTYPE(en->target) == NT_QTFR) {
3750  QtfrNode* q = NQTFR(en->target);
3751  if (IS_REPEAT_INFINITE(q->upper)
3752  && q->greedy == qn->greedy) {
3753  qn->upper = (qn->lower == 0 ? 1 : qn->lower);
3754  if (qn->upper == 1)
3755  child_state = state;
3756  }
3757  }
3758  }
3759  }
3760  }
3761  }
3762  }
3763 
3764  if (state & CEC_IN_FINITE_REPEAT) {
3765  qn->comb_exp_check_num = -1;
3766  }
3767  else {
3768  if (IS_REPEAT_INFINITE(qn->upper)) {
3769  var_num = CEC_INFINITE_NUM;
3770  child_state |= CEC_IN_INFINITE_REPEAT;
3771  }
3772  else {
3773  var_num = qn->upper - qn->lower;
3774  }
3775 
3776  if (var_num >= CEC_THRES_NUM_BIG_REPEAT)
3777  add_state |= CEC_CONT_BIG_REPEAT;
3778 
3779  if (((state & CEC_IN_INFINITE_REPEAT) != 0 && var_num != 0) ||
3780  ((state & CEC_CONT_BIG_REPEAT) != 0 &&
3781  var_num >= CEC_THRES_NUM_BIG_REPEAT)) {
3782  if (qn->comb_exp_check_num == 0) {
3783  env->num_comb_exp_check++;
3784  qn->comb_exp_check_num = env->num_comb_exp_check;
3785  if (env->curr_max_regnum > env->comb_exp_max_regnum)
3786  env->comb_exp_max_regnum = env->curr_max_regnum;
3787  }
3788  }
3789  }
3790 
3791  r = setup_comb_exp_check(target, child_state, env);
3792  r |= add_state;
3793  }
3794  break;
3795 
3796  case NT_ENCLOSE:
3797  {
3798  EncloseNode* en = NENCLOSE(node);
3799 
3800  switch (en->type) {
3801  case ENCLOSE_MEMORY:
3802  {
3803  if (env->curr_max_regnum < en->regnum)
3804  env->curr_max_regnum = en->regnum;
3805 
3806  r = setup_comb_exp_check(en->target, state, env);
3807  }
3808  break;
3809 
3810  default:
3811  r = setup_comb_exp_check(en->target, state, env);
3812  break;
3813  }
3814  }
3815  break;
3816 
3817 #ifdef USE_SUBEXP_CALL
3818  case NT_CALL:
3819  if (IS_CALL_RECURSION(NCALL(node)))
3820  env->has_recursion = 1;
3821  else
3822  r = setup_comb_exp_check(NCALL(node)->target, state, env);
3823  break;
3824 #endif
3825 
3826  default:
3827  break;
3828  }
3829 
3830  return r;
3831 }
3832 #endif
3833 
3834 #define IN_ALT (1<<0)
3835 #define IN_NOT (1<<1)
3836 #define IN_REPEAT (1<<2)
3837 #define IN_VAR_REPEAT (1<<3)
3838 #define IN_ROOT (1<<4)
3839 
3840 /* setup_tree does the following work.
3841  1. check empty loop. (set qn->target_empty_info)
3842  2. expand ignore-case in char class.
3843  3. set memory status bit flags. (reg->mem_stats)
3844  4. set qn->head_exact for [push, exact] -> [push_or_jump_exact1, exact].
3845  5. find invalid patterns in look-behind.
3846  6. expand repeated string.
3847  */
3848 static int
3849 setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env)
3850 {
3851  int type;
3852  int r = 0;
3853  int in_root = state & IN_ROOT;
3854 
3855  state &= ~IN_ROOT;
3856 restart:
3857  type = NTYPE(node);
3858  switch (type) {
3859  case NT_LIST:
3860  {
3861  Node* prev = NULL_NODE;
3862  int prev_in_root = 0;
3863  state |= in_root;
3864  do {
3865  r = setup_tree(NCAR(node), reg, state, env);
3866  if (IS_NOT_NULL(prev) && r == 0) {
3867  r = next_setup(prev, NCAR(node), prev_in_root, reg);
3868  }
3869  prev = NCAR(node);
3870  prev_in_root = state & IN_ROOT;
3871  state &= ~IN_ROOT;
3872  } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
3873  }
3874  break;
3875 
3876  case NT_ALT:
3877  do {
3878  r = setup_tree(NCAR(node), reg, (state | IN_ALT), env);
3879  } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
3880  break;
3881 
3882  case NT_CCLASS:
3883  break;
3884 
3885  case NT_STR:
3886  if (IS_IGNORECASE(reg->options) && !NSTRING_IS_RAW(node)) {
3887  r = expand_case_fold_string(node, reg);
3888  }
3889  break;
3890 
3891  case NT_CTYPE:
3892  case NT_CANY:
3893  break;
3894 
3895 #ifdef USE_SUBEXP_CALL
3896  case NT_CALL:
3897  break;
3898 #endif
3899 
3900  case NT_BREF:
3901  {
3902  int i;
3903  int* p;
3904  Node** nodes = SCANENV_MEM_NODES(env);
3905  BRefNode* br = NBREF(node);
3906  p = BACKREFS_P(br);
3907  for (i = 0; i < br->back_num; i++) {
3908  if (p[i] > env->num_mem) return ONIGERR_INVALID_BACKREF;
3909  BIT_STATUS_ON_AT(env->backrefed_mem, p[i]);
3910  BIT_STATUS_ON_AT(env->bt_mem_start, p[i]);
3911 #ifdef USE_BACKREF_WITH_LEVEL
3912  if (IS_BACKREF_NEST_LEVEL(br)) {
3913  BIT_STATUS_ON_AT(env->bt_mem_end, p[i]);
3914  }
3915 #endif
3916  SET_ENCLOSE_STATUS(nodes[p[i]], NST_MEM_BACKREFED);
3917  }
3918  }
3919  break;
3920 
3921  case NT_QTFR:
3922  {
3923  OnigDistance d;
3924  QtfrNode* qn = NQTFR(node);
3925  Node* target = qn->target;
3926 
3927  if ((state & IN_REPEAT) != 0) {
3928  qn->state |= NST_IN_REPEAT;
3929  }
3930 
3931  if (IS_REPEAT_INFINITE(qn->upper) || qn->upper >= 1) {
3932  r = get_min_match_length(target, &d, env);
3933  if (r) break;
3934  if (d == 0) {
3936 #ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT
3937  r = quantifiers_memory_node_info(target);
3938  if (r < 0) break;
3939  if (r > 0) {
3940  qn->target_empty_info = r;
3941  }
3942 #endif
3943 #if 0
3944  r = get_max_match_length(target, &d, env);
3945  if (r == 0 && d == 0) {
3946  /* ()* ==> ()?, ()+ ==> () */
3947  qn->upper = 1;
3948  if (qn->lower > 1) qn->lower = 1;
3949  if (NTYPE(target) == NT_STR) {
3950  qn->upper = qn->lower = 0; /* /(?:)+/ ==> // */
3951  }
3952  }
3953 #endif
3954  }
3955  }
3956 
3957  state |= IN_REPEAT;
3958  if (qn->lower != qn->upper)
3959  state |= IN_VAR_REPEAT;
3960  r = setup_tree(target, reg, state, env);
3961  if (r) break;
3962 
3963  /* expand string */
3964 #define EXPAND_STRING_MAX_LENGTH 100
3965  if (NTYPE(target) == NT_STR) {
3966  if (qn->lower > 1) {
3967  int i, n = qn->lower;
3968  OnigDistance len = NSTRING_LEN(target);
3969  StrNode* sn = NSTR(target);
3970  Node* np;
3971 
3972  np = onig_node_new_str(sn->s, sn->end);
3973  if (IS_NULL(np)) return ONIGERR_MEMORY;
3974  NSTR(np)->flag = sn->flag;
3975 
3976  for (i = 1; i < n && (i+1) * len <= EXPAND_STRING_MAX_LENGTH; i++) {
3977  r = onig_node_str_cat(np, sn->s, sn->end);
3978  if (r) {
3979  onig_node_free(np);
3980  return r;
3981  }
3982  }
3983  if (i < qn->upper || IS_REPEAT_INFINITE(qn->upper)) {
3984  Node *np1, *np2;
3985 
3986  qn->lower -= i;
3987  if (! IS_REPEAT_INFINITE(qn->upper))
3988  qn->upper -= i;
3989 
3990  np1 = onig_node_new_list(np, NULL);
3991  if (IS_NULL(np1)) {
3992  onig_node_free(np);
3993  return ONIGERR_MEMORY;
3994  }
3995  swap_node(np1, node);
3996  np2 = onig_node_list_add(node, np1);
3997  if (IS_NULL(np2)) {
3998  onig_node_free(np1);
3999  return ONIGERR_MEMORY;
4000  }
4001  }
4002  else {
4003  swap_node(np, node);
4004  onig_node_free(np);
4005  }
4006  break; /* break case NT_QTFR: */
4007  }
4008  }
4009 
4010 #ifdef USE_OP_PUSH_OR_JUMP_EXACT
4011  if (qn->greedy && (qn->target_empty_info != 0)) {
4012  if (NTYPE(target) == NT_QTFR) {
4013  QtfrNode* tqn = NQTFR(target);
4014  if (IS_NOT_NULL(tqn->head_exact)) {
4015  qn->head_exact = tqn->head_exact;
4016  tqn->head_exact = NULL;
4017  }
4018  }
4019  else {
4020  qn->head_exact = get_head_value_node(qn->target, 1, reg);
4021  }
4022  }
4023 #endif
4024  }
4025  break;
4026 
4027  case NT_ENCLOSE:
4028  {
4029  EncloseNode* en = NENCLOSE(node);
4030 
4031  switch (en->type) {
4032  case ENCLOSE_OPTION:
4033  {
4035  state |= in_root;
4036  reg->options = NENCLOSE(node)->option;
4037  r = setup_tree(NENCLOSE(node)->target, reg, state, env);
4038  reg->options = options;
4039  }
4040  break;
4041 
4042  case ENCLOSE_MEMORY:
4043  if ((state & (IN_ALT | IN_NOT | IN_VAR_REPEAT)) != 0) {
4045  /* SET_ENCLOSE_STATUS(node, NST_MEM_IN_ALT_NOT); */
4046  }
4047  r = setup_tree(en->target, reg, state, env);
4048  break;
4049 
4051  {
4052  Node* target = en->target;
4053  r = setup_tree(target, reg, state, env);
4054  if (NTYPE(target) == NT_QTFR) {
4055  QtfrNode* tqn = NQTFR(target);
4056  if (IS_REPEAT_INFINITE(tqn->upper) && tqn->lower <= 1 &&
4057  tqn->greedy != 0) { /* (?>a*), a*+ etc... */
4058  int qtype = NTYPE(tqn->target);
4059  if (IS_NODE_TYPE_SIMPLE(qtype))
4061  }
4062  }
4063  }
4064  break;
4065 
4066  case ENCLOSE_CONDITION:
4067 #ifdef USE_NAMED_GROUP
4068  if (! IS_ENCLOSE_NAME_REF(NENCLOSE(node)) &&
4069  env->num_named > 0 &&
4073  }
4074 #endif
4075  r = setup_tree(NENCLOSE(node)->target, reg, state, env);
4076  break;
4077  }
4078  }
4079  break;
4080 
4081  case NT_ANCHOR:
4082  {
4083  AnchorNode* an = NANCHOR(node);
4084 
4085  switch (an->type) {
4086  case ANCHOR_PREC_READ:
4087  r = setup_tree(an->target, reg, state, env);
4088  break;
4089  case ANCHOR_PREC_READ_NOT:
4090  r = setup_tree(an->target, reg, (state | IN_NOT), env);
4091  break;
4092 
4093 /* allowed node types in look-behind */
4094 #define ALLOWED_TYPE_IN_LB \
4095  ( BIT_NT_LIST | BIT_NT_ALT | BIT_NT_STR | BIT_NT_CCLASS | BIT_NT_CTYPE | \
4096  BIT_NT_CANY | BIT_NT_ANCHOR | BIT_NT_ENCLOSE | BIT_NT_QTFR | BIT_NT_CALL )
4097 
4098 #define ALLOWED_ENCLOSE_IN_LB ( ENCLOSE_MEMORY | ENCLOSE_OPTION )
4099 #define ALLOWED_ENCLOSE_IN_LB_NOT ENCLOSE_OPTION
4100 
4101 #define ALLOWED_ANCHOR_IN_LB \
4102 ( ANCHOR_LOOK_BEHIND | ANCHOR_LOOK_BEHIND_NOT | ANCHOR_BEGIN_LINE | \
4103  ANCHOR_END_LINE | ANCHOR_BEGIN_BUF | ANCHOR_BEGIN_POSITION | ANCHOR_KEEP | \
4104  ANCHOR_WORD_BOUND | ANCHOR_NOT_WORD_BOUND | \
4105  ANCHOR_WORD_BEGIN | ANCHOR_WORD_END )
4106 #define ALLOWED_ANCHOR_IN_LB_NOT \
4107 ( ANCHOR_LOOK_BEHIND | ANCHOR_LOOK_BEHIND_NOT | ANCHOR_BEGIN_LINE | \
4108  ANCHOR_END_LINE | ANCHOR_BEGIN_BUF | ANCHOR_BEGIN_POSITION | ANCHOR_KEEP | \
4109  ANCHOR_WORD_BOUND | ANCHOR_NOT_WORD_BOUND | \
4110  ANCHOR_WORD_BEGIN | ANCHOR_WORD_END )
4111 
4112  case ANCHOR_LOOK_BEHIND:
4113  {
4116  if (r < 0) return r;
4117  if (r > 0) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
4118  r = setup_look_behind(node, reg, env);
4119  if (r != 0) return r;
4120  if (NTYPE(node) != NT_ANCHOR) goto restart;
4121  r = setup_tree(an->target, reg, state, env);
4122  }
4123  break;
4124 
4126  {
4129  if (r < 0) return r;
4130  if (r > 0) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
4131  r = setup_look_behind(node, reg, env);
4132  if (r != 0) return r;
4133  if (NTYPE(node) != NT_ANCHOR) goto restart;
4134  r = setup_tree(an->target, reg, (state | IN_NOT), env);
4135  }
4136  break;
4137  }
4138  }
4139  break;
4140 
4141  default:
4142  break;
4143  }
4144 
4145  return r;
4146 }
4147 
4148 #ifndef USE_SUNDAY_QUICK_SEARCH
4149 /* set skip map for Boyer-Moore search */
4150 static int
4151 set_bm_skip(UChar* s, UChar* end, regex_t* reg,
4152  UChar skip[], int** int_skip, int ignore_case)
4153 {
4154  OnigDistance i, len;
4155  int clen, flen, n, j, k;
4158  OnigEncoding enc = reg->enc;
4159 
4160  len = end - s;
4161  if (len < ONIG_CHAR_TABLE_SIZE) {
4162  for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) skip[i] = (UChar )len;
4163 
4164  n = 0;
4165  for (i = 0; i < len - 1; i += clen) {
4166  p = s + i;
4167  if (ignore_case)
4169  p, end, items);
4170  clen = enclen(enc, p, end);
4171 
4172  for (j = 0; j < n; j++) {
4173  if ((items[j].code_len != 1) || (items[j].byte_len != clen))
4174  return 1; /* different length isn't supported. */
4175  flen = ONIGENC_CODE_TO_MBC(enc, items[j].code[0], buf[j]);
4176  if (flen != clen)
4177  return 1; /* different length isn't supported. */
4178  }
4179  for (j = 0; j < clen; j++) {
4180  skip[s[i + j]] = (UChar )(len - 1 - i - j);
4181  for (k = 0; k < n; k++) {
4182  skip[buf[k][j]] = (UChar )(len - 1 - i - j);
4183  }
4184  }
4185  }
4186  }
4187  else {
4188  if (IS_NULL(*int_skip)) {
4189  *int_skip = (int* )xmalloc(sizeof(int) * ONIG_CHAR_TABLE_SIZE);
4190  if (IS_NULL(*int_skip)) return ONIGERR_MEMORY;
4191  }
4192  for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) (*int_skip)[i] = (int )len;
4193 
4194  n = 0;
4195  for (i = 0; i < len - 1; i += clen) {
4196  p = s + i;
4197  if (ignore_case)
4199  p, end, items);
4200  clen = enclen(enc, p, end);
4201 
4202  for (j = 0; j < n; j++) {
4203  if ((items[j].code_len != 1) || (items[j].byte_len != clen))
4204  return 1; /* different length isn't supported. */
4205  flen = ONIGENC_CODE_TO_MBC(enc, items[j].code[0], buf[j]);
4206  if (flen != clen)
4207  return 1; /* different length isn't supported. */
4208  }
4209  for (j = 0; j < clen; j++) {
4210  (*int_skip)[s[i + j]] = (int )(len - 1 - i - j);
4211  for (k = 0; k < n; k++) {
4212  (*int_skip)[buf[k][j]] = (int )(len - 1 - i - j);
4213  }
4214  }
4215  }
4216  }
4217  return 0;
4218 }
4219 
4220 #else /* USE_SUNDAY_QUICK_SEARCH */
4221 
4222 /* set skip map for Sunday's quick search */
4223 static int
4225  UChar skip[], int** int_skip, int ignore_case)
4226 {
4227  OnigDistance i, len;
4228  int clen, flen, n, j, k;
4231  OnigEncoding enc = reg->enc;
4232 
4233  len = end - s;
4234  if (len < ONIG_CHAR_TABLE_SIZE) {
4235  for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) skip[i] = (UChar )(len + 1);
4236 
4237  n = 0;
4238  for (i = 0; i < len; i += clen) {
4239  p = s + i;
4240  if (ignore_case)
4242  p, end, items);
4243  clen = enclen(enc, p, end);
4244 
4245  for (j = 0; j < n; j++) {
4246  if ((items[j].code_len != 1) || (items[j].byte_len != clen))
4247  return 1; /* different length isn't supported. */
4248  flen = ONIGENC_CODE_TO_MBC(enc, items[j].code[0], buf[j]);
4249  if (flen != clen)
4250  return 1; /* different length isn't supported. */
4251  }
4252  for (j = 0; j < clen; j++) {
4253  skip[s[i + j]] = (UChar )(len - i - j);
4254  for (k = 0; k < n; k++) {
4255  skip[buf[k][j]] = (UChar )(len - i - j);
4256  }
4257  }
4258  }
4259  }
4260  else {
4261  if (IS_NULL(*int_skip)) {
4262  *int_skip = (int* )xmalloc(sizeof(int) * ONIG_CHAR_TABLE_SIZE);
4263  if (IS_NULL(*int_skip)) return ONIGERR_MEMORY;
4264  }
4265  for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) (*int_skip)[i] = (int )(len + 1);
4266 
4267  n = 0;
4268  for (i = 0; i < len; i += clen) {
4269  p = s + i;
4270  if (ignore_case)
4272  p, end, items);
4273  clen = enclen(enc, p, end);
4274 
4275  for (j = 0; j < n; j++) {
4276  if ((items[j].code_len != 1) || (items[j].byte_len != clen))
4277  return 1; /* different length isn't supported. */
4278  flen = ONIGENC_CODE_TO_MBC(enc, items[j].code[0], buf[j]);
4279  if (flen != clen)
4280  return 1; /* different length isn't supported. */
4281  }
4282  for (j = 0; j < clen; j++) {
4283  (*int_skip)[s[i + j]] = (int )(len - i - j);
4284  for (k = 0; k < n; k++) {
4285  (*int_skip)[buf[k][j]] = (int )(len - i - j);
4286  }
4287  }
4288  }
4289  }
4290  return 0;
4291 }
4292 #endif /* USE_SUNDAY_QUICK_SEARCH */
4293 
4294 #define OPT_EXACT_MAXLEN 24
4295 
4296 typedef struct {
4297  OnigDistance min; /* min byte length */
4298  OnigDistance max; /* max byte length */
4299 } MinMaxLen;
4300 
4301 typedef struct {
4307 } OptEnv;
4308 
4309 typedef struct {
4312 } OptAncInfo;
4313 
4314 typedef struct {
4315  MinMaxLen mmd; /* info position */
4317 
4319  int ignore_case; /* -1: unset, 0: case sensitive, 1: ignore case */
4320  int len;
4322 } OptExactInfo;
4323 
4324 typedef struct {
4325  MinMaxLen mmd; /* info position */
4327 
4328  int value; /* weighted value */
4330 } OptMapInfo;
4331 
4332 typedef struct {
4334 
4336  OptExactInfo exb; /* boundary */
4337  OptExactInfo exm; /* middle */
4338  OptExactInfo expr; /* prec read (?=...) */
4339 
4340  OptMapInfo map; /* boundary */
4341 } NodeOptInfo;
4342 
4343 
4344 static int
4346 {
4347  static const short int ByteValTable[] = {
4348  5, 1, 1, 1, 1, 1, 1, 1, 1, 10, 10, 1, 1, 10, 1, 1,
4349  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
4350  12, 4, 7, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5,
4351  6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5, 5, 5,
4352  5, 6, 6, 6, 6, 7, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
4353  6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 6, 5, 5, 5,
4354  5, 6, 6, 6, 6, 7, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
4355  6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5, 1
4356  };
4357 
4358  if (i < (int )(sizeof(ByteValTable)/sizeof(ByteValTable[0]))) {
4359  if (i == 0 && ONIGENC_MBC_MINLEN(enc) > 1)
4360  return 20;
4361  else
4362  return (int )ByteValTable[i];
4363  }
4364  else
4365  return 4; /* Take it easy. */
4366 }
4367 
4368 static int
4370 {
4371  /* 1000 / (min-max-dist + 1) */
4372  static const short int dist_vals[] = {
4373  1000, 500, 333, 250, 200, 167, 143, 125, 111, 100,
4374  91, 83, 77, 71, 67, 63, 59, 56, 53, 50,
4375  48, 45, 43, 42, 40, 38, 37, 36, 34, 33,
4376  32, 31, 30, 29, 29, 28, 27, 26, 26, 25,
4377  24, 24, 23, 23, 22, 22, 21, 21, 20, 20,
4378  20, 19, 19, 19, 18, 18, 18, 17, 17, 17,
4379  16, 16, 16, 16, 15, 15, 15, 15, 14, 14,
4380  14, 14, 14, 14, 13, 13, 13, 13, 13, 13,
4381  12, 12, 12, 12, 12, 12, 11, 11, 11, 11,
4382  11, 11, 11, 11, 11, 10, 10, 10, 10, 10
4383  };
4384 
4385  OnigDistance d;
4386 
4387  if (mm->max == ONIG_INFINITE_DISTANCE) return 0;
4388 
4389  d = mm->max - mm->min;
4390  if (d < sizeof(dist_vals)/sizeof(dist_vals[0]))
4391  /* return dist_vals[d] * 16 / (mm->min + 12); */
4392  return (int )dist_vals[d];
4393  else
4394  return 1;
4395 }
4396 
4397 static int
4399 {
4400  if (v2 <= 0) return -1;
4401  if (v1 <= 0) return 1;
4402 
4403  v1 *= distance_value(d1);
4404  v2 *= distance_value(d2);
4405 
4406  if (v2 > v1) return 1;
4407  if (v2 < v1) return -1;
4408 
4409  if (d2->min < d1->min) return 1;
4410  if (d2->min > d1->min) return -1;
4411  return 0;
4412 }
4413 
4414 static int
4416 {
4417  return (a->min == b->min && a->max == b->max) ? 1 : 0;
4418 }
4419 
4420 
4421 static void
4423 {
4424  mml->min = min;
4425  mml->max = max;
4426 }
4427 
4428 static void
4430 {
4431  mml->min = mml->max = 0;
4432 }
4433 
4434 static void
4436 {
4437  to->min = from->min;
4438  to->max = from->max;
4439 }
4440 
4441 static void
4443 {
4444  to->min = distance_add(to->min, from->min);
4445  to->max = distance_add(to->max, from->max);
4446 }
4447 
4448 #if 0
4449 static void
4450 add_len_mml(MinMaxLen* to, OnigDistance len)
4451 {
4452  to->min = distance_add(to->min, len);
4453  to->max = distance_add(to->max, len);
4454 }
4455 #endif
4456 
4457 static void
4459 {
4460  if (to->min > from->min) to->min = from->min;
4461  if (to->max < from->max) to->max = from->max;
4462 }
4463 
4464 static void
4466 {
4467  *to = *from;
4468 }
4469 
4470 static void
4472 {
4473  anc->left_anchor = 0;
4474  anc->right_anchor = 0;
4475 }
4476 
4477 static void
4479 {
4480  *to = *from;
4481 }
4482 
4483 static void
4485  OnigDistance left_len, OnigDistance right_len)
4486 {
4487  clear_opt_anc_info(to);
4488 
4489  to->left_anchor = left->left_anchor;
4490  if (left_len == 0) {
4491  to->left_anchor |= right->left_anchor;
4492  }
4493 
4494  to->right_anchor = right->right_anchor;
4495  if (right_len == 0) {
4496  to->right_anchor |= left->right_anchor;
4497  }
4498  else {
4500  }
4501 }
4502 
4503 static int
4505 {
4506  if (anc == ANCHOR_END_BUF || anc == ANCHOR_SEMI_END_BUF ||
4507  anc == ANCHOR_END_LINE || anc == ANCHOR_PREC_READ ||
4508  anc == ANCHOR_PREC_READ_NOT)
4509  return 0;
4510 
4511  return 1;
4512 }
4513 
4514 static int
4516 {
4517  if ((to->left_anchor & anc) != 0) return 1;
4518 
4519  return ((to->right_anchor & anc) != 0 ? 1 : 0);
4520 }
4521 
4522 static void
4524 {
4525  if (is_left_anchor(anc))
4526  to->left_anchor |= anc;
4527  else
4528  to->right_anchor |= anc;
4529 }
4530 
4531 static void
4533 {
4534  if (is_left_anchor(anc))
4535  to->left_anchor &= ~anc;
4536  else
4537  to->right_anchor &= ~anc;
4538 }
4539 
4540 static void
4542 {
4543  to->left_anchor &= add->left_anchor;
4544  to->right_anchor &= add->right_anchor;
4545 }
4546 
4547 static int
4549 {
4550  return (ex->len >= OPT_EXACT_MAXLEN ? 1 : 0);
4551 }
4552 
4553 static void
4555 {
4556  clear_mml(&ex->mmd);
4557  clear_opt_anc_info(&ex->anc);
4558  ex->reach_end = 0;
4559  ex->ignore_case = -1; /* unset */
4560  ex->len = 0;
4561  ex->s[0] = '\0';
4562 }
4563 
4564 static void
4566 {
4567  *to = *from;
4568 }
4569 
4570 static void
4572 {
4573  int i, j, len;
4574  UChar *p, *end;
4575  OptAncInfo tanc;
4576 
4577  if (to->ignore_case < 0)
4578  to->ignore_case = add->ignore_case;
4579  else if (to->ignore_case != add->ignore_case)
4580  return ; /* avoid */
4581 
4582  p = add->s;
4583  end = p + add->len;
4584  for (i = to->len; p < end; ) {
4585  len = enclen(enc, p, end);
4586  if (i + len > OPT_EXACT_MAXLEN) break;
4587  for (j = 0; j < len && p < end; j++)
4588  to->s[i++] = *p++;
4589  }
4590 
4591  to->len = i;
4592  to->reach_end = (p == end ? add->reach_end : 0);
4593 
4594  concat_opt_anc_info(&tanc, &to->anc, &add->anc, 1, 1);
4595  if (! to->reach_end) tanc.right_anchor = 0;
4596  copy_opt_anc_info(&to->anc, &tanc);
4597 }
4598 
4599 static void
4601  int raw ARG_UNUSED, OnigEncoding enc)
4602 {
4603  int i, j, len;
4604  UChar *p;
4605 
4606  for (i = to->len, p = s; p < end && i < OPT_EXACT_MAXLEN; ) {
4607  len = enclen(enc, p, end);
4608  if (i + len > OPT_EXACT_MAXLEN) break;
4609  for (j = 0; j < len && p < end; j++)
4610  to->s[i++] = *p++;
4611  }
4612 
4613  to->len = i;
4614 }
4615 
4616 static void
4618 {
4619  int i, j, len;
4620 
4621  if (add->len == 0 || to->len == 0) {
4623  return ;
4624  }
4625 
4626  if (! is_equal_mml(&to->mmd, &add->mmd)) {
4628  return ;
4629  }
4630 
4631  for (i = 0; i < to->len && i < add->len; ) {
4632  if (to->s[i] != add->s[i]) break;
4633  len = enclen(env->enc, to->s + i, to->s + to->len);
4634 
4635  for (j = 1; j < len; j++) {
4636  if (to->s[i+j] != add->s[i+j]) break;
4637  }
4638  if (j < len) break;
4639  i += len;
4640  }
4641 
4642  if (! add->reach_end || i < add->len || i < to->len) {
4643  to->reach_end = 0;
4644  }
4645  to->len = i;
4646  if (to->ignore_case < 0)
4647  to->ignore_case = add->ignore_case;
4648  else if (add->ignore_case >= 0)
4649  to->ignore_case |= add->ignore_case;
4650 
4651  alt_merge_opt_anc_info(&to->anc, &add->anc);
4652  if (! to->reach_end) to->anc.right_anchor = 0;
4653 }
4654 
4655 static void
4657 {
4658  int v1, v2;
4659 
4660  v1 = now->len;
4661  v2 = alt->len;
4662 
4663  if (v2 == 0) {
4664  return ;
4665  }
4666  else if (v1 == 0) {
4667  copy_opt_exact_info(now, alt);
4668  return ;
4669  }
4670  else if (v1 <= 2 && v2 <= 2) {
4671  /* ByteValTable[x] is big value --> low price */
4672  v2 = map_position_value(enc, now->s[0]);
4673  v1 = map_position_value(enc, alt->s[0]);
4674 
4675  if (now->len > 1) v1 += 5;
4676  if (alt->len > 1) v2 += 5;
4677  }
4678 
4679  if (now->ignore_case <= 0) v1 *= 2;
4680  if (alt->ignore_case <= 0) v2 *= 2;
4681 
4682  if (comp_distance_value(&now->mmd, &alt->mmd, v1, v2) > 0)
4683  copy_opt_exact_info(now, alt);
4684 }
4685 
4686 static void
4688 {
4689  static const OptMapInfo clean_info = {
4690  {0, 0}, {0, 0}, 0,
4691  {
4692  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4693  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4694  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4695  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4696  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4697  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4698  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4699  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4700  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4701  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4702  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4703  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4704  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4705  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4706  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4707  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
4708  }
4709  };
4710 
4711  xmemcpy(map, &clean_info, sizeof(OptMapInfo));
4712 }
4713 
4714 static void
4716 {
4717  *to = *from;
4718 }
4719 
4720 static void
4722 {
4723  if (map->map[c] == 0) {
4724  map->map[c] = 1;
4725  map->value += map_position_value(enc, c);
4726  }
4727 }
4728 
4729 static int
4731  OnigEncoding enc, OnigCaseFoldType case_fold_flag)
4732 {
4735  int i, n;
4736 
4737  add_char_opt_map_info(map, p[0], enc);
4738 
4739  case_fold_flag = DISABLE_CASE_FOLD_MULTI_CHAR(case_fold_flag);
4740  n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc, case_fold_flag, p, end, items);
4741  if (n < 0) return n;
4742 
4743  for (i = 0; i < n; i++) {
4744  ONIGENC_CODE_TO_MBC(enc, items[i].code[0], buf);
4745  add_char_opt_map_info(map, buf[0], enc);
4746  }
4747 
4748  return 0;
4749 }
4750 
4751 static void
4753 {
4754  const int z = 1<<15; /* 32768: something big value */
4755 
4756  int v1, v2;
4757 
4758  if (alt->value == 0) return ;
4759  if (now->value == 0) {
4760  copy_opt_map_info(now, alt);
4761  return ;
4762  }
4763 
4764  v1 = z / now->value;
4765  v2 = z / alt->value;
4766  if (comp_distance_value(&now->mmd, &alt->mmd, v1, v2) > 0)
4767  copy_opt_map_info(now, alt);
4768 }
4769 
4770 static int
4772 {
4773 #define COMP_EM_BASE 20
4774  int ve, vm;
4775 
4776  if (m->value <= 0) return -1;
4777 
4778  ve = COMP_EM_BASE * e->len * (e->ignore_case > 0 ? 1 : 2);
4779  vm = COMP_EM_BASE * 5 * 2 / m->value;
4780  return comp_distance_value(&e->mmd, &m->mmd, ve, vm);
4781 }
4782 
4783 static void
4785 {
4786  int i, val;
4787 
4788  /* if (! is_equal_mml(&to->mmd, &add->mmd)) return ; */
4789  if (to->value == 0) return ;
4790  if (add->value == 0 || to->mmd.max < add->mmd.min) {
4791  clear_opt_map_info(to);
4792  return ;
4793  }
4794 
4795  alt_merge_mml(&to->mmd, &add->mmd);
4796 
4797  val = 0;
4798  for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) {
4799  if (add->map[i])
4800  to->map[i] = 1;
4801 
4802  if (to->map[i])
4803  val += map_position_value(enc, i);
4804  }
4805  to->value = val;
4806 
4807  alt_merge_opt_anc_info(&to->anc, &add->anc);
4808 }
4809 
4810 static void
4812 {
4813  copy_mml(&(opt->exb.mmd), mmd);
4814  copy_mml(&(opt->expr.mmd), mmd);
4815  copy_mml(&(opt->map.mmd), mmd);
4816 }
4817 
4818 static void
4820 {
4821  clear_mml(&opt->len);
4822  clear_opt_anc_info(&opt->anc);
4823  clear_opt_exact_info(&opt->exb);
4824  clear_opt_exact_info(&opt->exm);
4825  clear_opt_exact_info(&opt->expr);
4826  clear_opt_map_info(&opt->map);
4827 }
4828 
4829 static void
4831 {
4832  *to = *from;
4833 }
4834 
4835 static void
4837 {
4838  int exb_reach, exm_reach;
4839  OptAncInfo tanc;
4840 
4841  concat_opt_anc_info(&tanc, &to->anc, &add->anc, to->len.max, add->len.max);
4842  copy_opt_anc_info(&to->anc, &tanc);
4843 
4844  if (add->exb.len > 0 && to->len.max == 0) {
4845  concat_opt_anc_info(&tanc, &to->anc, &add->exb.anc,
4846  to->len.max, add->len.max);
4847  copy_opt_anc_info(&add->exb.anc, &tanc);
4848  }
4849 
4850  if (add->map.value > 0 && to->len.max == 0) {
4851  if (add->map.mmd.max == 0)
4852  add->map.anc.left_anchor |= to->anc.left_anchor;
4853  }
4854 
4855  exb_reach = to->exb.reach_end;
4856  exm_reach = to->exm.reach_end;
4857 
4858  if (add->len.max != 0)
4859  to->exb.reach_end = to->exm.reach_end = 0;
4860 
4861  if (add->exb.len > 0) {
4862  if (exb_reach) {
4863  concat_opt_exact_info(&to->exb, &add->exb, enc);
4864  clear_opt_exact_info(&add->exb);
4865  }
4866  else if (exm_reach) {
4867  concat_opt_exact_info(&to->exm, &add->exb, enc);
4868  clear_opt_exact_info(&add->exb);
4869  }
4870  }
4871  select_opt_exact_info(enc, &to->exm, &add->exb);
4872  select_opt_exact_info(enc, &to->exm, &add->exm);
4873 
4874  if (to->expr.len > 0) {
4875  if (add->len.max > 0) {
4876  if (to->expr.len > (int )add->len.max)
4877  to->expr.len = (int )add->len.max;
4878 
4879  if (to->expr.mmd.max == 0)
4880  select_opt_exact_info(enc, &to->exb, &to->expr);
4881  else
4882  select_opt_exact_info(enc, &to->exm, &to->expr);
4883  }
4884  }
4885  else if (add->expr.len > 0) {
4886  copy_opt_exact_info(&to->expr, &add->expr);
4887  }
4888 
4889  select_opt_map_info(&to->map, &add->map);
4890 
4891  add_mml(&to->len, &add->len);
4892 }
4893 
4894 static void
4896 {
4897  alt_merge_opt_anc_info (&to->anc, &add->anc);
4898  alt_merge_opt_exact_info(&to->exb, &add->exb, env);
4899  alt_merge_opt_exact_info(&to->exm, &add->exm, env);
4900  alt_merge_opt_exact_info(&to->expr, &add->expr, env);
4901  alt_merge_opt_map_info(env->enc, &to->map, &add->map);
4902 
4903  alt_merge_mml(&to->len, &add->len);
4904 }
4905 
4906 
4907 #define MAX_NODE_OPT_INFO_REF_COUNT 5
4908 
4909 static int
4911 {
4912  int type;
4913  int r = 0;
4914 
4915  clear_node_opt_info(opt);
4916  set_bound_node_opt_info(opt, &env->mmd);
4917 
4918  type = NTYPE(node);
4919  switch (type) {
4920  case NT_LIST:
4921  {
4922  OptEnv nenv;
4923  NodeOptInfo nopt;
4924  Node* nd = node;
4925 
4926  copy_opt_env(&nenv, env);
4927  do {
4928  r = optimize_node_left(NCAR(nd), &nopt, &nenv);
4929  if (r == 0) {
4930  add_mml(&nenv.mmd, &nopt.len);
4931  concat_left_node_opt_info(env->enc, opt, &nopt);
4932  }
4933  } while (r == 0 && IS_NOT_NULL(nd = NCDR(nd)));
4934  }
4935  break;
4936 
4937  case NT_ALT:
4938  {
4939  NodeOptInfo nopt;
4940  Node* nd = node;
4941 
4942  do {
4943  r = optimize_node_left(NCAR(nd), &nopt, env);
4944  if (r == 0) {
4945  if (nd == node) copy_node_opt_info(opt, &nopt);
4946  else alt_merge_node_opt_info(opt, &nopt, env);
4947  }
4948  } while ((r == 0) && IS_NOT_NULL(nd = NCDR(nd)));
4949  }
4950  break;
4951 
4952  case NT_STR:
4953  {
4954  StrNode* sn = NSTR(node);
4955  OnigDistance slen = sn->end - sn->s;
4956  int is_raw = NSTRING_IS_RAW(node);
4957 
4958  if (! NSTRING_IS_AMBIG(node)) {
4959  concat_opt_exact_info_str(&opt->exb, sn->s, sn->end,
4960  is_raw, env->enc);
4961  opt->exb.ignore_case = 0;
4962  if (slen > 0) {
4963  add_char_opt_map_info(&opt->map, *(sn->s), env->enc);
4964  }
4965  set_mml(&opt->len, slen, slen);
4966  }
4967  else {
4968  OnigDistance max;
4969 
4970  if (NSTRING_IS_DONT_GET_OPT_INFO(node)) {
4971  int n = onigenc_strlen(env->enc, sn->s, sn->end);
4972  max = ONIGENC_MBC_MAXLEN_DIST(env->enc) * n;
4973  }
4974  else {
4975  concat_opt_exact_info_str(&opt->exb, sn->s, sn->end,
4976  is_raw, env->enc);
4977  opt->exb.ignore_case = 1;
4978 
4979  if (slen > 0) {
4980  r = add_char_amb_opt_map_info(&opt->map, sn->s, sn->end,
4981  env->enc, env->case_fold_flag);
4982  if (r != 0) break;
4983  }
4984 
4985  max = slen;
4986  }
4987 
4988  set_mml(&opt->len, slen, max);
4989  }
4990 
4991  if ((OnigDistance )opt->exb.len == slen)
4992  opt->exb.reach_end = 1;
4993  }
4994  break;
4995 
4996  case NT_CCLASS:
4997  {
4998  int i, z;
4999  CClassNode* cc = NCCLASS(node);
5000 
5001  /* no need to check ignore case. (set in setup_tree()) */
5002 
5003  if (IS_NOT_NULL(cc->mbuf) || IS_NCCLASS_NOT(cc)) {
5004  OnigDistance min = ONIGENC_MBC_MINLEN(env->enc);
5006 
5007  set_mml(&opt->len, min, max);
5008  }
5009  else {
5010  for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
5011  z = BITSET_AT(cc->bs, i);
5012  if ((z && !IS_NCCLASS_NOT(cc)) || (!z && IS_NCCLASS_NOT(cc))) {
5013  add_char_opt_map_info(&opt->map, (UChar )i, env->enc);
5014  }
5015  }
5016  set_mml(&opt->len, 1, 1);
5017  }
5018  }
5019  break;
5020 
5021  case NT_CTYPE:
5022  {
5023  int i, min, max;
5024  int maxcode;
5025 
5026  max = ONIGENC_MBC_MAXLEN_DIST(env->enc);
5027 
5028  if (max == 1) {
5029  min = 1;
5030 
5031  maxcode = NCTYPE(node)->ascii_range ? 0x80 : SINGLE_BYTE_SIZE;
5032  switch (NCTYPE(node)->ctype) {
5033  case ONIGENC_CTYPE_WORD:
5034  if (NCTYPE(node)->not != 0) {
5035  for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
5036  if (! ONIGENC_IS_CODE_WORD(env->enc, i) || i >= maxcode) {
5037  add_char_opt_map_info(&opt->map, (UChar )i, env->enc);
5038  }
5039  }
5040  }
5041  else {
5042  for (i = 0; i < maxcode; i++) {
5043  if (ONIGENC_IS_CODE_WORD(env->enc, i)) {
5044  add_char_opt_map_info(&opt->map, (UChar )i, env->enc);
5045  }
5046  }
5047  }
5048  break;
5049  }
5050  }
5051  else {
5052  min = ONIGENC_MBC_MINLEN(env->enc);
5053  }
5054  set_mml(&opt->len, min, max);
5055  }
5056  break;
5057 
5058  case NT_CANY:
5059  {
5060  OnigDistance min = ONIGENC_MBC_MINLEN(env->enc);
5062  set_mml(&opt->len, min, max);
5063  }
5064  break;
5065 
5066  case NT_ANCHOR:
5067  switch (NANCHOR(node)->type) {
5068  case ANCHOR_BEGIN_BUF:
5069  case ANCHOR_BEGIN_POSITION:
5070  case ANCHOR_BEGIN_LINE:
5071  case ANCHOR_END_BUF:
5072  case ANCHOR_SEMI_END_BUF:
5073  case ANCHOR_END_LINE:
5074  case ANCHOR_LOOK_BEHIND: /* just for (?<=x).* */
5075  case ANCHOR_PREC_READ_NOT: /* just for (?!x).* */
5076  add_opt_anc_info(&opt->anc, NANCHOR(node)->type);
5077  break;
5078 
5079  case ANCHOR_PREC_READ:
5080  {
5081  NodeOptInfo nopt;
5082 
5083  r = optimize_node_left(NANCHOR(node)->target, &nopt, env);
5084  if (r == 0) {
5085  if (nopt.exb.len > 0)
5086  copy_opt_exact_info(&opt->expr, &nopt.exb);
5087  else if (nopt.exm.len > 0)
5088  copy_opt_exact_info(&opt->expr, &nopt.exm);
5089 
5090  opt->expr.reach_end = 0;
5091 
5092  if (nopt.map.value > 0)
5093  copy_opt_map_info(&opt->map, &nopt.map);
5094  }
5095  }
5096  break;
5097 
5099  break;
5100  }
5101  break;
5102 
5103  case NT_BREF:
5104  {
5105  int i;
5106  int* backs;
5107  OnigDistance min, max, tmin, tmax;
5108  Node** nodes = SCANENV_MEM_NODES(env->scan_env);
5109  BRefNode* br = NBREF(node);
5110 
5111  if (br->state & NST_RECURSION) {
5112  set_mml(&opt->len, 0, ONIG_INFINITE_DISTANCE);
5113  break;
5114  }
5115  backs = BACKREFS_P(br);
5116  r = get_min_match_length(nodes[backs[0]], &min, env->scan_env);
5117  if (r != 0) break;
5118  r = get_max_match_length(nodes[backs[0]], &max, env->scan_env);
5119  if (r != 0) break;
5120  for (i = 1; i < br->back_num; i++) {
5121  r = get_min_match_length(nodes[backs[i]], &tmin, env->scan_env);
5122  if (r != 0) break;
5123  r = get_max_match_length(nodes[backs[i]], &tmax, env->scan_env);
5124  if (r != 0) break;
5125  if (min > tmin) min = tmin;
5126  if (max < tmax) max = tmax;
5127  }
5128  if (r == 0) set_mml(&opt->len, min, max);
5129  }
5130  break;
5131 
5132 #ifdef USE_SUBEXP_CALL
5133  case NT_CALL:
5134  if (IS_CALL_RECURSION(NCALL(node)))
5135  set_mml(&opt->len, 0, ONIG_INFINITE_DISTANCE);
5136  else {
5137  OnigOptionType save = env->options;
5138  env->options = NENCLOSE(NCALL(node)->target)->option;
5139  r = optimize_node_left(NCALL(node)->target, opt, env);
5140  env->options = save;
5141  }
5142  break;
5143 #endif
5144 
5145  case NT_QTFR:
5146  {
5147  int i;
5148  OnigDistance min, max;
5149  NodeOptInfo nopt;
5150  QtfrNode* qn = NQTFR(node);
5151 
5152  r = optimize_node_left(qn->target, &nopt, env);
5153  if (r) break;
5154 
5155  if (/*qn->lower == 0 &&*/ IS_REPEAT_INFINITE(qn->upper)) {
5156  if (env->mmd.max == 0 &&
5157  NTYPE(qn->target) == NT_CANY && qn->greedy) {
5158  if (IS_MULTILINE(env->options))
5159  /* implicit anchor: /.*a/ ==> /\A.*a/ */
5161  else
5163  }
5164  }
5165  else {
5166  if (qn->lower > 0) {
5167  copy_node_opt_info(opt, &nopt);
5168  if (nopt.exb.len > 0) {
5169  if (nopt.exb.reach_end) {
5170  for (i = 2; i <= qn->lower &&
5171  ! is_full_opt_exact_info(&opt->exb); i++) {
5172  concat_opt_exact_info(&opt->exb, &nopt.exb, env->enc);
5173  }
5174  if (i < qn->lower) {
5175  opt->exb.reach_end = 0;
5176  }
5177  }
5178  }
5179 
5180  if (qn->lower != qn->upper) {
5181  opt->exb.reach_end = 0;
5182  opt->exm.reach_end = 0;
5183  }
5184  if (qn->lower > 1)
5185  opt->exm.reach_end = 0;
5186  }
5187  }
5188 
5189  min = distance_multiply(nopt.len.min, qn->lower);
5190  if (IS_REPEAT_INFINITE(qn->upper))
5191  max = (nopt.len.max > 0 ? ONIG_INFINITE_DISTANCE : 0);
5192  else
5193  max = distance_multiply(nopt.len.max, qn->upper);
5194 
5195  set_mml(&opt->len, min, max);
5196  }
5197  break;
5198 
5199  case NT_ENCLOSE:
5200  {
5201  EncloseNode* en = NENCLOSE(node);
5202 
5203  switch (en->type) {
5204  case ENCLOSE_OPTION:
5205  {
5206  OnigOptionType save = env->options;
5207 
5208  env->options = en->option;
5209  r = optimize_node_left(en->target, opt, env);
5210  env->options = save;
5211  }
5212  break;
5213 
5214  case ENCLOSE_MEMORY:
5215 #ifdef USE_SUBEXP_CALL
5216  en->opt_count++;
5218  OnigDistance min, max;
5219 
5220  min = 0;
5221  max = ONIG_INFINITE_DISTANCE;
5222  if (IS_ENCLOSE_MIN_FIXED(en)) min = en->min_len;
5223  if (IS_ENCLOSE_MAX_FIXED(en)) max = en->max_len;
5224  set_mml(&opt->len, min, max);
5225  }
5226  else
5227 #endif
5228  {
5229  r = optimize_node_left(en->target, opt, env);
5230 
5232  if (BIT_STATUS_AT(env->scan_env->backrefed_mem, en->regnum))
5234  }
5235  }
5236  break;
5237 
5239  case ENCLOSE_CONDITION:
5240  r = optimize_node_left(en->target, opt, env);
5241  break;
5242  }
5243  }
5244  break;
5245 
5246  default:
5247 #ifdef ONIG_DEBUG
5248  if (!onig_is_prelude()) fprintf(stderr, "optimize_node_left: undefined node type %d\n",
5249  NTYPE(node));
5250 #endif
5251  r = ONIGERR_TYPE_BUG;
5252  break;
5253  }
5254 
5255  return r;
5256 }
5257 
5258 static int
5260 {
5261  int r;
5262  int allow_reverse;
5263 
5264  if (e->len == 0) return 0;
5265 
5266  reg->exact = (UChar* )xmalloc(e->len);
5268  xmemcpy(reg->exact, e->s, e->len);
5269  reg->exact_end = reg->exact + e->len;
5270 
5271  allow_reverse =
5273 
5274  if (e->ignore_case > 0) {
5275  if (e->len >= 3 || (e->len >= 2 && allow_reverse)) {
5276  r = set_bm_skip(reg->exact, reg->exact_end, reg,
5277  reg->map, &(reg->int_map), 1);
5278  if (r == 0) {
5279  reg->optimize = (allow_reverse != 0
5281  }
5282  else {
5284  }
5285  }
5286  else {
5288  }
5289  }
5290  else {
5291  if (e->len >= 3 || (e->len >= 2 && allow_reverse)) {
5292  r = set_bm_skip(reg->exact, reg->exact_end, reg,
5293  reg->map, &(reg->int_map), 0);
5294  if (r) return r;
5295 
5296  reg->optimize = (allow_reverse != 0
5298  }
5299  else {
5301  }
5302  }
5303 
5304  reg->dmin = e->mmd.min;
5305  reg->dmax = e->mmd.max;
5306 
5307  if (reg->dmin != ONIG_INFINITE_DISTANCE) {
5308  reg->threshold_len = (int )(reg->dmin + (reg->exact_end - reg->exact));
5309  }
5310 
5311  return 0;
5312 }
5313 
5314 static void
5316 {
5317  int i;
5318 
5319  for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++)
5320  reg->map[i] = m->map[i];
5321 
5322  reg->optimize = ONIG_OPTIMIZE_MAP;
5323  reg->dmin = m->mmd.min;
5324  reg->dmax = m->mmd.max;
5325 
5326  if (reg->dmin != ONIG_INFINITE_DISTANCE) {
5327  reg->threshold_len = (int )(reg->dmin + 1);
5328  }
5329 }
5330 
5331 static void
5333 {
5334  reg->sub_anchor |= anc->left_anchor & ANCHOR_BEGIN_LINE;
5335  reg->sub_anchor |= anc->right_anchor & ANCHOR_END_LINE;
5336 }
5337 
5338 #ifdef ONIG_DEBUG
5339 static void print_optimize_info(FILE* f, regex_t* reg);
5340 #endif
5341 
5342 static int
5344 {
5345 
5346  int r;
5347  NodeOptInfo opt;
5348  OptEnv env;
5349 
5350  env.enc = reg->enc;
5351  env.options = reg->options;
5352  env.case_fold_flag = reg->case_fold_flag;
5353  env.scan_env = scan_env;
5354  clear_mml(&env.mmd);
5355 
5356  r = optimize_node_left(node, &opt, &env);
5357  if (r) return r;
5358 
5359  reg->anchor = opt.anc.left_anchor & (ANCHOR_BEGIN_BUF |
5362 
5365 
5366  if (reg->anchor & (ANCHOR_END_BUF | ANCHOR_SEMI_END_BUF)) {
5367  reg->anchor_dmin = opt.len.min;
5368  reg->anchor_dmax = opt.len.max;
5369  }
5370 
5371  if (opt.exb.len > 0 || opt.exm.len > 0) {
5372  select_opt_exact_info(reg->enc, &opt.exb, &opt.exm);
5373  if (opt.map.value > 0 &&
5374  comp_opt_exact_or_map_info(&opt.exb, &opt.map) > 0) {
5375  goto set_map;
5376  }
5377  else {
5378  r = set_optimize_exact_info(reg, &opt.exb);
5379  set_sub_anchor(reg, &opt.exb.anc);
5380  }
5381  }
5382  else if (opt.map.value > 0) {
5383  set_map:
5384  set_optimize_map_info(reg, &opt.map);
5385  set_sub_anchor(reg, &opt.map.anc);
5386  }
5387  else {
5389  if (opt.len.max == 0)
5391  }
5392 
5393 #if defined(ONIG_DEBUG_COMPILE) || defined(ONIG_DEBUG_MATCH)
5394  if (!onig_is_prelude()) print_optimize_info(stderr, reg);
5395 #endif
5396  return r;
5397 }
5398 
5399 static void
5401 {
5403  reg->anchor = 0;
5404  reg->anchor_dmin = 0;
5405  reg->anchor_dmax = 0;
5406  reg->sub_anchor = 0;
5407  reg->exact_end = (UChar* )NULL;
5408  reg->threshold_len = 0;
5409  if (IS_NOT_NULL(reg->exact)) {
5410  xfree(reg->exact);
5411  reg->exact = (UChar* )NULL;
5412  }
5413 }
5414 
5415 #ifdef ONIG_DEBUG
5416 
5417 static void print_enc_string(FILE* fp, OnigEncoding enc,
5418  const UChar *s, const UChar *end)
5419 {
5420  fprintf(fp, "\nPATTERN: /");
5421 
5422  if (ONIGENC_MBC_MINLEN(enc) > 1) {
5423  const UChar *p;
5424  OnigCodePoint code;
5425 
5426  p = s;
5427  while (p < end) {
5428  code = ONIGENC_MBC_TO_CODE(enc, p, end);
5429  if (code >= 0x80) {
5430  fprintf(fp, " 0x%04x ", (int )code);
5431  }
5432  else {
5433  fputc((int )code, fp);
5434  }
5435 
5436  p += enclen(enc, p, end);
5437  }
5438  }
5439  else {
5440  while (s < end) {
5441  fputc((int )*s, fp);
5442  s++;
5443  }
5444  }
5445 
5446  fprintf(fp, "/ (%s)\n", enc->name);
5447 }
5448 
5449 static void
5450 print_distance_range(FILE* f, OnigDistance a, OnigDistance b)
5451 {
5452  if (a == ONIG_INFINITE_DISTANCE)
5453  fputs("inf", f);
5454  else
5455  fprintf(f, "(%"PRIuSIZE")", a);
5456 
5457  fputs("-", f);
5458 
5459  if (b == ONIG_INFINITE_DISTANCE)
5460  fputs("inf", f);
5461  else
5462  fprintf(f, "(%"PRIuSIZE")", b);
5463 }
5464 
5465 static void
5466 print_anchor(FILE* f, int anchor)
5467 {
5468  int q = 0;
5469 
5470  fprintf(f, "[");
5471 
5472  if (anchor & ANCHOR_BEGIN_BUF) {
5473  fprintf(f, "begin-buf");
5474  q = 1;
5475  }
5476  if (anchor & ANCHOR_BEGIN_LINE) {
5477  if (q) fprintf(f, ", ");
5478  q = 1;
5479  fprintf(f, "begin-line");
5480  }
5481  if (anchor & ANCHOR_BEGIN_POSITION) {
5482  if (q) fprintf(f, ", ");
5483  q = 1;
5484  fprintf(f, "begin-pos");
5485  }
5486  if (anchor & ANCHOR_END_BUF) {
5487  if (q) fprintf(f, ", ");
5488  q = 1;
5489  fprintf(f, "end-buf");
5490  }
5491  if (anchor & ANCHOR_SEMI_END_BUF) {
5492  if (q) fprintf(f, ", ");
5493  q = 1;
5494  fprintf(f, "semi-end-buf");
5495  }
5496  if (anchor & ANCHOR_END_LINE) {
5497  if (q) fprintf(f, ", ");
5498  q = 1;
5499  fprintf(f, "end-line");
5500  }
5501  if (anchor & ANCHOR_ANYCHAR_STAR) {
5502  if (q) fprintf(f, ", ");
5503  q = 1;
5504  fprintf(f, "anychar-star");
5505  }
5506  if (anchor & ANCHOR_ANYCHAR_STAR_ML) {
5507  if (q) fprintf(f, ", ");
5508  fprintf(f, "anychar-star-ml");
5509  }
5510 
5511  fprintf(f, "]");
5512 }
5513 
5514 static void
5515 print_optimize_info(FILE* f, regex_t* reg)
5516 {
5517  static const char* on[] = { "NONE", "EXACT", "EXACT_BM", "EXACT_BM_NOT_REV",
5518  "EXACT_IC", "MAP",
5519  "EXACT_BM_IC", "EXACT_BM_NOT_REV_IC" };
5520 
5521  fprintf(f, "optimize: %s\n", on[reg->optimize]);
5522  fprintf(f, " anchor: "); print_anchor(f, reg->anchor);
5523  if ((reg->anchor & ANCHOR_END_BUF_MASK) != 0)
5524  print_distance_range(f, reg->anchor_dmin, reg->anchor_dmax);
5525  fprintf(f, "\n");
5526 
5527  if (reg->optimize) {
5528  fprintf(f, " sub anchor: "); print_anchor(f, reg->sub_anchor);
5529  fprintf(f, "\n");
5530  }
5531  fprintf(f, "\n");
5532 
5533  if (reg->exact) {
5534  UChar *p;
5535  fprintf(f, "exact: [");
5536  for (p = reg->exact; p < reg->exact_end; p++) {
5537  fputc(*p, f);
5538  }
5539  fprintf(f, "]: length: %ld\n", (reg->exact_end - reg->exact));
5540  }
5541  else if (reg->optimize & ONIG_OPTIMIZE_MAP) {
5542  int c, i, n = 0;
5543 
5544  for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++)
5545  if (reg->map[i]) n++;
5546 
5547  fprintf(f, "map: n=%d\n", n);
5548  if (n > 0) {
5549  c = 0;
5550  fputc('[', f);
5551  for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) {
5552  if (reg->map[i] != 0) {
5553  if (c > 0) fputs(", ", f);
5554  c++;
5555  if (ONIGENC_MBC_MAXLEN(reg->enc) == 1 &&
5557  fputc(i, f);
5558  else
5559  fprintf(f, "%d", i);
5560  }
5561  }
5562  fprintf(f, "]\n");
5563  }
5564  }
5565 }
5566 #endif /* ONIG_DEBUG */
5567 
5568 
5569 extern void
5571 {
5572  if (IS_NOT_NULL(reg)) {
5573  if (IS_NOT_NULL(reg->p)) xfree(reg->p);
5574  if (IS_NOT_NULL(reg->exact)) xfree(reg->exact);
5575  if (IS_NOT_NULL(reg->int_map)) xfree(reg->int_map);
5577  if (IS_NOT_NULL(reg->repeat_range)) xfree(reg->repeat_range);
5578  if (IS_NOT_NULL(reg->chain)) onig_free(reg->chain);
5579 
5580 #ifdef USE_NAMED_GROUP
5581  onig_names_free(reg);
5582 #endif
5583  }
5584 }
5585 
5586 extern void
5588 {
5589  if (IS_NOT_NULL(reg)) {
5590  onig_free_body(reg);
5591  xfree(reg);
5592  }
5593 }
5594 
5595 size_t
5597 {
5598  size_t size = sizeof(regex_t);
5599  if (IS_NULL(reg)) return 0;
5600  if (IS_NOT_NULL(reg->p)) size += reg->alloc;
5601  if (IS_NOT_NULL(reg->exact)) size += reg->exact_end - reg->exact;
5602  if (IS_NOT_NULL(reg->int_map)) size += sizeof(int) * ONIG_CHAR_TABLE_SIZE;
5603  if (IS_NOT_NULL(reg->int_map_backward)) size += sizeof(int) * ONIG_CHAR_TABLE_SIZE;
5604  if (IS_NOT_NULL(reg->repeat_range)) size += reg->repeat_range_alloc * sizeof(OnigRepeatRange);
5605  if (IS_NOT_NULL(reg->chain)) size += onig_memsize(reg->chain);
5606 
5607  return size;
5608 }
5609 
5610 size_t
5612 {
5613  size_t size = sizeof(*regs);
5614  if (IS_NULL(regs)) return 0;
5615  size += regs->allocated * (sizeof(*regs->beg) + sizeof(*regs->end));
5616  return size;
5617 }
5618 
5619 #define REGEX_TRANSFER(to,from) do {\
5620  (to)->state = ONIG_STATE_MODIFY;\
5621  onig_free_body(to);\
5622  xmemcpy(to, from, sizeof(regex_t));\
5623  xfree(from);\
5624 } while (0)
5625 
5626 extern void
5628 {
5630  REGEX_TRANSFER(to, from);
5632 }
5633 
5634 #define REGEX_CHAIN_HEAD(reg) do {\
5635  while (IS_NOT_NULL((reg)->chain)) {\
5636  (reg) = (reg)->chain;\
5637  }\
5638 } while (0)
5639 
5640 extern void
5642 {
5644  REGEX_CHAIN_HEAD(to);
5645  to->chain = add;
5647 }
5648 
5649 extern void
5651 {
5652  regex_t *head, *prev;
5653 
5654  prev = reg;
5655  head = prev->chain;
5656  if (IS_NOT_NULL(head)) {
5657  reg->state = ONIG_STATE_MODIFY;
5658  while (IS_NOT_NULL(head->chain)) {
5659  prev = head;
5660  head = head->chain;
5661  }
5662  prev->chain = (regex_t* )NULL;
5663  REGEX_TRANSFER(reg, head);
5664  }
5665 }
5666 
5667 #ifdef ONIG_DEBUG
5668 static void print_compiled_byte_code_list P_((FILE* f, regex_t* reg));
5669 #endif
5670 #ifdef ONIG_DEBUG_PARSE_TREE
5671 static void print_tree P_((FILE* f, Node* node));
5672 #endif
5673 
5674 extern int
5675 onig_compile(regex_t* reg, const UChar* pattern, const UChar* pattern_end,
5676  OnigErrorInfo* einfo, const char *sourcefile, int sourceline)
5677 {
5678 #define COMPILE_INIT_SIZE 20
5679 
5680  int r;
5681  OnigDistance init_size;
5682  Node* root;
5683  ScanEnv scan_env = {0};
5684 #ifdef USE_SUBEXP_CALL
5685  UnsetAddrList uslist;
5686 #endif
5687 
5688  if (IS_NOT_NULL(einfo)) einfo->par = (UChar* )NULL;
5689 
5690  scan_env.sourcefile = sourcefile;
5691  scan_env.sourceline = sourceline;
5692  reg->state = ONIG_STATE_COMPILING;
5693 
5694 #ifdef ONIG_DEBUG
5695  if (!onig_is_prelude()) print_enc_string(stderr, reg->enc, pattern, pattern_end);
5696 #endif
5697 
5698  if (reg->alloc == 0) {
5699  init_size = (pattern_end - pattern) * 2;
5700  if (init_size <= 0) init_size = COMPILE_INIT_SIZE;
5701  r = BBUF_INIT(reg, init_size);
5702  if (r != 0) goto end;
5703  }
5704  else
5705  reg->used = 0;
5706 
5707  reg->num_mem = 0;
5708  reg->num_repeat = 0;
5709  reg->num_null_check = 0;
5710  reg->repeat_range_alloc = 0;
5711  reg->repeat_range = (OnigRepeatRange* )NULL;
5712 #ifdef USE_COMBINATION_EXPLOSION_CHECK
5713  reg->num_comb_exp_check = 0;
5714 #endif
5715 
5716  r = onig_parse_make_tree(&root, pattern, pattern_end, reg, &scan_env);
5717  if (r != 0) goto err;
5718 
5719 #ifdef ONIG_DEBUG_PARSE_TREE
5720 # if 0
5721  fprintf(stderr, "ORIGINAL PARSE TREE:\n");
5722  if (!onig_is_prelude()) {
5723  print_tree(stderr, root);
5724  }
5725 # endif
5726 #endif
5727 
5728 #ifdef USE_NAMED_GROUP
5729  /* mixed use named group and no-named group */
5730  if (scan_env.num_named > 0 &&
5733  if (scan_env.num_named != scan_env.num_mem)
5734  r = disable_noname_group_capture(&root, reg, &scan_env);
5735  else
5736  r = numbered_ref_check(root);
5737 
5738  if (r != 0) goto err;
5739  }
5740 #endif
5741 
5742 #ifdef USE_SUBEXP_CALL
5743  if (scan_env.num_call > 0) {
5744  r = unset_addr_list_init(&uslist, scan_env.num_call);
5745  if (r != 0) goto err;
5746  scan_env.unset_addr_list = &uslist;
5747  r = setup_subexp_call(root, &scan_env);
5748  if (r != 0) goto err_unset;
5749  r = subexp_recursive_check_trav(root, &scan_env);
5750  if (r < 0) goto err_unset;
5751  r = subexp_inf_recursive_check_trav(root, &scan_env);
5752  if (r != 0) goto err_unset;
5753 
5754  reg->num_call = scan_env.num_call;
5755  }
5756  else
5757  reg->num_call = 0;
5758 #endif
5759 
5760  r = setup_tree(root, reg, IN_ROOT, &scan_env);
5761  if (r != 0) goto err_unset;
5762 
5763 #ifdef ONIG_DEBUG_PARSE_TREE
5764  if (!onig_is_prelude()) print_tree(stderr, root);
5765 #endif
5766 
5767  reg->capture_history = scan_env.capture_history;
5768  reg->bt_mem_start = scan_env.bt_mem_start;
5769  reg->bt_mem_start |= reg->capture_history;
5770  if (IS_FIND_CONDITION(reg->options))
5772  else {
5773  reg->bt_mem_end = scan_env.bt_mem_end;
5774  reg->bt_mem_end |= reg->capture_history;
5775  }
5776 
5777 #ifdef USE_COMBINATION_EXPLOSION_CHECK
5778  if (scan_env.backrefed_mem == 0
5779 #ifdef USE_SUBEXP_CALL
5780  || scan_env.num_call == 0
5781 #endif
5782  ) {
5783  setup_comb_exp_check(root, 0, &scan_env);
5784 #ifdef USE_SUBEXP_CALL
5785  if (scan_env.has_recursion != 0) {
5786  scan_env.num_comb_exp_check = 0;
5787  }
5788  else
5789 #endif
5790  if (scan_env.comb_exp_max_regnum > 0) {
5791  int i;
5792  for (i = 1; i <= scan_env.comb_exp_max_regnum; i++) {
5793  if (BIT_STATUS_AT(scan_env.backrefed_mem, i) != 0) {
5794  scan_env.num_comb_exp_check = 0;
5795  break;
5796  }
5797  }
5798  }
5799  }
5800 
5801  reg->num_comb_exp_check = scan_env.num_comb_exp_check;
5802 #endif
5803 
5804  clear_optimize_info(reg);
5805 #ifndef ONIG_DONT_OPTIMIZE
5806  r = set_optimize_info_from_tree(root, reg, &scan_env);
5807  if (r != 0) goto err_unset;
5808 #endif
5809 
5810  if (IS_NOT_NULL(scan_env.mem_nodes_dynamic)) {
5811  xfree(scan_env.mem_nodes_dynamic);
5812  scan_env.mem_nodes_dynamic = (Node** )NULL;
5813  }
5814 
5815  r = compile_tree(root, reg);
5816  if (r == 0) {
5817  r = add_opcode(reg, OP_END);
5818 #ifdef USE_SUBEXP_CALL
5819  if (scan_env.num_call > 0) {
5820  r = unset_addr_list_fix(&uslist, reg);
5821  unset_addr_list_end(&uslist);
5822  if (r) goto err;
5823  }
5824 #endif
5825 
5826  if ((reg->num_repeat != 0) || (reg->bt_mem_end != 0))
5828  else {
5829  if (reg->bt_mem_start != 0)
5831  else
5833  }
5834  }
5835 #ifdef USE_SUBEXP_CALL
5836  else if (scan_env.num_call > 0) {
5837  unset_addr_list_end(&uslist);
5838  }
5839 #endif
5840  onig_node_free(root);
5841 
5842 #ifdef ONIG_DEBUG_COMPILE
5843 #ifdef USE_NAMED_GROUP
5844  if (!onig_is_prelude()) onig_print_names(stderr, reg);
5845 #endif
5846  if (!onig_is_prelude()) print_compiled_byte_code_list(stderr, reg);
5847 #endif
5848 
5849  end:
5850  reg->state = ONIG_STATE_NORMAL;
5851  return r;
5852 
5853  err_unset:
5854 #ifdef USE_SUBEXP_CALL
5855  if (scan_env.num_call > 0) {
5856  unset_addr_list_end(&uslist);
5857  }
5858 #endif
5859  err:
5860  if (IS_NOT_NULL(scan_env.error)) {
5861  if (IS_NOT_NULL(einfo)) {
5862  einfo->enc = scan_env.enc;
5863  einfo->par = scan_env.error;
5864  einfo->par_end = scan_env.error_end;
5865  }
5866  }
5867 
5868  onig_node_free(root);
5869  if (IS_NOT_NULL(scan_env.mem_nodes_dynamic))
5870  xfree(scan_env.mem_nodes_dynamic);
5871  return r;
5872 }
5873 
5874 #ifdef USE_RECOMPILE_API
5875 extern int
5876 onig_recompile(regex_t* reg, const UChar* pattern, const UChar* pattern_end,
5877  OnigOptionType option, OnigEncoding enc, OnigSyntaxType* syntax,
5878  OnigErrorInfo* einfo)
5879 {
5880  int r;
5881  regex_t *new_reg;
5882 
5883  r = onig_new(&new_reg, pattern, pattern_end, option, enc, syntax, einfo);
5884  if (r) return r;
5885  if (ONIG_STATE(reg) == ONIG_STATE_NORMAL) {
5886  onig_transfer(reg, new_reg);
5887  }
5888  else {
5889  onig_chain_link_add(reg, new_reg);
5890  }
5891  return 0;
5892 }
5893 #endif
5894 
5895 static int onig_inited = 0;
5896 
5897 extern int
5899  OnigCaseFoldType case_fold_flag,
5900  OnigEncoding enc, const OnigSyntaxType* syntax)
5901 {
5902  if (! onig_inited)
5903  onig_init();
5904 
5905  if (IS_NULL(reg))
5906  return ONIGERR_INVALID_ARGUMENT;
5907 
5908  if (ONIGENC_IS_UNDEF(enc))
5910 
5914  }
5915 
5916  (reg)->state = ONIG_STATE_MODIFY;
5917 
5918  if ((option & ONIG_OPTION_NEGATE_SINGLELINE) != 0) {
5919  option |= syntax->options;
5920  option &= ~ONIG_OPTION_SINGLELINE;
5921  }
5922  else
5923  option |= syntax->options;
5924 
5925  (reg)->enc = enc;
5926  (reg)->options = option;
5927  (reg)->syntax = syntax;
5928  (reg)->optimize = 0;
5929  (reg)->exact = (UChar* )NULL;
5930  (reg)->int_map = (int* )NULL;
5931  (reg)->int_map_backward = (int* )NULL;
5932  (reg)->chain = (regex_t* )NULL;
5933 
5934  (reg)->p = (UChar* )NULL;
5935  (reg)->alloc = 0;
5936  (reg)->used = 0;
5937  (reg)->name_table = (void* )NULL;
5938 
5939  (reg)->case_fold_flag = case_fold_flag;
5940  return 0;
5941 }
5942 
5943 extern int
5944 onig_new_without_alloc(regex_t* reg, const UChar* pattern,
5945  const UChar* pattern_end, OnigOptionType option, OnigEncoding enc,
5946  OnigSyntaxType* syntax, OnigErrorInfo* einfo)
5947 {
5948  int r;
5949 
5950  r = onig_reg_init(reg, option, ONIGENC_CASE_FOLD_DEFAULT, enc, syntax);
5951  if (r) return r;
5952 
5953  r = onig_compile(reg, pattern, pattern_end, einfo, NULL, 0);
5954  return r;
5955 }
5956 
5957 extern int
5958 onig_new(regex_t** reg, const UChar* pattern, const UChar* pattern_end,
5959  OnigOptionType option, OnigEncoding enc, const OnigSyntaxType* syntax,
5960  OnigErrorInfo* einfo)
5961 {
5962  int r;
5963 
5964  *reg = (regex_t* )xmalloc(sizeof(regex_t));
5965  if (IS_NULL(*reg)) return ONIGERR_MEMORY;
5966 
5967  r = onig_reg_init(*reg, option, ONIGENC_CASE_FOLD_DEFAULT, enc, syntax);
5968  if (r) goto err;
5969 
5970  r = onig_compile(*reg, pattern, pattern_end, einfo, NULL, 0);
5971  if (r) {
5972  err:
5973  onig_free(*reg);
5974  *reg = NULL;
5975  }
5976  return r;
5977 }
5978 
5979 
5980 extern int
5982 {
5983  if (onig_inited != 0)
5984  return 0;
5985 
5988 
5989  onig_inited = 1;
5990 
5991  onigenc_init();
5992  /* onigenc_set_default_caseconv_table((UChar* )0); */
5993 
5994 #ifdef ONIG_DEBUG_STATISTICS
5995  onig_statistics_init();
5996 #endif
5997 
5999  return 0;
6000 }
6001 
6002 
6003 extern int
6005 {
6007 
6008 #ifdef ONIG_DEBUG_STATISTICS
6009  if (!onig_is_prelude()) onig_print_statistics(stderr);
6010 #endif
6011 
6012 #ifdef USE_SHARED_CCLASS_TABLE
6014 #endif
6015 
6016 #ifdef USE_PARSE_TREE_NODE_RECYCLE
6018 #endif
6019 
6020  onig_inited = 0;
6021 
6024  return 0;
6025 }
6026 
6027 extern int
6029 {
6030  OnigCodePoint n, *data;
6031  OnigCodePoint low, high, x;
6032 
6033  GET_CODE_POINT(n, p);
6034  data = (OnigCodePoint* )p;
6035  data++;
6036 
6037  for (low = 0, high = n; low < high; ) {
6038  x = (low + high) >> 1;
6039  if (code > data[x * 2 + 1])
6040  low = x + 1;
6041  else
6042  high = x;
6043  }
6044 
6045  return ((low < n && code >= data[low * 2]) ? 1 : 0);
6046 }
6047 
6048 extern int
6050 {
6051  int found;
6052 
6053  if (elen > 1 || (code >= SINGLE_BYTE_SIZE)) {
6054  if (IS_NULL(cc->mbuf)) {
6055  found = 0;
6056  }
6057  else {
6058  found = (onig_is_in_code_range(cc->mbuf->p, code) != 0 ? 1 : 0);
6059  }
6060  }
6061  else {
6062  found = (BITSET_AT(cc->bs, code) == 0 ? 0 : 1);
6063  }
6064 
6065  if (IS_NCCLASS_NOT(cc))
6066  return !found;
6067  else
6068  return found;
6069 }
6070 
6071 extern int
6073 {
6074  int len;
6075 
6076  if (ONIGENC_MBC_MINLEN(enc) > 1) {
6077  len = 2;
6078  }
6079  else {
6080  len = ONIGENC_CODE_TO_MBCLEN(enc, code);
6081  }
6082  return onig_is_code_in_cc_len(len, code, cc);
6083 }
6084 
6085 
6086 #ifdef ONIG_DEBUG
6087 
6088 /* arguments type */
6089 #define ARG_SPECIAL -1
6090 #define ARG_NON 0
6091 #define ARG_RELADDR 1
6092 #define ARG_ABSADDR 2
6093 #define ARG_LENGTH 3
6094 #define ARG_MEMNUM 4
6095 #define ARG_OPTION 5
6096 #define ARG_STATE_CHECK 6
6097 
6098 OnigOpInfoType OnigOpInfo[] = {
6099  { OP_FINISH, "finish", ARG_NON },
6100  { OP_END, "end", ARG_NON },
6101  { OP_EXACT1, "exact1", ARG_SPECIAL },
6102  { OP_EXACT2, "exact2", ARG_SPECIAL },
6103  { OP_EXACT3, "exact3", ARG_SPECIAL },
6104  { OP_EXACT4, "exact4", ARG_SPECIAL },
6105  { OP_EXACT5, "exact5", ARG_SPECIAL },
6106  { OP_EXACTN, "exactn", ARG_SPECIAL },
6107  { OP_EXACTMB2N1, "exactmb2-n1", ARG_SPECIAL },
6108  { OP_EXACTMB2N2, "exactmb2-n2", ARG_SPECIAL },
6109  { OP_EXACTMB2N3, "exactmb2-n3", ARG_SPECIAL },
6110  { OP_EXACTMB2N, "exactmb2-n", ARG_SPECIAL },
6111  { OP_EXACTMB3N, "exactmb3n" , ARG_SPECIAL },
6112  { OP_EXACTMBN, "exactmbn", ARG_SPECIAL },
6113  { OP_EXACT1_IC, "exact1-ic", ARG_SPECIAL },
6114  { OP_EXACTN_IC, "exactn-ic", ARG_SPECIAL },
6115  { OP_CCLASS, "cclass", ARG_SPECIAL },
6116  { OP_CCLASS_MB, "cclass-mb", ARG_SPECIAL },
6117  { OP_CCLASS_MIX, "cclass-mix", ARG_SPECIAL },
6118  { OP_CCLASS_NOT, "cclass-not", ARG_SPECIAL },
6119  { OP_CCLASS_MB_NOT, "cclass-mb-not", ARG_SPECIAL },
6120  { OP_CCLASS_MIX_NOT, "cclass-mix-not", ARG_SPECIAL },
6121  { OP_CCLASS_NODE, "cclass-node", ARG_SPECIAL },
6122  { OP_ANYCHAR, "anychar", ARG_NON },
6123  { OP_ANYCHAR_ML, "anychar-ml", ARG_NON },
6124  { OP_ANYCHAR_STAR, "anychar*", ARG_NON },
6125  { OP_ANYCHAR_ML_STAR, "anychar-ml*", ARG_NON },
6126  { OP_ANYCHAR_STAR_PEEK_NEXT, "anychar*-peek-next", ARG_SPECIAL },
6127  { OP_ANYCHAR_ML_STAR_PEEK_NEXT, "anychar-ml*-peek-next", ARG_SPECIAL },
6128  { OP_WORD, "word", ARG_NON },
6129  { OP_NOT_WORD, "not-word", ARG_NON },
6130  { OP_WORD_BOUND, "word-bound", ARG_NON },
6131  { OP_NOT_WORD_BOUND, "not-word-bound", ARG_NON },
6132  { OP_WORD_BEGIN, "word-begin", ARG_NON },
6133  { OP_WORD_END, "word-end", ARG_NON },
6134  { OP_ASCII_WORD, "ascii-word", ARG_NON },
6135  { OP_NOT_ASCII_WORD, "not-ascii-word", ARG_NON },
6136  { OP_ASCII_WORD_BOUND, "ascii-word-bound", ARG_NON },
6137  { OP_NOT_ASCII_WORD_BOUND,"not-ascii-word-bound", ARG_NON },
6138  { OP_ASCII_WORD_BEGIN, "ascii-word-begin", ARG_NON },
6139  { OP_ASCII_WORD_END, "ascii-word-end", ARG_NON },
6140  { OP_BEGIN_BUF, "begin-buf", ARG_NON },
6141  { OP_END_BUF, "end-buf", ARG_NON },
6142  { OP_BEGIN_LINE, "begin-line", ARG_NON },
6143  { OP_END_LINE, "end-line", ARG_NON },
6144  { OP_SEMI_END_BUF, "semi-end-buf", ARG_NON },
6145  { OP_BEGIN_POSITION, "begin-position", ARG_NON },
6146  { OP_BEGIN_POS_OR_LINE, "begin-pos-or-line", ARG_NON },
6147  { OP_BACKREF1, "backref1", ARG_NON },
6148  { OP_BACKREF2, "backref2", ARG_NON },
6149  { OP_BACKREFN, "backrefn", ARG_MEMNUM },
6150  { OP_BACKREFN_IC, "backrefn-ic", ARG_SPECIAL },
6151  { OP_BACKREF_MULTI, "backref_multi", ARG_SPECIAL },
6152  { OP_BACKREF_MULTI_IC, "backref_multi-ic", ARG_SPECIAL },
6153  { OP_BACKREF_WITH_LEVEL, "backref_at_level", ARG_SPECIAL },
6154  { OP_MEMORY_START_PUSH, "mem-start-push", ARG_MEMNUM },
6155  { OP_MEMORY_START, "mem-start", ARG_MEMNUM },
6156  { OP_MEMORY_END_PUSH, "mem-end-push", ARG_MEMNUM },
6157  { OP_MEMORY_END_PUSH_REC, "mem-end-push-rec", ARG_MEMNUM },
6158  { OP_MEMORY_END, "mem-end", ARG_MEMNUM },
6159  { OP_MEMORY_END_REC, "mem-end-rec", ARG_MEMNUM },
6160  { OP_SET_OPTION_PUSH, "set-option-push", ARG_OPTION },
6161  { OP_SET_OPTION, "set-option", ARG_OPTION },
6162  { OP_KEEP, "keep", ARG_NON },
6163  { OP_FAIL, "fail", ARG_NON },
6164  { OP_JUMP, "jump", ARG_RELADDR },
6165  { OP_PUSH, "push", ARG_RELADDR },
6166  { OP_POP, "pop", ARG_NON },
6167  { OP_PUSH_OR_JUMP_EXACT1, "push-or-jump-e1", ARG_SPECIAL },
6168  { OP_PUSH_IF_PEEK_NEXT, "push-if-peek-next", ARG_SPECIAL },
6169  { OP_REPEAT, "repeat", ARG_SPECIAL },
6170  { OP_REPEAT_NG, "repeat-ng", ARG_SPECIAL },
6171  { OP_REPEAT_INC, "repeat-inc", ARG_MEMNUM },
6172  { OP_REPEAT_INC_NG, "repeat-inc-ng", ARG_MEMNUM },
6173  { OP_REPEAT_INC_SG, "repeat-inc-sg", ARG_MEMNUM },
6174  { OP_REPEAT_INC_NG_SG, "repeat-inc-ng-sg", ARG_MEMNUM },
6175  { OP_NULL_CHECK_START, "null-check-start", ARG_MEMNUM },
6176  { OP_NULL_CHECK_END, "null-check-end", ARG_MEMNUM },
6177  { OP_NULL_CHECK_END_MEMST,"null-check-end-memst", ARG_MEMNUM },
6178  { OP_NULL_CHECK_END_MEMST_PUSH,"null-check-end-memst-push", ARG_MEMNUM },
6179  { OP_PUSH_POS, "push-pos", ARG_NON },
6180  { OP_POP_POS, "pop-pos", ARG_NON },
6181  { OP_PUSH_POS_NOT, "push-pos-not", ARG_RELADDR },
6182  { OP_FAIL_POS, "fail-pos", ARG_NON },
6183  { OP_PUSH_STOP_BT, "push-stop-bt", ARG_NON },
6184  { OP_POP_STOP_BT, "pop-stop-bt", ARG_NON },
6185  { OP_LOOK_BEHIND, "look-behind", ARG_SPECIAL },
6186  { OP_PUSH_LOOK_BEHIND_NOT, "push-look-behind-not", ARG_SPECIAL },
6187  { OP_FAIL_LOOK_BEHIND_NOT, "fail-look-behind-not", ARG_NON },
6188  { OP_CALL, "call", ARG_ABSADDR },
6189  { OP_RETURN, "return", ARG_NON },
6190  { OP_CONDITION, "condition", ARG_SPECIAL },
6191  { OP_STATE_CHECK_PUSH, "state-check-push", ARG_SPECIAL },
6192  { OP_STATE_CHECK_PUSH_OR_JUMP, "state-check-push-or-jump", ARG_SPECIAL },
6193  { OP_STATE_CHECK, "state-check", ARG_STATE_CHECK },
6194  { OP_STATE_CHECK_ANYCHAR_STAR, "state-check-anychar*", ARG_STATE_CHECK },
6196  "state-check-anychar-ml*", ARG_STATE_CHECK },
6197  { -1, "", ARG_NON }
6198 };
6199 
6200 static const char*
6201 op2name(int opcode)
6202 {
6203  int i;
6204 
6205  for (i = 0; OnigOpInfo[i].opcode >= 0; i++) {
6206  if (opcode == OnigOpInfo[i].opcode)
6207  return OnigOpInfo[i].name;
6208  }
6209  return "";
6210 }
6211 
6212 static int
6213 op2arg_type(int opcode)
6214 {
6215  int i;
6216 
6217  for (i = 0; OnigOpInfo[i].opcode >= 0; i++) {
6218  if (opcode == OnigOpInfo[i].opcode)
6219  return OnigOpInfo[i].arg_type;
6220  }
6221  return ARG_SPECIAL;
6222 }
6223 
6224 static void
6225 Indent(FILE* f, int indent)
6226 {
6227  int i;
6228  for (i = 0; i < indent; i++) putc(' ', f);
6229 }
6230 
6231 static void
6232 p_string(FILE* f, int len, UChar* s)
6233 {
6234  fputs(":", f);
6235  while (len-- > 0) { fputc(*s++, f); }
6236 }
6237 
6238 static void
6239 p_len_string(FILE* f, LengthType len, int mb_len, UChar* s)
6240 {
6241  int x = len * mb_len;
6242 
6243  fprintf(f, ":%d:", len);
6244  while (x-- > 0) { fputc(*s++, f); }
6245 }
6246 
6247 extern void
6248 onig_print_compiled_byte_code(FILE* f, UChar* bp, UChar* bpend, UChar** nextp,
6249  OnigEncoding enc)
6250 {
6251  int i, n, arg_type;
6252  RelAddrType addr;
6253  LengthType len;
6254  MemNumType mem;
6255  StateCheckNumType scn;
6256  OnigCodePoint code;
6257  UChar *q;
6258 
6259  fprintf(f, "[%s", op2name(*bp));
6260  arg_type = op2arg_type(*bp);
6261  if (arg_type != ARG_SPECIAL) {
6262  bp++;
6263  switch (arg_type) {
6264  case ARG_NON:
6265  break;
6266  case ARG_RELADDR:
6267  GET_RELADDR_INC(addr, bp);
6268  fprintf(f, ":(%d)", addr);
6269  break;
6270  case ARG_ABSADDR:
6271  GET_ABSADDR_INC(addr, bp);
6272  fprintf(f, ":(%d)", addr);
6273  break;
6274  case ARG_LENGTH:
6275  GET_LENGTH_INC(len, bp);
6276  fprintf(f, ":%d", len);
6277  break;
6278  case ARG_MEMNUM:
6279  mem = *((MemNumType* )bp);
6280  bp += SIZE_MEMNUM;
6281  fprintf(f, ":%d", mem);
6282  break;
6283  case ARG_OPTION:
6284  {
6285  OnigOptionType option = *((OnigOptionType* )bp);
6286  bp += SIZE_OPTION;
6287  fprintf(f, ":%d", option);
6288  }
6289  break;
6290 
6291  case ARG_STATE_CHECK:
6292  scn = *((StateCheckNumType* )bp);
6293  bp += SIZE_STATE_CHECK_NUM;
6294  fprintf(f, ":%d", scn);
6295  break;
6296  }
6297  }
6298  else {
6299  switch (*bp++) {
6300  case OP_EXACT1:
6303  p_string(f, 1, bp++); break;
6304  case OP_EXACT2:
6305  p_string(f, 2, bp); bp += 2; break;
6306  case OP_EXACT3:
6307  p_string(f, 3, bp); bp += 3; break;
6308  case OP_EXACT4:
6309  p_string(f, 4, bp); bp += 4; break;
6310  case OP_EXACT5:
6311  p_string(f, 5, bp); bp += 5; break;
6312  case OP_EXACTN:
6313  GET_LENGTH_INC(len, bp);
6314  p_len_string(f, len, 1, bp);
6315  bp += len;
6316  break;
6317 
6318  case OP_EXACTMB2N1:
6319  p_string(f, 2, bp); bp += 2; break;
6320  case OP_EXACTMB2N2:
6321  p_string(f, 4, bp); bp += 4; break;
6322  case OP_EXACTMB2N3:
6323  p_string(f, 6, bp); bp += 6; break;
6324  case OP_EXACTMB2N:
6325  GET_LENGTH_INC(len, bp);
6326  p_len_string(f, len, 2, bp);
6327  bp += len * 2;
6328  break;
6329  case OP_EXACTMB3N:
6330  GET_LENGTH_INC(len, bp);
6331  p_len_string(f, len, 3, bp);
6332  bp += len * 3;
6333  break;
6334  case OP_EXACTMBN:
6335  {
6336  int mb_len;
6337 
6338  GET_LENGTH_INC(mb_len, bp);
6339  GET_LENGTH_INC(len, bp);
6340  fprintf(f, ":%d:%d:", mb_len, len);
6341  n = len * mb_len;
6342  while (n-- > 0) { fputc(*bp++, f); }
6343  }
6344  break;
6345 
6346  case OP_EXACT1_IC:
6347  len = enclen(enc, bp, bpend);
6348  p_string(f, len, bp);
6349  bp += len;
6350  break;
6351  case OP_EXACTN_IC:
6352  GET_LENGTH_INC(len, bp);
6353  p_len_string(f, len, 1, bp);
6354  bp += len;
6355  break;
6356 
6357  case OP_CCLASS:
6358  n = bitset_on_num((BitSetRef )bp);
6359  bp += SIZE_BITSET;
6360  fprintf(f, ":%d", n);
6361  break;
6362 
6363  case OP_CCLASS_NOT:
6364  n = bitset_on_num((BitSetRef )bp);
6365  bp += SIZE_BITSET;
6366  fprintf(f, ":%d", n);
6367  break;
6368 
6369  case OP_CCLASS_MB:
6370  case OP_CCLASS_MB_NOT:
6371  GET_LENGTH_INC(len, bp);
6372  q = bp;
6373 #ifndef PLATFORM_UNALIGNED_WORD_ACCESS
6374  ALIGNMENT_RIGHT(q);
6375 #endif
6376  GET_CODE_POINT(code, q);
6377  bp += len;
6378  fprintf(f, ":%d:%d", (int )code, len);
6379  break;
6380 
6381  case OP_CCLASS_MIX:
6382  case OP_CCLASS_MIX_NOT:
6383  n = bitset_on_num((BitSetRef )bp);
6384  bp += SIZE_BITSET;
6385  GET_LENGTH_INC(len, bp);
6386  q = bp;
6387 #ifndef PLATFORM_UNALIGNED_WORD_ACCESS
6388  ALIGNMENT_RIGHT(q);
6389 #endif
6390  GET_CODE_POINT(code, q);
6391  bp += len;
6392  fprintf(f, ":%d:%d:%d", n, (int )code, len);
6393  break;
6394 
6395  case OP_CCLASS_NODE:
6396  {
6397  CClassNode *cc;
6398 
6399  GET_POINTER_INC(cc, bp);
6400  n = bitset_on_num(cc->bs);
6401  fprintf(f, ":%"PRIuPTR":%d", (uintptr_t)cc, n);
6402  }
6403  break;
6404 
6405  case OP_BACKREFN_IC:
6406  mem = *((MemNumType* )bp);
6407  bp += SIZE_MEMNUM;
6408  fprintf(f, ":%d", mem);
6409  break;
6410 
6411  case OP_BACKREF_MULTI_IC:
6412  case OP_BACKREF_MULTI:
6413  fputs(" ", f);
6414  GET_LENGTH_INC(len, bp);
6415  for (i = 0; i < len; i++) {
6416  GET_MEMNUM_INC(mem, bp);
6417  if (i > 0) fputs(", ", f);
6418  fprintf(f, "%d", mem);
6419  }
6420  break;
6421 
6422  case OP_BACKREF_WITH_LEVEL:
6423  {
6424  OnigOptionType option;
6425  LengthType level;
6426 
6427  GET_OPTION_INC(option, bp);
6428  fprintf(f, ":%d", option);
6429  GET_LENGTH_INC(level, bp);
6430  fprintf(f, ":%d", level);
6431 
6432  fputs(" ", f);
6433  GET_LENGTH_INC(len, bp);
6434  for (i = 0; i < len; i++) {
6435  GET_MEMNUM_INC(mem, bp);
6436  if (i > 0) fputs(", ", f);
6437  fprintf(f, "%d", mem);
6438  }
6439  }
6440  break;
6441 
6442  case OP_REPEAT:
6443  case OP_REPEAT_NG:
6444  {
6445  mem = *((MemNumType* )bp);
6446  bp += SIZE_MEMNUM;
6447  addr = *((RelAddrType* )bp);
6448  bp += SIZE_RELADDR;
6449  fprintf(f, ":%d:%d", mem, addr);
6450  }
6451  break;
6452 
6454  case OP_PUSH_IF_PEEK_NEXT:
6455  addr = *((RelAddrType* )bp);
6456  bp += SIZE_RELADDR;
6457  fprintf(f, ":(%d)", addr);
6458  p_string(f, 1, bp);
6459  bp += 1;
6460  break;
6461 
6462  case OP_LOOK_BEHIND:
6463  GET_LENGTH_INC(len, bp);
6464  fprintf(f, ":%d", len);
6465  break;
6466 
6468  GET_RELADDR_INC(addr, bp);
6469  GET_LENGTH_INC(len, bp);
6470  fprintf(f, ":%d:(%d)", len, addr);
6471  break;
6472 
6473  case OP_STATE_CHECK_PUSH:
6475  scn = *((StateCheckNumType* )bp);
6476  bp += SIZE_STATE_CHECK_NUM;
6477  addr = *((RelAddrType* )bp);
6478  bp += SIZE_RELADDR;
6479  fprintf(f, ":%d:(%d)", scn, addr);
6480  break;
6481 
6482  case OP_CONDITION:
6483  GET_MEMNUM_INC(mem, bp);
6484  GET_RELADDR_INC(addr, bp);
6485  fprintf(f, ":%d:(%d)", mem, addr);
6486  break;
6487 
6488  default:
6489  fprintf(stderr, "onig_print_compiled_byte_code: undefined code %d\n",
6490  *--bp);
6491  }
6492  }
6493  fputs("]", f);
6494  if (nextp) *nextp = bp;
6495 }
6496 
6497 static void
6498 print_compiled_byte_code_list(FILE* f, regex_t* reg)
6499 {
6500  int ncode;
6501  UChar* bp = reg->p;
6502  UChar* end = reg->p + reg->used;
6503 
6504  fprintf(f, "code length: %d", reg->used);
6505 
6506  ncode = -1;
6507  while (bp < end) {
6508  ncode++;
6509  if (ncode % 5 == 0)
6510  fprintf(f, "\n%ld:", bp - reg->p);
6511  else
6512  fprintf(f, " %ld:", bp - reg->p);
6513  onig_print_compiled_byte_code(f, bp, end, &bp, reg->enc);
6514  }
6515 
6516  fprintf(f, "\n");
6517 }
6518 
6519 static void
6520 print_indent_tree(FILE* f, Node* node, int indent)
6521 {
6522  int i, type, container_p = 0;
6523  int add = 3;
6524  UChar* p;
6525 
6526  Indent(f, indent);
6527  if (IS_NULL(node)) {
6528  fprintf(f, "ERROR: null node!!!\n");
6529  exit (0);
6530  }
6531 
6532  type = NTYPE(node);
6533  switch (type) {
6534  case NT_LIST:
6535  case NT_ALT:
6536  if (NTYPE(node) == NT_LIST)
6537  fprintf(f, "<list:%"PRIxPTR">\n", (intptr_t)node);
6538  else
6539  fprintf(f, "<alt:%"PRIxPTR">\n", (intptr_t)node);
6540 
6541  print_indent_tree(f, NCAR(node), indent + add);
6542  while (IS_NOT_NULL(node = NCDR(node))) {
6543  if (NTYPE(node) != type) {
6544  fprintf(f, "ERROR: list/alt right is not a cons. %d\n", NTYPE(node));
6545  exit(0);
6546  }
6547  print_indent_tree(f, NCAR(node), indent + add);
6548  }
6549  break;
6550 
6551  case NT_STR:
6552  fprintf(f, "<string%s:%"PRIxPTR">",
6553  (NSTRING_IS_RAW(node) ? "-raw" : ""), (intptr_t)node);
6554  for (p = NSTR(node)->s; p < NSTR(node)->end; p++) {
6555  if (*p >= 0x20 && *p < 0x7f)
6556  fputc(*p, f);
6557  else {
6558  fprintf(f, " 0x%02x", *p);
6559  }
6560  }
6561  break;
6562 
6563  case NT_CCLASS:
6564  fprintf(f, "<cclass:%"PRIxPTR">", (intptr_t)node);
6565  if (IS_NCCLASS_NOT(NCCLASS(node))) fputs(" not", f);
6566  if (NCCLASS(node)->mbuf) {
6567  BBuf* bbuf = NCCLASS(node)->mbuf;
6568  for (i = 0; i < (int )bbuf->used; i++) {
6569  if (i > 0) fprintf(f, ",");
6570  fprintf(f, "%0x", bbuf->p[i]);
6571  }
6572  }
6573  break;
6574 
6575  case NT_CTYPE:
6576  fprintf(f, "<ctype:%"PRIxPTR"> ", (intptr_t)node);
6577  switch (NCTYPE(node)->ctype) {
6578  case ONIGENC_CTYPE_WORD:
6579  if (NCTYPE(node)->not != 0)
6580  fputs("not word", f);
6581  else
6582  fputs("word", f);
6583  break;
6584 
6585  default:
6586  fprintf(f, "ERROR: undefined ctype.\n");
6587  exit(0);
6588  }
6589  break;
6590 
6591  case NT_CANY:
6592  fprintf(f, "<anychar:%"PRIxPTR">", (intptr_t)node);
6593  break;
6594 
6595  case NT_ANCHOR:
6596  fprintf(f, "<anchor:%"PRIxPTR"> ", (intptr_t)node);
6597  switch (NANCHOR(node)->type) {
6598  case ANCHOR_BEGIN_BUF: fputs("begin buf", f); break;
6599  case ANCHOR_END_BUF: fputs("end buf", f); break;
6600  case ANCHOR_BEGIN_LINE: fputs("begin line", f); break;
6601  case ANCHOR_END_LINE: fputs("end line", f); break;
6602  case ANCHOR_SEMI_END_BUF: fputs("semi end buf", f); break;
6603  case ANCHOR_BEGIN_POSITION: fputs("begin position", f); break;
6604  case ANCHOR_ANYCHAR_STAR: fputs("begin position/line", f); break;
6605 
6606  case ANCHOR_WORD_BOUND: fputs("word bound", f); break;
6607  case ANCHOR_NOT_WORD_BOUND: fputs("not word bound", f); break;
6608 #ifdef USE_WORD_BEGIN_END
6609  case ANCHOR_WORD_BEGIN: fputs("word begin", f); break;
6610  case ANCHOR_WORD_END: fputs("word end", f); break;
6611 #endif
6612  case ANCHOR_PREC_READ: fputs("prec read", f); container_p = TRUE; break;
6613  case ANCHOR_PREC_READ_NOT: fputs("prec read not", f); container_p = TRUE; break;
6614  case ANCHOR_LOOK_BEHIND: fputs("look_behind", f); container_p = TRUE; break;
6615  case ANCHOR_LOOK_BEHIND_NOT: fputs("look_behind_not",f); container_p = TRUE; break;
6616  case ANCHOR_KEEP: fputs("keep",f); break;
6617 
6618  default:
6619  fprintf(f, "ERROR: undefined anchor type.\n");
6620  break;
6621  }
6622  break;
6623 
6624  case NT_BREF:
6625  {
6626  int* p;
6627  BRefNode* br = NBREF(node);
6628  p = BACKREFS_P(br);
6629  fprintf(f, "<backref:%"PRIxPTR">", (intptr_t)node);
6630  for (i = 0; i < br->back_num; i++) {
6631  if (i > 0) fputs(", ", f);
6632  fprintf(f, "%d", p[i]);
6633  }
6634  }
6635  break;
6636 
6637 #ifdef USE_SUBEXP_CALL
6638  case NT_CALL:
6639  {
6640  CallNode* cn = NCALL(node);
6641  fprintf(f, "<call:%"PRIxPTR">", (intptr_t)node);
6642  p_string(f, cn->name_end - cn->name, cn->name);
6643  }
6644  break;
6645 #endif
6646 
6647  case NT_QTFR:
6648  fprintf(f, "<quantifier:%"PRIxPTR">{%d,%d}%s\n", (intptr_t)node,
6649  NQTFR(node)->lower, NQTFR(node)->upper,
6650  (NQTFR(node)->greedy ? "" : "?"));
6651  print_indent_tree(f, NQTFR(node)->target, indent + add);
6652  break;
6653 
6654  case NT_ENCLOSE:
6655  fprintf(f, "<enclose:%"PRIxPTR"> ", (intptr_t)node);
6656  switch (NENCLOSE(node)->type) {
6657  case ENCLOSE_OPTION:
6658  fprintf(f, "option:%d", NENCLOSE(node)->option);
6659  break;
6660  case ENCLOSE_MEMORY:
6661  fprintf(f, "memory:%d", NENCLOSE(node)->regnum);
6662  break;
6664  fprintf(f, "stop-bt");
6665  break;
6666  case ENCLOSE_CONDITION:
6667  fprintf(f, "condition:%d", NENCLOSE(node)->regnum);
6668  break;
6669 
6670  default:
6671  break;
6672  }
6673  fprintf(f, "\n");
6674  print_indent_tree(f, NENCLOSE(node)->target, indent + add);
6675  break;
6676 
6677  default:
6678  fprintf(f, "print_indent_tree: undefined node type %d\n", NTYPE(node));
6679  break;
6680  }
6681 
6682  if (type != NT_LIST && type != NT_ALT && type != NT_QTFR &&
6683  type != NT_ENCLOSE)
6684  fprintf(f, "\n");
6685 
6686  if (container_p) print_indent_tree(f, NANCHOR(node)->target, indent + add);
6687 
6688  fflush(f);
6689 }
6690 #endif /* ONIG_DEBUG */
6691 
6692 #ifdef ONIG_DEBUG_PARSE_TREE
6693 static void
6694 print_tree(FILE* f, Node* node)
6695 {
6696  print_indent_tree(f, node, 0);
6697 }
6698 #endif
#define SIZE_OP_SET_OPTION_PUSH
Definition: regint.h:691
void onig_transfer(regex_t *to, regex_t *from)
Definition: regcomp.c:5627
#define ANCHOR_ANYCHAR_STAR_ML
Definition: regint.h:517
#define SIZE_OP_MEMORY_END_PUSH_REC
Definition: regint.h:696
#define IS_DYNAMIC_OPTION(option)
Definition: regint.h:382
#define NSTRING_SET_AMBIG(node)
Definition: regparse.h:108
#define BIT_STATUS_AT(stats, n)
Definition: regint.h:337
#define IS_ENCLOSE_CALLED(en)
Definition: regparse.h:144
void onig_scan_env_set_error_string(ScanEnv *env, int ecode ARG_UNUSED, UChar *arg, UChar *arg_end)
Definition: regparse.c:6313
int onig_new_without_alloc(regex_t *reg, const UChar *pattern, const UChar *pattern_end, OnigOptionType option, OnigEncoding enc, OnigSyntaxType *syntax, OnigErrorInfo *einfo)
Definition: regcomp.c:5944
static int add_bitset(regex_t *reg, BitSetRef bs)
Definition: regcomp.c:298
static void concat_opt_exact_info_str(OptExactInfo *to, UChar *s, UChar *end, int raw ARG_UNUSED, OnigEncoding enc)
Definition: regcomp.c:4600
int is_refered
Definition: regparse.h:186
#define ONIGERR_INVALID_CONDITION_PATTERN
Definition: oniguruma.h:564
unsigned int OnigCodePoint
Definition: oniguruma.h:114
#define IS_NULL(p)
Definition: regint.h:278
#define ENCLOSE_MEMORY
Definition: regparse.h:92
#define ONIGENC_CASE_FOLD_DEFAULT
Definition: oniguruma.h:131
unsigned int alloc
Definition: regint.h:423
int onig_new(regex_t **reg, const UChar *pattern, const UChar *pattern_end, OnigOptionType option, OnigEncoding enc, const OnigSyntaxType *syntax, OnigErrorInfo *einfo)
Definition: regcomp.c:5958
#define OPT_EXACT_MAXLEN
Definition: regcomp.c:4294
int * int_map_backward
Definition: oniguruma.h:696
#define IS_REPEAT_INFINITE(n)
Definition: regint.h:388
UChar * end
Definition: regparse.h:170
int onig_free_node_list(void)
Definition: regparse.c:1112
#define ALLOWED_ANCHOR_IN_LB_NOT
#define ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL
Definition: oniguruma.h:584
int onig_node_str_cat(Node *node, const UChar *s, const UChar *end)
Definition: regparse.c:1445
int regnum
Definition: regparse.h:196
Node * onig_node_list_add(Node *list, Node *x)
Definition: regparse.c:1261
#define IS_SYNTAX_BV(syn, bvm)
Definition: regparse.h:326
static void alt_merge_opt_exact_info(OptExactInfo *to, OptExactInfo *add, OptEnv *env)
Definition: regcomp.c:4617
static int get_char_length_tree(Node *node, regex_t *reg, int *len)
Definition: regcomp.c:2512
static void concat_opt_anc_info(OptAncInfo *to, OptAncInfo *left, OptAncInfo *right, OnigDistance left_len, OnigDistance right_len)
Definition: regcomp.c:4484
struct _Node * target
Definition: regparse.h:226
#define ONIGENC_IS_CODE_WORD(enc, code)
Definition: oniguruma.h:302
#define NSTRING_IS_RAW(node)
Definition: regparse.h:111
void onig_print_compiled_byte_code(FILE *f, UChar *bp, UChar *bpend, UChar **nextp, OnigEncoding enc)
#define WORD_ALIGNMENT_SIZE
Definition: regint.h:301
#define ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM
Definition: oniguruma.h:135
int i
Definition: win32ole.c:784
#define BIT_STATUS_ON_AT(stats, n)
Definition: regint.h:340
#define ONIG_OPTION_SINGLELINE
Definition: oniguruma.h:359
#define ONIG_OPTIMIZE_EXACT
Definition: regint.h:323
#define THRESHOLD_CASE_FOLD_ALT_FOR_EXPANSION
#define ANCHOR_END_BUF
Definition: regint.h:503
static int compile_length_quantifier_node(QtfrNode *qn, regex_t *reg)
Definition: regcomp.c:975
#define SIZE_OPCODE
Definition: regint.h:647
static int max(int a, int b)
Definition: strftime.c:141
#define BBUF_ADD1(buf, byte)
Definition: regint.h:465
#define d1
if(dispIdMember==DISPID_VALUE)
Definition: win32ole.c:791
#define ANCHOR_WORD_BEGIN
Definition: regint.h:509
#define SINGLE_BYTE_SIZE
Definition: regint.h:392
#define SIZE_MEMNUM
Definition: regint.h:651
#define IS_ENCLOSE_RECURSION(en)
Definition: regparse.h:146
#define SIZE_OP_REPEAT_INC
Definition: regint.h:684
int num_call
Definition: regparse.h:301
#define NSTRING_IS_AMBIG(node)
Definition: regparse.h:112
static int compile_call(CallNode *node, regex_t *reg)
Definition: regcomp.c:401
#define BBUF_WRITE(buf, pos, bytes, n)
Definition: regint.h:450
static int add_multi_byte_cclass(BBuf *mbuf, regex_t *reg)
Definition: regcomp.c:563
static int get_char_length_tree1(Node *node, regex_t *reg, int *len, int level)
Definition: regcomp.c:2388
MinMaxLen len
Definition: regcomp.c:4333
OnigUChar * par_end
Definition: oniguruma.h:637
unsigned int OnigCaseFoldType
Definition: oniguruma.h:121
#define ONIGENC_MBC_CASE_FOLD(enc, flag, pp, end, buf)
Definition: oniguruma.h:234
static void swap_node(Node *a, Node *b)
Definition: regcomp.c:71
int sourceline
Definition: regparse.h:320
unsigned int bt_mem_end
Definition: oniguruma.h:674
#define REGEX_CHAIN_HEAD(reg)
Definition: regcomp.c:5634
#define NST_CALLED
Definition: regparse.h:133
#define IS_ENCLOSE_MAX_FIXED(en)
Definition: regparse.h:150
static int comp_distance_value(MinMaxLen *d1, MinMaxLen *d2, int v1, int v2)
Definition: regcomp.c:4398
#define SCANENV_MEM_NODES(senv)
Definition: regparse.h:283
size_t onig_memsize(const regex_t *reg)
Definition: regcomp.c:5596
#define THREAD_SYSTEM_END
Definition: regint.h:117
static int add_bytes(regex_t *reg, UChar *bytes, OnigDistance len)
Definition: regcomp.c:291
#define IS_CODE_SB_WORD(enc, code)
Definition: regint.h:844
int onig_names_free(regex_t *reg)
Definition: regparse.c:486
#define ANCHOR_BEGIN_LINE
Definition: regint.h:501
OnigPosition * end
Definition: oniguruma.h:618
const int id
Definition: nkf.c:209
#define SIZE_OP_CALL
Definition: regint.h:706
static void select_opt_exact_info(OnigEncoding enc, OptExactInfo *now, OptExactInfo *alt)
Definition: regcomp.c:4656
int onig_is_in_code_range(const UChar *p, OnigCodePoint code)
Definition: regcomp.c:6028
#define QUANTIFIER_EXPAND_LIMIT_SIZE
Definition: regcomp.c:735
int nest_level
Definition: regparse.h:238
UChar * error
Definition: regparse.h:298
int onig_node_str_set(Node *node, const UChar *s, const UChar *end)
Definition: regparse.c:1481
MinMaxLen mmd
Definition: regcomp.c:4325
OnigCaseFoldType onig_get_default_case_fold_flag(void)
Definition: regcomp.c:36
#define SIZE_OP_POP_POS
Definition: regint.h:688
#define IS_NCCLASS_NOT(nd)
Definition: regint.h:767
#define ONIGENC_IS_CODE_PRINT(enc, code)
Definition: oniguruma.h:280
int reach_end
Definition: regcomp.c:4318
static void concat_left_node_opt_info(OnigEncoding enc, NodeOptInfo *to, NodeOptInfo *add)
Definition: regcomp.c:4836
#define GET_CHAR_LEN_VARLEN
Definition: regcomp.c:2383
static OnigDistance distance_add(OnigDistance d1, OnigDistance d2)
Definition: regcomp.c:96
#define NENCLOSE(node)
Definition: regparse.h:79
static int subexp_recursive_check(Node *node)
Definition: regcomp.c:2994
OptExactInfo exm
Definition: regcomp.c:4337
static int compile_length_string_raw_node(StrNode *sn, regex_t *reg)
Definition: regcomp.c:506
int target_empty_info
Definition: regparse.h:183
UChar * s
Definition: regparse.h:169
#define IS_ENCLOSE_MIN_FIXED(en)
Definition: regparse.h:149
OnigDistance anchor_dmax
Definition: oniguruma.h:690
#define ANCHOR_END_LINE
Definition: regint.h:505
#define ANCHOR_PREC_READ
Definition: regint.h:511
int opt_count
Definition: regparse.h:204
UnsetAddrList * unset_addr_list
Definition: regparse.h:303
#define IS_BACKREF_NEST_LEVEL(bn)
Definition: regparse.h:161
#define NT_QTFR
Definition: regparse.h:45
#define SIZE_ABSADDR
Definition: regint.h:649
#define ONIGERR_INVALID_COMBINATION_OF_OPTIONS
Definition: oniguruma.h:593
static int map_position_value(OnigEncoding enc, int i)
Definition: regcomp.c:4345
struct _Node * next_head_exact
Definition: regparse.h:185
int onigenc_strlen(OnigEncoding enc, const UChar *p, const UChar *end)
Definition: regenc.c:123
static void set_optimize_map_info(regex_t *reg, OptMapInfo *m)
Definition: regcomp.c:5315
#define CKN_ON
Definition: regcomp.c:736
#define GET_CODE_POINT(code, p)
Definition: regint.h:669
#define ONIGERR_NEVER_ENDING_RECURSION
Definition: oniguruma.h:585
Node * onig_node_new_alt(Node *left, Node *right)
Definition: regparse.c:1279
#define NQ_TARGET_IS_EMPTY_MEM
Definition: regparse.h:121
void * PointerType
Definition: regint.h:645
int rb_const_defined(VALUE, ID)
Definition: variable.c:2103
unsigned char map[ONIG_CHAR_TABLE_SIZE]
Definition: oniguruma.h:694
#define IS_CALL_RECURSION(cn)
Definition: regparse.h:158
#define BIT_STATUS_ON_AT_SIMPLE(stats, n)
Definition: regint.h:347
#define ALLOWED_ANCHOR_IN_LB
static int subexp_recursive_check_trav(Node *node, ScanEnv *env)
Definition: regcomp.c:3050
#define ONIGENC_CODE_TO_MBC_MAXLEN
Definition: oniguruma.h:191
#define REPEAT_RANGE_ALLOC
static int optimize_node_left(Node *node, NodeOptInfo *opt, OptEnv *env)
Definition: regcomp.c:4910
int onig_init(void)
Definition: regcomp.c:5981
static int compile_string_node(Node *node, regex_t *reg)
Definition: regcomp.c:515
#define ONIG_OPTIMIZE_EXACT_IC
Definition: regint.h:326
#define NST_MEM_BACKREFED
Definition: regparse.h:130
Definition: regint.h:599
static int add_opcode_rel_addr(regex_t *reg, int opcode, int addr)
Definition: regcomp.c:280
#define BACKREFS_P(br)
Definition: regparse.h:116
#define GET_OPTION_INC(option, p)
Definition: regint.h:664
#define NST_RECURSION
Definition: regparse.h:132
#define ONIGERR_TYPE_BUG
Definition: oniguruma.h:531
unsigned char * p
Definition: oniguruma.h:662
#define GET_LENGTH_INC(len, p)
Definition: regint.h:661
#define ANCHOR_BEGIN_POSITION
Definition: regint.h:502
UnsetAddr * us
Definition: regparse.h:217
#define ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME_CALL
Definition: oniguruma.h:500
#define ONIG_STATE_MODIFY
Definition: oniguruma.h:655
OnigCaseFoldType case_fold_flag
Definition: regcomp.c:4305
void onig_chain_link_add(regex_t *to, regex_t *add)
Definition: regcomp.c:5641
#define RECURSION_INFINITE
Definition: regcomp.c:2852
regex_t * reg
Definition: regparse.h:300
#define ONIGERR_INVALID_LOOK_BEHIND_PATTERN
Definition: oniguruma.h:562
#define NST_ADDR_FIXED
Definition: regparse.h:134
OptAncInfo anc
Definition: regcomp.c:4316
int left_anchor
Definition: regcomp.c:4310
unsigned int OnigOptionType
Definition: oniguruma.h:349
OnigDistance anchor_dmin
Definition: oniguruma.h:689
static int set_optimize_exact_info(regex_t *reg, OptExactInfo *e)
Definition: regcomp.c:5259
UChar buf[NODE_STR_BUF_SIZE]
Definition: regparse.h:173
#define ONIGENC_CTYPE_WORD
Definition: oniguruma.h:208
#define ANCHOR_NOT_WORD_BOUND
Definition: regint.h:508
int num_named
Definition: regparse.h:307
unsigned int bt_mem_start
Definition: oniguruma.h:673
MinMaxLen mmd
Definition: regcomp.c:4315
int char_len
Definition: regparse.h:203
#define ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc, case_fold_flag, p, end, acs)
Definition: oniguruma.h:242
unsigned int BitStatusType
Definition: regint.h:332
#define BIT_STATUS_ON_ALL(stats)
Definition: regint.h:336
unsigned char * exact
Definition: oniguruma.h:692
OptAncInfo anc
Definition: regcomp.c:4335
#define SIZE_OP_ANYCHAR_STAR_PEEK_NEXT
Definition: regint.h:678
static int add_opcode_option(regex_t *reg, int opcode, OnigOptionType option)
Definition: regcomp.c:305
#define SIZE_OP_FAIL
Definition: regint.h:692
static int compile_tree_empty_check(Node *node, regex_t *reg, int empty_info)
Definition: regcomp.c:369
#define SIZE_STATE_CHECK_NUM
Definition: regint.h:652
#define IS_ENCLOSE_CLEN_FIXED(en)
Definition: regparse.h:151
#define NBREF(node)
Definition: regparse.h:77
#define NT_ALT
Definition: regparse.h:49
#define IN_ALT
Definition: regcomp.c:3834
#define head
Definition: st.c:107
static void unset_addr_list_end(UnsetAddrList *uslist)
Definition: regcomp.c:181
#define ALLOWED_ENCLOSE_IN_LB
void onig_free_body(regex_t *reg)
Definition: regcomp.c:5570
#define COMPILE_INIT_SIZE
#define BIT_STATUS_CLEAR(stats)
Definition: regint.h:335
#define SIZE_OP_MEMORY_START
Definition: regint.h:693
UnsetAddrList * unset_addr_list
Definition: regparse.h:227
#define SIZE_OPTION
Definition: regint.h:654
static int compile_cclass_node(CClassNode *cc, regex_t *reg)
Definition: regcomp.c:616
struct _Node * target
Definition: regparse.h:244
int group_num
Definition: regparse.h:223
static int setup_tree(Node *node, regex_t *reg, int state, ScanEnv *env)
Definition: regcomp.c:3849
static int compile_quantifier_node(QtfrNode *qn, regex_t *reg)
Definition: regcomp.c:1040
#define NCAR(node)
Definition: regparse.h:84
#define COMP_EM_BASE
Win32OLEIDispatch * p
Definition: win32ole.c:786
#define xalloca
Definition: regint.h:189
int onig_compile(regex_t *reg, const UChar *pattern, const UChar *pattern_end, OnigErrorInfo *einfo, const char *sourcefile, int sourceline)
Definition: regcomp.c:5675
#define SIZE_OP_PUSH_STOP_BT
Definition: regint.h:699
Definition: regint.h:420
struct _Node * head_exact
Definition: regparse.h:184
static int add_option(regex_t *reg, OnigOptionType option)
Definition: regcomp.c:273
#define ALLOWED_ENCLOSE_IN_LB_NOT
#define IS_IGNORECASE(option)
Definition: regint.h:363
static int numbered_ref_check(Node *node)
Definition: regcomp.c:1980
#define SIZE_OP_CONDITION
Definition: regint.h:708
void onig_chain_reduce(regex_t *reg)
Definition: regcomp.c:5650
BitSet bs
Definition: regint.h:779
static int is_full_opt_exact_info(OptExactInfo *ex)
Definition: regcomp.c:4548
int state
Definition: regparse.h:234
OnigRepeatRange * repeat_range
Definition: oniguruma.h:677
#define GET_MEMNUM_INC(num, p)
Definition: regint.h:662
static int set_optimize_info_from_tree(Node *node, regex_t *reg, ScanEnv *scan_env)
Definition: regcomp.c:5343
#define STACK_POP_LEVEL_MEM_START
Definition: regint.h:318
static int add_rel_addr(regex_t *reg, int addr)
Definition: regcomp.c:228
int RelAddrType
Definition: regint.h:639
#define STACK_POP_LEVEL_ALL
Definition: regint.h:319
const char * sourcefile
Definition: regparse.h:319
#define ONIG_OPTIMIZE_EXACT_BM_NOT_REV_IC
Definition: regint.h:329
static int quantifiers_memory_node_info(Node *node)
Definition: regcomp.c:2073
static int compile_length_option_node(EncloseNode *node, regex_t *reg)
Definition: regcomp.c:1168
int onig_bbuf_init(BBuf *buf, OnigDistance size)
Definition: regcomp.c:148
#define THREAD_ATOMIC_END
Definition: regint.h:119
#define SIZE_OP_NULL_CHECK_START
Definition: regint.h:701
static void add_mml(MinMaxLen *to, MinMaxLen *from)
Definition: regcomp.c:4442
#define putc(_c, _stream)
Definition: win32.h:125
#define IS_NCCLASS_SHARE(nd)
Definition: regint.h:768
#define ONIG_IS_OPTION_ON(options, option)
Definition: oniguruma.h:379
static int compile_length_tree(Node *node, regex_t *reg)
Definition: regcomp.c:1567
static int check_type_tree(Node *node, int type_mask, int enclose_mask, int anchor_mask)
Definition: regcomp.c:2801
int capa
Definition: regparse.h:172
#define GET_CHAR_LEN_TOP_ALT_VARLEN
Definition: regcomp.c:2384
#define RECURSION_EXIST
Definition: regcomp.c:2851
OnigOptionType options
Definition: regcomp.c:4304
#define SIZE_OP_POP
Definition: regint.h:681
OnigCaseFoldType case_fold_flag
Definition: oniguruma.h:682
#define MAX_NODE_OPT_INFO_REF_COUNT
Definition: regcomp.c:4907
static void set_mml(MinMaxLen *mml, OnigDistance min, OnigDistance max)
Definition: regcomp.c:4422
OnigDistance min_len
Definition: regparse.h:201
int AbsAddrType
Definition: regint.h:640
#define ONIG_CHAR_TABLE_SIZE
Definition: oniguruma.h:649
int onigenc_init(void)
Definition: regenc.c:36
#define NULL_NODE
Definition: regparse.h:280
static void clear_opt_exact_info(OptExactInfo *ex)
Definition: regcomp.c:4554
#define ONIG_OPTION_NEGATE_SINGLELINE
Definition: oniguruma.h:362
#define level
#define BBUF_INIT(buf, size)
Definition: regint.h:426
#define ONIG_STATE_COMPILING
Definition: oniguruma.h:654
static int disable_noname_group_capture(Node **root, regex_t *reg, ScanEnv *env)
Definition: regcomp.c:2011
#define SET_ENCLOSE_STATUS(node, f)
Definition: regparse.h:141
const OnigSyntaxType * syntax
Definition: regparse.h:291
#define ONIG_STATE_NORMAL
Definition: oniguruma.h:652
static void copy_mml(MinMaxLen *to, MinMaxLen *from)
Definition: regcomp.c:4435
#define ARG_UNUSED
Definition: nkf.h:181
#define IS_MULTILINE(option)
Definition: regint.h:362
static void alt_merge_mml(MinMaxLen *to, MinMaxLen *from)
Definition: regcomp.c:4458
static int expand_case_fold_string(Node *node, regex_t *reg)
Definition: regcomp.c:3545
#define ALIGNMENT_RIGHT(addr)
Definition: regint.h:309
static int is_equal_mml(MinMaxLen *a, MinMaxLen *b)
Definition: regcomp.c:4415
#define ONIGERR_INVALID_BACKREF
Definition: oniguruma.h:574
#define DISABLE_CASE_FOLD_MULTI_CHAR(case_fold_flag)
Definition: regint.h:384
static int divide_look_behind_alternatives(Node *node)
Definition: regcomp.c:3238
OptAncInfo anc
Definition: regcomp.c:4326
#define add(x, y)
Definition: date_strftime.c:23
#define SIZE_OP_PUSH_LOOK_BEHIND_NOT
Definition: regint.h:704
#define NQ_TARGET_IS_EMPTY_REC
Definition: regparse.h:122
#define BITSET_AT(bs, pos)
Definition: regint.h:414
int onig_free_shared_cclass_table(void)
Definition: regparse.c:5404
int * back_dynamic
Definition: regparse.h:237
int lower
Definition: regparse.h:180
#define NCCLASS(node)
Definition: regparse.h:75
OnigCaseFoldType OnigDefaultCaseFoldFlag
Definition: regcomp.c:33
#define xmemcpy
Definition: regint.h:182
static int distance_value(MinMaxLen *mm)
Definition: regcomp.c:4369
#define CHECK_NULL_RETURN_MEMERR(p)
Definition: regint.h:281
Node * onig_node_new_enclose(int type)
Definition: regparse.c:1416
#define NCTYPE(node)
Definition: regparse.h:76
int onig_reg_init(regex_t *reg, OnigOptionType option, OnigCaseFoldType case_fold_flag, OnigEncoding enc, const OnigSyntaxType *syntax)
Definition: regcomp.c:5898
#define ONIG_OPTIMIZE_MAP
Definition: regint.h:327
#define SIZE_OP_MEMORY_END_PUSH
Definition: regint.h:695
#define SIZE_OP_RETURN
Definition: regint.h:707
#define NQ_TARGET_IS_EMPTY
Definition: regparse.h:120
size_t onig_region_memsize(const OnigRegion *regs)
Definition: regcomp.c:5611
static void clear_node_opt_info(NodeOptInfo *opt)
Definition: regcomp.c:4819
static int compile_tree_n_times(Node *node, int n, regex_t *reg)
Definition: regcomp.c:416
static void alt_merge_opt_anc_info(OptAncInfo *to, OptAncInfo *add)
Definition: regcomp.c:4541
static int setup_subexp_call(Node *node, ScanEnv *env)
Definition: regcomp.c:3119
short int StateCheckNumType
Definition: regint.h:644
#define NT_LIST
Definition: regparse.h:48
struct re_pattern_buffer * chain
Definition: oniguruma.h:701
static void clear_optimize_info(regex_t *reg)
Definition: regcomp.c:5400
int err
Definition: win32.c:87
#define NT_ANCHOR
Definition: regparse.h:47
#define ANCHOR_WORD_END
Definition: regint.h:510
#define IN_REPEAT
Definition: regcomp.c:3836
#define FOUND_CALLED_NODE
#define SIZE_OP_ANYCHAR_STAR
Definition: regint.h:677
#define ALLOWED_TYPE_IN_LB
static int unset_addr_list_init(UnsetAddrList *uslist, int size)
Definition: regcomp.c:168
#define GET_ALIGNMENT_PAD_SIZE(addr, pad_size)
Definition: regint.h:303
#define SIZE_LENGTH
Definition: regint.h:650
int upper
Definition: regparse.h:181
#define NST_CLEN_FIXED
Definition: regparse.h:127
static int compile_length_string_node(Node *node, regex_t *reg)
Definition: regcomp.c:467
static void copy_opt_anc_info(OptAncInfo *to, OptAncInfo *from)
Definition: regcomp.c:4478
static int update_string_node_case_fold(regex_t *reg, Node *node)
Definition: regcomp.c:3359
#define ONIGERR_UNDEFINED_NAME_REFERENCE
Definition: oniguruma.h:581
#define EXPAND_STRING_MAX_LENGTH
static int set_bm_skip(UChar *s, UChar *end, regex_t *reg, UChar skip[], int **int_skip, int ignore_case)
Definition: regcomp.c:4224
#define ONIGENC_CODE_TO_MBCLEN(enc, code)
Definition: oniguruma.h:269
#define SIZE_OP_MEMORY_END
Definition: regint.h:697
#define ONIG_OPTIMIZE_EXACT_BM_IC
Definition: regint.h:328
#define SIZE_OP_MEMORY_START_PUSH
Definition: regint.h:694
#define NT_STR
Definition: regparse.h:40
int type
Definition: regparse.h:243
#define NQTFR(node)
Definition: regparse.h:78
static void alt_merge_node_opt_info(NodeOptInfo *to, NodeOptInfo *add, OptEnv *env)
Definition: regcomp.c:4895
#define SIZE_BITSET
Definition: regint.h:404
static int noname_disable_map(Node **plink, GroupNumRemap *map, int *counter)
Definition: regcomp.c:1835
#define THREAD_ATOMIC_START
Definition: regint.h:118
#define ONIGENC_IS_MBC_WORD(enc, s, end)
Definition: oniguruma.h:224
#define ONIGENC_CASE_FOLD_MIN
Definition: oniguruma.h:130
#define TRUE
Definition: nkf.h:175
struct _Node * target
Definition: regparse.h:211
static int compile_anchor_node(AnchorNode *node, regex_t *reg)
Definition: regcomp.c:1467
MinMaxLen mmd
Definition: regcomp.c:4302
#define IS_ENCLOSE_NAME_REF(en)
Definition: regparse.h:155
Bits * BitSetRef
Definition: regint.h:402
#define SIZE_OP_PUSH_IF_PEEK_NEXT
Definition: regint.h:683
#define ONIGENC_MBC_CASE_FOLD_MAXLEN
Definition: oniguruma.h:192
OnigDistance max_len
Definition: regparse.h:202
unsigned int flag
Definition: regparse.h:171
#define NST_MARK2
Definition: regparse.h:129
#define NT_CANY
Definition: regparse.h:43
UChar * name_end
Definition: regparse.h:225
OnigEncoding enc
Definition: regcomp.c:4303
int onig_name_to_group_numbers(regex_t *reg, const UChar *name, const UChar *name_end, int **nums)
Definition: regparse.c:848
BBuf * mbuf
Definition: regint.h:780
int LengthType
Definition: regint.h:641
OptMapInfo map
Definition: regcomp.c:4340
#define ANCHOR_END_BUF_MASK
Definition: regparse.h:90
int right_anchor
Definition: regcomp.c:4311
static int get_max_match_length(Node *node, OnigDistance *max, ScanEnv *env)
Definition: regcomp.c:2266
unsigned char buf[MIME_BUF_SIZE]
Definition: nkf.c:4308
#define IS_QUANTIFIER_IN_REPEAT(qn)
Definition: regparse.h:162
static int expand_case_fold_string_alt(int item_num, OnigCaseFoldCodeItem items[], UChar *p, int slen, UChar *end, regex_t *reg, Node **rnode)
Definition: regcomp.c:3427
static int add_compile_string_length(UChar *s ARG_UNUSED, int mb_len, OnigDistance str_len, regex_t *reg ARG_UNUSED, int ignore_case)
Definition: regcomp.c:428
int onig_is_code_in_cc(OnigEncoding enc, OnigCodePoint code, CClassNode *cc)
Definition: regcomp.c:6072
unsigned int uintptr_t
Definition: win32.h:94
#define SIZE_OP_JUMP
Definition: regint.h:679
#define NTYPE(node)
Definition: regparse.h:71
static int is_anychar_star_quantifier(QtfrNode *qn)
Definition: regcomp.c:726
#define ANCHOR_LOOK_BEHIND_NOT
Definition: regint.h:514
int state
Definition: regparse.h:178
int type
Definition: tcltklib.c:111
BitStatusType backrefed_mem
Definition: regparse.h:295
static int options(unsigned char *cp)
Definition: nkf.c:6355
#define P_(args)
Definition: oniguruma.h:71
int ignore_case
Definition: regcomp.c:4319
const char * name
Definition: oniguruma.h:162
static int renumber_node_backref(Node *node, GroupNumRemap *map)
Definition: regcomp.c:1903
#define NCALL(node)
Definition: regparse.h:82
#define SIZE_OP_NULL_CHECK_END
Definition: regint.h:702
static int compile_length_anchor_node(AnchorNode *node, regex_t *reg)
Definition: regcomp.c:1434
OnigEncoding enc
Definition: oniguruma.h:635
static int expand_case_fold_make_rem_string(Node **rnode, UChar *s, UChar *end, regex_t *reg)
Definition: regcomp.c:3405
RUBY_EXTERN VALUE rb_cThread
Definition: ruby.h:1459
int num_mem
Definition: regparse.h:305
static void alt_merge_opt_map_info(OnigEncoding enc, OptMapInfo *to, OptMapInfo *add)
Definition: regcomp.c:4784
#define ONIGENC_CODE_TO_MBC(enc, code, buf)
Definition: oniguruma.h:270
OnigDistance max
Definition: regcomp.c:4298
Node * onig_node_new_list(Node *left, Node *right)
Definition: regparse.c:1255
int intptr_t
Definition: win32.h:86
#define NSTRING_IS_DONT_GET_OPT_INFO(node)
Definition: regparse.h:113
#define IS_ENCLOSE_ADDR_FIXED(en)
Definition: regparse.h:145
static int add_pointer(regex_t *reg, void *addr)
Definition: regcomp.c:264
#define IS_BACKREF_NAME_REF(bn)
Definition: regparse.h:160
static int is_set_opt_anc_info(OptAncInfo *to, int anc)
Definition: regcomp.c:4515
#define ONIGENC_IS_MBC_ASCII_WORD(enc, s, end)
Definition: oniguruma.h:226
static Node * get_head_value_node(Node *node, int exact, regex_t *reg)
Definition: regcomp.c:2713
#define NT_CCLASS
Definition: regparse.h:41
#define SIZE_OP_POP_STOP_BT
Definition: regint.h:700
static int setup_look_behind(Node *node, regex_t *reg, ScanEnv *env)
Definition: regcomp.c:3268
#define ONIGERR_PARSER_BUG
Definition: oniguruma.h:532
#define ANCHOR_SEMI_END_BUF
Definition: regint.h:504
#define ONIG_OPTIMIZE_NONE
Definition: regint.h:322
unsigned int used
Definition: oniguruma.h:663
Node * onig_node_new_str(const UChar *s, const UChar *end)
Definition: regparse.c:1548
OnigPosition * beg
Definition: oniguruma.h:617
Definition: regint.h:524
#define IS_ENCLOSE_MARK1(en)
Definition: regparse.h:147
#define ONIG_INFINITE_DISTANCE
Definition: oniguruma.h:119
UChar * error_end
Definition: regparse.h:299
#define SIZE_OP_FAIL_LOOK_BEHIND_NOT
Definition: regint.h:705
Node ** mem_nodes_dynamic
Definition: regparse.h:311
static void set_sub_anchor(regex_t *reg, OptAncInfo *anc)
Definition: regcomp.c:5332
UChar s[OPT_EXACT_MAXLEN]
Definition: regcomp.c:4321
#define ONIG_OPTION_CAPTURE_GROUP
Definition: oniguruma.h:364
static int add_opcode(regex_t *reg, int opcode)
Definition: regcomp.c:210
#define ONIGENC_MBC_TO_CODE(enc, p, end)
Definition: oniguruma.h:268
static void copy_node_opt_info(NodeOptInfo *to, NodeOptInfo *from)
Definition: regcomp.c:4830
BitStatusType bt_mem_end
Definition: regparse.h:294
#define ONIGENC_MBC_MINLEN(enc)
Definition: oniguruma.h:266
#define ENCLOSE_STOP_BACKTRACK
Definition: regparse.h:94
void xfree(void *)
#define USE_SUBEXP_CALL
Definition: regint.h:60
static int is_left_anchor(int anc)
Definition: regcomp.c:4504
#define UChar
Definition: oniguruma.h:110
OnigEncoding enc
Definition: regparse.h:290
void onig_node_free(Node *node)
Definition: regparse.c:1029
#define ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED
Definition: oniguruma.h:575
#define NANCHOR(node)
Definition: regparse.h:80
static int subexp_inf_recursive_check(Node *node, ScanEnv *env, int head)
Definition: regcomp.c:2855
#define NST_STOP_BT_SIMPLE_REPEAT
Definition: regparse.h:131
static int add_char_amb_opt_map_info(OptMapInfo *map, UChar *p, UChar *end, OnigEncoding enc, OnigCaseFoldType case_fold_flag)
Definition: regcomp.c:4730
#define ANCHOR_ANYCHAR_STAR
Definition: regint.h:516
#define NT_BREF
Definition: regparse.h:44
#define SIZE_RELADDR
Definition: regint.h:648
static unsigned char PadBuf[WORD_ALIGNMENT_SIZE]
Definition: regcomp.c:50
OnigDistance min
Definition: regcomp.c:4297
#define NST_IN_REPEAT
Definition: regparse.h:137
#define IS_NEED_STR_LEN_OP_EXACT(op)
Definition: regcomp.c:319
static int compile_string_raw_node(StrNode *sn, regex_t *reg)
Definition: regcomp.c:554
#define STACK_POP_LEVEL_FREE
Definition: regint.h:317
static void add_char_opt_map_info(OptMapInfo *map, UChar c, OnigEncoding enc)
Definition: regcomp.c:4721
#define CHECK_NULL_RETURN(p)
Definition: regint.h:280
#define SIZE_OP_FAIL_POS
Definition: regint.h:689
unsigned char * exact_end
Definition: oniguruma.h:693
static int add_length(regex_t *reg, OnigDistance len)
Definition: regcomp.c:246
OnigDistance dmax
Definition: oniguruma.h:698
static void copy_opt_map_info(OptMapInfo *to, OptMapInfo *from)
Definition: regcomp.c:4715
static void clear_opt_anc_info(OptAncInfo *anc)
Definition: regcomp.c:4471
int size
Definition: encoding.c:52
#define ONIGENC_IS_UNDEF(enc)
Definition: oniguruma.h:219
#define f
static int compile_tree(Node *node, regex_t *reg)
Definition: regcomp.c:1660
#define ONIGENC_IS_ALLOWED_REVERSE_MATCH(enc, s, end)
Definition: oniguruma.h:236
unsigned int used
Definition: regint.h:422
#define SIZE_OP_SET_OPTION
Definition: regint.h:690
#define ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP
Definition: oniguruma.h:497
OnigRegexType regex_t
Definition: oniguruma.h:707
#define GET_RELADDR_INC(addr, p)
Definition: regint.h:659
unsigned int alloc
Definition: oniguruma.h:664
#define NST_MARK1
Definition: regparse.h:128
#define ONIG_MAX_CAPTURE_HISTORY_GROUP
Definition: oniguruma.h:600
#define xmalloc
Definition: defines.h:64
static int onig_inited
Definition: regcomp.c:5895
static int add_compile_string(UChar *s, int mb_len, OnigDistance str_len, regex_t *reg, int ignore_case)
Definition: regcomp.c:445
#define ANCHOR_PREC_READ_NOT
Definition: regint.h:512
#define ENCLOSE_CONDITION
Definition: regparse.h:95
size_t OnigDistance
Definition: oniguruma.h:116
#define IN_NOT
Definition: regcomp.c:3835
void onig_reduce_nested_quantifier(Node *pnode, Node *cnode)
Definition: regparse.c:2269
#define ENCLOSE_OPTION
Definition: regparse.h:93
static int is_not_included(Node *x, Node *y, regex_t *reg)
Definition: regcomp.c:2519
#define NST_MIN_FIXED
Definition: regparse.h:125
unsigned int capture_history
Definition: oniguruma.h:672
#define IS_ENCLOSE_STOP_BT_SIMPLE_REPEAT(en)
Definition: regparse.h:152
#define ONIGERR_MEMORY
Definition: oniguruma.h:530
OptExactInfo exb
Definition: regcomp.c:4336
#define IS_FIND_CONDITION(option)
Definition: regint.h:367
#define THREAD_SYSTEM_INIT
Definition: regint.h:116
UChar * p
Definition: regint.h:421
static int renumber_by_map(Node *node, GroupNumRemap *map)
Definition: regcomp.c:1931
static OnigDistance distance_multiply(OnigDistance d, int m)
Definition: regcomp.c:107
#define PRIuSIZE
Definition: ruby.h:189
#define NSTR(node)
Definition: regparse.h:74
v
Definition: win32ole.c:798
static void clear_opt_map_info(OptMapInfo *map)
Definition: regcomp.c:4687
static void select_opt_map_info(OptMapInfo *now, OptMapInfo *alt)
Definition: regcomp.c:4752
#define SIZE_OP_MEMORY_END_REC
Definition: regint.h:698
OnigCodePoint code[ONIGENC_MAX_COMP_CASE_FOLD_CODE_LEN]
Definition: oniguruma.h:146
#define ANCHOR_ANYCHAR_STAR_MASK
Definition: regparse.h:89
OptExactInfo expr
Definition: regcomp.c:4338
static void concat_opt_exact_info(OptExactInfo *to, OptExactInfo *add, OnigEncoding enc)
Definition: regcomp.c:4571
static void set_bound_node_opt_info(NodeOptInfo *opt, MinMaxLen *mmd)
Definition: regcomp.c:4811
OnigDistance dmin
Definition: oniguruma.h:697
#define ONIGENC_MBC_MAXLEN_DIST(enc)
Definition: oniguruma.h:265
static int get_min_match_length(Node *node, OnigDistance *min, ScanEnv *env)
Definition: regcomp.c:2142
OnigOptionType option
Definition: regparse.h:288
#define CLEAR_ENCLOSE_STATUS(node, f)
Definition: regparse.h:142
static int compile_range_repeat_node(QtfrNode *qn, int target_len, int empty_info, regex_t *reg)
Definition: regcomp.c:690
static int compile_enclose_node(EncloseNode *node, regex_t *reg)
Definition: regcomp.c:1300
struct _Node * target
Definition: regparse.h:179
#define NTYPE2BIT(type)
Definition: regparse.h:53
#define SIZE_POINTER
Definition: regint.h:656
int greedy
Definition: regparse.h:182
#define ONIG_OPTION_DONT_CAPTURE_GROUP
Definition: oniguruma.h:363
#define ONIGERR_INVALID_ARGUMENT
Definition: oniguruma.h:540
int value
Definition: regcomp.c:4328
int onig_is_code_in_cc_len(int elen, OnigCodePoint code, CClassNode *cc)
Definition: regcomp.c:6049
int onig_parse_make_tree(Node **root, const UChar *pattern, const UChar *end, regex_t *reg, ScanEnv *env)
Definition: regparse.c:6286
#define NT_CALL
Definition: regparse.h:50
#define xrealloc
Definition: defines.h:67
#define SIZE_OP_PUSH_POS
Definition: regint.h:686
#define ONIGERR_DEFAULT_ENCODING_IS_NOT_SET
Definition: oniguruma.h:537
#define NT_CTYPE
Definition: regparse.h:42
#define ONIG_OPTION_IGNORECASE
Definition: oniguruma.h:355
#define NST_MAX_FIXED
Definition: regparse.h:126
#define ONIG_OPTIMIZE_EXACT_BM_NOT_REV
Definition: regint.h:325
static int compile_length_cclass_node(CClassNode *cc, regex_t *reg)
Definition: regcomp.c:586
#define ANCHOR_BEGIN_BUF
Definition: regint.h:500
#define IN_VAR_REPEAT
Definition: regcomp.c:3837
OnigEncoding enc
Definition: oniguruma.h:679
#define SIZE_OP_PUSH_OR_JUMP_EXACT1
Definition: regint.h:682
static void copy_opt_env(OptEnv *to, OptEnv *from)
Definition: regcomp.c:4465
static void copy_opt_exact_info(OptExactInfo *to, OptExactInfo *from)
Definition: regcomp.c:4565
#define IS_NOT_NULL(p)
Definition: regint.h:279
static int entry_repeat_range(regex_t *reg, int id, int lower, int upper)
Definition: regcomp.c:659
#define IS_NODE_TYPE_SIMPLE(type)
Definition: regparse.h:67
int onig_end(void)
Definition: regcomp.c:6004
#define ANCHOR_LOOK_BEHIND
Definition: regint.h:513
short int MemNumType
Definition: regint.h:643
#define IS_ENCLOSE_MARK2(en)
Definition: regparse.h:148
static int add_abs_addr(regex_t *reg, int addr)
Definition: regcomp.c:237
static int add_mem_num(regex_t *reg, int num)
Definition: regcomp.c:255
#define GET_POINTER_INC(ptr, p)
Definition: regint.h:665
#define NSTRING_LEN(node)
Definition: regparse.h:105
#define IS_ENCLOSE_NAMED_GROUP(en)
Definition: regparse.h:154
static void clear_mml(MinMaxLen *mml)
Definition: regcomp.c:4429
int char_len
Definition: regparse.h:245
UChar * name
Definition: regparse.h:224
#define rb_intern_const(str)
Definition: ruby.h:1332
struct _Node * target
Definition: regparse.h:198
static void remove_opt_anc_info(OptAncInfo *to, int anc)
Definition: regcomp.c:4532
static int next_setup(Node *node, Node *next_node, int in_root, regex_t *reg)
Definition: regcomp.c:3289
UChar map[ONIG_CHAR_TABLE_SIZE]
Definition: regcomp.c:4329
BitStatusType bt_mem_start
Definition: regparse.h:293
static int subexp_inf_recursive_check_trav(Node *node, ScanEnv *env)
Definition: regcomp.c:2939
#define NSTRING_SET_DONT_GET_OPT_INFO(node)
Definition: regparse.h:109
#define SIZE_OP_PUSH_POS_NOT
Definition: regint.h:687
ScanEnv * scan_env
Definition: regcomp.c:4306
#define ANCHOR_WORD_BOUND
Definition: regint.h:507
static int comp_opt_exact_or_map_info(OptExactInfo *e, OptMapInfo *m)
Definition: regcomp.c:4771
int ascii_range
Definition: regparse.h:246
OnigOptionType option
Definition: regparse.h:197
#define ANCHOR_KEEP
Definition: regint.h:519
AbsAddrType call_addr
Definition: regparse.h:199
#define ONIG_OPTIMIZE_EXACT_BM
Definition: regint.h:324
OnigOptionType options
Definition: oniguruma.h:680
#define env
int onig_renumber_name_table(regex_t *reg, GroupNumRemap *map)
Definition: regparse.c:572
#define NULL
Definition: _sdbm.c:102
#define SIZE_OP_PUSH
Definition: regint.h:680
#define IN_ROOT
Definition: regcomp.c:3838
void onig_free(regex_t *reg)
Definition: regcomp.c:5587
static int compile_length_enclose_node(EncloseNode *node, regex_t *reg)
Definition: regcomp.c:1214
int back_num
Definition: regparse.h:235
#define NCDR(node)
Definition: regparse.h:85
static int unset_addr_list_add(UnsetAddrList *uslist, int offset, struct _Node *node)
Definition: regcomp.c:188
OnigOptionType options
Definition: oniguruma.h:386
#define GET_ABSADDR_INC(addr, p)
Definition: regint.h:660
#define ONIG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND
Definition: oniguruma.h:496
static int compile_option_node(EncloseNode *node, regex_t *reg)
Definition: regcomp.c:1188
#define SET_CALL_RECURSION(node)
Definition: regparse.h:157
#define bp()
Definition: vm_debug.h:27
#define BBUF_GET_OFFSET_POS(buf)
Definition: regint.h:467
#define SET_NTYPE(node, ntype)
Definition: regparse.h:72
static int unset_addr_list_fix(UnsetAddrList *uslist, regex_t *reg)
Definition: regcomp.c:2053
#define REGEX_TRANSFER(to, from)
Definition: regcomp.c:5619
int offset
Definition: regparse.h:210
static int bitset_is_empty(BitSetRef bs)
Definition: regcomp.c:118
#define NT_ENCLOSE
Definition: regparse.h:46
int onig_set_default_case_fold_flag(OnigCaseFoldType case_fold_flag)
Definition: regcomp.c:42
static void add_opt_anc_info(OptAncInfo *to, int anc)
Definition: regcomp.c:4523
#define ONIGERR_UNDEFINED_GROUP_REFERENCE
Definition: oniguruma.h:582
#define ONIG_STATE(reg)
Definition: oniguruma.h:657
OnigUChar * par
Definition: oniguruma.h:636
#define enclen(enc, p, e)
Definition: regenc.h:78
#define BBUF_GET_ADD_ADDRESS(buf)
Definition: regint.h:466
#define SIZE_OP_LOOK_BEHIND
Definition: regint.h:703
#define BBUF_ADD(buf, bytes, n)
Definition: regint.h:464
static int select_str_opcode(int mb_len, OnigDistance str_len, int ignore_case)
Definition: regcomp.c:324
BitStatusType capture_history
Definition: regparse.h:292
#define ONIGENC_MBC_MAXLEN(enc)
Definition: oniguruma.h:264
#define BITSET_SIZE
Definition: regint.h:394
int back_static[NODE_BACKREFS_SIZE]
Definition: regparse.h:236
Node * onig_node_new_anchor(int type)
Definition: regparse.c:1291