Ruby  2.0.0p247(2013-06-27revision41674)
regcomp.c
Go to the documentation of this file.
1 /**********************************************************************
2  regcomp.c - Onigmo (Oniguruma-mod) (regular expression library)
3 **********************************************************************/
4 /*-
5  * Copyright (c) 2002-2008 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
6  * Copyright (c) 2011-2013 K.Takata <kentkt AT csc DOT jp>
7  * All rights reserved.
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  * 1. Redistributions of source code must retain the above copyright
13  * notice, this list of conditions and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  * notice, this list of conditions and the following disclaimer in the
16  * documentation and/or other materials provided with the distribution.
17  *
18  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
19  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21  * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
22  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28  * SUCH DAMAGE.
29  */
30 
31 #include "regparse.h"
32 
34 
35 extern OnigCaseFoldType
37 {
39 }
40 
41 extern int
43 {
44  OnigDefaultCaseFoldFlag = case_fold_flag;
45  return 0;
46 }
47 
48 
49 #ifndef PLATFORM_UNALIGNED_WORD_ACCESS
50 static unsigned char PadBuf[WORD_ALIGNMENT_SIZE];
51 #endif
52 
53 #if 0
54 static UChar*
55 str_dup(UChar* s, UChar* end)
56 {
57  ptrdiff_t len = end - s;
58 
59  if (len > 0) {
60  UChar* r = (UChar* )xmalloc(len + 1);
62  xmemcpy(r, s, len);
63  r[len] = (UChar )0;
64  return r;
65  }
66  else return NULL;
67 }
68 #endif
69 
70 static void
72 {
73  Node c;
74  c = *a; *a = *b; *b = c;
75 
76  if (NTYPE(a) == NT_STR) {
77  StrNode* sn = NSTR(a);
78  if (sn->capa == 0) {
79  size_t len = sn->end - sn->s;
80  sn->s = sn->buf;
81  sn->end = sn->s + len;
82  }
83  }
84 
85  if (NTYPE(b) == NT_STR) {
86  StrNode* sn = NSTR(b);
87  if (sn->capa == 0) {
88  size_t len = sn->end - sn->s;
89  sn->s = sn->buf;
90  sn->end = sn->s + len;
91  }
92  }
93 }
94 
95 static OnigDistance
97 {
100  else {
101  if (d1 <= ONIG_INFINITE_DISTANCE - d2) return d1 + d2;
102  else return ONIG_INFINITE_DISTANCE;
103  }
104 }
105 
106 static OnigDistance
108 {
109  if (m == 0) return 0;
110 
111  if (d < ONIG_INFINITE_DISTANCE / m)
112  return d * m;
113  else
114  return ONIG_INFINITE_DISTANCE;
115 }
116 
117 static int
119 {
120  int i;
121  for (i = 0; i < BITSET_SIZE; i++) {
122  if (bs[i] != 0) return 0;
123  }
124  return 1;
125 }
126 
127 #ifdef ONIG_DEBUG
128 static int
129 onig_is_prelude(void)
130 {
131  return !rb_const_defined(rb_cThread, rb_intern_const("MUTEX_FOR_THREAD_EXCLUSIVE"));
132 }
133 
134 static int
135 bitset_on_num(BitSetRef bs)
136 {
137  int i, n;
138 
139  n = 0;
140  for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
141  if (BITSET_AT(bs, i)) n++;
142  }
143  return n;
144 }
145 #endif
146 
147 extern int
149 {
150  if (size <= 0) {
151  size = 0;
152  buf->p = NULL;
153  }
154  else {
155  buf->p = (UChar* )xmalloc(size);
156  if (IS_NULL(buf->p)) return(ONIGERR_MEMORY);
157  }
158 
159  buf->alloc = (unsigned int )size;
160  buf->used = 0;
161  return 0;
162 }
163 
164 
165 #ifdef USE_SUBEXP_CALL
166 
167 static int
169 {
170  UnsetAddr* p;
171 
172  p = (UnsetAddr* )xmalloc(sizeof(UnsetAddr)* size);
174  uslist->num = 0;
175  uslist->alloc = size;
176  uslist->us = p;
177  return 0;
178 }
179 
180 static void
182 {
183  if (IS_NOT_NULL(uslist->us))
184  xfree(uslist->us);
185 }
186 
187 static int
188 unset_addr_list_add(UnsetAddrList* uslist, int offset, struct _Node* node)
189 {
190  UnsetAddr* p;
191  int size;
192 
193  if (uslist->num >= uslist->alloc) {
194  size = uslist->alloc * 2;
195  p = (UnsetAddr* )xrealloc(uslist->us, sizeof(UnsetAddr) * size);
197  uslist->alloc = size;
198  uslist->us = p;
199  }
200 
201  uslist->us[uslist->num].offset = offset;
202  uslist->us[uslist->num].target = node;
203  uslist->num++;
204  return 0;
205 }
206 #endif /* USE_SUBEXP_CALL */
207 
208 
209 static int
210 add_opcode(regex_t* reg, int opcode)
211 {
212  BBUF_ADD1(reg, opcode);
213  return 0;
214 }
215 
216 #ifdef USE_COMBINATION_EXPLOSION_CHECK
217 static int
218 add_state_check_num(regex_t* reg, int num)
219 {
221 
222  BBUF_ADD(reg, &n, SIZE_STATE_CHECK_NUM);
223  return 0;
224 }
225 #endif
226 
227 static int
228 add_rel_addr(regex_t* reg, int addr)
229 {
230  RelAddrType ra = (RelAddrType )addr;
231 
232  BBUF_ADD(reg, &ra, SIZE_RELADDR);
233  return 0;
234 }
235 
236 static int
237 add_abs_addr(regex_t* reg, int addr)
238 {
239  AbsAddrType ra = (AbsAddrType )addr;
240 
241  BBUF_ADD(reg, &ra, SIZE_ABSADDR);
242  return 0;
243 }
244 
245 static int
247 {
248  LengthType l = (LengthType )len;
249 
250  BBUF_ADD(reg, &l, SIZE_LENGTH);
251  return 0;
252 }
253 
254 static int
255 add_mem_num(regex_t* reg, int num)
256 {
257  MemNumType n = (MemNumType )num;
258 
259  BBUF_ADD(reg, &n, SIZE_MEMNUM);
260  return 0;
261 }
262 
263 static int
264 add_pointer(regex_t* reg, void* addr)
265 {
266  PointerType ptr = (PointerType )addr;
267 
268  BBUF_ADD(reg, &ptr, SIZE_POINTER);
269  return 0;
270 }
271 
272 static int
274 {
275  BBUF_ADD(reg, &option, SIZE_OPTION);
276  return 0;
277 }
278 
279 static int
280 add_opcode_rel_addr(regex_t* reg, int opcode, int addr)
281 {
282  int r;
283 
284  r = add_opcode(reg, opcode);
285  if (r) return r;
286  r = add_rel_addr(reg, addr);
287  return r;
288 }
289 
290 static int
292 {
293  BBUF_ADD(reg, bytes, len);
294  return 0;
295 }
296 
297 static int
299 {
300  BBUF_ADD(reg, bs, SIZE_BITSET);
301  return 0;
302 }
303 
304 static int
305 add_opcode_option(regex_t* reg, int opcode, OnigOptionType option)
306 {
307  int r;
308 
309  r = add_opcode(reg, opcode);
310  if (r) return r;
311  r = add_option(reg, option);
312  return r;
313 }
314 
315 static int compile_length_tree(Node* node, regex_t* reg);
316 static int compile_tree(Node* node, regex_t* reg);
317 
318 
319 #define IS_NEED_STR_LEN_OP_EXACT(op) \
320  ((op) == OP_EXACTN || (op) == OP_EXACTMB2N ||\
321  (op) == OP_EXACTMB3N || (op) == OP_EXACTMBN || (op) == OP_EXACTN_IC)
322 
323 static int
324 select_str_opcode(int mb_len, OnigDistance str_len, int ignore_case)
325 {
326  int op;
327 
328  if (ignore_case) {
329  switch (str_len) {
330  case 1: op = OP_EXACT1_IC; break;
331  default: op = OP_EXACTN_IC; break;
332  }
333  }
334  else {
335  switch (mb_len) {
336  case 1:
337  switch (str_len) {
338  case 1: op = OP_EXACT1; break;
339  case 2: op = OP_EXACT2; break;
340  case 3: op = OP_EXACT3; break;
341  case 4: op = OP_EXACT4; break;
342  case 5: op = OP_EXACT5; break;
343  default: op = OP_EXACTN; break;
344  }
345  break;
346 
347  case 2:
348  switch (str_len) {
349  case 1: op = OP_EXACTMB2N1; break;
350  case 2: op = OP_EXACTMB2N2; break;
351  case 3: op = OP_EXACTMB2N3; break;
352  default: op = OP_EXACTMB2N; break;
353  }
354  break;
355 
356  case 3:
357  op = OP_EXACTMB3N;
358  break;
359 
360  default:
361  op = OP_EXACTMBN;
362  break;
363  }
364  }
365  return op;
366 }
367 
368 static int
369 compile_tree_empty_check(Node* node, regex_t* reg, int empty_info)
370 {
371  int r;
372  int saved_num_null_check = reg->num_null_check;
373 
374  if (empty_info != 0) {
376  if (r) return r;
377  r = add_mem_num(reg, reg->num_null_check); /* NULL CHECK ID */
378  if (r) return r;
379  reg->num_null_check++;
380  }
381 
382  r = compile_tree(node, reg);
383  if (r) return r;
384 
385  if (empty_info != 0) {
386  if (empty_info == NQ_TARGET_IS_EMPTY)
387  r = add_opcode(reg, OP_NULL_CHECK_END);
388  else if (empty_info == NQ_TARGET_IS_EMPTY_MEM)
390  else if (empty_info == NQ_TARGET_IS_EMPTY_REC)
392 
393  if (r) return r;
394  r = add_mem_num(reg, saved_num_null_check); /* NULL CHECK ID */
395  }
396  return r;
397 }
398 
399 #ifdef USE_SUBEXP_CALL
400 static int
402 {
403  int r;
404 
405  r = add_opcode(reg, OP_CALL);
406  if (r) return r;
408  node->target);
409  if (r) return r;
410  r = add_abs_addr(reg, 0 /*dummy addr.*/);
411  return r;
412 }
413 #endif
414 
415 static int
416 compile_tree_n_times(Node* node, int n, regex_t* reg)
417 {
418  int i, r;
419 
420  for (i = 0; i < n; i++) {
421  r = compile_tree(node, reg);
422  if (r) return r;
423  }
424  return 0;
425 }
426 
427 static int
429  regex_t* reg ARG_UNUSED, int ignore_case)
430 {
431  int len;
432  int op = select_str_opcode(mb_len, str_len, ignore_case);
433 
434  len = SIZE_OPCODE;
435 
436  if (op == OP_EXACTMBN) len += SIZE_LENGTH;
437  if (IS_NEED_STR_LEN_OP_EXACT(op))
438  len += SIZE_LENGTH;
439 
440  len += mb_len * (int )str_len;
441  return len;
442 }
443 
444 static int
445 add_compile_string(UChar* s, int mb_len, OnigDistance str_len,
446  regex_t* reg, int ignore_case)
447 {
448  int op = select_str_opcode(mb_len, str_len, ignore_case);
449  add_opcode(reg, op);
450 
451  if (op == OP_EXACTMBN)
452  add_length(reg, mb_len);
453 
454  if (IS_NEED_STR_LEN_OP_EXACT(op)) {
455  if (op == OP_EXACTN_IC)
456  add_length(reg, mb_len * str_len);
457  else
458  add_length(reg, str_len);
459  }
460 
461  add_bytes(reg, s, mb_len * str_len);
462  return 0;
463 }
464 
465 
466 static int
468 {
469  int rlen, r, len, prev_len, slen, ambig;
470  OnigEncoding enc = reg->enc;
471  UChar *p, *prev;
472  StrNode* sn;
473 
474  sn = NSTR(node);
475  if (sn->end <= sn->s)
476  return 0;
477 
478  ambig = NSTRING_IS_AMBIG(node);
479 
480  p = prev = sn->s;
481  prev_len = enclen(enc, p, sn->end);
482  p += prev_len;
483  slen = 1;
484  rlen = 0;
485 
486  for (; p < sn->end; ) {
487  len = enclen(enc, p, sn->end);
488  if (len == prev_len) {
489  slen++;
490  }
491  else {
492  r = add_compile_string_length(prev, prev_len, slen, reg, ambig);
493  rlen += r;
494  prev = p;
495  slen = 1;
496  prev_len = len;
497  }
498  p += len;
499  }
500  r = add_compile_string_length(prev, prev_len, slen, reg, ambig);
501  rlen += r;
502  return rlen;
503 }
504 
505 static int
507 {
508  if (sn->end <= sn->s)
509  return 0;
510 
511  return add_compile_string_length(sn->s, 1 /* sb */, sn->end - sn->s, reg, 0);
512 }
513 
514 static int
516 {
517  int r, len, prev_len, slen, ambig;
518  OnigEncoding enc = reg->enc;
519  UChar *p, *prev, *end;
520  StrNode* sn;
521 
522  sn = NSTR(node);
523  if (sn->end <= sn->s)
524  return 0;
525 
526  end = sn->end;
527  ambig = NSTRING_IS_AMBIG(node);
528 
529  p = prev = sn->s;
530  prev_len = enclen(enc, p, end);
531  p += prev_len;
532  slen = 1;
533 
534  for (; p < end; ) {
535  len = enclen(enc, p, end);
536  if (len == prev_len) {
537  slen++;
538  }
539  else {
540  r = add_compile_string(prev, prev_len, slen, reg, ambig);
541  if (r) return r;
542 
543  prev = p;
544  slen = 1;
545  prev_len = len;
546  }
547 
548  p += len;
549  }
550  return add_compile_string(prev, prev_len, slen, reg, ambig);
551 }
552 
553 static int
555 {
556  if (sn->end <= sn->s)
557  return 0;
558 
559  return add_compile_string(sn->s, 1 /* sb */, sn->end - sn->s, reg, 0);
560 }
561 
562 static int
564 {
565 #ifdef PLATFORM_UNALIGNED_WORD_ACCESS
566  add_length(reg, mbuf->used);
567  return add_bytes(reg, mbuf->p, mbuf->used);
568 #else
569  int r, pad_size;
571 
572  GET_ALIGNMENT_PAD_SIZE(p, pad_size);
573  add_length(reg, mbuf->used + (WORD_ALIGNMENT_SIZE - 1));
574  if (pad_size != 0) add_bytes(reg, PadBuf, pad_size);
575 
576  r = add_bytes(reg, mbuf->p, mbuf->used);
577 
578  /* padding for return value from compile_length_cclass_node() to be fix. */
579  pad_size = (WORD_ALIGNMENT_SIZE - 1) - pad_size;
580  if (pad_size != 0) add_bytes(reg, PadBuf, pad_size);
581  return r;
582 #endif
583 }
584 
585 static int
587 {
588  int len;
589 
590  if (IS_NCCLASS_SHARE(cc)) {
591  len = SIZE_OPCODE + SIZE_POINTER;
592  return len;
593  }
594 
595  if (IS_NULL(cc->mbuf)) {
596  len = SIZE_OPCODE + SIZE_BITSET;
597  }
598  else {
599  if (ONIGENC_MBC_MINLEN(reg->enc) > 1 || bitset_is_empty(cc->bs)) {
600  len = SIZE_OPCODE;
601  }
602  else {
603  len = SIZE_OPCODE + SIZE_BITSET;
604  }
605 #ifdef PLATFORM_UNALIGNED_WORD_ACCESS
606  len += SIZE_LENGTH + cc->mbuf->used;
607 #else
608  len += SIZE_LENGTH + cc->mbuf->used + (WORD_ALIGNMENT_SIZE - 1);
609 #endif
610  }
611 
612  return len;
613 }
614 
615 static int
617 {
618  int r;
619 
620  if (IS_NCCLASS_SHARE(cc)) {
622  r = add_pointer(reg, cc);
623  return r;
624  }
625 
626  if (IS_NULL(cc->mbuf)) {
627  if (IS_NCCLASS_NOT(cc))
629  else
630  add_opcode(reg, OP_CCLASS);
631 
632  r = add_bitset(reg, cc->bs);
633  }
634  else {
635  if (ONIGENC_MBC_MINLEN(reg->enc) > 1 || bitset_is_empty(cc->bs)) {
636  if (IS_NCCLASS_NOT(cc))
638  else
639  add_opcode(reg, OP_CCLASS_MB);
640 
641  r = add_multi_byte_cclass(cc->mbuf, reg);
642  }
643  else {
644  if (IS_NCCLASS_NOT(cc))
646  else
648 
649  r = add_bitset(reg, cc->bs);
650  if (r) return r;
651  r = add_multi_byte_cclass(cc->mbuf, reg);
652  }
653  }
654 
655  return r;
656 }
657 
658 static int
659 entry_repeat_range(regex_t* reg, int id, int lower, int upper)
660 {
661 #define REPEAT_RANGE_ALLOC 4
662 
664 
665  if (reg->repeat_range_alloc == 0) {
668  reg->repeat_range = p;
670  }
671  else if (reg->repeat_range_alloc <= id) {
672  int n;
675  sizeof(OnigRepeatRange) * n);
677  reg->repeat_range = p;
678  reg->repeat_range_alloc = n;
679  }
680  else {
681  p = reg->repeat_range;
682  }
683 
684  p[id].lower = lower;
685  p[id].upper = (IS_REPEAT_INFINITE(upper) ? 0x7fffffff : upper);
686  return 0;
687 }
688 
689 static int
690 compile_range_repeat_node(QtfrNode* qn, int target_len, int empty_info,
691  regex_t* reg)
692 {
693  int r;
694  int num_repeat = reg->num_repeat;
695 
696  r = add_opcode(reg, qn->greedy ? OP_REPEAT : OP_REPEAT_NG);
697  if (r) return r;
698  r = add_mem_num(reg, num_repeat); /* OP_REPEAT ID */
699  reg->num_repeat++;
700  if (r) return r;
701  r = add_rel_addr(reg, target_len + SIZE_OP_REPEAT_INC);
702  if (r) return r;
703 
704  r = entry_repeat_range(reg, num_repeat, qn->lower, qn->upper);
705  if (r) return r;
706 
707  r = compile_tree_empty_check(qn->target, reg, empty_info);
708  if (r) return r;
709 
710  if (
711 #ifdef USE_SUBEXP_CALL
712  reg->num_call > 0 ||
713 #endif
716  }
717  else {
719  }
720  if (r) return r;
721  r = add_mem_num(reg, num_repeat); /* OP_REPEAT ID */
722  return r;
723 }
724 
725 static int
727 {
728  if (qn->greedy && IS_REPEAT_INFINITE(qn->upper) &&
729  NTYPE(qn->target) == NT_CANY)
730  return 1;
731  else
732  return 0;
733 }
734 
735 #define QUANTIFIER_EXPAND_LIMIT_SIZE 50
736 #define CKN_ON (ckn > 0)
737 
738 #ifdef USE_COMBINATION_EXPLOSION_CHECK
739 
740 static int
742 {
743  int len, mod_tlen, cklen;
744  int ckn;
745  int infinite = IS_REPEAT_INFINITE(qn->upper);
746  int empty_info = qn->target_empty_info;
747  int tlen = compile_length_tree(qn->target, reg);
748 
749  if (tlen < 0) return tlen;
750 
751  ckn = ((reg->num_comb_exp_check > 0) ? qn->comb_exp_check_num : 0);
752 
753  cklen = (CKN_ON ? SIZE_STATE_CHECK_NUM: 0);
754 
755  /* anychar repeat */
756  if (NTYPE(qn->target) == NT_CANY) {
757  if (qn->greedy && infinite) {
758  if (IS_NOT_NULL(qn->next_head_exact) && !CKN_ON)
759  return SIZE_OP_ANYCHAR_STAR_PEEK_NEXT + tlen * qn->lower + cklen;
760  else
761  return SIZE_OP_ANYCHAR_STAR + tlen * qn->lower + cklen;
762  }
763  }
764 
765  if (empty_info != 0)
766  mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END);
767  else
768  mod_tlen = tlen;
769 
770  if (infinite && qn->lower <= 1) {
771  if (qn->greedy) {
772  if (qn->lower == 1)
773  len = SIZE_OP_JUMP;
774  else
775  len = 0;
776 
777  len += SIZE_OP_PUSH + cklen + mod_tlen + SIZE_OP_JUMP;
778  }
779  else {
780  if (qn->lower == 0)
781  len = SIZE_OP_JUMP;
782  else
783  len = 0;
784 
785  len += mod_tlen + SIZE_OP_PUSH + cklen;
786  }
787  }
788  else if (qn->upper == 0) {
789  if (qn->is_refered != 0) /* /(?<n>..){0}/ */
790  len = SIZE_OP_JUMP + tlen;
791  else
792  len = 0;
793  }
794  else if (qn->upper == 1 && qn->greedy) {
795  if (qn->lower == 0) {
796  if (CKN_ON) {
797  len = SIZE_OP_STATE_CHECK_PUSH + tlen;
798  }
799  else {
800  len = SIZE_OP_PUSH + tlen;
801  }
802  }
803  else {
804  len = tlen;
805  }
806  }
807  else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' */
808  len = SIZE_OP_PUSH + cklen + SIZE_OP_JUMP + tlen;
809  }
810  else {
811  len = SIZE_OP_REPEAT_INC
812  + mod_tlen + SIZE_OPCODE + SIZE_RELADDR + SIZE_MEMNUM;
813  if (CKN_ON)
814  len += SIZE_OP_STATE_CHECK;
815  }
816 
817  return len;
818 }
819 
820 static int
822 {
823  int r, mod_tlen;
824  int ckn;
825  int infinite = IS_REPEAT_INFINITE(qn->upper);
826  int empty_info = qn->target_empty_info;
827  int tlen = compile_length_tree(qn->target, reg);
828 
829  if (tlen < 0) return tlen;
830 
831  ckn = ((reg->num_comb_exp_check > 0) ? qn->comb_exp_check_num : 0);
832 
833  if (is_anychar_star_quantifier(qn)) {
834  r = compile_tree_n_times(qn->target, qn->lower, reg);
835  if (r) return r;
836  if (IS_NOT_NULL(qn->next_head_exact) && !CKN_ON) {
837  if (IS_MULTILINE(reg->options))
839  else
841  if (r) return r;
842  if (CKN_ON) {
843  r = add_state_check_num(reg, ckn);
844  if (r) return r;
845  }
846 
847  return add_bytes(reg, NSTR(qn->next_head_exact)->s, 1);
848  }
849  else {
850  if (IS_MULTILINE(reg->options)) {
851  r = add_opcode(reg, (CKN_ON ?
853  : OP_ANYCHAR_ML_STAR));
854  }
855  else {
856  r = add_opcode(reg, (CKN_ON ?
858  : OP_ANYCHAR_STAR));
859  }
860  if (r) return r;
861  if (CKN_ON)
862  r = add_state_check_num(reg, ckn);
863 
864  return r;
865  }
866  }
867 
868  if (empty_info != 0)
869  mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END);
870  else
871  mod_tlen = tlen;
872 
873  if (infinite && qn->lower <= 1) {
874  if (qn->greedy) {
875  if (qn->lower == 1) {
876  r = add_opcode_rel_addr(reg, OP_JUMP,
877  (CKN_ON ? SIZE_OP_STATE_CHECK_PUSH : SIZE_OP_PUSH));
878  if (r) return r;
879  }
880 
881  if (CKN_ON) {
883  if (r) return r;
884  r = add_state_check_num(reg, ckn);
885  if (r) return r;
886  r = add_rel_addr(reg, mod_tlen + SIZE_OP_JUMP);
887  }
888  else {
889  r = add_opcode_rel_addr(reg, OP_PUSH, mod_tlen + SIZE_OP_JUMP);
890  }
891  if (r) return r;
892  r = compile_tree_empty_check(qn->target, reg, empty_info);
893  if (r) return r;
894  r = add_opcode_rel_addr(reg, OP_JUMP,
895  -(mod_tlen + (int )SIZE_OP_JUMP
896  + (int )(CKN_ON ? SIZE_OP_STATE_CHECK_PUSH : SIZE_OP_PUSH)));
897  }
898  else {
899  if (qn->lower == 0) {
900  r = add_opcode_rel_addr(reg, OP_JUMP, mod_tlen);
901  if (r) return r;
902  }
903  r = compile_tree_empty_check(qn->target, reg, empty_info);
904  if (r) return r;
905  if (CKN_ON) {
907  if (r) return r;
908  r = add_state_check_num(reg, ckn);
909  if (r) return r;
910  r = add_rel_addr(reg,
911  -(mod_tlen + (int )SIZE_OP_STATE_CHECK_PUSH_OR_JUMP));
912  }
913  else
914  r = add_opcode_rel_addr(reg, OP_PUSH, -(mod_tlen + (int )SIZE_OP_PUSH));
915  }
916  }
917  else if (qn->upper == 0) {
918  if (qn->is_refered != 0) { /* /(?<n>..){0}/ */
919  r = add_opcode_rel_addr(reg, OP_JUMP, tlen);
920  if (r) return r;
921  r = compile_tree(qn->target, reg);
922  }
923  else
924  r = 0;
925  }
926  else if (qn->upper == 1 && qn->greedy) {
927  if (qn->lower == 0) {
928  if (CKN_ON) {
930  if (r) return r;
931  r = add_state_check_num(reg, ckn);
932  if (r) return r;
933  r = add_rel_addr(reg, tlen);
934  }
935  else {
936  r = add_opcode_rel_addr(reg, OP_PUSH, tlen);
937  }
938  if (r) return r;
939  }
940 
941  r = compile_tree(qn->target, reg);
942  }
943  else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' */
944  if (CKN_ON) {
946  if (r) return r;
947  r = add_state_check_num(reg, ckn);
948  if (r) return r;
949  r = add_rel_addr(reg, SIZE_OP_JUMP);
950  }
951  else {
953  }
954 
955  if (r) return r;
956  r = add_opcode_rel_addr(reg, OP_JUMP, tlen);
957  if (r) return r;
958  r = compile_tree(qn->target, reg);
959  }
960  else {
961  r = compile_range_repeat_node(qn, mod_tlen, empty_info, reg);
962  if (CKN_ON) {
963  if (r) return r;
964  r = add_opcode(reg, OP_STATE_CHECK);
965  if (r) return r;
966  r = add_state_check_num(reg, ckn);
967  }
968  }
969  return r;
970 }
971 
972 #else /* USE_COMBINATION_EXPLOSION_CHECK */
973 
974 static int
976 {
977  int len, mod_tlen;
978  int infinite = IS_REPEAT_INFINITE(qn->upper);
979  int empty_info = qn->target_empty_info;
980  int tlen = compile_length_tree(qn->target, reg);
981 
982  if (tlen < 0) return tlen;
983 
984  /* anychar repeat */
985  if (NTYPE(qn->target) == NT_CANY) {
986  if (qn->greedy && infinite) {
987  if (IS_NOT_NULL(qn->next_head_exact))
988  return SIZE_OP_ANYCHAR_STAR_PEEK_NEXT + tlen * qn->lower;
989  else
990  return SIZE_OP_ANYCHAR_STAR + tlen * qn->lower;
991  }
992  }
993 
994  if (empty_info != 0)
995  mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END);
996  else
997  mod_tlen = tlen;
998 
999  if (infinite &&
1000  (qn->lower <= 1 || tlen * qn->lower <= QUANTIFIER_EXPAND_LIMIT_SIZE)) {
1001  if (qn->lower == 1 && tlen > QUANTIFIER_EXPAND_LIMIT_SIZE) {
1002  len = SIZE_OP_JUMP;
1003  }
1004  else {
1005  len = tlen * qn->lower;
1006  }
1007 
1008  if (qn->greedy) {
1009  if (IS_NOT_NULL(qn->head_exact))
1010  len += SIZE_OP_PUSH_OR_JUMP_EXACT1 + mod_tlen + SIZE_OP_JUMP;
1011  else if (IS_NOT_NULL(qn->next_head_exact))
1012  len += SIZE_OP_PUSH_IF_PEEK_NEXT + mod_tlen + SIZE_OP_JUMP;
1013  else
1014  len += SIZE_OP_PUSH + mod_tlen + SIZE_OP_JUMP;
1015  }
1016  else
1017  len += SIZE_OP_JUMP + mod_tlen + SIZE_OP_PUSH;
1018  }
1019  else if (qn->upper == 0 && qn->is_refered != 0) { /* /(?<n>..){0}/ */
1020  len = SIZE_OP_JUMP + tlen;
1021  }
1022  else if (!infinite && qn->greedy &&
1023  (qn->upper == 1 || (tlen + SIZE_OP_PUSH) * qn->upper
1025  len = tlen * qn->lower;
1026  len += (SIZE_OP_PUSH + tlen) * (qn->upper - qn->lower);
1027  }
1028  else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' */
1029  len = SIZE_OP_PUSH + SIZE_OP_JUMP + tlen;
1030  }
1031  else {
1032  len = SIZE_OP_REPEAT_INC
1033  + mod_tlen + SIZE_OPCODE + SIZE_RELADDR + SIZE_MEMNUM;
1034  }
1035 
1036  return len;
1037 }
1038 
1039 static int
1041 {
1042  int i, r, mod_tlen;
1043  int infinite = IS_REPEAT_INFINITE(qn->upper);
1044  int empty_info = qn->target_empty_info;
1045  int tlen = compile_length_tree(qn->target, reg);
1046 
1047  if (tlen < 0) return tlen;
1048 
1049  if (is_anychar_star_quantifier(qn)) {
1050  r = compile_tree_n_times(qn->target, qn->lower, reg);
1051  if (r) return r;
1052  if (IS_NOT_NULL(qn->next_head_exact)) {
1053  if (IS_MULTILINE(reg->options))
1055  else
1057  if (r) return r;
1058  return add_bytes(reg, NSTR(qn->next_head_exact)->s, 1);
1059  }
1060  else {
1061  if (IS_MULTILINE(reg->options))
1062  return add_opcode(reg, OP_ANYCHAR_ML_STAR);
1063  else
1064  return add_opcode(reg, OP_ANYCHAR_STAR);
1065  }
1066  }
1067 
1068  if (empty_info != 0)
1069  mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END);
1070  else
1071  mod_tlen = tlen;
1072 
1073  if (infinite &&
1074  (qn->lower <= 1 || tlen * qn->lower <= QUANTIFIER_EXPAND_LIMIT_SIZE)) {
1075  if (qn->lower == 1 && tlen > QUANTIFIER_EXPAND_LIMIT_SIZE) {
1076  if (qn->greedy) {
1077  if (IS_NOT_NULL(qn->head_exact))
1079  else if (IS_NOT_NULL(qn->next_head_exact))
1081  else
1083  }
1084  else {
1086  }
1087  if (r) return r;
1088  }
1089  else {
1090  r = compile_tree_n_times(qn->target, qn->lower, reg);
1091  if (r) return r;
1092  }
1093 
1094  if (qn->greedy) {
1095  if (IS_NOT_NULL(qn->head_exact)) {
1097  mod_tlen + SIZE_OP_JUMP);
1098  if (r) return r;
1099  add_bytes(reg, NSTR(qn->head_exact)->s, 1);
1100  r = compile_tree_empty_check(qn->target, reg, empty_info);
1101  if (r) return r;
1102  r = add_opcode_rel_addr(reg, OP_JUMP,
1103  -(mod_tlen + (int )SIZE_OP_JUMP + (int )SIZE_OP_PUSH_OR_JUMP_EXACT1));
1104  }
1105  else if (IS_NOT_NULL(qn->next_head_exact)) {
1107  mod_tlen + SIZE_OP_JUMP);
1108  if (r) return r;
1109  add_bytes(reg, NSTR(qn->next_head_exact)->s, 1);
1110  r = compile_tree_empty_check(qn->target, reg, empty_info);
1111  if (r) return r;
1112  r = add_opcode_rel_addr(reg, OP_JUMP,
1113  -(mod_tlen + (int )SIZE_OP_JUMP + (int )SIZE_OP_PUSH_IF_PEEK_NEXT));
1114  }
1115  else {
1116  r = add_opcode_rel_addr(reg, OP_PUSH, mod_tlen + SIZE_OP_JUMP);
1117  if (r) return r;
1118  r = compile_tree_empty_check(qn->target, reg, empty_info);
1119  if (r) return r;
1120  r = add_opcode_rel_addr(reg, OP_JUMP,
1121  -(mod_tlen + (int )SIZE_OP_JUMP + (int )SIZE_OP_PUSH));
1122  }
1123  }
1124  else {
1125  r = add_opcode_rel_addr(reg, OP_JUMP, mod_tlen);
1126  if (r) return r;
1127  r = compile_tree_empty_check(qn->target, reg, empty_info);
1128  if (r) return r;
1129  r = add_opcode_rel_addr(reg, OP_PUSH, -(mod_tlen + (int )SIZE_OP_PUSH));
1130  }
1131  }
1132  else if (qn->upper == 0 && qn->is_refered != 0) { /* /(?<n>..){0}/ */
1133  r = add_opcode_rel_addr(reg, OP_JUMP, tlen);
1134  if (r) return r;
1135  r = compile_tree(qn->target, reg);
1136  }
1137  else if (!infinite && qn->greedy &&
1138  (qn->upper == 1 || (tlen + SIZE_OP_PUSH) * qn->upper
1140  int n = qn->upper - qn->lower;
1141 
1142  r = compile_tree_n_times(qn->target, qn->lower, reg);
1143  if (r) return r;
1144 
1145  for (i = 0; i < n; i++) {
1146  r = add_opcode_rel_addr(reg, OP_PUSH,
1147  (n - i) * tlen + (n - i - 1) * SIZE_OP_PUSH);
1148  if (r) return r;
1149  r = compile_tree(qn->target, reg);
1150  if (r) return r;
1151  }
1152  }
1153  else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' */
1155  if (r) return r;
1156  r = add_opcode_rel_addr(reg, OP_JUMP, tlen);
1157  if (r) return r;
1158  r = compile_tree(qn->target, reg);
1159  }
1160  else {
1161  r = compile_range_repeat_node(qn, mod_tlen, empty_info, reg);
1162  }
1163  return r;
1164 }
1165 #endif /* USE_COMBINATION_EXPLOSION_CHECK */
1166 
1167 static int
1169 {
1170  int tlen;
1171  OnigOptionType prev = reg->options;
1172 
1173  reg->options = node->option;
1174  tlen = compile_length_tree(node->target, reg);
1175  reg->options = prev;
1176 
1177  if (tlen < 0) return tlen;
1178 
1179  if (IS_DYNAMIC_OPTION(prev ^ node->option)) {
1181  + tlen + SIZE_OP_SET_OPTION;
1182  }
1183  else
1184  return tlen;
1185 }
1186 
1187 static int
1189 {
1190  int r;
1191  OnigOptionType prev = reg->options;
1192 
1193  if (IS_DYNAMIC_OPTION(prev ^ node->option)) {
1194  r = add_opcode_option(reg, OP_SET_OPTION_PUSH, node->option);
1195  if (r) return r;
1196  r = add_opcode_option(reg, OP_SET_OPTION, prev);
1197  if (r) return r;
1198  r = add_opcode(reg, OP_FAIL);
1199  if (r) return r;
1200  }
1201 
1202  reg->options = node->option;
1203  r = compile_tree(node->target, reg);
1204  reg->options = prev;
1205 
1206  if (IS_DYNAMIC_OPTION(prev ^ node->option)) {
1207  if (r) return r;
1208  r = add_opcode_option(reg, OP_SET_OPTION, prev);
1209  }
1210  return r;
1211 }
1212 
1213 static int
1215 {
1216  int len;
1217  int tlen;
1218 
1219  if (node->type == ENCLOSE_OPTION)
1220  return compile_length_option_node(node, reg);
1221 
1222  if (node->target) {
1223  tlen = compile_length_tree(node->target, reg);
1224  if (tlen < 0) return tlen;
1225  }
1226  else
1227  tlen = 0;
1228 
1229  switch (node->type) {
1230  case ENCLOSE_MEMORY:
1231 #ifdef USE_SUBEXP_CALL
1232  if (IS_ENCLOSE_CALLED(node)) {
1233  len = SIZE_OP_MEMORY_START_PUSH + tlen
1235  if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum))
1236  len += (IS_ENCLOSE_RECURSION(node)
1238  else
1239  len += (IS_ENCLOSE_RECURSION(node)
1241  }
1242  else
1243 #endif
1244  {
1245  if (BIT_STATUS_AT(reg->bt_mem_start, node->regnum))
1247  else
1248  len = SIZE_OP_MEMORY_START;
1249 
1250  len += tlen + (BIT_STATUS_AT(reg->bt_mem_end, node->regnum)
1252  }
1253  break;
1254 
1257  QtfrNode* qn = NQTFR(node->target);
1258  tlen = compile_length_tree(qn->target, reg);
1259  if (tlen < 0) return tlen;
1260 
1261  len = tlen * qn->lower
1262  + SIZE_OP_PUSH + tlen + SIZE_OP_POP + SIZE_OP_JUMP;
1263  }
1264  else {
1266  }
1267  break;
1268 
1269  case ENCLOSE_CONDITION:
1270  len = SIZE_OP_CONDITION;
1271  if (NTYPE(node->target) == NT_ALT) {
1272  Node* x = node->target;
1273 
1274  tlen = compile_length_tree(NCAR(x), reg); /* yes-node */
1275  if (tlen < 0) return tlen;
1276  len += tlen + SIZE_OP_JUMP;
1277  if (NCDR(x) == NULL) return ONIGERR_PARSER_BUG;
1278  x = NCDR(x);
1279  tlen = compile_length_tree(NCAR(x), reg); /* no-node */
1280  if (tlen < 0) return tlen;
1281  len += tlen;
1282  if (NCDR(x) != NULL) return ONIGERR_INVALID_CONDITION_PATTERN;
1283  }
1284  else {
1285  return ONIGERR_PARSER_BUG;
1286  }
1287  break;
1288 
1289  default:
1290  return ONIGERR_TYPE_BUG;
1291  break;
1292  }
1293 
1294  return len;
1295 }
1296 
1297 static int get_char_length_tree(Node* node, regex_t* reg, int* len);
1298 
1299 static int
1301 {
1302  int r, len;
1303 
1304  if (node->type == ENCLOSE_OPTION)
1305  return compile_option_node(node, reg);
1306 
1307  switch (node->type) {
1308  case ENCLOSE_MEMORY:
1309 #ifdef USE_SUBEXP_CALL
1310  if (IS_ENCLOSE_CALLED(node)) {
1311  r = add_opcode(reg, OP_CALL);
1312  if (r) return r;
1314  node->state |= NST_ADDR_FIXED;
1315  r = add_abs_addr(reg, (int )node->call_addr);
1316  if (r) return r;
1317  len = compile_length_tree(node->target, reg);
1319  if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum))
1320  len += (IS_ENCLOSE_RECURSION(node)
1322  else
1323  len += (IS_ENCLOSE_RECURSION(node)
1325 
1326  r = add_opcode_rel_addr(reg, OP_JUMP, len);
1327  if (r) return r;
1328  }
1329 #endif
1330  if (BIT_STATUS_AT(reg->bt_mem_start, node->regnum))
1331  r = add_opcode(reg, OP_MEMORY_START_PUSH);
1332  else
1333  r = add_opcode(reg, OP_MEMORY_START);
1334  if (r) return r;
1335  r = add_mem_num(reg, node->regnum);
1336  if (r) return r;
1337  r = compile_tree(node->target, reg);
1338  if (r) return r;
1339 #ifdef USE_SUBEXP_CALL
1340  if (IS_ENCLOSE_CALLED(node)) {
1341  if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum))
1342  r = add_opcode(reg, (IS_ENCLOSE_RECURSION(node)
1344  else
1345  r = add_opcode(reg, (IS_ENCLOSE_RECURSION(node)
1347 
1348  if (r) return r;
1349  r = add_mem_num(reg, node->regnum);
1350  if (r) return r;
1351  r = add_opcode(reg, OP_RETURN);
1352  }
1353  else
1354 #endif
1355  {
1356  if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum))
1357  r = add_opcode(reg, OP_MEMORY_END_PUSH);
1358  else
1359  r = add_opcode(reg, OP_MEMORY_END);
1360  if (r) return r;
1361  r = add_mem_num(reg, node->regnum);
1362  }
1363  break;
1364 
1367  QtfrNode* qn = NQTFR(node->target);
1368  r = compile_tree_n_times(qn->target, qn->lower, reg);
1369  if (r) return r;
1370 
1371  len = compile_length_tree(qn->target, reg);
1372  if (len < 0) return len;
1373 
1375  if (r) return r;
1376  r = compile_tree(qn->target, reg);
1377  if (r) return r;
1378  r = add_opcode(reg, OP_POP);
1379  if (r) return r;
1380  r = add_opcode_rel_addr(reg, OP_JUMP,
1381  -((int )SIZE_OP_PUSH + len + (int )SIZE_OP_POP + (int )SIZE_OP_JUMP));
1382  }
1383  else {
1384  r = add_opcode(reg, OP_PUSH_STOP_BT);
1385  if (r) return r;
1386  r = compile_tree(node->target, reg);
1387  if (r) return r;
1388  r = add_opcode(reg, OP_POP_STOP_BT);
1389  }
1390  break;
1391 
1392  case ENCLOSE_CONDITION:
1393  r = add_opcode(reg, OP_CONDITION);
1394  if (r) return r;
1395  r = add_mem_num(reg, node->regnum);
1396  if (r) return r;
1397 
1398  if (NTYPE(node->target) == NT_ALT) {
1399  Node* x = node->target;
1400  int len2;
1401 
1402  len = compile_length_tree(NCAR(x), reg); /* yes-node */
1403  if (len < 0) return len;
1404  if (NCDR(x) == NULL) return ONIGERR_PARSER_BUG;
1405  x = NCDR(x);
1406  len2 = compile_length_tree(NCAR(x), reg); /* no-node */
1407  if (len2 < 0) return len2;
1408  if (NCDR(x) != NULL) return ONIGERR_INVALID_CONDITION_PATTERN;
1409 
1410  x = node->target;
1411  r = add_rel_addr(reg, len + SIZE_OP_JUMP);
1412  if (r) return r;
1413  r = compile_tree(NCAR(x), reg); /* yes-node */
1414  if (r) return r;
1415  r = add_opcode_rel_addr(reg, OP_JUMP, len2);
1416  if (r) return r;
1417  x = NCDR(x);
1418  r = compile_tree(NCAR(x), reg); /* no-node */
1419  }
1420  else {
1421  return ONIGERR_PARSER_BUG;
1422  }
1423  break;
1424 
1425  default:
1426  return ONIGERR_TYPE_BUG;
1427  break;
1428  }
1429 
1430  return r;
1431 }
1432 
1433 static int
1435 {
1436  int len;
1437  int tlen = 0;
1438 
1439  if (node->target) {
1440  tlen = compile_length_tree(node->target, reg);
1441  if (tlen < 0) return tlen;
1442  }
1443 
1444  switch (node->type) {
1445  case ANCHOR_PREC_READ:
1446  len = SIZE_OP_PUSH_POS + tlen + SIZE_OP_POP_POS;
1447  break;
1448  case ANCHOR_PREC_READ_NOT:
1449  len = SIZE_OP_PUSH_POS_NOT + tlen + SIZE_OP_FAIL_POS;
1450  break;
1451  case ANCHOR_LOOK_BEHIND:
1452  len = SIZE_OP_LOOK_BEHIND + tlen;
1453  break;
1456  break;
1457 
1458  default:
1459  len = SIZE_OPCODE;
1460  break;
1461  }
1462 
1463  return len;
1464 }
1465 
1466 static int
1468 {
1469  int r, len;
1470 
1471  switch (node->type) {
1472  case ANCHOR_BEGIN_BUF: r = add_opcode(reg, OP_BEGIN_BUF); break;
1473  case ANCHOR_END_BUF: r = add_opcode(reg, OP_END_BUF); break;
1474  case ANCHOR_BEGIN_LINE: r = add_opcode(reg, OP_BEGIN_LINE); break;
1475  case ANCHOR_END_LINE: r = add_opcode(reg, OP_END_LINE); break;
1476  case ANCHOR_SEMI_END_BUF: r = add_opcode(reg, OP_SEMI_END_BUF); break;
1477  case ANCHOR_BEGIN_POSITION: r = add_opcode(reg, OP_BEGIN_POSITION); break;
1478 
1479  /* used for implicit anchor optimization: /.*a/ ==> /(?:^|\G).*a/ */
1480  case ANCHOR_ANYCHAR_STAR: r = add_opcode(reg, OP_BEGIN_POS_OR_LINE); break;
1481 
1482  case ANCHOR_WORD_BOUND:
1483  if (node->ascii_range) r = add_opcode(reg, OP_ASCII_WORD_BOUND);
1484  else r = add_opcode(reg, OP_WORD_BOUND);
1485  break;
1486  case ANCHOR_NOT_WORD_BOUND:
1487  if (node->ascii_range) r = add_opcode(reg, OP_NOT_ASCII_WORD_BOUND);
1488  else r = add_opcode(reg, OP_NOT_WORD_BOUND);
1489  break;
1490 #ifdef USE_WORD_BEGIN_END
1491  case ANCHOR_WORD_BEGIN:
1492  if (node->ascii_range) r = add_opcode(reg, OP_ASCII_WORD_BEGIN);
1493  else r = add_opcode(reg, OP_WORD_BEGIN);
1494  break;
1495  case ANCHOR_WORD_END:
1496  if (node->ascii_range) r = add_opcode(reg, OP_ASCII_WORD_END);
1497  else r = add_opcode(reg, OP_WORD_END);
1498  break;
1499 #endif
1500  case ANCHOR_KEEP: r = add_opcode(reg, OP_KEEP); break;
1501 
1502  case ANCHOR_PREC_READ:
1503  r = add_opcode(reg, OP_PUSH_POS);
1504  if (r) return r;
1505  r = compile_tree(node->target, reg);
1506  if (r) return r;
1507  r = add_opcode(reg, OP_POP_POS);
1508  break;
1509 
1510  case ANCHOR_PREC_READ_NOT:
1511  len = compile_length_tree(node->target, reg);
1512  if (len < 0) return len;
1514  if (r) return r;
1515  r = compile_tree(node->target, reg);
1516  if (r) return r;
1517  r = add_opcode(reg, OP_FAIL_POS);
1518  break;
1519 
1520  case ANCHOR_LOOK_BEHIND:
1521  {
1522  int n;
1523  r = add_opcode(reg, OP_LOOK_BEHIND);
1524  if (r) return r;
1525  if (node->char_len < 0) {
1526  r = get_char_length_tree(node->target, reg, &n);
1528  }
1529  else
1530  n = node->char_len;
1531  r = add_length(reg, n);
1532  if (r) return r;
1533  r = compile_tree(node->target, reg);
1534  }
1535  break;
1536 
1538  {
1539  int n;
1540  len = compile_length_tree(node->target, reg);
1543  if (r) return r;
1544  if (node->char_len < 0) {
1545  r = get_char_length_tree(node->target, reg, &n);
1547  }
1548  else
1549  n = node->char_len;
1550  r = add_length(reg, n);
1551  if (r) return r;
1552  r = compile_tree(node->target, reg);
1553  if (r) return r;
1555  }
1556  break;
1557 
1558  default:
1559  return ONIGERR_TYPE_BUG;
1560  break;
1561  }
1562 
1563  return r;
1564 }
1565 
1566 static int
1568 {
1569  int len, type, r;
1570 
1571  type = NTYPE(node);
1572  switch (type) {
1573  case NT_LIST:
1574  len = 0;
1575  do {
1576  r = compile_length_tree(NCAR(node), reg);
1577  if (r < 0) return r;
1578  len += r;
1579  } while (IS_NOT_NULL(node = NCDR(node)));
1580  r = len;
1581  break;
1582 
1583  case NT_ALT:
1584  {
1585  int n;
1586 
1587  n = r = 0;
1588  do {
1589  r += compile_length_tree(NCAR(node), reg);
1590  n++;
1591  } while (IS_NOT_NULL(node = NCDR(node)));
1592  r += (SIZE_OP_PUSH + SIZE_OP_JUMP) * (n - 1);
1593  }
1594  break;
1595 
1596  case NT_STR:
1597  if (NSTRING_IS_RAW(node))
1598  r = compile_length_string_raw_node(NSTR(node), reg);
1599  else
1600  r = compile_length_string_node(node, reg);
1601  break;
1602 
1603  case NT_CCLASS:
1604  r = compile_length_cclass_node(NCCLASS(node), reg);
1605  break;
1606 
1607  case NT_CTYPE:
1608  case NT_CANY:
1609  r = SIZE_OPCODE;
1610  break;
1611 
1612  case NT_BREF:
1613  {
1614  BRefNode* br = NBREF(node);
1615 
1616 #ifdef USE_BACKREF_WITH_LEVEL
1617  if (IS_BACKREF_NEST_LEVEL(br)) {
1619  SIZE_LENGTH + (SIZE_MEMNUM * br->back_num);
1620  }
1621  else
1622 #endif
1623  if (br->back_num == 1) {
1624  r = ((!IS_IGNORECASE(reg->options) && br->back_static[0] <= 2)
1626  }
1627  else {
1628  r = SIZE_OPCODE + SIZE_LENGTH + (SIZE_MEMNUM * br->back_num);
1629  }
1630  }
1631  break;
1632 
1633 #ifdef USE_SUBEXP_CALL
1634  case NT_CALL:
1635  r = SIZE_OP_CALL;
1636  break;
1637 #endif
1638 
1639  case NT_QTFR:
1640  r = compile_length_quantifier_node(NQTFR(node), reg);
1641  break;
1642 
1643  case NT_ENCLOSE:
1644  r = compile_length_enclose_node(NENCLOSE(node), reg);
1645  break;
1646 
1647  case NT_ANCHOR:
1648  r = compile_length_anchor_node(NANCHOR(node), reg);
1649  break;
1650 
1651  default:
1652  return ONIGERR_TYPE_BUG;
1653  break;
1654  }
1655 
1656  return r;
1657 }
1658 
1659 static int
1661 {
1662  int n, type, len, pos, r = 0;
1663 
1664  type = NTYPE(node);
1665  switch (type) {
1666  case NT_LIST:
1667  do {
1668  r = compile_tree(NCAR(node), reg);
1669  } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
1670  break;
1671 
1672  case NT_ALT:
1673  {
1674  Node* x = node;
1675  len = 0;
1676  do {
1677  len += compile_length_tree(NCAR(x), reg);
1678  if (NCDR(x) != NULL) {
1679  len += SIZE_OP_PUSH + SIZE_OP_JUMP;
1680  }
1681  } while (IS_NOT_NULL(x = NCDR(x)));
1682  pos = reg->used + len; /* goal position */
1683 
1684  do {
1685  len = compile_length_tree(NCAR(node), reg);
1686  if (IS_NOT_NULL(NCDR(node))) {
1687  r = add_opcode_rel_addr(reg, OP_PUSH, len + SIZE_OP_JUMP);
1688  if (r) break;
1689  }
1690  r = compile_tree(NCAR(node), reg);
1691  if (r) break;
1692  if (IS_NOT_NULL(NCDR(node))) {
1693  len = pos - (reg->used + SIZE_OP_JUMP);
1694  r = add_opcode_rel_addr(reg, OP_JUMP, len);
1695  if (r) break;
1696  }
1697  } while (IS_NOT_NULL(node = NCDR(node)));
1698  }
1699  break;
1700 
1701  case NT_STR:
1702  if (NSTRING_IS_RAW(node))
1703  r = compile_string_raw_node(NSTR(node), reg);
1704  else
1705  r = compile_string_node(node, reg);
1706  break;
1707 
1708  case NT_CCLASS:
1709  r = compile_cclass_node(NCCLASS(node), reg);
1710  break;
1711 
1712  case NT_CTYPE:
1713  {
1714  int op;
1715 
1716  switch (NCTYPE(node)->ctype) {
1717  case ONIGENC_CTYPE_WORD:
1718  if (NCTYPE(node)->ascii_range != 0) {
1719  if (NCTYPE(node)->not != 0) op = OP_NOT_ASCII_WORD;
1720  else op = OP_ASCII_WORD;
1721  }
1722  else {
1723  if (NCTYPE(node)->not != 0) op = OP_NOT_WORD;
1724  else op = OP_WORD;
1725  }
1726  break;
1727  default:
1728  return ONIGERR_TYPE_BUG;
1729  break;
1730  }
1731  r = add_opcode(reg, op);
1732  }
1733  break;
1734 
1735  case NT_CANY:
1736  if (IS_MULTILINE(reg->options))
1737  r = add_opcode(reg, OP_ANYCHAR_ML);
1738  else
1739  r = add_opcode(reg, OP_ANYCHAR);
1740  break;
1741 
1742  case NT_BREF:
1743  {
1744  BRefNode* br = NBREF(node);
1745 
1746 #ifdef USE_BACKREF_WITH_LEVEL
1747  if (IS_BACKREF_NEST_LEVEL(br)) {
1749  if (r) return r;
1750  r = add_option(reg, (reg->options & ONIG_OPTION_IGNORECASE));
1751  if (r) return r;
1752  r = add_length(reg, br->nest_level);
1753  if (r) return r;
1754 
1755  goto add_bacref_mems;
1756  }
1757  else
1758 #endif
1759  if (br->back_num == 1) {
1760  n = br->back_static[0];
1761  if (IS_IGNORECASE(reg->options)) {
1762  r = add_opcode(reg, OP_BACKREFN_IC);
1763  if (r) return r;
1764  r = add_mem_num(reg, n);
1765  }
1766  else {
1767  switch (n) {
1768  case 1: r = add_opcode(reg, OP_BACKREF1); break;
1769  case 2: r = add_opcode(reg, OP_BACKREF2); break;
1770  default:
1771  r = add_opcode(reg, OP_BACKREFN);
1772  if (r) return r;
1773  r = add_mem_num(reg, n);
1774  break;
1775  }
1776  }
1777  }
1778  else {
1779  int i;
1780  int* p;
1781 
1782  if (IS_IGNORECASE(reg->options)) {
1783  r = add_opcode(reg, OP_BACKREF_MULTI_IC);
1784  }
1785  else {
1786  r = add_opcode(reg, OP_BACKREF_MULTI);
1787  }
1788  if (r) return r;
1789 
1790 #ifdef USE_BACKREF_WITH_LEVEL
1791  add_bacref_mems:
1792 #endif
1793  r = add_length(reg, br->back_num);
1794  if (r) return r;
1795  p = BACKREFS_P(br);
1796  for (i = br->back_num - 1; i >= 0; i--) {
1797  r = add_mem_num(reg, p[i]);
1798  if (r) return r;
1799  }
1800  }
1801  }
1802  break;
1803 
1804 #ifdef USE_SUBEXP_CALL
1805  case NT_CALL:
1806  r = compile_call(NCALL(node), reg);
1807  break;
1808 #endif
1809 
1810  case NT_QTFR:
1811  r = compile_quantifier_node(NQTFR(node), reg);
1812  break;
1813 
1814  case NT_ENCLOSE:
1815  r = compile_enclose_node(NENCLOSE(node), reg);
1816  break;
1817 
1818  case NT_ANCHOR:
1819  r = compile_anchor_node(NANCHOR(node), reg);
1820  break;
1821 
1822  default:
1823 #ifdef ONIG_DEBUG
1824  fprintf(stderr, "compile_tree: undefined node type %d\n", NTYPE(node));
1825 #endif
1826  break;
1827  }
1828 
1829  return r;
1830 }
1831 
1832 #ifdef USE_NAMED_GROUP
1833 
1834 static int
1835 noname_disable_map(Node** plink, GroupNumRemap* map, int* counter)
1836 {
1837  int r = 0;
1838  Node* node = *plink;
1839 
1840  switch (NTYPE(node)) {
1841  case NT_LIST:
1842  case NT_ALT:
1843  do {
1844  r = noname_disable_map(&(NCAR(node)), map, counter);
1845  } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
1846  break;
1847 
1848  case NT_QTFR:
1849  {
1850  Node** ptarget = &(NQTFR(node)->target);
1851  Node* old = *ptarget;
1852  r = noname_disable_map(ptarget, map, counter);
1853  if (*ptarget != old && NTYPE(*ptarget) == NT_QTFR) {
1854  onig_reduce_nested_quantifier(node, *ptarget);
1855  }
1856  }
1857  break;
1858 
1859  case NT_ENCLOSE:
1860  {
1861  EncloseNode* en = NENCLOSE(node);
1862  if (en->type == ENCLOSE_MEMORY) {
1863  if (IS_ENCLOSE_NAMED_GROUP(en)) {
1864  (*counter)++;
1865  map[en->regnum].new_val = *counter;
1866  en->regnum = *counter;
1867  r = noname_disable_map(&(en->target), map, counter);
1868  }
1869  else {
1870  *plink = en->target;
1871  en->target = NULL_NODE;
1872  onig_node_free(node);
1873  r = noname_disable_map(plink, map, counter);
1874  }
1875  }
1876  else
1877  r = noname_disable_map(&(en->target), map, counter);
1878  }
1879  break;
1880 
1881  case NT_ANCHOR:
1882  {
1883  AnchorNode* an = NANCHOR(node);
1884  switch (an->type) {
1885  case ANCHOR_PREC_READ:
1886  case ANCHOR_PREC_READ_NOT:
1887  case ANCHOR_LOOK_BEHIND:
1889  r = noname_disable_map(&(an->target), map, counter);
1890  break;
1891  }
1892  }
1893  break;
1894 
1895  default:
1896  break;
1897  }
1898 
1899  return r;
1900 }
1901 
1902 static int
1904 {
1905  int i, pos, n, old_num;
1906  int *backs;
1907  BRefNode* bn = NBREF(node);
1908 
1909  if (! IS_BACKREF_NAME_REF(bn))
1911 
1912  old_num = bn->back_num;
1913  if (IS_NULL(bn->back_dynamic))
1914  backs = bn->back_static;
1915  else
1916  backs = bn->back_dynamic;
1917 
1918  for (i = 0, pos = 0; i < old_num; i++) {
1919  n = map[backs[i]].new_val;
1920  if (n > 0) {
1921  backs[pos] = n;
1922  pos++;
1923  }
1924  }
1925 
1926  bn->back_num = pos;
1927  return 0;
1928 }
1929 
1930 static int
1932 {
1933  int r = 0;
1934 
1935  switch (NTYPE(node)) {
1936  case NT_LIST:
1937  case NT_ALT:
1938  do {
1939  r = renumber_by_map(NCAR(node), map);
1940  } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
1941  break;
1942  case NT_QTFR:
1943  r = renumber_by_map(NQTFR(node)->target, map);
1944  break;
1945  case NT_ENCLOSE:
1946  r = renumber_by_map(NENCLOSE(node)->target, map);
1947  break;
1948 
1949  case NT_BREF:
1950  r = renumber_node_backref(node, map);
1951  break;
1952 
1953  case NT_ANCHOR:
1954  {
1955  AnchorNode* an = NANCHOR(node);
1956  switch (an->type) {
1957  case ANCHOR_PREC_READ:
1958  case ANCHOR_PREC_READ_NOT:
1959  case ANCHOR_LOOK_BEHIND:
1961  r = renumber_by_map(an->target, map);
1962  break;
1963  }
1964  }
1965  break;
1966 
1967  default:
1968  break;
1969  }
1970 
1971  return r;
1972 }
1973 
1974 static int
1976 {
1977  int r = 0;
1978 
1979  switch (NTYPE(node)) {
1980  case NT_LIST:
1981  case NT_ALT:
1982  do {
1983  r = numbered_ref_check(NCAR(node));
1984  } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
1985  break;
1986  case NT_QTFR:
1987  r = numbered_ref_check(NQTFR(node)->target);
1988  break;
1989  case NT_ENCLOSE:
1990  r = numbered_ref_check(NENCLOSE(node)->target);
1991  break;
1992 
1993  case NT_BREF:
1994  if (! IS_BACKREF_NAME_REF(NBREF(node)))
1996  break;
1997 
1998  default:
1999  break;
2000  }
2001 
2002  return r;
2003 }
2004 
2005 static int
2007 {
2008  int r, i, pos, counter;
2009  BitStatusType loc;
2010  GroupNumRemap* map;
2011 
2012  map = (GroupNumRemap* )xalloca(sizeof(GroupNumRemap) * (env->num_mem + 1));
2014  for (i = 1; i <= env->num_mem; i++) {
2015  map[i].new_val = 0;
2016  }
2017  counter = 0;
2018  r = noname_disable_map(root, map, &counter);
2019  if (r != 0) return r;
2020 
2021  r = renumber_by_map(*root, map);
2022  if (r != 0) return r;
2023 
2024  for (i = 1, pos = 1; i <= env->num_mem; i++) {
2025  if (map[i].new_val > 0) {
2026  SCANENV_MEM_NODES(env)[pos] = SCANENV_MEM_NODES(env)[i];
2027  pos++;
2028  }
2029  }
2030 
2031  loc = env->capture_history;
2033  for (i = 1; i <= ONIG_MAX_CAPTURE_HISTORY_GROUP; i++) {
2034  if (BIT_STATUS_AT(loc, i)) {
2036  }
2037  }
2038 
2039  env->num_mem = env->num_named;
2040  reg->num_mem = env->num_named;
2041 
2042  return onig_renumber_name_table(reg, map);
2043 }
2044 #endif /* USE_NAMED_GROUP */
2045 
2046 #ifdef USE_SUBEXP_CALL
2047 static int
2049 {
2050  int i, offset;
2051  EncloseNode* en;
2052  AbsAddrType addr;
2053 
2054  for (i = 0; i < uslist->num; i++) {
2055  en = NENCLOSE(uslist->us[i].target);
2056  if (! IS_ENCLOSE_ADDR_FIXED(en)) return ONIGERR_PARSER_BUG;
2057  addr = en->call_addr;
2058  offset = uslist->us[i].offset;
2059 
2060  BBUF_WRITE(reg, offset, &addr, SIZE_ABSADDR);
2061  }
2062  return 0;
2063 }
2064 #endif
2065 
2066 #ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT
2067 static int
2069 {
2070  int r = 0;
2071 
2072  switch (NTYPE(node)) {
2073  case NT_LIST:
2074  case NT_ALT:
2075  {
2076  int v;
2077  do {
2079  if (v > r) r = v;
2080  } while (v >= 0 && IS_NOT_NULL(node = NCDR(node)));
2081  }
2082  break;
2083 
2084 #ifdef USE_SUBEXP_CALL
2085  case NT_CALL:
2086  if (IS_CALL_RECURSION(NCALL(node))) {
2087  return NQ_TARGET_IS_EMPTY_REC; /* tiny version */
2088  }
2089  else
2091  break;
2092 #endif
2093 
2094  case NT_QTFR:
2095  {
2096  QtfrNode* qn = NQTFR(node);
2097  if (qn->upper != 0) {
2099  }
2100  }
2101  break;
2102 
2103  case NT_ENCLOSE:
2104  {
2105  EncloseNode* en = NENCLOSE(node);
2106  switch (en->type) {
2107  case ENCLOSE_MEMORY:
2108  return NQ_TARGET_IS_EMPTY_MEM;
2109  break;
2110 
2111  case ENCLOSE_OPTION:
2113  case ENCLOSE_CONDITION:
2115  break;
2116  default:
2117  break;
2118  }
2119  }
2120  break;
2121 
2122  case NT_BREF:
2123  case NT_STR:
2124  case NT_CTYPE:
2125  case NT_CCLASS:
2126  case NT_CANY:
2127  case NT_ANCHOR:
2128  default:
2129  break;
2130  }
2131 
2132  return r;
2133 }
2134 #endif /* USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT */
2135 
2136 static int
2138 {
2139  OnigDistance tmin;
2140  int r = 0;
2141 
2142  *min = 0;
2143  switch (NTYPE(node)) {
2144  case NT_BREF:
2145  {
2146  int i;
2147  int* backs;
2148  Node** nodes = SCANENV_MEM_NODES(env);
2149  BRefNode* br = NBREF(node);
2150  if (br->state & NST_RECURSION) break;
2151 
2152  backs = BACKREFS_P(br);
2153  if (backs[0] > env->num_mem) return ONIGERR_INVALID_BACKREF;
2154  r = get_min_match_length(nodes[backs[0]], min, env);
2155  if (r != 0) break;
2156  for (i = 1; i < br->back_num; i++) {
2157  if (backs[i] > env->num_mem) return ONIGERR_INVALID_BACKREF;
2158  r = get_min_match_length(nodes[backs[i]], &tmin, env);
2159  if (r != 0) break;
2160  if (*min > tmin) *min = tmin;
2161  }
2162  }
2163  break;
2164 
2165 #ifdef USE_SUBEXP_CALL
2166  case NT_CALL:
2167  if (IS_CALL_RECURSION(NCALL(node))) {
2168  EncloseNode* en = NENCLOSE(NCALL(node)->target);
2169  if (IS_ENCLOSE_MIN_FIXED(en))
2170  *min = en->min_len;
2171  }
2172  else
2173  r = get_min_match_length(NCALL(node)->target, min, env);
2174  break;
2175 #endif
2176 
2177  case NT_LIST:
2178  do {
2179  r = get_min_match_length(NCAR(node), &tmin, env);
2180  if (r == 0) *min += tmin;
2181  } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
2182  break;
2183 
2184  case NT_ALT:
2185  {
2186  Node *x, *y;
2187  y = node;
2188  do {
2189  x = NCAR(y);
2190  r = get_min_match_length(x, &tmin, env);
2191  if (r != 0) break;
2192  if (y == node) *min = tmin;
2193  else if (*min > tmin) *min = tmin;
2194  } while (r == 0 && IS_NOT_NULL(y = NCDR(y)));
2195  }
2196  break;
2197 
2198  case NT_STR:
2199  {
2200  StrNode* sn = NSTR(node);
2201  *min = sn->end - sn->s;
2202  }
2203  break;
2204 
2205  case NT_CTYPE:
2206  *min = 1;
2207  break;
2208 
2209  case NT_CCLASS:
2210  case NT_CANY:
2211  *min = 1;
2212  break;
2213 
2214  case NT_QTFR:
2215  {
2216  QtfrNode* qn = NQTFR(node);
2217 
2218  if (qn->lower > 0) {
2219  r = get_min_match_length(qn->target, min, env);
2220  if (r == 0)
2221  *min = distance_multiply(*min, qn->lower);
2222  }
2223  }
2224  break;
2225 
2226  case NT_ENCLOSE:
2227  {
2228  EncloseNode* en = NENCLOSE(node);
2229  switch (en->type) {
2230  case ENCLOSE_MEMORY:
2231 #ifdef USE_SUBEXP_CALL
2232  if (IS_ENCLOSE_MIN_FIXED(en))
2233  *min = en->min_len;
2234  else {
2235  r = get_min_match_length(en->target, min, env);
2236  if (r == 0) {
2237  en->min_len = *min;
2239  }
2240  }
2241  break;
2242 #endif
2243  case ENCLOSE_OPTION:
2245  case ENCLOSE_CONDITION:
2246  r = get_min_match_length(en->target, min, env);
2247  break;
2248  }
2249  }
2250  break;
2251 
2252  case NT_ANCHOR:
2253  default:
2254  break;
2255  }
2256 
2257  return r;
2258 }
2259 
2260 static int
2262 {
2263  OnigDistance tmax;
2264  int r = 0;
2265 
2266  *max = 0;
2267  switch (NTYPE(node)) {
2268  case NT_LIST:
2269  do {
2270  r = get_max_match_length(NCAR(node), &tmax, env);
2271  if (r == 0)
2272  *max = distance_add(*max, tmax);
2273  } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
2274  break;
2275 
2276  case NT_ALT:
2277  do {
2278  r = get_max_match_length(NCAR(node), &tmax, env);
2279  if (r == 0 && *max < tmax) *max = tmax;
2280  } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
2281  break;
2282 
2283  case NT_STR:
2284  {
2285  StrNode* sn = NSTR(node);
2286  *max = sn->end - sn->s;
2287  }
2288  break;
2289 
2290  case NT_CTYPE:
2291  *max = ONIGENC_MBC_MAXLEN_DIST(env->enc);
2292  break;
2293 
2294  case NT_CCLASS:
2295  case NT_CANY:
2296  *max = ONIGENC_MBC_MAXLEN_DIST(env->enc);
2297  break;
2298 
2299  case NT_BREF:
2300  {
2301  int i;
2302  int* backs;
2303  Node** nodes = SCANENV_MEM_NODES(env);
2304  BRefNode* br = NBREF(node);
2305  if (br->state & NST_RECURSION) {
2306  *max = ONIG_INFINITE_DISTANCE;
2307  break;
2308  }
2309  backs = BACKREFS_P(br);
2310  for (i = 0; i < br->back_num; i++) {
2311  if (backs[i] > env->num_mem) return ONIGERR_INVALID_BACKREF;
2312  r = get_max_match_length(nodes[backs[i]], &tmax, env);
2313  if (r != 0) break;
2314  if (*max < tmax) *max = tmax;
2315  }
2316  }
2317  break;
2318 
2319 #ifdef USE_SUBEXP_CALL
2320  case NT_CALL:
2321  if (! IS_CALL_RECURSION(NCALL(node)))
2322  r = get_max_match_length(NCALL(node)->target, max, env);
2323  else
2324  *max = ONIG_INFINITE_DISTANCE;
2325  break;
2326 #endif
2327 
2328  case NT_QTFR:
2329  {
2330  QtfrNode* qn = NQTFR(node);
2331 
2332  if (qn->upper != 0) {
2333  r = get_max_match_length(qn->target, max, env);
2334  if (r == 0 && *max != 0) {
2335  if (! IS_REPEAT_INFINITE(qn->upper))
2336  *max = distance_multiply(*max, qn->upper);
2337  else
2338  *max = ONIG_INFINITE_DISTANCE;
2339  }
2340  }
2341  }
2342  break;
2343 
2344  case NT_ENCLOSE:
2345  {
2346  EncloseNode* en = NENCLOSE(node);
2347  switch (en->type) {
2348  case ENCLOSE_MEMORY:
2349 #ifdef USE_SUBEXP_CALL
2350  if (IS_ENCLOSE_MAX_FIXED(en))
2351  *max = en->max_len;
2352  else {
2353  r = get_max_match_length(en->target, max, env);
2354  if (r == 0) {
2355  en->max_len = *max;
2357  }
2358  }
2359  break;
2360 #endif
2361  case ENCLOSE_OPTION:
2363  case ENCLOSE_CONDITION:
2364  r = get_max_match_length(en->target, max, env);
2365  break;
2366  }
2367  }
2368  break;
2369 
2370  case NT_ANCHOR:
2371  default:
2372  break;
2373  }
2374 
2375  return r;
2376 }
2377 
2378 #define GET_CHAR_LEN_VARLEN -1
2379 #define GET_CHAR_LEN_TOP_ALT_VARLEN -2
2380 
2381 /* fixed size pattern node only */
2382 static int
2383 get_char_length_tree1(Node* node, regex_t* reg, int* len, int level)
2384 {
2385  int tlen;
2386  int r = 0;
2387 
2388  level++;
2389  *len = 0;
2390  switch (NTYPE(node)) {
2391  case NT_LIST:
2392  do {
2393  r = get_char_length_tree1(NCAR(node), reg, &tlen, level);
2394  if (r == 0)
2395  *len = (int )distance_add(*len, tlen);
2396  } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
2397  break;
2398 
2399  case NT_ALT:
2400  {
2401  int tlen2;
2402  int varlen = 0;
2403 
2404  r = get_char_length_tree1(NCAR(node), reg, &tlen, level);
2405  while (r == 0 && IS_NOT_NULL(node = NCDR(node))) {
2406  r = get_char_length_tree1(NCAR(node), reg, &tlen2, level);
2407  if (r == 0) {
2408  if (tlen != tlen2)
2409  varlen = 1;
2410  }
2411  }
2412  if (r == 0) {
2413  if (varlen != 0) {
2414  if (level == 1)
2416  else
2417  r = GET_CHAR_LEN_VARLEN;
2418  }
2419  else
2420  *len = tlen;
2421  }
2422  }
2423  break;
2424 
2425  case NT_STR:
2426  {
2427  StrNode* sn = NSTR(node);
2428  UChar *s = sn->s;
2429  while (s < sn->end) {
2430  s += enclen(reg->enc, s, sn->end);
2431  (*len)++;
2432  }
2433  }
2434  break;
2435 
2436  case NT_QTFR:
2437  {
2438  QtfrNode* qn = NQTFR(node);
2439  if (qn->lower == qn->upper) {
2440  r = get_char_length_tree1(qn->target, reg, &tlen, level);
2441  if (r == 0)
2442  *len = (int )distance_multiply(tlen, qn->lower);
2443  }
2444  else
2445  r = GET_CHAR_LEN_VARLEN;
2446  }
2447  break;
2448 
2449 #ifdef USE_SUBEXP_CALL
2450  case NT_CALL:
2451  if (! IS_CALL_RECURSION(NCALL(node)))
2452  r = get_char_length_tree1(NCALL(node)->target, reg, len, level);
2453  else
2454  r = GET_CHAR_LEN_VARLEN;
2455  break;
2456 #endif
2457 
2458  case NT_CTYPE:
2459  *len = 1;
2460  break;
2461 
2462  case NT_CCLASS:
2463  case NT_CANY:
2464  *len = 1;
2465  break;
2466 
2467  case NT_ENCLOSE:
2468  {
2469  EncloseNode* en = NENCLOSE(node);
2470  switch (en->type) {
2471  case ENCLOSE_MEMORY:
2472 #ifdef USE_SUBEXP_CALL
2473  if (IS_ENCLOSE_CLEN_FIXED(en))
2474  *len = en->char_len;
2475  else {
2476  r = get_char_length_tree1(en->target, reg, len, level);
2477  if (r == 0) {
2478  en->char_len = *len;
2480  }
2481  }
2482  break;
2483 #endif
2484  case ENCLOSE_OPTION:
2486  case ENCLOSE_CONDITION:
2487  r = get_char_length_tree1(en->target, reg, len, level);
2488  break;
2489  default:
2490  break;
2491  }
2492  }
2493  break;
2494 
2495  case NT_ANCHOR:
2496  break;
2497 
2498  default:
2499  r = GET_CHAR_LEN_VARLEN;
2500  break;
2501  }
2502 
2503  return r;
2504 }
2505 
2506 static int
2508 {
2509  return get_char_length_tree1(node, reg, len, 0);
2510 }
2511 
2512 /* x is not included y ==> 1 : 0 */
2513 static int
2515 {
2516  int i;
2517  OnigDistance len;
2519  UChar *p;
2520  int ytype;
2521 
2522  retry:
2523  ytype = NTYPE(y);
2524  switch (NTYPE(x)) {
2525  case NT_CTYPE:
2526  {
2527  switch (ytype) {
2528  case NT_CTYPE:
2529  if (NCTYPE(y)->ctype == NCTYPE(x)->ctype &&
2530  NCTYPE(y)->not != NCTYPE(x)->not &&
2531  NCTYPE(y)->ascii_range == NCTYPE(x)->ascii_range)
2532  return 1;
2533  else
2534  return 0;
2535  break;
2536 
2537  case NT_CCLASS:
2538  swap:
2539  {
2540  Node* tmp;
2541  tmp = x; x = y; y = tmp;
2542  goto retry;
2543  }
2544  break;
2545 
2546  case NT_STR:
2547  goto swap;
2548  break;
2549 
2550  default:
2551  break;
2552  }
2553  }
2554  break;
2555 
2556  case NT_CCLASS:
2557  {
2558  CClassNode* xc = NCCLASS(x);
2559  switch (ytype) {
2560  case NT_CTYPE:
2561  switch (NCTYPE(y)->ctype) {
2562  case ONIGENC_CTYPE_WORD:
2563  if (NCTYPE(y)->not == 0) {
2564  if (IS_NULL(xc->mbuf) && !IS_NCCLASS_NOT(xc)) {
2565  for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
2566  if (BITSET_AT(xc->bs, i)) {
2567  if (NCTYPE(y)->ascii_range) {
2568  if (IS_CODE_SB_WORD(reg->enc, i)) return 0;
2569  }
2570  else {
2571  if (ONIGENC_IS_CODE_WORD(reg->enc, i)) return 0;
2572  }
2573  }
2574  }
2575  return 1;
2576  }
2577  return 0;
2578  }
2579  else {
2580  for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
2581  int is_word;
2582  if (NCTYPE(y)->ascii_range)
2583  is_word = IS_CODE_SB_WORD(reg->enc, i);
2584  else
2585  is_word = ONIGENC_IS_CODE_WORD(reg->enc, i);
2586  if (! is_word) {
2587  if (!IS_NCCLASS_NOT(xc)) {
2588  if (BITSET_AT(xc->bs, i))
2589  return 0;
2590  }
2591  else {
2592  if (! BITSET_AT(xc->bs, i))
2593  return 0;
2594  }
2595  }
2596  }
2597  return 1;
2598  }
2599  break;
2600 
2601  default:
2602  break;
2603  }
2604  break;
2605 
2606  case NT_CCLASS:
2607  {
2608  int v;
2609  CClassNode* yc = NCCLASS(y);
2610 
2611  for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
2612  v = BITSET_AT(xc->bs, i);
2613  if ((v != 0 && !IS_NCCLASS_NOT(xc)) ||
2614  (v == 0 && IS_NCCLASS_NOT(xc))) {
2615  v = BITSET_AT(yc->bs, i);
2616  if ((v != 0 && !IS_NCCLASS_NOT(yc)) ||
2617  (v == 0 && IS_NCCLASS_NOT(yc)))
2618  return 0;
2619  }
2620  }
2621  if ((IS_NULL(xc->mbuf) && !IS_NCCLASS_NOT(xc)) ||
2622  (IS_NULL(yc->mbuf) && !IS_NCCLASS_NOT(yc)))
2623  return 1;
2624  return 0;
2625  }
2626  break;
2627 
2628  case NT_STR:
2629  goto swap;
2630  break;
2631 
2632  default:
2633  break;
2634  }
2635  }
2636  break;
2637 
2638  case NT_STR:
2639  {
2640  StrNode* xs = NSTR(x);
2641  if (NSTRING_LEN(x) == 0)
2642  break;
2643 
2644  switch (ytype) {
2645  case NT_CTYPE:
2646  switch (NCTYPE(y)->ctype) {
2647  case ONIGENC_CTYPE_WORD:
2648  if (NCTYPE(y)->ascii_range) {
2649  if (ONIGENC_IS_MBC_ASCII_WORD(reg->enc, xs->s, xs->end))
2650  return NCTYPE(y)->not;
2651  else
2652  return !(NCTYPE(y)->not);
2653  }
2654  else {
2655  if (ONIGENC_IS_MBC_WORD(reg->enc, xs->s, xs->end))
2656  return NCTYPE(y)->not;
2657  else
2658  return !(NCTYPE(y)->not);
2659  }
2660  break;
2661  default:
2662  break;
2663  }
2664  break;
2665 
2666  case NT_CCLASS:
2667  {
2668  CClassNode* cc = NCCLASS(y);
2669 
2670  code = ONIGENC_MBC_TO_CODE(reg->enc, xs->s,
2671  xs->s + ONIGENC_MBC_MAXLEN(reg->enc));
2672  return (onig_is_code_in_cc(reg->enc, code, cc) != 0 ? 0 : 1);
2673  }
2674  break;
2675 
2676  case NT_STR:
2677  {
2678  UChar *q;
2679  StrNode* ys = NSTR(y);
2680  len = NSTRING_LEN(x);
2681  if (len > NSTRING_LEN(y)) len = NSTRING_LEN(y);
2682  if (NSTRING_IS_AMBIG(x) || NSTRING_IS_AMBIG(y)) {
2683  /* tiny version */
2684  return 0;
2685  }
2686  else {
2687  for (i = 0, p = ys->s, q = xs->s; (OnigDistance )i < len; i++, p++, q++) {
2688  if (*p != *q) return 1;
2689  }
2690  }
2691  }
2692  break;
2693 
2694  default:
2695  break;
2696  }
2697  }
2698  break;
2699 
2700  default:
2701  break;
2702  }
2703 
2704  return 0;
2705 }
2706 
2707 static Node*
2708 get_head_value_node(Node* node, int exact, regex_t* reg)
2709 {
2710  Node* n = NULL_NODE;
2711 
2712  switch (NTYPE(node)) {
2713  case NT_BREF:
2714  case NT_ALT:
2715  case NT_CANY:
2716 #ifdef USE_SUBEXP_CALL
2717  case NT_CALL:
2718 #endif
2719  break;
2720 
2721  case NT_CTYPE:
2722  case NT_CCLASS:
2723  if (exact == 0) {
2724  n = node;
2725  }
2726  break;
2727 
2728  case NT_LIST:
2729  n = get_head_value_node(NCAR(node), exact, reg);
2730  break;
2731 
2732  case NT_STR:
2733  {
2734  StrNode* sn = NSTR(node);
2735 
2736  if (sn->end <= sn->s)
2737  break;
2738 
2739  if (exact != 0 &&
2740  !NSTRING_IS_RAW(node) && IS_IGNORECASE(reg->options)) {
2741  }
2742  else {
2743  n = node;
2744  }
2745  }
2746  break;
2747 
2748  case NT_QTFR:
2749  {
2750  QtfrNode* qn = NQTFR(node);
2751  if (qn->lower > 0) {
2752  if (IS_NOT_NULL(qn->head_exact))
2753  n = qn->head_exact;
2754  else
2755  n = get_head_value_node(qn->target, exact, reg);
2756  }
2757  }
2758  break;
2759 
2760  case NT_ENCLOSE:
2761  {
2762  EncloseNode* en = NENCLOSE(node);
2763  switch (en->type) {
2764  case ENCLOSE_OPTION:
2765  {
2767 
2768  reg->options = NENCLOSE(node)->option;
2769  n = get_head_value_node(NENCLOSE(node)->target, exact, reg);
2770  reg->options = options;
2771  }
2772  break;
2773 
2774  case ENCLOSE_MEMORY:
2776  case ENCLOSE_CONDITION:
2777  n = get_head_value_node(en->target, exact, reg);
2778  break;
2779  }
2780  }
2781  break;
2782 
2783  case NT_ANCHOR:
2784  if (NANCHOR(node)->type == ANCHOR_PREC_READ)
2785  n = get_head_value_node(NANCHOR(node)->target, exact, reg);
2786  break;
2787 
2788  default:
2789  break;
2790  }
2791 
2792  return n;
2793 }
2794 
2795 static int
2796 check_type_tree(Node* node, int type_mask, int enclose_mask, int anchor_mask)
2797 {
2798  int type, r = 0;
2799 
2800  type = NTYPE(node);
2801  if ((NTYPE2BIT(type) & type_mask) == 0)
2802  return 1;
2803 
2804  switch (type) {
2805  case NT_LIST:
2806  case NT_ALT:
2807  do {
2808  r = check_type_tree(NCAR(node), type_mask, enclose_mask,
2809  anchor_mask);
2810  } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
2811  break;
2812 
2813  case NT_QTFR:
2814  r = check_type_tree(NQTFR(node)->target, type_mask, enclose_mask,
2815  anchor_mask);
2816  break;
2817 
2818  case NT_ENCLOSE:
2819  {
2820  EncloseNode* en = NENCLOSE(node);
2821  if ((en->type & enclose_mask) == 0)
2822  return 1;
2823 
2824  r = check_type_tree(en->target, type_mask, enclose_mask, anchor_mask);
2825  }
2826  break;
2827 
2828  case NT_ANCHOR:
2829  type = NANCHOR(node)->type;
2830  if ((type & anchor_mask) == 0)
2831  return 1;
2832 
2833  if (NANCHOR(node)->target)
2834  r = check_type_tree(NANCHOR(node)->target,
2835  type_mask, enclose_mask, anchor_mask);
2836  break;
2837 
2838  default:
2839  break;
2840  }
2841  return r;
2842 }
2843 
2844 #ifdef USE_SUBEXP_CALL
2845 
2846 #define RECURSION_EXIST 1
2847 #define RECURSION_INFINITE 2
2848 
2849 static int
2851 {
2852  int type;
2853  int r = 0;
2854 
2855  type = NTYPE(node);
2856  switch (type) {
2857  case NT_LIST:
2858  {
2859  Node *x;
2860  OnigDistance min;
2861  int ret;
2862 
2863  x = node;
2864  do {
2865  ret = subexp_inf_recursive_check(NCAR(x), env, head);
2866  if (ret < 0 || ret == RECURSION_INFINITE) return ret;
2867  r |= ret;
2868  if (head) {
2869  ret = get_min_match_length(NCAR(x), &min, env);
2870  if (ret != 0) return ret;
2871  if (min != 0) head = 0;
2872  }
2873  } while (IS_NOT_NULL(x = NCDR(x)));
2874  }
2875  break;
2876 
2877  case NT_ALT:
2878  {
2879  int ret;
2880  r = RECURSION_EXIST;
2881  do {
2882  ret = subexp_inf_recursive_check(NCAR(node), env, head);
2883  if (ret < 0 || ret == RECURSION_INFINITE) return ret;
2884  r &= ret;
2885  } while (IS_NOT_NULL(node = NCDR(node)));
2886  }
2887  break;
2888 
2889  case NT_QTFR:
2890  r = subexp_inf_recursive_check(NQTFR(node)->target, env, head);
2891  if (r == RECURSION_EXIST) {
2892  if (NQTFR(node)->lower == 0) r = 0;
2893  }
2894  break;
2895 
2896  case NT_ANCHOR:
2897  {
2898  AnchorNode* an = NANCHOR(node);
2899  switch (an->type) {
2900  case ANCHOR_PREC_READ:
2901  case ANCHOR_PREC_READ_NOT:
2902  case ANCHOR_LOOK_BEHIND:
2904  r = subexp_inf_recursive_check(an->target, env, head);
2905  break;
2906  }
2907  }
2908  break;
2909 
2910  case NT_CALL:
2911  r = subexp_inf_recursive_check(NCALL(node)->target, env, head);
2912  break;
2913 
2914  case NT_ENCLOSE:
2915  if (IS_ENCLOSE_MARK2(NENCLOSE(node)))
2916  return 0;
2917  else if (IS_ENCLOSE_MARK1(NENCLOSE(node)))
2918  return (head == 0 ? RECURSION_EXIST : RECURSION_INFINITE);
2919  else {
2921  r = subexp_inf_recursive_check(NENCLOSE(node)->target, env, head);
2923  }
2924  break;
2925 
2926  default:
2927  break;
2928  }
2929 
2930  return r;
2931 }
2932 
2933 static int
2935 {
2936  int type;
2937  int r = 0;
2938 
2939  type = NTYPE(node);
2940  switch (type) {
2941  case NT_LIST:
2942  case NT_ALT:
2943  do {
2944  r = subexp_inf_recursive_check_trav(NCAR(node), env);
2945  } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
2946  break;
2947 
2948  case NT_QTFR:
2949  r = subexp_inf_recursive_check_trav(NQTFR(node)->target, env);
2950  break;
2951 
2952  case NT_ANCHOR:
2953  {
2954  AnchorNode* an = NANCHOR(node);
2955  switch (an->type) {
2956  case ANCHOR_PREC_READ:
2957  case ANCHOR_PREC_READ_NOT:
2958  case ANCHOR_LOOK_BEHIND:
2961  break;
2962  }
2963  }
2964  break;
2965 
2966  case NT_ENCLOSE:
2967  {
2968  EncloseNode* en = NENCLOSE(node);
2969 
2970  if (IS_ENCLOSE_RECURSION(en)) {
2972  r = subexp_inf_recursive_check(en->target, env, 1);
2973  if (r > 0) return ONIGERR_NEVER_ENDING_RECURSION;
2975  }
2977  }
2978 
2979  break;
2980 
2981  default:
2982  break;
2983  }
2984 
2985  return r;
2986 }
2987 
2988 static int
2990 {
2991  int r = 0;
2992 
2993  switch (NTYPE(node)) {
2994  case NT_LIST:
2995  case NT_ALT:
2996  do {
2997  r |= subexp_recursive_check(NCAR(node));
2998  } while (IS_NOT_NULL(node = NCDR(node)));
2999  break;
3000 
3001  case NT_QTFR:
3002  r = subexp_recursive_check(NQTFR(node)->target);
3003  break;
3004 
3005  case NT_ANCHOR:
3006  {
3007  AnchorNode* an = NANCHOR(node);
3008  switch (an->type) {
3009  case ANCHOR_PREC_READ:
3010  case ANCHOR_PREC_READ_NOT:
3011  case ANCHOR_LOOK_BEHIND:
3013  r = subexp_recursive_check(an->target);
3014  break;
3015  }
3016  }
3017  break;
3018 
3019  case NT_CALL:
3020  r = subexp_recursive_check(NCALL(node)->target);
3021  if (r != 0) SET_CALL_RECURSION(node);
3022  break;
3023 
3024  case NT_ENCLOSE:
3025  if (IS_ENCLOSE_MARK2(NENCLOSE(node)))
3026  return 0;
3027  else if (IS_ENCLOSE_MARK1(NENCLOSE(node)))
3028  return 1; /* recursion */
3029  else {
3033  }
3034  break;
3035 
3036  default:
3037  break;
3038  }
3039 
3040  return r;
3041 }
3042 
3043 
3044 static int
3046 {
3047 #define FOUND_CALLED_NODE 1
3048 
3049  int type;
3050  int r = 0;
3051 
3052  type = NTYPE(node);
3053  switch (type) {
3054  case NT_LIST:
3055  case NT_ALT:
3056  {
3057  int ret;
3058  do {
3059  ret = subexp_recursive_check_trav(NCAR(node), env);
3060  if (ret == FOUND_CALLED_NODE) r = FOUND_CALLED_NODE;
3061  else if (ret < 0) return ret;
3062  } while (IS_NOT_NULL(node = NCDR(node)));
3063  }
3064  break;
3065 
3066  case NT_QTFR:
3067  r = subexp_recursive_check_trav(NQTFR(node)->target, env);
3068  if (NQTFR(node)->upper == 0) {
3069  if (r == FOUND_CALLED_NODE)
3070  NQTFR(node)->is_refered = 1;
3071  }
3072  break;
3073 
3074  case NT_ANCHOR:
3075  {
3076  AnchorNode* an = NANCHOR(node);
3077  switch (an->type) {
3078  case ANCHOR_PREC_READ:
3079  case ANCHOR_PREC_READ_NOT:
3080  case ANCHOR_LOOK_BEHIND:
3082  r = subexp_recursive_check_trav(an->target, env);
3083  break;
3084  }
3085  }
3086  break;
3087 
3088  case NT_ENCLOSE:
3089  {
3090  EncloseNode* en = NENCLOSE(node);
3091 
3092  if (! IS_ENCLOSE_RECURSION(en)) {
3093  if (IS_ENCLOSE_CALLED(en)) {
3095  r = subexp_recursive_check(en->target);
3096  if (r != 0) SET_ENCLOSE_STATUS(node, NST_RECURSION);
3098  }
3099  }
3100  r = subexp_recursive_check_trav(en->target, env);
3101  if (IS_ENCLOSE_CALLED(en))
3102  r |= FOUND_CALLED_NODE;
3103  }
3104  break;
3105 
3106  default:
3107  break;
3108  }
3109 
3110  return r;
3111 }
3112 
3113 static int
3115 {
3116  int type;
3117  int r = 0;
3118 
3119  type = NTYPE(node);
3120  switch (type) {
3121  case NT_LIST:
3122  do {
3123  r = setup_subexp_call(NCAR(node), env);
3124  } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
3125  break;
3126 
3127  case NT_ALT:
3128  do {
3129  r = setup_subexp_call(NCAR(node), env);
3130  } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
3131  break;
3132 
3133  case NT_QTFR:
3134  r = setup_subexp_call(NQTFR(node)->target, env);
3135  break;
3136  case NT_ENCLOSE:
3137  r = setup_subexp_call(NENCLOSE(node)->target, env);
3138  break;
3139 
3140  case NT_CALL:
3141  {
3142  CallNode* cn = NCALL(node);
3143  Node** nodes = SCANENV_MEM_NODES(env);
3144 
3145  if (cn->group_num != 0) {
3146  int gnum = cn->group_num;
3147 
3148 #ifdef USE_NAMED_GROUP
3149  if (env->num_named > 0 &&
3153  }
3154 #endif
3155  if (gnum > env->num_mem) {
3159  }
3160 
3161 #ifdef USE_NAMED_GROUP
3162  set_call_attr:
3163 #endif
3164  cn->target = nodes[cn->group_num];
3165  if (IS_NULL(cn->target)) {
3169  }
3172  cn->unset_addr_list = env->unset_addr_list;
3173  }
3174 #ifdef USE_NAMED_GROUP
3175 #ifdef USE_PERL_SUBEXP_CALL
3176  else if (cn->name == cn->name_end) {
3177  goto set_call_attr;
3178  }
3179 #endif
3180  else {
3181  int *refs;
3182 
3183  int n = onig_name_to_group_numbers(env->reg, cn->name, cn->name_end,
3184  &refs);
3185  if (n <= 0) {
3189  }
3190  else if (n > 1 &&
3195  }
3196  else {
3197  cn->group_num = refs[0];
3198  goto set_call_attr;
3199  }
3200  }
3201 #endif
3202  }
3203  break;
3204 
3205  case NT_ANCHOR:
3206  {
3207  AnchorNode* an = NANCHOR(node);
3208 
3209  switch (an->type) {
3210  case ANCHOR_PREC_READ:
3211  case ANCHOR_PREC_READ_NOT:
3212  case ANCHOR_LOOK_BEHIND:
3214  r = setup_subexp_call(an->target, env);
3215  break;
3216  }
3217  }
3218  break;
3219 
3220  default:
3221  break;
3222  }
3223 
3224  return r;
3225 }
3226 #endif
3227 
3228 /* divide different length alternatives in look-behind.
3229  (?<=A|B) ==> (?<=A)|(?<=B)
3230  (?<!A|B) ==> (?<!A)(?<!B)
3231 */
3232 static int
3234 {
3235  Node *head, *np, *insert_node;
3236  AnchorNode* an = NANCHOR(node);
3237  int anc_type = an->type;
3238 
3239  head = an->target;
3240  np = NCAR(head);
3241  swap_node(node, head);
3242  NCAR(node) = head;
3243  NANCHOR(head)->target = np;
3244 
3245  np = node;
3246  while ((np = NCDR(np)) != NULL_NODE) {
3247  insert_node = onig_node_new_anchor(anc_type);
3248  CHECK_NULL_RETURN_MEMERR(insert_node);
3249  NANCHOR(insert_node)->target = NCAR(np);
3250  NCAR(np) = insert_node;
3251  }
3252 
3253  if (anc_type == ANCHOR_LOOK_BEHIND_NOT) {
3254  np = node;
3255  do {
3256  SET_NTYPE(np, NT_LIST); /* alt -> list */
3257  } while ((np = NCDR(np)) != NULL_NODE);
3258  }
3259  return 0;
3260 }
3261 
3262 static int
3264 {
3265  int r, len;
3266  AnchorNode* an = NANCHOR(node);
3267 
3268  r = get_char_length_tree(an->target, reg, &len);
3269  if (r == 0)
3270  an->char_len = len;
3271  else if (r == GET_CHAR_LEN_VARLEN)
3273  else if (r == GET_CHAR_LEN_TOP_ALT_VARLEN) {
3276  else
3278  }
3279 
3280  return r;
3281 }
3282 
3283 static int
3284 next_setup(Node* node, Node* next_node, int in_root, regex_t* reg)
3285 {
3286  int type;
3287 
3288  retry:
3289  type = NTYPE(node);
3290  if (type == NT_QTFR) {
3291  QtfrNode* qn = NQTFR(node);
3292  if (qn->greedy && IS_REPEAT_INFINITE(qn->upper)) {
3293 #ifdef USE_QTFR_PEEK_NEXT
3294  Node* n = get_head_value_node(next_node, 1, reg);
3295  /* '\0': for UTF-16BE etc... */
3296  if (IS_NOT_NULL(n) && NSTR(n)->s[0] != '\0') {
3297  qn->next_head_exact = n;
3298  }
3299 #endif
3300  /* automatic possessivation a*b ==> (?>a*)b */
3301  if (qn->lower <= 1) {
3302  int ttype = NTYPE(qn->target);
3303  if (IS_NODE_TYPE_SIMPLE(ttype)) {
3304  Node *x, *y;
3305  x = get_head_value_node(qn->target, 0, reg);
3306  if (IS_NOT_NULL(x)) {
3307  y = get_head_value_node(next_node, 0, reg);
3308  if (IS_NOT_NULL(y) && is_not_included(x, y, reg)) {
3312  swap_node(node, en);
3313  NENCLOSE(node)->target = en;
3314  }
3315  }
3316  }
3317  }
3318 
3319 #ifndef ONIG_DONT_OPTIMIZE
3320  if (NTYPE(node) == NT_QTFR && /* the type may be changed by above block */
3321  in_root && /* qn->lower == 0 && */
3322  NTYPE(qn->target) == NT_CANY &&
3323  ! IS_MULTILINE(reg->options)) {
3324  /* implicit anchor: /.*a/ ==> /(?:^|\G).*a/ */
3325  Node *np;
3328  swap_node(node, np);
3329  NCDR(node) = onig_node_new_list(np, NULL_NODE);
3330  if (IS_NULL(NCDR(node))) {
3331  onig_node_free(np);
3332  return ONIGERR_MEMORY;
3333  }
3334  np = onig_node_new_anchor(ANCHOR_ANYCHAR_STAR); /* (?:^|\G) */
3336  NCAR(node) = np;
3337  }
3338 #endif
3339  }
3340  }
3341  else if (type == NT_ENCLOSE) {
3342  EncloseNode* en = NENCLOSE(node);
3343  in_root = 0;
3344  if (en->type == ENCLOSE_MEMORY) {
3345  node = en->target;
3346  goto retry;
3347  }
3348  }
3349  return 0;
3350 }
3351 
3352 
3353 static int
3355 {
3357  UChar *sbuf, *ebuf, *sp;
3358  int r, i, len;
3359  OnigDistance sbuf_size;
3360  StrNode* sn = NSTR(node);
3361 
3362  end = sn->end;
3363  sbuf_size = (end - sn->s) * 2;
3364  sbuf = (UChar* )xmalloc(sbuf_size);
3366  ebuf = sbuf + sbuf_size;
3367 
3368  sp = sbuf;
3369  p = sn->s;
3370  while (p < end) {
3371  len = ONIGENC_MBC_CASE_FOLD(reg->enc, reg->case_fold_flag, &p, end, buf);
3372  for (i = 0; i < len; i++) {
3373  if (sp >= ebuf) {
3374  UChar* p = (UChar* )xrealloc(sbuf, sbuf_size * 2);
3375  if (IS_NULL(p)) {
3376  xfree(sbuf);
3377  return ONIGERR_MEMORY;
3378  }
3379  sbuf = p;
3380  sp = sbuf + sbuf_size;
3381  sbuf_size *= 2;
3382  ebuf = sbuf + sbuf_size;
3383  }
3384 
3385  *sp++ = buf[i];
3386  }
3387  }
3388 
3389  r = onig_node_str_set(node, sbuf, sp);
3390  if (r != 0) {
3391  xfree(sbuf);
3392  return r;
3393  }
3394 
3395  xfree(sbuf);
3396  return 0;
3397 }
3398 
3399 static int
3401  regex_t* reg)
3402 {
3403  int r;
3404  Node *node;
3405 
3406  node = onig_node_new_str(s, end);
3407  if (IS_NULL(node)) return ONIGERR_MEMORY;
3408 
3409  r = update_string_node_case_fold(reg, node);
3410  if (r != 0) {
3411  onig_node_free(node);
3412  return r;
3413  }
3414 
3415  NSTRING_SET_AMBIG(node);
3417  *rnode = node;
3418  return 0;
3419 }
3420 
3421 static int
3423  UChar *p, int slen, UChar *end,
3424  regex_t* reg, Node **rnode)
3425 {
3426  int r, i, j, len, varlen, varclen;
3427  Node *anode, *var_anode, *snode, *xnode, *an;
3429 
3430  *rnode = var_anode = NULL_NODE;
3431 
3432  varlen = 0;
3433  varclen = 0;
3434  for (i = 0; i < item_num; i++) {
3435  if (items[i].byte_len != slen) {
3436  varlen = 1;
3437  break;
3438  }
3439  if (items[i].code_len != 1) {
3440  varclen = 1;
3441  }
3442  }
3443 
3444  if (varlen != 0) {
3445  *rnode = var_anode = onig_node_new_alt(NULL_NODE, NULL_NODE);
3446  if (IS_NULL(var_anode)) return ONIGERR_MEMORY;
3447 
3448  xnode = onig_node_new_list(NULL, NULL);
3449  if (IS_NULL(xnode)) goto mem_err;
3450  NCAR(var_anode) = xnode;
3451 
3453  if (IS_NULL(anode)) goto mem_err;
3454  NCAR(xnode) = anode;
3455  }
3456  else {
3457  *rnode = anode = onig_node_new_alt(NULL_NODE, NULL_NODE);
3458  if (IS_NULL(anode)) return ONIGERR_MEMORY;
3459  }
3460 
3461  snode = onig_node_new_str(p, p + slen);
3462  if (IS_NULL(snode)) goto mem_err;
3463 
3464  NCAR(anode) = snode;
3465 
3466  for (i = 0; i < item_num; i++) {
3467  snode = onig_node_new_str(NULL, NULL);
3468  if (IS_NULL(snode)) goto mem_err;
3469 
3470  for (j = 0; j < items[i].code_len; j++) {
3471  len = ONIGENC_CODE_TO_MBC(reg->enc, items[i].code[j], buf);
3472  if (len < 0) {
3473  r = len;
3474  goto mem_err2;
3475  }
3476 
3477  r = onig_node_str_cat(snode, buf, buf + len);
3478  if (r != 0) goto mem_err2;
3479  }
3480 
3482  if (IS_NULL(an)) {
3483  goto mem_err2;
3484  }
3485 
3486  if (items[i].byte_len != slen) {
3487  Node *rem;
3488  UChar *q = p + items[i].byte_len;
3489 
3490  if (q < end) {
3491  r = expand_case_fold_make_rem_string(&rem, q, end, reg);
3492  if (r != 0) {
3493  onig_node_free(an);
3494  goto mem_err2;
3495  }
3496 
3497  xnode = onig_node_list_add(NULL_NODE, snode);
3498  if (IS_NULL(xnode)) {
3499  onig_node_free(an);
3500  onig_node_free(rem);
3501  goto mem_err2;
3502  }
3503  if (IS_NULL(onig_node_list_add(xnode, rem))) {
3504  onig_node_free(an);
3505  onig_node_free(xnode);
3506  onig_node_free(rem);
3507  goto mem_err;
3508  }
3509 
3510  NCAR(an) = xnode;
3511  }
3512  else {
3513  NCAR(an) = snode;
3514  }
3515 
3516  NCDR(var_anode) = an;
3517  var_anode = an;
3518  }
3519  else {
3520  NCAR(an) = snode;
3521  NCDR(anode) = an;
3522  anode = an;
3523  }
3524  }
3525 
3526  if (varclen && !varlen)
3527  return 2;
3528  return varlen;
3529 
3530  mem_err2:
3531  onig_node_free(snode);
3532 
3533  mem_err:
3534  onig_node_free(*rnode);
3535 
3536  return ONIGERR_MEMORY;
3537 }
3538 
3539 static int
3541 {
3542 #define THRESHOLD_CASE_FOLD_ALT_FOR_EXPANSION 8
3543 
3544  int r, n, len, alt_num;
3545  int varlen = 0;
3546  UChar *start, *end, *p;
3547  Node *top_root, *root, *snode, *prev_node;
3549  StrNode* sn = NSTR(node);
3550 
3551  if (NSTRING_IS_AMBIG(node)) return 0;
3552 
3553  start = sn->s;
3554  end = sn->end;
3555  if (start >= end) return 0;
3556 
3557  r = 0;
3558  top_root = root = prev_node = snode = NULL_NODE;
3559  alt_num = 1;
3560  p = start;
3561  while (p < end) {
3563  p, end, items);
3564  if (n < 0) {
3565  r = n;
3566  goto err;
3567  }
3568 
3569  len = enclen(reg->enc, p, end);
3570 
3571  if (n == 0) {
3572  if (IS_NULL(snode)) {
3573  if (IS_NULL(root) && IS_NOT_NULL(prev_node)) {
3574  top_root = root = onig_node_list_add(NULL_NODE, prev_node);
3575  if (IS_NULL(root)) {
3576  onig_node_free(prev_node);
3577  goto mem_err;
3578  }
3579  }
3580 
3581  prev_node = snode = onig_node_new_str(NULL, NULL);
3582  if (IS_NULL(snode)) goto mem_err;
3583  if (IS_NOT_NULL(root)) {
3584  if (IS_NULL(onig_node_list_add(root, snode))) {
3585  onig_node_free(snode);
3586  goto mem_err;
3587  }
3588  }
3589  }
3590 
3591  r = onig_node_str_cat(snode, p, p + len);
3592  if (r != 0) goto err;
3593  }
3594  else {
3595  alt_num *= (n + 1);
3596  if (alt_num > THRESHOLD_CASE_FOLD_ALT_FOR_EXPANSION) break;
3597 
3598  if (IS_NULL(root) && IS_NOT_NULL(prev_node)) {
3599  top_root = root = onig_node_list_add(NULL_NODE, prev_node);
3600  if (IS_NULL(root)) {
3601  onig_node_free(prev_node);
3602  goto mem_err;
3603  }
3604  }
3605 
3606  r = expand_case_fold_string_alt(n, items, p, len, end, reg, &prev_node);
3607  if (r < 0) goto mem_err;
3608  if (r > 0) varlen = 1;
3609  if (r == 1) {
3610  if (IS_NULL(root)) {
3611  top_root = prev_node;
3612  }
3613  else {
3614  if (IS_NULL(onig_node_list_add(root, prev_node))) {
3615  onig_node_free(prev_node);
3616  goto mem_err;
3617  }
3618  }
3619 
3620  root = NCAR(prev_node);
3621  }
3622  else { /* r == 0 || r == 2 */
3623  if (IS_NOT_NULL(root)) {
3624  if (IS_NULL(onig_node_list_add(root, prev_node))) {
3625  onig_node_free(prev_node);
3626  goto mem_err;
3627  }
3628  }
3629  }
3630 
3631  snode = NULL_NODE;
3632  }
3633 
3634  p += len;
3635  }
3636 
3637  if (p < end) {
3638  Node *srem;
3639 
3640  r = expand_case_fold_make_rem_string(&srem, p, end, reg);
3641  if (r != 0) goto mem_err;
3642 
3643  if (IS_NOT_NULL(prev_node) && IS_NULL(root)) {
3644  top_root = root = onig_node_list_add(NULL_NODE, prev_node);
3645  if (IS_NULL(root)) {
3646  onig_node_free(srem);
3647  onig_node_free(prev_node);
3648  goto mem_err;
3649  }
3650  }
3651 
3652  if (IS_NULL(root)) {
3653  prev_node = srem;
3654  }
3655  else {
3656  if (IS_NULL(onig_node_list_add(root, srem))) {
3657  onig_node_free(srem);
3658  goto mem_err;
3659  }
3660  }
3661  }
3662 
3663  /* ending */
3664  top_root = (IS_NOT_NULL(top_root) ? top_root : prev_node);
3665  if (!varlen) {
3666  /* When all expanded strings are same length, case-insensitive
3667  BM search will be used. */
3668  r = update_string_node_case_fold(reg, node);
3669  if (r == 0) {
3670  NSTRING_SET_AMBIG(node);
3671  }
3672  }
3673  else {
3674  swap_node(node, top_root);
3675  r = 0;
3676  }
3677  onig_node_free(top_root);
3678  return r;
3679 
3680  mem_err:
3681  r = ONIGERR_MEMORY;
3682 
3683  err:
3684  onig_node_free(top_root);
3685  return r;
3686 }
3687 
3688 
3689 #ifdef USE_COMBINATION_EXPLOSION_CHECK
3690 
3691 #define CEC_THRES_NUM_BIG_REPEAT 512
3692 #define CEC_INFINITE_NUM 0x7fffffff
3693 
3694 #define CEC_IN_INFINITE_REPEAT (1<<0)
3695 #define CEC_IN_FINITE_REPEAT (1<<1)
3696 #define CEC_CONT_BIG_REPEAT (1<<2)
3697 
3698 static int
3699 setup_comb_exp_check(Node* node, int state, ScanEnv* env)
3700 {
3701  int type;
3702  int r = state;
3703 
3704  type = NTYPE(node);
3705  switch (type) {
3706  case NT_LIST:
3707  {
3708  Node* prev = NULL_NODE;
3709  do {
3710  r = setup_comb_exp_check(NCAR(node), r, env);
3711  prev = NCAR(node);
3712  } while (r >= 0 && IS_NOT_NULL(node = NCDR(node)));
3713  }
3714  break;
3715 
3716  case NT_ALT:
3717  {
3718  int ret;
3719  do {
3720  ret = setup_comb_exp_check(NCAR(node), state, env);
3721  r |= ret;
3722  } while (ret >= 0 && IS_NOT_NULL(node = NCDR(node)));
3723  }
3724  break;
3725 
3726  case NT_QTFR:
3727  {
3728  int child_state = state;
3729  int add_state = 0;
3730  QtfrNode* qn = NQTFR(node);
3731  Node* target = qn->target;
3732  int var_num;
3733 
3734  if (! IS_REPEAT_INFINITE(qn->upper)) {
3735  if (qn->upper > 1) {
3736  /* {0,1}, {1,1} are allowed */
3737  child_state |= CEC_IN_FINITE_REPEAT;
3738 
3739  /* check (a*){n,m}, (a+){n,m} => (a*){n,n}, (a+){n,n} */
3740  if (env->backrefed_mem == 0) {
3741  if (NTYPE(qn->target) == NT_ENCLOSE) {
3742  EncloseNode* en = NENCLOSE(qn->target);
3743  if (en->type == ENCLOSE_MEMORY) {
3744  if (NTYPE(en->target) == NT_QTFR) {
3745  QtfrNode* q = NQTFR(en->target);
3746  if (IS_REPEAT_INFINITE(q->upper)
3747  && q->greedy == qn->greedy) {
3748  qn->upper = (qn->lower == 0 ? 1 : qn->lower);
3749  if (qn->upper == 1)
3750  child_state = state;
3751  }
3752  }
3753  }
3754  }
3755  }
3756  }
3757  }
3758 
3759  if (state & CEC_IN_FINITE_REPEAT) {
3760  qn->comb_exp_check_num = -1;
3761  }
3762  else {
3763  if (IS_REPEAT_INFINITE(qn->upper)) {
3764  var_num = CEC_INFINITE_NUM;
3765  child_state |= CEC_IN_INFINITE_REPEAT;
3766  }
3767  else {
3768  var_num = qn->upper - qn->lower;
3769  }
3770 
3771  if (var_num >= CEC_THRES_NUM_BIG_REPEAT)
3772  add_state |= CEC_CONT_BIG_REPEAT;
3773 
3774  if (((state & CEC_IN_INFINITE_REPEAT) != 0 && var_num != 0) ||
3775  ((state & CEC_CONT_BIG_REPEAT) != 0 &&
3776  var_num >= CEC_THRES_NUM_BIG_REPEAT)) {
3777  if (qn->comb_exp_check_num == 0) {
3778  env->num_comb_exp_check++;
3779  qn->comb_exp_check_num = env->num_comb_exp_check;
3780  if (env->curr_max_regnum > env->comb_exp_max_regnum)
3781  env->comb_exp_max_regnum = env->curr_max_regnum;
3782  }
3783  }
3784  }
3785 
3786  r = setup_comb_exp_check(target, child_state, env);
3787  r |= add_state;
3788  }
3789  break;
3790 
3791  case NT_ENCLOSE:
3792  {
3793  EncloseNode* en = NENCLOSE(node);
3794 
3795  switch (en->type) {
3796  case ENCLOSE_MEMORY:
3797  {
3798  if (env->curr_max_regnum < en->regnum)
3799  env->curr_max_regnum = en->regnum;
3800 
3801  r = setup_comb_exp_check(en->target, state, env);
3802  }
3803  break;
3804 
3805  default:
3806  r = setup_comb_exp_check(en->target, state, env);
3807  break;
3808  }
3809  }
3810  break;
3811 
3812 #ifdef USE_SUBEXP_CALL
3813  case NT_CALL:
3814  if (IS_CALL_RECURSION(NCALL(node)))
3815  env->has_recursion = 1;
3816  else
3817  r = setup_comb_exp_check(NCALL(node)->target, state, env);
3818  break;
3819 #endif
3820 
3821  default:
3822  break;
3823  }
3824 
3825  return r;
3826 }
3827 #endif
3828 
3829 #define IN_ALT (1<<0)
3830 #define IN_NOT (1<<1)
3831 #define IN_REPEAT (1<<2)
3832 #define IN_VAR_REPEAT (1<<3)
3833 #define IN_ROOT (1<<4)
3834 
3835 /* setup_tree does the following work.
3836  1. check empty loop. (set qn->target_empty_info)
3837  2. expand ignore-case in char class.
3838  3. set memory status bit flags. (reg->mem_stats)
3839  4. set qn->head_exact for [push, exact] -> [push_or_jump_exact1, exact].
3840  5. find invalid patterns in look-behind.
3841  6. expand repeated string.
3842  */
3843 static int
3844 setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env)
3845 {
3846  int type;
3847  int r = 0;
3848  int in_root = state & IN_ROOT;
3849 
3850  state &= ~IN_ROOT;
3851 restart:
3852  type = NTYPE(node);
3853  switch (type) {
3854  case NT_LIST:
3855  {
3856  Node* prev = NULL_NODE;
3857  int prev_in_root = 0;
3858  state |= in_root;
3859  do {
3860  r = setup_tree(NCAR(node), reg, state, env);
3861  if (IS_NOT_NULL(prev) && r == 0) {
3862  r = next_setup(prev, NCAR(node), prev_in_root, reg);
3863  }
3864  prev = NCAR(node);
3865  prev_in_root = state & IN_ROOT;
3866  state &= ~IN_ROOT;
3867  } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
3868  }
3869  break;
3870 
3871  case NT_ALT:
3872  do {
3873  r = setup_tree(NCAR(node), reg, (state | IN_ALT), env);
3874  } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
3875  break;
3876 
3877  case NT_CCLASS:
3878  break;
3879 
3880  case NT_STR:
3881  if (IS_IGNORECASE(reg->options) && !NSTRING_IS_RAW(node)) {
3882  r = expand_case_fold_string(node, reg);
3883  }
3884  break;
3885 
3886  case NT_CTYPE:
3887  case NT_CANY:
3888  break;
3889 
3890 #ifdef USE_SUBEXP_CALL
3891  case NT_CALL:
3892  break;
3893 #endif
3894 
3895  case NT_BREF:
3896  {
3897  int i;
3898  int* p;
3899  Node** nodes = SCANENV_MEM_NODES(env);
3900  BRefNode* br = NBREF(node);
3901  p = BACKREFS_P(br);
3902  for (i = 0; i < br->back_num; i++) {
3903  if (p[i] > env->num_mem) return ONIGERR_INVALID_BACKREF;
3904  BIT_STATUS_ON_AT(env->backrefed_mem, p[i]);
3905  BIT_STATUS_ON_AT(env->bt_mem_start, p[i]);
3906 #ifdef USE_BACKREF_WITH_LEVEL
3907  if (IS_BACKREF_NEST_LEVEL(br)) {
3908  BIT_STATUS_ON_AT(env->bt_mem_end, p[i]);
3909  }
3910 #endif
3911  SET_ENCLOSE_STATUS(nodes[p[i]], NST_MEM_BACKREFED);
3912  }
3913  }
3914  break;
3915 
3916  case NT_QTFR:
3917  {
3918  OnigDistance d;
3919  QtfrNode* qn = NQTFR(node);
3920  Node* target = qn->target;
3921 
3922  if ((state & IN_REPEAT) != 0) {
3923  qn->state |= NST_IN_REPEAT;
3924  }
3925 
3926  if (IS_REPEAT_INFINITE(qn->upper) || qn->upper >= 1) {
3927  r = get_min_match_length(target, &d, env);
3928  if (r) break;
3929  if (d == 0) {
3931 #ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT
3932  r = quantifiers_memory_node_info(target);
3933  if (r < 0) break;
3934  if (r > 0) {
3935  qn->target_empty_info = r;
3936  }
3937 #endif
3938 #if 0
3939  r = get_max_match_length(target, &d, env);
3940  if (r == 0 && d == 0) {
3941  /* ()* ==> ()?, ()+ ==> () */
3942  qn->upper = 1;
3943  if (qn->lower > 1) qn->lower = 1;
3944  if (NTYPE(target) == NT_STR) {
3945  qn->upper = qn->lower = 0; /* /(?:)+/ ==> // */
3946  }
3947  }
3948 #endif
3949  }
3950  }
3951 
3952  state |= IN_REPEAT;
3953  if (qn->lower != qn->upper)
3954  state |= IN_VAR_REPEAT;
3955  r = setup_tree(target, reg, state, env);
3956  if (r) break;
3957 
3958  /* expand string */
3959 #define EXPAND_STRING_MAX_LENGTH 100
3960  if (NTYPE(target) == NT_STR) {
3961  if (qn->lower > 1) {
3962  int i, n = qn->lower;
3963  OnigDistance len = NSTRING_LEN(target);
3964  StrNode* sn = NSTR(target);
3965  Node* np;
3966 
3967  np = onig_node_new_str(sn->s, sn->end);
3968  if (IS_NULL(np)) return ONIGERR_MEMORY;
3969  NSTR(np)->flag = sn->flag;
3970 
3971  for (i = 1; i < n && (i+1) * len <= EXPAND_STRING_MAX_LENGTH; i++) {
3972  r = onig_node_str_cat(np, sn->s, sn->end);
3973  if (r) {
3974  onig_node_free(np);
3975  return r;
3976  }
3977  }
3978  if (i < qn->upper || IS_REPEAT_INFINITE(qn->upper)) {
3979  Node *np1, *np2;
3980 
3981  qn->lower -= i;
3982  if (! IS_REPEAT_INFINITE(qn->upper))
3983  qn->upper -= i;
3984 
3985  np1 = onig_node_new_list(np, NULL);
3986  if (IS_NULL(np1)) {
3987  onig_node_free(np);
3988  return ONIGERR_MEMORY;
3989  }
3990  swap_node(np1, node);
3991  np2 = onig_node_list_add(node, np1);
3992  if (IS_NULL(np2)) {
3993  onig_node_free(np1);
3994  return ONIGERR_MEMORY;
3995  }
3996  }
3997  else {
3998  swap_node(np, node);
3999  onig_node_free(np);
4000  }
4001  break; /* break case NT_QTFR: */
4002  }
4003  }
4004 
4005 #ifdef USE_OP_PUSH_OR_JUMP_EXACT
4006  if (qn->greedy && (qn->target_empty_info != 0)) {
4007  if (NTYPE(target) == NT_QTFR) {
4008  QtfrNode* tqn = NQTFR(target);
4009  if (IS_NOT_NULL(tqn->head_exact)) {
4010  qn->head_exact = tqn->head_exact;
4011  tqn->head_exact = NULL;
4012  }
4013  }
4014  else {
4015  qn->head_exact = get_head_value_node(qn->target, 1, reg);
4016  }
4017  }
4018 #endif
4019  }
4020  break;
4021 
4022  case NT_ENCLOSE:
4023  {
4024  EncloseNode* en = NENCLOSE(node);
4025 
4026  switch (en->type) {
4027  case ENCLOSE_OPTION:
4028  {
4030  state |= in_root;
4031  reg->options = NENCLOSE(node)->option;
4032  r = setup_tree(NENCLOSE(node)->target, reg, state, env);
4033  reg->options = options;
4034  }
4035  break;
4036 
4037  case ENCLOSE_MEMORY:
4038  if ((state & (IN_ALT | IN_NOT | IN_VAR_REPEAT)) != 0) {
4040  /* SET_ENCLOSE_STATUS(node, NST_MEM_IN_ALT_NOT); */
4041  }
4042  r = setup_tree(en->target, reg, state, env);
4043  break;
4044 
4046  {
4047  Node* target = en->target;
4048  r = setup_tree(target, reg, state, env);
4049  if (NTYPE(target) == NT_QTFR) {
4050  QtfrNode* tqn = NQTFR(target);
4051  if (IS_REPEAT_INFINITE(tqn->upper) && tqn->lower <= 1 &&
4052  tqn->greedy != 0) { /* (?>a*), a*+ etc... */
4053  int qtype = NTYPE(tqn->target);
4054  if (IS_NODE_TYPE_SIMPLE(qtype))
4056  }
4057  }
4058  }
4059  break;
4060 
4061  case ENCLOSE_CONDITION:
4062 #ifdef USE_NAMED_GROUP
4063  if (! IS_ENCLOSE_NAME_REF(NENCLOSE(node)) &&
4064  env->num_named > 0 &&
4068  }
4069 #endif
4070  r = setup_tree(NENCLOSE(node)->target, reg, state, env);
4071  break;
4072  }
4073  }
4074  break;
4075 
4076  case NT_ANCHOR:
4077  {
4078  AnchorNode* an = NANCHOR(node);
4079 
4080  switch (an->type) {
4081  case ANCHOR_PREC_READ:
4082  r = setup_tree(an->target, reg, state, env);
4083  break;
4084  case ANCHOR_PREC_READ_NOT:
4085  r = setup_tree(an->target, reg, (state | IN_NOT), env);
4086  break;
4087 
4088 /* allowed node types in look-behind */
4089 #define ALLOWED_TYPE_IN_LB \
4090  ( BIT_NT_LIST | BIT_NT_ALT | BIT_NT_STR | BIT_NT_CCLASS | BIT_NT_CTYPE | \
4091  BIT_NT_CANY | BIT_NT_ANCHOR | BIT_NT_ENCLOSE | BIT_NT_QTFR | BIT_NT_CALL )
4092 
4093 #define ALLOWED_ENCLOSE_IN_LB ( ENCLOSE_MEMORY )
4094 #define ALLOWED_ENCLOSE_IN_LB_NOT 0
4095 
4096 #define ALLOWED_ANCHOR_IN_LB \
4097 ( ANCHOR_LOOK_BEHIND | ANCHOR_LOOK_BEHIND_NOT | ANCHOR_BEGIN_LINE | \
4098  ANCHOR_END_LINE | ANCHOR_BEGIN_BUF | ANCHOR_BEGIN_POSITION | ANCHOR_KEEP | \
4099  ANCHOR_WORD_BOUND | ANCHOR_NOT_WORD_BOUND | \
4100  ANCHOR_WORD_BEGIN | ANCHOR_WORD_END )
4101 #define ALLOWED_ANCHOR_IN_LB_NOT \
4102 ( ANCHOR_LOOK_BEHIND | ANCHOR_LOOK_BEHIND_NOT | ANCHOR_BEGIN_LINE | \
4103  ANCHOR_END_LINE | ANCHOR_BEGIN_BUF | ANCHOR_BEGIN_POSITION | ANCHOR_KEEP | \
4104  ANCHOR_WORD_BOUND | ANCHOR_NOT_WORD_BOUND | \
4105  ANCHOR_WORD_BEGIN | ANCHOR_WORD_END )
4106 
4107  case ANCHOR_LOOK_BEHIND:
4108  {
4111  if (r < 0) return r;
4112  if (r > 0) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
4113  r = setup_look_behind(node, reg, env);
4114  if (r != 0) return r;
4115  if (NTYPE(node) != NT_ANCHOR) goto restart;
4116  r = setup_tree(an->target, reg, state, env);
4117  }
4118  break;
4119 
4121  {
4124  if (r < 0) return r;
4125  if (r > 0) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
4126  r = setup_look_behind(node, reg, env);
4127  if (r != 0) return r;
4128  if (NTYPE(node) != NT_ANCHOR) goto restart;
4129  r = setup_tree(an->target, reg, (state | IN_NOT), env);
4130  }
4131  break;
4132  }
4133  }
4134  break;
4135 
4136  default:
4137  break;
4138  }
4139 
4140  return r;
4141 }
4142 
4143 #ifndef USE_SUNDAY_QUICK_SEARCH
4144 /* set skip map for Boyer-Moore search */
4145 static int
4146 set_bm_skip(UChar* s, UChar* end, regex_t* reg,
4147  UChar skip[], int** int_skip, int ignore_case)
4148 {
4149  OnigDistance i, len;
4150  int clen, flen, n, j, k;
4153  OnigEncoding enc = reg->enc;
4154 
4155  len = end - s;
4156  if (len < ONIG_CHAR_TABLE_SIZE) {
4157  for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) skip[i] = (UChar )len;
4158 
4159  n = 0;
4160  for (i = 0; i < len - 1; i += clen) {
4161  p = s + i;
4162  if (ignore_case)
4164  p, end, items);
4165  clen = enclen(enc, p, end);
4166 
4167  for (j = 0; j < n; j++) {
4168  if ((items[j].code_len != 1) || (items[j].byte_len != clen))
4169  return 1; /* different length isn't supported. */
4170  flen = ONIGENC_CODE_TO_MBC(enc, items[j].code[0], buf[j]);
4171  if (flen != clen)
4172  return 1; /* different length isn't supported. */
4173  }
4174  for (j = 0; j < clen; j++) {
4175  skip[s[i + j]] = (UChar )(len - 1 - i - j);
4176  for (k = 0; k < n; k++) {
4177  skip[buf[k][j]] = (UChar )(len - 1 - i - j);
4178  }
4179  }
4180  }
4181  }
4182  else {
4183  if (IS_NULL(*int_skip)) {
4184  *int_skip = (int* )xmalloc(sizeof(int) * ONIG_CHAR_TABLE_SIZE);
4185  if (IS_NULL(*int_skip)) return ONIGERR_MEMORY;
4186  }
4187  for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) (*int_skip)[i] = (int )len;
4188 
4189  n = 0;
4190  for (i = 0; i < len - 1; i += clen) {
4191  p = s + i;
4192  if (ignore_case)
4194  p, end, items);
4195  clen = enclen(enc, p, end);
4196 
4197  for (j = 0; j < n; j++) {
4198  if ((items[j].code_len != 1) || (items[j].byte_len != clen))
4199  return 1; /* different length isn't supported. */
4200  flen = ONIGENC_CODE_TO_MBC(enc, items[j].code[0], buf[j]);
4201  if (flen != clen)
4202  return 1; /* different length isn't supported. */
4203  }
4204  for (j = 0; j < clen; j++) {
4205  (*int_skip)[s[i + j]] = (int )(len - 1 - i - j);
4206  for (k = 0; k < n; k++) {
4207  (*int_skip)[buf[k][j]] = (int )(len - 1 - i - j);
4208  }
4209  }
4210  }
4211  }
4212  return 0;
4213 }
4214 
4215 #else /* USE_SUNDAY_QUICK_SEARCH */
4216 
4217 /* set skip map for Sunday's quick search */
4218 static int
4220  UChar skip[], int** int_skip, int ignore_case)
4221 {
4222  OnigDistance i, len;
4223  int clen, flen, n, j, k;
4226  OnigEncoding enc = reg->enc;
4227 
4228  len = end - s;
4229  if (len < ONIG_CHAR_TABLE_SIZE) {
4230  for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) skip[i] = (UChar )(len + 1);
4231 
4232  n = 0;
4233  for (i = 0; i < len; i += clen) {
4234  p = s + i;
4235  if (ignore_case)
4237  p, end, items);
4238  clen = enclen(enc, p, end);
4239 
4240  for (j = 0; j < n; j++) {
4241  if ((items[j].code_len != 1) || (items[j].byte_len != clen))
4242  return 1; /* different length isn't supported. */
4243  flen = ONIGENC_CODE_TO_MBC(enc, items[j].code[0], buf[j]);
4244  if (flen != clen)
4245  return 1; /* different length isn't supported. */
4246  }
4247  for (j = 0; j < clen; j++) {
4248  skip[s[i + j]] = (UChar )(len - i - j);
4249  for (k = 0; k < n; k++) {
4250  skip[buf[k][j]] = (UChar )(len - i - j);
4251  }
4252  }
4253  }
4254  }
4255  else {
4256  if (IS_NULL(*int_skip)) {
4257  *int_skip = (int* )xmalloc(sizeof(int) * ONIG_CHAR_TABLE_SIZE);
4258  if (IS_NULL(*int_skip)) return ONIGERR_MEMORY;
4259  }
4260  for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) (*int_skip)[i] = (int )(len + 1);
4261 
4262  n = 0;
4263  for (i = 0; i < len; i += clen) {
4264  p = s + i;
4265  if (ignore_case)
4267  p, end, items);
4268  clen = enclen(enc, p, end);
4269 
4270  for (j = 0; j < n; j++) {
4271  if ((items[j].code_len != 1) || (items[j].byte_len != clen))
4272  return 1; /* different length isn't supported. */
4273  flen = ONIGENC_CODE_TO_MBC(enc, items[j].code[0], buf[j]);
4274  if (flen != clen)
4275  return 1; /* different length isn't supported. */
4276  }
4277  for (j = 0; j < clen; j++) {
4278  (*int_skip)[s[i + j]] = (int )(len - i - j);
4279  for (k = 0; k < n; k++) {
4280  (*int_skip)[buf[k][j]] = (int )(len - i - j);
4281  }
4282  }
4283  }
4284  }
4285  return 0;
4286 }
4287 #endif /* USE_SUNDAY_QUICK_SEARCH */
4288 
4289 #define OPT_EXACT_MAXLEN 24
4290 
4291 typedef struct {
4292  OnigDistance min; /* min byte length */
4293  OnigDistance max; /* max byte length */
4294 } MinMaxLen;
4295 
4296 typedef struct {
4302 } OptEnv;
4303 
4304 typedef struct {
4307 } OptAncInfo;
4308 
4309 typedef struct {
4310  MinMaxLen mmd; /* info position */
4312 
4314  int ignore_case; /* -1: unset, 0: case sensitive, 1: ignore case */
4315  int len;
4317 } OptExactInfo;
4318 
4319 typedef struct {
4320  MinMaxLen mmd; /* info position */
4322 
4323  int value; /* weighted value */
4325 } OptMapInfo;
4326 
4327 typedef struct {
4329 
4331  OptExactInfo exb; /* boundary */
4332  OptExactInfo exm; /* middle */
4333  OptExactInfo expr; /* prec read (?=...) */
4334 
4335  OptMapInfo map; /* boundary */
4336 } NodeOptInfo;
4337 
4338 
4339 static int
4341 {
4342  static const short int ByteValTable[] = {
4343  5, 1, 1, 1, 1, 1, 1, 1, 1, 10, 10, 1, 1, 10, 1, 1,
4344  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
4345  12, 4, 7, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5,
4346  6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5, 5, 5,
4347  5, 6, 6, 6, 6, 7, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
4348  6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 6, 5, 5, 5,
4349  5, 6, 6, 6, 6, 7, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
4350  6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5, 1
4351  };
4352 
4353  if (i < (int )(sizeof(ByteValTable)/sizeof(ByteValTable[0]))) {
4354  if (i == 0 && ONIGENC_MBC_MINLEN(enc) > 1)
4355  return 20;
4356  else
4357  return (int )ByteValTable[i];
4358  }
4359  else
4360  return 4; /* Take it easy. */
4361 }
4362 
4363 static int
4365 {
4366  /* 1000 / (min-max-dist + 1) */
4367  static const short int dist_vals[] = {
4368  1000, 500, 333, 250, 200, 167, 143, 125, 111, 100,
4369  91, 83, 77, 71, 67, 63, 59, 56, 53, 50,
4370  48, 45, 43, 42, 40, 38, 37, 36, 34, 33,
4371  32, 31, 30, 29, 29, 28, 27, 26, 26, 25,
4372  24, 24, 23, 23, 22, 22, 21, 21, 20, 20,
4373  20, 19, 19, 19, 18, 18, 18, 17, 17, 17,
4374  16, 16, 16, 16, 15, 15, 15, 15, 14, 14,
4375  14, 14, 14, 14, 13, 13, 13, 13, 13, 13,
4376  12, 12, 12, 12, 12, 12, 11, 11, 11, 11,
4377  11, 11, 11, 11, 11, 10, 10, 10, 10, 10
4378  };
4379 
4380  OnigDistance d;
4381 
4382  if (mm->max == ONIG_INFINITE_DISTANCE) return 0;
4383 
4384  d = mm->max - mm->min;
4385  if (d < sizeof(dist_vals)/sizeof(dist_vals[0]))
4386  /* return dist_vals[d] * 16 / (mm->min + 12); */
4387  return (int )dist_vals[d];
4388  else
4389  return 1;
4390 }
4391 
4392 static int
4394 {
4395  if (v2 <= 0) return -1;
4396  if (v1 <= 0) return 1;
4397 
4398  v1 *= distance_value(d1);
4399  v2 *= distance_value(d2);
4400 
4401  if (v2 > v1) return 1;
4402  if (v2 < v1) return -1;
4403 
4404  if (d2->min < d1->min) return 1;
4405  if (d2->min > d1->min) return -1;
4406  return 0;
4407 }
4408 
4409 static int
4411 {
4412  return (a->min == b->min && a->max == b->max) ? 1 : 0;
4413 }
4414 
4415 
4416 static void
4418 {
4419  mml->min = min;
4420  mml->max = max;
4421 }
4422 
4423 static void
4425 {
4426  mml->min = mml->max = 0;
4427 }
4428 
4429 static void
4431 {
4432  to->min = from->min;
4433  to->max = from->max;
4434 }
4435 
4436 static void
4438 {
4439  to->min = distance_add(to->min, from->min);
4440  to->max = distance_add(to->max, from->max);
4441 }
4442 
4443 #if 0
4444 static void
4445 add_len_mml(MinMaxLen* to, OnigDistance len)
4446 {
4447  to->min = distance_add(to->min, len);
4448  to->max = distance_add(to->max, len);
4449 }
4450 #endif
4451 
4452 static void
4454 {
4455  if (to->min > from->min) to->min = from->min;
4456  if (to->max < from->max) to->max = from->max;
4457 }
4458 
4459 static void
4461 {
4462  *to = *from;
4463 }
4464 
4465 static void
4467 {
4468  anc->left_anchor = 0;
4469  anc->right_anchor = 0;
4470 }
4471 
4472 static void
4474 {
4475  *to = *from;
4476 }
4477 
4478 static void
4480  OnigDistance left_len, OnigDistance right_len)
4481 {
4482  clear_opt_anc_info(to);
4483 
4484  to->left_anchor = left->left_anchor;
4485  if (left_len == 0) {
4486  to->left_anchor |= right->left_anchor;
4487  }
4488 
4489  to->right_anchor = right->right_anchor;
4490  if (right_len == 0) {
4491  to->right_anchor |= left->right_anchor;
4492  }
4493 }
4494 
4495 static int
4497 {
4498  if (anc == ANCHOR_END_BUF || anc == ANCHOR_SEMI_END_BUF ||
4499  anc == ANCHOR_END_LINE || anc == ANCHOR_PREC_READ ||
4500  anc == ANCHOR_PREC_READ_NOT)
4501  return 0;
4502 
4503  return 1;
4504 }
4505 
4506 static int
4508 {
4509  if ((to->left_anchor & anc) != 0) return 1;
4510 
4511  return ((to->right_anchor & anc) != 0 ? 1 : 0);
4512 }
4513 
4514 static void
4516 {
4517  if (is_left_anchor(anc))
4518  to->left_anchor |= anc;
4519  else
4520  to->right_anchor |= anc;
4521 }
4522 
4523 static void
4525 {
4526  if (is_left_anchor(anc))
4527  to->left_anchor &= ~anc;
4528  else
4529  to->right_anchor &= ~anc;
4530 }
4531 
4532 static void
4534 {
4535  to->left_anchor &= add->left_anchor;
4536  to->right_anchor &= add->right_anchor;
4537 }
4538 
4539 static int
4541 {
4542  return (ex->len >= OPT_EXACT_MAXLEN ? 1 : 0);
4543 }
4544 
4545 static void
4547 {
4548  clear_mml(&ex->mmd);
4549  clear_opt_anc_info(&ex->anc);
4550  ex->reach_end = 0;
4551  ex->ignore_case = -1; /* unset */
4552  ex->len = 0;
4553  ex->s[0] = '\0';
4554 }
4555 
4556 static void
4558 {
4559  *to = *from;
4560 }
4561 
4562 static void
4564 {
4565  int i, j, len;
4566  UChar *p, *end;
4567  OptAncInfo tanc;
4568 
4569  if (to->ignore_case < 0)
4570  to->ignore_case = add->ignore_case;
4571  else if (to->ignore_case != add->ignore_case)
4572  return ; /* avoid */
4573 
4574  p = add->s;
4575  end = p + add->len;
4576  for (i = to->len; p < end; ) {
4577  len = enclen(enc, p, end);
4578  if (i + len > OPT_EXACT_MAXLEN) break;
4579  for (j = 0; j < len && p < end; j++)
4580  to->s[i++] = *p++;
4581  }
4582 
4583  to->len = i;
4584  to->reach_end = (p == end ? add->reach_end : 0);
4585 
4586  concat_opt_anc_info(&tanc, &to->anc, &add->anc, 1, 1);
4587  if (! to->reach_end) tanc.right_anchor = 0;
4588  copy_opt_anc_info(&to->anc, &tanc);
4589 }
4590 
4591 static void
4593  int raw ARG_UNUSED, OnigEncoding enc)
4594 {
4595  int i, j, len;
4596  UChar *p;
4597 
4598  for (i = to->len, p = s; p < end && i < OPT_EXACT_MAXLEN; ) {
4599  len = enclen(enc, p, end);
4600  if (i + len > OPT_EXACT_MAXLEN) break;
4601  for (j = 0; j < len && p < end; j++)
4602  to->s[i++] = *p++;
4603  }
4604 
4605  to->len = i;
4606 }
4607 
4608 static void
4610 {
4611  int i, j, len;
4612 
4613  if (add->len == 0 || to->len == 0) {
4615  return ;
4616  }
4617 
4618  if (! is_equal_mml(&to->mmd, &add->mmd)) {
4620  return ;
4621  }
4622 
4623  for (i = 0; i < to->len && i < add->len; ) {
4624  if (to->s[i] != add->s[i]) break;
4625  len = enclen(env->enc, to->s + i, to->s + to->len);
4626 
4627  for (j = 1; j < len; j++) {
4628  if (to->s[i+j] != add->s[i+j]) break;
4629  }
4630  if (j < len) break;
4631  i += len;
4632  }
4633 
4634  if (! add->reach_end || i < add->len || i < to->len) {
4635  to->reach_end = 0;
4636  }
4637  to->len = i;
4638  if (to->ignore_case < 0)
4639  to->ignore_case = add->ignore_case;
4640  else if (add->ignore_case >= 0)
4641  to->ignore_case |= add->ignore_case;
4642 
4643  alt_merge_opt_anc_info(&to->anc, &add->anc);
4644  if (! to->reach_end) to->anc.right_anchor = 0;
4645 }
4646 
4647 static void
4649 {
4650  int v1, v2;
4651 
4652  v1 = now->len;
4653  v2 = alt->len;
4654 
4655  if (v2 == 0) {
4656  return ;
4657  }
4658  else if (v1 == 0) {
4659  copy_opt_exact_info(now, alt);
4660  return ;
4661  }
4662  else if (v1 <= 2 && v2 <= 2) {
4663  /* ByteValTable[x] is big value --> low price */
4664  v2 = map_position_value(enc, now->s[0]);
4665  v1 = map_position_value(enc, alt->s[0]);
4666 
4667  if (now->len > 1) v1 += 5;
4668  if (alt->len > 1) v2 += 5;
4669  }
4670 
4671  if (now->ignore_case <= 0) v1 *= 2;
4672  if (alt->ignore_case <= 0) v2 *= 2;
4673 
4674  if (comp_distance_value(&now->mmd, &alt->mmd, v1, v2) > 0)
4675  copy_opt_exact_info(now, alt);
4676 }
4677 
4678 static void
4680 {
4681  static const OptMapInfo clean_info = {
4682  {0, 0}, {0, 0}, 0,
4683  {
4684  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4685  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4686  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4687  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4688  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4689  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4690  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4691  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4692  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4693  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4694  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4695  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4696  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4697  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4698  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4699  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
4700  }
4701  };
4702 
4703  xmemcpy(map, &clean_info, sizeof(OptMapInfo));
4704 }
4705 
4706 static void
4708 {
4709  *to = *from;
4710 }
4711 
4712 static void
4714 {
4715  if (map->map[c] == 0) {
4716  map->map[c] = 1;
4717  map->value += map_position_value(enc, c);
4718  }
4719 }
4720 
4721 static int
4723  OnigEncoding enc, OnigCaseFoldType case_fold_flag)
4724 {
4727  int i, n;
4728 
4729  add_char_opt_map_info(map, p[0], enc);
4730 
4731  case_fold_flag = DISABLE_CASE_FOLD_MULTI_CHAR(case_fold_flag);
4732  n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc, case_fold_flag, p, end, items);
4733  if (n < 0) return n;
4734 
4735  for (i = 0; i < n; i++) {
4736  ONIGENC_CODE_TO_MBC(enc, items[i].code[0], buf);
4737  add_char_opt_map_info(map, buf[0], enc);
4738  }
4739 
4740  return 0;
4741 }
4742 
4743 static void
4745 {
4746  const int z = 1<<15; /* 32768: something big value */
4747 
4748  int v1, v2;
4749 
4750  if (alt->value == 0) return ;
4751  if (now->value == 0) {
4752  copy_opt_map_info(now, alt);
4753  return ;
4754  }
4755 
4756  v1 = z / now->value;
4757  v2 = z / alt->value;
4758  if (comp_distance_value(&now->mmd, &alt->mmd, v1, v2) > 0)
4759  copy_opt_map_info(now, alt);
4760 }
4761 
4762 static int
4764 {
4765 #define COMP_EM_BASE 20
4766  int ve, vm;
4767 
4768  if (m->value <= 0) return -1;
4769 
4770  ve = COMP_EM_BASE * e->len * (e->ignore_case > 0 ? 1 : 2);
4771  vm = COMP_EM_BASE * 5 * 2 / m->value;
4772  return comp_distance_value(&e->mmd, &m->mmd, ve, vm);
4773 }
4774 
4775 static void
4777 {
4778  int i, val;
4779 
4780  /* if (! is_equal_mml(&to->mmd, &add->mmd)) return ; */
4781  if (to->value == 0) return ;
4782  if (add->value == 0 || to->mmd.max < add->mmd.min) {
4783  clear_opt_map_info(to);
4784  return ;
4785  }
4786 
4787  alt_merge_mml(&to->mmd, &add->mmd);
4788 
4789  val = 0;
4790  for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) {
4791  if (add->map[i])
4792  to->map[i] = 1;
4793 
4794  if (to->map[i])
4795  val += map_position_value(enc, i);
4796  }
4797  to->value = val;
4798 
4799  alt_merge_opt_anc_info(&to->anc, &add->anc);
4800 }
4801 
4802 static void
4804 {
4805  copy_mml(&(opt->exb.mmd), mmd);
4806  copy_mml(&(opt->expr.mmd), mmd);
4807  copy_mml(&(opt->map.mmd), mmd);
4808 }
4809 
4810 static void
4812 {
4813  clear_mml(&opt->len);
4814  clear_opt_anc_info(&opt->anc);
4815  clear_opt_exact_info(&opt->exb);
4816  clear_opt_exact_info(&opt->exm);
4817  clear_opt_exact_info(&opt->expr);
4818  clear_opt_map_info(&opt->map);
4819 }
4820 
4821 static void
4823 {
4824  *to = *from;
4825 }
4826 
4827 static void
4829 {
4830  int exb_reach, exm_reach;
4831  OptAncInfo tanc;
4832 
4833  concat_opt_anc_info(&tanc, &to->anc, &add->anc, to->len.max, add->len.max);
4834  copy_opt_anc_info(&to->anc, &tanc);
4835 
4836  if (add->exb.len > 0 && to->len.max == 0) {
4837  concat_opt_anc_info(&tanc, &to->anc, &add->exb.anc,
4838  to->len.max, add->len.max);
4839  copy_opt_anc_info(&add->exb.anc, &tanc);
4840  }
4841 
4842  if (add->map.value > 0 && to->len.max == 0) {
4843  if (add->map.mmd.max == 0)
4844  add->map.anc.left_anchor |= to->anc.left_anchor;
4845  }
4846 
4847  exb_reach = to->exb.reach_end;
4848  exm_reach = to->exm.reach_end;
4849 
4850  if (add->len.max != 0)
4851  to->exb.reach_end = to->exm.reach_end = 0;
4852 
4853  if (add->exb.len > 0) {
4854  if (exb_reach) {
4855  concat_opt_exact_info(&to->exb, &add->exb, enc);
4856  clear_opt_exact_info(&add->exb);
4857  }
4858  else if (exm_reach) {
4859  concat_opt_exact_info(&to->exm, &add->exb, enc);
4860  clear_opt_exact_info(&add->exb);
4861  }
4862  }
4863  select_opt_exact_info(enc, &to->exm, &add->exb);
4864  select_opt_exact_info(enc, &to->exm, &add->exm);
4865 
4866  if (to->expr.len > 0) {
4867  if (add->len.max > 0) {
4868  if (to->expr.len > (int )add->len.max)
4869  to->expr.len = (int )add->len.max;
4870 
4871  if (to->expr.mmd.max == 0)
4872  select_opt_exact_info(enc, &to->exb, &to->expr);
4873  else
4874  select_opt_exact_info(enc, &to->exm, &to->expr);
4875  }
4876  }
4877  else if (add->expr.len > 0) {
4878  copy_opt_exact_info(&to->expr, &add->expr);
4879  }
4880 
4881  select_opt_map_info(&to->map, &add->map);
4882 
4883  add_mml(&to->len, &add->len);
4884 }
4885 
4886 static void
4888 {
4889  alt_merge_opt_anc_info (&to->anc, &add->anc);
4890  alt_merge_opt_exact_info(&to->exb, &add->exb, env);
4891  alt_merge_opt_exact_info(&to->exm, &add->exm, env);
4892  alt_merge_opt_exact_info(&to->expr, &add->expr, env);
4893  alt_merge_opt_map_info(env->enc, &to->map, &add->map);
4894 
4895  alt_merge_mml(&to->len, &add->len);
4896 }
4897 
4898 
4899 #define MAX_NODE_OPT_INFO_REF_COUNT 5
4900 
4901 static int
4903 {
4904  int type;
4905  int r = 0;
4906 
4907  clear_node_opt_info(opt);
4908  set_bound_node_opt_info(opt, &env->mmd);
4909 
4910  type = NTYPE(node);
4911  switch (type) {
4912  case NT_LIST:
4913  {
4914  OptEnv nenv;
4915  NodeOptInfo nopt;
4916  Node* nd = node;
4917 
4918  copy_opt_env(&nenv, env);
4919  do {
4920  r = optimize_node_left(NCAR(nd), &nopt, &nenv);
4921  if (r == 0) {
4922  add_mml(&nenv.mmd, &nopt.len);
4923  concat_left_node_opt_info(env->enc, opt, &nopt);
4924  }
4925  } while (r == 0 && IS_NOT_NULL(nd = NCDR(nd)));
4926  }
4927  break;
4928 
4929  case NT_ALT:
4930  {
4931  NodeOptInfo nopt;
4932  Node* nd = node;
4933 
4934  do {
4935  r = optimize_node_left(NCAR(nd), &nopt, env);
4936  if (r == 0) {
4937  if (nd == node) copy_node_opt_info(opt, &nopt);
4938  else alt_merge_node_opt_info(opt, &nopt, env);
4939  }
4940  } while ((r == 0) && IS_NOT_NULL(nd = NCDR(nd)));
4941  }
4942  break;
4943 
4944  case NT_STR:
4945  {
4946  StrNode* sn = NSTR(node);
4947  OnigDistance slen = sn->end - sn->s;
4948  int is_raw = NSTRING_IS_RAW(node);
4949 
4950  if (! NSTRING_IS_AMBIG(node)) {
4951  concat_opt_exact_info_str(&opt->exb, sn->s, sn->end,
4952  is_raw, env->enc);
4953  opt->exb.ignore_case = 0;
4954  if (slen > 0) {
4955  add_char_opt_map_info(&opt->map, *(sn->s), env->enc);
4956  }
4957  set_mml(&opt->len, slen, slen);
4958  }
4959  else {
4960  OnigDistance max;
4961 
4962  if (NSTRING_IS_DONT_GET_OPT_INFO(node)) {
4963  int n = onigenc_strlen(env->enc, sn->s, sn->end);
4964  max = ONIGENC_MBC_MAXLEN_DIST(env->enc) * n;
4965  }
4966  else {
4967  concat_opt_exact_info_str(&opt->exb, sn->s, sn->end,
4968  is_raw, env->enc);
4969  opt->exb.ignore_case = 1;
4970 
4971  if (slen > 0) {
4972  r = add_char_amb_opt_map_info(&opt->map, sn->s, sn->end,
4973  env->enc, env->case_fold_flag);
4974  if (r != 0) break;
4975  }
4976 
4977  max = slen;
4978  }
4979 
4980  set_mml(&opt->len, slen, max);
4981  }
4982 
4983  if ((OnigDistance )opt->exb.len == slen)
4984  opt->exb.reach_end = 1;
4985  }
4986  break;
4987 
4988  case NT_CCLASS:
4989  {
4990  int i, z;
4991  CClassNode* cc = NCCLASS(node);
4992 
4993  /* no need to check ignore case. (set in setup_tree()) */
4994 
4995  if (IS_NOT_NULL(cc->mbuf) || IS_NCCLASS_NOT(cc)) {
4998 
4999  set_mml(&opt->len, min, max);
5000  }
5001  else {
5002  for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
5003  z = BITSET_AT(cc->bs, i);
5004  if ((z && !IS_NCCLASS_NOT(cc)) || (!z && IS_NCCLASS_NOT(cc))) {
5005  add_char_opt_map_info(&opt->map, (UChar )i, env->enc);
5006  }
5007  }
5008  set_mml(&opt->len, 1, 1);
5009  }
5010  }
5011  break;
5012 
5013  case NT_CTYPE:
5014  {
5015  int i, min, max;
5016  int maxcode;
5017 
5018  max = ONIGENC_MBC_MAXLEN_DIST(env->enc);
5019 
5020  if (max == 1) {
5021  min = 1;
5022 
5023  maxcode = NCTYPE(node)->ascii_range ? 0x80 : SINGLE_BYTE_SIZE;
5024  switch (NCTYPE(node)->ctype) {
5025  case ONIGENC_CTYPE_WORD:
5026  if (NCTYPE(node)->not != 0) {
5027  for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
5028  if (! ONIGENC_IS_CODE_WORD(env->enc, i) || i >= maxcode) {
5029  add_char_opt_map_info(&opt->map, (UChar )i, env->enc);
5030  }
5031  }
5032  }
5033  else {
5034  for (i = 0; i < maxcode; i++) {
5035  if (ONIGENC_IS_CODE_WORD(env->enc, i)) {
5036  add_char_opt_map_info(&opt->map, (UChar )i, env->enc);
5037  }
5038  }
5039  }
5040  break;
5041  }
5042  }
5043  else {
5044  min = ONIGENC_MBC_MINLEN(env->enc);
5045  }
5046  set_mml(&opt->len, min, max);
5047  }
5048  break;
5049 
5050  case NT_CANY:
5051  {
5054  set_mml(&opt->len, min, max);
5055  }
5056  break;
5057 
5058  case NT_ANCHOR:
5059  switch (NANCHOR(node)->type) {
5060  case ANCHOR_BEGIN_BUF:
5061  case ANCHOR_BEGIN_POSITION:
5062  case ANCHOR_BEGIN_LINE:
5063  case ANCHOR_END_BUF:
5064  case ANCHOR_SEMI_END_BUF:
5065  case ANCHOR_END_LINE:
5066  case ANCHOR_LOOK_BEHIND: /* just for (?<=x).* */
5067  add_opt_anc_info(&opt->anc, NANCHOR(node)->type);
5068  break;
5069 
5070  case ANCHOR_PREC_READ:
5071  {
5072  NodeOptInfo nopt;
5073 
5074  r = optimize_node_left(NANCHOR(node)->target, &nopt, env);
5075  if (r == 0) {
5076  if (nopt.exb.len > 0)
5077  copy_opt_exact_info(&opt->expr, &nopt.exb);
5078  else if (nopt.exm.len > 0)
5079  copy_opt_exact_info(&opt->expr, &nopt.exm);
5080 
5081  opt->expr.reach_end = 0;
5082 
5083  if (nopt.map.value > 0)
5084  copy_opt_map_info(&opt->map, &nopt.map);
5085  }
5086  }
5087  break;
5088 
5089  case ANCHOR_PREC_READ_NOT:
5091  break;
5092  }
5093  break;
5094 
5095  case NT_BREF:
5096  {
5097  int i;
5098  int* backs;
5099  OnigDistance min, max, tmin, tmax;
5100  Node** nodes = SCANENV_MEM_NODES(env->scan_env);
5101  BRefNode* br = NBREF(node);
5102 
5103  if (br->state & NST_RECURSION) {
5104  set_mml(&opt->len, 0, ONIG_INFINITE_DISTANCE);
5105  break;
5106  }
5107  backs = BACKREFS_P(br);
5108  r = get_min_match_length(nodes[backs[0]], &min, env->scan_env);
5109  if (r != 0) break;
5110  r = get_max_match_length(nodes[backs[0]], &max, env->scan_env);
5111  if (r != 0) break;
5112  for (i = 1; i < br->back_num; i++) {
5113  r = get_min_match_length(nodes[backs[i]], &tmin, env->scan_env);
5114  if (r != 0) break;
5115  r = get_max_match_length(nodes[backs[i]], &tmax, env->scan_env);
5116  if (r != 0) break;
5117  if (min > tmin) min = tmin;
5118  if (max < tmax) max = tmax;
5119  }
5120  if (r == 0) set_mml(&opt->len, min, max);
5121  }
5122  break;
5123 
5124 #ifdef USE_SUBEXP_CALL
5125  case NT_CALL:
5126  if (IS_CALL_RECURSION(NCALL(node)))
5127  set_mml(&opt->len, 0, ONIG_INFINITE_DISTANCE);
5128  else {
5129  OnigOptionType save = env->options;
5130  env->options = NENCLOSE(NCALL(node)->target)->option;
5131  r = optimize_node_left(NCALL(node)->target, opt, env);
5132  env->options = save;
5133  }
5134  break;
5135 #endif
5136 
5137  case NT_QTFR:
5138  {
5139  int i;
5140  OnigDistance min, max;
5141  NodeOptInfo nopt;
5142  QtfrNode* qn = NQTFR(node);
5143 
5144  r = optimize_node_left(qn->target, &nopt, env);
5145  if (r) break;
5146 
5147  if (/*qn->lower == 0 &&*/ IS_REPEAT_INFINITE(qn->upper)) {
5148  if (env->mmd.max == 0 &&
5149  NTYPE(qn->target) == NT_CANY && qn->greedy) {
5150  if (IS_MULTILINE(env->options))
5151  /* implicit anchor: /.*a/ ==> /\A.*a/ */
5153  else
5155  }
5156  }
5157  else {
5158  if (qn->lower > 0) {
5159  copy_node_opt_info(opt, &nopt);
5160  if (nopt.exb.len > 0) {
5161  if (nopt.exb.reach_end) {
5162  for (i = 2; i <= qn->lower &&
5163  ! is_full_opt_exact_info(&opt->exb); i++) {
5164  concat_opt_exact_info(&opt->exb, &nopt.exb, env->enc);
5165  }
5166  if (i < qn->lower) {
5167  opt->exb.reach_end = 0;
5168  }
5169  }
5170  }
5171 
5172  if (qn->lower != qn->upper) {
5173  opt->exb.reach_end = 0;
5174  opt->exm.reach_end = 0;
5175  }
5176  if (qn->lower > 1)
5177  opt->exm.reach_end = 0;
5178  }
5179  }
5180 
5181  min = distance_multiply(nopt.len.min, qn->lower);
5182  if (IS_REPEAT_INFINITE(qn->upper))
5183  max = (nopt.len.max > 0 ? ONIG_INFINITE_DISTANCE : 0);
5184  else
5185  max = distance_multiply(nopt.len.max, qn->upper);
5186 
5187  set_mml(&opt->len, min, max);
5188  }
5189  break;
5190 
5191  case NT_ENCLOSE:
5192  {
5193  EncloseNode* en = NENCLOSE(node);
5194 
5195  switch (en->type) {
5196  case ENCLOSE_OPTION:
5197  {
5198  OnigOptionType save = env->options;
5199 
5200  env->options = en->option;
5201  r = optimize_node_left(en->target, opt, env);
5202  env->options = save;
5203  }
5204  break;
5205 
5206  case ENCLOSE_MEMORY:
5207 #ifdef USE_SUBEXP_CALL
5208  en->opt_count++;
5210  OnigDistance min, max;
5211 
5212  min = 0;
5213  max = ONIG_INFINITE_DISTANCE;
5214  if (IS_ENCLOSE_MIN_FIXED(en)) min = en->min_len;
5215  if (IS_ENCLOSE_MAX_FIXED(en)) max = en->max_len;
5216  set_mml(&opt->len, min, max);
5217  }
5218  else
5219 #endif
5220  {
5221  r = optimize_node_left(en->target, opt, env);
5222 
5224  if (BIT_STATUS_AT(env->scan_env->backrefed_mem, en->regnum))
5226  }
5227  }
5228  break;
5229 
5231  case ENCLOSE_CONDITION:
5232  r = optimize_node_left(en->target, opt, env);
5233  break;
5234  }
5235  }
5236  break;
5237 
5238  default:
5239 #ifdef ONIG_DEBUG
5240  if (!onig_is_prelude()) fprintf(stderr, "optimize_node_left: undefined node type %d\n",
5241  NTYPE(node));
5242 #endif
5243  r = ONIGERR_TYPE_BUG;
5244  break;
5245  }
5246 
5247  return r;
5248 }
5249 
5250 static int
5252 {
5253  int r;
5254  int allow_reverse;
5255 
5256  if (e->len == 0) return 0;
5257 
5258  reg->exact = (UChar* )xmalloc(e->len);
5260  xmemcpy(reg->exact, e->s, e->len);
5261  reg->exact_end = reg->exact + e->len;
5262 
5263  allow_reverse =
5265 
5266  if (e->ignore_case > 0) {
5267  if (e->len >= 3 || (e->len >= 2 && allow_reverse)) {
5268  r = set_bm_skip(reg->exact, reg->exact_end, reg,
5269  reg->map, &(reg->int_map), 1);
5270  if (r == 0) {
5271  reg->optimize = (allow_reverse != 0
5273  }
5274  else {
5276  }
5277  }
5278  else {
5280  }
5281  }
5282  else {
5283  if (e->len >= 3 || (e->len >= 2 && allow_reverse)) {
5284  r = set_bm_skip(reg->exact, reg->exact_end, reg,
5285  reg->map, &(reg->int_map), 0);
5286  if (r) return r;
5287 
5288  reg->optimize = (allow_reverse != 0
5290  }
5291  else {
5293  }
5294  }
5295 
5296  reg->dmin = e->mmd.min;
5297  reg->dmax = e->mmd.max;
5298 
5299  if (reg->dmin != ONIG_INFINITE_DISTANCE) {
5300  reg->threshold_len = (int )(reg->dmin + (reg->exact_end - reg->exact));
5301  }
5302 
5303  return 0;
5304 }
5305 
5306 static void
5308 {
5309  int i;
5310 
5311  for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++)
5312  reg->map[i] = m->map[i];
5313 
5314  reg->optimize = ONIG_OPTIMIZE_MAP;
5315  reg->dmin = m->mmd.min;
5316  reg->dmax = m->mmd.max;
5317 
5318  if (reg->dmin != ONIG_INFINITE_DISTANCE) {
5319  reg->threshold_len = (int )(reg->dmin + 1);
5320  }
5321 }
5322 
5323 static void
5325 {
5326  reg->sub_anchor |= anc->left_anchor & ANCHOR_BEGIN_LINE;
5327  reg->sub_anchor |= anc->right_anchor & ANCHOR_END_LINE;
5328 }
5329 
5330 #ifdef ONIG_DEBUG
5331 static void print_optimize_info(FILE* f, regex_t* reg);
5332 #endif
5333 
5334 static int
5336 {
5337 
5338  int r;
5339  NodeOptInfo opt;
5340  OptEnv env;
5341 
5342  env.enc = reg->enc;
5343  env.options = reg->options;
5344  env.case_fold_flag = reg->case_fold_flag;
5345  env.scan_env = scan_env;
5346  clear_mml(&env.mmd);
5347 
5348  r = optimize_node_left(node, &opt, &env);
5349  if (r) return r;
5350 
5351  reg->anchor = opt.anc.left_anchor & (ANCHOR_BEGIN_BUF |
5354 
5356 
5357  if (reg->anchor & (ANCHOR_END_BUF | ANCHOR_SEMI_END_BUF)) {
5358  reg->anchor_dmin = opt.len.min;
5359  reg->anchor_dmax = opt.len.max;
5360  }
5361 
5362  if (opt.exb.len > 0 || opt.exm.len > 0) {
5363  select_opt_exact_info(reg->enc, &opt.exb, &opt.exm);
5364  if (opt.map.value > 0 &&
5365  comp_opt_exact_or_map_info(&opt.exb, &opt.map) > 0) {
5366  goto set_map;
5367  }
5368  else {
5369  r = set_optimize_exact_info(reg, &opt.exb);
5370  set_sub_anchor(reg, &opt.exb.anc);
5371  }
5372  }
5373  else if (opt.map.value > 0) {
5374  set_map:
5375  set_optimize_map_info(reg, &opt.map);
5376  set_sub_anchor(reg, &opt.map.anc);
5377  }
5378  else {
5380  if (opt.len.max == 0)
5382  }
5383 
5384 #if defined(ONIG_DEBUG_COMPILE) || defined(ONIG_DEBUG_MATCH)
5385  if (!onig_is_prelude()) print_optimize_info(stderr, reg);
5386 #endif
5387  return r;
5388 }
5389 
5390 static void
5392 {
5394  reg->anchor = 0;
5395  reg->anchor_dmin = 0;
5396  reg->anchor_dmax = 0;
5397  reg->sub_anchor = 0;
5398  reg->exact_end = (UChar* )NULL;
5399  reg->threshold_len = 0;
5400  if (IS_NOT_NULL(reg->exact)) {
5401  xfree(reg->exact);
5402  reg->exact = (UChar* )NULL;
5403  }
5404 }
5405 
5406 #ifdef ONIG_DEBUG
5407 
5408 static void print_enc_string(FILE* fp, OnigEncoding enc,
5409  const UChar *s, const UChar *end)
5410 {
5411  fprintf(fp, "\nPATTERN: /");
5412 
5413  if (ONIGENC_MBC_MINLEN(enc) > 1) {
5414  const UChar *p;
5416 
5417  p = s;
5418  while (p < end) {
5419  code = ONIGENC_MBC_TO_CODE(enc, p, end);
5420  if (code >= 0x80) {
5421  fprintf(fp, " 0x%04x ", (int )code);
5422  }
5423  else {
5424  fputc((int )code, fp);
5425  }
5426 
5427  p += enclen(enc, p, end);
5428  }
5429  }
5430  else {
5431  while (s < end) {
5432  fputc((int )*s, fp);
5433  s++;
5434  }
5435  }
5436 
5437  fprintf(fp, "/ (%s)\n", enc->name);
5438 }
5439 
5440 static void
5441 print_distance_range(FILE* f, OnigDistance a, OnigDistance b)
5442 {
5443  if (a == ONIG_INFINITE_DISTANCE)
5444  fputs("inf", f);
5445  else
5446  fprintf(f, "(%"PRIuSIZE")", a);
5447 
5448  fputs("-", f);
5449 
5450  if (b == ONIG_INFINITE_DISTANCE)
5451  fputs("inf", f);
5452  else
5453  fprintf(f, "(%"PRIuSIZE")", b);
5454 }
5455 
5456 static void
5457 print_anchor(FILE* f, int anchor)
5458 {
5459  int q = 0;
5460 
5461  fprintf(f, "[");
5462 
5463  if (anchor & ANCHOR_BEGIN_BUF) {
5464  fprintf(f, "begin-buf");
5465  q = 1;
5466  }
5467  if (anchor & ANCHOR_BEGIN_LINE) {
5468  if (q) fprintf(f, ", ");
5469  q = 1;
5470  fprintf(f, "begin-line");
5471  }
5472  if (anchor & ANCHOR_BEGIN_POSITION) {
5473  if (q) fprintf(f, ", ");
5474  q = 1;
5475  fprintf(f, "begin-pos");
5476  }
5477  if (anchor & ANCHOR_END_BUF) {
5478  if (q) fprintf(f, ", ");
5479  q = 1;
5480  fprintf(f, "end-buf");
5481  }
5482  if (anchor & ANCHOR_SEMI_END_BUF) {
5483  if (q) fprintf(f, ", ");
5484  q = 1;
5485  fprintf(f, "semi-end-buf");
5486  }
5487  if (anchor & ANCHOR_END_LINE) {
5488  if (q) fprintf(f, ", ");
5489  q = 1;
5490  fprintf(f, "end-line");
5491  }
5492  if (anchor & ANCHOR_ANYCHAR_STAR) {
5493  if (q) fprintf(f, ", ");
5494  q = 1;
5495  fprintf(f, "anychar-star");
5496  }
5497  if (anchor & ANCHOR_ANYCHAR_STAR_ML) {
5498  if (q) fprintf(f, ", ");
5499  fprintf(f, "anychar-star-ml");
5500  }
5501 
5502  fprintf(f, "]");
5503 }
5504 
5505 static void
5506 print_optimize_info(FILE* f, regex_t* reg)
5507 {
5508  static const char* on[] = { "NONE", "EXACT", "EXACT_BM", "EXACT_BM_NOT_REV",
5509  "EXACT_IC", "MAP",
5510  "EXACT_BM_IC", "EXACT_BM_NOT_REV_IC" };
5511 
5512  fprintf(f, "optimize: %s\n", on[reg->optimize]);
5513  fprintf(f, " anchor: "); print_anchor(f, reg->anchor);
5514  if ((reg->anchor & ANCHOR_END_BUF_MASK) != 0)
5515  print_distance_range(f, reg->anchor_dmin, reg->anchor_dmax);
5516  fprintf(f, "\n");
5517 
5518  if (reg->optimize) {
5519  fprintf(f, " sub anchor: "); print_anchor(f, reg->sub_anchor);
5520  fprintf(f, "\n");
5521  }
5522  fprintf(f, "\n");
5523 
5524  if (reg->exact) {
5525  UChar *p;
5526  fprintf(f, "exact: [");
5527  for (p = reg->exact; p < reg->exact_end; p++) {
5528  fputc(*p, f);
5529  }
5530  fprintf(f, "]: length: %ld\n", (reg->exact_end - reg->exact));
5531  }
5532  else if (reg->optimize & ONIG_OPTIMIZE_MAP) {
5533  int c, i, n = 0;
5534 
5535  for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++)
5536  if (reg->map[i]) n++;
5537 
5538  fprintf(f, "map: n=%d\n", n);
5539  if (n > 0) {
5540  c = 0;
5541  fputc('[', f);
5542  for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) {
5543  if (reg->map[i] != 0) {
5544  if (c > 0) fputs(", ", f);
5545  c++;
5546  if (ONIGENC_MBC_MAXLEN(reg->enc) == 1 &&
5548  fputc(i, f);
5549  else
5550  fprintf(f, "%d", i);
5551  }
5552  }
5553  fprintf(f, "]\n");
5554  }
5555  }
5556 }
5557 #endif /* ONIG_DEBUG */
5558 
5559 
5560 extern void
5562 {
5563  if (IS_NOT_NULL(reg)) {
5564  if (IS_NOT_NULL(reg->p)) xfree(reg->p);
5565  if (IS_NOT_NULL(reg->exact)) xfree(reg->exact);
5566  if (IS_NOT_NULL(reg->int_map)) xfree(reg->int_map);
5568  if (IS_NOT_NULL(reg->repeat_range)) xfree(reg->repeat_range);
5569  if (IS_NOT_NULL(reg->chain)) onig_free(reg->chain);
5570 
5571 #ifdef USE_NAMED_GROUP
5572  onig_names_free(reg);
5573 #endif
5574  }
5575 }
5576 
5577 extern void
5579 {
5580  if (IS_NOT_NULL(reg)) {
5581  onig_free_body(reg);
5582  xfree(reg);
5583  }
5584 }
5585 
5586 size_t
5588 {
5589  size_t size = sizeof(regex_t);
5590  if (IS_NULL(reg)) return 0;
5591  if (IS_NOT_NULL(reg->p)) size += reg->alloc;
5592  if (IS_NOT_NULL(reg->exact)) size += reg->exact_end - reg->exact;
5593  if (IS_NOT_NULL(reg->int_map)) size += sizeof(int) * ONIG_CHAR_TABLE_SIZE;
5594  if (IS_NOT_NULL(reg->int_map_backward)) size += sizeof(int) * ONIG_CHAR_TABLE_SIZE;
5595  if (IS_NOT_NULL(reg->repeat_range)) size += reg->repeat_range_alloc * sizeof(OnigRepeatRange);
5596  if (IS_NOT_NULL(reg->chain)) size += onig_memsize(reg->chain);
5597 
5598  return size;
5599 }
5600 
5601 size_t
5603 {
5604  size_t size = sizeof(*regs);
5605  if (IS_NULL(regs)) return 0;
5606  size += regs->allocated * (sizeof(*regs->beg) + sizeof(*regs->end));
5607  return size;
5608 }
5609 
5610 #define REGEX_TRANSFER(to,from) do {\
5611  (to)->state = ONIG_STATE_MODIFY;\
5612  onig_free_body(to);\
5613  xmemcpy(to, from, sizeof(regex_t));\
5614  xfree(from);\
5615 } while (0)
5616 
5617 extern void
5619 {
5621  REGEX_TRANSFER(to, from);
5623 }
5624 
5625 #define REGEX_CHAIN_HEAD(reg) do {\
5626  while (IS_NOT_NULL((reg)->chain)) {\
5627  (reg) = (reg)->chain;\
5628  }\
5629 } while (0)
5630 
5631 extern void
5633 {
5635  REGEX_CHAIN_HEAD(to);
5636  to->chain = add;
5638 }
5639 
5640 extern void
5642 {
5643  regex_t *head, *prev;
5644 
5645  prev = reg;
5646  head = prev->chain;
5647  if (IS_NOT_NULL(head)) {
5648  reg->state = ONIG_STATE_MODIFY;
5649  while (IS_NOT_NULL(head->chain)) {
5650  prev = head;
5651  head = head->chain;
5652  }
5653  prev->chain = (regex_t* )NULL;
5654  REGEX_TRANSFER(reg, head);
5655  }
5656 }
5657 
5658 #ifdef ONIG_DEBUG
5659 static void print_compiled_byte_code_list P_((FILE* f, regex_t* reg));
5660 #endif
5661 #ifdef ONIG_DEBUG_PARSE_TREE
5662 static void print_tree P_((FILE* f, Node* node));
5663 #endif
5664 
5665 extern int
5666 onig_compile(regex_t* reg, const UChar* pattern, const UChar* pattern_end,
5667  OnigErrorInfo* einfo, const char *sourcefile, int sourceline)
5668 {
5669 #define COMPILE_INIT_SIZE 20
5670 
5671  int r;
5672  OnigDistance init_size;
5673  Node* root;
5674  ScanEnv scan_env = {0};
5675 #ifdef USE_SUBEXP_CALL
5676  UnsetAddrList uslist;
5677 #endif
5678 
5679  if (IS_NOT_NULL(einfo)) einfo->par = (UChar* )NULL;
5680 
5681  scan_env.sourcefile = sourcefile;
5682  scan_env.sourceline = sourceline;
5683  reg->state = ONIG_STATE_COMPILING;
5684 
5685 #ifdef ONIG_DEBUG
5686  if (!onig_is_prelude()) print_enc_string(stderr, reg->enc, pattern, pattern_end);
5687 #endif
5688 
5689  if (reg->alloc == 0) {
5690  init_size = (pattern_end - pattern) * 2;
5691  if (init_size <= 0) init_size = COMPILE_INIT_SIZE;
5692  r = BBUF_INIT(reg, init_size);
5693  if (r != 0) goto end;
5694  }
5695  else
5696  reg->used = 0;
5697 
5698  reg->num_mem = 0;
5699  reg->num_repeat = 0;
5700  reg->num_null_check = 0;
5701  reg->repeat_range_alloc = 0;
5702  reg->repeat_range = (OnigRepeatRange* )NULL;
5703 #ifdef USE_COMBINATION_EXPLOSION_CHECK
5704  reg->num_comb_exp_check = 0;
5705 #endif
5706 
5707  r = onig_parse_make_tree(&root, pattern, pattern_end, reg, &scan_env);
5708  if (r != 0) goto err;
5709 
5710 #ifdef ONIG_DEBUG_PARSE_TREE
5711 # if 0
5712  fprintf(stderr, "ORIGINAL PARSE TREE:\n");
5713  if (!onig_is_prelude()) {
5714  print_tree(stderr, root);
5715  }
5716 # endif
5717 #endif
5718 
5719 #ifdef USE_NAMED_GROUP
5720  /* mixed use named group and no-named group */
5721  if (scan_env.num_named > 0 &&
5724  if (scan_env.num_named != scan_env.num_mem)
5725  r = disable_noname_group_capture(&root, reg, &scan_env);
5726  else
5727  r = numbered_ref_check(root);
5728 
5729  if (r != 0) goto err;
5730  }
5731 #endif
5732 
5733 #ifdef USE_SUBEXP_CALL
5734  if (scan_env.num_call > 0) {
5735  r = unset_addr_list_init(&uslist, scan_env.num_call);
5736  if (r != 0) goto err;
5737  scan_env.unset_addr_list = &uslist;
5738  r = setup_subexp_call(root, &scan_env);
5739  if (r != 0) goto err_unset;
5740  r = subexp_recursive_check_trav(root, &scan_env);
5741  if (r < 0) goto err_unset;
5742  r = subexp_inf_recursive_check_trav(root, &scan_env);
5743  if (r != 0) goto err_unset;
5744 
5745  reg->num_call = scan_env.num_call;
5746  }
5747  else
5748  reg->num_call = 0;
5749 #endif
5750 
5751  r = setup_tree(root, reg, IN_ROOT, &scan_env);
5752  if (r != 0) goto err_unset;
5753 
5754 #ifdef ONIG_DEBUG_PARSE_TREE
5755  if (!onig_is_prelude()) print_tree(stderr, root);
5756 #endif
5757 
5758  reg->capture_history = scan_env.capture_history;
5759  reg->bt_mem_start = scan_env.bt_mem_start;
5760  reg->bt_mem_start |= reg->capture_history;
5761  if (IS_FIND_CONDITION(reg->options))
5763  else {
5764  reg->bt_mem_end = scan_env.bt_mem_end;
5765  reg->bt_mem_end |= reg->capture_history;
5766  }
5767 
5768 #ifdef USE_COMBINATION_EXPLOSION_CHECK
5769  if (scan_env.backrefed_mem == 0
5770 #ifdef USE_SUBEXP_CALL
5771  || scan_env.num_call == 0
5772 #endif
5773  ) {
5774  setup_comb_exp_check(root, 0, &scan_env);
5775 #ifdef USE_SUBEXP_CALL
5776  if (scan_env.has_recursion != 0) {
5777  scan_env.num_comb_exp_check = 0;
5778  }
5779  else
5780 #endif
5781  if (scan_env.comb_exp_max_regnum > 0) {
5782  int i;
5783  for (i = 1; i <= scan_env.comb_exp_max_regnum; i++) {
5784  if (BIT_STATUS_AT(scan_env.backrefed_mem, i) != 0) {
5785  scan_env.num_comb_exp_check = 0;
5786  break;
5787  }
5788  }
5789  }
5790  }
5791 
5792  reg->num_comb_exp_check = scan_env.num_comb_exp_check;
5793 #endif
5794 
5795  clear_optimize_info(reg);
5796 #ifndef ONIG_DONT_OPTIMIZE
5797  r = set_optimize_info_from_tree(root, reg, &scan_env);
5798  if (r != 0) goto err_unset;
5799 #endif
5800 
5801  if (IS_NOT_NULL(scan_env.mem_nodes_dynamic)) {
5802  xfree(scan_env.mem_nodes_dynamic);
5803  scan_env.mem_nodes_dynamic = (Node** )NULL;
5804  }
5805 
5806  r = compile_tree(root, reg);
5807  if (r == 0) {
5808  r = add_opcode(reg, OP_END);
5809 #ifdef USE_SUBEXP_CALL
5810  if (scan_env.num_call > 0) {
5811  r = unset_addr_list_fix(&uslist, reg);
5812  unset_addr_list_end(&uslist);
5813  if (r) goto err;
5814  }
5815 #endif
5816 
5817  if ((reg->num_repeat != 0) || (reg->bt_mem_end != 0))
5819  else {
5820  if (reg->bt_mem_start != 0)
5822  else
5824  }
5825  }
5826 #ifdef USE_SUBEXP_CALL
5827  else if (scan_env.num_call > 0) {
5828  unset_addr_list_end(&uslist);
5829  }
5830 #endif
5831  onig_node_free(root);
5832 
5833 #ifdef ONIG_DEBUG_COMPILE
5834 #ifdef USE_NAMED_GROUP
5835  if (!onig_is_prelude()) onig_print_names(stderr, reg);
5836 #endif
5837  if (!onig_is_prelude()) print_compiled_byte_code_list(stderr, reg);
5838 #endif
5839 
5840  end:
5841  reg->state = ONIG_STATE_NORMAL;
5842  return r;
5843 
5844  err_unset:
5845 #ifdef USE_SUBEXP_CALL
5846  if (scan_env.num_call > 0) {
5847  unset_addr_list_end(&uslist);
5848  }
5849 #endif
5850  err:
5851  if (IS_NOT_NULL(scan_env.error)) {
5852  if (IS_NOT_NULL(einfo)) {
5853  einfo->enc = scan_env.enc;
5854  einfo->par = scan_env.error;
5855  einfo->par_end = scan_env.error_end;
5856  }
5857  }
5858 
5859  onig_node_free(root);
5860  if (IS_NOT_NULL(scan_env.mem_nodes_dynamic))
5861  xfree(scan_env.mem_nodes_dynamic);
5862  return r;
5863 }
5864 
5865 #ifdef USE_RECOMPILE_API
5866 extern int
5867 onig_recompile(regex_t* reg, const UChar* pattern, const UChar* pattern_end,
5868  OnigOptionType option, OnigEncoding enc, OnigSyntaxType* syntax,
5870 {
5871  int r;
5872  regex_t *new_reg;
5873 
5874  r = onig_new(&new_reg, pattern, pattern_end, option, enc, syntax, einfo);
5875  if (r) return r;
5876  if (ONIG_STATE(reg) == ONIG_STATE_NORMAL) {
5877  onig_transfer(reg, new_reg);
5878  }
5879  else {
5880  onig_chain_link_add(reg, new_reg);
5881  }
5882  return 0;
5883 }
5884 #endif
5885 
5886 static int onig_inited = 0;
5887 
5888 extern int
5890  OnigCaseFoldType case_fold_flag,
5891  OnigEncoding enc, const OnigSyntaxType* syntax)
5892 {
5893  if (! onig_inited)
5894  onig_init();
5895 
5896  if (IS_NULL(reg))
5897  return ONIGERR_INVALID_ARGUMENT;
5898 
5899  if (ONIGENC_IS_UNDEF(enc))
5901 
5905  }
5906 
5907  (reg)->state = ONIG_STATE_MODIFY;
5908 
5909  if ((option & ONIG_OPTION_NEGATE_SINGLELINE) != 0) {
5910  option |= syntax->options;
5911  option &= ~ONIG_OPTION_SINGLELINE;
5912  }
5913  else
5914  option |= syntax->options;
5915 
5916  (reg)->enc = enc;
5917  (reg)->options = option;
5918  (reg)->syntax = syntax;
5919  (reg)->optimize = 0;
5920  (reg)->exact = (UChar* )NULL;
5921  (reg)->int_map = (int* )NULL;
5922  (reg)->int_map_backward = (int* )NULL;
5923  (reg)->chain = (regex_t* )NULL;
5924 
5925  (reg)->p = (UChar* )NULL;
5926  (reg)->alloc = 0;
5927  (reg)->used = 0;
5928  (reg)->name_table = (void* )NULL;
5929 
5930  (reg)->case_fold_flag = case_fold_flag;
5931  return 0;
5932 }
5933 
5934 extern int
5935 onig_new_without_alloc(regex_t* reg, const UChar* pattern,
5936  const UChar* pattern_end, OnigOptionType option, OnigEncoding enc,
5937  OnigSyntaxType* syntax, OnigErrorInfo* einfo)
5938 {
5939  int r;
5940 
5941  r = onig_reg_init(reg, option, ONIGENC_CASE_FOLD_DEFAULT, enc, syntax);
5942  if (r) return r;
5943 
5944  r = onig_compile(reg, pattern, pattern_end, einfo, NULL, 0);
5945  return r;
5946 }
5947 
5948 extern int
5949 onig_new(regex_t** reg, const UChar* pattern, const UChar* pattern_end,
5950  OnigOptionType option, OnigEncoding enc, const OnigSyntaxType* syntax,
5951  OnigErrorInfo* einfo)
5952 {
5953  int r;
5954 
5955  *reg = (regex_t* )xmalloc(sizeof(regex_t));
5956  if (IS_NULL(*reg)) return ONIGERR_MEMORY;
5957 
5958  r = onig_reg_init(*reg, option, ONIGENC_CASE_FOLD_DEFAULT, enc, syntax);
5959  if (r) goto err;
5960 
5961  r = onig_compile(*reg, pattern, pattern_end, einfo, NULL, 0);
5962  if (r) {
5963  err:
5964  onig_free(*reg);
5965  *reg = NULL;
5966  }
5967  return r;
5968 }
5969 
5970 
5971 extern int
5973 {
5974  if (onig_inited != 0)
5975  return 0;
5976 
5979 
5980  onig_inited = 1;
5981 
5982  onigenc_init();
5983  /* onigenc_set_default_caseconv_table((UChar* )0); */
5984 
5985 #ifdef ONIG_DEBUG_STATISTICS
5986  onig_statistics_init();
5987 #endif
5988 
5990  return 0;
5991 }
5992 
5993 
5994 extern int
5996 {
5998 
5999 #ifdef ONIG_DEBUG_STATISTICS
6000  if (!onig_is_prelude()) onig_print_statistics(stderr);
6001 #endif
6002 
6003 #ifdef USE_SHARED_CCLASS_TABLE
6005 #endif
6006 
6007 #ifdef USE_PARSE_TREE_NODE_RECYCLE
6009 #endif
6010 
6011  onig_inited = 0;
6012 
6015  return 0;
6016 }
6017 
6018 extern int
6020 {
6021  OnigCodePoint n, *data;
6022  OnigCodePoint low, high, x;
6023 
6024  GET_CODE_POINT(n, p);
6025  data = (OnigCodePoint* )p;
6026  data++;
6027 
6028  for (low = 0, high = n; low < high; ) {
6029  x = (low + high) >> 1;
6030  if (code > data[x * 2 + 1])
6031  low = x + 1;
6032  else
6033  high = x;
6034  }
6035 
6036  return ((low < n && code >= data[low * 2]) ? 1 : 0);
6037 }
6038 
6039 extern int
6041 {
6042  int found;
6043 
6044  if (elen > 1 || (code >= SINGLE_BYTE_SIZE)) {
6045  if (IS_NULL(cc->mbuf)) {
6046  found = 0;
6047  }
6048  else {
6049  found = (onig_is_in_code_range(cc->mbuf->p, code) != 0 ? 1 : 0);
6050  }
6051  }
6052  else {
6053  found = (BITSET_AT(cc->bs, code) == 0 ? 0 : 1);
6054  }
6055 
6056  if (IS_NCCLASS_NOT(cc))
6057  return !found;
6058  else
6059  return found;
6060 }
6061 
6062 extern int
6064 {
6065  int len;
6066 
6067  if (ONIGENC_MBC_MINLEN(enc) > 1) {
6068  len = 2;
6069  }
6070  else {
6071  len = ONIGENC_CODE_TO_MBCLEN(enc, code);
6072  }
6073  return onig_is_code_in_cc_len(len, code, cc);
6074 }
6075 
6076 
6077 #ifdef ONIG_DEBUG
6078 
6079 /* arguments type */
6080 #define ARG_SPECIAL -1
6081 #define ARG_NON 0
6082 #define ARG_RELADDR 1
6083 #define ARG_ABSADDR 2
6084 #define ARG_LENGTH 3
6085 #define ARG_MEMNUM 4
6086 #define ARG_OPTION 5
6087 #define ARG_STATE_CHECK 6
6088 
6089 OnigOpInfoType OnigOpInfo[] = {
6090  { OP_FINISH, "finish", ARG_NON },
6091  { OP_END, "end", ARG_NON },
6092  { OP_EXACT1, "exact1", ARG_SPECIAL },
6093  { OP_EXACT2, "exact2", ARG_SPECIAL },
6094  { OP_EXACT3, "exact3", ARG_SPECIAL },
6095  { OP_EXACT4, "exact4", ARG_SPECIAL },
6096  { OP_EXACT5, "exact5", ARG_SPECIAL },
6097  { OP_EXACTN, "exactn", ARG_SPECIAL },
6098  { OP_EXACTMB2N1, "exactmb2-n1", ARG_SPECIAL },
6099  { OP_EXACTMB2N2, "exactmb2-n2", ARG_SPECIAL },
6100  { OP_EXACTMB2N3, "exactmb2-n3", ARG_SPECIAL },
6101  { OP_EXACTMB2N, "exactmb2-n", ARG_SPECIAL },
6102  { OP_EXACTMB3N, "exactmb3n" , ARG_SPECIAL },
6103  { OP_EXACTMBN, "exactmbn", ARG_SPECIAL },
6104  { OP_EXACT1_IC, "exact1-ic", ARG_SPECIAL },
6105  { OP_EXACTN_IC, "exactn-ic", ARG_SPECIAL },
6106  { OP_CCLASS, "cclass", ARG_SPECIAL },
6107  { OP_CCLASS_MB, "cclass-mb", ARG_SPECIAL },
6108  { OP_CCLASS_MIX, "cclass-mix", ARG_SPECIAL },
6109  { OP_CCLASS_NOT, "cclass-not", ARG_SPECIAL },
6110  { OP_CCLASS_MB_NOT, "cclass-mb-not", ARG_SPECIAL },
6111  { OP_CCLASS_MIX_NOT, "cclass-mix-not", ARG_SPECIAL },
6112  { OP_CCLASS_NODE, "cclass-node", ARG_SPECIAL },
6113  { OP_ANYCHAR, "anychar", ARG_NON },
6114  { OP_ANYCHAR_ML, "anychar-ml", ARG_NON },
6115  { OP_ANYCHAR_STAR, "anychar*", ARG_NON },
6116  { OP_ANYCHAR_ML_STAR, "anychar-ml*", ARG_NON },
6117  { OP_ANYCHAR_STAR_PEEK_NEXT, "anychar*-peek-next", ARG_SPECIAL },
6118  { OP_ANYCHAR_ML_STAR_PEEK_NEXT, "anychar-ml*-peek-next", ARG_SPECIAL },
6119  { OP_WORD, "word", ARG_NON },
6120  { OP_NOT_WORD, "not-word", ARG_NON },
6121  { OP_WORD_BOUND, "word-bound", ARG_NON },
6122  { OP_NOT_WORD_BOUND, "not-word-bound", ARG_NON },
6123  { OP_WORD_BEGIN, "word-begin", ARG_NON },
6124  { OP_WORD_END, "word-end", ARG_NON },
6125  { OP_ASCII_WORD, "ascii-word", ARG_NON },
6126  { OP_NOT_ASCII_WORD, "not-ascii-word", ARG_NON },
6127  { OP_ASCII_WORD_BOUND, "ascii-word-bound", ARG_NON },
6128  { OP_NOT_ASCII_WORD_BOUND,"not-ascii-word-bound", ARG_NON },
6129  { OP_ASCII_WORD_BEGIN, "ascii-word-begin", ARG_NON },
6130  { OP_ASCII_WORD_END, "ascii-word-end", ARG_NON },
6131  { OP_BEGIN_BUF, "begin-buf", ARG_NON },
6132  { OP_END_BUF, "end-buf", ARG_NON },
6133  { OP_BEGIN_LINE, "begin-line", ARG_NON },
6134  { OP_END_LINE, "end-line", ARG_NON },
6135  { OP_SEMI_END_BUF, "semi-end-buf", ARG_NON },
6136  { OP_BEGIN_POSITION, "begin-position", ARG_NON },
6137  { OP_BEGIN_POS_OR_LINE, "begin-pos-or-line", ARG_NON },
6138  { OP_BACKREF1, "backref1", ARG_NON },
6139  { OP_BACKREF2, "backref2", ARG_NON },
6140  { OP_BACKREFN, "backrefn", ARG_MEMNUM },
6141  { OP_BACKREFN_IC, "backrefn-ic", ARG_SPECIAL },
6142  { OP_BACKREF_MULTI, "backref_multi", ARG_SPECIAL },
6143  { OP_BACKREF_MULTI_IC, "backref_multi-ic", ARG_SPECIAL },
6144  { OP_BACKREF_WITH_LEVEL, "backref_at_level", ARG_SPECIAL },
6145  { OP_MEMORY_START_PUSH, "mem-start-push", ARG_MEMNUM },
6146  { OP_MEMORY_START, "mem-start", ARG_MEMNUM },
6147  { OP_MEMORY_END_PUSH, "mem-end-push", ARG_MEMNUM },
6148  { OP_MEMORY_END_PUSH_REC, "mem-end-push-rec", ARG_MEMNUM },
6149  { OP_MEMORY_END, "mem-end", ARG_MEMNUM },
6150  { OP_MEMORY_END_REC, "mem-end-rec", ARG_MEMNUM },
6151  { OP_SET_OPTION_PUSH, "set-option-push", ARG_OPTION },
6152  { OP_SET_OPTION, "set-option", ARG_OPTION },
6153  { OP_KEEP, "keep", ARG_NON },
6154  { OP_FAIL, "fail", ARG_NON },
6155  { OP_JUMP, "jump", ARG_RELADDR },
6156  { OP_PUSH, "push", ARG_RELADDR },
6157  { OP_POP, "pop", ARG_NON },
6158  { OP_PUSH_OR_JUMP_EXACT1, "push-or-jump-e1", ARG_SPECIAL },
6159  { OP_PUSH_IF_PEEK_NEXT, "push-if-peek-next", ARG_SPECIAL },
6160  { OP_REPEAT, "repeat", ARG_SPECIAL },
6161  { OP_REPEAT_NG, "repeat-ng", ARG_SPECIAL },
6162  { OP_REPEAT_INC, "repeat-inc", ARG_MEMNUM },
6163  { OP_REPEAT_INC_NG, "repeat-inc-ng", ARG_MEMNUM },
6164  { OP_REPEAT_INC_SG, "repeat-inc-sg", ARG_MEMNUM },
6165  { OP_REPEAT_INC_NG_SG, "repeat-inc-ng-sg", ARG_MEMNUM },
6166  { OP_NULL_CHECK_START, "null-check-start", ARG_MEMNUM },
6167  { OP_NULL_CHECK_END, "null-check-end", ARG_MEMNUM },
6168  { OP_NULL_CHECK_END_MEMST,"null-check-end-memst", ARG_MEMNUM },
6169  { OP_NULL_CHECK_END_MEMST_PUSH,"null-check-end-memst-push", ARG_MEMNUM },
6170  { OP_PUSH_POS, "push-pos", ARG_NON },
6171  { OP_POP_POS, "pop-pos", ARG_NON },
6172  { OP_PUSH_POS_NOT, "push-pos-not", ARG_RELADDR },
6173  { OP_FAIL_POS, "fail-pos", ARG_NON },
6174  { OP_PUSH_STOP_BT, "push-stop-bt", ARG_NON },
6175  { OP_POP_STOP_BT, "pop-stop-bt", ARG_NON },
6176  { OP_LOOK_BEHIND, "look-behind", ARG_SPECIAL },
6177  { OP_PUSH_LOOK_BEHIND_NOT, "push-look-behind-not", ARG_SPECIAL },
6178  { OP_FAIL_LOOK_BEHIND_NOT, "fail-look-behind-not", ARG_NON },
6179  { OP_CALL, "call", ARG_ABSADDR },
6180  { OP_RETURN, "return", ARG_NON },
6181  { OP_CONDITION, "condition", ARG_SPECIAL },
6182  { OP_STATE_CHECK_PUSH, "state-check-push", ARG_SPECIAL },
6183  { OP_STATE_CHECK_PUSH_OR_JUMP, "state-check-push-or-jump", ARG_SPECIAL },
6184  { OP_STATE_CHECK, "state-check", ARG_STATE_CHECK },
6185  { OP_STATE_CHECK_ANYCHAR_STAR, "state-check-anychar*", ARG_STATE_CHECK },
6187  "state-check-anychar-ml*", ARG_STATE_CHECK },
6188  { -1, "", ARG_NON }
6189 };
6190 
6191 static const char*
6192 op2name(int opcode)
6193 {
6194  int i;
6195 
6196  for (i = 0; OnigOpInfo[i].opcode >= 0; i++) {
6197  if (opcode == OnigOpInfo[i].opcode)
6198  return OnigOpInfo[i].name;
6199  }
6200  return "";
6201 }
6202 
6203 static int
6204 op2arg_type(int opcode)
6205 {
6206  int i;
6207 
6208  for (i = 0; OnigOpInfo[i].opcode >= 0; i++) {
6209  if (opcode == OnigOpInfo[i].opcode)
6210  return OnigOpInfo[i].arg_type;
6211  }
6212  return ARG_SPECIAL;
6213 }
6214 
6215 static void
6216 Indent(FILE* f, int indent)
6217 {
6218  int i;
6219  for (i = 0; i < indent; i++) putc(' ', f);
6220 }
6221 
6222 static void
6223 p_string(FILE* f, int len, UChar* s)
6224 {
6225  fputs(":", f);
6226  while (len-- > 0) { fputc(*s++, f); }
6227 }
6228 
6229 static void
6230 p_len_string(FILE* f, LengthType len, int mb_len, UChar* s)
6231 {
6232  int x = len * mb_len;
6233 
6234  fprintf(f, ":%d:", len);
6235  while (x-- > 0) { fputc(*s++, f); }
6236 }
6237 
6238 extern void
6239 onig_print_compiled_byte_code(FILE* f, UChar* bp, UChar* bpend, UChar** nextp,
6240  OnigEncoding enc)
6241 {
6242  int i, n, arg_type;
6243  RelAddrType addr;
6244  LengthType len;
6245  MemNumType mem;
6246  StateCheckNumType scn;
6248  UChar *q;
6249 
6250  fprintf(f, "[%s", op2name(*bp));
6251  arg_type = op2arg_type(*bp);
6252  if (arg_type != ARG_SPECIAL) {
6253  bp++;
6254  switch (arg_type) {
6255  case ARG_NON:
6256  break;
6257  case ARG_RELADDR:
6258  GET_RELADDR_INC(addr, bp);
6259  fprintf(f, ":(%d)", addr);
6260  break;
6261  case ARG_ABSADDR:
6262  GET_ABSADDR_INC(addr, bp);
6263  fprintf(f, ":(%d)", addr);
6264  break;
6265  case ARG_LENGTH:
6266  GET_LENGTH_INC(len, bp);
6267  fprintf(f, ":%d", len);
6268  break;
6269  case ARG_MEMNUM:
6270  mem = *((MemNumType* )bp);
6271  bp += SIZE_MEMNUM;
6272  fprintf(f, ":%d", mem);
6273  break;
6274  case ARG_OPTION:
6275  {
6276  OnigOptionType option = *((OnigOptionType* )bp);
6277  bp += SIZE_OPTION;
6278  fprintf(f, ":%d", option);
6279  }
6280  break;
6281 
6282  case ARG_STATE_CHECK:
6283  scn = *((StateCheckNumType* )bp);
6284  bp += SIZE_STATE_CHECK_NUM;
6285  fprintf(f, ":%d", scn);
6286  break;
6287  }
6288  }
6289  else {
6290  switch (*bp++) {
6291  case OP_EXACT1:
6294  p_string(f, 1, bp++); break;
6295  case OP_EXACT2:
6296  p_string(f, 2, bp); bp += 2; break;
6297  case OP_EXACT3:
6298  p_string(f, 3, bp); bp += 3; break;
6299  case OP_EXACT4:
6300  p_string(f, 4, bp); bp += 4; break;
6301  case OP_EXACT5:
6302  p_string(f, 5, bp); bp += 5; break;
6303  case OP_EXACTN:
6304  GET_LENGTH_INC(len, bp);
6305  p_len_string(f, len, 1, bp);
6306  bp += len;
6307  break;
6308 
6309  case OP_EXACTMB2N1:
6310  p_string(f, 2, bp); bp += 2; break;
6311  case OP_EXACTMB2N2:
6312  p_string(f, 4, bp); bp += 4; break;
6313  case OP_EXACTMB2N3:
6314  p_string(f, 6, bp); bp += 6; break;
6315  case OP_EXACTMB2N:
6316  GET_LENGTH_INC(len, bp);
6317  p_len_string(f, len, 2, bp);
6318  bp += len * 2;
6319  break;
6320  case OP_EXACTMB3N:
6321  GET_LENGTH_INC(len, bp);
6322  p_len_string(f, len, 3, bp);
6323  bp += len * 3;
6324  break;
6325  case OP_EXACTMBN:
6326  {
6327  int mb_len;
6328 
6329  GET_LENGTH_INC(mb_len, bp);
6330  GET_LENGTH_INC(len, bp);
6331  fprintf(f, ":%d:%d:", mb_len, len);
6332  n = len * mb_len;
6333  while (n-- > 0) { fputc(*bp++, f); }
6334  }
6335  break;
6336 
6337  case OP_EXACT1_IC:
6338  len = enclen(enc, bp, bpend);
6339  p_string(f, len, bp);
6340  bp += len;
6341  break;
6342  case OP_EXACTN_IC:
6343  GET_LENGTH_INC(len, bp);
6344  p_len_string(f, len, 1, bp);
6345  bp += len;
6346  break;
6347 
6348  case OP_CCLASS:
6349  n = bitset_on_num((BitSetRef )bp);
6350  bp += SIZE_BITSET;
6351  fprintf(f, ":%d", n);
6352  break;
6353 
6354  case OP_CCLASS_NOT:
6355  n = bitset_on_num((BitSetRef )bp);
6356  bp += SIZE_BITSET;
6357  fprintf(f, ":%d", n);
6358  break;
6359 
6360  case OP_CCLASS_MB:
6361  case OP_CCLASS_MB_NOT:
6362  GET_LENGTH_INC(len, bp);
6363  q = bp;
6364 #ifndef PLATFORM_UNALIGNED_WORD_ACCESS
6365  ALIGNMENT_RIGHT(q);
6366 #endif
6367  GET_CODE_POINT(code, q);
6368  bp += len;
6369  fprintf(f, ":%d:%d", (int )code, len);
6370  break;
6371 
6372  case OP_CCLASS_MIX:
6373  case OP_CCLASS_MIX_NOT:
6374  n = bitset_on_num((BitSetRef )bp);
6375  bp += SIZE_BITSET;
6376  GET_LENGTH_INC(len, bp);
6377  q = bp;
6378 #ifndef PLATFORM_UNALIGNED_WORD_ACCESS
6379  ALIGNMENT_RIGHT(q);
6380 #endif
6381  GET_CODE_POINT(code, q);
6382  bp += len;
6383  fprintf(f, ":%d:%d:%d", n, (int )code, len);
6384  break;
6385 
6386  case OP_CCLASS_NODE:
6387  {
6388  CClassNode *cc;
6389 
6390  GET_POINTER_INC(cc, bp);
6391  n = bitset_on_num(cc->bs);
6392  fprintf(f, ":%"PRIuPTR":%d", (uintptr_t)cc, n);
6393  }
6394  break;
6395 
6396  case OP_BACKREFN_IC:
6397  mem = *((MemNumType* )bp);
6398  bp += SIZE_MEMNUM;
6399  fprintf(f, ":%d", mem);
6400  break;
6401 
6402  case OP_BACKREF_MULTI_IC:
6403  case OP_BACKREF_MULTI:
6404  fputs(" ", f);
6405  GET_LENGTH_INC(len, bp);
6406  for (i = 0; i < len; i++) {
6407  GET_MEMNUM_INC(mem, bp);
6408  if (i > 0) fputs(", ", f);
6409  fprintf(f, "%d", mem);
6410  }
6411  break;
6412 
6413  case OP_BACKREF_WITH_LEVEL:
6414  {
6415  OnigOptionType option;
6416  LengthType level;
6417 
6418  GET_OPTION_INC(option, bp);
6419  fprintf(f, ":%d", option);
6420  GET_LENGTH_INC(level, bp);
6421  fprintf(f, ":%d", level);
6422 
6423  fputs(" ", f);
6424  GET_LENGTH_INC(len, bp);
6425  for (i = 0; i < len; i++) {
6426  GET_MEMNUM_INC(mem, bp);
6427  if (i > 0) fputs(", ", f);
6428  fprintf(f, "%d", mem);
6429  }
6430  }
6431  break;
6432 
6433  case OP_REPEAT:
6434  case OP_REPEAT_NG:
6435  {
6436  mem = *((MemNumType* )bp);
6437  bp += SIZE_MEMNUM;
6438  addr = *((RelAddrType* )bp);
6439  bp += SIZE_RELADDR;
6440  fprintf(f, ":%d:%d", mem, addr);
6441  }
6442  break;
6443 
6445  case OP_PUSH_IF_PEEK_NEXT:
6446  addr = *((RelAddrType* )bp);
6447  bp += SIZE_RELADDR;
6448  fprintf(f, ":(%d)", addr);
6449  p_string(f, 1, bp);
6450  bp += 1;
6451  break;
6452 
6453  case OP_LOOK_BEHIND:
6454  GET_LENGTH_INC(len, bp);
6455  fprintf(f, ":%d", len);
6456  break;
6457 
6459  GET_RELADDR_INC(addr, bp);
6460  GET_LENGTH_INC(len, bp);
6461  fprintf(f, ":%d:(%d)", len, addr);
6462  break;
6463 
6464  case OP_STATE_CHECK_PUSH:
6466  scn = *((StateCheckNumType* )bp);
6467  bp += SIZE_STATE_CHECK_NUM;
6468  addr = *((RelAddrType* )bp);
6469  bp += SIZE_RELADDR;
6470  fprintf(f, ":%d:(%d)", scn, addr);
6471  break;
6472 
6473  case OP_CONDITION:
6474  GET_MEMNUM_INC(mem, bp);
6475  GET_RELADDR_INC(addr, bp);
6476  fprintf(f, ":%d:(%d)", mem, addr);
6477  break;
6478 
6479  default:
6480  fprintf(stderr, "onig_print_compiled_byte_code: undefined code %d\n",
6481  *--bp);
6482  }
6483  }
6484  fputs("]", f);
6485  if (nextp) *nextp = bp;
6486 }
6487 
6488 static void
6489 print_compiled_byte_code_list(FILE* f, regex_t* reg)
6490 {
6491  int ncode;
6492  UChar* bp = reg->p;
6493  UChar* end = reg->p + reg->used;
6494 
6495  fprintf(f, "code length: %d", reg->used);
6496 
6497  ncode = -1;
6498  while (bp < end) {
6499  ncode++;
6500  if (ncode % 5 == 0)
6501  fprintf(f, "\n%ld:", bp - reg->p);
6502  else
6503  fprintf(f, " %ld:", bp - reg->p);
6504  onig_print_compiled_byte_code(f, bp, end, &bp, reg->enc);
6505  }
6506 
6507  fprintf(f, "\n");
6508 }
6509 
6510 static void
6511 print_indent_tree(FILE* f, Node* node, int indent)
6512 {
6513  int i, type, container_p = 0;
6514  int add = 3;
6515  UChar* p;
6516 
6517  Indent(f, indent);
6518  if (IS_NULL(node)) {
6519  fprintf(f, "ERROR: null node!!!\n");
6520  exit (0);
6521  }
6522 
6523  type = NTYPE(node);
6524  switch (type) {
6525  case NT_LIST:
6526  case NT_ALT:
6527  if (NTYPE(node) == NT_LIST)
6528  fprintf(f, "<list:%"PRIxPTR">\n", (intptr_t)node);
6529  else
6530  fprintf(f, "<alt:%"PRIxPTR">\n", (intptr_t)node);
6531 
6532  print_indent_tree(f, NCAR(node), indent + add);
6533  while (IS_NOT_NULL(node = NCDR(node))) {
6534  if (NTYPE(node) != type) {
6535  fprintf(f, "ERROR: list/alt right is not a cons. %d\n", NTYPE(node));
6536  exit(0);
6537  }
6538  print_indent_tree(f, NCAR(node), indent + add);
6539  }
6540  break;
6541 
6542  case NT_STR:
6543  fprintf(f, "<string%s:%"PRIxPTR">",
6544  (NSTRING_IS_RAW(node) ? "-raw" : ""), (intptr_t)node);
6545  for (p = NSTR(node)->s; p < NSTR(node)->end; p++) {
6546  if (*p >= 0x20 && *p < 0x7f)
6547  fputc(*p, f);
6548  else {
6549  fprintf(f, " 0x%02x", *p);
6550  }
6551  }
6552  break;
6553 
6554  case NT_CCLASS:
6555  fprintf(f, "<cclass:%"PRIxPTR">", (intptr_t)node);
6556  if (IS_NCCLASS_NOT(NCCLASS(node))) fputs(" not", f);
6557  if (NCCLASS(node)->mbuf) {
6558  BBuf* bbuf = NCCLASS(node)->mbuf;
6559  for (i = 0; i < (int )bbuf->used; i++) {
6560  if (i > 0) fprintf(f, ",");
6561  fprintf(f, "%0x", bbuf->p[i]);
6562  }
6563  }
6564  break;
6565 
6566  case NT_CTYPE:
6567  fprintf(f, "<ctype:%"PRIxPTR"> ", (intptr_t)node);
6568  switch (NCTYPE(node)->ctype) {
6569  case ONIGENC_CTYPE_WORD:
6570  if (NCTYPE(node)->not != 0)
6571  fputs("not word", f);
6572  else
6573  fputs("word", f);
6574  break;
6575 
6576  default:
6577  fprintf(f, "ERROR: undefined ctype.\n");
6578  exit(0);
6579  }
6580  break;
6581 
6582  case NT_CANY:
6583  fprintf(f, "<anychar:%"PRIxPTR">", (intptr_t)node);
6584  break;
6585 
6586  case NT_ANCHOR:
6587  fprintf(f, "<anchor:%"PRIxPTR"> ", (intptr_t)node);
6588  switch (NANCHOR(node)->type) {
6589  case ANCHOR_BEGIN_BUF: fputs("begin buf", f); break;
6590  case ANCHOR_END_BUF: fputs("end buf", f); break;
6591  case ANCHOR_BEGIN_LINE: fputs("begin line", f); break;
6592  case ANCHOR_END_LINE: fputs("end line", f); break;
6593  case ANCHOR_SEMI_END_BUF: fputs("semi end buf", f); break;
6594  case ANCHOR_BEGIN_POSITION: fputs("begin position", f); break;
6595  case ANCHOR_ANYCHAR_STAR: fputs("begin position/line", f); break;
6596 
6597  case ANCHOR_WORD_BOUND: fputs("word bound", f); break;
6598  case ANCHOR_NOT_WORD_BOUND: fputs("not word bound", f); break;
6599 #ifdef USE_WORD_BEGIN_END
6600  case ANCHOR_WORD_BEGIN: fputs("word begin", f); break;
6601  case ANCHOR_WORD_END: fputs("word end", f); break;
6602 #endif
6603  case ANCHOR_PREC_READ: fputs("prec read", f); container_p = TRUE; break;
6604  case ANCHOR_PREC_READ_NOT: fputs("prec read not", f); container_p = TRUE; break;
6605  case ANCHOR_LOOK_BEHIND: fputs("look_behind", f); container_p = TRUE; break;
6606  case ANCHOR_LOOK_BEHIND_NOT: fputs("look_behind_not",f); container_p = TRUE; break;
6607  case ANCHOR_KEEP: fputs("keep",f); break;
6608 
6609  default:
6610  fprintf(f, "ERROR: undefined anchor type.\n");
6611  break;
6612  }
6613  break;
6614 
6615  case NT_BREF:
6616  {
6617  int* p;
6618  BRefNode* br = NBREF(node);
6619  p = BACKREFS_P(br);
6620  fprintf(f, "<backref:%"PRIxPTR">", (intptr_t)node);
6621  for (i = 0; i < br->back_num; i++) {
6622  if (i > 0) fputs(", ", f);
6623  fprintf(f, "%d", p[i]);
6624  }
6625  }
6626  break;
6627 
6628 #ifdef USE_SUBEXP_CALL
6629  case NT_CALL:
6630  {
6631  CallNode* cn = NCALL(node);
6632  fprintf(f, "<call:%"PRIxPTR">", (intptr_t)node);
6633  p_string(f, cn->name_end - cn->name, cn->name);
6634  }
6635  break;
6636 #endif
6637 
6638  case NT_QTFR:
6639  fprintf(f, "<quantifier:%"PRIxPTR">{%d,%d}%s\n", (intptr_t)node,
6640  NQTFR(node)->lower, NQTFR(node)->upper,
6641  (NQTFR(node)->greedy ? "" : "?"));
6642  print_indent_tree(f, NQTFR(node)->target, indent + add);
6643  break;
6644 
6645  case NT_ENCLOSE:
6646  fprintf(f, "<enclose:%"PRIxPTR"> ", (intptr_t)node);
6647  switch (NENCLOSE(node)->type) {
6648  case ENCLOSE_OPTION:
6649  fprintf(f, "option:%d\n", NENCLOSE(node)->option);
6650  break;
6651  case ENCLOSE_MEMORY:
6652  fprintf(f, "memory:%d", NENCLOSE(node)->regnum);
6653  break;
6655  fprintf(f, "stop-bt");
6656  break;
6657  case ENCLOSE_CONDITION:
6658  fprintf(f, "condition:%d", NENCLOSE(node)->regnum);
6659  break;
6660 
6661  default:
6662  break;
6663  }
6664  fprintf(f, "\n");
6665  print_indent_tree(f, NENCLOSE(node)->target, indent + add);
6666  break;
6667 
6668  default:
6669  fprintf(f, "print_indent_tree: undefined node type %d\n", NTYPE(node));
6670  break;
6671  }
6672 
6673  if (type != NT_LIST && type != NT_ALT && type != NT_QTFR &&
6674  type != NT_ENCLOSE)
6675  fprintf(f, "\n");
6676 
6677  if (container_p) print_indent_tree(f, NANCHOR(node)->target, indent + add);
6678 
6679  fflush(f);
6680 }
6681 #endif /* ONIG_DEBUG */
6682 
6683 #ifdef ONIG_DEBUG_PARSE_TREE
6684 static void
6685 print_tree(FILE* f, Node* node)
6686 {
6687  print_indent_tree(f, node, 0);
6688 }
6689 #endif
VALUE data
Definition: tcltklib.c:3368
#define SIZE_OP_SET_OPTION_PUSH
Definition: regint.h:691
void onig_transfer(regex_t *to, regex_t *from)
Definition: regcomp.c:5618
#define ANCHOR_ANYCHAR_STAR_ML
Definition: regint.h:517
#define SIZE_OP_MEMORY_END_PUSH_REC
Definition: regint.h:696
#define IS_DYNAMIC_OPTION(option)
Definition: regint.h:382
#define NSTRING_SET_AMBIG(node)
Definition: regparse.h:108
#define BIT_STATUS_AT(stats, n)
Definition: regint.h:337
#define IS_ENCLOSE_CALLED(en)
Definition: regparse.h:144
void onig_scan_env_set_error_string(ScanEnv *env, int ecode ARG_UNUSED, UChar *arg, UChar *arg_end)
Definition: regparse.c:6323
volatile VALUE tmp
Definition: tcltklib.c:10209
int onig_new_without_alloc(regex_t *reg, const UChar *pattern, const UChar *pattern_end, OnigOptionType option, OnigEncoding enc, OnigSyntaxType *syntax, OnigErrorInfo *einfo)
Definition: regcomp.c:5935
static int add_bitset(regex_t *reg, BitSetRef bs)
Definition: regcomp.c:298
static void concat_opt_exact_info_str(OptExactInfo *to, UChar *s, UChar *end, int raw ARG_UNUSED, OnigEncoding enc)
Definition: regcomp.c:4592
int is_refered
Definition: regparse.h:186
#define ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM
#define ONIGERR_INVALID_COMBINATION_OF_OPTIONS
#define IS_NULL(p)
Definition: regint.h:278
ssize_t n
Definition: bigdecimal.c:5655
#define ENCLOSE_MEMORY
Definition: regparse.h:92
unsigned int alloc
Definition: regint.h:423
#define ONIGERR_NEVER_ENDING_RECURSION
int onig_new(regex_t **reg, const UChar *pattern, const UChar *pattern_end, OnigOptionType option, OnigEncoding enc, const OnigSyntaxType *syntax, OnigErrorInfo *einfo)
Definition: regcomp.c:5949
VP_EXPORT int
Definition: bigdecimal.c:5050
unsigned int OnigOptionType
Definition: ripper.y:349
#define OPT_EXACT_MAXLEN
Definition: regcomp.c:4289
#define IS_REPEAT_INFINITE(n)
Definition: regint.h:388
#define ONIG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND
UChar * end
Definition: regparse.h:170
int onig_free_node_list(void)
Definition: regparse.c:1110
unsigned char * exact_end
Definition: ripper.y:692
#define ALLOWED_ANCHOR_IN_LB_NOT
int onig_node_str_cat(Node *node, const UChar *s, const UChar *end)
Definition: regparse.c:1443
int regnum
Definition: regparse.h:196
ONIG_EXTERN OnigCaseFoldType OnigDefaultCaseFoldFlag
Definition: ripper.y:124
Node * onig_node_list_add(Node *list, Node *x)
Definition: regparse.c:1259
#define IS_SYNTAX_BV(syn, bvm)
Definition: regparse.h:326
static void alt_merge_opt_exact_info(OptExactInfo *to, OptExactInfo *add, OptEnv *env)
Definition: regcomp.c:4609
static int get_char_length_tree(Node *node, regex_t *reg, int *len)
Definition: regcomp.c:2507
code
Definition: tcltklib.c:3381
static void concat_opt_anc_info(OptAncInfo *to, OptAncInfo *left, OptAncInfo *right, OnigDistance left_len, OnigDistance right_len)
Definition: regcomp.c:4479
struct _Node * target
Definition: regparse.h:226
#define NSTRING_IS_RAW(node)
Definition: regparse.h:111
void onig_print_compiled_byte_code(FILE *f, UChar *bp, UChar *bpend, UChar **nextp, OnigEncoding enc)
#define WORD_ALIGNMENT_SIZE
Definition: regint.h:301
#define BIT_STATUS_ON_AT(stats, n)
Definition: regint.h:340
#define ONIG_OPTIMIZE_EXACT
Definition: regint.h:323
#define THRESHOLD_CASE_FOLD_ALT_FOR_EXPANSION
#define ANCHOR_END_BUF
Definition: regint.h:503
Win32OLEIDispatch * p
Definition: win32ole.c:786
#define ONIGENC_MBC_MINLEN(enc)
static int compile_length_quantifier_node(QtfrNode *qn, regex_t *reg)
Definition: regcomp.c:975
#define SIZE_OPCODE
Definition: regint.h:647
#define ONIGENC_IS_CODE_WORD(enc, code)
static int max(int a, int b)
Definition: strftime.c:141
#define BBUF_ADD1(buf, byte)
Definition: regint.h:465
#define d1
#define ANCHOR_WORD_BEGIN
Definition: regint.h:509
#define SINGLE_BYTE_SIZE
Definition: regint.h:392
#define SIZE_MEMNUM
Definition: regint.h:651
#define IS_ENCLOSE_RECURSION(en)
Definition: regparse.h:146
#define SIZE_OP_REPEAT_INC
Definition: regint.h:684
int num_call
Definition: regparse.h:301
#define NSTRING_IS_AMBIG(node)
Definition: regparse.h:112
static int compile_call(CallNode *node, regex_t *reg)
Definition: regcomp.c:401
#define BBUF_WRITE(buf, pos, bytes, n)
Definition: regint.h:450
#define ONIG_OPTION_DONT_CAPTURE_GROUP
static int add_multi_byte_cclass(BBuf *mbuf, regex_t *reg)
Definition: regcomp.c:563
return
Definition: bigdecimal.c:5800
static int get_char_length_tree1(Node *node, regex_t *reg, int *len, int level)
Definition: regcomp.c:2383
MinMaxLen len
Definition: regcomp.c:4328
unsigned int OnigCodePoint
Definition: ripper.y:115
static void swap_node(Node *a, Node *b)
Definition: regcomp.c:71
int sourceline
Definition: regparse.h:320
unsigned int bt_mem_end
Definition: ripper.y:673
#define REGEX_CHAIN_HEAD(reg)
Definition: regcomp.c:5625
#define NST_CALLED
Definition: regparse.h:133
#define IS_ENCLOSE_MAX_FIXED(en)
Definition: regparse.h:150
static int comp_distance_value(MinMaxLen *d1, MinMaxLen *d2, int v1, int v2)
Definition: regcomp.c:4393
#define SCANENV_MEM_NODES(senv)
Definition: regparse.h:283
size_t onig_memsize(const regex_t *reg)
Definition: regcomp.c:5587
#define THREAD_SYSTEM_END
Definition: regint.h:117
static int add_bytes(regex_t *reg, UChar *bytes, OnigDistance len)
Definition: regcomp.c:291
OnigUChar * par
Definition: ripper.y:635
#define IS_CODE_SB_WORD(enc, code)
Definition: regint.h:844
ssize_t i
Definition: bigdecimal.c:5655
#define UChar
int onig_names_free(regex_t *reg)
Definition: regparse.c:486
#define ANCHOR_BEGIN_LINE
Definition: regint.h:501
VALUE target
Definition: tcltklib.c:5532
#define SIZE_OP_CALL
Definition: regint.h:706
static void select_opt_exact_info(OnigEncoding enc, OptExactInfo *now, OptExactInfo *alt)
Definition: regcomp.c:4648
#define ONIGENC_MBC_MAXLEN(enc)
int onig_is_in_code_range(const UChar *p, OnigCodePoint code)
Definition: regcomp.c:6019
#define QUANTIFIER_EXPAND_LIMIT_SIZE
Definition: regcomp.c:735
int ret
Definition: tcltklib.c:280
#define ONIG_CHAR_TABLE_SIZE
int nest_level
Definition: regparse.h:238
UChar * error
Definition: regparse.h:298
int onig_node_str_set(Node *node, const UChar *s, const UChar *end)
Definition: regparse.c:1479
MinMaxLen mmd
Definition: regcomp.c:4320
OnigCaseFoldType onig_get_default_case_fold_flag(void)
Definition: regcomp.c:36
#define SIZE_OP_POP_POS
Definition: regint.h:688
#define IS_NCCLASS_NOT(nd)
Definition: regint.h:767
#define ONIGENC_CASE_FOLD_DEFAULT
Real * a
Definition: bigdecimal.c:1182
int reach_end
Definition: regcomp.c:4313
static void concat_left_node_opt_info(OnigEncoding enc, NodeOptInfo *to, NodeOptInfo *add)
Definition: regcomp.c:4828
#define GET_CHAR_LEN_VARLEN
Definition: regcomp.c:2378
static OnigDistance distance_add(OnigDistance d1, OnigDistance d2)
Definition: regcomp.c:96
#define NENCLOSE(node)
Definition: regparse.h:79
static int subexp_recursive_check(Node *node)
Definition: regcomp.c:2989
VALUE enc
Definition: tcltklib.c:10311
OptExactInfo exm
Definition: regcomp.c:4332
static int compile_length_string_raw_node(StrNode *sn, regex_t *reg)
Definition: regcomp.c:506
int target_empty_info
Definition: regparse.h:183
OnigRepeatRange * repeat_range
Definition: ripper.y:676
UChar * s
Definition: regparse.h:169
#define IS_ENCLOSE_MIN_FIXED(en)
Definition: regparse.h:149
OnigDistance anchor_dmax
Definition: ripper.y:689
#define ANCHOR_END_LINE
Definition: regint.h:505
#define ONIG_STATE_NORMAL
#define ANCHOR_PREC_READ
Definition: regint.h:511
int opt_count
Definition: regparse.h:204
UnsetAddrList * unset_addr_list
Definition: regparse.h:303
#define IS_BACKREF_NEST_LEVEL(bn)
Definition: regparse.h:161
#define NT_QTFR
Definition: regparse.h:45
#define SIZE_ABSADDR
Definition: regint.h:649
#define ONIGENC_MBC_CASE_FOLD(enc, flag, pp, end, buf)
static int map_position_value(OnigEncoding enc, int i)
Definition: regcomp.c:4340
struct _Node * next_head_exact
Definition: regparse.h:185
int onigenc_strlen(OnigEncoding enc, const UChar *p, const UChar *end)
Definition: regenc.c:123
#define xfree
static void set_optimize_map_info(regex_t *reg, OptMapInfo *m)
Definition: regcomp.c:5307
#define CKN_ON
Definition: regcomp.c:736
#define GET_CODE_POINT(code, p)
Definition: regint.h:669
Node * onig_node_new_alt(Node *left, Node *right)
Definition: regparse.c:1277
#define ONIG_STATE(reg)
#define NQ_TARGET_IS_EMPTY_MEM
Definition: regparse.h:121
void * PointerType
Definition: regint.h:645
#define IS_CALL_RECURSION(cn)
Definition: regparse.h:158
#define ONIGERR_INVALID_ARGUMENT
#define BIT_STATUS_ON_AT_SIMPLE(stats, n)
Definition: regint.h:347
#define ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc, case_fold_flag, p, end, acs)
#define ALLOWED_ANCHOR_IN_LB
static int subexp_recursive_check_trav(Node *node, ScanEnv *env)
Definition: regcomp.c:3045
#define REPEAT_RANGE_ALLOC
static int optimize_node_left(Node *node, NodeOptInfo *opt, OptEnv *env)
Definition: regcomp.c:4902
int onig_init(void)
Definition: regcomp.c:5972
r
Definition: bigdecimal.c:1196
static int compile_string_node(Node *node, regex_t *reg)
Definition: regcomp.c:515
#define ONIG_OPTIMIZE_EXACT_IC
Definition: regint.h:326
#define NST_MEM_BACKREFED
Definition: regparse.h:130
Definition: regint.h:599
static int add_opcode_rel_addr(regex_t *reg, int opcode, int addr)
Definition: regcomp.c:280
#define BACKREFS_P(br)
Definition: regparse.h:116
#define GET_OPTION_INC(option, p)
Definition: regint.h:664
#define NST_RECURSION
Definition: regparse.h:132
int state
Definition: tcltklib.c:1462
#define GET_LENGTH_INC(len, p)
Definition: regint.h:661
#define ANCHOR_BEGIN_POSITION
Definition: regint.h:502
UnsetAddr * us
Definition: regparse.h:217
OnigUChar * par_end
Definition: ripper.y:636
OnigCaseFoldType case_fold_flag
Definition: regcomp.c:4300
void onig_chain_link_add(regex_t *to, regex_t *add)
Definition: regcomp.c:5632
#define RECURSION_INFINITE
Definition: regcomp.c:2847
regex_t * reg
Definition: regparse.h:300
#define NST_ADDR_FIXED
Definition: regparse.h:134
OptAncInfo anc
Definition: regcomp.c:4311
int left_anchor
Definition: regcomp.c:4305
#define ONIGENC_IS_MBC_WORD(enc, s, end)
OnigDistance anchor_dmin
Definition: ripper.y:688
static int set_optimize_exact_info(regex_t *reg, OptExactInfo *e)
Definition: regcomp.c:5251
UChar buf[NODE_STR_BUF_SIZE]
Definition: regparse.h:173
unsigned int OnigCaseFoldType
Definition: ripper.y:122
#define ONIG_STATE_MODIFY
#define ANCHOR_NOT_WORD_BOUND
Definition: regint.h:508
int num_named
Definition: regparse.h:307
unsigned int bt_mem_start
Definition: ripper.y:672
MinMaxLen mmd
Definition: regcomp.c:4310
int char_len
Definition: regparse.h:203
unsigned int BitStatusType
Definition: regint.h:332
#define BIT_STATUS_ON_ALL(stats)
Definition: regint.h:336
OptAncInfo anc
Definition: regcomp.c:4330
#define SIZE_OP_ANYCHAR_STAR_PEEK_NEXT
Definition: regint.h:678
static int add_opcode_option(regex_t *reg, int opcode, OnigOptionType option)
Definition: regcomp.c:305
#define SIZE_OP_FAIL
Definition: regint.h:692
#define ONIGENC_MBC_MAXLEN_DIST(enc)
#define ONIGENC_IS_ALLOWED_REVERSE_MATCH(enc, s, end)
static int compile_tree_empty_check(Node *node, regex_t *reg, int empty_info)
Definition: regcomp.c:369
VALUE einfo
Definition: tcltklib.c:847
d
Definition: strlcat.c:58
#define SIZE_STATE_CHECK_NUM
Definition: regint.h:652
#define IS_ENCLOSE_CLEN_FIXED(en)
Definition: regparse.h:151
#define NBREF(node)
Definition: regparse.h:77
#define NT_ALT
Definition: regparse.h:49
#define IN_ALT
Definition: regcomp.c:3829
#define head
Definition: st.c:107
static void unset_addr_list_end(UnsetAddrList *uslist)
Definition: regcomp.c:181
#define ALLOWED_ENCLOSE_IN_LB
void onig_free_body(regex_t *reg)
Definition: regcomp.c:5561
const char * name
Definition: ripper.y:163
#define COMPILE_INIT_SIZE
#define BIT_STATUS_CLEAR(stats)
Definition: regint.h:335
#define SIZE_OP_MEMORY_START
Definition: regint.h:693
UnsetAddrList * unset_addr_list
Definition: regparse.h:227
#define SIZE_OPTION
Definition: regint.h:654
static int compile_cclass_node(CClassNode *cc, regex_t *reg)
Definition: regcomp.c:616
struct _Node * target
Definition: regparse.h:244
int group_num
Definition: regparse.h:223
static int setup_tree(Node *node, regex_t *reg, int state, ScanEnv *env)
Definition: regcomp.c:3844
static int compile_quantifier_node(QtfrNode *qn, regex_t *reg)
Definition: regcomp.c:1040
#define NCAR(node)
Definition: regparse.h:84
#define COMP_EM_BASE
OnigCodePoint code[ONIGENC_MAX_COMP_CASE_FOLD_CODE_LEN]
Definition: ripper.y:147
#define xalloca
Definition: regint.h:189
int onig_compile(regex_t *reg, const UChar *pattern, const UChar *pattern_end, OnigErrorInfo *einfo, const char *sourcefile, int sourceline)
Definition: regcomp.c:5666
#define SIZE_OP_PUSH_STOP_BT
Definition: regint.h:699
Definition: regint.h:420
struct _Node * head_exact
Definition: regparse.h:184
static int add_option(regex_t *reg, OnigOptionType option)
Definition: regcomp.c:273
#define ALLOWED_ENCLOSE_IN_LB_NOT
#define IS_IGNORECASE(option)
Definition: regint.h:363
static int numbered_ref_check(Node *node)
Definition: regcomp.c:1975
#define SIZE_OP_CONDITION
Definition: regint.h:708
void onig_chain_reduce(regex_t *reg)
Definition: regcomp.c:5641
BitSet bs
Definition: regint.h:779
static int is_full_opt_exact_info(OptExactInfo *ex)
Definition: regcomp.c:4540
int state
Definition: regparse.h:234
#define GET_MEMNUM_INC(num, p)
Definition: regint.h:662
static int set_optimize_info_from_tree(Node *node, regex_t *reg, ScanEnv *scan_env)
Definition: regcomp.c:5335
#define STACK_POP_LEVEL_MEM_START
Definition: regint.h:318
static int add_rel_addr(regex_t *reg, int addr)
Definition: regcomp.c:228
#define ONIGENC_CASE_FOLD_MIN
#define enclen(enc, p, e)
int RelAddrType
Definition: regint.h:639
#define STACK_POP_LEVEL_ALL
Definition: regint.h:319
#define ONIGERR_INVALID_CONDITION_PATTERN
#define ONIGERR_DEFAULT_ENCODING_IS_NOT_SET
const char * sourcefile
Definition: regparse.h:319
#define ONIG_OPTIMIZE_EXACT_BM_NOT_REV_IC
Definition: regint.h:329
static int quantifiers_memory_node_info(Node *node)
Definition: regcomp.c:2068
static int compile_length_option_node(EncloseNode *node, regex_t *reg)
Definition: regcomp.c:1168
int onig_bbuf_init(BBuf *buf, OnigDistance size)
Definition: regcomp.c:148
#define THREAD_ATOMIC_END
Definition: regint.h:119
#define SIZE_OP_NULL_CHECK_START
Definition: regint.h:701
static void add_mml(MinMaxLen *to, MinMaxLen *from)
Definition: regcomp.c:4437
#define putc(_c, _stream)
Definition: win32.h:125
#define IS_NCCLASS_SHARE(nd)
Definition: regint.h:768
BDIGIT m
Definition: bigdecimal.c:5085
OnigPosition * end
Definition: ripper.y:617
static int compile_length_tree(Node *node, regex_t *reg)
Definition: regcomp.c:1567
static int check_type_tree(Node *node, int type_mask, int enclose_mask, int anchor_mask)
Definition: regcomp.c:2796
int capa
Definition: regparse.h:172
#define GET_CHAR_LEN_TOP_ALT_VARLEN
Definition: regcomp.c:2379
#define RECURSION_EXIST
Definition: regcomp.c:2846
#define ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED
OnigOptionType options
Definition: regcomp.c:4299
#define SIZE_OP_POP
Definition: regint.h:681
OnigCaseFoldType case_fold_flag
Definition: ripper.y:681
#define ONIGENC_IS_UNDEF(enc)
#define MAX_NODE_OPT_INFO_REF_COUNT
Definition: regcomp.c:4899
static void set_mml(MinMaxLen *mml, OnigDistance min, OnigDistance max)
Definition: regcomp.c:4417
OnigDistance min_len
Definition: regparse.h:201
int AbsAddrType
Definition: regint.h:640
int onigenc_init(void)
Definition: regenc.c:36
#define NULL_NODE
Definition: regparse.h:280
static void clear_opt_exact_info(OptExactInfo *ex)
Definition: regcomp.c:4546
#define val
Definition: tcltklib.c:1949
#define BBUF_INIT(buf, size)
Definition: regint.h:426
#define ONIGERR_INVALID_LOOK_BEHIND_PATTERN
static int disable_noname_group_capture(Node **root, regex_t *reg, ScanEnv *env)
Definition: regcomp.c:2006
#define SET_ENCLOSE_STATUS(node, f)
Definition: regparse.h:141
const OnigSyntaxType * syntax
Definition: regparse.h:291
int num_comb_exp_check
Definition: ripper.y:669
static void copy_mml(MinMaxLen *to, MinMaxLen *from)
Definition: regcomp.c:4430
#define ARG_UNUSED
Definition: nkf.h:181
#define IS_MULTILINE(option)
Definition: regint.h:362
static void alt_merge_mml(MinMaxLen *to, MinMaxLen *from)
Definition: regcomp.c:4453
#define PRIuSIZE
static int expand_case_fold_string(Node *node, regex_t *reg)
Definition: regcomp.c:3540
#define ONIG_OPTION_CAPTURE_GROUP
#define ALIGNMENT_RIGHT(addr)
Definition: regint.h:309
static int is_equal_mml(MinMaxLen *a, MinMaxLen *b)
Definition: regcomp.c:4410
#define DISABLE_CASE_FOLD_MULTI_CHAR(case_fold_flag)
Definition: regint.h:384
static int divide_look_behind_alternatives(Node *node)
Definition: regcomp.c:3233
OptAncInfo anc
Definition: regcomp.c:4321
#define add(x, y)
Definition: date_strftime.c:23
#define SIZE_OP_PUSH_LOOK_BEHIND_NOT
Definition: regint.h:704
#define NQ_TARGET_IS_EMPTY_REC
Definition: regparse.h:122
#define BITSET_AT(bs, pos)
Definition: regint.h:414
int onig_free_shared_cclass_table(void)
Definition: regparse.c:5414
int * back_dynamic
Definition: regparse.h:237
int lower
Definition: regparse.h:180
#define NCCLASS(node)
Definition: regparse.h:75
#define xmemcpy
Definition: regint.h:182
unsigned char map[ONIG_CHAR_TABLE_SIZE]
Definition: ripper.y:693
static int distance_value(MinMaxLen *mm)
Definition: regcomp.c:4364
#define ONIG_OPTION_SINGLELINE
#define CHECK_NULL_RETURN_MEMERR(p)
Definition: regint.h:281
Node * onig_node_new_enclose(int type)
Definition: regparse.c:1414
#define NCTYPE(node)
Definition: regparse.h:76
int onig_reg_init(regex_t *reg, OnigOptionType option, OnigCaseFoldType case_fold_flag, OnigEncoding enc, const OnigSyntaxType *syntax)
Definition: regcomp.c:5889
#define ONIG_OPTIMIZE_MAP
Definition: regint.h:327
#define xmalloc
#define xrealloc
#define SIZE_OP_MEMORY_END_PUSH
Definition: regint.h:695
#define SIZE_OP_RETURN
Definition: regint.h:707
#define NQ_TARGET_IS_EMPTY
Definition: regparse.h:120
size_t onig_region_memsize(const OnigRegion *regs)
Definition: regcomp.c:5602
static void clear_node_opt_info(NodeOptInfo *opt)
Definition: regcomp.c:4811
static int compile_tree_n_times(Node *node, int n, regex_t *reg)
Definition: regcomp.c:416
size_t OnigDistance
Definition: ripper.y:117
int stack_pop_level
Definition: ripper.y:674
static void alt_merge_opt_anc_info(OptAncInfo *to, OptAncInfo *add)
Definition: regcomp.c:4533
static int setup_subexp_call(Node *node, ScanEnv *env)
Definition: regcomp.c:3114
unsigned char buf[MIME_BUF_SIZE]
Definition: nkf.c:4308
short int StateCheckNumType
Definition: regint.h:644
#define NT_LIST
Definition: regparse.h:48
static void clear_optimize_info(regex_t *reg)
Definition: regcomp.c:5391
int err
Definition: win32.c:87
#define NT_ANCHOR
Definition: regparse.h:47
#define ANCHOR_WORD_END
Definition: regint.h:510
#define IN_REPEAT
Definition: regcomp.c:3831
#define FOUND_CALLED_NODE
#define SIZE_OP_ANYCHAR_STAR
Definition: regint.h:677
#define ALLOWED_TYPE_IN_LB
static int unset_addr_list_init(UnsetAddrList *uslist, int size)
Definition: regcomp.c:168
#define GET_ALIGNMENT_PAD_SIZE(addr, pad_size)
Definition: regint.h:303
#define SIZE_LENGTH
Definition: regint.h:650
int upper
Definition: regparse.h:181
#define NST_CLEN_FIXED
Definition: regparse.h:127
static int compile_length_string_node(Node *node, regex_t *reg)
Definition: regcomp.c:467
static void copy_opt_anc_info(OptAncInfo *to, OptAncInfo *from)
Definition: regcomp.c:4473
static int update_string_node_case_fold(regex_t *reg, Node *node)
Definition: regcomp.c:3354
#define EXPAND_STRING_MAX_LENGTH
static int set_bm_skip(UChar *s, UChar *end, regex_t *reg, UChar skip[], int **int_skip, int ignore_case)
Definition: regcomp.c:4219
#define SIZE_OP_MEMORY_END
Definition: regint.h:697
#define ONIG_OPTIMIZE_EXACT_BM_IC
Definition: regint.h:328
#define SIZE_OP_MEMORY_START_PUSH
Definition: regint.h:694
#define ONIGERR_TYPE_BUG
#define ONIGERR_INVALID_BACKREF
#define NT_STR
Definition: regparse.h:40
unsigned char * exact
Definition: ripper.y:691
gz level
Definition: zlib.c:2262
int type
Definition: regparse.h:243
#define NQTFR(node)
Definition: regparse.h:78
static void alt_merge_node_opt_info(NodeOptInfo *to, NodeOptInfo *add, OptEnv *env)
Definition: regcomp.c:4887
const int id
Definition: nkf.c:209
#define SIZE_BITSET
Definition: regint.h:404
#define ONIG_INFINITE_DISTANCE
static int noname_disable_map(Node **plink, GroupNumRemap *map, int *counter)
Definition: regcomp.c:1835
#define THREAD_ATOMIC_START
Definition: regint.h:118
#define TRUE
Definition: nkf.h:175
struct _Node * target
Definition: regparse.h:211
static int compile_anchor_node(AnchorNode *node, regex_t *reg)
Definition: regcomp.c:1467
MinMaxLen mmd
Definition: regcomp.c:4297
#define IS_ENCLOSE_NAME_REF(en)
Definition: regparse.h:155
Bits * BitSetRef
Definition: regint.h:402
#define SIZE_OP_PUSH_IF_PEEK_NEXT
Definition: regint.h:683
OnigDistance max_len
Definition: regparse.h:202
unsigned int flag
Definition: regparse.h:171
int rb_const_defined(VALUE, ID)
Definition: variable.c:2098
#define NST_MARK2
Definition: regparse.h:129
#define NT_CANY
Definition: regparse.h:43
register char * s
Definition: os2.c:56
UChar * name_end
Definition: regparse.h:225
#define ONIGENC_IS_MBC_ASCII_WORD(enc, s, end)
OnigEncoding enc
Definition: regcomp.c:4298
int onig_name_to_group_numbers(regex_t *reg, const UChar *name, const UChar *name_end, int **nums)
Definition: regparse.c:848
BBuf * mbuf
Definition: regint.h:780
int LengthType
Definition: regint.h:641
OptMapInfo map
Definition: regcomp.c:4335
#define ANCHOR_END_BUF_MASK
Definition: regparse.h:90
int right_anchor
Definition: regcomp.c:4306
static int get_max_match_length(Node *node, OnigDistance *max, ScanEnv *env)
Definition: regcomp.c:2261
#define IS_QUANTIFIER_IN_REPEAT(qn)
Definition: regparse.h:162
static int expand_case_fold_string_alt(int item_num, OnigCaseFoldCodeItem items[], UChar *p, int slen, UChar *end, regex_t *reg, Node **rnode)
Definition: regcomp.c:3422
static int min(int a, int b)
Definition: strftime.c:131
static int add_compile_string_length(UChar *s ARG_UNUSED, int mb_len, OnigDistance str_len, regex_t *reg ARG_UNUSED, int ignore_case)
Definition: regcomp.c:428
OnigPosition * beg
Definition: ripper.y:616
int onig_is_code_in_cc(OnigEncoding enc, OnigCodePoint code, CClassNode *cc)
Definition: regcomp.c:6063
unsigned int uintptr_t
Definition: win32.h:94
#define SIZE_OP_JUMP
Definition: regint.h:679
#define NTYPE(node)
Definition: regparse.h:71
static int is_anychar_star_quantifier(QtfrNode *qn)
Definition: regcomp.c:726
#define ANCHOR_LOOK_BEHIND_NOT
Definition: regint.h:514
int state
Definition: regparse.h:178
int type
Definition: tcltklib.c:111
BitStatusType backrefed_mem
Definition: regparse.h:295
#define P_(args)
Definition: oniguruma.h:71
int ignore_case
Definition: regcomp.c:4314
static int renumber_node_backref(Node *node, GroupNumRemap *map)
Definition: regcomp.c:1903
#define NCALL(node)
Definition: regparse.h:82
#define SIZE_OP_NULL_CHECK_END
Definition: regint.h:702
static int compile_length_anchor_node(AnchorNode *node, regex_t *reg)
Definition: regcomp.c:1434
OnigEncoding enc
Definition: ripper.y:634
static int expand_case_fold_make_rem_string(Node **rnode, UChar *s, UChar *end, regex_t *reg)
Definition: regcomp.c:3400
int num_mem
Definition: regparse.h:305
ssize_t ex
Definition: bigdecimal.c:5087
static void alt_merge_opt_map_info(OnigEncoding enc, OptMapInfo *to, OptMapInfo *add)
Definition: regcomp.c:4776
OnigDistance max
Definition: regcomp.c:4293
Node * onig_node_new_list(Node *left, Node *right)
Definition: regparse.c:1253
int intptr_t
Definition: win32.h:86
#define NSTRING_IS_DONT_GET_OPT_INFO(node)
Definition: regparse.h:113
#define IS_ENCLOSE_ADDR_FIXED(en)
Definition: regparse.h:145
static int add_pointer(regex_t *reg, void *addr)
Definition: regcomp.c:264
#define IS_BACKREF_NAME_REF(bn)
Definition: regparse.h:160
static int is_set_opt_anc_info(OptAncInfo *to, int anc)
Definition: regcomp.c:4507
static Node * get_head_value_node(Node *node, int exact, regex_t *reg)
Definition: regcomp.c:2708
#define NT_CCLASS
Definition: regparse.h:41
#define SIZE_OP_POP_STOP_BT
Definition: regint.h:700
static int setup_look_behind(Node *node, regex_t *reg, ScanEnv *env)
Definition: regcomp.c:3263
#define ANCHOR_SEMI_END_BUF
Definition: regint.h:504
#define ONIG_OPTIMIZE_NONE
Definition: regint.h:322
unsigned int used
Definition: ripper.y:662
#define ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL
Node * onig_node_new_str(const UChar *s, const UChar *end)
Definition: regparse.c:1546
Definition: regint.h:524
#define IS_ENCLOSE_MARK1(en)
Definition: regparse.h:147
#define ONIGERR_UNDEFINED_GROUP_REFERENCE
UChar * error_end
Definition: regparse.h:299
#define SIZE_OP_FAIL_LOOK_BEHIND_NOT
Definition: regint.h:705
int * int_map_backward
Definition: ripper.y:695
Node ** mem_nodes_dynamic
Definition: regparse.h:311
static void set_sub_anchor(regex_t *reg, OptAncInfo *anc)
Definition: regcomp.c:5324
UChar s[OPT_EXACT_MAXLEN]
Definition: regcomp.c:4316
static int add_opcode(regex_t *reg, int opcode)
Definition: regcomp.c:210
Real * b
Definition: bigdecimal.c:1182
static void copy_node_opt_info(NodeOptInfo *to, NodeOptInfo *from)
Definition: regcomp.c:4822
BitStatusType bt_mem_end
Definition: regparse.h:294
return ptr
Definition: tcltklib.c:784
VpDivd * c
Definition: bigdecimal.c:1205
#define ENCLOSE_STOP_BACKTRACK
Definition: regparse.h:94
#define rb_intern_const(str)
#define ONIG_OPTION_IGNORECASE
#define USE_SUBEXP_CALL
Definition: regint.h:60
static int is_left_anchor(int anc)
Definition: regcomp.c:4496
OnigEncoding enc
Definition: regparse.h:290
void onig_node_free(Node *node)
Definition: regparse.c:1027
#define NANCHOR(node)
Definition: regparse.h:80
static int subexp_inf_recursive_check(Node *node, ScanEnv *env, int head)
Definition: regcomp.c:2850
#define NST_STOP_BT_SIMPLE_REPEAT
Definition: regparse.h:131
static int add_char_amb_opt_map_info(OptMapInfo *map, UChar *p, UChar *end, OnigEncoding enc, OnigCaseFoldType case_fold_flag)
Definition: regcomp.c:4722
gz end
Definition: zlib.c:2270
#define ANCHOR_ANYCHAR_STAR
Definition: regint.h:516
#define NT_BREF
Definition: regparse.h:44
#define SIZE_RELADDR
Definition: regint.h:648
static unsigned char PadBuf[WORD_ALIGNMENT_SIZE]
Definition: regcomp.c:50
OnigDistance min
Definition: regcomp.c:4292
#define NST_IN_REPEAT
Definition: regparse.h:137
#define IS_NEED_STR_LEN_OP_EXACT(op)
Definition: regcomp.c:319
static int compile_string_raw_node(StrNode *sn, regex_t *reg)
Definition: regcomp.c:554
#define STACK_POP_LEVEL_FREE
Definition: regint.h:317
static void add_char_opt_map_info(OptMapInfo *map, UChar c, OnigEncoding enc)
Definition: regcomp.c:4713
#define CHECK_NULL_RETURN(p)
Definition: regint.h:280
#define SIZE_OP_FAIL_POS
Definition: regint.h:689
#define ONIG_MAX_CAPTURE_HISTORY_GROUP
static int add_length(regex_t *reg, OnigDistance len)
Definition: regcomp.c:246
#define ONIGENC_IS_CODE_PRINT(enc, code)
OnigDistance dmax
Definition: ripper.y:697
static void copy_opt_map_info(OptMapInfo *to, OptMapInfo *from)
Definition: regcomp.c:4707
static void clear_opt_anc_info(OptAncInfo *anc)
Definition: regcomp.c:4466
int size
Definition: encoding.c:52
#define f
#define ONIGENC_CTYPE_WORD
#define ONIGENC_CODE_TO_MBCLEN(enc, code)
static int compile_tree(Node *node, regex_t *reg)
Definition: regcomp.c:1660
unsigned int used
Definition: regint.h:422
#define SIZE_OP_SET_OPTION
Definition: regint.h:690
if(RB_TYPE_P(r, T_FLOAT))
Definition: bigdecimal.c:1186
#define ONIGENC_MBC_TO_CODE(enc, p, end)
#define GET_RELADDR_INC(addr, p)
Definition: regint.h:659
OnigRegexType regex_t
Definition: ripper.y:706
unsigned int alloc
Definition: ripper.y:663
#define NST_MARK1
Definition: regparse.h:128
static int onig_inited
Definition: regcomp.c:5886
static int add_compile_string(UChar *s, int mb_len, OnigDistance str_len, regex_t *reg, int ignore_case)
Definition: regcomp.c:445
#define ANCHOR_PREC_READ_NOT
Definition: regint.h:512
#define ENCLOSE_CONDITION
Definition: regparse.h:95
#define IN_NOT
Definition: regcomp.c:3830
void onig_reduce_nested_quantifier(Node *pnode, Node *cnode)
Definition: regparse.c:2267
#define ENCLOSE_OPTION
Definition: regparse.h:93
static int is_not_included(Node *x, Node *y, regex_t *reg)
Definition: regcomp.c:2514
#define NST_MIN_FIXED
Definition: regparse.h:125
unsigned int capture_history
Definition: ripper.y:671
#define IS_ENCLOSE_STOP_BT_SIMPLE_REPEAT(en)
Definition: regparse.h:152
#define ONIGERR_MEMORY
OptExactInfo exb
Definition: regcomp.c:4331
#define IS_FIND_CONDITION(option)
Definition: regint.h:367
#define THREAD_SYSTEM_INIT
Definition: regint.h:116
UChar * p
Definition: regint.h:421
static int renumber_by_map(Node *node, GroupNumRemap *map)
Definition: regcomp.c:1931
static OnigDistance distance_multiply(OnigDistance d, int m)
Definition: regcomp.c:107
#define NSTR(node)
Definition: regparse.h:74
#define ONIG_IS_OPTION_ON(options, option)
#define ONIGENC_CODE_TO_MBC(enc, code, buf)
#define ONIGENC_CODE_TO_MBC_MAXLEN
static void clear_opt_map_info(OptMapInfo *map)
Definition: regcomp.c:4679
static void select_opt_map_info(OptMapInfo *now, OptMapInfo *alt)
Definition: regcomp.c:4744
#define SIZE_OP_MEMORY_END_REC
Definition: regint.h:698
#define ANCHOR_ANYCHAR_STAR_MASK
Definition: regparse.h:89
OptExactInfo expr
Definition: regcomp.c:4333
static void concat_opt_exact_info(OptExactInfo *to, OptExactInfo *add, OnigEncoding enc)
Definition: regcomp.c:4563
#define ONIGENC_MBC_CASE_FOLD_MAXLEN
static void set_bound_node_opt_info(NodeOptInfo *opt, MinMaxLen *mmd)
Definition: regcomp.c:4803
OnigDistance dmin
Definition: ripper.y:696
static int get_min_match_length(Node *node, OnigDistance *min, ScanEnv *env)
Definition: regcomp.c:2137
OnigOptionType option
Definition: regparse.h:288
#define CLEAR_ENCLOSE_STATUS(node, f)
Definition: regparse.h:142
static int compile_range_repeat_node(QtfrNode *qn, int target_len, int empty_info, regex_t *reg)
Definition: regcomp.c:690
static int compile_enclose_node(EncloseNode *node, regex_t *reg)
Definition: regcomp.c:1300
#define ONIGERR_UNDEFINED_NAME_REFERENCE
struct _Node * target
Definition: regparse.h:179
#define NTYPE2BIT(type)
Definition: regparse.h:53
#define SIZE_POINTER
Definition: regint.h:656
int greedy
Definition: regparse.h:182
int value
Definition: regcomp.c:4323
#define ONIG_OPTION_NEGATE_SINGLELINE
int onig_is_code_in_cc_len(int elen, OnigCodePoint code, CClassNode *cc)
Definition: regcomp.c:6040
int onig_parse_make_tree(Node **root, const UChar *pattern, const UChar *end, regex_t *reg, ScanEnv *env)
Definition: regparse.c:6296
#define NT_CALL
Definition: regparse.h:50
#define SIZE_OP_PUSH_POS
Definition: regint.h:686
#define ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME_CALL
#define NT_CTYPE
Definition: regparse.h:42
#define NST_MAX_FIXED
Definition: regparse.h:126
#define ONIG_OPTIMIZE_EXACT_BM_NOT_REV
Definition: regint.h:325
static int compile_length_cclass_node(CClassNode *cc, regex_t *reg)
Definition: regcomp.c:586
#define ANCHOR_BEGIN_BUF
Definition: regint.h:500
#define IN_VAR_REPEAT
Definition: regcomp.c:3832
BDIGIT e
Definition: bigdecimal.c:5085
OnigEncoding enc
Definition: ripper.y:678
#define SIZE_OP_PUSH_OR_JUMP_EXACT1
Definition: regint.h:682
static void copy_opt_env(OptEnv *to, OptEnv *from)
Definition: regcomp.c:4460
static void copy_opt_exact_info(OptExactInfo *to, OptExactInfo *from)
Definition: regcomp.c:4557
#define IS_NOT_NULL(p)
Definition: regint.h:279
options
Definition: tcltklib.c:4481
static int entry_repeat_range(regex_t *reg, int id, int lower, int upper)
Definition: regcomp.c:659
#define IS_NODE_TYPE_SIMPLE(type)
Definition: regparse.h:67
int onig_end(void)
Definition: regcomp.c:5995
#define ANCHOR_LOOK_BEHIND
Definition: regint.h:513
short int MemNumType
Definition: regint.h:643
#define IS_ENCLOSE_MARK2(en)
Definition: regparse.h:148
static int add_abs_addr(regex_t *reg, int addr)
Definition: regcomp.c:237
static int add_mem_num(regex_t *reg, int num)
Definition: regcomp.c:255
#define GET_POINTER_INC(ptr, p)
Definition: regint.h:665
#define NSTRING_LEN(node)
Definition: regparse.h:105
#define IS_ENCLOSE_NAMED_GROUP(en)
Definition: regparse.h:154
static void clear_mml(MinMaxLen *mml)
Definition: regcomp.c:4424
int char_len
Definition: regparse.h:245
UChar * name
Definition: regparse.h:224
struct _Node * target
Definition: regparse.h:198
static void remove_opt_anc_info(OptAncInfo *to, int anc)
Definition: regcomp.c:4524
int num_null_check
Definition: ripper.y:668
static int next_setup(Node *node, Node *next_node, int in_root, regex_t *reg)
Definition: regcomp.c:3284
UChar map[ONIG_CHAR_TABLE_SIZE]
Definition: regcomp.c:4324
BitStatusType bt_mem_start
Definition: regparse.h:293
static int subexp_inf_recursive_check_trav(Node *node, ScanEnv *env)
Definition: regcomp.c:2934
#define NSTRING_SET_DONT_GET_OPT_INFO(node)
Definition: regparse.h:109
#define SIZE_OP_PUSH_POS_NOT
Definition: regint.h:687
#define ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP
ScanEnv * scan_env
Definition: regcomp.c:4301
#define ANCHOR_WORD_BOUND
Definition: regint.h:507
static int comp_opt_exact_or_map_info(OptExactInfo *e, OptMapInfo *m)
Definition: regcomp.c:4763
BDIGIT v
Definition: bigdecimal.c:5656
int ascii_range
Definition: regparse.h:246
OnigOptionType option
Definition: regparse.h:197
int allocated
Definition: ripper.y:614
#define ANCHOR_KEEP
Definition: regint.h:519
AbsAddrType call_addr
Definition: regparse.h:199
#define ONIG_OPTIMIZE_EXACT_BM
Definition: regint.h:324
OnigOptionType options
Definition: ripper.y:679
#define env
int onig_renumber_name_table(regex_t *reg, GroupNumRemap *map)
Definition: regparse.c:572
#define NULL
Definition: _sdbm.c:103
q
Definition: tcltklib.c:2968
#define SIZE_OP_PUSH
Definition: regint.h:680
#define IN_ROOT
Definition: regcomp.c:3833
void onig_free(regex_t *reg)
Definition: regcomp.c:5578
static int compile_length_enclose_node(EncloseNode *node, regex_t *reg)
Definition: regcomp.c:1214
int back_num
Definition: regparse.h:235
#define NCDR(node)
Definition: regparse.h:85
static int unset_addr_list_add(UnsetAddrList *uslist, int offset, struct _Node *node)
Definition: regcomp.c:188
int retry
Definition: tcltklib.c:10151
OnigOptionType options
Definition: ripper.y:386
#define GET_ABSADDR_INC(addr, p)
Definition: regint.h:660
int repeat_range_alloc
Definition: ripper.y:675
unsigned char * p
Definition: ripper.y:661
static int compile_option_node(EncloseNode *node, regex_t *reg)
Definition: regcomp.c:1188
#define SET_CALL_RECURSION(node)
Definition: regparse.h:157
#define bp()
Definition: vm_debug.h:27
#define BBUF_GET_OFFSET_POS(buf)
Definition: regint.h:467
#define SET_NTYPE(node, ntype)
Definition: regparse.h:72
static int unset_addr_list_fix(UnsetAddrList *uslist, regex_t *reg)
Definition: regcomp.c:2048
#define REGEX_TRANSFER(to, from)
Definition: regcomp.c:5610
int offset
Definition: regparse.h:210
#define ONIG_STATE_COMPILING
static int bitset_is_empty(BitSetRef bs)
Definition: regcomp.c:118
RUBY_EXTERN VALUE rb_cThread
Definition: ripper.y:1459
#define NT_ENCLOSE
Definition: regparse.h:46
int onig_set_default_case_fold_flag(OnigCaseFoldType case_fold_flag)
Definition: regcomp.c:42
static void add_opt_anc_info(OptAncInfo *to, int anc)
Definition: regcomp.c:4515
struct re_pattern_buffer * chain
Definition: ripper.y:700
#define BBUF_GET_ADD_ADDRESS(buf)
Definition: regint.h:466
#define SIZE_OP_LOOK_BEHIND
Definition: regint.h:703
#define BBUF_ADD(buf, bytes, n)
Definition: regint.h:464
static int select_str_opcode(int mb_len, OnigDistance str_len, int ignore_case)
Definition: regcomp.c:324
BitStatusType capture_history
Definition: regparse.h:292
#define ONIGERR_PARSER_BUG
#define BITSET_SIZE
Definition: regint.h:394
int back_static[NODE_BACKREFS_SIZE]
Definition: regparse.h:236
size_t len
Definition: tcltklib.c:3568
Node * onig_node_new_anchor(int type)
Definition: regparse.c:1289