Ruby 2.0.0p247 (2013-06-27 revision 41674)
regparse.c
Go to the documentation of this file.
1 /**********************************************************************
2  regparse.c - Onigmo (Oniguruma-mod) (regular expression library)
3 **********************************************************************/
4 /*-
5  * Copyright (c) 2002-2008 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
6  * Copyright (c) 2011-2013 K.Takata <kentkt AT csc DOT jp>
7  * All rights reserved.
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  * 1. Redistributions of source code must retain the above copyright
13  * notice, this list of conditions and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  * notice, this list of conditions and the following disclaimer in the
16  * documentation and/or other materials provided with the distribution.
17  *
18  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
19  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21  * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
22  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28  * SUCH DAMAGE.
29  */
30 
31 #include "regparse.h"
32 
33 #define WARN_BUFSIZE 256
34 
35 #define CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS
36 
37 
58  , ( SYN_GNU_REGEX_BV |
69  ,
70  {
71  (OnigCodePoint )'\\' /* esc */
72  , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar '.' */
73  , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anytime '*' */
74  , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* zero or one time '?' */
75  , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* one or more time '+' */
76  , (OnigCodePoint )ONIG_INEFFECTIVE_META_CHAR /* anychar anytime */
77  }
78 };
79 
81 
82 extern void onig_null_warn(const char* s ARG_UNUSED) { }
83 
84 #ifdef DEFAULT_WARN_FUNCTION
85 static OnigWarnFunc onig_warn = (OnigWarnFunc )DEFAULT_WARN_FUNCTION;
86 #else
87 static OnigWarnFunc onig_warn = onig_null_warn;
88 #endif
89 
90 #ifdef DEFAULT_VERB_WARN_FUNCTION
91 static OnigWarnFunc onig_verb_warn = (OnigWarnFunc )DEFAULT_VERB_WARN_FUNCTION;
92 #else
93 static OnigWarnFunc onig_verb_warn = onig_null_warn;
94 #endif
95 
96 extern void onig_set_warn_func(OnigWarnFunc f)
97 {
98  onig_warn = f;
99 }
100 
101 extern void onig_set_verb_warn_func(OnigWarnFunc f)
102 {
103  onig_verb_warn = f;
104 }
105 
106 static void CC_DUP_WARN(ScanEnv *env);
107 
108 static void
110 {
111  if (IS_NOT_NULL(bbuf)) {
112  if (IS_NOT_NULL(bbuf->p)) xfree(bbuf->p);
113  xfree(bbuf);
114  }
115 }
116 
117 static int
118 bbuf_clone(BBuf** rto, BBuf* from)
119 {
120  int r;
121  BBuf *to;
122 
123  *rto = to = (BBuf* )xmalloc(sizeof(BBuf));
125  r = BBUF_INIT(to, from->alloc);
126  if (r != 0) return r;
127  to->used = from->used;
128  xmemcpy(to->p, from->p, from->used);
129  return 0;
130 }
131 
132 #define BACKREF_REL_TO_ABS(rel_no, env) \
133  ((env)->num_mem + 1 + (rel_no))
134 
/* Clear the bits f in v when `negative` is non-zero, otherwise set them.
 * The whole expansion is parenthesized so the conditional cannot be
 * re-associated by operators surrounding the macro call. */
#define ONOFF(v,f,negative)    ((negative) ? ((v) &= ~(f)) : ((v) |= (f)))
136 
137 #define MBCODE_START_POS(enc) \
138  (OnigCodePoint )(ONIGENC_MBC_MINLEN(enc) > 1 ? 0 : 0x80)
139 
140 #define SET_ALL_MULTI_BYTE_RANGE(enc, pbuf) \
141  add_code_range_to_buf(pbuf, env, MBCODE_START_POS(enc), ONIG_LAST_CODE_POINT)
142 
143 #define ADD_ALL_MULTI_BYTE_RANGE(enc, mbuf) do {\
144  if (! ONIGENC_IS_SINGLEBYTE(enc)) {\
145  r = SET_ALL_MULTI_BYTE_RANGE(enc, &(mbuf));\
146  if (r) return r;\
147  }\
148 } while (0)
149 
150 
151 #define BITSET_SET_BIT_CHKDUP(bs, pos) do { \
152  if (BITSET_AT(bs, pos)) CC_DUP_WARN(env); \
153  BS_ROOM(bs, pos) |= BS_BIT(pos); \
154 } while (0)
155 
156 #define BITSET_IS_EMPTY(bs,empty) do {\
157  int i;\
158  empty = 1;\
159  for (i = 0; i < BITSET_SIZE; i++) {\
160  if ((bs)[i] != 0) {\
161  empty = 0; break;\
162  }\
163  }\
164 } while (0)
165 
166 static void
167 bitset_set_range(ScanEnv *env, BitSetRef bs, int from, int to)
168 {
169  int i;
170  for (i = from; i <= to && i < SINGLE_BYTE_SIZE; i++) {
171  BITSET_SET_BIT_CHKDUP(bs, i);
172  }
173 }
174 
175 #if 0
176 static void
177 bitset_set_all(BitSetRef bs)
178 {
179  int i;
180  for (i = 0; i < BITSET_SIZE; i++) { bs[i] = ~((Bits )0); }
181 }
182 #endif
183 
184 static void
186 {
187  int i;
188  for (i = 0; i < BITSET_SIZE; i++) { bs[i] = ~(bs[i]); }
189 }
190 
191 static void
193 {
194  int i;
195  for (i = 0; i < BITSET_SIZE; i++) { to[i] = ~(from[i]); }
196 }
197 
198 static void
200 {
201  int i;
202  for (i = 0; i < BITSET_SIZE; i++) { dest[i] &= bs[i]; }
203 }
204 
205 static void
207 {
208  int i;
209  for (i = 0; i < BITSET_SIZE; i++) { dest[i] |= bs[i]; }
210 }
211 
212 static void
214 {
215  int i;
216  for (i = 0; i < BITSET_SIZE; i++) { dest[i] = bs[i]; }
217 }
218 
219 extern int
220 onig_strncmp(const UChar* s1, const UChar* s2, int n)
221 {
222  int x;
223 
224  while (n-- > 0) {
225  x = *s2++ - *s1++;
226  if (x) return x;
227  }
228  return 0;
229 }
230 
231 extern void
232 onig_strcpy(UChar* dest, const UChar* src, const UChar* end)
233 {
234  ptrdiff_t len = end - src;
235  if (len > 0) {
236  xmemcpy(dest, src, len);
237  dest[len] = (UChar )0;
238  }
239 }
240 
241 #ifdef USE_NAMED_GROUP
242 static UChar*
244 {
245  ptrdiff_t slen;
246  int term_len, i;
247  UChar *r;
248 
249  slen = end - s;
250  term_len = ONIGENC_MBC_MINLEN(enc);
251 
252  r = (UChar* )xmalloc(slen + term_len);
254  xmemcpy(r, s, slen);
255 
256  for (i = 0; i < term_len; i++)
257  r[slen + i] = (UChar )0;
258 
259  return r;
260 }
261 #endif
262 
263 /* scan pattern methods */
264 #define PEND_VALUE 0
265 
266 #ifdef __GNUC__
267 /* get rid of Wunused-but-set-variable and Wuninitialized */
268 #define PFETCH_READY UChar* pfetch_prev = NULL; (void)pfetch_prev
269 #else
270 #define PFETCH_READY UChar* pfetch_prev
271 #endif
272 #define PEND (p < end ? 0 : 1)
273 #define PUNFETCH p = pfetch_prev
274 #define PINC do { \
275  pfetch_prev = p; \
276  p += enclen(enc, p, end); \
277 } while (0)
278 #define PFETCH(c) do { \
279  c = ((enc->max_enc_len == 1) ? *p : ONIGENC_MBC_TO_CODE(enc, p, end)); \
280  pfetch_prev = p; \
281  p += enclen(enc, p, end); \
282 } while (0)
283 
284 #define PPEEK (p < end ? ONIGENC_MBC_TO_CODE(enc, p, end) : PEND_VALUE)
285 #define PPEEK_IS(c) (PPEEK == (OnigCodePoint )c)
286 
287 static UChar*
288 strcat_capa(UChar* dest, UChar* dest_end, const UChar* src, const UChar* src_end,
289  size_t capa)
290 {
291  UChar* r;
292 
293  if (dest)
294  r = (UChar* )xrealloc(dest, capa + 1);
295  else
296  r = (UChar* )xmalloc(capa + 1);
297 
299  onig_strcpy(r + (dest_end - dest), src, src_end);
300  return r;
301 }
302 
303 /* dest on static area */
304 static UChar*
306  const UChar* src, const UChar* src_end, size_t capa)
307 {
308  UChar* r;
309 
310  r = (UChar* )xmalloc(capa + 1);
312  onig_strcpy(r, dest, dest_end);
313  onig_strcpy(r + (dest_end - dest), src, src_end);
314  return r;
315 }
316 
317 
318 #ifdef USE_ST_LIBRARY
319 
320 #include "ruby/st.h"
321 
322 typedef struct {
323  const UChar* s;
324  const UChar* end;
326 
327 static int
329 {
330  const st_str_end_key *x, *y;
331  const UChar *p, *q;
332  int c;
333 
334  x = (const st_str_end_key *)xp;
335  y = (const st_str_end_key *)yp;
336  if ((x->end - x->s) != (y->end - y->s))
337  return 1;
338 
339  p = x->s;
340  q = y->s;
341  while (p < x->end) {
342  c = (int )*p - (int )*q;
343  if (c != 0) return c;
344 
345  p++; q++;
346  }
347 
348  return 0;
349 }
350 
351 static st_index_t
353 {
354  const st_str_end_key *x = (const st_str_end_key *)xp;
355  const UChar *p;
356  st_index_t val = 0;
357 
358  p = x->s;
359  while (p < x->end) {
360  val = val * 997 + (int )*p++;
361  }
362 
363  return val + (val >> 5);
364 }
365 
366 extern hash_table_type*
368 {
369  static const struct st_hash_type hashType = {
370  str_end_cmp,
371  str_end_hash,
372  };
373 
374  return (hash_table_type* )
375  onig_st_init_table_with_size(&hashType, size);
376 }
377 
378 extern int
380  const UChar* end_key, hash_data_type *value)
381 {
383 
384  key.s = (UChar* )str_key;
385  key.end = (UChar* )end_key;
386 
387  return onig_st_lookup(table, (st_data_t )(&key), value);
388 }
389 
390 extern int
392  const UChar* end_key, hash_data_type value)
393 {
395  int result;
396 
397  key = (st_str_end_key* )xmalloc(sizeof(st_str_end_key));
398  key->s = (UChar* )str_key;
399  key->end = (UChar* )end_key;
400  result = onig_st_insert(table, (st_data_t )key, value);
401  if (result) {
402  xfree(key);
403  }
404  return result;
405 }
406 
407 #endif /* USE_ST_LIBRARY */
408 
409 
410 #ifdef USE_NAMED_GROUP
411 
412 #define INIT_NAME_BACKREFS_ALLOC_NUM 8
413 
414 typedef struct {
416  size_t name_len; /* byte length */
417  int back_num; /* number of backrefs */
420  int* back_refs;
421 } NameEntry;
422 
423 #ifdef USE_ST_LIBRARY
424 
426 typedef st_data_t HashDataType; /* 1.6 st.h doesn't define st_data_t type */
427 
428 #ifdef ONIG_DEBUG
429 static int
430 i_print_name_entry(UChar* key, NameEntry* e, void* arg)
431 {
432  int i;
433  FILE* fp = (FILE* )arg;
434 
435  fprintf(fp, "%s: ", e->name);
436  if (e->back_num == 0)
437  fputs("-", fp);
438  else if (e->back_num == 1)
439  fprintf(fp, "%d", e->back_ref1);
440  else {
441  for (i = 0; i < e->back_num; i++) {
442  if (i > 0) fprintf(fp, ", ");
443  fprintf(fp, "%d", e->back_refs[i]);
444  }
445  }
446  fputs("\n", fp);
447  return ST_CONTINUE;
448 }
449 
450 extern int
451 onig_print_names(FILE* fp, regex_t* reg)
452 {
453  NameTable* t = (NameTable* )reg->name_table;
454 
455  if (IS_NOT_NULL(t)) {
456  fprintf(fp, "name table\n");
457  onig_st_foreach(t, i_print_name_entry, (HashDataType )fp);
458  fputs("\n", fp);
459  }
460  return 0;
461 }
462 #endif /* ONIG_DEBUG */
463 
464 static int
466 {
467  xfree(e->name);
468  if (IS_NOT_NULL(e->back_refs)) xfree(e->back_refs);
469  xfree(key);
470  xfree(e);
471  return ST_DELETE;
472 }
473 
474 static int
476 {
477  NameTable* t = (NameTable* )reg->name_table;
478 
479  if (IS_NOT_NULL(t)) {
481  }
482  return 0;
483 }
484 
485 extern int
487 {
488  int r;
489  NameTable* t;
490 
491  r = names_clear(reg);
492  if (r) return r;
493 
494  t = (NameTable* )reg->name_table;
496  reg->name_table = (void* )NULL;
497  return 0;
498 }
499 
500 static NameEntry*
501 name_find(regex_t* reg, const UChar* name, const UChar* name_end)
502 {
503  NameEntry* e;
504  NameTable* t = (NameTable* )reg->name_table;
505 
506  e = (NameEntry* )NULL;
507  if (IS_NOT_NULL(t)) {
508  onig_st_lookup_strend(t, name, name_end, (HashDataType* )((void* )(&e)));
509  }
510  return e;
511 }
512 
513 typedef struct {
514  int (*func)(const UChar*, const UChar*,int,int*,regex_t*,void*);
515  regex_t* reg;
516  void* arg;
517  int ret;
519 } INamesArg;
520 
521 static int
523 {
524  int r = (*(arg->func))(e->name,
525  e->name + e->name_len,
526  e->back_num,
527  (e->back_num > 1 ? e->back_refs : &(e->back_ref1)),
528  arg->reg, arg->arg);
529  if (r != 0) {
530  arg->ret = r;
531  return ST_STOP;
532  }
533  return ST_CONTINUE;
534 }
535 
536 extern int
538  int (*func)(const UChar*, const UChar*,int,int*,regex_t*,void*), void* arg)
539 {
540  INamesArg narg;
541  NameTable* t = (NameTable* )reg->name_table;
542 
543  narg.ret = 0;
544  if (IS_NOT_NULL(t)) {
545  narg.func = func;
546  narg.reg = reg;
547  narg.arg = arg;
548  narg.enc = reg->enc; /* should be pattern encoding. */
549  onig_st_foreach(t, i_names, (HashDataType )&narg);
550  }
551  return narg.ret;
552 }
553 
554 static int
556 {
557  int i;
558 
559  if (e->back_num > 1) {
560  for (i = 0; i < e->back_num; i++) {
561  e->back_refs[i] = map[e->back_refs[i]].new_val;
562  }
563  }
564  else if (e->back_num == 1) {
565  e->back_ref1 = map[e->back_ref1].new_val;
566  }
567 
568  return ST_CONTINUE;
569 }
570 
571 extern int
573 {
574  NameTable* t = (NameTable* )reg->name_table;
575 
576  if (IS_NOT_NULL(t)) {
577  onig_st_foreach(t, i_renumber_name, (HashDataType )map);
578  }
579  return 0;
580 }
581 
582 
583 extern int
585 {
586  NameTable* t = (NameTable* )reg->name_table;
587 
588  if (IS_NOT_NULL(t))
589  return (int )t->num_entries;
590  else
591  return 0;
592 }
593 
594 #else /* USE_ST_LIBRARY */
595 
596 #define INIT_NAMES_ALLOC_NUM 8
597 
598 typedef struct {
599  NameEntry* e;
600  int num;
601  int alloc;
602 } NameTable;
603 
604 #ifdef ONIG_DEBUG
605 extern int
606 onig_print_names(FILE* fp, regex_t* reg)
607 {
608  int i, j;
609  NameEntry* e;
610  NameTable* t = (NameTable* )reg->name_table;
611 
612  if (IS_NOT_NULL(t) && t->num > 0) {
613  fprintf(fp, "name table\n");
614  for (i = 0; i < t->num; i++) {
615  e = &(t->e[i]);
616  fprintf(fp, "%s: ", e->name);
617  if (e->back_num == 0) {
618  fputs("-", fp);
619  }
620  else if (e->back_num == 1) {
621  fprintf(fp, "%d", e->back_ref1);
622  }
623  else {
624  for (j = 0; j < e->back_num; j++) {
625  if (j > 0) fprintf(fp, ", ");
626  fprintf(fp, "%d", e->back_refs[j]);
627  }
628  }
629  fputs("\n", fp);
630  }
631  fputs("\n", fp);
632  }
633  return 0;
634 }
635 #endif
636 
637 static int
638 names_clear(regex_t* reg)
639 {
640  int i;
641  NameEntry* e;
642  NameTable* t = (NameTable* )reg->name_table;
643 
644  if (IS_NOT_NULL(t)) {
645  for (i = 0; i < t->num; i++) {
646  e = &(t->e[i]);
647  if (IS_NOT_NULL(e->name)) {
648  xfree(e->name);
649  e->name = NULL;
650  e->name_len = 0;
651  e->back_num = 0;
652  e->back_alloc = 0;
653  if (IS_NOT_NULL(e->back_refs)) xfree(e->back_refs);
654  e->back_refs = (int* )NULL;
655  }
656  }
657  if (IS_NOT_NULL(t->e)) {
658  xfree(t->e);
659  t->e = NULL;
660  }
661  t->num = 0;
662  }
663  return 0;
664 }
665 
666 extern int
668 {
669  int r;
670  NameTable* t;
671 
672  r = names_clear(reg);
673  if (r) return r;
674 
675  t = (NameTable* )reg->name_table;
676  if (IS_NOT_NULL(t)) xfree(t);
677  reg->name_table = NULL;
678  return 0;
679 }
680 
681 static NameEntry*
682 name_find(regex_t* reg, const UChar* name, const UChar* name_end)
683 {
684  int i, len;
685  NameEntry* e;
686  NameTable* t = (NameTable* )reg->name_table;
687 
688  if (IS_NOT_NULL(t)) {
689  len = name_end - name;
690  for (i = 0; i < t->num; i++) {
691  e = &(t->e[i]);
692  if (len == e->name_len && onig_strncmp(name, e->name, len) == 0)
693  return e;
694  }
695  }
696  return (NameEntry* )NULL;
697 }
698 
699 extern int
701  int (*func)(const UChar*, const UChar*,int,int*,regex_t*,void*), void* arg)
702 {
703  int i, r;
704  NameEntry* e;
705  NameTable* t = (NameTable* )reg->name_table;
706 
707  if (IS_NOT_NULL(t)) {
708  for (i = 0; i < t->num; i++) {
709  e = &(t->e[i]);
710  r = (*func)(e->name, e->name + e->name_len, e->back_num,
711  (e->back_num > 1 ? e->back_refs : &(e->back_ref1)),
712  reg, arg);
713  if (r != 0) return r;
714  }
715  }
716  return 0;
717 }
718 
719 extern int
721 {
722  NameTable* t = (NameTable* )reg->name_table;
723 
724  if (IS_NOT_NULL(t))
725  return t->num;
726  else
727  return 0;
728 }
729 
730 #endif /* else USE_ST_LIBRARY */
731 
732 static int
733 name_add(regex_t* reg, UChar* name, UChar* name_end, int backref, ScanEnv* env)
734 {
735  int alloc;
736  NameEntry* e;
737  NameTable* t = (NameTable* )reg->name_table;
738 
739  if (name_end - name <= 0)
741 
742  e = name_find(reg, name, name_end);
743  if (IS_NULL(e)) {
744 #ifdef USE_ST_LIBRARY
745  if (IS_NULL(t)) {
747  reg->name_table = (void* )t;
748  }
749  e = (NameEntry* )xmalloc(sizeof(NameEntry));
751 
752  e->name = strdup_with_null(reg->enc, name, name_end);
753  if (IS_NULL(e->name)) {
754  xfree(e);
755  return ONIGERR_MEMORY;
756  }
757  onig_st_insert_strend(t, e->name, (e->name + (name_end - name)),
758  (HashDataType )e);
759 
760  e->name_len = name_end - name;
761  e->back_num = 0;
762  e->back_alloc = 0;
763  e->back_refs = (int* )NULL;
764 
765 #else
766 
767  if (IS_NULL(t)) {
768  alloc = INIT_NAMES_ALLOC_NUM;
769  t = (NameTable* )xmalloc(sizeof(NameTable));
771  t->e = NULL;
772  t->alloc = 0;
773  t->num = 0;
774 
775  t->e = (NameEntry* )xmalloc(sizeof(NameEntry) * alloc);
776  if (IS_NULL(t->e)) {
777  xfree(t);
778  return ONIGERR_MEMORY;
779  }
780  t->alloc = alloc;
781  reg->name_table = t;
782  goto clear;
783  }
784  else if (t->num == t->alloc) {
785  int i;
786  NameEntry* p;
787 
788  alloc = t->alloc * 2;
789  p = (NameEntry* )xrealloc(t->e, sizeof(NameEntry) * alloc);
791  t->e = p;
792  t->alloc = alloc;
793 
794  clear:
795  for (i = t->num; i < t->alloc; i++) {
796  t->e[i].name = NULL;
797  t->e[i].name_len = 0;
798  t->e[i].back_num = 0;
799  t->e[i].back_alloc = 0;
800  t->e[i].back_refs = (int* )NULL;
801  }
802  }
803  e = &(t->e[t->num]);
804  t->num++;
805  e->name = strdup_with_null(reg->enc, name, name_end);
806  if (IS_NULL(e->name)) return ONIGERR_MEMORY;
807  e->name_len = name_end - name;
808 #endif
809  }
810 
811  if (e->back_num >= 1 &&
814  name, name_end);
816  }
817 
818  e->back_num++;
819  if (e->back_num == 1) {
820  e->back_ref1 = backref;
821  }
822  else {
823  if (e->back_num == 2) {
825  e->back_refs = (int* )xmalloc(sizeof(int) * alloc);
827  e->back_alloc = alloc;
828  e->back_refs[0] = e->back_ref1;
829  e->back_refs[1] = backref;
830  }
831  else {
832  if (e->back_num > e->back_alloc) {
833  int* p;
834  alloc = e->back_alloc * 2;
835  p = (int* )xrealloc(e->back_refs, sizeof(int) * alloc);
837  e->back_refs = p;
838  e->back_alloc = alloc;
839  }
840  e->back_refs[e->back_num - 1] = backref;
841  }
842  }
843 
844  return 0;
845 }
846 
847 extern int
849  const UChar* name_end, int** nums)
850 {
851  NameEntry* e = name_find(reg, name, name_end);
852 
854 
855  switch (e->back_num) {
856  case 0:
857  *nums = 0;
858  break;
859  case 1:
860  *nums = &(e->back_ref1);
861  break;
862  default:
863  *nums = e->back_refs;
864  break;
865  }
866  return e->back_num;
867 }
868 
869 extern int
871  const UChar* name_end, OnigRegion *region)
872 {
873  int i, n, *nums;
874 
875  n = onig_name_to_group_numbers(reg, name, name_end, &nums);
876  if (n < 0)
877  return n;
878  else if (n == 0)
879  return ONIGERR_PARSER_BUG;
880  else if (n == 1)
881  return nums[0];
882  else {
883  if (IS_NOT_NULL(region)) {
884  for (i = n - 1; i >= 0; i--) {
885  if (region->beg[nums[i]] != ONIG_REGION_NOTPOS)
886  return nums[i];
887  }
888  }
889  return nums[n - 1];
890  }
891 }
892 
893 #else /* USE_NAMED_GROUP */
894 
895 extern int
896 onig_name_to_group_numbers(regex_t* reg, const UChar* name,
897  const UChar* name_end, int** nums)
898 {
899  return ONIG_NO_SUPPORT_CONFIG;
900 }
901 
902 extern int
903 onig_name_to_backref_number(regex_t* reg, const UChar* name,
904  const UChar* name_end, OnigRegion* region)
905 {
906  return ONIG_NO_SUPPORT_CONFIG;
907 }
908 
909 extern int
911  int (*func)(const UChar*, const UChar*,int,int*,regex_t*,void*), void* arg)
912 {
913  return ONIG_NO_SUPPORT_CONFIG;
914 }
915 
916 extern int
918 {
919  return 0;
920 }
921 #endif /* else USE_NAMED_GROUP */
922 
923 extern int
925 {
927  return 0;
928 
929 #ifdef USE_NAMED_GROUP
930  if (onig_number_of_names(reg) > 0 &&
933  return 0;
934  }
935 #endif
936 
937  return 1;
938 }
939 
940 
941 #define INIT_SCANENV_MEMNODES_ALLOC_SIZE 16
942 
943 static void
945 {
946  int i;
947 
952  env->error = (UChar* )NULL;
953  env->error_end = (UChar* )NULL;
954  env->num_call = 0;
955  env->num_mem = 0;
956 #ifdef USE_NAMED_GROUP
957  env->num_named = 0;
958 #endif
959  env->mem_alloc = 0;
960  env->mem_nodes_dynamic = (Node** )NULL;
961 
962  for (i = 0; i < SCANENV_MEMNODES_SIZE; i++)
963  env->mem_nodes_static[i] = NULL_NODE;
964 
965 #ifdef USE_COMBINATION_EXPLOSION_CHECK
966  env->num_comb_exp_check = 0;
967  env->comb_exp_max_regnum = 0;
968  env->curr_max_regnum = 0;
969  env->has_recursion = 0;
970 #endif
971  env->warnings_flag = 0;
972 }
973 
974 static int
976 {
977  int i, need, alloc;
978  Node** p;
979 
980  need = env->num_mem + 1;
981  if (need >= SCANENV_MEMNODES_SIZE) {
982  if (env->mem_alloc <= need) {
983  if (IS_NULL(env->mem_nodes_dynamic)) {
985  p = (Node** )xmalloc(sizeof(Node*) * alloc);
986  xmemcpy(p, env->mem_nodes_static,
987  sizeof(Node*) * SCANENV_MEMNODES_SIZE);
988  }
989  else {
990  alloc = env->mem_alloc * 2;
991  p = (Node** )xrealloc(env->mem_nodes_dynamic, sizeof(Node*) * alloc);
992  }
994 
995  for (i = env->num_mem + 1; i < alloc; i++)
996  p[i] = NULL_NODE;
997 
998  env->mem_nodes_dynamic = p;
999  env->mem_alloc = alloc;
1000  }
1001  }
1002 
1003  env->num_mem++;
1004  return env->num_mem;
1005 }
1006 
1007 static int
1009 {
1010  if (env->num_mem >= num)
1011  SCANENV_MEM_NODES(env)[num] = node;
1012  else
1013  return ONIGERR_PARSER_BUG;
1014  return 0;
1015 }
1016 
1017 
1018 #ifdef USE_PARSE_TREE_NODE_RECYCLE
1019 typedef struct _FreeNode {
1020  struct _FreeNode* next;
1021 } FreeNode;
1022 
1024 #endif
1025 
1026 extern void
1028 {
1029  start:
1030  if (IS_NULL(node)) return ;
1031 
1032  switch (NTYPE(node)) {
1033  case NT_STR:
1034  if (NSTR(node)->capa != 0 &&
1035  IS_NOT_NULL(NSTR(node)->s) && NSTR(node)->s != NSTR(node)->buf) {
1036  xfree(NSTR(node)->s);
1037  }
1038  break;
1039 
1040  case NT_LIST:
1041  case NT_ALT:
1042  onig_node_free(NCAR(node));
1043  {
1044  Node* next_node = NCDR(node);
1045 
1046 #ifdef USE_PARSE_TREE_NODE_RECYCLE
1047  {
1048  FreeNode* n = (FreeNode* )node;
1049 
1051  n->next = FreeNodeList;
1052  FreeNodeList = n;
1054  }
1055 #else
1056  xfree(node);
1057 #endif
1058  node = next_node;
1059  goto start;
1060  }
1061  break;
1062 
1063  case NT_CCLASS:
1064  {
1065  CClassNode* cc = NCCLASS(node);
1066 
1067  if (IS_NCCLASS_SHARE(cc)) return ;
1068  if (cc->mbuf)
1069  bbuf_free(cc->mbuf);
1070  }
1071  break;
1072 
1073  case NT_QTFR:
1074  if (NQTFR(node)->target)
1075  onig_node_free(NQTFR(node)->target);
1076  break;
1077 
1078  case NT_ENCLOSE:
1079  if (NENCLOSE(node)->target)
1080  onig_node_free(NENCLOSE(node)->target);
1081  break;
1082 
1083  case NT_BREF:
1084  if (IS_NOT_NULL(NBREF(node)->back_dynamic))
1085  xfree(NBREF(node)->back_dynamic);
1086  break;
1087 
1088  case NT_ANCHOR:
1089  if (NANCHOR(node)->target)
1090  onig_node_free(NANCHOR(node)->target);
1091  break;
1092  }
1093 
1094 #ifdef USE_PARSE_TREE_NODE_RECYCLE
1095  {
1096  FreeNode* n = (FreeNode* )node;
1097 
1099  n->next = FreeNodeList;
1100  FreeNodeList = n;
1102  }
1103 #else
1104  xfree(node);
1105 #endif
1106 }
1107 
1108 #ifdef USE_PARSE_TREE_NODE_RECYCLE
1109 extern int
1111 {
1112  FreeNode* n;
1113 
1114  /* THREAD_ATOMIC_START; */
1115  while (IS_NOT_NULL(FreeNodeList)) {
1116  n = FreeNodeList;
1117  FreeNodeList = FreeNodeList->next;
1118  xfree(n);
1119  }
1120  /* THREAD_ATOMIC_END; */
1121  return 0;
1122 }
1123 #endif
1124 
1125 static Node*
1127 {
1128  Node* node;
1129 
1130 #ifdef USE_PARSE_TREE_NODE_RECYCLE
1132  if (IS_NOT_NULL(FreeNodeList)) {
1133  node = (Node* )FreeNodeList;
1134  FreeNodeList = FreeNodeList->next;
1136  return node;
1137  }
1139 #endif
1140 
1141  node = (Node* )xmalloc(sizeof(Node));
1142  /* xmemset(node, 0, sizeof(Node)); */
1143  return node;
1144 }
1145 
1146 
1147 static void
1149 {
1150  BITSET_CLEAR(cc->bs);
1151  /* cc->base.flags = 0; */
1152  cc->flags = 0;
1153  cc->mbuf = NULL;
1154 }
1155 
1156 static Node*
1158 {
1159  Node* node = node_new();
1160  CHECK_NULL_RETURN(node);
1161 
1162  SET_NTYPE(node, NT_CCLASS);
1163  initialize_cclass(NCCLASS(node));
1164  return node;
1165 }
1166 
1167 static Node*
1169  const OnigCodePoint ranges[])
1170 {
1171  int n, i;
1172  CClassNode* cc;
1173  OnigCodePoint j;
1174 
1175  Node* node = node_new_cclass();
1176  CHECK_NULL_RETURN(node);
1177 
1178  cc = NCCLASS(node);
1179  if (not != 0) NCCLASS_SET_NOT(cc);
1180 
1181  BITSET_CLEAR(cc->bs);
1182  if (sb_out > 0 && IS_NOT_NULL(ranges)) {
1183  n = ONIGENC_CODE_RANGE_NUM(ranges);
1184  for (i = 0; i < n; i++) {
1185  for (j = ONIGENC_CODE_RANGE_FROM(ranges, i);
1186  j <= (OnigCodePoint )ONIGENC_CODE_RANGE_TO(ranges, i); j++) {
1187  if (j >= sb_out) goto sb_end;
1188 
1189  BITSET_SET_BIT(cc->bs, j);
1190  }
1191  }
1192  }
1193 
1194  sb_end:
1195  if (IS_NULL(ranges)) {
1196  is_null:
1197  cc->mbuf = NULL;
1198  }
1199  else {
1200  BBuf* bbuf;
1201 
1202  n = ONIGENC_CODE_RANGE_NUM(ranges);
1203  if (n == 0) goto is_null;
1204 
1205  bbuf = (BBuf* )xmalloc(sizeof(BBuf));
1206  CHECK_NULL_RETURN(bbuf);
1207  bbuf->alloc = n + 1;
1208  bbuf->used = n + 1;
1209  bbuf->p = (UChar* )((void* )ranges);
1210 
1211  cc->mbuf = bbuf;
1212  }
1213 
1214  return node;
1215 }
1216 
1217 static Node*
1218 node_new_ctype(int type, int not, int ascii_range)
1219 {
1220  Node* node = node_new();
1221  CHECK_NULL_RETURN(node);
1222 
1223  SET_NTYPE(node, NT_CTYPE);
1224  NCTYPE(node)->ctype = type;
1225  NCTYPE(node)->not = not;
1226  NCTYPE(node)->ascii_range = ascii_range;
1227  return node;
1228 }
1229 
1230 static Node*
1232 {
1233  Node* node = node_new();
1234  CHECK_NULL_RETURN(node);
1235 
1236  SET_NTYPE(node, NT_CANY);
1237  return node;
1238 }
1239 
1240 static Node*
1241 node_new_list(Node* left, Node* right)
1242 {
1243  Node* node = node_new();
1244  CHECK_NULL_RETURN(node);
1245 
1246  SET_NTYPE(node, NT_LIST);
1247  NCAR(node) = left;
1248  NCDR(node) = right;
1249  return node;
1250 }
1251 
1252 extern Node*
1254 {
1255  return node_new_list(left, right);
1256 }
1257 
1258 extern Node*
1260 {
1261  Node *n;
1262 
1263  n = onig_node_new_list(x, NULL);
1264  if (IS_NULL(n)) return NULL_NODE;
1265 
1266  if (IS_NOT_NULL(list)) {
1267  while (IS_NOT_NULL(NCDR(list)))
1268  list = NCDR(list);
1269 
1270  NCDR(list) = n;
1271  }
1272 
1273  return n;
1274 }
1275 
1276 extern Node*
1278 {
1279  Node* node = node_new();
1280  CHECK_NULL_RETURN(node);
1281 
1282  SET_NTYPE(node, NT_ALT);
1283  NCAR(node) = left;
1284  NCDR(node) = right;
1285  return node;
1286 }
1287 
1288 extern Node*
1290 {
1291  Node* node = node_new();
1292  CHECK_NULL_RETURN(node);
1293 
1294  SET_NTYPE(node, NT_ANCHOR);
1295  NANCHOR(node)->type = type;
1296  NANCHOR(node)->target = NULL;
1297  NANCHOR(node)->char_len = -1;
1298  NANCHOR(node)->ascii_range = 0;
1299  return node;
1300 }
1301 
1302 static Node*
1303 node_new_backref(int back_num, int* backrefs, int by_name,
1305  int exist_level, int nest_level,
1306 #endif
1307  ScanEnv* env)
1308 {
1309  int i;
1310  Node* node = node_new();
1311 
1312  CHECK_NULL_RETURN(node);
1313 
1314  SET_NTYPE(node, NT_BREF);
1315  NBREF(node)->state = 0;
1316  NBREF(node)->back_num = back_num;
1317  NBREF(node)->back_dynamic = (int* )NULL;
1318  if (by_name != 0)
1319  NBREF(node)->state |= NST_NAME_REF;
1320 
1321 #ifdef USE_BACKREF_WITH_LEVEL
1322  if (exist_level != 0) {
1323  NBREF(node)->state |= NST_NEST_LEVEL;
1324  NBREF(node)->nest_level = nest_level;
1325  }
1326 #endif
1327 
1328  for (i = 0; i < back_num; i++) {
1329  if (backrefs[i] <= env->num_mem &&
1330  IS_NULL(SCANENV_MEM_NODES(env)[backrefs[i]])) {
1331  NBREF(node)->state |= NST_RECURSION; /* /...(\1).../ */
1332  break;
1333  }
1334  }
1335 
1336  if (back_num <= NODE_BACKREFS_SIZE) {
1337  for (i = 0; i < back_num; i++)
1338  NBREF(node)->back_static[i] = backrefs[i];
1339  }
1340  else {
1341  int* p = (int* )xmalloc(sizeof(int) * back_num);
1342  if (IS_NULL(p)) {
1343  onig_node_free(node);
1344  return NULL;
1345  }
1346  NBREF(node)->back_dynamic = p;
1347  for (i = 0; i < back_num; i++)
1348  p[i] = backrefs[i];
1349  }
1350  return node;
1351 }
1352 
1353 #ifdef USE_SUBEXP_CALL
1354 static Node*
1355 node_new_call(UChar* name, UChar* name_end, int gnum)
1356 {
1357  Node* node = node_new();
1358  CHECK_NULL_RETURN(node);
1359 
1360  SET_NTYPE(node, NT_CALL);
1361  NCALL(node)->state = 0;
1362  NCALL(node)->target = NULL_NODE;
1363  NCALL(node)->name = name;
1364  NCALL(node)->name_end = name_end;
1365  NCALL(node)->group_num = gnum; /* call by number if gnum != 0 */
1366  return node;
1367 }
1368 #endif
1369 
1370 static Node*
1371 node_new_quantifier(int lower, int upper, int by_number)
1372 {
1373  Node* node = node_new();
1374  CHECK_NULL_RETURN(node);
1375 
1376  SET_NTYPE(node, NT_QTFR);
1377  NQTFR(node)->state = 0;
1378  NQTFR(node)->target = NULL;
1379  NQTFR(node)->lower = lower;
1380  NQTFR(node)->upper = upper;
1381  NQTFR(node)->greedy = 1;
1382  NQTFR(node)->target_empty_info = NQ_TARGET_ISNOT_EMPTY;
1383  NQTFR(node)->head_exact = NULL_NODE;
1384  NQTFR(node)->next_head_exact = NULL_NODE;
1385  NQTFR(node)->is_refered = 0;
1386  if (by_number != 0)
1387  NQTFR(node)->state |= NST_BY_NUMBER;
1388 
1389 #ifdef USE_COMBINATION_EXPLOSION_CHECK
1390  NQTFR(node)->comb_exp_check_num = 0;
1391 #endif
1392 
1393  return node;
1394 }
1395 
1396 static Node*
1398 {
1399  Node* node = node_new();
1400  CHECK_NULL_RETURN(node);
1401 
1402  SET_NTYPE(node, NT_ENCLOSE);
1403  NENCLOSE(node)->type = type;
1404  NENCLOSE(node)->state = 0;
1405  NENCLOSE(node)->regnum = 0;
1406  NENCLOSE(node)->option = 0;
1407  NENCLOSE(node)->target = NULL;
1408  NENCLOSE(node)->call_addr = -1;
1409  NENCLOSE(node)->opt_count = 0;
1410  return node;
1411 }
1412 
1413 extern Node*
1415 {
1416  return node_new_enclose(type);
1417 }
1418 
1419 static Node*
1421 {
1423  CHECK_NULL_RETURN(node);
1424  if (is_named != 0)
1426 
1427 #ifdef USE_SUBEXP_CALL
1428  NENCLOSE(node)->option = option;
1429 #endif
1430  return node;
1431 }
1432 
1433 static Node*
1435 {
1437  CHECK_NULL_RETURN(node);
1438  NENCLOSE(node)->option = option;
1439  return node;
1440 }
1441 
1442 extern int
1443 onig_node_str_cat(Node* node, const UChar* s, const UChar* end)
1444 {
1445  ptrdiff_t addlen = end - s;
1446 
1447  if (addlen > 0) {
1448  ptrdiff_t len = NSTR(node)->end - NSTR(node)->s;
1449 
1450  if (NSTR(node)->capa > 0 || (len + addlen > NODE_STR_BUF_SIZE - 1)) {
1451  UChar* p;
1452  ptrdiff_t capa = len + addlen + NODE_STR_MARGIN;
1453 
1454  if (capa <= NSTR(node)->capa) {
1455  onig_strcpy(NSTR(node)->s + len, s, end);
1456  }
1457  else {
1458  if (NSTR(node)->s == NSTR(node)->buf)
1459  p = strcat_capa_from_static(NSTR(node)->s, NSTR(node)->end,
1460  s, end, capa);
1461  else
1462  p = strcat_capa(NSTR(node)->s, NSTR(node)->end, s, end, capa);
1463 
1465  NSTR(node)->s = p;
1466  NSTR(node)->capa = (int )capa;
1467  }
1468  }
1469  else {
1470  onig_strcpy(NSTR(node)->s + len, s, end);
1471  }
1472  NSTR(node)->end = NSTR(node)->s + len + addlen;
1473  }
1474 
1475  return 0;
1476 }
1477 
1478 extern int
1479 onig_node_str_set(Node* node, const UChar* s, const UChar* end)
1480 {
1481  onig_node_str_clear(node);
1482  return onig_node_str_cat(node, s, end);
1483 }
1484 
1485 static int
1487 {
1488  UChar s[1];
1489 
1490  s[0] = c;
1491  return onig_node_str_cat(node, s, s + 1);
1492 }
1493 
1494 static int
1496 {
1498  int num = ONIGENC_CODE_TO_MBC(enc, c, buf);
1499  if (num < 0) return num;
1500  return onig_node_str_cat(node, buf, buf + num);
1501 }
1502 
1503 extern void
1505 {
1506  SET_NTYPE(node, NT_STR);
1507  NSTR(node)->flag = flag;
1508  NSTR(node)->capa = 0;
1509  NSTR(node)->s = NSTR(node)->buf;
1510  NSTR(node)->end = NSTR(node)->buf;
1511 }
1512 
1513 extern void
1515 {
1516  if (NSTR(node)->capa != 0 &&
1517  IS_NOT_NULL(NSTR(node)->s) && NSTR(node)->s != NSTR(node)->buf) {
1518  xfree(NSTR(node)->s);
1519  }
1520 
1521  NSTR(node)->capa = 0;
1522  NSTR(node)->flag = 0;
1523  NSTR(node)->s = NSTR(node)->buf;
1524  NSTR(node)->end = NSTR(node)->buf;
1525 }
1526 
1527 static Node*
1528 node_new_str(const UChar* s, const UChar* end)
1529 {
1530  Node* node = node_new();
1531  CHECK_NULL_RETURN(node);
1532 
1533  SET_NTYPE(node, NT_STR);
1534  NSTR(node)->capa = 0;
1535  NSTR(node)->flag = 0;
1536  NSTR(node)->s = NSTR(node)->buf;
1537  NSTR(node)->end = NSTR(node)->buf;
1538  if (onig_node_str_cat(node, s, end)) {
1539  onig_node_free(node);
1540  return NULL;
1541  }
1542  return node;
1543 }
1544 
1545 extern Node*
1547 {
1548  return node_new_str(s, end);
1549 }
1550 
1551 static Node*
1553 {
1554  Node* node = node_new_str(s, end);
1555  if (IS_NOT_NULL(node))
1556  NSTRING_SET_RAW(node);
1557  return node;
1558 }
1559 
1560 static Node*
1562 {
1563  return node_new_str(NULL, NULL);
1564 }
1565 
1566 static Node*
1568 {
1569  UChar p[1];
1570 
1571  p[0] = c;
1572  return node_new_str_raw(p, p + 1);
1573 }
1574 
1575 static Node*
1577 {
1578  const UChar *p;
1579  Node* n = NULL_NODE;
1580 
1581  if (sn->end > sn->s) {
1582  p = onigenc_get_prev_char_head(enc, sn->s, sn->end, sn->end);
1583  if (p && p > sn->s) { /* can be split. */
1584  n = node_new_str(p, sn->end);
1585  if (IS_NOT_NULL(n) && (sn->flag & NSTR_RAW) != 0)
1586  NSTRING_SET_RAW(n);
1587  sn->end = (UChar* )p;
1588  }
1589  }
1590  return n;
1591 }
1592 
1593 static int
1595 {
1596  if (sn->end > sn->s) {
1597  return ((enclen(enc, sn->s, sn->end) < sn->end - sn->s) ? 1 : 0);
1598  }
1599  return 0;
1600 }
1601 
#ifdef USE_PAD_TO_SHORT_BYTE_CHAR
/*
 * Insert `num` copies of `val` in front of the node's current bytes.
 *
 * The existing bytes are saved in a scratch buffer, copied back `num`
 * positions further right, `sn->end` is advanced by `num`, and the first
 * `num` bytes are filled with `val`.
 *
 * Always returns 0.  (The function is declared to return int; falling off
 * the end without a value is undefined behaviour if a caller reads the
 * result.)
 *
 * NOTE(review): assumes the current length fits in NODE_STR_BUF_SIZE and
 * that sn->s has room for `num` more bytes -- neither is checked here;
 * confirm against callers.
 */
static int
node_str_head_pad(StrNode* sn, int num, UChar val)
{
  UChar buf[NODE_STR_BUF_SIZE];
  int i, len;

  len = (int )(sn->end - sn->s);
  onig_strcpy(buf, sn->s, sn->end);
  onig_strcpy(&(sn->s[num]), buf, buf + len);
  sn->end += num;

  for (i = 0; i < num; i++) {
    sn->s[i] = val;
  }
  return 0;
}
#endif
1619 
1620 extern int
1622 {
1623  unsigned int num, val;
1624  OnigCodePoint c;
1625  UChar* p = *src;
1626  PFETCH_READY;
1627 
1628  num = 0;
1629  while (!PEND) {
1630  PFETCH(c);
1631  if (ONIGENC_IS_CODE_DIGIT(enc, c)) {
1632  val = (unsigned int )DIGITVAL(c);
1633  if ((INT_MAX_LIMIT - val) / 10UL < num)
1634  return -1; /* overflow */
1635 
1636  num = num * 10 + val;
1637  }
1638  else {
1639  PUNFETCH;
1640  break;
1641  }
1642  }
1643  *src = p;
1644  return num;
1645 }
1646 
1647 static int
1649  int maxlen, OnigEncoding enc)
1650 {
1651  OnigCodePoint c;
1652  unsigned int num, val;
1653  int restlen;
1654  UChar* p = *src;
1655  PFETCH_READY;
1656 
1657  restlen = maxlen - minlen;
1658  num = 0;
1659  while (!PEND && maxlen-- != 0) {
1660  PFETCH(c);
1661  if (ONIGENC_IS_CODE_XDIGIT(enc, c)) {
1662  val = (unsigned int )XDIGITVAL(enc,c);
1663  if ((INT_MAX_LIMIT - val) / 16UL < num)
1664  return -1; /* overflow */
1665 
1666  num = (num << 4) + XDIGITVAL(enc,c);
1667  }
1668  else {
1669  PUNFETCH;
1670  break;
1671  }
1672  }
1673  if (maxlen > restlen)
1674  return -2; /* not enough digits */
1675  *src = p;
1676  return num;
1677 }
1678 
1679 static int
1681  OnigEncoding enc)
1682 {
1683  OnigCodePoint c;
1684  unsigned int num, val;
1685  UChar* p = *src;
1686  PFETCH_READY;
1687 
1688  num = 0;
1689  while (!PEND && maxlen-- != 0) {
1690  PFETCH(c);
1691  if (ONIGENC_IS_CODE_DIGIT(enc, c) && c < '8') {
1692  val = ODIGITVAL(c);
1693  if ((INT_MAX_LIMIT - val) / 8UL < num)
1694  return -1; /* overflow */
1695 
1696  num = (num << 3) + val;
1697  }
1698  else {
1699  PUNFETCH;
1700  break;
1701  }
1702  }
1703  *src = p;
1704  return num;
1705 }
1706 
1707 
1708 #define BBUF_WRITE_CODE_POINT(bbuf,pos,code) \
1709  BBUF_WRITE(bbuf, pos, &(code), SIZE_CODE_POINT)
1710 
1711 /* data format:
1712  [n][from-1][to-1][from-2][to-2] ... [from-n][to-n]
1713  (all data size is OnigCodePoint)
1714  */
1715 static int
1717 {
1718 #define INIT_MULTI_BYTE_RANGE_SIZE (SIZE_CODE_POINT * 5)
1719  int r;
1720  OnigCodePoint n;
1721  BBuf* bbuf;
1722 
1723  bbuf = *pbuf = (BBuf* )xmalloc(sizeof(BBuf));
1724  CHECK_NULL_RETURN_MEMERR(*pbuf);
1726  if (r) return r;
1727 
1728  n = 0;
1729  BBUF_WRITE_CODE_POINT(bbuf, 0, n);
1730  return 0;
1731 }
1732 
1733 static int
1735  int checkdup)
1736 {
1737  int r, inc_n, pos;
1738  OnigCodePoint low, high, bound, x;
1739  OnigCodePoint n, *data;
1740  BBuf* bbuf;
1741 
1742  if (from > to) {
1743  n = from; from = to; to = n;
1744  }
1745 
1746  if (IS_NULL(*pbuf)) {
1747  r = new_code_range(pbuf);
1748  if (r) return r;
1749  bbuf = *pbuf;
1750  n = 0;
1751  }
1752  else {
1753  bbuf = *pbuf;
1754  GET_CODE_POINT(n, bbuf->p);
1755  }
1756  data = (OnigCodePoint* )(bbuf->p);
1757  data++;
1758 
1759  bound = (from == 0) ? 0 : n;
1760  for (low = 0; low < bound; ) {
1761  x = (low + bound) >> 1;
1762  if (from - 1 > data[x*2 + 1])
1763  low = x + 1;
1764  else
1765  bound = x;
1766  }
1767 
1768  high = (to == ONIG_LAST_CODE_POINT) ? n : low;
1769  for (bound = n; high < bound; ) {
1770  x = (high + bound) >> 1;
1771  if (to + 1 >= data[x*2])
1772  high = x + 1;
1773  else
1774  bound = x;
1775  }
1776  /* data[(low-1)*2+1] << from <= data[low*2]
1777  * data[(high-1)*2+1] <= to << data[high*2]
1778  */
1779 
1780  inc_n = low + 1 - high;
1781  if (n + inc_n > ONIG_MAX_MULTI_BYTE_RANGES_NUM)
1783 
1784  if (inc_n != 1) {
1785  if (checkdup && from <= data[low*2+1]
1786  && (data[low*2] <= from || data[low*2+1] <= to))
1787  CC_DUP_WARN(env);
1788  if (from > data[low*2])
1789  from = data[low*2];
1790  if (to < data[(high - 1)*2 + 1])
1791  to = data[(high - 1)*2 + 1];
1792  }
1793 
1794  if (inc_n != 0) {
1795  int from_pos = SIZE_CODE_POINT * (1 + high * 2);
1796  int to_pos = SIZE_CODE_POINT * (1 + (low + 1) * 2);
1797 
1798  if (inc_n > 0) {
1799  if (high < n) {
1800  int size = (n - high) * 2 * SIZE_CODE_POINT;
1801  BBUF_MOVE_RIGHT(bbuf, from_pos, to_pos, size);
1802  }
1803  }
1804  else {
1805  BBUF_MOVE_LEFT_REDUCE(bbuf, from_pos, to_pos);
1806  }
1807  }
1808 
1809  pos = SIZE_CODE_POINT * (1 + low * 2);
1810  BBUF_ENSURE_SIZE(bbuf, pos + SIZE_CODE_POINT * 2);
1811  BBUF_WRITE_CODE_POINT(bbuf, pos, from);
1812  BBUF_WRITE_CODE_POINT(bbuf, pos + SIZE_CODE_POINT, to);
1813  n += inc_n;
1814  BBUF_WRITE_CODE_POINT(bbuf, 0, n);
1815 
1816  return 0;
1817 }
1818 
1819 static int
1821 {
1822  return add_code_range_to_buf0(pbuf, env, from, to, 1);
1823 }
1824 
1825 static int
1826 add_code_range0(BBuf** pbuf, ScanEnv* env, OnigCodePoint from, OnigCodePoint to, int checkdup)
1827 {
1828  if (from > to) {
1830  return 0;
1831  else
1833  }
1834 
1835  return add_code_range_to_buf0(pbuf, env, from, to, checkdup);
1836 }
1837 
1838 static int
1840 {
1841  return add_code_range0(pbuf, env, from, to, 1);
1842 }
1843 
1844 static int
1846 {
1847  int r, i, n;
1848  OnigCodePoint pre, from, *data, to = 0;
1849 
1850  *pbuf = (BBuf* )NULL;
1851  if (IS_NULL(bbuf)) {
1852  set_all:
1853  return SET_ALL_MULTI_BYTE_RANGE(enc, pbuf);
1854  }
1855 
1856  data = (OnigCodePoint* )(bbuf->p);
1857  GET_CODE_POINT(n, data);
1858  data++;
1859  if (n <= 0) goto set_all;
1860 
1861  r = 0;
1862  pre = MBCODE_START_POS(enc);
1863  for (i = 0; i < n; i++) {
1864  from = data[i*2];
1865  to = data[i*2+1];
1866  if (pre <= from - 1) {
1867  r = add_code_range_to_buf(pbuf, env, pre, from - 1);
1868  if (r != 0) return r;
1869  }
1870  if (to == ONIG_LAST_CODE_POINT) break;
1871  pre = to + 1;
1872  }
1873  if (to < ONIG_LAST_CODE_POINT) {
1874  r = add_code_range_to_buf(pbuf, env, to + 1, ONIG_LAST_CODE_POINT);
1875  }
1876  return r;
1877 }
1878 
1879 #define SWAP_BBUF_NOT(bbuf1, not1, bbuf2, not2) do {\
1880  BBuf *tbuf; \
1881  int tnot; \
1882  tnot = not1; not1 = not2; not2 = tnot; \
1883  tbuf = bbuf1; bbuf1 = bbuf2; bbuf2 = tbuf; \
1884 } while (0)
1885 
1886 static int
1888  BBuf* bbuf2, int not2, BBuf** pbuf, ScanEnv* env)
1889 {
1890  int r;
1891  OnigCodePoint i, n1, *data1;
1892  OnigCodePoint from, to;
1893 
1894  *pbuf = (BBuf* )NULL;
1895  if (IS_NULL(bbuf1) && IS_NULL(bbuf2)) {
1896  if (not1 != 0 || not2 != 0)
1897  return SET_ALL_MULTI_BYTE_RANGE(enc, pbuf);
1898  return 0;
1899  }
1900 
1901  r = 0;
1902  if (IS_NULL(bbuf2))
1903  SWAP_BBUF_NOT(bbuf1, not1, bbuf2, not2);
1904 
1905  if (IS_NULL(bbuf1)) {
1906  if (not1 != 0) {
1907  return SET_ALL_MULTI_BYTE_RANGE(enc, pbuf);
1908  }
1909  else {
1910  if (not2 == 0) {
1911  return bbuf_clone(pbuf, bbuf2);
1912  }
1913  else {
1914  return not_code_range_buf(enc, bbuf2, pbuf, env);
1915  }
1916  }
1917  }
1918 
1919  if (not1 != 0)
1920  SWAP_BBUF_NOT(bbuf1, not1, bbuf2, not2);
1921 
1922  data1 = (OnigCodePoint* )(bbuf1->p);
1923  GET_CODE_POINT(n1, data1);
1924  data1++;
1925 
1926  if (not2 == 0 && not1 == 0) { /* 1 OR 2 */
1927  r = bbuf_clone(pbuf, bbuf2);
1928  }
1929  else if (not1 == 0) { /* 1 OR (not 2) */
1930  r = not_code_range_buf(enc, bbuf2, pbuf, env);
1931  }
1932  if (r != 0) return r;
1933 
1934  for (i = 0; i < n1; i++) {
1935  from = data1[i*2];
1936  to = data1[i*2+1];
1937  r = add_code_range_to_buf(pbuf, env, from, to);
1938  if (r != 0) return r;
1939  }
1940  return 0;
1941 }
1942 
1943 static int
1945  OnigCodePoint* data, int n)
1946 {
1947  int i, r;
1948  OnigCodePoint from2, to2;
1949 
1950  for (i = 0; i < n; i++) {
1951  from2 = data[i*2];
1952  to2 = data[i*2+1];
1953  if (from2 < from1) {
1954  if (to2 < from1) continue;
1955  else {
1956  from1 = to2 + 1;
1957  }
1958  }
1959  else if (from2 <= to1) {
1960  if (to2 < to1) {
1961  if (from1 <= from2 - 1) {
1962  r = add_code_range_to_buf(pbuf, env, from1, from2-1);
1963  if (r != 0) return r;
1964  }
1965  from1 = to2 + 1;
1966  }
1967  else {
1968  to1 = from2 - 1;
1969  }
1970  }
1971  else {
1972  from1 = from2;
1973  }
1974  if (from1 > to1) break;
1975  }
1976  if (from1 <= to1) {
1977  r = add_code_range_to_buf(pbuf, env, from1, to1);
1978  if (r != 0) return r;
1979  }
1980  return 0;
1981 }
1982 
1983 static int
1984 and_code_range_buf(BBuf* bbuf1, int not1, BBuf* bbuf2, int not2, BBuf** pbuf, ScanEnv* env)
1985 {
1986  int r;
1987  OnigCodePoint i, j, n1, n2, *data1, *data2;
1988  OnigCodePoint from, to, from1, to1, from2, to2;
1989 
1990  *pbuf = (BBuf* )NULL;
1991  if (IS_NULL(bbuf1)) {
1992  if (not1 != 0 && IS_NOT_NULL(bbuf2)) /* not1 != 0 -> not2 == 0 */
1993  return bbuf_clone(pbuf, bbuf2);
1994  return 0;
1995  }
1996  else if (IS_NULL(bbuf2)) {
1997  if (not2 != 0)
1998  return bbuf_clone(pbuf, bbuf1);
1999  return 0;
2000  }
2001 
2002  if (not1 != 0)
2003  SWAP_BBUF_NOT(bbuf1, not1, bbuf2, not2);
2004 
2005  data1 = (OnigCodePoint* )(bbuf1->p);
2006  data2 = (OnigCodePoint* )(bbuf2->p);
2007  GET_CODE_POINT(n1, data1);
2008  GET_CODE_POINT(n2, data2);
2009  data1++;
2010  data2++;
2011 
2012  if (not2 == 0 && not1 == 0) { /* 1 AND 2 */
2013  for (i = 0; i < n1; i++) {
2014  from1 = data1[i*2];
2015  to1 = data1[i*2+1];
2016  for (j = 0; j < n2; j++) {
2017  from2 = data2[j*2];
2018  to2 = data2[j*2+1];
2019  if (from2 > to1) break;
2020  if (to2 < from1) continue;
2021  from = MAX(from1, from2);
2022  to = MIN(to1, to2);
2023  r = add_code_range_to_buf(pbuf, env, from, to);
2024  if (r != 0) return r;
2025  }
2026  }
2027  }
2028  else if (not1 == 0) { /* 1 AND (not 2) */
2029  for (i = 0; i < n1; i++) {
2030  from1 = data1[i*2];
2031  to1 = data1[i*2+1];
2032  r = and_code_range1(pbuf, env, from1, to1, data2, n2);
2033  if (r != 0) return r;
2034  }
2035  }
2036 
2037  return 0;
2038 }
2039 
2040 static int
2042 {
2043  OnigEncoding enc = env->enc;
2044  int r, not1, not2;
2045  BBuf *buf1, *buf2, *pbuf = 0;
2046  BitSetRef bsr1, bsr2;
2047  BitSet bs1, bs2;
2048 
2049  not1 = IS_NCCLASS_NOT(dest);
2050  bsr1 = dest->bs;
2051  buf1 = dest->mbuf;
2052  not2 = IS_NCCLASS_NOT(cc);
2053  bsr2 = cc->bs;
2054  buf2 = cc->mbuf;
2055 
2056  if (not1 != 0) {
2057  bitset_invert_to(bsr1, bs1);
2058  bsr1 = bs1;
2059  }
2060  if (not2 != 0) {
2061  bitset_invert_to(bsr2, bs2);
2062  bsr2 = bs2;
2063  }
2064  bitset_and(bsr1, bsr2);
2065  if (bsr1 != dest->bs) {
2066  bitset_copy(dest->bs, bsr1);
2067  bsr1 = dest->bs;
2068  }
2069  if (not1 != 0) {
2070  bitset_invert(dest->bs);
2071  }
2072 
2073  if (! ONIGENC_IS_SINGLEBYTE(enc)) {
2074  if (not1 != 0 && not2 != 0) {
2075  r = or_code_range_buf(enc, buf1, 0, buf2, 0, &pbuf, env);
2076  }
2077  else {
2078  r = and_code_range_buf(buf1, not1, buf2, not2, &pbuf, env);
2079  if (r == 0 && not1 != 0) {
2080  BBuf *tbuf = 0;
2081  r = not_code_range_buf(enc, pbuf, &tbuf, env);
2082  bbuf_free(pbuf);
2083  pbuf = tbuf;
2084  }
2085  }
2086  if (r != 0) {
2087  bbuf_free(pbuf);
2088  return r;
2089  }
2090 
2091  dest->mbuf = pbuf;
2092  bbuf_free(buf1);
2093  return r;
2094  }
2095  return 0;
2096 }
2097 
2098 static int
2100 {
2101  OnigEncoding enc = env->enc;
2102  int r, not1, not2;
2103  BBuf *buf1, *buf2, *pbuf = 0;
2104  BitSetRef bsr1, bsr2;
2105  BitSet bs1, bs2;
2106 
2107  not1 = IS_NCCLASS_NOT(dest);
2108  bsr1 = dest->bs;
2109  buf1 = dest->mbuf;
2110  not2 = IS_NCCLASS_NOT(cc);
2111  bsr2 = cc->bs;
2112  buf2 = cc->mbuf;
2113 
2114  if (not1 != 0) {
2115  bitset_invert_to(bsr1, bs1);
2116  bsr1 = bs1;
2117  }
2118  if (not2 != 0) {
2119  bitset_invert_to(bsr2, bs2);
2120  bsr2 = bs2;
2121  }
2122  bitset_or(bsr1, bsr2);
2123  if (bsr1 != dest->bs) {
2124  bitset_copy(dest->bs, bsr1);
2125  bsr1 = dest->bs;
2126  }
2127  if (not1 != 0) {
2128  bitset_invert(dest->bs);
2129  }
2130 
2131  if (! ONIGENC_IS_SINGLEBYTE(enc)) {
2132  if (not1 != 0 && not2 != 0) {
2133  r = and_code_range_buf(buf1, 0, buf2, 0, &pbuf, env);
2134  }
2135  else {
2136  r = or_code_range_buf(enc, buf1, not1, buf2, not2, &pbuf, env);
2137  if (r == 0 && not1 != 0) {
2138  BBuf *tbuf = 0;
2139  r = not_code_range_buf(enc, pbuf, &tbuf, env);
2140  bbuf_free(pbuf);
2141  pbuf = tbuf;
2142  }
2143  }
2144  if (r != 0) {
2145  bbuf_free(pbuf);
2146  return r;
2147  }
2148 
2149  dest->mbuf = pbuf;
2150  bbuf_free(buf1);
2151  return r;
2152  }
2153  else
2154  return 0;
2155 }
2156 
2157 static void UNKNOWN_ESC_WARN(ScanEnv *env, int c);
2158 
2159 static int
2161 {
2163  switch (c) {
2164  case 'n': return '\n';
2165  case 't': return '\t';
2166  case 'r': return '\r';
2167  case 'f': return '\f';
2168  case 'a': return '\007';
2169  case 'b': return '\010';
2170  case 'e': return '\033';
2171  case 'v':
2173  return '\v';
2174  break;
2175 
2176  default:
2177  if (('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z'))
2178  UNKNOWN_ESC_WARN(env, c);
2179  break;
2180  }
2181  }
2182  return c;
2183 }
2184 
2185 #ifdef USE_NO_INVALID_QUANTIFIER
2186 #define is_invalid_quantifier_target(node) 0
2187 #else
2188 static int
2190 {
2191  switch (NTYPE(node)) {
2192  case NT_ANCHOR:
2193  return 1;
2194  break;
2195 
2196  case NT_ENCLOSE:
2197  /* allow enclosed elements */
2198  /* return is_invalid_quantifier_target(NENCLOSE(node)->target); */
2199  break;
2200 
2201  case NT_LIST:
2202  do {
2203  if (! is_invalid_quantifier_target(NCAR(node))) return 0;
2204  } while (IS_NOT_NULL(node = NCDR(node)));
2205  return 0;
2206  break;
2207 
2208  case NT_ALT:
2209  do {
2210  if (is_invalid_quantifier_target(NCAR(node))) return 1;
2211  } while (IS_NOT_NULL(node = NCDR(node)));
2212  break;
2213 
2214  default:
2215  break;
2216  }
2217  return 0;
2218 }
2219 #endif
2220 
2221 /* ?:0, *:1, +:2, ??:3, *?:4, +?:5 */
2222 static int
2224 {
2225  if (q->greedy) {
2226  if (q->lower == 0) {
2227  if (q->upper == 1) return 0;
2228  else if (IS_REPEAT_INFINITE(q->upper)) return 1;
2229  }
2230  else if (q->lower == 1) {
2231  if (IS_REPEAT_INFINITE(q->upper)) return 2;
2232  }
2233  }
2234  else {
2235  if (q->lower == 0) {
2236  if (q->upper == 1) return 3;
2237  else if (IS_REPEAT_INFINITE(q->upper)) return 4;
2238  }
2239  else if (q->lower == 1) {
2240  if (IS_REPEAT_INFINITE(q->upper)) return 5;
2241  }
2242  }
2243  return -1;
2244 }
2245 
2246 
2248  RQ_ASIS = 0, /* as is */
2249  RQ_DEL = 1, /* delete parent */
2250  RQ_A, /* to '*' */
2251  RQ_AQ, /* to '*?' */
2252  RQ_QQ, /* to '??' */
2253  RQ_P_QQ, /* to '+)??' */
2254  RQ_PQ_Q /* to '+?)?' */
2255 };
2256 
2257 static enum ReduceType const ReduceTypeTable[6][6] = {
2258  {RQ_DEL, RQ_A, RQ_A, RQ_QQ, RQ_AQ, RQ_ASIS}, /* '?' */
2259  {RQ_DEL, RQ_DEL, RQ_DEL, RQ_P_QQ, RQ_P_QQ, RQ_DEL}, /* '*' */
2260  {RQ_A, RQ_A, RQ_DEL, RQ_ASIS, RQ_P_QQ, RQ_DEL}, /* '+' */
2261  {RQ_DEL, RQ_AQ, RQ_AQ, RQ_DEL, RQ_AQ, RQ_AQ}, /* '??' */
2262  {RQ_DEL, RQ_DEL, RQ_DEL, RQ_DEL, RQ_DEL, RQ_DEL}, /* '*?' */
2263  {RQ_ASIS, RQ_PQ_Q, RQ_DEL, RQ_AQ, RQ_AQ, RQ_DEL} /* '+?' */
2264 };
2265 
2266 extern void
2268 {
2269  int pnum, cnum;
2270  QtfrNode *p, *c;
2271 
2272  p = NQTFR(pnode);
2273  c = NQTFR(cnode);
2274  pnum = popular_quantifier_num(p);
2275  cnum = popular_quantifier_num(c);
2276  if (pnum < 0 || cnum < 0) return ;
2277 
2278  switch (ReduceTypeTable[cnum][pnum]) {
2279  case RQ_DEL:
2280  *pnode = *cnode;
2281  break;
2282  case RQ_A:
2283  p->target = c->target;
2284  p->lower = 0; p->upper = REPEAT_INFINITE; p->greedy = 1;
2285  break;
2286  case RQ_AQ:
2287  p->target = c->target;
2288  p->lower = 0; p->upper = REPEAT_INFINITE; p->greedy = 0;
2289  break;
2290  case RQ_QQ:
2291  p->target = c->target;
2292  p->lower = 0; p->upper = 1; p->greedy = 0;
2293  break;
2294  case RQ_P_QQ:
2295  p->target = cnode;
2296  p->lower = 0; p->upper = 1; p->greedy = 0;
2297  c->lower = 1; c->upper = REPEAT_INFINITE; c->greedy = 1;
2298  return ;
2299  break;
2300  case RQ_PQ_Q:
2301  p->target = cnode;
2302  p->lower = 0; p->upper = 1; p->greedy = 1;
2303  c->lower = 1; c->upper = REPEAT_INFINITE; c->greedy = 0;
2304  return ;
2305  break;
2306  case RQ_ASIS:
2307  p->target = cnode;
2308  return ;
2309  break;
2310  }
2311 
2312  c->target = NULL_NODE;
2313  onig_node_free(cnode);
2314 }
2315 
2316 
2318  TK_EOT = 0, /* end of token */
2330  TK_ANYCHAR_ANYTIME, /* SQL '%' == .* */
2336  TK_CHAR_PROPERTY, /* \p{...}, \P{...} */
2340  /* in cc */
2344  TK_CC_AND, /* && */
2346 };
2347 
2348 typedef struct {
2350  int escaped;
2351  int base; /* is number: 8, 16 (used in [....]) */
2353  union {
2355  int c;
2357  struct {
2358  int subtype;
2360  } anchor;
2361  struct {
2362  int lower;
2363  int upper;
2364  int greedy;
2366  } repeat;
2367  struct {
2368  int num;
2369  int ref1;
2370  int* refs;
2371  int by_name;
2372 #ifdef USE_BACKREF_WITH_LEVEL
2374  int level; /* \k<name+n> */
2375 #endif
2376  } backref;
2377  struct {
2380  int gnum;
2381  int rel;
2382  } call;
2383  struct {
2384  int ctype;
2385  int not;
2386  } prop;
2387  } u;
2388 } OnigToken;
2389 
2390 
2391 static int
2393 {
2394  int low, up, syn_allow, non_low = 0;
2395  int r = 0;
2396  OnigCodePoint c;
2397  OnigEncoding enc = env->enc;
2398  UChar* p = *src;
2399  PFETCH_READY;
2400 
2402 
2403  if (PEND) {
2404  if (syn_allow)
2405  return 1; /* "....{" : OK! */
2406  else
2407  return ONIGERR_END_PATTERN_AT_LEFT_BRACE; /* "....{" syntax error */
2408  }
2409 
2410  if (! syn_allow) {
2411  c = PPEEK;
2412  if (c == ')' || c == '(' || c == '|') {
2414  }
2415  }
2416 
2417  low = onig_scan_unsigned_number(&p, end, env->enc);
2418  if (low < 0) return ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE;
2419  if (low > ONIG_MAX_REPEAT_NUM)
2421 
2422  if (p == *src) { /* can't read low */
2424  /* allow {,n} as {0,n} */
2425  low = 0;
2426  non_low = 1;
2427  }
2428  else
2429  goto invalid;
2430  }
2431 
2432  if (PEND) goto invalid;
2433  PFETCH(c);
2434  if (c == ',') {
2435  UChar* prev = p;
2436  up = onig_scan_unsigned_number(&p, end, env->enc);
2437  if (up < 0) return ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE;
2438  if (up > ONIG_MAX_REPEAT_NUM)
2440 
2441  if (p == prev) {
2442  if (non_low != 0)
2443  goto invalid;
2444  up = REPEAT_INFINITE; /* {n,} : {n,infinite} */
2445  }
2446  }
2447  else {
2448  if (non_low != 0)
2449  goto invalid;
2450 
2451  PUNFETCH;
2452  up = low; /* {n} : exact n times */
2453  r = 2; /* fixed */
2454  }
2455 
2456  if (PEND) goto invalid;
2457  PFETCH(c);
2459  if (c != MC_ESC(env->syntax)) goto invalid;
2460  PFETCH(c);
2461  }
2462  if (c != '}') goto invalid;
2463 
2464  if (!IS_REPEAT_INFINITE(up) && low > up) {
2466  }
2467 
2468  tok->type = TK_INTERVAL;
2469  tok->u.repeat.lower = low;
2470  tok->u.repeat.upper = up;
2471  *src = p;
2472  return r; /* 0: normal {n,m}, 2: fixed {n} */
2473 
2474  invalid:
2475  if (syn_allow)
2476  return 1; /* OK */
2477  else
2479 }
2480 
2481 /* \M-, \C-, \c, or \... */
2482 static int
2484 {
2485  int v;
2486  OnigCodePoint c;
2487  OnigEncoding enc = env->enc;
2488  UChar* p = *src;
2489  PFETCH_READY;
2490 
2491  if (PEND) return ONIGERR_END_PATTERN_AT_ESCAPE;
2492 
2493  PFETCH(c);
2494  switch (c) {
2495  case 'M':
2497  if (PEND) return ONIGERR_END_PATTERN_AT_META;
2498  PFETCH(c);
2499  if (c != '-') return ONIGERR_META_CODE_SYNTAX;
2500  if (PEND) return ONIGERR_END_PATTERN_AT_META;
2501  PFETCH(c);
2502  if (c == MC_ESC(env->syntax)) {
2503  v = fetch_escaped_value(&p, end, env);
2504  if (v < 0) return v;
2505  c = (OnigCodePoint )v;
2506  }
2507  c = ((c & 0xff) | 0x80);
2508  }
2509  else
2510  goto backslash;
2511  break;
2512 
2513  case 'C':
2516  PFETCH(c);
2517  if (c != '-') return ONIGERR_CONTROL_CODE_SYNTAX;
2518  goto control;
2519  }
2520  else
2521  goto backslash;
2522 
2523  case 'c':
2525  control:
2527  PFETCH(c);
2528  if (c == '?') {
2529  c = 0177;
2530  }
2531  else {
2532  if (c == MC_ESC(env->syntax)) {
2533  v = fetch_escaped_value(&p, end, env);
2534  if (v < 0) return v;
2535  c = (OnigCodePoint )v;
2536  }
2537  c &= 0x9f;
2538  }
2539  break;
2540  }
2541  /* fall through */
2542 
2543  default:
2544  {
2545  backslash:
2546  c = conv_backslash_value(c, env);
2547  }
2548  break;
2549  }
2550 
2551  *src = p;
2552  return c;
2553 }
2554 
2555 static int fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env);
2556 
2557 static OnigCodePoint
2559 {
2560  switch (start) {
2561  case '<': return (OnigCodePoint )'>'; break;
2562  case '\'': return (OnigCodePoint )'\''; break;
2563  case '(': return (OnigCodePoint )')'; break;
2564  case '{': return (OnigCodePoint )'}'; break;
2565  default:
2566  break;
2567  }
2568 
2569  return (OnigCodePoint )0;
2570 }
2571 
2572 #ifdef USE_NAMED_GROUP
2573 #ifdef USE_BACKREF_WITH_LEVEL
2574 /*
2575  \k<name+n>, \k<name-n>
2576  \k<num+n>, \k<num-n>
2577  \k<-num+n>, \k<-num-n>
2578 */
2579 static int
2581  UChar** rname_end, ScanEnv* env,
2582  int* rback_num, int* rlevel)
2583 {
2584  int r, sign, is_num, exist_level;
2585  OnigCodePoint end_code;
2586  OnigCodePoint c = 0;
2587  OnigEncoding enc = env->enc;
2588  UChar *name_end;
2589  UChar *pnum_head;
2590  UChar *p = *src;
2591  PFETCH_READY;
2592 
2593  *rback_num = 0;
2594  is_num = exist_level = 0;
2595  sign = 1;
2596  pnum_head = *src;
2597 
2598  end_code = get_name_end_code_point(start_code);
2599 
2600  name_end = end;
2601  r = 0;
2602  if (PEND) {
2603  return ONIGERR_EMPTY_GROUP_NAME;
2604  }
2605  else {
2606  PFETCH(c);
2607  if (c == end_code)
2608  return ONIGERR_EMPTY_GROUP_NAME;
2609 
2610  if (ONIGENC_IS_CODE_DIGIT(enc, c)) {
2611  is_num = 1;
2612  }
2613  else if (c == '-') {
2614  is_num = 2;
2615  sign = -1;
2616  pnum_head = p;
2617  }
2618  else if (!ONIGENC_IS_CODE_WORD(enc, c)) {
2620  }
2621  }
2622 
2623  while (!PEND) {
2624  name_end = p;
2625  PFETCH(c);
2626  if (c == end_code || c == ')' || c == '+' || c == '-') {
2627  if (is_num == 2) r = ONIGERR_INVALID_GROUP_NAME;
2628  break;
2629  }
2630 
2631  if (is_num != 0) {
2632  if (ONIGENC_IS_CODE_DIGIT(enc, c)) {
2633  is_num = 1;
2634  }
2635  else {
2637  is_num = 0;
2638  }
2639  }
2640  else if (!ONIGENC_IS_CODE_WORD(enc, c)) {
2642  }
2643  }
2644 
2645  if (r == 0 && c != end_code) {
2646  if (c == '+' || c == '-') {
2647  int level;
2648  int flag = (c == '-' ? -1 : 1);
2649 
2650  PFETCH(c);
2651  if (! ONIGENC_IS_CODE_DIGIT(enc, c)) goto err;
2652  PUNFETCH;
2653  level = onig_scan_unsigned_number(&p, end, enc);
2654  if (level < 0) return ONIGERR_TOO_BIG_NUMBER;
2655  *rlevel = (level * flag);
2656  exist_level = 1;
2657 
2658  PFETCH(c);
2659  if (c == end_code)
2660  goto end;
2661  }
2662 
2663  err:
2665  name_end = end;
2666  }
2667 
2668  end:
2669  if (r == 0) {
2670  if (is_num != 0) {
2671  *rback_num = onig_scan_unsigned_number(&pnum_head, name_end, enc);
2672  if (*rback_num < 0) return ONIGERR_TOO_BIG_NUMBER;
2673  else if (*rback_num == 0) goto err;
2674 
2675  *rback_num *= sign;
2676  }
2677 
2678  *rname_end = name_end;
2679  *src = p;
2680  return (exist_level ? 1 : 0);
2681  }
2682  else {
2683  onig_scan_env_set_error_string(env, r, *src, name_end);
2684  return r;
2685  }
2686 }
2687 #endif /* USE_BACKREF_WITH_LEVEL */
2688 
2689 /*
2690  ref: 0 -> define name (don't allow number name)
2691  1 -> reference name (allow number name)
2692 */
2693 static int
2695  UChar** rname_end, ScanEnv* env, int* rback_num, int ref)
2696 {
2697  int r, is_num, sign;
2698  OnigCodePoint end_code;
2699  OnigCodePoint c = 0;
2700  OnigEncoding enc = env->enc;
2701  UChar *name_end;
2702  UChar *pnum_head;
2703  UChar *p = *src;
2704  PFETCH_READY;
2705 
2706  *rback_num = 0;
2707 
2708  end_code = get_name_end_code_point(start_code);
2709 
2710  name_end = end;
2711  pnum_head = *src;
2712  r = 0;
2713  is_num = 0;
2714  sign = 1;
2715  if (PEND) {
2716  return ONIGERR_EMPTY_GROUP_NAME;
2717  }
2718  else {
2719  PFETCH(c);
2720  if (c == end_code)
2721  return ONIGERR_EMPTY_GROUP_NAME;
2722 
2723  if (ONIGENC_IS_CODE_DIGIT(enc, c)) {
2724  if (ref == 1)
2725  is_num = 1;
2726  else {
2728  is_num = 0;
2729  }
2730  }
2731  else if (c == '-') {
2732  if (ref == 1) {
2733  is_num = 2;
2734  sign = -1;
2735  pnum_head = p;
2736  }
2737  else {
2739  is_num = 0;
2740  }
2741  }
2742  else if (!ONIGENC_IS_CODE_WORD(enc, c)) {
2744  }
2745  }
2746 
2747  if (r == 0) {
2748  while (!PEND) {
2749  name_end = p;
2750  PFETCH(c);
2751  if (c == end_code || c == ')') {
2752  if (is_num == 2) r = ONIGERR_INVALID_GROUP_NAME;
2753  break;
2754  }
2755 
2756  if (is_num != 0) {
2757  if (ONIGENC_IS_CODE_DIGIT(enc, c)) {
2758  is_num = 1;
2759  }
2760  else {
2761  if (!ONIGENC_IS_CODE_WORD(enc, c))
2763  else
2765 
2766  is_num = 0;
2767  }
2768  }
2769  else {
2770  if (!ONIGENC_IS_CODE_WORD(enc, c)) {
2772  }
2773  }
2774  }
2775 
2776  if (c != end_code) {
2778  name_end = end;
2779  }
2780 
2781  if (is_num != 0) {
2782  *rback_num = onig_scan_unsigned_number(&pnum_head, name_end, enc);
2783  if (*rback_num < 0) return ONIGERR_TOO_BIG_NUMBER;
2784  else if (*rback_num == 0) {
2786  goto err;
2787  }
2788 
2789  *rback_num *= sign;
2790  }
2791 
2792  *rname_end = name_end;
2793  *src = p;
2794  return 0;
2795  }
2796  else {
2797  while (!PEND) {
2798  name_end = p;
2799  PFETCH(c);
2800  if (c == end_code || c == ')')
2801  break;
2802  }
2803  if (PEND)
2804  name_end = end;
2805 
2806  err:
2807  onig_scan_env_set_error_string(env, r, *src, name_end);
2808  return r;
2809  }
2810 }
2811 #else
2812 static int
2813 fetch_name(OnigCodePoint start_code, UChar** src, UChar* end,
2814  UChar** rname_end, ScanEnv* env, int* rback_num, int ref)
2815 {
2816  int r, is_num, sign;
2817  OnigCodePoint end_code;
2818  OnigCodePoint c = 0;
2819  UChar *name_end;
2820  OnigEncoding enc = env->enc;
2821  UChar *pnum_head;
2822  UChar *p = *src;
2823  PFETCH_READY;
2824 
2825  *rback_num = 0;
2826 
2827  end_code = get_name_end_code_point(start_code);
2828 
2829  *rname_end = name_end = end;
2830  r = 0;
2831  pnum_head = *src;
2832  is_num = 0;
2833  sign = 1;
2834 
2835  if (PEND) {
2836  return ONIGERR_EMPTY_GROUP_NAME;
2837  }
2838  else {
2839  PFETCH(c);
2840  if (c == end_code)
2841  return ONIGERR_EMPTY_GROUP_NAME;
2842 
2843  if (ONIGENC_IS_CODE_DIGIT(enc, c)) {
2844  is_num = 1;
2845  }
2846  else if (c == '-') {
2847  is_num = 2;
2848  sign = -1;
2849  pnum_head = p;
2850  }
2851  else {
2853  }
2854  }
2855 
2856  while (!PEND) {
2857  name_end = p;
2858 
2859  PFETCH(c);
2860  if (c == end_code || c == ')') break;
2861  if (! ONIGENC_IS_CODE_DIGIT(enc, c))
2863  }
2864  if (r == 0 && c != end_code) {
2866  name_end = end;
2867  }
2868 
2869  if (r == 0) {
2870  *rback_num = onig_scan_unsigned_number(&pnum_head, name_end, enc);
2871  if (*rback_num < 0) return ONIGERR_TOO_BIG_NUMBER;
2872  else if (*rback_num == 0) {
2874  goto err;
2875  }
2876  *rback_num *= sign;
2877 
2878  *rname_end = name_end;
2879  *src = p;
2880  return 0;
2881  }
2882  else {
2883  err:
2884  onig_scan_env_set_error_string(env, r, *src, name_end);
2885  return r;
2886  }
2887 }
2888 #endif /* USE_NAMED_GROUP */
2889 
2891  UChar* pat, UChar* pat_end, const UChar *fmt, va_list args);
2892 
2893 static void
2894 onig_syntax_warn(ScanEnv *env, const char *fmt, ...)
2895 {
2896  va_list args;
2897  UChar buf[WARN_BUFSIZE];
2898  va_start(args, fmt);
2900  env->pattern, env->pattern_end,
2901  (const UChar *)fmt, args);
2902  va_end(args);
2903  if (env->sourcefile == NULL)
2904  rb_warn("%s", (char *)buf);
2905  else
2906  rb_compile_warn(env->sourcefile, env->sourceline, "%s", (char *)buf);
2907 }
2908 
2909 static void
2911 {
2912  if (onig_warn == onig_null_warn) return ;
2913 
2916  onig_syntax_warn(env, "character class has '%s' without escape", c);
2917  }
2918 }
2919 
2920 static void
2922 {
2923  if (onig_warn == onig_null_warn) return ;
2924 
2926  onig_syntax_warn(env, "regular expression has '%s' without escape", c);
2927  }
2928 }
2929 
2930 static void
2932 {
2934 
2936  !(env->warnings_flag & ONIG_SYN_WARN_CC_DUP)) {
2938  onig_syntax_warn(env, "character class has duplicated range");
2939  }
2940 }
2941 
2942 static void
2944 {
2946  onig_syntax_warn(env, "Unknown escape \\%c is ignored", c);
2947 }
2948 
2949 static UChar*
2951  UChar **next, OnigEncoding enc)
2952 {
2953  int i;
2954  OnigCodePoint x;
2955  UChar *q;
2956  UChar *p = from;
2957 
2958  while (p < to) {
2959  x = ONIGENC_MBC_TO_CODE(enc, p, to);
2960  q = p + enclen(enc, p, to);
2961  if (x == s[0]) {
2962  for (i = 1; i < n && q < to; i++) {
2963  x = ONIGENC_MBC_TO_CODE(enc, q, to);
2964  if (x != s[i]) break;
2965  q += enclen(enc, q, to);
2966  }
2967  if (i >= n) {
2968  if (IS_NOT_NULL(next))
2969  *next = q;
2970  return p;
2971  }
2972  }
2973  p = q;
2974  }
2975  return NULL_UCHARP;
2976 }
2977 
2978 static int
2980  OnigCodePoint bad, OnigEncoding enc, const OnigSyntaxType* syn)
2981 {
2982  int i, in_esc;
2983  OnigCodePoint x;
2984  UChar *q;
2985  UChar *p = from;
2986 
2987  in_esc = 0;
2988  while (p < to) {
2989  if (in_esc) {
2990  in_esc = 0;
2991  p += enclen(enc, p, to);
2992  }
2993  else {
2994  x = ONIGENC_MBC_TO_CODE(enc, p, to);
2995  q = p + enclen(enc, p, to);
2996  if (x == s[0]) {
2997  for (i = 1; i < n && q < to; i++) {
2998  x = ONIGENC_MBC_TO_CODE(enc, q, to);
2999  if (x != s[i]) break;
3000  q += enclen(enc, q, to);
3001  }
3002  if (i >= n) return 1;
3003  p += enclen(enc, p, to);
3004  }
3005  else {
3006  x = ONIGENC_MBC_TO_CODE(enc, p, to);
3007  if (x == bad) return 0;
3008  else if (x == MC_ESC(syn)) in_esc = 1;
3009  p = q;
3010  }
3011  }
3012  }
3013  return 0;
3014 }
3015 
/*
 * Read one token from *src while the parser is inside a character class and
 * describe it in `tok`.
 *
 * Token kinds produced by the visible code:
 *   ']'                        -> TK_CC_CLOSE
 *   '-'                        -> TK_CC_RANGE
 *   escape + w/W d/D s/S       -> TK_CHAR_TYPE (ctype and negation stored in
 *                                 tok->u.prop); h/H likewise when the syntax
 *                                 enables ONIG_SYN_OP2_ESC_H_XDIGIT
 *   escape + p/P followed by { -> TK_CHAR_PROPERTY (further syntax conditions
 *                                 are on lines missing from this copy)
 *   escape + x / u / octal     -> TK_CODE_POINT or TK_RAW_BYTE, with tok->base
 *                                 set to 16 or 8
 *   other escapes              -> resolved through fetch_escaped_value()
 *   '[' followed by ':'        -> TK_POSIX_BRACKET_OPEN when a closing ":]"
 *                                 is found ahead; otherwise handled at the
 *                                 cc_in_cc label
 *   '&' followed by '&'        -> TK_CC_AND
 *   anything else              -> TK_CHAR with the code point in tok->u.c
 *
 * Returns tok->type (TK_EOT at end of pattern) after storing the new read
 * position in *src, or a negative ONIGERR_* value, in which case *src is
 * left untouched.
 *
 * NOTE(review): several lines are missing from this copy, visible as gaps in
 * the listing numbers: 3017 (function name and parameters), 3045, 3101, 3106,
 * 3124, 3131, 3161, 3176, 3217 and 3226 (mostly syntax-option conditions).
 * The name fetch_token_in_cc is taken from upstream Onigmo -- confirm.
 */
3016 static int
3018 {
3019  int num;
3020  OnigCodePoint c, c2;
3021  const OnigSyntaxType* syn = env->syntax;
3022  OnigEncoding enc = env->enc;
3023  UChar* prev;
3024  UChar* p = *src;
3025  PFETCH_READY;
3026 
3027  if (PEND) {
3028  tok->type = TK_EOT;
3029  return tok->type;
3030  }
3031 
   /* default result: a plain, unescaped character */
3032  PFETCH(c);
3033  tok->type = TK_CHAR;
3034  tok->base = 0;
3035  tok->u.c = c;
3036  tok->escaped = 0;
3037 
3038  if (c == ']') {
3039  tok->type = TK_CC_CLOSE;
3040  }
3041  else if (c == '-') {
3042  tok->type = TK_CC_RANGE;
3043  }
3044  else if (c == MC_ESC(syn)) {
3046  goto end;
3047 
3048  if (PEND) return ONIGERR_END_PATTERN_AT_ESCAPE;
3049 
3050  PFETCH(c);
3051  tok->escaped = 1;
3052  tok->u.c = c;
3053  switch (c) {
3054  case 'w':
3055  tok->type = TK_CHAR_TYPE;
3056  tok->u.prop.ctype = ONIGENC_CTYPE_WORD;
3057  tok->u.prop.not = 0;
3058  break;
3059  case 'W':
3060  tok->type = TK_CHAR_TYPE;
3061  tok->u.prop.ctype = ONIGENC_CTYPE_WORD;
3062  tok->u.prop.not = 1;
3063  break;
3064  case 'd':
3065  tok->type = TK_CHAR_TYPE;
3066  tok->u.prop.ctype = ONIGENC_CTYPE_DIGIT;
3067  tok->u.prop.not = 0;
3068  break;
3069  case 'D':
3070  tok->type = TK_CHAR_TYPE;
3071  tok->u.prop.ctype = ONIGENC_CTYPE_DIGIT;
3072  tok->u.prop.not = 1;
3073  break;
3074  case 's':
3075  tok->type = TK_CHAR_TYPE;
3076  tok->u.prop.ctype = ONIGENC_CTYPE_SPACE;
3077  tok->u.prop.not = 0;
3078  break;
3079  case 'S':
3080  tok->type = TK_CHAR_TYPE;
3081  tok->u.prop.ctype = ONIGENC_CTYPE_SPACE;
3082  tok->u.prop.not = 1;
3083  break;
3084  case 'h':
3085  if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_H_XDIGIT)) break;
3086  tok->type = TK_CHAR_TYPE;
3087  tok->u.prop.ctype = ONIGENC_CTYPE_XDIGIT;
3088  tok->u.prop.not = 0;
3089  break;
3090  case 'H':
3091  if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_H_XDIGIT)) break;
3092  tok->type = TK_CHAR_TYPE;
3093  tok->u.prop.ctype = ONIGENC_CTYPE_XDIGIT;
3094  tok->u.prop.not = 1;
3095  break;
3096 
3097  case 'p':
3098  case 'P':
3099  c2 = PPEEK;
3100  if (c2 == '{' &&
3102  PINC;
3103  tok->type = TK_CHAR_PROPERTY;
3104  tok->u.prop.not = (c == 'P' ? 1 : 0);
3105 
3107  PFETCH(c2);
   /* a leading '^' inside the braces flips the negation chosen by p / P */
3108  if (c2 == '^') {
3109  tok->u.prop.not = (tok->u.prop.not == 0 ? 1 : 0);
3110  }
3111  else
3112  PUNFETCH;
3113  }
3114  }
3115  else {
3116  onig_syntax_warn(env, "invalid Unicode Property \\%c", c);
3117  }
3118  break;
3119 
3120  case 'x':
3121  if (PEND) break;
3122 
3123  prev = p;
3125  PINC;
3126  num = scan_unsigned_hexadecimal_number(&p, end, 0, 8, enc);
3127  if (num < 0) return ONIGERR_TOO_BIG_WIDE_CHAR_VALUE;
3128  if (!PEND) {
3129  c2 = PPEEK;
3130  if (ONIGENC_IS_CODE_XDIGIT(enc, c2))
3132  }
3133 
   /* accept only when at least one digit was read and '}' follows */
3134  if (p > prev + enclen(enc, prev, end) && !PEND && (PPEEK_IS('}'))) {
3135  PINC;
3136  tok->type = TK_CODE_POINT;
3137  tok->base = 16;
3138  tok->u.code = (OnigCodePoint )num;
3139  }
3140  else {
3141  /* can't read nothing or invalid format */
3142  p = prev;
3143  }
3144  }
3145  else if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_X_HEX2)) {
3146  num = scan_unsigned_hexadecimal_number(&p, end, 0, 2, enc);
3147  if (num < 0) return ONIGERR_TOO_BIG_NUMBER;
3148  if (p == prev) { /* can't read nothing. */
3149  num = 0; /* but, it's not error */
3150  }
3151  tok->type = TK_RAW_BYTE;
3152  tok->base = 16;
3153  tok->u.c = num;
3154  }
3155  break;
3156 
3157  case 'u':
3158  if (PEND) break;
3159 
3160  prev = p;
3162  num = scan_unsigned_hexadecimal_number(&p, end, 4, 4, enc);
3163  if (num < -1) return ONIGERR_TOO_SHORT_DIGITS;
3164  else if (num < 0) return ONIGERR_TOO_BIG_NUMBER;
3165  if (p == prev) { /* can't read nothing. */
3166  num = 0; /* but, it's not error */
3167  }
3168  tok->type = TK_CODE_POINT;
3169  tok->base = 16;
3170  tok->u.code = (OnigCodePoint )num;
3171  }
3172  break;
3173 
3174  case '0':
3175  case '1': case '2': case '3': case '4': case '5': case '6': case '7':
3177  PUNFETCH;
3178  prev = p;
3179  num = scan_unsigned_octal_number(&p, end, 3, enc);
3180  if (num < 0) return ONIGERR_TOO_BIG_NUMBER;
3181  if (p == prev) { /* can't read nothing. */
3182  num = 0; /* but, it's not error */
3183  }
3184  tok->type = TK_RAW_BYTE;
3185  tok->base = 8;
3186  tok->u.c = num;
3187  }
3188  break;
3189 
3190  default:
3191  PUNFETCH;
3192  num = fetch_escaped_value(&p, end, env);
3193  if (num < 0) return num;
   /* the escape changed the value: report it as a code point */
3194  if (tok->u.c != num) {
3195  tok->u.code = (OnigCodePoint )num;
3196  tok->type = TK_CODE_POINT;
3197  }
3198  break;
3199  }
3200  }
3201  else if (c == '[') {
3202  if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_POSIX_BRACKET) && (PPEEK_IS(':'))) {
3203  OnigCodePoint send[] = { (OnigCodePoint )':', (OnigCodePoint )']' };
3204  tok->backp = p; /* point at '[' is read */
3205  PINC;
   /* look ahead for ":]" before the next ']' to decide whether this is a
      POSIX bracket */
3206  if (str_exist_check_with_esc(send, 2, p, end,
3207  (OnigCodePoint )']', enc, syn)) {
3208  tok->type = TK_POSIX_BRACKET_OPEN;
3209  }
3210  else {
3211  PUNFETCH;
3212  goto cc_in_cc;
3213  }
3214  }
3215  else {
3216  cc_in_cc:
3218  tok->type = TK_CC_CC_OPEN;
3219  }
3220  else {
3221  CC_ESC_WARN(env, (UChar* )"[");
3222  }
3223  }
3224  }
3225  else if (c == '&') {
3227  !PEND && (PPEEK_IS('&'))) {
3228  PINC;
3229  tok->type = TK_CC_AND;
3230  }
3231  }
3232 
3233  end:
3234  *src = p;
3235  return tok->type;
3236 }
3237 
3238 #ifdef USE_NAMED_GROUP
/*
 * Parse the name or number of a back reference starting at *src and fill
 * `tok` as a TK_BACKREF token.
 *
 * The name is read with fetch_name_with_level() (USE_BACKREF_WITH_LEVEL) or
 * fetch_name().  A non-zero back_num means the reference was numeric; a
 * negative value is a relative reference converted with BACKREF_REL_TO_ABS.
 * A zero back_num means a named reference, resolved with
 * onig_name_to_group_numbers(); a single group is stored in ref1, several
 * groups are stored through the refs pointer.
 *
 * Returns 0 on success (after advancing *src) or a negative ONIGERR_* value.
 *
 * NOTE(review): listing lines 3240 (function name and leading parameters),
 * 3271, 3284, 3286 and 3288 are missing from this copy.  The visible callers
 * invoke fetch_named_backref_token(c, tok, &p, end, env) -- confirm the
 * signature and the missing conditions against the upstream file.
 */
3239 static int
3241  UChar* end, ScanEnv* env)
3242 {
3243  int r, num;
3244  const OnigSyntaxType* syn = env->syntax;
3245  UChar* prev;
3246  UChar* p = *src;
3247  UChar* name_end;
3248  int* backs;
3249  int back_num;
3250 
3251  prev = p;
3252 
3253 #ifdef USE_BACKREF_WITH_LEVEL
3254  name_end = NULL_UCHARP; /* no need. escape gcc warning. */
3255  r = fetch_name_with_level(c, &p, end, &name_end,
3256  env, &back_num, &tok->u.backref.level);
3257  if (r == 1) tok->u.backref.exist_level = 1;
3258  else tok->u.backref.exist_level = 0;
3259 #else
3260  r = fetch_name(&p, end, &name_end, env, &back_num, 1);
3261 #endif
3262  if (r < 0) return r;
3263 
   /* numeric reference */
3264  if (back_num != 0) {
3265  if (back_num < 0) {
3266  back_num = BACKREF_REL_TO_ABS(back_num, env);
3267  if (back_num <= 0)
3268  return ONIGERR_INVALID_BACKREF;
3269  }
3270 
3272  if (back_num > env->num_mem ||
3273  IS_NULL(SCANENV_MEM_NODES(env)[back_num]))
3274  return ONIGERR_INVALID_BACKREF;
3275  }
3276  tok->type = TK_BACKREF;
3277  tok->u.backref.by_name = 0;
3278  tok->u.backref.num = 1;
3279  tok->u.backref.ref1 = back_num;
3280  }
   /* named reference: look up every group registered under the name */
3281  else {
3282  num = onig_name_to_group_numbers(env->reg, prev, name_end, &backs);
3283  if (num <= 0) {
3285  ONIGERR_UNDEFINED_NAME_REFERENCE, prev, name_end);
3287  }
3289  int i;
3290  for (i = 0; i < num; i++) {
3291  if (backs[i] > env->num_mem ||
3292  IS_NULL(SCANENV_MEM_NODES(env)[backs[i]]))
3293  return ONIGERR_INVALID_BACKREF;
3294  }
3295  }
3296 
3297  tok->type = TK_BACKREF;
3298  tok->u.backref.by_name = 1;
3299  if (num == 1) {
3300  tok->u.backref.num = 1;
3301  tok->u.backref.ref1 = backs[0];
3302  }
3303  else {
3304  tok->u.backref.num = num;
3305  tok->u.backref.refs = backs;
3306  }
3307  }
3308  *src = p;
3309  return 0;
3310 }
3311 #endif
3312 
/*
 * Read one token from *src outside of a character class and describe it in
 * `tok`.
 *
 * tok->type starts as TK_STRING and is refined by two switches: one for the
 * character after the syntax escape character, one for unescaped characters.
 * The visible code produces repeat operators (TK_OP_REPEAT, with greedy and
 * possessive flags decided at the greedy_check / possessive_check labels),
 * TK_ALT, TK_SUBEXP_OPEN / TK_SUBEXP_CLOSE, TK_ANCHOR, TK_CHAR_TYPE,
 * TK_CHAR_PROPERTY, TK_CODE_POINT, TK_RAW_BYTE, TK_BACKREF, TK_CALL,
 * TK_QUOTE_OPEN, TK_LINEBREAK, TK_KEEP, TK_ANYCHAR, TK_ANYCHAR_ANYTIME and
 * TK_CC_OPEN.  "(?#...)" comments and, under the extended option,
 * '#' comments and white space are skipped by jumping back to `start`.
 *
 * Returns tok->type (TK_EOT at end of pattern) after storing the new read
 * position in *src, or a negative ONIGERR_* value.
 *
 * NOTE(review): many lines are missing from this copy, visible as gaps in
 * the listing numbers (for example 3314, which carries the function name and
 * parameters, and 3345, 3367, 3375, 3377, 3396, 3451, 3458 ...).  Most of
 * them appear to be syntax-option conditions that open the blocks closed
 * below.  The name fetch_token is taken from upstream Onigmo -- confirm.
 */
3313 static int
3315 {
3316  int r, num;
3317  OnigCodePoint c;
3318  OnigEncoding enc = env->enc;
3319  const OnigSyntaxType* syn = env->syntax;
3320  UChar* prev;
3321  UChar* p = *src;
3322  PFETCH_READY;
3323 
3324  start:
3325  if (PEND) {
3326  tok->type = TK_EOT;
3327  return tok->type;
3328  }
3329 
3330  tok->type = TK_STRING;
3331  tok->base = 0;
3332  tok->backp = p;
3333 
3334  PFETCH(c);
3335  if (IS_MC_ESC_CODE(c, syn)) {
3336  if (PEND) return ONIGERR_END_PATTERN_AT_ESCAPE;
3337 
   /* backp now points at the character that follows the escape */
3338  tok->backp = p;
3339  PFETCH(c);
3340 
3341  tok->u.c = c;
3342  tok->escaped = 1;
3343  switch (c) {
3344  case '*':
3346  tok->type = TK_OP_REPEAT;
3347  tok->u.repeat.lower = 0;
3348  tok->u.repeat.upper = REPEAT_INFINITE;
3349  goto greedy_check;
3350  break;
3351 
3352  case '+':
3353  if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_PLUS_ONE_INF)) break;
3354  tok->type = TK_OP_REPEAT;
3355  tok->u.repeat.lower = 1;
3356  tok->u.repeat.upper = REPEAT_INFINITE;
3357  goto greedy_check;
3358  break;
3359 
3360  case '?':
3361  if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_QMARK_ZERO_ONE)) break;
3362  tok->type = TK_OP_REPEAT;
3363  tok->u.repeat.lower = 0;
3364  tok->u.repeat.upper = 1;
   /* shared tail of every repeat operator: a following '?' clears the greedy
      flag, a following '+' sets the possessive flag (both subject to syntax
      conditions on lines missing from this copy) */
3365  greedy_check:
3366  if (!PEND && PPEEK_IS('?') &&
3368  PFETCH(c);
3369  tok->u.repeat.greedy = 0;
3370  tok->u.repeat.possessive = 0;
3371  }
3372  else {
3373  possessive_check:
3374  if (!PEND && PPEEK_IS('+') &&
3376  tok->type != TK_INTERVAL) ||
3378  tok->type == TK_INTERVAL))) {
3379  PFETCH(c);
3380  tok->u.repeat.greedy = 1;
3381  tok->u.repeat.possessive = 1;
3382  }
3383  else {
3384  tok->u.repeat.greedy = 1;
3385  tok->u.repeat.possessive = 0;
3386  }
3387  }
3388  break;
3389 
3390  case '{':
3391  if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_BRACE_INTERVAL)) break;
3392  r = fetch_range_quantifier(&p, end, tok, env);
3393  if (r < 0) return r; /* error */
3394  if (r == 0) goto greedy_check;
3395  else if (r == 2) { /* {n} */
3397  goto possessive_check;
3398 
3399  goto greedy_check;
3400  }
3401  /* r == 1 : normal char */
3402  break;
3403 
3404  case '|':
3405  if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_VBAR_ALT)) break;
3406  tok->type = TK_ALT;
3407  break;
3408 
3409  case '(':
3410  if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_LPAREN_SUBEXP)) break;
3411  tok->type = TK_SUBEXP_OPEN;
3412  break;
3413 
3414  case ')':
3415  if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_LPAREN_SUBEXP)) break;
3416  tok->type = TK_SUBEXP_CLOSE;
3417  break;
3418 
3419  case 'w':
3420  if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_W_WORD)) break;
3421  tok->type = TK_CHAR_TYPE;
3422  tok->u.prop.ctype = ONIGENC_CTYPE_WORD;
3423  tok->u.prop.not = 0;
3424  break;
3425 
3426  case 'W':
3427  if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_W_WORD)) break;
3428  tok->type = TK_CHAR_TYPE;
3429  tok->u.prop.ctype = ONIGENC_CTYPE_WORD;
3430  tok->u.prop.not = 1;
3431  break;
3432 
3433  case 'b':
3434  if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_B_WORD_BOUND)) break;
3435  tok->type = TK_ANCHOR;
3436  tok->u.anchor.subtype = ANCHOR_WORD_BOUND;
3437  tok->u.anchor.ascii_range = IS_ASCII_RANGE(env->option)
3438  && ! IS_WORD_BOUND_ALL_RANGE(env->option);
3439  break;
3440 
3441  case 'B':
3442  if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_B_WORD_BOUND)) break;
3443  tok->type = TK_ANCHOR;
3444  tok->u.anchor.subtype = ANCHOR_NOT_WORD_BOUND;
3445  tok->u.anchor.ascii_range = IS_ASCII_RANGE(env->option)
3446  && ! IS_WORD_BOUND_ALL_RANGE(env->option);
3447  break;
3448 
3449 #ifdef USE_WORD_BEGIN_END
3450  case '<':
3452  tok->type = TK_ANCHOR;
3453  tok->u.anchor.subtype = ANCHOR_WORD_BEGIN;
3454  tok->u.anchor.ascii_range = IS_ASCII_RANGE(env->option);
3455  break;
3456 
3457  case '>':
3459  tok->type = TK_ANCHOR;
3460  tok->u.anchor.subtype = ANCHOR_WORD_END;
3461  tok->u.anchor.ascii_range = IS_ASCII_RANGE(env->option);
3462  break;
3463 #endif
3464 
3465  case 's':
3466  if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_S_WHITE_SPACE)) break;
3467  tok->type = TK_CHAR_TYPE;
3468  tok->u.prop.ctype = ONIGENC_CTYPE_SPACE;
3469  tok->u.prop.not = 0;
3470  break;
3471 
3472  case 'S':
3473  if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_S_WHITE_SPACE)) break;
3474  tok->type = TK_CHAR_TYPE;
3475  tok->u.prop.ctype = ONIGENC_CTYPE_SPACE;
3476  tok->u.prop.not = 1;
3477  break;
3478 
3479  case 'd':
3480  if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_D_DIGIT)) break;
3481  tok->type = TK_CHAR_TYPE;
3482  tok->u.prop.ctype = ONIGENC_CTYPE_DIGIT;
3483  tok->u.prop.not = 0;
3484  break;
3485 
3486  case 'D':
3487  if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_D_DIGIT)) break;
3488  tok->type = TK_CHAR_TYPE;
3489  tok->u.prop.ctype = ONIGENC_CTYPE_DIGIT;
3490  tok->u.prop.not = 1;
3491  break;
3492 
3493  case 'h':
3494  if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_H_XDIGIT)) break;
3495  tok->type = TK_CHAR_TYPE;
3496  tok->u.prop.ctype = ONIGENC_CTYPE_XDIGIT;
3497  tok->u.prop.not = 0;
3498  break;
3499 
3500  case 'H':
3501  if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_H_XDIGIT)) break;
3502  tok->type = TK_CHAR_TYPE;
3503  tok->u.prop.ctype = ONIGENC_CTYPE_XDIGIT;
3504  tok->u.prop.not = 1;
3505  break;
3506 
3507  case 'A':
3508  if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_AZ_BUF_ANCHOR)) break;
3509  begin_buf:
3510  tok->type = TK_ANCHOR;
3511  tok->u.anchor.subtype = ANCHOR_BEGIN_BUF;
3512  break;
3513 
3514  case 'Z':
3515  if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_AZ_BUF_ANCHOR)) break;
3516  tok->type = TK_ANCHOR;
3517  tok->u.anchor.subtype = ANCHOR_SEMI_END_BUF;
3518  break;
3519 
3520  case 'z':
3521  if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_AZ_BUF_ANCHOR)) break;
3522  end_buf:
3523  tok->type = TK_ANCHOR;
3524  tok->u.anchor.subtype = ANCHOR_END_BUF;
3525  break;
3526 
3527  case 'G':
3529  tok->type = TK_ANCHOR;
3530  tok->u.anchor.subtype = ANCHOR_BEGIN_POSITION;
3531  break;
3532 
3533  case '`':
3535  goto begin_buf;
3536  break;
3537 
3538  case '\'':
3540  goto end_buf;
3541  break;
3542 
3543  case 'x':
3544  if (PEND) break;
3545 
3546  prev = p;
3548  PINC;
3549  num = scan_unsigned_hexadecimal_number(&p, end, 0, 8, enc);
3550  if (num < 0) return ONIGERR_TOO_BIG_WIDE_CHAR_VALUE;
3551  if (!PEND) {
3552  if (ONIGENC_IS_CODE_XDIGIT(enc, PPEEK))
3554  }
3555 
   /* accept only when at least one digit was read and '}' follows */
3556  if ((p > prev + enclen(enc, prev, end)) && !PEND && PPEEK_IS('}')) {
3557  PINC;
3558  tok->type = TK_CODE_POINT;
3559  tok->u.code = (OnigCodePoint )num;
3560  }
3561  else {
3562  /* can't read nothing or invalid format */
3563  p = prev;
3564  }
3565  }
3566  else if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_X_HEX2)) {
3567  num = scan_unsigned_hexadecimal_number(&p, end, 0, 2, enc);
3568  if (num < 0) return ONIGERR_TOO_BIG_NUMBER;
3569  if (p == prev) { /* can't read nothing. */
3570  num = 0; /* but, it's not error */
3571  }
3572  tok->type = TK_RAW_BYTE;
3573  tok->base = 16;
3574  tok->u.c = num;
3575  }
3576  break;
3577 
3578  case 'u':
3579  if (PEND) break;
3580 
3581  prev = p;
3583  num = scan_unsigned_hexadecimal_number(&p, end, 4, 4, enc);
3584  if (num < -1) return ONIGERR_TOO_SHORT_DIGITS;
3585  else if (num < 0) return ONIGERR_TOO_BIG_NUMBER;
3586  if (p == prev) { /* can't read nothing. */
3587  num = 0; /* but, it's not error */
3588  }
3589  tok->type = TK_CODE_POINT;
3590  tok->base = 16;
3591  tok->u.code = (OnigCodePoint )num;
3592  }
3593  break;
3594 
3595  case '1': case '2': case '3': case '4':
3596  case '5': case '6': case '7': case '8': case '9':
   /* try to read a decimal back reference first; fall back to an octal
      escape (or a plain '8' / '9') at skip_backref */
3597  PUNFETCH;
3598  prev = p;
3599  num = onig_scan_unsigned_number(&p, end, enc);
3600  if (num < 0 || num > ONIG_MAX_BACKREF_NUM) {
3601  goto skip_backref;
3602  }
3603 
3605  (num <= env->num_mem || num <= 9)) { /* This spec. from GNU regex */
3607  if (num > env->num_mem || IS_NULL(SCANENV_MEM_NODES(env)[num]))
3608  return ONIGERR_INVALID_BACKREF;
3609  }
3610 
3611  tok->type = TK_BACKREF;
3612  tok->u.backref.num = 1;
3613  tok->u.backref.ref1 = num;
3614  tok->u.backref.by_name = 0;
3615 #ifdef USE_BACKREF_WITH_LEVEL
3616  tok->u.backref.exist_level = 0;
3617 #endif
3618  break;
3619  }
3620 
3621  skip_backref:
3622  if (c == '8' || c == '9') {
3623  /* normal char */
3624  p = prev; PINC;
3625  break;
3626  }
3627 
3628  p = prev;
3629  /* fall through */
3630  case '0':
3632  prev = p;
3633  num = scan_unsigned_octal_number(&p, end, (c == '0' ? 2:3), enc);
3634  if (num < 0) return ONIGERR_TOO_BIG_NUMBER;
3635  if (p == prev) { /* can't read nothing. */
3636  num = 0; /* but, it's not error */
3637  }
3638  tok->type = TK_RAW_BYTE;
3639  tok->base = 8;
3640  tok->u.c = num;
3641  }
3642  else if (c != '0') {
3643  PINC;
3644  }
3645  break;
3646 
3647 #ifdef USE_NAMED_GROUP
3648  case 'k':
3650  PFETCH(c);
3651  if (c == '<' || c == '\'') {
3652  r = fetch_named_backref_token(c, tok, &p, end, env);
3653  if (r < 0) return r;
3654  }
3655  else {
3656  PUNFETCH;
3657  onig_syntax_warn(env, "invalid back reference");
3658  }
3659  }
3660  break;
3661 #endif
3662 
3663 #if defined(USE_SUBEXP_CALL) || defined(USE_NAMED_GROUP)
3664  case 'g':
3665 #ifdef USE_NAMED_GROUP
3667  PFETCH(c);
3668  if (c == '{') {
3669  r = fetch_named_backref_token(c, tok, &p, end, env);
3670  if (r < 0) return r;
3671  }
3672  else
3673  PUNFETCH;
3674  }
3675 #endif
3676 #ifdef USE_SUBEXP_CALL
3678  PFETCH(c);
3679  if (c == '<' || c == '\'') {
3680  int gnum = -1, rel = 0;
3681  UChar* name_end;
3682  OnigCodePoint cnext;
3683 
3684  cnext = PPEEK;
3685  if (cnext == '0') {
3686  PINC;
3687  if (PPEEK_IS(get_name_end_code_point(c))) { /* \g<0>, \g'0' */
3688  PINC;
3689  name_end = p;
3690  gnum = 0;
3691  }
3692  }
3693  else if (cnext == '+') {
3694  PINC;
3695  rel = 1;
3696  }
3697  prev = p;
   /* gnum is still -1 unless the call target was the literal group 0 */
3698  if (gnum < 0) {
3699  r = fetch_name((OnigCodePoint )c, &p, end, &name_end, env, &gnum, 1);
3700  if (r < 0) return r;
3701  }
3702 
3703  tok->type = TK_CALL;
3704  tok->u.call.name = prev;
3705  tok->u.call.name_end = name_end;
3706  tok->u.call.gnum = gnum;
3707  tok->u.call.rel = rel;
3708  }
3709  else {
3710  onig_syntax_warn(env, "invalid subexp call");
3711  PUNFETCH;
3712  }
3713  }
3714 #endif
3715  break;
3716 #endif
3717 
3718  case 'Q':
3720  tok->type = TK_QUOTE_OPEN;
3721  }
3722  break;
3723 
3724  case 'p':
3725  case 'P':
3726  if (PPEEK_IS('{') &&
3728  PINC;
3729  tok->type = TK_CHAR_PROPERTY;
3730  tok->u.prop.not = (c == 'P' ? 1 : 0);
3731 
3733  PFETCH(c);
3734  if (c == '^') {
3735  tok->u.prop.not = (tok->u.prop.not == 0 ? 1 : 0);
3736  }
3737  else
3738  PUNFETCH;
3739  }
3740  }
3741  else {
3742  onig_syntax_warn(env, "invalid Unicode Property \\%c", c);
3743  }
3744  break;
3745 
3746  case 'R':
3748  tok->type = TK_LINEBREAK;
3749  }
3750  break;
3751 
3752  case 'X':
3755  }
3756  break;
3757 
3758  case 'K':
3760  tok->type = TK_KEEP;
3761  }
3762  break;
3763 
3764  default:
3765  PUNFETCH;
3766  num = fetch_escaped_value(&p, end, env);
3767  if (num < 0) return num;
3768  /* set_raw: */
3769  if (tok->u.c != num) {
3770  tok->type = TK_CODE_POINT;
3771  tok->u.code = (OnigCodePoint )num;
3772  }
3773  else { /* string */
3774  p = tok->backp + enclen(enc, tok->backp, end);
3775  }
3776  break;
3777  }
3778  }
3779  else {
3780  tok->u.c = c;
3781  tok->escaped = 0;
3782 
3783 #ifdef USE_VARIABLE_META_CHARS
   /* syntax-defined meta characters jump straight into the matching case of
      the switch below */
3784  if ((c != ONIG_INEFFECTIVE_META_CHAR) &&
3786  if (c == MC_ANYCHAR(syn))
3787  goto any_char;
3788  else if (c == MC_ANYTIME(syn))
3789  goto anytime;
3790  else if (c == MC_ZERO_OR_ONE_TIME(syn))
3791  goto zero_or_one_time;
3792  else if (c == MC_ONE_OR_MORE_TIME(syn))
3793  goto one_or_more_time;
3794  else if (c == MC_ANYCHAR_ANYTIME(syn)) {
3795  tok->type = TK_ANYCHAR_ANYTIME;
3796  goto out;
3797  }
3798  }
3799 #endif
3800 
3801  switch (c) {
3802  case '.':
3803  if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_DOT_ANYCHAR)) break;
3804 #ifdef USE_VARIABLE_META_CHARS
3805  any_char:
3806 #endif
3807  tok->type = TK_ANYCHAR;
3808  break;
3809 
3810  case '*':
3811  if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ASTERISK_ZERO_INF)) break;
3812 #ifdef USE_VARIABLE_META_CHARS
3813  anytime:
3814 #endif
3815  tok->type = TK_OP_REPEAT;
3816  tok->u.repeat.lower = 0;
3817  tok->u.repeat.upper = REPEAT_INFINITE;
3818  goto greedy_check;
3819  break;
3820 
3821  case '+':
3822  if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_PLUS_ONE_INF)) break;
3823 #ifdef USE_VARIABLE_META_CHARS
3824  one_or_more_time:
3825 #endif
3826  tok->type = TK_OP_REPEAT;
3827  tok->u.repeat.lower = 1;
3828  tok->u.repeat.upper = REPEAT_INFINITE;
3829  goto greedy_check;
3830  break;
3831 
3832  case '?':
3833  if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_QMARK_ZERO_ONE)) break;
3834 #ifdef USE_VARIABLE_META_CHARS
3835  zero_or_one_time:
3836 #endif
3837  tok->type = TK_OP_REPEAT;
3838  tok->u.repeat.lower = 0;
3839  tok->u.repeat.upper = 1;
3840  goto greedy_check;
3841  break;
3842 
3843  case '{':
3844  if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_BRACE_INTERVAL)) break;
3845  r = fetch_range_quantifier(&p, end, tok, env);
3846  if (r < 0) return r; /* error */
3847  if (r == 0) goto greedy_check;
3848  else if (r == 2) { /* {n} */
3850  goto possessive_check;
3851 
3852  goto greedy_check;
3853  }
3854  /* r == 1 : normal char */
3855  break;
3856 
3857  case '|':
3858  if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_VBAR_ALT)) break;
3859  tok->type = TK_ALT;
3860  break;
3861 
3862  case '(':
3863  if (PPEEK_IS('?') &&
3865  PINC;
   /* "(?#": skip the comment up to the first unescaped ')' and restart */
3866  if (PPEEK_IS('#')) {
3867  PFETCH(c);
3868  while (1) {
3869  if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;
3870  PFETCH(c);
3871  if (c == MC_ESC(syn)) {
3872  if (!PEND) PFETCH(c);
3873  }
3874  else {
3875  if (c == ')') break;
3876  }
3877  }
3878  goto start;
3879  }
3880 #ifdef USE_PERL_SUBEXP_CALL
3881  /* (?&name), (?n), (?R), (?0), (?+n), (?-n) */
3882  c = PPEEK;
3883  if ((c == '&' || c == 'R' || ONIGENC_IS_CODE_DIGIT(enc, c)) &&
3885  /* (?&name), (?n), (?R), (?0) */
3886  int gnum;
3887  UChar *name;
3888  UChar *name_end;
3889 
3890  if (c == 'R' || c == '0') {
3891  PINC; /* skip 'R' / '0' */
3892  if (!PPEEK_IS(')')) return ONIGERR_INVALID_GROUP_NAME;
3893  PINC; /* skip ')' */
3894  name_end = name = p;
3895  gnum = 0;
3896  }
3897  else {
3898  int numref = 1;
3899  if (c == '&') { /* (?&name) */
3900  PINC;
3901  numref = 0; /* don't allow number name */
3902  }
3903  name = p;
3904  r = fetch_name((OnigCodePoint )'(', &p, end, &name_end, env, &gnum, numref);
3905  if (r < 0) return r;
3906  }
3907 
3908  tok->type = TK_CALL;
3909  tok->u.call.name = name;
3910  tok->u.call.name_end = name_end;
3911  tok->u.call.gnum = gnum;
3912  tok->u.call.rel = 0;
3913  break;
3914  }
3915  else if ((c == '-' || c == '+') &&
3917  /* (?+n), (?-n) */
3918  int gnum;
3919  UChar *name;
3920  UChar *name_end;
3921  OnigCodePoint cnext;
3922  PFETCH_READY;
3923 
3924  PINC; /* skip '-' / '+' */
3925  cnext = PPEEK;
3926  if (ONIGENC_IS_CODE_DIGIT(enc, cnext)) {
   /* step back onto '-' so that fetch_name() reads the sign with the number */
3927  if (c == '-') PUNFETCH;
3928  name = p;
3929  r = fetch_name((OnigCodePoint )'(', &p, end, &name_end, env, &gnum, 1);
3930  if (r < 0) return r;
3931 
3932  tok->type = TK_CALL;
3933  tok->u.call.name = name;
3934  tok->u.call.name_end = name_end;
3935  tok->u.call.gnum = gnum;
3936  tok->u.call.rel = 1;
3937  break;
3938  }
3939  }
3940 #endif /* USE_PERL_SUBEXP_CALL */
3941 #ifdef USE_CAPITAL_P_NAMED_GROUP
3942  if (PPEEK_IS('P') &&
3944  int gnum;
3945  UChar *name;
3946  UChar *name_end;
3947  PFETCH_READY;
3948 
3949  PINC; /* skip 'P' */
3950  PFETCH(c);
3951  if (c == '=') { /* (?P=name): backref */
3952  r = fetch_named_backref_token((OnigCodePoint )'(', tok, &p, end, env);
3953  if (r < 0) return r;
3954  break;
3955  }
3956  else if (c == '>') { /* (?P>name): subexp call */
3957  name = p;
3958  r = fetch_name((OnigCodePoint )'(', &p, end, &name_end, env, &gnum, 0);
3959  if (r < 0) return r;
3960 
3961  tok->type = TK_CALL;
3962  tok->u.call.name = name;
3963  tok->u.call.name_end = name_end;
3964  tok->u.call.gnum = gnum;
3965  tok->u.call.rel = 0;
3966  break;
3967  }
3968  PUNFETCH;
3969  }
3970 #endif /* USE_CAPITAL_P_NAMED_GROUP */
3971  PUNFETCH;
3972  }
3973 
3974  if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_LPAREN_SUBEXP)) break;
3975  tok->type = TK_SUBEXP_OPEN;
3976  break;
3977 
3978  case ')':
3979  if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_LPAREN_SUBEXP)) break;
3980  tok->type = TK_SUBEXP_CLOSE;
3981  break;
3982 
3983  case '^':
3984  if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_LINE_ANCHOR)) break;
3985  tok->type = TK_ANCHOR;
3986  tok->u.anchor.subtype = (IS_SINGLELINE(env->option)
3988  break;
3989 
3990  case '$':
3991  if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_LINE_ANCHOR)) break;
3992  tok->type = TK_ANCHOR;
3993  tok->u.anchor.subtype = (IS_SINGLELINE(env->option)
3995  break;
3996 
3997  case '[':
3998  if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_BRACKET_CC)) break;
3999  tok->type = TK_CC_OPEN;
4000  break;
4001 
4002  case ']':
4003  if (*src > env->pattern) /* /].../ is allowed. */
4005  break;
4006 
4007  case '#':
   /* extended mode: drop the rest of the line and read the next token */
4008  if (IS_EXTEND(env->option)) {
4009  while (!PEND) {
4010  PFETCH(c);
4011  if (ONIGENC_IS_CODE_NEWLINE(enc, c))
4012  break;
4013  }
4014  goto start;
4015  break;
4016  }
4017  break;
4018 
4019  case ' ': case '\t': case '\n': case '\r': case '\f':
4020  if (IS_EXTEND(env->option))
4021  goto start;
4022  break;
4023 
4024  default:
4025  /* string */
4026  break;
4027  }
4028  }
4029 
4030 #ifdef USE_VARIABLE_META_CHARS
4031  out:
4032 #endif
4033  *src = p;
4034  return tok->type;
4035 }
4036 
/*
 * Add the code points listed in the range table `mbr` to `cc`, or, when
 * `not` is non-zero, the code points outside those ranges up to 0x7fffffff.
 *
 * Code points below `sb_out` are set in the bitset cc->bs; the others are
 * appended to cc->mbuf through add_code_range_to_buf().
 *
 * Returns 0 on success, or the first non-zero value returned by
 * add_code_range_to_buf().
 *
 * NOTE(review): the line carrying the function name and its leading
 * parameters (listing line 4038) is missing from this copy.  The body uses
 * cc and not, and the visible callers invoke
 * add_ctype_to_cc_by_range(cc, ctype, not, env, sb_out, ranges) -- confirm
 * the exact signature against the upstream file.
 */
4037 static int
4039  ScanEnv* env,
4040  OnigCodePoint sb_out, const OnigCodePoint mbr[])
4041 {
4042  int i, r;
4043  OnigCodePoint j;
4044 
4045  int n = ONIGENC_CODE_RANGE_NUM(mbr);
4046 
4047  if (not == 0) {
4048  for (i = 0; i < n; i++) {
4049  for (j = ONIGENC_CODE_RANGE_FROM(mbr, i);
4050  j <= ONIGENC_CODE_RANGE_TO(mbr, i); j++) {
4051  if (j >= sb_out) {
   /* range i straddles sb_out: its upper part goes to mbuf here and i is
      advanced.  When j is still the range start, range i is left whole for
      the loop at sb_end. */
4052  if (j > ONIGENC_CODE_RANGE_FROM(mbr, i)) {
4053  r = add_code_range_to_buf(&(cc->mbuf), env, j,
4054  ONIGENC_CODE_RANGE_TO(mbr, i));
4055  if (r != 0) return r;
4056  i++;
4057  }
4058 
4059  goto sb_end;
4060  }
4061  BITSET_SET_BIT_CHKDUP(cc->bs, j);
4062  }
4063  }
4064 
   /* every remaining range goes to mbuf unchanged */
4065  sb_end:
4066  for ( ; i < n; i++) {
4067  r = add_code_range_to_buf(&(cc->mbuf), env,
4068  ONIGENC_CODE_RANGE_FROM(mbr, i),
4069  ONIGENC_CODE_RANGE_TO(mbr, i));
4070  if (r != 0) return r;
4071  }
4072  }
4073  else {
4074  OnigCodePoint prev = 0;
4075 
   /* first pass: set the bits for the gaps between ranges below sb_out */
4076  for (i = 0; i < n; i++) {
4077  for (j = prev;
4078  j < ONIGENC_CODE_RANGE_FROM(mbr, i); j++) {
4079  if (j >= sb_out) {
4080  goto sb_end2;
4081  }
4082  BITSET_SET_BIT_CHKDUP(cc->bs, j);
4083  }
4084  prev = ONIGENC_CODE_RANGE_TO(mbr, i) + 1;
4085  }
4086  for (j = prev; j < sb_out; j++) {
4087  BITSET_SET_BIT_CHKDUP(cc->bs, j);
4088  }
4089 
   /* second pass: add the gaps between ranges to mbuf, starting at sb_out.
      NOTE(review): prev is overwritten with TO + 1 even for ranges that lie
      entirely below sb_out, so a later gap can start below sb_out -- confirm
      that this is harmless. */
4090  sb_end2:
4091  prev = sb_out;
4092 
4093  for (i = 0; i < n; i++) {
4094  if (prev < ONIGENC_CODE_RANGE_FROM(mbr, i)) {
4095  r = add_code_range_to_buf(&(cc->mbuf), env, prev,
4096  ONIGENC_CODE_RANGE_FROM(mbr, i) - 1);
4097  if (r != 0) return r;
4098  }
4099  prev = ONIGENC_CODE_RANGE_TO(mbr, i) + 1;
4100  }
4101  if (prev < 0x7fffffff) {
4102  r = add_code_range_to_buf(&(cc->mbuf), env, prev, 0x7fffffff);
4103  if (r != 0) return r;
4104  }
4105  }
4106 
4107  return 0;
4108 }
4109 
4110 static int
4111 add_ctype_to_cc(CClassNode* cc, int ctype, int not, int char_prop, ScanEnv* env)
4112 {
4113  int maxcode, ascii_range;
4114  int c, r;
4115  const OnigCodePoint *ranges;
4116  OnigCodePoint sb_out;
4117  OnigEncoding enc = env->enc;
4118  OnigOptionType option = env->option;
4119 
4120  ascii_range = IS_ASCII_RANGE(option) && (char_prop == 0);
4121 
4122  r = ONIGENC_GET_CTYPE_CODE_RANGE(enc, ctype, &sb_out, &ranges);
4123  if (r == 0) {
4124  if (ascii_range) {
4125  CClassNode ccwork;
4126  initialize_cclass(&ccwork);
4127  r = add_ctype_to_cc_by_range(&ccwork, ctype, not, env, sb_out,
4128  ranges);
4129  if (r == 0) {
4130  if (not) {
4131  r = add_code_range_to_buf0(&(ccwork.mbuf), env, 0x80, ONIG_LAST_CODE_POINT, FALSE);
4132  }
4133  else {
4134  CClassNode ccascii;
4135  initialize_cclass(&ccascii);
4136  if (ONIGENC_MBC_MINLEN(env->enc) > 1) {
4137  add_code_range(&(ccascii.mbuf), env, 0x00, 0x7F);
4138  }
4139  else {
4140  bitset_set_range(env, ccascii.bs, 0x00, 0x7F);
4141  }
4142  r = and_cclass(&ccwork, &ccascii, env);
4143  if (IS_NOT_NULL(ccascii.mbuf)) bbuf_free(ccascii.mbuf);
4144  }
4145  if (r == 0) {
4146  r = or_cclass(cc, &ccwork, env);
4147  }
4148  if (IS_NOT_NULL(ccwork.mbuf)) bbuf_free(ccwork.mbuf);
4149  }
4150  }
4151  else {
4152  r = add_ctype_to_cc_by_range(cc, ctype, not, env, sb_out, ranges);
4153  }
4154  return r;
4155  }
4156  else if (r != ONIG_NO_SUPPORT_CONFIG) {
4157  return r;
4158  }
4159 
4160  maxcode = ascii_range ? 0x80 : SINGLE_BYTE_SIZE;
4161  r = 0;
4162  switch (ctype) {
4163  case ONIGENC_CTYPE_ALPHA:
4164  case ONIGENC_CTYPE_BLANK:
4165  case ONIGENC_CTYPE_CNTRL:
4166  case ONIGENC_CTYPE_DIGIT:
4167  case ONIGENC_CTYPE_LOWER:
4168  case ONIGENC_CTYPE_PUNCT:
4169  case ONIGENC_CTYPE_SPACE:
4170  case ONIGENC_CTYPE_UPPER:
4171  case ONIGENC_CTYPE_XDIGIT:
4172  case ONIGENC_CTYPE_ASCII:
4173  case ONIGENC_CTYPE_ALNUM:
4174  if (not != 0) {
4175  for (c = 0; c < SINGLE_BYTE_SIZE; c++) {
4176  if (! ONIGENC_IS_CODE_CTYPE(enc, (OnigCodePoint )c, ctype))
4177  BITSET_SET_BIT_CHKDUP(cc->bs, c);
4178  }
4179  ADD_ALL_MULTI_BYTE_RANGE(enc, cc->mbuf);
4180  }
4181  else {
4182  for (c = 0; c < SINGLE_BYTE_SIZE; c++) {
4183  if (ONIGENC_IS_CODE_CTYPE(enc, (OnigCodePoint )c, ctype))
4184  BITSET_SET_BIT_CHKDUP(cc->bs, c);
4185  }
4186  }
4187  break;
4188 
4189  case ONIGENC_CTYPE_GRAPH:
4190  case ONIGENC_CTYPE_PRINT:
4191  if (not != 0) {
4192  for (c = 0; c < SINGLE_BYTE_SIZE; c++) {
4193  if (! ONIGENC_IS_CODE_CTYPE(enc, (OnigCodePoint )c, ctype)
4194  || c >= maxcode)
4195  BITSET_SET_BIT_CHKDUP(cc->bs, c);
4196  }
4197  if (ascii_range)
4198  ADD_ALL_MULTI_BYTE_RANGE(enc, cc->mbuf);
4199  }
4200  else {
4201  for (c = 0; c < maxcode; c++) {
4202  if (ONIGENC_IS_CODE_CTYPE(enc, (OnigCodePoint )c, ctype))
4203  BITSET_SET_BIT_CHKDUP(cc->bs, c);
4204  }
4205  if (! ascii_range)
4206  ADD_ALL_MULTI_BYTE_RANGE(enc, cc->mbuf);
4207  }
4208  break;
4209 
4210  case ONIGENC_CTYPE_WORD:
4211  if (not == 0) {
4212  for (c = 0; c < maxcode; c++) {
4213  if (ONIGENC_IS_CODE_WORD(enc, c)) BITSET_SET_BIT_CHKDUP(cc->bs, c);
4214  }
4215  if (! ascii_range)
4216  ADD_ALL_MULTI_BYTE_RANGE(enc, cc->mbuf);
4217  }
4218  else {
4219  for (c = 0; c < SINGLE_BYTE_SIZE; c++) {
4220  if ((ONIGENC_CODE_TO_MBCLEN(enc, c) > 0) /* check invalid code point */
4221  && (! ONIGENC_IS_CODE_WORD(enc, c) || c >= maxcode))
4222  BITSET_SET_BIT_CHKDUP(cc->bs, c);
4223  }
4224  if (ascii_range)
4225  ADD_ALL_MULTI_BYTE_RANGE(enc, cc->mbuf);
4226  }
4227  break;
4228 
4229  default:
4230  return ONIGERR_PARSER_BUG;
4231  break;
4232  }
4233 
4234  return r;
4235 }
4236 
/*
 * Try to read a POSIX bracket name (for example "alnum" followed by ":]")
 * at *src and add the matching character type to `cc`.
 *
 * A leading '^' negates the type.  The name is compared against the PBS
 * table with onigenc_with_ascii_strncmp().  On success the type is added
 * through add_ctype_to_cc(), the closing ":]" is consumed, the new position
 * is stored in *src and 0 is returned.
 *
 * When no table entry matches, or the remaining input is too short, the
 * code scans ahead (at most POSIX_BRACKET_CHECK_LIMIT_LENGTH characters)
 * for ':' or ']' and otherwise returns 1, meaning "not a POSIX bracket, but
 * no error".  A non-zero result of add_ctype_to_cc() is returned as is.
 *
 * NOTE(review): listing lines 4238 (function name and parameters), 4282 and
 * 4307 (the statements guarded by the two ":]" tests) and 4285 (one argument
 * of the add_ctype_to_cc() call) are missing from this copy.  The name
 * parse_posix_bracket is taken from upstream Onigmo -- confirm.
 */
4237 static int
4239 {
4240 #define POSIX_BRACKET_CHECK_LIMIT_LENGTH 20
4241 #define POSIX_BRACKET_NAME_MIN_LEN 4
4242 
   /* name, character type, name length; terminated by a NULL name */
4243  static const PosixBracketEntryType PBS[] = {
4244  { (UChar* )"alnum", ONIGENC_CTYPE_ALNUM, 5 },
4245  { (UChar* )"alpha", ONIGENC_CTYPE_ALPHA, 5 },
4246  { (UChar* )"blank", ONIGENC_CTYPE_BLANK, 5 },
4247  { (UChar* )"cntrl", ONIGENC_CTYPE_CNTRL, 5 },
4248  { (UChar* )"digit", ONIGENC_CTYPE_DIGIT, 5 },
4249  { (UChar* )"graph", ONIGENC_CTYPE_GRAPH, 5 },
4250  { (UChar* )"lower", ONIGENC_CTYPE_LOWER, 5 },
4251  { (UChar* )"print", ONIGENC_CTYPE_PRINT, 5 },
4252  { (UChar* )"punct", ONIGENC_CTYPE_PUNCT, 5 },
4253  { (UChar* )"space", ONIGENC_CTYPE_SPACE, 5 },
4254  { (UChar* )"upper", ONIGENC_CTYPE_UPPER, 5 },
4255  { (UChar* )"xdigit", ONIGENC_CTYPE_XDIGIT, 6 },
4256  { (UChar* )"ascii", ONIGENC_CTYPE_ASCII, 5 },
4257  { (UChar* )"word", ONIGENC_CTYPE_WORD, 4 },
4258  { (UChar* )NULL, -1, 0 }
4259  };
4260 
4261  const PosixBracketEntryType *pb;
4262  int not, i, r;
4263  OnigCodePoint c;
4264  OnigEncoding enc = env->enc;
4265  UChar *p = *src;
4266  PFETCH_READY;
4267 
4268  if (PPEEK_IS('^')) {
4269  PINC;
4270  not = 1;
4271  }
4272  else
4273  not = 0;
4274 
4275  if (onigenc_strlen(enc, p, end) < POSIX_BRACKET_NAME_MIN_LEN + 3)
4276  goto not_posix_bracket;
4277 
4278  for (pb = PBS; IS_NOT_NULL(pb->name); pb++) {
4279  if (onigenc_with_ascii_strncmp(enc, p, end, pb->name, pb->len) == 0) {
4280  p = (UChar* )onigenc_step(enc, p, end, pb->len);
4281  if (onigenc_with_ascii_strncmp(enc, p, end, (UChar* )":]", 2) != 0)
4283 
4284  r = add_ctype_to_cc(cc, pb->ctype, not,
4286  env);
4287  if (r != 0) return r;
4288 
   /* step over the closing ":]" */
4289  PINC; PINC;
4290  *src = p;
4291  return 0;
4292  }
4293  }
4294 
4295  not_posix_bracket:
4296  c = 0;
4297  i = 0;
4298  while (!PEND && ((c = PPEEK) != ':') && c != ']') {
4299  PINC;
4300  if (++i > POSIX_BRACKET_CHECK_LIMIT_LENGTH) break;
4301  }
4302  if (c == ':' && ! PEND) {
4303  PINC;
4304  if (! PEND) {
4305  PFETCH(c);
4306  if (c == ']')
4308  }
4309  }
4310 
4311  return 1; /* 1: is not POSIX bracket, but no error. */
4312 }
4313 
/*
 * Read a property name up to the closing '}' starting at *src and convert
 * it to a character type with ONIGENC_PROPERTY_NAME_TO_CTYPE().
 *
 * On success the position after '}' is stored in *src and the
 * (non-negative) result of the conversion is returned.  Otherwise the span
 * read so far is recorded with onig_scan_env_set_error_string() and r is
 * returned.
 *
 * NOTE(review): listing lines 4315 (function name and parameters) and 4337
 * (the statement executed when '(', ')', '{' or '|' is met before '}') are
 * missing from this copy.  The visible caller invokes
 * fetch_char_property_to_ctype(src, end, env).
 * NOTE(review): if the input ends before any '}' is seen, the loop exits
 * with r still holding its initial 0, and that 0 is what gets returned --
 * confirm that callers cope with this.
 */
4314 static int
4316 {
4317  int r;
4318  OnigCodePoint c;
4319  OnigEncoding enc = env->enc;
4320  UChar *prev, *start, *p = *src;
4321  PFETCH_READY;
4322 
4323  r = 0;
4324  start = prev = p;
4325 
4326  while (!PEND) {
   /* prev marks the character about to be read, so on '}' the name spans
      [start, prev) */
4327  prev = p;
4328  PFETCH(c);
4329  if (c == '}') {
4330  r = ONIGENC_PROPERTY_NAME_TO_CTYPE(enc, start, prev);
4331  if (r < 0) break;
4332 
4333  *src = p;
4334  return r;
4335  }
4336  else if (c == '(' || c == ')' || c == '{' || c == '|') {
4338  break;
4339  }
4340  }
4341 
4342  onig_scan_env_set_error_string(env, r, *src, prev);
4343  return r;
4344 }
4345 
/*
 * Build a character class node for a character property.
 *
 * The property name is read with fetch_char_property_to_ctype(), a new
 * class node is stored in *np and filled through
 * add_ctype_to_cc(cc, ctype, 0, 1, env).  The node is marked negated with
 * NCCLASS_SET_NOT when tok->u.prop.not is non-zero.
 *
 * Returns 0 on success, a negative value from the name lookup, or the
 * non-zero result of add_ctype_to_cc().
 *
 * NOTE(review): listing lines 4347 (function name and leading parameters)
 * and 4357 (the statement that follows node_new_cclass(), presumably its
 * NULL check) are missing from this copy.  The name parse_char_property is
 * taken from upstream Onigmo -- confirm.
 */
4346 static int
4348  ScanEnv* env)
4349 {
4350  int r, ctype;
4351  CClassNode* cc;
4352 
4353  ctype = fetch_char_property_to_ctype(src, end, env);
4354  if (ctype < 0) return ctype;
4355 
4356  *np = node_new_cclass();
4358  cc = NCCLASS(*np);
4359  r = add_ctype_to_cc(cc, ctype, 0, 1, env);
4360  if (r != 0) return r;
4361  if (tok->u.prop.not != 0) NCCLASS_SET_NOT(cc);
4362 
4363  return 0;
4364 }
4365 
4366 
4367 enum CCSTATE {
4372 };
4373 
4378 };
4379 
4380 static int
4382  enum CCSTATE* state, ScanEnv* env)
4383 {
4384  int r;
4385 
4386  if (*state == CCS_RANGE)
4388 
4389  if (*state == CCS_VALUE && *type != CCV_CLASS) {
4390  if (*type == CCV_SB)
4391  BITSET_SET_BIT_CHKDUP(cc->bs, (int )(*vs));
4392  else if (*type == CCV_CODE_POINT) {
4393  r = add_code_range(&(cc->mbuf), env, *vs, *vs);
4394  if (r < 0) return r;
4395  }
4396  }
4397 
4398  *state = CCS_VALUE;
4399  *type = CCV_CLASS;
4400  return 0;
4401 }
4402 
4403 static int
4405  int* vs_israw, int v_israw,
4406  enum CCVALTYPE intype, enum CCVALTYPE* type,
4407  enum CCSTATE* state, ScanEnv* env)
4408 {
4409  int r;
4410 
4411  switch (*state) {
4412  case CCS_VALUE:
4413  if (*type == CCV_SB)
4414  BITSET_SET_BIT_CHKDUP(cc->bs, (int )(*vs));
4415  else if (*type == CCV_CODE_POINT) {
4416  r = add_code_range(&(cc->mbuf), env, *vs, *vs);
4417  if (r < 0) return r;
4418  }
4419  break;
4420 
4421  case CCS_RANGE:
4422  if (intype == *type) {
4423  if (intype == CCV_SB) {
4424  if (*vs > 0xff || v > 0xff)
4426 
4427  if (*vs > v) {
4429  goto ccs_range_end;
4430  else
4432  }
4433  bitset_set_range(env, cc->bs, (int )*vs, (int )v);
4434  }
4435  else {
4436  r = add_code_range(&(cc->mbuf), env, *vs, v);
4437  if (r < 0) return r;
4438  }
4439  }
4440  else {
4441 #if 0
4442  if (intype == CCV_CODE_POINT && *type == CCV_SB) {
4443 #endif
4444  if (*vs > v) {
4446  goto ccs_range_end;
4447  else
4449  }
4450  bitset_set_range(env, cc->bs, (int )*vs, (int )(v < 0xff ? v : 0xff));
4451  r = add_code_range(&(cc->mbuf), env, (OnigCodePoint )*vs, v);
4452  if (r < 0) return r;
4453 #if 0
4454  }
4455  else
4457 #endif
4458  }
4459  ccs_range_end:
4460  *state = CCS_COMPLETE;
4461  break;
4462 
4463  case CCS_COMPLETE:
4464  case CCS_START:
4465  *state = CCS_VALUE;
4466  break;
4467 
4468  default:
4469  break;
4470  }
4471 
4472  *vs_israw = v_israw;
4473  *vs = v;
4474  *type = intype;
4475  return 0;
4476 }
4477 
4478 static int
4479 code_exist_check(OnigCodePoint c, UChar* from, UChar* end, int ignore_escaped,
4480  ScanEnv* env)
4481 {
4482  int in_esc;
4484  OnigEncoding enc = env->enc;
4485  UChar* p = from;
4486  PFETCH_READY;
4487 
4488  in_esc = 0;
4489  while (! PEND) {
4490  if (ignore_escaped && in_esc) {
4491  in_esc = 0;
4492  }
4493  else {
4494  PFETCH(code);
4495  if (code == c) return 1;
4496  if (code == MC_ESC(env->syntax)) in_esc = 1;
4497  }
4498  }
4499  return 0;
4500 }
4501 
4502 static int
4504  ScanEnv* env)
4505 {
4506  int r, neg, len, fetched, and_start;
4507  OnigCodePoint v, vs;
4508  UChar *p;
4509  Node* node;
4510  CClassNode *cc, *prev_cc;
4511  CClassNode work_cc;
4512 
4513  enum CCSTATE state;
4514  enum CCVALTYPE val_type, in_type;
4515  int val_israw, in_israw;
4516 
4517  prev_cc = (CClassNode* )NULL;
4518  *np = NULL_NODE;
4519  r = fetch_token_in_cc(tok, src, end, env);
4520  if (r == TK_CHAR && tok->u.c == '^' && tok->escaped == 0) {
4521  neg = 1;
4522  r = fetch_token_in_cc(tok, src, end, env);
4523  }
4524  else {
4525  neg = 0;
4526  }
4527 
4528  if (r < 0) return r;
4529  if (r == TK_CC_CLOSE) {
4530  if (! code_exist_check((OnigCodePoint )']',
4531  *src, env->pattern_end, 1, env))
4532  return ONIGERR_EMPTY_CHAR_CLASS;
4533 
4534  CC_ESC_WARN(env, (UChar* )"]");
4535  r = tok->type = TK_CHAR; /* allow []...] */
4536  }
4537 
4538  *np = node = node_new_cclass();
4540  cc = NCCLASS(node);
4541 
4542  and_start = 0;
4543  state = CCS_START;
4544  p = *src;
4545  while (r != TK_CC_CLOSE) {
4546  fetched = 0;
4547  switch (r) {
4548  case TK_CHAR:
4549  if ((tok->u.code >= SINGLE_BYTE_SIZE) ||
4550  (len = ONIGENC_CODE_TO_MBCLEN(env->enc, tok->u.c)) > 1) {
4551  in_type = CCV_CODE_POINT;
4552  }
4553  else if (len < 0) {
4554  r = len;
4555  goto err;
4556  }
4557  else {
4558  sb_char:
4559  in_type = CCV_SB;
4560  }
4561  v = (OnigCodePoint )tok->u.c;
4562  in_israw = 0;
4563  goto val_entry2;
4564  break;
4565 
4566  case TK_RAW_BYTE:
4567  /* tok->base != 0 : octal or hexadec. */
4568  if (! ONIGENC_IS_SINGLEBYTE(env->enc) && tok->base != 0) {
4570  UChar* bufe = buf + ONIGENC_CODE_TO_MBC_MAXLEN;
4571  UChar* psave = p;
4572  int i, base = tok->base;
4573 
4574  buf[0] = (UChar )tok->u.c;
4575  for (i = 1; i < ONIGENC_MBC_MAXLEN(env->enc); i++) {
4576  r = fetch_token_in_cc(tok, &p, end, env);
4577  if (r < 0) goto err;
4578  if (r != TK_RAW_BYTE || tok->base != base) {
4579  fetched = 1;
4580  break;
4581  }
4582  buf[i] = (UChar )tok->u.c;
4583  }
4584 
4585  if (i < ONIGENC_MBC_MINLEN(env->enc)) {
4587  goto err;
4588  }
4589 
4590  len = enclen(env->enc, buf, buf+i);
4591  if (i < len) {
4593  goto err;
4594  }
4595  else if (i > len) { /* fetch back */
4596  p = psave;
4597  for (i = 1; i < len; i++) {
4598  r = fetch_token_in_cc(tok, &p, end, env);
4599  }
4600  fetched = 0;
4601  }
4602 
4603  if (i == 1) {
4604  v = (OnigCodePoint )buf[0];
4605  goto raw_single;
4606  }
4607  else {
4608  v = ONIGENC_MBC_TO_CODE(env->enc, buf, bufe);
4609  in_type = CCV_CODE_POINT;
4610  }
4611  }
4612  else {
4613  v = (OnigCodePoint )tok->u.c;
4614  raw_single:
4615  in_type = CCV_SB;
4616  }
4617  in_israw = 1;
4618  goto val_entry2;
4619  break;
4620 
4621  case TK_CODE_POINT:
4622  v = tok->u.code;
4623  in_israw = 1;
4624  val_entry:
4625  len = ONIGENC_CODE_TO_MBCLEN(env->enc, v);
4626  if (len < 0) {
4627  r = len;
4628  goto err;
4629  }
4630  in_type = (len == 1 ? CCV_SB : CCV_CODE_POINT);
4631  val_entry2:
4632  r = next_state_val(cc, &vs, v, &val_israw, in_israw, in_type, &val_type,
4633  &state, env);
4634  if (r != 0) goto err;
4635  break;
4636 
4637  case TK_POSIX_BRACKET_OPEN:
4638  r = parse_posix_bracket(cc, &p, end, env);
4639  if (r < 0) goto err;
4640  if (r == 1) { /* is not POSIX bracket */
4641  CC_ESC_WARN(env, (UChar* )"[");
4642  p = tok->backp;
4643  v = (OnigCodePoint )tok->u.c;
4644  in_israw = 0;
4645  goto val_entry;
4646  }
4647  goto next_class;
4648  break;
4649 
4650  case TK_CHAR_TYPE:
4651  r = add_ctype_to_cc(cc, tok->u.prop.ctype, tok->u.prop.not, 0, env);
4652  if (r != 0) return r;
4653 
4654  next_class:
4655  r = next_state_class(cc, &vs, &val_type, &state, env);
4656  if (r != 0) goto err;
4657  break;
4658 
4659  case TK_CHAR_PROPERTY:
4660  {
4661  int ctype;
4662 
4663  ctype = fetch_char_property_to_ctype(&p, end, env);
4664  if (ctype < 0) return ctype;
4665  r = add_ctype_to_cc(cc, ctype, tok->u.prop.not, 1, env);
4666  if (r != 0) return r;
4667  goto next_class;
4668  }
4669  break;
4670 
4671  case TK_CC_RANGE:
4672  if (state == CCS_VALUE) {
4673  r = fetch_token_in_cc(tok, &p, end, env);
4674  if (r < 0) goto err;
4675  fetched = 1;
4676  if (r == TK_CC_CLOSE) { /* allow [x-] */
4677  range_end_val:
4678  v = (OnigCodePoint )'-';
4679  in_israw = 0;
4680  goto val_entry;
4681  }
4682  else if (r == TK_CC_AND) {
4683  CC_ESC_WARN(env, (UChar* )"-");
4684  goto range_end_val;
4685  }
4686  state = CCS_RANGE;
4687  }
4688  else if (state == CCS_START) {
4689  /* [-xa] is allowed */
4690  v = (OnigCodePoint )tok->u.c;
4691  in_israw = 0;
4692 
4693  r = fetch_token_in_cc(tok, &p, end, env);
4694  if (r < 0) goto err;
4695  fetched = 1;
4696  /* [--x] or [a&&-x] is warned. */
4697  if (r == TK_CC_RANGE || and_start != 0)
4698  CC_ESC_WARN(env, (UChar* )"-");
4699 
4700  goto val_entry;
4701  }
4702  else if (state == CCS_RANGE) {
4703  CC_ESC_WARN(env, (UChar* )"-");
4704  goto sb_char; /* [!--x] is allowed */
4705  }
4706  else { /* CCS_COMPLETE */
4707  r = fetch_token_in_cc(tok, &p, end, env);
4708  if (r < 0) goto err;
4709  fetched = 1;
4710  if (r == TK_CC_CLOSE) goto range_end_val; /* allow [a-b-] */
4711  else if (r == TK_CC_AND) {
4712  CC_ESC_WARN(env, (UChar* )"-");
4713  goto range_end_val;
4714  }
4715 
4717  CC_ESC_WARN(env, (UChar* )"-");
4718  goto range_end_val; /* [0-9-a] is allowed as [0-9\-a] */
4719  }
4721  goto err;
4722  }
4723  break;
4724 
4725  case TK_CC_CC_OPEN: /* [ */
4726  {
4727  Node *anode;
4728  CClassNode* acc;
4729 
4730  r = parse_char_class(&anode, tok, &p, end, env);
4731  if (r == 0) {
4732  acc = NCCLASS(anode);
4733  r = or_cclass(cc, acc, env);
4734  }
4735  onig_node_free(anode);
4736  if (r != 0) goto err;
4737  }
4738  break;
4739 
4740  case TK_CC_AND: /* && */
4741  {
4742  if (state == CCS_VALUE) {
4743  r = next_state_val(cc, &vs, 0, &val_israw, 0, val_type,
4744  &val_type, &state, env);
4745  if (r != 0) goto err;
4746  }
4747  /* initialize local variables */
4748  and_start = 1;
4749  state = CCS_START;
4750 
4751  if (IS_NOT_NULL(prev_cc)) {
4752  r = and_cclass(prev_cc, cc, env);
4753  if (r != 0) goto err;
4754  bbuf_free(cc->mbuf);
4755  }
4756  else {
4757  prev_cc = cc;
4758  cc = &work_cc;
4759  }
4760  initialize_cclass(cc);
4761  }
4762  break;
4763 
4764  case TK_EOT:
4766  goto err;
4767  break;
4768  default:
4769  r = ONIGERR_PARSER_BUG;
4770  goto err;
4771  break;
4772  }
4773 
4774  if (fetched)
4775  r = tok->type;
4776  else {
4777  r = fetch_token_in_cc(tok, &p, end, env);
4778  if (r < 0) goto err;
4779  }
4780  }
4781 
4782  if (state == CCS_VALUE) {
4783  r = next_state_val(cc, &vs, 0, &val_israw, 0, val_type,
4784  &val_type, &state, env);
4785  if (r != 0) goto err;
4786  }
4787 
4788  if (IS_NOT_NULL(prev_cc)) {
4789  r = and_cclass(prev_cc, cc, env);
4790  if (r != 0) goto err;
4791  bbuf_free(cc->mbuf);
4792  cc = prev_cc;
4793  }
4794 
4795  if (neg != 0)
4796  NCCLASS_SET_NOT(cc);
4797  else
4798  NCCLASS_CLEAR_NOT(cc);
4799  if (IS_NCCLASS_NOT(cc) &&
4801  int is_empty;
4802 
4803  is_empty = (IS_NULL(cc->mbuf) ? 1 : 0);
4804  if (is_empty != 0)
4805  BITSET_IS_EMPTY(cc->bs, is_empty);
4806 
4807  if (is_empty == 0) {
4808 #define NEWLINE_CODE 0x0a
4809 
4811  if (ONIGENC_CODE_TO_MBCLEN(env->enc, NEWLINE_CODE) == 1)
4813  else {
4814  r = add_code_range(&(cc->mbuf), env, NEWLINE_CODE, NEWLINE_CODE);
4815  if (r < 0) goto err;
4816  }
4817  }
4818  }
4819  }
4820  *src = p;
4821  return 0;
4822 
4823  err:
4824  if (cc != NCCLASS(*np))
4825  bbuf_free(cc->mbuf);
4826  return r;
4827 }
4828 
/* Forward declaration: parse_enclose() below calls parse_subexp() before
   its definition appears in this file. */
4829 static int parse_subexp(Node** top, OnigToken* tok, int term,
4830  UChar** src, UChar* end, ScanEnv* env);
4831 
4832 static int
4833 parse_enclose(Node** np, OnigToken* tok, int term, UChar** src, UChar* end,
4834  ScanEnv* env)
4835 {
4836  int r = 0, num;
4837  Node *target, *work1 = NULL, *work2 = NULL;
4838  OnigOptionType option;
4839  OnigCodePoint c;
4840  OnigEncoding enc = env->enc;
4841 
4842 #ifdef USE_NAMED_GROUP
4843  int list_capture;
4844 #endif
4845 
4846  UChar* p = *src;
4847  PFETCH_READY;
4848 
4849  *np = NULL;
4851 
4852  option = env->option;
4853  if (PPEEK_IS('?') &&
4855  PINC;
4856  if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;
4857 
4858  PFETCH(c);
4859  switch (c) {
4860  case ':': /* (?:...) grouping only */
4861  group:
4862  r = fetch_token(tok, &p, end, env);
4863  if (r < 0) return r;
4864  r = parse_subexp(np, tok, term, &p, end, env);
4865  if (r < 0) return r;
4866  *src = p;
4867  return 1; /* group */
4868  break;
4869 
4870  case '=':
4872  break;
4873  case '!': /* preceding read */
4875  break;
4876  case '>': /* (?>...) stop backtrack */
4878  break;
4879 
4880 #ifdef USE_NAMED_GROUP
4881  case '\'':
4883  goto named_group1;
4884  }
4885  else
4887  break;
4888 
4889 #ifdef USE_CAPITAL_P_NAMED_GROUP
4890  case 'P': /* (?P<name>...) */
4892  PFETCH(c);
4893  if (c == '<') goto named_group1;
4894  }
4896  break;
4897 #endif
4898 #endif
4899 
4900  case '<': /* look behind (?<=...), (?<!...) */
4901  PFETCH(c);
4902  if (c == '=')
4904  else if (c == '!')
4906 #ifdef USE_NAMED_GROUP
4907  else { /* (?<name>...) */
4909  UChar *name;
4910  UChar *name_end;
4911 
4912  PUNFETCH;
4913  c = '<';
4914 
4915  named_group1:
4916  list_capture = 0;
4917 
4918  named_group2:
4919  name = p;
4920  r = fetch_name((OnigCodePoint )c, &p, end, &name_end, env, &num, 0);
4921  if (r < 0) return r;
4922 
4923  num = scan_env_add_mem_entry(env);
4924  if (num < 0) return num;
4925  if (list_capture != 0 && num >= (int )BIT_STATUS_BITS_NUM)
4927 
4928  r = name_add(env->reg, name, name_end, num, env);
4929  if (r != 0) return r;
4930  *np = node_new_enclose_memory(env->option, 1);
4932  NENCLOSE(*np)->regnum = num;
4933  if (list_capture != 0)
4935  env->num_named++;
4936  }
4937  else {
4939  }
4940  }
4941 #else
4942  else {
4944  }
4945 #endif
4946  break;
4947 
4948  case '@':
4950 #ifdef USE_NAMED_GROUP
4952  PFETCH(c);
4953  if (c == '<' || c == '\'') {
4954  list_capture = 1;
4955  goto named_group2; /* (?@<name>...) */
4956  }
4957  PUNFETCH;
4958  }
4959 #endif
4960  *np = node_new_enclose_memory(env->option, 0);
4962  num = scan_env_add_mem_entry(env);
4963  if (num < 0) {
4964  onig_node_free(*np);
4965  return num;
4966  }
4967  else if (num >= (int )BIT_STATUS_BITS_NUM) {
4968  onig_node_free(*np);
4970  }
4971  NENCLOSE(*np)->regnum = num;
4973  }
4974  else {
4976  }
4977  break;
4978 
4979  case '(': /* conditional expression: (?(cond)yes), (?(cond)yes|no) */
4981  UChar *name = NULL;
4982  UChar *name_end;
4983  PFETCH(c);
4984  if (ONIGENC_IS_CODE_DIGIT(enc, c)) { /* (n) */
4985  PUNFETCH;
4986  r = fetch_name((OnigCodePoint )'(', &p, end, &name_end, env, &num, 1);
4987  if (r < 0) return r;
4988  if (num < 0) {
4989  num = BACKREF_REL_TO_ABS(num, env);
4990  if (num <= 0)
4991  return ONIGERR_INVALID_BACKREF;
4992  }
4994  if (num > env->num_mem ||
4995  IS_NULL(SCANENV_MEM_NODES(env)[num]))
4996  return ONIGERR_INVALID_BACKREF;
4997  }
4998  }
4999 #ifdef USE_NAMED_GROUP
5000  else if (c == '<' || c == '\'') { /* (<name>), ('name') */
5001  int nums;
5002  int *backs;
5003 
5004  name = p;
5005  r = fetch_name((OnigCodePoint )c, &p, end, &name_end, env, &num, 0);
5006  if (r < 0) return r;
5007  PFETCH(c);
5008  if (c != ')') return ONIGERR_UNDEFINED_GROUP_OPTION;
5009 
5010  nums = onig_name_to_group_numbers(env->reg, name, name_end, &backs);
5011  if (nums <= 0) {
5013  ONIGERR_UNDEFINED_NAME_REFERENCE, name, name_end);
5015  }
5017  int i;
5018  for (i = 0; i < nums; i++) {
5019  if (backs[i] > env->num_mem ||
5020  IS_NULL(SCANENV_MEM_NODES(env)[backs[i]]))
5021  return ONIGERR_INVALID_BACKREF;
5022  }
5023  }
5024  num = backs[0]; /* XXX: use left most named group as Perl */
5025  }
5026 #endif
5027  else
5031  NENCLOSE(*np)->regnum = num;
5032  if (IS_NOT_NULL(name)) NENCLOSE(*np)->state |= NST_NAME_REF;
5033  }
5034  else
5036  break;
5037 
5038 #if 0
5039  case '|': /* branch reset: (?|...) */
5041  /* TODO */
5042  }
5043  else
5045  break;
5046 #endif
5047 
5048  case '^': /* loads default options */
5050  /* d-imsx */
5051  ONOFF(option, ONIG_OPTION_ASCII_RANGE, 1);
5052  ONOFF(option, ONIG_OPTION_IGNORECASE, 1);
5053  ONOFF(option, ONIG_OPTION_SINGLELINE, 0);
5054  ONOFF(option, ONIG_OPTION_MULTILINE, 1);
5055  ONOFF(option, ONIG_OPTION_EXTEND, 1);
5056  PFETCH(c);
5057  }
5058 #if 0
5059  else if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_OPTION_RUBY)) {
5060  /* d-imx */
5061  ONOFF(option, ONIG_OPTION_ASCII_RANGE, 0);
5064  ONOFF(option, ONIG_OPTION_IGNORECASE, 1);
5065  ONOFF(option, ONIG_OPTION_MULTILINE, 1);
5066  ONOFF(option, ONIG_OPTION_EXTEND, 1);
5067  PFETCH(c);
5068  }
5069 #endif
5070  else {
5072  }
5073  /* fall through */
5074 #ifdef USE_POSIXLINE_OPTION
5075  case 'p':
5076 #endif
5077  case '-': case 'i': case 'm': case 's': case 'x':
5078  case 'a': case 'd': case 'l': case 'u':
5079  {
5080  int neg = 0;
5081 
5082  while (1) {
5083  switch (c) {
5084  case ':':
5085  case ')':
5086  break;
5087 
5088  case '-': neg = 1; break;
5089  case 'x': ONOFF(option, ONIG_OPTION_EXTEND, neg); break;
5090  case 'i': ONOFF(option, ONIG_OPTION_IGNORECASE, neg); break;
5091  case 's':
5093  ONOFF(option, ONIG_OPTION_MULTILINE, neg);
5094  }
5095  else
5097  break;
5098 
5099  case 'm':
5101  ONOFF(option, ONIG_OPTION_SINGLELINE, (neg == 0 ? 1 : 0));
5102  }
5103  else if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_OPTION_RUBY)) {
5104  ONOFF(option, ONIG_OPTION_MULTILINE, neg);
5105  }
5106  else
5108  break;
5109 #ifdef USE_POSIXLINE_OPTION
5110  case 'p':
5112  break;
5113 #endif
5114 
5115  case 'a': /* limits \d, \s, \w and POSIX brackets to ASCII range */
5118  (neg == 0)) {
5119  ONOFF(option, ONIG_OPTION_ASCII_RANGE, 0);
5122  }
5123  else
5125  break;
5126 
5127  case 'u':
5130  (neg == 0)) {
5131  ONOFF(option, ONIG_OPTION_ASCII_RANGE, 1);
5134  }
5135  else
5137  break;
5138 
5139  case 'd':
5141  (neg == 0)) {
5142  ONOFF(option, ONIG_OPTION_ASCII_RANGE, 1);
5143  }
5144  else if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_OPTION_RUBY) &&
5145  (neg == 0)) {
5146  ONOFF(option, ONIG_OPTION_ASCII_RANGE, 0);
5149  }
5150  else
5152  break;
5153 
5154  case 'l':
5155  if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_OPTION_PERL) && (neg == 0)) {
5156  ONOFF(option, ONIG_OPTION_ASCII_RANGE, 1);
5157  }
5158  else
5160  break;
5161 
5162  default:
5164  }
5165 
5166  if (c == ')') {
5167  *np = node_new_option(option);
5169  *src = p;
5170  return 2; /* option only */
5171  }
5172  else if (c == ':') {
5173  OnigOptionType prev = env->option;
5174 
5175  env->option = option;
5176  r = fetch_token(tok, &p, end, env);
5177  if (r < 0) return r;
5178  r = parse_subexp(&target, tok, term, &p, end, env);
5179  env->option = prev;
5180  if (r < 0) return r;
5181  *np = node_new_option(option);
5183  NENCLOSE(*np)->target = target;
5184  *src = p;
5185  return 0;
5186  }
5187 
5188  if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;
5189  PFETCH(c);
5190  }
5191  }
5192  break;
5193 
5194  default:
5196  }
5197  }
5198  else {
5200  goto group;
5201 
5202  *np = node_new_enclose_memory(env->option, 0);
5204  num = scan_env_add_mem_entry(env);
5205  if (num < 0) return num;
5206  NENCLOSE(*np)->regnum = num;
5207  }
5208 
5210  r = fetch_token(tok, &p, end, env);
5211  if (r < 0) return r;
5212  r = parse_subexp(&target, tok, term, &p, end, env);
5213  if (r < 0) {
5214  onig_node_free(target);
5215  return r;
5216  }
5217 
5218  if (NTYPE(*np) == NT_ANCHOR)
5219  NANCHOR(*np)->target = target;
5220  else {
5221  NENCLOSE(*np)->target = target;
5222  if (NENCLOSE(*np)->type == ENCLOSE_MEMORY) {
5223  /* Don't move this to previous of parse_subexp() */
5224  r = scan_env_set_mem_node(env, NENCLOSE(*np)->regnum, *np);
5225  if (r != 0) return r;
5226  }
5227  else if (NENCLOSE(*np)->type == ENCLOSE_CONDITION) {
5228  if (NTYPE(target) != NT_ALT) {
5229  /* convert (?(cond)yes) to (?(cond)yes|empty) */
5230  work1 = node_new_empty();
5231  if (IS_NULL(work1)) goto err;
5232  work2 = onig_node_new_alt(work1, NULL_NODE);
5233  if (IS_NULL(work2)) goto err;
5234  work1 = onig_node_new_alt(target, work2);
5235  if (IS_NULL(work1)) goto err;
5236  NENCLOSE(*np)->target = work1;
5237  }
5238  }
5239  }
5240 
5241  *src = p;
5242  return 0;
5243 
5244  err:
5245  onig_node_free(work1);
5246  onig_node_free(work2);
5247  onig_node_free(*np);
5248  *np = NULL;
5249  return ONIGERR_MEMORY;
5250 }
5251 
/* Printable spellings of the "popular" quantifiers, used when building the
   nested-repeat warning in set_quantifier().  Indexed by the value returned
   from popular_quantifier_num(). */
static const char* const PopularQStr[] = {
  "?",   /* 0 */
  "*",   /* 1 */
  "+",   /* 2 */
  "??",  /* 3 */
  "*?",  /* 4 */
  "+?"   /* 5 */
};

/* Printable spellings of the reduced form of a nested repeat, used in the
   same warning.  Indexed by a ReduceTypeTable[][] entry; the first two
   entries are empty (presumably the RQ_ASIS / RQ_DEL cases, which
   set_quantifier() handles without printing a replacement -- confirm
   against the enum definition). */
static const char* const ReduceQStr[] = {
  "",          /* 0 */
  "",          /* 1 */
  "*",         /* 2 */
  "*?",        /* 3 */
  "??",        /* 4 */
  "+ and ??",  /* 5 */
  "+? and ?"   /* 6 */
};
5259 
/*
 * Attach `target` to the quantifier node `qnode`, simplifying where possible.
 *
 *   qnode  - quantifier node (accessed through NQTFR)
 *   target - node the quantifier applies to
 *   group  - when non-zero, a string target is never split
 *   env    - scan environment (encoding, syntax, pattern text for warnings)
 *
 * Returns:
 *   1 - the quantifier is exactly {1,1}; nothing is attached
 *   2 - the target was a string node whose last character was split off and
 *       attached instead of the whole string
 *   0 - otherwise (target attached, or the nested quantifier was reduced)
 *
 * NOTE(review): how callers act on the return values 1 and 2 is not visible
 * in this part of the file.
 */
5260 static int
5261 set_quantifier(Node* qnode, Node* target, int group, ScanEnv* env)
5262 {
5263  QtfrNode* qn;
5264 
5265  qn = NQTFR(qnode);
/* {1,1} repeats once: report it without attaching the target */
5266  if (qn->lower == 1 && qn->upper == 1) {
5267  return 1;
5268  }
5269 
5270  switch (NTYPE(target)) {
5271  case NT_STR:
/* outside a group, only the last character of the string is quantified */
5272  if (! group) {
5273  StrNode* sn = NSTR(target);
5274  if (str_node_can_be_split(sn, env->enc)) {
5275  Node* n = str_node_split_last_char(sn, env->enc);
5276  if (IS_NOT_NULL(n)) {
5277  qn->target = n;
5278  return 2;
5279  }
5280  }
5281  }
5282  break;
5283 
5284  case NT_QTFR:
5285  { /* check redundant double repeat. */
5286  /* verbose warn (?:.?)? etc... but not warn (.?)? etc... */
5287  QtfrNode* qnt = NQTFR(target);
/* indices into PopularQStr; negative values are treated below as
   "not one of the popular quantifiers" */
5288  int nestq_num = popular_quantifier_num(qn);
5289  int targetq_num = popular_quantifier_num(qnt);
5290 
5291 #ifdef USE_WARNING_REDUNDANT_NESTED_REPEAT_OPERATOR
/* NOTE(review): the embedded numbering jumps from 5291 to 5294 here; the
   statement that opens the block closed at 5322 is missing from this
   listing. */
5294  UChar buf[WARN_BUFSIZE];
5295 
5296  switch (ReduceTypeTable[targetq_num][nestq_num]) {
5297  case RQ_ASIS:
5298  break;
5299 
5300  case RQ_DEL:
/* skip formatting the message when the verbose-warn hook is the null one */
5301  if (onig_verb_warn != onig_null_warn) {
5302  onig_snprintf_with_pattern(buf, WARN_BUFSIZE, env->enc,
5303  env->pattern, env->pattern_end,
5304  (UChar* )"redundant nested repeat operator");
5305  (*onig_verb_warn)((char* )buf);
5306  }
5307  goto warn_exit;
5308  break;
5309 
5310  default:
5311  if (onig_verb_warn != onig_null_warn) {
5312  onig_snprintf_with_pattern(buf, WARN_BUFSIZE, env->enc,
5313  env->pattern, env->pattern_end,
5314  (UChar* )"nested repeat operator %s and %s was replaced with '%s'",
5315  PopularQStr[targetq_num], PopularQStr[nestq_num],
5316  ReduceQStr[ReduceTypeTable[targetq_num][nestq_num]]);
5317  (*onig_verb_warn)((char* )buf);
5318  }
5319  goto warn_exit;
5320  break;
5321  }
5322  }
5323 
5324  warn_exit:
5325 #endif
5326  if (targetq_num >= 0) {
5327  if (nestq_num >= 0) {
/* both quantifiers are popular ones: merge them; qn->target is not
   assigned on this path */
5328  onig_reduce_nested_quantifier(qnode, target);
5329  goto q_exit;
5330  }
5331  else if (targetq_num == 1 || targetq_num == 2) { /* * or + */
5332  /* (?:a*){n,m}, (?:a+){n,m} => (?:a*){n,n}, (?:a+){n,n} */
/* applies only to a greedy outer repeat with a finite upper bound > 1;
   a lower bound of 0 is replaced by an upper bound of 1 */
5333  if (! IS_REPEAT_INFINITE(qn->upper) && qn->upper > 1 && qn->greedy) {
5334  qn->upper = (qn->lower == 0 ? 1 : qn->lower);
5335  }
5336  }
5337  }
5338  }
5339  break;
5340 
5341  default:
5342  break;
5343  }
5344 
/* common case: quantify the target as given */
5345  qn->target = target;
5346  q_exit:
5347  return 0;
5348 }
5349 
5350 
5351 #ifdef USE_SHARED_CCLASS_TABLE
5352 
5353 #define THRESHOLD_RANGE_NUM_FOR_SHARE_CCLASS 8
5354 
5355 /* for ctype node hash table */
5356 
5357 typedef struct {
5359  int not;
5360  int type;
5361 } type_cclass_key;
5362 
5364 {
5365  if (x->type != y->type) return 1;
5366  if (x->enc != y->enc) return 1;
5367  if (x->not != y->not) return 1;
5368  return 0;
5369 }
5370 
5372 {
5373  int i, val;
5374  UChar *p;
5375 
5376  val = 0;
5377 
5378  p = (UChar* )&(key->enc);
5379  for (i = 0; i < (int )sizeof(key->enc); i++) {
5380  val = val * 997 + (int )*p++;
5381  }
5382 
5383  p = (UChar* )(&key->type);
5384  for (i = 0; i < (int )sizeof(key->type); i++) {
5385  val = val * 997 + (int )*p++;
5386  }
5387 
5388  val += key->not;
5389  return val + (val >> 5);
5390 }
5391 
5392 static const struct st_hash_type type_type_cclass_hash = {
5395 };
5396 
5398 
5399 
5400 static int
5402 {
5403  if (IS_NOT_NULL(node)) {
5404  CClassNode* cc = NCCLASS(node);
5405  if (IS_NOT_NULL(cc->mbuf)) xfree(cc->mbuf);
5406  xfree(node);
5407  }
5408 
5409  if (IS_NOT_NULL(key)) xfree(key);
5410  return ST_DELETE;
5411 }
5412 
5413 extern int
5415 {
5417  if (IS_NOT_NULL(OnigTypeCClassTable)) {
5418  onig_st_foreach(OnigTypeCClassTable, i_free_shared_class, 0);
5419  onig_st_free_table(OnigTypeCClassTable);
5420  OnigTypeCClassTable = NULL;
5421  }
5423 
5424  return 0;
5425 }
5426 
5427 #endif /* USE_SHARED_CCLASS_TABLE */
5428 
5429 
5430 #ifndef CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS
5431 static int
5432 clear_not_flag_cclass(CClassNode* cc, OnigEncoding enc)
5433 {
5434  BBuf *tbuf;
5435  int r;
5436 
5437  if (IS_NCCLASS_NOT(cc)) {
5438  bitset_invert(cc->bs);
5439 
5440  if (! ONIGENC_IS_SINGLEBYTE(enc)) {
5441  r = not_code_range_buf(enc, cc->mbuf, &tbuf);
5442  if (r != 0) return r;
5443 
5444  bbuf_free(cc->mbuf);
5445  cc->mbuf = tbuf;
5446  }
5447 
5448  NCCLASS_CLEAR_NOT(cc);
5449  }
5450 
5451  return 0;
5452 }
5453 #endif /* CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS */
5454 
5455 typedef struct {
5461 
5462 static int
5464  int to_len, void* arg)
5465 {
5466  IApplyCaseFoldArg* iarg;
5467  ScanEnv* env;
5468  CClassNode* cc;
5469  BitSetRef bs;
5470 
5471  iarg = (IApplyCaseFoldArg* )arg;
5472  env = iarg->env;
5473  cc = iarg->cc;
5474  bs = cc->bs;
5475 
5476  if (to_len == 1) {
5477  int is_in = onig_is_code_in_cc(env->enc, from, cc);
5478 #ifdef CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS
5479  if ((is_in != 0 && !IS_NCCLASS_NOT(cc)) ||
5480  (is_in == 0 && IS_NCCLASS_NOT(cc))) {
5481  if (ONIGENC_MBC_MINLEN(env->enc) > 1 || *to >= SINGLE_BYTE_SIZE) {
5482  add_code_range0(&(cc->mbuf), env, *to, *to, 0);
5483  }
5484  else {
5485  BITSET_SET_BIT(bs, *to);
5486  }
5487  }
5488 #else
5489  if (is_in != 0) {
5490  if (ONIGENC_MBC_MINLEN(env->enc) > 1 || *to >= SINGLE_BYTE_SIZE) {
5491  if (IS_NCCLASS_NOT(cc)) clear_not_flag_cclass(cc, env->enc);
5492  add_code_range0(&(cc->mbuf), env, *to, *to, 0);
5493  }
5494  else {
5495  if (IS_NCCLASS_NOT(cc)) {
5496  BITSET_CLEAR_BIT(bs, *to);
5497  }
5498  else
5499  BITSET_SET_BIT(bs, *to);
5500  }
5501  }
5502 #endif /* CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS */
5503  }
5504  else {
5505  int r, i, len;
5507  Node *snode = NULL_NODE;
5508 
5509  if (onig_is_code_in_cc(env->enc, from, cc)
5511  && !IS_NCCLASS_NOT(cc)
5512 #endif
5513  ) {
5514  for (i = 0; i < to_len; i++) {
5515  len = ONIGENC_CODE_TO_MBC(env->enc, to[i], buf);
5516  if (i == 0) {
5517  snode = onig_node_new_str(buf, buf + len);
5518  CHECK_NULL_RETURN_MEMERR(snode);
5519 
5520  /* char-class expanded multi-char only
5521  compare with string folded at match time. */
5522  NSTRING_SET_AMBIG(snode);
5523  }
5524  else {
5525  r = onig_node_str_cat(snode, buf, buf + len);
5526  if (r < 0) {
5527  onig_node_free(snode);
5528  return r;
5529  }
5530  }
5531  }
5532 
5533  *(iarg->ptail) = onig_node_new_alt(snode, NULL_NODE);
5534  CHECK_NULL_RETURN_MEMERR(*(iarg->ptail));
5535  iarg->ptail = &(NCDR((*(iarg->ptail))));
5536  }
5537  }
5538 
5539  return 0;
5540 }
5541 
5542 static int
5544 {
5545  /* same as (?>\x0D\x0A|[\x0A-\x0D\x{85}\x{2028}\x{2029}]) */
5546  Node* left = NULL;
5547  Node* right = NULL;
5548  Node* target1 = NULL;
5549  Node* target2 = NULL;
5550  CClassNode* cc;
5551  int num1, num2;
5553 
5554  /* \x0D\x0A */
5555  num1 = ONIGENC_CODE_TO_MBC(env->enc, 0x0D, buf);
5556  if (num1 < 0) return num1;
5557  num2 = ONIGENC_CODE_TO_MBC(env->enc, 0x0A, buf + num1);
5558  if (num2 < 0) return num2;
5559  left = node_new_str_raw(buf, buf + num1 + num2);
5560  if (IS_NULL(left)) goto err;
5561 
5562  /* [\x0A-\x0D] or [\x0A-\x0D\x{85}\x{2028}\x{2029}] */
5563  right = node_new_cclass();
5564  if (IS_NULL(right)) goto err;
5565  cc = NCCLASS(right);
5566  if (ONIGENC_MBC_MINLEN(env->enc) > 1) {
5567  add_code_range(&(cc->mbuf), env, 0x0A, 0x0D);
5568  }
5569  else {
5570  bitset_set_range(env, cc->bs, 0x0A, 0x0D);
5571  }
5572 
5573  /* TODO: move this block to enc/unicode.c */
5574  if (ONIGENC_IS_UNICODE(env->enc)) {
5575  /* UTF-8, UTF-16BE/LE, UTF-32BE/LE */
5576  add_code_range(&(cc->mbuf), env, 0x85, 0x85);
5577  add_code_range(&(cc->mbuf), env, 0x2028, 0x2029);
5578  }
5579 
5580  /* ...|... */
5581  target1 = onig_node_new_alt(right, NULL_NODE);
5582  if (IS_NULL(target1)) goto err;
5583  right = NULL;
5584  target2 = onig_node_new_alt(left, target1);
5585  if (IS_NULL(target2)) goto err;
5586  left = NULL;
5587  target1 = NULL;
5588 
5589  /* (?>...) */
5591  if (IS_NULL(*np)) goto err;
5592  NENCLOSE(*np)->target = target2;
5593  return ONIG_NORMAL;
5594 
5595  err:
5596  onig_node_free(left);
5597  onig_node_free(right);
5598  onig_node_free(target1);
5599  onig_node_free(target2);
5600  return ONIGERR_MEMORY;
5601 }
5602 
5603 static int
5605 {
5606  /* same as (?>\P{M}\p{M}*) */
5607  Node* np1 = NULL;
5608  Node* np2 = NULL;
5609  Node* qn = NULL;
5610  Node* list1 = NULL;
5611  Node* list2 = NULL;
5612  int r = 0;
5613 
5614 #ifdef USE_UNICODE_PROPERTIES
5615  if (ONIGENC_IS_UNICODE(env->enc)) {
5616  /* UTF-8, UTF-16BE/LE, UTF-32BE/LE */
5617  CClassNode* cc1;
5618  CClassNode* cc2;
5619  UChar* propname = (UChar* )"M";
5621  propname, propname + 1);
5622  if (ctype >= 0) {
5623  /* \P{M} */
5624  np1 = node_new_cclass();
5625  if (IS_NULL(np1)) goto err;
5626  cc1 = NCCLASS(np1);
5627  r = add_ctype_to_cc(cc1, ctype, 0, 1, env);
5628  if (r != 0) goto err;
5629  NCCLASS_SET_NOT(cc1);
5630 
5631  /* \p{M}* */
5632  np2 = node_new_cclass();
5633  if (IS_NULL(np2)) goto err;
5634  cc2 = NCCLASS(np2);
5635  r = add_ctype_to_cc(cc2, ctype, 0, 1, env);
5636  if (r != 0) goto err;
5637 
5639  if (IS_NULL(qn)) goto err;
5640  NQTFR(qn)->target = np2;
5641  np2 = NULL;
5642 
5643  /* \P{M}\p{M}* */
5644  list2 = node_new_list(qn, NULL_NODE);
5645  if (IS_NULL(list2)) goto err;
5646  qn = NULL;
5647  list1 = node_new_list(np1, list2);
5648  if (IS_NULL(list1)) goto err;
5649  np1 = NULL;
5650  list2 = NULL;
5651 
5652  /* (?>...) */
5654  if (IS_NULL(*np)) goto err;
5655  NENCLOSE(*np)->target = list1;
5656  return ONIG_NORMAL;
5657  }
5658  }
5659 #endif /* USE_UNICODE_PROPERTIES */
5660  if (IS_NULL(*np)) {
5661  /* PerlSyntax: (?s:.), RubySyntax: (?m:.) */
5662  OnigOptionType option;
5663  np1 = node_new_anychar();
5664  if (IS_NULL(np1)) goto err;
5665 
5666  option = env->option;
5667  ONOFF(option, ONIG_OPTION_MULTILINE, 0);
5668  *np = node_new_option(option);
5669  if (IS_NULL(*np)) goto err;
5670  NENCLOSE(*np)->target = np1;
5671  }
5672  return ONIG_NORMAL;
5673 
5674  err:
5675  onig_node_free(np1);
5676  onig_node_free(np2);
5677  onig_node_free(qn);
5678  onig_node_free(list1);
5679  onig_node_free(list2);
5680  return (r == 0) ? ONIGERR_MEMORY : r;
5681 }
5682 
/*
 * Count the set bits in the low 32 bits of `bits`.
 *
 * Adjacent bit fields are summed in parallel, doubling the field width on
 * every step (1, 2, 4, 8, 16 bits), so the final value is the total count.
 */
static int
countbits(unsigned int bits)
{
  static const unsigned int field_mask[] = {
    0x55555555, 0x33333333, 0x0f0f0f0f, 0x00ff00ff, 0x0000ffff
  };
  unsigned int acc = bits;
  int step;

  for (step = 0; step < 5; step++) {
    unsigned int m = field_mask[step];
    /* field width at this step is 2^step bits */
    acc = (acc & m) + ((acc >> (1 << step)) & m);
  }
  return (int )acc;
}
5692 
5693 static int
5695 {
5696  const OnigCodePoint not_found = ONIG_LAST_CODE_POINT;
5697  OnigCodePoint c = not_found;
5698  int i;
5699  BBuf *bbuf = cc->mbuf;
5700 
5701  if (IS_NCCLASS_NOT(cc)) return 0;
5702 
5703  /* check bbuf */
5704  if (IS_NOT_NULL(bbuf)) {
5705  OnigCodePoint n, *data;
5706  GET_CODE_POINT(n, bbuf->p);
5707  data = (OnigCodePoint* )(bbuf->p) + 1;
5708  if ((n == 1) && (data[0] == data[1])) {
5709  /* only one char found in the bbuf, save the code point. */
5710  c = data[0];
5711  if (((c < SINGLE_BYTE_SIZE) && BITSET_AT(cc->bs, c))) {
5712  /* skip if c is included in the bitset */
5713  c = not_found;
5714  }
5715  }
5716  else {
5717  return 0; /* the bbuf contains multiple chars */
5718  }
5719  }
5720 
5721  /* check bitset */
5722  for (i = 0; i < BITSET_SIZE; i++) {
5723  Bits b1 = cc->bs[i];
5724  if (b1 != 0) {
5725  if (((b1 & (b1 - 1)) == 0) && (c == not_found)) {
5726  c = BITS_IN_ROOM * i + countbits(b1 - 1);
5727  } else {
5728  return 0; /* the character class contains multiple chars */
5729  }
5730  }
5731  }
5732 
5733  if (c != not_found) {
5734  *code = c;
5735  return 1;
5736  }
5737 
5738  /* the character class contains no char. */
5739  return 0;
5740 }
5741 
5742 
/*
 * parse_exp: parse one expression element, beginning at the token already
 * held in *tok, followed by any repeat operators that apply to it.
 *
 *  np   - receives the node built for the element (set to NULL on entry).
 *  tok  - current token; refreshed through fetch_token() as input is consumed.
 *  term - token type that terminates the enclosing construct.
 *  src  - in/out read position in the pattern; end - end of the pattern.
 *  env  - scan environment (options, encoding, syntax, counters).
 *
 * Returns a negative error code as soon as a callee reports one, otherwise
 * the type of the token at which parsing of this element stopped.
 *
 * NOTE(review): this listing is missing some original source lines (the
 * embedded line numbers skip), so a few statements below appear without the
 * condition or statement that preceded them in the real file.
 */
5743 static int
5744 parse_exp(Node** np, OnigToken* tok, int term,
5745  UChar** src, UChar* end, ScanEnv* env)
5746 {
5747  int r, len, group = 0;
5748  Node* qn;
5749  Node** targetp;
5750 
5751  *np = NULL;
5752  if (tok->type == (enum TokenSyms )term)
5753  goto end_of_token;
5754 
5755  switch (tok->type) {
5756  case TK_ALT:
5757  case TK_EOT:
5758  end_of_token:
 /* terminator, alternation or end of pattern: yield an empty node and
    report the token type to the caller */
5759  *np = node_new_empty();
5760  return tok->type;
5761  break;
5762 
5763  case TK_SUBEXP_OPEN:
5764  r = parse_enclose(np, tok, TK_SUBEXP_CLOSE, src, end, env);
5765  if (r < 0) return r;
 /* r == 1 sets the group flag that is later handed to set_quantifier() */
5766  if (r == 1) group = 1;
5767  else if (r == 2) { /* option only */
5768  Node* target;
5769  OnigOptionType prev = env->option;
5770 
 /* parse the rest of the enclosing subexpression under the new options,
    then restore the previous options */
5771  env->option = NENCLOSE(*np)->option;
5772  r = fetch_token(tok, src, end, env);
5773  if (r < 0) return r;
5774  r = parse_subexp(&target, tok, term, src, end, env);
5775  env->option = prev;
5776  if (r < 0) {
5777  onig_node_free(target);
5778  return r;
5779  }
5780  NENCLOSE(*np)->target = target;
5781  return tok->type;
5782  }
5783  break;
5784 
5785  case TK_SUBEXP_CLOSE:
5788 
5789  if (tok->escaped) goto tk_raw_byte;
5790  else goto tk_byte;
5791  break;
5792 
5793  case TK_LINEBREAK:
5794  r = node_linebreak(np, env);
5795  if (r < 0) return r;
5796  break;
5797 
5799  r = node_extended_grapheme_cluster(np, env);
5800  if (r < 0) return r;
5801  break;
5802 
5803  case TK_KEEP:
5806  break;
5807 
5808  case TK_STRING:
5809  tk_byte:
5810  {
5811  *np = node_new_str(tok->backp, *src);
5813 
 /* keep appending following TK_STRING tokens (and TK_CODE_POINT tokens
    unless NUMBERED_CHAR_IS_NOT_CASE_AMBIG is defined) to the same node */
5814  string_loop:
5815  while (1) {
5816  r = fetch_token(tok, src, end, env);
5817  if (r < 0) return r;
5818  if (r == TK_STRING) {
5819  r = onig_node_str_cat(*np, tok->backp, *src);
5820  }
5821 #ifndef NUMBERED_CHAR_IS_NOT_CASE_AMBIG
5822  else if (r == TK_CODE_POINT) {
5823  r = node_str_cat_codepoint(*np, env->enc, tok->u.code);
5824  }
5825 #endif
5826  else {
5827  break;
5828  }
5829  if (r < 0) return r;
5830  }
5831 
 /* r holds the type of the token that ended the string; the repeat
    handling below consumes it without fetching again */
5832  string_end:
5833  targetp = np;
5834  goto repeat;
5835  }
5836  break;
5837 
5838  case TK_RAW_BYTE:
5839  tk_raw_byte:
5840  {
5841  *np = node_new_str_raw_char((UChar )tok->u.c);
5843  len = 1;
 /* collect raw bytes one token at a time; once len equals the character
    length reported by enclen(), the raw flag is cleared and the node is
    handled like an ordinary string */
5844  while (1) {
5845  if (len >= ONIGENC_MBC_MINLEN(env->enc)) {
5846  if (len == enclen(env->enc, NSTR(*np)->s, NSTR(*np)->end)) {
5847  r = fetch_token(tok, src, end, env);
5848  NSTRING_CLEAR_RAW(*np);
5849  goto string_end;
5850  }
5851  }
5852 
5853  r = fetch_token(tok, src, end, env);
5854  if (r < 0) return r;
5855  if (r != TK_RAW_BYTE) {
5856  /* Don't use this, it is wrong for little endian encodings. */
5857 #ifdef USE_PAD_TO_SHORT_BYTE_CHAR
5858  int rem;
5859  if (len < ONIGENC_MBC_MINLEN(env->enc)) {
5860  rem = ONIGENC_MBC_MINLEN(env->enc) - len;
5861  (void )node_str_head_pad(NSTR(*np), rem, (UChar )0);
5862  if (len + rem == enclen(env->enc, NSTR(*np)->s)) {
5863  NSTRING_CLEAR_RAW(*np);
5864  goto string_end;
5865  }
5866  }
5867 #endif
5869  }
5870 
5871  r = node_str_cat_char(*np, (UChar )tok->u.c);
5872  if (r < 0) return r;
5873 
5874  len++;
5875  }
5876  }
5877  break;
5878 
5879  case TK_CODE_POINT:
5880  {
5881  *np = node_new_empty();
5883  r = node_str_cat_codepoint(*np, env->enc, tok->u.code);
5884  if (r != 0) return r;
5885 #ifdef NUMBERED_CHAR_IS_NOT_CASE_AMBIG
5886  NSTRING_SET_RAW(*np);
5887 #else
5888  goto string_loop;
5889 #endif
5890  }
5891  break;
5892 
5893  case TK_QUOTE_OPEN:
5894  {
5895  OnigCodePoint end_op[2];
5896  UChar *qstart, *qend, *nextp;
5897 
 /* the quoted text runs up to the escape character followed by 'E';
    when that pair is not found it runs to the end of the pattern */
5898  end_op[0] = (OnigCodePoint )MC_ESC(env->syntax);
5899  end_op[1] = (OnigCodePoint )'E';
5900  qstart = *src;
5901  qend = find_str_position(end_op, 2, qstart, end, &nextp, env->enc);
5902  if (IS_NULL(qend)) {
5903  nextp = qend = end;
5904  }
5905  *np = node_new_str(qstart, qend);
5907  *src = nextp;
5908  }
5909  break;
5910 
5911  case TK_CHAR_TYPE:
5912  {
5913  switch (tok->u.prop.ctype) {
5914  case ONIGENC_CTYPE_WORD:
5915  *np = node_new_ctype(tok->u.prop.ctype, tok->u.prop.not,
5916  IS_ASCII_RANGE(env->option));
5918  break;
5919 
5920  case ONIGENC_CTYPE_SPACE:
5921  case ONIGENC_CTYPE_DIGIT:
5922  case ONIGENC_CTYPE_XDIGIT:
5923  {
5924  CClassNode* cc;
5925 
5926 #ifdef USE_SHARED_CCLASS_TABLE
5927  const OnigCodePoint *mbr;
5928  OnigCodePoint sb_out;
5929 
5930  r = ONIGENC_GET_CTYPE_CODE_RANGE(env->enc, tok->u.prop.ctype,
5931  &sb_out, &mbr);
5932  if (r == 0 &&
5933  ! IS_ASCII_RANGE(env->option) &&
5937  type_cclass_key* new_key;
5938 
5939  key.enc = env->enc;
5940  key.not = tok->u.prop.not;
5941  key.type = tok->u.prop.ctype;
5942 
5944 
 /* look the class up in OnigTypeCClassTable (created on first use);
    a hit stores the shared node in *np */
5945  if (IS_NULL(OnigTypeCClassTable)) {
5946  OnigTypeCClassTable
5947  = onig_st_init_table_with_size(&type_type_cclass_hash, 10);
5948  if (IS_NULL(OnigTypeCClassTable)) {
5950  return ONIGERR_MEMORY;
5951  }
5952  }
5953  else {
5954  if (onig_st_lookup(OnigTypeCClassTable, (st_data_t )&key,
5955  (st_data_t* )np)) {
5957  break;
5958  }
5959  }
5960 
5961  *np = node_new_cclass_by_codepoint_range(tok->u.prop.not,
5962  sb_out, mbr);
5963  if (IS_NULL(*np)) {
5965  return ONIGERR_MEMORY;
5966  }
5967 
5968  cc = NCCLASS(*np);
5969  NCCLASS_SET_SHARE(cc);
 /* NOTE(review): new_key is passed to xmemcpy without a NULL check;
    confirm xmalloc cannot return NULL in this build */
5970  new_key = (type_cclass_key* )xmalloc(sizeof(type_cclass_key));
5971  xmemcpy(new_key, &key, sizeof(type_cclass_key));
5972  onig_st_add_direct(OnigTypeCClassTable, (st_data_t )new_key,
5973  (st_data_t )*np);
5974 
5976  }
5977  else {
5978 #endif
5979  *np = node_new_cclass();
5981  cc = NCCLASS(*np);
5982  r = add_ctype_to_cc(cc, tok->u.prop.ctype, 0, 0, env);
5983  if (r != 0) return r;
5984  if (tok->u.prop.not != 0) NCCLASS_SET_NOT(cc);
5985 #ifdef USE_SHARED_CCLASS_TABLE
5986  }
5987 #endif
5988  }
5989  break;
5990 
5991  default:
5992  return ONIGERR_PARSER_BUG;
5993  break;
5994  }
5995  }
5996  break;
5997 
5998  case TK_CHAR_PROPERTY:
5999  r = parse_char_property(np, tok, src, end, env);
6000  if (r != 0) return r;
6001  break;
6002 
6003  case TK_CC_OPEN:
6004  {
6005  CClassNode* cc;
6007 
6008  r = parse_char_class(np, tok, src, end, env);
6009  if (r != 0) return r;
6010 
6011  cc = NCCLASS(*np);
 /* when is_onechar_cclass() reports a single code point, the class node
    is replaced by a string node holding that code point */
6012  if (is_onechar_cclass(cc, &code)) {
6013  onig_node_free(*np);
6014  *np = node_new_empty();
6016  r = node_str_cat_codepoint(*np, env->enc, code);
6017  if (r != 0) return r;
6018  goto string_loop;
6019  }
6020  if (IS_IGNORECASE(env->option)) {
6021  IApplyCaseFoldArg iarg;
6022 
6023  iarg.env = env;
6024  iarg.cc = cc;
6025  iarg.alt_root = NULL_NODE;
6026  iarg.ptail = &(iarg.alt_root);
6027 
6029  i_apply_case_fold, &iarg);
6030  if (r != 0) {
6031  onig_node_free(iarg.alt_root);
6032  return r;
6033  }
 /* if the case-fold callback produced alternatives, the result becomes
    an alternation of the class and those alternatives */
6034  if (IS_NOT_NULL(iarg.alt_root)) {
6035  Node* work = onig_node_new_alt(*np, iarg.alt_root);
6036  if (IS_NULL(work)) {
6037  onig_node_free(iarg.alt_root);
6038  return ONIGERR_MEMORY;
6039  }
6040  *np = work;
6041  }
6042  }
6043  }
6044  break;
6045 
6046  case TK_ANYCHAR:
6047  *np = node_new_anychar();
6049  break;
6050 
6051  case TK_ANYCHAR_ANYTIME:
6052  *np = node_new_anychar();
6056  NQTFR(qn)->target = *np;
6057  *np = qn;
6058  break;
6059 
6060  case TK_BACKREF:
6061  len = tok->u.backref.num;
6062  *np = node_new_backref(len,
6063  (len > 1 ? tok->u.backref.refs : &(tok->u.backref.ref1)),
6064  tok->u.backref.by_name,
6066  tok->u.backref.exist_level,
6067  tok->u.backref.level,
6068 #endif
6069  env);
6071  break;
6072 
6073 #ifdef USE_SUBEXP_CALL
6074  case TK_CALL:
6075  {
6076  int gnum = tok->u.call.gnum;
6077 
 /* a negative or relative group number is converted to an absolute one;
    a result <= 0 is rejected */
6078  if (gnum < 0 || tok->u.call.rel != 0) {
6079  if (gnum > 0) gnum--;
6080  gnum = BACKREF_REL_TO_ABS(gnum, env);
6081  if (gnum <= 0)
6082  return ONIGERR_INVALID_BACKREF;
6083  }
6084  *np = node_new_call(tok->u.call.name, tok->u.call.name_end, gnum);
6086  env->num_call++;
6087  }
6088  break;
6089 #endif
6090 
6091  case TK_ANCHOR:
6092  *np = onig_node_new_anchor(tok->u.anchor.subtype);
6094  NANCHOR(*np)->ascii_range = tok->u.anchor.ascii_range;
6095  break;
6096 
6097  case TK_OP_REPEAT:
6098  case TK_INTERVAL:
6102  else
6103  *np = node_new_empty();
6104  }
6105  else {
6106  goto tk_byte;
6107  }
6108  break;
6109 
6110  default:
6111  return ONIGERR_PARSER_BUG;
6112  break;
6113  }
6114 
6115  {
6116  targetp = np;
6117 
6118  re_entry:
6119  r = fetch_token(tok, src, end, env);
6120  if (r < 0) return r;
6121 
 /* r is the type of the most recently fetched token; each repeat operator
    found here is applied to *targetp and the next token is fetched */
6122  repeat:
6123  if (r == TK_OP_REPEAT || r == TK_INTERVAL) {
6124  if (is_invalid_quantifier_target(*targetp))
6126 
6127  qn = node_new_quantifier(tok->u.repeat.lower, tok->u.repeat.upper,
6128  (r == TK_INTERVAL ? 1 : 0));
6130  NQTFR(qn)->greedy = tok->u.repeat.greedy;
6131  r = set_quantifier(qn, *targetp, group, env);
6132  if (r < 0) {
6133  onig_node_free(qn);
6134  return r;
6135  }
6136 
 /* a possessive repeat wraps the quantifier in the enclose node en */
6137  if (tok->u.repeat.possessive != 0) {
6138  Node* en;
6140  if (IS_NULL(en)) {
6141  onig_node_free(qn);
6142  return ONIGERR_MEMORY;
6143  }
6144  NENCLOSE(en)->target = qn;
6145  qn = en;
6146  }
6147 
 /* dispatch on the set_quantifier() result:
    0 - qn replaces the target; 1 - qn is discarded and the target kept;
    2 - the target and qn become consecutive list elements */
6148  if (r == 0) {
6149  *targetp = qn;
6150  }
6151  else if (r == 1) {
6152  onig_node_free(qn);
6153  }
6154  else if (r == 2) { /* split case: /abc+/ */
6155  Node *tmp;
6156 
 /* NOTE(review): *targetp is overwritten before the NULL check, so on
    failure the previous target is no longer reachable through targetp */
6157  *targetp = node_new_list(*targetp, NULL);
6158  if (IS_NULL(*targetp)) {
6159  onig_node_free(qn);
6160  return ONIGERR_MEMORY;
6161  }
6162  tmp = NCDR(*targetp) = node_new_list(qn, NULL);
6163  if (IS_NULL(tmp)) {
6164  onig_node_free(qn);
6165  return ONIGERR_MEMORY;
6166  }
 /* further repeat operators now apply to the quantifier element */
6167  targetp = &(NCAR(tmp));
6168  }
6169  goto re_entry;
6170  }
6171  }
6172 
6173  return r;
6174 }
6175 
6176 static int
6178  UChar** src, UChar* end, ScanEnv* env)
6179 {
6180  int r;
6181  Node *node, **headp;
6182 
6183  *top = NULL;
6184  r = parse_exp(&node, tok, term, src, end, env);
6185  if (r < 0) {
6186  onig_node_free(node);
6187  return r;
6188  }
6189 
6190  if (r == TK_EOT || r == term || r == TK_ALT) {
6191  *top = node;
6192  }
6193  else {
6194  *top = node_new_list(node, NULL);
6195  headp = &(NCDR(*top));
6196  while (r != TK_EOT && r != term && r != TK_ALT) {
6197  r = parse_exp(&node, tok, term, src, end, env);
6198  if (r < 0) {
6199  onig_node_free(node);
6200  return r;
6201  }
6202 
6203  if (NTYPE(node) == NT_LIST) {
6204  *headp = node;
6205  while (IS_NOT_NULL(NCDR(node))) node = NCDR(node);
6206  headp = &(NCDR(node));
6207  }
6208  else {
6209  *headp = node_new_list(node, NULL);
6210  headp = &(NCDR(*headp));
6211  }
6212  }
6213  }
6214 
6215  return r;
6216 }
6217 
6218 /* term_tok: TK_EOT or TK_SUBEXP_CLOSE */
/*
 * parse_subexp: parse one or more branches separated by TK_ALT.
 *
 * NOTE(review): the line carrying the function name and its first
 * parameters (source line 6220) is missing from this listing; the
 * cross-reference index in this file gives the signature as
 *   parse_subexp(Node **top, OnigToken *tok, int term,
 *                UChar **src, UChar *end, ScanEnv *env).
 *
 * *top is set to NULL on entry.  When the first branch already ends at
 * `term`, that branch becomes *top; when it ends at TK_ALT, an alternation
 * chain is built with one cell per branch.  Returns the value of the last
 * parse_branch() call on success or a negative error code.
 */
6219 static int
6221  UChar** src, UChar* end, ScanEnv* env)
6222 {
6223  int r;
6224  Node *node, **headp;
6225 
6226  *top = NULL;
6227  r = parse_branch(&node, tok, term, src, end, env);
6228  if (r < 0) {
6229  onig_node_free(node);
6230  return r;
6231  }
6232 
6233  if (r == term) {
6234  *top = node;
6235  }
6236  else if (r == TK_ALT) {
 /* NOTE(review): the onig_node_new_alt() results here and in the loop are
    passed to NCDR() without a NULL check, although parse_exp() checks the
    same call for NULL; confirm and add ONIGERR_MEMORY handling */
6237  *top = onig_node_new_alt(node, NULL);
 /* headp always addresses the cdr slot where the next cell is linked */
6238  headp = &(NCDR(*top));
6239  while (r == TK_ALT) {
6240  r = fetch_token(tok, src, end, env);
6241  if (r < 0) return r;
6242  r = parse_branch(&node, tok, term, src, end, env);
6243  if (r < 0) {
6244  onig_node_free(node);
6245  return r;
6246  }
6247 
6248  *headp = onig_node_new_alt(node, NULL);
6249  headp = &(NCDR(*headp));
6250  }
6251 
 /* the last branch must have stopped at the expected terminator */
6252  if (tok->type != (enum TokenSyms )term)
6253  goto err;
6254  }
6255  else {
6256  onig_node_free(node);
6257  err:
 /* NOTE(review): the statement executed when term == TK_SUBEXP_CLOSE
    (source line 6259) is missing from this listing */
6258  if (term == TK_SUBEXP_CLOSE)
6260  else
6261  return ONIGERR_PARSER_BUG;
6262  }
6263 
6264  return r;
6265 }
6266 
/*
 * parse_regexp: parse a complete pattern into a node tree.
 *
 *  top      - receives the root of the tree.
 *  src, end - in/out read position in the pattern and the pattern end.
 *  env      - scan environment.
 *
 * Fetches the first token and parses up to TK_EOT with parse_subexp().
 * When USE_SUBEXP_CALL is defined and env->num_call > 0, the tree is
 * wrapped in an enclose-memory node with regnum 0, which is registered
 * through scan_env_set_mem_node() and becomes the new root.
 *
 * Returns 0 on success; otherwise the error code of the failing callee.
 */
6267 static int
6268 parse_regexp(Node** top, UChar** src, UChar* end, ScanEnv* env)
6269 {
6270  int r;
6271  OnigToken tok;
6272 
6273  r = fetch_token(&tok, src, end, env);
6274  if (r < 0) return r;
6275  r = parse_subexp(top, &tok, TK_EOT, src, end, env);
6276  if (r < 0) return r;
6277 
6278 #ifdef USE_SUBEXP_CALL
6279  if (env->num_call > 0) {
6280  /* Capture the pattern itself. It is used for (?R), (?0) and \g<0>. */
6281  const int num = 0;
6282  Node* np;
6283  np = node_new_enclose_memory(env->option, 0);
 /* NOTE(review): source line 6284 is missing from this listing; it
    presumably checks np for NULL - confirm against the original file */
6285  NENCLOSE(np)->regnum = num;
6286  NENCLOSE(np)->target = *top;
6287  r = scan_env_set_mem_node(env, num, np);
 /* NOTE(review): on this error return np is neither freed nor stored in
    *top; confirm whether the wrapper node leaks */
6288  if (r != 0) return r;
6289  *top = np;
6290  }
6291 #endif
6292  return 0;
6293 }
6294 
6295 extern int
6296 onig_parse_make_tree(Node** root, const UChar* pattern, const UChar* end,
6297  regex_t* reg, ScanEnv* env)
6298 {
6299  int r;
6300  UChar* p;
6301 
6302 #ifdef USE_NAMED_GROUP
6303  names_clear(reg);
6304 #endif
6305 
6306  scan_env_clear(env);
6307  env->option = reg->options;
6308  env->case_fold_flag = reg->case_fold_flag;
6309  env->enc = reg->enc;
6310  env->syntax = reg->syntax;
6311  env->pattern = (UChar* )pattern;
6312  env->pattern_end = (UChar* )end;
6313  env->reg = reg;
6314 
6315  *root = NULL;
6316  p = (UChar* )pattern;
6317  r = parse_regexp(root, &p, (UChar* )end, env);
6318  reg->num_mem = env->num_mem;
6319  return r;
6320 }
6321 
6322 extern void
6324  UChar* arg, UChar* arg_end)
6325 {
6326  env->error = arg;
6327  env->error_end = arg_end;
6328 }
VALUE data
Definition: tcltklib.c:3368
const UChar * name
Definition: ripper.y:104
UChar * pattern
Definition: regparse.h:296
static void bitset_set_range(ScanEnv *env, BitSetRef bs, int from, int to)
Definition: regparse.c:167
Definition: regparse.c:414
#define ONIG_SYN_OP_ESC_CONTROL_CHARS
#define NSTRING_SET_AMBIG(node)
Definition: regparse.h:108
#define ONIG_SYN_OP_ESC_X_BRACE_HEX8
void onig_scan_env_set_error_string(ScanEnv *env, int ecode ARG_UNUSED, UChar *arg, UChar *arg_end)
Definition: regparse.c:6323
volatile VALUE tmp
Definition: tcltklib.c:10209
#define NCCLASS_SET_NOT(nd)
Definition: regint.h:764
#define ONIG_SYN_FIXED_INTERVAL_IS_GREEDY_ONLY
static OnigWarnFunc onig_warn
Definition: regparse.c:87
void onig_set_warn_func(OnigWarnFunc f)
Definition: regparse.c:96
int gnum
Definition: regparse.c:2380
#define ONIGERR_TOO_BIG_WIDE_CHAR_VALUE
#define IS_NULL(p)
Definition: regint.h:278
#define NODE_STR_BUF_SIZE
Definition: regparse.h:98
ssize_t n
Definition: bigdecimal.c:5655
#define ENCLOSE_MEMORY
Definition: regparse.h:92
unsigned int alloc
Definition: regint.h:423
static int popular_quantifier_num(QtfrNode *q)
Definition: regparse.c:2223
#define ADD_ALL_MULTI_BYTE_RANGE(enc, mbuf)
Definition: regparse.c:143
#define ONIGENC_CTYPE_GRAPH
#define ONIG_OPTION_WORD_BOUND_ALL_RANGE
VP_EXPORT int
Definition: bigdecimal.c:5050
UChar * pattern_end
Definition: regparse.h:297
unsigned int OnigOptionType
Definition: ripper.y:349
#define ONIG_SYNTAX_RUBY
int onig_foreach_name(regex_t *reg, int(*func)(const UChar *, const UChar *, int, int *, regex_t *, void *), void *arg)
Definition: regparse.c:537
static enum ReduceType const ReduceTypeTable[6][6]
Definition: regparse.c:2257
#define IS_REPEAT_INFINITE(n)
Definition: regint.h:388
#define NCCLASS_SET_SHARE(nd)
Definition: regint.h:765
#define ONIG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND
UChar * end
Definition: regparse.h:170
UChar * pat
Definition: regerror.c:390
static Node * node_new_list(Node *left, Node *right)
Definition: regparse.c:1241
int onig_free_node_list(void)
Definition: regparse.c:1110
#define ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE
#define ONIGENC_IS_CODE_DIGIT(enc, code)
#define SET_ALL_MULTI_BYTE_RANGE(enc, pbuf)
Definition: regparse.c:140
#define FALSE
Definition: nkf.h:174
int onig_node_str_cat(Node *node, const UChar *s, const UChar *end)
Definition: regparse.c:1443
ONIG_EXTERN const OnigSyntaxType OnigSyntaxRuby
Definition: ripper.y:400
static void CC_ESC_WARN(ScanEnv *env, UChar *c)
Definition: regparse.c:2910
static int node_extended_grapheme_cluster(Node **np, ScanEnv *env)
Definition: regparse.c:5604
Node * onig_node_list_add(Node *list, Node *x)
Definition: regparse.c:1259
#define IS_SYNTAX_BV(syn, bvm)
Definition: regparse.h:326
int onig_st_lookup_strend(hash_table_type *table, const UChar *str_key, const UChar *end_key, hash_data_type *value)
Definition: regparse.c:379
int * back_refs
Definition: regparse.c:420
code
Definition: tcltklib.c:3381
static int fetch_name_with_level(OnigCodePoint start_code, UChar **src, UChar *end, UChar **rname_end, ScanEnv *env, int *rback_num, int *rlevel)
Definition: regparse.c:2580
#define INT_MAX_LIMIT
Definition: regint.h:353
#define ONIG_OPTION_POSIX_BRACKET_ALL_RANGE
#define ONIG_ENCODING_ASCII
static const char *const PopularQStr[]
Definition: regparse.c:5252
#define NST_NEST_LEVEL
Definition: regparse.h:138
void onig_null_warn(const char *s ARG_UNUSED)
Definition: regparse.c:82
#define onig_st_init_table_with_size
Definition: regint.h:132
#define ONIGENC_CODE_RANGE_TO(range, i)
#define ANCHOR_END_BUF
Definition: regint.h:503
Win32OLEIDispatch * p
Definition: win32ole.c:786
#define PINC
Definition: regparse.c:274
#define BBUF_WRITE_CODE_POINT(bbuf, pos, code)
Definition: regparse.c:1708
#define ONIGENC_MBC_MINLEN(enc)
unsigned int flags
Definition: regint.h:778
#define ONIGENC_IS_CODE_WORD(enc, code)
#define ONIGENC_CTYPE_PUNCT
static int scan_env_add_mem_entry(ScanEnv *env)
Definition: regparse.c:975
#define ONIG_SYN_OP2_ESC_U_HEX4
#define ANCHOR_WORD_BEGIN
Definition: regint.h:509
#define ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END
#define SINGLE_BYTE_SIZE
Definition: regint.h:392
static void bitset_invert_to(BitSetRef from, BitSetRef to)
Definition: regparse.c:192
int num_call
Definition: regparse.h:301
C_block * out
Definition: crypt.c:308
#define ONIG_SYN_OP_ESC_B_WORD_BOUND
#define ONIG_OPTION_DONT_CAPTURE_GROUP
#define BITSET_SET_BIT(bs, pos)
Definition: regint.h:415
#define ONIGENC_CTYPE_XDIGIT
return
Definition: bigdecimal.c:5800
static void bitset_and(BitSetRef dest, BitSetRef bs)
Definition: regparse.c:199
#define ONIGERR_INVALID_POSIX_BRACKET_TYPE
#define NCCLASS_CLEAR_NOT(nd)
Definition: regint.h:766
unsigned int OnigCodePoint
Definition: ripper.y:115
static int scan_unsigned_octal_number(UChar **src, UChar *end, int maxlen, OnigEncoding enc)
Definition: regparse.c:1680
OnigCodePoint code
Definition: regparse.c:2356
static int VALUE table
Definition: tcltklib.c:10138
#define ONIG_SYN_WARN_CC_DUP
int sourceline
Definition: regparse.h:320
SSL_METHOD *(* func)(void)
Definition: ossl_ssl.c:108
#define NULL_UCHARP
Definition: regint.h:282
#define WARN_BUFSIZE
Definition: regparse.c:33
#define ONIGERR_UNMATCHED_RANGE_SPECIFIER_IN_CHAR_CLASS
#define SCANENV_MEM_NODES(senv)
Definition: regparse.h:283
#define ONIG_SYN_OP2_PLUS_POSSESSIVE_INTERVAL
static int parse_posix_bracket(CClassNode *cc, UChar **src, UChar *end, ScanEnv *env)
Definition: regparse.c:4238
ssize_t i
Definition: bigdecimal.c:5655
regex_t * reg
Definition: regparse.c:515
#define UChar
int onig_names_free(regex_t *reg)
Definition: regparse.c:486
#define ANCHOR_BEGIN_LINE
Definition: regint.h:501
#define IS_SYNTAX_OP(syn, opm)
Definition: regparse.h:324
#define MC_ANYTIME(syn)
Definition: regint.h:719
#define NEWLINE_CODE
#define ONIGENC_IS_SINGLEBYTE(enc)
VALUE target
Definition: tcltklib.c:5532
int(* func)(const UChar *, const UChar *, int, int *, regex_t *, void *)
Definition: regparse.c:514
#define ONIGENC_CTYPE_ALNUM
#define ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME
int(* property_name_to_ctype)(struct OnigEncodingTypeST *enc, OnigUChar *p, OnigUChar *end)
Definition: ripper.y:173
static int fetch_named_backref_token(OnigCodePoint c, OnigToken *tok, UChar **src, UChar *end, ScanEnv *env)
Definition: regparse.c:3240
#define ONIGENC_MBC_MAXLEN(enc)
static int parse_char_property(Node **np, OnigToken *tok, UChar **src, UChar *end, ScanEnv *env)
Definition: regparse.c:4347
UChar * error
Definition: regparse.h:298
int onig_node_str_set(Node *node, const UChar *s, const UChar *end)
Definition: regparse.c:1479
#define ONIG_SYN_OP_ESC_X_HEX2
int * refs
Definition: regparse.c:2370
#define IS_NCCLASS_NOT(nd)
Definition: regint.h:767
static void scan_env_clear(ScanEnv *env)
Definition: regparse.c:944
static int parse_branch(Node **top, OnigToken *tok, int term, UChar **src, UChar *end, ScanEnv *env)
Definition: regparse.c:6177
static const char *const ReduceQStr[]
Definition: regparse.c:5256
#define NENCLOSE(node)
Definition: regparse.h:79
VALUE enc
Definition: tcltklib.c:10311
#define ONIGENC_CTYPE_LOWER
static Node * node_new_str_raw(UChar *s, UChar *end)
Definition: regparse.c:1552
#define ONIG_SYN_OP_LPAREN_SUBEXP
UChar * s
Definition: regparse.h:169
unsigned char Bits
Definition: regint.h:399
#define ANCHOR_END_LINE
Definition: regint.h:505
hash_table_type * onig_st_init_strend_table_with_size(st_index_t size)
Definition: regparse.c:367
#define ONIGERR_CONTROL_CODE_SYNTAX
#define ANCHOR_PREC_READ
Definition: regint.h:511
#define NT_QTFR
Definition: regparse.h:45
#define ONIG_SYN_OP_VBAR_ALT
#define ONIG_MAX_MULTI_BYTE_RANGES_NUM
CCVALTYPE
Definition: regparse.c:4374
int onigenc_strlen(OnigEncoding enc, const UChar *p, const UChar *end)
Definition: regenc.c:123
#define tok()
Definition: ripper.c:11259
#define xfree
static int node_linebreak(Node **np, ScanEnv *env)
Definition: regparse.c:5543
#define IS_SINGLELINE(option)
Definition: regint.h:361
#define ONIG_SYN_OP_QMARK_ZERO_ONE
#define ONIGENC_CTYPE_SPACE
static int i_free_shared_class(type_cclass_key *key, Node *node, void *arg ARG_UNUSED)
Definition: regparse.c:5401
#define GET_CODE_POINT(code, p)
Definition: regint.h:669
static void bbuf_free(BBuf *bbuf)
Definition: regparse.c:109
void rb_compile_warn(const char *file, int line, const char *fmt,...)
Definition: error.c:172
#define MC_ANYCHAR_ANYTIME(syn)
Definition: regint.h:722
Node * onig_node_new_alt(Node *left, Node *right)
Definition: regparse.c:1277
union OnigToken::@128 u
#define ONIGERR_GROUP_NUMBER_OVER_FOR_CAPTURE_HISTORY
int onig_st_insert_strend(hash_table_type *table, const UChar *str_key, const UChar *end_key, hash_data_type value)
Definition: regparse.c:391
#define ONIG_SYN_BACKSLASH_ESCAPE_IN_CC
#define BIT_STATUS_ON_AT_SIMPLE(stats, n)
Definition: regint.h:347
#define SCANENV_MEMNODES_SIZE
Definition: regparse.h:282
#define ONIG_SYN_OP2_ATMARK_CAPTURE_HISTORY
struct OnigToken::@128::@129 anchor
int back_alloc
Definition: regparse.c:418
#define ONIG_SYN_OP2_ESC_CAPITAL_X_EXTENDED_GRAPHEME_CLUSTER
int ascii_range
Definition: regparse.c:2359
static int is_onechar_cclass(CClassNode *cc, OnigCodePoint *code)
Definition: regparse.c:5694
#define ONIGERR_END_PATTERN_AT_CONTROL
#define ONIG_SYN_OP2_ESC_CAPITAL_C_BAR_CONTROL
TokenSyms
Definition: regparse.c:2317
r
Definition: bigdecimal.c:1196
#define BBUF_MOVE_RIGHT(buf, from, to, n)
Definition: regint.h:470
static struct st_hash_type type_type_cclass_hash
Definition: regparse.c:5392
#define ONIG_SYN_OP_ESC_QMARK_ZERO_ONE
#define NST_RECURSION
Definition: regparse.h:132
#define PFETCH(c)
Definition: regparse.c:278
int state
Definition: tcltklib.c:1462
#define ONIGERR_TARGET_OF_REPEAT_OPERATOR_NOT_SPECIFIED
#define ANCHOR_BEGIN_POSITION
Definition: regint.h:502
OnigEncoding enc
Definition: regparse.c:518
regex_t * reg
Definition: regparse.h:300
#define ONIG_SYN_OP_ESC_CAPITAL_G_BEGIN_ANCHOR
static void CLOSE_BRACKET_WITHOUT_ESC_WARN(ScanEnv *env, UChar *c)
Definition: regparse.c:2921
#define ONIGENC_CTYPE_CNTRL
int greedy
Definition: regparse.c:2364
VALUE VALUE args
Definition: tcltklib.c:2561
ReduceType
Definition: regparse.c:2247
st_table NameTable
Definition: regparse.c:425
#define ONIGERR_TOO_BIG_NUMBER
#define ANCHOR_NOT_WORD_BOUND
Definition: regint.h:508
int num_named
Definition: regparse.h:307
#define MC_ONE_OR_MORE_TIME(syn)
Definition: regint.h:721
#define SYN_GNU_REGEX_OP
Definition: regint.h:736
#define PFETCH_READY
Definition: regparse.c:270
#define ONIGERR_PREMATURE_END_OF_CHAR_CLASS
#define ONIGERR_TOO_SHORT_DIGITS
#define ONIG_LAST_CODE_POINT
Definition: regint.h:284
#define ONIGENC_CODE_RANGE_NUM(range)
flag
Definition: tcltklib.c:2048
#define THRESHOLD_RANGE_NUM_FOR_SHARE_CCLASS
Definition: regparse.c:5353
#define NBREF(node)
Definition: regparse.h:77
#define NT_ALT
Definition: regparse.h:49
struct OnigToken::@128::@131 backref
struct _FreeNode * next
Definition: regparse.c:1020
const char * fmt
Definition: tcltklib.c:841
#define BIT_STATUS_CLEAR(stats)
Definition: regint.h:335
static int set_quantifier(Node *qnode, Node *target, int group, ScanEnv *env)
Definition: regparse.c:5261
static int i_apply_case_fold(OnigCodePoint from, OnigCodePoint to[], int to_len, void *arg)
Definition: regparse.c:5463
#define ONIGENC_IS_CODE_CTYPE(enc, code, ctype)
static UChar * strcat_capa_from_static(UChar *dest, UChar *dest_end, const UChar *src, const UChar *src_end, size_t capa)
Definition: regparse.c:305
#define ONIG_OPTION_EXTEND
#define ONIG_SYN_OP2_ESC_CAPITAL_R_LINEBREAK
static int add_code_range_to_buf0(BBuf **pbuf, ScanEnv *env, OnigCodePoint from, OnigCodePoint to, int checkdup)
Definition: regparse.c:1734
#define NCAR(node)
Definition: regparse.h:84
UChar * onigenc_get_prev_char_head(OnigEncoding enc, const UChar *start, const UChar *s, const UChar *end)
Definition: regenc.c:92
#define ONIGERR_UNDEFINED_GROUP_OPTION
Definition: regint.h:420
#define IS_IGNORECASE(option)
Definition: regint.h:363
void * name_table
Definition: ripper.y:682
unsigned long st_data_t
Definition: ripper.y:35
BitSet bs
Definition: regint.h:779
#define ONIG_SYN_OP2_ESC_V_VTAB
#define neg(x)
Definition: time.c:171
Node * mem_nodes_static[SCANENV_MEMNODES_SIZE]
Definition: regparse.h:310
#define enclen(enc, p, e)
static void initialize_cclass(CClassNode *cc)
Definition: regparse.c:1148
#define ONIGERR_INVALID_CONDITION_PATTERN
const char * sourcefile
Definition: regparse.h:319
Bits BitSet[BITSET_SIZE]
Definition: regint.h:401
static Node * node_new_cclass_by_codepoint_range(int not, OnigCodePoint sb_out, const OnigCodePoint ranges[])
Definition: regparse.c:1168
#define THREAD_ATOMIC_END
Definition: regint.h:119
static int parse_regexp(Node **top, UChar **src, UChar *end, ScanEnv *env)
Definition: regparse.c:6268
#define ONIG_NORMAL
int bufsize
Definition: regerror.c:388
static Node * node_new_quantifier(int lower, int upper, int by_number)
Definition: regparse.c:1371
#define IS_NCCLASS_SHARE(nd)
Definition: regint.h:768
#define ONIG_SYN_OP_LINE_ANCHOR
#define ONIG_SYN_OP2_ESC_GNU_BUF_ANCHOR
#define PUNFETCH
Definition: regparse.c:273
#define ONIG_SYN_OP2_QMARK_SUBEXP_CALL
static st_table * OnigTypeCClassTable
Definition: regparse.c:5397
static Node * node_new_enclose(int type)
Definition: regparse.c:1397
#define ONIG_SYN_OP2_ESC_CAPITAL_Q_QUOTE
#define NST_NAME_REF
Definition: regparse.h:136
static int parse_subexp(Node **top, OnigToken *tok, int term, UChar **src, UChar *end, ScanEnv *env)
Definition: regparse.c:6220
#define ONIG_SYN_CONTEXT_INVALID_REPEAT_OPS
static Node * node_new_call(UChar *name, UChar *name_end, int gnum)
Definition: regparse.c:1355
OnigCaseFoldType case_fold_flag
Definition: ripper.y:681
#define ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY
void * arg
Definition: regparse.c:516
#define ONIG_SYN_WARN_CC_OP_NOT_ESCAPED
static Node * node_new(void)
Definition: regparse.c:1126
#define NULL_NODE
Definition: regparse.h:280
#define ONIG_SYN_OP2_CCLASS_SET_OP
#define PEND
Definition: regparse.c:272
#define INIT_MULTI_BYTE_RANGE_SIZE
#define ONIG_SYN_OP_ESC_OCTAL3
#define ONIG_SYN_OP_ESC_W_WORD
#define SIZE_CODE_POINT
Definition: regint.h:655
#define val
Definition: tcltklib.c:1949
#define BBUF_INIT(buf, size)
Definition: regint.h:426
#define SET_ENCLOSE_STATUS(node, f)
Definition: regparse.h:141
const OnigSyntaxType * syntax
Definition: regparse.h:291
#define BACKREF_REL_TO_ABS(rel_no, env)
Definition: regparse.c:132
#define ONIGENC_IS_CODE_NEWLINE(enc, code)
#define ONIGERR_END_PATTERN_AT_ESCAPE
#define ARG_UNUSED
Definition: nkf.h:181
#define ONIG_SYN_OP_ESC_S_WHITE_SPACE
static int str_exist_check_with_esc(OnigCodePoint s[], int n, UChar *from, UChar *to, OnigCodePoint bad, OnigEncoding enc, const OnigSyntaxType *syn)
Definition: regparse.c:2979
static int add_code_range0(BBuf **pbuf, ScanEnv *env, OnigCodePoint from, OnigCodePoint to, int checkdup)
Definition: regparse.c:1826
static Node * node_new_ctype(int type, int not, int ascii_range)
Definition: regparse.c:1218
UChar * name_end
Definition: regparse.c:2379
#define ONIG_OPTION_CAPTURE_GROUP
va_end(args)
#define POSIX_BRACKET_CHECK_LIMIT_LENGTH
#define ONIGENC_CTYPE_UPPER
#define INIT_NAME_BACKREFS_ALLOC_NUM
Definition: regparse.c:412
int ref1
Definition: regparse.c:2369
static int node_str_cat_codepoint(Node *node, OnigEncoding enc, OnigCodePoint c)
Definition: regparse.c:1495
#define ONIG_REGION_NOTPOS
#define ONIGENC_GET_CTYPE_CODE_RANGE(enc, ctype, sbout, ranges)
#define MC_ESC(syn)
Definition: regint.h:717
#define ONIGENC_CTYPE_ALPHA
int onig_name_to_backref_number(regex_t *reg, const UChar *name, const UChar *name_end, OnigRegion *region)
Definition: regparse.c:870
static int fetch_token(OnigToken *tok, UChar **src, UChar *end, ScanEnv *env)
Definition: regparse.c:3314
#define BITSET_AT(bs, pos)
Definition: regint.h:414
#define ONIG_SYN_OP2_QMARK_GROUP_EFFECT
#define BITS_IN_ROOM
Definition: regint.h:393
int onig_free_shared_cclass_table(void)
Definition: regparse.c:5414
static int scan_unsigned_hexadecimal_number(UChar **src, UChar *end, int minlen, int maxlen, OnigEncoding enc)
Definition: regparse.c:1648
int lower
Definition: regparse.h:180
#define NCCLASS(node)
Definition: regparse.h:75
#define xmemcpy
Definition: regint.h:182
#define IS_EXTEND(option)
Definition: regint.h:364
#define ONIG_OPTION_SINGLELINE
#define CHECK_NULL_RETURN_MEMERR(p)
Definition: regint.h:281
Node * onig_node_new_enclose(int type)
Definition: regparse.c:1414
#define ONIG_SYN_STRICT_CHECK_BACKREF
#define NCTYPE(node)
Definition: regparse.h:76
#define ONIGENC_PROPERTY_NAME_TO_CTYPE(enc, p, end)
#define ONIGERR_EMPTY_CHAR_CLASS
#define ONIGERR_CHAR_CLASS_VALUE_AT_END_OF_RANGE
void onig_set_verb_warn_func(OnigWarnFunc f)
Definition: regparse.c:101
static UChar * strcat_capa(UChar *dest, UChar *dest_end, const UChar *src, const UChar *src_end, size_t capa)
Definition: regparse.c:288
#define xmalloc
#define xrealloc
UChar * name
Definition: regparse.c:415
#define ONIGERR_UNMATCHED_CLOSE_PARENTHESIS
int mem_alloc
Definition: regparse.h:309
static OnigCodePoint get_name_end_code_point(OnigCodePoint start)
Definition: regparse.c:2558
static UChar * strdup_with_null(OnigEncoding enc, UChar *s, UChar *end)
Definition: regparse.c:243
static int bbuf_clone(BBuf **rto, BBuf *from)
Definition: regparse.c:118
static FreeNode * FreeNodeList
Definition: regparse.c:1023
static int fetch_range_quantifier(UChar **src, UChar *end, OnigToken *tok, ScanEnv *env)
Definition: regparse.c:2392
#define PPEEK_IS(c)
Definition: regparse.c:285
#define ONIG_SYN_OP_POSIX_BRACKET
unsigned char buf[MIME_BUF_SIZE]
Definition: nkf.c:4308
static int new_code_range(BBuf **pbuf)
Definition: regparse.c:1716
int ret
Definition: regparse.c:517
for(v/=shifter=1)
Definition: bigdecimal.c:5677
#define is_invalid_quantifier_target(node)
Definition: regparse.c:2186
#define NT_LIST
Definition: regparse.h:48
static int name_add(regex_t *reg, UChar *name, UChar *name_end, int backref, ScanEnv *env)
Definition: regparse.c:733
st_data_t HashDataType
Definition: regparse.c:426
UChar * s
Definition: regparse.c:2354
#define ONIG_SYN_WARN_REDUNDANT_NESTED_REPEAT
int err
Definition: win32.c:87
#define BITSET_IS_EMPTY(bs, empty)
Definition: regparse.c:156
#define NT_ANCHOR
Definition: regparse.h:47
#define ANCHOR_WORD_END
Definition: regint.h:510
#define NSTRING_CLEAR_RAW(node)
Definition: regparse.h:107
#define XDIGITVAL(enc, code)
Definition: regint.h:357
static int add_code_range(BBuf **pbuf, ScanEnv *env, OnigCodePoint from, OnigCodePoint to)
Definition: regparse.c:1839
#define ONIG_SYN_OP2_OPTION_PERL
static int VALUE key
Definition: tkutil.c:265
#define ONIGERR_EMPTY_GROUP_NAME
static int fetch_char_property_to_ctype(UChar **src, UChar *end, ScanEnv *env)
Definition: regparse.c:4315
int upper
Definition: regparse.h:181
struct _FreeNode FreeNode
#define ONIGENC_CTYPE_BLANK
#define ONIG_SYN_OP2_ESC_G_SUBEXP_CALL
struct OnigToken::@128::@133 prop
void onig_node_conv_to_str_node(Node *node, int flag)
Definition: regparse.c:1504
#define ONIGERR_INVALID_BACKREF
#define USE_BACKREF_WITH_LEVEL
Definition: regint.h:63
#define NT_STR
Definition: regparse.h:40
gz level
Definition: zlib.c:2262
#define ONIGERR_TARGET_OF_REPEAT_OPERATOR_INVALID
#define ONIGENC_CTYPE_ASCII
#define ONIG_SYN_OP2_QMARK_VBAR_BRANCH_RESET
#define NQTFR(node)
Definition: regparse.h:78
#define RTEST(v)
#define onig_st_insert
Definition: regint.h:139
#define ONIGERR_TOO_LONG_WIDE_CHAR_VALUE
#define THREAD_ATOMIC_START
Definition: regint.h:118
q result
Definition: tcltklib.c:7070
static int and_code_range1(BBuf **pbuf, ScanEnv *env, OnigCodePoint from1, OnigCodePoint to1, OnigCodePoint *data, int n)
Definition: regparse.c:1944
#define BITSET_SET_BIT_CHKDUP(bs, pos)
Definition: regparse.c:151
#define BBUF_MOVE_LEFT_REDUCE(buf, from, to)
Definition: regint.h:482
Bits * BitSetRef
Definition: regint.h:402
#define IS_WORD_BOUND_ALL_RANGE(option)
Definition: regint.h:374
#define ONIG_SYN_OP_QMARK_NON_GREEDY
#define ONIG_MAX_BACKREF_NUM
UChar * pat_end
Definition: regerror.c:391
volatile VALUE value
Definition: tcltklib.c:9442
#define ONIGERR_EMPTY_RANGE_IN_CHAR_CLASS
unsigned int flag
Definition: regparse.h:171
#define NT_CANY
Definition: regparse.h:43
#define NST_NAMED_GROUP
Definition: regparse.h:135
register char * s
Definition: os2.c:56
int upper
Definition: regparse.c:2363
UChar * name
Definition: regparse.c:2378
#define ONIGERR_END_PATTERN_IN_GROUP
int onig_name_to_group_numbers(regex_t *reg, const UChar *name, const UChar *name_end, int **nums)
Definition: regparse.c:848
BBuf * mbuf
Definition: regint.h:780
#define ONIG_SYN_OP_ASTERISK_ZERO_INF
VP_EXPORT void
Definition: bigdecimal.c:5083
CClassNode * cc
Definition: regparse.c:5457
int ctype
Definition: ripper.y:105
#define ODIGITVAL(code)
Definition: regint.h:356
#define SWAP_BBUF_NOT(bbuf1, not1, bbuf2, not2)
Definition: regparse.c:1879
#define SYN_GNU_REGEX_BV
Definition: regint.h:749
#define ONIG_SYN_NOT_NEWLINE_IN_NEGATIVE_CC
enum TokenSyms type
Definition: regparse.c:2349
OnigPosition * beg
Definition: ripper.y:616
int onig_is_code_in_cc(OnigEncoding enc, OnigCodePoint code, CClassNode *cc)
Definition: regcomp.c:6063
#define NTYPE(node)
Definition: regparse.h:71
#define ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP
#define ANCHOR_LOOK_BEHIND_NOT
Definition: regint.h:514
int type
Definition: tcltklib.c:111
#define ONIGERR_TOO_SHORT_MULTI_BYTE_STRING
BitStatusType backrefed_mem
Definition: regparse.h:295
#define NCALL(node)
Definition: regparse.h:82
#define NQ_TARGET_ISNOT_EMPTY
Definition: regparse.h:119
#define ONIG_SYN_ALLOW_INTERVAL_LOW_ABBREV
int escaped
Definition: regparse.c:2350
#define ONIG_SYN_OP_DECIMAL_BACKREF
#define ONIG_SYN_OP_ESC_PLUS_ONE_INF
int num_mem
Definition: regparse.h:305
static int or_cclass(CClassNode *dest, CClassNode *cc, ScanEnv *env)
Definition: regparse.c:2099
int onig_strncmp(const UChar *s1, const UChar *s2, int n)
Definition: regparse.c:220
st_data_t hash_data_type
Definition: regint.h:879
static void bitset_or(BitSetRef dest, BitSetRef bs)
Definition: regparse.c:206
Node * onig_node_new_list(Node *left, Node *right)
Definition: regparse.c:1253
static int str_end_cmp(st_data_t xp, st_data_t yp)
Definition: regparse.c:328
#define ONIG_SYN_OP_PLUS_ONE_INF
#define ONIGENC_IS_UNICODE(enc)
#define bad(x)
Definition: _sdbm.c:125
const UChar * s
Definition: regparse.c:323
#define NT_CCLASS
Definition: regparse.h:41
#define ONIGERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS
#define NODE_BACKREFS_SIZE
Definition: regparse.h:99
#define ANCHOR_SEMI_END_BUF
Definition: regint.h:504
#define ONIG_NO_SUPPORT_CONFIG
#define ONIG_MAX_REPEAT_NUM
Node * onig_node_new_str(const UChar *s, const UChar *end)
Definition: regparse.c:1546
int level
Definition: regparse.c:2374
#define ONIGERR_MISMATCH_CODE_LENGTH_IN_CLASS_RANGE
OnigEncoding enc
Definition: regparse.c:5358
UChar * error_end
Definition: regparse.h:299
#define ONIGERR_INVALID_REPEAT_RANGE_PATTERN
void onig_node_str_clear(Node *node)
Definition: regparse.c:1514
Node ** mem_nodes_dynamic
Definition: regparse.h:311
static int names_clear(regex_t *reg)
Definition: regparse.c:475
ruby_verbose
Definition: tcltklib.c:5818
static Node * node_new_str_raw_char(UChar c)
Definition: regparse.c:1567
const OnigSyntaxType * syntax
Definition: ripper.y:680
BitStatusType bt_mem_end
Definition: regparse.h:294
VpDivd * c
Definition: bigdecimal.c:1205
static int fetch_token_in_cc(OnigToken *tok, UChar **src, UChar *end, ScanEnv *env)
Definition: regparse.c:3017
#define ONIG_SYN_OP_ESC_VBAR_ALT
#define ONIG_SYN_OP_VARIABLE_META_CHARACTERS
static Node * node_new_cclass(void)
Definition: regparse.c:1157
#define ENCLOSE_STOP_BACKTRACK
Definition: regparse.h:94
#define MIN(a, b)
Definition: regint.h:275
#define ONIG_SYN_OP_ESC_AZ_BUF_ANCHOR
#define ONIG_OPTION_IGNORECASE
UChar * onigenc_step(OnigEncoding enc, const UChar *p, const UChar *end, int n)
Definition: regenc.c:113
OnigEncoding enc
Definition: regparse.h:290
int subtype
Definition: regparse.c:2358
void onig_node_free(Node *node)
Definition: regparse.c:1027
#define ONIG_SYN_OP_BRACE_INTERVAL
#define NANCHOR(node)
Definition: regparse.h:80
#define ONIG_SYN_ALLOW_DOUBLE_RANGE_OP_IN_CC
#define ONIG_SYN_OP2_ESC_K_NAMED_BACKREF
gz end
Definition: zlib.c:2270
#define NT_BREF
Definition: regparse.h:44
#define ONIGERR_INVALID_GROUP_NAME
#define ONIG_OPTION_MULTILINE
static int and_cclass(CClassNode *dest, CClassNode *cc, ScanEnv *env)
Definition: regparse.c:2041
#define ONIG_SYN_OP2_ESC_CAPITAL_K_KEEP
#define ONIG_INEFFECTIVE_META_CHAR
#define MC_ZERO_OR_ONE_TIME(syn)
Definition: regint.h:720
static void bitset_copy(BitSetRef dest, BitSetRef bs)
Definition: regparse.c:213
static int i_free_name_entry(UChar *key, NameEntry *e, void *arg ARG_UNUSED)
Definition: regparse.c:465
unsigned int top
Definition: nkf.c:4309
#define ONIG_SYN_OP2_ESC_G_BRACE_BACKREF
#define CHECK_NULL_RETURN(p)
Definition: regint.h:280
static int str_node_can_be_split(StrNode *sn, OnigEncoding enc)
Definition: regparse.c:1594
arg
Definition: ripper.y:1312
static int countbits(unsigned int bits)
Definition: regparse.c:5684
VALUE src
Definition: tcltklib.c:7953
static Node * node_new_option(OnigOptionType option)
Definition: regparse.c:1434
static int conv_backslash_value(int c, ScanEnv *env)
Definition: regparse.c:2160
int onig_noname_group_capture_is_active(regex_t *reg)
Definition: regparse.c:924
#define MBCODE_START_POS(enc)
Definition: regparse.c:137
int size
Definition: encoding.c:52
#define CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS
Definition: regparse.c:35
size_t name_len
Definition: regparse.c:416
#define f
#define BBUF_ENSURE_SIZE(buf, size)
Definition: regint.h:440
#define ONIGENC_CTYPE_WORD
int base
Definition: regparse.c:2351
struct OnigToken::@128::@130 repeat
#define ONIGENC_CODE_TO_MBCLEN(enc, code)
static int or_code_range_buf(OnigEncoding enc, BBuf *bbuf1, int not1, BBuf *bbuf2, int not2, BBuf **pbuf, ScanEnv *env)
Definition: regparse.c:1887
#define PPEEK
Definition: regparse.c:284
unsigned int used
Definition: regint.h:422
#define ONIG_SYN_OP_BRACKET_CC
if(RB_TYPE_P(r, T_FLOAT))
Definition: bigdecimal.c:1186
#define ONIGENC_MBC_TO_CODE(enc, p, end)
OnigCaseFoldType case_fold_flag
Definition: regparse.h:289
int by_name
Definition: regparse.c:2371
static int scan_env_set_mem_node(ScanEnv *env, int num, Node *node)
Definition: regparse.c:1008
int t
Definition: ripper.c:13760
#define ANCHOR_PREC_READ_NOT
Definition: regint.h:512
#define ENCLOSE_CONDITION
Definition: regparse.h:95
#define ONIGERR_END_PATTERN_AT_LEFT_BRACE
void onig_reduce_nested_quantifier(Node *pnode, Node *cnode)
Definition: regparse.c:2267
static int fetch_escaped_value(UChar **src, UChar *end, ScanEnv *env)
Definition: regparse.c:2483
#define ENCLOSE_OPTION
Definition: regparse.h:93
#define ONIG_OPTION_ASCII_RANGE
static OnigWarnFunc onig_verb_warn
Definition: regparse.c:93
#define ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC
int onig_number_of_names(regex_t *reg)
Definition: regparse.c:584
static int fetch_name(OnigCodePoint start_code, UChar **src, UChar *end, UChar **rname_end, ScanEnv *env, int *rback_num, int ref)
Definition: regparse.c:2694
#define ONIG_SYN_OP2_ESC_H_XDIGIT
static void onig_syntax_warn(ScanEnv *env, const char *fmt,...)
Definition: regparse.c:2894
#define BITSET_CLEAR_BIT(bs, pos)
Definition: regint.h:416
static int parse_enclose(Node **np, OnigToken *tok, int term, UChar **src, UChar *end, ScanEnv *env)
Definition: regparse.c:4833
struct OnigToken::@128::@132 call
onig_vsnprintf_with_pattern(buf, bufsize, enc, pat, pat_end, fmt, args)
static int node_str_cat_char(Node *node, UChar c)
Definition: regparse.c:1486
st_data_t st_index_t
Definition: ripper.y:63
#define ONIGERR_MEMORY
UChar * p
Definition: regint.h:421
static UChar * find_str_position(OnigCodePoint s[], int n, UChar *from, UChar *to, UChar **next, OnigEncoding enc)
Definition: regparse.c:2950
#define ONIGENC_CTYPE_DIGIT
#define ONIGERR_MULTIPLEX_DEFINED_NAME
static int and_code_range_buf(BBuf *bbuf1, int not1, BBuf *bbuf2, int not2, BBuf **pbuf, ScanEnv *env)
Definition: regparse.c:1984
void hash_table_type
Definition: regint.h:876
UChar * backp
Definition: regparse.c:2352
#define NSTR(node)
Definition: regparse.h:74
#define ONIG_IS_OPTION_ON(options, option)
#define ONIG_SYN_OP_DOT_ANYCHAR
#define ONIGENC_CODE_TO_MBC(enc, code, buf)
struct rb_encoding_entry * list
Definition: encoding.c:50
#define IS_POSIX_BRACKET_ALL_RANGE(option)
Definition: regint.h:373
static NameEntry * name_find(regex_t *reg, const UChar *name, const UChar *name_end)
Definition: regparse.c:501
#define REPEAT_INFINITE
Definition: regint.h:387
int lower
Definition: regparse.c:2362
#define ONIG_SYN_OP2_ESC_CAPITAL_M_BAR_META
static int not_code_range_buf(OnigEncoding enc, BBuf *bbuf, BBuf **pbuf, ScanEnv *env)
Definition: regparse.c:1845
#define IS_SYNTAX_OP2(syn, opm)
Definition: regparse.h:325
#define ONIGENC_CODE_TO_MBC_MAXLEN
int back_num
Definition: regparse.c:417
static int i_names(UChar *key ARG_UNUSED, NameEntry *e, INamesArg *arg)
Definition: regparse.c:522
int ctype
Definition: regparse.c:2384
OnigOptionType option
Definition: regparse.h:288
#define ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT
#define NODE_STR_MARGIN
Definition: regparse.h:97
static Node * node_new_empty(void)
Definition: regparse.c:1561
static st_index_t type_cclass_hash(type_cclass_key *key)
Definition: regparse.c:5371
#define ONIG_SYN_OP_ESC_ASTERISK_ZERO_INF
CCSTATE
Definition: regparse.c:4367
Definition: ripper.y:103
#define ONOFF(v, f, negative)
Definition: regparse.c:135
#define ONIGERR_UNDEFINED_NAME_REFERENCE
static int add_code_range_to_buf(BBuf **pbuf, ScanEnv *env, OnigCodePoint from, OnigCodePoint to)
Definition: regparse.c:1820
#define IS_QUANTIFIER_BY_NUMBER(qn)
Definition: regparse.h:163
#define onig_st_add_direct
Definition: regint.h:142
struct _Node * target
Definition: regparse.h:179
static void bitset_invert(BitSetRef bs)
Definition: regparse.c:185
int greedy
Definition: regparse.h:182
#define INIT_SCANENV_MEMNODES_ALLOC_SIZE
Definition: regparse.c:941
#define NST_BY_NUMBER
Definition: regparse.h:139
static int next_state_val(CClassNode *cc, OnigCodePoint *vs, OnigCodePoint v, int *vs_israw, int v_israw, enum CCVALTYPE intype, enum CCVALTYPE *type, enum CCSTATE *state, ScanEnv *env)
Definition: regparse.c:4404
#define ONIG_SYN_OP_ESC_C_CONTROL
int onigenc_with_ascii_strncmp(OnigEncoding enc, const UChar *p, const UChar *end, const UChar *sascii, int n)
Definition: regenc.c:854
#define ONIG_SYN_OP2_QMARK_CAPITAL_P_NAMED_GROUP
int onig_parse_make_tree(Node **root, const UChar *pattern, const UChar *end, regex_t *reg, ScanEnv *env)
Definition: regparse.c:6296
#define NT_CALL
Definition: regparse.h:50
#define DIGITVAL(code)
Definition: regint.h:355
int onig_scan_unsigned_number(UChar **src, const UChar *end, OnigEncoding enc)
Definition: regparse.c:1621
#define onig_st_free_table
Definition: regint.h:143
#define NSTRING_SET_RAW(node)
Definition: regparse.h:106
#define ONIG_SYN_OP2_OPTION_RUBY
#define NT_CTYPE
Definition: regparse.h:42
#define ONIGERR_END_PATTERN_AT_META
ScanEnv * env
Definition: regparse.c:5456
#define ANCHOR_BEGIN_BUF
Definition: regint.h:500
int back_ref1
Definition: regparse.c:419
#define BIT_STATUS_BITS_NUM
Definition: regint.h:334
BDIGIT e
Definition: bigdecimal.c:5085
OnigEncoding enc
Definition: ripper.y:678
static Node * node_new_backref(int back_num, int *backrefs, int by_name, int exist_level, int nest_level, ScanEnv *env)
Definition: regparse.c:1303
#define ONIG_SYN_CONTEXT_INDEP_REPEAT_OPS
#define IS_NOT_NULL(p)
Definition: regint.h:279
static Node * node_new_enclose_memory(OnigOptionType option, int is_named)
Definition: regparse.c:1420
const UChar * end
Definition: regparse.c:324
short int len
Definition: ripper.y:106
#define ONIGERR_INVALID_CODE_POINT_VALUE
#define ANCHOR_LOOK_BEHIND
Definition: regint.h:513
#define ONIGERR_INVALID_CHAR_IN_GROUP_NAME
int possessive
Definition: regparse.c:2365
void onig_strcpy(UChar *dest, const UChar *src, const UChar *end)
Definition: regparse.c:232
#define ONIG_SYN_OP_ESC_D_DIGIT
static int next_state_class(CClassNode *cc, OnigCodePoint *vs, enum CCVALTYPE *type, enum CCSTATE *state, ScanEnv *env)
Definition: regparse.c:4381
#define POSIX_BRACKET_NAME_MIN_LEN
#define ONIGERR_META_CODE_SYNTAX
static void UNKNOWN_ESC_WARN(ScanEnv *env, int c)
Definition: regparse.c:2943
BitStatusType bt_mem_start
Definition: regparse.h:293
#define ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP
#define ONIGERR_TOO_MANY_MULTI_BYTE_RANGES
#define ONIGENC_CTYPE_PRINT
#define IS_MC_ESC_CODE(code, syn)
Definition: regint.h:724
#define ANCHOR_WORD_BOUND
Definition: regint.h:507
BDIGIT v
Definition: bigdecimal.c:5656
#define ONIGENC_CODE_RANGE_FROM(range, i)
#define ANCHOR_KEEP
Definition: regint.h:519
OnigOptionType options
Definition: ripper.y:679
#define env
static int code_exist_check(OnigCodePoint c, UChar *from, UChar *end, int ignore_escaped, ScanEnv *env)
Definition: regparse.c:4479
#define ONIGENC_APPLY_ALL_CASE_FOLD(enc, case_fold_flag, f, arg)
#define MAX(a, b)
Definition: regint.h:276
int onig_renumber_name_table(regex_t *reg, GroupNumRemap *map)
Definition: regparse.c:572
#define NULL
Definition: _sdbm.c:103
static int add_ctype_to_cc(CClassNode *cc, int ctype, int not, int char_prop, ScanEnv *env)
Definition: regparse.c:4111
#define ONIG_SYN_ALLOW_INVALID_INTERVAL
q
Definition: tcltklib.c:2968
#define ONIGERR_UPPER_SMALLER_THAN_LOWER_IN_REPEAT_RANGE
const char * name
Definition: nkf.c:208
static Node * node_new_str(const UChar *s, const UChar *end)
Definition: regparse.c:1528
#define ONIG_SYN_ALLOW_UNMATCHED_CLOSE_SUBEXP
#define NCDR(node)
Definition: regparse.h:85
#define onig_st_foreach
Definition: regint.h:141
st_index_t num_entries
Definition: ripper.y:93
#define MC_ANYCHAR(syn)
Definition: regint.h:718
int warnings_flag
Definition: regparse.h:318
#define ONIG_SYN_OP_ESC_BRACE_INTERVAL
#define IS_ASCII_RANGE(option)
Definition: regint.h:372
#define onig_st_lookup
Definition: regint.h:140
static Node * node_new_anychar(void)
Definition: regparse.c:1231
void rb_warn(const char *fmt,...)
Definition: error.c:216
#define SET_NTYPE(node, ntype)
Definition: regparse.h:72
#define ONIG_SYN_OP_ESC_LPAREN_SUBEXP
static Node * str_node_split_last_char(StrNode *sn, OnigEncoding enc)
Definition: regparse.c:1576
static int add_ctype_to_cc_by_range(CClassNode *cc, int ctype ARG_UNUSED, int not, ScanEnv *env, OnigCodePoint sb_out, const OnigCodePoint mbr[])
Definition: regparse.c:4038
#define NT_ENCLOSE
Definition: regparse.h:46
int exist_level
Definition: regparse.c:2373
#define ONIG_SYN_OP2_QMARK_LPAREN_CONDITION
#define NSTR_RAW
Definition: regparse.h:101
static void CC_DUP_WARN(ScanEnv *env)
Definition: regparse.c:2931
ONIG_EXTERN const OnigSyntaxType * OnigDefaultSyntax
Definition: ripper.y:418
static int parse_exp(Node **np, OnigToken *tok, int term, UChar **src, UChar *end, ScanEnv *env)
Definition: regparse.c:5744
#define ONIGENC_IS_CODE_XDIGIT(enc, code)
static int i_renumber_name(UChar *key ARG_UNUSED, NameEntry *e, GroupNumRemap *map)
Definition: regparse.c:555
static int parse_char_class(Node **np, OnigToken *tok, UChar **src, UChar *end, ScanEnv *env)
Definition: regparse.c:4503
#define ONIGERR_INVALID_CHAR_PROPERTY_NAME
#define BITSET_CLEAR(bs)
Definition: regint.h:406
BitStatusType capture_history
Definition: regparse.h:292
static int type_cclass_cmp(type_cclass_key *x, type_cclass_key *y)
Definition: regparse.c:5363
#define ONIGERR_PARSER_BUG
#define BITSET_SIZE
Definition: regint.h:394
static st_index_t str_end_hash(st_data_t xp)
Definition: regparse.c:352
size_t len
Definition: tcltklib.c:3568
Node * onig_node_new_anchor(int type)
Definition: regparse.c:1289
#define ONIG_SYN_OP2_PLUS_POSSESSIVE_REPEAT