Ruby
1.9.3p448 (2013-06-27 revision 41675)
Main Page
Modules
Data Structures
Files
File List
Globals
regparse.h
Go to the documentation of this file.
1
#ifndef ONIGURUMA_REGPARSE_H
2
#define ONIGURUMA_REGPARSE_H
3
/**********************************************************************
  regparse.h -  Oniguruma (regular expression library)
**********************************************************************/
/*-
 * Copyright (c) 2002-2007  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
31
32
#include "
regint.h
"
33
34
#if defined __GNUC__ && __GNUC__ >= 4
35
#pragma GCC visibility push(default)
36
#endif
37
38
/* node type: consecutive small integers, so each one maps to its own bit
 * through NTYPE2BIT() below */
#define NT_STR         0
#define NT_CCLASS      1
#define NT_CTYPE       2
#define NT_CANY        3
#define NT_BREF        4
#define NT_QTFR        5
#define NT_ENCLOSE     6
#define NT_ANCHOR      7
#define NT_LIST        8
#define NT_ALT         9
#define NT_CALL       10

/* node type bit: NTYPE2BIT(t) is the single-bit mask for node type t */
#define NTYPE2BIT(type)      (1<<(type))

#define BIT_NT_STR        NTYPE2BIT(NT_STR)
#define BIT_NT_CCLASS     NTYPE2BIT(NT_CCLASS)
#define BIT_NT_CTYPE      NTYPE2BIT(NT_CTYPE)
#define BIT_NT_CANY       NTYPE2BIT(NT_CANY)
#define BIT_NT_BREF       NTYPE2BIT(NT_BREF)
#define BIT_NT_QTFR       NTYPE2BIT(NT_QTFR)
#define BIT_NT_ENCLOSE    NTYPE2BIT(NT_ENCLOSE)
#define BIT_NT_ANCHOR     NTYPE2BIT(NT_ANCHOR)
#define BIT_NT_LIST       NTYPE2BIT(NT_LIST)
#define BIT_NT_ALT        NTYPE2BIT(NT_ALT)
#define BIT_NT_CALL       NTYPE2BIT(NT_CALL)

/*
 * Non-zero when `type` is one of NT_STR, NT_CCLASS, NT_CTYPE, NT_CANY or
 * NT_BREF; zero for every other node type.  `type` is evaluated once.
 */
#define IS_NODE_TYPE_SIMPLE(type) \
  ((NTYPE2BIT(type) & (BIT_NT_STR | BIT_NT_CCLASS | BIT_NT_CTYPE |\
                       BIT_NT_CANY | BIT_NT_BREF)) != 0)
69
70
/* Read the type tag stored in u.base.type of the node `node` points to. */
#define NTYPE(node)             ((node)->u.base.type)
/*
 * Store `ntype` in u.base.type.  The expansion is fully parenthesized so
 * that the assignment stays one operand when the macro is used inside a
 * larger expression; its value is the stored type.
 */
#define SET_NTYPE(node, ntype)  ((node)->u.base.type = (ntype))

/* Each accessor yields a pointer to the named member of the node's union u. */
#define NSTR(node)         (&((node)->u.str))
#define NCCLASS(node)      (&((node)->u.cclass))
#define NCTYPE(node)       (&((node)->u.ctype))
#define NBREF(node)        (&((node)->u.bref))
#define NQTFR(node)        (&((node)->u.qtfr))
#define NENCLOSE(node)     (&((node)->u.enclose))
#define NANCHOR(node)      (&((node)->u.anchor))
#define NCONS(node)        (&((node)->u.cons))
#define NCALL(node)        (&((node)->u.call))

/* car / cdr members of u.cons; both expand to lvalues. */
#define NCAR(node)         (NCONS(node)->car)
#define NCDR(node)         (NCONS(node)->cdr)
85
86
87
88
/* Unions of ANCHOR_* bits.  The individual ANCHOR_* constants are not
 * defined in this file (presumably they come from regint.h -- confirm). */
#define ANCHOR_ANYCHAR_STAR_MASK (ANCHOR_ANYCHAR_STAR | ANCHOR_ANYCHAR_STAR_ML)
#define ANCHOR_END_BUF_MASK      (ANCHOR_END_BUF | ANCHOR_SEMI_END_BUF)

/* ENCLOSE_* flags, one bit each */
#define ENCLOSE_MEMORY           (1<<0)
#define ENCLOSE_OPTION           (1<<1)
#define ENCLOSE_STOP_BACKTRACK   (1<<2)

#define NODE_STR_MARGIN         16
#define NODE_STR_BUF_SIZE       24  /* sizeof(CClassNode) - sizeof(int)*4 */
#define NODE_BACKREFS_SIZE       6
98
99
/* bits of u.str.flag */
#define NSTR_RAW                (1<<0) /* by backslashed number */
#define NSTR_AMBIG              (1<<1)
#define NSTR_DONT_GET_OPT_INFO  (1<<2)

/*
 * Difference u.str.end - u.str.s, converted to OnigDistance.  The whole
 * expansion is parenthesized so the cast cannot be split from its operand
 * (e.g. under sizeof or a postfix operator).
 */
#define NSTRING_LEN(node) \
  ((OnigDistance)((node)->u.str.end - (node)->u.str.s))

/*
 * Flag setters / clearers.  Each expands to a parenthesized compound
 * assignment whose value is the updated flag word, so surrounding operators
 * cannot bind to the right-hand side of the assignment.
 */
#define NSTRING_SET_RAW(node)          ((node)->u.str.flag |= NSTR_RAW)
#define NSTRING_CLEAR_RAW(node)        ((node)->u.str.flag &= ~NSTR_RAW)
#define NSTRING_SET_AMBIG(node)        ((node)->u.str.flag |= NSTR_AMBIG)
#define NSTRING_SET_DONT_GET_OPT_INFO(node) \
  ((node)->u.str.flag |= NSTR_DONT_GET_OPT_INFO)

/* Flag predicates: non-zero when the corresponding bit is set. */
#define NSTRING_IS_RAW(node)          (((node)->u.str.flag & NSTR_RAW)   != 0)
#define NSTRING_IS_AMBIG(node)        (((node)->u.str.flag & NSTR_AMBIG) != 0)
#define NSTRING_IS_DONT_GET_OPT_INFO(node) \
  (((node)->u.str.flag & NSTR_DONT_GET_OPT_INFO) != 0)
113
114
/*
 * Selects the back-reference array of `br`: br->back_dynamic when that
 * pointer is non-NULL, otherwise br->back_static.
 *
 * The expansion is a plain parenthesized expression with no trailing ';',
 * so it can be indexed, compared or passed as an argument; statement-style
 * callers keep supplying their own ';'.  Note that `br` is evaluated more
 * than once.
 */
#define BACKREFS_P(br) \
  (IS_NOT_NULL((br)->back_dynamic) ? (br)->back_dynamic : (br)->back_static)
116
117
/* NQ_TARGET_* codes (presumably the values stored in
 * QtfrNode.target_empty_info -- confirm against the users of that field) */
#define NQ_TARGET_ISNOT_EMPTY     0
#define NQ_TARGET_IS_EMPTY        1
#define NQ_TARGET_IS_EMPTY_MEM    2
#define NQ_TARGET_IS_EMPTY_REC    3
121
122
/* status bits (tested against the `state` member by the macros below) */
#define NST_MIN_FIXED             (1<<0)
#define NST_MAX_FIXED             (1<<1)
#define NST_CLEN_FIXED            (1<<2)
#define NST_MARK1                 (1<<3)
#define NST_MARK2                 (1<<4)
#define NST_MEM_BACKREFED         (1<<5)
#define NST_STOP_BT_SIMPLE_REPEAT (1<<6)
#define NST_RECURSION             (1<<7)
#define NST_CALLED                (1<<8)
#define NST_ADDR_FIXED            (1<<9)
#define NST_NAMED_GROUP           (1<<10)
#define NST_NAME_REF              (1<<11)
#define NST_IN_REPEAT             (1<<12) /* STK_REPEAT is nested in stack. */
#define NST_NEST_LEVEL            (1<<13)
#define NST_BY_NUMBER             (1<<14) /* {n,m} */

/*
 * Set / clear the bits `f` in u.enclose.state of a node.  Both take a node
 * pointer (not an enclose pointer).  Each expansion is one parenthesized
 * expression whose value is the updated state word.
 */
#define SET_ENCLOSE_STATUS(node,f)      ((node)->u.enclose.state |=  (f))
#define CLEAR_ENCLOSE_STATUS(node,f)    ((node)->u.enclose.state &= ~(f))

/* Predicates on en->state; `en` is a pointer to an object with a `state`
 * member (e.g. the result of NENCLOSE(node)). */
#define IS_ENCLOSE_CALLED(en)          (((en)->state & NST_CALLED)        != 0)
#define IS_ENCLOSE_ADDR_FIXED(en)      (((en)->state & NST_ADDR_FIXED)    != 0)
#define IS_ENCLOSE_RECURSION(en)       (((en)->state & NST_RECURSION)     != 0)
#define IS_ENCLOSE_MARK1(en)           (((en)->state & NST_MARK1)         != 0)
#define IS_ENCLOSE_MARK2(en)           (((en)->state & NST_MARK2)         != 0)
#define IS_ENCLOSE_MIN_FIXED(en)       (((en)->state & NST_MIN_FIXED)     != 0)
#define IS_ENCLOSE_MAX_FIXED(en)       (((en)->state & NST_MAX_FIXED)     != 0)
#define IS_ENCLOSE_CLEN_FIXED(en)      (((en)->state & NST_CLEN_FIXED)    != 0)
#define IS_ENCLOSE_STOP_BT_SIMPLE_REPEAT(en) \
    (((en)->state & NST_STOP_BT_SIMPLE_REPEAT) != 0)
#define IS_ENCLOSE_NAMED_GROUP(en)     (((en)->state & NST_NAMED_GROUP)   != 0)

/* Sets NST_RECURSION in u.call.state of a node; parenthesized like the
 * enclose setters above. */
#define SET_CALL_RECURSION(node)       ((node)->u.call.state |= NST_RECURSION)
/* Predicates on the `state` member of call, back-reference and quantifier
 * objects respectively. */
#define IS_CALL_RECURSION(cn)          (((cn)->state & NST_RECURSION)  != 0)
#define IS_CALL_NAME_REF(cn)           (((cn)->state & NST_NAME_REF)   != 0)
#define IS_BACKREF_NAME_REF(bn)        (((bn)->state & NST_NAME_REF)   != 0)
#define IS_BACKREF_NEST_LEVEL(bn)      (((bn)->state & NST_NEST_LEVEL) != 0)
#define IS_QUANTIFIER_IN_REPEAT(qn)    (((qn)->state & NST_IN_REPEAT)  != 0)
#define IS_QUANTIFIER_BY_NUMBER(qn)    (((qn)->state & NST_BY_NUMBER)  != 0)
161
162
/* CALLNODE_REFNUM_UNDEF is the value -1.  Parenthesized so the sign stays
 * attached to the constant wherever the macro is expanded. */
#define CALLNODE_REFNUM_UNDEF  (-1)
163
164
typedef
struct
{
165
NodeBase
base
;
166
UChar
*
s
;
167
UChar
*
end
;
168
unsigned
int
flag
;
169
int
capa
;
/* (allocated size - 1) or 0: use buf[] */
170
UChar
buf
[
NODE_STR_BUF_SIZE
];
171
}
StrNode
;
172
173
typedef
struct
{
174
NodeBase
base
;
175
int
state
;
176
struct
_Node
*
target
;
177
int
lower
;
178
int
upper
;
179
int
greedy
;
180
int
target_empty_info
;
181
struct
_Node
*
head_exact
;
182
struct
_Node
*
next_head_exact
;
183
int
is_refered
;
/* include called node. don't eliminate even if {0} */
184
#ifdef USE_COMBINATION_EXPLOSION_CHECK
185
int
comb_exp_check_num;
/* 1,2,3...: check, 0: no check */
186
#endif
187
}
QtfrNode
;
188
189
typedef
struct
{
190
NodeBase
base
;
191
int
state
;
192
int
type
;
193
int
regnum
;
194
OnigOptionType
option
;
195
struct
_Node
*
target
;
196
AbsAddrType
call_addr
;
197
/* for multiple call reference */
198
OnigDistance
min_len
;
/* min length (byte) */
199
OnigDistance
max_len
;
/* max length (byte) */
200
int
char_len
;
/* character length */
201
int
opt_count
;
/* referenced count in optimize_node_left() */
202
}
EncloseNode
;
203
204
#ifdef USE_SUBEXP_CALL
205
206
/* One entry of an UnsetAddrList: an integer offset paired with a node. */
typedef struct {
  int           offset;
  struct _Node* target;
} UnsetAddr;

/*
 * Array of UnsetAddr entries reached through `us`.  `num` and `alloc` are
 * presumably the used and allocated entry counts -- confirm against the
 * code that manages the list.
 */
typedef struct {
  int        num;
  int        alloc;
  UnsetAddr* us;
} UnsetAddrList;
216
217
typedef
struct
{
218
NodeBase
base
;
219
int
state
;
220
int
group_num
;
221
UChar
*
name
;
222
UChar
*
name_end
;
223
struct
_Node
*
target
;
/* EncloseNode : ENCLOSE_MEMORY */
224
UnsetAddrList
*
unset_addr_list
;
225
}
CallNode
;
226
227
#endif
228
229
typedef
struct
{
230
NodeBase
base
;
231
int
state
;
232
int
back_num
;
233
int
back_static[
NODE_BACKREFS_SIZE
];
234
int
*
back_dynamic
;
235
int
nest_level
;
236
}
BRefNode
;
237
238
typedef
struct
{
239
NodeBase
base
;
240
int
type
;
241
struct
_Node
*
target
;
242
int
char_len
;
243
}
AnchorNode
;
244
245
typedef
struct
{
246
NodeBase
base
;
247
struct
_Node
*
car
;
248
struct
_Node
*
cdr
;
249
}
ConsAltNode
;
250
251
typedef
struct
{
252
NodeBase
base
;
253
int
ctype
;
254
int
not
;
255
}
CtypeNode
;
256
257
typedef
struct
_Node
{
258
union
{
259
NodeBase
base
;
260
StrNode
str
;
261
CClassNode
cclass
;
262
QtfrNode
qtfr
;
263
EncloseNode
enclose
;
264
BRefNode
bref
;
265
AnchorNode
anchor
;
266
ConsAltNode
cons
;
267
CtypeNode
ctype
;
268
#ifdef USE_SUBEXP_CALL
269
CallNode
call
;
270
#endif
271
}
u
;
272
}
Node
;
273
274
275
#define NULL_NODE ((Node* )0)
276
277
/* Length of the inline ScanEnv.mem_nodes_static array. */
#define SCANENV_MEMNODES_SIZE               8
/*
 * Node-pointer table of a ScanEnv: mem_nodes_dynamic when that pointer is
 * non-NULL, otherwise the inline mem_nodes_static array.  `senv` is
 * evaluated more than once.
 */
#define SCANENV_MEM_NODES(senv)   \
 (IS_NOT_NULL((senv)->mem_nodes_dynamic) ? \
    (senv)->mem_nodes_dynamic : (senv)->mem_nodes_static)
281
282
typedef
struct
{
283
OnigOptionType
option
;
284
OnigCaseFoldType
case_fold_flag
;
285
OnigEncoding
enc
;
286
const
OnigSyntaxType
*
syntax
;
287
BitStatusType
capture_history
;
288
BitStatusType
bt_mem_start
;
289
BitStatusType
bt_mem_end
;
290
BitStatusType
backrefed_mem
;
291
UChar
*
pattern
;
292
UChar
*
pattern_end
;
293
UChar
*
error
;
294
UChar
*
error_end
;
295
regex_t
*
reg
;
/* for reg->names only */
296
int
num_call
;
297
#ifdef USE_SUBEXP_CALL
298
UnsetAddrList
*
unset_addr_list
;
299
#endif
300
int
num_mem
;
301
#ifdef USE_NAMED_GROUP
302
int
num_named
;
303
#endif
304
int
mem_alloc
;
305
Node
* mem_nodes_static[
SCANENV_MEMNODES_SIZE
];
306
Node
**
mem_nodes_dynamic
;
307
#ifdef USE_COMBINATION_EXPLOSION_CHECK
308
int
num_comb_exp_check;
309
int
comb_exp_max_regnum;
310
int
curr_max_regnum;
311
int
has_recursion;
312
#endif
313
int
warnings_flag
;
314
const
char
*
sourcefile
;
315
int
sourceline
;
316
}
ScanEnv
;
317
318
319
/* Non-zero when any bit of the mask is set in the op, op2 or behavior
 * member, respectively, of the object `syn` points to. */
#define IS_SYNTAX_OP(syn, opm)    (((syn)->op  & (opm)) != 0)
#define IS_SYNTAX_OP2(syn, opm)   (((syn)->op2 & (opm)) != 0)
#define IS_SYNTAX_BV(syn, bvm)    (((syn)->behavior & (bvm)) != 0)
322
323
#ifdef USE_NAMED_GROUP
324
/* Element of the map passed to onig_renumber_name_table(); new_val is
 * presumably the replacement group number -- confirm. */
typedef struct {
  int new_val;
} GroupNumRemap;
327
328
/* Takes a regex and a GroupNumRemap array; judging by the name it renumbers
 * the regex's name table through that map -- the implementation is not in
 * this header. */
extern int onig_renumber_name_table P_((regex_t *reg, GroupNumRemap *map));
329
#endif
330
331
extern
int
onig_strncmp
P_
((
const
UChar
* s1,
const
UChar
* s2,
int
n));
332
extern
void
onig_strcpy
P_
((
UChar
* dest,
const
UChar
* src,
const
UChar
* end));
333
extern
void
onig_scan_env_set_error_string
P_
((
ScanEnv
* env,
int
ecode,
UChar
*
arg
,
UChar
* arg_end));
334
extern
int
onig_scan_unsigned_number
P_
((
UChar
** src,
const
UChar
* end,
OnigEncoding
enc));
335
extern
void
onig_reduce_nested_quantifier
P_
((
Node
* pnode,
Node
* cnode));
336
extern
void
onig_node_conv_to_str_node
P_
((
Node
* node,
int
raw));
337
extern
int
onig_node_str_cat
P_
((
Node
* node,
const
UChar
* s,
const
UChar
* end));
338
extern
int
onig_node_str_set
P_
((
Node
* node,
const
UChar
* s,
const
UChar
* end));
339
extern
void
onig_node_free
P_
((
Node
* node));
340
extern
Node
*
onig_node_new_enclose
P_
((
int
type
));
341
extern
Node
*
onig_node_new_anchor
P_
((
int
type
));
342
extern
Node
*
onig_node_new_str
P_
((
const
UChar
* s,
const
UChar
* end));
343
extern
Node
*
onig_node_new_list
P_
((
Node
* left,
Node
* right));
344
extern
Node
*
onig_node_list_add
P_
((
Node
*
list
,
Node
* x));
345
extern
Node
*
onig_node_new_alt
P_
((
Node
* left,
Node
* right));
346
extern
void
onig_node_str_clear
P_
((
Node
* node));
347
extern
int
onig_free_node_list
P_
((
void
));
348
extern
int
onig_names_free
P_
((
regex_t
* reg));
349
extern
int
onig_parse_make_tree
P_
((
Node
** root,
const
UChar
* pattern,
const
UChar
* end,
regex_t
* reg,
ScanEnv
* env));
350
extern
int
onig_free_shared_cclass_table
P_
((
void
));
351
352
#ifdef ONIG_DEBUG
353
#ifdef USE_NAMED_GROUP
354
/* Declared only when both ONIG_DEBUG and USE_NAMED_GROUP are defined.
 * Takes an output stream and a regex; presumably prints the regex's name
 * table -- confirm against the implementation. */
extern int onig_print_names(FILE *fp, regex_t *reg);
355
#endif
356
#endif
357
358
#if defined __GNUC__ && __GNUC__ >= 4
359
#pragma GCC visibility pop
360
#endif
361
362
#endif
/* ONIGURUMA_REGPARSE_H */
363
Generated on Fri Jun 28 2013 02:34:43 for Ruby by Doxygen 1.8.3