Ruby 1.9.3p551 (2014-11-13 revision 48407)
gb18030.c
Go to the documentation of this file.
1 /**********************************************************************
2  gb18030.c - Oniguruma (regular expression library)
3 **********************************************************************/
4 /*-
5  * Copyright (c) 2005-2007 KUBO Takehiro <kubo AT jiubao DOT org>
6  * K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
7  * All rights reserved.
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  * 1. Redistributions of source code must retain the above copyright
13  * notice, this list of conditions and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  * notice, this list of conditions and the following disclaimer in the
16  * documentation and/or other materials provided with the distribution.
17  *
18  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
19  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21  * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
22  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28  * SUCH DAMAGE.
29  */
30 
31 #include "regenc.h"
32 
/*
 * Trace hook.  The empty expansion is the one in effect; flip the
 * condition to forward the doubly parenthesised argument list to printf.
 */
#if 0
#define DEBUG_GB18030(arg) printf arg
#else
#define DEBUG_GB18030(arg)
#endif
38 
/*
 * Byte classes stored in GB18030_MAP.  Each class names the positions a
 * byte value may take inside an encoded character.
 */
enum {
  C1 = 0, /* one-byte char */
  C2,     /* one-byte char, or second byte of a two-byte char */
  C4,     /* one-byte char, or second or fourth byte of a four-byte char */
  CM      /* first byte of a two- or four-byte char,
             or second byte of a two-byte char */
};
45 
46 static const char GB18030_MAP[] = {
47  C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1,
48  C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1,
49  C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1,
50  C4, C4, C4, C4, C4, C4, C4, C4, C4, C4, C1, C1, C1, C1, C1, C1,
51  C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2,
52  C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2,
53  C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2,
54  C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C2, C1,
55  C2, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM,
56  CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM,
57  CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM,
58  CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM,
59  CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM,
60  CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM,
61  CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM,
62  CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, C1
63 };
64 
/*
 * States of the forward length recogniser.  The non-negative values index
 * a row of trans[]; the two negative values end the scan.
 */
typedef enum {
  FAILURE = -2,
  ACCEPT = -1,
  S0 = 0,
  S1,
  S2,
  S3
} state_t;

/*
 * trans[state][byte] gives the state reached after reading `byte` in
 * `state`.  Each row of sixteen entries is labelled with the value of its
 * first byte.  A and F are local shorthands, undefined again afterwards.
 */
#define A ACCEPT
#define F FAILURE
static const signed char trans[][0x100] = {
  { /* S0: first byte */
    /* 0x00 */ A,  A,  A,  A,  A,  A,  A,  A,  A,  A,  A,  A,  A,  A,  A,  A,
    /* 0x10 */ A,  A,  A,  A,  A,  A,  A,  A,  A,  A,  A,  A,  A,  A,  A,  A,
    /* 0x20 */ A,  A,  A,  A,  A,  A,  A,  A,  A,  A,  A,  A,  A,  A,  A,  A,
    /* 0x30 */ A,  A,  A,  A,  A,  A,  A,  A,  A,  A,  A,  A,  A,  A,  A,  A,
    /* 0x40 */ A,  A,  A,  A,  A,  A,  A,  A,  A,  A,  A,  A,  A,  A,  A,  A,
    /* 0x50 */ A,  A,  A,  A,  A,  A,  A,  A,  A,  A,  A,  A,  A,  A,  A,  A,
    /* 0x60 */ A,  A,  A,  A,  A,  A,  A,  A,  A,  A,  A,  A,  A,  A,  A,  A,
    /* 0x70 */ A,  A,  A,  A,  A,  A,  A,  A,  A,  A,  A,  A,  A,  A,  A,  A,
    /* 0x80 */ F,  S1, S1, S1, S1, S1, S1, S1, S1, S1, S1, S1, S1, S1, S1, S1,
    /* 0x90 */ S1, S1, S1, S1, S1, S1, S1, S1, S1, S1, S1, S1, S1, S1, S1, S1,
    /* 0xa0 */ S1, S1, S1, S1, S1, S1, S1, S1, S1, S1, S1, S1, S1, S1, S1, S1,
    /* 0xb0 */ S1, S1, S1, S1, S1, S1, S1, S1, S1, S1, S1, S1, S1, S1, S1, S1,
    /* 0xc0 */ S1, S1, S1, S1, S1, S1, S1, S1, S1, S1, S1, S1, S1, S1, S1, S1,
    /* 0xd0 */ S1, S1, S1, S1, S1, S1, S1, S1, S1, S1, S1, S1, S1, S1, S1, S1,
    /* 0xe0 */ S1, S1, S1, S1, S1, S1, S1, S1, S1, S1, S1, S1, S1, S1, S1, S1,
    /* 0xf0 */ S1, S1, S1, S1, S1, S1, S1, S1, S1, S1, S1, S1, S1, S1, S1, F
  },
  { /* S1: second byte */
    /* 0x00 */ F,  F,  F,  F,  F,  F,  F,  F,  F,  F,  F,  F,  F,  F,  F,  F,
    /* 0x10 */ F,  F,  F,  F,  F,  F,  F,  F,  F,  F,  F,  F,  F,  F,  F,  F,
    /* 0x20 */ F,  F,  F,  F,  F,  F,  F,  F,  F,  F,  F,  F,  F,  F,  F,  F,
    /* 0x30 */ S2, S2, S2, S2, S2, S2, S2, S2, S2, S2, F,  F,  F,  F,  F,  F,
    /* 0x40 */ A,  A,  A,  A,  A,  A,  A,  A,  A,  A,  A,  A,  A,  A,  A,  A,
    /* 0x50 */ A,  A,  A,  A,  A,  A,  A,  A,  A,  A,  A,  A,  A,  A,  A,  A,
    /* 0x60 */ A,  A,  A,  A,  A,  A,  A,  A,  A,  A,  A,  A,  A,  A,  A,  A,
    /* 0x70 */ A,  A,  A,  A,  A,  A,  A,  A,  A,  A,  A,  A,  A,  A,  A,  F,
    /* 0x80 */ A,  A,  A,  A,  A,  A,  A,  A,  A,  A,  A,  A,  A,  A,  A,  A,
    /* 0x90 */ A,  A,  A,  A,  A,  A,  A,  A,  A,  A,  A,  A,  A,  A,  A,  A,
    /* 0xa0 */ A,  A,  A,  A,  A,  A,  A,  A,  A,  A,  A,  A,  A,  A,  A,  A,
    /* 0xb0 */ A,  A,  A,  A,  A,  A,  A,  A,  A,  A,  A,  A,  A,  A,  A,  A,
    /* 0xc0 */ A,  A,  A,  A,  A,  A,  A,  A,  A,  A,  A,  A,  A,  A,  A,  A,
    /* 0xd0 */ A,  A,  A,  A,  A,  A,  A,  A,  A,  A,  A,  A,  A,  A,  A,  A,
    /* 0xe0 */ A,  A,  A,  A,  A,  A,  A,  A,  A,  A,  A,  A,  A,  A,  A,  A,
    /* 0xf0 */ A,  A,  A,  A,  A,  A,  A,  A,  A,  A,  A,  A,  A,  A,  A,  F
  },
  { /* S2: third byte */
    /* 0x00 */ F,  F,  F,  F,  F,  F,  F,  F,  F,  F,  F,  F,  F,  F,  F,  F,
    /* 0x10 */ F,  F,  F,  F,  F,  F,  F,  F,  F,  F,  F,  F,  F,  F,  F,  F,
    /* 0x20 */ F,  F,  F,  F,  F,  F,  F,  F,  F,  F,  F,  F,  F,  F,  F,  F,
    /* 0x30 */ F,  F,  F,  F,  F,  F,  F,  F,  F,  F,  F,  F,  F,  F,  F,  F,
    /* 0x40 */ F,  F,  F,  F,  F,  F,  F,  F,  F,  F,  F,  F,  F,  F,  F,  F,
    /* 0x50 */ F,  F,  F,  F,  F,  F,  F,  F,  F,  F,  F,  F,  F,  F,  F,  F,
    /* 0x60 */ F,  F,  F,  F,  F,  F,  F,  F,  F,  F,  F,  F,  F,  F,  F,  F,
    /* 0x70 */ F,  F,  F,  F,  F,  F,  F,  F,  F,  F,  F,  F,  F,  F,  F,  F,
    /* 0x80 */ F,  S3, S3, S3, S3, S3, S3, S3, S3, S3, S3, S3, S3, S3, S3, S3,
    /* 0x90 */ S3, S3, S3, S3, S3, S3, S3, S3, S3, S3, S3, S3, S3, S3, S3, S3,
    /* 0xa0 */ S3, S3, S3, S3, S3, S3, S3, S3, S3, S3, S3, S3, S3, S3, S3, S3,
    /* 0xb0 */ S3, S3, S3, S3, S3, S3, S3, S3, S3, S3, S3, S3, S3, S3, S3, S3,
    /* 0xc0 */ S3, S3, S3, S3, S3, S3, S3, S3, S3, S3, S3, S3, S3, S3, S3, S3,
    /* 0xd0 */ S3, S3, S3, S3, S3, S3, S3, S3, S3, S3, S3, S3, S3, S3, S3, S3,
    /* 0xe0 */ S3, S3, S3, S3, S3, S3, S3, S3, S3, S3, S3, S3, S3, S3, S3, S3,
    /* 0xf0 */ S3, S3, S3, S3, S3, S3, S3, S3, S3, S3, S3, S3, S3, S3, S3, F
  },
  { /* S3: fourth byte */
    /* 0x00 */ F,  F,  F,  F,  F,  F,  F,  F,  F,  F,  F,  F,  F,  F,  F,  F,
    /* 0x10 */ F,  F,  F,  F,  F,  F,  F,  F,  F,  F,  F,  F,  F,  F,  F,  F,
    /* 0x20 */ F,  F,  F,  F,  F,  F,  F,  F,  F,  F,  F,  F,  F,  F,  F,  F,
    /* 0x30 */ A,  A,  A,  A,  A,  A,  A,  A,  A,  A,  F,  F,  F,  F,  F,  F,
    /* 0x40 */ F,  F,  F,  F,  F,  F,  F,  F,  F,  F,  F,  F,  F,  F,  F,  F,
    /* 0x50 */ F,  F,  F,  F,  F,  F,  F,  F,  F,  F,  F,  F,  F,  F,  F,  F,
    /* 0x60 */ F,  F,  F,  F,  F,  F,  F,  F,  F,  F,  F,  F,  F,  F,  F,  F,
    /* 0x70 */ F,  F,  F,  F,  F,  F,  F,  F,  F,  F,  F,  F,  F,  F,  F,  F,
    /* 0x80 */ F,  F,  F,  F,  F,  F,  F,  F,  F,  F,  F,  F,  F,  F,  F,  F,
    /* 0x90 */ F,  F,  F,  F,  F,  F,  F,  F,  F,  F,  F,  F,  F,  F,  F,  F,
    /* 0xa0 */ F,  F,  F,  F,  F,  F,  F,  F,  F,  F,  F,  F,  F,  F,  F,  F,
    /* 0xb0 */ F,  F,  F,  F,  F,  F,  F,  F,  F,  F,  F,  F,  F,  F,  F,  F,
    /* 0xc0 */ F,  F,  F,  F,  F,  F,  F,  F,  F,  F,  F,  F,  F,  F,  F,  F,
    /* 0xd0 */ F,  F,  F,  F,  F,  F,  F,  F,  F,  F,  F,  F,  F,  F,  F,  F,
    /* 0xe0 */ F,  F,  F,  F,  F,  F,  F,  F,  F,  F,  F,  F,  F,  F,  F,  F,
    /* 0xf0 */ F,  F,  F,  F,  F,  F,  F,  F,  F,  F,  F,  F,  F,  F,  F,  F
  }
};
#undef A
#undef F
144 
145 static int
146 gb18030_mbc_enc_len(const UChar* p, const UChar* e, OnigEncoding enc ARG_UNUSED)
147 {
148  int firstbyte = *p++;
149  state_t s = trans[0][firstbyte];
150 #define RETURN(n) \
151  return s == ACCEPT ? ONIGENC_CONSTRUCT_MBCLEN_CHARFOUND(n) : \
152  ONIGENC_CONSTRUCT_MBCLEN_INVALID()
153  if (s < 0) RETURN(1);
154  if (p == e) return ONIGENC_CONSTRUCT_MBCLEN_NEEDMORE(2-1);
155  s = trans[s][*p++];
156  if (s < 0) RETURN(2);
157  if (p == e) return ONIGENC_CONSTRUCT_MBCLEN_NEEDMORE(4-2);
158  s = trans[s][*p++];
159  if (s < 0) RETURN(3);
160  if (p == e) return ONIGENC_CONSTRUCT_MBCLEN_NEEDMORE(4-3);
161  s = trans[s][*p++];
162  RETURN(4);
163 #undef RETURN
164 }
165 
166 static OnigCodePoint
167 gb18030_mbc_to_code(const UChar* p, const UChar* end, OnigEncoding enc)
168 {
169  int c, i, len;
170  OnigCodePoint n;
171 
172  len = enclen(enc, p, end);
173  n = (OnigCodePoint )(*p++);
174  if (len == 1) return n;
175 
176  for (i = 1; i < len; i++) {
177  if (p >= end) break;
178  c = *p++;
179  n <<= 8; n += c;
180  }
181  return n;
182 }
183 
184 static int
186 {
187  return onigenc_mb4_code_to_mbc(enc, code, buf);
188 }
189 
190 static int
191 gb18030_mbc_case_fold(OnigCaseFoldType flag, const UChar** pp, const UChar* end,
192  UChar* lower, OnigEncoding enc)
193 {
194  return onigenc_mbn_mbc_case_fold(enc, flag,
195  pp, end, lower);
196 }
197 
#if 0
/*
 * Compiled out.  Would pass its arguments to
 * onigenc_mbn_is_mbc_ambiguous(), with enc moved to the front.
 */
static int
gb18030_is_mbc_ambiguous(OnigCaseFoldType flag,
                         const UChar** pp, const UChar* end, OnigEncoding enc)
{
  const int result = onigenc_mbn_is_mbc_ambiguous(enc, flag, pp, end);

  return result;
}
#endif
206 
207 static int
208 gb18030_is_code_ctype(OnigCodePoint code, unsigned int ctype, OnigEncoding enc)
209 {
210  return onigenc_mb4_is_code_ctype(enc, code, ctype);
211 }
212 
/*
 * States of the backward scanner in gb18030_left_adjust_char_head().
 * Each name describes the run of byte classes seen so far while walking
 * left from the starting byte: "one" is a single byte, "odd"/"even" is the
 * parity of a repeated unit, CX stands for C2, C4 or CM.
 *
 * Only the values printed by the debug trace depend on the order of the
 * enumerators.
 */
enum state {
  S_START,
  S_one_C2,
  S_one_C4,
  S_one_CM,

  S_odd_CM_one_CX,
  S_even_CM_one_CX,

  /* CMC4 : pair of "CM C4" */
  S_one_CMC4,
  S_odd_CMC4,
  S_one_C4_odd_CMC4,
  S_even_CMC4,
  S_one_C4_even_CMC4,

  S_odd_CM_odd_CMC4,
  S_even_CM_odd_CMC4,

  S_odd_CM_even_CMC4,
  S_even_CM_even_CMC4,

  /* C4CM : pair of "C4 CM" */
  S_odd_C4CM,
  S_one_CM_odd_C4CM,
  S_even_C4CM,
  S_one_CM_even_C4CM,

  S_even_CM_odd_C4CM,
  S_odd_CM_odd_C4CM,
  S_even_CM_even_C4CM,
  S_odd_CM_even_C4CM
};
246 
247 static UChar*
248 gb18030_left_adjust_char_head(const UChar* start, const UChar* s, const UChar* end, OnigEncoding enc)
249 {
250  const UChar *p;
251  enum state state = S_START;
252 
253  DEBUG_GB18030(("----------------\n"));
254  for (p = s; p >= start; p--) {
255  DEBUG_GB18030(("state %d --(%02x)-->\n", state, *p));
256  switch (state) {
257  case S_START:
258  switch (GB18030_MAP[*p]) {
259  case C1:
260  return (UChar *)s;
261  case C2:
262  state = S_one_C2; /* C2 */
263  break;
264  case C4:
265  state = S_one_C4; /* C4 */
266  break;
267  case CM:
268  state = S_one_CM; /* CM */
269  break;
270  }
271  break;
272  case S_one_C2: /* C2 */
273  switch (GB18030_MAP[*p]) {
274  case C1:
275  case C2:
276  case C4:
277  return (UChar *)s;
278  case CM:
279  state = S_odd_CM_one_CX; /* CM C2 */
280  break;
281  }
282  break;
283  case S_one_C4: /* C4 */
284  switch (GB18030_MAP[*p]) {
285  case C1:
286  case C2:
287  case C4:
288  return (UChar *)s;
289  case CM:
290  state = S_one_CMC4;
291  break;
292  }
293  break;
294  case S_one_CM: /* CM */
295  switch (GB18030_MAP[*p]) {
296  case C1:
297  case C2:
298  return (UChar *)s;
299  case C4:
300  state = S_odd_C4CM;
301  break;
302  case CM:
303  state = S_odd_CM_one_CX; /* CM CM */
304  break;
305  }
306  break;
307 
308  case S_odd_CM_one_CX: /* CM C2 */ /* CM CM */ /* CM CM CM C4 */
309  switch (GB18030_MAP[*p]) {
310  case C1:
311  case C2:
312  case C4:
313  return (UChar *)(s - 1);
314  case CM:
315  state = S_even_CM_one_CX;
316  break;
317  }
318  break;
319  case S_even_CM_one_CX: /* CM CM C2 */ /* CM CM CM */ /* CM CM C4 */
320  switch (GB18030_MAP[*p]) {
321  case C1:
322  case C2:
323  case C4:
324  return (UChar *)s;
325  case CM:
326  state = S_odd_CM_one_CX;
327  break;
328  }
329  break;
330 
331  case S_one_CMC4: /* CM C4 */
332  switch (GB18030_MAP[*p]) {
333  case C1:
334  case C2:
335  return (UChar *)(s - 1);
336  case C4:
337  state = S_one_C4_odd_CMC4; /* C4 CM C4 */
338  break;
339  case CM:
340  state = S_even_CM_one_CX; /* CM CM C4 */
341  break;
342  }
343  break;
344  case S_odd_CMC4: /* CM C4 CM C4 CM C4 */
345  switch (GB18030_MAP[*p]) {
346  case C1:
347  case C2:
348  return (UChar *)(s - 1);
349  case C4:
350  state = S_one_C4_odd_CMC4;
351  break;
352  case CM:
353  state = S_odd_CM_odd_CMC4;
354  break;
355  }
356  break;
357  case S_one_C4_odd_CMC4: /* C4 CM C4 */
358  switch (GB18030_MAP[*p]) {
359  case C1:
360  case C2:
361  case C4:
362  return (UChar *)(s - 1);
363  case CM:
364  state = S_even_CMC4; /* CM C4 CM C4 */
365  break;
366  }
367  break;
368  case S_even_CMC4: /* CM C4 CM C4 */
369  switch (GB18030_MAP[*p]) {
370  case C1:
371  case C2:
372  return (UChar *)(s - 3);
373  case C4:
374  state = S_one_C4_even_CMC4;
375  break;
376  case CM:
377  state = S_odd_CM_even_CMC4;
378  break;
379  }
380  break;
381  case S_one_C4_even_CMC4: /* C4 CM C4 CM C4 */
382  switch (GB18030_MAP[*p]) {
383  case C1:
384  case C2:
385  case C4:
386  return (UChar *)(s - 3);
387  case CM:
388  state = S_odd_CMC4;
389  break;
390  }
391  break;
392 
393  case S_odd_CM_odd_CMC4: /* CM CM C4 CM C4 CM C4 */
394  switch (GB18030_MAP[*p]) {
395  case C1:
396  case C2:
397  case C4:
398  return (UChar *)(s - 3);
399  case CM:
400  state = S_even_CM_odd_CMC4;
401  break;
402  }
403  break;
404  case S_even_CM_odd_CMC4: /* CM CM CM C4 CM C4 CM C4 */
405  switch (GB18030_MAP[*p]) {
406  case C1:
407  case C2:
408  case C4:
409  return (UChar *)(s - 1);
410  case CM:
411  state = S_odd_CM_odd_CMC4;
412  break;
413  }
414  break;
415 
416  case S_odd_CM_even_CMC4: /* CM CM C4 CM C4 */
417  switch (GB18030_MAP[*p]) {
418  case C1:
419  case C2:
420  case C4:
421  return (UChar *)(s - 1);
422  case CM:
423  state = S_even_CM_even_CMC4;
424  break;
425  }
426  break;
427  case S_even_CM_even_CMC4: /* CM CM CM C4 CM C4 */
428  switch (GB18030_MAP[*p]) {
429  case C1:
430  case C2:
431  case C4:
432  return (UChar *)(s - 3);
433  case CM:
434  state = S_odd_CM_even_CMC4;
435  break;
436  }
437  break;
438 
439  case S_odd_C4CM: /* C4 CM */ /* C4 CM C4 CM C4 CM*/
440  switch (GB18030_MAP[*p]) {
441  case C1:
442  case C2:
443  case C4:
444  return (UChar *)s;
445  case CM:
446  state = S_one_CM_odd_C4CM; /* CM C4 CM */
447  break;
448  }
449  break;
450  case S_one_CM_odd_C4CM: /* CM C4 CM */ /* CM C4 CM C4 CM C4 CM */
451  switch (GB18030_MAP[*p]) {
452  case C1:
453  case C2:
454  return (UChar *)(s - 2); /* |CM C4 CM */
455  case C4:
456  state = S_even_C4CM;
457  break;
458  case CM:
459  state = S_even_CM_odd_C4CM;
460  break;
461  }
462  break;
463  case S_even_C4CM: /* C4 CM C4 CM */
464  switch (GB18030_MAP[*p]) {
465  case C1:
466  case C2:
467  case C4:
468  return (UChar *)(s - 2); /* C4|CM C4 CM */
469  case CM:
470  state = S_one_CM_even_C4CM;
471  break;
472  }
473  break;
474  case S_one_CM_even_C4CM: /* CM C4 CM C4 CM */
475  switch (GB18030_MAP[*p]) {
476  case C1:
477  case C2:
478  return (UChar *)(s - 0); /*|CM C4 CM C4|CM */
479  case C4:
480  state = S_odd_C4CM;
481  break;
482  case CM:
483  state = S_even_CM_even_C4CM;
484  break;
485  }
486  break;
487 
488  case S_even_CM_odd_C4CM: /* CM CM C4 CM */
489  switch (GB18030_MAP[*p]) {
490  case C1:
491  case C2:
492  case C4:
493  return (UChar *)(s - 0); /* |CM CM|C4|CM */
494  case CM:
495  state = S_odd_CM_odd_C4CM;
496  break;
497  }
498  break;
499  case S_odd_CM_odd_C4CM: /* CM CM CM C4 CM */
500  switch (GB18030_MAP[*p]) {
501  case C1:
502  case C2:
503  case C4:
504  return (UChar *)(s - 2); /* |CM CM|CM C4 CM */
505  case CM:
506  state = S_even_CM_odd_C4CM;
507  break;
508  }
509  break;
510 
511  case S_even_CM_even_C4CM: /* CM CM C4 CM C4 CM */
512  switch (GB18030_MAP[*p]) {
513  case C1:
514  case C2:
515  case C4:
516  return (UChar *)(s - 2); /* |CM CM|C4|CM C4 CM */
517  case CM:
518  state = S_odd_CM_even_C4CM;
519  break;
520  }
521  break;
522  case S_odd_CM_even_C4CM: /* CM CM CM C4 CM C4 CM */
523  switch (GB18030_MAP[*p]) {
524  case C1:
525  case C2:
526  case C4:
527  return (UChar *)(s - 0); /* |CM CM|CM C4 CM C4|CM */
528  case CM:
529  state = S_even_CM_even_C4CM;
530  break;
531  }
532  break;
533  }
534  }
535 
536  DEBUG_GB18030(("state %d\n", state));
537  switch (state) {
538  case S_START: return (UChar *)(s - 0);
539  case S_one_C2: return (UChar *)(s - 0);
540  case S_one_C4: return (UChar *)(s - 0);
541  case S_one_CM: return (UChar *)(s - 0);
542 
543  case S_odd_CM_one_CX: return (UChar *)(s - 1);
544  case S_even_CM_one_CX: return (UChar *)(s - 0);
545 
546  case S_one_CMC4: return (UChar *)(s - 1);
547  case S_odd_CMC4: return (UChar *)(s - 1);
548  case S_one_C4_odd_CMC4: return (UChar *)(s - 1);
549  case S_even_CMC4: return (UChar *)(s - 3);
550  case S_one_C4_even_CMC4: return (UChar *)(s - 3);
551 
552  case S_odd_CM_odd_CMC4: return (UChar *)(s - 3);
553  case S_even_CM_odd_CMC4: return (UChar *)(s - 1);
554 
555  case S_odd_CM_even_CMC4: return (UChar *)(s - 1);
556  case S_even_CM_even_CMC4: return (UChar *)(s - 3);
557 
558  case S_odd_C4CM: return (UChar *)(s - 0);
559  case S_one_CM_odd_C4CM: return (UChar *)(s - 2);
560  case S_even_C4CM: return (UChar *)(s - 2);
561  case S_one_CM_even_C4CM: return (UChar *)(s - 0);
562 
563  case S_even_CM_odd_C4CM: return (UChar *)(s - 0);
564  case S_odd_CM_odd_C4CM: return (UChar *)(s - 2);
565  case S_even_CM_even_C4CM: return (UChar *)(s - 2);
566  case S_odd_CM_even_C4CM: return (UChar *)(s - 0);
567  }
568 
569  return (UChar* )s; /* never come here. (escape warning) */
570 }
571 
572 static int
573 gb18030_is_allowed_reverse_match(const UChar* s, const UChar* end ARG_UNUSED, OnigEncoding enc ARG_UNUSED)
574 {
575  return GB18030_MAP[*s] == C1 ? TRUE : FALSE;
576 }
577 
578 /*
579  * Name: GB18030
580  * MIBenum: 114
581  * Link: http://www.iana.org/assignments/charset-reg/GB18030
582  */
583 OnigEncodingDefine(gb18030, GB18030) = {
585  "GB18030", /* name */
586  4, /* max enc length */
587  1, /* min enc length */
600 };
601 
602