Ruby 1.9.3p448 (2013-06-27 revision 41675)
regenc.c
Go to the documentation of this file.
1 /**********************************************************************
2  regenc.c - Oniguruma (regular expression library)
3 **********************************************************************/
4 /*-
5  * Copyright (c) 2002-2007 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
6  * All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  * notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  * notice, this list of conditions and the following disclaimer in the
15  * documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20  * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27  * SUCH DAMAGE.
28  */
29 
30 #include "regint.h"
31 
33 
34 extern int
36 {
37  return 0;
38 }
39 
40 extern OnigEncoding
42 {
44 }
45 
46 extern int
48 {
49  OnigEncDefaultCharEncoding = enc;
50  return 0;
51 }
52 
53 extern int
55 {
56  int ret = ONIGENC_PRECISE_MBC_ENC_LEN(enc,p,e);
58  return ONIGENC_MBCLEN_CHARFOUND_LEN(ret);
59  else if (ONIGENC_MBCLEN_NEEDMORE_P(ret))
60  return (int)(e-p)+ONIGENC_MBCLEN_NEEDMORE_LEN(ret);
61  return 1;
62 }
63 
64 extern UChar*
65 onigenc_get_right_adjust_char_head(OnigEncoding enc, const UChar* start, const UChar* s, const UChar* end)
66 {
67  UChar* p = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s, end);
68  if (p < s) {
69  p += enclen(enc, p, end);
70  }
71  return p;
72 }
73 
74 extern UChar*
76  const UChar* start, const UChar* s, const UChar* end, const UChar** prev)
77 {
78  UChar* p = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s, end);
79 
80  if (p < s) {
81  if (prev) *prev = (const UChar* )p;
82  p += enclen(enc, p, end);
83  }
84  else {
85  if (prev) *prev = (const UChar* )NULL; /* Sorry */
86  }
87  return p;
88 }
89 
90 extern UChar*
91 onigenc_get_prev_char_head(OnigEncoding enc, const UChar* start, const UChar* s, const UChar* end)
92 {
93  if (s <= start)
94  return (UChar* )NULL;
95 
96  return ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s - 1, end);
97 }
98 
99 extern UChar*
100 onigenc_step_back(OnigEncoding enc, const UChar* start, const UChar* s, const UChar* end, int n)
101 {
102  while (ONIG_IS_NOT_NULL(s) && n-- > 0) {
103  if (s <= start)
104  return (UChar* )NULL;
105 
106  s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s - 1, end);
107  }
108  return (UChar* )s;
109 }
110 
111 extern UChar*
112 onigenc_step(OnigEncoding enc, const UChar* p, const UChar* end, int n)
113 {
114  UChar* q = (UChar* )p;
115  while (n-- > 0) {
116  q += ONIGENC_MBC_ENC_LEN(enc, q, end);
117  }
118  return (q <= end ? q : NULL);
119 }
120 
121 extern int
122 onigenc_strlen(OnigEncoding enc, const UChar* p, const UChar* end)
123 {
124  int n = 0;
125  UChar* q = (UChar* )p;
126 
127  while (q < end) {
128  q += ONIGENC_MBC_ENC_LEN(enc, q, end);
129  n++;
130  }
131  return n;
132 }
133 
134 extern int
136 {
137  int n = 0;
138  UChar* p = (UChar* )s;
139  UChar* e;
140 
141  while (1) {
142  if (*p == '\0') {
143  UChar* q;
144  int len = ONIGENC_MBC_MINLEN(enc);
145 
146  if (len == 1) return n;
147  q = p + 1;
148  while (len > 1) {
149  if (*q != '\0') break;
150  q++;
151  len--;
152  }
153  if (len == 1) return n;
154  }
155  e = p + ONIGENC_MBC_MAXLEN(enc);
156  p += ONIGENC_MBC_ENC_LEN(enc, p, e);
157  n++;
158  }
159 }
160 
161 extern int
163 {
164  UChar* start = (UChar* )s;
165  UChar* p = (UChar* )s;
166  UChar* e;
167 
168  while (1) {
169  if (*p == '\0') {
170  UChar* q;
171  int len = ONIGENC_MBC_MINLEN(enc);
172 
173  if (len == 1) return (int )(p - start);
174  q = p + 1;
175  while (len > 1) {
176  if (*q != '\0') break;
177  q++;
178  len--;
179  }
180  if (len == 1) return (int )(p - start);
181  }
182  e = p + ONIGENC_MBC_MAXLEN(enc);
183  p += ONIGENC_MBC_ENC_LEN(enc, p, e);
184  }
185 }
186 
188  '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007',
189  '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017',
190  '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027',
191  '\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037',
192  '\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047',
193  '\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057',
194  '\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067',
195  '\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077',
196  '\100', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
197  '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
198  '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
199  '\170', '\171', '\172', '\133', '\134', '\135', '\136', '\137',
200  '\140', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
201  '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
202  '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
203  '\170', '\171', '\172', '\173', '\174', '\175', '\176', '\177',
204  '\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207',
205  '\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217',
206  '\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227',
207  '\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237',
208  '\240', '\241', '\242', '\243', '\244', '\245', '\246', '\247',
209  '\250', '\251', '\252', '\253', '\254', '\255', '\256', '\257',
210  '\260', '\261', '\262', '\263', '\264', '\265', '\266', '\267',
211  '\270', '\271', '\272', '\273', '\274', '\275', '\276', '\277',
212  '\300', '\301', '\302', '\303', '\304', '\305', '\306', '\307',
213  '\310', '\311', '\312', '\313', '\314', '\315', '\316', '\317',
214  '\320', '\321', '\322', '\323', '\324', '\325', '\326', '\327',
215  '\330', '\331', '\332', '\333', '\334', '\335', '\336', '\337',
216  '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347',
217  '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357',
218  '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367',
219  '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377',
220 };
221 
222 #ifdef USE_UPPER_CASE_TABLE
/*
 * ASCII upper-casing lookup table with one entry per byte value (256 entries).
 * Entries at indexes 0x61-0x7a ('a'-'z') hold '\101'-'\132' ('A'-'Z');
 * every other entry holds its own index value, so non-lowercase bytes
 * (including 0x80-0xff) map to themselves.
 * Only compiled when USE_UPPER_CASE_TABLE is defined.
 */
223 const UChar OnigEncAsciiToUpperCaseTable[256] = {
224  '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007',
225  '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017',
226  '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027',
227  '\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037',
228  '\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047',
229  '\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057',
230  '\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067',
231  '\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077',
232  '\100', '\101', '\102', '\103', '\104', '\105', '\106', '\107',
233  '\110', '\111', '\112', '\113', '\114', '\115', '\116', '\117',
234  '\120', '\121', '\122', '\123', '\124', '\125', '\126', '\127',
235  '\130', '\131', '\132', '\133', '\134', '\135', '\136', '\137',
/* 0x60-0x7f: lowercase letters are replaced by their uppercase counterparts. */
236  '\140', '\101', '\102', '\103', '\104', '\105', '\106', '\107',
237  '\110', '\111', '\112', '\113', '\114', '\115', '\116', '\117',
238  '\120', '\121', '\122', '\123', '\124', '\125', '\126', '\127',
239  '\130', '\131', '\132', '\173', '\174', '\175', '\176', '\177',
/* 0x80-0xff: identity mapping. */
240  '\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207',
241  '\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217',
242  '\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227',
243  '\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237',
244  '\240', '\241', '\242', '\243', '\244', '\245', '\246', '\247',
245  '\250', '\251', '\252', '\253', '\254', '\255', '\256', '\257',
246  '\260', '\261', '\262', '\263', '\264', '\265', '\266', '\267',
247  '\270', '\271', '\272', '\273', '\274', '\275', '\276', '\277',
248  '\300', '\301', '\302', '\303', '\304', '\305', '\306', '\307',
249  '\310', '\311', '\312', '\313', '\314', '\315', '\316', '\317',
250  '\320', '\321', '\322', '\323', '\324', '\325', '\326', '\327',
251  '\330', '\331', '\332', '\333', '\334', '\335', '\336', '\337',
252  '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347',
253  '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357',
254  '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367',
255  '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377',
256 };
257 #endif
258 
/*
 * Per-byte 16-bit flag table with one entry per byte value (256 entries).
 * NOTE(review): presumably each bit marks membership in one character class
 * (ONIGENC_CTYPE_*, defined outside this file) and the table is what
 * ONIGENC_IS_ASCII_CODE_CTYPE consults -- confirm against regenc.h.
 * Only indexes 0x00-0x7f carry flags; every entry for 0x80-0xff is 0x0000.
 */
259 const unsigned short OnigEncAsciiCtypeTable[256] = {
260  0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
261  0x4008, 0x420c, 0x4209, 0x4208, 0x4208, 0x4208, 0x4008, 0x4008,
262  0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
263  0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
264  0x4284, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
265  0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
266  0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0,
267  0x78b0, 0x78b0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
268  0x41a0, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x74a2,
269  0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2,
270  0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2,
271  0x74a2, 0x74a2, 0x74a2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x51a0,
272  0x41a0, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x70e2,
273  0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2,
274  0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2,
275  0x70e2, 0x70e2, 0x70e2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x4008,
/* 0x80-0xff: no flags set. */
276  0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
277  0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
278  0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
279  0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
280  0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
281  0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
282  0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
283  0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
284  0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
285  0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
286  0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
287  0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
288  0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
289  0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
290  0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
291  0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000
292 };
293 
295  '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007',
296  '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017',
297  '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027',
298  '\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037',
299  '\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047',
300  '\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057',
301  '\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067',
302  '\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077',
303  '\100', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
304  '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
305  '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
306  '\170', '\171', '\172', '\133', '\134', '\135', '\136', '\137',
307  '\140', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
308  '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
309  '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
310  '\170', '\171', '\172', '\173', '\174', '\175', '\176', '\177',
311  '\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207',
312  '\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217',
313  '\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227',
314  '\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237',
315  '\240', '\241', '\242', '\243', '\244', '\245', '\246', '\247',
316  '\250', '\251', '\252', '\253', '\254', '\255', '\256', '\257',
317  '\260', '\261', '\262', '\263', '\264', '\265', '\266', '\267',
318  '\270', '\271', '\272', '\273', '\274', '\275', '\276', '\277',
319  '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347',
320  '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357',
321  '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\327',
322  '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\337',
323  '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347',
324  '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357',
325  '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367',
326  '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377'
327 };
328 
329 #ifdef USE_UPPER_CASE_TABLE
331  '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007',
332  '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017',
333  '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027',
334  '\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037',
335  '\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047',
336  '\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057',
337  '\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067',
338  '\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077',
339  '\100', '\101', '\102', '\103', '\104', '\105', '\106', '\107',
340  '\110', '\111', '\112', '\113', '\114', '\115', '\116', '\117',
341  '\120', '\121', '\122', '\123', '\124', '\125', '\126', '\127',
342  '\130', '\131', '\132', '\133', '\134', '\135', '\136', '\137',
343  '\140', '\101', '\102', '\103', '\104', '\105', '\106', '\107',
344  '\110', '\111', '\112', '\113', '\114', '\115', '\116', '\117',
345  '\120', '\121', '\122', '\123', '\124', '\125', '\126', '\127',
346  '\130', '\131', '\132', '\173', '\174', '\175', '\176', '\177',
347  '\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207',
348  '\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217',
349  '\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227',
350  '\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237',
351  '\240', '\241', '\242', '\243', '\244', '\245', '\246', '\247',
352  '\250', '\251', '\252', '\253', '\254', '\255', '\256', '\257',
353  '\260', '\261', '\262', '\263', '\264', '\265', '\266', '\267',
354  '\270', '\271', '\272', '\273', '\274', '\275', '\276', '\277',
355  '\300', '\301', '\302', '\303', '\304', '\305', '\306', '\307',
356  '\310', '\311', '\312', '\313', '\314', '\315', '\316', '\317',
357  '\320', '\321', '\322', '\323', '\324', '\325', '\326', '\327',
358  '\330', '\331', '\332', '\333', '\334', '\335', '\336', '\337',
359  '\300', '\301', '\302', '\303', '\304', '\305', '\306', '\307',
360  '\310', '\311', '\312', '\313', '\314', '\315', '\316', '\317',
361  '\320', '\321', '\322', '\323', '\324', '\325', '\326', '\367',
362  '\330', '\331', '\332', '\333', '\334', '\335', '\336', '\377',
363 };
364 #endif
365 
366 extern void
368 {
369  /* nothing */
370  /* obsoleted. */
371 }
372 
373 extern UChar*
374 onigenc_get_left_adjust_char_head(OnigEncoding enc, const UChar* start, const UChar* s, const UChar* end)
375 {
376  return ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s, end);
377 }
378 
380  { 0x41, 0x61 },
381  { 0x42, 0x62 },
382  { 0x43, 0x63 },
383  { 0x44, 0x64 },
384  { 0x45, 0x65 },
385  { 0x46, 0x66 },
386  { 0x47, 0x67 },
387  { 0x48, 0x68 },
388  { 0x49, 0x69 },
389  { 0x4a, 0x6a },
390  { 0x4b, 0x6b },
391  { 0x4c, 0x6c },
392  { 0x4d, 0x6d },
393  { 0x4e, 0x6e },
394  { 0x4f, 0x6f },
395  { 0x50, 0x70 },
396  { 0x51, 0x71 },
397  { 0x52, 0x72 },
398  { 0x53, 0x73 },
399  { 0x54, 0x74 },
400  { 0x55, 0x75 },
401  { 0x56, 0x76 },
402  { 0x57, 0x77 },
403  { 0x58, 0x78 },
404  { 0x59, 0x79 },
405  { 0x5a, 0x7a }
406 };
407 
408 extern int
410  OnigApplyAllCaseFoldFunc f, void* arg,
411  OnigEncoding enc ARG_UNUSED)
412 {
413  OnigCodePoint code;
414  int i, r;
415 
416  for (i = 0;
417  i < (int )(sizeof(OnigAsciiLowerMap)/sizeof(OnigPairCaseFoldCodes));
418  i++) {
419  code = OnigAsciiLowerMap[i].to;
420  r = (*f)(OnigAsciiLowerMap[i].from, &code, 1, arg);
421  if (r != 0) return r;
422 
423  code = OnigAsciiLowerMap[i].from;
424  r = (*f)(OnigAsciiLowerMap[i].to, &code, 1, arg);
425  if (r != 0) return r;
426  }
427 
428  return 0;
429 }
430 
431 extern int
433  const OnigUChar* p, const OnigUChar* end ARG_UNUSED, OnigCaseFoldCodeItem items[],
434  OnigEncoding enc ARG_UNUSED)
435 {
436  if (0x41 <= *p && *p <= 0x5a) {
437  items[0].byte_len = 1;
438  items[0].code_len = 1;
439  items[0].code[0] = (OnigCodePoint )(*p + 0x20);
440  return 1;
441  }
442  else if (0x61 <= *p && *p <= 0x7a) {
443  items[0].byte_len = 1;
444  items[0].code_len = 1;
445  items[0].code[0] = (OnigCodePoint )(*p - 0x20);
446  return 1;
447  }
448  else
449  return 0;
450 }
451 
452 static int
454  OnigApplyAllCaseFoldFunc f, void* arg)
455 {
456  OnigCodePoint ss[] = { 0x73, 0x73 };
457 
458  return (*f)((OnigCodePoint )0xdf, ss, 2, arg);
459 }
460 
461 extern int
463  const OnigPairCaseFoldCodes map[],
464  int ess_tsett_flag, OnigCaseFoldType flag,
465  OnigApplyAllCaseFoldFunc f, void* arg)
466 {
467  OnigCodePoint code;
468  int i, r;
469 
470  r = onigenc_ascii_apply_all_case_fold(flag, f, arg, 0);
471  if (r != 0) return r;
472 
473  for (i = 0; i < map_size; i++) {
474  code = map[i].to;
475  r = (*f)(map[i].from, &code, 1, arg);
476  if (r != 0) return r;
477 
478  code = map[i].from;
479  r = (*f)(map[i].to, &code, 1, arg);
480  if (r != 0) return r;
481  }
482 
483  if (ess_tsett_flag != 0)
484  return ss_apply_all_case_fold(flag, f, arg);
485 
486  return 0;
487 }
488 
489 extern int
491  const OnigPairCaseFoldCodes map[],
492  int ess_tsett_flag, OnigCaseFoldType flag ARG_UNUSED,
493  const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[])
494 {
495  if (0x41 <= *p && *p <= 0x5a) {
496  items[0].byte_len = 1;
497  items[0].code_len = 1;
498  items[0].code[0] = (OnigCodePoint )(*p + 0x20);
499  if (*p == 0x53 && ess_tsett_flag != 0 && end > p + 1
500  && (*(p+1) == 0x53 || *(p+1) == 0x73)) {
501  /* SS */
502  items[1].byte_len = 2;
503  items[1].code_len = 1;
504  items[1].code[0] = (OnigCodePoint )0xdf;
505  return 2;
506  }
507  else
508  return 1;
509  }
510  else if (0x61 <= *p && *p <= 0x7a) {
511  items[0].byte_len = 1;
512  items[0].code_len = 1;
513  items[0].code[0] = (OnigCodePoint )(*p - 0x20);
514  if (*p == 0x73 && ess_tsett_flag != 0 && end > p + 1
515  && (*(p+1) == 0x73 || *(p+1) == 0x53)) {
516  /* ss */
517  items[1].byte_len = 2;
518  items[1].code_len = 1;
519  items[1].code[0] = (OnigCodePoint )0xdf;
520  return 2;
521  }
522  else
523  return 1;
524  }
525  else if (*p == 0xdf && ess_tsett_flag != 0) {
526  items[0].byte_len = 1;
527  items[0].code_len = 2;
528  items[0].code[0] = (OnigCodePoint )'s';
529  items[0].code[1] = (OnigCodePoint )'s';
530 
531  items[1].byte_len = 1;
532  items[1].code_len = 2;
533  items[1].code[0] = (OnigCodePoint )'S';
534  items[1].code[1] = (OnigCodePoint )'S';
535 
536  items[2].byte_len = 1;
537  items[2].code_len = 2;
538  items[2].code[0] = (OnigCodePoint )'s';
539  items[2].code[1] = (OnigCodePoint )'S';
540 
541  items[3].byte_len = 1;
542  items[3].code_len = 2;
543  items[3].code[0] = (OnigCodePoint )'S';
544  items[3].code[1] = (OnigCodePoint )'s';
545 
546  return 4;
547  }
548  else {
549  int i;
550 
551  for (i = 0; i < map_size; i++) {
552  if (*p == map[i].from) {
553  items[0].byte_len = 1;
554  items[0].code_len = 1;
555  items[0].code[0] = map[i].to;
556  return 1;
557  }
558  else if (*p == map[i].to) {
559  items[0].byte_len = 1;
560  items[0].code_len = 1;
561  items[0].code[0] = map[i].from;
562  return 1;
563  }
564  }
565  }
566 
567  return 0;
568 }
569 
570 
571 extern int
573  OnigCodePoint* sb_out, const OnigCodePoint* ranges[],
574  OnigEncoding enc)
575 {
576  return ONIG_NO_SUPPORT_CONFIG;
577 }
578 
579 extern int
580 onigenc_is_mbc_newline_0x0a(const UChar* p, const UChar* end, OnigEncoding enc ARG_UNUSED)
581 {
582  if (p < end) {
583  if (*p == 0x0a) return 1;
584  }
585  return 0;
586 }
587 
588 /* for single byte encodings */
589 extern int
591  const UChar*end, UChar* lower, OnigEncoding enc ARG_UNUSED)
592 {
593  *lower = ONIGENC_ASCII_CODE_TO_LOWER_CASE(**p);
594 
595  (*p)++;
596  return 1; /* return byte length of converted char to lower */
597 }
598 
599 #if 0
600 extern int
601 onigenc_ascii_is_mbc_ambiguous(OnigCaseFoldType flag ARG_UNUSED,
602  const UChar** pp, const UChar* end ARG_UNUSED)
603 {
604  const UChar* p = *pp;
605 
606  (*pp)++;
608 }
609 #endif
610 
611 extern int
612 onigenc_single_byte_mbc_enc_len(const UChar* p ARG_UNUSED, const UChar* e ARG_UNUSED,
613  OnigEncoding enc ARG_UNUSED)
614 {
615  return 1;
616 }
617 
618 extern OnigCodePoint
619 onigenc_single_byte_mbc_to_code(const UChar* p, const UChar* end ARG_UNUSED,
620  OnigEncoding enc ARG_UNUSED)
621 {
622  return (OnigCodePoint )(*p);
623 }
624 
625 extern int
627 {
628  return 1;
629 }
630 
631 extern int
633 {
634  if (code > 0xff)
635  rb_raise(rb_eRangeError, "%u out of char range", code);
636  *buf = (UChar )(code & 0xff);
637  return 1;
638 }
639 
640 extern UChar*
641 onigenc_single_byte_left_adjust_char_head(const UChar* start ARG_UNUSED, const UChar* s,
642  const UChar* end,
643  OnigEncoding enc ARG_UNUSED)
644 {
645  return (UChar* )s;
646 }
647 
648 extern int
649 onigenc_always_true_is_allowed_reverse_match(const UChar* s ARG_UNUSED, const UChar* end ARG_UNUSED,
650  OnigEncoding enc ARG_UNUSED)
651 {
652  return TRUE;
653 }
654 
655 extern int
656 onigenc_always_false_is_allowed_reverse_match(const UChar* s ARG_UNUSED, const UChar* end ARG_UNUSED,
657  OnigEncoding enc ARG_UNUSED)
658 {
659  return FALSE;
660 }
661 
662 extern int
664  OnigEncoding enc ARG_UNUSED)
665 {
666  if (code < 128)
667  return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype);
668  else
669  return FALSE;
670 }
671 
672 extern OnigCodePoint
674 {
675  int c, i, len;
676  OnigCodePoint n;
677 
678  len = enclen(enc, p, end);
679  n = (OnigCodePoint )(*p++);
680  if (len == 1) return n;
681 
682  for (i = 1; i < len; i++) {
683  if (p >= end) break;
684  c = *p++;
685  n <<= 8; n += c;
686  }
687  return n;
688 }
689 
690 extern int
692  const UChar** pp, const UChar* end ARG_UNUSED,
693  UChar* lower)
694 {
695  int len;
696  const UChar *p = *pp;
697 
698  if (ONIGENC_IS_MBC_ASCII(p)) {
700  (*pp)++;
701  return 1;
702  }
703  else {
704  int i;
705 
706  len = enclen(enc, p, end);
707  for (i = 0; i < len; i++) {
708  *lower++ = *p++;
709  }
710  (*pp) += len;
711  return len; /* return byte length of converted to lower char */
712  }
713 }
714 
715 #if 0
716 extern int
717 onigenc_mbn_is_mbc_ambiguous(OnigEncoding enc, OnigCaseFoldType flag,
718  const UChar** pp ARG_UNUSED, const UChar* end ARG_UNUSED)
719 {
720  const UChar* p = *pp;
721 
722  if (ONIGENC_IS_MBC_ASCII(p)) {
723  (*pp)++;
725  }
726 
727  (*pp) += enclen(enc, p);
728  return FALSE;
729 }
730 #endif
731 
732 extern int
734 {
735  if (code <= 0xff) return 1;
736  if (code <= 0xffff) return 2;
738 }
739 
740 extern int
742 {
743  if ((code & 0xff000000) != 0) return 4;
744  else if ((code & 0xff0000) != 0) return 3;
745  else if ((code & 0xff00) != 0) return 2;
746  else return 1;
747 }
748 
749 extern int
751 {
752  UChar *p = buf;
753 
754  if ((code & 0xff00) != 0) {
755  *p++ = (UChar )((code >> 8) & 0xff);
756  }
757  *p++ = (UChar )(code & 0xff);
758 
759 #if 1
760  if (enclen(enc, buf, p) != (p - buf))
762 #endif
763  return (int)(p - buf);
764 }
765 
766 extern int
768 {
769  UChar *p = buf;
770 
771  if ((code & 0xff000000) != 0) {
772  *p++ = (UChar )((code >> 24) & 0xff);
773  }
774  if ((code & 0xff0000) != 0 || p != buf) {
775  *p++ = (UChar )((code >> 16) & 0xff);
776  }
777  if ((code & 0xff00) != 0 || p != buf) {
778  *p++ = (UChar )((code >> 8) & 0xff);
779  }
780  *p++ = (UChar )(code & 0xff);
781 
782 #if 1
783  if (enclen(enc, buf, p) != (p - buf))
785 #endif
786  return (int)(p - buf);
787 }
788 
789 extern int
791 {
792  static const PosixBracketEntryType PBS[] = {
807  };
808 
809  const PosixBracketEntryType *pb, *pbe;
810  int len;
811 
812  len = onigenc_strlen(enc, p, end);
813  for (pbe = (pb = PBS) + sizeof(PBS)/sizeof(PBS[0]); pb < pbe; ++pb) {
814  if (len == pb->len &&
815  STRNCASECMP((char *)p, (char *)pb->name, len) == 0)
816  return pb->ctype;
817  }
818 
820 }
821 
822 extern int
824  unsigned int ctype)
825 {
826  if (code < 128)
827  return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype);
828  else {
829  if (CTYPE_IS_WORD_GRAPH_PRINT(ctype)) {
830  return (ONIGENC_CODE_TO_MBCLEN(enc, code) > 1 ? TRUE : FALSE);
831  }
832  }
833 
834  return FALSE;
835 }
836 
837 extern int
839  unsigned int ctype)
840 {
841  if (code < 128)
842  return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype);
843  else {
844  if (CTYPE_IS_WORD_GRAPH_PRINT(ctype)) {
845  return (ONIGENC_CODE_TO_MBCLEN(enc, code) > 1 ? TRUE : FALSE);
846  }
847  }
848 
849  return FALSE;
850 }
851 
852 extern int
854  const UChar* sascii /* ascii */, int n)
855 {
856  int x, c;
857 
858  while (n-- > 0) {
859  if (p >= end) return (int )(*sascii);
860 
861  c = (int )ONIGENC_MBC_TO_CODE(enc, p, end);
862  x = *sascii - c;
863  if (x) return x;
864 
865  sascii++;
866  p += enclen(enc, p, end);
867  }
868  return 0;
869 }
870 
871 /* Property management */
872 static int
873 resize_property_list(int new_size, const OnigCodePoint*** plist, int* psize)
874 {
875  size_t size;
876  const OnigCodePoint **list = *plist;
877 
878  size = sizeof(OnigCodePoint*) * new_size;
879  if (IS_NULL(list)) {
880  list = (const OnigCodePoint** )xmalloc(size);
881  }
882  else {
883  list = (const OnigCodePoint** )xrealloc((void* )list, size);
884  }
885 
886  if (IS_NULL(list)) return ONIGERR_MEMORY;
887 
888  *plist = list;
889  *psize = new_size;
890 
891  return 0;
892 }
893 
894 extern int
896  hash_table_type **table, const OnigCodePoint*** plist, int *pnum,
897  int *psize)
898 {
899 #define PROP_INIT_SIZE 16
900 
901  int r;
902 
903  if (*psize <= *pnum) {
904  int new_size = (*psize == 0 ? PROP_INIT_SIZE : *psize * 2);
905  r = resize_property_list(new_size, plist, psize);
906  if (r != 0) return r;
907  }
908 
909  (*plist)[*pnum] = prop;
910 
911  if (ONIG_IS_NULL(*table)) {
913  if (ONIG_IS_NULL(*table)) return ONIGERR_MEMORY;
914  }
915 
916  *pnum = *pnum + 1;
917  onig_st_insert_strend(*table, name, name + strlen((char* )name),
919  return 0;
920 }
921 
922 extern int
924 {
925  int r;
926 
928 
929  r = f();
930 
932  return r;
933 }
934