00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030 #include "regint.h"
00031
00032 OnigEncoding OnigEncDefaultCharEncoding = ONIG_ENCODING_INIT_DEFAULT;
00033
/*
 * Encoding subsystem initialisation hook.
 * There is nothing to set up here, so it always reports success (0).
 */
extern int
onigenc_init(void)
{
  const int status = 0;

  return status;
}
00039
00040 extern OnigEncoding
00041 onigenc_get_default_encoding(void)
00042 {
00043 return OnigEncDefaultCharEncoding;
00044 }
00045
00046 extern int
00047 onigenc_set_default_encoding(OnigEncoding enc)
00048 {
00049 OnigEncDefaultCharEncoding = enc;
00050 return 0;
00051 }
00052
00053 extern int
00054 onigenc_mbclen_approximate(const OnigUChar* p,const OnigUChar* e, struct OnigEncodingTypeST* enc)
00055 {
00056 int ret = ONIGENC_PRECISE_MBC_ENC_LEN(enc,p,e);
00057 if (ONIGENC_MBCLEN_CHARFOUND_P(ret))
00058 return ONIGENC_MBCLEN_CHARFOUND_LEN(ret);
00059 else if (ONIGENC_MBCLEN_NEEDMORE_P(ret))
00060 return (int)(e-p)+ONIGENC_MBCLEN_NEEDMORE_LEN(ret);
00061 return 1;
00062 }
00063
00064 extern UChar*
00065 onigenc_get_right_adjust_char_head(OnigEncoding enc, const UChar* start, const UChar* s, const UChar* end)
00066 {
00067 UChar* p = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s, end);
00068 if (p < s) {
00069 p += enclen(enc, p, end);
00070 }
00071 return p;
00072 }
00073
00074 extern UChar*
00075 onigenc_get_right_adjust_char_head_with_prev(OnigEncoding enc,
00076 const UChar* start, const UChar* s, const UChar* end, const UChar** prev)
00077 {
00078 UChar* p = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s, end);
00079
00080 if (p < s) {
00081 if (prev) *prev = (const UChar* )p;
00082 p += enclen(enc, p, end);
00083 }
00084 else {
00085 if (prev) *prev = (const UChar* )NULL;
00086 }
00087 return p;
00088 }
00089
00090 extern UChar*
00091 onigenc_get_prev_char_head(OnigEncoding enc, const UChar* start, const UChar* s, const UChar* end)
00092 {
00093 if (s <= start)
00094 return (UChar* )NULL;
00095
00096 return ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s - 1, end);
00097 }
00098
00099 extern UChar*
00100 onigenc_step_back(OnigEncoding enc, const UChar* start, const UChar* s, const UChar* end, int n)
00101 {
00102 while (ONIG_IS_NOT_NULL(s) && n-- > 0) {
00103 if (s <= start)
00104 return (UChar* )NULL;
00105
00106 s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s - 1, end);
00107 }
00108 return (UChar* )s;
00109 }
00110
00111 extern UChar*
00112 onigenc_step(OnigEncoding enc, const UChar* p, const UChar* end, int n)
00113 {
00114 UChar* q = (UChar* )p;
00115 while (n-- > 0) {
00116 q += ONIGENC_MBC_ENC_LEN(enc, q, end);
00117 }
00118 return (q <= end ? q : NULL);
00119 }
00120
00121 extern int
00122 onigenc_strlen(OnigEncoding enc, const UChar* p, const UChar* end)
00123 {
00124 int n = 0;
00125 UChar* q = (UChar* )p;
00126
00127 while (q < end) {
00128 q += ONIGENC_MBC_ENC_LEN(enc, q, end);
00129 n++;
00130 }
00131 return n;
00132 }
00133
00134 extern int
00135 onigenc_strlen_null(OnigEncoding enc, const UChar* s)
00136 {
00137 int n = 0;
00138 UChar* p = (UChar* )s;
00139 UChar* e;
00140
00141 while (1) {
00142 if (*p == '\0') {
00143 UChar* q;
00144 int len = ONIGENC_MBC_MINLEN(enc);
00145
00146 if (len == 1) return n;
00147 q = p + 1;
00148 while (len > 1) {
00149 if (*q != '\0') break;
00150 q++;
00151 len--;
00152 }
00153 if (len == 1) return n;
00154 }
00155 e = p + ONIGENC_MBC_MAXLEN(enc);
00156 p += ONIGENC_MBC_ENC_LEN(enc, p, e);
00157 n++;
00158 }
00159 }
00160
00161 extern int
00162 onigenc_str_bytelen_null(OnigEncoding enc, const UChar* s)
00163 {
00164 UChar* start = (UChar* )s;
00165 UChar* p = (UChar* )s;
00166 UChar* e;
00167
00168 while (1) {
00169 if (*p == '\0') {
00170 UChar* q;
00171 int len = ONIGENC_MBC_MINLEN(enc);
00172
00173 if (len == 1) return (int )(p - start);
00174 q = p + 1;
00175 while (len > 1) {
00176 if (*q != '\0') break;
00177 q++;
00178 len--;
00179 }
00180 if (len == 1) return (int )(p - start);
00181 }
00182 e = p + ONIGENC_MBC_MAXLEN(enc);
00183 p += ONIGENC_MBC_ENC_LEN(enc, p, e);
00184 }
00185 }
00186
00187 const UChar OnigEncAsciiToLowerCaseTable[] = {
00188 '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007',
00189 '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017',
00190 '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027',
00191 '\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037',
00192 '\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047',
00193 '\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057',
00194 '\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067',
00195 '\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077',
00196 '\100', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
00197 '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
00198 '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
00199 '\170', '\171', '\172', '\133', '\134', '\135', '\136', '\137',
00200 '\140', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
00201 '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
00202 '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
00203 '\170', '\171', '\172', '\173', '\174', '\175', '\176', '\177',
00204 '\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207',
00205 '\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217',
00206 '\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227',
00207 '\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237',
00208 '\240', '\241', '\242', '\243', '\244', '\245', '\246', '\247',
00209 '\250', '\251', '\252', '\253', '\254', '\255', '\256', '\257',
00210 '\260', '\261', '\262', '\263', '\264', '\265', '\266', '\267',
00211 '\270', '\271', '\272', '\273', '\274', '\275', '\276', '\277',
00212 '\300', '\301', '\302', '\303', '\304', '\305', '\306', '\307',
00213 '\310', '\311', '\312', '\313', '\314', '\315', '\316', '\317',
00214 '\320', '\321', '\322', '\323', '\324', '\325', '\326', '\327',
00215 '\330', '\331', '\332', '\333', '\334', '\335', '\336', '\337',
00216 '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347',
00217 '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357',
00218 '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367',
00219 '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377',
00220 };
00221
#ifdef USE_UPPER_CASE_TABLE
/*
 * Byte -> upper-case byte for ASCII (only built with USE_UPPER_CASE_TABLE).
 * 'a'..'z' (0141..0172) map to 'A'..'Z' (0101..0132); every other byte maps
 * to itself.
 */
const UChar OnigEncAsciiToUpperCaseTable[256] = {
  '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007',
  '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017',
  '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027',
  '\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037',
  '\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047',
  '\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057',
  '\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067',
  '\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077',
  '\100', '\101', '\102', '\103', '\104', '\105', '\106', '\107',
  '\110', '\111', '\112', '\113', '\114', '\115', '\116', '\117',
  '\120', '\121', '\122', '\123', '\124', '\125', '\126', '\127',
  '\130', '\131', '\132', '\133', '\134', '\135', '\136', '\137',
  /* '`', then 'a'..'z' folded to 'A'..'Z' */
  '\140', '\101', '\102', '\103', '\104', '\105', '\106', '\107',
  '\110', '\111', '\112', '\113', '\114', '\115', '\116', '\117',
  '\120', '\121', '\122', '\123', '\124', '\125', '\126', '\127',
  '\130', '\131', '\132', '\173', '\174', '\175', '\176', '\177',
  /* 0x80..0xff: identity */
  '\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207',
  '\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217',
  '\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227',
  '\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237',
  '\240', '\241', '\242', '\243', '\244', '\245', '\246', '\247',
  '\250', '\251', '\252', '\253', '\254', '\255', '\256', '\257',
  '\260', '\261', '\262', '\263', '\264', '\265', '\266', '\267',
  '\270', '\271', '\272', '\273', '\274', '\275', '\276', '\277',
  '\300', '\301', '\302', '\303', '\304', '\305', '\306', '\307',
  '\310', '\311', '\312', '\313', '\314', '\315', '\316', '\317',
  '\320', '\321', '\322', '\323', '\324', '\325', '\326', '\327',
  '\330', '\331', '\332', '\333', '\334', '\335', '\336', '\337',
  '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347',
  '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357',
  '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367',
  '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377',
};
#endif
00258
/*
 * Per-byte character-class bit masks for ASCII, indexed by byte value.
 * Only 0x00..0x7f carry class bits; the entries for 0x80..0xff are all zero
 * and are left to the implicit zero-initialization of the remaining
 * elements of the 256-entry array.
 */
const unsigned short OnigEncAsciiCtypeTable[256] = {
  /* 0x00..0x1f */
  0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
  0x4008, 0x420c, 0x4209, 0x4208, 0x4208, 0x4208, 0x4008, 0x4008,
  0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
  0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008, 0x4008,
  /* 0x20..0x3f */
  0x4284, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
  0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
  0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0, 0x78b0,
  0x78b0, 0x78b0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x41a0,
  /* 0x40..0x5f */
  0x41a0, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x7ca2, 0x74a2,
  0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2,
  0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2, 0x74a2,
  0x74a2, 0x74a2, 0x74a2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x51a0,
  /* 0x60..0x7f */
  0x41a0, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x78e2, 0x70e2,
  0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2,
  0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2, 0x70e2,
  0x70e2, 0x70e2, 0x70e2, 0x41a0, 0x41a0, 0x41a0, 0x41a0, 0x4008
  /* 0x80..0xff: implicitly 0x0000 */
};
00293
00294 const UChar OnigEncISO_8859_1_ToLowerCaseTable[256] = {
00295 '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007',
00296 '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017',
00297 '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027',
00298 '\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037',
00299 '\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047',
00300 '\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057',
00301 '\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067',
00302 '\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077',
00303 '\100', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
00304 '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
00305 '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
00306 '\170', '\171', '\172', '\133', '\134', '\135', '\136', '\137',
00307 '\140', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
00308 '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
00309 '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
00310 '\170', '\171', '\172', '\173', '\174', '\175', '\176', '\177',
00311 '\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207',
00312 '\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217',
00313 '\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227',
00314 '\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237',
00315 '\240', '\241', '\242', '\243', '\244', '\245', '\246', '\247',
00316 '\250', '\251', '\252', '\253', '\254', '\255', '\256', '\257',
00317 '\260', '\261', '\262', '\263', '\264', '\265', '\266', '\267',
00318 '\270', '\271', '\272', '\273', '\274', '\275', '\276', '\277',
00319 '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347',
00320 '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357',
00321 '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\327',
00322 '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\337',
00323 '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347',
00324 '\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357',
00325 '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367',
00326 '\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377'
00327 };
00328
#ifdef USE_UPPER_CASE_TABLE
/*
 * Byte -> upper-case byte for ISO-8859-1 (only built with
 * USE_UPPER_CASE_TABLE).
 * 'a'..'z' map to 'A'..'Z', and 0340..0376 map to 0300..0336, except 0367
 * which maps to itself.  0377 and all remaining bytes map to themselves.
 */
const UChar OnigEncISO_8859_1_ToUpperCaseTable[256] = {
  '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007',
  '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017',
  '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027',
  '\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037',
  '\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047',
  '\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057',
  '\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067',
  '\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077',
  '\100', '\101', '\102', '\103', '\104', '\105', '\106', '\107',
  '\110', '\111', '\112', '\113', '\114', '\115', '\116', '\117',
  '\120', '\121', '\122', '\123', '\124', '\125', '\126', '\127',
  '\130', '\131', '\132', '\133', '\134', '\135', '\136', '\137',
  /* '`', then 'a'..'z' folded to 'A'..'Z' */
  '\140', '\101', '\102', '\103', '\104', '\105', '\106', '\107',
  '\110', '\111', '\112', '\113', '\114', '\115', '\116', '\117',
  '\120', '\121', '\122', '\123', '\124', '\125', '\126', '\127',
  '\130', '\131', '\132', '\173', '\174', '\175', '\176', '\177',
  /* 0x80..0xdf: identity */
  '\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207',
  '\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217',
  '\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227',
  '\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237',
  '\240', '\241', '\242', '\243', '\244', '\245', '\246', '\247',
  '\250', '\251', '\252', '\253', '\254', '\255', '\256', '\257',
  '\260', '\261', '\262', '\263', '\264', '\265', '\266', '\267',
  '\270', '\271', '\272', '\273', '\274', '\275', '\276', '\277',
  '\300', '\301', '\302', '\303', '\304', '\305', '\306', '\307',
  '\310', '\311', '\312', '\313', '\314', '\315', '\316', '\317',
  '\320', '\321', '\322', '\323', '\324', '\325', '\326', '\327',
  '\330', '\331', '\332', '\333', '\334', '\335', '\336', '\337',
  /* 0xe0..0xfe folded to 0xc0..0xde; 0367 and 0377 stay as they are */
  '\300', '\301', '\302', '\303', '\304', '\305', '\306', '\307',
  '\310', '\311', '\312', '\313', '\314', '\315', '\316', '\317',
  '\320', '\321', '\322', '\323', '\324', '\325', '\326', '\367',
  '\330', '\331', '\332', '\333', '\334', '\335', '\336', '\377',
};
#endif
00365
00366 extern void
00367 onigenc_set_default_caseconv_table(const UChar* table ARG_UNUSED)
00368 {
00369
00370
00371 }
00372
00373 extern UChar*
00374 onigenc_get_left_adjust_char_head(OnigEncoding enc, const UChar* start, const UChar* s, const UChar* end)
00375 {
00376 return ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s, end);
00377 }
00378
00379 const OnigPairCaseFoldCodes OnigAsciiLowerMap[] = {
00380 { 0x41, 0x61 },
00381 { 0x42, 0x62 },
00382 { 0x43, 0x63 },
00383 { 0x44, 0x64 },
00384 { 0x45, 0x65 },
00385 { 0x46, 0x66 },
00386 { 0x47, 0x67 },
00387 { 0x48, 0x68 },
00388 { 0x49, 0x69 },
00389 { 0x4a, 0x6a },
00390 { 0x4b, 0x6b },
00391 { 0x4c, 0x6c },
00392 { 0x4d, 0x6d },
00393 { 0x4e, 0x6e },
00394 { 0x4f, 0x6f },
00395 { 0x50, 0x70 },
00396 { 0x51, 0x71 },
00397 { 0x52, 0x72 },
00398 { 0x53, 0x73 },
00399 { 0x54, 0x74 },
00400 { 0x55, 0x75 },
00401 { 0x56, 0x76 },
00402 { 0x57, 0x77 },
00403 { 0x58, 0x78 },
00404 { 0x59, 0x79 },
00405 { 0x5a, 0x7a }
00406 };
00407
00408 extern int
00409 onigenc_ascii_apply_all_case_fold(OnigCaseFoldType flag ARG_UNUSED,
00410 OnigApplyAllCaseFoldFunc f, void* arg,
00411 OnigEncoding enc ARG_UNUSED)
00412 {
00413 OnigCodePoint code;
00414 int i, r;
00415
00416 for (i = 0;
00417 i < (int )(sizeof(OnigAsciiLowerMap)/sizeof(OnigPairCaseFoldCodes));
00418 i++) {
00419 code = OnigAsciiLowerMap[i].to;
00420 r = (*f)(OnigAsciiLowerMap[i].from, &code, 1, arg);
00421 if (r != 0) return r;
00422
00423 code = OnigAsciiLowerMap[i].from;
00424 r = (*f)(OnigAsciiLowerMap[i].to, &code, 1, arg);
00425 if (r != 0) return r;
00426 }
00427
00428 return 0;
00429 }
00430
00431 extern int
00432 onigenc_ascii_get_case_fold_codes_by_str(OnigCaseFoldType flag ARG_UNUSED,
00433 const OnigUChar* p, const OnigUChar* end ARG_UNUSED, OnigCaseFoldCodeItem items[],
00434 OnigEncoding enc ARG_UNUSED)
00435 {
00436 if (0x41 <= *p && *p <= 0x5a) {
00437 items[0].byte_len = 1;
00438 items[0].code_len = 1;
00439 items[0].code[0] = (OnigCodePoint )(*p + 0x20);
00440 return 1;
00441 }
00442 else if (0x61 <= *p && *p <= 0x7a) {
00443 items[0].byte_len = 1;
00444 items[0].code_len = 1;
00445 items[0].code[0] = (OnigCodePoint )(*p - 0x20);
00446 return 1;
00447 }
00448 else
00449 return 0;
00450 }
00451
00452 static int
00453 ss_apply_all_case_fold(OnigCaseFoldType flag ARG_UNUSED,
00454 OnigApplyAllCaseFoldFunc f, void* arg)
00455 {
00456 OnigCodePoint ss[] = { 0x73, 0x73 };
00457
00458 return (*f)((OnigCodePoint )0xdf, ss, 2, arg);
00459 }
00460
00461 extern int
00462 onigenc_apply_all_case_fold_with_map(int map_size,
00463 const OnigPairCaseFoldCodes map[],
00464 int ess_tsett_flag, OnigCaseFoldType flag,
00465 OnigApplyAllCaseFoldFunc f, void* arg)
00466 {
00467 OnigCodePoint code;
00468 int i, r;
00469
00470 r = onigenc_ascii_apply_all_case_fold(flag, f, arg, 0);
00471 if (r != 0) return r;
00472
00473 for (i = 0; i < map_size; i++) {
00474 code = map[i].to;
00475 r = (*f)(map[i].from, &code, 1, arg);
00476 if (r != 0) return r;
00477
00478 code = map[i].from;
00479 r = (*f)(map[i].to, &code, 1, arg);
00480 if (r != 0) return r;
00481 }
00482
00483 if (ess_tsett_flag != 0)
00484 return ss_apply_all_case_fold(flag, f, arg);
00485
00486 return 0;
00487 }
00488
00489 extern int
00490 onigenc_get_case_fold_codes_by_str_with_map(int map_size,
00491 const OnigPairCaseFoldCodes map[],
00492 int ess_tsett_flag, OnigCaseFoldType flag ARG_UNUSED,
00493 const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[])
00494 {
00495 if (0x41 <= *p && *p <= 0x5a) {
00496 items[0].byte_len = 1;
00497 items[0].code_len = 1;
00498 items[0].code[0] = (OnigCodePoint )(*p + 0x20);
00499 if (*p == 0x53 && ess_tsett_flag != 0 && end > p + 1
00500 && (*(p+1) == 0x53 || *(p+1) == 0x73)) {
00501
00502 items[1].byte_len = 2;
00503 items[1].code_len = 1;
00504 items[1].code[0] = (OnigCodePoint )0xdf;
00505 return 2;
00506 }
00507 else
00508 return 1;
00509 }
00510 else if (0x61 <= *p && *p <= 0x7a) {
00511 items[0].byte_len = 1;
00512 items[0].code_len = 1;
00513 items[0].code[0] = (OnigCodePoint )(*p - 0x20);
00514 if (*p == 0x73 && ess_tsett_flag != 0 && end > p + 1
00515 && (*(p+1) == 0x73 || *(p+1) == 0x53)) {
00516
00517 items[1].byte_len = 2;
00518 items[1].code_len = 1;
00519 items[1].code[0] = (OnigCodePoint )0xdf;
00520 return 2;
00521 }
00522 else
00523 return 1;
00524 }
00525 else if (*p == 0xdf && ess_tsett_flag != 0) {
00526 items[0].byte_len = 1;
00527 items[0].code_len = 2;
00528 items[0].code[0] = (OnigCodePoint )'s';
00529 items[0].code[1] = (OnigCodePoint )'s';
00530
00531 items[1].byte_len = 1;
00532 items[1].code_len = 2;
00533 items[1].code[0] = (OnigCodePoint )'S';
00534 items[1].code[1] = (OnigCodePoint )'S';
00535
00536 items[2].byte_len = 1;
00537 items[2].code_len = 2;
00538 items[2].code[0] = (OnigCodePoint )'s';
00539 items[2].code[1] = (OnigCodePoint )'S';
00540
00541 items[3].byte_len = 1;
00542 items[3].code_len = 2;
00543 items[3].code[0] = (OnigCodePoint )'S';
00544 items[3].code[1] = (OnigCodePoint )'s';
00545
00546 return 4;
00547 }
00548 else {
00549 int i;
00550
00551 for (i = 0; i < map_size; i++) {
00552 if (*p == map[i].from) {
00553 items[0].byte_len = 1;
00554 items[0].code_len = 1;
00555 items[0].code[0] = map[i].to;
00556 return 1;
00557 }
00558 else if (*p == map[i].to) {
00559 items[0].byte_len = 1;
00560 items[0].code_len = 1;
00561 items[0].code[0] = map[i].from;
00562 return 1;
00563 }
00564 }
00565 }
00566
00567 return 0;
00568 }
00569
00570
00571 extern int
00572 onigenc_not_support_get_ctype_code_range(OnigCtype ctype,
00573 OnigCodePoint* sb_out, const OnigCodePoint* ranges[],
00574 OnigEncoding enc)
00575 {
00576 return ONIG_NO_SUPPORT_CONFIG;
00577 }
00578
00579 extern int
00580 onigenc_is_mbc_newline_0x0a(const UChar* p, const UChar* end, OnigEncoding enc ARG_UNUSED)
00581 {
00582 if (p < end) {
00583 if (*p == 0x0a) return 1;
00584 }
00585 return 0;
00586 }
00587
00588
00589 extern int
00590 onigenc_ascii_mbc_case_fold(OnigCaseFoldType flag ARG_UNUSED, const UChar** p,
00591 const UChar*end, UChar* lower, OnigEncoding enc ARG_UNUSED)
00592 {
00593 *lower = ONIGENC_ASCII_CODE_TO_LOWER_CASE(**p);
00594
00595 (*p)++;
00596 return 1;
00597 }
00598
#if 0
/* Disabled: tests the byte at `*pp` for ASCII case ambiguity and advances `*pp` by one. */
extern int
onigenc_ascii_is_mbc_ambiguous(OnigCaseFoldType flag ARG_UNUSED,
                               const UChar** pp, const UChar* end ARG_UNUSED)
{
  const UChar* cur = *pp;

  *pp = cur + 1;
  return ONIGENC_IS_ASCII_CODE_CASE_AMBIG(*cur);
}
#endif
00610
00611 extern int
00612 onigenc_single_byte_mbc_enc_len(const UChar* p ARG_UNUSED, const UChar* e ARG_UNUSED,
00613 OnigEncoding enc ARG_UNUSED)
00614 {
00615 return 1;
00616 }
00617
00618 extern OnigCodePoint
00619 onigenc_single_byte_mbc_to_code(const UChar* p, const UChar* end ARG_UNUSED,
00620 OnigEncoding enc ARG_UNUSED)
00621 {
00622 return (OnigCodePoint )(*p);
00623 }
00624
00625 extern int
00626 onigenc_single_byte_code_to_mbclen(OnigCodePoint code ARG_UNUSED, OnigEncoding enc ARG_UNUSED)
00627 {
00628 return 1;
00629 }
00630
00631 extern int
00632 onigenc_single_byte_code_to_mbc(OnigCodePoint code, UChar *buf, OnigEncoding enc ARG_UNUSED)
00633 {
00634 if (code > 0xff)
00635 rb_raise(rb_eRangeError, "%u out of char range", code);
00636 *buf = (UChar )(code & 0xff);
00637 return 1;
00638 }
00639
00640 extern UChar*
00641 onigenc_single_byte_left_adjust_char_head(const UChar* start ARG_UNUSED, const UChar* s,
00642 const UChar* end,
00643 OnigEncoding enc ARG_UNUSED)
00644 {
00645 return (UChar* )s;
00646 }
00647
00648 extern int
00649 onigenc_always_true_is_allowed_reverse_match(const UChar* s ARG_UNUSED, const UChar* end ARG_UNUSED,
00650 OnigEncoding enc ARG_UNUSED)
00651 {
00652 return TRUE;
00653 }
00654
00655 extern int
00656 onigenc_always_false_is_allowed_reverse_match(const UChar* s ARG_UNUSED, const UChar* end ARG_UNUSED,
00657 OnigEncoding enc ARG_UNUSED)
00658 {
00659 return FALSE;
00660 }
00661
00662 extern int
00663 onigenc_ascii_is_code_ctype(OnigCodePoint code, unsigned int ctype,
00664 OnigEncoding enc ARG_UNUSED)
00665 {
00666 if (code < 128)
00667 return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype);
00668 else
00669 return FALSE;
00670 }
00671
00672 extern OnigCodePoint
00673 onigenc_mbn_mbc_to_code(OnigEncoding enc, const UChar* p, const UChar* end)
00674 {
00675 int c, i, len;
00676 OnigCodePoint n;
00677
00678 len = enclen(enc, p, end);
00679 n = (OnigCodePoint )(*p++);
00680 if (len == 1) return n;
00681
00682 for (i = 1; i < len; i++) {
00683 if (p >= end) break;
00684 c = *p++;
00685 n <<= 8; n += c;
00686 }
00687 return n;
00688 }
00689
00690 extern int
00691 onigenc_mbn_mbc_case_fold(OnigEncoding enc, OnigCaseFoldType flag ARG_UNUSED,
00692 const UChar** pp, const UChar* end ARG_UNUSED,
00693 UChar* lower)
00694 {
00695 int len;
00696 const UChar *p = *pp;
00697
00698 if (ONIGENC_IS_MBC_ASCII(p)) {
00699 *lower = ONIGENC_ASCII_CODE_TO_LOWER_CASE(*p);
00700 (*pp)++;
00701 return 1;
00702 }
00703 else {
00704 int i;
00705
00706 len = enclen(enc, p, end);
00707 for (i = 0; i < len; i++) {
00708 *lower++ = *p++;
00709 }
00710 (*pp) += len;
00711 return len;
00712 }
00713 }
00714
#if 0
/*
 * Disabled: case-ambiguity test for one character at `*pp`, advancing `*pp`
 * past it.  Only ASCII bytes can be ambiguous; other characters are skipped
 * and reported as FALSE.
 * Kept in sync with the three-argument enclen(enc, p, end) used in the rest
 * of this file so the code builds if it is ever re-enabled; `pp` and `end`
 * are used and therefore no longer marked ARG_UNUSED.
 */
extern int
onigenc_mbn_is_mbc_ambiguous(OnigEncoding enc, OnigCaseFoldType flag ARG_UNUSED,
                             const UChar** pp, const UChar* end)
{
  const UChar* p = *pp;

  if (ONIGENC_IS_MBC_ASCII(p)) {
    (*pp)++;
    return ONIGENC_IS_ASCII_CODE_CASE_AMBIG(*p);
  }

  (*pp) += enclen(enc, p, end);
  return FALSE;
}
#endif
00731
00732 extern int
00733 onigenc_mb2_code_to_mbclen(OnigCodePoint code, OnigEncoding enc ARG_UNUSED)
00734 {
00735 if ((code & 0xff00) != 0) return 2;
00736 else return 1;
00737 }
00738
00739 extern int
00740 onigenc_mb4_code_to_mbclen(OnigCodePoint code, OnigEncoding enc ARG_UNUSED)
00741 {
00742 if ((code & 0xff000000) != 0) return 4;
00743 else if ((code & 0xff0000) != 0) return 3;
00744 else if ((code & 0xff00) != 0) return 2;
00745 else return 1;
00746 }
00747
00748 extern int
00749 onigenc_mb2_code_to_mbc(OnigEncoding enc, OnigCodePoint code, UChar *buf)
00750 {
00751 UChar *p = buf;
00752
00753 if ((code & 0xff00) != 0) {
00754 *p++ = (UChar )((code >> 8) & 0xff);
00755 }
00756 *p++ = (UChar )(code & 0xff);
00757
00758 #if 1
00759 if (enclen(enc, buf, p) != (p - buf))
00760 return ONIGERR_INVALID_CODE_POINT_VALUE;
00761 #endif
00762 return (int)(p - buf);
00763 }
00764
00765 extern int
00766 onigenc_mb4_code_to_mbc(OnigEncoding enc, OnigCodePoint code, UChar *buf)
00767 {
00768 UChar *p = buf;
00769
00770 if ((code & 0xff000000) != 0) {
00771 *p++ = (UChar )((code >> 24) & 0xff);
00772 }
00773 if ((code & 0xff0000) != 0 || p != buf) {
00774 *p++ = (UChar )((code >> 16) & 0xff);
00775 }
00776 if ((code & 0xff00) != 0 || p != buf) {
00777 *p++ = (UChar )((code >> 8) & 0xff);
00778 }
00779 *p++ = (UChar )(code & 0xff);
00780
00781 #if 1
00782 if (enclen(enc, buf, p) != (p - buf))
00783 return ONIGERR_INVALID_CODE_POINT_VALUE;
00784 #endif
00785 return (int)(p - buf);
00786 }
00787
00788 extern int
00789 onigenc_minimum_property_name_to_ctype(OnigEncoding enc, UChar* p, UChar* end)
00790 {
00791 static const PosixBracketEntryType PBS[] = {
00792 PosixBracketEntryInit("Alnum", ONIGENC_CTYPE_ALNUM),
00793 PosixBracketEntryInit("Alpha", ONIGENC_CTYPE_ALPHA),
00794 PosixBracketEntryInit("Blank", ONIGENC_CTYPE_BLANK),
00795 PosixBracketEntryInit("Cntrl", ONIGENC_CTYPE_CNTRL),
00796 PosixBracketEntryInit("Digit", ONIGENC_CTYPE_DIGIT),
00797 PosixBracketEntryInit("Graph", ONIGENC_CTYPE_GRAPH),
00798 PosixBracketEntryInit("Lower", ONIGENC_CTYPE_LOWER),
00799 PosixBracketEntryInit("Print", ONIGENC_CTYPE_PRINT),
00800 PosixBracketEntryInit("Punct", ONIGENC_CTYPE_PUNCT),
00801 PosixBracketEntryInit("Space", ONIGENC_CTYPE_SPACE),
00802 PosixBracketEntryInit("Upper", ONIGENC_CTYPE_UPPER),
00803 PosixBracketEntryInit("XDigit", ONIGENC_CTYPE_XDIGIT),
00804 PosixBracketEntryInit("ASCII", ONIGENC_CTYPE_ASCII),
00805 PosixBracketEntryInit("Word", ONIGENC_CTYPE_WORD),
00806 };
00807
00808 const PosixBracketEntryType *pb, *pbe;
00809 int len;
00810
00811 len = onigenc_strlen(enc, p, end);
00812 for (pbe = (pb = PBS) + sizeof(PBS)/sizeof(PBS[0]); pb < pbe; ++pb) {
00813 if (len == pb->len &&
00814 onigenc_with_ascii_strncmp(enc, p, end, pb->name, pb->len) == 0)
00815 return pb->ctype;
00816 }
00817
00818 return ONIGERR_INVALID_CHAR_PROPERTY_NAME;
00819 }
00820
00821 extern int
00822 onigenc_mb2_is_code_ctype(OnigEncoding enc, OnigCodePoint code,
00823 unsigned int ctype)
00824 {
00825 if (code < 128)
00826 return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype);
00827 else {
00828 if (CTYPE_IS_WORD_GRAPH_PRINT(ctype)) {
00829 return (ONIGENC_CODE_TO_MBCLEN(enc, code) > 1 ? TRUE : FALSE);
00830 }
00831 }
00832
00833 return FALSE;
00834 }
00835
00836 extern int
00837 onigenc_mb4_is_code_ctype(OnigEncoding enc, OnigCodePoint code,
00838 unsigned int ctype)
00839 {
00840 if (code < 128)
00841 return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype);
00842 else {
00843 if (CTYPE_IS_WORD_GRAPH_PRINT(ctype)) {
00844 return (ONIGENC_CODE_TO_MBCLEN(enc, code) > 1 ? TRUE : FALSE);
00845 }
00846 }
00847
00848 return FALSE;
00849 }
00850
00851 extern int
00852 onigenc_with_ascii_strncmp(OnigEncoding enc, const UChar* p, const UChar* end,
00853 const UChar* sascii , int n)
00854 {
00855 int x, c;
00856
00857 while (n-- > 0) {
00858 if (p >= end) return (int )(*sascii);
00859
00860 c = (int )ONIGENC_MBC_TO_CODE(enc, p, end);
00861 x = *sascii - c;
00862 if (x) return x;
00863
00864 sascii++;
00865 p += enclen(enc, p, end);
00866 }
00867 return 0;
00868 }
00869
00870
00871 static int
00872 resize_property_list(int new_size, const OnigCodePoint*** plist, int* psize)
00873 {
00874 size_t size;
00875 const OnigCodePoint **list = *plist;
00876
00877 size = sizeof(OnigCodePoint*) * new_size;
00878 if (IS_NULL(list)) {
00879 list = (const OnigCodePoint** )xmalloc(size);
00880 }
00881 else {
00882 list = (const OnigCodePoint** )xrealloc((void* )list, size);
00883 }
00884
00885 if (IS_NULL(list)) return ONIGERR_MEMORY;
00886
00887 *plist = list;
00888 *psize = new_size;
00889
00890 return 0;
00891 }
00892
00893 extern int
00894 onigenc_property_list_add_property(UChar* name, const OnigCodePoint* prop,
00895 hash_table_type **table, const OnigCodePoint*** plist, int *pnum,
00896 int *psize)
00897 {
00898 #define PROP_INIT_SIZE 16
00899
00900 int r;
00901
00902 if (*psize <= *pnum) {
00903 int new_size = (*psize == 0 ? PROP_INIT_SIZE : *psize * 2);
00904 r = resize_property_list(new_size, plist, psize);
00905 if (r != 0) return r;
00906 }
00907
00908 (*plist)[*pnum] = prop;
00909
00910 if (ONIG_IS_NULL(*table)) {
00911 *table = onig_st_init_strend_table_with_size(PROP_INIT_SIZE);
00912 if (ONIG_IS_NULL(*table)) return ONIGERR_MEMORY;
00913 }
00914
00915 *pnum = *pnum + 1;
00916 onig_st_insert_strend(*table, name, name + strlen((char* )name),
00917 (hash_data_type )(*pnum + ONIGENC_MAX_STD_CTYPE));
00918 return 0;
00919 }
00920
00921 extern int
00922 onigenc_property_list_init(int (*f)(void))
00923 {
00924 int r;
00925
00926 THREAD_ATOMIC_START;
00927
00928 r = f();
00929
00930 THREAD_ATOMIC_END;
00931 return r;
00932 }
00933