00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030 #include "regenc.h"
00031
/*
 * Per-byte length table, indexed by a byte value 0x00..0xff.
 * euctw_mbc_enc_len() reads EncLen_EUCTW[firstbyte] - 1 to report how many
 * bytes are still missing after the first one.
 *
 *   0x8e          -> 4
 *   0xa1 .. 0xfe  -> 2
 *   anything else -> 1
 */
#define ENCLEN_ROW16(n) \
  n, n, n, n,  n, n, n, n,  n, n, n, n,  n, n, n, n
#define ENCLEN_ROW15(n) \
  n, n, n, n,  n, n, n, n,  n, n, n, n,  n, n, n
#define ENCLEN_ROW14(n) \
  n, n, n, n,  n, n, n, n,  n, n, n, n,  n, n
static const int EncLen_EUCTW[] = {
  ENCLEN_ROW16(1),            /* 0x00 - 0x0f */
  ENCLEN_ROW16(1),            /* 0x10 - 0x1f */
  ENCLEN_ROW16(1),            /* 0x20 - 0x2f */
  ENCLEN_ROW16(1),            /* 0x30 - 0x3f */
  ENCLEN_ROW16(1),            /* 0x40 - 0x4f */
  ENCLEN_ROW16(1),            /* 0x50 - 0x5f */
  ENCLEN_ROW16(1),            /* 0x60 - 0x6f */
  ENCLEN_ROW16(1),            /* 0x70 - 0x7f */
  ENCLEN_ROW14(1), 4, 1,      /* 0x80 - 0x8f: only 0x8e maps to 4 */
  ENCLEN_ROW16(1),            /* 0x90 - 0x9f */
  1, ENCLEN_ROW15(2),         /* 0xa0 - 0xaf: 0xa0 itself stays 1 */
  ENCLEN_ROW16(2),            /* 0xb0 - 0xbf */
  ENCLEN_ROW16(2),            /* 0xc0 - 0xcf */
  ENCLEN_ROW16(2),            /* 0xd0 - 0xdf */
  ENCLEN_ROW16(2),            /* 0xe0 - 0xef */
  ENCLEN_ROW15(2), 1          /* 0xf0 - 0xff: 0xff itself is 1 */
};
#undef ENCLEN_ROW14
#undef ENCLEN_ROW15
#undef ENCLEN_ROW16
00050
/*
 * States of the byte-sequence recognizer driven by trans[][] below.
 * Negative values are terminal (ACCEPT / FAILURE); S0..S3 index rows of
 * the table.
 */
typedef enum { FAILURE = -2, ACCEPT = -1, S0 = 0, S1, S2, S3 } state_t;

#define A ACCEPT
#define F FAILURE
#define TRANS_X16(v) \
  v, v, v, v,  v, v, v, v,  v, v, v, v,  v, v, v, v
#define TRANS_X15(v) \
  v, v, v, v,  v, v, v, v,  v, v, v, v,  v, v, v
#define TRANS_X14(v) \
  v, v, v, v,  v, v, v, v,  v, v, v, v,  v, v
/*
 * trans[state][byte] gives the next state (or ACCEPT / FAILURE).
 *
 *   S0: 0x00..0x7f accept; 0x8e -> S2; 0xa1..0xfe -> S1; otherwise fail.
 *   S1: 0xa1..0xfe accept; otherwise fail.
 *   S2: 0xa1..0xb0 -> S3; otherwise fail.
 *   S3: 0xa1..0xfe -> S1; otherwise fail.
 */
static const signed char trans[][0x100] = {
  { /* S0: state before the first byte */
    TRANS_X16(A),               /* 0x00 - 0x0f */
    TRANS_X16(A),               /* 0x10 - 0x1f */
    TRANS_X16(A),               /* 0x20 - 0x2f */
    TRANS_X16(A),               /* 0x30 - 0x3f */
    TRANS_X16(A),               /* 0x40 - 0x4f */
    TRANS_X16(A),               /* 0x50 - 0x5f */
    TRANS_X16(A),               /* 0x60 - 0x6f */
    TRANS_X16(A),               /* 0x70 - 0x7f */
    TRANS_X14(F), S2, F,        /* 0x80 - 0x8f: 0x8e enters S2 */
    TRANS_X16(F),               /* 0x90 - 0x9f */
    F, TRANS_X15(S1),           /* 0xa0 - 0xaf */
    TRANS_X16(S1),              /* 0xb0 - 0xbf */
    TRANS_X16(S1),              /* 0xc0 - 0xcf */
    TRANS_X16(S1),              /* 0xd0 - 0xdf */
    TRANS_X16(S1),              /* 0xe0 - 0xef */
    TRANS_X15(S1), F            /* 0xf0 - 0xff */
  },
  { /* S1: one more byte in 0xa1..0xfe completes the character */
    TRANS_X16(F),               /* 0x00 - 0x0f */
    TRANS_X16(F),               /* 0x10 - 0x1f */
    TRANS_X16(F),               /* 0x20 - 0x2f */
    TRANS_X16(F),               /* 0x30 - 0x3f */
    TRANS_X16(F),               /* 0x40 - 0x4f */
    TRANS_X16(F),               /* 0x50 - 0x5f */
    TRANS_X16(F),               /* 0x60 - 0x6f */
    TRANS_X16(F),               /* 0x70 - 0x7f */
    TRANS_X16(F),               /* 0x80 - 0x8f */
    TRANS_X16(F),               /* 0x90 - 0x9f */
    F, TRANS_X15(A),            /* 0xa0 - 0xaf */
    TRANS_X16(A),               /* 0xb0 - 0xbf */
    TRANS_X16(A),               /* 0xc0 - 0xcf */
    TRANS_X16(A),               /* 0xd0 - 0xdf */
    TRANS_X16(A),               /* 0xe0 - 0xef */
    TRANS_X15(A), F             /* 0xf0 - 0xff */
  },
  { /* S2: byte following 0x8e; only 0xa1..0xb0 continues */
    TRANS_X16(F),               /* 0x00 - 0x0f */
    TRANS_X16(F),               /* 0x10 - 0x1f */
    TRANS_X16(F),               /* 0x20 - 0x2f */
    TRANS_X16(F),               /* 0x30 - 0x3f */
    TRANS_X16(F),               /* 0x40 - 0x4f */
    TRANS_X16(F),               /* 0x50 - 0x5f */
    TRANS_X16(F),               /* 0x60 - 0x6f */
    TRANS_X16(F),               /* 0x70 - 0x7f */
    TRANS_X16(F),               /* 0x80 - 0x8f */
    TRANS_X16(F),               /* 0x90 - 0x9f */
    F, TRANS_X15(S3),           /* 0xa0 - 0xaf */
    S3, TRANS_X15(F),           /* 0xb0 - 0xbf: 0xb0 is the last accepted value */
    TRANS_X16(F),               /* 0xc0 - 0xcf */
    TRANS_X16(F),               /* 0xd0 - 0xdf */
    TRANS_X16(F),               /* 0xe0 - 0xef */
    TRANS_X16(F)                /* 0xf0 - 0xff */
  },
  { /* S3: third byte of the 0x8e form; 0xa1..0xfe moves on to S1 */
    TRANS_X16(F),               /* 0x00 - 0x0f */
    TRANS_X16(F),               /* 0x10 - 0x1f */
    TRANS_X16(F),               /* 0x20 - 0x2f */
    TRANS_X16(F),               /* 0x30 - 0x3f */
    TRANS_X16(F),               /* 0x40 - 0x4f */
    TRANS_X16(F),               /* 0x50 - 0x5f */
    TRANS_X16(F),               /* 0x60 - 0x6f */
    TRANS_X16(F),               /* 0x70 - 0x7f */
    TRANS_X16(F),               /* 0x80 - 0x8f */
    TRANS_X16(F),               /* 0x90 - 0x9f */
    F, TRANS_X15(S1),           /* 0xa0 - 0xaf */
    TRANS_X16(S1),              /* 0xb0 - 0xbf */
    TRANS_X16(S1),              /* 0xc0 - 0xcf */
    TRANS_X16(S1),              /* 0xd0 - 0xdf */
    TRANS_X16(S1),              /* 0xe0 - 0xef */
    TRANS_X15(S1), F            /* 0xf0 - 0xff */
  }
};
#undef TRANS_X14
#undef TRANS_X15
#undef TRANS_X16
#undef A
#undef F
00130
00131 static int
00132 euctw_mbc_enc_len(const UChar* p, const UChar* e, OnigEncoding enc ARG_UNUSED)
00133 {
00134 int firstbyte = *p++;
00135 state_t s = trans[0][firstbyte];
00136 #define RETURN(n) \
00137 return s == ACCEPT ? ONIGENC_CONSTRUCT_MBCLEN_CHARFOUND(n) : \
00138 ONIGENC_CONSTRUCT_MBCLEN_INVALID()
00139 if (s < 0) RETURN(1);
00140 if (p == e) return ONIGENC_CONSTRUCT_MBCLEN_NEEDMORE(EncLen_EUCTW[firstbyte]-1);
00141 s = trans[s][*p++];
00142 if (s < 0) RETURN(2);
00143 if (p == e) return ONIGENC_CONSTRUCT_MBCLEN_NEEDMORE(4-2);
00144 s = trans[s][*p++];
00145 if (s < 0) RETURN(3);
00146 if (p == e) return ONIGENC_CONSTRUCT_MBCLEN_NEEDMORE(4-3);
00147 s = trans[s][*p++];
00148 RETURN(4);
00149 #undef RETURN
00150 }
00151
00152 static OnigCodePoint
00153 euctw_mbc_to_code(const UChar* p, const UChar* end, OnigEncoding enc ARG_UNUSED)
00154 {
00155 return onigenc_mbn_mbc_to_code(enc, p, end);
00156 }
00157
00158 static int
00159 euctw_code_to_mbc(OnigCodePoint code, UChar *buf, OnigEncoding enc)
00160 {
00161 return onigenc_mb4_code_to_mbc(enc, code, buf);
00162 }
00163
00164 static int
00165 euctw_mbc_case_fold(OnigCaseFoldType flag, const UChar** pp, const UChar* end,
00166 UChar* lower, OnigEncoding enc)
00167 {
00168 return onigenc_mbn_mbc_case_fold(enc, flag,
00169 pp, end, lower);
00170 }
00171
00172 static int
00173 euctw_is_code_ctype(OnigCodePoint code, unsigned int ctype, OnigEncoding enc)
00174 {
00175 return onigenc_mb4_is_code_ctype(enc, code, ctype);
00176 }
00177
00178 #define euctw_islead(c) ((UChar )((c) - 0xa1) > 0xfe - 0xa1)
00179
00180 static UChar*
00181 euctw_left_adjust_char_head(const UChar* start, const UChar* s, const UChar* end, OnigEncoding enc)
00182 {
00183
00184
00185
00186 const UChar *p;
00187 int len;
00188
00189 if (s <= start) return (UChar* )s;
00190 p = s;
00191
00192 while (!euctw_islead(*p) && p > start) p--;
00193 len = enclen(enc, p, end);
00194 if (p + len > s) return (UChar* )p;
00195 p += len;
00196 return (UChar* )(p + ((s - p) & ~1));
00197 }
00198
00199 static int
00200 euctw_is_allowed_reverse_match(const UChar* s, const UChar* end ARG_UNUSED, OnigEncoding enc ARG_UNUSED)
00201 {
00202 const UChar c = *s;
00203 if (c <= 0x7e) return TRUE;
00204 else return FALSE;
00205 }
00206
00207 OnigEncodingDefine(euc_tw, EUC_TW) = {
00208 euctw_mbc_enc_len,
00209 "EUC-TW",
00210 4,
00211 1,
00212 onigenc_is_mbc_newline_0x0a,
00213 euctw_mbc_to_code,
00214 onigenc_mb4_code_to_mbclen,
00215 euctw_code_to_mbc,
00216 euctw_mbc_case_fold,
00217 onigenc_ascii_apply_all_case_fold,
00218 onigenc_ascii_get_case_fold_codes_by_str,
00219 onigenc_minimum_property_name_to_ctype,
00220 euctw_is_code_ctype,
00221 onigenc_not_support_get_ctype_code_range,
00222 euctw_left_adjust_char_head,
00223 euctw_is_allowed_reverse_match
00224 };
00225 ENC_ALIAS("eucTW", "EUC-TW")
00226