Ruby 1.9.3p448 (2013-06-27 revision 41675)
utf_16_32.c
Go to the documentation of this file.
1 /* autogenerated. */
2 /* src="transcode-tblgen.rb", len=28123, checksum=30477 */
3 /* src="utf_16_32.trans", len=15312, checksum=29120 */
4 
5 #include "transcode_data.h"
6 
7 
8 
/*
 * Byte lookup table (autogenerated from utf_16_32.trans by
 * transcode-tblgen.rb; do not edit by hand).
 *
 * Each *_offsets macro is the index at which one sub-table starts in this
 * array.  A sub-table is two bytes followed by a run of small entries; the
 * two bytes look like the lowest and highest input byte the sub-table
 * covers, because the number of entries always equals (high - low + 1):
 * "220, 223" is followed by 4 entries, "0, 255" by 256, "0, 244" by 245.
 * The sub-table sizes add up to the 1288 bytes named in
 * TRANSCODE_TABLE_INFO.
 * NOTE(review): the entries are presumably indexes into the matching *_infos
 * list of the word table — confirm against transcode_data.h.
 * NOTE(review): the array's declarator line is missing from this listing
 * (numbering jumps 9 -> 11).
 */
9 static const unsigned char
11 #define from_UTF_16LE_00toFF_D8toDB_00toFF_offsets 0
12 220, 223,
13  1, 1, 1, 1,
14 
15 #define from_UTF_16LE_00toFF_D8toDB_offsets 6
16 0, 255,
17  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
18  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
19  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
20  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
21  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
22  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
23  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
24  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
25  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
26  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
27  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
28  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
29  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
30  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
31  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
32  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
33 
34 #define from_UTF_16LE_00toFF_offsets 264
35 0, 255,
36  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
37  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
38  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
39  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
40  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
41  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
42  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
43  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
44  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
45  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
46  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
47  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
48  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
49  0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2,
50  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
51  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
52 
53 #define from_UTF_32LE_00toFF_00toD7_00_offsets 522
54 0, 0,
55  0,
56 
57 #define from_UTF_32LE_00toFF_00toD7_offsets 525
58 0, 16,
59  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
60  0,
61 
62 #define from_UTF_32LE_00toFF_D8toDF_offsets 544
63 1, 16,
64  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
65 
66 #define from_UTF_32LE_00toFF_offsets 562
67 0, 255,
68  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
69  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
70  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
71  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
72  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
73  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
74  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
75  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
76  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
77  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
78  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
79  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
80  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
81  0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1,
82  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
83  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
84 
85 #define from_UTF_32BE_00_offsets 820
86 0, 16,
87  0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
88  1,
89 
90 #define from_UTF_8_C2toDF_offsets 839
91 128, 191,
92  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
93  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
94  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
95  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
96 
97 #define from_UTF_8_E0_offsets 905
98 160, 191,
99  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
100  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
101 
102 #define from_UTF_8_ED_offsets 939
103 128, 159,
104  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
105  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
106 
107 #define from_UTF_8_F0_offsets 973
108 144, 191,
109  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
110  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
111  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
112 
113 #define from_UTF_8_F4_offsets 1023
114 128, 143,
115  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
116 
117 #define from_UTF_8_offsets 1041
118 0, 244,
119  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
120  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
121  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
122  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
123  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
124  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
125  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
126  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
127  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
128  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
129  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
130  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
131  1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
132  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
133  3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 5, 4, 4,
134  6, 7, 7, 7, 8,
135 
136 };
/*
 * Word table (autogenerated from utf_16_32.trans by transcode-tblgen.rb; do
 * not edit by hand).
 *
 * Two kinds of macros label positions (word indexes) in this array:
 *   - NAME_infos marks the start of a list of actions / next nodes
 *     (INVALID, FUNso, FUNsi, or another node macro);
 *   - NAME marks a two-word node: an *_offsets index into the byte table
 *     followed by an *_infos position.  from_UTF_8_E1toEC (words 83-84) is
 *     a fully visible example.
 * The last node, from_UTF_8, sits at word 104, which matches the 106 words
 * named in TRANSCODE_TABLE_INFO.
 * NOTE(review): the array's declarator and many entry lines are missing
 * from this listing (gaps in the embedded numbering, e.g. 142 -> 145), so
 * the visible entries do not add up to the indexes in the macros.
 * NOTE(review): how WORDINDEX2INFO scales an index is defined in
 * transcode_data.h, not here.
 */
137 static const unsigned int
139 #define from_UTF_16LE_00toFF_D8toDB_00toFF_infos WORDINDEX2INFO(0)
140  INVALID, FUNso,
141 
142 #define from_UTF_16LE_00toFF_D8toDB_00toFF WORDINDEX2INFO(2)
145 
146 #define from_UTF_16LE_00toFF_D8toDB_infos WORDINDEX2INFO(4)
148 
149 #define from_UTF_16LE_00toFF_D8toDB WORDINDEX2INFO(5)
152 
153 #define from_UTF_16LE_00toFF_infos WORDINDEX2INFO(7)
155  INVALID,
156 
157 #define from_UTF_16LE_00toFF WORDINDEX2INFO(10)
160 
161 #define from_UTF_16LE_infos WORDINDEX2INFO(12)
163 
164 #define from_UTF_16LE WORDINDEX2INFO(13)
165  from_UTF_16LE_00toFF_D8toDB_offsets,
167 
168 #define from_UTF_32LE_00toFF_00toD7_00_infos WORDINDEX2INFO(15)
169  FUNso, INVALID,
170 
171 #define from_UTF_32LE_00toFF_00toD7_00 WORDINDEX2INFO(17)
174 
175 #define from_UTF_32LE_00toFF_00toD7_infos WORDINDEX2INFO(19)
177 
178 #define from_UTF_32LE_00toFF_00toD7 WORDINDEX2INFO(21)
181 
182 #define from_UTF_32LE_00toFF_D8toDF_infos WORDINDEX2INFO(23)
183  INVALID, from_UTF_32LE_00toFF_00toD7_00,
184 
185 #define from_UTF_32LE_00toFF_D8toDF WORDINDEX2INFO(25)
188 
189 #define from_UTF_32LE_00toFF_infos WORDINDEX2INFO(27)
191 
192 #define from_UTF_32LE_00toFF WORDINDEX2INFO(29)
195 
196 #define from_UTF_32LE_infos WORDINDEX2INFO(31)
198 
199 #define from_UTF_32LE WORDINDEX2INFO(32)
200  from_UTF_16LE_00toFF_D8toDB_offsets,
202 
203 #define from_UTF_16BE_00toD7_infos WORDINDEX2INFO(34)
204  FUNso,
205 
206 #define from_UTF_16BE_00toD7 WORDINDEX2INFO(35)
207  from_UTF_16LE_00toFF_D8toDB_offsets,
209 
210 #define from_UTF_16BE_D8toDB_00toFF_infos WORDINDEX2INFO(37)
212 
213 #define from_UTF_16BE_D8toDB_00toFF WORDINDEX2INFO(39)
214  from_UTF_16LE_00toFF_D8toDB_00toFF_offsets,
216 
217 #define from_UTF_16BE_D8toDB_infos WORDINDEX2INFO(41)
219 
220 #define from_UTF_16BE_D8toDB WORDINDEX2INFO(42)
221  from_UTF_16LE_00toFF_D8toDB_offsets,
223 
224 #define from_UTF_16BE_infos WORDINDEX2INFO(44)
225  from_UTF_16BE_00toD7, from_UTF_16BE_D8toDB,
226  INVALID,
227 
228 #define from_UTF_16BE WORDINDEX2INFO(47)
229  from_UTF_16LE_00toFF_offsets,
231 
232 #define from_UTF_32BE_00_00_infos WORDINDEX2INFO(49)
233  from_UTF_16BE_00toD7, INVALID,
234 
235 #define from_UTF_32BE_00_00 WORDINDEX2INFO(51)
236  from_UTF_32LE_00toFF_offsets,
238 
239 #define from_UTF_32BE_00_01to10_infos WORDINDEX2INFO(53)
240  from_UTF_16BE_00toD7,
241 
242 #define from_UTF_32BE_00_01to10 WORDINDEX2INFO(54)
243  from_UTF_16LE_00toFF_D8toDB_offsets,
245 
246 #define from_UTF_32BE_00_infos WORDINDEX2INFO(56)
248  INVALID,
249 
250 #define from_UTF_32BE_00 WORDINDEX2INFO(59)
253 
254 #define from_UTF_32BE_infos WORDINDEX2INFO(61)
256 
257 #define from_UTF_32BE WORDINDEX2INFO(63)
258  from_UTF_32LE_00toFF_00toD7_00_offsets,
260 
261 #define from_UTF_16_00toFF_infos WORDINDEX2INFO(65)
262  FUNsi,
263 
264 #define from_UTF_16_00toFF WORDINDEX2INFO(66)
265  from_UTF_16LE_00toFF_D8toDB_offsets,
267 
268 #define from_UTF_16_infos WORDINDEX2INFO(68)
270 
271 #define from_UTF_16 WORDINDEX2INFO(69)
272  from_UTF_16LE_00toFF_D8toDB_offsets,
274 
275 #define from_UTF_32_00toFF_infos WORDINDEX2INFO(71)
276  from_UTF_16,
277 
278 #define from_UTF_32_00toFF WORDINDEX2INFO(72)
279  from_UTF_16LE_00toFF_D8toDB_offsets,
281 
282 #define from_UTF_32_infos WORDINDEX2INFO(74)
284 
285 #define from_UTF_32 WORDINDEX2INFO(75)
286  from_UTF_16LE_00toFF_D8toDB_offsets,
288 
289 #define from_UTF_8_C2toDF WORDINDEX2INFO(77)
291  from_UTF_16LE_00toFF_D8toDB_00toFF_infos,
292 
293 #define from_UTF_8_E0_infos WORDINDEX2INFO(79)
295 
296 #define from_UTF_8_E0 WORDINDEX2INFO(81)
299 
300 #define from_UTF_8_E1toEC WORDINDEX2INFO(83)
301  from_UTF_8_C2toDF_offsets,
302  from_UTF_8_E0_infos,
303 
304 #define from_UTF_8_ED WORDINDEX2INFO(85)
306  from_UTF_8_E0_infos,
307 
308 #define from_UTF_8_F0_infos WORDINDEX2INFO(87)
310 
311 #define from_UTF_8_F0 WORDINDEX2INFO(89)
314 
315 #define from_UTF_8_F1toF3 WORDINDEX2INFO(91)
316  from_UTF_8_C2toDF_offsets,
317  from_UTF_8_F0_infos,
318 
319 #define from_UTF_8_F4 WORDINDEX2INFO(93)
321  from_UTF_8_F0_infos,
322 
323 #define from_UTF_8_infos WORDINDEX2INFO(95)
324  FUNso, INVALID,
325  from_UTF_8_C2toDF, from_UTF_8_E0,
326  from_UTF_8_E1toEC, from_UTF_8_ED,
329 
330 #define from_UTF_8 WORDINDEX2INFO(104)
333 
334 };
335 #define TRANSCODE_TABLE_INFO utf_16_32_byte_array, 1288, utf_16_32_word_array, 106, ((int)sizeof(unsigned int))
336 
337 
/*
 * Re-encode one UTF-16BE character as UTF-8.
 *
 * s points at a 2-byte unit, or at a 4-byte pair when the first byte has
 * the surrogate prefix (0xD8..0xDF); o receives 1 to 4 bytes.
 * Returns the number of bytes stored in o.
 * statep, l and osize are not consulted: the caller has to pass well-formed
 * input and leave room for 4 output bytes.
 */
static ssize_t
fun_so_from_utf_16be(void *statep, const unsigned char *s, size_t l, unsigned char *o, size_t osize)
{
    const unsigned char hi = s[0];
    const unsigned char lo = s[1];

    if (hi == 0 && lo < 0x80) {
        /* U+0000..U+007F: copied as a single byte */
        o[0] = lo;
        return 1;
    }
    if (hi < 0x08) {
        /* up to U+07FF: 110xxxxx 10xxxxxx */
        o[0] = 0xC0 | (hi << 2) | (lo >> 6);
        o[1] = 0x80 | (lo & 0x3F);
        return 2;
    }
    if ((hi & 0xF8) != 0xD8) {
        /* remaining non-surrogate units: 1110xxxx 10xxxxxx 10xxxxxx */
        o[0] = 0xE0 | (hi >> 4);
        o[1] = 0x80 | ((hi & 0x0F) << 2) | (lo >> 6);
        o[2] = 0x80 | (lo & 0x3F);
        return 3;
    }

    /* Surrogate pair: the +1 undoes the 0x10000 bias of the lead unit. */
    {
        const unsigned char hi2 = s[2];
        const unsigned char lo2 = s[3];
        const unsigned int plane = (((hi & 0x03) << 2) | (lo >> 6)) + 1;

        o[0] = 0xF0 | (plane >> 2);
        o[1] = 0x80 | ((plane & 0x03) << 4) | ((lo >> 2) & 0x0F);
        o[2] = 0x80 | ((lo & 0x03) << 4) | ((hi2 & 0x03) << 2) | (lo2 >> 6);
        o[3] = 0x80 | (lo2 & 0x3F);
        return 4;
    }
}
365 
/*
 * Re-encode one UTF-8 character as UTF-16BE.
 *
 * s points at a 1- to 4-byte UTF-8 sequence whose length is taken from the
 * lead byte; o receives 2 bytes, or 4 (a surrogate pair) for a 4-byte
 * sequence.  Returns the number of bytes stored in o.
 * statep, l and osize are not consulted.
 */
static ssize_t
fun_so_to_utf_16be(void *statep, const unsigned char *s, size_t l, unsigned char *o, size_t osize)
{
    const unsigned char lead = s[0];

    if ((lead & 0x80) == 0) {
        /* ASCII */
        o[0] = 0x00;
        o[1] = lead;
        return 2;
    }
    if ((lead & 0xE0) == 0xC0) {
        /* two-byte sequence */
        o[0] = (lead >> 2) & 0x07;
        o[1] = ((lead & 0x03) << 6) | (s[1] & 0x3F);
        return 2;
    }
    if ((lead & 0xF0) == 0xE0) {
        /* three-byte sequence; the XORs strip the 10xxxxxx markers */
        o[0] = (lead << 4) | ((s[1] >> 2) ^ 0x20);
        o[1] = (s[1] << 6) | (s[2] ^ 0x80);
        return 2;
    }

    /* four-byte sequence: the -1 applies the 0x10000 surrogate bias */
    {
        const int w = (((lead & 0x07) << 2) | ((s[1] >> 4) & 0x03)) - 1;

        o[0] = 0xD8 | (w >> 2);
        o[1] = (w << 6) | ((s[1] & 0x0F) << 2) | ((s[2] >> 4) - 8);
        o[2] = 0xDC | ((s[2] >> 2) & 0x03);
        o[3] = (s[2] << 6) | (s[3] & ~0x80);
        return 4;
    }
}
393 
/*
 * Re-encode one UTF-16LE character as UTF-8.
 *
 * Same conversion as the big-endian variant with the two bytes of every
 * unit swapped: s[1] is the high byte, s[0] the low byte, and for a
 * surrogate pair s[3]/s[2] are the high/low bytes of the second unit.
 * Returns the number of bytes stored in o (1 to 4).
 * statep, l and osize are not consulted.
 */
static ssize_t
fun_so_from_utf_16le(void *statep, const unsigned char *s, size_t l, unsigned char *o, size_t osize)
{
    const unsigned char lo = s[0];
    const unsigned char hi = s[1];

    if (hi == 0 && lo < 0x80) {
        /* U+0000..U+007F */
        o[0] = lo;
        return 1;
    }
    if (hi < 0x08) {
        /* up to U+07FF */
        o[0] = 0xC0 | (hi << 2) | (lo >> 6);
        o[1] = 0x80 | (lo & 0x3F);
        return 2;
    }
    if ((hi & 0xF8) != 0xD8) {
        /* remaining non-surrogate units */
        o[0] = 0xE0 | (hi >> 4);
        o[1] = 0x80 | ((hi & 0x0F) << 2) | (lo >> 6);
        o[2] = 0x80 | (lo & 0x3F);
        return 3;
    }

    /* Surrogate pair: the +1 undoes the 0x10000 bias of the lead unit. */
    {
        const unsigned char lo2 = s[2];
        const unsigned char hi2 = s[3];
        const unsigned int plane = (((hi & 0x03) << 2) | (lo >> 6)) + 1;

        o[0] = 0xF0 | (plane >> 2);
        o[1] = 0x80 | ((plane & 0x03) << 4) | ((lo >> 2) & 0x0F);
        o[2] = 0x80 | ((lo & 0x03) << 4) | ((hi2 & 0x03) << 2) | (lo2 >> 6);
        o[3] = 0x80 | (lo2 & 0x3F);
        return 4;
    }
}
421 
/*
 * Re-encode one UTF-8 character as UTF-16LE.
 *
 * Same conversion as the big-endian variant, but each 16-bit unit is stored
 * low byte first (o[0]/o[1], then o[2]/o[3] for the second surrogate).
 * Returns the number of bytes stored in o (2 or 4).
 * statep, l and osize are not consulted.
 */
static ssize_t
fun_so_to_utf_16le(void *statep, const unsigned char *s, size_t l, unsigned char *o, size_t osize)
{
    const unsigned char lead = s[0];

    if ((lead & 0x80) == 0) {
        /* ASCII */
        o[1] = 0x00;
        o[0] = lead;
        return 2;
    }
    if ((lead & 0xE0) == 0xC0) {
        /* two-byte sequence */
        o[1] = (lead >> 2) & 0x07;
        o[0] = ((lead & 0x03) << 6) | (s[1] & 0x3F);
        return 2;
    }
    if ((lead & 0xF0) == 0xE0) {
        /* three-byte sequence; the XORs strip the 10xxxxxx markers */
        o[1] = (lead << 4) | ((s[1] >> 2) ^ 0x20);
        o[0] = (s[1] << 6) | (s[2] ^ 0x80);
        return 2;
    }

    /* four-byte sequence: the -1 applies the 0x10000 surrogate bias */
    {
        const int w = (((lead & 0x07) << 2) | ((s[1] >> 4) & 0x03)) - 1;

        o[1] = 0xD8 | (w >> 2);
        o[0] = (w << 6) | ((s[1] & 0x0F) << 2) | ((s[2] >> 4) - 8);
        o[3] = 0xDC | ((s[2] >> 2) & 0x03);
        o[2] = (s[2] << 6) | (s[3] & ~0x80);
        return 4;
    }
}
449 
/*
 * Re-encode one UTF-32BE character as UTF-8.
 *
 * Only s[1], s[2] and s[3] are examined; s[0] is never read.  A non-zero
 * s[1] selects the 4-byte UTF-8 form, otherwise the 16-bit value in
 * s[2]/s[3] picks the 1-, 2- or 3-byte form.
 * Returns the number of bytes stored in o.
 * statep, l and osize are not consulted.
 */
static ssize_t
fun_so_from_utf_32be(void *statep, const unsigned char *s, size_t l, unsigned char *o, size_t osize)
{
    const unsigned char plane = s[1];
    const unsigned char hi = s[2];
    const unsigned char lo = s[3];

    if (plane != 0) {
        /* beyond U+FFFF: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
        o[0] = 0xF0 | (plane >> 2);
        o[1] = 0x80 | ((plane & 0x03) << 4) | (hi >> 4);
        o[2] = 0x80 | ((hi & 0x0F) << 2) | (lo >> 6);
        o[3] = 0x80 | (lo & 0x3F);
        return 4;
    }
    if (hi == 0 && lo < 0x80) {
        /* U+0000..U+007F */
        o[0] = lo;
        return 1;
    }
    if (hi < 0x08) {
        /* up to U+07FF */
        o[0] = 0xC0 | (hi << 2) | (lo >> 6);
        o[1] = 0x80 | (lo & 0x3F);
        return 2;
    }
    /* rest of the 16-bit range */
    o[0] = 0xE0 | (hi >> 4);
    o[1] = 0x80 | ((hi & 0x0F) << 2) | (lo >> 6);
    o[2] = 0x80 | (lo & 0x3F);
    return 3;
}
478 
/*
 * Re-encode one UTF-8 character as UTF-32BE.
 *
 * The sequence length is taken from the lead byte s[0]; exactly 4 bytes are
 * stored in o, the first of which is always 0.  Always returns 4.
 * statep, l and osize are not consulted.
 */
static ssize_t
fun_so_to_utf_32be(void *statep, const unsigned char *s, size_t l, unsigned char *o, size_t osize)
{
    /* The three low-order bytes of the code point, most significant first. */
    unsigned char plane = 0x00;
    unsigned char hi = 0x00;
    unsigned char lo;

    if (!(s[0] & 0x80)) {
        /* ASCII */
        lo = s[0];
    }
    else if ((s[0] & 0xE0) == 0xC0) {
        /* two-byte sequence */
        hi = (s[0] >> 2) & 0x07;
        lo = ((s[0] & 0x03) << 6) | (s[1] & 0x3F);
    }
    else if ((s[0] & 0xF0) == 0xE0) {
        /* three-byte sequence; the XORs strip the 10xxxxxx markers */
        hi = (s[0] << 4) | ((s[1] >> 2) ^ 0x20);
        lo = (s[1] << 6) | (s[2] ^ 0x80);
    }
    else {
        /* four-byte sequence */
        plane = ((s[0] & 0x07) << 2) | ((s[1] >> 4) & 0x03);
        hi = ((s[1] & 0x0F) << 4) | ((s[2] >> 2) & 0x0F);
        lo = ((s[2] & 0x03) << 6) | (s[3] & 0x3F);
    }

    o[0] = 0;
    o[1] = plane;
    o[2] = hi;
    o[3] = lo;
    return 4;
}
504 
/*
 * Re-encode one UTF-32LE character as UTF-8.
 *
 * Only s[0], s[1] and s[2] are examined; s[3] is never read.  A non-zero
 * s[2] selects the 4-byte UTF-8 form, otherwise the 16-bit value in
 * s[1]/s[0] picks the 1-, 2- or 3-byte form.
 * Returns the number of bytes stored in o.
 * statep, l and osize are not consulted.
 */
static ssize_t
fun_so_from_utf_32le(void *statep, const unsigned char *s, size_t l, unsigned char *o, size_t osize)
{
    const unsigned char lo = s[0];
    const unsigned char hi = s[1];
    const unsigned char plane = s[2];

    if (plane != 0) {
        /* beyond U+FFFF: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
        o[0] = 0xF0 | (plane >> 2);
        o[1] = 0x80 | ((plane & 0x03) << 4) | (hi >> 4);
        o[2] = 0x80 | ((hi & 0x0F) << 2) | (lo >> 6);
        o[3] = 0x80 | (lo & 0x3F);
        return 4;
    }
    if (hi == 0 && lo < 0x80) {
        /* U+0000..U+007F */
        o[0] = lo;
        return 1;
    }
    if (hi < 0x08) {
        /* up to U+07FF */
        o[0] = 0xC0 | (hi << 2) | (lo >> 6);
        o[1] = 0x80 | (lo & 0x3F);
        return 2;
    }
    /* rest of the 16-bit range */
    o[0] = 0xE0 | (hi >> 4);
    o[1] = 0x80 | ((hi & 0x0F) << 2) | (lo >> 6);
    o[2] = 0x80 | (lo & 0x3F);
    return 3;
}
533 
/*
 * Re-encode one UTF-8 character as UTF-32LE.
 *
 * The sequence length is taken from the lead byte s[0]; exactly 4 bytes are
 * stored in o, the last of which is always 0.  Always returns 4.
 * statep, l and osize are not consulted.
 */
static ssize_t
fun_so_to_utf_32le(void *statep, const unsigned char *s, size_t l, unsigned char *o, size_t osize)
{
    /* The three low-order bytes of the code point, most significant first. */
    unsigned char plane = 0x00;
    unsigned char hi = 0x00;
    unsigned char lo;

    if (!(s[0] & 0x80)) {
        /* ASCII */
        lo = s[0];
    }
    else if ((s[0] & 0xE0) == 0xC0) {
        /* two-byte sequence */
        hi = (s[0] >> 2) & 0x07;
        lo = ((s[0] & 0x03) << 6) | (s[1] & 0x3F);
    }
    else if ((s[0] & 0xF0) == 0xE0) {
        /* three-byte sequence; the XORs strip the 10xxxxxx markers */
        hi = (s[0] << 4) | ((s[1] >> 2) ^ 0x20);
        lo = (s[1] << 6) | (s[2] ^ 0x80);
    }
    else {
        /* four-byte sequence */
        plane = ((s[0] & 0x07) << 2) | ((s[1] >> 4) & 0x03);
        hi = ((s[1] & 0x0F) << 4) | ((s[2] >> 2) & 0x0F);
        lo = ((s[2] & 0x03) << 6) | (s[3] & 0x3F);
    }

    o[3] = 0;
    o[2] = plane;
    o[1] = hi;
    o[0] = lo;
    return 4;
}
559 
/*
 * Reset the one-byte converter state that statep points to.
 * The state byte is set to 0 and 0 is returned.
 */
static int
state_init(void *statep)
{
    *(unsigned char *)statep = 0;
    return 0;
}
567 
568 static VALUE
569 fun_si_from_utf_16(void *statep, const unsigned char *s, size_t l)
570 {
571  #define BE 1
572  #define LE 2
573  unsigned char *sp = statep;
574  switch (*sp) {
575  case 0:
576  if (s[0] == 0xFE && s[1] == 0xFF) {
577  *sp = BE;
578  return ZERObt;
579  }
580  else if (s[0] == 0xFF && s[1] == 0xFE) {
581  *sp = LE;
582  return ZERObt;
583  }
584  break;
585  case BE:
586  if (s[0] < 0xD8 || 0xDF < s[0]) {
587  return (VALUE)FUNso;
588  }
589  else if (s[0] <= 0xDB) {
591  }
592  break;
593  case LE:
594  if (s[1] < 0xD8 || 0xDF < s[1]) {
595  return (VALUE)FUNso;
596  }
597  else if (s[1] <= 0xDB) {
599  }
600  break;
601  }
602  return (VALUE)INVALID;
603 }
604 
605 static ssize_t
606 fun_so_from_utf_16(void *statep, const unsigned char *s, size_t l, unsigned char *o, size_t osize)
607 {
608  unsigned char *sp = statep;
609  switch (*sp) {
610  case BE:
611  return fun_so_from_utf_16be(statep, s, l, o, osize);
612  case LE:
613  return fun_so_from_utf_16le(statep, s, l, o, osize);
614  }
615  return 0;
616 }
617 
618 static VALUE
619 fun_si_from_utf_32(void *statep, const unsigned char *s, size_t l)
620 {
621  unsigned char *sp = statep;
622  switch (*sp) {
623  case 0:
624  if (s[0] == 0 && s[1] == 0 && s[2] == 0xFE && s[3] == 0xFF) {
625  *sp = BE;
626  return ZERObt;
627  }
628  else if (s[0] == 0xFF && s[1] == 0xFE && s[2] == 0 && s[3] == 0) {
629  *sp = LE;
630  return ZERObt;
631  }
632  break;
633  case BE:
634  if (s[0] == 0 && ((0 < s[1] && s[1] <= 0x10) ||
635  (s[1] == 0 && (s[2] < 0xD8 || 0xDF < s[2]))))
636  return (VALUE)FUNso;
637  break;
638  case LE:
639  if (s[3] == 0 && ((0 < s[2] && s[2] <= 0x10) ||
640  (s[2] == 0 && (s[1] < 0xD8 || 0xDF < s[1]))))
641  return (VALUE)FUNso;
642  break;
643  }
644  return (VALUE)INVALID;
645 }
646 
647 static ssize_t
648 fun_so_from_utf_32(void *statep, const unsigned char *s, size_t l, unsigned char *o, size_t osize)
649 {
650  unsigned char *sp = statep;
651  switch (*sp) {
652  case BE:
653  return fun_so_from_utf_32be(statep, s, l, o, osize);
654  case LE:
655  return fun_so_from_utf_32le(statep, s, l, o, osize);
656  }
657  return 0;
658 }
659 
/*
 * Convert one UTF-8 character to UTF-16 with a leading byte-order mark.
 *
 * While the state byte is 0 (first call after state_init) the bytes FE FF
 * are written first and the state byte is set to 1; the character itself is
 * always encoded big-endian by fun_so_to_utf_16be.
 * Returns the total number of bytes stored in o.
 * NOTE(review): the first call can store up to 6 bytes and osize is never
 * checked, while the "UTF-8" -> "UTF-16" transcoder record in this file
 * declares max_output 4 — confirm the caller's buffer is large enough.
 */
static ssize_t
fun_so_to_utf_16(void *statep, const unsigned char *s, size_t l, unsigned char *o, size_t osize)
{
    unsigned char *bom_written = statep;
    ssize_t bom_len = 0;

    if (*bom_written == 0) {
        o[0] = 0xFE;
        o[1] = 0xFF;
        o += 2;
        *bom_written = 1;
        bom_len = 2;
    }
    return bom_len + fun_so_to_utf_16be(statep, s, l, o, osize);
}
672 
/*
 * Convert one UTF-8 character to UTF-32 with a leading byte-order mark.
 *
 * While the state byte is 0 (first call after state_init) the bytes
 * 00 00 FE FF are written first and the state byte is set to 1; the
 * character itself is always encoded big-endian by fun_so_to_utf_32be.
 * Returns the total number of bytes stored in o.
 * NOTE(review): the first call stores 8 bytes and osize is never checked,
 * while the "UTF-8" -> "UTF-32" transcoder record in this file declares
 * max_output 4 — confirm the caller's buffer is large enough.
 */
static ssize_t
fun_so_to_utf_32(void *statep, const unsigned char *s, size_t l, unsigned char *o, size_t osize)
{
    unsigned char *bom_written = statep;
    ssize_t bom_len = 0;

    if (*bom_written == 0) {
        o[0] = 0x00;
        o[1] = 0x00;
        o[2] = 0xFE;
        o[3] = 0xFF;
        o += 4;
        *bom_written = 1;
        bom_len = 4;
    }
    return bom_len + fun_so_to_utf_32be(statep, s, l, o, osize);
}
687 
/*
 * Transcoder record "UTF-16BE" -> "UTF-8", rooted at table node
 * from_UTF_16BE.  No converter state (state_size 0, no init/fini hooks);
 * the last initializer row names fun_so_from_utf_16be.
 * NOTE(review): the variable name and one initializer line are missing from
 * this listing (numbering jumps 688 -> 690 -> 692).
 */
688 static const rb_transcoder
690  "UTF-16BE", "UTF-8", from_UTF_16BE,
692  2, /* input_unit_length */
693  4, /* max_input */
694  4, /* max_output */
695  asciicompat_decoder, /* asciicompat_type */
696  0, NULL, NULL, /* state_size, state_init, state_fini */
697  NULL, NULL, NULL, fun_so_from_utf_16be
698 };
699 
/*
 * Transcoder record "UTF-8" -> "UTF-16BE", rooted at table node from_UTF_8.
 * No converter state (state_size 0, no init/fini hooks); the last
 * initializer row names fun_so_to_utf_16be.
 * NOTE(review): the variable name and one initializer line are missing from
 * this listing (numbering jumps 700 -> 702 -> 704).
 */
700 static const rb_transcoder
702  "UTF-8", "UTF-16BE", from_UTF_8,
704  1, /* input_unit_length */
705  4, /* max_input */
706  4, /* max_output */
707  asciicompat_encoder, /* asciicompat_type */
708  0, NULL, NULL, /* state_size, state_init, state_fini */
709  NULL, NULL, NULL, fun_so_to_utf_16be
710 };
711 
/*
 * Transcoder record "UTF-16LE" -> "UTF-8", rooted at table node
 * from_UTF_16LE.  No converter state (state_size 0, no init/fini hooks);
 * the last initializer row names fun_so_from_utf_16le.
 * NOTE(review): the variable name and one initializer line are missing from
 * this listing (numbering jumps 712 -> 714 -> 716).
 */
712 static const rb_transcoder
714  "UTF-16LE", "UTF-8", from_UTF_16LE,
716  2, /* input_unit_length */
717  4, /* max_input */
718  4, /* max_output */
719  asciicompat_decoder, /* asciicompat_type */
720  0, NULL, NULL, /* state_size, state_init, state_fini */
721  NULL, NULL, NULL, fun_so_from_utf_16le
722 };
723 
/*
 * Transcoder record "UTF-8" -> "UTF-16LE", rooted at table node from_UTF_8.
 * No converter state (state_size 0, no init/fini hooks); the last
 * initializer row names fun_so_to_utf_16le.
 * NOTE(review): the variable name and one initializer line are missing from
 * this listing (numbering jumps 724 -> 726 -> 728).
 */
724 static const rb_transcoder
726  "UTF-8", "UTF-16LE", from_UTF_8,
728  1, /* input_unit_length */
729  4, /* max_input */
730  4, /* max_output */
731  asciicompat_encoder, /* asciicompat_type */
732  0, NULL, NULL, /* state_size, state_init, state_fini */
733  NULL, NULL, NULL, fun_so_to_utf_16le
734 };
735 
/*
 * Transcoder record "UTF-32BE" -> "UTF-8", rooted at table node
 * from_UTF_32BE.  No converter state (state_size 0, no init/fini hooks);
 * the last initializer row names fun_so_from_utf_32be.
 * NOTE(review): the variable name and one initializer line are missing from
 * this listing (numbering jumps 736 -> 738 -> 740).
 */
736 static const rb_transcoder
738  "UTF-32BE", "UTF-8", from_UTF_32BE,
740  4, /* input_unit_length */
741  4, /* max_input */
742  4, /* max_output */
743  asciicompat_decoder, /* asciicompat_type */
744  0, NULL, NULL, /* state_size, state_init, state_fini */
745  NULL, NULL, NULL, fun_so_from_utf_32be
746 };
747 
/*
 * Transcoder record "UTF-8" -> "UTF-32BE", rooted at table node from_UTF_8.
 * No converter state (state_size 0, no init/fini hooks); the last
 * initializer row names fun_so_to_utf_32be.
 * NOTE(review): the variable name and one initializer line are missing from
 * this listing (numbering jumps 748 -> 750 -> 752).
 */
748 static const rb_transcoder
750  "UTF-8", "UTF-32BE", from_UTF_8,
752  1, /* input_unit_length */
753  4, /* max_input */
754  4, /* max_output */
755  asciicompat_encoder, /* asciicompat_type */
756  0, NULL, NULL, /* state_size, state_init, state_fini */
757  NULL, NULL, NULL, fun_so_to_utf_32be
758 };
759 
/*
 * Transcoder record "UTF-32LE" -> "UTF-8", rooted at table node
 * from_UTF_32LE.  No converter state (state_size 0, no init/fini hooks);
 * the last initializer row names fun_so_from_utf_32le.
 * NOTE(review): the variable name and one initializer line are missing from
 * this listing (numbering jumps 760 -> 762 -> 764).
 */
760 static const rb_transcoder
762  "UTF-32LE", "UTF-8", from_UTF_32LE,
764  4, /* input_unit_length */
765  4, /* max_input */
766  4, /* max_output */
767  asciicompat_decoder, /* asciicompat_type */
768  0, NULL, NULL, /* state_size, state_init, state_fini */
769  NULL, NULL, NULL, fun_so_from_utf_32le
770 };
771 
/*
 * Transcoder record "UTF-8" -> "UTF-32LE", rooted at table node from_UTF_8.
 * No converter state (state_size 0, no init/fini hooks); the last
 * initializer row names fun_so_to_utf_32le.
 * NOTE(review): the variable name and one initializer line are missing from
 * this listing (numbering jumps 772 -> 774 -> 776).
 */
772 static const rb_transcoder
774  "UTF-8", "UTF-32LE", from_UTF_8,
776  1, /* input_unit_length */
777  4, /* max_input */
778  4, /* max_output */
779  asciicompat_encoder, /* asciicompat_type */
780  0, NULL, NULL, /* state_size, state_init, state_fini */
781  NULL, NULL, NULL, fun_so_to_utf_32le
782 };
783 
/*
 * Transcoder record "UTF-16" -> "UTF-8", rooted at table node from_UTF_16.
 * It carries one byte of state, initialised by state_init, with no fini
 * hook.
 * NOTE(review): the variable name, one early initializer line and the final
 * row of function pointers are missing from this listing (numbering jumps
 * 784 -> 786 -> 788 and 792 -> 794); presumably the missing row names
 * fun_si_from_utf_16 / fun_so_from_utf_16 — confirm against the generated
 * source.
 */
784 static const rb_transcoder
786  "UTF-16", "UTF-8", from_UTF_16,
788  2, /* input_unit_length */
789  4, /* max_input */
790  4, /* max_output */
791  asciicompat_decoder, /* asciicompat_type */
792  1, state_init, NULL, /* state_size, state_init, state_fini */
794 };
795 
/*
 * Transcoder record "UTF-32" -> "UTF-8", rooted at table node from_UTF_32.
 * It carries one byte of state, initialised by state_init, with no fini
 * hook.
 * NOTE(review): the variable name, one early initializer line and the final
 * row of function pointers are missing from this listing (numbering jumps
 * 796 -> 798 -> 800 and 804 -> 806); presumably the missing row names
 * fun_si_from_utf_32 / fun_so_from_utf_32 — confirm against the generated
 * source.
 */
796 static const rb_transcoder
798  "UTF-32", "UTF-8", from_UTF_32,
800  4, /* input_unit_length */
801  4, /* max_input */
802  4, /* max_output */
803  asciicompat_decoder, /* asciicompat_type */
804  1, state_init, NULL, /* state_size, state_init, state_fini */
806 };
807 
/*
 * Transcoder record "UTF-8" -> "UTF-16", rooted at table node from_UTF_8.
 * It carries one byte of state, initialised by state_init, with no fini
 * hook; the last initializer row names fun_so_to_utf_16.
 * NOTE(review): max_output is 4, yet fun_so_to_utf_16 stores a 2-byte
 * byte-order mark plus up to 4 bytes on its first call — confirm how the
 * framework sizes its output buffer.
 * NOTE(review): the variable name and one initializer line are missing from
 * this listing (numbering jumps 808 -> 810 -> 812).
 */
808 static const rb_transcoder
810  "UTF-8", "UTF-16", from_UTF_8,
812  1, /* input_unit_length */
813  4, /* max_input */
814  4, /* max_output */
815  asciicompat_encoder, /* asciicompat_type */
816  1, state_init, NULL, /* state_size, state_init, state_fini */
817  NULL, NULL, NULL, fun_so_to_utf_16
818 };
819 
/*
 * Transcoder record "UTF-8" -> "UTF-32", rooted at table node from_UTF_8.
 * It carries one byte of state, initialised by state_init, with no fini
 * hook; the last initializer row names fun_so_to_utf_32.
 * NOTE(review): max_output is 4, yet fun_so_to_utf_32 stores a 4-byte
 * byte-order mark plus 4 bytes on its first call — confirm how the
 * framework sizes its output buffer.
 * NOTE(review): the variable name and one initializer line are missing from
 * this listing (numbering jumps 820 -> 822 -> 824).
 */
820 static const rb_transcoder
822  "UTF-8", "UTF-32", from_UTF_8,
824  1, /* input_unit_length */
825  4, /* max_input */
826  4, /* max_output */
827  asciicompat_encoder, /* asciicompat_type */
828  1, state_init, NULL, /* state_size, state_init, state_fini */
829  NULL, NULL, NULL, fun_so_to_utf_32
830 };
831 
/*
 * NOTE(review): only the return type and the braces of this function
 * survive in this listing (numbering jumps 832 -> 834 -> 847); its name and
 * body are missing.  Presumably it registers the transcoder records defined
 * above — confirm against the generated source before relying on it.
 */
832 void
834 {
847 }
848 
849