Ruby 1.9.3p484 (2013-11-22 revision 43786)
nkf.c
Go to the documentation of this file.
1 /*
2  * Copyright (c) 1987, Fujitsu LTD. (Itaru ICHIKAWA).
3  * Copyright (c) 1996-2010, The nkf Project.
4  *
5  * This software is provided 'as-is', without any express or implied
6  * warranty. In no event will the authors be held liable for any damages
7  * arising from the use of this software.
8  *
9  * Permission is granted to anyone to use this software for any purpose,
10  * including commercial applications, and to alter it and redistribute it
11  * freely, subject to the following restrictions:
12  *
13  * 1. The origin of this software must not be misrepresented; you must not
14  * claim that you wrote the original software. If you use this software
15  * in a product, an acknowledgment in the product documentation would be
16  * appreciated but is not required.
17  *
18  * 2. Altered source versions must be plainly marked as such, and must not be
19  * misrepresented as being the original software.
20  *
21  * 3. This notice may not be removed or altered from any source distribution.
22  */
23 #define NKF_VERSION "2.1.2"
24 #define NKF_RELEASE_DATE "2011-09-08"
25 #define COPY_RIGHT \
26  "Copyright (C) 1987, FUJITSU LTD. (I.Ichikawa).\n" \
27  "Copyright (C) 1996-2011, The nkf Project."
28 
29 #include "config.h"
30 #include "nkf.h"
31 #include "utf8tbl.h"
32 #ifdef __WIN32__
33 #include <windows.h>
34 #include <locale.h>
35 #endif
36 #if defined(__OS2__)
37 # define INCL_DOS
38 # define INCL_DOSERRORS
39 # include <os2.h>
40 #endif
41 #include <assert.h>
42 
43 
44 /* state of output_mode and input_mode
45 
46  c2 0 means ASCII
47  JIS_X_0201_1976_K
48  ISO_8859_1
49  JIS_X_0208
50  EOF all termination
51  c1 32bit data
52 
53  */
54 
55 /* MIME ENCODE */
56 
57 #define FIXED_MIME 7
58 #define STRICT_MIME 8
59 
60 /* byte order */
61 enum byte_order {
66 };
67 
68 /* ASCII CODE */
69 
70 #define BS 0x08
71 #define TAB 0x09
72 #define LF 0x0a
73 #define CR 0x0d
74 #define ESC 0x1b
75 #define SP 0x20
76 #define DEL 0x7f
77 #define SI 0x0f
78 #define SO 0x0e
79 #define SS2 0x8e
80 #define SS3 0x8f
81 #define CRLF 0x0D0A
82 
83 
84 /* encodings */
85 
124  JIS_X_0201_1976_K = 0x1013, /* I */ /* JIS C 6220-1969 */
125  /* JIS_X_0201_1976_R = 0x1014, */ /* J */ /* JIS C 6220-1969 */
126  /* JIS_X_0208_1978 = 0x1040, */ /* @ */ /* JIS C 6226-1978 */
127  /* JIS_X_0208_1983 = 0x1087, */ /* B */ /* JIS C 6226-1983 */
128  JIS_X_0208 = 0x1168, /* @B */
129  JIS_X_0212 = 0x1159, /* D */
130  /* JIS_X_0213_2000_1 = 0x1228, */ /* O */
131  JIS_X_0213_2 = 0x1229, /* P */
132  JIS_X_0213_1 = 0x1233 /* Q */
133 };
134 
135 static nkf_char s_iconv(nkf_char c2, nkf_char c1, nkf_char c0);
136 static nkf_char e_iconv(nkf_char c2, nkf_char c1, nkf_char c0);
137 static nkf_char w_iconv(nkf_char c2, nkf_char c1, nkf_char c0);
138 static nkf_char w_iconv16(nkf_char c2, nkf_char c1, nkf_char c0);
139 static nkf_char w_iconv32(nkf_char c2, nkf_char c1, nkf_char c0);
140 static void j_oconv(nkf_char c2, nkf_char c1);
141 static void s_oconv(nkf_char c2, nkf_char c1);
142 static void e_oconv(nkf_char c2, nkf_char c1);
143 static void w_oconv(nkf_char c2, nkf_char c1);
144 static void w_oconv16(nkf_char c2, nkf_char c1);
145 static void w_oconv32(nkf_char c2, nkf_char c1);
146 
147 typedef struct {
148  const char *name;
150  void (*oconv)(nkf_char c2, nkf_char c1);
152 
160 
161 typedef struct {
162  const int id;
163  const char *name;
165 } nkf_encoding;
166 
168  {ASCII, "US-ASCII", &NkfEncodingASCII},
169  {ISO_8859_1, "ISO-8859-1", &NkfEncodingASCII},
170  {ISO_2022_JP, "ISO-2022-JP", &NkfEncodingISO_2022_JP},
171  {CP50220, "CP50220", &NkfEncodingISO_2022_JP},
172  {CP50221, "CP50221", &NkfEncodingISO_2022_JP},
173  {CP50222, "CP50222", &NkfEncodingISO_2022_JP},
174  {ISO_2022_JP_1, "ISO-2022-JP-1", &NkfEncodingISO_2022_JP},
175  {ISO_2022_JP_3, "ISO-2022-JP-3", &NkfEncodingISO_2022_JP},
176  {ISO_2022_JP_2004, "ISO-2022-JP-2004", &NkfEncodingISO_2022_JP},
177  {SHIFT_JIS, "Shift_JIS", &NkfEncodingShift_JIS},
178  {WINDOWS_31J, "Windows-31J", &NkfEncodingShift_JIS},
179  {CP10001, "CP10001", &NkfEncodingShift_JIS},
180  {EUC_JP, "EUC-JP", &NkfEncodingEUC_JP},
181  {EUCJP_NKF, "eucJP-nkf", &NkfEncodingEUC_JP},
182  {CP51932, "CP51932", &NkfEncodingEUC_JP},
183  {EUCJP_MS, "eucJP-MS", &NkfEncodingEUC_JP},
184  {EUCJP_ASCII, "eucJP-ASCII", &NkfEncodingEUC_JP},
185  {SHIFT_JISX0213, "Shift_JISX0213", &NkfEncodingShift_JIS},
186  {SHIFT_JIS_2004, "Shift_JIS-2004", &NkfEncodingShift_JIS},
187  {EUC_JISX0213, "EUC-JISX0213", &NkfEncodingEUC_JP},
188  {EUC_JIS_2004, "EUC-JIS-2004", &NkfEncodingEUC_JP},
189  {UTF_8, "UTF-8", &NkfEncodingUTF_8},
190  {UTF_8N, "UTF-8N", &NkfEncodingUTF_8},
191  {UTF_8_BOM, "UTF-8-BOM", &NkfEncodingUTF_8},
192  {UTF8_MAC, "UTF8-MAC", &NkfEncodingUTF_8},
193  {UTF_16, "UTF-16", &NkfEncodingUTF_16},
194  {UTF_16BE, "UTF-16BE", &NkfEncodingUTF_16},
195  {UTF_16BE_BOM, "UTF-16BE-BOM", &NkfEncodingUTF_16},
196  {UTF_16LE, "UTF-16LE", &NkfEncodingUTF_16},
197  {UTF_16LE_BOM, "UTF-16LE-BOM", &NkfEncodingUTF_16},
198  {UTF_32, "UTF-32", &NkfEncodingUTF_32},
199  {UTF_32BE, "UTF-32BE", &NkfEncodingUTF_32},
200  {UTF_32BE_BOM, "UTF-32BE-BOM", &NkfEncodingUTF_32},
201  {UTF_32LE, "UTF-32LE", &NkfEncodingUTF_32},
202  {UTF_32LE_BOM, "UTF-32LE-BOM", &NkfEncodingUTF_32},
203  {BINARY, "BINARY", &NkfEncodingASCII},
204  {-1, NULL, NULL}
205 };
206 
207 struct {
208  const char *name;
209  const int id;
211  {"US-ASCII", ASCII},
212  {"ASCII", ASCII},
213  {"646", ASCII},
214  {"ROMAN8", ASCII},
215  {"ISO-2022-JP", ISO_2022_JP},
216  {"ISO2022JP-CP932", CP50220},
217  {"CP50220", CP50220},
218  {"CP50221", CP50221},
219  {"CSISO2022JP", CP50221},
220  {"CP50222", CP50222},
221  {"ISO-2022-JP-1", ISO_2022_JP_1},
222  {"ISO-2022-JP-3", ISO_2022_JP_3},
223  {"ISO-2022-JP-2004", ISO_2022_JP_2004},
224  {"SHIFT_JIS", SHIFT_JIS},
225  {"SJIS", SHIFT_JIS},
226  {"MS_Kanji", SHIFT_JIS},
227  {"PCK", SHIFT_JIS},
228  {"WINDOWS-31J", WINDOWS_31J},
229  {"CSWINDOWS31J", WINDOWS_31J},
230  {"CP932", WINDOWS_31J},
231  {"MS932", WINDOWS_31J},
232  {"CP10001", CP10001},
233  {"EUCJP", EUC_JP},
234  {"EUC-JP", EUC_JP},
235  {"EUCJP-NKF", EUCJP_NKF},
236  {"CP51932", CP51932},
237  {"EUC-JP-MS", EUCJP_MS},
238  {"EUCJP-MS", EUCJP_MS},
239  {"EUCJPMS", EUCJP_MS},
240  {"EUC-JP-ASCII", EUCJP_ASCII},
241  {"EUCJP-ASCII", EUCJP_ASCII},
242  {"SHIFT_JISX0213", SHIFT_JISX0213},
243  {"SHIFT_JIS-2004", SHIFT_JIS_2004},
244  {"EUC-JISX0213", EUC_JISX0213},
245  {"EUC-JIS-2004", EUC_JIS_2004},
246  {"UTF-8", UTF_8},
247  {"UTF-8N", UTF_8N},
248  {"UTF-8-BOM", UTF_8_BOM},
249  {"UTF8-MAC", UTF8_MAC},
250  {"UTF-8-MAC", UTF8_MAC},
251  {"UTF-16", UTF_16},
252  {"UTF-16BE", UTF_16BE},
253  {"UTF-16BE-BOM", UTF_16BE_BOM},
254  {"UTF-16LE", UTF_16LE},
255  {"UTF-16LE-BOM", UTF_16LE_BOM},
256  {"UTF-32", UTF_32},
257  {"UTF-32BE", UTF_32BE},
258  {"UTF-32BE-BOM", UTF_32BE_BOM},
259  {"UTF-32LE", UTF_32LE},
260  {"UTF-32LE-BOM", UTF_32LE_BOM},
261  {"BINARY", BINARY},
262  {NULL, -1}
263 };
264 
265 #if defined(DEFAULT_CODE_JIS)
266 #define DEFAULT_ENCIDX ISO_2022_JP
267 #elif defined(DEFAULT_CODE_SJIS)
268 #define DEFAULT_ENCIDX SHIFT_JIS
269 #elif defined(DEFAULT_CODE_WINDOWS_31J)
270 #define DEFAULT_ENCIDX WINDOWS_31J
271 #elif defined(DEFAULT_CODE_EUC)
272 #define DEFAULT_ENCIDX EUC_JP
273 #elif defined(DEFAULT_CODE_UTF8)
274 #define DEFAULT_ENCIDX UTF_8
275 #endif
276 
277 
/*
 * Character classification and conversion helpers.
 *
 * All of these compare against plain character literals and are
 * implemented as macros, so an argument may be evaluated more than once:
 * do not pass expressions with side effects (such as `*p++`).
 * Every parameter is parenthesized so that compound arguments
 * (`x + 1`, `a ? b : c`, `a | b`) keep their intended grouping.
 */
#define is_alnum(c) \
    (('a'<=(c) && (c)<='z')||('A'<=(c) && (c)<='Z')||('0'<=(c) && (c)<='9'))

/* I don't trust portability of toupper */
#define nkf_toupper(c)  (('a'<=(c) && (c)<='z')?((c)-('a'-'A')):(c))
#define nkf_isoctal(c)  ('0'<=(c) && (c)<='7')
#define nkf_isdigit(c)  ('0'<=(c) && (c)<='9')
#define nkf_isxdigit(c) (nkf_isdigit(c) || ('a'<=(c) && (c)<='f') || ('A'<=(c) && (c) <= 'F'))
#define nkf_isblank(c) ((c) == SP || (c) == TAB)
#define nkf_isspace(c) (nkf_isblank(c) || (c) == CR || (c) == LF)
#define nkf_isalpha(c) (('a' <= (c) && (c) <= 'z') || ('A' <= (c) && (c) <= 'Z'))
#define nkf_isalnum(c) (nkf_isdigit(c) || nkf_isalpha(c))
#define nkf_isprint(c) (SP<=(c) && (c)<='~')
#define nkf_isgraph(c) ('!'<=(c) && (c)<='~')
/* hex2bin: value of a hexadecimal digit character, 0 for anything else */
#define hex2bin(c) (('0'<=(c)&&(c)<='9') ? ((c)-'0') : \
		    ('A'<=(c)&&(c)<='F') ? ((c)-'A'+10) : \
		    ('a'<=(c)&&(c)<='f') ? ((c)-'a'+10) : 0)
/* bin2hex: upper-case hexadecimal digit for the low 4 bits of c */
#define bin2hex(c) ("0123456789ABCDEF"[(c)&15])
/* is_eucg3: true when the second-lowest byte of c2 equals SS3 */
#define is_eucg3(c2) (((unsigned short)(c2) >> 8) == SS3)
#define nkf_noescape_mime(c) (((c) == CR) || ((c) == LF) || \
    (((c) > SP) && ((c) < DEL) && ((c) != '?') && ((c) != '=') && ((c) != '_') \
     && ((c) != '(') && ((c) != ')') && ((c) != '.') && ((c) != 0x22)))

#define is_ibmext_in_sjis(c2) (CP932_TABLE_BEGIN <= (c2) && (c2) <= CP932_TABLE_END)
#define nkf_byte_jisx0201_katakana_p(c) (SP <= (c) && (c) <= 0x5F)
303 
304 #define HOLD_SIZE 1024
305 #if defined(INT_IS_SHORT)
306 #define IOBUF_SIZE 2048
307 #else
308 #define IOBUF_SIZE 16384
309 #endif
310 
311 #define DEFAULT_J 'B'
312 #define DEFAULT_R 'B'
313 
314 
315 #define GETA1 0x22
316 #define GETA2 0x2e
317 
318 
319 /* MIME preprocessor */
320 
321 #ifdef EASYWIN /*Easy Win */
322 extern POINT _BufferSize;
323 #endif
324 
325 struct input_code{
326  const char *name;
331  void (*status_func)(struct input_code *, nkf_char);
334 };
335 
336 static const char *input_codename = NULL; /* NULL: unestablished, "": BINARY */
339 
340 #if defined(UTF8_INPUT_ENABLE) || defined(UTF8_OUTPUT_ENABLE)
341 /* UCS Mapping
342  * 0: Shift_JIS, eucJP-ascii
343  * 1: eucJP-ms
344  * 2: CP932, CP51932
345  * 3: CP10001
346  */
347 #define UCS_MAP_ASCII 0
348 #define UCS_MAP_MS 1
349 #define UCS_MAP_CP932 2
350 #define UCS_MAP_CP10001 3
352 #endif
353 #ifdef UTF8_INPUT_ENABLE
354 /* no NEC special, NEC-selected IBM extended and IBM extended characters */
355 static int no_cp932ext_f = FALSE;
356 /* ignore ZERO WIDTH NO-BREAK SPACE */
359 static nkf_char unicode_subchar = '?'; /* the regular substitution character */
360 static void (*encode_fallback)(nkf_char c) = NULL;
361 static void w_status(struct input_code *, nkf_char);
362 #endif
363 #ifdef UTF8_OUTPUT_ENABLE
364 static int output_bom_f = FALSE;
366 #endif
367 
368 static void std_putc(nkf_char c);
369 static nkf_char std_getc(FILE *f);
370 static nkf_char std_ungetc(nkf_char c,FILE *f);
371 
372 static nkf_char broken_getc(FILE *f);
373 static nkf_char broken_ungetc(nkf_char c,FILE *f);
374 
375 static nkf_char mime_getc(FILE *f);
376 
377 static void mime_putc(nkf_char c);
378 
379 /* buffers */
380 
381 #if !defined(PERL_XS) && !defined(WIN32DLL)
382 static unsigned char stdibuf[IOBUF_SIZE];
383 static unsigned char stdobuf[IOBUF_SIZE];
384 #endif
385 
386 #define NKF_UNSPECIFIED (-TRUE)
387 
388 /* flags */
389 static int unbuf_f = FALSE;
390 static int estab_f = FALSE;
391 static int nop_f = FALSE;
392 static int binmode_f = TRUE; /* binary mode */
393 static int rot_f = FALSE; /* rot14/43 mode */
394 static int hira_f = FALSE; /* hira/kata henkan */
395 static int alpha_f = FALSE; /* convert JIx0208 alphbet to ASCII */
396 static int mime_f = MIME_DECODE_DEFAULT; /* convert MIME B base64 or Q */
397 static int mime_decode_f = FALSE; /* mime decode is explicitly on */
398 static int mimebuf_f = FALSE; /* MIME buffered input */
399 static int broken_f = FALSE; /* convert ESC-less broken JIS */
400 static int iso8859_f = FALSE; /* ISO8859 through */
401 static int mimeout_f = FALSE; /* base64 mode */
402 static int x0201_f = NKF_UNSPECIFIED; /* convert JIS X 0201 */
403 static int iso2022jp_f = FALSE; /* replace non ISO-2022-JP with GETA */
404 
405 #ifdef UNICODE_NORMALIZATION
406 static int nfc_f = FALSE;
407 static nkf_char (*i_nfc_getc)(FILE *) = std_getc; /* input of ugetc */
409 #endif
410 
411 #ifdef INPUT_OPTION
412 static int cap_f = FALSE;
413 static nkf_char (*i_cgetc)(FILE *) = std_getc; /* input of cgetc */
415 
416 static int url_f = FALSE;
417 static nkf_char (*i_ugetc)(FILE *) = std_getc; /* input of ugetc */
419 #endif
420 
/*
 * nkf_char tagging.
 *
 * CLASS_MASK selects the top byte of a value and VALUE_MASK the low
 * 24 bits.  A value whose top byte equals CLASS_UNICODE is treated as a
 * Unicode code point stored in the VALUE_MASK bits; PREFIX_EUCG3 is OR-ed
 * into a value by nkf_char_euc3_new().
 *
 * The predicates are macros: the argument is parenthesized so compound
 * expressions group correctly, but it must be free of side effects.
 */
#define PREFIX_EUCG3	NKF_INT32_C(0x8F00)
#define CLASS_MASK	NKF_INT32_C(0xFF000000)
#define CLASS_UNICODE	NKF_INT32_C(0x01000000)
#define VALUE_MASK	NKF_INT32_C(0x00FFFFFF)
#define UNICODE_BMP_MAX	NKF_INT32_C(0x0000FFFF)
#define UNICODE_MAX	NKF_INT32_C(0x0010FFFF)
#define nkf_char_euc3_new(c) ((c) | PREFIX_EUCG3)
#define nkf_char_unicode_new(c) ((c) | CLASS_UNICODE)
#define nkf_char_unicode_p(c) (((c) & CLASS_MASK) == CLASS_UNICODE)
#define nkf_char_unicode_bmp_p(c) (((c) & VALUE_MASK) <= UNICODE_BMP_MAX)
#define nkf_char_unicode_value_p(c) (((c) & VALUE_MASK) <= UNICODE_MAX)
432 
433 #ifdef NUMCHAR_OPTION
434 static int numchar_f = FALSE;
435 static nkf_char (*i_ngetc)(FILE *) = std_getc; /* input of ugetc */
437 #endif
438 
439 #ifdef CHECK_OPTION
440 static int noout_f = FALSE;
441 static void no_putc(nkf_char c);
442 static int debug_f = FALSE;
443 static void debug(const char *str);
445 #endif
446 
447 static int guess_f = 0; /* 0: OFF, 1: ON, 2: VERBOSE */
448 static void set_input_codename(const char *codename);
449 
450 #ifdef EXEC_IO
451 static int exec_f = 0;
452 #endif
453 
454 #ifdef SHIFTJIS_CP932
455 /* invert IBM extended characters to others */
456 static int cp51932_f = FALSE;
457 
458 /* invert NEC-selected IBM extended characters to IBM extended characters */
459 static int cp932inv_f = TRUE;
460 
461 /* static nkf_char cp932_conv(nkf_char c2, nkf_char c1); */
462 #endif /* SHIFTJIS_CP932 */
463 
464 static int x0212_f = FALSE;
465 static int x0213_f = FALSE;
466 
467 static unsigned char prefix_table[256];
468 
469 static void e_status(struct input_code *, nkf_char);
470 static void s_status(struct input_code *, nkf_char);
471 
473  {"EUC-JP", 0, 0, 0, {0, 0, 0}, e_status, e_iconv, 0},
474  {"Shift_JIS", 0, 0, 0, {0, 0, 0}, s_status, s_iconv, 0},
475 #ifdef UTF8_INPUT_ENABLE
476  {"UTF-8", 0, 0, 0, {0, 0, 0}, w_status, w_iconv, 0},
477  {"UTF-16", 0, 0, 0, {0, 0, 0}, NULL, w_iconv16, 0},
478  {"UTF-32", 0, 0, 0, {0, 0, 0}, NULL, w_iconv32, 0},
479 #endif
480  {NULL, 0, 0, 0, {0, 0, 0}, NULL, NULL, 0}
481 };
482 
483 static int mimeout_mode = 0; /* 0, -1, 'Q', 'B', 1, 2 */
484 static int base64_count = 0;
485 
486 /* X0208 -> ASCII converter */
487 
488 /* fold parameter */
489 static int f_line = 0; /* chars in line */
490 static int f_prev = 0;
491 static int fold_preserve_f = FALSE; /* preserve new lines */
492 static int fold_f = FALSE;
493 static int fold_len = 0;
494 
495 /* options */
496 static unsigned char kanji_intro = DEFAULT_J;
497 static unsigned char ascii_intro = DEFAULT_R;
498 
499 /* Folding */
500 
501 #define FOLD_MARGIN 10
502 #define DEFAULT_FOLD 60
503 
505 
506 /* process default */
507 
508 static nkf_char
510 {
511  fprintf(stderr,"nkf internal module connection failure.\n");
512  exit(EXIT_FAILURE);
513  return 0; /* LINT */
514 }
515 
516 static void
518 {
519  no_connection2(c2,c1,0);
520 }
521 
523 static void (*oconv)(nkf_char c2,nkf_char c1) = no_connection;
524 
525 static void (*o_zconv)(nkf_char c2,nkf_char c1) = no_connection;
526 static void (*o_fconv)(nkf_char c2,nkf_char c1) = no_connection;
527 static void (*o_eol_conv)(nkf_char c2,nkf_char c1) = no_connection;
528 static void (*o_rot_conv)(nkf_char c2,nkf_char c1) = no_connection;
529 static void (*o_hira_conv)(nkf_char c2,nkf_char c1) = no_connection;
530 static void (*o_base64conv)(nkf_char c2,nkf_char c1) = no_connection;
532 
533 /* static redirections */
534 
535 static void (*o_putc)(nkf_char c) = std_putc;
536 
537 static nkf_char (*i_getc)(FILE *f) = std_getc; /* general input */
539 
540 static nkf_char (*i_bgetc)(FILE *) = std_getc; /* input of mgetc */
542 
543 static void (*o_mputc)(nkf_char c) = std_putc ; /* output of mputc */
544 
545 static nkf_char (*i_mgetc)(FILE *) = std_getc; /* input of mgetc */
547 
548 /* for strict mime */
549 static nkf_char (*i_mgetc_buf)(FILE *) = std_getc; /* input of mgetc_buf */
551 
552 /* Global states */
553 static int output_mode = ASCII; /* output kanji mode */
554 static int input_mode = ASCII; /* input kanji mode */
555 static int mime_decode_mode = FALSE; /* MIME mode B base64, Q hex */
556 
557 /* X0201 / X0208 conversion tables */
558 
559 /* X0201 kana conversion table */
560 /* 90-9F A0-DF */
561 static const unsigned char cv[]= {
562  0x21,0x21,0x21,0x23,0x21,0x56,0x21,0x57,
563  0x21,0x22,0x21,0x26,0x25,0x72,0x25,0x21,
564  0x25,0x23,0x25,0x25,0x25,0x27,0x25,0x29,
565  0x25,0x63,0x25,0x65,0x25,0x67,0x25,0x43,
566  0x21,0x3c,0x25,0x22,0x25,0x24,0x25,0x26,
567  0x25,0x28,0x25,0x2a,0x25,0x2b,0x25,0x2d,
568  0x25,0x2f,0x25,0x31,0x25,0x33,0x25,0x35,
569  0x25,0x37,0x25,0x39,0x25,0x3b,0x25,0x3d,
570  0x25,0x3f,0x25,0x41,0x25,0x44,0x25,0x46,
571  0x25,0x48,0x25,0x4a,0x25,0x4b,0x25,0x4c,
572  0x25,0x4d,0x25,0x4e,0x25,0x4f,0x25,0x52,
573  0x25,0x55,0x25,0x58,0x25,0x5b,0x25,0x5e,
574  0x25,0x5f,0x25,0x60,0x25,0x61,0x25,0x62,
575  0x25,0x64,0x25,0x66,0x25,0x68,0x25,0x69,
576  0x25,0x6a,0x25,0x6b,0x25,0x6c,0x25,0x6d,
577  0x25,0x6f,0x25,0x73,0x21,0x2b,0x21,0x2c,
578  0x00,0x00};
579 
580 
581 /* X0201 kana conversion table for dakuten */
582 /* 90-9F A0-DF */
583 static const unsigned char dv[]= {
584  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
585  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
586  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
587  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
588  0x00,0x00,0x00,0x00,0x00,0x00,0x25,0x74,
589  0x00,0x00,0x00,0x00,0x25,0x2c,0x25,0x2e,
590  0x25,0x30,0x25,0x32,0x25,0x34,0x25,0x36,
591  0x25,0x38,0x25,0x3a,0x25,0x3c,0x25,0x3e,
592  0x25,0x40,0x25,0x42,0x25,0x45,0x25,0x47,
593  0x25,0x49,0x00,0x00,0x00,0x00,0x00,0x00,
594  0x00,0x00,0x00,0x00,0x25,0x50,0x25,0x53,
595  0x25,0x56,0x25,0x59,0x25,0x5c,0x00,0x00,
596  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
597  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
598  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
599  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
600  0x00,0x00};
601 
602 /* X0201 kana conversion table for han-dakuten */
603 /* 90-9F A0-DF */
604 static const unsigned char ev[]= {
605  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
606  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
607  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
608  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
609  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
610  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
611  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
612  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
613  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
614  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
615  0x00,0x00,0x00,0x00,0x25,0x51,0x25,0x54,
616  0x25,0x57,0x25,0x5a,0x25,0x5d,0x00,0x00,
617  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
618  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
619  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
620  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
621  0x00,0x00};
622 
623 
624 /* X0208 kigou conversion table */
625 /* 0x8140 - 0x819e */
626 static const unsigned char fv[] = {
627 
628  0x00,0x00,0x00,0x00,0x2c,0x2e,0x00,0x3a,
629  0x3b,0x3f,0x21,0x00,0x00,0x27,0x60,0x00,
630  0x5e,0x00,0x5f,0x00,0x00,0x00,0x00,0x00,
631  0x00,0x00,0x00,0x00,0x00,0x2d,0x00,0x2f,
632  0x5c,0x00,0x00,0x7c,0x00,0x00,0x60,0x27,
633  0x22,0x22,0x28,0x29,0x00,0x00,0x5b,0x5d,
634  0x7b,0x7d,0x3c,0x3e,0x00,0x00,0x00,0x00,
635  0x00,0x00,0x00,0x00,0x2b,0x2d,0x00,0x00,
636  0x00,0x3d,0x00,0x3c,0x3e,0x00,0x00,0x00,
637  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
638  0x24,0x00,0x00,0x25,0x23,0x26,0x2a,0x40,
639  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
640 } ;
641 
642 
643 
644 static int option_mode = 0;
645 static int file_out_f = FALSE;
646 #ifdef OVERWRITE
647 static int overwrite_f = FALSE;
648 static int preserve_time_f = FALSE;
649 static int backup_f = FALSE;
650 static char *backup_suffix = "";
651 #endif
652 
653 static int eolmode_f = 0; /* CR, LF, CRLF */
654 static int input_eol = 0; /* 0: unestablished, EOF: MIXED */
655 static nkf_char prev_cr = 0; /* CR or 0 */
656 #ifdef EASYWIN /*Easy Win */
657 static int end_check;
658 #endif /*Easy Win */
659 
660 static void *
662 {
663  void *ptr;
664 
665  if (size == 0) size = 1;
666 
667  ptr = malloc(size);
668  if (ptr == NULL) {
669  perror("can't malloc");
670  exit(EXIT_FAILURE);
671  }
672 
673  return ptr;
674 }
675 
/*
 * realloc() wrapper that never returns NULL.
 *
 * A request for zero bytes is turned into a request for one byte.
 * If the reallocation fails, a message is written with perror() and
 * the process exits with EXIT_FAILURE.
 */
static void *
nkf_xrealloc(void *ptr, size_t size)
{
    void *resized = realloc(ptr, size != 0 ? size : 1);

    if (resized != NULL) {
	return resized;
    }

    perror("can't realloc");
    exit(EXIT_FAILURE);
    return NULL; /* LINT */
}
689 
690 #define nkf_xfree(ptr) free(ptr)
691 
692 static int
693 nkf_str_caseeql(const char *src, const char *target)
694 {
695  int i;
696  for (i = 0; src[i] && target[i]; i++) {
697  if (nkf_toupper(src[i]) != nkf_toupper(target[i])) return FALSE;
698  }
699  if (src[i] || target[i]) return FALSE;
700  else return TRUE;
701 }
702 
703 static nkf_encoding*
705 {
706  if (idx < 0 || NKF_ENCODING_TABLE_SIZE <= idx) {
707  return 0;
708  }
709  return &nkf_encoding_table[idx];
710 }
711 
712 static int
714 {
715  int i;
716  if (name[0] == 'X' && *(name+1) == '-') name += 2;
717  for (i = 0; encoding_name_to_id_table[i].id >= 0; i++) {
718  if (nkf_str_caseeql(encoding_name_to_id_table[i].name, name)) {
719  return encoding_name_to_id_table[i].id;
720  }
721  }
722  return -1;
723 }
724 
725 static nkf_encoding*
726 nkf_enc_find(const char *name)
727 {
728  int idx = -1;
729  idx = nkf_enc_find_index(name);
730  if (idx < 0) return 0;
731  return nkf_enc_from_index(idx);
732 }
733 
734 #define nkf_enc_name(enc) (enc)->name
735 #define nkf_enc_to_index(enc) (enc)->id
736 #define nkf_enc_to_base_encoding(enc) (enc)->base_encoding
737 #define nkf_enc_to_iconv(enc) nkf_enc_to_base_encoding(enc)->iconv
738 #define nkf_enc_to_oconv(enc) nkf_enc_to_base_encoding(enc)->oconv
739 #define nkf_enc_asciicompat(enc) (\
740  nkf_enc_to_base_encoding(enc) == &NkfEncodingASCII ||\
741  nkf_enc_to_base_encoding(enc) == &NkfEncodingISO_2022_JP)
742 #define nkf_enc_unicode_p(enc) (\
743  nkf_enc_to_base_encoding(enc) == &NkfEncodingUTF_8 ||\
744  nkf_enc_to_base_encoding(enc) == &NkfEncodingUTF_16 ||\
745  nkf_enc_to_base_encoding(enc) == &NkfEncodingUTF_32)
746 #define nkf_enc_cp5022x_p(enc) (\
747  nkf_enc_to_index(enc) == CP50220 ||\
748  nkf_enc_to_index(enc) == CP50221 ||\
749  nkf_enc_to_index(enc) == CP50222)
750 
751 #ifdef DEFAULT_CODE_LOCALE
752 static const char*
754 {
755 #ifdef HAVE_LANGINFO_H
756  return nl_langinfo(CODESET);
757 #elif defined(__WIN32__)
758  static char buf[16];
759  sprintf(buf, "CP%d", GetACP());
760  return buf;
761 #elif defined(__OS2__)
762 # if defined(INT_IS_SHORT)
763  /* OS/2 1.x */
764  return NULL;
765 # else
766  /* OS/2 32bit */
767  static char buf[16];
768  ULONG ulCP[1], ulncp;
769  DosQueryCp(sizeof(ulCP), ulCP, &ulncp);
770  if (ulCP[0] == 932 || ulCP[0] == 943)
771  strcpy(buf, "Shift_JIS");
772  else
773  sprintf(buf, "CP%lu", ulCP[0]);
774  return buf;
775 # endif
776 #endif
777  return NULL;
778 }
779 
780 static nkf_encoding*
782 {
783  nkf_encoding *enc = 0;
784  const char *encname = nkf_locale_charmap();
785  if (encname)
786  enc = nkf_enc_find(encname);
787  return enc;
788 }
789 #endif /* DEFAULT_CODE_LOCALE */
790 
791 static nkf_encoding*
793 {
794  return &nkf_encoding_table[UTF_8];
795 }
796 
797 static nkf_encoding*
799 {
800  nkf_encoding *enc = 0;
801 #ifdef DEFAULT_CODE_LOCALE
802  enc = nkf_locale_encoding();
803 #elif defined(DEFAULT_ENCIDX)
804  enc = nkf_enc_from_index(DEFAULT_ENCIDX);
805 #endif
806  if (!enc) enc = nkf_utf8_encoding();
807  return enc;
808 }
809 
810 typedef struct {
811  long capa;
812  long len;
814 } nkf_buf_t;
815 
816 static nkf_buf_t *
817 nkf_buf_new(int length)
818 {
819  nkf_buf_t *buf = nkf_xmalloc(sizeof(nkf_buf_t));
820  buf->ptr = nkf_xmalloc(sizeof(nkf_char) * length);
821  buf->capa = length;
822  buf->len = 0;
823  return buf;
824 }
825 
826 #if 0
827 static void
828 nkf_buf_dispose(nkf_buf_t *buf)
829 {
830  nkf_xfree(buf->ptr);
831  nkf_xfree(buf);
832 }
833 #endif
834 
835 #define nkf_buf_length(buf) ((buf)->len)
836 #define nkf_buf_empty_p(buf) ((buf)->len == 0)
837 
838 static nkf_char
840 {
841  assert(index <= buf->len);
842  return buf->ptr[index];
843 }
844 
845 static void
847 {
848  buf->len = 0;
849 }
850 
851 static void
853 {
854  if (buf->capa <= buf->len) {
855  exit(EXIT_FAILURE);
856  }
857  buf->ptr[buf->len++] = c;
858 }
859 
860 static nkf_char
862 {
863  assert(!nkf_buf_empty_p(buf));
864  return buf->ptr[--buf->len];
865 }
866 
867 /* Normalization Form C */
868 #ifndef PERL_XS
869 #ifdef WIN32DLL
870 #define fprintf dllprintf
871 #endif
872 
873 static void
874 version(void)
875 {
876  fprintf(HELP_OUTPUT,"Network Kanji Filter Version " NKF_VERSION " (" NKF_RELEASE_DATE ") \n" COPY_RIGHT "\n");
877 }
878 
879 static void
880 usage(void)
881 {
882  fprintf(HELP_OUTPUT,
883  "Usage: nkf -[flags] [--] [in file] .. [out file for -O flag]\n"
884 #ifdef UTF8_OUTPUT_ENABLE
885  " j/s/e/w Specify output encoding ISO-2022-JP, Shift_JIS, EUC-JP\n"
886  " UTF options is -w[8[0],{16,32}[{B,L}[0]]]\n"
887 #else
888 #endif
889 #ifdef UTF8_INPUT_ENABLE
890  " J/S/E/W Specify input encoding ISO-2022-JP, Shift_JIS, EUC-JP\n"
891  " UTF option is -W[8,[16,32][B,L]]\n"
892 #else
893  " J/S/E Specify output encoding ISO-2022-JP, Shift_JIS, EUC-JP\n"
894 #endif
895  );
896  fprintf(HELP_OUTPUT,
897  " m[BQSN0] MIME decode [B:base64,Q:quoted,S:strict,N:nonstrict,0:no decode]\n"
898  " M[BQ] MIME encode [B:base64 Q:quoted]\n"
899  " f/F Folding: -f60 or -f or -f60-10 (fold margin 10) F preserve nl\n"
900  );
901  fprintf(HELP_OUTPUT,
902  " Z[0-4] Default/0: Convert JISX0208 Alphabet to ASCII\n"
903  " 1: Kankaku to one space 2: to two spaces 3: HTML Entity\n"
904  " 4: JISX0208 Katakana to JISX0201 Katakana\n"
905  " X,x Convert Halfwidth Katakana to Fullwidth or preserve it\n"
906  );
907  fprintf(HELP_OUTPUT,
908  " O Output to File (DEFAULT 'nkf.out')\n"
909  " L[uwm] Line mode u:LF w:CRLF m:CR (DEFAULT noconversion)\n"
910  );
911  fprintf(HELP_OUTPUT,
912  " --ic=<encoding> Specify the input encoding\n"
913  " --oc=<encoding> Specify the output encoding\n"
914  " --hiragana --katakana Hiragana/Katakana Conversion\n"
915  " --katakana-hiragana Converts each other\n"
916  );
917  fprintf(HELP_OUTPUT,
918 #ifdef INPUT_OPTION
919  " --{cap, url}-input Convert hex after ':' or '%%'\n"
920 #endif
921 #ifdef NUMCHAR_OPTION
922  " --numchar-input Convert Unicode Character Reference\n"
923 #endif
924 #ifdef UTF8_INPUT_ENABLE
925  " --fb-{skip, html, xml, perl, java, subchar}\n"
926  " Specify unassigned character's replacement\n"
927 #endif
928  );
929  fprintf(HELP_OUTPUT,
930 #ifdef OVERWRITE
931  " --in-place[=SUF] Overwrite original files\n"
932  " --overwrite[=SUF] Preserve timestamp of original files\n"
933 #endif
934  " -g --guess Guess the input code\n"
935  " -v --version Print the version\n"
936  " --help/-V Print this help / configuration\n"
937  );
938  version();
939 }
940 
941 static void
943 {
944  fprintf(HELP_OUTPUT,
945  "Summary of my nkf " NKF_VERSION " (" NKF_RELEASE_DATE ") configuration:\n"
946  " Compile-time options:\n"
947  " Compiled at: " __DATE__ " " __TIME__ "\n"
948  );
949  fprintf(HELP_OUTPUT,
950  " Default output encoding: "
951 #ifdef DEFAULT_CODE_LOCALE
952  "LOCALE (%s)\n", nkf_enc_name(nkf_default_encoding())
953 #elif defined(DEFAULT_ENCIDX)
954  "CONFIG (%s)\n", nkf_enc_name(nkf_default_encoding())
955 #else
956  "NONE\n"
957 #endif
958  );
959  fprintf(HELP_OUTPUT,
960  " Default output end of line: "
961 #if DEFAULT_NEWLINE == CR
962  "CR"
963 #elif DEFAULT_NEWLINE == CRLF
964  "CRLF"
965 #else
966  "LF"
967 #endif
968  "\n"
969  " Decode MIME encoded string: "
971  "ON"
972 #else
973  "OFF"
974 #endif
975  "\n"
976  " Convert JIS X 0201 Katakana: "
977 #if X0201_DEFAULT
978  "ON"
979 #else
980  "OFF"
981 #endif
982  "\n"
983  " --help, --version output: "
984 #if HELP_OUTPUT_HELP_OUTPUT
985  "HELP_OUTPUT"
986 #else
987  "STDOUT"
988 #endif
989  "\n");
990 }
991 #endif /*PERL_XS*/
992 
993 #ifdef OVERWRITE
/*
 * Build the backup file name for `filename` from the pattern `suffix`.
 *
 * If `suffix` contains one or more '*' characters, each '*' is replaced
 * by `filename` and every other character of `suffix` is copied as is.
 * Otherwise `suffix` is simply appended to `filename`.
 *
 * Returns a NUL-terminated string in a buffer obtained from
 * nkf_xmalloc(); nothing in this function releases it.
 */
static char*
get_backup_filename(const char *suffix, const char *filename)
{
    char *backup_filename;
    /* lengths are kept as size_t so that strlen() results are not narrowed */
    const size_t filename_length = strlen(filename);
    const size_t suffix_length = strlen(suffix);
    size_t asterisk_count = 0;
    size_t i, j;

    for (i = 0; i < suffix_length; i++) {
	if (suffix[i] == '*') asterisk_count++;
    }

    if (asterisk_count) {
	/* each one-byte '*' expands to filename_length bytes; +1 for the NUL */
	backup_filename = nkf_xmalloc(suffix_length - asterisk_count
				      + asterisk_count * filename_length + 1);
	for (i = 0, j = 0; i < suffix_length; i++) {
	    if (suffix[i] == '*') {
		/* copy at the current write position instead of rescanning with strncat */
		memcpy(backup_filename + j, filename, filename_length);
		j += filename_length;
	    } else {
		backup_filename[j++] = suffix[i];
	    }
	}
	backup_filename[j] = '\0';
    } else {
	backup_filename = nkf_xmalloc(filename_length + suffix_length + 1);
	memcpy(backup_filename, filename, filename_length);
	/* suffix_length + 1 also copies the terminating NUL */
	memcpy(backup_filename + filename_length, suffix, suffix_length + 1);
    }
    return backup_filename;
}
1028 #endif
1029 
1030 #ifdef UTF8_INPUT_ENABLE
1031 static void
1033 {
1034  int shift = 20;
1035  c &= VALUE_MASK;
1036  while(shift >= 0){
1037  if(c >= NKF_INT32_C(1)<<shift){
1038  while(shift >= 0){
1039  (*f)(0, bin2hex(c>>shift));
1040  shift -= 4;
1041  }
1042  }else{
1043  shift -= 4;
1044  }
1045  }
1046  return;
1047 }
1048 
1049 static void
1051 {
1052  (*oconv)(0, '&');
1053  (*oconv)(0, '#');
1054  c &= VALUE_MASK;
1055  if(c >= NKF_INT32_C(1000000))
1056  (*oconv)(0, 0x30+(c/NKF_INT32_C(1000000))%10);
1057  if(c >= NKF_INT32_C(100000))
1058  (*oconv)(0, 0x30+(c/NKF_INT32_C(100000) )%10);
1059  if(c >= 10000)
1060  (*oconv)(0, 0x30+(c/10000 )%10);
1061  if(c >= 1000)
1062  (*oconv)(0, 0x30+(c/1000 )%10);
1063  if(c >= 100)
1064  (*oconv)(0, 0x30+(c/100 )%10);
1065  if(c >= 10)
1066  (*oconv)(0, 0x30+(c/10 )%10);
1067  if(c >= 0)
1068  (*oconv)(0, 0x30+ c %10);
1069  (*oconv)(0, ';');
1070  return;
1071 }
1072 
1073 static void
1075 {
1076  (*oconv)(0, '&');
1077  (*oconv)(0, '#');
1078  (*oconv)(0, 'x');
1079  nkf_each_char_to_hex(oconv, c);
1080  (*oconv)(0, ';');
1081  return;
1082 }
1083 
1084 static void
1086 {
1087  (*oconv)(0, '\\');
1088  c &= VALUE_MASK;
1089  if(!nkf_char_unicode_bmp_p(c)){
1090  (*oconv)(0, 'U');
1091  (*oconv)(0, '0');
1092  (*oconv)(0, '0');
1093  (*oconv)(0, bin2hex(c>>20));
1094  (*oconv)(0, bin2hex(c>>16));
1095  }else{
1096  (*oconv)(0, 'u');
1097  }
1098  (*oconv)(0, bin2hex(c>>12));
1099  (*oconv)(0, bin2hex(c>> 8));
1100  (*oconv)(0, bin2hex(c>> 4));
1101  (*oconv)(0, bin2hex(c ));
1102  return;
1103 }
1104 
1105 static void
1107 {
1108  (*oconv)(0, '\\');
1109  (*oconv)(0, 'x');
1110  (*oconv)(0, '{');
1111  nkf_each_char_to_hex(oconv, c);
1112  (*oconv)(0, '}');
1113  return;
1114 }
1115 
1116 static void
1118 {
1119  c = unicode_subchar;
1120  (*oconv)((c>>8)&0xFF, c&0xFF);
1121  return;
1122 }
1123 #endif
1124 
/*
 * Long-option table.  Each entry pairs a long option name with an alias
 * string of short-option letters (e.g. "base64" -> "jMB").
 *
 * Presumably an empty alias means the option is handled by dedicated code
 * in the option parser, and a trailing '=' in the name marks an option
 * that takes a value -- confirm against the parser, which is not shown
 * here.  Entries inside #ifdef blocks exist only when that feature is
 * compiled in.
 */
1125 static const struct {
1126  const char *name;
1127  const char *alias;
1128 } long_option[] = {
1129  {"ic=", ""},
1130  {"oc=", ""},
1131  {"base64","jMB"},
1132  {"euc","e"},
1133  {"euc-input","E"},
1134  {"fj","jm"},
1135  {"help",""},
1136  {"jis","j"},
1137  {"jis-input","J"},
1138  {"mac","sLm"},
1139  {"mime","jM"},
1140  {"mime-input","m"},
1141  {"msdos","sLw"},
1142  {"sjis","s"},
1143  {"sjis-input","S"},
1144  {"unix","eLu"},
1145  {"version","v"},
1146  {"windows","sLw"},
1147  {"hiragana","h1"},
1148  {"katakana","h2"},
1149  {"katakana-hiragana","h3"},
1150  {"guess=", ""},
1151  {"guess", "g2"},
1152  {"cp932", ""},
1153  {"no-cp932", ""},
1154 #ifdef X0212_ENABLE
1155  {"x0212", ""},
1156 #endif
1157 #ifdef UTF8_OUTPUT_ENABLE
1158  {"utf8", "w"},
1159  {"utf16", "w16"},
1160  {"ms-ucs-map", ""},
1161  {"fb-skip", ""},
1162  {"fb-html", ""},
1163  {"fb-xml", ""},
1164  {"fb-perl", ""},
1165  {"fb-java", ""},
1166  {"fb-subchar", ""},
1167  {"fb-subchar=", ""},
1168 #endif
1169 #ifdef UTF8_INPUT_ENABLE
1170  {"utf8-input", "W"},
1171  {"utf16-input", "W16"},
1172  {"no-cp932ext", ""},
1173  {"no-best-fit-chars",""},
1174 #endif
1175 #ifdef UNICODE_NORMALIZATION
1176  {"utf8mac-input", ""},
1177 #endif
1178 #ifdef OVERWRITE
1179  {"overwrite", ""},
1180  {"overwrite=", ""},
1181  {"in-place", ""},
1182  {"in-place=", ""},
1183 #endif
1184 #ifdef INPUT_OPTION
1185  {"cap-input", ""},
1186  {"url-input", ""},
1187 #endif
1188 #ifdef NUMCHAR_OPTION
1189  {"numchar-input", ""},
1190 #endif
1191 #ifdef CHECK_OPTION
1192  {"no-output", ""},
1193  {"debug", ""},
1194 #endif
1195 #ifdef SHIFTJIS_CP932
1196  {"cp932inv", ""},
1197 #endif
1198 #ifdef EXEC_IO
1199  {"exec-in", ""},
1200  {"exec-out", ""},
1201 #endif
1202  {"prefix=", ""},
1203 };
1204 
/*
 * Adjusts global conversion flags (iso8859_f, x0201_f, cp51932_f, x0212_f,
 * x0213_f, nfc_f) according to the encoding `enc`, dispatching on
 * nkf_enc_to_index(enc).
 *
 * NOTE(review): this listing has lost the name/parameter line (numbering
 * skips 1206) and the single statement inside every
 * "#ifdef UTF8_OUTPUT_ENABLE" pair and inside each UTF-16/UTF-32 case
 * (e.g. 1220, 1308, 1312).  Judging by the flags touched this is the
 * input-side setup routine -- confirm against the real source.
 */
1205 static void
1207 {
1208  switch (nkf_enc_to_index(enc)) {
1209  case ISO_8859_1:
1210  iso8859_f = TRUE;
1211  break;
1212  case CP50221:
1213  case CP50222:
1214  if (x0201_f == NKF_UNSPECIFIED) x0201_f = FALSE; /* -x specified implicitly */
/* no break above: CP50221/CP50222 continue into the CP50220 case */
1215  case CP50220:
1216 #ifdef SHIFTJIS_CP932
1217  cp51932_f = TRUE;
1218 #endif
1219 #ifdef UTF8_OUTPUT_ENABLE
1221 #endif
1222  break;
1223  case ISO_2022_JP_1:
1224  x0212_f = TRUE;
1225  break;
1226  case ISO_2022_JP_3:
1227  x0212_f = TRUE;
1228  x0213_f = TRUE;
1229  break;
1230  case ISO_2022_JP_2004:
1231  x0212_f = TRUE;
1232  x0213_f = TRUE;
1233  break;
1234  case SHIFT_JIS:
1235  break;
1236  case WINDOWS_31J:
1237  if (x0201_f == NKF_UNSPECIFIED) x0201_f = FALSE; /* -x specified implicitly */
1238 #ifdef SHIFTJIS_CP932
1239  cp51932_f = TRUE;
1240 #endif
1241 #ifdef UTF8_OUTPUT_ENABLE
1243 #endif
1244  break;
/* the second break below is unreachable (harmless duplicate) */
1245  break;
1246  case CP10001:
1247 #ifdef SHIFTJIS_CP932
1248  cp51932_f = TRUE;
1249 #endif
1250 #ifdef UTF8_OUTPUT_ENABLE
1252 #endif
1253  break;
1254  case EUC_JP:
1255  break;
1256  case EUCJP_NKF:
1257  break;
1258  case CP51932:
1259  if (x0201_f == NKF_UNSPECIFIED) x0201_f = FALSE; /* -x specified implicitly */
1260 #ifdef SHIFTJIS_CP932
1261  cp51932_f = TRUE;
1262 #endif
1263 #ifdef UTF8_OUTPUT_ENABLE
1265 #endif
1266  break;
1267  case EUCJP_MS:
1268  if (x0201_f == NKF_UNSPECIFIED) x0201_f = FALSE; /* -x specified implicitly */
1269 #ifdef SHIFTJIS_CP932
1270  cp51932_f = FALSE;
1271 #endif
1272 #ifdef UTF8_OUTPUT_ENABLE
1274 #endif
1275  break;
1276  case EUCJP_ASCII:
1277  if (x0201_f == NKF_UNSPECIFIED) x0201_f = FALSE; /* -x specified implicitly */
1278 #ifdef SHIFTJIS_CP932
1279  cp51932_f = FALSE;
1280 #endif
1281 #ifdef UTF8_OUTPUT_ENABLE
1283 #endif
1284  break;
1285  case SHIFT_JISX0213:
1286  case SHIFT_JIS_2004:
1287  x0213_f = TRUE;
1288 #ifdef SHIFTJIS_CP932
1289  cp51932_f = FALSE;
1290 #endif
1291  break;
1292  case EUC_JISX0213:
1293  case EUC_JIS_2004:
1294  x0213_f = TRUE;
1295 #ifdef SHIFTJIS_CP932
1296  cp51932_f = FALSE;
1297 #endif
1298  break;
1299 #ifdef UTF8_INPUT_ENABLE
1300 #ifdef UNICODE_NORMALIZATION
1301  case UTF8_MAC:
1302  nfc_f = TRUE;
1303  break;
1304 #endif
/* each UTF-16/UTF-32 group below has lost its one statement in this
 * listing; presumably it selects the input byte order -- confirm */
1305  case UTF_16:
1306  case UTF_16BE:
1307  case UTF_16BE_BOM:
1309  break;
1310  case UTF_16LE:
1311  case UTF_16LE_BOM:
1313  break;
1314  case UTF_32:
1315  case UTF_32BE:
1316  case UTF_32BE_BOM:
1318  break;
1319  case UTF_32LE:
1320  case UTF_32LE_BOM:
1322  break;
1323 #endif
1324  }
1325 }
1326 
/*
 * Adjusts global conversion flags (x0201_f, x0212_f, x0213_f, cp932inv_f,
 * output_bom_f) according to the encoding `enc`, dispatching on
 * nkf_enc_to_index(enc).  cp932inv_f is only ever cleared here, and only
 * when it is exactly TRUE.
 *
 * NOTE(review): this listing has lost the name/parameter line (numbering
 * skips 1328), the statement inside every "#ifdef UTF8_OUTPUT_ENABLE" pair
 * in the legacy-encoding cases, and one statement in each little-endian
 * UTF case (1444, 1448, 1456, 1460).  Judging by output_bom_f this is the
 * output-side setup routine -- confirm against the real source.
 */
1327 static void
1329 {
1330  switch (nkf_enc_to_index(enc)) {
1331  case CP50220:
1332 #ifdef SHIFTJIS_CP932
1333  if (cp932inv_f == TRUE) cp932inv_f = FALSE;
1334 #endif
1335 #ifdef UTF8_OUTPUT_ENABLE
1337 #endif
1338  break;
1339  case CP50221:
1340  if (x0201_f == NKF_UNSPECIFIED) x0201_f = FALSE; /* -x specified implicitly */
1341 #ifdef SHIFTJIS_CP932
1342  if (cp932inv_f == TRUE) cp932inv_f = FALSE;
1343 #endif
1344 #ifdef UTF8_OUTPUT_ENABLE
1346 #endif
1347  break;
1348  case ISO_2022_JP:
1349 #ifdef SHIFTJIS_CP932
1350  if (cp932inv_f == TRUE) cp932inv_f = FALSE;
1351 #endif
1352  break;
1353  case ISO_2022_JP_1:
1354  x0212_f = TRUE;
1355 #ifdef SHIFTJIS_CP932
1356  if (cp932inv_f == TRUE) cp932inv_f = FALSE;
1357 #endif
1358  break;
1359  case ISO_2022_JP_3:
1360  x0212_f = TRUE;
1361  x0213_f = TRUE;
1362 #ifdef SHIFTJIS_CP932
1363  if (cp932inv_f == TRUE) cp932inv_f = FALSE;
1364 #endif
1365  break;
1366  case SHIFT_JIS:
1367  break;
1368  case WINDOWS_31J:
1369  if (x0201_f == NKF_UNSPECIFIED) x0201_f = FALSE; /* -x specified implicitly */
1370 #ifdef UTF8_OUTPUT_ENABLE
1372 #endif
1373  break;
1374  case CP10001:
1375 #ifdef UTF8_OUTPUT_ENABLE
1377 #endif
1378  break;
1379  case EUC_JP:
1380  x0212_f = TRUE;
1381 #ifdef SHIFTJIS_CP932
1382  if (cp932inv_f == TRUE) cp932inv_f = FALSE;
1383 #endif
1384 #ifdef UTF8_OUTPUT_ENABLE
1386 #endif
1387  break;
1388  case EUCJP_NKF:
1389  x0212_f = FALSE;
1390 #ifdef SHIFTJIS_CP932
1391  if (cp932inv_f == TRUE) cp932inv_f = FALSE;
1392 #endif
1393 #ifdef UTF8_OUTPUT_ENABLE
1395 #endif
1396  break;
1397  case CP51932:
1398  if (x0201_f == NKF_UNSPECIFIED) x0201_f = FALSE; /* -x specified implicitly */
1399 #ifdef SHIFTJIS_CP932
1400  if (cp932inv_f == TRUE) cp932inv_f = FALSE;
1401 #endif
1402 #ifdef UTF8_OUTPUT_ENABLE
1404 #endif
1405  break;
1406  case EUCJP_MS:
1407  if (x0201_f == NKF_UNSPECIFIED) x0201_f = FALSE; /* -x specified implicitly */
1408  x0212_f = TRUE;
1409 #ifdef UTF8_OUTPUT_ENABLE
1411 #endif
1412  break;
1413  case EUCJP_ASCII:
1414  if (x0201_f == NKF_UNSPECIFIED) x0201_f = FALSE; /* -x specified implicitly */
1415  x0212_f = TRUE;
1416 #ifdef UTF8_OUTPUT_ENABLE
1418 #endif
1419  break;
1420  case SHIFT_JISX0213:
1421  case SHIFT_JIS_2004:
1422  x0213_f = TRUE;
1423 #ifdef SHIFTJIS_CP932
1424  if (cp932inv_f == TRUE) cp932inv_f = FALSE;
1425 #endif
1426  break;
1427  case EUC_JISX0213:
1428  case EUC_JIS_2004:
1429  x0212_f = TRUE;
1430  x0213_f = TRUE;
1431 #ifdef SHIFTJIS_CP932
1432  if (cp932inv_f == TRUE) cp932inv_f = FALSE;
1433 #endif
1434  break;
1435 #ifdef UTF8_OUTPUT_ENABLE
/* Unicode outputs: the *_BOM variants and plain UTF_16/UTF_32 request a
 * BOM; the plain little-endian variants explicitly suppress it */
1436  case UTF_8_BOM:
1437  output_bom_f = TRUE;
1438  break;
1439  case UTF_16:
1440  case UTF_16BE_BOM:
1441  output_bom_f = TRUE;
1442  break;
1443  case UTF_16LE:
1445  output_bom_f = FALSE;
1446  break;
1447  case UTF_16LE_BOM:
1449  output_bom_f = TRUE;
1450  break;
1451  case UTF_32:
1452  case UTF_32BE_BOM:
1453  output_bom_f = TRUE;
1454  break;
1455  case UTF_32LE:
1457  output_bom_f = FALSE;
1458  break;
1459  case UTF_32LE_BOM:
1461  output_bom_f = TRUE;
1462  break;
1463 #endif
1464  }
1465 }
1466 
/*
 * Looks up the input_code_list entry whose iconv_func member equals the
 * given `iconv_func` pointer.  The list is scanned until an entry with a
 * null `name` is reached.
 *
 * Returns the matching entry, or 0 when `iconv_func` is null or no entry
 * matches.
 *
 * NOTE(review): the name/parameter line is missing from this listing
 * (numbering skips 1468).
 */
1467 static struct input_code*
1469 {
1470  if (iconv_func){
1471  struct input_code *p = input_code_list;
1472  while (p->name){
1473  if (iconv_func == p->iconv_func){
1474  return p;
1475  }
1476  p++;
1477  }
1478  }
1479  return 0;
1480 }
1481 
/*
 * Records whether the input encoding is established (estab_f = f) and,
 * when `iconv_func` is non-null, installs it as the active input
 * converter `iconv`.
 *
 * With INPUT_CODE_FIX defined, estab_f is only updated when f is non-zero
 * or no input_encoding was specified, and the converter is only replaced
 * when f == -TRUE ("FORCE") or no input_encoding was specified.
 *
 * NOTE(review): the name/parameter line (1483) and several lines of the
 * CHECK_OPTION debug section (1501, 1503, 1506) are missing from this
 * listing; the remaining lines there print p->name via debug().
 */
1482 static void
1484 {
1485 #ifdef INPUT_CODE_FIX
1486  if (f || !input_encoding)
1487 #endif
1488  if (estab_f != f){
1489  estab_f = f;
1490  }
1491 
1492  if (iconv_func
1493 #ifdef INPUT_CODE_FIX
1494  && (f == -TRUE || !input_encoding) /* -TRUE means "FORCE" */
1495 #endif
1496  ){
1497  iconv = iconv_func;
1498  }
1499 #ifdef CHECK_OPTION
1500  if (estab_f && iconv_for_check != iconv){
1502  if (p){
1504  debug(p->name);
1505  }
1507  }
1508 #endif
1509 }
1510 
1511 #ifdef X0212_ENABLE
/*
 * Remaps a row value whose low seven bits lie in 0x75..0x7f: the result is
 * (c & 0x7f) + (0x109 - 0x75) when is_eucg3(c) holds for the original
 * value, and (c & 0x7f) + (0x113 - 0x75) otherwise.  Any other input is
 * returned unchanged (including its prefix bits).
 *
 * NOTE(review): the name/parameter line is missing from this listing
 * (numbering skips 1513); the single parameter is `c`.  The function that
 * follows undoes rows 0x7f..0x92, which does not obviously match the
 * 0x109/0x113 targets used here -- confirm the intended offsets.
 */
1512 static nkf_char
1514 {
1515  nkf_char ret = c;
1516  c &= 0x7f;
1517  if (is_eucg3(ret)){
1518  if (0x75 <= c && c <= 0x7f){
1519  ret = c + (0x109 - 0x75);
1520  }
1521  }else{
1522  if (0x75 <= c && c <= 0x7f){
1523  ret = c + (0x113 - 0x75);
1524  }
1525  }
1526  return ret;
1527 }
1528 
1529 
/*
 * Maps extended row values back into the 0x75.. range:
 *   0x7f..0x88 -> 0x75..0x7e (no prefix)
 *   0x89..0x92 -> PREFIX_EUCG3 | 0x80 | (0x75..0x7e)
 * Any other input is returned unchanged.
 *
 * NOTE(review): the name/parameter line is missing from this listing
 * (numbering skips 1531); the single parameter is `c`.
 */
1530 static nkf_char
1532 {
1533  nkf_char ret = c;
1534  if (0x7f <= c && c <= 0x88){
1535  ret = c + (0x75 - 0x7f);
1536  }else if (0x89 <= c && c <= 0x92){
1537  ret = PREFIX_EUCG3 | 0x80 | (c + (0x75 - 0x89));
1538  }
1539  return ret;
1540 }
1541 #endif /* X0212_ENABLE */
1542 
/*
 * Converts a two-byte code (c2 = row, c1 = cell, with an optional
 * is_eucg3() prefix on c2) into a Shift_JIS-style byte pair written to
 * *p2 / *p1 (either pointer may be null).
 *
 * Returns 0 when a pair was produced and 1 when the input cannot be
 * represented.
 *
 * NOTE(review): the name/parameter line is missing from this listing
 * (numbering skips 1544); the name is taken from the call sites
 * e2s_conv(c2, c1, &s2, &s1).
 */
1543 static nkf_char
1545 {
1546  nkf_char ndx;
1547  if (is_eucg3(c2)){
1548  ndx = c2 & 0x7f;
1549  if (x0213_f){
/* x0213 mode: only rows 0x21..0x2F and 0x6E..0x7E of the prefixed set
 * are mapped; everything else is rejected */
1550  if((0x21 <= ndx && ndx <= 0x2F)){
1551  if (p2) *p2 = ((ndx - 1) >> 1) + 0xec - ndx / 8 * 3;
1552  if (p1) *p1 = c1 + ((ndx & 1) ? ((c1 < 0x60) ? 0x1f : 0x20) : 0x7e);
1553  return 0;
1554  }else if(0x6E <= ndx && ndx <= 0x7E){
1555  if (p2) *p2 = ((ndx - 1) >> 1) + 0xbe;
1556  if (p1) *p1 = c1 + ((ndx & 1) ? ((c1 < 0x60) ? 0x1f : 0x20) : 0x7e);
1557  return 0;
1558  }
1559  return 1;
1560  }
1561 #ifdef X0212_ENABLE
1562  else if(nkf_isgraph(ndx)){
/* table lookup first; if the table has no entry, fall back to the
 * row-shift helper and let the range check below decide */
1563  nkf_char val = 0;
1564  const unsigned short *ptr;
1565  ptr = x0212_shiftjis[ndx - 0x21];
1566  if (ptr){
1567  val = ptr[(c1 & 0x7f) - 0x21];
1568  }
1569  if (val){
1570  c2 = val >> 8;
1571  c1 = val & 0xff;
1572  if (p2) *p2 = c2;
1573  if (p1) *p1 = c1;
1574  return 0;
1575  }
1576  c2 = x0212_shift(c2);
1577  }
1578 #endif /* X0212_ENABLE */
1579  }
/* rows above 0x7F (including any value still carrying a prefix) cannot be
 * expressed by the arithmetic mapping below */
1580  if(0x7F < c2) return 1;
1581  if (p2) *p2 = ((c2 - 1) >> 1) + ((c2 <= 0x5e) ? 0x71 : 0xb1);
1582  if (p1) *p1 = c1 + ((c2 & 1) ? ((c1 < 0x60) ? 0x1f : 0x20) : 0x7e);
1583  return 0;
1584 }
1585 
/*
 * Converts a Shift_JIS-style byte pair (c2 = first byte, c1 = second byte)
 * into a row/cell pair written to *p2 / *p1 (either pointer may be null).
 * The row may carry PREFIX_EUCG3.
 *
 * Returns 1 when c1 > 0xFC, otherwise 0.
 *
 * NOTE(review): the name/parameter line is missing from this listing
 * (numbering skips 1587); the name is taken from the call sites
 * s2e_conv(s2, s1, p2, p1).  Only an upper bound on c1 is checked before
 * "c1 - 0x40" is used as a table index; presumably callers guarantee
 * c1 >= 0x40 -- confirm.
 */
1586 static nkf_char
1588 {
1589 #if defined(SHIFTJIS_CP932) || defined(X0212_ENABLE)
1590  nkf_char val;
1591 #endif
1592  static const char shift_jisx0213_s1a3_table[5][2] ={ { 1, 8}, { 3, 4}, { 5,12}, {13,14}, {15, 0} };
1593  if (0xFC < c1) return 1;
1594 #ifdef SHIFTJIS_CP932
/* table-driven remapping of the input pair: the shiftjis_cp932 table when
 * cp932inv_f is off, the cp932inv table when it is on */
1595  if (!cp932inv_f && is_ibmext_in_sjis(c2)){
1596  val = shiftjis_cp932[c2 - CP932_TABLE_BEGIN][c1 - 0x40];
1597  if (val){
1598  c2 = val >> 8;
1599  c1 = val & 0xff;
1600  }
1601  }
1602  if (cp932inv_f
1603  && CP932INV_TABLE_BEGIN <= c2 && c2 <= CP932INV_TABLE_END){
1604  val = cp932inv[c2 - CP932INV_TABLE_BEGIN][c1 - 0x40];
1605  if (val){
1606  c2 = val >> 8;
1607  c1 = val & 0xff;
1608  }
1609  }
1610 #endif /* SHIFTJIS_CP932 */
1611 #ifdef X0212_ENABLE
1612  if (!x0213_f && is_ibmext_in_sjis(c2)){
1613  val = shiftjis_x0212[c2 - 0xfa][c1 - 0x40];
1614  if (val){
/* table values above 0x7FFF are stored with the prefix set */
1615  if (val > 0x7FFF){
1616  c2 = PREFIX_EUCG3 | ((val >> 8) & 0x7f);
1617  c1 = val & 0xff;
1618  }else{
1619  c2 = val >> 8;
1620  c1 = val & 0xff;
1621  }
1622  if (p2) *p2 = c2;
1623  if (p1) *p1 = c1;
1624  return 0;
1625  }
1626  }
1627 #endif
1628  if(c2 >= 0x80){
1629  if(x0213_f && c2 >= 0xF0){
1630  if(c2 <= 0xF3 || (c2 == 0xF4 && c1 < 0x9F)){ /* k=1, 3<=k<=5, k=8, 12<=k<=15 */
1631  c2 = PREFIX_EUCG3 | 0x20 | shift_jisx0213_s1a3_table[c2 - 0xF0][0x9E < c1];
1632  }else{ /* 78<=k<=94 */
1633  c2 = PREFIX_EUCG3 | (c2 * 2 - 0x17B);
1634  if (0x9E < c1) c2++;
1635  }
1636  }else{
1637 #define SJ0162 0x00e1 /* 01 - 62 ku offset */
1638 #define SJ6394 0x0161 /* 63 - 94 ku offset */
1639  c2 = c2 + c2 - ((c2 <= 0x9F) ? SJ0162 : SJ6394);
1640  if (0x9E < c1) c2++;
1641  }
/* second byte: values below 0x9F belong to the odd row, the rest to the
 * even row (already accounted for by the c2++ above) */
1642  if (c1 < 0x9F)
1643  c1 = c1 - ((c1 > DEL) ? SP : 0x1F);
1644  else {
1645  c1 = c1 - 0x7E;
1646  }
1647  }
1648 
1649 #ifdef X0212_ENABLE
1650  c2 = x0212_unshift(c2);
1651 #endif
1652  if (p2) *p2 = c2;
1653  if (p1) *p1 = c1;
1654  return 0;
1655 }
1656 
1657 #if defined(UTF8_INPUT_ENABLE) || defined(UTF8_OUTPUT_ENABLE)
1658 static void
1660 {
1661  val &= VALUE_MASK;
1662  if (val < 0x80){
1663  *p1 = val;
1664  *p2 = 0;
1665  *p3 = 0;
1666  *p4 = 0;
1667  }else if (val < 0x800){
1668  *p1 = 0xc0 | (val >> 6);
1669  *p2 = 0x80 | (val & 0x3f);
1670  *p3 = 0;
1671  *p4 = 0;
1672  } else if (nkf_char_unicode_bmp_p(val)) {
1673  *p1 = 0xe0 | (val >> 12);
1674  *p2 = 0x80 | ((val >> 6) & 0x3f);
1675  *p3 = 0x80 | ( val & 0x3f);
1676  *p4 = 0;
1677  } else if (nkf_char_unicode_value_p(val)) {
1678  *p1 = 0xf0 | (val >> 18);
1679  *p2 = 0x80 | ((val >> 12) & 0x3f);
1680  *p3 = 0x80 | ((val >> 6) & 0x3f);
1681  *p4 = 0x80 | ( val & 0x3f);
1682  } else {
1683  *p1 = 0;
1684  *p2 = 0;
1685  *p3 = 0;
1686  *p4 = 0;
1687  }
1688 }
1689 
1690 static nkf_char
1692 {
1693  nkf_char wc;
1694  if (c1 <= 0x7F) {
1695  /* single byte */
1696  wc = c1;
1697  }
1698  else if (c1 <= 0xC3) {
1699  /* trail byte or invalid */
1700  return -1;
1701  }
1702  else if (c1 <= 0xDF) {
1703  /* 2 bytes */
1704  wc = (c1 & 0x1F) << 6;
1705  wc |= (c2 & 0x3F);
1706  }
1707  else if (c1 <= 0xEF) {
1708  /* 3 bytes */
1709  wc = (c1 & 0x0F) << 12;
1710  wc |= (c2 & 0x3F) << 6;
1711  wc |= (c3 & 0x3F);
1712  }
1713  else if (c2 <= 0xF4) {
1714  /* 4 bytes */
1715  wc = (c1 & 0x0F) << 18;
1716  wc |= (c2 & 0x3F) << 12;
1717  wc |= (c3 & 0x3F) << 6;
1718  wc |= (c4 & 0x3F);
1719  }
1720  else {
1721  return -1;
1722  }
1723  return wc;
1724 }
1725 #endif
1726 
1727 #ifdef UTF8_INPUT_ENABLE
/*
 * Second-level table lookup used when mapping UTF-8 bytes to a JIS code.
 * `pp` is an array of `psize` row pointers indexed by c1 - 0x80; the
 * selected row is indexed by c0 - 0x80.  The 16-bit table value is split
 * into *p2 (high byte, with PREFIX_EUCG3 substituted when the value is
 * above 0x7FFF, and SO translated to JIS_X_0201_1976_K) and *p1 (low
 * byte).  Either output pointer may be null.
 *
 * Returns 0 on success and 1 when there is no mapping (null table, index
 * out of range, empty entry, or an entry excluded by no_cp932ext_f).
 *
 * NOTE(review): the first line of the parameter list (declaring c1 and
 * c0) is missing from this listing (numbering skips 1729).  The row bound
 * is the constant sizeof_utf8_to_euc_C2, not a parameter.
 */
1728 static int
1730  const unsigned short *const *pp, nkf_char psize,
1731  nkf_char *p2, nkf_char *p1)
1732 {
1733  nkf_char c2;
1734  const unsigned short *p;
1735  unsigned short val;
1736 
1737  if (pp == 0) return 1;
1738 
1739  c1 -= 0x80;
1740  if (c1 < 0 || psize <= c1) return 1;
1741  p = pp[c1];
1742  if (p == 0) return 1;
1743 
1744  c0 -= 0x80;
1745  if (c0 < 0 || sizeof_utf8_to_euc_C2 <= c0) return 1;
1746  val = p[c0];
1747  if (val == 0) return 1;
1748  if (no_cp932ext_f && (
1749  (val>>8) == 0x2D || /* NEC special characters */
1750  val > NKF_INT32_C(0xF300) /* IBM extended characters */
1751  )) return 1;
1752 
1753  c2 = val >> 8;
1754  if (val > 0x7FFF){
1755  c2 &= 0x7f;
1756  c2 |= PREFIX_EUCG3;
1757  }
1758  if (c2 == SO) c2 = JIS_X_0201_1976_K;
1759  c1 = val & 0xFF;
1760  if (p2) *p2 = c2;
1761  if (p1) *p1 = c1;
1762  return 0;
1763 }
1764 
/*
 * Maps one UTF-8 sequence of up to three bytes (c2 = lead byte, c1 and c0
 * = following bytes) to a JIS code written to *p2 / *p1.
 *
 * Returns 0 on success, 1 when the character has no mapping or is
 * excluded by the no_best_fit_chars_f filters, and -1 from the final
 * else branch.
 *
 * The no_best_fit_chars_table_* arrays are indexed by c1 & 0x3F; a
 * non-zero entry rejects that character.
 *
 * NOTE(review): the name/parameter line (1766) and the right-hand sides
 * of the "pp =" and "ppp =" assignments (1835-1838, 1902-1905) are
 * missing from this listing; presumably they select the mapping table by
 * ms_ucs_map_f -- confirm against the real source.
 */
1765 static int
1767 {
1768  const unsigned short *const *pp;
1769  const unsigned short *const *const *ppp;
1770  static const char no_best_fit_chars_table_C2[] =
1771  {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1772  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1773  1, 1, 0, 0, 1, 0, 0, 0, 0, 1, 1, 1, 2, 1, 1, 2,
1774  0, 0, 1, 1, 0, 1, 0, 1, 2, 1, 1, 1, 1, 1, 1, 1};
1775  static const char no_best_fit_chars_table_C2_ms[] =
1776  {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1777  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1778  1, 0, 1, 1, 0, 1, 1, 0, 0, 0, 0, 1, 1, 1, 0, 0,
1779  0, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 0};
1780  static const char no_best_fit_chars_table_932_C2[] =
1781  {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1782  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1783  1, 1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1,
1784  0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0};
1785  static const char no_best_fit_chars_table_932_C3[] =
1786  {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1787  1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1,
1788  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1789  1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1};
1790  nkf_char ret = 0;
1791 
1792  if(c2 < 0x80){
/* single byte: passed through unchanged; note p2/p1 are written without
 * a null check in this branch */
1793  *p2 = 0;
1794  *p1 = c2;
1795  }else if(c2 < 0xe0){
/* lead byte below 0xE0: two-byte sequence */
1796  if(no_best_fit_chars_f){
1797  if(ms_ucs_map_f == UCS_MAP_CP932){
1798  switch(c2){
1799  case 0xC2:
1800  if(no_best_fit_chars_table_932_C2[c1&0x3F]) return 1;
1801  break;
1802  case 0xC3:
1803  if(no_best_fit_chars_table_932_C3[c1&0x3F]) return 1;
1804  break;
1805  }
1806  }else if(!cp932inv_f){
1807  switch(c2){
1808  case 0xC2:
1809  if(no_best_fit_chars_table_C2[c1&0x3F]) return 1;
1810  break;
1811  case 0xC3:
1812  if(no_best_fit_chars_table_932_C3[c1&0x3F]) return 1;
1813  break;
1814  }
1815  }else if(ms_ucs_map_f == UCS_MAP_MS){
1816  if(c2 == 0xC2 && no_best_fit_chars_table_C2_ms[c1&0x3F]) return 1;
1817  }else if(ms_ucs_map_f == UCS_MAP_CP10001){
1818  switch(c2){
1819  case 0xC2:
1820  switch(c1){
1821  case 0xA2:
1822  case 0xA3:
1823  case 0xA5:
1824  case 0xA6:
1825  case 0xAC:
1826  case 0xAF:
1827  case 0xB8:
1828  return 1;
1829  }
1830  break;
1831  }
1832  }
1833  }
1834  pp =
1839  ret = unicode_to_jis_common2(c2, c1, pp, sizeof_utf8_to_euc_2bytes, p2, p1);
/* NOTE(review): the condition below tests c0 (the last byte), not the
 * lead byte c2.  If a caller ever passed c2 >= 0xF0, ppp[c2 - 0xE0] below
 * would index past a 16-entry table; "c2 < 0xF0" looks like the intended
 * test -- confirm. */
1840  }else if(c0 < 0xF0){
1841  if(no_best_fit_chars_f){
1842  if(ms_ucs_map_f == UCS_MAP_CP932){
1843  if(c2 == 0xE3 && c1 == 0x82 && c0 == 0x94) return 1;
1844  }else if(ms_ucs_map_f == UCS_MAP_MS){
1845  switch(c2){
1846  case 0xE2:
1847  switch(c1){
1848  case 0x80:
1849  if(c0 == 0x94 || c0 == 0x96 || c0 == 0xBE) return 1;
1850  break;
1851  case 0x88:
1852  if(c0 == 0x92) return 1;
1853  break;
1854  }
1855  break;
1856  case 0xE3:
/* NOTE(review): "||" rejects every E3 80 xx and every E3 xx 9C sequence;
 * a single character (E3 80 9C) would need "&&" -- confirm intent */
1857  if(c1 == 0x80 || c0 == 0x9C) return 1;
1858  break;
1859  }
1860  }else if(ms_ucs_map_f == UCS_MAP_CP10001){
1861  switch(c2){
1862  case 0xE3:
1863  switch(c1){
1864  case 0x82:
1865  if(c0 == 0x94) return 1;
1866  break;
1867  case 0x83:
1868  if(c0 == 0xBB) return 1;
1869  break;
1870  }
1871  break;
1872  }
1873  }else{
1874  switch(c2){
1875  case 0xE2:
1876  switch(c1){
1877  case 0x80:
1878  if(c0 == 0x95) return 1;
1879  break;
1880  case 0x88:
1881  if(c0 == 0xA5) return 1;
1882  break;
1883  }
1884  break;
1885  case 0xEF:
1886  switch(c1){
1887  case 0xBC:
1888  if(c0 == 0x8D) return 1;
1889  break;
1890  case 0xBD:
1891  if(c0 == 0x9E && !cp932inv_f) return 1;
1892  break;
1893  case 0xBF:
1894  if(0xA0 <= c0 && c0 <= 0xA5) return 1;
1895  break;
1896  }
1897  break;
1898  }
1899  }
1900  }
1901  ppp =
1906  ret = unicode_to_jis_common2(c1, c0, ppp[c2 - 0xE0], sizeof_utf8_to_euc_C2, p2, p1);
1907  }else return -1;
1908 #ifdef SHIFTJIS_CP932
/* round-trip a prefixed result through e2s_conv/s2e_conv; if the first
 * step fails the character is reported as unmappable */
1909  if (!ret && !cp932inv_f && is_eucg3(*p2)) {
1910  nkf_char s2, s1;
1911  if (e2s_conv(*p2, *p1, &s2, &s1) == 0) {
1912  s2e_conv(s2, s1, p2, p1);
1913  }else{
1914  ret = 1;
1915  }
1916  }
1917 #endif
1918  return ret;
1919 }
1920 
1921 #ifdef UTF8_OUTPUT_ENABLE
/*
 * Maps a JIS code (c2 = row/charset marker, c1 = cell) to a 16-bit value
 * read from a conversion table, or 0 when there is no mapping.
 *
 * Three cases select the table row `p`: JIS_X_0201_1976_K (with two
 * hard-coded overrides when ms_ucs_map_f == UCS_MAP_CP10001), an
 * is_eucg3() row (x0212_to_utf8_2bytes), and the default two-byte case.
 *
 * NOTE(review): the name/parameter line (1923) and the right-hand side of
 * the "p =" assignment in the default case (1953-1955) are missing from
 * this listing; the name is taken from the call sites e2w_conv(c2, c1).
 */
1922 static nkf_char
1924 {
1925  const unsigned short *p;
1926 
1927  if (c2 == JIS_X_0201_1976_K) {
1928  if (ms_ucs_map_f == UCS_MAP_CP10001) {
1929  switch (c1) {
1930  case 0x20:
1931  return 0xA0;
1932  case 0x7D:
1933  return 0xA9;
1934  }
1935  }
1936  p = euc_to_utf8_1byte;
1937 #ifdef X0212_ENABLE
1938  } else if (is_eucg3(c2)){
1939  if(ms_ucs_map_f == UCS_MAP_ASCII&& c2 == NKF_INT32_C(0x8F22) && c1 == 0x43){
1940  return 0xA6;
1941  }
1942  c2 = (c2&0x7f) - 0x21;
1943  if (0<=c2 && c2<sizeof_euc_to_utf8_2bytes)
1944  p = x0212_to_utf8_2bytes[c2];
1945  else
1946  return 0;
1947 #endif
1948  } else {
/* the mask on the next line is repeated inside the expression after it */
1949  c2 &= 0x7f;
1950  c2 = (c2&0x7f) - 0x21;
1951  if (0<=c2 && c2<sizeof_euc_to_utf8_2bytes)
1952  p =
1956  else
1957  return 0;
1958  }
1959  if (!p) return 0;
1960  c1 = (c1 & 0x7f) - 0x21;
1961  if (0<=c1 && c1<sizeof_euc_to_utf8_1byte)
1962  return p[c1];
1963  return 0;
1964 }
1965 #endif
1966 
/*
 * Converts a UTF-8 sequence (c2 = lead byte, c1, c0 = following bytes)
 * into a JIS code written to *p2 / *p1.
 *
 * When c1 is 0 the input is treated as a single byte and passed through.
 * Lead bytes 0xC0..0xEF go through unicode_to_jis_common(); with
 * NUMCHAR_OPTION, a positive (unmappable) result is replaced by the raw
 * code point wrapped with nkf_char_unicode_new() and 0 is returned.
 * Other lead bytes leave the outputs untouched and return 0.
 *
 * NOTE(review): the name/parameter line is missing from this listing
 * (numbering skips 1968); the name is taken from the call site
 * w2e_conv(c1, c2, c3, &c1, &c2).
 */
1967 static nkf_char
1969 {
1970  nkf_char ret = 0;
1971 
1972  if (!c1){
1973  *p2 = 0;
1974  *p1 = c2;
1975  }else if (0xc0 <= c2 && c2 <= 0xef) {
1976  ret = unicode_to_jis_common(c2, c1, c0, p2, p1);
1977 #ifdef NUMCHAR_OPTION
1978  if (ret > 0){
1979  if (p2) *p2 = 0;
1980  if (p1) *p1 = nkf_char_unicode_new(nkf_utf8_to_unicode(c2, c1, c0, 0));
1981  ret = 0;
1982  }
1983 #endif
1984  }
1985  return ret;
1986 }
1987 
1988 #ifdef UTF8_INPUT_ENABLE
/*
 * Converts a Unicode code point `val` (masked with VALUE_MASK) into a JIS
 * code written to *p2 / *p1.
 *
 * Values below 0x80 pass through.  BMP values are re-encoded as UTF-8 and
 * looked up with unicode_to_jis_common(); if that reports "no mapping"
 * (> 0) the code point is stored wrapped with nkf_char_unicode_new() and
 * 0 is returned.  Non-BMP values are always stored wrapped.
 *
 * Returns 0, or a negative value if unicode_to_jis_common() returned one.
 *
 * NOTE(review): the name/parameter line is missing from this listing
 * (numbering skips 1990); the name is taken from the call sites
 * w16e_conv(c1, &c2, &c1).
 */
1989 static nkf_char
1991 {
1992  nkf_char c1, c2, c3, c4;
1993  nkf_char ret = 0;
1994  val &= VALUE_MASK;
1995  if (val < 0x80) {
1996  *p2 = 0;
1997  *p1 = val;
1998  }
1999  else if (nkf_char_unicode_bmp_p(val)){
2000  nkf_unicode_to_utf8(val, &c1, &c2, &c3, &c4);
2001  ret = unicode_to_jis_common(c1, c2, c3, p2, p1);
2002  if (ret > 0){
2003  *p2 = 0;
2004  *p1 = nkf_char_unicode_new(val);
2005  ret = 0;
2006  }
2007  }
2008  else {
2009  *p2 = 0;
2010  *p1 = nkf_char_unicode_new(val);
2011  }
2012  return ret;
2013 }
2014 #endif
2015 
/*
 * Input converter for EUC-style byte sequences: normalises (c2, c1, c0)
 * and forwards the result to (*oconv)(c2, c1).
 *
 * Returns 0 after emitting a character, or -1 when c2 == 0x8f and c0 == 0
 * (presumably "three-byte sequence, third byte not yet supplied" --
 * confirm against the caller).
 *
 * NOTE(review): the name/parameter line is missing from this listing
 * (numbering skips 2017); the parameters used are c2, c1 and c0.
 */
2016 static nkf_char
2018 {
2019  if (c2 == JIS_X_0201_1976_K || c2 == SS2){
/* when iso2022jp_f is set and x0201_f is off, these characters are
 * replaced by GETA1/GETA2 */
2020  if (iso2022jp_f && !x0201_f) {
2021  c2 = GETA1; c1 = GETA2;
2022  } else {
2023  c2 = JIS_X_0201_1976_K;
2024  c1 &= 0x7f;
2025  }
2026 #ifdef X0212_ENABLE
2027  }else if (c2 == 0x8f){
2028  if (c0 == 0){
2029  return -1;
2030  }
2031  if (!cp51932_f && !x0213_f && 0xF5 <= c1 && c1 <= 0xFE && 0xA1 <= c0 && c0 <= 0xFE) {
2032  /* encoding is eucJP-ms, so invert to Unicode Private User Area */
2033  c1 = nkf_char_unicode_new((c1 - 0xF5) * 94 + c0 - 0xA1 + 0xE3AC);
2034  c2 = 0;
2035  } else {
2036  c2 = (c2 << 8) | (c1 & 0x7f);
2037  c1 = c0 & 0x7f;
2038 #ifdef SHIFTJIS_CP932
2039  if (cp51932_f){
2040  nkf_char s2, s1;
2041  if (e2s_conv(c2, c1, &s2, &s1) == 0){
2042  s2e_conv(s2, s1, &c2, &c1);
2043  if (c2 < 0x100){
2044  c1 &= 0x7f;
2045  c2 &= 0x7f;
2046  }
2047  }
2048  }
2049 #endif /* SHIFTJIS_CP932 */
2050  }
2051 #endif /* X0212_ENABLE */
2052  } else if ((c2 == EOF) || (c2 == 0) || c2 < SP || c2 == ISO_8859_1) {
2053  /* NOP */
2054  } else {
2055  if (!cp51932_f && ms_ucs_map_f && 0xF5 <= c2 && c2 <= 0xFE && 0xA1 <= c1 && c1 <= 0xFE) {
2056  /* encoding is eucJP-ms, so invert to Unicode Private User Area */
2057  c1 = nkf_char_unicode_new((c2 - 0xF5) * 94 + c1 - 0xA1 + 0xE000);
2058  c2 = 0;
2059  } else {
2060  c1 &= 0x7f;
2061  c2 &= 0x7f;
2062 #ifdef SHIFTJIS_CP932
2063  if (cp51932_f && 0x79 <= c2 && c2 <= 0x7c){
2064  nkf_char s2, s1;
2065  if (e2s_conv(c2, c1, &s2, &s1) == 0){
2066  s2e_conv(s2, s1, &c2, &c1);
2067  if (c2 < 0x100){
2068  c1 &= 0x7f;
2069  c2 &= 0x7f;
2070  }
2071  }
2072  }
2073 #endif /* SHIFTJIS_CP932 */
2074  }
2075  }
2076  (*oconv)(c2, c1);
2077  return 0;
2078 }
2079 
/*
 * Input converter for Shift_JIS-style byte pairs: normalises (c2, c1) and
 * forwards the result to (*oconv)(c2, c1).
 *
 * Returns 0 normally.  In the user-defined-character range a second byte
 * of 0x7F returns 0 without emitting anything; otherwise a non-zero
 * result from s2e_conv() is returned as-is, also without emitting.
 *
 * NOTE(review): the name/parameter line is missing from this listing
 * (numbering skips 2081); the parameters used are c2 and c1.
 */
2080 static nkf_char
2082 {
2083  if (c2 == JIS_X_0201_1976_K || (0xA1 <= c2 && c2 <= 0xDF)) {
2084  if (iso2022jp_f && !x0201_f) {
2085  c2 = GETA1; c1 = GETA2;
2086  } else {
2087  c1 &= 0x7f;
2088  }
2089  } else if ((c2 == EOF) || (c2 == 0) || c2 < SP) {
2090  /* NOP */
2091  } else if (!x0213_f && 0xF0 <= c2 && c2 <= 0xF9 && 0x40 <= c1 && c1 <= 0xFC) {
2092  /* CP932 UDC */
2093  if(c1 == 0x7F) return 0;
/* 188 cells per lead byte; (0x7E < c1) closes the gap left by the unused
 * second byte 0x7F */
2094  c1 = nkf_char_unicode_new((c2 - 0xF0) * 188 + (c1 - 0x40 - (0x7E < c1)) + 0xE000);
2095  c2 = 0;
2096  } else {
2097  nkf_char ret = s2e_conv(c2, c1, &c2, &c1);
2098  if (ret) return ret;
2099  }
2100  (*oconv)(c2, c1);
2101  return 0;
2102 }
2103 
/*
 * Input converter for UTF-8: validates the sequence (c1 = lead byte, c2,
 * c3 = following bytes; a fourth byte may be packed into the low 8 bits
 * of c3 with the third byte above it) and forwards the converted
 * character to oconv.
 *
 * Return values visible here: 0 when the sequence was consumed or
 * silently rejected, -1 / -2 when c3 is still 0 for a lead byte that
 * needs more bytes (presumably "need N more bytes" -- confirm against the
 * caller), or the result of w2e_conv().
 *
 * NOTE(review): the name/parameter line is missing from this listing
 * (numbering skips 2105).
 */
2104 static nkf_char
2106 {
2107  nkf_char ret = 0, c4 = 0;
/* classification of lead bytes; the switch below groups them by how the
 * second byte must be range-checked */
2108  static const char w_iconv_utf8_1st_byte[] =
2109  { /* 0xC0 - 0xFF */
2110  20, 20, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
2111  21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
2112  30, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 32, 33, 33,
2113  40, 41, 41, 41, 42, 43, 43, 43, 50, 50, 50, 50, 60, 60, 70, 70};
2114 
2115  if (c3 > 0xFF) {
2116  c4 = c3 & 0xFF;
2117  c3 >>= 8;
2118  }
2119 
2120  if (c1 < 0 || 0xff < c1) {
2121  }else if (c1 == 0) { /* 0 : 1 byte*/
2122  c3 = 0;
2123  } else if ((c1 & 0xC0) == 0x80) { /* 0x80-0xbf : trail byte */
2124  return 0;
2125  } else{
/* NOTE(review): c1 values 0x01..0x7F also reach this lookup, which would
 * make "c1 - 0xC0" negative; presumably callers never pass them as c1 --
 * confirm. */
2126  switch (w_iconv_utf8_1st_byte[c1 - 0xC0]) {
2127  case 21:
2128  if (c2 < 0x80 || 0xBF < c2) return 0;
2129  break;
2130  case 30:
2131  if (c3 == 0) return -1;
2132  if (c2 < 0xA0 || 0xBF < c2 || (c3 & 0xC0) != 0x80)
2133  return 0;
2134  break;
2135  case 31:
2136  case 33:
2137  if (c3 == 0) return -1;
2138  if ((c2 & 0xC0) != 0x80 || (c3 & 0xC0) != 0x80)
2139  return 0;
2140  break;
2141  case 32:
2142  if (c3 == 0) return -1;
2143  if (c2 < 0x80 || 0x9F < c2 || (c3 & 0xC0) != 0x80)
2144  return 0;
2145  break;
2146  case 40:
2147  if (c3 == 0) return -2;
2148  if (c2 < 0x90 || 0xBF < c2 || (c3 & 0xC0) != 0x80 || (c4 & 0xC0) != 0x80)
2149  return 0;
2150  break;
2151  case 41:
2152  if (c3 == 0) return -2;
2153  if (c2 < 0x80 || 0xBF < c2 || (c3 & 0xC0) != 0x80 || (c4 & 0xC0) != 0x80)
2154  return 0;
2155  break;
2156  case 42:
2157  if (c3 == 0) return -2;
2158  if (c2 < 0x80 || 0x8F < c2 || (c3 & 0xC0) != 0x80 || (c4 & 0xC0) != 0x80)
2159  return 0;
2160  break;
2161  default:
2162  return 0;
2163  break;
2164  }
2165  }
2166  if (c1 == 0 || c1 == EOF){
2167  } else if ((c1 & 0xf8) == 0xf0) { /* 4 bytes */
2168  c2 = nkf_char_unicode_new(nkf_utf8_to_unicode(c1, c2, c3, c4));
2169  c1 = 0;
2170  } else {
2171  ret = w2e_conv(c1, c2, c3, &c1, &c2);
2172  }
2173  if (ret == 0){
2174  (*oconv)(c1, c2);
2175  }
2176  return ret;
2177 }
2178 
/* Error code for a code point outside the accepted range. */
2179 #define NKF_ICONV_INVALID_CODE_RANGE -13
/*
 * Converts one Unicode code point `wc` and forwards it to oconv.
 * Values below 0x80 pass through, values below 0xFFFF go through
 * w16e_conv(), and larger values below 0x10FFFF are forwarded wrapped
 * with nkf_char_unicode_new().
 *
 * Returns 0 on success or the non-zero result of w16e_conv().
 *
 * NOTE(review): the name/parameter line (2181) and the bodies of the
 * "unpaired surrogate" branch (2191) and the final else branch (2199) are
 * missing from this listing; presumably both return
 * NKF_ICONV_INVALID_CODE_RANGE -- confirm.  The bounds are strict, so
 * 0xFFFF itself takes the wrapped path and 0x10FFFF itself falls into the
 * final else -- confirm that is intended.  The declared return type is
 * size_t while the error values are negative ints.
 */
2180 static size_t
2182 {
2183  nkf_char c1, c2;
2184  int ret = 0;
2185 
2186  if (wc < 0x80) {
2187  c2 = 0;
2188  c1 = wc;
2189  }else if ((wc>>11) == 27) {
2190  /* unpaired surrogate */
2192  }else if (wc < 0xFFFF) {
2193  ret = w16e_conv(wc, &c2, &c1);
2194  if (ret) return ret;
2195  }else if (wc < 0x10FFFF) {
2196  c2 = 0;
2197  c1 = nkf_char_unicode_new(wc);
2198  } else {
2200  }
2201  (*oconv)(c2, c1);
2202  return 0;
2203 }
2204 
/* Status codes asking the caller to supply more input bytes. */
2205 #define NKF_ICONV_NEED_ONE_MORE_BYTE (size_t)-1
2206 #define NKF_ICONV_NEED_TWO_MORE_BYTES (size_t)-2
/* Combines a surrogate pair into a code point; the constant equals
 * (0xD800 << 10) + 0xDC00 - 0x10000. */
2207 #define UTF16_TO_UTF32(lead, trail) (((lead) << 10) + (trail) - NKF_INT32_C(0x35FDC00))
/*
 * Decodes one UTF-16 unit (or surrogate pair) from the bytes c1..c4,
 * honouring input_endian, and passes the code point to (*unicode_iconv).
 *
 * c1 == EOF forwards EOF to oconv and returns 0.  When the first unit is
 * a high surrogate but the second unit is not a low surrogate,
 * NKF_ICONV_NEED_TWO_MORE_BYTES is returned.  Otherwise the result of
 * (*unicode_iconv)(wc) is returned.
 *
 * NOTE(review): the name/parameter line is missing from this listing
 * (numbering skips 2209).
 */
2208 static size_t
2210 {
2211  nkf_char wc;
2212 
2213  if (c1 == EOF) {
2214  (*oconv)(EOF, 0);
2215  return 0;
2216  }
2217 
2218  if (input_endian == ENDIAN_BIG) {
2219  if (0xD8 <= c1 && c1 <= 0xDB) {
2220  if (0xDC <= c3 && c3 <= 0xDF) {
2221  wc = UTF16_TO_UTF32(c1 << 8 | c2, c3 << 8 | c4);
2222  } else return NKF_ICONV_NEED_TWO_MORE_BYTES;
2223  } else {
2224  wc = c1 << 8 | c2;
2225  }
2226  } else {
/* any other input_endian value is treated as little-endian */
2227  if (0xD8 <= c2 && c2 <= 0xDB) {
2228  if (0xDC <= c4 && c4 <= 0xDF) {
2229  wc = UTF16_TO_UTF32(c2 << 8 | c1, c4 << 8 | c3);
2230  } else return NKF_ICONV_NEED_TWO_MORE_BYTES;
2231  } else {
2232  wc = c2 << 8 | c1;
2233  }
2234  }
2235 
2236  return (*unicode_iconv)(wc);
2237 }
2238 
/*
 * Forwards (c2, c1) to oconv unchanged and returns the constant 16.
 * Per the inline comment, the distinct return value is what separates
 * this function from its 32-bit sibling.
 *
 * NOTE(review): the name/parameter line is missing from this listing
 * (numbering skips 2240).
 */
2239 static nkf_char
2241 {
2242  (*oconv)(c2, c1);
2243  return 16; /* different from w_iconv32 */
2244 }
2245 
/*
 * Forwards (c2, c1) to oconv unchanged and returns the constant 32.
 * Per the inline comment, the distinct return value is what separates
 * this function from its 16-bit sibling.
 *
 * NOTE(review): the name/parameter line is missing from this listing
 * (numbering skips 2247).
 */
2246 static nkf_char
2248 {
2249  (*oconv)(c2, c1);
2250  return 32; /* different from w_iconv16 */
2251 }
2252 
/*
 * Assembles a code point from four input bytes c1..c4 according to
 * input_endian and passes it to (*unicode_iconv).  Only three of the four
 * bytes contribute in each byte order; the byte that would hold bits
 * 24-31 is ignored.
 *
 * c1 == EOF forwards EOF to oconv and returns 0.
 *
 * NOTE(review): the name/parameter line (2254) and the body of the
 * default case (2277) are missing from this listing; presumably the
 * default returns an error code -- confirm.
 */
2253 static size_t
2255 {
2256  nkf_char wc;
2257 
2258  if (c1 == EOF) {
2259  (*oconv)(EOF, 0);
2260  return 0;
2261  }
2262 
2263  switch(input_endian){
2264  case ENDIAN_BIG:
2265  wc = c2 << 16 | c3 << 8 | c4;
2266  break;
2267  case ENDIAN_LITTLE:
2268  wc = c3 << 16 | c2 << 8 | c1;
2269  break;
2270  case ENDIAN_2143:
2271  wc = c1 << 16 | c4 << 8 | c3;
2272  break;
2273  case ENDIAN_3412:
2274  wc = c4 << 16 | c1 << 8 | c2;
2275  break;
2276  default:
2278  }
2279 
2280  return (*unicode_iconv)(wc);
2281 }
2282 #endif
2283 
/*
 * If the current output_mode is neither ASCII nor ISO_8859_1, emits
 * ESC '(' ascii_intro through o_putc and sets output_mode to `mode`.
 * Otherwise does nothing (output_mode is left unchanged).
 */
2284 #define output_ascii_escape_sequence(mode) do { \
2285  if (output_mode != ASCII && output_mode != ISO_8859_1) { \
2286  (*o_putc)(ESC); \
2287  (*o_putc)('('); \
2288  (*o_putc)(ascii_intro); \
2289  output_mode = mode; \
2290  } \
2291  } while (0)
2292 
/*
 * Switches the output character set to `mode`: if output_mode already
 * equals `mode` nothing happens; otherwise the matching escape sequence
 * is written through o_putc and output_mode is updated.  A `mode` with no
 * case below writes nothing but still updates output_mode.
 *
 * NOTE(review): the name/parameter line is missing from this listing
 * (numbering skips 2294).
 */
2293 static void
2295 {
2296  if (output_mode == mode)
2297  return;
2298  switch(mode) {
2299  case ISO_8859_1:
2300  (*o_putc)(ESC);
2301  (*o_putc)('.');
2302  (*o_putc)('A');
2303  break;
2304  case JIS_X_0201_1976_K:
2305  (*o_putc)(ESC);
2306  (*o_putc)('(');
2307  (*o_putc)('I');
2308  break;
2309  case JIS_X_0208:
2310  (*o_putc)(ESC);
2311  (*o_putc)('$');
2312  (*o_putc)(kanji_intro);
2313  break;
2314  case JIS_X_0212:
2315  (*o_putc)(ESC);
2316  (*o_putc)('$');
2317  (*o_putc)('(');
2318  (*o_putc)('D');
2319  break;
2320  case JIS_X_0213_1:
2321  (*o_putc)(ESC);
2322  (*o_putc)('$');
2323  (*o_putc)('(');
2324  (*o_putc)('Q');
2325  break;
2326  case JIS_X_0213_2:
2327  (*o_putc)(ESC);
2328  (*o_putc)('$');
2329  (*o_putc)('(');
2330  (*o_putc)('P');
2331  break;
2332  }
2333  output_mode = mode;
2334 }
2335 
/*
 * Output converter that writes the character (c2, c1) through o_putc as
 * one or two 7-bit bytes (ISO_8859_1 bytes get the high bit set).
 *
 * With NUMCHAR_OPTION, a wrapped Unicode value is first converted with
 * w16e_conv(); if it is still unmapped it is either turned into a
 * user-defined-character row (when ms_ucs_map_f is set and the value is
 * in 0xE000..0xE757) or handed to encode_fallback and dropped.
 *
 * NOTE(review): the name/parameter line (2337) and the first statement of
 * every branch below (2357, 2361, 2365, 2369, 2373, 2381) are missing
 * from this listing; presumably those are the escape-sequence calls that
 * switch the output character set -- confirm against the real source.
 */
2336 static void
2338 {
2339 #ifdef NUMCHAR_OPTION
2340  if (c2 == 0 && nkf_char_unicode_p(c1)){
2341  w16e_conv(c1, &c2, &c1);
2342  if (c2 == 0 && nkf_char_unicode_p(c1)){
2343  c2 = c1 & VALUE_MASK;
2344  if (ms_ucs_map_f && 0xE000 <= c2 && c2 <= 0xE757) {
2345  /* CP5022x UDC */
2346  c1 &= 0xFFF;
2347  c2 = 0x7F + c1 / 94;
2348  c1 = 0x21 + c1 % 94;
2349  } else {
2350  if (encode_fallback) (*encode_fallback)(c1);
2351  return;
2352  }
2353  }
2354  }
2355 #endif
2356  if (c2 == 0) {
2358  (*o_putc)(c1);
2359  }
2360  else if (c2 == EOF) {
2362  (*o_putc)(EOF);
2363  }
2364  else if (c2 == ISO_8859_1) {
2366  (*o_putc)(c1|0x80);
2367  }
2368  else if (c2 == JIS_X_0201_1976_K) {
2370  (*o_putc)(c1);
2371 #ifdef X0212_ENABLE
2372  } else if (is_eucg3(c2)){
2374  (*o_putc)(c2 & 0x7f);
2375  (*o_putc)(c1);
2376 #endif
2377  } else {
/* out-of-range pairs are dropped silently; the row limit is 0x92 instead
 * of 0x7e when ms_ucs_map_f is set (matching the UDC rows built above) */
2378  if(ms_ucs_map_f
2379  ? c2<0x20 || 0x92<c2 || c1<0x20 || 0x7e<c1
2380  : c2<0x20 || 0x7e<c2 || c1<0x20 || 0x7e<c1) return;
2382  (*o_putc)(c2);
2383  (*o_putc)(c1);
2384  }
2385 }
2386 
/*
 * Output converter that writes the character (c2, c1) through o_putc in
 * EUC form: two-byte characters get the high bit set on both bytes,
 * JIS_X_0201_1976_K characters are prefixed with SS2, and is_eucg3()
 * characters are prefixed with 0x8f (only when x0212_f is set).
 *
 * A wrapped Unicode value is first converted with w16e_conv(); if still
 * unmapped it becomes a user-defined character (when x0212_f is set and
 * the value is in 0xE000..0xE757) or is handed to encode_fallback.
 *
 * NOTE(review): the name/parameter line (2388) and one statement in the
 * ISO_8859_1 branch (2425) are missing from this listing.
 */
2387 static void
2389 {
2390  if (c2 == 0 && nkf_char_unicode_p(c1)){
2391  w16e_conv(c1, &c2, &c1);
2392  if (c2 == 0 && nkf_char_unicode_p(c1)){
2393  c2 = c1 & VALUE_MASK;
2394  if (x0212_f && 0xE000 <= c2 && c2 <= 0xE757) {
2395  /* eucJP-ms UDC */
2396  c1 &= 0xFFF;
2397  c2 = c1 / 94;
/* the first 10 rows map to rows 0x75.. ; later rows get an 0x8F..
 * value, which the is_eucg3() test below routes to the 0x8f-prefixed
 * three-byte form */
2398  c2 += c2 < 10 ? 0x75 : 0x8FEB;
2399  c1 = 0x21 + c1 % 94;
2400  if (is_eucg3(c2)){
2401  (*o_putc)(0x8f);
2402  (*o_putc)((c2 & 0x7f) | 0x080);
2403  (*o_putc)(c1 | 0x080);
2404  }else{
2405  (*o_putc)((c2 & 0x7f) | 0x080);
2406  (*o_putc)(c1 | 0x080);
2407  }
2408  return;
2409  } else {
2410  if (encode_fallback) (*encode_fallback)(c1);
2411  return;
2412  }
2413  }
2414  }
2415 
2416  if (c2 == EOF) {
2417  (*o_putc)(EOF);
2418  } else if (c2 == 0) {
2419  output_mode = ASCII;
2420  (*o_putc)(c1);
2421  } else if (c2 == JIS_X_0201_1976_K) {
2422  output_mode = EUC_JP;
2423  (*o_putc)(SS2); (*o_putc)(c1|0x80);
2424  } else if (c2 == ISO_8859_1) {
2426  (*o_putc)(c1 | 0x080);
2427 #ifdef X0212_ENABLE
2428  } else if (is_eucg3(c2)){
2429  output_mode = EUC_JP;
2430 #ifdef SHIFTJIS_CP932
2431  if (!cp932inv_f){
2432  nkf_char s2, s1;
2433  if (e2s_conv(c2, c1, &s2, &s1) == 0){
2434  s2e_conv(s2, s1, &c2, &c1);
2435  }
2436  }
2437 #endif
2438  if (c2 == 0) {
2439  output_mode = ASCII;
2440  (*o_putc)(c1);
2441  }else if (is_eucg3(c2)){
/* with x0212_f off, a character still in the prefixed set is dropped */
2442  if (x0212_f){
2443  (*o_putc)(0x8f);
2444  (*o_putc)((c2 & 0x7f) | 0x080);
2445  (*o_putc)(c1 | 0x080);
2446  }
2447  }else{
2448  (*o_putc)((c2 & 0x7f) | 0x080);
2449  (*o_putc)(c1 | 0x080);
2450  }
2451 #endif
2452  } else {
2453  if (!nkf_isgraph(c1) || !nkf_isgraph(c2)) {
2454  set_iconv(FALSE, 0);
2455  return; /* too late to rescue this char */
2456  }
2457  output_mode = EUC_JP;
2458  (*o_putc)(c2 | 0x080);
2459  (*o_putc)(c1 | 0x080);
2460  }
2461 }
2462 
/*
 * Output converter that writes the character (c2, c1) through o_putc in
 * Shift_JIS form, converting row/cell pairs with e2s_conv().
 *
 * With NUMCHAR_OPTION, a wrapped Unicode value is first converted with
 * w16e_conv(); if still unmapped it becomes a user-defined character
 * (when x0213_f is off and the value is in 0xE000..0xE757) or is handed
 * to encode_fallback.
 *
 * NOTE(review): the name/parameter line (2464) and one statement in each
 * of four branches (2494, 2497, 2501, 2512) are missing from this
 * listing; presumably they update output_mode -- confirm.
 */
2463 static void
2465 {
2466 #ifdef NUMCHAR_OPTION
2467  if (c2 == 0 && nkf_char_unicode_p(c1)){
2468  w16e_conv(c1, &c2, &c1);
2469  if (c2 == 0 && nkf_char_unicode_p(c1)){
2470  c2 = c1 & VALUE_MASK;
2471  if (!x0213_f && 0xE000 <= c2 && c2 <= 0xE757) {
2472  /* CP932 UDC */
2473  c1 &= 0xFFF;
2474  c2 = c1 / 188 + (cp932inv_f ? 0xF0 : 0xEB);
2475  c1 = c1 % 188;
/* skip the unused second byte 0x7F */
2476  c1 += 0x40 + (c1 > 0x3e);
2477  (*o_putc)(c2);
2478  (*o_putc)(c1);
2479  return;
2480  } else {
2481  if(encode_fallback)(*encode_fallback)(c1);
2482  return;
2483  }
2484  }
2485  }
2486 #endif
2487  if (c2 == EOF) {
2488  (*o_putc)(EOF);
2489  return;
2490  } else if (c2 == 0) {
2491  output_mode = ASCII;
2492  (*o_putc)(c1);
2493  } else if (c2 == JIS_X_0201_1976_K) {
2495  (*o_putc)(c1|0x80);
2496  } else if (c2 == ISO_8859_1) {
2498  (*o_putc)(c1 | 0x080);
2499 #ifdef X0212_ENABLE
2500  } else if (is_eucg3(c2)){
/* characters e2s_conv() cannot map are dropped silently */
2502  if (e2s_conv(c2, c1, &c2, &c1) == 0){
2503  (*o_putc)(c2);
2504  (*o_putc)(c1);
2505  }
2506 #endif
2507  } else {
2508  if (!nkf_isprint(c1) || !nkf_isprint(c2)) {
2509  set_iconv(FALSE, 0);
2510  return; /* too late to rescue this char */
2511  }
/* the return value of e2s_conv() is not checked here */
2513  e2s_conv(c2, c1, &c2, &c1);
2514 
2515 #ifdef SHIFTJIS_CP932
2516  if (cp932inv_f
2517  && CP932INV_TABLE_BEGIN <= c2 && c2 <= CP932INV_TABLE_END){
2518  nkf_char c = cp932inv[c2 - CP932INV_TABLE_BEGIN][c1 - 0x40];
2519  if (c){
2520  c2 = c >> 8;
2521  c1 = c & 0xff;
2522  }
2523  }
2524 #endif /* SHIFTJIS_CP932 */
2525 
2526  (*o_putc)(c2);
/* when prefix_table has a non-zero entry for the second byte, that entry
 * is written between the two bytes (presumably set up by the "prefix="
 * option -- confirm) */
2527  if (prefix_table[(unsigned char)c1]){
2528  (*o_putc)(prefix_table[(unsigned char)c1]);
2529  }
2530  (*o_putc)(c1);
2531  }
2532 }
2533 
2534 #ifdef UTF8_OUTPUT_ENABLE
/*
 * Output converter that writes the character (c2, c1) through o_putc as
 * UTF-8.  On the first call with output_bom_f set, the three-byte UTF-8
 * BOM (EF BB BF) is written first and the flag is cleared.
 *
 * Wrapped Unicode values are encoded directly; other non-ASCII characters
 * are mapped with e2w_conv() and dropped silently when that returns 0.
 * Zero bytes returned by nkf_unicode_to_utf8() in slots 2..4 are treated
 * as "unused" and not written.
 *
 * NOTE(review): the name/parameter line is missing from this listing
 * (numbering skips 2536).
 */
2535 static void
2537 {
2538  nkf_char c3, c4;
2539  nkf_char val;
2540 
2541  if (output_bom_f) {
2542  output_bom_f = FALSE;
2543  (*o_putc)('\357');
2544  (*o_putc)('\273');
2545  (*o_putc)('\277');
2546  }
2547 
2548  if (c2 == EOF) {
2549  (*o_putc)(EOF);
2550  return;
2551  }
2552 
2553  if (c2 == 0 && nkf_char_unicode_p(c1)){
2554  val = c1 & VALUE_MASK;
2555  nkf_unicode_to_utf8(val, &c1, &c2, &c3, &c4);
2556  (*o_putc)(c1);
2557  if (c2) (*o_putc)(c2);
2558  if (c3) (*o_putc)(c3);
2559  if (c4) (*o_putc)(c4);
2560  return;
2561  }
2562 
2563  if (c2 == 0) {
2564  (*o_putc)(c1);
2565  } else {
2566  val = e2w_conv(c2, c1);
2567  if (val){
2568  nkf_unicode_to_utf8(val, &c1, &c2, &c3, &c4);
2569  (*o_putc)(c1);
2570  if (c2) (*o_putc)(c2);
2571  if (c3) (*o_putc)(c3);
2572  if (c4) (*o_putc)(c4);
2573  }
2574  }
2575 }
2576 
/*
 * UTF-16 output converter: writes the character described by (c2, c1)
 * through o_putc as 16-bit units in the byte order given by output_endian.
 *   - While output_bom_f is set, a BOM (FF FE or FE FF) is written once.
 *   - c2 == EOF forwards EOF to o_putc.
 *   - A Unicode-tagged c1 (with c2 == 0) is written as one unit when
 *     nkf_char_unicode_bmp_p() holds, otherwise as a surrogate pair.
 *   - Any other non-zero c2 is mapped with e2w_conv().
 *   - c2 == 0 without the Unicode tag is written as the unit (0, c1).
 */
2577 static void
2579 {
2580  if (output_bom_f) {
2581  output_bom_f = FALSE;
2582  if (output_endian == ENDIAN_LITTLE){
2583  (*o_putc)(0xFF);
2584  (*o_putc)(0xFE);
2585  }else{
2586  (*o_putc)(0xFE);
2587  (*o_putc)(0xFF);
2588  }
2589  }
2590
2591  if (c2 == EOF) {
2592  (*o_putc)(EOF);
2593  return;
2594  }
2595
2596  if (c2 == 0 && nkf_char_unicode_p(c1)) {
2597  if (nkf_char_unicode_bmp_p(c1)) {
 /* split into high byte (c2) and low byte (c1); written at the bottom */
2598  c2 = (c1 >> 8) & 0xff;
2599  c1 &= 0xff;
2600  } else {
2601  c1 &= VALUE_MASK;
 /* values above UNICODE_MAX produce no output at all */
2602  if (c1 <= UNICODE_MAX) {
 /* 0xD7C0 == 0xD800 - (0x10000 >> 10), so no separate subtraction is needed */
2603  c2 = (c1 >> 10) + NKF_INT32_C(0xD7C0); /* high surrogate */
2604  c1 = (c1 & 0x3FF) + NKF_INT32_C(0xDC00); /* low surrogate */
2605  if (output_endian == ENDIAN_LITTLE){
2606  (*o_putc)(c2 & 0xff);
2607  (*o_putc)((c2 >> 8) & 0xff);
2608  (*o_putc)(c1 & 0xff);
2609  (*o_putc)((c1 >> 8) & 0xff);
2610  }else{
2611  (*o_putc)((c2 >> 8) & 0xff);
2612  (*o_putc)(c2 & 0xff);
2613  (*o_putc)((c1 >> 8) & 0xff);
2614  (*o_putc)(c1 & 0xff);
2615  }
2616  }
2617  return;
2618  }
2619  } else if (c2) {
2620  nkf_char val = e2w_conv(c2, c1);
 /* NOTE(review): only the low 16 bits of val are kept here; confirm
  * e2w_conv() never returns a value that needs more than 16 bits. */
2621  c2 = (val >> 8) & 0xff;
2622  c1 = val & 0xff;
 /* val == 0: no mapping was produced, so the character is dropped */
2623  if (!val) return;
2624  }
2625
2626  if (output_endian == ENDIAN_LITTLE){
2627  (*o_putc)(c1);
2628  (*o_putc)(c2);
2629  }else{
2630  (*o_putc)(c2);
2631  (*o_putc)(c1);
2632  }
2633 }
2634 
/*
 * UTF-32 output converter: writes the character described by (c2, c1)
 * through o_putc as four bytes in the byte order given by output_endian.
 *   - While output_bom_f is set, a BOM (FF FE 00 00 or 00 00 FE FF) is
 *     written once.
 *   - c2 == EOF forwards EOF to o_putc.
 *   - c2 == ISO_8859_1 sets bit 7 of c1.
 *   - A Unicode-tagged c1 (with c2 == 0) has its tag bits masked off.
 *   - Any other non-zero c2 is mapped with e2w_conv(); nothing is written
 *     when that returns 0.
 */
2635 static void
2637 {
2638  if (output_bom_f) {
2639  output_bom_f = FALSE;
2640  if (output_endian == ENDIAN_LITTLE){
2641  (*o_putc)(0xFF);
2642  (*o_putc)(0xFE);
2643  (*o_putc)(0);
2644  (*o_putc)(0);
2645  }else{
2646  (*o_putc)(0);
2647  (*o_putc)(0);
2648  (*o_putc)(0xFE);
2649  (*o_putc)(0xFF);
2650  }
2651  }
2652
2653  if (c2 == EOF) {
2654  (*o_putc)(EOF);
2655  return;
2656  }
2657
2658  if (c2 == ISO_8859_1) {
2659  c1 |= 0x80;
2660  } else if (c2 == 0 && nkf_char_unicode_p(c1)) {
2661  c1 &= VALUE_MASK;
2662  } else if (c2) {
2663  c1 = e2w_conv(c2, c1);
2664  if (!c1) return;
2665  }
 /* only the low 24 bits of c1 are emitted; the top byte is always 0 */
2666  if (output_endian == ENDIAN_LITTLE){
2667  (*o_putc)( c1 & 0xFF);
2668  (*o_putc)((c1 >> 8) & 0xFF);
2669  (*o_putc)((c1 >> 16) & 0xFF);
2670  (*o_putc)(0);
2671  }else{
2672  (*o_putc)(0);
2673  (*o_putc)((c1 >> 16) & 0xFF);
2674  (*o_putc)((c1 >> 8) & 0xFF);
2675  (*o_putc)( c1 & 0xFF);
2676  }
2677 }
2678 #endif
2679 
/*
 * Bit flags that the scoring helpers OR into (or clear from) ptr->score.
 * Each flag is the previous one shifted left by one bit, so later entries
 * weigh more. When guessing, the candidate with the numerically lowest
 * score is chosen.
 */
2680 #define SCORE_L2 (1) /* Kanji Level 2 */
2681 #define SCORE_KANA (SCORE_L2 << 1) /* Halfwidth Katakana */
2682 #define SCORE_DEPEND (SCORE_KANA << 1) /* MD Characters */
2683 #define SCORE_CP932 (SCORE_DEPEND << 1) /* IBM extended characters */
2684 #define SCORE_X0212 (SCORE_CP932 << 1) /* JIS X 0212 */
2685 #define SCORE_NO_EXIST (SCORE_X0212 << 1) /* Undefined Characters */
2686 #define SCORE_iMIME (SCORE_NO_EXIST << 1) /* MIME selected */
2687 #define SCORE_ERROR (SCORE_iMIME << 1) /* Error */
2688
/* Score every candidate starts with after a reset. */
2689 #define SCORE_INIT (SCORE_iMIME)
2690 
/*
 * Score bits for lead bytes with (c2 & 0x70) == 0x20, indexed by c2 & 0x0f.
 * NOTE(review): some initializer rows are absent from this listing.
 */
2691 static const nkf_char score_table_A0[] = {
2692  0, 0, 0, 0,
2693  0, 0, 0, 0,
2696 };
2697 
/*
 * Score bits for lead bytes with (c2 & 0x70) == 0x70, indexed by c2 & 0x0f.
 * NOTE(review): the initializer rows are absent from this listing.
 */
2698 static const nkf_char score_table_F0[] = {
2703 };
2704 
/* OR the bits in score into ptr->score; does nothing when ptr is null. */
2705 static void
2707 {
2708  if (ptr){
2709  ptr->score |= score;
2710  }
2711 }
2712 
/* Clear the bits in score from ptr->score; does nothing when ptr is null. */
2713 static void
2715 {
2716  if (ptr){
2717  ptr->score &= ~score;
2718  }
2719 }
2720 
/*
 * Update ptr->score from the character held in ptr->buf[0] (lead byte, c2)
 * and, when UTF8_OUTPUT_ENABLE is defined, ptr->buf[1] (c1).
 * The branches are tried in order; only the first match applies.
 * NOTE(review): the bodies of the c2 < 0, c2 == 0x8f and !e2w_conv()
 * branches are absent from this listing.
 */
2721 static void
2723 {
2724  nkf_char c2 = ptr->buf[0];
2725 #ifdef UTF8_OUTPUT_ENABLE
2726  nkf_char c1 = ptr->buf[1];
2727 #endif
2728  if (c2 < 0){
2730  }else if (c2 == SS2){
2731  set_code_score(ptr, SCORE_KANA);
2732  }else if (c2 == 0x8f){
2734 #ifdef UTF8_OUTPUT_ENABLE
2735  }else if (!e2w_conv(c2, c1)){
2737 #endif
2738  }else if ((c2 & 0x70) == 0x20){
 /* per-lead-byte score looked up by the low nibble */
2739  set_code_score(ptr, score_table_A0[c2 & 0x0f]);
2740  }else if ((c2 & 0x70) == 0x70){
2741  set_code_score(ptr, score_table_F0[c2 & 0x0f]);
2742  }else if ((c2 & 0x70) >= 0x50){
2743  set_code_score(ptr, SCORE_L2);
2744  }
2745 }
2746 
/*
 * Mark the candidate ptr as ruled out: stat and buf[0] become -1, the score
 * is recomputed, and if the currently selected iconv is this candidate's
 * iconv_func the selection is withdrawn via set_iconv(FALSE, 0).
 */
2747 static void
2749 {
2750  ptr->stat = -1;
2751  ptr->buf[0] = -1;
2752  code_score(ptr);
2753  if (iconv == ptr->iconv_func) set_iconv(FALSE, 0);
2754 }
2755 
/* Append c to ptr->buf and advance ptr->index. No bounds check is done here. */
2756 static void
2758 {
2759  ptr->buf[ptr->index++] = c;
2760 }
2761 
/* Return ptr to the idle state (stat 0, empty buffer); the score is kept. */
2762 static void
2764 {
2765  ptr->stat = 0;
2766  ptr->index = 0;
2767 }
2768 
/* Clear ptr's state and set its score back to SCORE_INIT. */
2769 static void
2771 {
2772  status_clear(ptr);
2773  ptr->score = SCORE_INIT;
2774 }
2775 
/* Like status_reset(), and additionally zero ptr->_file_stat. */
2776 static void
2778 {
2779  status_reset(ptr);
2780  ptr->_file_stat = 0;
2781 }
2782 
/*
 * Give a disabled candidate another chance: when c is at most DEL and
 * estab_f is set, ptr is reset.
 */
2783 static void
2785 {
2786  if (c <= DEL && estab_f){
2787  status_reset(ptr);
2788  }
2789 }
2790 
/*
 * Byte-at-a-time state machine for the candidate ptr; the converter called
 * is s2e_conv(). Feeds one input character c.
 *   stat -1 : disabled; only status_check() is consulted.
 *   stat  0 : idle, c is examined as a first byte.
 *   stat  1 : second byte expected (ranges 0x81-0x9f, 0xe0-0xea and, with
 *             X0212_ENABLE, 0xf0-0xfc).
 *   stat  2 : second byte expected after an is_ibmext_in_sjis() lead byte.
 *   stat  3 : second byte expected after lead byte 0xed or 0xee.
 * A byte outside the accepted range disables the candidate.
 */
2791 static void
2793 {
2794  switch(ptr->stat){
2795  case -1:
2796  status_check(ptr, c);
2797  break;
2798  case 0:
2799  if (c <= DEL){
2800  break;
2801  }else if (nkf_char_unicode_p(c)){
2802  break;
2803  }else if (0xa1 <= c && c <= 0xdf){
 /* single byte in 0xa1-0xdf: scored as the pair (SS2, c) */
2804  status_push_ch(ptr, SS2);
2805  status_push_ch(ptr, c);
2806  code_score(ptr);
2807  status_clear(ptr);
2808  }else if ((0x81 <= c && c < 0xa0) || (0xe0 <= c && c <= 0xea)){
2809  ptr->stat = 1;
2810  status_push_ch(ptr, c);
2811  }else if (0xed <= c && c <= 0xee){
2812  ptr->stat = 3;
2813  status_push_ch(ptr, c);
2814 #ifdef SHIFTJIS_CP932
2815  }else if (is_ibmext_in_sjis(c)){
2816  ptr->stat = 2;
2817  status_push_ch(ptr, c);
2818 #endif /* SHIFTJIS_CP932 */
2819 #ifdef X0212_ENABLE
2820  }else if (0xf0 <= c && c <= 0xfc){
2821  ptr->stat = 1;
2822  status_push_ch(ptr, c);
2823 #endif /* X0212_ENABLE */
2824  }else{
2825  status_disable(ptr);
2826  }
2827  break;
2828  case 1:
2829  if ((0x40 <= c && c <= 0x7e) || (0x80 <= c && c <= 0xfc)){
2830  status_push_ch(ptr, c);
 /* convert in place so that code_score() sees the converted pair */
2831  s2e_conv(ptr->buf[0], ptr->buf[1], &ptr->buf[0], &ptr->buf[1]);
2832  code_score(ptr);
2833  status_clear(ptr);
2834  }else{
2835  status_disable(ptr);
2836  }
2837  break;
2838  case 2:
2839 #ifdef SHIFTJIS_CP932
2840  if ((0x40 <= c && c <= 0x7e) || (0x80 <= c && c <= 0xfc)) {
2841  status_push_ch(ptr, c);
2842  if (s2e_conv(ptr->buf[0], ptr->buf[1], &ptr->buf[0], &ptr->buf[1]) == 0) {
2844  status_clear(ptr);
2845  break;
2846  }
2847  }
2848 #endif /* SHIFTJIS_CP932 */
 /* reached when the byte is out of range or the conversion failed */
2849  status_disable(ptr);
2850  break;
2851  case 3:
2852  if ((0x40 <= c && c <= 0x7e) || (0x80 <= c && c <= 0xfc)){
2853  status_push_ch(ptr, c);
2854  s2e_conv(ptr->buf[0], ptr->buf[1], &ptr->buf[0], &ptr->buf[1]);
2856  status_clear(ptr);
2857  }else{
2858  status_disable(ptr);
2859  }
2860  break;
2861  }
2862 }
2863 
/*
 * Byte-at-a-time state machine for the candidate ptr that accepts SS2 or
 * 0xa1-0xfe as a first byte and, with X0212_ENABLE, the prefix 0x8f.
 * Feeds one input character c.
 *   stat -1 : disabled; only status_check() is consulted.
 *   stat  0 : idle, c is examined as a first byte.
 *   stat  1 : final byte expected (0xa1-0xfe); the character is then scored.
 *   stat  2 : byte following 0x8f expected (0xa1-0xfe); moves to stat 1.
 * A byte outside the accepted range disables the candidate.
 */
2864 static void
2866 {
2867  switch (ptr->stat){
2868  case -1:
2869  status_check(ptr, c);
2870  break;
2871  case 0:
2872  if (c <= DEL){
2873  break;
2874  }else if (nkf_char_unicode_p(c)){
2875  break;
2876  }else if (SS2 == c || (0xa1 <= c && c <= 0xfe)){
2877  ptr->stat = 1;
2878  status_push_ch(ptr, c);
2879 #ifdef X0212_ENABLE
2880  }else if (0x8f == c){
2881  ptr->stat = 2;
2882  status_push_ch(ptr, c);
2883 #endif /* X0212_ENABLE */
2884  }else{
2885  status_disable(ptr);
2886  }
2887  break;
2888  case 1:
2889  if (0xa1 <= c && c <= 0xfe){
2890  status_push_ch(ptr, c);
2891  code_score(ptr);
2892  status_clear(ptr);
2893  }else{
2894  status_disable(ptr);
2895  }
2896  break;
2897 #ifdef X0212_ENABLE
2898  case 2:
2899  if (0xa1 <= c && c <= 0xfe){
2900  ptr->stat = 1;
2901  status_push_ch(ptr, c);
2902  }else{
2903  status_disable(ptr);
2904  }
 /* last case of the switch: no break is needed */
2905 #endif /* X0212_ENABLE */
2906  }
2907 }
2908 
2909 #ifdef UTF8_INPUT_ENABLE
/*
 * Byte-at-a-time state machine for the candidate ptr that recognises UTF-8
 * lead bytes. Feeds one input character c.
 *   stat -1 : disabled; only status_check() is consulted.
 *   stat  0 : idle; lead bytes 0xc0-0xdf, 0xe0-0xef and 0xf0-0xf4 select
 *             stat 1, 2 and 3 respectively.
 *   stat 1/2: continuation bytes (0x80-0xbf) are collected until more than
 *             stat bytes are buffered, then the sequence is converted with
 *             w2e_conv() and scored.
 *   stat  3 : continuation bytes are collected but the sequence is not
 *             scored.
 * A byte outside the accepted range disables the candidate.
 */
2910 static void
2912 {
2913  switch (ptr->stat){
2914  case -1:
2915  status_check(ptr, c);
2916  break;
2917  case 0:
2918  if (c <= DEL){
2919  break;
2920  }else if (nkf_char_unicode_p(c)){
2921  break;
2922  }else if (0xc0 <= c && c <= 0xdf){
2923  ptr->stat = 1;
2924  status_push_ch(ptr, c);
2925  }else if (0xe0 <= c && c <= 0xef){
2926  ptr->stat = 2;
2927  status_push_ch(ptr, c);
2928  }else if (0xf0 <= c && c <= 0xf4){
2929  ptr->stat = 3;
2930  status_push_ch(ptr, c);
2931  }else{
2932  status_disable(ptr);
2933  }
2934  break;
2935  case 1:
2936  case 2:
2937  if (0x80 <= c && c <= 0xbf){
2938  status_push_ch(ptr, c);
 /* index > stat: lead byte plus all continuation bytes are buffered */
2939  if (ptr->index > ptr->stat){
 /* remember whether this was EF BB BF before buf is overwritten */
2940  int bom = (ptr->buf[0] == 0xef && ptr->buf[1] == 0xbb
2941  && ptr->buf[2] == 0xbf);
2942  w2e_conv(ptr->buf[0], ptr->buf[1], ptr->buf[2],
2943  &ptr->buf[0], &ptr->buf[1]);
 /* a BOM does not contribute to the score */
2944  if (!bom){
2945  code_score(ptr);
2946  }
2947  status_clear(ptr);
2948  }
2949  }else{
2950  status_disable(ptr);
2951  }
2952  break;
2953  case 3:
2954  if (0x80 <= c && c <= 0xbf){
2955  if (ptr->index < ptr->stat){
2956  status_push_ch(ptr, c);
2957  } else {
 /* last continuation byte: drop the sequence without scoring it */
2958  status_clear(ptr);
2959  }
2960  }else{
2961  status_disable(ptr);
2962  }
2963  break;
2964  }
2965 }
2966 #endif
2967 
/*
 * Feed the input character c to the status function of every entry in
 * input_code_list and act on the combined result:
 *   - if exactly one candidate is idle (stat == 0), no candidate is in the
 *     middle of a character (stat > 0) and estab_f is not yet set, that
 *     candidate's iconv_func is selected with set_iconv(TRUE, ...);
 *   - otherwise, under the same "no ambiguity" condition and with
 *     c <= DEL, every candidate is reset.
 */
2968 static void
2970 {
 /* stays 1 only while the outcome is unambiguous */
2971  int action_flag = 1;
2972  struct input_code *result = 0;
2973  struct input_code *p = input_code_list;
2974  while (p->name){
2975  if (!p->status_func) {
2976  ++p;
2977  continue;
2978  }
 /* NOTE(review): this test can never be true here, because the identical
  * check above has already skipped such entries; it is dead code. */
2979  if (!p->status_func)
2980  continue;
2981  (p->status_func)(p, c);
2982  if (p->stat > 0){
 /* a candidate is in the middle of a multi-byte character */
2983  action_flag = 0;
2984  }else if(p->stat == 0){
2985  if (result){
 /* more than one candidate is still viable */
2986  action_flag = 0;
2987  }else{
2988  result = p;
2989  }
2990  }
2991  ++p;
2992  }
2993
2994  if (action_flag){
2995  if (result && !estab_f){
2996  set_iconv(TRUE, result->iconv_func);
2997  }else if (c <= DEL){
2998  struct input_code *ptr = input_code_list;
2999  while (ptr->name){
3000  status_reset(ptr);
3001  ++ptr;
3002  }
3003  }
3004  }
3005 }
3006 
/*
 * Converter state shared across the I/O helpers.
 * NOTE(review): the member declarations are absent from this listing; the
 * code below accesses std_gc_buf, broken_buf, nfc_buf, broken_state and
 * mimeout_state.
 */
3007 typedef struct {
3013 } nkf_state_t;
3014 
3016 
/* Capacity passed to nkf_buf_new() for the std_gc_buf push-back buffer. */
3017 #define STD_GC_BUFSIZE (256)
3018 
/*
 * Prepare the global nkf_state for a new run. On the first call the state
 * object and its three buffers are allocated; on later calls the existing
 * buffers are emptied and reused. broken_state and mimeout_state are zeroed
 * in both cases.
 */
3019 static void
3021 {
3022  if (nkf_state) {
3023  nkf_buf_clear(nkf_state->std_gc_buf);
3024  nkf_buf_clear(nkf_state->broken_buf);
3025  nkf_buf_clear(nkf_state->nfc_buf);
3026  }
3027  else {
3028  nkf_state = nkf_xmalloc(sizeof(nkf_state_t));
3029  nkf_state->std_gc_buf = nkf_buf_new(STD_GC_BUFSIZE);
3030  nkf_state->broken_buf = nkf_buf_new(3);
3031  nkf_state->nfc_buf = nkf_buf_new(9);
3032  }
3033  nkf_state->broken_state = 0;
3034  nkf_state->mimeout_state = 0;
3035 }
3036 
3037 #ifndef WIN32DLL
/*
 * Read one character: a pushed-back character from std_gc_buf when there is
 * one, otherwise the next character of stream f via getc().
 */
3038 static nkf_char
3040 {
3041  if (!nkf_buf_empty_p(nkf_state->std_gc_buf)){
3042  return nkf_buf_pop(nkf_state->std_gc_buf);
3043  }
3044  return getc(f);
3045 }
3046 #endif /*WIN32DLL*/
3047 
/* Push c back onto std_gc_buf so the next read returns it; returns c. */
3048 static nkf_char
3050 {
3051  nkf_buf_push(nkf_state->std_gc_buf, c);
3052  return c;
3053 }
3054 
3055 #ifndef WIN32DLL
/* Write c to standard output with putchar(); EOF is silently ignored. */
3056 static void
3058 {
3059  if(c!=EOF)
3060  putchar(c);
3061 }
3062 #endif /*WIN32DLL*/
3063 
/* Number of characters currently stored in hold_buf. */
3065 static int hold_count = 0;
/*
 * Append c2 to hold_buf.
 * Returns EOF when the buffer was already full (c2 is not stored) or has
 * just become full (c2 is stored); otherwise returns the new hold_count.
 * The capacity used here is HOLD_SIZE*2.
 */
3066 static nkf_char
3068 {
3069  if (hold_count >= HOLD_SIZE*2)
3070  return (EOF);
3071  hold_buf[hold_count++] = c2;
3072  return ((hold_count >= HOLD_SIZE*2) ? EOF : hold_count);
3073 }
3074 
/*
 * Hold-and-guess conversion. Starting from the two characters c1 and c2,
 * input from f is buffered in hold_buf while code_status() tries to
 * establish the input encoding. Buffering stops at EOF, at an ESC (which
 * is pushed back), when the buffer is full, or once estab_f is set.
 * If the encoding is still not established, the candidate with the lowest
 * score is selected. The held characters are then replayed through iconv,
 * reading further characters from f when a multi-byte sequence runs past
 * the end of the buffer.
 * Returns the last value read by the buffering loop, or EOF when input
 * ended inside a 3- or 4-byte sequence during replay.
 */
3075 static int
3077 {
3078  int ret;
3079  int hold_index;
3080  nkf_char c3, c4;
3081
3086  hold_count = 0;
3087  push_hold_buf(c1);
3088  push_hold_buf(c2);
3089
3090  while ((c2 = (*i_getc)(f)) != EOF) {
3091  if (c2 == ESC){
3092  (*i_ungetc)(c2,f);
3093  break;
3094  }
3095  code_status(c2);
3096  if (push_hold_buf(c2) == EOF || estab_f) {
3097  break;
3098  }
3099  }
3100
3101  if (!estab_f) {
3102  struct input_code *p = input_code_list;
3103  struct input_code *result = p;
3104  if (c2 == EOF) {
3105  code_status(c2);
3106  }
 /* pick the candidate with the smallest score among those that have a
  * status function; the first list entry is the default */
3107  while (p->name) {
3108  if (p->status_func && p->score < result->score) {
3109  result = p;
3110  }
3111  p++;
3112  }
3113  set_iconv(TRUE, result->iconv_func);
3114  }
3115
3116
3126  ret = c2;
3127  hold_index = 0;
3128  while (hold_index < hold_count){
3129  c1 = hold_buf[hold_index++];
3130  if (nkf_char_unicode_p(c1)) {
 /* already a Unicode value: bypass iconv */
3131  (*oconv)(0, c1);
3132  continue;
3133  }
3134  else if (c1 <= DEL){
3135  (*iconv)(0, c1, 0);
3136  continue;
3137  }else if (iconv == s_iconv && 0xa1 <= c1 && c1 <= 0xdf){
3138  (*iconv)(JIS_X_0201_1976_K, c1, 0);
3139  continue;
3140  }
 /* multi-byte character: fetch the second byte from the hold buffer,
  * or from the stream when the buffer is exhausted */
3141  if (hold_index < hold_count){
3142  c2 = hold_buf[hold_index++];
3143  }else{
3144  c2 = (*i_getc)(f);
3145  if (c2 == EOF){
 /* NOTE(review): c4 is assigned but not read after this break, and ret
  * is left unchanged; confirm this is intended. */
3146  c4 = EOF;
3147  break;
3148  }
3149  code_status(c2);
3150  }
3151  c3 = 0;
3152  switch ((*iconv)(c1, c2, 0)) { /* can be EUC/SJIS/UTF-8 */
3153  case -2:
3154  /* 4 bytes UTF-8 */
3155  if (hold_index < hold_count){
3156  c3 = hold_buf[hold_index++];
3157  } else if ((c3 = (*i_getc)(f)) == EOF) {
3158  ret = EOF;
3159  break;
3160  }
3161  code_status(c3);
3162  if (hold_index < hold_count){
3163  c4 = hold_buf[hold_index++];
3164  } else if ((c4 = (*i_getc)(f)) == EOF) {
3165  c3 = ret = EOF;
3166  break;
3167  }
3168  code_status(c4);
 /* third and fourth bytes are packed into one argument */
3169  (*iconv)(c1, c2, (c3<<8)|c4);
3170  break;
3171  case -1:
3172  /* 3 bytes EUC or UTF-8 */
3173  if (hold_index < hold_count){
3174  c3 = hold_buf[hold_index++];
3175  } else if ((c3 = (*i_getc)(f)) == EOF) {
3176  ret = EOF;
3177  break;
3178  } else {
3179  code_status(c3);
3180  }
3181  (*iconv)(c1, c2, c3);
3182  break;
3183  }
 /* c3 == EOF signals that input ended inside a sequence */
3184  if (c3 == EOF) break;
3185  }
3186  return ret;
3187 }
3188 
3189 /*
3190  * Check and Ignore BOM
 *
 * Reads up to four bytes from f and compares them against these patterns:
 *   00 00 FE FF, 00 00 FF FE, FE FF 00 00, FF FE 00 00  (checked against
 *       w_iconv32)
 *   EF BB BF                                            (checked against
 *       w_iconv)
 *   FE FF, FF FE                                        (checked against
 *       w_iconv16)
 * When a pattern matches and iconv is the corresponding converter, the
 * function returns with the BOM consumed. In every other case the bytes
 * that were read are pushed back with i_ungetc in reverse order, so the
 * stream is left unchanged.
 * NOTE(review): the statements inside the if(!input_encoding) blocks and
 * before some returns are absent from this listing.
3191  */
3192 static void
3194 {
3195  int c2;
3196  switch(c2 = (*i_getc)(f)){
3197  case 0x00:
3198  if((c2 = (*i_getc)(f)) == 0x00){
3199  if((c2 = (*i_getc)(f)) == 0xFE){
3200  if((c2 = (*i_getc)(f)) == 0xFF){
3201  if(!input_encoding){
3203  }
3204  if (iconv == w_iconv32) {
3206  return;
3207  }
3208  (*i_ungetc)(0xFF,f);
3209  }else (*i_ungetc)(c2,f);
3210  (*i_ungetc)(0xFE,f);
3211  }else if(c2 == 0xFF){
3212  if((c2 = (*i_getc)(f)) == 0xFE){
3213  if(!input_encoding){
3215  }
3216  if (iconv == w_iconv32) {
3218  return;
3219  }
3220  (*i_ungetc)(0xFF,f);
3221  }else (*i_ungetc)(c2,f);
3222  (*i_ungetc)(0xFF,f);
3223  }else (*i_ungetc)(c2,f);
3224  (*i_ungetc)(0x00,f);
3225  }else (*i_ungetc)(c2,f);
3226  (*i_ungetc)(0x00,f);
3227  break;
3228  case 0xEF:
3229  if((c2 = (*i_getc)(f)) == 0xBB){
3230  if((c2 = (*i_getc)(f)) == 0xBF){
3231  if(!input_encoding){
3233  }
3234  if (iconv == w_iconv) {
3235  return;
3236  }
3237  (*i_ungetc)(0xBF,f);
3238  }else (*i_ungetc)(c2,f);
3239  (*i_ungetc)(0xBB,f);
3240  }else (*i_ungetc)(c2,f);
3241  (*i_ungetc)(0xEF,f);
3242  break;
3243  case 0xFE:
3244  if((c2 = (*i_getc)(f)) == 0xFF){
3245  if((c2 = (*i_getc)(f)) == 0x00){
3246  if((c2 = (*i_getc)(f)) == 0x00){
3247  if(!input_encoding){
3249  }
3250  if (iconv == w_iconv32) {
3252  return;
3253  }
3254  (*i_ungetc)(0x00,f);
3255  }else (*i_ungetc)(c2,f);
3256  (*i_ungetc)(0x00,f);
3257  }else (*i_ungetc)(c2,f);
 /* not a four-byte match: fall back to treating FE FF as a two-byte BOM */
3258  if(!input_encoding){
3260  }
3261  if (iconv == w_iconv16) {
3263  return;
3264  }
3265  (*i_ungetc)(0xFF,f);
3266  }else (*i_ungetc)(c2,f);
3267  (*i_ungetc)(0xFE,f);
3268  break;
3269  case 0xFF:
3270  if((c2 = (*i_getc)(f)) == 0xFE){
3271  if((c2 = (*i_getc)(f)) == 0x00){
3272  if((c2 = (*i_getc)(f)) == 0x00){
3273  if(!input_encoding){
3275  }
3276  if (iconv == w_iconv32) {
3278  return;
3279  }
3280  (*i_ungetc)(0x00,f);
3281  }else (*i_ungetc)(c2,f);
3282  (*i_ungetc)(0x00,f);
3283  }else (*i_ungetc)(c2,f);
 /* not a four-byte match: fall back to treating FF FE as a two-byte BOM */
3284  if(!input_encoding){
3286  }
3287  if (iconv == w_iconv16) {
3289  return;
3290  }
3291  (*i_ungetc)(0xFE,f);
3292  }else (*i_ungetc)(c2,f);
3293  (*i_ungetc)(0xFF,f);
3294  break;
3295  default:
3296  (*i_ungetc)(c2,f);
3297  break;
3298  }
3299 }
3300 
/*
 * Input filter that restores escape sequences whose ESC byte is missing.
 * Characters queued in broken_buf are returned first. Otherwise one
 * character is read with i_bgetc:
 *   - '$' followed by '@' or 'B', and '(' followed by 'J' or 'B', are
 *     turned into ESC plus those two characters (ESC is returned now, the
 *     other two are queued in broken_buf);
 *   - this is only attempted when the previously returned character was not
 *     ESC; for '$' it also requires input_mode to be ASCII or
 *     JIS_X_0201_1976_K;
 *   - any other character is returned unchanged and remembered in
 *     broken_state.
 * NOTE(review): the second line of the '(' condition is absent from this
 * listing.
 */
3301 static nkf_char
3303 {
3304  nkf_char c, c1;
3305
3306  if (!nkf_buf_empty_p(nkf_state->broken_buf)) {
3307  return nkf_buf_pop(nkf_state->broken_buf);
3308  }
3309  c = (*i_bgetc)(f);
3310  if (c=='$' && nkf_state->broken_state != ESC
3311  && (input_mode == ASCII || input_mode == JIS_X_0201_1976_K)) {
3312  c1= (*i_bgetc)(f);
3313  nkf_state->broken_state = 0;
3314  if (c1=='@'|| c1=='B') {
 /* queued in reverse; presumably popped back as c then c1 - TODO confirm
  * that nkf_buf_pop() is last-in first-out */
3315  nkf_buf_push(nkf_state->broken_buf, c1);
3316  nkf_buf_push(nkf_state->broken_buf, c);
3317  return ESC;
3318  } else {
3319  (*i_bungetc)(c1,f);
3320  return c;
3321  }
3322  } else if (c=='(' && nkf_state->broken_state != ESC
3324  c1= (*i_bgetc)(f);
3325  nkf_state->broken_state = 0;
3326  if (c1=='J'|| c1=='B') {
3327  nkf_buf_push(nkf_state->broken_buf, c1);
3328  nkf_buf_push(nkf_state->broken_buf, c);
3329  return ESC;
3330  } else {
3331  (*i_bungetc)(c1,f);
3332  return c;
3333  }
3334  } else {
3335  nkf_state->broken_state = c;
3336  return c;
3337  }
3338 }
3339 
/*
 * Push c back onto broken_buf. The character is silently discarded when
 * two or more characters are already queued. Returns c in either case.
 */
3340 static nkf_char
3342 {
3343  if (nkf_buf_length(nkf_state->broken_buf) < 2)
3344  nkf_buf_push(nkf_state->broken_buf, c);
3345  return c;
3346 }
3347 
/*
 * End-of-line filter in the output chain; passes characters on to
 * o_eol_conv.
 * When guess_f is set it also tracks the input line-ending style in
 * input_eol: 0 means none seen yet, CR / LF / CRLF mean one consistent
 * style, and EOF means mixed styles were seen.
 * A CR is held back in prev_cr until the next character shows whether it
 * starts a CR LF pair. Each line break is then emitted according to
 * eolmode_f: CR unless eolmode_f is LF, and LF unless eolmode_f is CR.
 */
3348 static void
3350 {
3351  if (guess_f && input_eol != EOF) {
3352  if (c2 == 0 && c1 == LF) {
3353  if (!input_eol) input_eol = prev_cr ? CRLF : LF;
3354  else if (input_eol != (prev_cr ? CRLF : LF)) input_eol = EOF;
3355  } else if (c2 == 0 && c1 == CR && input_eol == LF) input_eol = EOF;
 /* remaining cases matter only when a lone CR preceded this character */
3356  else if (!prev_cr);
3357  else if (!input_eol) input_eol = CR;
3358  else if (input_eol != CR) input_eol = EOF;
3359  }
 /* a pending CR or an LF ends the line here */
3360  if (prev_cr || (c2 == 0 && c1 == LF)) {
3361  prev_cr = 0;
3362  if (eolmode_f != LF) (*o_eol_conv)(0, CR);
3363  if (eolmode_f != CR) (*o_eol_conv)(0, LF);
3364  }
 /* CR is deferred; LF was handled above; everything else passes through */
3365  if (c2 == 0 && c1 == CR) prev_cr = CR;
3366  else if (c2 != 0 || c1 != LF) (*o_eol_conv)(c2, c1);
3367 }
3368 
/*
 * Write one line break through the single-argument callback func, using
 * eolmode_f or, when that is 0, DEFAULT_NEWLINE: CRLF writes 0x0D 0x0A,
 * CR writes 0x0D, LF writes 0x0A. Any other value writes nothing.
 */
3369 static void
3371 {
3372  switch (eolmode_f ? eolmode_f : DEFAULT_NEWLINE) {
3373  case CRLF:
3374  (*func)(0x0D);
3375  (*func)(0x0A);
3376  break;
3377  case CR:
3378  (*func)(0x0D);
3379  break;
3380  case LF:
3381  (*func)(0x0A);
3382  break;
3383  }
3384 }
3385 
/*
 * Write one line break through the two-argument converter callback func
 * (first argument 0), using eolmode_f or, when that is 0, DEFAULT_NEWLINE:
 * CRLF writes 0x0D 0x0A, CR writes 0x0D, LF writes 0x0A. Any other value
 * writes nothing.
 */
3386 static void
3388 {
3389  switch (eolmode_f ? eolmode_f : DEFAULT_NEWLINE) {
3390  case CRLF:
3391  (*func)(0, 0x0D);
3392  (*func)(0, 0x0A);
3393  break;
3394  case CR:
3395  (*func)(0, 0x0D);
3396  break;
3397  case LF:
3398  (*func)(0, 0x0A);
3399  break;
3400  }
3401 }
3402 
3403 /*
3404  Return value of fold_conv()
3405 
3406  LF add newline and output char
3407  CR add newline and output nothing
3408  SP space
3409  0 skip
3410  1 (or else) normal output
3411 
3412  fold state in prev (previous character)
3413 
3414  >0x80 Japanese (X0208/X0201)
3415  <0x80 ASCII
3416  LF new line
3417  SP space
3418 
3419  This fold algorithm does not preserve leading space in a line.
3420  This is the main difference from fmt.
3421  */
3422 
/* Column width used when folding: 2 when c2 is non-zero, otherwise 1. c1 is unused. */
3423 #define char_size(c2,c1) (c2?2:1)
3424 
/*
 * Line-folding filter in the output chain; passes characters on to o_fconv.
 * The first part classifies the character (c2, c1) into fold_state, using
 * the current column f_line, the previous character class f_prev, and the
 * limits fold_len and fold_margin. The switch at the end performs the
 * output for that state:
 *   LF      newline, then the character
 *   CR      newline only
 *   SP/TAB  a single ASCII space
 *   0       nothing
 *   other   the character unchanged
 * Characters listed in the "kinsoku" tests are line-start-prohibited
 * punctuation: they are kept on the current line instead of being wrapped.
 * NOTE(review): the newline-emitting statements of the LF and CR cases are
 * absent from this listing.
 */
3425 static void
3427 {
3428  nkf_char prev0;
3429  nkf_char fold_state;
3430
3431  if (c1== CR && !fold_preserve_f) {
3432  fold_state=0; /* ignore cr */
3433  }else if (c1== LF&&f_prev==CR && fold_preserve_f) {
3434  f_prev = LF;
3435  fold_state=0; /* ignore cr */
3436  } else if (c1== BS) {
3437  if (f_line>0) f_line--;
3438  fold_state = 1;
3439  } else if (c2==EOF && f_line != 0) { /* close open last line */
3440  fold_state = LF;
3441  } else if ((c1==LF && !fold_preserve_f)
3442  || ((c1==CR||(c1==LF&&f_prev!=CR))
3443  && fold_preserve_f)) {
3444  /* new line */
3445  if (fold_preserve_f) {
3446  f_prev = c1;
3447  f_line = 0;
3448  fold_state = CR;
 /* NOTE(review): fold_preserve_f is known to be false from here on, so
  * "&& !fold_preserve_f" is redundant and the "&& fold_preserve_f"
  * alternative can never be true. */
3449  } else if ((f_prev == c1 && !fold_preserve_f)
3450  || (f_prev == LF && fold_preserve_f)
3451  ) { /* duplicate newline */
3452  if (f_line) {
3453  f_line = 0;
3454  fold_state = LF; /* output two newline */
3455  } else {
3456  f_line = 0;
3457  fold_state = 1;
3458  }
3459  } else {
3460  if (f_prev&0x80) { /* Japanese? */
3461  f_prev = c1;
3462  fold_state = 0; /* ignore given single newline */
3463  } else if (f_prev==SP) {
3464  fold_state = 0;
3465  } else {
3466  f_prev = c1;
3467  if (++f_line<=fold_len)
3468  fold_state = SP;
3469  else {
3470  f_line = 0;
3471  fold_state = CR; /* fold and output nothing */
3472  }
3473  }
3474  }
3475  } else if (c1=='\f') {
3476  f_prev = LF;
3477  f_line = 0;
3478  fold_state = LF; /* output newline and clear */
3479  } else if ((c2==0 && nkf_isblank(c1)) || (c2 == '!' && c1 == '!')) {
3480  /* X0208 kankaku or ascii space */
3481  if (f_prev == SP) {
3482  fold_state = 0; /* remove duplicate spaces */
3483  } else {
3484  f_prev = SP;
3485  if (++f_line<=fold_len)
3486  fold_state = SP; /* output ASCII space only */
3487  else {
3488  f_prev = SP; f_line = 0;
3489  fold_state = CR; /* fold and output nothing */
3490  }
3491  }
3492  } else {
3493  prev0 = f_prev; /* we still need this one... , but almost done */
3494  f_prev = c1;
 /* NOTE(review): the second operand is redundant unless
  * JIS_X_0201_1976_K is 0 - a non-zero c2 already satisfies the first. */
3495  if (c2 || c2 == JIS_X_0201_1976_K)
3496  f_prev |= 0x80; /* this is Japanese */
3497  f_line += char_size(c2,c1);
3498  if (f_line<=fold_len) { /* normal case */
3499  fold_state = 1;
3500  } else {
3501  if (f_line>fold_len+fold_margin) { /* too many kinsoku suspension */
3502  f_line = char_size(c2,c1);
3503  fold_state = LF; /* We can't wait, do fold now */
3504  } else if (c2 == JIS_X_0201_1976_K) {
3505  /* simple kinsoku rules return 1 means no folding */
3506  if (c1==(0xde&0x7f)) fold_state = 1; /* halfwidth voiced sound mark */
3507  else if (c1==(0xdf&0x7f)) fold_state = 1; /* halfwidth semi-voiced sound mark */
3508  else if (c1==(0xa4&0x7f)) fold_state = 1; /* halfwidth ideographic comma */
3509  else if (c1==(0xa3&0x7f)) fold_state = 1; /* halfwidth right corner bracket */
3510  else if (c1==(0xa1&0x7f)) fold_state = 1; /* halfwidth ideographic full stop */
3511  else if (c1==(0xb0&0x7f)) fold_state = 1; /* halfwidth prolonged sound mark */
3512  else if (SP<=c1 && c1<=(0xdf&0x7f)) { /* X0201 */
3513  f_line = 1;
3514  fold_state = LF;/* add one new f_line before this character */
3515  } else {
3516  f_line = 1;
3517  fold_state = LF;/* add one new f_line before this character */
3518  }
3519  } else if (c2==0) {
3520  /* kinsoku point in ASCII */
3521  if ( c1==')'|| /* { [ ( */
3522  c1==']'||
3523  c1=='}'||
3524  c1=='.'||
3525  c1==','||
3526  c1=='!'||
3527  c1=='?'||
3528  c1=='/'||
3529  c1==':'||
3530  c1==';') {
3531  fold_state = 1;
3532  /* just after special */
3533  } else if (!is_alnum(prev0)) {
3534  f_line = char_size(c2,c1);
3535  fold_state = LF;
3536  } else if ((prev0==SP) || /* ignored new f_line */
3537  (prev0==LF)|| /* ignored new f_line */
3538  (prev0&0x80)) { /* X0208 - ASCII */
3539  f_line = char_size(c2,c1);
3540  fold_state = LF;/* add one new f_line before this character */
3541  } else {
3542  fold_state = 1; /* default no fold in ASCII */
3543  }
3544  } else {
 /* JIS X 0208 row 0x21 punctuation; the names below follow the code
  * position (0x21, c1) */
3545  if (c2=='!') {
3546  if (c1=='"') fold_state = 1; /* 0x2122 ideographic comma */
3547  else if (c1=='#') fold_state = 1; /* 0x2123 ideographic full stop */
3548  else if (c1=='W') fold_state = 1; /* 0x2157 right corner bracket */
3549  else if (c1=='K') fold_state = 1; /* 0x214B fullwidth right parenthesis */
3550  else if (c1=='$') fold_state = 1; /* 0x2124 fullwidth comma */
3551  else if (c1=='%') fold_state = 1; /* 0x2125 fullwidth full stop */
3552  else if (c1=='\'') fold_state = 1; /* 0x2127 fullwidth colon */
3553  else if (c1=='(') fold_state = 1; /* 0x2128 fullwidth semicolon */
3554  else if (c1==')') fold_state = 1; /* 0x2129 fullwidth question mark */
3555  else if (c1=='*') fold_state = 1; /* 0x212A fullwidth exclamation mark */
3556  else if (c1=='+') fold_state = 1; /* 0x212B voiced sound mark */
3557  else if (c1==',') fold_state = 1; /* 0x212C semi-voiced sound mark */
3558  /* default no fold in kinsoku */
3559  else {
3560  fold_state = LF;
3561  f_line = char_size(c2,c1);
3562  /* add one new f_line before this character */
3563  }
3564  } else {
3565  f_line = char_size(c2,c1);
3566  fold_state = LF;
3567  /* add one new f_line before this character */
3568  }
3569  }
3570  }
3571  }
3572  /* terminator process */
3573  switch(fold_state) {
3574  case LF:
3576  (*o_fconv)(c2,c1);
3577  break;
3578  case 0:
3579  return;
3580  case CR:
3582  break;
3583  case TAB:
3584  case SP:
3585  (*o_fconv)(0,SP);
3586  break;
3587  default:
3588  (*o_fconv)(c2,c1);
3589  }
3590 }
3591 
3593 
/*
 * Width-conversion filter in the output chain; passes characters on to
 * o_zconv. The steps run in this order:
 *   1. JIS X 0201 kana 0x20, 0x7D and 0x7E pass through untouched.
 *   2. With x0201_f set, JIS X 0201 kana are converted through the tables
 *      cv / dv / ev. A kana that has a dv or ev entry is held in
 *      z_prev1/z_prev2 until the next character shows whether a voiced or
 *      semi-voiced sound mark follows.
 *   3. EOF passes through.
 *   4. alpha_f bit 1: JIS X 0208 row 0x23 becomes ASCII, and row 0x21
 *      symbols are mapped through fv. Bits 2 and 4: the JIS X 0208 space
 *      (0x21, 0x21) becomes one or two ASCII spaces.
 *   5. alpha_f bit 8: the ASCII characters > < " & become HTML entities.
 *   6. alpha_f bit 16: JIS X 0208 katakana and related punctuation become
 *      JIS X 0201 kana.
 */
3594 static void
3596 {
3597
3598  /* if (c2) c1 &= 0x7f; assertion */
3599
3600  if (c2 == JIS_X_0201_1976_K && (c1 == 0x20 || c1 == 0x7D || c1 == 0x7E)) {
3601  (*o_zconv)(c2,c1);
3602  return;
3603  }
3604
3605  if (x0201_f) {
 /* a kana is pending from the previous call */
3606  if (z_prev2 == JIS_X_0201_1976_K) {
3607  if (c2 == JIS_X_0201_1976_K) {
3608  if (c1 == (0xde&0x7f)) { /* voiced sound mark (dakuten) */
3609  z_prev2 = 0;
3610  (*o_zconv)(dv[(z_prev1-SP)*2], dv[(z_prev1-SP)*2+1]);
3611  return;
3612  } else if (c1 == (0xdf&0x7f) && ev[(z_prev1-SP)*2]) { /* semi-voiced sound mark (handakuten) */
3613  z_prev2 = 0;
3614  (*o_zconv)(ev[(z_prev1-SP)*2], ev[(z_prev1-SP)*2+1]);
3615  return;
3616  }
3617  }
 /* no combining mark followed: flush the pending kana unmodified */
3618  z_prev2 = 0;
3619  (*o_zconv)(cv[(z_prev1-SP)*2], cv[(z_prev1-SP)*2+1]);
3620  }
3621  if (c2 == JIS_X_0201_1976_K) {
3622  if (dv[(c1-SP)*2] || ev[(c1-SP)*2]) {
3623  /* wait for a voiced or semi-voiced sound mark */
3624  z_prev1 = c1;
3625  z_prev2 = c2;
3626  return;
3627  } else {
3628  (*o_zconv)(cv[(c1-SP)*2], cv[(c1-SP)*2+1]);
3629  return;
3630  }
3631  }
3632  }
3633
3634  if (c2 == EOF) {
3635  (*o_zconv)(c2, c1);
3636  return;
3637  }
3638
3639  if (alpha_f&1 && c2 == 0x23) {
3640  /* JISX0208 Alphabet */
3641  c2 = 0;
3642  } else if (c2 == 0x21) {
3643  /* JISX0208 Kigou */
3644  if (0x21==c1) {
3645  if (alpha_f&2) {
3646  c2 = 0;
3647  c1 = SP;
3648  } else if (alpha_f&4) {
3649  (*o_zconv)(0, SP);
3650  (*o_zconv)(0, SP);
3651  return;
3652  }
3653  } else if (alpha_f&1 && 0x20<c1 && c1<0x7f && fv[c1-0x20]) {
3654  c2 = 0;
3655  c1 = fv[c1-0x20];
3656  }
3657  }
3658
3659  if (alpha_f&8 && c2 == 0) {
3660  /* HTML Entity */
3661  const char *entity = 0;
3662  switch (c1){
3663  case '>': entity = "&gt;"; break;
3664  case '<': entity = "&lt;"; break;
3665  case '\"': entity = "&quot;"; break;
3666  case '&': entity = "&amp;"; break;
3667  }
3668  if (entity){
3669  while (*entity) (*o_zconv)(0, *entity++);
3670  return;
3671  }
3672  }
3673
3674  if (alpha_f & 16) {
3675  /* JIS X 0208 Katakana to JIS X 0201 Katakana */
3676  if (c2 == 0x21) {
3677  nkf_char c = 0;
3678  switch (c1) {
3679  case 0x23:
3680  /* U+3002 (0x8142) Ideographic Full Stop -> U+FF61 (0xA1) Halfwidth Ideographic Full Stop */
3681  c = 0xA1;
3682  break;
3683  case 0x56:
3684  /* U+300C (0x8175) Left Corner Bracket -> U+FF62 (0xA2) Halfwidth Left Corner Bracket */
3685  c = 0xA2;
3686  break;
3687  case 0x57:
3688  /* U+300D (0x8176) Right Corner Bracket -> U+FF63 (0xA3) Halfwidth Right Corner Bracket */
3689  c = 0xA3;
3690  break;
3691  case 0x22:
3692  /* U+3001 (0x8141) Ideographic Comma -> U+FF64 (0xA4) Halfwidth Ideographic Comma */
3693  c = 0xA4;
3694  break;
3695  case 0x26:
3696  /* U+30FB (0x8145) Katakana Middle Dot -> U+FF65 (0xA5) Halfwidth Katakana Middle Dot */
3697  c = 0xA5;
3698  break;
3699  case 0x3C:
3700  /* U+30FC (0x815B) Katakana-Hiragana Prolonged Sound Mark -> U+FF70 (0xB0) Halfwidth Katakana-Hiragana Prolonged Sound Mark */
3701  c = 0xB0;
3702  break;
3703  case 0x2B:
3704  /* U+309B (0x814A) Katakana-Hiragana Voiced Sound Mark -> U+FF9E (0xDE) Halfwidth Katakana Voiced Sound Mark */
3705  c = 0xDE;
3706  break;
3707  case 0x2C:
3708  /* U+309C (0x814B) Katakana-Hiragana Semi-Voiced Sound Mark -> U+FF9F (0xDF) Halfwidth Katakana Semi-Voiced Sound Mark */
3709  c = 0xDF;
3710  break;
3711  }
3712  if (c) {
3713  (*o_zconv)(JIS_X_0201_1976_K, c);
3714  return;
3715  }
3716  } else if (c2 == 0x25) {
3717  /* JISX0208 Katakana */
 /* Indexed by c1 - 0x20. The high byte of an entry is the halfwidth kana;
  * a non-zero low byte is a second kana written after it. An entry of 0
  * means no halfwidth form. */
3718  static const int fullwidth_to_halfwidth[] =
3719  {
3720  0x0000, 0x2700, 0x3100, 0x2800, 0x3200, 0x2900, 0x3300, 0x2A00,
3721  0x3400, 0x2B00, 0x3500, 0x3600, 0x365E, 0x3700, 0x375E, 0x3800,
3722  0x385E, 0x3900, 0x395E, 0x3A00, 0x3A5E, 0x3B00, 0x3B5E, 0x3C00,
3723  0x3C5E, 0x3D00, 0x3D5E, 0x3E00, 0x3E5E, 0x3F00, 0x3F5E, 0x4000,
3724  0x405E, 0x4100, 0x415E, 0x2F00, 0x4200, 0x425E, 0x4300, 0x435E,
3725  0x4400, 0x445E, 0x4500, 0x4600, 0x4700, 0x4800, 0x4900, 0x4A00,
3726  0x4A5E, 0x4A5F, 0x4B00, 0x4B5E, 0x4B5F, 0x4C00, 0x4C5E, 0x4C5F,
3727  0x4D00, 0x4D5E, 0x4D5F, 0x4E00, 0x4E5E, 0x4E5F, 0x4F00, 0x5000,
3728  0x5100, 0x5200, 0x5300, 0x2C00, 0x5400, 0x2D00, 0x5500, 0x2E00,
3729  0x5600, 0x5700, 0x5800, 0x5900, 0x5A00, 0x5B00, 0x0000, 0x5C00,
3730  0x0000, 0x0000, 0x2600, 0x5D00, 0x335E, 0x0000, 0x0000, 0x0000,
3731  0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000
3732  };
 /* NOTE(review): c1 is not range-checked before indexing the 96-entry
  * table; this assumes 0x20 <= c1 <= 0x7f - confirm against callers. */
3733  if (fullwidth_to_halfwidth[c1-0x20]){
3734  c2 = fullwidth_to_halfwidth[c1-0x20];
3735  (*o_zconv)(JIS_X_0201_1976_K, c2>>8);
3736  if (c2 & 0xFF) {
3737  (*o_zconv)(JIS_X_0201_1976_K, c2&0xFF);
3738  }
3739  return;
3740  }
3741  }
3742  }
3743  (*o_zconv)(c2,c1);
3744 }
3745 
3746 
/*
 * rot13(c): rotate the letters A-Z and a-z by 13 places within their own
 * case; every other value is returned unchanged.
 * NOTE(review): the argument is not parenthesised and is evaluated several
 * times, so it must be a simple expression without side effects.
 */
3747 #define rot13(c) ( \
3748  ( c < 'A') ? c: \
3749  (c <= 'M') ? (c + 13): \
3750  (c <= 'Z') ? (c - 13): \
3751  (c < 'a') ? (c): \
3752  (c <= 'm') ? (c + 13): \
3753  (c <= 'z') ? (c - 13): \
3754  (c) \
3755  )
3756
/*
 * rot47(c): rotate the values '!' through '~' by 47 places; every other
 * value is returned unchanged. The same caveat about the argument applies.
 */
3757 #define rot47(c) ( \
3758  ( c < '!') ? c: \
3759  ( c <= 'O') ? (c + 47) : \
3760  ( c <= '~') ? (c - 47) : \
3761  c \
3762  )
3763 
/*
 * ROT13/47 filter in the output chain; passes the result on to o_rot_conv.
 * When c2 is 0, JIS_X_0201_1976_K or ISO_8859_1, c1 alone goes through
 * rot13(). For any other c2, both c1 and c2 go through rot47().
 */
3764 static void
3766 {
3767  if (c2 == 0 || c2 == JIS_X_0201_1976_K || c2 == ISO_8859_1) {
3768  c1 = rot13(c1);
3769  } else if (c2) {
3770  c1 = rot47(c1);
3771  c2 = rot47(c2);
3772  }
3773  (*o_rot_conv)(c2,c1);
3774 }
3775 
/*
 * Filter that swaps JIS X 0208 rows 0x24 and 0x25; passes the result on to
 * o_hira_conv. The direction is selected by bits of hira_f.
 *   hira_f & 1: row 0x25 -> row 0x24 for 0x20 < c1 < 0x74. (0x25, 0x74)
 *               becomes U+3094 when the output encoding is Unicode. In row
 *               0x21, columns 0x33 and 0x34 become 0x35 and 0x36.
 *   hira_f & 2: the reverse mappings.
 * A character converted by the first direction returns immediately, so it
 * is not converted back when both bits are set.
 */
3776 static void
3778 {
3779  if (hira_f & 1) {
3780  if (c2 == 0x25) {
3781  if (0x20 < c1 && c1 < 0x74) {
3782  c2 = 0x24;
3783  (*o_hira_conv)(c2,c1);
3784  return;
3785  } else if (c1 == 0x74 && nkf_enc_unicode_p(output_encoding)) {
3786  c2 = 0;
3787  c1 = nkf_char_unicode_new(0x3094);
3788  (*o_hira_conv)(c2,c1);
3789  return;
3790  }
3791  } else if (c2 == 0x21 && (c1 == 0x33 || c1 == 0x34)) {
3792  c1 += 2;
3793  (*o_hira_conv)(c2,c1);
3794  return;
3795  }
3796  }
3797  if (hira_f & 2) {
3798  if (c2 == 0 && c1 == nkf_char_unicode_new(0x3094)) {
3799  c2 = 0x25;
3800  c1 = 0x74;
3801  } else if (c2 == 0x24 && 0x20 < c1 && c1 < 0x74) {
3802  c2 = 0x25;
3803  } else if (c2 == 0x21 && (c1 == 0x35 || c1 == 0x36)) {
3804  c1 -= 2;
3805  }
3806  }
3807  (*o_hira_conv)(c2,c1);
3808 }
3809 
3810 
/*
 * Filter that replaces characters outside an accepted set with the
 * substitute pair (GETA1, GETA2) and passes the result on to
 * o_iso2022jp_check_conv. A character is replaced when
 *   - c2 is in 0x00-0x20 and c1 is in 0x7f-0xff,
 *   - c2 is in 0x29-0x2f or 0x75-0x7e, or
 *   - the 16-bit value (c2 << 8) + c1 lies in one of the ranges listed in
 *     the table below.
 * NOTE(review): GETA1/GETA2 are defined elsewhere; presumably the "geta"
 * placeholder mark - confirm.
 */
3811 static void
3813 {
3814 #define RANGE_NUM_MAX 18
 /* inclusive {first, last} code ranges that are rejected */
3815  static const nkf_char range[RANGE_NUM_MAX][2] = {
3816  {0x222f, 0x2239,},
3817  {0x2242, 0x2249,},
3818  {0x2251, 0x225b,},
3819  {0x226b, 0x2271,},
3820  {0x227a, 0x227d,},
3821  {0x2321, 0x232f,},
3822  {0x233a, 0x2340,},
3823  {0x235b, 0x2360,},
3824  {0x237b, 0x237e,},
3825  {0x2474, 0x247e,},
3826  {0x2577, 0x257e,},
3827  {0x2639, 0x2640,},
3828  {0x2659, 0x267e,},
3829  {0x2742, 0x2750,},
3830  {0x2772, 0x277e,},
3831  {0x2841, 0x287e,},
3832  {0x4f54, 0x4f7e,},
3833  {0x7425, 0x747e},
3834  };
3835  nkf_char i;
3836  nkf_char start, end, c;
3837
3838  if(c2 >= 0x00 && c2 <= 0x20 && c1 >= 0x7f && c1 <= 0xff) {
3839  c2 = GETA1;
3840  c1 = GETA2;
3841  }
3842  if((c2 >= 0x29 && c2 <= 0x2f) || (c2 >= 0x75 && c2 <= 0x7e)) {
3843  c2 = GETA1;
3844  c1 = GETA2;
3845  }
3846
 /* the loop keeps scanning after a replacement; c is recomputed from the
  * possibly replaced c2/c1 on every iteration */
3847  for (i = 0; i < RANGE_NUM_MAX; i++) {
3848  start = range[i][0];
3849  end = range[i][1];
3850  c = (c2 << 8) + c1;
3851  if (c >= start && c <= end) {
3852  c2 = GETA1;
3853  c1 = GETA2;
3854  }
3855  }
3856  (*o_iso2022jp_check_conv)(c2,c1);
3857 }
3858 
3859 
3860 /* This converts =?ISO-2022-JP?B?HOGE HOGE?= */
3861
/*
 * Recognised MIME encoded-word prefixes, terminated by NULL. "\075" is the
 * '=' character. The entries are in the same order as those of
 * mime_encode_method, so the two tables are parallel.
 * NOTE(review): the ISO-2022-JP?B? prefix appears twice; confirm this is
 * intentional.
 */
3862 static const unsigned char *mime_pattern[] = {
3863  (const unsigned char *)"\075?EUC-JP?B?",
3864  (const unsigned char *)"\075?SHIFT_JIS?B?",
3865  (const unsigned char *)"\075?ISO-8859-1?Q?",
3866  (const unsigned char *)"\075?ISO-8859-1?B?",
3867  (const unsigned char *)"\075?ISO-2022-JP?B?",
3868  (const unsigned char *)"\075?ISO-2022-JP?B?",
3869  (const unsigned char *)"\075?ISO-2022-JP?Q?",
3870 #if defined(UTF8_INPUT_ENABLE)
3871  (const unsigned char *)"\075?UTF-8?B?",
3872  (const unsigned char *)"\075?UTF-8?Q?",
3873 #endif
3874  (const unsigned char *)"\075?US-ASCII?Q?",
3875  NULL
3876 };
3877 
3878 
3879 /* Markers used to raise the priority of the matching input code. */
/*
 * One entry per mime_pattern entry, in the same order; 0 means no
 * converter is associated with that pattern.
 * NOTE(review): the declaration line of this table is absent from this
 * listing.
 */
3881  e_iconv, s_iconv, 0, 0, 0, 0, 0,
3882 #if defined(UTF8_INPUT_ENABLE)
3883  w_iconv, w_iconv,
3884 #endif
3885  0,
3886 };
3887 
/*
 * Encoding identifiers, terminated by 0; presumably one per mime_pattern
 * entry in the same order - confirm.
 * NOTE(review): the first initializer row is absent from this listing.
 */
3888 static const nkf_char mime_encode[] = {
3890 #if defined(UTF8_INPUT_ENABLE)
3891  UTF_8, UTF_8,
3892 #endif
3893  ASCII,
3894  0
3895 };
3896 
/*
 * Transfer-encoding letter for each mime_pattern entry, in the same order
 * and terminated by 0. Each letter matches the one in the corresponding
 * pattern: 'B' for "?B?" and 'Q' for "?Q?".
 */
3897 static const nkf_char mime_encode_method[] = {
3898  'B', 'B','Q', 'B', 'B', 'B', 'Q',
3899 #if defined(UTF8_INPUT_ENABLE)
3900  'B', 'Q',
3901 #endif
3902  'Q',
3903  0
3904 };
3905 
3906 
3907 /* MIME preprocessor fifo */
3908
/*
 * The buffer size is a power of two, so any unsigned index can be wrapped
 * into the buffer with a single AND against MIME_BUF_MASK. mime_input_buf(n)
 * is the buffer slot for index n.
 */
3909 #define MIME_BUF_SIZE (1024) /* 2^n ring buffer */
3910 #define MIME_BUF_MASK (MIME_BUF_SIZE-1)
3911 #define mime_input_buf(n) mime_input_state.buf[(n)&MIME_BUF_MASK]
/*
 * Ring-buffer state: top, last and input are free-running indices that are
 * masked on access.
 * NOTE(review): the closing lines of this struct definition are absent
 * from this listing.
 */
3912 static struct {
3913  unsigned char buf[MIME_BUF_SIZE];
3914  unsigned int top;
3915  unsigned int last; /* decoded */
3916  unsigned int input; /* undecoded */
3919
3920 #define MAXRECOVER 20
3921 
/* Prepend c to the MIME ring buffer by moving top back one slot. */
3922 static void
3924 {
3925  mime_input_buf(--mime_input_state.top) = (unsigned char)c;
3926 }
3927 
/*
 * Push-back hook for MIME input; returns c.
 * NOTE(review): the statement that stores c is absent from this listing.
 */
3928 static nkf_char
3930 {
3932  return c;
3933 }
3934 
/*
 * Push c back. With mimebuf_f set, the call is delegated to i_mungetc_buf.
 * Otherwise c is stored in the ring buffer by moving the input index back
 * one slot. Returns c.
 */
3935 static nkf_char
3937 {
3938  if (mimebuf_f)
3939  (*i_mungetc_buf)(c,f);
3940  else
3941  mime_input_buf(--mime_input_state.input) = (unsigned char)c;
3942  return c;
3943 }
3944 
3945 static nkf_char
3947 {
3948  /* we don't keep eof of mime_input_buf, because it contains ?= as
3949  a terminator. It was checked in mime_integrity. */
3950  return ((mimebuf_f)?
3952 }
3953 
3954 static void
3956 {
3957  if (i_getc!=mime_getc) {
3960  if(mime_f==STRICT_MIME) {
3963  }
3964  }
3965 }
3966 
3967 static void
3969 {
3970  if(mime_f==STRICT_MIME) {
3971  i_mgetc = i_mgetc_buf;
3973  }
3974  i_getc = i_mgetc;
3975  i_ungetc = i_mungetc;
3978 }
3979 
/*
 * Pre-scan a Base64 encoded word into the MIME ring buffer.
 *
 * f: input stream handed to (*i_getc)().
 * p: the encoded-word prefix that was already consumed (an entry of
 *    mime_pattern[]); it is copied back into the ring buffer first.
 *
 * Characters are read until the terminating "?=" is seen, the ring buffer
 * is full, EOF is reached, or a character outside [A-Za-z0-9+/=?] appears.
 * When "?=" is found, the input index is rewound to just after the prefix
 * so that decoding starts at the payload.  Otherwise mime_decode_mode is
 * set to 1 so that the buffered bytes are replayed undecoded.
 *
 * Every path visible here returns 1.
 */
3980 static nkf_char
3981 mime_integrity(FILE *f, const unsigned char *p)
3982 {
3983  nkf_char c,d;
3984  unsigned int q;
3985  /* In buffered mode, read until =? or NL or buffer full
3986  */
3987  mime_input_state.input = mime_input_state.top;
3989 
/* put the already-consumed prefix back in front of the payload */
3990  while(*p) mime_input_buf(mime_input_state.input++) = *p++;
/* d holds the previously stored character, q the index of the first payload byte */
3991  d = 0;
3992  q = mime_input_state.input;
3993  while((c=(*i_getc)(f))!=EOF) {
/* input has wrapped around to the slot of top: no free slot left */
3994  if (((mime_input_state.input-mime_input_state.top)&MIME_BUF_MASK)==0) {
3995  break; /* buffer full */
3996  }
3997  if (c=='=' && d=='?') {
3998  /* checked. skip header, start decode */
3999  mime_input_buf(mime_input_state.input++) = (unsigned char)c;
4000  /* mime_last_input = mime_input_state.input; */
4001  mime_input_state.input = q;
4002  switch_mime_getc();
4003  return 1;
4004  }
/* anything that cannot be part of a Base64 body or the "?=" trailer ends the scan */
4005  if (!( (c=='+'||c=='/'|| c=='=' || c=='?' || is_alnum(c))))
4006  break;
4007  /* Should we check length mod 4? */
4008  mime_input_buf(mime_input_state.input++) = (unsigned char)c;
4009  d=c;
4010  }
4011  /* In case of Incomplete MIME, no MIME decode */
/* NOTE(review): c is stored even when the loop ended on EOF (it is then
   truncated to unsigned char), and on the "buffer full" exit this store
   lands on the slot that top maps to -- confirm both are intended. */
4012  mime_input_buf(mime_input_state.input++) = (unsigned char)c;
4013  mime_input_state.last = mime_input_state.input; /* point undecoded buffer */
4014  mime_decode_mode = 1; /* no decode on mime_input_buf last in mime_getc */
4015  switch_mime_getc(); /* anyway we need buffered getc */
4016  return 1;
4017 }
4018 
4019 static nkf_char
4021 {
4022  nkf_char c1 = 0;
4023  int i,j,k;
4024  const unsigned char *p,*q;
4025  nkf_char r[MAXRECOVER]; /* recovery buffer, max mime pattern length */
4026 
4028  /* =? has been checked */
4029  j = 0;
4030  p = mime_pattern[j];
4031  r[0]='='; r[1]='?';
4032 
4033  for(i=2;p[i]>SP;i++) { /* start at =? */
4034  if (((r[i] = c1 = (*i_getc)(f))==EOF) || nkf_toupper(c1) != p[i]) {
4035  /* pattern fails, try next one */
4036  q = p;
4037  while (mime_pattern[++j]) {
4038  p = mime_pattern[j];
4039  for(k=2;k<i;k++) /* assume length(p) > i */
4040  if (p[k]!=q[k]) break;
4041  if (k==i && nkf_toupper(c1)==p[k]) break;
4042  }
4043  p = mime_pattern[j];
4044  if (p) continue; /* found next one, continue */
4045  /* all fails, output from recovery buffer */
4046  (*i_ungetc)(c1,f);
4047  for(j=0;j<i;j++) {
4048  (*oconv)(0,r[j]);
4049  }
4050  return c1;
4051  }
4052  }
4053  mime_decode_mode = p[i-2];
4054 
4058 
4059  if (mime_decode_mode=='B') {
4060  mimebuf_f = unbuf_f;
4061  if (!unbuf_f) {
4062  /* do MIME integrity check */
4063  return mime_integrity(f,mime_pattern[j]);
4064  }
4065  }
4066  switch_mime_getc();
4067  mimebuf_f = TRUE;
4068  return c1;
4069 }
4070 
4071 static nkf_char
4073 {
4074  nkf_char c1;
4075  int i,k;
4076 
4077  /* In NONSTRICT mode, only =? is checked. In case of failure, we */
4078  /* re-read and convert again from mime_buffer. */
4079 
4080  /* =? has been checked */
4081  k = mime_input_state.last;
4083  for(i=2;i<MAXRECOVER;i++) { /* start at =? */
4084  /* We accept any character type even if it is breaked by new lines */
4085  c1 = (*i_getc)(f); mime_input_buf(mime_input_state.last++) = (unsigned char)c1;
4086  if (c1==LF||c1==SP||c1==CR||
4087  c1=='-'||c1=='_'||is_alnum(c1)) continue;
4088  if (c1=='=') {
4089  /* Failed. But this could be another MIME preemble */
4090  (*i_ungetc)(c1,f);
4091  mime_input_state.last--;
4092  break;
4093  }
4094  if (c1!='?') break;
4095  else {
4096  /* c1=='?' */
4097  c1 = (*i_getc)(f); mime_input_buf(mime_input_state.last++) = (unsigned char)c1;
4098  if (!(++i<MAXRECOVER) || c1==EOF) break;
4099  if (c1=='b'||c1=='B') {
4100  mime_decode_mode = 'B';
4101  } else if (c1=='q'||c1=='Q') {
4102  mime_decode_mode = 'Q';
4103  } else {
4104  break;
4105  }
4106  c1 = (*i_getc)(f); mime_input_buf(mime_input_state.last++) = (unsigned char)c1;
4107  if (!(++i<MAXRECOVER) || c1==EOF) break;
4108  if (c1!='?') {
4110  }
4111  break;
4112  }
4113  }
4114  switch_mime_getc();
4115  if (!mime_decode_mode) {
4116  /* false MIME premble, restart from mime_buffer */
4117  mime_decode_mode = 1; /* no decode, but read from the mime_buffer */
4118  /* Since we are in MIME mode until buffer becomes empty, */
4119  /* we never go into mime_begin again for a while. */
4120  return c1;
4121  }
4122  /* discard mime preemble, and goto MIME mode */
4123  mime_input_state.last = k;
4124  /* do no MIME integrity check */
4125  return c1; /* used only for checking EOF */
4126 }
4127 
4128 #ifdef CHECK_OPTION
4129 static void
4131 {
4132  ;
4133 }
4134 
4135 static void
4136 debug(const char *str)
4137 {
4138  if (debug_f){
4139  fprintf(stderr, "%s\n", str ? str : "NULL");
4140  }
4141 }
4142 #endif
4143 
4144 static void
4145 set_input_codename(const char *codename)
4146 {
4147  if (!input_codename) {
4148  input_codename = codename;
4149  } else if (strcmp(codename, input_codename) != 0) {
4150  input_codename = "";
4151  }
4152 }
4153 
4154 static const char*
4156 {
4157  if (input_codename && !*input_codename) {
4158  input_codename = "BINARY";
4159  } else {
4161  if (!input_codename) {
4162  input_codename = "ASCII";
4163  } else if (strcmp(input_codename, "Shift_JIS") == 0) {
4164  if (p->score & (SCORE_DEPEND|SCORE_CP932))
4165  input_codename = "CP932";
4166  } else if (strcmp(input_codename, "EUC-JP") == 0) {
4167  if (p->score & (SCORE_X0212))
4168  input_codename = "EUCJP-MS";
4169  else if (p->score & (SCORE_DEPEND|SCORE_CP932))
4170  input_codename = "CP51932";
4171  } else if (strcmp(input_codename, "ISO-2022-JP") == 0) {
4172  if (p->score & (SCORE_KANA))
4173  input_codename = "CP50221";
4174  else if (p->score & (SCORE_DEPEND|SCORE_CP932))
4175  input_codename = "CP50220";
4176  }
4177  }
4178  return input_codename;
4179 }
4180 
4181 #if !defined(PERL_XS) && !defined(WIN32DLL)
4182 static void
4183 print_guessed_code(char *filename)
4184 {
4185  if (filename != NULL) printf("%s: ", filename);
4186  if (input_codename && !*input_codename) {
4187  printf("BINARY\n");
4188  } else {
4190  if (guess_f == 1) {
4191  printf("%s\n", input_codename);
4192  } else {
4193  printf("%s%s\n",
4195  input_eol == CR ? " (CR)" :
4196  input_eol == LF ? " (LF)" :
4197  input_eol == CRLF ? " (CRLF)" :
4198  input_eol == EOF ? " (MIXED NL)" :
4199  "");
4200  }
4201  }
4202 }
4203 #endif /*WIN32DLL*/
4204 
4205 #ifdef INPUT_OPTION
4206 
4207 static nkf_char
4208 hex_getc(nkf_char ch, FILE *f, nkf_char (*g)(FILE *f), nkf_char (*u)(nkf_char c, FILE *f))
4209 {
4210  nkf_char c1, c2, c3;
4211  c1 = (*g)(f);
4212  if (c1 != ch){
4213  return c1;
4214  }
4215  c2 = (*g)(f);
4216  if (!nkf_isxdigit(c2)){
4217  (*u)(c2, f);
4218  return c1;
4219  }
4220  c3 = (*g)(f);
4221  if (!nkf_isxdigit(c3)){
4222  (*u)(c2, f);
4223  (*u)(c3, f);
4224  return c1;
4225  }
4226  return (hex2bin(c2) << 4) | hex2bin(c3);
4227 }
4228 
4229 static nkf_char
4231 {
4232  return hex_getc(':', f, i_cgetc, i_cungetc);
4233 }
4234 
4235 static nkf_char
4237 {
4238  return (*i_cungetc)(c, f);
4239 }
4240 
4241 static nkf_char
4243 {
4244  return hex_getc('%', f, i_ugetc, i_uungetc);
4245 }
4246 
4247 static nkf_char
4249 {
4250  return (*i_uungetc)(c, f);
4251 }
4252 #endif
4253 
4254 #ifdef NUMCHAR_OPTION
/*
 * Reader that decodes numeric character references: "&#xH...;" (hex) and
 * "&#D...;" (decimal).  The signature line is missing from this listing;
 * the body reads from a stream named f (presumably numchar_getc(FILE *f)
 * -- confirm).
 *
 * Input comes from i_ngetc and pushback goes to i_nungetc.  On a
 * successful parse the value is returned via nkf_char_unicode_new().
 * Otherwise every look-ahead character except the first is pushed back,
 * newest first, and the first character is returned.
 */
4255 static nkf_char
4257 {
4258  nkf_char (*g)(FILE *) = i_ngetc;
4259  nkf_char (*u)(nkf_char c ,FILE *f) = i_nungetc;
4260  int i = 0, j;
/* worst case fills buf[0..9]: '&', '#', 'x' plus 7 digits, or '&', '#' plus 8 digits */
4261  nkf_char buf[12];
/* -1 means "no valid reference parsed" */
4262  nkf_char c = -1;
4263 
4264  buf[i] = (*g)(f);
4265  if (buf[i] == '&'){
4266  buf[++i] = (*g)(f);
4267  if (buf[i] == '#'){
4268  c = 0;
4269  buf[++i] = (*g)(f);
4270  if (buf[i] == 'x' || buf[i] == 'X'){
/* hexadecimal form: at most 7 characters are read after the 'x' */
4271  for (j = 0; j < 7; j++){
4272  buf[++i] = (*g)(f);
4273  if (!nkf_isxdigit(buf[i])){
/* only ';' is an acceptable terminator */
4274  if (buf[i] != ';'){
4275  c = -1;
4276  }
4277  break;
4278  }
4279  c <<= 4;
4280  c |= hex2bin(buf[i]);
4281  }
4282  }else{
/* decimal form: the character already read after '#' is the first digit */
4283  for (j = 0; j < 8; j++){
4284  if (j){
4285  buf[++i] = (*g)(f);
4286  }
4287  if (!nkf_isdigit(buf[i])){
4288  if (buf[i] != ';'){
4289  c = -1;
4290  }
4291  break;
4292  }
4293  c *= 10;
/* presumably hex2bin maps '0'..'9' to 0..9 -- defined outside this chunk */
4294  c += hex2bin(buf[i]);
4295  }
4296  }
4297  }
4298  }
/* NOTE(review): if a digit loop runs to its limit without meeting ';' the
   value is still accepted and no ';' is consumed; "&#;" is accepted with
   value 0.  Confirm that this leniency is intended. */
4299  if (c != -1){
4300  return nkf_char_unicode_new(c);
4301  }
/* failed: push back the look-ahead, newest first, keeping buf[0] to return */
4302  while (i > 0){
4303  (*u)(buf[i], f);
4304  --i;
4305  }
4306  return buf[0];
4307 }
4308 
4309 static nkf_char
4311 {
4312  return (*i_nungetc)(c, f);
4313 }
4314 #endif
4315 
4316 #ifdef UNICODE_NORMALIZATION
4317 
/*
 * Reader that replaces byte sequences found in normalization_table[].nfd
 * with the matching .nfc sequence.  The signature line is missing from
 * this listing; the body reads from a stream named f (presumably
 * nfc_getc(FILE *f) -- confirm).
 *
 * Input comes from i_nfc_getc, pushback goes to i_nfc_ungetc, and the
 * work buffer is nkf_state->nfc_buf.  Returns the next character; any
 * further buffered characters are pushed back so that following calls
 * read them.
 */
4318 static nkf_char
4320 {
4321  nkf_char (*g)(FILE *f) = i_nfc_getc;
4322  nkf_char (*u)(nkf_char c ,FILE *f) = i_nfc_ungetc;
4323  nkf_buf_t *buf = nkf_state->nfc_buf;
4324  const unsigned char *array;
/* binary-search bounds over normalization_table */
4325  int lower=0, upper=NORMALIZATION_TABLE_LENGTH-1;
4326  nkf_char c = (*g)(f);
4327 
/* pass through EOF, values above 0xFF and bytes of the form 10xxxxxx */
4328  if (c == EOF || c > 0xFF || (c & 0xc0) == 0x80) return c;
4329 
4330  nkf_buf_push(buf, c);
4331  do {
4332  while (lower <= upper) {
4333  int mid = (lower+upper) / 2;
4334  int len;
4335  array = normalization_table[mid].nfd;
/* compare the candidate's nfd bytes with the buffer, reading more input on demand */
4336  for (len=0; len < NORMALIZATION_TABLE_NFD_LENGTH && array[len]; len++) {
4337  if (len >= nkf_buf_length(buf)) {
4338  c = (*g)(f);
4339  if (c == EOF) {
/* out of input: make both loops terminate (lower > upper) */
4340  len = 0;
4341  lower = 1, upper = 0;
4342  break;
4343  }
4344  nkf_buf_push(buf, c);
4345  }
4346  if (array[len] != nkf_buf_at(buf, len)) {
4347  if (array[len] < nkf_buf_at(buf, len)) lower = mid + 1;
4348  else upper = mid - 1;
/* len == 0 marks "no match for this candidate" */
4349  len = 0;
4350  break;
4351  }
4352  }
/* the whole nfd sequence matched: replace the buffer with the nfc bytes */
4353  if (len > 0) {
4354  int i;
4355  array = normalization_table[mid].nfc;
4356  nkf_buf_clear(buf);
4357  for (i=0; i < NORMALIZATION_TABLE_NFC_LENGTH && array[i]; i++)
4358  nkf_buf_push(buf, array[i]);
4359  break;
4360  }
4361  }
/* after a match the bounds are unchanged, so the search runs again on the new buffer contents */
4362  } while (lower <= upper);
4363 
/* hand back everything except the first buffered character, newest first */
4364  while (nkf_buf_length(buf) > 1) (*u)(nkf_buf_pop(buf), f);
4365  c = nkf_buf_pop(buf);
4366 
4367  return c;
4368 }
4369 
4370 static nkf_char
4372 {
4373  return (*i_nfc_ungetc)(c, f);
4374 }
4375 #endif /* UNICODE_NORMALIZATION */
4376 
4377 
/*
 * Map one Base64 character to its 6-bit value.  The signature line is
 * missing from this listing; the body uses a single value named c
 * (presumably base64decode(nkf_char c), as called from the Base64 decoder
 * further down -- confirm).
 *
 * Both the standard alphabet ('+' = 62, '/' = 63) and the '-' / '_'
 * variants are accepted.  No validation is done: a character outside the
 * alphabet still yields some number, and the visible caller masks the
 * result with 0x3f.
 */
4378 static nkf_char
4380 {
4381  int i;
4382  if (c > '@') {
4383  if (c < '[') {
4384  i = c - 'A'; /* A..Z 0-25 */
4385  } else if (c == '_') {
4386  i = '?' /* 63 */ ; /* _ 63 */
4387  } else {
/* 'a' - 'G' == 26, so lower-case letters land on 26..51 */
4388  i = c - 'G' /* - 'a' + 26 */ ; /* a..z 26-51 */
4389  }
4390  } else if (c > '/') {
/* the character constant '4' has the value 52 */
4391  i = c - '0' + '4' /* - '0' + 52 */ ; /* 0..9 52-61 */
4392  } else if (c == '+' || c == '-') {
4393  i = '>' /* 62 */ ; /* + and - 62 */
4394  } else {
/* every remaining character at or below '/' is treated like '/' */
4395  i = '?' /* 63 */ ; /* / 63 */
4396  }
4397  return (i);
4398 }
4399 
4400 static nkf_char
4402 {
4403  nkf_char c1, c2, c3, c4, cc;
4404  nkf_char t1, t2, t3, t4, mode, exit_mode;
4405  nkf_char lwsp_count;
4406  char *lwsp_buf;
4407  char *lwsp_buf_new;
4408  nkf_char lwsp_size = 128;
4409 
4410  if (mime_input_state.top != mime_input_state.last) { /* Something is in FIFO */
4411  return mime_input_buf(mime_input_state.top++);
4412  }
4416  return (*i_getc)(f);
4417  }
4418 
4419  if (mimebuf_f == FIXED_MIME)
4420  exit_mode = mime_decode_mode;
4421  else
4422  exit_mode = FALSE;
4423  if (mime_decode_mode == 'Q') {
4424  if ((c1 = (*i_mgetc)(f)) == EOF) return (EOF);
4425  restart_mime_q:
4426  if (c1=='_' && mimebuf_f != FIXED_MIME) return SP;
4427  if (c1<=SP || DEL<=c1) {
4428  mime_decode_mode = exit_mode; /* prepare for quit */
4429  return c1;
4430  }
4431  if (c1!='=' && (c1!='?' || mimebuf_f == FIXED_MIME)) {
4432  return c1;
4433  }
4434 
4435  mime_decode_mode = exit_mode; /* prepare for quit */
4436  if ((c2 = (*i_mgetc)(f)) == EOF) return (EOF);
4437  if (c1=='?'&&c2=='=' && mimebuf_f != FIXED_MIME) {
4438  /* end Q encoding */
4439  input_mode = exit_mode;
4440  lwsp_count = 0;
4441  lwsp_buf = nkf_xmalloc((lwsp_size+5)*sizeof(char));
4442  while ((c1=(*i_getc)(f))!=EOF) {
4443  switch (c1) {
4444  case LF:
4445  case CR:
4446  if (c1==LF) {
4447  if ((c1=(*i_getc)(f))!=EOF && nkf_isblank(c1)) {
4448  i_ungetc(SP,f);
4449  continue;
4450  } else {
4451  i_ungetc(c1,f);
4452  }
4453  c1 = LF;
4454  } else {
4455  if ((c1=(*i_getc)(f))!=EOF && c1 == LF) {
4456  if ((c1=(*i_getc)(f))!=EOF && nkf_isblank(c1)) {
4457  i_ungetc(SP,f);
4458  continue;
4459  } else {
4460  i_ungetc(c1,f);
4461  }
4462  i_ungetc(LF,f);
4463  } else {
4464  i_ungetc(c1,f);
4465  }
4466  c1 = CR;
4467  }
4468  break;
4469  case SP:
4470  case TAB:
4471  lwsp_buf[lwsp_count] = (unsigned char)c1;
4472  if (lwsp_count++>lwsp_size){
4473  lwsp_size <<= 1;
4474  lwsp_buf_new = nkf_xrealloc(lwsp_buf, (lwsp_size+5)*sizeof(char));
4475  lwsp_buf = lwsp_buf_new;
4476  }
4477  continue;
4478  }
4479  break;
4480  }
4481  if (lwsp_count > 0 && (c1 != '=' || (lwsp_buf[lwsp_count-1] != SP && lwsp_buf[lwsp_count-1] != TAB))) {
4482  i_ungetc(c1,f);
4483  for(lwsp_count--;lwsp_count>0;lwsp_count--)
4484  i_ungetc(lwsp_buf[lwsp_count],f);
4485  c1 = lwsp_buf[0];
4486  }
4487  nkf_xfree(lwsp_buf);
4488  return c1;
4489  }
4490  if (c1=='='&&c2<SP) { /* this is soft wrap */
4491  while((c1 = (*i_mgetc)(f)) <=SP) {
4492  if (c1 == EOF) return (EOF);
4493  }
4494  mime_decode_mode = 'Q'; /* still in MIME */
4495  goto restart_mime_q;
4496  }
4497  if (c1=='?') {
4498  mime_decode_mode = 'Q'; /* still in MIME */
4499  (*i_mungetc)(c2,f);
4500  return c1;
4501  }
4502  if ((c3 = (*i_mgetc)(f)) == EOF) return (EOF);
4503  if (c2<=SP) return c2;
4504  mime_decode_mode = 'Q'; /* still in MIME */
4505  return ((hex2bin(c2)<<4) + hex2bin(c3));
4506  }
4507 
4508  if (mime_decode_mode != 'B') {
4510  return (*i_mgetc)(f);
4511  }
4512 
4513 
4514  /* Base64 encoding */
4515  /*
4516  MIME allows line break in the middle of
4517  Base64, but we are very pessimistic in decoding
4518  in unbuf mode because MIME encoded code may broken by
4519  less or editor's control sequence (such as ESC-[-K in unbuffered
4520  mode. ignore incomplete MIME.
4521  */
4522  mode = mime_decode_mode;
4523  mime_decode_mode = exit_mode; /* prepare for quit */
4524 
4525  while ((c1 = (*i_mgetc)(f))<=SP) {
4526  if (c1==EOF)
4527  return (EOF);
4528  }
4529  mime_c2_retry:
4530  if ((c2 = (*i_mgetc)(f))<=SP) {
4531  if (c2==EOF)
4532  return (EOF);
4533  if (mime_f != STRICT_MIME) goto mime_c2_retry;
4535  return c2;
4536  }
4537  if ((c1 == '?') && (c2 == '=')) {
4538  input_mode = ASCII;
4539  lwsp_count = 0;
4540  lwsp_buf = nkf_xmalloc((lwsp_size+5)*sizeof(char));
4541  while ((c1=(*i_getc)(f))!=EOF) {
4542  switch (c1) {
4543  case LF:
4544  case CR:
4545  if (c1==LF) {
4546  if ((c1=(*i_getc)(f))!=EOF && nkf_isblank(c1)) {
4547  i_ungetc(SP,f);
4548  continue;
4549  } else {
4550  i_ungetc(c1,f);
4551  }
4552  c1 = LF;
4553  } else {
4554  if ((c1=(*i_getc)(f))!=EOF) {
4555  if (c1==SP) {
4556  i_ungetc(SP,f);
4557  continue;
4558  } else if ((c1=(*i_getc)(f))!=EOF && nkf_isblank(c1)) {
4559  i_ungetc(SP,f);
4560  continue;
4561  } else {
4562  i_ungetc(c1,f);
4563  }
4564  i_ungetc(LF,f);
4565  } else {
4566  i_ungetc(c1,f);
4567  }
4568  c1 = CR;
4569  }
4570  break;
4571  case SP:
4572  case TAB:
4573  lwsp_buf[lwsp_count] = (unsigned char)c1;
4574  if (lwsp_count++>lwsp_size){
4575  lwsp_size <<= 1;
4576  lwsp_buf_new = nkf_xrealloc(lwsp_buf, (lwsp_size+5)*sizeof(char));
4577  lwsp_buf = lwsp_buf_new;
4578  }
4579  continue;
4580  }
4581  break;
4582  }
4583  if (lwsp_count > 0 && (c1 != '=' || (lwsp_buf[lwsp_count-1] != SP && lwsp_buf[lwsp_count-1] != TAB))) {
4584  i_ungetc(c1,f);
4585  for(lwsp_count--;lwsp_count>0;lwsp_count--)
4586  i_ungetc(lwsp_buf[lwsp_count],f);
4587  c1 = lwsp_buf[0];
4588  }
4589  nkf_xfree(lwsp_buf);
4590  return c1;
4591  }
4592  mime_c3_retry:
4593  if ((c3 = (*i_mgetc)(f))<=SP) {
4594  if (c3==EOF)
4595  return (EOF);
4596  if (mime_f != STRICT_MIME) goto mime_c3_retry;
4598  return c3;
4599  }
4600  mime_c4_retry:
4601  if ((c4 = (*i_mgetc)(f))<=SP) {
4602  if (c4==EOF)
4603  return (EOF);
4604  if (mime_f != STRICT_MIME) goto mime_c4_retry;
4606  return c4;
4607  }
4608 
4609  mime_decode_mode = mode; /* still in MIME sigh... */
4610 
4611  /* BASE 64 decoding */
4612 
4613  t1 = 0x3f & base64decode(c1);
4614  t2 = 0x3f & base64decode(c2);
4615  t3 = 0x3f & base64decode(c3);
4616  t4 = 0x3f & base64decode(c4);
4617  cc = ((t1 << 2) & 0x0fc) | ((t2 >> 4) & 0x03);
4618  if (c2 != '=') {
4619  mime_input_buf(mime_input_state.last++) = (unsigned char)cc;
4620  cc = ((t2 << 4) & 0x0f0) | ((t3 >> 2) & 0x0f);
4621  if (c3 != '=') {
4622  mime_input_buf(mime_input_state.last++) = (unsigned char)cc;
4623  cc = ((t3 << 6) & 0x0c0) | (t4 & 0x3f);
4624  if (c4 != '=')
4625  mime_input_buf(mime_input_state.last++) = (unsigned char)cc;
4626  }
4627  } else {
4628  return c1;
4629  }
4630  return mime_input_buf(mime_input_state.top++);
4631 }
4632 
/* Base64 alphabet: the character at index v encodes the 6-bit value v. */
4633 static const char basis_64[] =
4634  "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
4635 
/*
 * Look-behind buffer of the MIME encoder: characters that have been
 * accepted but not yet written out.  count is the number of valid bytes
 * in buf.  The visible encoder code flushes once count exceeds
 * MIMEOUT_BUF_LENGTH, hence the one spare byte in the array.
 *
 * This is a different object from nkf_state->mimeout_state, which the
 * Base64 encoder uses to carry the previous input byte.
 */
4636 #define MIMEOUT_BUF_LENGTH 74
4637 static struct {
4638  unsigned char buf[MIMEOUT_BUF_LENGTH+1];
4639  int count;
4640 } mimeout_state;
4641 
4642 /*nkf_char mime_lastchar2, mime_lastchar1;*/
4643 
4644 static void
4646 {
4647  const unsigned char *p;
4648  int i;
4649  int j;
4650  p = mime_pattern[0];
4651  for(i=0;mime_pattern[i];i++) {
4652  if (mode == mime_encode[i]) {
4653  p = mime_pattern[i];
4654  break;
4655  }
4656  }
4657  mimeout_mode = mime_encode_method[i];
4658  i = 0;
4659  if (base64_count>45) {
4660  if (mimeout_state.count>0 && nkf_isblank(mimeout_state.buf[i])){
4661  (*o_mputc)(mimeout_state.buf[i]);
4662  i++;
4663  }
4665  (*o_mputc)(SP);
4666  base64_count = 1;
4667  if (mimeout_state.count>0 && nkf_isspace(mimeout_state.buf[i])) {
4668  i++;
4669  }
4670  }
4671  for (;i<mimeout_state.count;i++) {
4672  if (nkf_isspace(mimeout_state.buf[i])) {
4673  (*o_mputc)(mimeout_state.buf[i]);
4674  base64_count ++;
4675  } else {
4676  break;
4677  }
4678  }
4679  while(*p) {
4680  (*o_mputc)(*p++);
4681  base64_count ++;
4682  }
4683  j = mimeout_state.count;
4684  mimeout_state.count = 0;
4685  for (;i<j;i++) {
4686  mime_putc(mimeout_state.buf[i]);
4687  }
4688 }
4689 
4690 static void
4692 {
4693  if (mimeout_mode > 0){
4694  if (c2 == EOF){
4695  if (base64_count + mimeout_state.count/3*4> 73){
4696  (*o_base64conv)(EOF,0);
4698  (*o_base64conv)(0,SP);
4699  base64_count = 1;
4700  }
4701  } else {
4702  if ((c2 != 0 || c1 > DEL) && base64_count + mimeout_state.count/3*4> 66) {
4703  (*o_base64conv)(EOF,0);
4705  (*o_base64conv)(0,SP);
4706  base64_count = 1;
4707  mimeout_mode = -1;
4708  }
4709  }
4710  } else if (c2) {
4711  if (c2 != EOF && base64_count + mimeout_state.count/3*4> 60) {
4712  mimeout_mode = (output_mode==ASCII ||output_mode == ISO_8859_1) ? 'Q' : 'B';
4714  (*o_base64conv)(EOF,0);
4716  (*o_base64conv)(0,SP);
4717  base64_count = 1;
4718  mimeout_mode = -1;
4719  }
4720  }
4721 }
4722 
/*
 * Emit the encoded-word terminator "?=" through o_mputc, add its two
 * columns to base64_count and leave MIME output mode (mimeout_mode = 0).
 * The signature line is missing from this listing; judging by the call in
 * the following function this is presumably close_mime(void) -- confirm.
 */
4723 static void
4725 {
4726  (*o_mputc)('?');
4727  (*o_mputc)('=');
4728  base64_count += 2;
4729  mimeout_mode = 0;
4730 }
4731 
/*
 * Finish the encoded word that is currently open.  The signature line is
 * missing from this listing; presumably eof_mime(void) -- confirm.
 *
 * For the Base64 states 2 and 1 the bits still held in
 * nkf_state->mimeout_state are written out followed by '=' padding.
 * Then, for any positive mimeout_mode, the word is closed with
 * close_mime() unless mimeout_f is FIXED_MIME; in FIXED_MIME mode the
 * state is reset to 'B' instead (a 'Q' state is left as it is).
 */
4732 static void
4734 {
4735  switch(mimeout_mode) {
/* nothing is pending at a group boundary */
4736  case 'Q':
4737  case 'B':
4738  break;
/* one byte of a 3-byte group consumed: 2 bits pending, pad with "==" */
4739  case 2:
4740  (*o_mputc)(basis_64[((nkf_state->mimeout_state & 0x3)<< 4)]);
4741  (*o_mputc)('=');
4742  (*o_mputc)('=');
4743  base64_count += 3;
4744  break;
/* two bytes consumed: 4 bits pending, pad with "=" */
4745  case 1:
4746  (*o_mputc)(basis_64[((nkf_state->mimeout_state & 0xF) << 2)]);
4747  (*o_mputc)('=');
4748  base64_count += 2;
4749  break;
4750  }
4751  if (mimeout_mode > 0) {
4752  if (mimeout_f!=FIXED_MIME) {
4753  close_mime();
4754  } else if (mimeout_mode != 'Q')
4755  mimeout_mode = 'B';
4756  }
4757 }
4758 
/*
 * Encode one character according to mimeout_mode and write the result
 * through o_mputc, keeping base64_count (the output column counter) up to
 * date.  The signature line is missing from this listing; the body uses a
 * single value named c (presumably mimeout_addchar(nkf_char c) --
 * confirm).
 *
 * mimeout_mode doubles as the Base64 phase:
 *   'B'  start of a 3-byte group,
 *   2    one byte taken, its low 2 bits are pending,
 *   1    two bytes taken, the low 4 bits of the second are pending.
 * The pending byte is kept in nkf_state->mimeout_state.
 */
4759 static void
4761 {
4762  switch(mimeout_mode) {
4763  case 'Q':
/* line breaks pass through unencoded and restart the column count */
4764  if (c==CR||c==LF) {
4765  (*o_mputc)(c);
4766  base64_count = 0;
4767  } else if(!nkf_isalnum(c)) {
/* everything that is not alphanumeric is written as =XX */
4768  (*o_mputc)('=');
4769  (*o_mputc)(bin2hex(((c>>4)&0xf)));
4770  (*o_mputc)(bin2hex((c&0xf)));
4771  base64_count += 3;
4772  } else {
4773  (*o_mputc)(c);
4774  base64_count++;
4775  }
4776  break;
4777  case 'B':
/* first byte: emit its top 6 bits; assumes c is in 0..255 -- TODO confirm */
4778  nkf_state->mimeout_state=c;
4779  (*o_mputc)(basis_64[c>>2]);
4780  mimeout_mode=2;
4781  base64_count ++;
4782  break;
4783  case 2:
/* second byte: 2 pending bits plus the top 4 bits of c */
4784  (*o_mputc)(basis_64[((nkf_state->mimeout_state & 0x3)<< 4) | ((c & 0xF0) >> 4)]);
4785  nkf_state->mimeout_state=c;
4786  mimeout_mode=1;
4787  base64_count ++;
4788  break;
4789  case 1:
/* third byte: 4 pending bits plus the top 2 bits of c, then the low 6 bits of c */
4790  (*o_mputc)(basis_64[((nkf_state->mimeout_state & 0xF) << 2) | ((c & 0xC0) >>6)]);
4791  (*o_mputc)(basis_64[c & 0x3F]);
4792  mimeout_mode='B';
4793  base64_count += 2;
4794  break;
4795  default:
/* no encoded word is open: copy the character through */
4796  (*o_mputc)(c);
4797  base64_count++;
4798  break;
4799  }
4800 }
4801 
4802 static void
4804 {
4805  int i, j;
4806  nkf_char lastchar;
4807 
4808  if (mimeout_f == FIXED_MIME){
4809  if (mimeout_mode == 'Q'){
4810  if (base64_count > 71){
4811  if (c!=CR && c!=LF) {
4812  (*o_mputc)('=');
4814  }
4815  base64_count = 0;
4816  }
4817  }else{
4818  if (base64_count > 71){
4819  eof_mime();
4821  base64_count = 0;
4822  }
4823  if (c == EOF) { /* c==EOF */
4824  eof_mime();
4825  }
4826  }
4827  if (c != EOF) { /* c==EOF */
4828  mimeout_addchar(c);
4829  }
4830  return;
4831  }
4832 
4833  /* mimeout_f != FIXED_MIME */
4834 
4835  if (c == EOF) { /* c==EOF */
4836  if (mimeout_mode == -1 && mimeout_state.count > 1) open_mime(output_mode);
4837  j = mimeout_state.count;
4838  mimeout_state.count = 0;
4839  i = 0;
4840  if (mimeout_mode > 0) {
4841  if (!nkf_isblank(mimeout_state.buf[j-1])) {
4842  for (;i<j;i++) {
4843  if (nkf_isspace(mimeout_state.buf[i]) && base64_count < 71){
4844  break;
4845  }
4847  }
4848  eof_mime();
4849  for (;i<j;i++) {
4851  }
4852  } else {
4853  for (;i<j;i++) {
4855  }
4856  eof_mime();
4857  }
4858  } else {
4859  for (;i<j;i++) {
4861  }
4862  }
4863  return;
4864  }
4865 
4866  if (mimeout_state.count > 0){
4867  lastchar = mimeout_state.buf[mimeout_state.count - 1];
4868  }else{
4869  lastchar = -1;
4870  }
4871 
4872  if (mimeout_mode=='Q') {
4873  if (c <= DEL && (output_mode==ASCII ||output_mode == ISO_8859_1)) {
4874  if (c == CR || c == LF) {
4875  close_mime();
4876  (*o_mputc)(c);
4877  base64_count = 0;
4878  return;
4879  } else if (c <= SP) {
4880  close_mime();
4881  if (base64_count > 70) {
4883  base64_count = 0;
4884  }
4885  if (!nkf_isblank(c)) {
4886  (*o_mputc)(SP);
4887  base64_count++;
4888  }
4889  } else {
4890  if (base64_count > 70) {
4891  close_mime();
4893  (*o_mputc)(SP);
4894  base64_count = 1;
4896  }
4897  if (!nkf_noescape_mime(c)) {
4898  mimeout_addchar(c);
4899  return;
4900  }
4901  }
4902  if (c != 0x1B) {
4903  (*o_mputc)(c);
4904  base64_count++;
4905  return;
4906  }
4907  }
4908  }
4909 
4910  if (mimeout_mode <= 0) {
4911  if (c <= DEL && (output_mode==ASCII || output_mode == ISO_8859_1 ||
4912  output_mode == UTF_8)) {
4913  if (nkf_isspace(c)) {
4914  int flag = 0;
4915  if (mimeout_mode == -1) {
4916  flag = 1;
4917  }
4918  if (c==CR || c==LF) {
4919  if (flag) {
4921  output_mode = 0;
4922  } else {
4923  base64_count = 0;
4924  }
4925  }
4926  for (i=0;i<mimeout_state.count;i++) {
4927  (*o_mputc)(mimeout_state.buf[i]);
4928  if (mimeout_state.buf[i] == CR || mimeout_state.buf[i] == LF){
4929  base64_count = 0;
4930  }else{
4931  base64_count++;
4932  }
4933  }
4934  if (flag) {
4935  eof_mime();
4936  base64_count = 0;
4937  mimeout_mode = 0;
4938  }
4939  mimeout_state.buf[0] = (char)c;
4940  mimeout_state.count = 1;
4941  }else{
4942  if (base64_count > 1
4943  && base64_count + mimeout_state.count > 76
4944  && mimeout_state.buf[0] != CR && mimeout_state.buf[0] != LF){
4945  static const char *str = "boundary=\"";
4946  static int len = 10;
4947  i = 0;
4948 
4949  for (; i < mimeout_state.count - len; ++i) {
4950  if (!strncmp((char *)(mimeout_state.buf+i), str, len)) {
4951  i += len - 2;
4952  break;
4953  }
4954  }
4955 
4956  if (i == 0 || i == mimeout_state.count - len) {
4958  base64_count = 0;
4959  if (!nkf_isspace(mimeout_state.buf[0])){
4960  (*o_mputc)(SP);
4961  base64_count++;
4962  }
4963  }
4964  else {
4965  int j;
4966  for (j = 0; j <= i; ++j) {
4967  (*o_mputc)(mimeout_state.buf[j]);
4968  }
4970  base64_count = 1;
4971  for (; j <= mimeout_state.count; ++j) {
4972  mimeout_state.buf[j - i] = mimeout_state.buf[j];
4973  }
4974  mimeout_state.count -= i;
4975  }
4976  }
4977  mimeout_state.buf[mimeout_state.count++] = (char)c;
4978  if (mimeout_state.count>MIMEOUT_BUF_LENGTH) {
4980  }
4981  }
4982  return;
4983  }else{
4984  if (lastchar==CR || lastchar == LF){
4985  for (i=0;i<mimeout_state.count;i++) {
4986  (*o_mputc)(mimeout_state.buf[i]);
4987  }
4988  base64_count = 0;
4989  mimeout_state.count = 0;
4990  }
4991  if (lastchar==SP) {
4992  for (i=0;i<mimeout_state.count-1;i++) {
4993  (*o_mputc)(mimeout_state.buf[i]);
4994  base64_count++;
4995  }
4996  mimeout_state.buf[0] = SP;
4997  mimeout_state.count = 1;
4998  }
5000  }
5001  }else{
5002  /* mimeout_mode == 'B', 1, 2 */
5003  if (c <= DEL && (output_mode==ASCII || output_mode == ISO_8859_1 ||
5004  output_mode == UTF_8)) {
5005  if (lastchar == CR || lastchar == LF){
5006  if (nkf_isblank(c)) {
5007  for (i=0;i<mimeout_state.count;i++) {
5009  }
5010  mimeout_state.count = 0;
5011  } else {
5012  eof_mime();
5013  for (i=0;i<mimeout_state.count;i++) {
5014  (*o_mputc)(mimeout_state.buf[i]);
5015  }
5016  base64_count = 0;
5017  mimeout_state.count = 0;
5018  }
5019  mimeout_state.buf[mimeout_state.count++] = (char)c;
5020  return;
5021  }
5022  if (nkf_isspace(c)) {
5023  for (i=0;i<mimeout_state.count;i++) {
5024  if (SP<mimeout_state.buf[i] && mimeout_state.buf[i]<DEL) {
5025  eof_mime();
5026  for (i=0;i<mimeout_state.count;i++) {
5027  (*o_mputc)(mimeout_state.buf[i]);
5028  base64_count++;
5029  }
5030  mimeout_state.count = 0;
5031  }
5032  }
5033  mimeout_state.buf[mimeout_state.count++] = (char)c;
5034  if (mimeout_state.count>MIMEOUT_BUF_LENGTH) {
5035  eof_mime();
5036  for (i=0;i<mimeout_state.count;i++) {
5037  (*o_mputc)(mimeout_state.buf[i]);
5038  base64_count++;
5039  }
5040  mimeout_state.count = 0;
5041  }
5042  return;
5043  }
5044  if (mimeout_state.count>0 && SP<c && c!='=') {
5045  mimeout_state.buf[mimeout_state.count++] = (char)c;
5046  if (mimeout_state.count>MIMEOUT_BUF_LENGTH) {
5047  j = mimeout_state.count;
5048  mimeout_state.count = 0;
5049  for (i=0;i<j;i++) {
5051  }
5052  }
5053  return;
5054  }
5055  }
5056  }
5057  if (mimeout_state.count>0) {
5058  j = mimeout_state.count;
5059  mimeout_state.count = 0;
5060  for (i=0;i<j;i++) {
5061  if (mimeout_state.buf[i]==CR || mimeout_state.buf[i]==LF)
5062  break;
5064  }
5065  if (i<j) {
5066  eof_mime();
5067  base64_count=0;
5068  for (;i<j;i++) {
5069  (*o_mputc)(mimeout_state.buf[i]);
5070  }
5072  }
5073  }
5074  mimeout_addchar(c);
5075 }
5076 
5077 static void
5079 {
5080  mime_prechar(c2, c1);
5081  (*o_base64conv)(c2,c1);
5082 }
5083 
5084 #ifdef HAVE_ICONV_H
5085 typedef struct nkf_iconv_t {
5086  iconv_t cd;
5087  char *input_buffer;
5088  size_t input_buffer_size;
5089  char *output_buffer;
5090  size_t output_buffer_size;
5091 }
5092 
5093 static nkf_iconv_t
5094 nkf_iconv_new(char *tocode, char *fromcode)
5095 {
5096  nkf_iconv_t converter;
5097 
5098  converter->input_buffer_size = IOBUF_SIZE;
5099  converter->input_buffer = nkf_xmalloc(converter->input_buffer_size);
5100  converter->output_buffer_size = IOBUF_SIZE * 2;
5101  converter->output_buffer = nkf_xmalloc(converter->output_buffer_size);
5102  converter->cd = iconv_open(tocode, fromcode);
5103  if (converter->cd == (iconv_t)-1)
5104  {
5105  switch (errno) {
5106  case EINVAL:
5107  perror(fprintf("iconv doesn't support %s to %s conversion.", fromcode, tocode));
5108  return -1;
5109  default:
5110  perror("can't iconv_open");
5111  }
5112  }
5113 }
5114 
5115 static size_t
5116 nkf_iconv_convert(nkf_iconv_t *converter, FILE *input)
5117 {
5118  size_t invalid = (size_t)0;
5119  char *input_buffer = converter->input_buffer;
5120  size_t input_length = (size_t)0;
5121  char *output_buffer = converter->output_buffer;
5122  size_t output_length = converter->output_buffer_size;
5123  int c;
5124 
5125  do {
5126  if (c != EOF) {
5127  while ((c = (*i_getc)(f)) != EOF) {
5128  input_buffer[input_length++] = c;
5129  if (input_length < converter->input_buffer_size) break;
5130  }
5131  }
5132 
5133  size_t ret = iconv(converter->cd, &input_buffer, &input_length, &output_buffer, &output_length);
5134  while (output_length-- > 0) {
5135  (*o_putc)(output_buffer[converter->output_buffer_size-output_length]);
5136  }
5137  if (ret == (size_t) - 1) {
5138  switch (errno) {
5139  case EINVAL:
5140  if (input_buffer != converter->input_buffer)
5141  memmove(converter->input_buffer, input_buffer, input_length);
5142  break;
5143  case E2BIG:
5144  converter->output_buffer_size *= 2;
5145  output_buffer = realloc(converter->outbuf, converter->output_buffer_size);
5146  if (output_buffer == NULL) {
5147  perror("can't realloc");
5148  return -1;
5149  }
5150  converter->output_buffer = output_buffer;
5151  break;
5152  default:
5153  perror("can't iconv");
5154  return -1;
5155  }
5156  } else {
5157  invalid += ret;
5158  }
5159  } while (1);
5160 
5161  return invalid;
5162 }
5163 
5164 
5165 static void
5166 nkf_iconv_close(nkf_iconv_t *convert)
5167 {
5168  nkf_xfree(converter->inbuf);
5169  nkf_xfree(converter->outbuf);
5170  iconv_close(converter->cd);
5171 }
5172 #endif
5173 
5174 
5175 static void
5176 reinit(void)
5177 {
5178  {
5179  struct input_code *p = input_code_list;
5180  while (p->name){
5181  status_reinit(p++);
5182  }
5183  }
5184  unbuf_f = FALSE;
5185  estab_f = FALSE;
5186  nop_f = FALSE;
5187  binmode_f = TRUE;
5188  rot_f = FALSE;
5189  hira_f = FALSE;
5190  alpha_f = FALSE;
5192  mime_decode_f = FALSE;
5193  mimebuf_f = FALSE;
5194  broken_f = FALSE;
5195  iso8859_f = FALSE;
5196  mimeout_f = FALSE;
5198  iso2022jp_f = FALSE;
5199 #if defined(UTF8_INPUT_ENABLE) || defined(UTF8_OUTPUT_ENABLE)
5201 #endif
5202 #ifdef UTF8_INPUT_ENABLE
5203  no_cp932ext_f = FALSE;
5206  unicode_subchar = '?';
5208 #endif
5209 #ifdef UTF8_OUTPUT_ENABLE
5210  output_bom_f = FALSE;
5212 #endif
5213 #ifdef UNICODE_NORMALIZATION
5214  nfc_f = FALSE;
5215 #endif
5216 #ifdef INPUT_OPTION
5217  cap_f = FALSE;
5218  url_f = FALSE;
5219  numchar_f = FALSE;
5220 #endif
5221 #ifdef CHECK_OPTION
5222  noout_f = FALSE;
5223  debug_f = FALSE;
5224 #endif
5225  guess_f = 0;
5226 #ifdef EXEC_IO
5227  exec_f = 0;
5228 #endif
5229 #ifdef SHIFTJIS_CP932
5230  cp51932_f = TRUE;
5231  cp932inv_f = TRUE;
5232 #endif
5233 #ifdef X0212_ENABLE
5234  x0212_f = FALSE;
5235  x0213_f = FALSE;
5236 #endif
5237  {
5238  int i;
5239  for (i = 0; i < 256; i++){
5240  prefix_table[i] = 0;
5241  }
5242  }
5243  hold_count = 0;
5244  mimeout_state.count = 0;
5245  mimeout_mode = 0;
5246  base64_count = 0;
5247  f_line = 0;
5248  f_prev = 0;
5250  fold_f = FALSE;
5251  fold_len = 0;
5262  o_putc = std_putc;
5263  i_getc = std_getc;
5264  i_ungetc = std_ungetc;
5265  i_bgetc = std_getc;
5267  o_mputc = std_putc;
5268  i_mgetc = std_getc;
5272  output_mode = ASCII;
5273  input_mode = ASCII;
5275  file_out_f = FALSE;
5276  eolmode_f = 0;
5277  input_eol = 0;
5278  prev_cr = 0;
5279  option_mode = 0;
5280  z_prev2=0,z_prev1=0;
5281 #ifdef CHECK_OPTION
5282  iconv_for_check = 0;
5283 #endif
5284  input_codename = NULL;
5285  input_encoding = NULL;
5286  output_encoding = NULL;
5287  nkf_state_init();
5288 #ifdef WIN32DLL
5289  reinitdll();
5290 #endif /*WIN32DLL*/
5291 }
5292 
/*
 * Connect the input/output filter chain according to the option flags.
 *
 * NOTE(review): the line carrying the function name is absent from this
 * listing; the caller visible in this file invokes it as
 * module_connection() -- confirm against the original source.
 * NOTE(review): several if-bodies below look empty only because the
 * listing omits lines (the embedded line numbers skip); do not treat them
 * as no-ops.
 *
 * Steps visible here:
 *  - apply input_encoding if one was given;
 *  - resolve output_encoding, falling back to nkf_default_encoding();
 *  - reset o_putc / i_getc / i_ungetc to the std_* functions and layer
 *    the optional filters on top of them;
 *  - select the input converter with set_iconv() when input_encoding is
 *    known, then re-initialise every entry of input_code_list.
 *
 * Returns 0 on success, -1 when no output encoding could be determined
 * and neither noout_f nor guess_f is set.
 */
5293 static int
5295 {
5296  if (input_encoding) set_input_encoding(input_encoding);
5297  if (!output_encoding) {
5298  output_encoding = nkf_default_encoding();
5299  }
 /* still nothing: tolerated only when no output is produced anyway */
5300  if (!output_encoding) {
5301  if (noout_f || guess_f) output_encoding = nkf_enc_from_index(ISO_2022_JP);
5302  else return -1;
5303  }
5304  set_output_encoding(output_encoding);
5305  oconv = nkf_enc_to_oconv(output_encoding);
5306  o_putc = std_putc;
5307  if (nkf_enc_unicode_p(output_encoding))
5308  output_mode = UTF_8;
5309 
 /* NOTE(review): body line is missing from this listing */
5310  if (x0201_f == NKF_UNSPECIFIED) {
5312  }
5313 
5314  /* replace continuation module, from output side */
5315 
5316  /* output redirection */
5317 #ifdef CHECK_OPTION
5318  if (noout_f || guess_f){
5319  o_putc = no_putc;
5320  }
5321 #endif
 /* mime_putc is placed in front of the current o_putc, which is kept in o_mputc */
5322  if (mimeout_f) {
5323  o_mputc = o_putc;
5324  o_putc = mime_putc;
5325  if (mimeout_f == TRUE) {
5327  }
5328  /* base64_count = 0; */
5329  }
5330 
5331  if (eolmode_f || guess_f) {
5333  }
5334  if (rot_f) {
5336  }
5337  if (iso2022jp_f) {
5339  }
5340  if (hira_f) {
5342  }
5343  if (fold_f) {
5345  f_line = 0;
5346  }
 /* z_conv is placed in front of the current oconv, which is kept in o_zconv */
5347  if (alpha_f || x0201_f) {
5348  o_zconv = oconv; oconv = z_conv;
5349  }
5350 
5351  i_getc = std_getc;
5352  i_ungetc = std_ungetc;
5353  /* input redirection */
5354 #ifdef INPUT_OPTION
5355  if (cap_f){
5358  }
5359  if (url_f){
5362  }
5363 #endif
5364 #ifdef NUMCHAR_OPTION
5365  if (numchar_f){
5368  }
5369 #endif
5370 #ifdef UNICODE_NORMALIZATION
5371  if (nfc_f){
5374  }
5375 #endif
5376  if (mime_f && mimebuf_f==FIXED_MIME) {
5379  }
5380  if (broken_f & 1) {
5383  }
 /* an explicit input encoding selects its converter directly */
5384  if (input_encoding) {
5385  set_iconv(-TRUE, nkf_enc_to_iconv(input_encoding));
5386  } else {
5388  }
5389 
 /* reset every entry of input_code_list (terminated by a NULL name) */
5390  {
5391  struct input_code *p = input_code_list;
5392  while (p->name){
5393  status_reinit(p++);
5394  }
5395  }
5396  return 0;
5397 }
5398 
5399 /*
5400  Conversion main loop. Code detection only.
5401  */
5402 
/*
 * Pass-through copy: read characters with i_getc until EOF and hand each
 * one to o_putc, then hand EOF to o_putc as well. Always returns 1.
 * Compiled only when neither PERL_XS nor WIN32DLL is defined.
 *
 * NOTE(review): the line carrying the function name and parameter list
 * is absent from this listing; main() in this file calls it as
 * noconvert(stdin) / noconvert(fin), so `f` is presumably a FILE *.
 * NOTE(review): the statement belonging to `if (nop_f == 2)` is also
 * missing from the listing (the embedded numbers skip 5410); as printed
 * here the `while` loop would appear to be the if-body, which is most
 * likely not what the original source does -- confirm before editing.
 */
5403 #if !defined(PERL_XS) && !defined(WIN32DLL)
5404 static nkf_char
5406 {
5407  nkf_char c;
5408 
5409  if (nop_f == 2)
5411  while ((c = (*i_getc)(f)) != EOF)
5412  (*o_putc)(c);
 /* forward EOF to the output function as the final call */
5413  (*o_putc)(EOF);
5414  return 1;
5415 }
5416 #endif
5417 
/*
 * Loop-control shorthands for the byte-classification loop that follows.
 * They rely on locals of the enclosing function (c1, c2, shift_mode) and
 * expand to bare `continue` / `break`, so they are only meaningful
 * directly inside that loop.
 *
 * SKIP and MORE each expand to TWO statements ("c2=...;continue").
 * Used as the body of an unbraced `if`, only the assignment would be
 * conditional; always use them inside a braced block.
 *
 * SEND expands to a no-op expression: control simply falls through to
 * the dispatch code placed after the classification chain.
 */
5418 #define NEXT continue /* no output, get next */
5419 #define SKIP c2=0;continue /* clear pending c2, no output, get next */
5420 #define MORE c2=c1;continue /* keep c1 as pending c2, need one more byte */
5421 #define SEND (void)0 /* fall through: output c1 and c2, get next */
5422 #define LAST break /* end of loop, go closing */
5423 #define set_input_mode(mode) do { \
5424  input_mode = mode; \
5425  shift_mode = 0; \
5426  set_input_codename("ISO-2022-JP"); \
5427  debug("ISO-2022-JP"); \
5428 } while (0)
5429 
/*
 * Main conversion loop: read the input stream `f` through i_getc,
 * classify each byte, and feed complete characters to iconv / oconv.
 *
 * NOTE(review): the line carrying the function name and parameter list
 * is absent from this listing; main() in this file calls it as
 * kanji_convert(stdin) / kanji_convert(fin).
 * NOTE(review): a number of statements inside the escape-sequence
 * branches are missing from the listing (the embedded numbers skip), so
 * some branches look like a bare SKIP. Consult the original source
 * before changing any of them.
 *
 * Local state:
 *  c1         byte just read
 *  c2         pending first byte of a multi-byte character (0 = none)
 *  c3, c4     additional bytes for 3/4-byte sequences and UTF-16/32
 *  shift_mode set to 1 by SO, cleared by SI
 *  g2         set to ISO_8859_1 by "ESC . A"; consulted on "ESC N"
 *  is_8bit    TRUE when input_encoding is set and not ASCII compatible
 *
 * Returns -1 when module_connection() fails, 0 otherwise.
 */
5430 static int
5432 {
5433  nkf_char c1=0, c2=0, c3=0, c4=0;
5434  int shift_mode = 0; /* 0, 1, 2, 3 */
5435  int g2 = 0;
5436  int is_8bit = FALSE;
5437 
5438  if (input_encoding && !nkf_enc_asciicompat(input_encoding)) {
5439  is_8bit = TRUE;
5440  }
5441 
5442  input_mode = ASCII;
5443  output_mode = ASCII;
5444 
5445  if (module_connection() < 0) {
5446 #if !defined(PERL_XS) && !defined(WIN32DLL)
5447  fprintf(stderr, "no output encoding given\n");
5448 #endif
5449  return -1;
5450  }
5451  check_bom(f);
5452 
5453 #ifdef UTF8_INPUT_ENABLE
 /* UTF-32: consume four bytes per unit; a short trailing group is dropped */
5454  if(iconv == w_iconv32){
5455  while ((c1 = (*i_getc)(f)) != EOF &&
5456  (c2 = (*i_getc)(f)) != EOF &&
5457  (c3 = (*i_getc)(f)) != EOF &&
5458  (c4 = (*i_getc)(f)) != EOF) {
5459  nkf_iconv_utf_32(c1, c2, c3, c4);
5460  }
5461  goto finished;
5462  }
 /* UTF-16: two bytes per unit, two more when the converter asks for them */
5463  else if (iconv == w_iconv16) {
5464  while ((c1 = (*i_getc)(f)) != EOF &&
5465  (c2 = (*i_getc)(f)) != EOF) {
5466  if (nkf_iconv_utf_16(c1, c2, 0, 0) == NKF_ICONV_NEED_TWO_MORE_BYTES &&
5467  (c3 = (*i_getc)(f)) != EOF &&
5468  (c4 = (*i_getc)(f)) != EOF) {
5469  nkf_iconv_utf_16(c1, c2, c3, c4);
5470  }
5471  }
5472  goto finished;
5473  }
5474 #endif
5475 
 /*
  * Byte-oriented input. Each iteration classifies c1; branches ending in
  * SEND fall through to the `switch(input_mode)` dispatch at the bottom,
  * the others restart or leave the loop via NEXT/SKIP/MORE/LAST.
  */
5476  while ((c1 = (*i_getc)(f)) != EOF) {
5477 #ifdef INPUT_CODE_FIX
5478  if (!input_encoding)
5479 #endif
5480  code_status(c1);
5481  if (c2) {
5482  /* second byte */
5483  if (c2 > DEL) {
5484  /* in case of 8th bit is on */
5485  if (!estab_f&&!mime_decode_mode) {
5486  /* in case of not established yet */
5487  /* It is still ambiguous */
5488  if (h_conv(f, c2, c1)==EOF) {
5489  LAST;
5490  }
5491  else {
5492  SKIP;
5493  }
5494  }
5495  else {
5496  /* in case of already established */
5497  if (c1 < 0x40) {
5498  /* ignore bogus code */
5499  SKIP;
5500  } else {
5501  SEND;
5502  }
5503  }
5504  }
5505  else {
5506  /* 2nd byte of 7 bit code or SJIS */
5507  SEND;
5508  }
5509  }
5510  else if (nkf_char_unicode_p(c1)) {
5511  (*oconv)(0, c1);
5512  NEXT;
5513  }
5514  else {
5515  /* first byte */
5516  if (input_mode == JIS_X_0208 && DEL <= c1 && c1 < 0x92) {
5517  /* CP5022x */
5518  MORE;
5519  }else if (input_codename && input_codename[0] == 'I' &&
5520  0xA1 <= c1 && c1 <= 0xDF) {
5521  /* JIS X 0201 Katakana in 8bit JIS */
5522  c2 = JIS_X_0201_1976_K;
5523  c1 &= 0x7f;
5524  SEND;
5525  } else if (c1 > DEL) {
5526  /* 8 bit code */
5527  if (!estab_f && !iso8859_f) {
5528  /* not established yet */
5529  MORE;
5530  } else { /* estab_f==TRUE */
5531  if (iso8859_f) {
5532  c2 = ISO_8859_1;
5533  c1 &= 0x7f;
5534  SEND;
5535  }
5536  else if ((iconv == s_iconv && 0xA0 <= c1 && c1 <= 0xDF) ||
5537  (ms_ucs_map_f == UCS_MAP_CP10001 && (c1 == 0xFD || c1 == 0xFE))) {
5538  /* JIS X 0201 */
5539  c2 = JIS_X_0201_1976_K;
5540  c1 &= 0x7f;
5541  SEND;
5542  }
5543  else {
5544  /* already established */
5545  MORE;
5546  }
5547  }
5548  } else if (SP < c1 && c1 < DEL) {
5549  /* in case of Roman characters */
5550  if (shift_mode) {
5551  /* output 1 shifted byte */
5552  if (iso8859_f) {
5553  c2 = ISO_8859_1;
5554  SEND;
5555  } else if (nkf_byte_jisx0201_katakana_p(c1)){
5556  /* output 1 shifted byte */
5557  c2 = JIS_X_0201_1976_K;
5558  SEND;
5559  } else {
5560  /* look like bogus code */
5561  SKIP;
5562  }
5563  } else if (input_mode == JIS_X_0208 || input_mode == JIS_X_0212 ||
5565  /* in case of Kanji shifted */
5566  MORE;
5567  } else if (c1 == '=' && mime_f && !mime_decode_mode) {
5568  /* Check MIME code */
5569  if ((c1 = (*i_getc)(f)) == EOF) {
5570  (*oconv)(0, '=');
5571  LAST;
5572  } else if (c1 == '?') {
5573  /* =? is mime conversion start sequence */
5574  if(mime_f == STRICT_MIME) {
5575  /* check in real detail */
5576  if (mime_begin_strict(f) == EOF)
5577  LAST;
5578  SKIP;
5579  } else if (mime_begin(f) == EOF)
5580  LAST;
5581  SKIP;
5582  } else {
 /* not a MIME start: emit the '=' and push the following byte back */
5583  (*oconv)(0, '=');
5584  (*i_ungetc)(c1,f);
5585  SKIP;
5586  }
5587  } else {
5588  /* normal ASCII code */
5589  SEND;
5590  }
5591  } else if (c1 == SI && (!is_8bit || mime_decode_mode)) {
5592  shift_mode = 0;
5593  SKIP;
5594  } else if (c1 == SO && (!is_8bit || mime_decode_mode)) {
5595  shift_mode = 1;
5596  SKIP;
5597  } else if (c1 == ESC && (!is_8bit || mime_decode_mode)) {
 /* escape sequence: the byte(s) after ESC select the branch below */
5598  if ((c1 = (*i_getc)(f)) == EOF) {
5599  (*oconv)(0, ESC);
5600  LAST;
5601  }
5602  else if (c1 == '&') {
5603  /* IRR */
5604  if ((c1 = (*i_getc)(f)) == EOF) {
5605  LAST;
5606  } else {
5607  SKIP;
5608  }
5609  }
5610  else if (c1 == '$') {
5611  /* GZDMx */
5612  if ((c1 = (*i_getc)(f)) == EOF) {
5613  /* don't send bogus code
5614  (*oconv)(0, ESC);
5615  (*oconv)(0, '$'); */
5616  LAST;
5617  } else if (c1 == '@' || c1 == 'B') {
5618  /* JIS X 0208 */
5620  SKIP;
5621  } else if (c1 == '(') {
5622  /* GZDM4 */
5623  if ((c1 = (*i_getc)(f)) == EOF) {
5624  /* don't send bogus code
5625  (*oconv)(0, ESC);
5626  (*oconv)(0, '$');
5627  (*oconv)(0, '(');
5628  */
5629  LAST;
5630  } else if (c1 == '@'|| c1 == 'B') {
5631  /* JIS X 0208 */
5633  SKIP;
5634 #ifdef X0212_ENABLE
5635  } else if (c1 == 'D'){
5637  SKIP;
5638 #endif /* X0212_ENABLE */
5639  } else if (c1 == 'O' || c1 == 'Q'){
5641  SKIP;
5642  } else if (c1 == 'P'){
5644  SKIP;
5645  } else {
5646  /* could be some special code */
5647  (*oconv)(0, ESC);
5648  (*oconv)(0, '$');
5649  (*oconv)(0, '(');
5650  (*oconv)(0, c1);
5651  SKIP;
5652  }
5653  } else if (broken_f&0x2) {
5654  /* accept any ESC-(-x as broken code ... */
5656  shift_mode = 0;
5657  SKIP;
5658  } else {
5659  (*oconv)(0, ESC);
5660  (*oconv)(0, '$');
5661  (*oconv)(0, c1);
5662  SKIP;
5663  }
5664  } else if (c1 == '(') {
5665  /* GZD4 */
5666  if ((c1 = (*i_getc)(f)) == EOF) {
5667  /* don't send bogus code
5668  (*oconv)(0, ESC);
5669  (*oconv)(0, '('); */
5670  LAST;
5671  }
5672  else if (c1 == 'I') {
5673  /* JIS X 0201 Katakana */
5675  SKIP;
5676  }
5677  else if (c1 == 'B' || c1 == 'J' || c1 == 'H') {
5678  /* ISO-646IRV:1983 or JIS X 0201 Roman or JUNET */
5680  SKIP;
5681  }
5682  else if (broken_f&0x2) {
5684  SKIP;
5685  }
5686  else {
5687  (*oconv)(0, ESC);
5688  (*oconv)(0, '(');
5689  SEND;
5690  }
5691  }
5692  else if (c1 == '.') {
5693  /* G2D6 */
5694  if ((c1 = (*i_getc)(f)) == EOF) {
5695  LAST;
5696  }
5697  else if (c1 == 'A') {
5698  /* ISO-8859-1 */
5699  g2 = ISO_8859_1;
5700  SKIP;
5701  }
5702  else {
5703  (*oconv)(0, ESC);
5704  (*oconv)(0, '.');
5705  SEND;
5706  }
5707  }
5708  else if (c1 == 'N') {
5709  /* SS2 */
 /* NOTE(review): the byte read here is not compared with EOF before use */
5710  c1 = (*i_getc)(f);
5711  if (g2 == ISO_8859_1) {
5712  c2 = ISO_8859_1;
5713  SEND;
5714  }else{
5715  (*i_ungetc)(c1, f);
5716  /* lonely ESC */
5717  (*oconv)(0, ESC);
5718  SEND;
5719  }
5720  }
5721  else {
5722  /* lonely ESC */
5723  (*oconv)(0, ESC);
5724  SEND;
5725  }
5726  } else if (c1 == ESC && iconv == s_iconv) {
5727  /* ESC in Shift_JIS */
5728  if ((c1 = (*i_getc)(f)) == EOF) {
5729  (*oconv)(0, ESC);
5730  LAST;
5731  } else if (c1 == '$') {
5732  /* J-PHONE emoji */
5733  if ((c1 = (*i_getc)(f)) == EOF) {
5734  LAST;
5735  } else if (('E' <= c1 && c1 <= 'G') ||
5736  ('O' <= c1 && c1 <= 'Q')) {
5737  /*
5738  NUM : 0 1 2 3 4 5
5739  BYTE: G E F O P Q
5740  C%7 : 1 6 0 2 3 4
5741  C%7 : 0 1 2 3 4 5 6
5742  NUM : 2 0 3 4 5 X 1
5743  */
 /* table is indexed by c1 % 7, as laid out in the comment above */
5744  static const nkf_char jphone_emoji_first_table[7] =
5745  {0xE1E0, 0xDFE0, 0xE2E0, 0xE3E0, 0xE4E0, 0xDFE0, 0xE0E0};
5746  c3 = nkf_char_unicode_new(jphone_emoji_first_table[c1 % 7]);
5747  if ((c1 = (*i_getc)(f)) == EOF) LAST;
 /*
  * NOTE(review): LAST is `break`; inside this inner while it leaves only
  * the inner loop, after which the SKIP below continues the outer loop.
  */
5748  while (SP <= c1 && c1 <= 'z') {
5749  (*oconv)(0, c1 + c3);
5750  if ((c1 = (*i_getc)(f)) == EOF) LAST;
5751  }
5752  SKIP;
5753  }
5754  else {
5755  (*oconv)(0, ESC);
5756  (*oconv)(0, '$');
5757  SEND;
5758  }
5759  }
5760  else {
5761  /* lonely ESC */
5762  (*oconv)(0, ESC);
5763  SEND;
5764  }
5765  } else if (c1 == LF || c1 == CR) {
5766  if (broken_f&4) {
5768  SEND;
5769  } else if (mime_decode_f && !mime_decode_mode){
 /*
  * Line break while MIME decoding is enabled but not in progress: when
  * the break is followed by SP, the break is dropped and the SP is
  * pushed back; otherwise the break is sent on.
  */
5770  if (c1 == LF) {
5771  if ((c1=(*i_getc)(f))!=EOF && c1 == SP) {
5772  i_ungetc(SP,f);
5773  continue;
5774  } else {
5775  i_ungetc(c1,f);
5776  }
5777  c1 = LF;
5778  SEND;
5779  } else { /* if (c1 == CR)*/
5780  if ((c1=(*i_getc)(f))!=EOF) {
5781  if (c1==SP) {
5782  i_ungetc(SP,f);
5783  continue;
5784  } else if (c1 == LF && (c1=(*i_getc)(f))!=EOF && c1 == SP) {
5785  i_ungetc(SP,f);
5786  continue;
5787  } else {
5788  i_ungetc(c1,f);
5789  }
5790  i_ungetc(LF,f);
5791  } else {
5792  i_ungetc(c1,f);
5793  }
5794  c1 = CR;
5795  SEND;
5796  }
5797  }
5798  } else
5799  SEND;
5800  }
5801  /* send: */
 /* dispatch the completed (c2, c1) pair according to the current input mode */
5802  switch(input_mode){
5803  case ASCII:
5804  switch ((*iconv)(c2, c1, 0)) { /* can be EUC / SJIS / UTF-8 */
5805  case -2:
5806  /* 4 bytes UTF-8 */
5807  if ((c3 = (*i_getc)(f)) != EOF) {
5808  code_status(c3);
5809  c3 <<= 8;
5810  if ((c4 = (*i_getc)(f)) != EOF) {
5811  code_status(c4);
5812  (*iconv)(c2, c1, c3|c4);
5813  }
5814  }
5815  break;
5816  case -1:
5817  /* 3 bytes EUC or UTF-8 */
5818  if ((c3 = (*i_getc)(f)) != EOF) {
5819  code_status(c3);
5820  (*iconv)(c2, c1, c3);
5821  }
5822  break;
5823  }
5824  break;
5825  case JIS_X_0208:
5826  case JIS_X_0213_1:
5827  if (ms_ucs_map_f &&
5828  0x7F <= c2 && c2 <= 0x92 &&
5829  0x21 <= c1 && c1 <= 0x7E) {
5830  /* CP932 UDC */
5831  c1 = nkf_char_unicode_new((c2 - 0x7F) * 94 + c1 - 0x21 + 0xE000);
5832  c2 = 0;
5833  }
5834  (*oconv)(c2, c1); /* this is JIS, not SJIS/EUC case */
5835  break;
5836 #ifdef X0212_ENABLE
5837  case JIS_X_0212:
5838  (*oconv)(PREFIX_EUCG3 | c2, c1);
5839  break;
5840 #endif /* X0212_ENABLE */
5841  case JIS_X_0213_2:
5842  (*oconv)(PREFIX_EUCG3 | c2, c1);
5843  break;
5844  default:
5845  (*oconv)(input_mode, c1); /* other special case */
5846  }
5847 
5848  c2 = 0;
5849  c3 = 0;
5850  continue;
5851  /* goto next_word */
5852  }
5853 
5854 finished:
5855  /* epilogue */
 /* signal end of input to the input converter */
5856  (*iconv)(EOF, 0, 0);
5857  if (!input_codename)
5858  {
 /* no code name recorded: choose the input_code_list entry with the lowest score */
5859  if (is_8bit) {
5860  struct input_code *p = input_code_list;
5861  struct input_code *result = p;
5862  while (p->name){
5863  if (p->score < result->score) result = p;
5864  ++p;
5865  }
5866  set_input_codename(result->name);
5867 #ifdef CHECK_OPTION
5868  debug(result->name);
5869 #endif
5870  }
5871  }
5872  return 0;
5873 }
5874 
5875 /*
5876  * int options(unsigned char *cp)
5877  *
5878  * return values:
5879  * 0: success
5880  * -1: ArgumentError
5881  */
/*
 * Parse one option string and update the global option flags.
 *
 * cp points at a string such as "-w16L0" or "--ic=UTF-8"; everything up
 * to and including the first '-' is skipped, then the remaining
 * characters are interpreted one option letter at a time.
 *
 * Long options ("--name" or "--name=value") are looked up in
 * long_option[]. When the matching entry has a non-empty alias, parsing
 * continues inside the alias string and resumes at cp_back once the
 * alias is exhausted; otherwise the option is handled by name below.
 *
 * Once "--" followed by end-of-string or SP has been seen, option_mode
 * is set to 1 and every later call returns 0 immediately.
 *
 * Unless PERL_XS is defined, "--help", -v and -V terminate the process
 * via exit(EXIT_SUCCESS).
 *
 * NOTE(review): several case/if bodies look empty or incomplete because
 * this listing omits lines (the embedded numbers skip); check the
 * original source before changing them.
 */
5882 static int
5883 options(unsigned char *cp)
5884 {
5885  nkf_char i, j;
5886  unsigned char *p;
5887  unsigned char *cp_back = NULL;
5888  nkf_encoding *enc;
5889 
5890  if (option_mode==1)
5891  return 0;
 /* skip to just past the first '-' */
5892  while(*cp && *cp++!='-');
5893  while (*cp || cp_back) {
 /* end of an alias string: resume in the caller's string */
5894  if(!*cp){
5895  cp = cp_back;
5896  cp_back = NULL;
5897  continue;
5898  }
5899  p = 0;
5900  switch (*cp++) {
5901  case '-': /* literal options */
5902  if (!*cp || *cp == SP) { /* ignore the rest of arguments */
5903  option_mode = 1;
5904  return 0;
5905  }
 /* find the table entry whose name matches; p is left pointing one past the matched text in cp */
5906  for (i=0;i<(int)(sizeof(long_option)/sizeof(long_option[0]));i++) {
5907  p = (unsigned char *)long_option[i].name;
5908  for (j=0;*p && *p != '=' && *p == cp[j];p++, j++);
5909  if (*p == cp[j] || cp[j] == SP){
5910  p = &cp[j] + 1;
5911  break;
5912  }
5913  p = 0;
5914  }
5915  if (p == 0) {
5916 #if !defined(PERL_XS) && !defined(WIN32DLL)
5917  fprintf(stderr, "unknown long option: --%s\n", cp);
5918 #endif
5919  return -1;
5920  }
 /* advance cp to the end of this long option (NUL or SP) */
5921  while(*cp && *cp != SP && cp++);
5922  if (long_option[i].alias[0]){
5923  cp_back = cp;
5924  cp = (unsigned char *)long_option[i].alias;
5925  }else{
5926 #ifndef PERL_XS
5927  if (strcmp(long_option[i].name, "help") == 0){
5928  usage();
5929  exit(EXIT_SUCCESS);
5930  }
5931 #endif
5932  if (strcmp(long_option[i].name, "ic=") == 0){
5933  enc = nkf_enc_find((char *)p);
 /* an unrecognised encoding name is silently ignored */
5934  if (!enc) continue;
5935  input_encoding = enc;
5936  continue;
5937  }
5938  if (strcmp(long_option[i].name, "oc=") == 0){
5939  enc = nkf_enc_find((char *)p);
5940  /* if (enc <= 0) continue; */
5941  if (!enc) continue;
5942  output_encoding = enc;
5943  continue;
5944  }
5945  if (strcmp(long_option[i].name, "guess=") == 0){
5946  if (p[0] == '0' || p[0] == '1') {
5947  guess_f = 1;
5948  } else {
5949  guess_f = 2;
5950  }
5951  continue;
5952  }
5953 #ifdef OVERWRITE
5954  if (strcmp(long_option[i].name, "overwrite") == 0){
5955  file_out_f = TRUE;
5956  overwrite_f = TRUE;
5958  continue;
5959  }
5960  if (strcmp(long_option[i].name, "overwrite=") == 0){
5961  file_out_f = TRUE;
5962  overwrite_f = TRUE;
5964  backup_f = TRUE;
 /* the suffix is not copied: backup_suffix points into the option string */
5965  backup_suffix = (char *)p;
5966  continue;
5967  }
5968  if (strcmp(long_option[i].name, "in-place") == 0){
5969  file_out_f = TRUE;
5970  overwrite_f = TRUE;
5972  continue;
5973  }
5974  if (strcmp(long_option[i].name, "in-place=") == 0){
5975  file_out_f = TRUE;
5976  overwrite_f = TRUE;
5978  backup_f = TRUE;
5979  backup_suffix = (char *)p;
5980  continue;
5981  }
5982 #endif
5983 #ifdef INPUT_OPTION
5984  if (strcmp(long_option[i].name, "cap-input") == 0){
5985  cap_f = TRUE;
5986  continue;
5987  }
5988  if (strcmp(long_option[i].name, "url-input") == 0){
5989  url_f = TRUE;
5990  continue;
5991  }
5992 #endif
5993 #ifdef NUMCHAR_OPTION
5994  if (strcmp(long_option[i].name, "numchar-input") == 0){
5995  numchar_f = TRUE;
5996  continue;
5997  }
5998 #endif
5999 #ifdef CHECK_OPTION
6000  if (strcmp(long_option[i].name, "no-output") == 0){
6001  noout_f = TRUE;
6002  continue;
6003  }
6004  if (strcmp(long_option[i].name, "debug") == 0){
6005  debug_f = TRUE;
6006  continue;
6007  }
6008 #endif
6009  if (strcmp(long_option[i].name, "cp932") == 0){
6010 #ifdef SHIFTJIS_CP932
6011  cp51932_f = TRUE;
6012  cp932inv_f = -TRUE;
6013 #endif
6014 #ifdef UTF8_OUTPUT_ENABLE
6016 #endif
6017  continue;
6018  }
6019  if (strcmp(long_option[i].name, "no-cp932") == 0){
6020 #ifdef SHIFTJIS_CP932
6021  cp51932_f = FALSE;
6022  cp932inv_f = FALSE;
6023 #endif
6024 #ifdef UTF8_OUTPUT_ENABLE
6026 #endif
6027  continue;
6028  }
6029 #ifdef SHIFTJIS_CP932
6030  if (strcmp(long_option[i].name, "cp932inv") == 0){
6031  cp932inv_f = -TRUE;
6032  continue;
6033  }
6034 #endif
6035 
6036 #ifdef X0212_ENABLE
6037  if (strcmp(long_option[i].name, "x0212") == 0){
6038  x0212_f = TRUE;
6039  continue;
6040  }
6041 #endif
6042 
6043 #ifdef EXEC_IO
 /* exec-in / exec-out stop option parsing: the function returns at once */
6044  if (strcmp(long_option[i].name, "exec-in") == 0){
6045  exec_f = 1;
6046  return 0;
6047  }
6048  if (strcmp(long_option[i].name, "exec-out") == 0){
6049  exec_f = -1;
6050  return 0;
6051  }
6052 #endif
6053 #if defined(UTF8_OUTPUT_ENABLE) && defined(UTF8_INPUT_ENABLE)
6054  if (strcmp(long_option[i].name, "no-cp932ext") == 0){
6055  no_cp932ext_f = TRUE;
6056  continue;
6057  }
6058  if (strcmp(long_option[i].name, "no-best-fit-chars") == 0){
6060  continue;
6061  }
6062  if (strcmp(long_option[i].name, "fb-skip") == 0){
6064  continue;
6065  }
6066  if (strcmp(long_option[i].name, "fb-html") == 0){
6068  continue;
6069  }
6070  if (strcmp(long_option[i].name, "fb-xml") == 0){
6072  continue;
6073  }
6074  if (strcmp(long_option[i].name, "fb-java") == 0){
6076  continue;
6077  }
6078  if (strcmp(long_option[i].name, "fb-perl") == 0){
6080  continue;
6081  }
6082  if (strcmp(long_option[i].name, "fb-subchar") == 0){
6084  continue;
6085  }
6086  if (strcmp(long_option[i].name, "fb-subchar=") == 0){
 /*
  * Parse the value as decimal, 0x-prefixed hexadecimal, or 0-prefixed
  * octal. `i` (the long_option index) is reused as a scratch index from
  * here on; every path through this branch ends in `continue`.
  */
6088  unicode_subchar = 0;
6089  if (p[0] != '0'){
6090  /* decimal number */
6091  for (i = 0; i < 7 && nkf_isdigit(p[i]); i++){
6092  unicode_subchar *= 10;
6093  unicode_subchar += hex2bin(p[i]);
6094  }
6095  }else if(p[1] == 'x' || p[1] == 'X'){
6096  /* hexadecimal number */
6097  for (i = 2; i < 8 && nkf_isxdigit(p[i]); i++){
6098  unicode_subchar <<= 4;
6099  unicode_subchar |= hex2bin(p[i]);
6100  }
6101  }else{
6102  /* octal number */
6103  for (i = 1; i < 8 && nkf_isoctal(p[i]); i++){
6104  unicode_subchar *= 8;
6105  unicode_subchar += hex2bin(p[i]);
6106  }
6107  }
 /* store the pair produced by w16e_conv() packed as (i << 8) | j */
6108  w16e_conv(unicode_subchar, &i, &j);
6109  unicode_subchar = i<<8 | j;
6110  continue;
6111  }
6112 #endif
6113 #ifdef UTF8_OUTPUT_ENABLE
6114  if (strcmp(long_option[i].name, "ms-ucs-map") == 0){
6116  continue;
6117  }
6118 #endif
6119 #ifdef UNICODE_NORMALIZATION
6120  if (strcmp(long_option[i].name, "utf8mac-input") == 0){
6121  nfc_f = TRUE;
6122  continue;
6123  }
6124 #endif
6125  if (strcmp(long_option[i].name, "prefix=") == 0){
 /* p[0] is stored in prefix_table[] for each following graphic character */
6126  if (nkf_isgraph(p[0])){
6127  for (i = 1; nkf_isgraph(p[i]); i++){
6128  prefix_table[p[i]] = p[0];
6129  }
6130  }
6131  continue;
6132  }
6133 #if !defined(PERL_XS) && !defined(WIN32DLL)
6134  fprintf(stderr, "unsupported long option: --%s\n", long_option[i].name);
6135 #endif
6136  return -1;
6137  }
6138  continue;
6139  case 'b': /* buffered mode */
6140  unbuf_f = FALSE;
6141  continue;
6142  case 'u': /* non-buffered mode */
6143  unbuf_f = TRUE;
6144  continue;
6145  case 't': /* transparent mode */
6146  if (*cp=='1') {
6147  /* alias of -t */
6148  cp++;
6149  nop_f = TRUE;
6150  } else if (*cp=='2') {
6151  /*
6152  * -t with put/get
6153  *
6154  * nkf -t2MB hoge.bin | nkf -t2mB | diff -s - hoge.bin
6155  *
6156  */
6157  cp++;
6158  nop_f = 2;
6159  } else
6160  nop_f = TRUE;
6161  continue;
6162  case 'j': /* JIS output */
6163  case 'n':
6164  output_encoding = nkf_enc_from_index(ISO_2022_JP);
6165  continue;
6166  case 'e': /* AT&T EUC output */
6167  output_encoding = nkf_enc_from_index(EUCJP_NKF);
6168  continue;
6169  case 's': /* SJIS output */
6170  output_encoding = nkf_enc_from_index(SHIFT_JIS);
6171  continue;
6172  case 'l': /* ISO8859 Latin-1 support, no conversion */
6173  iso8859_f = TRUE; /* Only compatible with ISO-2022-JP */
6174  input_encoding = nkf_enc_from_index(ISO_8859_1);
6175  continue;
6176  case 'i': /* Kanji IN ESC-$-@/B */
6177  if (*cp=='@'||*cp=='B')
6178  kanji_intro = *cp++;
6179  continue;
6180  case 'o': /* ASCII IN ESC-(-J/B/H */
6181  /* ESC ( H was used in initial JUNET messages */
6182  if (*cp=='J'||*cp=='B'||*cp=='H')
6183  ascii_intro = *cp++;
6184  continue;
6185  case 'h':
6186  /*
6187  bit:1 katakana->hiragana
6188  bit:2 hiragana->katakana
6189  */
 /* a following digit is OR-ed in by numeric value; no digit means 1 */
6190  if ('9'>= *cp && *cp>='0')
6191  hira_f |= (*cp++ -'0');
6192  else
6193  hira_f |= 1;
6194  continue;
6195  case 'r':
6196  rot_f = TRUE;
6197  continue;
6198 #if defined(MSDOS) || defined(__OS2__)
6199  case 'T':
6200  binmode_f = FALSE;
6201  continue;
6202 #endif
6203 #ifndef PERL_XS
6204  case 'V':
6206  exit(EXIT_SUCCESS);
6207  break;
6208  case 'v':
6209  version();
6210  exit(EXIT_SUCCESS);
6211  break;
6212 #endif
6213 #ifdef UTF8_OUTPUT_ENABLE
6214  case 'w': /* UTF-{8,16,32} output */
6215  if (cp[0] == '8') {
6216  cp++;
6217  if (cp[0] == '0'){
6218  cp++;
6219  output_encoding = nkf_enc_from_index(UTF_8N);
6220  } else {
6221  output_bom_f = TRUE;
6222  output_encoding = nkf_enc_from_index(UTF_8_BOM);
6223  }
6224  } else {
6225  int enc_idx;
6226  if ('1'== cp[0] && '6'==cp[1]) {
6227  cp += 2;
6228  enc_idx = UTF_16;
6229  } else if ('3'== cp[0] && '2'==cp[1]) {
6230  cp += 2;
6231  enc_idx = UTF_32;
6232  } else {
 /* bare -w: UTF-8 */
6233  output_encoding = nkf_enc_from_index(UTF_8);
6234  continue;
6235  }
 /* optional endianness letter (L or B), then optional '0' to suppress the BOM */
6236  if (cp[0]=='L') {
6237  cp++;
6239  output_bom_f = TRUE;
6240  } else if (cp[0] == 'B') {
6241  cp++;
6242  output_bom_f = TRUE;
6243  }
6244  if (cp[0] == '0'){
6245  output_bom_f = FALSE;
6246  cp++;
6247  enc_idx = enc_idx == UTF_16
6249  : (output_endian == ENDIAN_LITTLE ? UTF_32LE : UTF_32BE);
6250  } else {
6251  enc_idx = enc_idx == UTF_16
6253  : (output_endian == ENDIAN_LITTLE ? UTF_32LE_BOM : UTF_32BE_BOM);
6254  }
6255  output_encoding = nkf_enc_from_index(enc_idx);
6256  }
6257  continue;
6258 #endif
6259 #ifdef UTF8_INPUT_ENABLE
6260  case 'W': /* UTF input */
6261  if (cp[0] == '8') {
6262  cp++;
6263  input_encoding = nkf_enc_from_index(UTF_8);
6264  }else{
6265  int enc_idx;
6266  if ('1'== cp[0] && '6'==cp[1]) {
6267  cp += 2;
6269  enc_idx = UTF_16;
6270  } else if ('3'== cp[0] && '2'==cp[1]) {
6271  cp += 2;
6273  enc_idx = UTF_32;
6274  } else {
 /* bare -W: UTF-8 */
6275  input_encoding = nkf_enc_from_index(UTF_8);
6276  continue;
6277  }
6278  if (cp[0]=='L') {
6279  cp++;
6281  } else if (cp[0] == 'B') {
6282  cp++;
6284  }
6285  enc_idx = (enc_idx == UTF_16
6287  : (input_endian == ENDIAN_LITTLE ? UTF_32LE : UTF_32BE));
6288  input_encoding = nkf_enc_from_index(enc_idx);
6289  }
6290  continue;
6291 #endif
6292  /* Input code assumption */
6293  case 'J': /* ISO-2022-JP input */
6294  input_encoding = nkf_enc_from_index(ISO_2022_JP);
6295  continue;
6296  case 'E': /* EUC-JP input */
6297  input_encoding = nkf_enc_from_index(EUCJP_NKF);
6298  continue;
6299  case 'S': /* Shift_JIS input */
6300  input_encoding = nkf_enc_from_index(SHIFT_JIS);
6301  continue;
6302  case 'Z': /* Convert X0208 alphabet to ASCII */
6303  /* alpha_f
6304  bit:0 Convert JIS X 0208 Alphabet to ASCII
6305  bit:1 Convert Kankaku to one space
6306  bit:2 Convert Kankaku to two spaces
6307  bit:3 Convert HTML Entity
6308  bit:4 Convert JIS X 0208 Katakana to JIS X 0201 Katakana
6309  */
 /* every following digit 0-4 sets the corresponding bit; bit 0 is always set */
6310  while ('0'<= *cp && *cp <='4') {
6311  alpha_f |= 1 << (*cp++ - '0');
6312  }
6313  alpha_f |= 1;
6314  continue;
6315  case 'x': /* Convert X0201 kana to X0208 or X0201 Conversion */
6316  x0201_f = FALSE; /* No X0201->X0208 conversion */
6317  /* accept X0201
6318  ESC-(-I in JIS, EUC, MS Kanji
6319  SI/SO in JIS, EUC, MS Kanji
6320  SS2 in EUC, JIS, not in MS Kanji
6321  MS Kanji (0xa0-0xdf)
6322  output X0201
6323  ESC-(-I in JIS (0x20-0x5f)
6324  SS2 in EUC (0xa0-0xdf)
6325  0xa0-0xd in MS Kanji (0xa0-0xdf)
6326  */
6327  continue;
6328  case 'X': /* Convert X0201 kana to X0208 */
6329  x0201_f = TRUE;
6330  continue;
6331  case 'F': /* preserve new lines */
 /* no `continue` is visible between 'F' and 'f': 'F' runs into the folding code below */
6333  case 'f': /* folding -f60 or -f */
6334  fold_f = TRUE;
6335  fold_len = 0;
6336  while('0'<= *cp && *cp <='9') { /* we don't use atoi here */
6337  fold_len *= 10;
6338  fold_len += *cp++ - '0';
6339  }
 /* NOTE(review): the statement controlled by this `if` is missing from the listing */
6340  if (!(0<fold_len && fold_len<BUFSIZ))
6342  if (*cp=='-') {
6343  fold_margin = 0;
6344  cp++;
6345  while('0'<= *cp && *cp <='9') { /* we don't use atoi here */
6346  fold_margin *= 10;
6347  fold_margin += *cp++ - '0';
6348  }
6349  }
6350  continue;
6351  case 'm': /* MIME support */
6352  /* mime_decode_f = TRUE; */ /* this has too large side effects... */
6353  if (*cp=='B'||*cp=='Q') {
6354  mime_decode_mode = *cp++;
6356  } else if (*cp=='N') {
6357  mime_f = TRUE; cp++;
6358  } else if (*cp=='S') {
6359  mime_f = STRICT_MIME; cp++;
6360  } else if (*cp=='0') {
6361  mime_decode_f = FALSE;
6362  mime_f = FALSE; cp++;
6363  } else {
6364  mime_f = STRICT_MIME;
6365  }
6366  continue;
6367  case 'M': /* MIME output */
6368  if (*cp=='B') {
6369  mimeout_mode = 'B';
6370  mimeout_f = FIXED_MIME; cp++;
6371  } else if (*cp=='Q') {
6372  mimeout_mode = 'Q';
6373  mimeout_f = FIXED_MIME; cp++;
6374  } else {
6375  mimeout_f = TRUE;
6376  }
6377  continue;
6378  case 'B': /* Broken JIS support */
6379  /* bit:0 no ESC JIS
6380  bit:1 allow any x on ESC-(-x or ESC-$-x
6381  bit:2 reset to ascii on NL
6382  */
 /* a following digit selects the bit to set (1 << digit); no digit sets bit 0 */
6383  if ('9'>= *cp && *cp>='0')
6384  broken_f |= 1<<(*cp++ -'0');
6385  else
6386  broken_f |= TRUE;
6387  continue;
6388 #ifndef PERL_XS
6389  case 'O':/* for Output file */
6390  file_out_f = TRUE;
6391  continue;
6392 #endif
6393  case 'c':/* add cr code */
6394  eolmode_f = CRLF;
6395  continue;
6396  case 'd':/* delete cr code */
6397  eolmode_f = LF;
6398  continue;
6399  case 'I': /* ISO-2022-JP output */
6400  iso2022jp_f = TRUE;
6401  continue;
6402  case 'L': /* line mode */
6403  if (*cp=='u') { /* unix */
6404  eolmode_f = LF; cp++;
6405  } else if (*cp=='m') { /* mac */
6406  eolmode_f = CR; cp++;
6407  } else if (*cp=='w') { /* windows */
6408  eolmode_f = CRLF; cp++;
6409  } else if (*cp=='0') { /* no conversion */
6410  eolmode_f = 0; cp++;
6411  }
6412  continue;
6413 #ifndef PERL_XS
6414  case 'g':
6415  if ('2' <= *cp && *cp <= '9') {
6416  guess_f = 2;
6417  cp++;
6418  } else if (*cp == '0' || *cp == '1') {
6419  guess_f = 1;
6420  cp++;
6421  } else {
6422  guess_f = 1;
6423  }
6424  continue;
6425 #endif
6426  case SP:
6427  /* multiple options in a string are allowed for the Perl module */
6428  while(*cp && *cp++!='-');
6429  continue;
6430  default:
6431 #if !defined(PERL_XS) && !defined(WIN32DLL)
6432  fprintf(stderr, "unknown option: -%c\n", *(cp-1));
6433 #endif
6434  /* bogus option: reported above and rejected with -1 */
6435  return -1;
6436  }
6437  }
6438  return 0;
6439 }
6440 
6441 #ifdef WIN32DLL
6442 #include "nkf32dll.c"
6443 #elif defined(PERL_XS)
6444 #else /* WIN32DLL */
/*
 * Command-line entry point (built only when neither WIN32DLL nor PERL_XS
 * is defined).
 *
 * Flow visible in this listing:
 *  1. nkf_state_init(), then every leading argument that starts with '-'
 *     is passed to options().
 *  2. With EXEC_IO and exec_f set, a pipe is created and the remaining
 *     arguments are run as a child process via fork()/execvp(), connected
 *     to this process's stdin or stdout.
 *  3. With guess_f set, reinit() is called and a few flags are restored
 *     afterwards; mime_f is forced to FALSE.
 *  4. With no file arguments stdin is processed; otherwise each file is
 *     opened and processed in turn. nop_f selects noconvert() instead of
 *     kanji_convert().
 *  5. With file_out_f, stdout is redirected to a file: a mkstemp()
 *     temporary next to the input when overwrite_f is set, the last
 *     remaining argument when exactly one is left, otherwise "nkf.out".
 *  6. In overwrite mode the temporary file receives the original's
 *     permissions (and timestamps if preserve_time_f), the original is
 *     optionally renamed to a backup, and the temporary is renamed over
 *     the original.
 *
 * Returns 0 on success; -1 if a file could not be opened or stdout could
 * not be redirected.
 *
 * NOTE(review): a few statements are missing from this listing (the
 * embedded numbers skip, e.g. after kanji_convert(stdin)).
 */
6445 int
6446 main(int argc, char **argv)
6447 {
6448  FILE *fin;
6449  unsigned char *cp;
6450 
6451  char *outfname = NULL;
6452  char *origfname;
6453 
6454 #ifdef EASYWIN /*Easy Win */
6455  _BufferSize.y = 400;/*Set Scroll Buffer Size*/
6456 #endif
6457 #ifdef DEFAULT_CODE_LOCALE
6458  setlocale(LC_CTYPE, "");
6459 #endif
6460  nkf_state_init();
6461 
 /* consume leading option arguments; NOTE(review): the result of options() is not checked */
6462  for (argc--,argv++; (argc > 0) && **argv == '-'; argc--, argv++) {
6463  cp = (unsigned char *)*argv;
6464  options(cp);
6465 #ifdef EXEC_IO
6466  if (exec_f){
6467  int fds[2], pid;
6468  if (pipe(fds) < 0 || (pid = fork()) < 0){
6469  abort();
6470  }
 /* child: attach one pipe end to stdout (exec_f > 0) or stdin, then exec the rest of argv */
6471  if (pid == 0){
6472  if (exec_f > 0){
6473  close(fds[0]);
6474  dup2(fds[1], 1);
6475  }else{
6476  close(fds[1]);
6477  dup2(fds[0], 0);
6478  }
6479  execvp(argv[1], &argv[1]);
6480  }
 /* parent: attach the opposite pipe end; no file arguments remain afterwards */
6481  if (exec_f > 0){
6482  close(fds[1]);
6483  dup2(fds[0], 0);
6484  }else{
6485  close(fds[0]);
6486  dup2(fds[1], 1);
6487  }
6488  argc = 0;
6489  break;
6490  }
6491 #endif
6492  }
6493 
 /* guess mode: reset all state, keeping only the flags saved below */
6494  if (guess_f) {
6495 #ifdef CHECK_OPTION
6496  int debug_f_back = debug_f;
6497 #endif
6498 #ifdef EXEC_IO
6499  int exec_f_back = exec_f;
6500 #endif
6501 #ifdef X0212_ENABLE
6502  int x0212_f_back = x0212_f;
6503 #endif
6504  int x0213_f_back = x0213_f;
6505  int guess_f_back = guess_f;
6506  reinit();
6507  guess_f = guess_f_back;
6508  mime_f = FALSE;
6509 #ifdef CHECK_OPTION
6510  debug_f = debug_f_back;
6511 #endif
6512 #ifdef EXEC_IO
6513  exec_f = exec_f_back;
6514 #endif
 /*
  * NOTE(review): x0212_f_back is declared under #ifdef X0212_ENABLE above
  * but used here unconditionally.
  */
6515  x0212_f = x0212_f_back;
6516  x0213_f = x0213_f_back;
6517  }
6518 
6519  if (binmode_f == TRUE)
6520 #if defined(__OS2__) && (defined(__IBMC__) || defined(__IBMCPP__))
6521  if (freopen("","wb",stdout) == NULL)
6522  return (-1);
6523 #else
6524  setbinmode(stdout);
6525 #endif
6526 
6527  if (unbuf_f)
6528  setbuf(stdout, (char *) NULL);
6529  else
6530  setvbuffer(stdout, (char *) stdobuf, IOBUF_SIZE);
6531 
 /* no file arguments: convert stdin */
6532  if (argc == 0) {
6533  if (binmode_f == TRUE)
6534 #if defined(__OS2__) && (defined(__IBMC__) || defined(__IBMCPP__))
6535  if (freopen("","rb",stdin) == NULL) return (-1);
6536 #else
6537  setbinmode(stdin);
6538 #endif
6539  setvbuffer(stdin, (char *) stdibuf, IOBUF_SIZE);
6540  if (nop_f)
6541  noconvert(stdin);
6542  else {
6543  kanji_convert(stdin);
6545  }
6546  } else {
6547  int nfiles = argc;
6548  int is_argument_error = FALSE;
6549  while (argc--) {
 /* per-file reset of the detection state */
6550  input_codename = NULL;
6551  input_eol = 0;
6552 #ifdef CHECK_OPTION
6553  iconv_for_check = 0;
6554 #endif
 /* an unreadable file is reported and skipped; the exit status becomes -1 */
6555  if ((fin = fopen((origfname = *argv++), "r")) == NULL) {
6556  perror(*(argv-1));
6557  is_argument_error = TRUE;
6558  continue;
6559  } else {
6560 #ifdef OVERWRITE
6561  int fd = 0;
6562  int fd_backup = 0;
6563 #endif
6564 
6565  /* reopen file for stdout */
6566  if (file_out_f == TRUE) {
6567 #ifdef OVERWRITE
6568  if (overwrite_f){
 /* temporary name = original name + ".nkftmpXXXXXX" (template for mkstemp) */
6569  outfname = nkf_xmalloc(strlen(origfname)
6570  + strlen(".nkftmpXXXXXX")
6571  + 1);
6572  strcpy(outfname, origfname);
6573 #ifdef MSDOS
6574  {
 /* MSDOS: keep only the directory part, then append "ntXXXXXX" */
6575  int i;
6576  for (i = strlen(outfname); i; --i){
6577  if (outfname[i - 1] == '/'
6578  || outfname[i - 1] == '\\'){
6579  break;
6580  }
6581  }
6582  outfname[i] = '\0';
6583  }
6584  strcat(outfname, "ntXXXXXX");
6585  mktemp(outfname);
6586  fd = open(outfname, O_WRONLY | O_CREAT | O_TRUNC | O_EXCL,
6587  S_IREAD | S_IWRITE);
6588 #else
6589  strcat(outfname, ".nkftmpXXXXXX");
6590  fd = mkstemp(outfname);
6591 #endif
 /* keep a duplicate of the real stdout in fd_backup, then point stdout at the temp file */
6592  if (fd < 0
6593  || (fd_backup = dup(fileno(stdout))) < 0
6594  || dup2(fd, fileno(stdout)) < 0
6595  ){
6596  perror(origfname);
6597  return -1;
6598  }
6599  }else
6600 #endif
 /* exactly one argument left: use it as the output file name */
6601  if(argc == 1) {
6602  outfname = *argv++;
6603  argc--;
6604  } else {
6605  outfname = "nkf.out";
6606  }
6607 
6608  if(freopen(outfname, "w", stdout) == NULL) {
6609  perror (outfname);
6610  return (-1);
6611  }
6612  if (binmode_f == TRUE) {
6613 #if defined(__OS2__) && (defined(__IBMC__) || defined(__IBMCPP__))
6614  if (freopen("","wb",stdout) == NULL)
6615  return (-1);
6616 #else
6617  setbinmode(stdout);
6618 #endif
6619  }
6620  }
6621  if (binmode_f == TRUE)
6622 #if defined(__OS2__) && (defined(__IBMC__) || defined(__IBMCPP__))
6623  if (freopen("","rb",fin) == NULL)
6624  return (-1);
6625 #else
6626  setbinmode(fin);
6627 #endif
6628  setvbuffer(fin, (char *) stdibuf, IOBUF_SIZE);
6629  if (nop_f)
6630  noconvert(fin);
6631  else {
 /* the file name is passed to print_guessed_code() only when several files were given */
6632  char *filename = NULL;
6633  kanji_convert(fin);
6634  if (nfiles > 1) filename = origfname;
6635  if (guess_f) print_guessed_code(filename);
6636  }
6637  fclose(fin);
6638 #ifdef OVERWRITE
6639  if (overwrite_f) {
6640  struct stat sb;
6641 #if defined(MSDOS) && !defined(__MINGW32__) && !defined(__WIN32__) && !defined(__WATCOMC__) && !defined(__EMX__) && !defined(__OS2__) && !defined(__DJGPP__)
6642  time_t tb[2];
6643 #else
6644  struct utimbuf tb;
6645 #endif
6646 
 /* flush the converted output and restore the real stdout */
6647  fflush(stdout);
6648  close(fd);
6649  if (dup2(fd_backup, fileno(stdout)) < 0){
6650  perror("dup2");
6651  }
 /*
  * NOTE(review): a stat() failure is only reported; sb is still read by
  * chmod() and utime() below.
  */
6652  if (stat(origfname, &sb)) {
6653  fprintf(stderr, "Can't stat %s\n", origfname);
6654  }
6655  /* restore the original file's permissions */
6656  if (chmod(outfname, sb.st_mode)) {
6657  fprintf(stderr, "Can't set permission %s\n", outfname);
6658  }
6659 
6660  /* restore the original file's timestamps */
6661  if(preserve_time_f){
6662 #if defined(MSDOS) && !defined(__MINGW32__) && !defined(__WIN32__) && !defined(__WATCOMC__) && !defined(__EMX__) && !defined(__OS2__) && !defined(__DJGPP__)
6663  tb[0] = tb[1] = sb.st_mtime;
6664  if (utime(outfname, tb)) {
6665  fprintf(stderr, "Can't set timestamp %s\n", outfname);
6666  }
6667 #else
6668  tb.actime = sb.st_atime;
6669  tb.modtime = sb.st_mtime;
6670  if (utime(outfname, &tb)) {
6671  fprintf(stderr, "Can't set timestamp %s\n", outfname);
6672  }
6673 #endif
6674  }
 /* keep the original under its backup name when requested */
6675  if(backup_f){
6676  char *backup_filename = get_backup_filename(backup_suffix, origfname);
6677 #ifdef MSDOS
6678  unlink(backup_filename);
6679 #endif
6680  if (rename(origfname, backup_filename)) {
6681  perror(backup_filename);
6682  fprintf(stderr, "Can't rename %s to %s\n",
6683  origfname, backup_filename);
6684  }
6685  nkf_xfree(backup_filename);
6686  }else{
6687 #ifdef MSDOS
6688  if (unlink(origfname)){
6689  perror(origfname);
6690  }
6691 #endif
6692  }
 /* move the converted temporary file into place */
6693  if (rename(outfname, origfname)) {
6694  perror(origfname);
6695  fprintf(stderr, "Can't rename %s to %s\n",
6696  outfname, origfname);
6697  }
6698  nkf_xfree(outfname);
6699  }
6700 #endif
6701  }
6702  }
6703  if (is_argument_error)
6704  return(-1);
6705  }
6706 #ifdef EASYWIN /*Easy Win */
6707  if (file_out_f == FALSE)
6708  scanf("%d",&end_check);
6709  else
6710  fclose(stdout);
6711 #else /* for Other OS */
6712  if (file_out_f == TRUE)
6713  fclose(stdout);
6714 #endif /*Easy Win */
6715  return (0);
6716 }
6717 #endif /* WIN32DLL */
6718