Open Chinese Convert
0.4.3
A project for conversion between Traditional and Simplified Chinese
|
00001 /* 00002 * Open Chinese Convert 00003 * 00004 * Copyright 2010-2013 BYVoid <byvoid@byvoid.com> 00005 * 00006 * Licensed under the Apache License, Version 2.0 (the "License"); 00007 * you may not use this file except in compliance with the License. 00008 * You may obtain a copy of the License at 00009 * 00010 * http://www.apache.org/licenses/LICENSE-2.0 00011 * 00012 * Unless required by applicable law or agreed to in writing, software 00013 * distributed under the License is distributed on an "AS IS" BASIS, 00014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 00015 * See the License for the specific language governing permissions and 00016 * limitations under the License. 00017 */ 00018 00019 #include "../opencc.h" 00020 #include "../utils.h" 00021 #include <getopt.h> 00022 #include <locale.h> 00023 #include <stdio.h> 00024 #include <stdlib.h> 00025 #include <string.h> 00026 00027 #ifndef VERSION 00028 #define VERSION "" 00029 #endif 00030 00031 #define BUFFER_SIZE 65536 00032 00033 void convert(const char* input_file, 00034 const char* output_file, 00035 const char* config_file) { 00036 opencc_t od = opencc_open(config_file); 00037 if (od == (opencc_t)-1) { 00038 opencc_perror(_("OpenCC initialization error")); 00039 exit(1); 00040 } 00041 FILE* fp = stdin; 00042 FILE* fpo = stdout; 00043 if (input_file) { 00044 fp = fopen(input_file, "r"); 00045 if (!fp) { 00046 fprintf(stderr, _("Can not read file: %s\n"), input_file); 00047 exit(1); 00048 } 00049 skip_utf8_bom(fp); 00050 } 00051 if (output_file) { 00052 fpo = fopen(output_file, "w"); 00053 if (!fpo) { 00054 fprintf(stderr, _("Can not write file: %s\n"), output_file); 00055 exit(1); 00056 } 00057 } 00058 size_t size = BUFFER_SIZE; 00059 char* buffer_in = NULL, * buffer_out = NULL; 00060 buffer_in = (char*)malloc(size * sizeof(char)); 00061 char* lookahead = (char*)malloc(size * sizeof(char)); 00062 size_t lookahead_size = 0; 00063 while (!feof(fp)) { 00064 size_t read; 00065 if (lookahead_size > 0) { 00066 memcpy(buffer_in, lookahead, lookahead_size); 00067 read = 00068 fread(buffer_in + lookahead_size, 1, size - lookahead_size, 00069 fp) + lookahead_size; 00070 lookahead_size = 0; 00071 } else { 00072 read = fread(buffer_in, 1, size, fp); 00073 } 00074 // If we haven't finished reading after filling the entire buffer, 00075 // then it could be that we broke within an UTF-8 character, in 00076 // that case we must backtrack and find the boundary 00077 if (read == size) { 00078 // Find the boundary of last UTF-8 character 00079 int i; 00080 for (i = read - 1; i >= 0; i--) { 00081 char c = buffer_in[i]; 00082 if (!(c & 0x80) || ((c & 0xC0) == 0xC0)) { 00083 break; 00084 } 00085 } 00086 assert(i >= 0); 00087 memcpy(lookahead, buffer_in + i, read - i); 00088 lookahead_size = read - i; 00089 buffer_in[i] = '\0'; 00090 } else { 00091 buffer_in[read] = '\0'; 00092 } 00093 buffer_out = opencc_convert_utf8(od, buffer_in, (size_t)-1); 00094 if (buffer_out != (char*)-1) { 00095 fprintf(fpo, "%s", buffer_out); 00096 opencc_convert_utf8_free(buffer_out); 00097 } else { 00098 opencc_perror(_("OpenCC error")); 00099 break; 00100 } 00101 } 00102 00103 if (lookahead_size > 0) { 00104 assert(lookahead_size < size); 00105 lookahead[lookahead_size] = '\0'; 00106 buffer_out = opencc_convert_utf8(od, lookahead, (size_t)-1); 00107 if (buffer_out != (char*)-1) { 00108 fprintf(fpo, "%s", buffer_out); 00109 opencc_convert_utf8_free(buffer_out); 00110 } else { 00111 opencc_perror(_("OpenCC error")); 00112 } 00113 } 00114 opencc_close(od); 00115 free(lookahead); 00116 free(buffer_in); 00117 fclose(fp); 00118 fclose(fpo); 00119 } 00120 00121 void show_version() { 00122 printf(_("\n")); 00123 printf(_("Open Chinese Convert (OpenCC) Command Line Tool\n")); 00124 printf(_("Version %s\n"), VERSION); 00125 printf(_("\n")); 00126 printf(_("Author: %s\n"), "BYVoid <byvoid@byvoid.com>"); 00127 printf(_("Bug Report: %s\n"), "http://github.com/BYVoid/OpenCC/issues"); 00128 printf(_("\n")); 00129 } 00130 00131 void show_usage() { 00132 show_version(); 00133 printf(_("Usage:\n")); 00134 printf(_(" opencc [Options]\n")); 00135 printf(_("\n")); 00136 printf(_("Options:\n")); 00137 printf(_(" -i [file], --input=[file] Read original text from [file].\n")); 00138 printf(_(" -o [file], --output=[file] Write converted text to [file].\n")); 00139 printf(_( 00140 " -c [file], --config=[file] Load configuration of conversion from [file].\n")); 00141 printf(_(" -v, --version Print version and build information.\n")); 00142 printf(_(" -h, --help Print this help.\n")); 00143 printf(_("\n")); 00144 printf(_( 00145 "With no input file, reads standard input and writes converted stream to standard output.\n")); 00146 printf(_( 00147 "Default configuration(%s) will be loaded if not set.\n"), 00148 OPENCC_DEFAULT_CONFIG_SIMP_TO_TRAD); 00149 printf(_("\n")); 00150 } 00151 00152 int main(int argc, char** argv) { 00153 #ifdef ENABLE_GETTEXT 00154 setlocale(LC_ALL, ""); 00155 bindtextdomain(PACKAGE_NAME, LOCALEDIR); 00156 #endif /* ifdef ENABLE_GETTEXT */ 00157 static struct option longopts[] = 00158 { 00159 { "version", no_argument, NULL, 'v' }, 00160 { "help", no_argument, NULL, 'h' }, 00161 { "input", required_argument, NULL, 'i' }, 00162 { "output", required_argument, NULL, 'o' }, 00163 { "config", required_argument, NULL, 'c' }, 00164 { 0, 0, 0, 0 }, 00165 }; 00166 static int oc; 00167 static char* input_file, * output_file, * config_file; 00168 while ((oc = getopt_long(argc, argv, "vh?i:o:c:", longopts, NULL)) != -1) { 00169 switch (oc) { 00170 case 'v': 00171 show_version(); 00172 return 0; 00173 case 'h': 00174 case '?': 00175 show_usage(); 00176 return 0; 00177 case 'i': 00178 input_file = mstrcpy(optarg); 00179 break; 00180 case 'o': 00181 output_file = mstrcpy(optarg); 00182 break; 00183 case 'c': 00184 config_file = mstrcpy(optarg); 00185 break; 00186 } 00187 } 00188 if (config_file == NULL) { 00189 config_file = mstrcpy(OPENCC_DEFAULT_CONFIG_SIMP_TO_TRAD); 00190 } 00191 convert(input_file, output_file, config_file); 00192 free(input_file); 00193 free(output_file); 00194 free(config_file); 00195 return 0; 00196 }