Open Chinese Convert  0.4.3
A project for conversion between Traditional and Simplified Chinese
/usr/src/RPM/BUILD/opencc-0.4.3/src/tools/opencc.c
00001 /*
00002  * Open Chinese Convert
00003  *
00004  * Copyright 2010-2013 BYVoid <byvoid@byvoid.com>
00005  *
00006  * Licensed under the Apache License, Version 2.0 (the "License");
00007  * you may not use this file except in compliance with the License.
00008  * You may obtain a copy of the License at
00009  *
00010  *      http://www.apache.org/licenses/LICENSE-2.0
00011  *
00012  * Unless required by applicable law or agreed to in writing, software
00013  * distributed under the License is distributed on an "AS IS" BASIS,
00014  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
00015  * See the License for the specific language governing permissions and
00016  * limitations under the License.
00017  */
00018 
00019 #include "../opencc.h"
00020 #include "../utils.h"
00021 #include <getopt.h>
00022 #include <locale.h>
00023 #include <stdio.h>
00024 #include <stdlib.h>
00025 #include <string.h>
00026 
00027 #ifndef VERSION /* fall back to an empty version string when VERSION is not already defined (presumably by the build system -- confirm) */
00028 #define VERSION ""
00029 #endif
00030 
00031 #define BUFFER_SIZE 65536 /* size in bytes of the input and lookahead buffers allocated by convert() */
00032 
00033 void convert(const char* input_file,
00034              const char* output_file,
00035              const char* config_file) {
00036   opencc_t od = opencc_open(config_file);
00037   if (od == (opencc_t)-1) {
00038     opencc_perror(_("OpenCC initialization error"));
00039     exit(1);
00040   }
00041   FILE* fp = stdin;
00042   FILE* fpo = stdout;
00043   if (input_file) {
00044     fp = fopen(input_file, "r");
00045     if (!fp) {
00046       fprintf(stderr, _("Can not read file: %s\n"), input_file);
00047       exit(1);
00048     }
00049     skip_utf8_bom(fp);
00050   }
00051   if (output_file) {
00052     fpo = fopen(output_file, "w");
00053     if (!fpo) {
00054       fprintf(stderr, _("Can not write file: %s\n"), output_file);
00055       exit(1);
00056     }
00057   }
00058   size_t size = BUFFER_SIZE;
00059   char* buffer_in = NULL, * buffer_out = NULL;
00060   buffer_in = (char*)malloc(size * sizeof(char));
00061   char* lookahead = (char*)malloc(size * sizeof(char));
00062   size_t lookahead_size = 0;
00063   while (!feof(fp)) {
00064     size_t read;
00065     if (lookahead_size > 0) {
00066       memcpy(buffer_in, lookahead, lookahead_size);
00067       read =
00068         fread(buffer_in + lookahead_size, 1, size - lookahead_size,
00069               fp) + lookahead_size;
00070       lookahead_size = 0;
00071     } else {
00072       read = fread(buffer_in, 1, size, fp);
00073     }
00074     // If we haven't finished reading after filling the entire buffer,
00075     // then it could be that we broke within an UTF-8 character, in
00076     // that case we must backtrack and find the boundary
00077     if (read == size) {
00078       // Find the boundary of last UTF-8 character
00079       int i;
00080       for (i = read - 1; i >= 0; i--) {
00081         char c = buffer_in[i];
00082         if (!(c & 0x80) || ((c & 0xC0) == 0xC0)) {
00083           break;
00084         }
00085       }
00086       assert(i >= 0);
00087       memcpy(lookahead, buffer_in + i, read - i);
00088       lookahead_size = read - i;
00089       buffer_in[i] = '\0';
00090     } else {
00091       buffer_in[read] = '\0';
00092     }
00093     buffer_out = opencc_convert_utf8(od, buffer_in, (size_t)-1);
00094     if (buffer_out != (char*)-1) {
00095       fprintf(fpo, "%s", buffer_out);
00096       opencc_convert_utf8_free(buffer_out);
00097     } else {
00098       opencc_perror(_("OpenCC error"));
00099       break;
00100     }
00101   }
00102 
00103   if (lookahead_size > 0) {
00104     assert(lookahead_size < size);
00105     lookahead[lookahead_size] = '\0';
00106     buffer_out = opencc_convert_utf8(od, lookahead, (size_t)-1);
00107     if (buffer_out != (char*)-1) {
00108       fprintf(fpo, "%s", buffer_out);
00109       opencc_convert_utf8_free(buffer_out);
00110     } else {
00111       opencc_perror(_("OpenCC error"));
00112     }
00113   }
00114   opencc_close(od);
00115   free(lookahead);
00116   free(buffer_in);
00117   fclose(fp);
00118   fclose(fpo);
00119 }
00120 
00121 void show_version() {
00122   printf(_("\n"));
00123   printf(_("Open Chinese Convert (OpenCC) Command Line Tool\n"));
00124   printf(_("Version %s\n"), VERSION);
00125   printf(_("\n"));
00126   printf(_("Author: %s\n"), "BYVoid <byvoid@byvoid.com>");
00127   printf(_("Bug Report: %s\n"), "http://github.com/BYVoid/OpenCC/issues");
00128   printf(_("\n"));
00129 }
00130 
00131 void show_usage() {
00132   show_version();
00133   printf(_("Usage:\n"));
00134   printf(_(" opencc [Options]\n"));
00135   printf(_("\n"));
00136   printf(_("Options:\n"));
00137   printf(_(" -i [file], --input=[file]   Read original text from [file].\n"));
00138   printf(_(" -o [file], --output=[file]  Write converted text to [file].\n"));
00139   printf(_(
00140            " -c [file], --config=[file]  Load configuration of conversion from [file].\n"));
00141   printf(_(" -v, --version               Print version and build information.\n"));
00142   printf(_(" -h, --help                  Print this help.\n"));
00143   printf(_("\n"));
00144   printf(_(
00145            "With no input file, reads standard input and writes converted stream to standard output.\n"));
00146   printf(_(
00147            "Default configuration(%s) will be loaded if not set.\n"),
00148          OPENCC_DEFAULT_CONFIG_SIMP_TO_TRAD);
00149   printf(_("\n"));
00150 }
00151 
00152 int main(int argc, char** argv) {
00153 #ifdef ENABLE_GETTEXT
00154   setlocale(LC_ALL, "");
00155   bindtextdomain(PACKAGE_NAME, LOCALEDIR);
00156 #endif /* ifdef ENABLE_GETTEXT */
00157   static struct option longopts[] =
00158   {
00159     { "version", no_argument, NULL, 'v' },
00160     { "help", no_argument, NULL, 'h' },
00161     { "input", required_argument, NULL, 'i' },
00162     { "output", required_argument, NULL, 'o' },
00163     { "config", required_argument, NULL, 'c' },
00164     { 0, 0, 0, 0 },
00165   };
00166   static int oc;
00167   static char* input_file, * output_file, * config_file;
00168   while ((oc = getopt_long(argc, argv, "vh?i:o:c:", longopts, NULL)) != -1) {
00169     switch (oc) {
00170     case 'v':
00171       show_version();
00172       return 0;
00173     case 'h':
00174     case '?':
00175       show_usage();
00176       return 0;
00177     case 'i':
00178       input_file = mstrcpy(optarg);
00179       break;
00180     case 'o':
00181       output_file = mstrcpy(optarg);
00182       break;
00183     case 'c':
00184       config_file = mstrcpy(optarg);
00185       break;
00186     }
00187   }
00188   if (config_file == NULL) {
00189     config_file = mstrcpy(OPENCC_DEFAULT_CONFIG_SIMP_TO_TRAD);
00190   }
00191   convert(input_file, output_file, config_file);
00192   free(input_file);
00193   free(output_file);
00194   free(config_file);
00195   return 0;
00196 }
 All Data Structures Files Functions Variables Defines