Open Chinese Convert  0.4.3
A project for conversion between Traditional and Simplified Chinese
/usr/src/RPM/BUILD/opencc-0.4.3/src/opencc.c
Go to the documentation of this file.
00001 
00023 #include "common.h"
00024 #include "config_reader.h"
00025 #include "converter.h"
00026 #include "dict_group.h"
00027 #include "dict_chain.h"
00028 #include "encoding.h"
00029 #include "opencc.h"
00030 
00031 typedef struct {
00032   DictChain* dict_chain;
00033   Converter* converter;
00034 } OpenccDesc;
00035 
00036 static opencc_error errnum = OPENCC_ERROR_VOID;
00037 static int lib_initialized = 0;
00038 
00039 static void lib_initialize(void) {
00040 #ifdef ENABLE_GETTEXT
00041   bindtextdomain(PACKAGE_NAME, LOCALEDIR);
00042 #endif /* ifdef ENABLE_GETTEXT */
00043   lib_initialized = 1;
00044 }
00045 
00046 size_t opencc_convert(opencc_t t_opencc,
00047                       ucs4_t** inbuf,
00048                       size_t* inbuf_left,
00049                       ucs4_t** outbuf,
00050                       size_t* outbuf_left) {
00051   if (!lib_initialized) {
00052     lib_initialize();
00053   }
00054   OpenccDesc* opencc = (OpenccDesc*)t_opencc;
00055   size_t retval = converter_convert(opencc->converter,
00056                                     inbuf,
00057                                     inbuf_left,
00058                                     outbuf,
00059                                     outbuf_left);
00060   if (retval == (size_t)-1) {
00061     errnum = OPENCC_ERROR_CONVERTER;
00062   }
00063   return retval;
00064 }
00065 
00066 char* opencc_convert_utf8(opencc_t t_opencc, const char* inbuf, size_t length) {
00067   if (!lib_initialized) {
00068     lib_initialize();
00069   }
00070   size_t actual_length = strlen(inbuf);
00071   if ((length == (size_t)-1) || (length > actual_length)) {
00072     length = actual_length;
00073   }
00074   ucs4_t* winbuf = utf8_to_ucs4(inbuf, length);
00075   if (winbuf == (ucs4_t*)-1) {
00076     /* Can not convert input UTF8 to UCS4 */
00077     errnum = OPENCC_ERROR_ENCODING;
00078     return (char*)-1;
00079   }
00080   /* Set up UTF8 buffer */
00081   size_t outbuf_len = length;
00082   size_t outsize = outbuf_len;
00083   char* original_outbuf = (char*)malloc(sizeof(char) * (outbuf_len + 1));
00084   char* outbuf = original_outbuf;
00085   original_outbuf[0] = '\0';
00086   /* Set conversion buffer */
00087   size_t wbufsize = length + 64;
00088   ucs4_t* woutbuf = (ucs4_t*)malloc(sizeof(ucs4_t) * (wbufsize + 1));
00089   ucs4_t* pinbuf = winbuf;
00090   ucs4_t* poutbuf = woutbuf;
00091   size_t inbuf_left, outbuf_left;
00092   inbuf_left = ucs4len(winbuf);
00093   outbuf_left = wbufsize;
00094   while (inbuf_left > 0) {
00095     size_t retval = opencc_convert(t_opencc,
00096                                    &pinbuf,
00097                                    &inbuf_left,
00098                                    &poutbuf,
00099                                    &outbuf_left);
00100     if (retval == (size_t)-1) {
00101       free(outbuf);
00102       free(winbuf);
00103       free(woutbuf);
00104       return (char*)-1;
00105     }
00106     *poutbuf = L'\0';
00107     char* ubuff = ucs4_to_utf8(woutbuf, (size_t)-1);
00108     if (ubuff == (char*)-1) {
00109       free(outbuf);
00110       free(winbuf);
00111       free(woutbuf);
00112       errnum = OPENCC_ERROR_ENCODING;
00113       return (char*)-1;
00114     }
00115     size_t ubuff_len = strlen(ubuff);
00116     while (ubuff_len > outsize) {
00117       size_t outbuf_offset = outbuf - original_outbuf;
00118       outsize += outbuf_len;
00119       outbuf_len += outbuf_len;
00120       original_outbuf =
00121         (char*)realloc(original_outbuf, sizeof(char) * outbuf_len);
00122       outbuf = original_outbuf + outbuf_offset;
00123     }
00124     strncpy(outbuf, ubuff, ubuff_len);
00125     free(ubuff);
00126     outbuf += ubuff_len;
00127     *outbuf = '\0';
00128     outbuf_left = wbufsize;
00129     poutbuf = woutbuf;
00130   }
00131   free(winbuf);
00132   free(woutbuf);
00133   original_outbuf = (char*)realloc(original_outbuf,
00134                                    sizeof(char) * (strlen(original_outbuf) + 1));
00135   return original_outbuf;
00136 }
00137 
/*
 * Release a string returned by opencc_convert_utf8().
 *
 * buf  pointer previously returned by opencc_convert_utf8(). NULL and the
 *      (char*)-1 failure sentinel are both accepted and ignored, so a
 *      caller may pass the return value through without checking it.
 */
void opencc_convert_utf8_free(char* buf) {
  /* (char*)-1 is the error value of opencc_convert_utf8(), not a heap
   * pointer; handing it to free() would be undefined behaviour. */
  if (buf == (char*)-1) {
    return;
  }
  free(buf);
}
00141 
00142 opencc_t opencc_open(const char* config_file) {
00143   if (!lib_initialized) {
00144     lib_initialize();
00145   }
00146   OpenccDesc* opencc;
00147   opencc = (OpenccDesc*)malloc(sizeof(OpenccDesc));
00148   opencc->dict_chain = NULL;
00149   opencc->converter = converter_open();
00150   converter_set_conversion_mode(opencc->converter, OPENCC_CONVERSION_FAST);
00151   if (config_file == NULL) {
00152     /* TODO load default */
00153     assert(0);
00154   } else {
00155     /* Load config */
00156     Config* config = config_open(config_file);
00157     if (config == (Config*)-1) {
00158       errnum = OPENCC_ERROR_CONFIG;
00159       return (opencc_t)-1;
00160     }
00161     opencc->dict_chain = config_get_dict_chain(config);
00162     converter_assign_dictionary(opencc->converter, opencc->dict_chain);
00163     config_close(config);
00164   }
00165   return (opencc_t)opencc;
00166 }
00167 
00168 int opencc_close(opencc_t t_opencc) {
00169   if (!lib_initialized) {
00170     lib_initialize();
00171   }
00172   OpenccDesc* opencc = (OpenccDesc*)t_opencc;
00173   converter_close(opencc->converter);
00174   if (opencc->dict_chain != NULL) {
00175     dict_chain_delete(opencc->dict_chain);
00176   }
00177   free(opencc);
00178   return 0;
00179 }
00180 
00181 int opencc_dict_load(opencc_t t_opencc,
00182                      const char* dict_filename,
00183                      opencc_dictionary_type dict_type) {
00184   if (!lib_initialized) {
00185     lib_initialize();
00186   }
00187   OpenccDesc* opencc = (OpenccDesc*)t_opencc;
00188   DictGroup* DictGroup;
00189   if (opencc->dict_chain == NULL) {
00190     opencc->dict_chain = dict_chain_new(NULL);
00191     DictGroup = dict_chain_add_group(opencc->dict_chain);
00192   } else {
00193     DictGroup = dict_chain_get_group(opencc->dict_chain, 0);
00194   }
00195   int retval = dict_group_load(DictGroup, dict_filename, dict_type);
00196   if (retval == -1) {
00197     errnum = OPENCC_ERROR_DICTLOAD;
00198     return -1;
00199   }
00200   converter_assign_dictionary(opencc->converter, opencc->dict_chain);
00201   return retval;
00202 }
00203 
00204 void opencc_set_conversion_mode(opencc_t t_opencc,
00205                                 opencc_conversion_mode conversion_mode) {
00206   if (!lib_initialized) {
00207     lib_initialize();
00208   }
00209   OpenccDesc* opencc = (OpenccDesc*)t_opencc;
00210   converter_set_conversion_mode(opencc->converter, conversion_mode);
00211 }
00212 
00213 opencc_error opencc_errno(void) {
00214   if (!lib_initialized) {
00215     lib_initialize();
00216   }
00217   return errnum;
00218 }
00219 
00220 void opencc_perror(const char* spec) {
00221   if (!lib_initialized) {
00222     lib_initialize();
00223   }
00224   perr(spec);
00225   perr("\n");
00226   switch (errnum) {
00227   case OPENCC_ERROR_VOID:
00228     break;
00229   case OPENCC_ERROR_DICTLOAD:
00230     dictionary_perror(_("Dictionary loading error"));
00231     break;
00232   case OPENCC_ERROR_CONFIG:
00233     config_perror(_("Configuration error"));
00234     break;
00235   case OPENCC_ERROR_CONVERTER:
00236     converter_perror(_("Converter error"));
00237     break;
00238   case OPENCC_ERROR_ENCODING:
00239     perr(_("Encoding error"));
00240     break;
00241   default:
00242     perr(_("Unknown"));
00243   }
00244   perr("\n");
00245 }
 All Data Structures Files Functions Variables Defines