Open Chinese Convert
0.4.3
A project for conversion between Traditional and Simplified Chinese
|
00001 00023 #include "common.h" 00024 #include "config_reader.h" 00025 #include "converter.h" 00026 #include "dict_group.h" 00027 #include "dict_chain.h" 00028 #include "encoding.h" 00029 #include "opencc.h" 00030 00031 typedef struct { 00032 DictChain* dict_chain; 00033 Converter* converter; 00034 } OpenccDesc; 00035 00036 static opencc_error errnum = OPENCC_ERROR_VOID; 00037 static int lib_initialized = 0; 00038 00039 static void lib_initialize(void) { 00040 #ifdef ENABLE_GETTEXT 00041 bindtextdomain(PACKAGE_NAME, LOCALEDIR); 00042 #endif /* ifdef ENABLE_GETTEXT */ 00043 lib_initialized = 1; 00044 } 00045 00046 size_t opencc_convert(opencc_t t_opencc, 00047 ucs4_t** inbuf, 00048 size_t* inbuf_left, 00049 ucs4_t** outbuf, 00050 size_t* outbuf_left) { 00051 if (!lib_initialized) { 00052 lib_initialize(); 00053 } 00054 OpenccDesc* opencc = (OpenccDesc*)t_opencc; 00055 size_t retval = converter_convert(opencc->converter, 00056 inbuf, 00057 inbuf_left, 00058 outbuf, 00059 outbuf_left); 00060 if (retval == (size_t)-1) { 00061 errnum = OPENCC_ERROR_CONVERTER; 00062 } 00063 return retval; 00064 } 00065 00066 char* opencc_convert_utf8(opencc_t t_opencc, const char* inbuf, size_t length) { 00067 if (!lib_initialized) { 00068 lib_initialize(); 00069 } 00070 size_t actual_length = strlen(inbuf); 00071 if ((length == (size_t)-1) || (length > actual_length)) { 00072 length = actual_length; 00073 } 00074 ucs4_t* winbuf = utf8_to_ucs4(inbuf, length); 00075 if (winbuf == (ucs4_t*)-1) { 00076 /* Can not convert input UTF8 to UCS4 */ 00077 errnum = OPENCC_ERROR_ENCODING; 00078 return (char*)-1; 00079 } 00080 /* Set up UTF8 buffer */ 00081 size_t outbuf_len = length; 00082 size_t outsize = outbuf_len; 00083 char* original_outbuf = (char*)malloc(sizeof(char) * (outbuf_len + 1)); 00084 char* outbuf = original_outbuf; 00085 original_outbuf[0] = '\0'; 00086 /* Set conversion buffer */ 00087 size_t wbufsize = length + 64; 00088 ucs4_t* woutbuf = (ucs4_t*)malloc(sizeof(ucs4_t) * (wbufsize + 1)); 00089 ucs4_t* pinbuf = winbuf; 00090 ucs4_t* poutbuf = woutbuf; 00091 size_t inbuf_left, outbuf_left; 00092 inbuf_left = ucs4len(winbuf); 00093 outbuf_left = wbufsize; 00094 while (inbuf_left > 0) { 00095 size_t retval = opencc_convert(t_opencc, 00096 &pinbuf, 00097 &inbuf_left, 00098 &poutbuf, 00099 &outbuf_left); 00100 if (retval == (size_t)-1) { 00101 free(outbuf); 00102 free(winbuf); 00103 free(woutbuf); 00104 return (char*)-1; 00105 } 00106 *poutbuf = L'\0'; 00107 char* ubuff = ucs4_to_utf8(woutbuf, (size_t)-1); 00108 if (ubuff == (char*)-1) { 00109 free(outbuf); 00110 free(winbuf); 00111 free(woutbuf); 00112 errnum = OPENCC_ERROR_ENCODING; 00113 return (char*)-1; 00114 } 00115 size_t ubuff_len = strlen(ubuff); 00116 while (ubuff_len > outsize) { 00117 size_t outbuf_offset = outbuf - original_outbuf; 00118 outsize += outbuf_len; 00119 outbuf_len += outbuf_len; 00120 original_outbuf = 00121 (char*)realloc(original_outbuf, sizeof(char) * outbuf_len); 00122 outbuf = original_outbuf + outbuf_offset; 00123 } 00124 strncpy(outbuf, ubuff, ubuff_len); 00125 free(ubuff); 00126 outbuf += ubuff_len; 00127 *outbuf = '\0'; 00128 outbuf_left = wbufsize; 00129 poutbuf = woutbuf; 00130 } 00131 free(winbuf); 00132 free(woutbuf); 00133 original_outbuf = (char*)realloc(original_outbuf, 00134 sizeof(char) * (strlen(original_outbuf) + 1)); 00135 return original_outbuf; 00136 } 00137 00138 void opencc_convert_utf8_free(char* buf) { 00139 free(buf); 00140 } 00141 00142 opencc_t opencc_open(const char* config_file) { 00143 if (!lib_initialized) { 00144 lib_initialize(); 00145 } 00146 OpenccDesc* opencc; 00147 opencc = (OpenccDesc*)malloc(sizeof(OpenccDesc)); 00148 opencc->dict_chain = NULL; 00149 opencc->converter = converter_open(); 00150 converter_set_conversion_mode(opencc->converter, OPENCC_CONVERSION_FAST); 00151 if (config_file == NULL) { 00152 /* TODO load default */ 00153 assert(0); 00154 } else { 00155 /* Load config */ 00156 Config* config = config_open(config_file); 00157 if (config == (Config*)-1) { 00158 errnum = OPENCC_ERROR_CONFIG; 00159 return (opencc_t)-1; 00160 } 00161 opencc->dict_chain = config_get_dict_chain(config); 00162 converter_assign_dictionary(opencc->converter, opencc->dict_chain); 00163 config_close(config); 00164 } 00165 return (opencc_t)opencc; 00166 } 00167 00168 int opencc_close(opencc_t t_opencc) { 00169 if (!lib_initialized) { 00170 lib_initialize(); 00171 } 00172 OpenccDesc* opencc = (OpenccDesc*)t_opencc; 00173 converter_close(opencc->converter); 00174 if (opencc->dict_chain != NULL) { 00175 dict_chain_delete(opencc->dict_chain); 00176 } 00177 free(opencc); 00178 return 0; 00179 } 00180 00181 int opencc_dict_load(opencc_t t_opencc, 00182 const char* dict_filename, 00183 opencc_dictionary_type dict_type) { 00184 if (!lib_initialized) { 00185 lib_initialize(); 00186 } 00187 OpenccDesc* opencc = (OpenccDesc*)t_opencc; 00188 DictGroup* DictGroup; 00189 if (opencc->dict_chain == NULL) { 00190 opencc->dict_chain = dict_chain_new(NULL); 00191 DictGroup = dict_chain_add_group(opencc->dict_chain); 00192 } else { 00193 DictGroup = dict_chain_get_group(opencc->dict_chain, 0); 00194 } 00195 int retval = dict_group_load(DictGroup, dict_filename, dict_type); 00196 if (retval == -1) { 00197 errnum = OPENCC_ERROR_DICTLOAD; 00198 return -1; 00199 } 00200 converter_assign_dictionary(opencc->converter, opencc->dict_chain); 00201 return retval; 00202 } 00203 00204 void opencc_set_conversion_mode(opencc_t t_opencc, 00205 opencc_conversion_mode conversion_mode) { 00206 if (!lib_initialized) { 00207 lib_initialize(); 00208 } 00209 OpenccDesc* opencc = (OpenccDesc*)t_opencc; 00210 converter_set_conversion_mode(opencc->converter, conversion_mode); 00211 } 00212 00213 opencc_error opencc_errno(void) { 00214 if (!lib_initialized) { 00215 lib_initialize(); 00216 } 00217 return errnum; 00218 } 00219 00220 void opencc_perror(const char* spec) { 00221 if (!lib_initialized) { 00222 lib_initialize(); 00223 } 00224 perr(spec); 00225 perr("\n"); 00226 switch (errnum) { 00227 case OPENCC_ERROR_VOID: 00228 break; 00229 case OPENCC_ERROR_DICTLOAD: 00230 dictionary_perror(_("Dictionary loading error")); 00231 break; 00232 case OPENCC_ERROR_CONFIG: 00233 config_perror(_("Configuration error")); 00234 break; 00235 case OPENCC_ERROR_CONVERTER: 00236 converter_perror(_("Converter error")); 00237 break; 00238 case OPENCC_ERROR_ENCODING: 00239 perr(_("Encoding error")); 00240 break; 00241 default: 00242 perr(_("Unknown")); 00243 } 00244 perr("\n"); 00245 }