Open Chinese Convert
0.4.3
A project for conversion between Traditional and Simplified Chinese
|
00001 /* 00002 * Open Chinese Convert 00003 * 00004 * Copyright 2010-2013 BYVoid <byvoid@byvoid.com> 00005 * 00006 * Licensed under the Apache License, Version 2.0 (the "License"); 00007 * you may not use this file except in compliance with the License. 00008 * You may obtain a copy of the License at 00009 * 00010 * http://www.apache.org/licenses/LICENSE-2.0 00011 * 00012 * Unless required by applicable law or agreed to in writing, software 00013 * distributed under the License is distributed on an "AS IS" BASIS, 00014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 00015 * See the License for the specific language governing permissions and 00016 * limitations under the License. 00017 */ 00018 00019 #include "config_reader.h" 00020 #include "dict_group.h" 00021 #include "dict_chain.h" 00022 00023 static dictionary_error errnum = DICTIONARY_ERROR_VOID; 00024 00025 DictGroup* dict_group_new(DictChain* dict_chain) { 00026 DictGroup* dict_group = 00027 (DictGroup*)malloc(sizeof(DictGroup)); 00028 dict_group->count = 0; 00029 dict_group->dict_chain = dict_chain; 00030 return dict_group; 00031 } 00032 00033 void dict_group_delete(DictGroup* dict_group) { 00034 size_t i; 00035 for (i = 0; i < dict_group->count; i++) { 00036 dict_delete(dict_group->dicts[i]); 00037 } 00038 free(dict_group); 00039 } 00040 00041 static char* try_find_dictionary_with_config( 00042 DictGroup* dict_group, 00043 const char* filename) { 00044 if (is_absolute_path(filename)) { 00045 return NULL; 00046 } 00047 /* Get config path */ 00048 if (dict_group->dict_chain == NULL) { 00049 return NULL; 00050 } 00051 Config* config = dict_group->dict_chain->config; 00052 if (config == NULL) { 00053 return NULL; 00054 } 00055 const char* config_path = config->file_path; 00056 if (config_path == NULL) { 00057 return NULL; 00058 } 00059 char* config_path_filename = (char*)malloc(strlen(config_path) + strlen( 00060 filename) + 2); 00061 sprintf(config_path_filename, "%s/%s", config_path, filename); 00062 FILE* fp = fopen(config_path_filename, "r"); 00063 if (fp) { 00064 fclose(fp); 00065 return config_path_filename; 00066 } 00067 return NULL; 00068 } 00069 00070 int dict_group_load(DictGroup* dict_group, 00071 const char* filename, 00072 opencc_dictionary_type type) { 00073 Dict* dictionary; 00074 char* path = try_open_file(filename); 00075 if (path == NULL) { 00076 path = try_find_dictionary_with_config(dict_group, filename); 00077 if (path == NULL) { 00078 errnum = DICTIONARY_ERROR_CANNOT_ACCESS_DICTFILE; 00079 return -1; 00080 } 00081 } 00082 dictionary = dict_new(path, type); 00083 free(path); 00084 if (dictionary == (Dict*)-1) { 00085 errnum = DICTIONARY_ERROR_INVALID_DICT; 00086 return -1; 00087 } 00088 dict_group->dicts[dict_group->count++] = dictionary; 00089 return 0; 00090 } 00091 00092 Dict* dict_group_get_dict(DictGroup* dict_group, size_t index) { 00093 if (index >= dict_group->count) { 00094 errnum = DICTIONARY_ERROR_INVALID_INDEX; 00095 return (Dict*)-1; 00096 } 00097 return dict_group->dicts[index]; 00098 } 00099 00100 const ucs4_t* const* dict_group_match_longest( 00101 DictGroup* dict_group, 00102 const ucs4_t* word, 00103 size_t maxlen, 00104 size_t* match_length) { 00105 if (dict_group->count == 0) { 00106 errnum = DICTIONARY_ERROR_NODICT; 00107 return (const ucs4_t* const*)-1; 00108 } 00109 const ucs4_t* const* retval = NULL; 00110 size_t t_match_length, max_length = 0; 00111 size_t i; 00112 for (i = 0; i < dict_group->count; i++) { 00113 /* 依次查找每個辭典,取得最長匹配長度 */ 00114 const ucs4_t* const* t_retval = dict_match_longest( 00115 dict_group->dicts[i], 00116 word, 00117 maxlen, 00118 &t_match_length); 00119 if (t_retval != NULL) { 00120 if (t_match_length > max_length) { 00121 max_length = t_match_length; 00122 retval = t_retval; 00123 } 00124 } 00125 } 00126 if (match_length != NULL) { 00127 *match_length = max_length; 00128 } 00129 return retval; 00130 } 00131 00132 size_t dict_group_get_all_match_lengths(DictGroup* dict_group, 00133 const ucs4_t* word, 00134 size_t* match_length) { 00135 if (dict_group->count == 0) { 00136 errnum = DICTIONARY_ERROR_NODICT; 00137 return (size_t)-1; 00138 } 00139 size_t rscnt = 0; 00140 size_t i; 00141 for (i = 0; i < dict_group->count; i++) { 00142 size_t retval; 00143 retval = dict_get_all_match_lengths( 00144 dict_group->dicts[i], 00145 word, 00146 match_length + rscnt 00147 ); 00148 rscnt += retval; 00149 /* 去除重複長度 */ 00150 if ((i > 0) && (rscnt > 1)) { 00151 qsort(match_length, rscnt, sizeof(match_length[0]), qsort_int_cmp); 00152 size_t j, k; 00153 for (j = 0, k = 1; k < rscnt; k++) { 00154 if (match_length[k] != match_length[j]) { 00155 match_length[++j] = match_length[k]; 00156 } 00157 } 00158 rscnt = j + 1; 00159 } 00160 } 00161 return rscnt; 00162 } 00163 00164 dictionary_error dictionary_errno(void) { 00165 return errnum; 00166 } 00167 00168 void dictionary_perror(const char* spec) { 00169 perr(spec); 00170 perr("\n"); 00171 switch (errnum) { 00172 case DICTIONARY_ERROR_VOID: 00173 break; 00174 case DICTIONARY_ERROR_NODICT: 00175 perr(_("No dictionary loaded")); 00176 break; 00177 case DICTIONARY_ERROR_CANNOT_ACCESS_DICTFILE: 00178 perror(_("Can not open dictionary file")); 00179 break; 00180 case DICTIONARY_ERROR_INVALID_DICT: 00181 perror(_("Invalid dictionary file")); 00182 break; 00183 case DICTIONARY_ERROR_INVALID_INDEX: 00184 perror(_("Invalid dictionary index")); 00185 break; 00186 default: 00187 perr(_("Unknown")); 00188 } 00189 }