liblcf
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros
reader_util.cpp
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2014 liblcf authors
3  * This file is released under the MIT License
4  * http://opensource.org/licenses/MIT
5  */
6 
#include "reader_options.h"

#ifdef LCF_SUPPORT_ICU
#  include <unicode/ucsdet.h>
#  include <unicode/ucnv.h>
#else
#  ifdef _MSC_VER
#    error MSVC builds require ICU
#  endif
#endif

#ifdef _WIN32
#  define WIN32_LEAN_AND_MEAN
#  ifndef NOMINMAX
#    define NOMINMAX
#  endif
#  include <windows.h>
#else
#  ifndef LCF_SUPPORT_ICU
#    include <iconv.h>
#  endif
#  include <locale>
#endif

#include <cstdlib>
#include <sstream>
#include <stdexcept>
#include <vector>

#include "data.h"
#include "inireader.h"
#include "ldb_reader.h"
#include "reader_util.h"
39 
// Empty in this file: the ReaderUtil functions below are defined with
// qualified names instead of inside this namespace body.
namespace ReaderUtil {} // namespace ReaderUtil
42 
43 std::string ReaderUtil::CodepageToEncoding(int codepage) {
44  if (codepage == 0)
45  return std::string();
46 
47  if (codepage == 932) {
48 #ifdef LCF_SUPPORT_ICU
49  return "ibm-943_P130-1999";
50 #else
51  return "SHIFT_JIS";
52 #endif
53  }
54  if (codepage == 949) {
55 #ifdef LCF_SUPPORT_ICU
56  return "ibm-949_P110-1999";
57 #else
58  return "cp1361";
59 #endif
60  }
61  std::ostringstream out;
62 #ifdef LCF_SUPPORT_ICU
63  out << "windows-" << codepage;
64 #else
65  out << "CP" << codepage;
66 #endif
67 
68  // Looks like a valid codepage
69  std::string outs = out.str();
70  return outs;
71 }
72 
73 std::string ReaderUtil::DetectEncoding(const std::string& database_file) {
74  std::string encoding;
75 #ifdef LCF_SUPPORT_ICU
76  std::ostringstream text;
77 
78  //Populate Data::terms or will empty by default even if load fails
79  LDB_Reader::Load(database_file, "");
80 
81  text <<
82  Data::terms.menu_save << " " <<
83  Data::terms.menu_quit << " " <<
84  Data::terms.new_game << " " <<
85  Data::terms.load_game << " " <<
86  Data::terms.exit_game << " " <<
87  Data::terms.status << " " <<
88  Data::terms.row << " " <<
89  Data::terms.order << " " <<
90  Data::terms.wait_on << " " <<
91  Data::terms.wait_off << " " <<
92  Data::terms.level << " " <<
93  Data::terms.health_points << " " <<
94  Data::terms.spirit_points << " " <<
95  Data::terms.normal_status << " " <<
96  Data::terms.exp_short << " " <<
97  Data::terms.lvl_short << " " <<
98  Data::terms.hp_short << " " <<
99  Data::terms.sp_short << " " <<
100  Data::terms.sp_cost << " " <<
101  Data::terms.attack << " " <<
102  Data::terms.defense << " " <<
103  Data::terms.spirit << " " <<
104  Data::terms.agility << " " <<
105  Data::terms.weapon << " " <<
106  Data::terms.shield << " " <<
107  Data::terms.armor << " " <<
108  Data::terms.helmet << " " <<
109  Data::terms.accessory << " " <<
112  Data::terms.file << " " <<
114  Data::terms.yes << " " <<
115  Data::terms.no;
116 
117  // Checks if there are more than the above 33 spaces (no data)
118  if (text.str().size() > 33)
119  {
120  UErrorCode status = U_ZERO_ERROR;
121  UCharsetDetector* detector = ucsdet_open(&status);
122 
123  std::string s = text.str();
124  ucsdet_setText(detector, s.c_str(), s.length(), &status);
125 
126  const UCharsetMatch* match = ucsdet_detect(detector, &status);
127  if (match != NULL)
128  {
129  encoding = ucsdet_getName(match, &status);
130  }
131  ucsdet_close(detector);
132 
133  // Fixes to ensure proper Windows encodings
134  if (encoding == "Shift_JIS")
135  {
136  encoding = "ibm-943_P130-1999"; // Japanese with Yen backslash
137  }
138  else if (encoding == "EUC-KR")
139  {
140  encoding = "ibm-949_P110-1999"; // Korean with Won backslash
141  }
142  else if (encoding == "ISO-8859-1" || encoding == "windows-1252")
143  {
144  encoding = "ibm-5348_P100-1997"; // Occidental with Euro
145  }
146  else if (encoding == "ISO-8859-2" || encoding == "windows-1250")
147  {
148  encoding = "ibm-5346_P100-1998"; // Central Europe with Euro
149  }
150  else if (encoding == "ISO-8859-5" || encoding == "windows-1251")
151  {
152  encoding = "ibm-5347_P100-1998"; // Cyrillic with Euro
153  }
154  else if (encoding == "ISO-8859-6" || encoding == "windows-1256")
155  {
156  encoding = "ibm-9448_X100-2005"; // Arabic with Euro + 8 chars
157  }
158  else if (encoding == "ISO-8859-7" || encoding == "windows-1253")
159  {
160  encoding = "ibm-5349_P100-1998"; // Greek with Euro
161  }
162  else if (encoding == "ISO-8859-8" || encoding == "windows-1255")
163  {
164  encoding = "ibm-9447_P100-2002"; // Hebrew with Euro
165  }
166  }
167 #endif
168 
169  return encoding;
170 }
171 
172 std::string ReaderUtil::GetEncoding(const std::string& ini_file) {
173  INIReader ini(ini_file);
174  if (ini.ParseError() != -1) {
175  std::string encoding = ini.Get("EasyRPG", "Encoding", std::string());
176  if (!encoding.empty()) {
177  return ReaderUtil::CodepageToEncoding(atoi(encoding.c_str()));
178  }
179  }
180  return std::string();
181 }
182 
184 #ifdef _WIN32
185  int codepage = GetACP();
186 #else
187  int codepage = 1252;
188 
189  std::locale loc = std::locale("");
190  // Gets the language and culture part only
191  std::string loc_full = loc.name().substr(0, loc.name().find_first_of("@."));
192  // Gets the language part only
193  std::string loc_lang = loc.name().substr(0, loc.name().find_first_of("_"));
194 
195  if (loc_lang == "th") codepage = 874;
196  else if (loc_lang == "ja") codepage = 932;
197  else if (loc_full == "zh_CN" ||
198  loc_full == "zh_SG") codepage = 936;
199  else if (loc_lang == "ko") codepage = 949;
200  else if (loc_full == "zh_TW" ||
201  loc_full == "zh_HK") codepage = 950;
202  else if (loc_lang == "cs" ||
203  loc_lang == "hu" ||
204  loc_lang == "pl" ||
205  loc_lang == "ro" ||
206  loc_lang == "hr" ||
207  loc_lang == "sk" ||
208  loc_lang == "sl") codepage = 1250;
209  else if (loc_lang == "ru") codepage = 1251;
210  else if (loc_lang == "ca" ||
211  loc_lang == "da" ||
212  loc_lang == "de" ||
213  loc_lang == "en" ||
214  loc_lang == "es" ||
215  loc_lang == "fi" ||
216  loc_lang == "fr" ||
217  loc_lang == "it" ||
218  loc_lang == "nl" ||
219  loc_lang == "nb" ||
220  loc_lang == "pt" ||
221  loc_lang == "sv" ||
222  loc_lang == "eu") codepage = 1252;
223  else if (loc_lang == "el") codepage = 1253;
224  else if (loc_lang == "tr") codepage = 1254;
225  else if (loc_lang == "he") codepage = 1255;
226  else if (loc_lang == "ar") codepage = 1256;
227  else if (loc_lang == "et" ||
228  loc_lang == "lt" ||
229  loc_lang == "lv") codepage = 1257;
230  else if (loc_lang == "vi") codepage = 1258;
231 #endif
232 
233  return CodepageToEncoding(codepage);
234 }
235 
236 std::string ReaderUtil::Recode(const std::string& str_to_encode, const std::string& source_encoding) {
237  return ReaderUtil::Recode(str_to_encode, source_encoding, "UTF-8");
238 }
239 
240 std::string ReaderUtil::Recode(const std::string& str_to_encode,
241  const std::string& src_enc,
242  const std::string& dst_enc) {
243  std::string encoding_str = src_enc;
244 
245  if (src_enc.empty()) {
246  return str_to_encode;
247  }
248  if (atoi(src_enc.c_str()) > 0) {
249  encoding_str = ReaderUtil::CodepageToEncoding(atoi(src_enc.c_str()));
250  }
251 #ifdef LCF_SUPPORT_ICU
252  UErrorCode status = U_ZERO_ERROR;
253  int size = str_to_encode.size() * 4;
254  UChar* unicode_str = new UChar[size];
255  UConverter *conv;
256  int length;
257  std::string result_str;
258 
259  conv = ucnv_open(encoding_str.c_str(), &status);
260 
261  if (status != U_ZERO_ERROR && status != U_AMBIGUOUS_ALIAS_WARNING) {
262  return std::string();
263  }
264  status = U_ZERO_ERROR;
265 
266  length = ucnv_toUChars(conv, unicode_str, size, str_to_encode.c_str(), -1, &status);
267  ucnv_close(conv);
268  if (status != U_ZERO_ERROR) return std::string();
269 
270  char* result = new char[length * 4];
271 
272  conv = ucnv_open(dst_enc.data(), &status);
273  ucnv_fromUChars(conv, result, length * 4, unicode_str, -1, &status);
274  ucnv_close(conv);
275  if (status != U_ZERO_ERROR) return std::string();
276 
277  result_str = result;
278 
279  delete[] unicode_str;
280  delete[] result;
281 
282  return std::string(result_str);
283 #else
284  iconv_t cd = iconv_open(dst_enc.c_str(), encoding_str.c_str());
285  if (cd == (iconv_t)-1)
286  return str_to_encode;
287  char *src = const_cast<char *>(str_to_encode.c_str());
288  size_t src_left = str_to_encode.size();
289  size_t dst_size = str_to_encode.size() * 5 + 10;
290  char *dst = new char[dst_size];
291  size_t dst_left = dst_size;
292 # ifdef ICONV_CONST
293  char ICONV_CONST *p = src;
294 # else
295  char *p = src;
296 # endif
297  char *q = dst;
298  size_t status = iconv(cd, &p, &src_left, &q, &dst_left);
299  iconv_close(cd);
300  if (status == (size_t) -1 || src_left > 0) {
301  delete[] dst;
302  return std::string();
303  }
304  *q++ = '\0';
305  std::string result(dst);
306  delete[] dst;
307  return result;
308 #endif
309 }
std::string DetectEncoding(const std::string &database_file)
Definition: reader_util.cpp:73
std::string armor
Definition: rpg_terms.h:141
std::string exit_game_message
Definition: rpg_terms.h:147
std::string helmet
Definition: rpg_terms.h:142
std::string weapon
Definition: rpg_terms.h:139
std::string CodepageToEncoding(int codepage)
Definition: reader_util.cpp:43
std::string menu_save
Definition: rpg_terms.h:116
std::string exp_short
Definition: rpg_terms.h:130
std::string Recode(const std::string &str_to_encode, const std::string &source_encoding)
std::string spirit_points
Definition: rpg_terms.h:128
std::string wait_on
Definition: rpg_terms.h:124
std::string row
Definition: rpg_terms.h:122
std::string sp_cost
Definition: rpg_terms.h:134
std::string load_game
Definition: rpg_terms.h:119
std::string menu_quit
Definition: rpg_terms.h:117
std::string no
Definition: rpg_terms.h:149
std::string normal_status
Definition: rpg_terms.h:129
std::string yes
Definition: rpg_terms.h:148
std::string health_points
Definition: rpg_terms.h:127
std::string level
Definition: rpg_terms.h:126
int ParseError() const
Definition: inireader.cpp:47
std::string GetLocaleEncoding()
std::string file
Definition: rpg_terms.h:146
std::string attack
Definition: rpg_terms.h:135
std::string lvl_short
Definition: rpg_terms.h:131
bool Load(const std::string &filename, const std::string &encoding)
Definition: ldb_reader.cpp:13
std::string hp_short
Definition: rpg_terms.h:132
std::string GetEncoding(const std::string &ini_file)
std::string wait_off
Definition: rpg_terms.h:125
std::string sp_short
Definition: rpg_terms.h:133
std::string spirit
Definition: rpg_terms.h:137
std::string accessory
Definition: rpg_terms.h:143
std::string order
Definition: rpg_terms.h:123
std::string agility
Definition: rpg_terms.h:138
RPG::Terms & terms
Definition: data.cpp:27
std::string save_game_message
Definition: rpg_terms.h:144
std::string new_game
Definition: rpg_terms.h:118
std::string defense
Definition: rpg_terms.h:136
std::string Get(std::string section, std::string name, std::string default_value)
Definition: inireader.cpp:52
std::string exit_game
Definition: rpg_terms.h:120
std::string shield
Definition: rpg_terms.h:140
std::string load_game_message
Definition: rpg_terms.h:145
std::string status
Definition: rpg_terms.h:121