/* Libcroco */
00001 /* -*- Mode: C; indent-tabs-mode: nil; c-basic-offset: 8 -*- */ 00002 00003 /* 00004 * This file is part of The Croco Library 00005 * 00006 * Copyright (C) 2002-2003 Dodji Seketeli <dodji@seketeli.org> 00007 * 00008 * This program is free software; you can redistribute it and/or 00009 * modify it under the terms of version 2.1 of the GNU Lesser General Public 00010 * License as published by the Free Software Foundation. 00011 * 00012 * This program is distributed in the hope that it will be useful, 00013 * but WITHOUT ANY WARRANTY; without even the implied warranty of 00014 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00015 * GNU General Public License for more details. 00016 * 00017 * You should have received a copy of the GNU Lesser General Public License 00018 * along with this program; if not, write to the Free Software 00019 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 00020 * USA 00021 */ 00022 00023 /* 00024 *$Id$ 00025 */ 00026 00027 /** 00028 *@file 00029 *The definition of the #CREncHandler class. 
00030 */ 00031 00032 #include "cr-enc-handler.h" 00033 #include "cr-utils.h" 00034 00035 #include <string.h> 00036 00037 struct CREncAlias { 00038 const gchar *name; 00039 enum CREncoding encoding; 00040 }; 00041 00042 static struct CREncAlias gv_default_aliases[] = { 00043 {"UTF-8", CR_UTF_8}, 00044 {"UTF_8", CR_UTF_8}, 00045 {"UTF8", CR_UTF_8}, 00046 {"UTF-16", CR_UTF_16}, 00047 {"UTF_16", CR_UTF_16}, 00048 {"UTF16", CR_UTF_16}, 00049 {"UCS1", CR_UCS_1}, 00050 {"UCS-1", CR_UCS_1}, 00051 {"UCS_1", CR_UCS_1}, 00052 {"ISO-8859-1", CR_UCS_1}, 00053 {"ISO_8859-1", CR_UCS_1}, 00054 {"UCS-1", CR_UCS_1}, 00055 {"UCS_1", CR_UCS_1}, 00056 {"UCS4", CR_UCS_4}, 00057 {"UCS-4", CR_UCS_4}, 00058 {"UCS_4", CR_UCS_4}, 00059 {"ASCII", CR_ASCII}, 00060 {0, 0} 00061 }; 00062 00063 static CREncHandler gv_default_enc_handlers[] = { 00064 {CR_UCS_1, cr_utils_ucs1_to_utf8, cr_utils_utf8_to_ucs1, 00065 cr_utils_ucs1_str_len_as_utf8, cr_utils_utf8_str_len_as_ucs1}, 00066 00067 {CR_ISO_8859_1, cr_utils_ucs1_to_utf8, cr_utils_utf8_to_ucs1, 00068 cr_utils_ucs1_str_len_as_utf8, cr_utils_utf8_str_len_as_ucs1}, 00069 00070 {CR_ASCII, cr_utils_ucs1_to_utf8, cr_utils_utf8_to_ucs1, 00071 cr_utils_ucs1_str_len_as_utf8, cr_utils_utf8_str_len_as_ucs1}, 00072 00073 {0, NULL, NULL, NULL, NULL} 00074 }; 00075 00076 /** 00077 * cr_enc_handler_get_instance: 00078 *@a_enc: the encoding of the Handler. 00079 * 00080 *Gets the instance of encoding handler. 00081 *This function implements a singleton pattern. 00082 * 00083 *Returns the instance of #CREncHandler. 
00084 */ 00085 CREncHandler * 00086 cr_enc_handler_get_instance (enum CREncoding a_enc) 00087 { 00088 gulong i = 0; 00089 00090 for (i = 0; gv_default_enc_handlers[i].encoding; i++) { 00091 if (gv_default_enc_handlers[i].encoding == a_enc) { 00092 return (CREncHandler *) 00093 & gv_default_enc_handlers[i].encoding; 00094 } 00095 } 00096 00097 return NULL; 00098 } 00099 00100 /** 00101 * cr_enc_handler_resolve_enc_alias: 00102 *@a_alias_name: the encoding name. 00103 *@a_enc: output param. The returned encoding type 00104 *or 0 if the alias is not supported. 00105 * 00106 *Given an encoding name (called an alias name) 00107 *the function returns the matching encoding type. 00108 * 00109 *Returns CR_OK upon successfull completion, an error code otherwise. 00110 */ 00111 enum CRStatus 00112 cr_enc_handler_resolve_enc_alias (const guchar * a_alias_name, 00113 enum CREncoding *a_enc) 00114 { 00115 gulong i = 0; 00116 guchar *alias_name_up = NULL; 00117 enum CRStatus status = CR_ENCODING_NOT_FOUND_ERROR; 00118 00119 g_return_val_if_fail (a_alias_name != NULL, CR_BAD_PARAM_ERROR); 00120 00121 alias_name_up = g_strdup (a_alias_name); 00122 g_ascii_strup (alias_name_up, -1); 00123 00124 for (i = 0; gv_default_aliases[i].name; i++) { 00125 if (!strcmp (gv_default_aliases[i].name, alias_name_up)) { 00126 *a_enc = gv_default_aliases[i].encoding; 00127 status = CR_OK; 00128 break; 00129 } 00130 } 00131 00132 return status; 00133 } 00134 00135 /** 00136 * cr_enc_handler_convert_input: 00137 *@a_this: the current instance of #CREncHandler. 00138 *@a_in: the input buffer to convert. 00139 *@a_in_len: in/out parameter. The len of the input 00140 *buffer to convert. After return, contains the number of 00141 *bytes actually consumed. 00142 *@a_out: output parameter. The converted output buffer. 00143 *Must be freed by the buffer. 00144 *@a_out_len: output parameter. The length of the output buffer. 00145 * 00146 *Converts a raw input buffer into an utf8 buffer. 
00147 * 00148 *Returns CR_OK upon successfull completion, an error code otherwise. 00149 */ 00150 enum CRStatus 00151 cr_enc_handler_convert_input (CREncHandler * a_this, 00152 const guchar * a_in, 00153 gulong * a_in_len, 00154 guchar ** a_out, gulong * a_out_len) 00155 { 00156 enum CRStatus status = CR_OK; 00157 00158 g_return_val_if_fail (a_this && a_in && a_in_len && a_out, 00159 CR_BAD_PARAM_ERROR); 00160 00161 if (a_this->decode_input == NULL) 00162 return CR_OK; 00163 00164 if (a_this->enc_str_len_as_utf8) { 00165 status = a_this->enc_str_len_as_utf8 (a_in, 00166 &a_in[*a_in_len - 1], 00167 a_out_len); 00168 00169 g_return_val_if_fail (status == CR_OK, status); 00170 } else { 00171 *a_out_len = *a_in_len; 00172 } 00173 00174 *a_out = g_malloc0 (*a_out_len); 00175 00176 status = a_this->decode_input (a_in, a_in_len, *a_out, a_out_len); 00177 00178 if (status != CR_OK) { 00179 g_free (*a_out); 00180 *a_out = NULL; 00181 } 00182 00183 g_return_val_if_fail (status == CR_OK, status); 00184 00185 return CR_OK; 00186 }