Main Page | Alphabetical List | Data Structures | File List | Data Fields | Globals | Related Pages

cr-utils.c

Go to the documentation of this file.
00001 /* -*- Mode: C; indent-tabs-mode: nil; c-basic-offset: 8 -*- */
00002 
00003 /*
00004  * This file is part of The Croco Library
00005  *
00006  * Copyright (C) 2002-2003 Dodji Seketeli <dodji@seketeli.org>
00007  *
00008  * This program is free software; you can redistribute it and/or
00009  * modify it under the terms of version 2.1 of the GNU Lesser General Public
00010  * License as published by the Free Software Foundation.
00011  *
00012  * This program is distributed in the hope that it will be useful,
00013  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00014  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00015  * GNU General Public License for more details.
00016  *
00017  * You should have received a copy of the GNU Lesser General Public License
00018  * along with this program; if not, write to the Free Software
00019  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
00020  * USA
00021  */
00022 
00023 /*
00024  *$Id: cr-utils.c,v 1.2 2003/06/16 23:54:31 dodji Exp $
00025  */
00026 
00027 #include "cr-utils.h"
00028 
00029 /**
00030  *@file:
00031  *Some misc utility functions used
00032  *in the libcroco.
00033  *Note that throughout this file I will
00034  *refer to the CSS SPECIFICATIONS DOCUMENTATION
00035  *written by the w3c guys. You can find that document
00036  *at http://www.w3.org/TR/REC-CSS2/ .
00037  */
00038 
00039 
00040 /****************************
00041  *Encoding transformations and
00042  *encoding helpers
00043  ****************************/
00044 
00045 /*
00046  *Here is the correspondence between the ucs-4 character codes
00047  *and their matching utf-8 encoding pattern as described by RFC 2279:
00048  *
00049  *UCS-4 range (hex.)    UTF-8 octet sequence (binary)
00050  *------------------    -----------------------------
00051  *0000 0000-0000 007F   0xxxxxxx
00052  *0000 0080-0000 07FF   110xxxxx 10xxxxxx
00053  *0000 0800-0000 FFFF   1110xxxx 10xxxxxx 10xxxxxx
00054  *0001 0000-001F FFFF   11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
00055  *0020 0000-03FF FFFF   111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
00056  *0400 0000-7FFF FFFF   1111110x 10xxxxxx ... 10xxxxxx
00057  */
00058 
00059 
00060 
00061 /**
00062  *Given an utf8 string buffer, calculates
00063  *the length of this string if it was encoded
00064  *in ucs4.
00065  *@param a_in_start a pointer to the begining of
00066  *the input utf8 string.
00067  *@param a_in_end a pointre to the end of the input
00068  *utf8 string (points to the last byte of the buffer)
00069  *@param a_len out parameter the calculated length.
00070  *@return CR_OK upon succesfull completion, an error code
00071  *otherwise.
00072  */
00073 enum CRStatus
00074 cr_utils_utf8_str_len_as_ucs4 (const guchar *a_in_start,
00075                                const guchar *a_in_end,
00076                                gulong *a_len)
00077 {        
00078         guchar *byte_ptr = NULL ;
00079         gint len = 0 ;
00080 
00081         /*
00082          *to store the final decoded 
00083          *unicode char
00084          */
00085         guint c = 0 ;
00086 
00087         g_return_val_if_fail (a_in_start && a_in_end && a_len,
00088                               CR_BAD_PARAM_ERROR) ;
00089         *a_len = 0 ;
00090         
00091         for (byte_ptr = (guchar*)a_in_start ;
00092              byte_ptr <= a_in_end ;
00093              byte_ptr++) 
00094         {
00095                 gint nb_bytes_2_decode = 0 ;
00096 
00097                 if (*byte_ptr <= 0x7F) 
00098                 {
00099                         /*
00100                          *7 bits long char
00101                          *encoded over 1 byte:
00102                          * 0xxx xxxx
00103                          */
00104                         c = *byte_ptr ;
00105                         nb_bytes_2_decode = 1 ;
00106 
00107                 } 
00108                 else if ((*byte_ptr & 0xE0) == 0xC0) 
00109                 {
00110                         /*
00111                          *up to 11 bits long char.
00112                          *encoded over 2 bytes:
00113                          *110x xxxx  10xx xxxx
00114                          */
00115                         c = *byte_ptr & 0x1F ;
00116                         nb_bytes_2_decode = 2 ;
00117 
00118                 } 
00119                 else if ((*byte_ptr & 0xF0) == 0xE0) 
00120                 {
00121                         /*
00122                          *up to 16 bit long char
00123                          *encoded over 3 bytes:
00124                          *1110 xxxx  10xx xxxx  10xx xxxx
00125                          */
00126                         c = *byte_ptr & 0x0F ;
00127                         nb_bytes_2_decode = 3 ;
00128 
00129                 } 
00130                 else if ((*byte_ptr & 0xF8) == 0xF0) 
00131                 {
00132                         /*
00133                          *up to 21 bits long char
00134                          *encoded over 4 bytes:
00135                          *1111 0xxx  10xx xxxx  10xx xxxx  10xx xxxx
00136                          */
00137                         c = *byte_ptr & 0x7 ;
00138                         nb_bytes_2_decode = 4 ;
00139 
00140                 } 
00141                 else if ((*byte_ptr & 0xFC) == 0xF8) 
00142                 {
00143                         /*
00144                          *up to 26 bits long char
00145                          *encoded over 5 bytes.
00146                          *1111 10xx  10xx xxxx  10xx xxxx  
00147                          *10xx xxxx  10xx xxxx
00148                          */
00149                         c = *byte_ptr & 3 ;
00150                         nb_bytes_2_decode = 5 ;
00151 
00152                 } 
00153                 else if ((*byte_ptr & 0xFE) == 0xFC) 
00154                 {
00155                         /*
00156                          *up to 31 bits long char
00157                          *encoded over 6 bytes:
00158                          *1111 110x  10xx xxxx  10xx xxxx  
00159                          *10xx xxxx  10xx xxxx  10xx xxxx
00160                          */
00161                         c = *byte_ptr & 1 ;
00162                         nb_bytes_2_decode = 6 ;
00163 
00164                 } 
00165                 else 
00166                 {
00167                         /*
00168                          *BAD ENCODING
00169                          */
00170                         return CR_ENCODING_ERROR ;
00171                 }
00172 
00173                 /*
00174                  *Go and decode the remaining byte(s)
00175                  *(if any) to get the current character.
00176                  */
00177                 for ( ;
00178                       nb_bytes_2_decode > 1 ;
00179                       nb_bytes_2_decode --) 
00180                 {
00181                         /*decode the next byte*/
00182                         byte_ptr ++ ;
00183 
00184                         /*byte pattern must be: 10xx xxxx*/
00185                         if ((*byte_ptr & 0xC0) != 0x80) 
00186                         {
00187                                 return CR_ENCODING_ERROR ;
00188                         }
00189 
00190                         c = (c << 6) | (*byte_ptr & 0x3F) ;
00191                 }
00192 
00193                 len ++ ;
00194         }
00195 
00196         *a_len = len ;
00197 
00198         return CR_OK ;
00199 }
00200 
00201 
00202 
00203 /**
00204  *Given an ucs4 string, this function
00205  *returns the size (in bytes) this string
00206  *would have occupied if it was encoded in utf-8.
00207  *@param a_in_start a pointer to the beginning of the input
00208  *buffer.
00209  *@param a_in_end a pointer to the end of the input buffer.
00210  *@param a_len out parameter. The computed length.
00211  *@return CR_OK upon successfull completion, an error code otherwise.
00212  */
00213 enum CRStatus
00214 cr_utils_ucs4_str_len_as_utf8 (const guint32 *a_in_start, 
00215                                const guint32 *a_in_end,
00216                                gulong *a_len)
00217 {
00218         gint len = 0 ;
00219         guint32 *char_ptr = NULL ;
00220 
00221         g_return_val_if_fail (a_in_start && a_in_end && a_len,
00222                               CR_BAD_PARAM_ERROR) ;
00223 
00224         for (char_ptr = (guint32*)a_in_start ;
00225              char_ptr <= a_in_end ;
00226              char_ptr ++) 
00227         {
00228                 if (*char_ptr <= 0x7F) 
00229                 {
00230                         /*the utf-8 char would take 1 byte*/
00231                         len += 1 ;
00232                 } 
00233                 else if (*char_ptr <= 0x7FF) 
00234                 {
00235                         /*the utf-8 char would take 2 bytes*/
00236                         len += 2 ;
00237                 }
00238                 else if (*char_ptr <= 0xFFFF)
00239                 {
00240                         len += 3 ;
00241                 }
00242                 else if (*char_ptr <= 0x1FFFFF)
00243                 {
00244                         len += 4 ;
00245                 }
00246                 else if (*char_ptr <= 0x3FFFFFF)
00247                 {
00248                         len += 5 ;
00249                 }
00250                 else if (*char_ptr <= 0x7FFFFFFF)
00251                 {
00252                         len+= 6 ;
00253                 }
00254         }
00255 
00256         *a_len = len ;
00257         return CR_OK ;
00258 }
00259 
00260 
00261 /**
00262  *Given an ucsA string, this function
00263  *returns the size (in bytes) this string
00264  *would have occupied if it was encoded in utf-8.
00265  *@param a_in_start a pointer to the beginning of the input
00266  *buffer.
00267  *@param a_in_end a pointer to the end of the input buffer.
00268  *@param a_len out parameter. The computed length.
00269  *@return CR_OK upon successfull completion, an error code otherwise.
00270  */
00271 enum CRStatus
00272 cr_utils_ucs1_str_len_as_utf8 (const guchar *a_in_start, 
00273                                const guchar *a_in_end,
00274                                gulong *a_len)
00275 {
00276         gint len = 0 ;
00277         guchar *char_ptr = NULL ;
00278 
00279         g_return_val_if_fail (a_in_start && a_in_end && a_len,
00280                               CR_BAD_PARAM_ERROR) ;
00281 
00282         for (char_ptr = (guchar *)a_in_start ;
00283              char_ptr <= a_in_end ;
00284              char_ptr ++) 
00285         {
00286                 if (*char_ptr <= 0x7F) 
00287                 {
00288                         /*the utf-8 char would take 1 byte*/
00289                         len += 1 ;
00290                 } 
00291                 else
00292                 {
00293                         /*the utf-8 char would take 2 bytes*/
00294                         len += 2 ;
00295                 }
00296         }
00297 
00298         *a_len = len ;
00299         return CR_OK ;
00300 }
00301 
00302 /**
00303  *Converts an utf8 buffer into an ucs4 buffer.
00304  *
00305  *@param a_in the input utf8 buffer to convert.
00306  *@param a_in_len in/out parameter. The size of the
00307  *input buffer to convert. After return, this parameter contains
00308  *the actual number of bytes consumed.
00309  *@param a_out the output converted ucs4 buffer. Must be allocated by
00310  *the caller.
00311  *@param a_out_len in/out parameter. The size of the output buffer.
00312  *If this size is actually smaller than the real needed size, the function
00313  *just converts what it can and returns a success status. After return,
00314  *this param points to the actual number of characters decoded.
00315  *@return CR_OK upon successfull completion, an error code otherwise.
00316  */
00317 enum CRStatus
00318 cr_utils_utf8_to_ucs4 (const guchar * a_in, 
00319                        gulong *a_in_len,
00320                        guint32 *a_out, 
00321                        gulong *a_out_len)
00322 {
00323         gulong in_len = 0, out_len = 0, in_index = 0, out_index = 0 ;
00324         enum CRStatus status = CR_OK ;
00325 
00326         /*
00327          *to store the final decoded 
00328          *unicode char
00329          */
00330         guint c = 0 ;
00331 
00332         g_return_val_if_fail (a_in && a_in_len 
00333                               && a_out && a_out_len,
00334                               CR_BAD_PARAM_ERROR) ;
00335 
00336         if (*a_in_len < 1)
00337         {
00338                 status = CR_OK ;
00339                 goto end ;
00340         }
00341 
00342         in_len = *a_in_len ;
00343         out_len = *a_out_len ;
00344 
00345         for (in_index = 0, out_index = 0 ;
00346              (in_index < in_len) && (out_index < out_len) ;
00347              in_index++, out_index++)
00348         {
00349                 gint nb_bytes_2_decode = 0 ;
00350 
00351                 if (a_in[in_index] <= 0x7F) 
00352                 {
00353                         /*
00354                          *7 bits long char
00355                          *encoded over 1 byte:
00356                          * 0xxx xxxx
00357                          */
00358                         c = a_in[in_index] ;
00359                         nb_bytes_2_decode = 1 ;
00360 
00361                 } 
00362                 else if ((a_in[in_index] & 0xE0) == 0xC0) 
00363                 {
00364                         /*
00365                          *up to 11 bits long char.
00366                          *encoded over 2 bytes:
00367                          *110x xxxx  10xx xxxx
00368                          */
00369                         c = a_in[in_index] & 0x1F ;
00370                         nb_bytes_2_decode = 2 ;
00371 
00372                 } 
00373                 else if ((a_in[in_index] & 0xF0) == 0xE0) 
00374                 {
00375                         /*
00376                          *up to 16 bit long char
00377                          *encoded over 3 bytes:
00378                          *1110 xxxx  10xx xxxx  10xx xxxx
00379                          */
00380                         c = a_in[in_index] & 0x0F ;
00381                         nb_bytes_2_decode = 3 ;
00382 
00383                 } 
00384                 else if ((a_in[in_index] & 0xF8) == 0xF0) 
00385                 {
00386                         /*
00387                          *up to 21 bits long char
00388                          *encoded over 4 bytes:
00389                          *1111 0xxx  10xx xxxx  10xx xxxx  10xx xxxx
00390                          */
00391                         c = a_in[in_index] & 0x7 ;
00392                         nb_bytes_2_decode = 4 ;
00393 
00394                 } 
00395                 else if ((a_in[in_index] & 0xFC) == 0xF8) 
00396                 {
00397                         /*
00398                          *up to 26 bits long char
00399                          *encoded over 5 bytes.
00400                          *1111 10xx  10xx xxxx  10xx xxxx  
00401                          *10xx xxxx  10xx xxxx
00402                          */
00403                         c = a_in[in_index] & 3 ;
00404                         nb_bytes_2_decode = 5 ;
00405 
00406                 } 
00407                 else if ((a_in[in_index] & 0xFE) == 0xFC) 
00408                 {
00409                         /*
00410                          *up to 31 bits long char
00411                          *encoded over 6 bytes:
00412                          *1111 110x  10xx xxxx  10xx xxxx  
00413                          *10xx xxxx  10xx xxxx  10xx xxxx
00414                          */
00415                         c = a_in[in_index] & 1 ;
00416                         nb_bytes_2_decode = 6 ;
00417 
00418                 } 
00419                 else 
00420                 {
00421                         /*BAD ENCODING*/
00422                         goto end ;
00423                 }
00424 
00425                 /*
00426                  *Go and decode the remaining byte(s)
00427                  *(if any) to get the current character.
00428                  */
00429                 for ( ;
00430                       nb_bytes_2_decode > 1 ;
00431                       nb_bytes_2_decode --) 
00432                 {
00433                         /*decode the next byte*/
00434                         in_index ++ ;
00435 
00436                         /*byte pattern must be: 10xx xxxx*/
00437                         if ((a_in[in_index] & 0xC0) != 0x80)
00438                         {
00439                                 goto end ;
00440                         }
00441 
00442                         c = (c << 6) | (a_in[in_index] & 0x3F) ;
00443                 }
00444 
00445                 /*
00446                  *The decoded ucs4 char is now
00447                  *in c.
00448                  */
00449 
00450                 /************************
00451                  *Some security tests
00452                  ***********************/
00453                 
00454                 /*be sure c is a char*/
00455                 if (c == 0xFFFF || c == 0xFFFE) goto end ;
00456                 
00457                 /*be sure c is inferior to the max ucs4 char value*/
00458                 if (c > 0x10FFFF) goto end ;
00459 
00460                 /*
00461                  *c must be less than UTF16 "lower surrogate begin"
00462                  *or higher than UTF16 "High surrogate end"
00463                  */
00464                 if (c >= 0xD800 && c <= 0xDFFF) goto end ;
00465 
00466                 /*Avoid characters that equals zero*/
00467                 if (c == 0) goto end ;
00468 
00469 
00470                 a_out[out_index] = c ;
00471         }
00472 
00473  end:
00474         *a_out_len = out_index + 1;
00475         *a_in_len = in_index + 1;
00476 
00477         return status ;
00478 }
00479 
00480 
00481 /**
00482  *Reads a character from an utf8 buffer.
00483  *Actually decode the next character code (unicode character code)
00484  *and returns it.
00485  *@param a_in the starting address of the utf8 buffer.
00486  *@param a_in_len the length of the utf8 buffer.
00487  *@param a_out output parameter. The resulting read char.
00488  *@param a_consumed the number of the bytes consumed to
00489  *decode the returned character code.
00490  *@return CR_OK upon successfull completion, an error code otherwise.
00491  */
00492 enum CRStatus
00493 cr_utils_read_char_from_utf8_buf (const guchar * a_in, 
00494                                   gulong a_in_len,
00495                                   guint32 *a_out, gulong *a_consumed)
00496 {
00497         gulong in_len = 0, in_index = 0, nb_bytes_2_decode = 0 ;
00498     enum CRStatus status = CR_OK ;
00499     
00500         /*
00501      *to store the final decoded 
00502          *unicode char
00503          */
00504         guint32 c = 0 ;
00505     
00506     g_return_val_if_fail (a_in && a_out && a_out
00507                           && a_consumed, CR_BAD_PARAM_ERROR) ;
00508     
00509     if (a_in_len < 1)
00510     {
00511         status = CR_OK ;
00512         goto end ;
00513     }
00514     
00515     in_len = a_in_len ;
00516     
00517     if (*a_in <= 0x7F) 
00518     {
00519         /*
00520          *7 bits long char
00521          *encoded over 1 byte:
00522          * 0xxx xxxx
00523          */
00524         c = *a_in ;
00525         nb_bytes_2_decode = 1 ;
00526         
00527     } 
00528     else if ((*a_in & 0xE0) == 0xC0) 
00529     {
00530         /*
00531          *up to 11 bits long char.
00532          *encoded over 2 bytes:
00533          *110x xxxx  10xx xxxx
00534          */
00535         c = *a_in & 0x1F ;
00536         nb_bytes_2_decode = 2 ;
00537         
00538     } 
00539     else if ((*a_in & 0xF0) == 0xE0) 
00540     {
00541         /*
00542          *up to 16 bit long char
00543          *encoded over 3 bytes:
00544          *1110 xxxx  10xx xxxx  10xx xxxx
00545          */
00546         c = *a_in & 0x0F ;
00547         nb_bytes_2_decode = 3 ;
00548         
00549     } 
00550     else if ((*a_in & 0xF8) == 0xF0) 
00551     {
00552         /*
00553          *up to 21 bits long char
00554          *encoded over 4 bytes:
00555          *1111 0xxx  10xx xxxx  10xx xxxx  10xx xxxx
00556          */
00557         c = *a_in & 0x7 ;
00558         nb_bytes_2_decode = 4 ;
00559         
00560     } 
00561     else if ((*a_in & 0xFC) == 0xF8) 
00562     {
00563         /*
00564          *up to 26 bits long char
00565          *encoded over 5 bytes.
00566          *1111 10xx  10xx xxxx  10xx xxxx  
00567          *10xx xxxx  10xx xxxx
00568          */
00569         c = *a_in & 3 ;
00570         nb_bytes_2_decode = 5 ;
00571         
00572     } 
00573     else if ((*a_in & 0xFE) == 0xFC) 
00574     {
00575         /*
00576          *up to 31 bits long char
00577          *encoded over 6 bytes:
00578          *1111 110x  10xx xxxx  10xx xxxx  
00579          *10xx xxxx  10xx xxxx  10xx xxxx
00580          */
00581         c = *a_in & 1 ;
00582         nb_bytes_2_decode = 6 ;
00583         
00584     } 
00585     else 
00586     {
00587         /*BAD ENCODING*/
00588         goto end ;
00589     }
00590     
00591     if (nb_bytes_2_decode > a_in_len)
00592     {
00593         status = CR_END_OF_INPUT_ERROR ;
00594         goto end ;
00595     }
00596     
00597     /*
00598      *Go and decode the remaining byte(s)
00599      *(if any) to get the current character.
00600      */
00601     for ( in_index = 1 ;
00602           in_index < nb_bytes_2_decode ;
00603           in_index ++) 
00604     {
00605         /*byte pattern must be: 10xx xxxx*/
00606         if ((a_in[in_index] & 0xC0) != 0x80)
00607         {
00608             goto end ;
00609         }
00610         
00611         c = (c << 6) | (a_in[in_index] & 0x3F) ;
00612     }
00613     
00614     /*
00615      *The decoded ucs4 char is now
00616      *in c.
00617      */
00618     
00619     /************************
00620      *Some security tests
00621      ***********************/
00622     
00623     /*be sure c is a char*/
00624     if (c == 0xFFFF || c == 0xFFFE) goto end ;
00625     
00626     /*be sure c is inferior to the max ucs4 char value*/
00627     if (c > 0x10FFFF) goto end ;
00628     
00629     /*
00630      *c must be less than UTF16 "lower surrogate begin"
00631      *or higher than UTF16 "High surrogate end"
00632      */
00633     if (c >= 0xD800 && c <= 0xDFFF) goto end ;
00634     
00635     /*Avoid characters that equals zero*/
00636     if (c == 0) goto end ;
00637     
00638     *a_out = c ;
00639     
00640  end:
00641     *a_consumed = nb_bytes_2_decode ;
00642     
00643     return status ;
00644 }
00645 
00646 
00647 /**
00648  *
00649  */
00650 enum CRStatus
00651 cr_utils_utf8_str_len_as_ucs1 (const guchar *a_in_start,
00652                                const guchar *a_in_end,
00653                                gulong *a_len)
00654 {
00655         /*
00656          *Note: this function can be made shorter
00657          *but it considers all the cases of the utf8 encoding
00658          *to ease further extensions ...
00659          */
00660 
00661         guchar *byte_ptr = NULL ;
00662         gint len = 0 ;
00663 
00664         /*
00665          *to store the final decoded 
00666          *unicode char
00667          */
00668         guint c = 0 ;
00669 
00670         g_return_val_if_fail (a_in_start && a_in_end && a_len,
00671                               CR_BAD_PARAM_ERROR) ;
00672         *a_len = 0 ;
00673         
00674         for (byte_ptr = (guchar*)a_in_start ;
00675              byte_ptr <= a_in_end ;
00676              byte_ptr++) 
00677         {
00678                 gint nb_bytes_2_decode = 0 ;
00679 
00680                 if (*byte_ptr <= 0x7F) 
00681                 {
00682                         /*
00683                          *7 bits long char
00684                          *encoded over 1 byte:
00685                          * 0xxx xxxx
00686                          */
00687                         c = *byte_ptr ;
00688                         nb_bytes_2_decode = 1 ;
00689 
00690                 } 
00691                 else if ((*byte_ptr & 0xE0) == 0xC0) 
00692                 {
00693                         /*
00694                          *up to 11 bits long char.
00695                          *encoded over 2 bytes:
00696                          *110x xxxx  10xx xxxx
00697                          */
00698                         c = *byte_ptr & 0x1F ;
00699                         nb_bytes_2_decode = 2 ;
00700 
00701                 } 
00702                 else if ((*byte_ptr & 0xF0) == 0xE0) 
00703                 {
00704                         /*
00705                          *up to 16 bit long char
00706                          *encoded over 3 bytes:
00707                          *1110 xxxx  10xx xxxx  10xx xxxx
00708                          */
00709                         c = *byte_ptr & 0x0F ;
00710                         nb_bytes_2_decode = 3 ;
00711 
00712                 } 
00713                 else if ((*byte_ptr & 0xF8) == 0xF0) 
00714                 {
00715                         /*
00716                          *up to 21 bits long char
00717                          *encoded over 4 bytes:
00718                          *1111 0xxx  10xx xxxx  10xx xxxx  10xx xxxx
00719                          */
00720                         c = *byte_ptr & 0x7 ;
00721                         nb_bytes_2_decode = 4 ;
00722 
00723                 } 
00724                 else if ((*byte_ptr & 0xFC) == 0xF8) 
00725                 {
00726                         /*
00727                          *up to 26 bits long char
00728                          *encoded over 5 bytes.
00729                          *1111 10xx  10xx xxxx  10xx xxxx  
00730                          *10xx xxxx  10xx xxxx
00731                          */
00732                         c = *byte_ptr & 3 ;
00733                         nb_bytes_2_decode = 5 ;
00734 
00735                 } 
00736                 else if ((*byte_ptr & 0xFE) == 0xFC) 
00737                 {
00738                         /*
00739                          *up to 31 bits long char
00740                          *encoded over 6 bytes:
00741                          *1111 110x  10xx xxxx  10xx xxxx  
00742                          *10xx xxxx  10xx xxxx  10xx xxxx
00743                          */
00744                         c = *byte_ptr & 1 ;
00745                         nb_bytes_2_decode = 6 ;
00746 
00747                 } 
00748                 else 
00749                 {
00750                         /*
00751                          *BAD ENCODING
00752                          */
00753                         return CR_ENCODING_ERROR ;
00754                 }
00755 
00756                 /*
00757                  *Go and decode the remaining byte(s)
00758                  *(if any) to get the current character.
00759                  */
00760                 for ( ;
00761                       nb_bytes_2_decode > 1 ;
00762                       nb_bytes_2_decode --) 
00763                 {
00764                         /*decode the next byte*/
00765                         byte_ptr ++ ;
00766 
00767                         /*byte pattern must be: 10xx xxxx*/
00768                         if ((*byte_ptr & 0xC0) != 0x80) 
00769                         {
00770                                 return CR_ENCODING_ERROR ;
00771                         }
00772 
00773                         c = (c << 6) | (*byte_ptr & 0x3F) ;
00774                 }
00775 
00776                 /*
00777                  *The decoded ucs4 char is now
00778                  *in c.
00779                  */
00780 
00781                 if (c <= 0xFF) {/*Add other conditions to support
00782                                  *other char sets (ucs2, ucs3, ucs4).
00783                                  */
00784                         len ++ ;
00785                 } else {
00786                         /*the char is too long to fit
00787                          *into the supposed charset len.
00788                          */
00789                         return CR_ENCODING_ERROR ;
00790                 }
00791         }
00792 
00793         *a_len = len ;
00794 
00795         return CR_OK ;
00796 }
00797 
00798 /**
00799  *Converts an utf8 string into an ucs4 string.
00800  *@param a_in the input string to convert.
00801  *@param a_in_len in/out parameter. The length of the input
00802  *string. After return, points to the actual number of bytes
00803  *consumed. This can be usefull to debug the input stream in case
00804  *of encoding error.
00805  *@param a_out out parameter. Points to the output string. It is allocated 
00806  *by this function and must be freed by the caller.
00807  *@param a_out_len out parameter. The length of the output string.
00808  *@return CR_OK upon successfull completion, an error code otherwise.
00809  *
00810  */
00811 enum CRStatus
00812 cr_utils_utf8_str_to_ucs4 (const guchar * a_in, 
00813                            gulong *a_in_len,
00814                            guint32 **a_out, gulong *a_out_len)
00815 {
00816         enum CRStatus status = CR_OK ;
00817 
00818         g_return_val_if_fail (a_in && a_in_len 
00819                               && a_out && a_out_len,
00820                               CR_BAD_PARAM_ERROR) ;
00821 
00822         status =
00823                 cr_utils_utf8_str_len_as_ucs4 (a_in, 
00824                                                &a_in[*a_in_len - 1],
00825                                                a_out_len) ;
00826 
00827         g_return_val_if_fail (status == CR_OK, status) ;
00828 
00829         *a_out = g_malloc0 (*a_out_len * sizeof (guint32)) ;
00830         
00831         status =
00832                 cr_utils_utf8_to_ucs4 (a_in, a_in_len,
00833                                        *a_out, a_out_len) ;
00834 
00835         return status ;
00836 }
00837 
00838 /**
00839  *Converts an ucs4 buffer into an utf8 buffer.
00840  *
00841  *@param a_in the input ucs4 buffer to convert.
00842  *@param a_in_len in/out parameter. The size of the
00843  *input buffer to convert. After return, this parameter contains
00844  *the actual number of characters consumed.
00845  *@param a_out the output converted utf8 buffer. Must be allocated by
00846  *the caller.
00847  *@param a_out_len in/out parameter. The size of the output buffer.
00848  *If this size is actually smaller than the real needed size, the function
00849  *just converts what it can and returns a success status. After return,
00850  *this param points to the actual number of bytes in the buffer.
00851  *@return CR_OK upon successfull completion, an error code otherwise.
00852  */
00853 enum CRStatus
00854 cr_utils_ucs4_to_utf8 (const guint32 *a_in, 
00855                        gulong *a_in_len,
00856                        guchar *a_out, 
00857                        gulong *a_out_len)
00858 {
00859         gulong in_len = 0, in_index = 0, out_index = 0 ;
00860         enum CRStatus status = CR_OK ;
00861 
00862         g_return_val_if_fail (a_in && a_in_len && a_out && a_out_len,
00863                               CR_BAD_PARAM_ERROR) ;
00864 
00865         if (*a_in_len < 1)
00866         {
00867                 status =  CR_OK ;
00868                 goto end ;
00869         }
00870 
00871         in_len = *a_in_len ;
00872 
00873         for (in_index = 0 ;
00874              in_index < in_len ;
00875              in_index++)
00876         {
00877                 /*
00878                  *FIXME: return whenever we encounter forbidden char values.
00879                  */
00880 
00881                 if (a_in[in_index] <= 0x7F)
00882                 {
00883                         a_out[out_index] = a_in[in_index] ;
00884                         out_index ++ ;
00885                 }
00886                 else if (a_in[in_index] <= 0x7FF)
00887                 {
00888                         a_out[out_index] = (0xC0 | (a_in[in_index] >> 6)) ;
00889                         a_out[out_index + 1] = (0x80 | (a_in[in_index] & 0x3F));
00890                         out_index += 2 ;
00891                 }
00892                 else if (a_in[in_index] <= 0xFFFF)
00893                 {
00894                         a_out[out_index] = (0xE0 | (a_in[in_index] >> 12)) ;
00895                         a_out[out_index + 1] = 
00896                                 (0x80 | ((a_in[in_index] >> 6) & 0x3F)) ;
00897                         a_out[out_index + 2] = (0x80 | (a_in[in_index] & 0x3F)) ;
00898                         out_index += 3 ;
00899                 }
00900                 else if (a_in[in_index] <= 0x1FFFFF)
00901                 {
00902                         a_out[out_index] = (0xF0 | (a_in[in_index] >> 18)) ;
00903                         a_out[out_index + 1] 
00904                                 = (0x80 | ((a_in[in_index] >> 12) & 0x3F)) ;
00905                         a_out[out_index + 2] 
00906                                 = (0x80 | ((a_in[in_index] >> 6) & 0x3F)) ;
00907                         a_out[out_index + 3] 
00908                                 = (0x80 | (a_in[in_index] & 0x3F)) ;
00909                         out_index += 4 ;
00910                 }
00911                 else if (a_in[in_index] <= 0x3FFFFFF)
00912                 {
00913                         a_out[out_index] = (0xF8 | (a_in[in_index] >> 24)) ;
00914                         a_out[out_index + 1] = (0x80 | (a_in[in_index] >> 18)) ;
00915                         a_out[out_index + 2] 
00916                                 = (0x80 | ((a_in[in_index] >> 12) & 0x3F)) ;
00917                         a_out[out_index + 3] 
00918                                 = (0x80 | ((a_in[in_index] >> 6) & 0x3F)) ;
00919                         a_out[out_index + 4] 
00920                                 = (0x80 | (a_in[in_index] & 0x3F)) ;
00921                         out_index += 5 ;
00922                 }
00923                 else if (a_in[in_index] <= 0x7FFFFFFF)
00924                 {
00925                         a_out[out_index] = (0xFC | (a_in[in_index] >> 30)) ;
00926                         a_out[out_index + 1] = (0x80 | (a_in[in_index] >> 24)) ;
00927                         a_out[out_index + 2] 
00928                                 = (0x80 | ((a_in[in_index] >> 18) & 0x3F)) ;
00929                         a_out[out_index + 3] 
00930                                 = (0x80 | ((a_in[in_index] >> 12) & 0x3F)) ;
00931                         a_out[out_index + 4] 
00932                                 = (0x80 | ((a_in[in_index] >> 6) & 0x3F)) ;
00933                         a_out[out_index + 4]
00934                                 = (0x80 | (a_in[in_index] & 0x3F)) ;
00935                         out_index += 6 ;
00936                 }
00937                 else
00938                 {
00939                         status = CR_ENCODING_ERROR ;
00940                         goto end ;
00941                 }
00942         }/*end for*/
00943         
00944  end:
00945         *a_in_len = in_index + 1 ;
00946         *a_out_len = out_index + 1 ;
00947 
00948         return status ;
00949 }
00950 
00951 
00952 /**
00953  *Converts an ucs4 string into an utf8 string.
00954  *@param a_in the input string to convert.
00955  *@param a_in_len in/out parameter. The length of the input
00956  *string. After return, points to the actual number of characters
00957  *consumed. This can be usefull to debug the input string in case
00958  *of encoding error.
00959  *@param a_out out parameter. Points to the output string. It is allocated 
00960  *by this function and must be freed by the caller.
00961  *@param a_out_len out parameter. The length (in bytes) of the output string.
00962  *@return CR_OK upon successfull completion, an error code otherwise.
00963  */
00964 enum CRStatus
00965 cr_utils_ucs4_str_to_utf8 (const guint32 *a_in, 
00966                            gulong *a_in_len,
00967                            guchar **a_out, gulong *a_out_len)
00968 {        
00969         enum CRStatus status = CR_OK ;
00970 
00971         g_return_val_if_fail (a_in && a_in_len && a_out
00972                               && a_out_len, CR_BAD_PARAM_ERROR) ;
00973 
00974         status =
00975                 cr_utils_ucs4_str_len_as_utf8 (a_in,
00976                                                &a_in[*a_out_len -1], 
00977                                                a_out_len) ;
00978 
00979         g_return_val_if_fail (status == CR_OK, status) ;
00980 
00981         status =
00982                 cr_utils_ucs4_to_utf8 (a_in, a_in_len, *a_out, a_out_len) ;
00983 
00984         return status ;
00985 }
00986 
00987 
00988 /**
00989  *Converts an ucs1 buffer into an utf8 buffer.
00990  *The caller must know the size of the resulting buffer and
00991  *allocate it prior to calling this function.
00992  *
00993  *@param a_in the input ucs1 buffer.
00994  *
00995  *@param a_in_len in/out parameter. The length of the input buffer.
00996  *After return, points to the number of bytes actually consumed even
00997  *in case of encoding error.
00998  *
00999  *@param a_out out parameter. The output utf8 converted buffer.
01000  *
01001  *@param a_out_len in/out parameter. The size of the output buffer.
01002  *If the output buffer size is shorter than the actual needed size, 
01003  *this function just convert what it can.
01004  *
01005  *@return CR_OK upon successfull completion, an error code otherwise.
01006  *
01007  */
01008 enum CRStatus
01009 cr_utils_ucs1_to_utf8 (const guchar *a_in, 
01010                        gulong *a_in_len,
01011                        guchar *a_out, 
01012                        gulong *a_out_len)
01013 {
01014         gulong out_index = 0, in_index = 0, in_len = 0, out_len = 0 ;
01015         enum CRStatus status = CR_OK ;
01016 
01017         g_return_val_if_fail (a_in && a_in_len && a_out
01018                               && a_out_len, CR_BAD_PARAM_ERROR) ;
01019 
01020         if (*a_in_len < 1) 
01021         {
01022                 status = CR_OK ;
01023                 goto end ;
01024         }
01025 
01026         in_len = *a_in_len ;
01027         out_len = *a_out_len ;
01028 
01029         for (in_index = 0, out_index = 0 ;  
01030              (in_index < in_len) && (out_index < out_len) ;
01031              in_index ++)
01032         {
01033                 /*
01034                  *FIXME: return whenever we encounter forbidden char values.
01035                  */
01036 
01037                 if (a_in[in_index] <= 0x7F)
01038                 {
01039                         a_out[out_index] = a_in[in_index] ;
01040                         out_index ++ ;
01041                 }
01042                 else
01043                 {
01044                         a_out[out_index] = (0xC0 | (a_in[in_index] >> 6)) ;
01045                         a_out[out_index + 1] = (0x80 | (a_in[in_index] & 0x3F));
01046                         out_index += 2 ;
01047                 }
01048         }/*end for*/
01049 
01050  end:
01051         *a_in_len = in_index  ;
01052         *a_out_len = out_index ;
01053 
01054         return CR_OK ;
01055 }
01056 
01057 
01058 /**
01059  *Converts an ucs1 string into an utf8 string.
01060  *@param a_in_start the beginning of the input string to convert.
01061  *@param a_in_end the end of the input string to convert.
01062  *@param a_out out parameter. The converted string.
01063  *@param a_out out parameter. The length of the converted string.
01064  *@return CR_OK upon successfull completion, an error code otherwise.
01065  *
01066  */
01067 enum CRStatus
01068 cr_utils_ucs1_str_to_utf8 (const guchar *a_in, 
01069                            gulong *a_in_len,
01070                            guchar **a_out, 
01071                            gulong *a_out_len)
01072 {
01073         gulong in_len = 0, out_len = 0 ;
01074         enum CRStatus status = CR_OK ;
01075 
01076         g_return_val_if_fail (a_in && a_in_len && a_out
01077                               && a_out_len, CR_BAD_PARAM_ERROR) ;
01078         
01079         if (*a_in_len < 1)
01080         {
01081                 *a_out_len = 0 ;
01082                 *a_out = NULL ;
01083                 return CR_OK ;
01084         }
01085 
01086         status =
01087                 cr_utils_ucs1_str_len_as_utf8 (a_in, &a_in[*a_in_len -1], 
01088                                                &out_len) ;
01089 
01090         g_return_val_if_fail (status == CR_OK, status) ;
01091 
01092         in_len = *a_in_len ;
01093 
01094         *a_out = g_malloc0 (out_len) ;
01095 
01096         status = cr_utils_ucs1_to_utf8 (a_in, a_in_len,
01097                                         *a_out, &out_len) ;
01098 
01099         *a_out_len = out_len ;
01100 
01101         return status ;
01102 }
01103 
01104 
01105 /**
01106  *Converts an utf8 buffer into an ucs1 buffer.
01107  *The caller must know the size of the resulting
01108  *converted buffer, and allocated it prior to calling this
01109  *function.
01110  *
01111  *@param a_in the input utf8 buffer to convert.
01112  *
01113  *@param a_in_len in/out parameter. The size of the input utf8 buffer.
01114  *After return, points to the number of bytes consumed
01115  *by the function even in case of encoding error.
01116  *
01117  *@param a_out out parameter. Points to the resulting buffer.
01118  *Must be allocated by the caller. If the size of a_out is shorter
01119  *than its required size, this function converts what it can and return
01120  *a successfull status.
01121  *
01122  *@param a_out_len in/out parameter. The size of the output buffer.
01123  *After return, points to the number of bytes consumed even in case of
01124  *encoding error.
01125  *
01126  *@return CR_OK upon successfull completion, an error code otherwise.
01127  */
01128 enum CRStatus
01129 cr_utils_utf8_to_ucs1 (const guchar * a_in, 
01130                        gulong * a_in_len,
01131                        guchar *a_out, 
01132                        gulong *a_out_len)
01133 {
01134         gulong in_index = 0, out_index = 0, in_len = 0, out_len = 0 ;
01135         enum CRStatus status = CR_OK ;
01136 
01137         /*
01138          *to store the final decoded 
01139          *unicode char
01140          */
01141         guint32 c = 0 ;
01142 
01143         g_return_val_if_fail (a_in && a_in_len
01144                               && a_out && a_out_len,
01145                               CR_BAD_PARAM_ERROR) ;
01146 
01147         if (*a_in_len < 1)
01148         {
01149                 status = CR_OK ;
01150                 goto end ;
01151         }
01152 
01153         in_len = *a_in_len ;
01154         out_len = *a_out_len ;
01155 
01156         for (in_index = 0 , out_index = 0 ;
01157              (in_index < in_len) && (out_index < out_len) ;
01158              in_index ++, out_index++)
01159         {
01160                 gint nb_bytes_2_decode = 0 ;
01161 
01162                 if (a_in[in_index] <= 0x7F) 
01163                 {
01164                         /*
01165                          *7 bits long char
01166                          *encoded over 1 byte:
01167                          * 0xxx xxxx
01168                          */
01169                         c = a_in[in_index] ;
01170                         nb_bytes_2_decode = 1 ;
01171                         
01172                 } 
01173                 else if ((a_in[in_index] & 0xE0) == 0xC0) 
01174                 {
01175                         /*
01176                          *up to 11 bits long char.
01177                          *encoded over 2 bytes:
01178                          *110x xxxx  10xx xxxx
01179                          */
01180                         c = a_in[in_index] & 0x1F ;
01181                         nb_bytes_2_decode = 2 ;
01182                         
01183                 } 
01184                 else if ((a_in[in_index] & 0xF0) == 0xE0) 
01185                 {
01186                         /*
01187                          *up to 16 bit long char
01188                          *encoded over 3 bytes:
01189                          *1110 xxxx  10xx xxxx  10xx xxxx
01190                          */
01191                         c = a_in[in_index] & 0x0F ;
01192                         nb_bytes_2_decode = 3 ;
01193                         
01194                 } 
01195                 else if ((a_in[in_index] & 0xF8) == 0xF0) 
01196                 {
01197                         /*
01198                          *up to 21 bits long char
01199                          *encoded over 4 bytes:
01200                          *1111 0xxx  10xx xxxx  10xx xxxx  10xx xxxx
01201                          */
01202                         c = a_in[in_index] & 0x7 ;
01203                         nb_bytes_2_decode = 4 ;
01204                         
01205                 } 
01206                 else if ((a_in[in_index] & 0xFC) == 0xF8) 
01207                 {
01208                         /*
01209                          *up to 26 bits long char
01210                          *encoded over 5 bytes.
01211                          *1111 10xx  10xx xxxx  10xx xxxx  
01212                          *10xx xxxx  10xx xxxx
01213                          */
01214                         c = a_in[in_index] & 3 ;
01215                         nb_bytes_2_decode = 5 ;
01216                         
01217                 } 
01218                 else if ((a_in[in_index] & 0xFE) == 0xFC) 
01219                 {
01220                         /*
01221                          *up to 31 bits long char
01222                          *encoded over 6 bytes:
01223                          *1111 110x  10xx xxxx  10xx xxxx  
01224                          *10xx xxxx  10xx xxxx  10xx xxxx
01225                          */
01226                         c = a_in[in_index] & 1 ;
01227                         nb_bytes_2_decode = 6 ;
01228                         
01229                 } 
01230                 else 
01231                 {
01232                         /*BAD ENCODING*/
01233                         status = CR_ENCODING_ERROR ;
01234                         goto end ;
01235                 }
01236                 
01237                 /*
01238                  *Go and decode the remaining byte(s)
01239                  *(if any) to get the current character.
01240                  */
01241                 if (in_index + nb_bytes_2_decode - 1 >= in_len)
01242                 {
01243                         status = CR_OK ;
01244                         goto end ;
01245                 }
01246 
01247                 for ( ;
01248                       nb_bytes_2_decode > 1 ;
01249                       nb_bytes_2_decode --) 
01250                 {
01251                         /*decode the next byte*/
01252                         in_index ++ ;
01253                         
01254                         /*byte pattern must be: 10xx xxxx*/
01255                         if ((a_in[in_index] & 0xC0) != 0x80)
01256                         {
01257                                 status = CR_ENCODING_ERROR ;
01258                                 goto end ;
01259                         }
01260 
01261                         c = (c << 6) | (a_in[in_index] & 0x3F) ;
01262                 }
01263 
01264                 /*
01265                  *The decoded ucs4 char is now
01266                  *in c.
01267                  */
01268 
01269                 if (c > 0xFF) 
01270                 {
01271                         status = CR_ENCODING_ERROR ;
01272                         goto end ;
01273                 }
01274                 
01275                 a_out[out_index] = c ;
01276         }
01277 
01278  end:
01279         *a_out_len = out_index ;
01280         *a_in_len = in_index ;
01281 
01282         return CR_OK ;        
01283 }
01284 
01285 
01286 /**
01287  *Converts an utf8 buffer into an
01288  *ucs1 buffer.
01289  *@param a_in_start the start of the input buffer.
01290  *@param a_in_end the end of the input buffer.
01291  *@param a_out out parameter. The resulting converted ucs4 buffer.
01292  *Must be freed by the caller.
01293  *@param a_out_len out parameter. The length of the converted buffer.
01294  *@return CR_OK upon successfull completion, an error code otherwise.
01295  *Note that out parameters are valid if and only if this function
01296  *returns CR_OK.
01297  */
01298 enum CRStatus
01299 cr_utils_utf8_str_to_ucs1 (const guchar * a_in, 
01300                            gulong * a_in_len,
01301                            guchar **a_out, 
01302                            gulong *a_out_len)
01303 {
01304         enum CRStatus status = CR_OK ;
01305 
01306         g_return_val_if_fail (a_in && a_in_len 
01307                               && a_out && a_out_len,
01308                               CR_BAD_PARAM_ERROR) ;
01309 
01310         if (*a_in_len < 1)
01311         {
01312                 *a_out_len = 0 ;
01313                 *a_out = NULL ;
01314                 return CR_OK ;
01315         }
01316 
01317         status =
01318                 cr_utils_utf8_str_len_as_ucs4 (a_in, &a_in[*a_in_len - 1],
01319                                                a_out_len) ;
01320 
01321         g_return_val_if_fail (status == CR_OK, status) ;
01322 
01323         *a_out = g_malloc0 (*a_out_len * sizeof (guint32)) ;
01324 
01325         status =
01326                 cr_utils_utf8_to_ucs1 (a_in, a_in_len,
01327                                        *a_out, a_out_len) ;
01328         return status ;
01329 }
01330 
01331 
01332 /*****************************************
01333  *CSS basic types identification utilities
01334  *****************************************/
01335 
01336 
01337 /**
01338  *Returns TRUE if a_char is a white space as
01339  *defined in the css spec in chap 4.1.1.
01340  *
01341  *white-space ::= ' '| \t|\r|\n|\f
01342  *
01343  *@param a_char the character to test.
01344  *return TRUE if is a white space, false otherwise.
01345  */
01346 gboolean
01347 cr_utils_is_white_space (guint32 a_char)
01348 {
01349         switch (a_char)
01350         {
01351         case ' ': 
01352         case '\t': 
01353         case '\r':
01354         case '\n': 
01355         case '\f':
01356                 return TRUE ;
01357                 break ;
01358         default:
01359                 return FALSE ;
01360         }
01361 }
01362 
01363 /**
01364  *Returns true if the character is a newline
01365  *as defined in the css spec in the chap 4.1.1.
01366  *
01367  *nl ::= \n|\r\n|\r|\f
01368  *
01369  *@param a_char the character to test.
01370  *@return TRUE if the character is a newline, FALSE otherwise.
01371  */
01372 gboolean
01373 cr_utils_is_newline (guint32 a_char)
01374 {
01375         switch (a_char)
01376         {
01377         case '\n':
01378         case '\r':
01379         case '\f':
01380                 return TRUE ;
01381                 break;
01382         default:
01383                 return FALSE ;
01384         }
01385 }
01386 
01387 /**
01388  *returns TRUE if the char is part of an hexa num char:
01389  *i.e hexa_char ::= [0-9A-F]
01390  */
01391 gboolean
01392 cr_utils_is_hexa_char (guint32 a_char)
01393 {
01394         if ((a_char >= '0' && a_char <= '9')
01395             || (a_char >= 'A' && a_char <= 'F'))
01396         {
01397                 return TRUE ;
01398         }
01399         return FALSE ;
01400 }
01401 
01402 /**
01403  *Returns true if the character is a nonascii
01404  *character (as defined in the css spec chap 4.1.1):
01405  *
01406  *nonascii ::= [^\0-\177]
01407  *
01408  *@param a_char the character to test.
01409  *@return TRUE if the character is a nonascii char,
01410  *FALSE otherwise.
01411  */
01412 gboolean
01413 cr_utils_is_nonascii (guint32 a_char)
01414 {
01415         if (a_char <= 177)
01416         {
01417                 return FALSE ;
01418         }
01419 
01420         return TRUE ;
01421 }
01422 
01423 /**
01424  *Dumps a character a_nb times on a file.
01425  *@param a_char the char to dump
01426  *@param a_fp the destination file pointer
01427  *@param a_nb the number of times a_char is to be dumped.
01428  */
01429 void
01430 cr_utils_dump_n_chars (guchar a_char, FILE *a_fp, glong a_nb)
01431 {
01432         glong i = 0 ;
01433 
01434         for (i = 0 ; i < a_nb ; i++)
01435         {
01436                 fprintf (a_fp, "%c", a_char) ;
01437         }
01438 }
01439 
01440 void
01441 cr_utils_dump_n_chars2 (guchar a_char, 
01442                         GString *a_string,
01443                         glong a_nb)
01444 {
01445         glong i = 0 ;
01446 
01447         g_return_if_fail (a_string) ;
01448 
01449         for (i = 0 ; i < a_nb ; i++)
01450         {
01451                 g_string_append_printf (a_string, "%c", a_char) ;
01452         }
01453 }
01454 
01455 gdouble
01456 cr_utils_n_to_0_dot_n (glong a_n)
01457 {
01458         gdouble result = a_n ;
01459 
01460         while (ABS (result) > 1)
01461         {
01462                 result = result / 10 ;
01463         }
01464 
01465         return result ;
01466 }
01467 
01468 /**
01469  *Duplicates a list of GString instances.
01470  *@return the duplicated list of GString instances or NULL if
01471  *something bad happened.
01472  *@param a_list_of_strings the list of strings to be duplicated.
01473  */
01474 GList *
01475 cr_dup_glist_of_string (GList *a_list_of_strings)
01476 {
01477         GList *cur = NULL, *result = NULL ;
01478 
01479         g_return_val_if_fail (a_list_of_strings, NULL) ;
01480 
01481         for (cur = a_list_of_strings ; cur ; cur = cur->next)
01482         {
01483                 GString *str = NULL ;
01484                 
01485                 str = g_string_new_len (((GString *)cur->data)->str,
01486                                         ((GString *)cur->data)->len) ;
01487                 if (str)
01488                         result = g_list_append (result, str) ;
01489         }
01490 
01491         return result ;
01492 }

Generated on Wed Oct 1 01:36:49 2003 for Libcroco by doxygen 1.3.3