00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027 #include "cr-utils.h"
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038
00039
00040
00041
00042
00043
00044
00045
00046
00047
00048
00049
00050
00051
00052
00053
00054
00055
00056
00057
00058
00059
00060
00061
00062
00063
00064
00065
00066
00067
00068
00069
00070
00071
00072
00073 enum CRStatus
00074 cr_utils_utf8_str_len_as_ucs4 (const guchar *a_in_start,
00075 const guchar *a_in_end,
00076 gulong *a_len)
00077 {
00078 guchar *byte_ptr = NULL ;
00079 gint len = 0 ;
00080
00081
00082
00083
00084
00085 guint c = 0 ;
00086
00087 g_return_val_if_fail (a_in_start && a_in_end && a_len,
00088 CR_BAD_PARAM_ERROR) ;
00089 *a_len = 0 ;
00090
00091 for (byte_ptr = (guchar*)a_in_start ;
00092 byte_ptr <= a_in_end ;
00093 byte_ptr++)
00094 {
00095 gint nb_bytes_2_decode = 0 ;
00096
00097 if (*byte_ptr <= 0x7F)
00098 {
00099
00100
00101
00102
00103
00104 c = *byte_ptr ;
00105 nb_bytes_2_decode = 1 ;
00106
00107 }
00108 else if ((*byte_ptr & 0xE0) == 0xC0)
00109 {
00110
00111
00112
00113
00114
00115 c = *byte_ptr & 0x1F ;
00116 nb_bytes_2_decode = 2 ;
00117
00118 }
00119 else if ((*byte_ptr & 0xF0) == 0xE0)
00120 {
00121
00122
00123
00124
00125
00126 c = *byte_ptr & 0x0F ;
00127 nb_bytes_2_decode = 3 ;
00128
00129 }
00130 else if ((*byte_ptr & 0xF8) == 0xF0)
00131 {
00132
00133
00134
00135
00136
00137 c = *byte_ptr & 0x7 ;
00138 nb_bytes_2_decode = 4 ;
00139
00140 }
00141 else if ((*byte_ptr & 0xFC) == 0xF8)
00142 {
00143
00144
00145
00146
00147
00148
00149 c = *byte_ptr & 3 ;
00150 nb_bytes_2_decode = 5 ;
00151
00152 }
00153 else if ((*byte_ptr & 0xFE) == 0xFC)
00154 {
00155
00156
00157
00158
00159
00160
00161 c = *byte_ptr & 1 ;
00162 nb_bytes_2_decode = 6 ;
00163
00164 }
00165 else
00166 {
00167
00168
00169
00170 return CR_ENCODING_ERROR ;
00171 }
00172
00173
00174
00175
00176
00177 for ( ;
00178 nb_bytes_2_decode > 1 ;
00179 nb_bytes_2_decode --)
00180 {
00181
00182 byte_ptr ++ ;
00183
00184
00185 if ((*byte_ptr & 0xC0) != 0x80)
00186 {
00187 return CR_ENCODING_ERROR ;
00188 }
00189
00190 c = (c << 6) | (*byte_ptr & 0x3F) ;
00191 }
00192
00193 len ++ ;
00194 }
00195
00196 *a_len = len ;
00197
00198 return CR_OK ;
00199 }
00200
00201
00202
00203
00204
00205
00206
00207
00208
00209
00210
00211
00212
00213 enum CRStatus
00214 cr_utils_ucs4_str_len_as_utf8 (const guint32 *a_in_start,
00215 const guint32 *a_in_end,
00216 gulong *a_len)
00217 {
00218 gint len = 0 ;
00219 guint32 *char_ptr = NULL ;
00220
00221 g_return_val_if_fail (a_in_start && a_in_end && a_len,
00222 CR_BAD_PARAM_ERROR) ;
00223
00224 for (char_ptr = (guint32*)a_in_start ;
00225 char_ptr <= a_in_end ;
00226 char_ptr ++)
00227 {
00228 if (*char_ptr <= 0x7F)
00229 {
00230
00231 len += 1 ;
00232 }
00233 else if (*char_ptr <= 0x7FF)
00234 {
00235
00236 len += 2 ;
00237 }
00238 else if (*char_ptr <= 0xFFFF)
00239 {
00240 len += 3 ;
00241 }
00242 else if (*char_ptr <= 0x1FFFFF)
00243 {
00244 len += 4 ;
00245 }
00246 else if (*char_ptr <= 0x3FFFFFF)
00247 {
00248 len += 5 ;
00249 }
00250 else if (*char_ptr <= 0x7FFFFFFF)
00251 {
00252 len+= 6 ;
00253 }
00254 }
00255
00256 *a_len = len ;
00257 return CR_OK ;
00258 }
00259
00260
00261
00262
00263
00264
00265
00266
00267
00268
00269
00270
00271 enum CRStatus
00272 cr_utils_ucs1_str_len_as_utf8 (const guchar *a_in_start,
00273 const guchar *a_in_end,
00274 gulong *a_len)
00275 {
00276 gint len = 0 ;
00277 guchar *char_ptr = NULL ;
00278
00279 g_return_val_if_fail (a_in_start && a_in_end && a_len,
00280 CR_BAD_PARAM_ERROR) ;
00281
00282 for (char_ptr = (guchar *)a_in_start ;
00283 char_ptr <= a_in_end ;
00284 char_ptr ++)
00285 {
00286 if (*char_ptr <= 0x7F)
00287 {
00288
00289 len += 1 ;
00290 }
00291 else
00292 {
00293
00294 len += 2 ;
00295 }
00296 }
00297
00298 *a_len = len ;
00299 return CR_OK ;
00300 }
00301
00302
00303
00304
00305
00306
00307
00308
00309
00310
00311
00312
00313
00314
00315
00316
00317 enum CRStatus
00318 cr_utils_utf8_to_ucs4 (const guchar * a_in,
00319 gulong *a_in_len,
00320 guint32 *a_out,
00321 gulong *a_out_len)
00322 {
00323 gulong in_len = 0, out_len = 0, in_index = 0, out_index = 0 ;
00324 enum CRStatus status = CR_OK ;
00325
00326
00327
00328
00329
00330 guint c = 0 ;
00331
00332 g_return_val_if_fail (a_in && a_in_len
00333 && a_out && a_out_len,
00334 CR_BAD_PARAM_ERROR) ;
00335
00336 if (*a_in_len < 1)
00337 {
00338 status = CR_OK ;
00339 goto end ;
00340 }
00341
00342 in_len = *a_in_len ;
00343 out_len = *a_out_len ;
00344
00345 for (in_index = 0, out_index = 0 ;
00346 (in_index < in_len) && (out_index < out_len) ;
00347 in_index++, out_index++)
00348 {
00349 gint nb_bytes_2_decode = 0 ;
00350
00351 if (a_in[in_index] <= 0x7F)
00352 {
00353
00354
00355
00356
00357
00358 c = a_in[in_index] ;
00359 nb_bytes_2_decode = 1 ;
00360
00361 }
00362 else if ((a_in[in_index] & 0xE0) == 0xC0)
00363 {
00364
00365
00366
00367
00368
00369 c = a_in[in_index] & 0x1F ;
00370 nb_bytes_2_decode = 2 ;
00371
00372 }
00373 else if ((a_in[in_index] & 0xF0) == 0xE0)
00374 {
00375
00376
00377
00378
00379
00380 c = a_in[in_index] & 0x0F ;
00381 nb_bytes_2_decode = 3 ;
00382
00383 }
00384 else if ((a_in[in_index] & 0xF8) == 0xF0)
00385 {
00386
00387
00388
00389
00390
00391 c = a_in[in_index] & 0x7 ;
00392 nb_bytes_2_decode = 4 ;
00393
00394 }
00395 else if ((a_in[in_index] & 0xFC) == 0xF8)
00396 {
00397
00398
00399
00400
00401
00402
00403 c = a_in[in_index] & 3 ;
00404 nb_bytes_2_decode = 5 ;
00405
00406 }
00407 else if ((a_in[in_index] & 0xFE) == 0xFC)
00408 {
00409
00410
00411
00412
00413
00414
00415 c = a_in[in_index] & 1 ;
00416 nb_bytes_2_decode = 6 ;
00417
00418 }
00419 else
00420 {
00421
00422 goto end ;
00423 }
00424
00425
00426
00427
00428
00429 for ( ;
00430 nb_bytes_2_decode > 1 ;
00431 nb_bytes_2_decode --)
00432 {
00433
00434 in_index ++ ;
00435
00436
00437 if ((a_in[in_index] & 0xC0) != 0x80)
00438 {
00439 goto end ;
00440 }
00441
00442 c = (c << 6) | (a_in[in_index] & 0x3F) ;
00443 }
00444
00445
00446
00447
00448
00449
00450
00451
00452
00453
00454
00455 if (c == 0xFFFF || c == 0xFFFE) goto end ;
00456
00457
00458 if (c > 0x10FFFF) goto end ;
00459
00460
00461
00462
00463
00464 if (c >= 0xD800 && c <= 0xDFFF) goto end ;
00465
00466
00467 if (c == 0) goto end ;
00468
00469
00470 a_out[out_index] = c ;
00471 }
00472
00473 end:
00474 *a_out_len = out_index + 1;
00475 *a_in_len = in_index + 1;
00476
00477 return status ;
00478 }
00479
00480
00481
00482
00483
00484
00485
00486
00487
00488
00489
00490
00491
00492 enum CRStatus
00493 cr_utils_read_char_from_utf8_buf (const guchar * a_in,
00494 gulong a_in_len,
00495 guint32 *a_out, gulong *a_consumed)
00496 {
00497 gulong in_len = 0, in_index = 0, nb_bytes_2_decode = 0 ;
00498 enum CRStatus status = CR_OK ;
00499
00500
00501
00502
00503
00504 guint32 c = 0 ;
00505
00506 g_return_val_if_fail (a_in && a_out && a_out
00507 && a_consumed, CR_BAD_PARAM_ERROR) ;
00508
00509 if (a_in_len < 1)
00510 {
00511 status = CR_OK ;
00512 goto end ;
00513 }
00514
00515 in_len = a_in_len ;
00516
00517 if (*a_in <= 0x7F)
00518 {
00519
00520
00521
00522
00523
00524 c = *a_in ;
00525 nb_bytes_2_decode = 1 ;
00526
00527 }
00528 else if ((*a_in & 0xE0) == 0xC0)
00529 {
00530
00531
00532
00533
00534
00535 c = *a_in & 0x1F ;
00536 nb_bytes_2_decode = 2 ;
00537
00538 }
00539 else if ((*a_in & 0xF0) == 0xE0)
00540 {
00541
00542
00543
00544
00545
00546 c = *a_in & 0x0F ;
00547 nb_bytes_2_decode = 3 ;
00548
00549 }
00550 else if ((*a_in & 0xF8) == 0xF0)
00551 {
00552
00553
00554
00555
00556
00557 c = *a_in & 0x7 ;
00558 nb_bytes_2_decode = 4 ;
00559
00560 }
00561 else if ((*a_in & 0xFC) == 0xF8)
00562 {
00563
00564
00565
00566
00567
00568
00569 c = *a_in & 3 ;
00570 nb_bytes_2_decode = 5 ;
00571
00572 }
00573 else if ((*a_in & 0xFE) == 0xFC)
00574 {
00575
00576
00577
00578
00579
00580
00581 c = *a_in & 1 ;
00582 nb_bytes_2_decode = 6 ;
00583
00584 }
00585 else
00586 {
00587
00588 goto end ;
00589 }
00590
00591 if (nb_bytes_2_decode > a_in_len)
00592 {
00593 status = CR_END_OF_INPUT_ERROR ;
00594 goto end ;
00595 }
00596
00597
00598
00599
00600
00601 for ( in_index = 1 ;
00602 in_index < nb_bytes_2_decode ;
00603 in_index ++)
00604 {
00605
00606 if ((a_in[in_index] & 0xC0) != 0x80)
00607 {
00608 goto end ;
00609 }
00610
00611 c = (c << 6) | (a_in[in_index] & 0x3F) ;
00612 }
00613
00614
00615
00616
00617
00618
00619
00620
00621
00622
00623
00624 if (c == 0xFFFF || c == 0xFFFE) goto end ;
00625
00626
00627 if (c > 0x10FFFF) goto end ;
00628
00629
00630
00631
00632
00633 if (c >= 0xD800 && c <= 0xDFFF) goto end ;
00634
00635
00636 if (c == 0) goto end ;
00637
00638 *a_out = c ;
00639
00640 end:
00641 *a_consumed = nb_bytes_2_decode ;
00642
00643 return status ;
00644 }
00645
00646
00647
00648
00649
00650 enum CRStatus
00651 cr_utils_utf8_str_len_as_ucs1 (const guchar *a_in_start,
00652 const guchar *a_in_end,
00653 gulong *a_len)
00654 {
00655
00656
00657
00658
00659
00660
00661 guchar *byte_ptr = NULL ;
00662 gint len = 0 ;
00663
00664
00665
00666
00667
00668 guint c = 0 ;
00669
00670 g_return_val_if_fail (a_in_start && a_in_end && a_len,
00671 CR_BAD_PARAM_ERROR) ;
00672 *a_len = 0 ;
00673
00674 for (byte_ptr = (guchar*)a_in_start ;
00675 byte_ptr <= a_in_end ;
00676 byte_ptr++)
00677 {
00678 gint nb_bytes_2_decode = 0 ;
00679
00680 if (*byte_ptr <= 0x7F)
00681 {
00682
00683
00684
00685
00686
00687 c = *byte_ptr ;
00688 nb_bytes_2_decode = 1 ;
00689
00690 }
00691 else if ((*byte_ptr & 0xE0) == 0xC0)
00692 {
00693
00694
00695
00696
00697
00698 c = *byte_ptr & 0x1F ;
00699 nb_bytes_2_decode = 2 ;
00700
00701 }
00702 else if ((*byte_ptr & 0xF0) == 0xE0)
00703 {
00704
00705
00706
00707
00708
00709 c = *byte_ptr & 0x0F ;
00710 nb_bytes_2_decode = 3 ;
00711
00712 }
00713 else if ((*byte_ptr & 0xF8) == 0xF0)
00714 {
00715
00716
00717
00718
00719
00720 c = *byte_ptr & 0x7 ;
00721 nb_bytes_2_decode = 4 ;
00722
00723 }
00724 else if ((*byte_ptr & 0xFC) == 0xF8)
00725 {
00726
00727
00728
00729
00730
00731
00732 c = *byte_ptr & 3 ;
00733 nb_bytes_2_decode = 5 ;
00734
00735 }
00736 else if ((*byte_ptr & 0xFE) == 0xFC)
00737 {
00738
00739
00740
00741
00742
00743
00744 c = *byte_ptr & 1 ;
00745 nb_bytes_2_decode = 6 ;
00746
00747 }
00748 else
00749 {
00750
00751
00752
00753 return CR_ENCODING_ERROR ;
00754 }
00755
00756
00757
00758
00759
00760 for ( ;
00761 nb_bytes_2_decode > 1 ;
00762 nb_bytes_2_decode --)
00763 {
00764
00765 byte_ptr ++ ;
00766
00767
00768 if ((*byte_ptr & 0xC0) != 0x80)
00769 {
00770 return CR_ENCODING_ERROR ;
00771 }
00772
00773 c = (c << 6) | (*byte_ptr & 0x3F) ;
00774 }
00775
00776
00777
00778
00779
00780
00781 if (c <= 0xFF) {
00782
00783
00784 len ++ ;
00785 } else {
00786
00787
00788
00789 return CR_ENCODING_ERROR ;
00790 }
00791 }
00792
00793 *a_len = len ;
00794
00795 return CR_OK ;
00796 }
00797
00798
00799
00800
00801
00802
00803
00804
00805
00806
00807
00808
00809
00810
00811 enum CRStatus
00812 cr_utils_utf8_str_to_ucs4 (const guchar * a_in,
00813 gulong *a_in_len,
00814 guint32 **a_out, gulong *a_out_len)
00815 {
00816 enum CRStatus status = CR_OK ;
00817
00818 g_return_val_if_fail (a_in && a_in_len
00819 && a_out && a_out_len,
00820 CR_BAD_PARAM_ERROR) ;
00821
00822 status =
00823 cr_utils_utf8_str_len_as_ucs4 (a_in,
00824 &a_in[*a_in_len - 1],
00825 a_out_len) ;
00826
00827 g_return_val_if_fail (status == CR_OK, status) ;
00828
00829 *a_out = g_malloc0 (*a_out_len * sizeof (guint32)) ;
00830
00831 status =
00832 cr_utils_utf8_to_ucs4 (a_in, a_in_len,
00833 *a_out, a_out_len) ;
00834
00835 return status ;
00836 }
00837
00838
00839
00840
00841
00842
00843
00844
00845
00846
00847
00848
00849
00850
00851
00852
00853 enum CRStatus
00854 cr_utils_ucs4_to_utf8 (const guint32 *a_in,
00855 gulong *a_in_len,
00856 guchar *a_out,
00857 gulong *a_out_len)
00858 {
00859 gulong in_len = 0, in_index = 0, out_index = 0 ;
00860 enum CRStatus status = CR_OK ;
00861
00862 g_return_val_if_fail (a_in && a_in_len && a_out && a_out_len,
00863 CR_BAD_PARAM_ERROR) ;
00864
00865 if (*a_in_len < 1)
00866 {
00867 status = CR_OK ;
00868 goto end ;
00869 }
00870
00871 in_len = *a_in_len ;
00872
00873 for (in_index = 0 ;
00874 in_index < in_len ;
00875 in_index++)
00876 {
00877
00878
00879
00880
00881 if (a_in[in_index] <= 0x7F)
00882 {
00883 a_out[out_index] = a_in[in_index] ;
00884 out_index ++ ;
00885 }
00886 else if (a_in[in_index] <= 0x7FF)
00887 {
00888 a_out[out_index] = (0xC0 | (a_in[in_index] >> 6)) ;
00889 a_out[out_index + 1] = (0x80 | (a_in[in_index] & 0x3F));
00890 out_index += 2 ;
00891 }
00892 else if (a_in[in_index] <= 0xFFFF)
00893 {
00894 a_out[out_index] = (0xE0 | (a_in[in_index] >> 12)) ;
00895 a_out[out_index + 1] =
00896 (0x80 | ((a_in[in_index] >> 6) & 0x3F)) ;
00897 a_out[out_index + 2] = (0x80 | (a_in[in_index] & 0x3F)) ;
00898 out_index += 3 ;
00899 }
00900 else if (a_in[in_index] <= 0x1FFFFF)
00901 {
00902 a_out[out_index] = (0xF0 | (a_in[in_index] >> 18)) ;
00903 a_out[out_index + 1]
00904 = (0x80 | ((a_in[in_index] >> 12) & 0x3F)) ;
00905 a_out[out_index + 2]
00906 = (0x80 | ((a_in[in_index] >> 6) & 0x3F)) ;
00907 a_out[out_index + 3]
00908 = (0x80 | (a_in[in_index] & 0x3F)) ;
00909 out_index += 4 ;
00910 }
00911 else if (a_in[in_index] <= 0x3FFFFFF)
00912 {
00913 a_out[out_index] = (0xF8 | (a_in[in_index] >> 24)) ;
00914 a_out[out_index + 1] = (0x80 | (a_in[in_index] >> 18)) ;
00915 a_out[out_index + 2]
00916 = (0x80 | ((a_in[in_index] >> 12) & 0x3F)) ;
00917 a_out[out_index + 3]
00918 = (0x80 | ((a_in[in_index] >> 6) & 0x3F)) ;
00919 a_out[out_index + 4]
00920 = (0x80 | (a_in[in_index] & 0x3F)) ;
00921 out_index += 5 ;
00922 }
00923 else if (a_in[in_index] <= 0x7FFFFFFF)
00924 {
00925 a_out[out_index] = (0xFC | (a_in[in_index] >> 30)) ;
00926 a_out[out_index + 1] = (0x80 | (a_in[in_index] >> 24)) ;
00927 a_out[out_index + 2]
00928 = (0x80 | ((a_in[in_index] >> 18) & 0x3F)) ;
00929 a_out[out_index + 3]
00930 = (0x80 | ((a_in[in_index] >> 12) & 0x3F)) ;
00931 a_out[out_index + 4]
00932 = (0x80 | ((a_in[in_index] >> 6) & 0x3F)) ;
00933 a_out[out_index + 4]
00934 = (0x80 | (a_in[in_index] & 0x3F)) ;
00935 out_index += 6 ;
00936 }
00937 else
00938 {
00939 status = CR_ENCODING_ERROR ;
00940 goto end ;
00941 }
00942 }
00943
00944 end:
00945 *a_in_len = in_index + 1 ;
00946 *a_out_len = out_index + 1 ;
00947
00948 return status ;
00949 }
00950
00951
00952
00953
00954
00955
00956
00957
00958
00959
00960
00961
00962
00963
00964 enum CRStatus
00965 cr_utils_ucs4_str_to_utf8 (const guint32 *a_in,
00966 gulong *a_in_len,
00967 guchar **a_out, gulong *a_out_len)
00968 {
00969 enum CRStatus status = CR_OK ;
00970
00971 g_return_val_if_fail (a_in && a_in_len && a_out
00972 && a_out_len, CR_BAD_PARAM_ERROR) ;
00973
00974 status =
00975 cr_utils_ucs4_str_len_as_utf8 (a_in,
00976 &a_in[*a_out_len -1],
00977 a_out_len) ;
00978
00979 g_return_val_if_fail (status == CR_OK, status) ;
00980
00981 status =
00982 cr_utils_ucs4_to_utf8 (a_in, a_in_len, *a_out, a_out_len) ;
00983
00984 return status ;
00985 }
00986
00987
00988
00989
00990
00991
00992
00993
00994
00995
00996
00997
00998
00999
01000
01001
01002
01003
01004
01005
01006
01007
01008 enum CRStatus
01009 cr_utils_ucs1_to_utf8 (const guchar *a_in,
01010 gulong *a_in_len,
01011 guchar *a_out,
01012 gulong *a_out_len)
01013 {
01014 gulong out_index = 0, in_index = 0, in_len = 0, out_len = 0 ;
01015 enum CRStatus status = CR_OK ;
01016
01017 g_return_val_if_fail (a_in && a_in_len && a_out
01018 && a_out_len, CR_BAD_PARAM_ERROR) ;
01019
01020 if (*a_in_len < 1)
01021 {
01022 status = CR_OK ;
01023 goto end ;
01024 }
01025
01026 in_len = *a_in_len ;
01027 out_len = *a_out_len ;
01028
01029 for (in_index = 0, out_index = 0 ;
01030 (in_index < in_len) && (out_index < out_len) ;
01031 in_index ++)
01032 {
01033
01034
01035
01036
01037 if (a_in[in_index] <= 0x7F)
01038 {
01039 a_out[out_index] = a_in[in_index] ;
01040 out_index ++ ;
01041 }
01042 else
01043 {
01044 a_out[out_index] = (0xC0 | (a_in[in_index] >> 6)) ;
01045 a_out[out_index + 1] = (0x80 | (a_in[in_index] & 0x3F));
01046 out_index += 2 ;
01047 }
01048 }
01049
01050 end:
01051 *a_in_len = in_index ;
01052 *a_out_len = out_index ;
01053
01054 return CR_OK ;
01055 }
01056
01057
01058
01059
01060
01061
01062
01063
01064
01065
01066
01067 enum CRStatus
01068 cr_utils_ucs1_str_to_utf8 (const guchar *a_in,
01069 gulong *a_in_len,
01070 guchar **a_out,
01071 gulong *a_out_len)
01072 {
01073 gulong in_len = 0, out_len = 0 ;
01074 enum CRStatus status = CR_OK ;
01075
01076 g_return_val_if_fail (a_in && a_in_len && a_out
01077 && a_out_len, CR_BAD_PARAM_ERROR) ;
01078
01079 if (*a_in_len < 1)
01080 {
01081 *a_out_len = 0 ;
01082 *a_out = NULL ;
01083 return CR_OK ;
01084 }
01085
01086 status =
01087 cr_utils_ucs1_str_len_as_utf8 (a_in, &a_in[*a_in_len -1],
01088 &out_len) ;
01089
01090 g_return_val_if_fail (status == CR_OK, status) ;
01091
01092 in_len = *a_in_len ;
01093
01094 *a_out = g_malloc0 (out_len) ;
01095
01096 status = cr_utils_ucs1_to_utf8 (a_in, a_in_len,
01097 *a_out, &out_len) ;
01098
01099 *a_out_len = out_len ;
01100
01101 return status ;
01102 }
01103
01104
01105
01106
01107
01108
01109
01110
01111
01112
01113
01114
01115
01116
01117
01118
01119
01120
01121
01122
01123
01124
01125
01126
01127
01128 enum CRStatus
01129 cr_utils_utf8_to_ucs1 (const guchar * a_in,
01130 gulong * a_in_len,
01131 guchar *a_out,
01132 gulong *a_out_len)
01133 {
01134 gulong in_index = 0, out_index = 0, in_len = 0, out_len = 0 ;
01135 enum CRStatus status = CR_OK ;
01136
01137
01138
01139
01140
01141 guint32 c = 0 ;
01142
01143 g_return_val_if_fail (a_in && a_in_len
01144 && a_out && a_out_len,
01145 CR_BAD_PARAM_ERROR) ;
01146
01147 if (*a_in_len < 1)
01148 {
01149 status = CR_OK ;
01150 goto end ;
01151 }
01152
01153 in_len = *a_in_len ;
01154 out_len = *a_out_len ;
01155
01156 for (in_index = 0 , out_index = 0 ;
01157 (in_index < in_len) && (out_index < out_len) ;
01158 in_index ++, out_index++)
01159 {
01160 gint nb_bytes_2_decode = 0 ;
01161
01162 if (a_in[in_index] <= 0x7F)
01163 {
01164
01165
01166
01167
01168
01169 c = a_in[in_index] ;
01170 nb_bytes_2_decode = 1 ;
01171
01172 }
01173 else if ((a_in[in_index] & 0xE0) == 0xC0)
01174 {
01175
01176
01177
01178
01179
01180 c = a_in[in_index] & 0x1F ;
01181 nb_bytes_2_decode = 2 ;
01182
01183 }
01184 else if ((a_in[in_index] & 0xF0) == 0xE0)
01185 {
01186
01187
01188
01189
01190
01191 c = a_in[in_index] & 0x0F ;
01192 nb_bytes_2_decode = 3 ;
01193
01194 }
01195 else if ((a_in[in_index] & 0xF8) == 0xF0)
01196 {
01197
01198
01199
01200
01201
01202 c = a_in[in_index] & 0x7 ;
01203 nb_bytes_2_decode = 4 ;
01204
01205 }
01206 else if ((a_in[in_index] & 0xFC) == 0xF8)
01207 {
01208
01209
01210
01211
01212
01213
01214 c = a_in[in_index] & 3 ;
01215 nb_bytes_2_decode = 5 ;
01216
01217 }
01218 else if ((a_in[in_index] & 0xFE) == 0xFC)
01219 {
01220
01221
01222
01223
01224
01225
01226 c = a_in[in_index] & 1 ;
01227 nb_bytes_2_decode = 6 ;
01228
01229 }
01230 else
01231 {
01232
01233 status = CR_ENCODING_ERROR ;
01234 goto end ;
01235 }
01236
01237
01238
01239
01240
01241 if (in_index + nb_bytes_2_decode - 1 >= in_len)
01242 {
01243 status = CR_OK ;
01244 goto end ;
01245 }
01246
01247 for ( ;
01248 nb_bytes_2_decode > 1 ;
01249 nb_bytes_2_decode --)
01250 {
01251
01252 in_index ++ ;
01253
01254
01255 if ((a_in[in_index] & 0xC0) != 0x80)
01256 {
01257 status = CR_ENCODING_ERROR ;
01258 goto end ;
01259 }
01260
01261 c = (c << 6) | (a_in[in_index] & 0x3F) ;
01262 }
01263
01264
01265
01266
01267
01268
01269 if (c > 0xFF)
01270 {
01271 status = CR_ENCODING_ERROR ;
01272 goto end ;
01273 }
01274
01275 a_out[out_index] = c ;
01276 }
01277
01278 end:
01279 *a_out_len = out_index ;
01280 *a_in_len = in_index ;
01281
01282 return CR_OK ;
01283 }
01284
01285
01286
01287
01288
01289
01290
01291
01292
01293
01294
01295
01296
01297
01298 enum CRStatus
01299 cr_utils_utf8_str_to_ucs1 (const guchar * a_in,
01300 gulong * a_in_len,
01301 guchar **a_out,
01302 gulong *a_out_len)
01303 {
01304 enum CRStatus status = CR_OK ;
01305
01306 g_return_val_if_fail (a_in && a_in_len
01307 && a_out && a_out_len,
01308 CR_BAD_PARAM_ERROR) ;
01309
01310 if (*a_in_len < 1)
01311 {
01312 *a_out_len = 0 ;
01313 *a_out = NULL ;
01314 return CR_OK ;
01315 }
01316
01317 status =
01318 cr_utils_utf8_str_len_as_ucs4 (a_in, &a_in[*a_in_len - 1],
01319 a_out_len) ;
01320
01321 g_return_val_if_fail (status == CR_OK, status) ;
01322
01323 *a_out = g_malloc0 (*a_out_len * sizeof (guint32)) ;
01324
01325 status =
01326 cr_utils_utf8_to_ucs1 (a_in, a_in_len,
01327 *a_out, a_out_len) ;
01328 return status ;
01329 }
01330
01331
01332
01333
01334
01335
01336
01337
01338
01339
01340
01341
01342
01343
01344
01345
01346 gboolean
01347 cr_utils_is_white_space (guint32 a_char)
01348 {
01349 switch (a_char)
01350 {
01351 case ' ':
01352 case '\t':
01353 case '\r':
01354 case '\n':
01355 case '\f':
01356 return TRUE ;
01357 break ;
01358 default:
01359 return FALSE ;
01360 }
01361 }
01362
01363
01364
01365
01366
01367
01368
01369
01370
01371
01372 gboolean
01373 cr_utils_is_newline (guint32 a_char)
01374 {
01375 switch (a_char)
01376 {
01377 case '\n':
01378 case '\r':
01379 case '\f':
01380 return TRUE ;
01381 break;
01382 default:
01383 return FALSE ;
01384 }
01385 }
01386
01387
01388
01389
01390
01391 gboolean
01392 cr_utils_is_hexa_char (guint32 a_char)
01393 {
01394 if ((a_char >= '0' && a_char <= '9')
01395 || (a_char >= 'A' && a_char <= 'F'))
01396 {
01397 return TRUE ;
01398 }
01399 return FALSE ;
01400 }
01401
01402
01403
01404
01405
01406
01407
01408
01409
01410
01411
01412 gboolean
01413 cr_utils_is_nonascii (guint32 a_char)
01414 {
01415 if (a_char <= 177)
01416 {
01417 return FALSE ;
01418 }
01419
01420 return TRUE ;
01421 }
01422
01423
01424
01425
01426
01427
01428
01429 void
01430 cr_utils_dump_n_chars (guchar a_char, FILE *a_fp, glong a_nb)
01431 {
01432 glong i = 0 ;
01433
01434 for (i = 0 ; i < a_nb ; i++)
01435 {
01436 fprintf (a_fp, "%c", a_char) ;
01437 }
01438 }
01439
01440 void
01441 cr_utils_dump_n_chars2 (guchar a_char,
01442 GString *a_string,
01443 glong a_nb)
01444 {
01445 glong i = 0 ;
01446
01447 g_return_if_fail (a_string) ;
01448
01449 for (i = 0 ; i < a_nb ; i++)
01450 {
01451 g_string_append_printf (a_string, "%c", a_char) ;
01452 }
01453 }
01454
01455 gdouble
01456 cr_utils_n_to_0_dot_n (glong a_n)
01457 {
01458 gdouble result = a_n ;
01459
01460 while (ABS (result) > 1)
01461 {
01462 result = result / 10 ;
01463 }
01464
01465 return result ;
01466 }
01467
01468
01469
01470
01471
01472
01473
01474 GList *
01475 cr_dup_glist_of_string (GList *a_list_of_strings)
01476 {
01477 GList *cur = NULL, *result = NULL ;
01478
01479 g_return_val_if_fail (a_list_of_strings, NULL) ;
01480
01481 for (cur = a_list_of_strings ; cur ; cur = cur->next)
01482 {
01483 GString *str = NULL ;
01484
01485 str = g_string_new_len (((GString *)cur->data)->str,
01486 ((GString *)cur->data)->len) ;
01487 if (str)
01488 result = g_list_append (result, str) ;
01489 }
01490
01491 return result ;
01492 }