00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024 #include "cr-utils.h"
00025 #include "cr-string.h"
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038
00039
00040
00041
00042
00043
00044
00045
00046
00047
00048
00049
00050
00051
00052
00053
00054
00055
00056
00057
00058
00059
00060
00061
00062
00063
00064
00065
00066
00067
00068 enum CRStatus
00069 cr_utils_utf8_str_len_as_ucs4 (const guchar * a_in_start,
00070 const guchar * a_in_end, gulong * a_len)
00071 {
00072 guchar *byte_ptr = NULL;
00073 gint len = 0;
00074
00075
00076
00077
00078
00079 guint c = 0;
00080
00081 g_return_val_if_fail (a_in_start && a_in_end && a_len,
00082 CR_BAD_PARAM_ERROR);
00083 *a_len = 0;
00084
00085 for (byte_ptr = (guchar *) a_in_start;
00086 byte_ptr <= a_in_end; byte_ptr++) {
00087 gint nb_bytes_2_decode = 0;
00088
00089 if (*byte_ptr <= 0x7F) {
00090
00091
00092
00093
00094
00095 c = *byte_ptr;
00096 nb_bytes_2_decode = 1;
00097
00098 } else if ((*byte_ptr & 0xE0) == 0xC0) {
00099
00100
00101
00102
00103
00104 c = *byte_ptr & 0x1F;
00105 nb_bytes_2_decode = 2;
00106
00107 } else if ((*byte_ptr & 0xF0) == 0xE0) {
00108
00109
00110
00111
00112
00113 c = *byte_ptr & 0x0F;
00114 nb_bytes_2_decode = 3;
00115
00116 } else if ((*byte_ptr & 0xF8) == 0xF0) {
00117
00118
00119
00120
00121
00122 c = *byte_ptr & 0x7;
00123 nb_bytes_2_decode = 4;
00124
00125 } else if ((*byte_ptr & 0xFC) == 0xF8) {
00126
00127
00128
00129
00130
00131
00132 c = *byte_ptr & 3;
00133 nb_bytes_2_decode = 5;
00134
00135 } else if ((*byte_ptr & 0xFE) == 0xFC) {
00136
00137
00138
00139
00140
00141
00142 c = *byte_ptr & 1;
00143 nb_bytes_2_decode = 6;
00144
00145 } else {
00146
00147
00148
00149 return CR_ENCODING_ERROR;
00150 }
00151
00152
00153
00154
00155
00156 for (; nb_bytes_2_decode > 1; nb_bytes_2_decode--) {
00157
00158 byte_ptr++;
00159
00160
00161 if ((*byte_ptr & 0xC0) != 0x80) {
00162 return CR_ENCODING_ERROR;
00163 }
00164
00165 c = (c << 6) | (*byte_ptr & 0x3F);
00166 }
00167
00168 len++;
00169 }
00170
00171 *a_len = len;
00172
00173 return CR_OK;
00174 }
00175
00176
00177
00178
00179
00180
00181
00182
00183
00184
00185
00186 enum CRStatus
00187 cr_utils_ucs4_str_len_as_utf8 (const guint32 * a_in_start,
00188 const guint32 * a_in_end, gulong * a_len)
00189 {
00190 gint len = 0;
00191 guint32 *char_ptr = NULL;
00192
00193 g_return_val_if_fail (a_in_start && a_in_end && a_len,
00194 CR_BAD_PARAM_ERROR);
00195
00196 for (char_ptr = (guint32 *) a_in_start;
00197 char_ptr <= a_in_end; char_ptr++) {
00198 if (*char_ptr <= 0x7F) {
00199
00200 len += 1;
00201 } else if (*char_ptr <= 0x7FF) {
00202
00203 len += 2;
00204 } else if (*char_ptr <= 0xFFFF) {
00205 len += 3;
00206 } else if (*char_ptr <= 0x1FFFFF) {
00207 len += 4;
00208 } else if (*char_ptr <= 0x3FFFFFF) {
00209 len += 5;
00210 } else if (*char_ptr <= 0x7FFFFFFF) {
00211 len += 6;
00212 }
00213 }
00214
00215 *a_len = len;
00216 return CR_OK;
00217 }
00218
00219
00220
00221
00222
00223
00224
00225
00226
00227
00228
00229 enum CRStatus
00230 cr_utils_ucs1_str_len_as_utf8 (const guchar * a_in_start,
00231 const guchar * a_in_end, gulong * a_len)
00232 {
00233 gint len = 0;
00234 guchar *char_ptr = NULL;
00235
00236 g_return_val_if_fail (a_in_start && a_in_end && a_len,
00237 CR_BAD_PARAM_ERROR);
00238
00239 for (char_ptr = (guchar *) a_in_start;
00240 char_ptr <= a_in_end; char_ptr++) {
00241 if (*char_ptr <= 0x7F) {
00242
00243 len += 1;
00244 } else {
00245
00246 len += 2;
00247 }
00248 }
00249
00250 *a_len = len;
00251 return CR_OK;
00252 }
00253
00254
00255
00256
00257
00258
00259
00260
00261
00262
00263
00264
00265
00266
00267
00268
00269 enum CRStatus
00270 cr_utils_utf8_to_ucs4 (const guchar * a_in,
00271 gulong * a_in_len, guint32 * a_out, gulong * a_out_len)
00272 {
00273 gulong in_len = 0,
00274 out_len = 0,
00275 in_index = 0,
00276 out_index = 0;
00277 enum CRStatus status = CR_OK;
00278
00279
00280
00281
00282
00283 guint c = 0;
00284
00285 g_return_val_if_fail (a_in && a_in_len
00286 && a_out && a_out_len, CR_BAD_PARAM_ERROR);
00287
00288 if (*a_in_len < 1) {
00289 status = CR_OK;
00290 goto end;
00291 }
00292
00293 in_len = *a_in_len;
00294 out_len = *a_out_len;
00295
00296 for (in_index = 0, out_index = 0;
00297 (in_index < in_len) && (out_index < out_len);
00298 in_index++, out_index++) {
00299 gint nb_bytes_2_decode = 0;
00300
00301 if (a_in[in_index] <= 0x7F) {
00302
00303
00304
00305
00306
00307 c = a_in[in_index];
00308 nb_bytes_2_decode = 1;
00309
00310 } else if ((a_in[in_index] & 0xE0) == 0xC0) {
00311
00312
00313
00314
00315
00316 c = a_in[in_index] & 0x1F;
00317 nb_bytes_2_decode = 2;
00318
00319 } else if ((a_in[in_index] & 0xF0) == 0xE0) {
00320
00321
00322
00323
00324
00325 c = a_in[in_index] & 0x0F;
00326 nb_bytes_2_decode = 3;
00327
00328 } else if ((a_in[in_index] & 0xF8) == 0xF0) {
00329
00330
00331
00332
00333
00334 c = a_in[in_index] & 0x7;
00335 nb_bytes_2_decode = 4;
00336
00337 } else if ((a_in[in_index] & 0xFC) == 0xF8) {
00338
00339
00340
00341
00342
00343
00344 c = a_in[in_index] & 3;
00345 nb_bytes_2_decode = 5;
00346
00347 } else if ((a_in[in_index] & 0xFE) == 0xFC) {
00348
00349
00350
00351
00352
00353
00354 c = a_in[in_index] & 1;
00355 nb_bytes_2_decode = 6;
00356
00357 } else {
00358
00359 goto end;
00360 }
00361
00362
00363
00364
00365
00366 for (; nb_bytes_2_decode > 1; nb_bytes_2_decode--) {
00367
00368 in_index++;
00369
00370
00371 if ((a_in[in_index] & 0xC0) != 0x80) {
00372 goto end;
00373 }
00374
00375 c = (c << 6) | (a_in[in_index] & 0x3F);
00376 }
00377
00378
00379
00380
00381
00382
00383
00384
00385
00386
00387
00388 if (c == 0xFFFF || c == 0xFFFE)
00389 goto end;
00390
00391
00392 if (c > 0x10FFFF)
00393 goto end;
00394
00395
00396
00397
00398
00399 if (c >= 0xD800 && c <= 0xDFFF)
00400 goto end;
00401
00402
00403 if (c == 0)
00404 goto end;
00405
00406 a_out[out_index] = c;
00407 }
00408
00409 end:
00410 *a_out_len = out_index + 1;
00411 *a_in_len = in_index + 1;
00412
00413 return status;
00414 }
00415
00416
00417
00418
00419
00420
00421
00422
00423
00424
00425
00426
00427 enum CRStatus
00428 cr_utils_read_char_from_utf8_buf (const guchar * a_in,
00429 gulong a_in_len,
00430 guint32 * a_out, gulong * a_consumed)
00431 {
00432 gulong in_len = 0,
00433 in_index = 0,
00434 nb_bytes_2_decode = 0;
00435 enum CRStatus status = CR_OK;
00436
00437
00438
00439
00440
00441 guint32 c = 0;
00442
00443 g_return_val_if_fail (a_in && a_out && a_out
00444 && a_consumed, CR_BAD_PARAM_ERROR);
00445
00446 if (a_in_len < 1) {
00447 status = CR_OK;
00448 goto end;
00449 }
00450
00451 in_len = a_in_len;
00452
00453 if (*a_in <= 0x7F) {
00454
00455
00456
00457
00458
00459 c = *a_in;
00460 nb_bytes_2_decode = 1;
00461
00462 } else if ((*a_in & 0xE0) == 0xC0) {
00463
00464
00465
00466
00467
00468 c = *a_in & 0x1F;
00469 nb_bytes_2_decode = 2;
00470
00471 } else if ((*a_in & 0xF0) == 0xE0) {
00472
00473
00474
00475
00476
00477 c = *a_in & 0x0F;
00478 nb_bytes_2_decode = 3;
00479
00480 } else if ((*a_in & 0xF8) == 0xF0) {
00481
00482
00483
00484
00485
00486 c = *a_in & 0x7;
00487 nb_bytes_2_decode = 4;
00488
00489 } else if ((*a_in & 0xFC) == 0xF8) {
00490
00491
00492
00493
00494
00495
00496 c = *a_in & 3;
00497 nb_bytes_2_decode = 5;
00498
00499 } else if ((*a_in & 0xFE) == 0xFC) {
00500
00501
00502
00503
00504
00505
00506 c = *a_in & 1;
00507 nb_bytes_2_decode = 6;
00508
00509 } else {
00510
00511 goto end;
00512 }
00513
00514 if (nb_bytes_2_decode > a_in_len) {
00515 status = CR_END_OF_INPUT_ERROR;
00516 goto end;
00517 }
00518
00519
00520
00521
00522
00523 for (in_index = 1; in_index < nb_bytes_2_decode; in_index++) {
00524
00525 if ((a_in[in_index] & 0xC0) != 0x80) {
00526 goto end;
00527 }
00528
00529 c = (c << 6) | (a_in[in_index] & 0x3F);
00530 }
00531
00532
00533
00534
00535
00536
00537
00538
00539
00540
00541
00542 if (c == 0xFFFF || c == 0xFFFE)
00543 goto end;
00544
00545
00546 if (c > 0x10FFFF)
00547 goto end;
00548
00549
00550
00551
00552
00553 if (c >= 0xD800 && c <= 0xDFFF)
00554 goto end;
00555
00556
00557 if (c == 0)
00558 goto end;
00559
00560 *a_out = c;
00561
00562 end:
00563 *a_consumed = nb_bytes_2_decode;
00564
00565 return status;
00566 }
00567
00568
00569
00570
00571 enum CRStatus
00572 cr_utils_utf8_str_len_as_ucs1 (const guchar * a_in_start,
00573 const guchar * a_in_end, gulong * a_len)
00574 {
00575
00576
00577
00578
00579
00580
00581 guchar *byte_ptr = NULL;
00582 gint len = 0;
00583
00584
00585
00586
00587
00588 guint c = 0;
00589
00590 g_return_val_if_fail (a_in_start && a_in_end && a_len,
00591 CR_BAD_PARAM_ERROR);
00592 *a_len = 0;
00593
00594 for (byte_ptr = (guchar *) a_in_start;
00595 byte_ptr <= a_in_end; byte_ptr++) {
00596 gint nb_bytes_2_decode = 0;
00597
00598 if (*byte_ptr <= 0x7F) {
00599
00600
00601
00602
00603
00604 c = *byte_ptr;
00605 nb_bytes_2_decode = 1;
00606
00607 } else if ((*byte_ptr & 0xE0) == 0xC0) {
00608
00609
00610
00611
00612
00613 c = *byte_ptr & 0x1F;
00614 nb_bytes_2_decode = 2;
00615
00616 } else if ((*byte_ptr & 0xF0) == 0xE0) {
00617
00618
00619
00620
00621
00622 c = *byte_ptr & 0x0F;
00623 nb_bytes_2_decode = 3;
00624
00625 } else if ((*byte_ptr & 0xF8) == 0xF0) {
00626
00627
00628
00629
00630
00631 c = *byte_ptr & 0x7;
00632 nb_bytes_2_decode = 4;
00633
00634 } else if ((*byte_ptr & 0xFC) == 0xF8) {
00635
00636
00637
00638
00639
00640
00641 c = *byte_ptr & 3;
00642 nb_bytes_2_decode = 5;
00643
00644 } else if ((*byte_ptr & 0xFE) == 0xFC) {
00645
00646
00647
00648
00649
00650
00651 c = *byte_ptr & 1;
00652 nb_bytes_2_decode = 6;
00653
00654 } else {
00655
00656
00657
00658 return CR_ENCODING_ERROR;
00659 }
00660
00661
00662
00663
00664
00665 for (; nb_bytes_2_decode > 1; nb_bytes_2_decode--) {
00666
00667 byte_ptr++;
00668
00669
00670 if ((*byte_ptr & 0xC0) != 0x80) {
00671 return CR_ENCODING_ERROR;
00672 }
00673
00674 c = (c << 6) | (*byte_ptr & 0x3F);
00675 }
00676
00677
00678
00679
00680
00681
00682 if (c <= 0xFF) {
00683
00684
00685 len++;
00686 } else {
00687
00688
00689
00690 return CR_ENCODING_ERROR;
00691 }
00692 }
00693
00694 *a_len = len;
00695
00696 return CR_OK;
00697 }
00698
00699
00700
00701
00702
00703
00704
00705
00706
00707
00708
00709
00710
00711
00712 enum CRStatus
00713 cr_utils_utf8_str_to_ucs4 (const guchar * a_in,
00714 gulong * a_in_len,
00715 guint32 ** a_out, gulong * a_out_len)
00716 {
00717 enum CRStatus status = CR_OK;
00718
00719 g_return_val_if_fail (a_in && a_in_len
00720 && a_out && a_out_len, CR_BAD_PARAM_ERROR);
00721
00722 status = cr_utils_utf8_str_len_as_ucs4 (a_in,
00723 &a_in[*a_in_len - 1],
00724 a_out_len);
00725
00726 g_return_val_if_fail (status == CR_OK, status);
00727
00728 *a_out = g_malloc0 (*a_out_len * sizeof (guint32));
00729
00730 status = cr_utils_utf8_to_ucs4 (a_in, a_in_len, *a_out, a_out_len);
00731
00732 return status;
00733 }
00734
00735
00736
00737
00738
00739
00740
00741
00742
00743
00744
00745
00746
00747
00748
00749
00750 enum CRStatus
00751 cr_utils_ucs4_to_utf8 (const guint32 * a_in,
00752 gulong * a_in_len, guchar * a_out, gulong * a_out_len)
00753 {
00754 gulong in_len = 0,
00755 in_index = 0,
00756 out_index = 0;
00757 enum CRStatus status = CR_OK;
00758
00759 g_return_val_if_fail (a_in && a_in_len && a_out && a_out_len,
00760 CR_BAD_PARAM_ERROR);
00761
00762 if (*a_in_len < 1) {
00763 status = CR_OK;
00764 goto end;
00765 }
00766
00767 in_len = *a_in_len;
00768
00769 for (in_index = 0; in_index < in_len; in_index++) {
00770
00771
00772
00773
00774 if (a_in[in_index] <= 0x7F) {
00775 a_out[out_index] = a_in[in_index];
00776 out_index++;
00777 } else if (a_in[in_index] <= 0x7FF) {
00778 a_out[out_index] = (0xC0 | (a_in[in_index] >> 6));
00779 a_out[out_index + 1] =
00780 (0x80 | (a_in[in_index] & 0x3F));
00781 out_index += 2;
00782 } else if (a_in[in_index] <= 0xFFFF) {
00783 a_out[out_index] = (0xE0 | (a_in[in_index] >> 12));
00784 a_out[out_index + 1] =
00785 (0x80 | ((a_in[in_index] >> 6) & 0x3F));
00786 a_out[out_index + 2] =
00787 (0x80 | (a_in[in_index] & 0x3F));
00788 out_index += 3;
00789 } else if (a_in[in_index] <= 0x1FFFFF) {
00790 a_out[out_index] = (0xF0 | (a_in[in_index] >> 18));
00791 a_out[out_index + 1]
00792 = (0x80 | ((a_in[in_index] >> 12) & 0x3F));
00793 a_out[out_index + 2]
00794 = (0x80 | ((a_in[in_index] >> 6) & 0x3F));
00795 a_out[out_index + 3]
00796 = (0x80 | (a_in[in_index] & 0x3F));
00797 out_index += 4;
00798 } else if (a_in[in_index] <= 0x3FFFFFF) {
00799 a_out[out_index] = (0xF8 | (a_in[in_index] >> 24));
00800 a_out[out_index + 1] =
00801 (0x80 | (a_in[in_index] >> 18));
00802 a_out[out_index + 2]
00803 = (0x80 | ((a_in[in_index] >> 12) & 0x3F));
00804 a_out[out_index + 3]
00805 = (0x80 | ((a_in[in_index] >> 6) & 0x3F));
00806 a_out[out_index + 4]
00807 = (0x80 | (a_in[in_index] & 0x3F));
00808 out_index += 5;
00809 } else if (a_in[in_index] <= 0x7FFFFFFF) {
00810 a_out[out_index] = (0xFC | (a_in[in_index] >> 30));
00811 a_out[out_index + 1] =
00812 (0x80 | (a_in[in_index] >> 24));
00813 a_out[out_index + 2]
00814 = (0x80 | ((a_in[in_index] >> 18) & 0x3F));
00815 a_out[out_index + 3]
00816 = (0x80 | ((a_in[in_index] >> 12) & 0x3F));
00817 a_out[out_index + 4]
00818 = (0x80 | ((a_in[in_index] >> 6) & 0x3F));
00819 a_out[out_index + 4]
00820 = (0x80 | (a_in[in_index] & 0x3F));
00821 out_index += 6;
00822 } else {
00823 status = CR_ENCODING_ERROR;
00824 goto end;
00825 }
00826 }
00827
00828 end:
00829 *a_in_len = in_index + 1;
00830 *a_out_len = out_index + 1;
00831
00832 return status;
00833 }
00834
00835
00836
00837
00838
00839
00840
00841
00842
00843
00844
00845
00846
00847 enum CRStatus
00848 cr_utils_ucs4_str_to_utf8 (const guint32 * a_in,
00849 gulong * a_in_len,
00850 guchar ** a_out, gulong * a_out_len)
00851 {
00852 enum CRStatus status = CR_OK;
00853
00854 g_return_val_if_fail (a_in && a_in_len && a_out
00855 && a_out_len, CR_BAD_PARAM_ERROR);
00856
00857 status = cr_utils_ucs4_str_len_as_utf8 (a_in,
00858 &a_in[*a_out_len - 1],
00859 a_out_len);
00860
00861 g_return_val_if_fail (status == CR_OK, status);
00862
00863 status = cr_utils_ucs4_to_utf8 (a_in, a_in_len, *a_out, a_out_len);
00864
00865 return status;
00866 }
00867
00868
00869
00870
00871
00872
00873
00874
00875
00876
00877
00878
00879
00880
00881
00882
00883
00884
00885
00886
00887
00888 enum CRStatus
00889 cr_utils_ucs1_to_utf8 (const guchar * a_in,
00890 gulong * a_in_len, guchar * a_out, gulong * a_out_len)
00891 {
00892 gulong out_index = 0,
00893 in_index = 0,
00894 in_len = 0,
00895 out_len = 0;
00896 enum CRStatus status = CR_OK;
00897
00898 g_return_val_if_fail (a_in && a_in_len
00899 && a_out_len,
00900 CR_BAD_PARAM_ERROR);
00901
00902 if (*a_in_len == 0) {
00903 *a_out_len = 0 ;
00904 return CR_OK ;
00905 }
00906 g_return_val_if_fail (a_out, CR_BAD_PARAM_ERROR) ;
00907
00908 if (*a_in_len < 1) {
00909 status = CR_OK;
00910 goto end;
00911 }
00912
00913 in_len = *a_in_len;
00914 out_len = *a_out_len;
00915
00916 for (in_index = 0, out_index = 0;
00917 (in_index < in_len) && (out_index < out_len); in_index++) {
00918
00919
00920
00921
00922 if (a_in[in_index] <= 0x7F) {
00923 a_out[out_index] = a_in[in_index];
00924 out_index++;
00925 } else {
00926 a_out[out_index] = (0xC0 | (a_in[in_index] >> 6));
00927 a_out[out_index + 1] =
00928 (0x80 | (a_in[in_index] & 0x3F));
00929 out_index += 2;
00930 }
00931 }
00932
00933 end:
00934 *a_in_len = in_index;
00935 *a_out_len = out_index;
00936
00937 return CR_OK;
00938 }
00939
00940
00941
00942
00943
00944
00945
00946
00947
00948
00949 enum CRStatus
00950 cr_utils_ucs1_str_to_utf8 (const guchar * a_in,
00951 gulong * a_in_len,
00952 guchar ** a_out, gulong * a_out_len)
00953 {
00954 gulong in_len = 0,
00955 out_len = 0;
00956 enum CRStatus status = CR_OK;
00957
00958 g_return_val_if_fail (a_in && a_in_len && a_out
00959 && a_out_len, CR_BAD_PARAM_ERROR);
00960
00961 if (*a_in_len < 1) {
00962 *a_out_len = 0;
00963 *a_out = NULL;
00964 return CR_OK;
00965 }
00966
00967 status = cr_utils_ucs1_str_len_as_utf8 (a_in, &a_in[*a_in_len - 1],
00968 &out_len);
00969
00970 g_return_val_if_fail (status == CR_OK, status);
00971
00972 in_len = *a_in_len;
00973
00974 *a_out = g_malloc0 (out_len);
00975
00976 status = cr_utils_ucs1_to_utf8 (a_in, a_in_len, *a_out, &out_len);
00977
00978 *a_out_len = out_len;
00979
00980 return status;
00981 }
00982
00983
00984
00985
00986
00987
00988
00989
00990
00991
00992
00993
00994
00995
00996
00997
00998
00999
01000
01001
01002
01003
01004
01005
01006 enum CRStatus
01007 cr_utils_utf8_to_ucs1 (const guchar * a_in,
01008 gulong * a_in_len, guchar * a_out, gulong * a_out_len)
01009 {
01010 gulong in_index = 0,
01011 out_index = 0,
01012 in_len = 0,
01013 out_len = 0;
01014 enum CRStatus status = CR_OK;
01015
01016
01017
01018
01019
01020 guint32 c = 0;
01021
01022 g_return_val_if_fail (a_in && a_in_len
01023 && a_out && a_out_len, CR_BAD_PARAM_ERROR);
01024
01025 if (*a_in_len < 1) {
01026 status = CR_OK;
01027 goto end;
01028 }
01029
01030 in_len = *a_in_len;
01031 out_len = *a_out_len;
01032
01033 for (in_index = 0, out_index = 0;
01034 (in_index < in_len) && (out_index < out_len);
01035 in_index++, out_index++) {
01036 gint nb_bytes_2_decode = 0;
01037
01038 if (a_in[in_index] <= 0x7F) {
01039
01040
01041
01042
01043
01044 c = a_in[in_index];
01045 nb_bytes_2_decode = 1;
01046
01047 } else if ((a_in[in_index] & 0xE0) == 0xC0) {
01048
01049
01050
01051
01052
01053 c = a_in[in_index] & 0x1F;
01054 nb_bytes_2_decode = 2;
01055
01056 } else if ((a_in[in_index] & 0xF0) == 0xE0) {
01057
01058
01059
01060
01061
01062 c = a_in[in_index] & 0x0F;
01063 nb_bytes_2_decode = 3;
01064
01065 } else if ((a_in[in_index] & 0xF8) == 0xF0) {
01066
01067
01068
01069
01070
01071 c = a_in[in_index] & 0x7;
01072 nb_bytes_2_decode = 4;
01073
01074 } else if ((a_in[in_index] & 0xFC) == 0xF8) {
01075
01076
01077
01078
01079
01080
01081 c = a_in[in_index] & 3;
01082 nb_bytes_2_decode = 5;
01083
01084 } else if ((a_in[in_index] & 0xFE) == 0xFC) {
01085
01086
01087
01088
01089
01090
01091 c = a_in[in_index] & 1;
01092 nb_bytes_2_decode = 6;
01093
01094 } else {
01095
01096 status = CR_ENCODING_ERROR;
01097 goto end;
01098 }
01099
01100
01101
01102
01103
01104 if (in_index + nb_bytes_2_decode - 1 >= in_len) {
01105 status = CR_OK;
01106 goto end;
01107 }
01108
01109 for (; nb_bytes_2_decode > 1; nb_bytes_2_decode--) {
01110
01111 in_index++;
01112
01113
01114 if ((a_in[in_index] & 0xC0) != 0x80) {
01115 status = CR_ENCODING_ERROR;
01116 goto end;
01117 }
01118
01119 c = (c << 6) | (a_in[in_index] & 0x3F);
01120 }
01121
01122
01123
01124
01125
01126
01127 if (c > 0xFF) {
01128 status = CR_ENCODING_ERROR;
01129 goto end;
01130 }
01131
01132 a_out[out_index] = c;
01133 }
01134
01135 end:
01136 *a_out_len = out_index;
01137 *a_in_len = in_index;
01138
01139 return CR_OK;
01140 }
01141
01142
01143
01144
01145
01146
01147
01148
01149
01150
01151
01152
01153
01154 enum CRStatus
01155 cr_utils_utf8_str_to_ucs1 (const guchar * a_in,
01156 gulong * a_in_len,
01157 guchar ** a_out, gulong * a_out_len)
01158 {
01159 enum CRStatus status = CR_OK;
01160
01161 g_return_val_if_fail (a_in && a_in_len
01162 && a_out && a_out_len, CR_BAD_PARAM_ERROR);
01163
01164 if (*a_in_len < 1) {
01165 *a_out_len = 0;
01166 *a_out = NULL;
01167 return CR_OK;
01168 }
01169
01170 status = cr_utils_utf8_str_len_as_ucs4 (a_in, &a_in[*a_in_len - 1],
01171 a_out_len);
01172
01173 g_return_val_if_fail (status == CR_OK, status);
01174
01175 *a_out = g_malloc0 (*a_out_len * sizeof (guint32));
01176
01177 status = cr_utils_utf8_to_ucs1 (a_in, a_in_len, *a_out, a_out_len);
01178 return status;
01179 }
01180
01181
01182
01183
01184
01185
01186
01187
01188
01189
01190
01191
01192
01193
01194 gboolean
01195 cr_utils_is_white_space (guint32 a_char)
01196 {
01197 switch (a_char) {
01198 case ' ':
01199 case '\t':
01200 case '\r':
01201 case '\n':
01202 case '\f':
01203 return TRUE;
01204 break;
01205 default:
01206 return FALSE;
01207 }
01208 }
01209
01210
01211
01212
01213
01214
01215
01216
01217
01218
01219 gboolean
01220 cr_utils_is_newline (guint32 a_char)
01221 {
01222 switch (a_char) {
01223 case '\n':
01224 case '\r':
01225 case '\f':
01226 return TRUE;
01227 break;
01228 default:
01229 return FALSE;
01230 }
01231 }
01232
01233
01234
01235
01236
01237 gboolean
01238 cr_utils_is_hexa_char (guint32 a_char)
01239 {
01240 if ((a_char >= '0' && a_char <= '9')
01241 || (a_char >= 'A' && a_char <= 'F')) {
01242 return TRUE;
01243 }
01244 return FALSE;
01245 }
01246
01247
01248
01249
01250
01251
01252
01253
01254
01255
01256
01257 gboolean
01258 cr_utils_is_nonascii (guint32 a_char)
01259 {
01260 if (a_char <= 177) {
01261 return FALSE;
01262 }
01263
01264 return TRUE;
01265 }
01266
01267
01268
01269
01270
01271
01272
01273 void
01274 cr_utils_dump_n_chars (guchar a_char, FILE * a_fp, glong a_nb)
01275 {
01276 glong i = 0;
01277
01278 for (i = 0; i < a_nb; i++) {
01279 fprintf (a_fp, "%c", a_char);
01280 }
01281 }
01282
01283 void
01284 cr_utils_dump_n_chars2 (guchar a_char, GString * a_string, glong a_nb)
01285 {
01286 glong i = 0;
01287
01288 g_return_if_fail (a_string);
01289
01290 for (i = 0; i < a_nb; i++) {
01291 g_string_append_printf (a_string, "%c", a_char);
01292 }
01293 }
01294
01295 gdouble
01296 cr_utils_n_to_0_dot_n (glong a_n, glong decimal_places)
01297 {
01298 gdouble result = a_n;
01299
01300 while (decimal_places > 0) {
01301 result = result / 10;
01302 decimal_places--;
01303 }
01304
01305 return result;
01306 }
01307
01308
01309
01310
01311
01312
01313
01314 GList *
01315 cr_utils_dup_glist_of_string (GList * a_list_of_strings)
01316 {
01317 GList *cur = NULL,
01318 *result = NULL;
01319
01320 g_return_val_if_fail (a_list_of_strings, NULL);
01321
01322 for (cur = a_list_of_strings; cur; cur = cur->next) {
01323 GString *str = NULL;
01324
01325 str = g_string_new_len (((GString *) cur->data)->str,
01326 ((GString *) cur->data)->len);
01327 if (str)
01328 result = g_list_append (result, str);
01329 }
01330
01331 return result;
01332 }
01333
01334
01335
01336
01337
01338
01339
01340 GList *
01341 cr_utils_dup_glist_of_cr_string (GList * a_list_of_strings)
01342 {
01343 GList *cur = NULL, *result = NULL;
01344
01345 g_return_val_if_fail (a_list_of_strings, NULL);
01346
01347 for (cur = a_list_of_strings; cur; cur = cur->next) {
01348 CRString *str = NULL;
01349
01350 str = cr_string_dup ((CRString *) cur->data) ;
01351 if (str)
01352 result = g_list_append (result, str);
01353 }
01354
01355 return result;
01356 }