Main Page   Class Hierarchy   Alphabetical List   Compound List   File List   Compound Members   File Members   Search  

utf_old.h

Go to the documentation of this file.
00001 /*
00002 *******************************************************************************
00003 *
00004 *   Copyright (C) 2002-2003, International Business Machines
00005 *   Corporation and others.  All Rights Reserved.
00006 *
00007 *******************************************************************************
00008 *   file name:  utf_old.h
00009 *   encoding:   US-ASCII
00010 *   tab size:   8 (not used)
00011 *   indentation:4
00012 *
00013 *   created on: 2002sep21
00014 *   created by: Markus W. Scherer
00015 */
00016 
00140 /* utf.h must be included first. */
00141 #ifndef __UTF_H__
00142 #   include "unicode/utf.h"
00143 #endif
00144 
00145 #ifndef __UTF_OLD_H__
00146 #define __UTF_OLD_H__
00147 
00148 /* Formerly utf.h, part 1 --------------------------------------------------- */
00149 
/**
 * Legacy index type. The typedef is only emitted when U_USE_UTF_DEPRECATES
 * is defined before this point.
 */
#if defined(U_USE_UTF_DEPRECATES)
typedef int32_t UTextOffset;
#endif

/** Presumably the code-unit width in bits of the default encoding -- TODO confirm. */
#define UTF_SIZE 16

/** Defined with an empty replacement list; only testable via #ifdef/defined(). */
#define UTF_SAFE

/* The two alternative mode switches are explicitly left undefined. */
#undef UTF_UNSAFE
#undef UTF_STRICT

/**
 * First UTF-8 error sentinel. UTF8_NEXT_CHAR_SAFE and UTF8_PREV_CHAR_SAFE
 * in this header store it into their output when they reject a byte.
 */
#define UTF8_ERROR_VALUE_1 0x15

/** Second UTF-8 error sentinel; recognized by UTF_IS_ERROR and UTF_IS_VALID. */
#define UTF8_ERROR_VALUE_2 0x9f

/**
 * Error sentinel stored by the UTF-16 and UTF-32 *_SAFE macros of this
 * header when they reject their input.
 */
#define UTF_ERROR_VALUE 0xffff
00204 
/**
 * True when c is one of the error sentinels: any value whose low 16 bits
 * (ignoring bit 0) are 0xfffe, or either UTF-8 error value.
 * c is evaluated more than once.
 */
#define UTF_IS_ERROR(c) \
    (0xfffe==((c)&0xfffe) || \
     UTF8_ERROR_VALUE_1==(c) || \
     UTF8_ERROR_VALUE_2==(c))

/**
 * True when c passes UTF_IS_UNICODE_CHAR and is neither of the two UTF-8
 * error values. c is evaluated more than once.
 */
#define UTF_IS_VALID(c) \
    (UTF_IS_UNICODE_CHAR(c) && \
     !(UTF8_ERROR_VALUE_1==(c) || UTF8_ERROR_VALUE_2==(c)))
00222 
/** True when uchar lies in 0xd800..0xdfff (all bits above bit 10 must match). */
#define UTF_IS_SURROGATE(uchar) (0xd800==((uchar)&0xfffff800))

/**
 * True for the values 0xfdd0..0xfdef and for every value up to 0x10ffff
 * whose low 16 bits are 0xfffe or 0xffff. c is evaluated more than once.
 */
#define UTF_IS_UNICODE_NONCHAR(c) \
    ((c)>=0xfdd0 && \
     ((uint32_t)(c)<=0xfdef || 0xfffe==((c)&0xfffe)) && \
     (uint32_t)(c)<=0x10ffff)

/**
 * True for values in 0..0x10ffff that are neither in the surrogate range
 * 0xd800..0xdfff nor matched by UTF_IS_UNICODE_NONCHAR.
 * c is evaluated more than once.
 */
#define UTF_IS_UNICODE_CHAR(c) \
    ((uint32_t)(c)<0xd800 || \
        (0xdfff<(uint32_t)(c) && \
         (uint32_t)(c)<=0x10ffff && \
         !UTF_IS_UNICODE_NONCHAR(c)))
00259 
00260 /* Formerly utf8.h ---------------------------------------------------------- */
00261 
/**
 * Looks up the number of trail bytes for a UTF-8 lead byte in the
 * utf8_countTrailBytes table (the table is not defined in this file).
 *
 * The argument is parenthesized before the uint8_t cast so that compound
 * expressions (e.g. `flag ? a : b`, or a sign-extended char plus an
 * offset) are reduced to 0..255 as a whole before indexing the table.
 */
#define UTF8_COUNT_TRAIL_BYTES(leadByte) (utf8_countTrailBytes[(uint8_t)(leadByte)])
00267 
/**
 * Clears, in place, all but the low (6 - countTrailBytes) bits of leadByte.
 * leadByte must be a modifiable lvalue.
 */
#define UTF8_MASK_LEAD_BYTE(leadByte, countTrailBytes) \
    ((leadByte)&=(1<<(6-(countTrailBytes)))-1)

/** True when bit 7 of uchar is clear. */
#define UTF8_IS_SINGLE(uchar) (0==((uchar)&0x80))

/** True when uchar, taken as a byte, lies in 0xc0..0xfd. */
#define UTF8_IS_LEAD(uchar) ((uint8_t)((uchar)-0xc0)<0x3e)

/** True when the top two bits of the low byte of uchar are binary 10. */
#define UTF8_IS_TRAIL(uchar) (0x80==((uchar)&0xc0))

/** True when c, as an unsigned 32-bit value, is greater than 0x7f. */
#define UTF8_NEED_MULTIPLE_UCHAR(c) (0x7f<(uint32_t)(c))
00283 
/**
 * Number of bytes used to write code point c in UTF-8:
 *   0..0x7f -> 1, 0x80..0x7ff -> 2, 0x10000..0x10ffff -> 4,
 *   everything else (0x800..0xffff and any value above 0x10ffff) -> 3.
 * c is evaluated more than once.
 *
 * The former `#if 1 ... #else` wrapper held a second, permanently disabled
 * definition that returned 5 and 6 for values above 0x10ffff; it was dead
 * code and has been dropped, leaving only the definition that was active.
 */
#define UTF8_CHAR_LENGTH(c) \
    ((uint32_t)(c)<=0x7f ? 1 : \
        ((uint32_t)(c)<=0x7ff ? 2 : \
            ((uint32_t)((c)-0x10000)>0xfffff ? 3 : 4) \
        ) \
    )
00318 
/** Largest value produced by UTF8_CHAR_LENGTH. */
#define UTF8_MAX_CHAR_LENGTH 4

/** Scales size by 5/2 using integer arithmetic (the division truncates). */
#define UTF8_ARRAY_SIZE(size) (((size)*5)/2)
00324 
/**
 * Random-access read without validation: stores in c the code point whose
 * encoding contains byte s[i]. i itself is not modified; the work is done
 * on a private int32_t copy.
 */
#define UTF8_GET_CHAR_UNSAFE(s, i, c) { \
    int32_t __I=(int32_t)(i); \
    /* back up to the first byte of the sequence, then decode forward */ \
    UTF8_SET_CHAR_START_UNSAFE(s, __I); \
    UTF8_NEXT_CHAR_UNSAFE(s, __I, c); \
}
00331 
/**
 * Random-access read with checking: stores in c the result of decoding the
 * sequence that contains byte s[i], using the bounds start and length.
 * i itself is not modified; the work is done on a private int32_t copy.
 * strict is passed through to UTF8_NEXT_CHAR_SAFE.
 */
#define UTF8_GET_CHAR_SAFE(s, start, i, length, c, strict) { \
    int32_t __I=(int32_t)(i); \
    /* back up to the start of the sequence, then decode forward */ \
    UTF8_SET_CHAR_START_SAFE(s, start, __I); \
    UTF8_NEXT_CHAR_SAFE(s, __I, length, c, strict); \
}
00338 
/**
 * Reads s[i] into c and post-increments i. If that byte lies in 0xc0..0xf4
 * it is treated as a lead byte: its payload bits are kept and one to three
 * following bytes are folded in, six bits each, advancing i past them.
 * No bounds or well-formedness checks are made. c must be an lvalue.
 */
#define UTF8_NEXT_CHAR_UNSAFE(s, i, c) { \
    (c)=(s)[(i)++]; \
    if((uint8_t)((c)-0xc0)<0x35) { \
        uint8_t __count=UTF8_COUNT_TRAIL_BYTES(c); \
        UTF8_MASK_LEAD_BYTE(c, __count); \
        switch(__count) { \
        case 3: \
            (c)=((c)<<6)|((s)[(i)++]&0x3f); \
            /* fall through */ \
        case 2: \
            (c)=((c)<<6)|((s)[(i)++]&0x3f); \
            /* fall through */ \
        case 1: \
            (c)=((c)<<6)|((s)[(i)++]&0x3f); \
            break; \
        default: \
            /* any other count: nothing further is read */ \
            break; \
        } \
    } \
}
00358 
/**
 * Writes code point c at s[i] as one to four UTF-8 bytes and advances i
 * past them. The byte count is chosen from the thresholds 0x7f, 0x7ff and
 * 0xffff. Neither c nor the available space in s is checked.
 */
#define UTF8_APPEND_CHAR_UNSAFE(s, i, c) { \
    if((uint32_t)(c)<=0x7f) { \
        (s)[(i)++]=(uint8_t)(c); \
    } else if((uint32_t)(c)<=0x7ff) { \
        (s)[(i)++]=(uint8_t)(((c)>>6)|0xc0); \
        (s)[(i)++]=(uint8_t)(((c)&0x3f)|0x80); \
    } else if((uint32_t)(c)<=0xffff) { \
        (s)[(i)++]=(uint8_t)(((c)>>12)|0xe0); \
        (s)[(i)++]=(uint8_t)((((c)>>6)&0x3f)|0x80); \
        (s)[(i)++]=(uint8_t)(((c)&0x3f)|0x80); \
    } else { \
        (s)[(i)++]=(uint8_t)(((c)>>18)|0xf0); \
        (s)[(i)++]=(uint8_t)((((c)>>12)&0x3f)|0x80); \
        (s)[(i)++]=(uint8_t)((((c)>>6)&0x3f)|0x80); \
        (s)[(i)++]=(uint8_t)(((c)&0x3f)|0x80); \
    } \
}
00378 
/**
 * Advances i by one plus the trail-byte count looked up for the byte s[i].
 * No bounds check is made.
 */
#define UTF8_FWD_1_UNSAFE(s, i) { \
    (i)+=UTF8_COUNT_TRAIL_BYTES((s)[i])+1; \
}

/**
 * Applies UTF8_FWD_1_UNSAFE n times; does nothing when n is zero or
 * negative. n is evaluated once, into a private int32_t counter.
 */
#define UTF8_FWD_N_UNSAFE(s, i, n) { \
    int32_t __N=(n); \
    for(; __N>0; --__N) { \
        UTF8_FWD_1_UNSAFE(s, i); \
    } \
}
00392 
/**
 * Decrements i for as long as s[i] is a trail byte. There is no lower
 * bound, so the text must contain a non-trail byte at or before s[i].
 */
#define UTF8_SET_CHAR_START_UNSAFE(s, i) { \
    while(UTF8_IS_TRAIL((s)[i])) { \
        --(i); \
    } \
}
00397 
/**
 * Reads s[i] into c and post-increments i. Values below 0x80 are returned
 * as they are. A lead byte hands the rest of the decoding to
 * utf8_nextCharSafeBody() (declared outside this file), which receives
 * &i, length and strict. Any other byte yields UTF8_ERROR_VALUE_1.
 */
#define UTF8_NEXT_CHAR_SAFE(s, i, length, c, strict) { \
    (c)=(s)[(i)++]; \
    if((c)>=0x80) { \
        if(!UTF8_IS_LEAD(c)) { \
            (c)=UTF8_ERROR_VALUE_1; \
        } else { \
            (c)=utf8_nextCharSafeBody(s, &(i), (int32_t)(length), c, strict); \
        } \
    } \
}
00409 
/**
 * Appends code point c at s[i]. Values up to 0x7f are stored directly and
 * i is incremented, without comparing i to length. Everything else is
 * delegated to utf8_appendCharSafeBody() (declared outside this file),
 * whose return value becomes the new i; its last argument is NULL.
 */
#define UTF8_APPEND_CHAR_SAFE(s, i, length, c)  { \
    if((uint32_t)(c)>0x7f) { \
        (i)=utf8_appendCharSafeBody(s, (int32_t)(i), (int32_t)(length), c, NULL); \
    } else { \
        (s)[(i)++]=(uint8_t)(c); \
    } \
}
00418 
/*
 * Old-name aliases. Each expands to the U8_* macro shown, with the same
 * arguments in the same order; the U8_* macros are not defined in this file.
 */

/** Alias for U8_FWD_1. */
#define UTF8_FWD_1_SAFE(s, i, length)         U8_FWD_1(s, i, length)

/** Alias for U8_FWD_N. */
#define UTF8_FWD_N_SAFE(s, i, length, n)      U8_FWD_N(s, i, length, n)

/** Alias for U8_SET_CP_START. */
#define UTF8_SET_CHAR_START_SAFE(s, start, i) U8_SET_CP_START(s, start, i)
00427 
/**
 * Pre-decrements i and reads s[i] into c. If that byte is a trail byte the
 * macro keeps walking backwards, OR-ing six payload bits per trail byte
 * into c at increasing shifts, until it meets a byte >= 0xc0; that byte is
 * masked as a lead byte and supplies the top bits. i ends on the lead byte.
 * No bounds or well-formedness checks are made.
 */
#define UTF8_PREV_CHAR_UNSAFE(s, i, c) { \
    (c)=(s)[--(i)]; \
    if(UTF8_IS_TRAIL(c)) { \
        uint8_t __b, __count=1, __shift=6; \
        /* keep only the payload of the last trail byte */ \
        (c)&=0x3f; \
        for(;;) { \
            __b=(s)[--(i)]; \
            if(__b<0xc0) { \
                /* another trail byte: add its six bits and keep going */ \
                (c)|=(UChar32)(__b&0x3f)<<__shift; \
                ++__count; \
                __shift+=6; \
            } else { \
                /* lead byte: strip its length marker and finish */ \
                UTF8_MASK_LEAD_BYTE(__b, __count); \
                (c)|=(UChar32)__b<<__shift; \
                break; \
            } \
        } \
    } \
}
00450 
/**
 * Decrements i at least once, and keeps decrementing while the byte it
 * lands on is a trail byte. No lower bound is checked.
 */
#define UTF8_BACK_1_UNSAFE(s, i) { \
    do { \
        --(i); \
    } while(UTF8_IS_TRAIL((s)[i])); \
}

/**
 * Applies UTF8_BACK_1_UNSAFE n times; does nothing when n is zero or
 * negative. n is evaluated once, into a private int32_t counter.
 */
#define UTF8_BACK_N_UNSAFE(s, i, n) { \
    int32_t __N=(n); \
    for(; __N>0; --__N) { \
        UTF8_BACK_1_UNSAFE(s, i); \
    } \
}

/**
 * Steps back with UTF8_BACK_1_UNSAFE and then forward again with
 * UTF8_FWD_1_UNSAFE, leaving i just behind the sequence that contains
 * the byte before the original i.
 */
#define UTF8_SET_CHAR_LIMIT_UNSAFE(s, i) { \
    UTF8_BACK_1_UNSAFE(s, i); \
    UTF8_FWD_1_UNSAFE(s, i); \
}
00470 
/**
 * Pre-decrements i and reads s[i] into c. Values below 0x80 are returned
 * as they are. Values in 0x80..0xbf hand the rest of the decoding to
 * utf8_prevCharSafeBody() (declared outside this file), which receives
 * start, &i and strict. Values above 0xbf yield UTF8_ERROR_VALUE_1.
 */
#define UTF8_PREV_CHAR_SAFE(s, start, i, c, strict) { \
    (c)=(s)[--(i)]; \
    if((c)>=0x80) { \
        if((c)>0xbf) { \
            (c)=UTF8_ERROR_VALUE_1; \
        } else { \
            (c)=utf8_prevCharSafeBody(s, start, &(i), c, strict); \
        } \
    } \
}
00482 
/*
 * Old-name aliases. Each expands to the U8_* macro shown, with the same
 * arguments in the same order; the U8_* macros are not defined in this file.
 */

/** Alias for U8_BACK_1. */
#define UTF8_BACK_1_SAFE(s, start, i)                 U8_BACK_1(s, start, i)

/** Alias for U8_BACK_N. */
#define UTF8_BACK_N_SAFE(s, start, i, n)              U8_BACK_N(s, start, i, n)

/** Alias for U8_SET_CP_LIMIT. */
#define UTF8_SET_CHAR_LIMIT_SAFE(s, start, i, length) U8_SET_CP_LIMIT(s, start, i, length)
00491 
00492 /* Formerly utf16.h --------------------------------------------------------- */
00493 
/** True when uchar lies in 0xd800..0xdbff. */
#define UTF_IS_FIRST_SURROGATE(uchar) (0xd800==((uchar)&0xfffffc00))

/** True when uchar lies in 0xdc00..0xdfff. */
#define UTF_IS_SECOND_SURROGATE(uchar) (0xdc00==((uchar)&0xfffffc00))

/**
 * Tests only bit 10 of c. For a value already known to be a surrogate,
 * a clear bit means it is in the 0xd800..0xdbff half.
 */
#define UTF_IS_SURROGATE_FIRST(c) (0==((c)&0x400))

/** Constant subtracted by UTF16_GET_PAIR_VALUE; its value is 0x35fdc00. */
#define UTF_SURROGATE_OFFSET ((0xd800<<10UL)+0xdc00-0x10000)

/**
 * Combines two surrogate code units into one code point:
 * (first<<10) + second - UTF_SURROGATE_OFFSET. The inputs are not checked.
 */
#define UTF16_GET_PAIR_VALUE(first, second) \
    (((first)<<10UL)+(second)-UTF_SURROGATE_OFFSET)
00509 
/*
 * The value-producing macros below wrap their whole expansion in
 * parentheses so that they behave as a single primary expression wherever
 * they are used (for example as the operand of sizeof or of a postfix
 * operator).
 */

/**
 * First (lead) surrogate for a supplementary code point:
 * (supplementary>>10) + 0xd7c0, converted to UChar. The input is not checked.
 */
#define UTF_FIRST_SURROGATE(supplementary) ((UChar)(((supplementary)>>10)+0xd7c0))

/**
 * Second (trail) surrogate for a supplementary code point:
 * (supplementary & 0x3ff) | 0xdc00, converted to UChar. The input is not checked.
 */
#define UTF_SECOND_SURROGATE(supplementary) ((UChar)(((supplementary)&0x3ff)|0xdc00))

/** Alias for UTF_FIRST_SURROGATE. */
#define UTF16_LEAD(supplementary) UTF_FIRST_SURROGATE(supplementary)

/** Alias for UTF_SECOND_SURROGATE. */
#define UTF16_TRAIL(supplementary) UTF_SECOND_SURROGATE(supplementary)

/** True when uchar is not matched by UTF_IS_SURROGATE. */
#define UTF16_IS_SINGLE(uchar) (!UTF_IS_SURROGATE(uchar))

/** Alias for UTF_IS_FIRST_SURROGATE. */
#define UTF16_IS_LEAD(uchar) UTF_IS_FIRST_SURROGATE(uchar)

/** Alias for UTF_IS_SECOND_SURROGATE. */
#define UTF16_IS_TRAIL(uchar) UTF_IS_SECOND_SURROGATE(uchar)
00530 
/** True when c, as an unsigned 32-bit value, is greater than 0xffff. */
#define UTF16_NEED_MULTIPLE_UCHAR(c) (0xffff<(uint32_t)(c))

/** 2 when c, as an unsigned 32-bit value, is greater than 0xffff; otherwise 1. */
#define UTF16_CHAR_LENGTH(c) ((uint32_t)(c)>0xffff ? 2 : 1)

/** Largest value produced by UTF16_CHAR_LENGTH. */
#define UTF16_MAX_CHAR_LENGTH 2

/** Expands to its argument unchanged. */
#define UTF16_ARRAY_SIZE(size) (size)
00542 
/**
 * Random-access read without validation: loads s[i] into c and, if it is
 * a surrogate, combines it with its neighbour -- s[i+1] when bit 10 marks
 * it as a first surrogate, s[i-1] otherwise. i is not modified and the
 * neighbour is neither bounds-checked nor tested for being a surrogate.
 */
#define UTF16_GET_CHAR_UNSAFE(s, i, c) { \
    (c)=(s)[i]; \
    if(UTF_IS_SURROGATE(c)) { \
        if(!UTF_IS_SURROGATE_FIRST(c)) { \
            (c)=UTF16_GET_PAIR_VALUE((s)[(i)-1], (c)); \
        } else { \
            (c)=UTF16_GET_PAIR_VALUE((c), (s)[(i)+1]); \
        } \
    } \
}
00564 
/**
 * Random-access read with checking. Loads s[i] into c; i is not modified.
 *  - Non-surrogate unit: kept, except that strict mode replaces values
 *    failing UTF_IS_UNICODE_CHAR with UTF_ERROR_VALUE.
 *  - First surrogate: combined with s[i+1] when i+1<length and that unit
 *    is a second surrogate.
 *  - Second surrogate: combined with s[i-1] when i-1>=start and that unit
 *    is a first surrogate.
 * An unpaired surrogate is returned unchanged unless strict is set, in
 * which case c becomes UTF_ERROR_VALUE. A combined supplementary value is
 * not re-checked here, even in strict mode.
 */
#define UTF16_GET_CHAR_SAFE(s, start, i, length, c, strict) { \
    (c)=(s)[i]; \
    if(!UTF_IS_SURROGATE(c)) { \
        if((strict) && !UTF_IS_UNICODE_CHAR(c)) { \
            (c)=UTF_ERROR_VALUE; \
        } \
    } else { \
        uint16_t __c2; \
        if(UTF_IS_SURROGATE_FIRST(c)) { \
            /* look one unit ahead for the matching second surrogate */ \
            if((i)+1<(length) && UTF_IS_SECOND_SURROGATE(__c2=(s)[(i)+1])) { \
                (c)=UTF16_GET_PAIR_VALUE((c), __c2); \
            } else if(strict) { \
                /* unmatched first surrogate */ \
                (c)=UTF_ERROR_VALUE; \
            } \
        } else { \
            /* look one unit back for the matching first surrogate */ \
            if((i)-1>=(start) && UTF_IS_FIRST_SURROGATE(__c2=(s)[(i)-1])) { \
                (c)=UTF16_GET_PAIR_VALUE(__c2, (c)); \
            } else if(strict) { \
                /* unmatched second surrogate */ \
                (c)=UTF_ERROR_VALUE; \
            } \
        } \
    } \
}
00591 
/**
 * Reads s[i] into c and post-increments i. If the unit is a first
 * surrogate, the next unit is consumed as well (i is incremented again)
 * and the two are combined; that next unit is not checked.
 */
#define UTF16_NEXT_CHAR_UNSAFE(s, i, c) { \
    (c)=(s)[(i)++]; \
    if(UTF_IS_FIRST_SURROGATE(c)) { \
        /* consume the following unit as the second half of the pair */ \
        (c)=UTF16_GET_PAIR_VALUE((c), (s)[(i)++]); \
    } \
}
00599 
/**
 * Writes code point c at s[i] and advances i: one unit when c, as an
 * unsigned 32-bit value, is at most 0xffff, otherwise a surrogate pair.
 * Neither c nor the available space in s is checked.
 */
#define UTF16_APPEND_CHAR_UNSAFE(s, i, c) { \
    if((uint32_t)(c)>0xffff) { \
        (s)[(i)++]=(uint16_t)(((c)>>10)+0xd7c0); \
        (s)[(i)++]=(uint16_t)(((c)&0x3ff)|0xdc00); \
    } else { \
        (s)[(i)++]=(uint16_t)(c); \
    } \
}
00609 
/**
 * Advances i by one unit, or by two when the unit at the original
 * position is a first surrogate. No bounds check is made.
 */
#define UTF16_FWD_1_UNSAFE(s, i) { \
    if(UTF_IS_FIRST_SURROGATE((s)[(i)++])) { \
        ++(i); \
    } \
}

/**
 * Applies UTF16_FWD_1_UNSAFE n times; does nothing when n is zero or
 * negative. n is evaluated once, into a private int32_t counter.
 */
#define UTF16_FWD_N_UNSAFE(s, i, n) { \
    int32_t __N=(n); \
    for(; __N>0; --__N) { \
        UTF16_FWD_1_UNSAFE(s, i); \
    } \
}

/**
 * Decrements i once when s[i] is a second surrogate; otherwise leaves it
 * alone. The preceding unit is not examined.
 */
#define UTF16_SET_CHAR_START_UNSAFE(s, i) { \
    if(UTF_IS_SECOND_SURROGATE((s)[i])) { \
        --(i); \
    } \
}
00632 
/**
 * Reads s[i] into c and post-increments i.
 *  - First surrogate followed (below length) by a second surrogate: the
 *    pair is combined and i is advanced past the second unit.
 *  - First surrogate without such a partner: c is left as the lone
 *    surrogate, or set to UTF_ERROR_VALUE when strict is set.
 *  - Any other unit: kept, except that strict mode replaces values failing
 *    UTF_IS_UNICODE_CHAR (which includes a lone second surrogate) with
 *    UTF_ERROR_VALUE.
 * A combined supplementary value is not re-checked here, even in strict mode.
 */
#define UTF16_NEXT_CHAR_SAFE(s, i, length, c, strict) { \
    (c)=(s)[(i)++]; \
    if(!UTF_IS_FIRST_SURROGATE(c)) { \
        if((strict) && !UTF_IS_UNICODE_CHAR(c)) { \
            /* unmatched second surrogate or other non-character */ \
            (c)=UTF_ERROR_VALUE; \
        } \
    } else { \
        uint16_t __c2; \
        if((i)<(length) && UTF_IS_SECOND_SURROGATE(__c2=(s)[(i)])) { \
            ++(i); \
            (c)=UTF16_GET_PAIR_VALUE((c), __c2); \
        } else if(strict) { \
            /* unmatched first surrogate */ \
            (c)=UTF_ERROR_VALUE; \
        } \
    } \
}
00651 
/**
 * Appends code point c at s[i] and advances i.
 *  - c <= 0xffff: one unit is written; i is not compared to length.
 *  - c <= 0x10ffff and i+1 < length: a surrogate pair is written.
 *  - otherwise (no room for a pair, or c > 0x10ffff): the single unit
 *    UTF_ERROR_VALUE is written.
 */
#define UTF16_APPEND_CHAR_SAFE(s, i, length, c) { \
    if((uint32_t)(c)<=0xffff) { \
        (s)[(i)++]=(uint16_t)(c); \
    } else if((uint32_t)(c)<=0x10ffff && (i)+1<(length)) { \
        (s)[(i)++]=(uint16_t)(((c)>>10)+0xd7c0); \
        (s)[(i)++]=(uint16_t)(((c)&0x3ff)|0xdc00); \
    } else /* not enough space, or c>0x10ffff: write error value */ { \
        (s)[(i)++]=UTF_ERROR_VALUE; \
    } \
}
00667 
/*
 * Old-name aliases. Each expands to the U16_* macro shown, with the same
 * arguments in the same order; the U16_* macros are not defined in this file.
 */

/** Alias for U16_FWD_1. */
#define UTF16_FWD_1_SAFE(s, i, length)         U16_FWD_1(s, i, length)

/** Alias for U16_FWD_N. */
#define UTF16_FWD_N_SAFE(s, i, length, n)      U16_FWD_N(s, i, length, n)

/** Alias for U16_SET_CP_START. */
#define UTF16_SET_CHAR_START_SAFE(s, start, i) U16_SET_CP_START(s, start, i)
00676 
/**
 * Pre-decrements i and reads s[i] into c. If the unit is a second
 * surrogate, i is decremented again and the unit found there is combined
 * with it; that earlier unit is not checked.
 */
#define UTF16_PREV_CHAR_UNSAFE(s, i, c) { \
    (c)=(s)[--(i)]; \
    if(UTF_IS_SECOND_SURROGATE(c)) { \
        /* consume the preceding unit as the first half of the pair */ \
        (c)=UTF16_GET_PAIR_VALUE((s)[--(i)], (c)); \
    } \
}

/**
 * Moves i back by one unit, or by two when the unit it first lands on is
 * a second surrogate. No lower bound is checked.
 */
#define UTF16_BACK_1_UNSAFE(s, i) { \
    if(UTF_IS_SECOND_SURROGATE((s)[--(i)])) { \
        --(i); \
    } \
}

/**
 * Applies UTF16_BACK_1_UNSAFE n times; does nothing when n is zero or
 * negative. n is evaluated once, into a private int32_t counter.
 */
#define UTF16_BACK_N_UNSAFE(s, i, n) { \
    int32_t __N=(n); \
    for(; __N>0; --__N) { \
        UTF16_BACK_1_UNSAFE(s, i); \
    } \
}

/**
 * Increments i once when the unit just before it, s[i-1], is a first
 * surrogate; otherwise leaves it alone.
 */
#define UTF16_SET_CHAR_LIMIT_UNSAFE(s, i) { \
    if(UTF_IS_FIRST_SURROGATE((s)[(i)-1])) { \
        ++(i); \
    } \
}
00707 
/**
 * Pre-decrements i and reads s[i] into c.
 *  - Second surrogate preceded (above start) by a first surrogate: the
 *    pair is combined and i is moved back onto the first unit.
 *  - Second surrogate without such a partner: c is left as the lone
 *    surrogate, or set to UTF_ERROR_VALUE when strict is set.
 *  - Any other unit: kept, except that strict mode replaces values failing
 *    UTF_IS_UNICODE_CHAR (which includes a lone first surrogate) with
 *    UTF_ERROR_VALUE.
 * A combined supplementary value is not re-checked here, even in strict mode.
 */
#define UTF16_PREV_CHAR_SAFE(s, start, i, c, strict) { \
    (c)=(s)[--(i)]; \
    if(!UTF_IS_SECOND_SURROGATE(c)) { \
        if((strict) && !UTF_IS_UNICODE_CHAR(c)) { \
            /* unmatched first surrogate or other non-character */ \
            (c)=UTF_ERROR_VALUE; \
        } \
    } else { \
        uint16_t __c2; \
        if((i)>(start) && UTF_IS_FIRST_SURROGATE(__c2=(s)[(i)-1])) { \
            --(i); \
            (c)=UTF16_GET_PAIR_VALUE(__c2, (c)); \
        } else if(strict) { \
            /* unmatched second surrogate */ \
            (c)=UTF_ERROR_VALUE; \
        } \
    } \
}
00726 
/*
 * Old-name aliases. Each expands to the U16_* macro shown, with the same
 * arguments in the same order; the U16_* macros are not defined in this file.
 */

/** Alias for U16_BACK_1. */
#define UTF16_BACK_1_SAFE(s, start, i)                 U16_BACK_1(s, start, i)

/** Alias for U16_BACK_N. */
#define UTF16_BACK_N_SAFE(s, start, i, n)              U16_BACK_N(s, start, i, n)

/** Alias for U16_SET_CP_LIMIT. */
#define UTF16_SET_CHAR_LIMIT_SAFE(s, start, i, length) U16_SET_CP_LIMIT(s, start, i, length)
00735 
00736 /* Formerly utf32.h --------------------------------------------------------- */
00737 
00738 /*
00739 * Old documentation:
00740 *
00741 *   This file defines macros to deal with UTF-32 code units and code points.
00742 *   Signatures and semantics are the same as for the similarly named macros
00743 *   in utf16.h.
00744 *   utf32.h is included by utf.h after unicode/umachine.h
00745 *   and some common definitions.
00746 *   <p><b>Usage:</b>  ICU coding guidelines for if() statements should be followed when using these macros.
00747 *                  Compound statements (curly braces {}) must be used  for if-else-while...
00748 *                  bodies and all macro statements should be terminated with semicolon.</p>
00749 */
00750 
00751 /* internal definitions ----------------------------------------------------- */
00752 
/**
 * Acceptance test used by the UTF-32 *_SAFE macros.
 * With strict set, c must satisfy UTF_IS_UNICODE_CHAR; otherwise it only
 * has to be at most 0x10ffff as an unsigned 32-bit value.
 */
#define UTF32_IS_SAFE(c, strict) \
    ((strict) ? \
        UTF_IS_UNICODE_CHAR(c) : \
        (uint32_t)(c)<=0x10ffff)
00758 
00759 /*
00760  * For the semantics of all of these macros, see utf16.h.
00761  * The UTF-32 versions are trivial because any code point is
00762  * encoded using exactly one code unit.
00763  */
00764 
00765 /* single-code point definitions -------------------------------------------- */
00766 
00767 /* classes of code unit values */
00768 
/** Always 1; the argument is ignored. */
#define UTF32_IS_SINGLE(uchar) 1

/** Always 0; the argument is ignored. */
#define UTF32_IS_LEAD(uchar) 0

/** Always 0; the argument is ignored. */
#define UTF32_IS_TRAIL(uchar) 0

/* number of code units per code point */

/** Always 0; the argument is ignored. */
#define UTF32_NEED_MULTIPLE_UCHAR(c) 0

/** Always 1; the argument is ignored. */
#define UTF32_CHAR_LENGTH(c) 1

/** Largest value produced by UTF32_CHAR_LENGTH. */
#define UTF32_MAX_CHAR_LENGTH 1

/* average number of code units compared to UTF-16 */

/** Expands to its argument unchanged. */
#define UTF32_ARRAY_SIZE(size) (size)
00789 
/** Copies s[i] into c. i is not modified and nothing is checked. */
#define UTF32_GET_CHAR_UNSAFE(s, i, c) { \
    (c)=(s)[i]; \
}

/**
 * Copies s[i] into c, then replaces c with UTF_ERROR_VALUE if it fails
 * UTF32_IS_SAFE(c, strict). i is not modified; start and length are
 * accepted but not used.
 */
#define UTF32_GET_CHAR_SAFE(s, start, i, length, c, strict) { \
    (c)=(s)[i]; \
    if(!UTF32_IS_SAFE(c, strict)) { \
        (c)=UTF_ERROR_VALUE; \
    } \
}
00802 
00803 /* definitions with forward iteration --------------------------------------- */
00804 
/** Copies s[i] into c and post-increments i. */
#define UTF32_NEXT_CHAR_UNSAFE(s, i, c) { \
    (c)=(s)[(i)++]; \
}

/** Stores c at s[i] and post-increments i; c is not checked. */
#define UTF32_APPEND_CHAR_UNSAFE(s, i, c) { \
    (s)[(i)++]=(c); \
}

/** Increments i by one; s is not used. */
#define UTF32_FWD_1_UNSAFE(s, i) { \
    ++(i); \
}

/** Adds n to i; s is not used. */
#define UTF32_FWD_N_UNSAFE(s, i, n) { \
    (i)+=(n); \
}

/** Expands to an empty block: i is left as it is. */
#define UTF32_SET_CHAR_START_UNSAFE(s, i) { \
}
00828 
/**
 * Copies s[i] into c and post-increments i, then replaces c with
 * UTF_ERROR_VALUE if it fails UTF32_IS_SAFE(c, strict).
 * length is accepted but not used.
 */
#define UTF32_NEXT_CHAR_SAFE(s, i, length, c, strict) { \
    (c)=(s)[(i)++]; \
    if(!UTF32_IS_SAFE(c, strict)) { \
        (c)=UTF_ERROR_VALUE; \
    } \
}

/**
 * Stores c at s[i] and post-increments i; a c above 0x10ffff (as an
 * unsigned 32-bit value) is replaced by 0xfffd. length is accepted but
 * not used.
 */
#define UTF32_APPEND_CHAR_SAFE(s, i, length, c) { \
    if((uint32_t)(c)>0x10ffff) { \
        /* out of range: write 0xfffd instead */ \
        (s)[(i)++]=0xfffd; \
    } else { \
        (s)[(i)++]=(c); \
    } \
}

/** Increments i by one; s and length are not used. */
#define UTF32_FWD_1_SAFE(s, i, length) { \
    ++(i); \
}

/** Adds n to i and clamps the result to at most length. */
#define UTF32_FWD_N_SAFE(s, i, length, n) { \
    (i)+=(n); \
    if((i)>(length)) { \
        (i)=(length); \
    } \
}

/** Expands to an empty block: i is left as it is. */
#define UTF32_SET_CHAR_START_SAFE(s, start, i) { \
}
00861 
00862 /* definitions with backward iteration -------------------------------------- */
00863 
/** Pre-decrements i and copies s[i] into c. */
#define UTF32_PREV_CHAR_UNSAFE(s, i, c) { \
    (c)=(s)[--(i)]; \
}

/** Decrements i by one; s is not used. */
#define UTF32_BACK_1_UNSAFE(s, i) { \
    --(i); \
}

/** Subtracts n from i; s is not used. */
#define UTF32_BACK_N_UNSAFE(s, i, n) { \
    (i)-=(n); \
}

/** Expands to an empty block: i is left as it is. */
#define UTF32_SET_CHAR_LIMIT_UNSAFE(s, i) { \
}
00882 
/**
 * Pre-decrements i and copies s[i] into c, then replaces c with
 * UTF_ERROR_VALUE if it fails UTF32_IS_SAFE(c, strict).
 * start is accepted but not used.
 */
#define UTF32_PREV_CHAR_SAFE(s, start, i, c, strict) { \
    (c)=(s)[--(i)]; \
    if(!UTF32_IS_SAFE(c, strict)) { \
        (c)=UTF_ERROR_VALUE; \
    } \
}

/** Decrements i by one; s and start are not used. */
#define UTF32_BACK_1_SAFE(s, start, i) { \
    --(i); \
}

/** Subtracts n from i and clamps the result to at least start. */
#define UTF32_BACK_N_SAFE(s, start, i, n) { \
    if(((i)-=(n))<(start)) { \
        (i)=(start); \
    } \
}

/**
 * Expands to an empty block: i is left as it is.
 * NOTE(review): this macro takes (s, i, length), whereas
 * UTF8_SET_CHAR_LIMIT_SAFE and UTF16_SET_CHAR_LIMIT_SAFE take
 * (s, start, i, length). The parameter list is kept as it is so that
 * existing three-argument callers keep compiling.
 */
#define UTF32_SET_CHAR_LIMIT_SAFE(s, i, length) { \
}
00907 
00908 /* Formerly utf.h, part 2 --------------------------------------------------- */
00909 
/*
 * Encoding-neutral names. Every macro in this group expands to the
 * UTF16_* macro of the same suffix, with the same arguments in the same
 * order.
 */

/** Alias for UTF16_ARRAY_SIZE. */
#define UTF_ARRAY_SIZE(size) UTF16_ARRAY_SIZE(size)

/* random access */

/** Alias for UTF16_GET_CHAR_UNSAFE. */
#define UTF_GET_CHAR_UNSAFE(s, i, c) UTF16_GET_CHAR_UNSAFE(s, i, c)

/** Alias for UTF16_GET_CHAR_SAFE. */
#define UTF_GET_CHAR_SAFE(s, start, i, length, c, strict) UTF16_GET_CHAR_SAFE(s, start, i, length, c, strict)

/* forward iteration */

/** Alias for UTF16_NEXT_CHAR_UNSAFE. */
#define UTF_NEXT_CHAR_UNSAFE(s, i, c) UTF16_NEXT_CHAR_UNSAFE(s, i, c)

/** Alias for UTF16_NEXT_CHAR_SAFE. */
#define UTF_NEXT_CHAR_SAFE(s, i, length, c, strict) UTF16_NEXT_CHAR_SAFE(s, i, length, c, strict)

/** Alias for UTF16_APPEND_CHAR_UNSAFE. */
#define UTF_APPEND_CHAR_UNSAFE(s, i, c) UTF16_APPEND_CHAR_UNSAFE(s, i, c)

/** Alias for UTF16_APPEND_CHAR_SAFE. */
#define UTF_APPEND_CHAR_SAFE(s, i, length, c) UTF16_APPEND_CHAR_SAFE(s, i, length, c)

/** Alias for UTF16_FWD_1_UNSAFE. */
#define UTF_FWD_1_UNSAFE(s, i) UTF16_FWD_1_UNSAFE(s, i)

/** Alias for UTF16_FWD_1_SAFE. */
#define UTF_FWD_1_SAFE(s, i, length) UTF16_FWD_1_SAFE(s, i, length)

/** Alias for UTF16_FWD_N_UNSAFE. */
#define UTF_FWD_N_UNSAFE(s, i, n) UTF16_FWD_N_UNSAFE(s, i, n)

/** Alias for UTF16_FWD_N_SAFE. */
#define UTF_FWD_N_SAFE(s, i, length, n) UTF16_FWD_N_SAFE(s, i, length, n)

/** Alias for UTF16_SET_CHAR_START_UNSAFE. */
#define UTF_SET_CHAR_START_UNSAFE(s, i) UTF16_SET_CHAR_START_UNSAFE(s, i)

/** Alias for UTF16_SET_CHAR_START_SAFE. */
#define UTF_SET_CHAR_START_SAFE(s, start, i) UTF16_SET_CHAR_START_SAFE(s, start, i)

/* backward iteration */

/** Alias for UTF16_PREV_CHAR_UNSAFE. */
#define UTF_PREV_CHAR_UNSAFE(s, i, c) UTF16_PREV_CHAR_UNSAFE(s, i, c)

/** Alias for UTF16_PREV_CHAR_SAFE. */
#define UTF_PREV_CHAR_SAFE(s, start, i, c, strict) UTF16_PREV_CHAR_SAFE(s, start, i, c, strict)

/** Alias for UTF16_BACK_1_UNSAFE. */
#define UTF_BACK_1_UNSAFE(s, i) UTF16_BACK_1_UNSAFE(s, i)

/** Alias for UTF16_BACK_1_SAFE. */
#define UTF_BACK_1_SAFE(s, start, i) UTF16_BACK_1_SAFE(s, start, i)

/** Alias for UTF16_BACK_N_UNSAFE. */
#define UTF_BACK_N_UNSAFE(s, i, n) UTF16_BACK_N_UNSAFE(s, i, n)

/** Alias for UTF16_BACK_N_SAFE. */
#define UTF_BACK_N_SAFE(s, start, i, n) UTF16_BACK_N_SAFE(s, start, i, n)

/** Alias for UTF16_SET_CHAR_LIMIT_UNSAFE. */
#define UTF_SET_CHAR_LIMIT_UNSAFE(s, i) UTF16_SET_CHAR_LIMIT_UNSAFE(s, i)

/** Alias for UTF16_SET_CHAR_LIMIT_SAFE. */
#define UTF_SET_CHAR_LIMIT_SAFE(s, start, i, length) UTF16_SET_CHAR_LIMIT_SAFE(s, start, i, length)
00985 
00986 /* Define default macros (UTF-16 "safe") ------------------------------------ */
00987 
/*
 * Default (unsuffixed) names. Most expand to a U16_* macro, which is not
 * defined in this file; UTF_NEED_MULTIPLE_UCHAR and UTF_APPEND_CHAR expand
 * to UTF16_* macros from this header instead. Arguments are passed through
 * unchanged and in the same order.
 */

/** Alias for U16_IS_SINGLE. */
#define UTF_IS_SINGLE(uchar)                    U16_IS_SINGLE(uchar)

/** Alias for U16_IS_LEAD. */
#define UTF_IS_LEAD(uchar)                      U16_IS_LEAD(uchar)

/** Alias for U16_IS_TRAIL. */
#define UTF_IS_TRAIL(uchar)                     U16_IS_TRAIL(uchar)

/** Alias for UTF16_NEED_MULTIPLE_UCHAR. */
#define UTF_NEED_MULTIPLE_UCHAR(c)              UTF16_NEED_MULTIPLE_UCHAR(c)

/** Alias for U16_LENGTH. */
#define UTF_CHAR_LENGTH(c)                      U16_LENGTH(c)

/** Alias for U16_MAX_LENGTH. */
#define UTF_MAX_CHAR_LENGTH                     U16_MAX_LENGTH

/** Alias for U16_GET. */
#define UTF_GET_CHAR(s, start, i, length, c)    U16_GET(s, start, i, length, c)

/** Alias for U16_NEXT. */
#define UTF_NEXT_CHAR(s, i, length, c)          U16_NEXT(s, i, length, c)

/** Alias for UTF16_APPEND_CHAR_SAFE. */
#define UTF_APPEND_CHAR(s, i, length, c)        UTF16_APPEND_CHAR_SAFE(s, i, length, c)

/** Alias for U16_FWD_1. */
#define UTF_FWD_1(s, i, length)                 U16_FWD_1(s, i, length)

/** Alias for U16_FWD_N. */
#define UTF_FWD_N(s, i, length, n)              U16_FWD_N(s, i, length, n)

/** Alias for U16_SET_CP_START. */
#define UTF_SET_CHAR_START(s, start, i)         U16_SET_CP_START(s, start, i)

/** Alias for U16_PREV. */
#define UTF_PREV_CHAR(s, start, i, c)           U16_PREV(s, start, i, c)

/** Alias for U16_BACK_1. */
#define UTF_BACK_1(s, start, i)                 U16_BACK_1(s, start, i)

/** Alias for U16_BACK_N. */
#define UTF_BACK_N(s, start, i, n)              U16_BACK_N(s, start, i, n)

/** Alias for U16_SET_CP_LIMIT. */
#define UTF_SET_CHAR_LIMIT(s, start, i, length) U16_SET_CP_LIMIT(s, start, i, length)
01159 
01160 #endif

Generated on Mon Nov 24 14:35:45 2003 for ICU 2.8 by doxygen1.2.11.1 written by Dimitri van Heesch, © 1997-2001