unchecked.h
Go to the documentation of this file.00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00031 #ifndef UTF8_FOR_CPP_UNCHECKED_H_2675DCD0_9480_4c0c_B92A_CC14C027B731
00032 #define UTF8_FOR_CPP_UNCHECKED_H_2675DCD0_9480_4c0c_B92A_CC14C027B731
00033
00034 #include "core.h"
00035
00036 namespace utf8
00037 {
00038 namespace unchecked
00039 {
// Encodes the code point `cp` as UTF-8 and writes the resulting octets
// through `result`.
//
// No validation is performed: `cp` is not checked against any range, and
// every value of 0x10000 or above is written as a four-octet sequence.
//
// cp      - code point to encode
// result  - output iterator that receives the octets
// returns - `result` advanced past the last octet written
template <typename octet_iterator>
octet_iterator append(uint32_t cp, octet_iterator result)
{
    if (cp < 0x80)                        // one octet: 0xxxxxxx
        *(result++) = static_cast<uint8_t>(cp);
    else if (cp < 0x800) {                // two octets: 110xxxxx 10xxxxxx
        *(result++) = static_cast<uint8_t>((cp >> 6)            | 0xc0);
        *(result++) = static_cast<uint8_t>((cp & 0x3f)          | 0x80);
    }
    else if (cp < 0x10000) {              // three octets: 1110xxxx 10xxxxxx 10xxxxxx
        *(result++) = static_cast<uint8_t>((cp >> 12)           | 0xe0);
        // The masking is parenthesised explicitly so the grouping does not
        // depend on `&` binding tighter than `|`.
        *(result++) = static_cast<uint8_t>(((cp >> 6) & 0x3f)   | 0x80);
        *(result++) = static_cast<uint8_t>((cp & 0x3f)          | 0x80);
    }
    else {                                // four octets: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
        *(result++) = static_cast<uint8_t>((cp >> 18)           | 0xf0);
        *(result++) = static_cast<uint8_t>(((cp >> 12) & 0x3f)  | 0x80);
        *(result++) = static_cast<uint8_t>(((cp >> 6) & 0x3f)   | 0x80);
        *(result++) = static_cast<uint8_t>((cp & 0x3f)          | 0x80);
    }
    return result;
}
00062
00063 template <typename octet_iterator>
00064 uint32_t next(octet_iterator& it)
00065 {
00066 uint32_t cp = internal::mask8(*it);
00067 typename std::iterator_traits<octet_iterator>::difference_type length = utf8::internal::sequence_length(it);
00068 switch (length) {
00069 case 1:
00070 break;
00071 case 2:
00072 it++;
00073 cp = ((cp << 6) & 0x7ff) + ((*it) & 0x3f);
00074 break;
00075 case 3:
00076 ++it;
00077 cp = ((cp << 12) & 0xffff) + ((internal::mask8(*it) << 6) & 0xfff);
00078 ++it;
00079 cp += (*it) & 0x3f;
00080 break;
00081 case 4:
00082 ++it;
00083 cp = ((cp << 18) & 0x1fffff) + ((internal::mask8(*it) << 12) & 0x3ffff);
00084 ++it;
00085 cp += (internal::mask8(*it) << 6) & 0xfff;
00086 ++it;
00087 cp += (*it) & 0x3f;
00088 break;
00089 }
00090 ++it;
00091 return cp;
00092 }
00093
00094 template <typename octet_iterator>
00095 uint32_t peek_next(octet_iterator it)
00096 {
00097 return next(it);
00098 }
00099
00100 template <typename octet_iterator>
00101 uint32_t prior(octet_iterator& it)
00102 {
00103 while (internal::is_trail(*(--it))) ;
00104 octet_iterator temp = it;
00105 return next(temp);
00106 }
00107
00108
// Alternative name for prior(): forwards `it` to prior and returns its result.
//
// it      - iterator handed to prior; modified by it
// returns - the code point returned by prior
template <typename octet_iterator>
inline uint32_t previous(octet_iterator& it)
{
    const uint32_t code_point = prior(it);
    return code_point;
}
00114
00115 template <typename octet_iterator, typename distance_type>
00116 void advance (octet_iterator& it, distance_type n)
00117 {
00118 for (distance_type i = 0; i < n; ++i)
00119 next(it);
00120 }
00121
00122 template <typename octet_iterator>
00123 typename std::iterator_traits<octet_iterator>::difference_type
00124 distance (octet_iterator first, octet_iterator last)
00125 {
00126 typename std::iterator_traits<octet_iterator>::difference_type dist;
00127 for (dist = 0; first < last; ++dist)
00128 next(first);
00129 return dist;
00130 }
00131
00132 template <typename u16bit_iterator, typename octet_iterator>
00133 octet_iterator utf16to8 (u16bit_iterator start, u16bit_iterator end, octet_iterator result)
00134 {
00135 while (start != end) {
00136 uint32_t cp = internal::mask16(*start++);
00137
00138 if (internal::is_surrogate(cp)) {
00139 uint32_t trail_surrogate = internal::mask16(*start++);
00140 cp = (cp << 10) + trail_surrogate + internal::SURROGATE_OFFSET;
00141 }
00142 result = append(cp, result);
00143 }
00144 return result;
00145 }
00146
00147 template <typename u16bit_iterator, typename octet_iterator>
00148 u16bit_iterator utf8to16 (octet_iterator start, octet_iterator end, u16bit_iterator result)
00149 {
00150 while (start != end) {
00151 uint32_t cp = next(start);
00152 if (cp > 0xffff) {
00153 *result++ = static_cast<uint16_t>((cp >> 10) + internal::LEAD_OFFSET);
00154 *result++ = static_cast<uint16_t>((cp & 0x3ff) + internal::TRAIL_SURROGATE_MIN);
00155 }
00156 else
00157 *result++ = static_cast<uint16_t>(cp);
00158 }
00159 return result;
00160 }
00161
// Encodes every element of [start, end) as UTF-8 by passing it to append(),
// writing the octets through `result`.
//
// start, end - range of code points to read
// result     - output iterator that receives the UTF-8 octets
// returns    - `result` advanced past the last octet written
template <typename octet_iterator, typename u32bit_iterator>
octet_iterator utf32to8 (u32bit_iterator start, u32bit_iterator end, octet_iterator result)
{
    for (; start != end; ++start)
        result = append(*start, result);

    return result;
}
00170
00171 template <typename octet_iterator, typename u32bit_iterator>
00172 u32bit_iterator utf8to32 (octet_iterator start, octet_iterator end, u32bit_iterator result)
00173 {
00174 while (start < end)
00175 (*result++) = next(start);
00176
00177 return result;
00178 }
00179
00180
00181 template <typename octet_iterator>
00182 class iterator : public std::iterator <std::bidirectional_iterator_tag, uint32_t> {
00183 octet_iterator it;
00184 public:
00185 iterator () {};
00186 explicit iterator (const octet_iterator& octet_it): it(octet_it) {}
00187
00188 octet_iterator base () const { return it; }
00189 uint32_t operator * () const
00190 {
00191 octet_iterator temp = it;
00192 return next(temp);
00193 }
00194 bool operator == (const iterator& rhs) const
00195 {
00196 return (it == rhs.it);
00197 }
00198 bool operator != (const iterator& rhs) const
00199 {
00200 return !(operator == (rhs));
00201 }
00202 iterator& operator ++ ()
00203 {
00204 std::advance(it, internal::sequence_length(it));
00205 return *this;
00206 }
00207 iterator operator ++ (int)
00208 {
00209 iterator temp = *this;
00210 std::advance(it, internal::sequence_length(it));
00211 return temp;
00212 }
00213 iterator& operator -- ()
00214 {
00215 prior(it);
00216 return *this;
00217 }
00218 iterator operator -- (int)
00219 {
00220 iterator temp = *this;
00221 prior(it);
00222 return temp;
00223 }
00224 };
00225
00226 }
00227 }
00228
00229
00230 #endif // header guard
00231