UCommon
/usr/src/RPM/BUILD/ucommon-6.3.3/inc/ucommon/unicode.h
Go to the documentation of this file.
00001 // Copyright (C) 2009-2014 David Sugar, Tycho Softworks.
00002 // Copyright (C) 2015 Cherokees of Idaho.
00003 //
00004 // This file is part of GNU uCommon C++.
00005 //
00006 // GNU uCommon C++ is free software: you can redistribute it and/or modify
00007 // it under the terms of the GNU Lesser General Public License as published
00008 // by the Free Software Foundation, either version 3 of the License, or
00009 // (at your option) any later version.
00010 //
00011 // GNU uCommon C++ is distributed in the hope that it will be useful,
00012 // but WITHOUT ANY WARRANTY; without even the implied warranty of
00013 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00014 // GNU Lesser General Public License for more details.
00015 //
00016 // You should have received a copy of the GNU Lesser General Public License
00017 // along with GNU uCommon C++.  If not, see <http://www.gnu.org/licenses/>.
00018 
00033 #ifndef _UCOMMON_UNICODE_H_
00034 #define _UCOMMON_UNICODE_H_
00035 
00036 #ifndef _UCOMMON_STRING_H_
00037 #include <ucommon/string.h>
00038 #endif
00039 
// The utf8 class below declares a static member named `nil`. Remove any
// preprocessor macro of that name first so the member declaration is not
// rewritten by macro expansion.
00040 #ifdef nil
00041 #undef nil
00042 #endif
00043 
00044 namespace ucommon {
00045 
// Signed 32-bit integer alias used by this header wherever a single decoded
// character value is passed or returned (e.g. utf8::codepoint, UString::at).
00050 typedef int32_t ucs4_t;
00051 
// Signed 16-bit integer alias; the element type of the buffers returned by
// utf8::wdup and strwdup.
// NOTE(review): because the type is signed, 16-bit values above 0x7FFF read
// back as negative numbers -- confirm callers treat elements as raw units.
00055 typedef int16_t ucs2_t;
00056 
// Untyped pointer alias used for Unicode text buffers; the element width is
// not expressed in the type.
// Because this is a pointer typedef, `const unicode_t` in the signatures
// below means `void *const` (the pointer itself is constant), not a pointer
// to constant data.
// NOTE(review): the pointed-to element type is presumably a wide character
// type -- confirm against the implementation file.
00060 typedef void *unicode_t;
00061 
// Exported class that groups static helpers for UTF-8 encoded C strings and
// ucs4_t character values. It declares only static members, so it carries no
// per-object state. All members are defined outside this header; behaviour
// described as "presumably" below is inferred from names and must be
// confirmed against the implementation.
00067 class __EXPORT utf8
00068 {
00069 public:
          // Static constant defined out of line; its value is not visible here.
          // NOTE(review): presumably the byte width of one unicode_t element.
00073     static const unsigned ucsize;
00074 
          // Static C-string constant defined out of line. This member's name is
          // the reason a `nil` macro is undefined before the namespace opens.
00078     static const char *nil;
00079 
          // Takes a pointer into encoded text and returns an unsigned size.
          // Presumably the number of bytes used by the character that starts
          // at `codepoint` -- TODO confirm, including the result for invalid input.
00085     static unsigned size(const char *codepoint);
00086 
          // Returns a size_t count for `string`; presumably the number of
          // encoded characters (not bytes) -- TODO confirm.
00092     static size_t count(const char *string);
00093 
          // Returns a pointer into the mutable buffer `string` for `position`.
          // `position` is signed (ssize_t).
          // NOTE(review): negative values presumably index from the end -- confirm.
00100     static char *offset(char *string, ssize_t position);
00101 
          // Returns a ucs4_t for the text at `encoded`; presumably the decoded
          // value of the first character -- TODO confirm error return.
00107     static ucs4_t codepoint(const char *encoded);
00108 
          // Overload taking a unicode_t buffer (the pointer, not the data, is
          // const). Presumably the number of UTF-8 bytes needed to encode the
          // whole buffer -- TODO confirm.
00114     static size_t chars(const unicode_t string);
00115 
          // Overload taking one character value. Presumably the number of UTF-8
          // bytes needed to encode `character` -- TODO confirm.
00121     static size_t chars(ucs4_t character);
00122 
          // Transfers between a unicode_t buffer and a CharacterProtocol object
          // (type declared outside this file) and returns a size_t.
          // NOTE(review): presumably writes `string` to `buffer` as UTF-8 and
          // returns the number of characters processed -- confirm.
00129     static size_t unpack(const unicode_t string, CharacterProtocol& buffer);
00130 
          // Counterpart of unpack with a caller-supplied `size` limit for the
          // `unicode` buffer.
          // NOTE(review): presumably reads UTF-8 from `buffer` into `unicode`;
          // whether `size` counts elements or bytes is not visible here.
00138     static size_t pack(unicode_t unicode, CharacterProtocol& buffer, size_t size);
00139 
          // Returns a ucs4_t buffer derived from the C string `string`.
          // NOTE(review): presumably a newly allocated copy the caller must
          // release; this header's dupfree<ucs4_t*> uses ::free -- confirm pairing.
00143     static ucs4_t *udup(const char *string);
00144 
          // Same shape as udup but returns ucs2_t elements.
          // NOTE(review): ownership presumably as for udup -- confirm.
00148     static ucs2_t *wdup(const char *string);
00149 
          // Returns a pointer into `string` related to `character`; `start`
          // defaults to 0.
          // NOTE(review): presumably a forward search that returns NULL when
          // the character is absent -- confirm.
00157     static const char *find(const char *string, ucs4_t character, size_t start = 0);
00158 
          // Reverse-direction counterpart of find. The default for `end` is
          // (size_t)-1, i.e. the largest size_t value.
00166     static const char *rfind(const char *string, ucs4_t character, size_t end = (size_t)-1l);
00167 
          // Returns an unsigned count for `character` within `string`;
          // presumably the number of occurrences -- TODO confirm.
00174     static unsigned ccount(const char *string, ucs4_t character);
00175 
          // Obtains one ucs4_t value through a CharacterProtocol object.
          // NOTE(review): end-of-input / error return value is not visible here.
00181     static ucs4_t get(CharacterProtocol& buffer);
00182 
          // Sends `character` through a CharacterProtocol object and returns a
          // ucs4_t; the meaning of the return value is not visible here.
00189     static ucs4_t put(ucs4_t character, CharacterProtocol& buffer);
00190 };
00191 
// Exported string class deriving publicly from String (declared in
// ucommon/string.h) and from the utf8 helper class above.
// Several members reuse names that also exist in utf8 (get, count, ccount,
// find, rfind); declaring them here hides the utf8 versions inside this
// class, which is why the inline bodies qualify calls as utf8::...
// NOTE(review): no `public:` label appears in this class, so as listed every
// member -- constructors included -- is protected. Confirm this is intended.
// NOTE(review): positions and sizes are presumably measured in characters
// rather than bytes -- confirm against the implementation.
00198 class __EXPORT UString : public String, public utf8
00199 {
00200 protected:
          // Default constructor; defined out of line.
00204     UString();
00205 
          // Constructs from a size only.
          // NOTE(review): presumably pre-allocates storage -- confirm units.
00210     UString(strsize_t size);
00211 
          // Constructs from a unicode_t buffer (constant pointer, see the
          // unicode_t typedef).
00216     UString(const unicode_t text);
00217 
          // Constructs from a C string plus a size.
00224     UString(const char *text, strsize_t size);
00225 
          // Constructs from a [text, end) pair.
          // NOTE(review): the parameters are `const unicode_t *`, which is
          // `void *const *` (pointer to pointer) as written -- verify intent.
00232     UString(const unicode_t *text, const unicode_t *end);
00233 
          // Copy constructor.
00239     UString(const UString& existing);
00240 
          // Virtual destructor.
00245     virtual ~UString();
00246 
          // Returns a UString by value for the given start position and size;
          // `size` defaults to 0.
          // NOTE(review): 0 presumably means "to the end" -- confirm.
00253     UString get(strsize_t codepoint, strsize_t size = 0) const;
00254 
          // Overload taking a caller-supplied unicode_t buffer and a size;
          // returns a size_t.
          // NOTE(review): presumably fills `unicode` and returns the number of
          // elements written -- confirm.
00261     size_t get(unicode_t unicode, size_t size) const;
00262 
          // Non-const member taking a unicode_t buffer; presumably replaces the
          // string content -- TODO confirm.
00267     void set(const unicode_t unicode);
00268 
          // Non-const member taking a unicode_t buffer; presumably appends to
          // the string content -- TODO confirm.
00273     void add(const unicode_t unicode);
00274 
          // Returns the ucs4_t value for `position` (signed int).
00280     ucs4_t at(int position) const;
00281 
          // Call-operator form of get(unicode, size); forwards both arguments
          // unchanged and returns its result.
00288     inline size_t operator()(unicode_t unicode, size_t size) const
00289         {return get(unicode, size);}
00290 
          // Call-operator returning a UString for a signed start position and
          // a size; defined out of line. left/right/copy below are built on it.
00297     UString operator()(int codepoint, strsize_t size) const;
00298 
          // Equivalent to operator()(0, size).
00304     inline UString left(strsize_t size) const
00305         {return operator()(0, size);}
00306 
          // Equivalent to operator()(-offset, 0): the offset is converted to
          // int and negated, and the size argument is 0.
          // NOTE(review): presumably a negative start counts from the end and
          // size 0 selects the remainder -- confirm.
00312     inline UString right(strsize_t offset) const
00313         {return operator()(-((int)offset), 0);}
00314 
          // Equivalent to operator()(offset, size) with offset converted to int.
00321     inline UString copy(strsize_t offset, strsize_t size) const
00322         {return operator()((int)offset, size);}
00323 
          // Non-const member taking an offset and a size (default 0);
          // presumably removes that range -- TODO confirm.
00329     void cut(strsize_t offset, strsize_t size = 0);
00330 
          // Non-const member taking an offset, a C string and a size
          // (default 0); presumably inserts `text` at `offset` -- TODO confirm.
00337     void paste(strsize_t offset, const char *text, strsize_t size = 0);
00338 
          // Single-argument call-operator returning a const char pointer for a
          // signed offset; defined out of line.
00346     const char *operator()(int offset) const;
00347 
          // Subscript access; delegates to UString::at with an explicitly
          // qualified call.
00353     inline ucs4_t operator[](int position) const
00354         {return UString::at(position);}
00355 
          // Passes str->text to utf8::count and returns the result as strsize_t.
          // `str` is inherited (declared outside this file) and is dereferenced
          // here without a null check.
          // NOTE(review): verify `str` cannot be null when this is called.
00360     inline strsize_t count(void) const
00361         {return utf8::count(str->text);}
00362 
          // Member counterpart of utf8::ccount operating on this string.
00368     unsigned ccount(ucs4_t character) const;
00369 
          // Member counterpart of utf8::find; `start` defaults to 0.
00376     const char *find(ucs4_t character, strsize_t start = 0) const;
00377 
          // Member counterpart of utf8::rfind; `end` defaults to npos, a name
          // declared outside this file.
00384     const char *rfind(ucs4_t character, strsize_t end = npos) const;
00385 };
00386 
// Exported wrapper around a raw uint8_t pointer (`text`) that offers
// pointer-style operators. Dereference yields a ucs4_t via utf8::codepoint
// and len() uses utf8::count, so the wrapper presents the bytes as UTF-8.
// NOTE(review): increments/offsets are presumably measured in characters
// rather than bytes -- confirm against the out-of-line definitions.
// NOTE(review): constructors accept `const char *` but `text` and c_str()
// are non-const, so constness is dropped somewhere in the implementation.
00392 class __EXPORT utf8_pointer
00393 {
00394 protected:
          // Current position; compared against NULL by operator bool / operator!.
00395     uint8_t *text;
00396 
00397 public:
          // Default constructor; the initial value of `text` is set out of line.
00401     utf8_pointer();
00402 
          // Constructs from a C string pointer.
00407     utf8_pointer(const char *string);
00408 
          // Copy constructor.
00413     utf8_pointer(const utf8_pointer& copy);
00414 
          // Prefix increment; returns *this by reference.
00419     utf8_pointer& operator ++();
00420 
          // Prefix decrement; returns *this by reference.
00425     utf8_pointer& operator --();
00426 
          // Advances by a signed offset; returns *this by reference.
00432     utf8_pointer& operator +=(long offset);
00433 
          // Moves back by a signed offset; returns *this by reference.
00439     utf8_pointer& operator -=(long offset);
00440 
          // Returns a new utf8_pointer offset forward; *this is not modified
          // (const member).
00446     utf8_pointer operator+(long offset) const;
00447 
          // Returns a new utf8_pointer offset backward; *this is not modified
          // (const member).
00453     utf8_pointer operator-(long offset) const;
00454 
          // True when `text` is not NULL. This conversion is implicit and
          // coexists with the implicit operator char*() below.
00459     inline operator bool() const
00460         {return text != NULL;}
00461 
          // True when `text` is NULL.
00466     inline bool operator!() const
00467         {return text == NULL;}
00468 
          // Returns the ucs4_t value for a signed index relative to `text`.
00474     ucs4_t operator[](long codepoint) const;
00475 
          // Re-points this object at another C string; returns *this.
00481     utf8_pointer& operator=(const char *string);
00482 
          // Named equivalents of stepping forward / backward.
          // NOTE(review): presumably what operator++ / operator-- call -- confirm.
00486     void inc(void);
00487 
00491     void dec(void);
00492 
          // Compares the stored address with `string`; this is a pointer
          // comparison, not a comparison of string contents.
00498     inline bool operator==(const char *string) const
00499         {return (const char *)text == string;}
00500 
          // Address inequality; same pointer-comparison semantics as operator==.
00506     inline bool operator!=(const char *string) const
00507         {return (const char *)text != string;}
00508 
          // Dereference: passes `text` to utf8::codepoint and returns its result.
00513     inline  ucs4_t operator*() const
00514         {return utf8::codepoint((const char *)text);}
00515 
          // Returns the stored pointer reinterpreted as a mutable char pointer.
00520     inline char *c_str(void) const
00521         {return (char *)text;}
00522 
          // Implicit conversion to a mutable char pointer; same value as c_str().
00527     inline operator char*() const
00528         {return (char *)text;}
00529 
          // Passes `text` to utf8::count and returns its result.
00534     inline size_t len(void) const
00535         {return utf8::count((const char *)text);}
00536 };
00537 
// Free-function shorthand for utf8::udup; forwards `string` unchanged and
// returns the resulting ucs4_t buffer.
00538 inline ucs4_t *strudup(const char *string)
00539     {return utf8::udup(string);}
00540 
// Free-function shorthand for utf8::wdup; forwards `string` unchanged and
// returns the resulting ucs2_t buffer.
00541 inline ucs2_t *strwdup(const char *string)
00542     {return utf8::wdup(string);}
00543 
// Exported function, defined outside this header, that takes a C string and
// returns a unicode_t buffer.
// NOTE(review): presumably returns a newly allocated copy that the caller
// releases (dupfree<unicode_t> in this header uses ::free) -- confirm.
00544 __EXPORT unicode_t unidup(const char *string);
00545 
// Explicit specialization of the dupfree template (primary template declared
// outside this file) for ucs2_t buffers; releases the pointer with ::free.
// NOTE(review): this implies the buffer came from a malloc-family allocator,
// presumably via strwdup / utf8::wdup -- confirm.
00546 template<>
00547 inline void dupfree<ucs2_t*>(ucs2_t *string)
00548     {::free(string);}
00549 
// Explicit specialization of the dupfree template (primary template declared
// outside this file) for ucs4_t buffers; releases the pointer with ::free.
// NOTE(review): this implies the buffer came from a malloc-family allocator,
// presumably via strudup / utf8::udup -- confirm.
00550 template<>
00551 inline void dupfree<ucs4_t*>(ucs4_t *string)
00552     {::free(string);}
00553 
// Explicit specialization of the dupfree template (primary template declared
// outside this file) for unicode_t (void *) buffers; releases the pointer
// with ::free.
// NOTE(review): this implies the buffer came from a malloc-family allocator,
// presumably via unidup -- confirm.
00554 template<>
00555 inline void dupfree<unicode_t>(unicode_t string)
00556     {::free(string);}
00557 
// Alternate spelling of the UString class name.
00561 typedef UString ustring_t;
00562 
// Alternate spelling of the utf8_pointer class name (note: this aliases the
// pointer wrapper, not the utf8 helper class).
00566 typedef utf8_pointer utf8_t;
00567 
00568 } // namespace ucommon
00569 
00570 #endif