// Header include guard, followed by the inline copy constructor of utf8 (the
// class header itself is outside this view). The constructor body is empty,
// so nothing is read from `copy`; the trailing ';' after the body is a
// redundant empty declaration.
// NOTE(review): the bare numbers ("33", "34", "36", "72") look like listing
// line numbers left over from a rendered source view, and the guard
// directives appear collapsed onto a single line - confirm against the
// pristine header before relying on this text.
33 #ifndef _UCOMMON_UNICODE_H_ 34 #define _UCOMMON_UNICODE_H_ 36 #ifndef _UCOMMON_STRING_H_ 72 inline utf8(
const utf8&
copy) {};
// Static members of utf8. Only declarations are visible here; every
// definition lives outside this view, so behavioral notes below are hedged.
// Type glossary from this file's symbol notes: ucs4_t is a 32 bit unicode
// character code (int32_t), ucs2_t a 16 bit one (int16_t), and unicode_t is
// a void* stand-in used where wchar_t may not be defined.

// Unsigned constant, value defined elsewhere.
// NOTE(review): presumably the size of the platform unicode character - confirm.
78 static const unsigned ucsize;
// Constant C-string pointer, value defined elsewhere.
// NOTE(review): presumably a shared "empty/none" placeholder - confirm.
83 static const char *nil;
// Takes a pointer into encoded text and returns an unsigned value.
// NOTE(review): presumably the byte length of the sequence at `codepoint` - confirm.
90 static unsigned size(
const char *codepoint);
// Returns a size_t for a C string. UString::count() and utf8_pointer::len()
// both forward to this function.
// NOTE(review): presumably counts code points rather than bytes - confirm.
97 static size_t count(
const char *
string);
// Returns a pointer into the (mutable) string for a signed position.
// NOTE(review): how negative positions and out-of-range values are treated
// is not visible here - confirm before passing either.
105 static char *offset(
char *
string, ssize_t position);
// Returns a 32 bit character code for the text at `encoded`.
// utf8_pointer::operator*() forwards to this function.
112 static ucs4_t codepoint(
const char *encoded);
// Returns a size_t for a unicode_t string.
// NOTE(review): presumably the number of utf8 bytes needed to encode it - confirm.
119 static size_t chars(
const unicode_t
string);
// Overload of chars() for a single 32 bit character code.
126 static size_t chars(ucs4_t character);
// Takes a unicode_t source plus a char buffer `text` of `size`; returns size_t.
// NOTE(review): presumably converts unicode to utf8 into `text`; whether the
// result is NUL-terminated and what the return value counts is not visible.
134 static size_t unpack(
const unicode_t
string,
char *text,
size_t size);
// Takes a unicode_t destination, a const char source `cp` and a length;
// returns size_t.
// NOTE(review): presumably the reverse of unpack(); units of `len` (bytes vs
// characters) are not visible here - confirm.
143 static size_t pack(unicode_t unicode,
const char *cp,
size_t len);
// Returns a ucs4_t buffer for a C string; strudup() forwards here.
// NOTE(review): presumably a newly allocated copy owned by the caller - confirm
// how it must be released.
148 static ucs4_t *udup(
const char *
string);
// Returns a ucs2_t buffer for a C string; strwdup() forwards here.
// NOTE(review): same ownership question as udup() - confirm.
153 static ucs2_t *wdup(
const char *
string);
// Searches `string` for `character`; `start` defaults to 0. Returns a pointer
// into const text.
// NOTE(review): the not-found result and the unit of `start` are not visible.
162 static const char *find(
const char *
string, ucs4_t character,
size_t start = 0);
// Counterpart of find() with an `end` bound. The default, (size_t)-1l, is the
// largest value size_t can hold.
// NOTE(review): presumably that default means "search from the very end" - confirm.
171 static const char *rfind(
const char *
string, ucs4_t character,
size_t end = (
size_t)-1l);
// Returns an unsigned value for a string and a character code.
// NOTE(review): presumably the number of occurrences of `character` - confirm.
179 static unsigned ccount(
const char *
string, ucs4_t character);
// Returns a 32 bit character code read from `cp`.
// NOTE(review): how this differs from codepoint() is not visible here.
186 static ucs4_t
get(
const char *cp);
// Writes something for `character` into `buf`; returns nothing.
// NOTE(review): required capacity of `buf` and NUL termination are not
// visible here - confirm before sizing buffers.
194 static void put(ucs4_t character,
char *buf);
// String class that publicly inherits both String and the utf8 helper class.
// The file's symbol notes describe its alias ustring_t as the "convenience
// type for utf8 encoded strings".
// NOTE(review): the class braces, access specifiers and the closing braces of
// the inline members are not present in this view; comments below describe
// only what the visible tokens establish.
203 class __EXPORT UString :
public String,
public utf8
// Construct from a size.
// NOTE(review): presumably a capacity to reserve - confirm.
215 UString(
size_t size);
// Construct from unicode_t text (a void* stand-in per the symbol notes).
221 UString(
const unicode_t text);
// Construct from a C string and a size.
229 UString(
const char *text,
size_t size);
// Construct from a [text, end) style pair of unicode_t pointers.
// NOTE(review): whether `end` is inclusive is not visible - confirm.
237 UString(
const unicode_t *text,
const unicode_t *end);
// Copy constructor.
244 UString(
const UString& existing);
// Returns a UString for a codepoint position and a size (default 0).
// NOTE(review): presumably size 0 means "to the end" - confirm.
258 UString
get(
size_t codepoint,
size_t size = 0)
const;
// Overload taking a unicode_t destination and a size; returns size_t.
266 size_t get(unicode_t unicode,
size_t size)
const;
// Takes unicode_t text; returns nothing.
// NOTE(review): presumably replaces the current content - confirm.
272 void set(
const unicode_t unicode);
// Takes unicode_t text; returns nothing.
// NOTE(review): presumably appends to the current content - confirm.
278 void add(
const unicode_t unicode);
// Returns the 32 bit character code for a signed position.
// operator[] forwards to this member.
285 ucs4_t at(
int position)
const;
// Function-call form of get(unicode, size): forwards both arguments unchanged.
293 inline size_t operator()(unicode_t unicode,
size_t size)
const {
294 return get(unicode, size);
// Returns a UString for a signed codepoint position and a size; left(),
// right() and copy() are all written in terms of this overload.
303 UString operator()(
int codepoint,
size_t size)
const;
// Calls operator()(0, size). The literal 0 is an int, so the (int, size_t)
// overload is an exact match and is chosen over the unicode_t one.
310 inline UString left(
size_t size)
const {
311 return operator()(0, size);
// Calls operator()(-(int)offset, 0): the offset is narrowed to int, negated,
// and passed with size 0.
// NOTE(review): offsets above INT_MAX do not survive the cast - confirm
// callers never pass such values.
319 inline UString right(
size_t offset)
const {
320 return operator()(-((
int)offset), 0);
// Calls operator()((int)offset, size); the offset is narrowed to int.
329 inline UString
copy(
size_t offset,
size_t size)
const {
330 return operator()((
int)offset, size);
// Takes an offset and a size (default 0); returns nothing.
// NOTE(review): presumably removes text; meaning of size 0 is not visible.
338 void cut(
size_t offset,
size_t size = 0);
// Takes an offset, C-string text and a size (default 0); returns nothing.
// NOTE(review): presumably inserts `text` at `offset` - confirm.
346 void paste(
size_t offset,
const char *text,
size_t size = 0);
// Returns a pointer to const text for a signed offset.
355 const char *operator()(
int offset)
const;
// Index operator: returns UString::at(position).
362 inline ucs4_t operator[](
int position)
const {
363 return UString::at(position);
// Returns utf8::count() of str->text, cast to size_t.
// NOTE(review): `str` is dereferenced unconditionally here - confirm it can
// never be null when this is called.
370 inline size_t count(
void)
const {
371 return (
size_t)utf8::count(str->text);
// Member form of ccount for this string; returns unsigned.
379 unsigned ccount(ucs4_t character)
const;
// Member form of find; `start` defaults to 0.
387 const char *find(ucs4_t character,
size_t start = 0)
const;
// Member form of rfind; `end` defaults to npos (declared outside this view).
395 const char *rfind(ucs4_t character,
size_t end = npos)
const;
// Pointer-like wrapper around a `text` member that the inline members below
// cast to const char*. The file's symbol notes describe its alias utf8_t as
// the "convenience type for utf8_pointer strings".
// NOTE(review): the class braces, the declaration of `text`, and several
// inline bodies are not present in this view.
403 class __EXPORT utf8_pointer
// Construct from a C string.
418 utf8_pointer(
const char *
string);
// Copy constructor.
424 utf8_pointer(
const utf8_pointer&
copy);
// Prefix increment / decrement; each returns a reference to a utf8_pointer.
// NOTE(review): presumably steps by one code point rather than one byte - confirm.
430 utf8_pointer& operator ++();
436 utf8_pointer& operator --();
// Compound add / subtract of a signed long offset.
443 utf8_pointer& operator +=(
long offset);
450 utf8_pointer& operator -=(
long offset);
// Non-mutating add / subtract: const members returning a new utf8_pointer.
457 utf8_pointer operator+(
long offset)
const;
464 utf8_pointer operator-(
long offset)
const;
// Conversion to bool. The body is not visible in this view.
470 inline operator bool()
const {
// Logical-not operator. The body is not visible in this view.
478 inline bool operator!()
const {
// Returns a 32 bit character code for a signed long index.
487 ucs4_t operator[](
long codepoint)
const;
// Assign from a C string.
494 utf8_pointer& operator=(
const char *
string);
// Compares the stored pointer with `string` by address; the characters
// themselves are not compared.
511 inline bool operator==(
const char *
string)
const {
512 return (
const char *)text == string;
// Address inequality; counterpart of operator==.
520 inline bool operator!=(
const char *
string)
const {
521 return (
const char *)text != string;
// Dereference: returns utf8::codepoint() of the stored text pointer.
528 inline ucs4_t operator*()
const {
529 return utf8::codepoint((
const char *)text);
// Returns a mutable char* from a const member. The body is not visible here.
536 inline char *c_str(
void)
const {
// Implicit conversion to char*. The body is not visible here.
544 inline operator char*()
const {
// Returns utf8::count() of the stored text pointer.
552 inline size_t len(
void)
const {
553 return utf8::count((
const char *)text);
// Free-function shorthand that returns utf8::udup(string), i.e. a ucs4_t
// (32 bit character code) buffer for the given C string.
// NOTE(review): ownership of the returned buffer is decided by utf8::udup,
// which is defined outside this view; the closing brace of this function is
// also missing from this view.
557 inline ucs4_t *strudup(
const char *
string) {
558 return utf8::udup(
string);
// Free-function shorthand that returns utf8::wdup(string), i.e. a ucs2_t
// (16 bit character code) buffer for the given C string.
// NOTE(review): ownership of the returned buffer is decided by utf8::wdup,
// which is defined outside this view; the closing brace of this function is
// also missing from this view.
561 inline ucs2_t *strwdup(
const char *
string) {
562 return utf8::wdup(
string);
// Exported function taking a C string and returning unicode_t (a void*
// stand-in per this file's symbol notes). Declaration only; defined elsewhere.
// NOTE(review): presumably returns a newly allocated unicode copy of `string`
// - confirm ownership and how it is released.
565 __EXPORT unicode_t unidup(
const char *
string);
// Explicit-argument forms of dupfree for ucs2_t*, ucs4_t* and unicode_t.
// Each takes the pointer by value and returns nothing.
// NOTE(review): the function bodies and any preceding `template<>`
// introducers are not present in this view, so what each one releases (and
// with which deallocator) cannot be stated from here - confirm against the
// pristine header.
568 inline void dupfree<ucs2_t*>(ucs2_t *string) {
573 inline void dupfree<ucs4_t*>(ucs4_t *string) {
578 inline void dupfree<unicode_t>(unicode_t string) {
Common namespace for all ucommon objects.
void * unicode_t
Resolves issues where wchar_t is not defined.
int16_t ucs2_t
16 bit unicode character code.
A common string class and character string support functions.
T copy(const T &src)
Convenience function to copy objects.
utf8_pointer utf8_t
Convenience type for utf8_pointer strings.
int32_t ucs4_t
32 bit unicode character code.
UString ustring_t
Convenience type for utf8 encoded strings.