|
|
|
|
|
|
|
|
|
|
|
|
|
|
int utf8_in2(int *c, const char *buf, int backwards) |
|
|
int utf8_in2(int *c, const char *buf, int backwards) |
|
|
{ |
|
|
{ |
|
|
// Read a Unicode character from a UTF-8 string |
|
|
|
|
|
|
|
|
// Reads a Unicode character from a UTF-8 string |
|
|
// Returns the number of UTF8 bytes used. |
|
|
// Returns the number of UTF8 bytes used. |
|
|
|
|
|
// c: holds integer representation of multibyte character |
|
|
|
|
|
// buf: position of buffer is moved, if character is read |
|
|
// backwards: set if we are moving backwards through the UTF8 string |
|
|
// backwards: set if we are moving backwards through the UTF8 string |
|
|
|
|
|
|
|
|
int c1; |
|
|
int c1; |
|
|
|
|
|
|
|
|
#pragma GCC visibility push(default) |
|
|
#pragma GCC visibility push(default) |
|
|
int utf8_in(int *c, const char *buf) |
|
|
int utf8_in(int *c, const char *buf) |
|
|
{ |
|
|
{ |
|
|
// Read a Unicode character from a UTF-8 string |
|
|
|
|
|
// Returns the number of UTF8 bytes used. |
|
|
|
|
|
|
|
|
/* Read a Unicode character from a UTF-8 string |
|
|
|
|
|
* Returns the number of UTF8 bytes used. |
|
|
|
|
|
* buf: position of buffer is moved, if character is read |
|
|
|
|
|
* c: holds integer representation of multibyte character by |
|
|
|
|
|
* stripping the UTF-8 header bits of each byte, as follows: |
|
|
|
|
|
* 2-byte character "ā": |
|
|
|
|
|
* hex binary |
|
|
|
|
|
* c481 1100010010000001 |
|
|
|
|
|
* | 00100 000001 |
|
|
|
|
|
* V \ \ | | |
|
|
|
|
|
* 0101 0000000100000001 |
|
|
|
|
|
* 3-byte character "ꙅ": |
|
|
|
|
|
* ea9985 111010101001100110000101 |
|
|
|
|
|
* 1010 011001 000101 |
|
|
|
|
|
* | + +--.\ \ | | |
|
|
|
|
|
* V `--. \`. `.| | |
|
|
|
|
|
* A645 1010011001000101 |
|
|
|
|
|
* 4-byte character "𠜎": |
|
|
|
|
|
* f0a09c8e 11110000101000001001110010001110 |
|
|
|
|
|
* V 000 100000 011100 001110 |
|
|
|
|
|
* 02070e 000000100000011100001110 |
|
|
|
|
|
*/ |
|
|
return utf8_in2(c, buf, 0); |
|
|
return utf8_in2(c, buf, 0); |
|
|
} |
|
|
} |
|
|
#pragma GCC visibility pop |
|
|
#pragma GCC visibility pop |