| int utf8_in2(int *c, const char *buf, int backwards) | int utf8_in2(int *c, const char *buf, int backwards) | ||||
| { | { | ||||
| // Read a unicode character from a UTF8 string | |||||
| // Reads a unicode character from a UTF8 string | |||||
| // Returns the number of UTF8 bytes used. | // Returns the number of UTF8 bytes used. | ||||
| // c: holds integer representation of multibyte character | |||||
| // buf: position of buffer is moved, if character is read | |||||
| // backwards: set if we are moving backwards through the UTF8 string | // backwards: set if we are moving backwards through the UTF8 string | ||||
| int c1; | int c1; | ||||
| #pragma GCC visibility push(default) | #pragma GCC visibility push(default) | ||||
| int utf8_in(int *c, const char *buf) | int utf8_in(int *c, const char *buf) | ||||
| { | { | ||||
| // Read a unicode character from a UTF8 string | |||||
| // Returns the number of UTF8 bytes used. | |||||
| /* Read a unicode character from a UTF8 string | |||||
| * Returns the number of UTF8 bytes used. | |||||
| * buf: position of buffer is moved, if character is read | |||||
| * c: holds integer representation of multibyte character by | |||||
| * skipping UTF-8 header bits of bytes in following way: | |||||
| * 2-byte character "ā": | |||||
| * hex binary | |||||
| * c481 1100010010000001 | |||||
| * | 11000100 000001 | |||||
| * V \ \ | | | |||||
| * 0101 0000000100000001 | |||||
| * 3-byte character "ꙅ": | |||||
| * ea9985 111010101001100110000101 | |||||
| * 1010 011001 000101 | |||||
| * | + +--.\ \ | | | |||||
| * V `--. \`. `.| | | |||||
| * A645 1010011001000101 | |||||
| * 4-byte character "𠜎": | |||||
| * f0a09c8e 11110000101000001001110010001110 | |||||
| * V 000 100000 011100 001110 | |||||
| * 02070e 000000100000011100001110 | |||||
| */ | |||||
| return utf8_in2(c, buf, 0); | return utf8_in2(c, buf, 0); | ||||
| } | } | ||||
| #pragma GCC visibility pop | #pragma GCC visibility pop |