Browse Source

Comments about implementation for utf8_in2()

master
Valdis Vitolins 8 years ago
parent
commit
d9d7006d7a
1 changed files with 25 additions and 3 deletions
  1. 25
    3
      src/libespeak-ng/translate.c

+ 25
- 3
src/libespeak-ng/translate.c View File

@@ -530,8 +530,10 @@ int utf8_nbytes(const char *buf)

int utf8_in2(int *c, const char *buf, int backwards)
{
// Read a unicode character from a UTF8 string
// Reads a unicode character from a UTF8 string
// Returns the number of UTF8 bytes used.
// c: holds integer representation of multibyte character
// buf: position of buffer is moved, if character is read
// backwards: set if we are moving backwards through the UTF8 string

int c1;
@@ -569,8 +571,28 @@ int utf8_in2(int *c, const char *buf, int backwards)
#pragma GCC visibility push(default)
int utf8_in(int *c, const char *buf)
{
/* Read one Unicode character from a UTF-8 string.
 *
 * Thin wrapper: forwards to utf8_in2() with backwards = 0, i.e. without
 * moving backwards through the string.
 *
 * c:   output; per the original notes it holds the integer value of the
 *      (possibly multibyte) character, formed by skipping the UTF-8
 *      header bits of each byte and joining the remaining payload bits.
 * buf: the UTF-8 bytes to read. The pointer is passed by value, so the
 *      caller's own pointer is not changed by this call.
 *      NOTE(review): the earlier comment said the buffer position "is
 *      moved"; presumably callers advance by the return value - confirm.
 *
 * Returns whatever utf8_in2() returns, documented as the number of UTF-8
 * bytes used.
 *
 * Worked examples (header bits dropped, payload bits kept):
 *
 * 2-byte character "ā" (U+0101):
 *   bytes    c4 81         11000100 10000001
 *   payload                   00100   000001
 *   result   0101          0000000100000001
 *
 * 3-byte character "ꙅ" (U+A645):
 *   bytes    ea 99 85      11101010 10011001 10000101
 *   payload                    1010   011001   000101
 *   result   A645          1010011001000101
 *
 * 4-byte character "𠜎" (U+2070E):
 *   bytes    f0 a0 9c 8e   11110000 10100000 10011100 10001110
 *   payload                     000   100000   011100   001110
 *   result   02070e        000000100000011100001110
 */
return utf8_in2(c, buf, 0);
}
#pragma GCC visibility pop

Loading…
Cancel
Save