8 years ago · d9d7006d7a
--- a/src/libespeak-ng/translate.c
+++ b/src/libespeak-ng/translate.c
@@ -530,8 +530,10 @@ int utf8_nbytes(const char *buf)

 int utf8_in2(int *c, const char *buf, int backwards)
 {
 	// Reads a Unicode character from a UTF-8 string.
 	// Returns the number of UTF-8 bytes used.
 	// c: holds integer representation of multibyte character
 	// buf: pointer to the UTF-8 text; passed by value, so the caller's pointer is not moved
 	// backwards: set if we are moving backwards through the UTF-8 string

 	int c1;
@@ -569,8 +571,28 @@ int utf8_in2(int *c, const char *buf, int backwards)
 #pragma GCC visibility push(default)
 int utf8_in(int *c, const char *buf)
 {
 	/* Read a Unicode character from a UTF-8 string, reading forwards.
 	 * Thin wrapper: calls utf8_in2() with backwards = 0.
 	 * Returns the number of UTF-8 bytes used.
 	 * buf: start of the UTF-8 sequence to decode; the pointer is passed
 	 *      by value, so the caller's pointer is not moved -- use the
 	 *      return value to step to the next character
 	 * c: holds integer representation of multibyte character by
 	 * skipping UTF-8 header bits of bytes in following way:
 	 * 2-byte character "ā":
 	 * hex            binary
 	 * c481           1100010010000001
 	 *    |              00100  000001
 	 *    V              \    \ |    |
 	 * 0101           0000000100000001
 	 * 3-byte character "ꙅ":
 	 * ea9985 111010101001100110000101
 	 *            1010  011001  000101
 	 *    |       +  +--.\   \  |    |
 	 *    V        `--.  \`.  `.|    |
 	 *   A645         1010011001000101
 	 * 4-byte character "𠜎":
 	 * f0a09c8e 11110000101000001001110010001110
 	 *    V          000  100000  011100  001110
 	 *   02070e         000000100000011100001110
 	 */
 	return utf8_in2(c, buf, 0);
 }
 #pragma GCC visibility pop