@@ -31,43 +31,6 @@ | |||
#include "synthesize.h" | |||
#include "translate.h" | |||
#define LEADING_2_BITS 0xC0 // 0b11000000 | |||
#define UTF8_TAIL_BITS 0x80 // 0b10000000 | |||
int utf8_out(unsigned int c, char *buf)
{
	// Encode the code point `c` as UTF-8 into `buf`.
	// The result is the number of bytes stored (1 to 4); no terminating
	// NUL is appended, so `buf` needs room for up to 4 bytes.
	static const unsigned char lead_marker[4] = { 0, 0xc0, 0xe0, 0xf0 };
	int tail_count; // continuation bytes that follow the lead byte
	int bit_pos;    // position of the lowest payload bit not yet emitted
	int i;

	if (c >= 0x110000) {
		// above the last Unicode code point: store a single space instead
		buf[0] = ' ';
		return 1;
	}
	if (c < 0x80) {
		// 7-bit values are stored unchanged
		buf[0] = c;
		return 1;
	}

	tail_count = (c < 0x0800) ? 1 : (c < 0x10000) ? 2 : 3;

	// the lead byte carries the length marker plus the topmost payload bits
	bit_pos = tail_count * 6;
	buf[0] = lead_marker[tail_count] | (c >> bit_pos);

	// each continuation byte is 10xxxxxx holding the next six payload bits
	for (i = 1; i <= tail_count; i++) {
		bit_pos -= 6;
		buf[i] = 0x80 + ((c >> bit_pos) & 0x3f);
	}
	return tail_count + 1;
}
// http://www.iana.org/assignments/character-sets/character-sets.xhtml | |||
MNEM_TAB mnem_encoding[] = { | |||
{ "ANSI_X3.4-1968", ESPEAKNG_ENCODING_US_ASCII }, |
@@ -398,6 +398,40 @@ int utf8_in(int *c, const char *buf) | |||
} | |||
#pragma GCC visibility pop | |||
int utf8_out(unsigned int c, char *buf)
{
	// Encode the code point `c` as UTF-8 into `buf`.
	// The result is the number of bytes stored (1 to 4); no terminating
	// NUL is appended, so `buf` needs room for up to 4 bytes.
	static const unsigned char lead_marker[4] = { 0, 0xc0, 0xe0, 0xf0 };
	int tail_count; // continuation bytes that follow the lead byte
	int bit_pos;    // position of the lowest payload bit not yet emitted
	int i;

	if (c >= 0x110000) {
		// above the last Unicode code point: store a single space instead
		buf[0] = ' ';
		return 1;
	}
	if (c < 0x80) {
		// 7-bit values are stored unchanged
		buf[0] = c;
		return 1;
	}

	tail_count = (c < 0x0800) ? 1 : (c < 0x10000) ? 2 : 3;

	// the lead byte carries the length marker plus the topmost payload bits
	bit_pos = tail_count * 6;
	buf[0] = lead_marker[tail_count] | (c >> bit_pos);

	// each continuation byte is 10xxxxxx holding the next six payload bits
	for (i = 1; i <= tail_count; i++) {
		bit_pos -= 6;
		buf[i] = 0x80 + ((c >> bit_pos) & 0x3f);
	}
	return tail_count + 1;
}
char *strchr_w(const char *s, int c) | |||
{ | |||
// return NULL for any non-ascii character |
@@ -706,10 +706,15 @@ extern void SetLengthMods(Translator *tr, int value); | |||
void LoadConfig(void); | |||
int TransposeAlphabet(Translator *tr, char *text); | |||
// Bit masks for inspecting the top two bits of a UTF-8 byte.
// NOTE(review): presumably (byte & LEADING_2_BITS) == UTF8_TAIL_BITS identifies
// a continuation byte; confirm against the utf8_in*/utf8_nbytes implementations.
#define LEADING_2_BITS 0xC0 // 0b11000000 | |||
#define UTF8_TAIL_BITS 0x80 // 0b10000000 | |||
// UTF-8 helpers. Only utf8_in carries ESPEAK_NG_API; the others are declared
// without it.
ESPEAK_NG_API int utf8_in(int *c, const char *buf); | |||
int utf8_in2(int *c, const char *buf, int backwards); | |||
// Writes code point `c` into `buf` as UTF-8 and returns the number of bytes
// written (1 to 4). Values >= 0x110000 are written as a single space.
// No terminating NUL is appended.
int utf8_out(unsigned int c, char *buf); | |||
int utf8_nbytes(const char *buf); | |||
int lookupwchar(const unsigned short *list, int c); | |||
int lookupwchar2(const unsigned short *list, int c); | |||
int Eof(void); | |||