3 years ago · 4a9f0e21be
--- a/src/libespeak-ng/common.c
+++ b/src/libespeak-ng/common.c
@@ -28,6 +28,7 @@
 #include <stdlib.h>
 #include <string.h>
 #include <sys/stat.h>
 #include <wctype.h>

 #include <espeak-ng/espeak_ng.h>
 #include <espeak-ng/speak_lib.h>
@@ -168,6 +169,115 @@ int utf8_in2(int *c, const char *buf, int backwards)
 }


 int IsAlpha(unsigned int c)
 {
 	// Replacement for iswalpha() which also accepts some in-word symbols
 	// (vowel marks, combining accents, ...). Returns 1 if c counts as
 	// alphabetic, otherwise 0.

 	static const unsigned short extra_indic_alphas[] = {
 		0xa70, 0xa71, // Gurmukhi: tippi, addak
 		0
 	};

 	// Inclusive code point ranges, outside the Indic block, that are
 	// accepted in addition to whatever iswalpha() reports.
 	static const struct {
 		unsigned int first;
 		unsigned int last;
 	} extra_ranges[] = {
 		{ 0x0300, 0x036f }, // combining accents
 		{ 0x05b0, 0x05c2 }, // Hebrew vowel marks
 		{ 0x0605, 0x0605 },
 		{ 0x064b, 0x065e }, // arabic vowel marks
 		{ 0x0670, 0x0670 }, // arabic vowel mark
 		{ 0x0f40, 0x0fbc }, // tibetan
 		{ 0x1100, 0x11ff }, // Korean jamo
 		{ 0x2800, 0x28ff }, // braille
 		{ 0x3041, 0xa700 }, // Chinese/Japanese.  Should never get here, but Mac OS 10.4's iswalpha seems to be broken, so just make sure
 	};
 	unsigned int ix;

 	if (iswalpha(c))
 		return 1;

 	// Below 0x300 nothing is added to the iswalpha() result.
 	if (c < 0x300)
 		return 0;

 	if ((c >= 0x901) && (c <= 0xdf7)) {
 		// Indic scripts: Devanagari, Tamil, etc
 		// Only the three tests below can accept a character in this range.
 		return ((c & 0x7f) < 0x64)
 		       || (lookupwchar(extra_indic_alphas, c) != 0)
 		       || ((c >= 0xd7a) && (c <= 0xd7f)); // Malayalam chillu characters
 	}

 	for (ix = 0; ix < sizeof(extra_ranges) / sizeof(extra_ranges[0]); ix++) {
 		if ((c >= extra_ranges[ix].first) && (c <= extra_ranges[ix].last))
 			return 1;
 	}

 	return 0;
 }

 // Bracket and quote characters. The range 0x2014 to 0x201f also counts as
 // brackets, but IsBracket() tests that range directly, so it is not listed.
 // The list ends with a 0 entry.
 static const unsigned short brackets[] = {
 	'(', ')',
 	'[', ']',
 	'{', '}',
 	'<', '>',
 	'"', '\'', '`',
 	0xab, 0xbb,     // double angle brackets
 	0x300a, 0x300b, // double angle brackets (ideograph)
 	0xe000 + '<',   // private usage area
 	0
 };

 int IsBracket(int c)
 {
 	// Returns 1 for the range 0x2014 to 0x201f; for any other value the
 	// result of looking c up in the brackets[] list is returned.
 	int in_fixed_range = (c >= 0x2014) && (c <= 0x201f);

 	return in_fixed_range ? 1 : lookupwchar(brackets, c);
 }

 int IsDigit09(unsigned int c)
 {
 	// ASCII-only digit test: 1 for '0' to '9', otherwise 0.
 	return (c >= '0') && (c <= '9');
 }

 int IsDigit(unsigned int c)
 {
 	// Digit test: 1 for anything iswdigit() accepts and for the range
 	// 0x966 to 0x96f (Devanagari digits in Unicode), otherwise 0.
 	int extra_digit = (c >= 0x966) && (c <= 0x96f);

 	return (iswdigit(c) || extra_digit) ? 1 : 0;
 }

 int IsSpace(unsigned int c)
 {
 	// Whitespace test extended with two extra ranges. 0 is never a space.
 	// For characters outside the extra ranges the raw iswspace() result is
 	// returned, so callers should test for non-zero rather than for 1.
 	if (c == 0)
 		return 0;

 	if (((c >= 0x2500) && (c < 0x25a0))      // box drawing characters
 	    || ((c >= 0xfff9) && (c <= 0xffff))) // unicode specials
 		return 1;

 	return iswspace(c);
 }

 int isspace2(unsigned int c)
 {
 	// can't use isspace() because on Windows, isspace(0xe1) gives TRUE !
 	//
 	// Returns 1 only for the values 1 to ' ' (0x20), i.e. ASCII control
 	// characters and space. 0 and everything above ' ' give 0.
 	//
 	// The low-byte mask previously applied here had no effect on the
 	// result: any value above ' ' is already rejected, and for values up
 	// to ' ' the low byte equals the value itself.
 	return (c != 0) && (c <= ' ');
 }

 int is_str_totally_null(const char* str, int size) {
 	// Tests if all bytes of str are null up to size
 	// This should never be reimplemented with integers, because
--- a/src/libespeak-ng/common.h
+++ b/src/libespeak-ng/common.h
@@ -26,6 +26,12 @@
 extern ESPEAK_NG_API int GetFileLength(const char *filename);
 extern ESPEAK_NG_API void strncpy0(char *to, const char *from, int size);

 int IsAlpha(unsigned int c); // iswalpha() extended with in-word marks from several scripts
 int IsBracket(int c); // Characters in the brackets list, plus the range 0x2014 to 0x201f
 int IsDigit(unsigned int c); // iswdigit() plus the range 0x966 to 0x96f
 int IsDigit09(unsigned int c); // ASCII '0' to '9' only
 int IsSpace(unsigned int c); // iswspace() plus box drawing characters and unicode specials; 0 is never a space
 int isspace2(unsigned int c); // 1 only for the values 1 to ' ' (0x20)
 int is_str_totally_null(const char* str, int size); // Tests if all bytes of str up to size are null
 int Read4Bytes(FILE *f);
 int towlower2(unsigned int c, Translator *translator); // Supports Turkish I
--- a/src/libespeak-ng/compiledict.c
+++ b/src/libespeak-ng/compiledict.c
@@ -41,7 +41,6 @@
 #include "phoneme.h"              // for PHONEME_TAB_LIST, phonSWITCH, phone...
 #include "speech.h"		// for path_home
 #include "synthesize.h"           // for Write4Bytes
 #include "translate.h"            // for isspace2, IsDigit09, utf8_in, utf8_out

 static FILE *f_log = NULL;

--- a/src/libespeak-ng/numbers.c
+++ b/src/libespeak-ng/numbers.c
@@ -38,7 +38,7 @@
 #include "readclause.h"  // for WordToString2
 #include "synthdata.h"   // for SelectPhonemeTable
 #include "synthesize.h"  // for phoneme_tab
 #include "translate.h"   // for Translator, LANGUAGE_OPTIONS, IsDigit09, WOR...
 #include "translate.h"   // for Translator, LANGUAGE_OPTIONS, WOR...
 #include "voice.h"       // for voice, voice_t

 #define M_LIGATURE  0x8000
--- a/src/libespeak-ng/ssml.c
+++ b/src/libespeak-ng/ssml.c
@@ -45,7 +45,7 @@
 #include "readclause.h"           // for PARAM_STACK, param_stack, AddNameData
 #include "soundicon.h"               // for LoadSoundFile2
 #include "synthesize.h"           // for SPEED_FACTORS, speed
 #include "translate.h"            // for CTRL_EMBEDDED, IsDigit09, utf8_out
 #include "translate.h"            // for CTRL_EMBEDDED
 #include "voice.h"                // for SelectVoice, SelectVoiceByName
 #include "speech.h"               // for MAKE_MEM_UNDEFINED

--- a/src/libespeak-ng/translate.c
+++ b/src/libespeak-ng/translate.c
@@ -105,111 +105,9 @@ static char source[N_TR_SOURCE+40]; // extra space for embedded command & voice
 int n_replace_phonemes;
 REPLACE_PHONEMES replace_phonemes[N_REPLACE_PHONEMES];

 // Bracket and quote characters. The range 0x2014 to 0x201f also counts as
 // brackets, but IsBracket() tests that range directly, so it is not listed.
 // The list ends with a 0 entry.
 static const unsigned short brackets[] = {
 	'(', ')',
 	'[', ']',
 	'{', '}',
 	'<', '>',
 	'"', '\'', '`',
 	0xab, 0xbb,     // double angle brackets
 	0x300a, 0x300b, // double angle brackets (ideograph)
 	0xe000 + '<',   // private usage area
 	0
 };

 // Other characters which break a word, but don't produce a pause.
 // The list ends with a 0 entry.
 static const unsigned short breaks[] = {
 	'_',
 	0
 };

 int IsAlpha(unsigned int c)
 {
 	// Replacement for iswalpha() which also accepts some in-word symbols
 	// (vowel marks, combining accents, ...). Returns 1 if c counts as
 	// alphabetic, otherwise 0.

 	static const unsigned short extra_indic_alphas[] = {
 		0xa70, 0xa71, // Gurmukhi: tippi, addak
 		0
 	};

 	// Inclusive code point ranges, outside the Indic block, that are
 	// accepted in addition to whatever iswalpha() reports.
 	static const struct {
 		unsigned int first;
 		unsigned int last;
 	} extra_ranges[] = {
 		{ 0x0300, 0x036f }, // combining accents
 		{ 0x05b0, 0x05c2 }, // Hebrew vowel marks
 		{ 0x0605, 0x0605 },
 		{ 0x064b, 0x065e }, // arabic vowel marks
 		{ 0x0670, 0x0670 }, // arabic vowel mark
 		{ 0x0f40, 0x0fbc }, // tibetan
 		{ 0x1100, 0x11ff }, // Korean jamo
 		{ 0x2800, 0x28ff }, // braille
 		{ 0x3041, 0xa700 }, // Chinese/Japanese.  Should never get here, but Mac OS 10.4's iswalpha seems to be broken, so just make sure
 	};
 	unsigned int ix;

 	if (iswalpha(c))
 		return 1;

 	// Below 0x300 nothing is added to the iswalpha() result.
 	if (c < 0x300)
 		return 0;

 	if ((c >= 0x901) && (c <= 0xdf7)) {
 		// Indic scripts: Devanagari, Tamil, etc
 		// Only the three tests below can accept a character in this range.
 		return ((c & 0x7f) < 0x64)
 		       || (lookupwchar(extra_indic_alphas, c) != 0)
 		       || ((c >= 0xd7a) && (c <= 0xd7f)); // Malayalam chillu characters
 	}

 	for (ix = 0; ix < sizeof(extra_ranges) / sizeof(extra_ranges[0]); ix++) {
 		if ((c >= extra_ranges[ix].first) && (c <= extra_ranges[ix].last))
 			return 1;
 	}

 	return 0;
 }

 int IsDigit09(unsigned int c)
 {
 	// ASCII-only digit test: 1 for '0' to '9', otherwise 0.
 	return (c >= '0') && (c <= '9');
 }

 int IsDigit(unsigned int c)
 {
 	// Digit test: 1 for anything iswdigit() accepts and for the range
 	// 0x966 to 0x96f (Devanagari digits in Unicode), otherwise 0.
 	int extra_digit = (c >= 0x966) && (c <= 0x96f);

 	return (iswdigit(c) || extra_digit) ? 1 : 0;
 }

 static int IsSpace(unsigned int c)
 {
 	// Whitespace test extended with two extra ranges. 0 is never a space.
 	// For characters outside the extra ranges the raw iswspace() result is
 	// returned, so callers should test for non-zero rather than for 1.
 	if (c == 0)
 		return 0;

 	if (((c >= 0x2500) && (c < 0x25a0))      // box drawing characters
 	    || ((c >= 0xfff9) && (c <= 0xffff))) // unicode specials
 		return 1;

 	return iswspace(c);
 }

 int isspace2(unsigned int c)
 {
 	// can't use isspace() because on Windows, isspace(0xe1) gives TRUE !
 	//
 	// Returns 1 only for the values 1 to ' ' (0x20), i.e. ASCII control
 	// characters and space. 0 and everything above ' ' give 0.
 	//
 	// The low-byte mask previously applied here had no effect on the
 	// result: any value above ' ' is already rejected, and for values up
 	// to ' ' the low byte equals the value itself.
 	return (c != 0) && (c <= ' ');
 }

 void DeleteTranslator(Translator *tr)
 {
 	if (!tr) return;
@@ -231,13 +129,6 @@ int lookupwchar(const unsigned short *list, int c)
 	return 0;
 }

 int IsBracket(int c)
 {
 	// Returns 1 for the range 0x2014 to 0x201f; for any other value the
 	// result of looking c up in the brackets[] list is returned.
 	int in_fixed_range = (c >= 0x2014) && (c <= 0x201f);

 	return in_fixed_range ? 1 : lookupwchar(brackets, c);
 }

 char *strchr_w(const char *s, int c)
 {
 	// return NULL for any non-ascii character
--- a/src/libespeak-ng/translate.h
+++ b/src/libespeak-ng/translate.h
@@ -663,14 +663,9 @@ extern int (*phoneme_callback)(const char *);

 int lookupwchar(const unsigned short *list, int c);
 char *strchr_w(const char *s, int c); // returns NULL for any non-ascii character
 int IsBracket(int c); // Characters in the brackets list, plus the range 0x2014 to 0x201f
 void InitNamedata(void);
 void InitText(int flags);
 void InitText2(void);
 int IsDigit(unsigned int c); // iswdigit() plus the range 0x966 to 0x96f
 int IsDigit09(unsigned int c); // ASCII '0' to '9' only
 int IsAlpha(unsigned int c); // iswalpha() extended with in-word marks from several scripts
 int isspace2(unsigned int c); // 1 only for the values 1 to ' ' (0x20)
 ALPHABET *AlphabetFromChar(int c);

 Translator *SelectTranslator(const char *name);