Code cleanup: relocate functions & remove unused #defines

2 years ago · df8af89ddd
--- a/src/libespeak-ng/common.c
+++ b/src/libespeak-ng/common.c
@@ -28,13 +28,15 @@
 #include <stdlib.h>
 #include <string.h>
 #include <sys/stat.h>
 #include <wctype.h>

 #include <espeak-ng/espeak_ng.h>
 #include <espeak-ng/speak_lib.h>
 #include <espeak-ng/encoding.h>

 #include <ucd/ucd.h>

 #include "common.h"
 #include "translate.h"

 #pragma GCC visibility push(default)

@@ -57,4 +59,254 @@ void strncpy0(char *to, const char *from, int size)
 	strncpy(to, from, size);
 	to[size-1] = 0;
 }

 int utf8_in(int *c, const char *buf)
 {
 	/* Read one Unicode character from a UTF-8 string, scanning forwards.
 	 * Thin wrapper: calls utf8_in2(c, buf, 0).
 	 * Returns the number of UTF8 bytes used.
 	 * buf: first byte to decode; it is passed by value, so the caller
 	 *      advances its own pointer by the returned byte count
 	 * c: receives the integer value of the multibyte character, built by
 	 * skipping UTF-8 header bits of bytes in following way:
 	 * 2-byte character "ā":
 	 * hex            binary
 	 * c481           1100010010000001
 	 *    |           11000100  000001
 	 *    V              \    \ |    |
 	 * 0101           0000000100000001
 	 * 3-byte character "ꙅ":
 	 * ea9985 111010101001100110000101
 	 *            1010  011001  000101
 	 *    |       +  +--.\   \  |    |
 	 *    V        `--.  \`.  `.|    |
 	 *   A645         1010011001000101
 	 * 4-byte character "𠜎":
 	 * f0a09c8e 11110000101000001001110010001110
 	 *    V          000  100000  011100  001110
 	 *   02070e         000000100000011100001110
 	 */
 	return utf8_in2(c, buf, 0);
 }
 #pragma GCC visibility pop

 int utf8_out(unsigned int c, char *buf)
 {
 	// Encode the character code c into buf as UTF-8.
 	// Returns the number of bytes written (1 to 4); buf is not 0-terminated.
 	// Codes of 0x110000 and above are out of range and are written as a space.

 	static const unsigned char lead_marker[4] = { 0, 0xc0, 0xe0, 0xf0 };
 	int n_trail; // number of continuation bytes after the lead byte
 	int pos;

 	if (c >= 0x110000) {
 		buf[0] = ' ';
 		return 1;
 	}
 	if (c < 0x80) {
 		buf[0] = c; // plain ASCII
 		return 1;
 	}

 	n_trail = (c < 0x0800) ? 1 : ((c < 0x10000) ? 2 : 3);

 	// lead byte: length marker plus the most significant payload bits
 	buf[0] = lead_marker[n_trail] | (c >> (6 * n_trail));

 	// continuation bytes: 6 payload bits each, most significant group first
 	for (pos = 1; pos <= n_trail; pos++)
 		buf[pos] = 0x80 + ((c >> (6 * (n_trail - pos))) & 0x3f);

 	return n_trail + 1;
 }

 int utf8_in2(int *c, const char *buf, int backwards)
 {
 	// Decode one character from a UTF-8 string.
 	// c: receives the integer value of the character
 	// buf: where to start looking; if it points at a continuation byte, the
 	//      scan first moves to the nearest non-continuation byte
 	// backwards: non-zero to move towards the start of the string in that scan
 	// Returns the number of bytes decoded, counted from the lead byte
 	// (bytes skipped while looking for the lead byte are not included).

 	static const unsigned char payload_mask[4] = { 0xff, 0x1f, 0x0f, 0x07 };
 	const int step = backwards ? -1 : 1;
 	int value;
 	int expected = 0; // continuation bytes announced by the lead byte
 	int consumed = 0; // continuation bytes actually read

 	// find the start of the next/previous character
 	while ((*buf & 0xc0) == 0x80)
 		buf += step;

 	value = *buf++;
 	if (value & 0x80) {
 		if ((value & 0xe0) == 0xc0)
 			expected = 1;
 		else if ((value & 0xf0) == 0xe0)
 			expected = 2;
 		else if ((value & 0xf8) == 0xf0)
 			expected = 3;

 		value &= payload_mask[expected];

 		// stop early if the string ends in the middle of the character
 		while ((consumed < expected) && (*buf != 0)) {
 			value = (value << 6) + (*buf++ & 0x3f);
 			consumed++;
 		}
 	}

 	*c = value;
 	return consumed + 1;
 }


 int IsAlpha(unsigned int c)
 {
 	// Replacement for iswalpha() which also accepts some in-word symbols
 	// (vowel marks, combining accents, ...) from the ranges listed below.
 	// Returns 1 for an accepted character, otherwise 0.

 	static const unsigned short extra_indic_alphas[] = {
 		0xa70, 0xa71, // Gurmukhi: tippi, addak
 		0
 	};
 	static const struct {
 		unsigned int first;
 		unsigned int last;
 	} extra_ranges[] = {
 		{ 0x0300, 0x036f }, // combining accents
 		{ 0x05b0, 0x05c2 }, // Hebrew vowel marks
 		{ 0x0605, 0x0605 },
 		{ 0x064b, 0x065e }, // arabic vowel marks
 		{ 0x0670, 0x0670 }, // arabic vowel mark
 		{ 0x0f40, 0x0fbc }, // tibetan
 		{ 0x1100, 0x11ff }, // Korean jamo
 		{ 0x2800, 0x28ff }, // braille
 		{ 0x3041, 0xa700 }, // Chinese/Japanese.  Should never get here, but Mac OS 10.4's iswalpha seems to be broken, so just make sure
 	};
 	unsigned int ix;

 	if (iswalpha(c))
 		return 1;

 	if (c < 0x300)
 		return 0;

 	if ((c >= 0x901) && (c <= 0xdf7)) {
 		// Indic scripts: Devanagari, Tamil, etc
 		return ((c & 0x7f) < 0x64)
 		    || (lookupwchar(extra_indic_alphas, c) != 0)
 		    || ((c >= 0xd7a) && (c <= 0xd7f)); // Malayalam chillu characters
 	}

 	for (ix = 0; ix < sizeof(extra_ranges) / sizeof(extra_ranges[0]); ix++) {
 		if ((c >= extra_ranges[ix].first) && (c <= extra_ranges[ix].last))
 			return 1;
 	}

 	return 0;
 }

 // brackets; characters 0x2014 to 0x201f are also treated as brackets by the
 // range check in IsBracket(), so they don't need to be in this list.
 // The list ends with a 0 entry.
 static const unsigned short brackets[] = {
 	'(', ')', '[', ']', '{', '}', '<', '>', '"', '\'', '`',
 	0xab,   0xbb,   // double angle brackets
 	0x300a, 0x300b, // double angle brackets (ideograph)
 	0xe000+'<',     // private usage area
 	0
 };

 int IsBracket(int c)
 {
 	// Characters 0x2014 to 0x201f always count as brackets (returns 1);
 	// anything else is decided by looking it up in the brackets[] list.
 	const int in_fixed_range = (c >= 0x2014) && (c <= 0x201f);

 	return in_fixed_range ? 1 : lookupwchar(brackets, c);
 }

 int IsDigit09(unsigned int c)
 {
 	// 1 only for the ASCII digits '0' to '9', otherwise 0
 	return (c >= '0') && (c <= '9');
 }

 int IsDigit(unsigned int c)
 {
 	// Returns 1 for anything iswdigit() accepts, and also for the
 	// code range 0x966 to 0x96f; otherwise 0.
 	const int in_extra_range = (c >= 0x966) && (c <= 0x96f);

 	return (iswdigit(c) || in_extra_range) ? 1 : 0;
 }

 int IsSpace(unsigned int c)
 {
 	// Like iswspace(), except that 0 is never a space and two extra
 	// ranges always are (those return exactly 1).
 	const int box_drawing = (c >= 0x2500) && (c < 0x25a0);      // box drawing characters
 	const int unicode_specials = (c >= 0xfff9) && (c <= 0xffff); // unicode specials

 	if (c == 0)
 		return 0;

 	return (box_drawing || unicode_specials) ? 1 : iswspace(c);
 }

 int isspace2(unsigned int c)
 {
 	// can't use isspace() because on Windows, isspace(0xe1) gives TRUE !
 	// Returns 1 for codes up to and including ' ' whose low byte is
 	// non-zero, otherwise 0.
 	if (c > ' ')
 		return 0;

 	return (c & 0xff) != 0;
 }

 int is_str_totally_null(const char* str, int size) {
 	// Tests if all bytes of str are null up to size.
 	// Returns 1 when the first 'size' bytes are all 0, otherwise 0.
 	// An empty range (size <= 0) is reported as all-null without reading
 	// str; the earlier memcmp(str, str+1, size-1) form was handed a length
 	// of (size_t)-1 when size was 0.
 	// This should never be reimplemented with integers, because
 	// this function has to work with unaligned char*
 	// (casting to int when unaligned may result in unguaranteed behaviors)
 	int ix;

 	for (ix = 0; ix < size; ix++) {
 		if (str[ix] != 0)
 			return 0;
 	}
 	return 1;
 }

 int Read4Bytes(FILE *f)
 {
 	// Read 4 bytes (least significant first) into a word.
 	// The word is assembled as unsigned: shifting a byte >= 0x80 left by
 	// 24 bits as a (promoted) int overflows, which is undefined behaviour.
 	// A failed fgetc() is not reported; that byte is taken as (EOF & 0xff).
 	unsigned int acc = 0;
 	int ix;

 	for (ix = 0; ix < 4; ix++) {
 		unsigned int byte = (unsigned int)fgetc(f) & 0xff;
 		acc |= byte << (ix*8);
 	}
 	return (int)acc;
 }

 int towlower2(unsigned int c, Translator *translator)
 {
 	// Lower-case conversion that checks for non-standard upper to lower
 	// case mappings: with langopts.dotless_i set, 'I' maps to 0x131 (ı).
 	// Everything else goes through ucd_tolower().
 	const int use_dotless_i = (c == 'I') && translator->langopts.dotless_i;

 	if (!use_dotless_i)
 		return ucd_tolower(c);

 	return 0x131; // I -> ı
 }


--- a/src/libespeak-ng/common.h
+++ b/src/libespeak-ng/common.h
@@ -21,10 +21,25 @@
 #define ESPEAK_NG_COMMON_H

 #include "espeak-ng/espeak_ng.h"
 #include "translate.h"

 extern ESPEAK_NG_API int GetFileLength(const char *filename);
 extern ESPEAK_NG_API void strncpy0(char *to, const char *from, int size);

 int IsAlpha(unsigned int c);
 int IsBracket(int c);
 int IsDigit(unsigned int c);
 int IsDigit09(unsigned int c);
 int IsSpace(unsigned int c);
 int isspace2(unsigned int c);
 int is_str_totally_null(const char* str, int size); // Tests if all bytes of str up to size are null
 int Read4Bytes(FILE *f);
 int towlower2(unsigned int c, Translator *translator); // Supports Turkish I

 ESPEAK_NG_API int utf8_in(int *c, const char *buf);
 int utf8_in2(int *c, const char *buf, int backwards);
 int utf8_out(unsigned int c, char *buf);

 #ifdef __cplusplus
 }
 #endif
--- a/src/libespeak-ng/compiledata.c
+++ b/src/libespeak-ng/compiledata.c
@@ -35,11 +35,10 @@
 #include <espeak-ng/speak_lib.h>
 #include <espeak-ng/encoding.h>

 #include "common.h"                    // for GetFileLength, strncpy0
 #include "common.h"                    // for GetFileLength, strncpy0, ...
 #include "error.h"                    // for create_file_error_context
 #include "mnemonics.h"               // for LookupMnemName, MNEM_TAB
 #include "phoneme.h"                  // for PHONEME_TAB, PHONEME_TAB_LIST
 #include "readclause.h"               // for Read4Bytes
 #include "spect.h"                    // for SpectFrame, peak_t, SpectSeq
 #include "speech.h"			// for path_home, GetFileLength
 #include "synthdata.h"                // for LoadPhData
--- a/src/libespeak-ng/compiledict.c
+++ b/src/libespeak-ng/compiledict.c
@@ -39,10 +39,8 @@
 #include "error.h"                // for create_file_error_context
 #include "mnemonics.h"               // for LookupMnemName, MNEM_TAB
 #include "phoneme.h"              // for PHONEME_TAB_LIST, phonSWITCH, phone...
 #include "readclause.h"           // for towlower2
 #include "speech.h"		// for path_home
 #include "synthesize.h"           // for Write4Bytes
 #include "translate.h"            // for isspace2, IsDigit09, utf8_in, utf8_out

 static FILE *f_log = NULL;

--- a/src/libespeak-ng/dictionary.c
+++ b/src/libespeak-ng/dictionary.c
@@ -36,13 +36,15 @@
 #include "dictionary.h"
 #include "numbers.h"                       // for LookupAccentedLetter, Look...
 #include "phoneme.h"                       // for PHONEME_TAB, phVOWEL, phon...
 #include "readclause.h"                    // for WordToString2, is_str_tota...
 #include "readclause.h"                    // for WordToString2
 #include "speech.h"                        // for path_home
 #include "compiledict.h"                   // for DecodeRule
 #include "synthdata.h"                     // for PhonemeCode, InterpretPhoneme
 #include "synthesize.h"                    // for STRESS_IS_PRIMARY, phoneme...
 #include "translate.h"                     // for Translator, utf8_in, LANGU...

 static int LookupFlags(Translator *tr, const char *word, unsigned int **flags_out);

 typedef struct {
 	int points;
 	const char *phonemes;
@@ -761,96 +763,7 @@ int IsVowel(Translator *tr, int letter)
 	return IsLetter(tr, letter, LETTERGP_VOWEL2);
 }

 static int Unpronouncable2(Translator *tr, char *word)
 {
 	// Runs TranslateRules() over word with FLAG_UNPRON_TEST and returns 1
 	// when the result is 0 or has SUFX_UNPRON set, otherwise 0.
 	// The byte before word is temporarily overwritten with a space and
 	// restored afterwards, so word[-1] must be writable.
 	char ph_buf[N_WORD_PHONEMES];
 	const char saved_prev = word[-1];
 	int end_flags;

 	ph_buf[0] = 0;

 	word[-1] = ' '; // ensure there is a space before the "word"
 	end_flags = TranslateRules(tr, word, ph_buf, sizeof(ph_buf), NULL, FLAG_UNPRON_TEST, NULL);
 	word[-1] = saved_prev;

 	return ((end_flags == 0) || (end_flags & SUFX_UNPRON)) ? 1 : 0;
 }

 int Unpronouncable(Translator *tr, char *word, int posn)
 {
 	/* Determines whether a word is 'unpronouncable', i.e. whether it should
 	    be spoken as individual letters.

 	    This function may be language specific. This is a generic version.

 	    tr:   translator; its letter_bits_offset, max_initial_consonants and
 	          the LOPT_UNPRONOUNCABLE option steer the decision
 	    word: UTF-8 text of the word; scanning stops at a space or 0
 	    posn: only used for the apostrophe test below (posn > 0)
 	          -- presumably the word's position; TODO confirm with callers
 	    Returns 1 if the word is judged unpronouncable, otherwise 0.
 	 */

 	int c;
 	int c1 = 0;         // first character seen by the scan loop
 	int vowel_posn = 9; // stays 9 if the scan ends without finding a vowel
 	int index;
 	int count;
 	ALPHABET *alphabet;

 	utf8_in(&c, word);
 	if ((tr->letter_bits_offset > 0) && (c < 0x241)) {
 		// Latin characters for a language with a non-latin alphabet
 		return 0;  // so we can re-translate the word as English
 	}

 	if (((alphabet = AlphabetFromChar(c)) != NULL)  && (alphabet->offset != tr->letter_bits_offset)) {
 		// Character is not in our alphabet
 		return 0;
 	}

 	// option value 1: never report a word as unpronouncable
 	if (tr->langopts.param[LOPT_UNPRONOUNCABLE] == 1)
 		return 0;

 	// empty word, or one starting with an apostrophe
 	if (((c = *word) == ' ') || (c == 0) || (c == '\''))
 		return 0;

 	// scan the characters up to and including the first vowel
 	index = 0;
 	count = 0;
 	for (;;) {
 		index += utf8_in(&c, &word[index]);
 		if ((c == 0) || (c == ' '))
 			break;

 		if ((c == '\'') && ((count > 1) || (posn > 0)))
 			break; // "tv'" but not "l'"

 		if (count == 0)
 			c1 = c;

 		if ((c == '\'') && (tr->langopts.param[LOPT_UNPRONOUNCABLE] == 3)) {
 			// don't count apostrophe
 		} else
 			count++;

 		if (IsVowel(tr, c)) {
 			vowel_posn = count; // position of the first vowel
 			break;
 		}

 		// anything other than a letter or apostrophe: treat as pronouncable
 		if ((c != '\'') && !iswalpha(c))
 			return 0;
 	}

 	if ((vowel_posn > 2) && (tr->langopts.param[LOPT_UNPRONOUNCABLE] == 2)) {
 		// Lookup unpronouncable rules in *_rules
 		return Unpronouncable2(tr, word);
 	}

 	// the option value is also compared with the first character of the word
 	if (c1 == tr->langopts.param[LOPT_UNPRONOUNCABLE])
 		vowel_posn--; // disregard this as the initial letter when counting

 	if (vowel_posn > (tr->langopts.max_initial_consonants+1))
 		return 1; // no vowel, or no vowel in first few letters

 	return 0;
 }

 static int GetVowelStress(Translator *tr, unsigned char *phonemes, signed char *vowel_stress, int *vowel_count, int *stressed_syllable, int control)
 int GetVowelStress(Translator *tr, unsigned char *phonemes, signed char *vowel_stress, int *vowel_count, int *stressed_syllable, int control)
 {
 	// control = 1, set stress to 1 for forced unstressed vowels
 	unsigned char phcode;
@@ -962,55 +875,11 @@ static int GetVowelStress(Translator *tr, unsigned char *phonemes, signed char *
 	return max_stress;
 }

 static char stress_phonemes[] = {
 const char stress_phonemes[] = {
 	phonSTRESS_D, phonSTRESS_U, phonSTRESS_2, phonSTRESS_3,
 	phonSTRESS_P, phonSTRESS_P2, phonSTRESS_TONIC
 };

 void ChangeWordStress(Translator *tr, char *word, int new_stress)
 {
 	// Rewrites the phoneme string 'word' in place with changed stress.
 	// new_stress >= STRESS_IS_PRIMARY: the first vowel whose stress is at
 	//   least the word's maximum is given new_stress.
 	// otherwise: every vowel whose stress is above new_stress is lowered
 	//   to new_stress.
 	// The string is then written back with a stress phoneme inserted before
 	// each syllabic vowel that needs one.
 	int ix;
 	unsigned char *p;
 	int max_stress;
 	int vowel_count; // num of vowels + 1
 	int stressed_syllable = 0; // position of stressed syllable
 	unsigned char phonetic[N_WORD_PHONEMES];
 	signed char vowel_stress[N_WORD_PHONEMES/2];

 	// work on a copy, because 'word' is overwritten below
 	// NOTE(review): unbounded copy; assumes word fits in N_WORD_PHONEMES bytes -- TODO confirm
 	strcpy((char *)phonetic, word);
 	max_stress = GetVowelStress(tr, phonetic, vowel_stress, &vowel_count, &stressed_syllable, 0);

 	// vowel_stress[] entries are used from index 1 upwards
 	if (new_stress >= STRESS_IS_PRIMARY) {
 		// promote to primary stress
 		for (ix = 1; ix < vowel_count; ix++) {
 			if (vowel_stress[ix] >= max_stress) {
 				vowel_stress[ix] = new_stress;
 				break;
 			}
 		}
 	} else {
 		// remove primary stress
 		for (ix = 1; ix < vowel_count; ix++) {
 			// NOTE(review): the code tests '>', although the remark that follows mentions '>='
 			if (vowel_stress[ix] > new_stress) // >= allows for diminished stress (=1)
 				vowel_stress[ix] = new_stress;
 		}
 	}

 	// write out phonemes
 	ix = 1;
 	p = phonetic;
 	while (*p != 0) {
 		if ((phoneme_tab[*p]->type == phVOWEL) && !(phoneme_tab[*p]->phflags & phNONSYLLABIC)) {
 			// emit a stress phoneme for diminished stress or anything above unstressed
 			if ((vowel_stress[ix] == STRESS_IS_DIMINISHED) || (vowel_stress[ix] > STRESS_IS_UNSTRESSED))
 				*word++ = stress_phonemes[(unsigned char)vowel_stress[ix]];

 			ix++;
 		}
 		*word++ = *p++;
 	}
 	*word = 0;
 }

 void SetWordStress(Translator *tr, char *output, unsigned int *dictionary_flags, int tonic, int control)
 {
 	/* Guess stress pattern of word.  This is language specific
@@ -2345,7 +2214,7 @@ int TranslateRules(Translator *tr, char *p_start, char *phonemes, int ph_size, c
 						if (letter == 0xe000+'(') {
 							if (pre_pause < tr->langopts.param[LOPT_BRACKET_PAUSE_ANNOUNCED])
 								pre_pause = tr->langopts.param[LOPT_BRACKET_PAUSE_ANNOUNCED]; // a bracket, already spoken by AnnouncePunctuation()
 						} 
 						}
 						if (IsBracket(letter)) {
 							if (pre_pause < tr->langopts.param[LOPT_BRACKET_PAUSE])
 								pre_pause = tr->langopts.param[LOPT_BRACKET_PAUSE];
@@ -2456,37 +2325,6 @@ int TranslateRules(Translator *tr, char *p_start, char *phonemes, int ph_size, c
 	return 0;
 }

 void ApplySpecialAttribute2(Translator *tr, char *phonemes, int dict_flags)
 {
 	// apply after the translation is complete
 	//
 	// Only acts when bit value 2 of the LOPT_ALT option is set. The phoneme
 	// that follows the first phonSTRESS_P is then switched between
 	// 'E'/'e' and 'O'/'o': to lower case when FLAG_ALT2_TRANS is set in
 	// dict_flags, to upper case when it is not.

 	const int last = (int)strlen(phonemes) - 1;
 	int ix;
 	char *p;

 	if ((tr->langopts.param[LOPT_ALT] & 2) == 0)
 		return;

 	// locate the first primary stress marker that has a phoneme after it
 	for (ix = 0; (ix < last) && (phonemes[ix] != phonSTRESS_P); ix++)
 		;
 	if (ix >= last)
 		return;

 	p = &phonemes[ix+1];
 	if ((dict_flags & FLAG_ALT2_TRANS) != 0) {
 		if (*p == PhonemeCode('E'))
 			*p = PhonemeCode('e');
 		if (*p == PhonemeCode('O'))
 			*p = PhonemeCode('o');
 	} else {
 		if (*p == PhonemeCode('e'))
 			*p = PhonemeCode('E');
 		if (*p == PhonemeCode('o'))
 			*p = PhonemeCode('O');
 	}
 }

 int TransposeAlphabet(Translator *tr, char *text)
 {
 	// transpose cyrillic alphabet (for example) into ascii (single byte) character codes
@@ -2856,6 +2694,21 @@ static const char *LookupDict2(Translator *tr, const char *word, const char *wor
 	return 0;
 }


    static int utf8_nbytes(const char *buf)
 {
 	// Returns the number of bytes for the first UTF-8 character in buf,
 	// judged from its first byte only: below 0x80 gives 1, below 0xe0
 	// gives 2, below 0xf0 gives 3, anything else gives 4.

 	const unsigned char lead = (unsigned char)buf[0];
 	int n_bytes = 1;

 	if (lead >= 0x80)
 		n_bytes++;
 	if (lead >= 0xe0)
 		n_bytes++;
 	if (lead >= 0xf0)
 		n_bytes++;

 	return n_bytes;
 }

 /* Lookup a specified word in the word dictionary.
   Returns phonetic data in 'phonetic' and bits in 'flags'

@@ -3022,7 +2875,7 @@ int Lookup(Translator *tr, const char *word, char *ph_out)
 	return flags0;
 }

 int LookupFlags(Translator *tr, const char *word, unsigned int **flags_out)
 static int LookupFlags(Translator *tr, const char *word, unsigned int **flags_out)
 {
 	char buf[100];
 	static unsigned int flags[2];
--- a/src/libespeak-ng/dictionary.h
+++ b/src/libespeak-ng/dictionary.h
@@ -31,22 +31,22 @@ extern "C"
 {
 #endif

 extern const char stress_phonemes[];

 int LoadDictionary(Translator *tr, const char *name, int no_error);
 int HashDictionary(const char *string);
 const char *EncodePhonemes(const char *p, char *outptr, int *bad_phoneme);
 void DecodePhonemes(const char *inptr, char *outptr);
 char *WritePhMnemonic(char *phon_out, PHONEME_TAB *ph, PHONEME_LIST *plist, int use_ipa, int *flags);
 const char *GetTranslatedPhonemeString(int phoneme_mode);
 int GetVowelStress(Translator *tr, unsigned char *phonemes, signed char *vowel_stress, int *vowel_count, int *stressed_syllable, int control);
 int IsVowel(Translator *tr, int letter);
 int Unpronouncable(Translator *tr, char *word, int posn);
 void ChangeWordStress(Translator *tr, char *word, int new_stress);
 void SetWordStress(Translator *tr, char *output, unsigned int *dictionary_flags, int tonic, int control);
 void AppendPhonemes(Translator *tr, char *string, int size, const char *ph);
 int TranslateRules(Translator *tr, char *p_start, char *phonemes, int ph_size, char *end_phonemes, int word_flags, unsigned int *dict_flags);
 int TransposeAlphabet(Translator *tr, char *text);
 int Lookup(Translator *tr, const char *word, char *ph_out);
 int LookupDictList(Translator *tr, char **wordptr, char *ph_out, unsigned int *flags, int end_flags, WORD_TAB *wtab);
 int LookupFlags(Translator *tr, const char *word, unsigned int **flags_out);
 int RemoveEnding(Translator *tr, char *word, int end_type, char *word_copy);

 #ifdef __cplusplus
--- a/src/libespeak-ng/klatt.h
+++ b/src/libespeak-ng/klatt.h
@@ -112,9 +112,6 @@ typedef struct {
 #define Rnpp 10
 #define R1p  11
 #define R2p  12
 #define R3p  13
 #define R4p  14
 #define R5p  15
 #define R6p  16

 #define RGL  17
--- a/src/libespeak-ng/numbers.c
+++ b/src/libespeak-ng/numbers.c
@@ -32,12 +32,13 @@
 #include <espeak-ng/encoding.h>

 #include "numbers.h"
 #include "common.h"
 #include "dictionary.h"  // for Lookup, TranslateRules, EncodePhonemes, Look...
 #include "phoneme.h"     // for phonSWITCH, PHONEME_TAB, phonEND_WORD, phonP...
 #include "readclause.h"  // for WordToString2, towlower2
 #include "readclause.h"  // for WordToString2
 #include "synthdata.h"   // for SelectPhonemeTable
 #include "synthesize.h"  // for phoneme_tab
 #include "translate.h"   // for Translator, LANGUAGE_OPTIONS, IsDigit09, WOR...
 #include "translate.h"   // for Translator, LANGUAGE_OPTIONS, WOR...
 #include "voice.h"       // for voice, voice_t

 #define M_LIGATURE  0x8000
--- a/src/libespeak-ng/readclause.c
+++ b/src/libespeak-ng/readclause.c
@@ -131,23 +131,6 @@ int clause_type_from_codepoint(uint32_t c)
 	return CLAUSE_NONE;
 }

 int is_str_totally_null(const char* str, int size) {
 	// Tests if all bytes of str are null up to size.
 	// Returns 1 when the first 'size' bytes are all 0, otherwise 0.
 	// An empty range (size <= 0) is reported as all-null without reading
 	// str; the earlier memcmp(str, str+1, size-1) form was handed a length
 	// of (size_t)-1 when size was 0.
 	// This should never be reimplemented with integers, because
 	// this function has to work with unaligned char*
 	// (casting to int when unaligned may result in unguaranteed behaviors)
 	int ix;

 	for (ix = 0; ix < size; ix++) {
 		if (str[ix] != 0)
 			return 0;
 	}
 	return 1;
 }

 int towlower2(unsigned int c, Translator *translator)
 {
 	// Lower-case conversion that checks for non-standard upper to lower
 	// case mappings: with langopts.dotless_i set, 'I' maps to 0x131 (ı).
 	// Everything else goes through ucd_tolower().
 	const int use_dotless_i = (c == 'I') && translator->langopts.dotless_i;

 	if (!use_dotless_i)
 		return ucd_tolower(c);

 	return 0x131; // I -> ı
 }

 static int IsRomanU(unsigned int c)
 {
 	if ((c == 'I') || (c == 'V') || (c == 'X') || (c == 'L'))
@@ -288,20 +271,6 @@ static const char *LookupCharName(Translator *tr, int c, int only)
 	return buf;
 }

 int Read4Bytes(FILE *f)
 {
 	// Read 4 bytes (least significant first) into a word.
 	// The word is assembled as unsigned: shifting a byte >= 0x80 left by
 	// 24 bits as a (promoted) int overflows, which is undefined behaviour.
 	// A failed fgetc() is not reported; that byte is taken as (EOF & 0xff).
 	unsigned int acc = 0;
 	int ix;

 	for (ix = 0; ix < 4; ix++) {
 		unsigned int byte = (unsigned int)fgetc(f) & 0xff;
 		acc |= byte << (ix*8);
 	}
 	return (int)acc;
 }

 static int AnnouncePunctuation(Translator *tr, int c1, int *c2_ptr, char *output, int *bufix, int end_clause)
 {
 	// announce punctuation names
--- a/src/libespeak-ng/readclause.h
+++ b/src/libespeak-ng/readclause.h
@@ -34,14 +34,9 @@ typedef struct {

 extern PARAM_STACK param_stack[];

 // Tests if all bytes of str up to size are null
 int is_str_totally_null(const char* str, int size);

 int clause_type_from_codepoint(uint32_t c);
 int towlower2(unsigned int c, Translator *translator); // Supports Turkish I
 int Eof(void);
 const char *WordToString2(unsigned int word);
 int Read4Bytes(FILE *f);
 int AddNameData(const char *name,
                int wide);
 int ReadClause(Translator *tr,
--- a/src/libespeak-ng/soundicon.c
+++ b/src/libespeak-ng/soundicon.c
@@ -38,7 +38,6 @@
 #include "soundicon.h" 
 #include "common.h"                // for GetFileLength
 #include "error.h"                // for create_file_error_context
 #include "readclause.h"               // for Read4Bytes
 #include "speech.h"                   // for path_home, PATHSEP
 #include "synthesize.h"                   // for samplerate

--- a/src/libespeak-ng/spect.h
+++ b/src/libespeak-ng/spect.h
@@ -33,7 +33,6 @@ float polint(float xa[], float ya[], int n, float x);

 #define FRAME_WIDTH  1000  // max width for 8000kHz frame
 #define MAX_DISPLAY_FREQ 9500
 #define FRAME_HEIGHT 240

 #define T_AMPLITUDE 308
 #define T_AV      312
--- a/src/libespeak-ng/ssml.c
+++ b/src/libespeak-ng/ssml.c
@@ -45,7 +45,7 @@
 #include "readclause.h"           // for PARAM_STACK, param_stack, AddNameData
 #include "soundicon.h"               // for LoadSoundFile2
 #include "synthesize.h"           // for SPEED_FACTORS, speed
 #include "translate.h"            // for CTRL_EMBEDDED, IsDigit09, utf8_out
 #include "translate.h"            // for CTRL_EMBEDDED
 #include "voice.h"                // for SelectVoice, SelectVoiceByName
 #include "speech.h"               // for MAKE_MEM_UNDEFINED

--- a/src/libespeak-ng/synth_mbrola.c
+++ b/src/libespeak-ng/synth_mbrola.c
@@ -34,7 +34,6 @@

 #include "dictionary.h"
 #include "mbrola.h"
 #include "readclause.h"
 #include "setlengths.h"
 #include "synthdata.h"
 #include "wavegen.h"
--- a/src/libespeak-ng/tr_languages.c
+++ b/src/libespeak-ng/tr_languages.c
@@ -31,6 +31,7 @@
 #include <espeak-ng/speak_lib.h>
 #include <espeak-ng/encoding.h>

 #include "common.h"
 #include "setlengths.h"          // for SetLengthMods
 #include "translate.h"           // for Translator, LANGUAGE_OPTIONS, L, NUM...

--- a/src/libespeak-ng/translate.c
+++ b/src/libespeak-ng/translate.c
@@ -33,6 +33,7 @@
 #include <espeak-ng/encoding.h>

 #include "translate.h"
 #include "common.h"
 #include "dictionary.h"           // for TranslateRules, LookupDictList, Cha...
 #include "phoneme.h"              // for phonSWITCH, PHONEME_TAB, phonPAUSE_...
 #include "phonemelist.h"          // for MakePhonemeList
@@ -104,111 +105,9 @@ static char source[N_TR_SOURCE+40]; // extra space for embedded command & voice
 int n_replace_phonemes;
 REPLACE_PHONEMES replace_phonemes[N_REPLACE_PHONEMES];

 // brackets; characters 0x2014 to 0x201f are also treated as brackets by the
 // range check in IsBracket(), so they don't need to be in this list.
 // The list ends with a 0 entry.
 static const unsigned short brackets[] = {
 	'(', ')', '[', ']', '{', '}', '<', '>', '"', '\'', '`',
 	0xab,   0xbb,   // double angle brackets
 	0x300a, 0x300b, // double angle brackets (ideograph)
 	0xe000+'<',     // private usage area
 	0
 };

 // other characters which break a word, but don't produce a pause
 // (currently only '_'; the list ends with a 0 entry)
 static const unsigned short breaks[] = { '_', 0 };

 int IsAlpha(unsigned int c)
 {
 	// Replacement for iswalpha() which also accepts some in-word symbols
 	// (vowel marks, combining accents, ...) from the ranges listed below.
 	// Returns 1 for an accepted character, otherwise 0.

 	static const unsigned short extra_indic_alphas[] = {
 		0xa70, 0xa71, // Gurmukhi: tippi, addak
 		0
 	};
 	static const struct {
 		unsigned int first;
 		unsigned int last;
 	} extra_ranges[] = {
 		{ 0x0300, 0x036f }, // combining accents
 		{ 0x05b0, 0x05c2 }, // Hebrew vowel marks
 		{ 0x0605, 0x0605 },
 		{ 0x064b, 0x065e }, // arabic vowel marks
 		{ 0x0670, 0x0670 }, // arabic vowel mark
 		{ 0x0f40, 0x0fbc }, // tibetan
 		{ 0x1100, 0x11ff }, // Korean jamo
 		{ 0x2800, 0x28ff }, // braille
 		{ 0x3041, 0xa700 }, // Chinese/Japanese.  Should never get here, but Mac OS 10.4's iswalpha seems to be broken, so just make sure
 	};
 	unsigned int ix;

 	if (iswalpha(c))
 		return 1;

 	if (c < 0x300)
 		return 0;

 	if ((c >= 0x901) && (c <= 0xdf7)) {
 		// Indic scripts: Devanagari, Tamil, etc
 		return ((c & 0x7f) < 0x64)
 		    || (lookupwchar(extra_indic_alphas, c) != 0)
 		    || ((c >= 0xd7a) && (c <= 0xd7f)); // Malayalam chillu characters
 	}

 	for (ix = 0; ix < sizeof(extra_ranges) / sizeof(extra_ranges[0]); ix++) {
 		if ((c >= extra_ranges[ix].first) && (c <= extra_ranges[ix].last))
 			return 1;
 	}

 	return 0;
 }

 int IsDigit09(unsigned int c)
 {
 	// 1 only for the ASCII digits '0' to '9', otherwise 0
 	return (c >= '0') && (c <= '9');
 }

 int IsDigit(unsigned int c)
 {
 	// Returns 1 for anything iswdigit() accepts, and also for the
 	// code range 0x966 to 0x96f; otherwise 0.
 	const int in_extra_range = (c >= 0x966) && (c <= 0x96f);

 	return (iswdigit(c) || in_extra_range) ? 1 : 0;
 }

 static int IsSpace(unsigned int c)
 {
 	// Like iswspace(), except that 0 is never a space and two extra
 	// ranges always are (those return exactly 1).
 	const int box_drawing = (c >= 0x2500) && (c < 0x25a0);      // box drawing characters
 	const int unicode_specials = (c >= 0xfff9) && (c <= 0xffff); // unicode specials

 	if (c == 0)
 		return 0;

 	return (box_drawing || unicode_specials) ? 1 : iswspace(c);
 }

 int isspace2(unsigned int c)
 {
 	// can't use isspace() because on Windows, isspace(0xe1) gives TRUE !
 	// Returns 1 for codes up to and including ' ' whose low byte is
 	// non-zero, otherwise 0.
 	if (c > ' ')
 		return 0;

 	return (c & 0xff) != 0;
 }

 void DeleteTranslator(Translator *tr)
 {
 	if (!tr) return;
@@ -230,136 +129,6 @@ int lookupwchar(const unsigned short *list, int c)
 	return 0;
 }

 int IsBracket(int c)
 {
 	// Characters 0x2014 to 0x201f always count as brackets (returns 1);
 	// anything else is decided by looking it up in the brackets[] list.
 	const int in_fixed_range = (c >= 0x2014) && (c <= 0x201f);

 	return in_fixed_range ? 1 : lookupwchar(brackets, c);
 }

 int utf8_nbytes(const char *buf)
 {
 	// Returns the number of bytes for the first UTF-8 character in buf,
 	// judged from its first byte only: below 0x80 gives 1, below 0xe0
 	// gives 2, below 0xf0 gives 3, anything else gives 4.

 	const unsigned char lead = (unsigned char)buf[0];
 	int n_bytes = 1;

 	if (lead >= 0x80)
 		n_bytes++;
 	if (lead >= 0xe0)
 		n_bytes++;
 	if (lead >= 0xf0)
 		n_bytes++;

 	return n_bytes;
 }

 int utf8_in2(int *c, const char *buf, int backwards)
 {
 	// Decode one character from a UTF-8 string.
 	// c: receives the integer value of the character
 	// buf: where to start looking; if it points at a continuation byte, the
 	//      scan first moves to the nearest non-continuation byte
 	// backwards: non-zero to move towards the start of the string in that scan
 	// Returns the number of bytes decoded, counted from the lead byte
 	// (bytes skipped while looking for the lead byte are not included).

 	static const unsigned char payload_mask[4] = { 0xff, 0x1f, 0x0f, 0x07 };
 	const int step = backwards ? -1 : 1;
 	int value;
 	int expected = 0; // continuation bytes announced by the lead byte
 	int consumed = 0; // continuation bytes actually read

 	// find the start of the next/previous character
 	while ((*buf & 0xc0) == 0x80)
 		buf += step;

 	value = *buf++;
 	if (value & 0x80) {
 		if ((value & 0xe0) == 0xc0)
 			expected = 1;
 		else if ((value & 0xf0) == 0xe0)
 			expected = 2;
 		else if ((value & 0xf8) == 0xf0)
 			expected = 3;

 		value &= payload_mask[expected];

 		// stop early if the string ends in the middle of the character
 		while ((consumed < expected) && (*buf != 0)) {
 			value = (value << 6) + (*buf++ & 0x3f);
 			consumed++;
 		}
 	}

 	*c = value;
 	return consumed + 1;
 }

 #pragma GCC visibility push(default)
 int utf8_in(int *c, const char *buf)
 {
 	/* Read one Unicode character from a UTF-8 string, scanning forwards.
 	 * Thin wrapper: calls utf8_in2(c, buf, 0).
 	 * Returns the number of UTF8 bytes used.
 	 * buf: first byte to decode; it is passed by value, so the caller
 	 *      advances its own pointer by the returned byte count
 	 * c: receives the integer value of the multibyte character, built by
 	 * skipping UTF-8 header bits of bytes in following way:
 	 * 2-byte character "ā":
 	 * hex            binary
 	 * c481           1100010010000001
 	 *    |           11000100  000001
 	 *    V              \    \ |    |
 	 * 0101           0000000100000001
 	 * 3-byte character "ꙅ":
 	 * ea9985 111010101001100110000101
 	 *            1010  011001  000101
 	 *    |       +  +--.\   \  |    |
 	 *    V        `--.  \`.  `.|    |
 	 *   A645         1010011001000101
 	 * 4-byte character "𠜎":
 	 * f0a09c8e 11110000101000001001110010001110
 	 *    V          000  100000  011100  001110
 	 *   02070e         000000100000011100001110
 	 */
 	return utf8_in2(c, buf, 0);
 }
 #pragma GCC visibility pop

 int utf8_out(unsigned int c, char *buf)
 {
 	// Encode the character code c into buf as UTF-8.
 	// Returns the number of bytes written (1 to 4); buf is not 0-terminated.
 	// Codes of 0x110000 and above are out of range and are written as a space.

 	static const unsigned char lead_marker[4] = { 0, 0xc0, 0xe0, 0xf0 };
 	int n_trail; // number of continuation bytes after the lead byte
 	int pos;

 	if (c >= 0x110000) {
 		buf[0] = ' ';
 		return 1;
 	}
 	if (c < 0x80) {
 		buf[0] = c; // plain ASCII
 		return 1;
 	}

 	n_trail = (c < 0x0800) ? 1 : ((c < 0x10000) ? 2 : 3);

 	// lead byte: length marker plus the most significant payload bits
 	buf[0] = lead_marker[n_trail] | (c >> (6 * n_trail));

 	// continuation bytes: 6 payload bits each, most significant group first
 	for (pos = 1; pos <= n_trail; pos++)
 		buf[pos] = 0x80 + ((c >> (6 * (n_trail - pos))) & 0x3f);

 	return n_trail + 1;
 }

 char *strchr_w(const char *s, int c)
 {
 	// return NULL for any non-ascii character
--- a/src/libespeak-ng/translate.h
+++ b/src/libespeak-ng/translate.h
@@ -64,9 +64,6 @@ extern "C"
 #define FLAG_ALT_TRANS      0x8000 // language specific
 #define FLAG_ALT2_TRANS    0x10000 // language specific
 #define FLAG_ALT3_TRANS    0x20000 // language specific
 #define FLAG_ALT4_TRANS    0x40000 // language specific
 #define FLAG_ALT5_TRANS    0x80000 // language specific
 #define FLAG_ALT6_TRANS   0x100000 // language specific
 #define FLAG_ALT7_TRANS   0x200000 // language specific

 #define FLAG_COMBINE      0x800000 // combine with the next word
@@ -661,21 +658,11 @@ extern int (*phoneme_callback)(const char *);
 #define LEADING_2_BITS 0xC0 // 0b11000000
 #define UTF8_TAIL_BITS 0x80 // 0b10000000

 ESPEAK_NG_API int utf8_in(int *c, const char *buf);
 int utf8_in2(int *c, const char *buf, int backwards);
 int utf8_out(unsigned int c, char *buf);
 int utf8_nbytes(const char *buf);

 int lookupwchar(const unsigned short *list, int c);
 char *strchr_w(const char *s, int c);
 int IsBracket(int c);
 void InitNamedata(void);
 void InitText(int flags);
 void InitText2(void);
 int IsDigit(unsigned int c);
 int IsDigit09(unsigned int c);
 int IsAlpha(unsigned int c);
 int isspace2(unsigned int c);
 ALPHABET *AlphabetFromChar(int c);

 Translator *SelectTranslator(const char *name);
@@ -686,8 +673,6 @@ void ProcessLanguageOptions(LANGUAGE_OPTIONS *langopts);

 void print_dictionary_flags(unsigned int *flags, char *buf, int buf_len);

 void ApplySpecialAttribute2(Translator *tr, char *phonemes, int dict_flags);

 int TranslateWord(Translator *tr, char *word1, WORD_TAB *wtab, char *word_out);
 void TranslateClause(Translator *tr, int *tone, char **voice_change);

--- a/src/libespeak-ng/translateword.c
+++ b/src/libespeak-ng/translateword.c
@@ -36,7 +36,7 @@
 #include "translate.h"
 #include "translateword.h"
 #include "common.h"               // for strncpy0
 #include "dictionary.h"           // for TranslateRules, LookupDictList, Cha...
 #include "dictionary.h"           // for TranslateRules, LookupDictList
 #include "numbers.h"              // for SetSpellingStress, ...
 #include "phoneme.h"              // for phonSWITCH, PHONEME_TAB, phonPAUSE_...
 #include "readclause.h"           // for towlower2
@@ -46,10 +46,14 @@


 static void addPluralSuffixes(int flags, Translator *tr, char last_char, char *word_phonemes);
 static void ApplySpecialAttribute2(Translator *tr, char *phonemes, int dict_flags);
 static void ChangeWordStress(Translator *tr, char *word, int new_stress);
 static int CheckDottedAbbrev(char *word1);
 static int NonAsciiNumber(int letter);
 static char *SpeakIndividualLetters(Translator *tr, char *word, char *phonemes, int spell_word, ALPHABET *current_alphabet, char word_phonemes[]);
 static int TranslateLetter(Translator *tr, char *word, char *phonemes, int control, ALPHABET *current_alphabet);
 static int Unpronouncable(Translator *tr, char *word, int posn);
 static int Unpronouncable2(Translator *tr, char *word);

 int TranslateWord3(Translator *tr, char *word_start, WORD_TAB *wtab, char *word_out, bool *any_stressed_words, ALPHABET *current_alphabet, char word_phonemes[], size_t size_word_phonemes)
 {
@@ -667,6 +671,82 @@ int TranslateWord3(Translator *tr, char *word_start, WORD_TAB *wtab, char *word_
 }


 // If *ph holds the phoneme code of mnemonic 'from', overwrite it with the
 // phoneme code of mnemonic 'to'.
 static void SwapPhonemeCode(char *ph, int from, int to)
 {
 	if (*ph == PhonemeCode(from))
 		*ph = PhonemeCode(to);
 }

 /* Fix-up applied after the translation is complete.
  *
  * tr:         translator; nothing is done unless bit 1 (value 2) of
  *             tr->langopts.param[LOPT_ALT] is set
  * phonemes:   NUL-terminated phoneme string, modified in place
  * dict_flags: dictionary flags; FLAG_ALT2_TRANS selects the direction
  *
  * Only the phoneme that follows the first phonSTRESS_P marker is examined.
  * With FLAG_ALT2_TRANS set, 'E' becomes 'e' and 'O' becomes 'o'; otherwise
  * 'e' becomes 'E' and 'o' becomes 'O'.
  */
 void ApplySpecialAttribute2(Translator *tr, char *phonemes, int dict_flags)
 {
 	// A marker in the final position has no following phoneme, so the scan
 	// stops one short of the end.
 	const int last = (int)strlen(phonemes) - 1;
 	int pos;

 	if ((tr->langopts.param[LOPT_ALT] & 2) == 0)
 		return;

 	for (pos = 0; pos < last; pos++) {
 		char *next;

 		if (phonemes[pos] != phonSTRESS_P)
 			continue;

 		next = &phonemes[pos + 1];
 		if (dict_flags & FLAG_ALT2_TRANS) {
 			SwapPhonemeCode(next, 'E', 'e');
 			SwapPhonemeCode(next, 'O', 'o');
 		} else {
 			SwapPhonemeCode(next, 'e', 'E');
 			SwapPhonemeCode(next, 'o', 'O');
 		}
 		return; // only the first marker is handled
 	}
 }


 /* Change the stress of an already translated word.
  *
  * tr:         translator, passed through to GetVowelStress()
  * word:       NUL-terminated phoneme string; rewritten in place from a local
  *             copy, with stress phonemes inserted before syllabic vowels
  * new_stress: stress level to apply (compared against STRESS_IS_PRIMARY)
  *
  * If new_stress is at least STRESS_IS_PRIMARY, the first vowel whose stress
  * is >= the word's current maximum is set to new_stress. Otherwise every
  * vowel whose stress is above new_stress is lowered to new_stress.
  */
 static void ChangeWordStress(Translator *tr, char *word, int new_stress)
 {
 	int ix;
 	unsigned char *p;
 	int max_stress;
 	int vowel_count; // num of vowels + 1
 	int stressed_syllable = 0; // position of stressed syllable
 	unsigned char phonetic[N_WORD_PHONEMES];
 	signed char vowel_stress[N_WORD_PHONEMES/2];

 	// Bounded copy: an over-long word is truncated rather than allowed to
 	// overrun the local buffer. strncpy() does not terminate on truncation,
 	// so the terminator is written explicitly.
 	strncpy((char *)phonetic, word, sizeof(phonetic) - 1);
 	phonetic[sizeof(phonetic) - 1] = 0;

 	// NOTE(review): presumably GetVowelStress() also strips the existing stress
 	// markers from phonetic - confirm against its definition.
 	max_stress = GetVowelStress(tr, phonetic, vowel_stress, &vowel_count, &stressed_syllable, 0);

 	// vowel_stress[] is used from index 1 up to vowel_count-1
 	if (new_stress >= STRESS_IS_PRIMARY) {
 		// promote to primary stress
 		for (ix = 1; ix < vowel_count; ix++) {
 			if (vowel_stress[ix] >= max_stress) {
 				vowel_stress[ix] = new_stress;
 				break;
 			}
 		}
 	} else {
 		// remove primary stress
 		for (ix = 1; ix < vowel_count; ix++) {
 			// strictly greater: vowels already at or below new_stress keep their level
 			if (vowel_stress[ix] > new_stress)
 				vowel_stress[ix] = new_stress;
 		}
 	}

 	// write out phonemes
 	ix = 1;
 	p = phonetic;
 	while (*p != 0) {
 		if ((phoneme_tab[*p]->type == phVOWEL) && !(phoneme_tab[*p]->phflags & phNONSYLLABIC)) {
 			// emit a stress phoneme for diminished stress and for anything above unstressed
 			if ((vowel_stress[ix] == STRESS_IS_DIMINISHED) || (vowel_stress[ix] > STRESS_IS_UNSTRESSED))
 				*word++ = stress_phonemes[(unsigned char)vowel_stress[ix]];

 			ix++;
 		}
 		*word++ = *p++;
 	}
 	*word = 0;
 }

 static char *SpeakIndividualLetters(Translator *tr, char *word, char *phonemes, int spell_word, ALPHABET *current_alphabet, char word_phonemes[])
 {
 	int posn = 0;
@@ -1034,3 +1114,92 @@ static int NonAsciiNumber(int letter)
 	}
 	return -1;
 }

 /* Decide whether a word should be spelled out letter by letter.
  *
  * tr:   translator; its letter_bits_offset, langopts.param[LOPT_UNPRONOUNCABLE]
  *       and langopts.max_initial_consonants drive the test
  * word: UTF-8 text of the word; scanning stops at a space or NUL
  * posn: only tested for > 0 when an apostrophe is met (presumably the
  *       position of the word in its clause - confirm against the caller)
  *
  * Returns 1 if the word is considered unpronounceable, otherwise 0.
  */
 static int Unpronouncable(Translator *tr, char *word, int posn)
 {
 	/* Determines whether a word is 'unpronouncable', i.e. whether it should
 	    be spoken as individual letters.

 	    This function may be language specific. This is a generic version.
 	 */

 	int c;
 	int c1 = 0; // first character of the word
 	int vowel_posn = 9; // keeps this value if the scan finds no vowel
 	int index;
 	int count;
 	ALPHABET *alphabet;

 	utf8_in(&c, word); // decode the first character
 	if ((tr->letter_bits_offset > 0) && (c < 0x241)) {
 		// Latin characters for a language with a non-latin alphabet
 		return 0;  // so we can re-translate the word as English
 	}

 	if (((alphabet = AlphabetFromChar(c)) != NULL)  && (alphabet->offset != tr->letter_bits_offset)) {
 		// Character is not in our alphabet
 		return 0;
 	}

 	// option value 1: never report a word as unpronounceable
 	if (tr->langopts.param[LOPT_UNPRONOUNCABLE] == 1)
 		return 0;

 	// empty word, or a word starting with an apostrophe (tests the first byte)
 	if (((c = *word) == ' ') || (c == 0) || (c == '\''))
 		return 0;

 	// count the characters up to and including the first vowel
 	index = 0;
 	count = 0;
 	for (;;) {
 		index += utf8_in(&c, &word[index]);
 		if ((c == 0) || (c == ' '))
 			break;

 		if ((c == '\'') && ((count > 1) || (posn > 0)))
 			break; // "tv'" but not "l'"

 		if (count == 0)
 			c1 = c;

 		if ((c == '\'') && (tr->langopts.param[LOPT_UNPRONOUNCABLE] == 3)) {
 			// don't count apostrophe
 		} else
 			count++;

 		if (IsVowel(tr, c)) {
 			vowel_posn = count; // position of the first vowel
 			break;
 		}

 		// a non-letter before any vowel: do not treat the word as unpronounceable
 		if ((c != '\'') && !iswalpha(c))
 			return 0;
 	}

 	if ((vowel_posn > 2) && (tr->langopts.param[LOPT_UNPRONOUNCABLE] == 2)) {
 		// Look up the unpronounceable rules in *_rules
 		return Unpronouncable2(tr, word);
 	}

 	// the option value is also compared with the first character's code
 	if (c1 == tr->langopts.param[LOPT_UNPRONOUNCABLE])
 		vowel_posn--; // disregard this as the initial letter when counting

 	if (vowel_posn > (tr->langopts.max_initial_consonants+1))
 		return 1; // no vowel, or no vowel in first few letters

 	return 0;
 }

 /* Rule-based unpronounceable test.
  *
  * tr:   translator handed to TranslateRules()
  * word: text of the word; the byte just before it (word[-1]) must be
  *       writable, because it is temporarily replaced by a space
  *
  * Calls TranslateRules() with FLAG_UNPRON_TEST and returns 1 when the result
  * is 0 or has SUFX_UNPRON set, otherwise 0.
  */
 static int Unpronouncable2(Translator *tr, char *word)
 {
 	char ph_buf[N_WORD_PHONEMES];
 	char *before = word - 1;
 	int saved;
 	int end_flags;

 	ph_buf[0] = '\0';

 	// ensure there is a space before the "word"; the original byte is put back afterwards
 	saved = *before;
 	*before = ' ';
 	end_flags = TranslateRules(tr, word, ph_buf, sizeof(ph_buf), NULL, FLAG_UNPRON_TEST, NULL);
 	*before = saved;

 	return (end_flags == 0) || ((end_flags & SUFX_UNPRON) != 0);
 }