8 years ago · 5975f07095
--- a/src/libespeak-ng/compiledict.c
+++ b/src/libespeak-ng/compiledict.c
@@ -26,6 +26,7 @@
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
 #include <wctype.h>

 #include <espeak-ng/espeak_ng.h>
 #include <espeak-ng/speak_lib.h>
@@ -585,7 +586,7 @@ static int compile_line(char *linebuf, char *dict_line, int *hash)
 			ix = utf8_in(&c2, p);
 			if (c2 == 0)
 				break;
 			if (iswupper2(c2))
 			if (iswupper(c2))
 				utf8_out(towlower2(c2), p);
 			else
 				all_upper_case = 0;
--- a/src/libespeak-ng/dictionary.c
+++ b/src/libespeak-ng/dictionary.c
@@ -24,6 +24,7 @@
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
 #include <wctype.h>
 #include <wchar.h>

 #include <espeak-ng/espeak_ng.h>
@@ -614,7 +615,7 @@ const char *GetTranslatedPhonemeString(int phoneme_mode)
 			p += utf8_in(&c, p);
 			if (use_tie != 0) {
 				// look for non-initial alphabetic character, but not diacritic, superscript etc.
 				if ((count > 0) && !(flags & (1 << (count-1))) && ((c < 0x2b0) || (c > 0x36f)) && iswalpha2(c))
 				if ((count > 0) && !(flags & (1 << (count-1))) && ((c < 0x2b0) || (c > 0x36f)) && iswalpha(c))
 					buf += utf8_out(use_tie, buf);
 			}
 			buf += utf8_out(c, buf);
@@ -832,7 +833,7 @@ int Unpronouncable(Translator *tr, char *word, int posn)
 			break;
 		}

 		if ((c != '\'') && !iswalpha2(c))
 		if ((c != '\'') && !iswalpha(c))
 			return 0;
 	}

@@ -1787,7 +1788,7 @@ static void MatchRule(Translator *tr, char *word[], char *word_start, int group_
 						failed = 1;
 					break;
 				case RULE_NONALPHA:
 					if (!iswalpha2(letter_w)) {
 					if (!iswalpha(letter_w)) {
 						add_points = (21-distance_right);
 						post_ptr += letter_xbytes;
 					} else
@@ -1996,7 +1997,7 @@ static void MatchRule(Translator *tr, char *word[], char *word_start, int group_
 						failed = 1;
 					break;
 				case RULE_NONALPHA:
 					if (!iswalpha2(letter_w)) {
 					if (!iswalpha(letter_w)) {
 						add_points = (21-distance_right);
 						pre_ptr -= letter_xbytes;
 					} else
@@ -2300,7 +2301,7 @@ int TranslateRules(Translator *tr, char *p_start, char *phonemes, int ph_size, c

 						if (tr->letter_bits_offset > 0) {
 							// not a Latin alphabet, switch to the default Latin alphabet language
 							if ((letter <= 0x241) && iswalpha2(letter)) {
 							if ((letter <= 0x241) && iswalpha(letter)) {
 								sprintf(phonemes, "%c%s", phonSWITCH, tr->langopts.ascii_language);
 								return 0;
 							}
--- a/src/libespeak-ng/numbers.c
+++ b/src/libespeak-ng/numbers.c
@@ -701,7 +701,7 @@ int TranslateLetter(Translator *tr, char *word, char *phonemes, int control)

 	if (control & 2) {
 		// include CAPITAL information
 		if (iswupper2(letter))
 		if (iswupper(letter))
 			Lookup(tr, "_cap", capital);
 	}
 	letter = towlower2(letter);
@@ -845,7 +845,7 @@ int TranslateLetter(Translator *tr, char *word, char *phonemes, int control)
 		if (ph_buf[0] == 0) {
 			speak_letter_number = 1;
 			if (!(al_flags & AL_NO_SYMBOL)) {
 				if (iswalpha2(letter))
 				if (iswalpha(letter))
 					Lookup(translator, "_?A", ph_buf);

 				if ((ph_buf[0] == 0) && !iswspace(letter))
@@ -2055,7 +2055,7 @@ static int TranslateNumber_1(Translator *tr, char *word, char *ph_out, unsigned
 		if ((tr->langopts.numbers & NUM_NOPAUSE) && (next_char == ' '))
 			utf8_in(&next_char, p);

 		if (!iswalpha2(next_char) && (thousands_exact == 0))
 		if (!iswalpha(next_char) && (thousands_exact == 0))
 			strcat(ph_out, str_pause); // don't add pause for 100s,  6th, etc.
 	}

--- a/src/libespeak-ng/readclause.c
+++ b/src/libespeak-ng/readclause.c
@@ -289,88 +289,13 @@ static const short wchar_toupper[] = {
 	0, 0
 };

 // Alphabetic test that relies on internal table data for iswalpha up to U+024F,
 // because iswalpha() on Windows is unreliable (U+AA, U+BA).
 // ASCII is delegated to isalpha(); code points above MAX_WALPHA fall back to
 // iswalpha(), except for the U+3041..U+A700 range which is always alphabetic.
 // Returns non-zero when c is treated as a letter.
 int iswalpha2(int c)
 {
 	if (c >= 0x80) {
 		if ((c > 0x3040) && (c <= 0xa700))
 			return 1; // japanese, chinese characters

 		// outside the table: ask the C library, otherwise use the table entry
 		return (c > MAX_WALPHA) ? iswalpha(c) : walpha_tab[c-0x80];
 	}
 	return isalpha(c);
 }

 // Lower-case test. ASCII is delegated to islower() and code points above
 // MAX_WALPHA to iswlower(); the range in between is looked up in walpha_tab,
 // where an entry of 0xff marks a lower-case letter.
 // Returns non-zero for lower-case letters, 0 otherwise.
 int iswlower2(int c)
 {
 	if ((c >= 0x80) && (c <= MAX_WALPHA))
 		return walpha_tab[c-0x80] == 0xff;

 	return (c < 0x80) ? islower(c) : iswlower(c);
 }

 // Upper-case test. ASCII is delegated to isupper() and code points above
 // MAX_WALPHA to iswupper(); in between, a walpha_tab entry that is non-zero
 // and below 0xfe identifies an upper-case letter.
 // Returns non-zero for upper-case letters, 0 otherwise.
 int iswupper2(int c)
 {
 	int entry;

 	if (c < 0x80)
 		return isupper(c);
 	if (c > MAX_WALPHA)
 		return iswupper(c);

 	entry = walpha_tab[c-0x80];
 	return (entry > 0) && (entry < 0xfe);
 }

 // Convert a character to lower case.
 //
 // ASCII goes through tolower() and code points above MAX_WALPHA through
 // towlower(). Everything in between is resolved from walpha_tab, whose entry
 // is either a marker (>= 0xfe: not an upper-case letter; 0xfd: look the
 // character up in the wchar_tolower pair table) or the offset to add to the
 // code point to reach its lower-case form.
 // When the current translator sets langopts.dotless_i, 'I' maps to U+0131.
 //
 // Returns the lower-case code point, or c unchanged when it has none.
 int towlower2(unsigned int c)
 {
 	int x;
 	int ix;

 	// check for non-standard upper to lower case conversions; this must run
 	// before the ASCII shortcut below, which would otherwise consume 'I'
 	if (c == 'I' && translator->langopts.dotless_i)
 		return 0x131; // I -> ı

 	if (c < 0x80)
 		return tolower(c);

 	if (c > MAX_WALPHA)
 		return towlower(c);

 	if ((x = walpha_tab[c-0x80]) >= 0xfe)
 		return c; // this is not an upper case letter

 	if (x == 0xfd) {
 		// special cases, lookup translation table (pairs of: upper, lower)
 		for (ix = 0; wchar_tolower[ix] != 0; ix += 2) {
 			if (wchar_tolower[ix] == (int)c)
 				return wchar_tolower[ix+1];
 		}
 		// NOTE(review): a 0xfd entry that is missing from wchar_tolower falls
 		// through and gets 0xfd added as an offset - confirm the table is complete
 	}
 	return c + x; // convert to lower case
 }

 // Convert a character to upper case.
 //
 // Code points above MAX_WALPHA are delegated to towupper(). Below that the
 // result is derived from towlower2(): the candidates c-32 and c-1 are accepted
 // when lower-casing them yields c, and the remaining cases are read from the
 // wchar_toupper pair table (pairs of: lower, upper; terminated by 0).
 //
 // Returns the upper-case code point, or c unchanged when none is found.
 int towupper2(unsigned int c)
 {
 	int ix;

 	if (c > MAX_WALPHA)
 		return towupper(c);

 	// check whether a previous character code is the upper-case equivalent of this character
 	if (towlower2(c-32) == (int)c)
 		return c-32; // yes, use it
 	if (towlower2(c-1) == (int)c)
 		return c-1;

 	for (ix = 0; wchar_toupper[ix] != 0; ix += 2) {
 		if (wchar_toupper[ix] == (int)c)
 			return wchar_toupper[ix+1];
 	}
 	return c; // no upper-case equivalent found
 }

 static int IsRomanU(unsigned int c)
@@ -2015,7 +1940,7 @@ int ReadClause(Translator *tr, FILE *f_in, char *buf, short *charix, int *charix
 					while (!Eof() && (c1 != '>'))
 						c1 = GetC();
 					c2 = ' ';
 				} else if ((c2 == '/') || iswalpha2(c2)) {
 				} else if ((c2 == '/') || iswalpha(c2)) {
 					// check for space in the output buffer for embedded commands produced by the SSML tag
 					if (ix > (n_buf - 20)) {
 						// Perhaps not enough room, end the clause before the SSML tag
@@ -2167,9 +2092,9 @@ int ReadClause(Translator *tr, FILE *f_in, char *buf, short *charix, int *charix
 			}
 		}

 		if (iswupper2(c1)) {
 		if (iswupper(c1)) {
 			tr->clause_upper_count++;
 			if ((option_capitals == 2) && (sayas_mode == 0) && !iswupper2(cprev)) {
 			if ((option_capitals == 2) && (sayas_mode == 0) && !iswupper(cprev)) {
 				char text_buf[40];
 				char text_buf2[30];
 				if (LookupSpecial(tr, "_cap", text_buf2) != NULL) {
@@ -2181,7 +2106,7 @@ int ReadClause(Translator *tr, FILE *f_in, char *buf, short *charix, int *charix
 					}
 				}
 			}
 		} else if (iswalpha2(c1))
 		} else if (iswalpha(c1))
 			tr->clause_lower_count++;

 		if (option_phoneme_input) {
@@ -2238,7 +2163,7 @@ int ReadClause(Translator *tr, FILE *f_in, char *buf, short *charix, int *charix
 				// i.e. is dot followed by an upper-case letter?

 				if (!iswspace(c1)) {
 					if (!IsAlpha(c1) || !iswlower2(c1)) {
 					if (!IsAlpha(c1) || !iswlower(c1)) {
 						UngetC(c2);
 						ungot_char2 = c1;
 						buf[end_clause_index] = ' '; // delete the end-clause punctuation
@@ -2320,7 +2245,7 @@ int ReadClause(Translator *tr, FILE *f_in, char *buf, short *charix, int *charix
 					punct_data |= CLAUSE_DOT;

 				if (nl_count == 0) {
 					if ((c1 == ',') && (cprev == '.') && (tr->translator_name == L('h', 'u')) && iswdigit(cprev2) && (iswdigit(c_next) || (iswlower2(c_next)))) {
 					if ((c1 == ',') && (cprev == '.') && (tr->translator_name == L('h', 'u')) && iswdigit(cprev2) && (iswdigit(c_next) || (iswlower(c_next)))) {
 						// lang=hu, fix for ordinal numbers, eg:  "december 2., szerda", ignore ',' after ordinal number
 						c1 = CHAR_COMMA_BREAK;
 						is_end_clause = 0;
@@ -2332,11 +2257,11 @@ int ReadClause(Translator *tr, FILE *f_in, char *buf, short *charix, int *charix
 							// dot after a number indicates an ordinal number
 							if (!iswdigit(cprev))
 								is_end_clause = 0; // Roman number followed by dot
 							else if (iswlower2(c_next) || (c_next == '-')) // hyphen is needed for lang-hu (eg. 2.-kal)
 							else if (iswlower(c_next) || (c_next == '-')) // hyphen is needed for lang-hu (eg. 2.-kal)
 								is_end_clause = 0; // only if followed by lower-case, (or if there is a XML tag)
 						} else if (c_next == '\'')
 							is_end_clause = 0;    // eg. u.s.a.'s
 						if (iswlower2(c_next)) {
 						if (iswlower(c_next)) {
 							// next word has no capital letter, this dot is probably from an abbreviation
 							is_end_clause = 0;
 						}
--- a/src/libespeak-ng/translate.c
+++ b/src/libespeak-ng/translate.c
@@ -363,7 +363,7 @@ int IsAlpha(unsigned int c)
 		0
 	};

 	if (iswalpha2(c))
 	if (iswalpha(c))
 		return 1;

 	if (c < 0x300)
@@ -610,7 +610,7 @@ int IsAllUpper(const char *word)
 	int c;
 	while ((*word != 0) && !isspace2(*word)) {
 		word += utf8_in(&c, word);
 		if (!iswupper2(c))
 		if (!iswupper(c))
 			return 0;
 	}
 	return 1;
@@ -904,7 +904,7 @@ int TranslateWord(Translator *tr, char *word_start, WORD_TAB *wtab, char *word_o
 			}
 		}

 		if ((wflags & FLAG_ALL_UPPER) && (word_length > 1) && iswalpha2(first_char)) {
 		if ((wflags & FLAG_ALL_UPPER) && (word_length > 1) && iswalpha(first_char)) {
 			if ((option_tone_flags & OPTION_EMPHASIZE_ALLCAPS) && !(dictionary_flags[0] & FLAG_ABBREV)) {
 				// emphasize words which are in capitals
 				emphasize_allcaps = FLAG_EMPHASIZED;
@@ -1331,7 +1331,7 @@ int TranslateWord(Translator *tr, char *word_start, WORD_TAB *wtab, char *word_o
 			tr->expect_past--;
 	}

 	if ((word_length == 1) && (tr->translator_name == L('e', 'n')) && iswalpha2(first_char) && (first_char != 'i')) {
 	if ((word_length == 1) && (tr->translator_name == L('e', 'n')) && iswalpha(first_char) && (first_char != 'i')) {
 		// English Specific !!!!
 		// any single letter before a dot is an abbreviation, except 'I'
 		dictionary_flags[0] |= FLAG_ALLOW_DOT;
@@ -1587,7 +1587,7 @@ static int TranslateWord2(Translator *tr, char *word, WORD_TAB *wtab, int pre_pa
 			while (*p2 != ' ') p2++;

 			utf8_in(&c_word2, p2+1); // first character of the next word;
 			if (!iswalpha2(c_word2))
 			if (!iswalpha(c_word2))
 				ok = 0;

 			if (ok != 0) {
@@ -1957,7 +1957,7 @@ static int SubstituteChar(Translator *tr, unsigned int c, unsigned int next_in,

 	// there is a list of character codes to be substituted with alternative codes

 	if (iswupper2(c_lower = c)) {
 	if (iswupper(c_lower = c)) {
 		c_lower = towlower2(c);
 		upper_case = 1;
 	}
@@ -1984,14 +1984,14 @@ static int SubstituteChar(Translator *tr, unsigned int c, unsigned int next_in,
 		// there is a second character to be inserted
 		// don't convert the case of the second character unless the next letter is also upper case
 		c2 = new_c >> 16;
 		if (upper_case && iswupper2(next_in))
 			c2 = towupper2(c2);
 		if (upper_case && iswupper(next_in))
 			c2 = towupper(c2);
 		*insert = c2;
 		new_c &= 0xffff;
 	}

 	if (upper_case)
 		new_c = towupper2(new_c);
 		new_c = towupper(new_c);

 	*wordflags |= FLAG_CHAR_REPLACED;
 	return new_c;
@@ -2046,7 +2046,7 @@ static int TranslateChar(Translator *tr, char *ptr, int prev_in, unsigned int c,
 	case L('n', 'l'):
 		// look for 'n  and replace by a special character (unicode: schwa)

 		if (!iswalpha2(prev_in)) {
 		if (!iswalpha(prev_in)) {
 			utf8_in(&next2, &ptr[1]);

 			if ((c == '\'') && IsSpace(next2)) {
@@ -2400,7 +2400,7 @@ void *TranslateClause(Translator *tr, FILE *f_text, const void *vp_input, int *t
 						if (!IsBracket(prev_out)) // ?? perhaps only set FLAG_NOSPACE for . - /  (hyphenated words, URLs, etc)
 							next_word_flags |= FLAG_NOSPACE;
 					} else {
 						if (iswupper2(c))
 						if (iswupper(c))
 							word_flags |= FLAG_FIRST_UPPER;

 						if ((prev_out == ' ') && iswdigit(sbuf[ix-2]) && !iswdigit(prev_in)) {
@@ -2430,7 +2430,7 @@ void *TranslateClause(Translator *tr, FILE *f_text, const void *vp_input, int *t
 					}
 				}

 				if (iswupper2(c)) {
 				if (iswupper(c)) {
 					c = towlower2(c);

 					if ((j = tr->langopts.param[LOPT_CAPS_IN_WORD]) > 0) {
@@ -2440,7 +2440,7 @@ void *TranslateClause(Translator *tr, FILE *f_text, const void *vp_input, int *t
 							syllable_marked = 1;
 						}
 					} else {
 						if (iswlower2(prev_in)) {
 						if (iswlower(prev_in)) {
 							// lower case followed by upper case in a word
 							if (UpperCaseInWord(tr, &sbuf[ix], c) == 1) {
 								// convert to lower case and continue
@@ -2450,7 +2450,7 @@ void *TranslateClause(Translator *tr, FILE *f_text, const void *vp_input, int *t
 								space_inserted = 1;
 								prev_in_save = c;
 							}
 						} else if ((c != ' ') && iswupper2(prev_in) && iswlower2(next_in)) {
 						} else if ((c != ' ') && iswupper(prev_in) && iswlower(next_in)) {
 							int next2_in;
 							utf8_in(&next2_in, &source[source_index + next_in_nbytes]);

--- a/src/libespeak-ng/translate.h
+++ b/src/libespeak-ng/translate.h
@@ -724,12 +724,8 @@ int IsDigit09(unsigned int c);
 int IsAlpha(unsigned int c);
 int IsVowel(Translator *tr, int c);
 int IsSuperscript(int letter);
 int isspace2(unsigned int c);
 // Character classification / case conversion helpers that consult internal
 // tables before falling back to the <wctype.h> functions
 int iswalpha2(int c);
 int iswlower2(int c);
 int iswupper2(int c);
 int towlower2(unsigned int c); // Supports Turkish I
 int towupper2(unsigned int c);
 const char *GetTranslatedPhonemeString(int phoneme_mode);
 const char *WordToString2(unsigned int word);
 ALPHABET *AlphabetFromChar(int c);