7 years ago · 55c64036e0
--- a/src/libespeak-ng/compiledict.c
+++ b/src/libespeak-ng/compiledict.c
@@ -1369,6 +1369,7 @@ static espeak_ng_STATUS compile_dictrules(FILE *f_in, FILE *f_out, char *fname_t
 			if (compile_mode == 2) {
 				// end of the character replacements section
 				fwrite(&n_rules, 1, 4, f_out); // write a zero word to terminate the replacemenmt list
 				fputc(RULE_GROUP_END, f_out);
 				compile_mode = 0;
 			}

@@ -1447,33 +1448,23 @@ static espeak_ng_STATUS compile_dictrules(FILE *f_in, FILE *f_out, char *fname_t
 			}
 			break;
 		case 2: //  .replace
 		{
 			int replace1;
 			int replace2;
 			char *p;
 			p = (unsigned char *)buf;

 			p = buf;
 			replace1 = 0;
 			replace2 = 0;
 			while (isspace2(*p)) p++;
 			ix = 0;
 			while ((unsigned char)(*p) > 0x20) { // not space or zero-byte
 				p += utf8_in(&c, p);
 				replace1 += (c << ix);
 				ix += 16;
 			}
 			while (isspace2(*p)) p++;
 			ix = 0;
 			while ((unsigned char)(*p) > 0x20) {
 				p += utf8_in(&c, p);
 				replace2 += (c << ix);
 				ix += 16;
 			}
 			if (replace1 != 0) {
 				Write4Bytes(f_out, replace1); // write as little-endian
 				Write4Bytes(f_out, replace2); // if big-endian, reverse the bytes in LoadDictionary()
 			if ((unsigned char)(*p) > 0x20) {
 				while ((unsigned char)(*p) > 0x20) { // not space or zero-byte
 					fputc(*p, f_out);
 					p++;
 				}
 				fputc(0, f_out);

 				while (isspace2(*p)) p++;
 				while ((unsigned char)(*p) > 0x20) {
 					fputc(*p, f_out);
 					p++;
 				}
 				fputc(0, f_out);
 			}
 		}
 			break;
 		}
 	}
--- a/src/libespeak-ng/dictionary.c
+++ b/src/libespeak-ng/dictionary.c
@@ -149,20 +149,11 @@ static void InitGroups(Translator *tr)

 		if (p[0] == RULE_REPLACEMENTS) {
 			p = (char *)(((intptr_t)p+4) & ~3); // advance to next word boundary
 			tr->langopts.replace_chars = (unsigned int *)p;
 			tr->langopts.replace_chars = (unsigned char *)p;
 			while (*(unsigned int *)p != 0)
 				p += 8; // find the end of the replacement list, each entry is 2 words.
 			p += 4;

 #ifdef ARCH_BIG
 			pw = (unsigned int *)(tr->langopts.replace_chars);
 			while (*pw != 0) {
 				*pw = Reverse4Bytes(*pw);
 				pw++;
 				*pw = Reverse4Bytes(*pw);
 				pw++;
 			}
 #endif
 				p++;
 			while (*p != RULE_GROUP_END) p++;
 			p++;
 			continue;
 		}

--- a/src/libespeak-ng/translate.c
+++ b/src/libespeak-ng/translate.c
@@ -1790,15 +1790,38 @@ static int EmbeddedCommand(unsigned int *source_index_out)
 	return 1;
 }

 // Look up the replacement for character 'c' in the translator's replacement list.
 //
 // The list is walked as a sequence of entries, each consisting of a
 // NUL-terminated UTF-8 'from' string followed by a NUL-terminated UTF-8 'to'
 // string. The walk stops at the first entry position where four consecutive
 // zero bytes are found.
 //
 // tr:          translator whose langopts.replace_chars list is searched; the
 //              pointer is dereferenced without a NULL check
 // c:           character code to look up (compared with the first character
 //              of each 'from' string)
 // nextc:       the character following 'c'; it is passed through towlower2()
 //              and compared with the second character of a two-character
 //              'from' string
 // ignore_next: set to true when a two-character 'from' string matched;
 //              left untouched otherwise
 //
 // Returns a pointer to the start of the matching entry's 'to' string, or NULL
 // if no entry matches.
 static const char *
 FindReplacementChars(Translator *tr, unsigned int c, unsigned int nextc, bool *ignore_next) {
 	unsigned int uc = 0;
 	const char *from = (const char *)tr->langopts.replace_chars;

 	// Entries are variable-length strings, so 'from' is generally not aligned
 	// to a word boundary. Test the four terminator bytes one at a time rather
 	// than loading them through an 'unsigned int *', which would be a
 	// misaligned (and type-punned) read.
 	while ((from[0] | from[1] | from[2] | from[3]) != 0) {
 		from += utf8_in((int *)&uc, from);
 		if (c == uc) {
 			// single-character 'from' string: 'to' starts after the terminator
 			if (*from == 0) return from + 1;

 			// two-character 'from' string: the second character must match
 			// the (lower-cased) next input character
 			from += utf8_in((int *)&uc, from);
 			if (*from == 0 && uc == (unsigned int)towlower2(nextc, tr)) {
 				*ignore_next = true;
 				return from + 1;
 			}
 		}

 		// replacement 'from' string (skip the remaining part, if any)
 		while (*from != '\0') from++;
 		from++;

 		// replacement 'to' string
 		while (*from != '\0') from++;
 		from++;
 	}
 	return NULL;
 }

 // handle .replace rule in xx_rules file
 static int SubstituteChar(Translator *tr, unsigned int c, unsigned int next_in, int *insert, int *wordflags)
 {
 	int ix;
 	unsigned int word;
 	unsigned int new_c, c2 = ' ', c_lower;
 	int upper_case = 0;
 	static bool ignore_next = false;
 	const unsigned int *replace_chars;

 	if (ignore_next) {
 		ignore_next = false;
@@ -1806,7 +1829,7 @@ static int SubstituteChar(Translator *tr, unsigned int c, unsigned int next_in,
 	}
 	if (c == 0) return 0;

 	if ((replace_chars = tr->langopts.replace_chars) == NULL)
 	if (tr->langopts.replace_chars == NULL)
 		return c;

 	// there is a list of character codes to be substituted with alternative codes
@@ -1816,32 +1839,18 @@ static int SubstituteChar(Translator *tr, unsigned int c, unsigned int next_in,
 		upper_case = 1;
 	}

 	new_c = 0;
 	for (ix = 0; (word = replace_chars[ix]) != 0; ix += 2) {
 		if (c_lower == (word & 0xffff)) {
 			if ((word >> 16) == 0) {
 				new_c = replace_chars[ix+1];
 				break;
 			}
 			if ((word >> 16) == (unsigned int)towlower2(next_in, tr)) {
 				new_c = replace_chars[ix+1];
 				ignore_next = true;
 				break;
 			}
 		}
 	}

 	if (new_c == 0)
 	const char *to = FindReplacementChars(tr, c_lower, next_in, &ignore_next);
 	if (to == NULL)
 		return c; // no substitution

 	if (new_c & 0xffe00000) {
 	to += utf8_in((int *)&new_c, to);
 	if (*to != 0) {
 		// there is a second character to be inserted
 		// don't convert the case of the second character unless the next letter is also upper case
 		c2 = new_c >> 16;
 		to += utf8_in((int *)&c2, to);
 		if (upper_case && iswupper(next_in))
 			c2 = ucd_toupper(c2);
 		*insert = c2;
 		new_c &= 0xffff;
 	}

 	if (upper_case)
--- a/src/libespeak-ng/translate.h
+++ b/src/libespeak-ng/translate.h
@@ -556,7 +556,7 @@ typedef struct {
 	bool textmode;          // the meaning of FLAG_TEXTMODE is reversed (to save data when *_list file is compiled)
 	char dotless_i;         // uses letter U+0131
 	int listx;    // compile *_listx after *list
 	const unsigned int *replace_chars;      // characters to be substitutes
 	const unsigned char *replace_chars;      // characters to be substitutes
 	int our_alphabet;           // offset for main alphabet (if not set in letter_bits_offset)
 	int alt_alphabet;       // offset for another language to recognize
 	int alt_alphabet_lang;  // language for the alt_alphabet