2 years ago · 1e2404714d
--- a/src/libespeak-ng/compiledict.c
+++ b/src/libespeak-ng/compiledict.c
@@ -42,27 +42,6 @@
 #include "speech.h"		// for path_home
 #include "synthesize.h"           // for Write4Bytes

 static FILE *f_log = NULL;

 extern char word_phonemes[N_WORD_PHONEMES];    // a word translated into phoneme codes

 static int linenum;
 static int error_count;
 static bool text_mode = false;
 static int debug_flag = 0;
 static int error_need_dictionary = 0;

 // A hash chain is a linked-list of hash chain entry objects:
 //     struct hash_chain_entry {
 //         hash_chain_entry *next_entry;
 //         // dict_line output from compile_line:
 //         uint8_t length;
 //         char contents[length];
 //     };
 static char *hash_chains[N_HASH_DICT];

 static char letterGroupsDefined[N_LETTER_GROUPS];

 static const MNEM_TAB mnem_rules[] = {
 	{ "unpr",     DOLLAR_UNPR },
 	{ "noprefix", DOLLAR_NOPREFIX },  // rule fails if a prefix has been removed
@@ -171,6 +150,59 @@ typedef struct {
 	int group3_ix;
 } RGROUP;

 typedef enum
 {
 	LINE_PARSER_WORD = 0,
 	LINE_PARSER_END_OF_WORD = 1,
 	LINE_PARSER_MULTIPLE_WORDS = 2,
 	LINE_PARSER_END_OF_WORDS = 3,
 	LINE_PARSER_PRONUNCIATION = 4,
 	LINE_PARSER_END_OF_PRONUNCIATION = 5,
 } LINE_PARSER_STATES;

 typedef struct {
 	FILE *f_log;

 	char word_phonemes[N_WORD_PHONEMES];    // a word translated into phoneme codes

 	int linenum;
 	int error_count;
 	bool text_mode;
 	int debug_flag;
 	int error_need_dictionary;

 	// A hash chain is a linked-list of hash chain entry objects:
 	//     struct hash_chain_entry {
 	//         hash_chain_entry *next_entry;
 	//         // dict_line output from compile_line:
 	//         uint8_t length;
 	//         char contents[length];
 	//     };
 	char *hash_chains[N_HASH_DICT];

 	char letterGroupsDefined[N_LETTER_GROUPS];

 	char rule_cond[80];
 	char rule_pre[80];
 	char rule_post[80];
 	char rule_match[80];
 	char rule_phonemes[80];
 	char group_name[LEN_GROUP_NAME+1];
 	int group3_ix;
 } CompileContext;

 static void clean_context(CompileContext *ctx) {
 	for (int i = 0; i < N_HASH_DICT; i++) {
 		char *p;
 		while ((p = ctx->hash_chains[i])) {
 			memcpy(&p, ctx->hash_chains[i], sizeof(char*));
 			free(ctx->hash_chains[i]);
 			ctx->hash_chains[i] = p;
 		}
 	}
 	free(ctx);
 }

 void print_dictionary_flags(unsigned int *flags, char *buf, int buf_len)
 {
 	int stress;
@@ -199,7 +231,7 @@ void print_dictionary_flags(unsigned int *flags, char *buf, int buf_len)
 	}
 }

 char *DecodeRule(const char *group_chars, int group_length, char *rule, int control)
 char *DecodeRule(const char *group_chars, int group_length, char *rule, int control, char *output)
 {
 	// Convert compiled match template to ascii

@@ -220,9 +252,6 @@ char *DecodeRule(const char *group_chars, int group_length, char *rule, int cont
 	char buf[200];
 	char buf_pre[200];
 	char suffix[20];
 	static char output[80];

 	MAKE_MEM_UNDEFINED(&output, sizeof(output));

 	static const char symbols[] = {
 		' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ',
@@ -361,17 +390,7 @@ char *DecodeRule(const char *group_chars, int group_length, char *rule, int cont
 	return output;
 }

 typedef enum
 {
 	LINE_PARSER_WORD = 0,
 	LINE_PARSER_END_OF_WORD = 1,
 	LINE_PARSER_MULTIPLE_WORDS = 2,
 	LINE_PARSER_END_OF_WORDS = 3,
 	LINE_PARSER_PRONUNCIATION = 4,
 	LINE_PARSER_END_OF_PRONUNCIATION = 5,
 } LINE_PARSER_STATES;

 static int compile_line(char *linebuf, char *dict_line, int n_dict_line, int *hash)
 static int compile_line(CompileContext *ctx, char *linebuf, char *dict_line, int n_dict_line, int *hash)
 {
 	// Compile a line in the language_list file
 	unsigned char c;
@@ -401,9 +420,9 @@ static int compile_line(char *linebuf, char *dict_line, int n_dict_line, int *ha
 	char encoded_ph[200];
 	char bad_phoneme_str[4];
 	int bad_phoneme;
 	static char nullstring[] = { 0 };
 	static const char nullstring[] = { 0 };

 	phonetic = word = nullstring;
 	phonetic = word = (char*)nullstring;

 	p = linebuf;

@@ -446,16 +465,16 @@ static int compile_line(char *linebuf, char *dict_line, int n_dict_line, int *ha
 			flagnum = LookupMnem(mnem_flags, mnemptr);
 			if (flagnum > 0) {
 				if (flagnum == 200)
 					text_mode = true;
 					ctx->text_mode = true;
 				else if (flagnum == 201)
 					text_mode = false;
 					ctx->text_mode = false;
 				else if (flagnum == BITNUM_FLAG_TEXTMODE)
 					text_not_phonemes = true;
 				else
 					flag_codes[n_flag_codes++] = flagnum;
 			} else {
 				fprintf(f_log, "%5d: Unknown keyword: %s\n", linenum, mnemptr);
 				error_count++;
 				fprintf(ctx->f_log, "%5d: Unknown keyword: %s\n", ctx->linenum, mnemptr);
 				ctx->error_count++;
 			}
 		}

@@ -495,8 +514,8 @@ static int compile_line(char *linebuf, char *dict_line, int n_dict_line, int *ha
 					multiple_words = 0;
 					step = LINE_PARSER_END_OF_WORDS;
 				} else if (word[0] != '_') {
 					fprintf(f_log, "%5d: Missing '('\n", linenum);
 					error_count++;
 					fprintf(ctx->f_log, "%5d: Missing '('\n", ctx->linenum);
 					ctx->error_count++;
 					step = LINE_PARSER_END_OF_WORDS;
 				}
 			}
@@ -536,7 +555,7 @@ static int compile_line(char *linebuf, char *dict_line, int n_dict_line, int *ha
 	if (word[0] == 0)
 		return 0; // blank line

 	if (text_mode)
 	if (ctx->text_mode)
 		text_not_phonemes = true;

 	if (text_not_phonemes) {
@@ -548,12 +567,12 @@ static int compile_line(char *linebuf, char *dict_line, int n_dict_line, int *ha
 			// condition rules are not applied
 			TranslateWord(translator, phonetic, NULL, NULL);
 			text_not_phonemes = false;
 			strncpy0(encoded_ph, word_phonemes, N_WORD_BYTES-4);
 			strncpy0(encoded_ph, ctx->word_phonemes, N_WORD_BYTES-4);

 			if ((word_phonemes[0] == 0) && (error_need_dictionary < 3)) {
 			if ((ctx->word_phonemes[0] == 0) && (ctx->error_need_dictionary < 3)) {
 				// the dictionary was not loaded, we need a second attempt
 				error_need_dictionary++;
 				fprintf(f_log, "%5d: Need to compile dictionary again\n", linenum);
 				ctx->error_need_dictionary++;
 				fprintf(ctx->f_log, "%5d: Need to compile dictionary again\n", ctx->linenum);
 			}
 		} else
 			// this is replacement text, so don't encode as phonemes. Restrict the length of the replacement word
@@ -567,8 +586,8 @@ static int compile_line(char *linebuf, char *dict_line, int n_dict_line, int *ha
 		if (bad_phoneme != 0) {
 			// unrecognised phoneme, report error
 			bad_phoneme_str[utf8_out(bad_phoneme, bad_phoneme_str)] = 0;
 			fprintf(f_log, "%5d: Bad phoneme [%s] (U+%x) in: %s  %s\n", linenum, bad_phoneme_str, bad_phoneme, word, phonetic);
 			error_count++;
 			fprintf(ctx->f_log, "%5d: Bad phoneme [%s] (U+%x) in: %s  %s\n", ctx->linenum, bad_phoneme_str, bad_phoneme, word, phonetic);
 			ctx->error_count++;
 		}
 	}

@@ -622,8 +641,8 @@ static int compile_line(char *linebuf, char *dict_line, int n_dict_line, int *ha
 		if (length < n_dict_line) {
 			strcpy(&dict_line[(len_word)+2], encoded_ph);
 		} else {
 			fprintf(f_log, "%5d: Dictionary line length would overflow the data buffer: %d\n", linenum, length);
 			error_count++;
 			fprintf(ctx->f_log, "%5d: Dictionary line length would overflow the data buffer: %d\n", ctx->linenum, length);
 			ctx->error_count++;
 			// no phonemes specified. set bit 7
 			dict_line[1] |= 0x80;
 			length = len_word + 2;
@@ -636,8 +655,8 @@ static int compile_line(char *linebuf, char *dict_line, int n_dict_line, int *ha

 	if ((multiple_string != NULL) && (multiple_words > 0)) {
 		if (multiple_words > 10) {
 			fprintf(f_log, "%5d: Two many parts in a multi-word entry: %d\n", linenum, multiple_words);
 			error_count++;
 			fprintf(ctx->f_log, "%5d: Two many parts in a multi-word entry: %d\n", ctx->linenum, multiple_words);
 			ctx->error_count++;
 		} else {
 			dict_line[length++] = 80 + multiple_words;
 			ix = multiple_string_end - multiple_string;
@@ -652,7 +671,7 @@ static int compile_line(char *linebuf, char *dict_line, int n_dict_line, int *ha
 	return length;
 }

 static void compile_dictlist_start(void)
 static void compile_dictlist_start(CompileContext *ctx)
 {
 	// initialise dictionary list
 	int ix;
@@ -660,17 +679,17 @@ static void compile_dictlist_start(void)
 	char *p2;

 	for (ix = 0; ix < N_HASH_DICT; ix++) {
 		p = hash_chains[ix];
 		p = ctx->hash_chains[ix];
 		while (p != NULL) {
 			memcpy(&p2, p, sizeof(char *));
 			free(p);
 			p = p2;
 		}
 		hash_chains[ix] = NULL;
 		ctx->hash_chains[ix] = NULL;
 	}
 }

 static void compile_dictlist_end(FILE *f_out)
 static void compile_dictlist_end(CompileContext *ctx, FILE *f_out)
 {
 	// Write out the compiled dictionary list
 	int hash;
@@ -678,7 +697,7 @@ static void compile_dictlist_end(FILE *f_out)
 	char *p;

 	for (hash = 0; hash < N_HASH_DICT; hash++) {
 		p = hash_chains[hash];
 		p = ctx->hash_chains[hash];

 		while (p != NULL) {
 			length = *(uint8_t *)(p+sizeof(char *));
@@ -689,7 +708,7 @@ static void compile_dictlist_end(FILE *f_out)
 	}
 }

 static int compile_dictlist_file(const char *path, const char *filename)
 static int compile_dictlist_file(CompileContext *ctx, const char *path, const char *filename)
 {
 	int length;
 	int hash;
@@ -700,7 +719,7 @@ static int compile_dictlist_file(const char *path, const char *filename)
 	char fname[sizeof(path_home)+45];
 	char dict_line[256]; // length is uint8_t, so an entry can't take up more than 256 bytes

 	text_mode = false;
 	ctx->text_mode = false;

 	// try with and without '.txt' extension
 	sprintf(fname, "%s%s.txt", path, filename);
@@ -710,47 +729,39 @@ static int compile_dictlist_file(const char *path, const char *filename)
 			return -1;
 	}

 	if (f_log != NULL)
 		fprintf(f_log, "Compiling: '%s'\n", fname);
 	if (ctx->f_log != NULL)
 		fprintf(ctx->f_log, "Compiling: '%s'\n", fname);

 	linenum = 0;
 	ctx->linenum = 0;

 	while (fgets(buf, sizeof(buf), f_in) != NULL) {
 		linenum++;
 		ctx->linenum++;

 		length = compile_line(buf, dict_line, sizeof(dict_line), &hash);
 		length = compile_line(ctx, buf, dict_line, sizeof(dict_line), &hash);
 		if (length == 0)  continue; // blank line

 		p = (char *)malloc(length+sizeof(char *));
 		if (p == NULL) {
 			if (f_log != NULL) {
 				fprintf(f_log, "Can't allocate memory\n");
 				error_count++;
 			if (ctx->f_log != NULL) {
 				fprintf(ctx->f_log, "Can't allocate memory\n");
 				ctx->error_count++;
 			}
 			break;
 		}

 		memcpy(p, &hash_chains[hash], sizeof(char *));
 		hash_chains[hash] = p;
 		memcpy(p, &ctx->hash_chains[hash], sizeof(char *));
 		ctx->hash_chains[hash] = p;
 		// NOTE: dict_line[0] is the entry length (0-255)
 		memcpy(p+sizeof(char *), dict_line, length);
 		count++;
 	}

 	if (f_log != NULL)
 		fprintf(f_log, "\t%d entries\n", count);
 	if (ctx->f_log != NULL)
 		fprintf(ctx->f_log, "\t%d entries\n", count);
 	fclose(f_in);
 	return 0;
 }

 static char rule_cond[80];
 static char rule_pre[80];
 static char rule_post[80];
 static char rule_match[80];
 static char rule_phonemes[80];
 static char group_name[LEN_GROUP_NAME+1];
 static int group3_ix;

 #define N_RULES 3000 // max rules for each group

 static int isHexDigit(int c)
@@ -764,10 +775,10 @@ static int isHexDigit(int c)
 	return -1;
 }

 static void copy_rule_string(char *string, int *state_out)
 static void copy_rule_string(CompileContext *ctx, char *string, int *state_out)
 {
 	// state 0: conditional, 1=pre, 2=match, 3=post, 4=phonemes
 	static char * const outbuf[5] = { rule_cond, rule_pre, rule_match, rule_post, rule_phonemes };
 	char * const outbuf[5] = { ctx->rule_cond, ctx->rule_pre, ctx->rule_match, ctx->rule_post, ctx->rule_phonemes };
 	static const int next_state[5] = { 2, 2, 4, 4, 4 };
 	char *output;
 	char *p;
@@ -787,10 +798,10 @@ static void copy_rule_string(char *string, int *state_out)
 	output = outbuf[state];
 	if (state == 4) {
 		// append to any previous phoneme string, i.e. allow spaces in the phoneme string
 		len = strlen(rule_phonemes);
 		len = strlen(ctx->rule_phonemes);
 		if (len > 0)
 			rule_phonemes[len++] = ' ';
 		output = &rule_phonemes[len];
 			ctx->rule_phonemes[len++] = ' ';
 		output = &ctx->rule_phonemes[len];
 	}
 	sxflags = 0x808000; // to ensure non-zero bytes

@@ -903,11 +914,11 @@ static void copy_rule_string(char *string, int *state_out)
 					c = c * 10 + value;
 					if ((value < 0) || (value > 9)) {
 						c = 0;
 						fprintf(f_log, "%5d: Expected 2 digits after 'L'\n", linenum);
 						error_count++;
 					} else if ((c <= 0) || (c >= N_LETTER_GROUPS) || (letterGroupsDefined[(int)c] == 0)) {
 						fprintf(f_log, "%5d: Letter group L%.2d not defined\n", linenum, c);
 						error_count++;
 						fprintf(ctx->f_log, "%5d: Expected 2 digits after 'L'\n", ctx->linenum);
 						ctx->error_count++;
 					} else if ((c <= 0) || (c >= N_LETTER_GROUPS) || (ctx->letterGroupsDefined[(int)c] == 0)) {
 						fprintf(ctx->f_log, "%5d: Letter group L%.2d not defined\n", ctx->linenum, c);
 						ctx->error_count++;
 					}
 					c += 'A';
 					if (state == 1) {
@@ -940,8 +951,8 @@ static void copy_rule_string(char *string, int *state_out)
 					}

 					if (value == 0) {
 						fprintf(f_log, "%5d: $ command not recognized\n", linenum);
 						error_count++;
 						fprintf(ctx->f_log, "%5d: $ command not recognized\n", ctx->linenum);
 						ctx->error_count++;
 					}
 					break;
 				case 'P': // Prefix
@@ -1007,7 +1018,7 @@ static void copy_rule_string(char *string, int *state_out)
 	*state_out = next_state[state];
 }

 static char *compile_rule(char *input)
 static char *compile_rule(CompileContext *ctx, char *input)
 {
 	int ix;
 	unsigned char c;
@@ -1025,11 +1036,11 @@ static char *compile_rule(char *input)
 	char bad_phoneme_str[4];

 	buf[0] = 0;
 	rule_cond[0] = 0;
 	rule_pre[0] = 0;
 	rule_post[0] = 0;
 	rule_match[0] = 0;
 	rule_phonemes[0] = 0;
 	ctx->rule_cond[0] = 0;
 	ctx->rule_pre[0] = 0;
 	ctx->rule_post[0] = 0;
 	ctx->rule_match[0] = 0;
 	ctx->rule_phonemes[0] = 0;

 	p = buf;

@@ -1039,31 +1050,31 @@ static char *compile_rule(char *input)
 		case ')': // end of prefix section
 			*p = 0;
 			state = 1;
 			copy_rule_string(buf, &state);
 			copy_rule_string(ctx, buf, &state);
 			p = buf;
 			break;
 		case '(': // start of suffix section
 			*p = 0;
 			state = 2;
 			copy_rule_string(buf, &state);
 			copy_rule_string(ctx, buf, &state);
 			state = 3;
 			p = buf;
 			if (input[ix+1] == ' ') {
 				fprintf(f_log, "%5d: Syntax error. Space after (, or negative score for previous rule\n", linenum);
 				error_count++;
 				fprintf(ctx->f_log, "%5d: Syntax error. Space after (, or negative score for previous rule\n", ctx->linenum);
 				ctx->error_count++;
 			}
 			break;
 		case '\n': // end of line
 		case '\r':
 		case 0:    // end of line
 			*p = 0;
 			copy_rule_string(buf, &state);
 			copy_rule_string(ctx, buf, &state);
 			finish = true;
 			break;
 		case '\t': // end of section section
 		case ' ':
 			*p = 0;
 			copy_rule_string(buf, &state);
 			copy_rule_string(ctx, buf, &state);
 			p = buf;
 			break;
 		case '?':
@@ -1078,66 +1089,66 @@ static char *compile_rule(char *input)
 		}
 	}

 	if (strcmp(rule_match, "$group") == 0)
 		strcpy(rule_match, group_name);
 	if (strcmp(ctx->rule_match, "$group") == 0)
 		strcpy(ctx->rule_match, ctx->group_name);

 	if (rule_match[0] == 0) {
 		if (rule_post[0] != 0) {
 			fprintf(f_log, "%5d: Syntax error\n", linenum);
 			error_count++;
 	if (ctx->rule_match[0] == 0) {
 		if (ctx->rule_post[0] != 0) {
 			fprintf(ctx->f_log, "%5d: Syntax error\n", ctx->linenum);
 			ctx->error_count++;
 		}
 		return NULL;
 	}

 	EncodePhonemes(rule_phonemes, buf, &bad_phoneme);
 	EncodePhonemes(ctx->rule_phonemes, buf, &bad_phoneme);
 	if (bad_phoneme != 0) {
 		bad_phoneme_str[utf8_out(bad_phoneme, bad_phoneme_str)] = 0;
 		fprintf(f_log, "%5d: Bad phoneme [%s] (U+%x) in: %s\n", linenum, bad_phoneme_str, bad_phoneme, input);
 		error_count++;
 		fprintf(ctx->f_log, "%5d: Bad phoneme [%s] (U+%x) in: %s\n", ctx->linenum, bad_phoneme_str, bad_phoneme, input);
 		ctx->error_count++;
 	}
 	strcpy(output, buf);
 	len = strlen(buf)+1;

 	len_name = strlen(group_name);
 	if ((len_name > 0) && (memcmp(rule_match, group_name, len_name) != 0)) {
 		utf8_in(&wc, rule_match);
 		if ((group_name[0] == '9') && IsDigit(wc)) {
 	len_name = strlen(ctx->group_name);
 	if ((len_name > 0) && (memcmp(ctx->rule_match, ctx->group_name, len_name) != 0)) {
 		utf8_in(&wc, ctx->rule_match);
 		if ((ctx->group_name[0] == '9') && IsDigit(wc)) {
 			// numeric group, rule_match starts with a digit, so OK
 		} else {
 			fprintf(f_log, "%5d: Wrong initial letters '%s' for group '%s'\n", linenum, rule_match, group_name);
 			error_count++;
 			fprintf(ctx->f_log, "%5d: Wrong initial letters '%s' for group '%s'\n", ctx->linenum, ctx->rule_match, ctx->group_name);
 			ctx->error_count++;
 		}
 	}
 	strcpy(&output[len], rule_match);
 	len += strlen(rule_match);
 	strcpy(&output[len], ctx->rule_match);
 	len += strlen(ctx->rule_match);

 	if (debug_flag) {
 	if (ctx->debug_flag) {
 		output[len] = RULE_LINENUM;
 		output[len+1] = (linenum % 255) + 1;
 		output[len+2] = (linenum / 255) + 1;
 		output[len+1] = (ctx->linenum % 255) + 1;
 		output[len+2] = (ctx->linenum / 255) + 1;
 		len += 3;
 	}

 	if (rule_cond[0] != 0) {
 		if (rule_cond[0] == '!') {
 	if (ctx->rule_cond[0] != 0) {
 		if (ctx->rule_cond[0] == '!') {
 			// allow the rule only if the condition number is NOT set for the voice
 			ix = atoi(&rule_cond[1]) + 32;
 			ix = atoi(&ctx->rule_cond[1]) + 32;
 		} else {
 			// allow the rule only if the condition number is set for the voice
 			ix = atoi(rule_cond);
 			ix = atoi(ctx->rule_cond);
 		}

 		if ((ix > 0) && (ix < 255)) {
 			output[len++] = RULE_CONDITION;
 			output[len++] = ix;
 		} else {
 			fprintf(f_log, "%5d: bad condition number ?%d\n", linenum, ix);
 			error_count++;
 			fprintf(ctx->f_log, "%5d: bad condition number ?%d\n", ctx->linenum, ix);
 			ctx->error_count++;
 		}
 	}
 	if (rule_pre[0] != 0) {
 	if (ctx->rule_pre[0] != 0) {
 		start = 0;
 		if (rule_pre[0] == RULE_SPACE) {
 		if (ctx->rule_pre[0] == RULE_SPACE) {
 			// omit '_' at the beginning of the pre-string and imply it by using RULE_PRE_ATSTART
 			c = RULE_PRE_ATSTART;
 			start = 1;
@@ -1146,13 +1157,13 @@ static char *compile_rule(char *input)
 		output[len++] = c;

 		// output PRE string in reverse order
 		for (ix = strlen(rule_pre)-1; ix >= start; ix--)
 			output[len++] = rule_pre[ix];
 		for (ix = strlen(ctx->rule_pre)-1; ix >= start; ix--)
 			output[len++] = ctx->rule_pre[ix];
 	}

 	if (rule_post[0] != 0) {
 		sprintf(&output[len], "%c%s", RULE_POST, rule_post);
 		len += (strlen(rule_post)+1);
 	if (ctx->rule_post[0] != 0) {
 		sprintf(&output[len], "%c%s", RULE_POST, ctx->rule_post);
 		len += (strlen(ctx->rule_post)+1);
 	}
 	output[len++] = 0;
 	if ((prule = (char *)malloc(len)) != NULL)
@@ -1230,7 +1241,7 @@ static void output_rule_group(FILE *f_out, int n_rules, char **rules, char *name
 	}
 }

 static int compile_lettergroup(char *input, FILE *f_out)
 static int compile_lettergroup(CompileContext *ctx, char *input, FILE *f_out)
 {
 	char *p;
 	char *p_start;
@@ -1246,15 +1257,15 @@ static int compile_lettergroup(char *input, FILE *f_out)

 	p = input;
 	if (!IsDigit09(p[0]) || !IsDigit09(p[1])) {
 		fprintf(f_log, "%5d: Expected 2 digits after '.L'\n", linenum);
 		error_count++;
 		fprintf(ctx->f_log, "%5d: Expected 2 digits after '.L'\n", ctx->linenum);
 		ctx->error_count++;
 		return 1;
 	}

 	group = atoi(&p[0]);
 	if (group >= N_LETTER_GROUPS) {
 		fprintf(f_log, "%5d: lettergroup out of range (01-%.2d)\n", linenum, N_LETTER_GROUPS-1);
 		error_count++;
 		fprintf(ctx->f_log, "%5d: lettergroup out of range (01-%.2d)\n", ctx->linenum, N_LETTER_GROUPS-1);
 		ctx->error_count++;
 		return 1;
 	}

@@ -1263,11 +1274,11 @@ static int compile_lettergroup(char *input, FILE *f_out)
 	fputc(RULE_GROUP_START, f_out);
 	fputc(RULE_LETTERGP2, f_out);
 	fputc(group + 'A', f_out);
 	if (letterGroupsDefined[group] != 0) {
 		fprintf(f_log, "%5d: lettergroup L%.2d is already defined\n", linenum, group);
 		error_count++;
 	if (ctx->letterGroupsDefined[group] != 0) {
 		fprintf(ctx->f_log, "%5d: lettergroup L%.2d is already defined\n", ctx->linenum, group);
 		ctx->error_count++;
 	}
 	letterGroupsDefined[group] = 1;
 	ctx->letterGroupsDefined[group] = 1;

 	n_items = 0;
 	while (n_items < N_LETTERGP_ITEMS) {
@@ -1309,7 +1320,7 @@ static void free_rules(char **rules, int n_rules)
 	}
 }

 static espeak_ng_STATUS compile_dictrules(FILE *f_in, FILE *f_out, char *fname_temp, espeak_ng_ERROR_CONTEXT *context)
 static espeak_ng_STATUS compile_dictrules(CompileContext *ctx, FILE *f_in, FILE *f_out, char *fname_temp, espeak_ng_ERROR_CONTEXT *context)
 {
 	char *prule;
 	unsigned char *p;
@@ -1333,14 +1344,14 @@ static espeak_ng_STATUS compile_dictrules(FILE *f_in, FILE *f_out, char *fname_t
 	int n_groups3 = 0;
 	RGROUP rgroup[N_RULE_GROUP2];

 	linenum = 0;
 	group_name[0] = 0;
 	ctx->linenum = 0;
 	ctx->group_name[0] = 0;

 	if ((f_temp = fopen(fname_temp, "wb")) == NULL)
 		return create_file_error_context(context, errno, fname_temp);

 	for (;;) {
 		linenum++;
 		ctx->linenum++;
 		buf = fgets(buf1, sizeof(buf1), f_in);
 		if (buf != NULL) {
 			if ((p = (unsigned char *)strstr(buf, "//")) != NULL)
@@ -1353,10 +1364,10 @@ static espeak_ng_STATUS compile_dictrules(FILE *f_in, FILE *f_out, char *fname_t
 			// next .group or end of file, write out the previous group

 			if (n_rules > 0) {
 				strcpy(rgroup[n_rgroups].name, group_name);
 				rgroup[n_rgroups].group3_ix = group3_ix;
 				strcpy(rgroup[n_rgroups].name, ctx->group_name);
 				rgroup[n_rgroups].group3_ix = ctx->group3_ix;
 				rgroup[n_rgroups].start = ftell(f_temp);
 				output_rule_group(f_temp, n_rules, rules, group_name);
 				output_rule_group(f_temp, n_rules, rules, ctx->group_name);
 				rgroup[n_rgroups].length = ftell(f_temp) - rgroup[n_rgroups].start;
 				n_rgroups++;

@@ -1376,7 +1387,7 @@ static espeak_ng_STATUS compile_dictrules(FILE *f_in, FILE *f_out, char *fname_t
 			if (buf == NULL) break; // end of file

 			if (memcmp(buf, ".L", 2) == 0) {
 				compile_lettergroup(&buf[2], f_out);
 				compile_lettergroup(ctx, &buf[2], f_out);
 				continue;
 			}

@@ -1397,13 +1408,13 @@ static espeak_ng_STATUS compile_dictrules(FILE *f_in, FILE *f_out, char *fname_t
 				while ((p[0] == ' ') || (p[0] == '\t')) p++; // Note: Windows isspace(0xe1) gives TRUE !
 				ix = 0;
 				while ((*p > ' ') && (ix < LEN_GROUP_NAME))
 					group_name[ix++] = *p++;
 				group_name[ix] = 0;
 				group3_ix = 0;
 					ctx->group_name[ix++] = *p++;
 				ctx->group_name[ix] = 0;
 				ctx->group3_ix = 0;

 				if (sscanf(group_name, "0x%x", &char_code) == 1) {
 				if (sscanf(ctx->group_name, "0x%x", &char_code) == 1) {
 					// group character is given as a character code (max 16 bits)
 					p = (unsigned char *)group_name;
 					p = (unsigned char *)ctx->group_name;

 					if (char_code > 0x100)
 						*p++ = (char_code >> 8);
@@ -1411,19 +1422,19 @@ static espeak_ng_STATUS compile_dictrules(FILE *f_in, FILE *f_out, char *fname_t
 					*p = 0;
 				} else {
 					if (translator->letter_bits_offset > 0) {
 						utf8_in(&wc, group_name);
 						utf8_in(&wc, ctx->group_name);
 						if (((ix = (wc - translator->letter_bits_offset)) >= 0) && (ix < 128))
 							group3_ix = ix+1; // not zero
 							ctx->group3_ix = ix+1; // not zero
 					}
 				}

 				if ((group3_ix == 0) && (strlen(group_name) > 2)) {
 					if (utf8_in(&c, group_name) < 2) {
 						fprintf(f_log, "%5d: Group name longer than 2 bytes (UTF8)", linenum);
 						error_count++;
 				if ((ctx->group3_ix == 0) && (strlen(ctx->group_name) > 2)) {
 					if (utf8_in(&c, ctx->group_name) < 2) {
 						fprintf(ctx->f_log, "%5d: Group name longer than 2 bytes (UTF8)", ctx->linenum);
 						ctx->error_count++;
 					}

 					group_name[2] = 0;
 					ctx->group_name[2] = 0;
 				}
 			}

@@ -1433,14 +1444,14 @@ static espeak_ng_STATUS compile_dictrules(FILE *f_in, FILE *f_out, char *fname_t
 		switch (compile_mode)
 		{
 		case 1: //  .group
 			prule = compile_rule(buf);
 			prule = compile_rule(ctx, buf);
 			if (prule != NULL) {
 				if (n_rules < N_RULES)
 					rules[n_rules++] = prule;
 				else {
 					if (err_n_rules == 0) {
 						fprintf(stderr, "\nExceeded limit of rules (%d) in group '%s'\n", N_RULES, group_name);
 						error_count++;
 						fprintf(stderr, "\nExceeded limit of rules (%d) in group '%s'\n", N_RULES, ctx->group_name);
 						ctx->error_count++;
 						err_n_rules = 1;
 					}
 				}
@@ -1508,7 +1519,7 @@ static espeak_ng_STATUS compile_dictrules(FILE *f_in, FILE *f_out, char *fname_t
 	fclose(f_temp);
 	remove(fname_temp);

 	fprintf(f_log, "\t%d rules, %d groups (%d)\n\n", count, n_rgroups, n_groups3);
 	fprintf(ctx->f_log, "\t%d rules, %d groups (%d)\n\n", count, n_rgroups, n_groups3);
 	free_rules(rules, n_rules);
 	return ENS_OK;
 }
@@ -1531,32 +1542,37 @@ ESPEAK_NG_API espeak_ng_STATUS espeak_ng_CompileDictionary(const char *dsource,
 	char fname_temp[sizeof(path_home)+15];
 	char path[sizeof(path_home)+40];       // path_dsource+20

 	error_count = 0;
 	error_need_dictionary = 0;
 	memset(letterGroupsDefined, 0, sizeof(letterGroupsDefined));
 	CompileContext *ctx = calloc(1, sizeof(CompileContext));

 	ctx->error_count = 0;
 	ctx->error_need_dictionary = 0;
 	memset(ctx->letterGroupsDefined, 0, sizeof(ctx->letterGroupsDefined));

 	debug_flag = flags & 1;
 	ctx->debug_flag = flags & 1;

 	if (dsource == NULL)
 		dsource = "";

 	f_log = log;
 	if (f_log == NULL)
 		f_log = stderr;
 	ctx->f_log = log;
 	if (ctx->f_log == NULL)
 		ctx->f_log = stderr;

 	// try with and without '.txt' extension
 	sprintf(path, "%s%s_", dsource, dict_name);
 	sprintf(fname_in, "%srules.txt", path);
 	if ((f_in = fopen(fname_in, "r")) == NULL) {
 		sprintf(fname_in, "%srules", path);
 		if ((f_in = fopen(fname_in, "r")) == NULL)
 		if ((f_in = fopen(fname_in, "r")) == NULL) {
 			clean_context(ctx);
 			return create_file_error_context(context, errno, fname_in);
 		}
 	}

 	sprintf(fname_out, "%s%c%s_dict", path_home, PATHSEP, dict_name);
 	if ((f_out = fopen(fname_out, "wb+")) == NULL) {
 		int error = errno;
 		fclose(f_in);
 		clean_context(ctx);
 		return create_file_error_context(context, error, fname_out);
 	}
 	/* Use dictionary-specific temp names to allow parallel compilation
@@ -1567,38 +1583,42 @@ ESPEAK_NG_API espeak_ng_STATUS espeak_ng_CompileDictionary(const char *dsource,
 	Write4Bytes(f_out, value);
 	Write4Bytes(f_out, offset_rules);

 	compile_dictlist_start();
 	compile_dictlist_start(ctx);

 	fprintf(f_log, "Using phonemetable: '%s'\n", phoneme_tab_list[phoneme_tab_number].name);
 	compile_dictlist_file(path, "roots");
 	fprintf(ctx->f_log, "Using phonemetable: '%s'\n", phoneme_tab_list[phoneme_tab_number].name);
 	compile_dictlist_file(ctx, path, "roots");
 	if (translator->langopts.listx) {
 		compile_dictlist_file(path, "list");
 		compile_dictlist_file(path, "listx");
 		compile_dictlist_file(ctx, path, "list");
 		compile_dictlist_file(ctx, path, "listx");
 	} else {
 		compile_dictlist_file(path, "listx");
 		compile_dictlist_file(path, "list");
 		compile_dictlist_file(ctx, path, "listx");
 		compile_dictlist_file(ctx, path, "list");
 	}
 	compile_dictlist_file(path, "emoji");
 	compile_dictlist_file(path, "extra");
 	compile_dictlist_file(ctx, path, "emoji");
 	compile_dictlist_file(ctx, path, "extra");

 	compile_dictlist_end(f_out);
 	compile_dictlist_end(ctx, f_out);
 	offset_rules = ftell(f_out);

 	fprintf(f_log, "Compiling: '%s'\n", fname_in);
 	fprintf(ctx->f_log, "Compiling: '%s'\n", fname_in);

 	espeak_ng_STATUS status = compile_dictrules(f_in, f_out, fname_temp, context);
 	espeak_ng_STATUS status = compile_dictrules(ctx, f_in, f_out, fname_temp, context);
 	fclose(f_in);

 	fseek(f_out, 4, SEEK_SET);
 	Write4Bytes(f_out, offset_rules);
 	fclose(f_out);
 	fflush(f_log);
 	fflush(ctx->f_log);

 	if (status != ENS_OK)
 	if (status != ENS_OK) {
 		clean_context(ctx);
 		return status;
 	}

 	LoadDictionary(translator, dict_name, 0);

 	return error_count > 0 ? ENS_COMPILE_ERROR : ENS_OK;
 	status = ctx->error_count > 0 ? ENS_COMPILE_ERROR : ENS_OK;
 	clean_context(ctx);
 	return status;
 }
 #pragma GCC visibility pop
--- a/src/libespeak-ng/compiledict.h
+++ b/src/libespeak-ng/compiledict.h
@@ -29,7 +29,8 @@ extern "C"
 char *DecodeRule(const char *group_chars, 
 		int group_length,
 		char *rule,
 		int control);
 		int control,
 		char *output);

 void print_dictionary_flags(unsigned int *flags,
 		char *buf,
@@ -40,4 +41,3 @@ void print_dictionary_flags(unsigned int *flags,
 #endif

 #endif

--- a/src/libespeak-ng/dictionary.c
+++ b/src/libespeak-ng/dictionary.c
@@ -2040,12 +2040,13 @@ static void MatchRule(Translator *tr, char *word[], char *word_start, int group_
 					// show each rule that matches, and it's points score
 					int pts;
 					char decoded_phonemes[80];
 					char output[80];

 					pts = match.points;
 					if (group_length > 1)
 						pts += 35; // to account for an extra letter matching
 					DecodePhonemes(match.phonemes, decoded_phonemes);
 					fprintf(f_trans, "%3d\t%s [%s]\n", pts, DecodeRule(group_chars, group_length, rule_start, word_flags), decoded_phonemes);
 					fprintf(f_trans, "%3d\t%s [%s]\n", pts, DecodeRule(group_chars, group_length, rule_start, word_flags, output), decoded_phonemes);
 				}
 			}
 		}