|  |  |  |  |  |  | 
													
												
													
														|  |  | #include "speech.h"		// for path_home |  |  | #include "speech.h"		// for path_home | 
													
												
													
														|  |  | #include "synthesize.h"           // for Write4Bytes |  |  | #include "synthesize.h"           // for Write4Bytes | 
													
												
													
														|  |  | 
 |  |  | 
 | 
													
												
													
														|  |  | static FILE *f_log = NULL; |  |  |  | 
													
												
													
														|  |  | 
 |  |  |  | 
													
												
													
														|  |  | extern char word_phonemes[N_WORD_PHONEMES];    // a word translated into phoneme codes |  |  |  | 
													
												
													
														|  |  | 
 |  |  |  | 
													
												
													
														|  |  | static int linenum; |  |  |  | 
													
												
													
														|  |  | static int error_count; |  |  |  | 
													
												
													
														|  |  | static bool text_mode = false; |  |  |  | 
													
												
													
														|  |  | static int debug_flag = 0; |  |  |  | 
													
												
													
														|  |  | static int error_need_dictionary = 0; |  |  |  | 
													
												
													
														|  |  | 
 |  |  |  | 
													
												
													
														|  |  | // A hash chain is a linked-list of hash chain entry objects: |  |  |  | 
													
												
													
														|  |  | //     struct hash_chain_entry { |  |  |  | 
													
												
													
														|  |  | //         hash_chain_entry *next_entry; |  |  |  | 
													
												
													
														|  |  | //         // dict_line output from compile_line: |  |  |  | 
													
												
													
														|  |  | //         uint8_t length; |  |  |  | 
													
												
													
														|  |  | //         char contents[length]; |  |  |  | 
													
												
													
														|  |  | //     }; |  |  |  | 
													
												
													
														|  |  | static char *hash_chains[N_HASH_DICT]; |  |  |  | 
													
												
													
														|  |  | 
 |  |  |  | 
													
												
													
														|  |  | static char letterGroupsDefined[N_LETTER_GROUPS]; |  |  |  | 
													
												
													
														|  |  | 
 |  |  |  | 
													
												
													
														|  |  | static const MNEM_TAB mnem_rules[] = { |  |  | static const MNEM_TAB mnem_rules[] = { | 
													
												
													
														|  |  | { "unpr",     DOLLAR_UNPR }, |  |  | { "unpr",     DOLLAR_UNPR }, | 
													
												
													
														|  |  | { "noprefix", DOLLAR_NOPREFIX },  // rule fails if a prefix has been removed |  |  | { "noprefix", DOLLAR_NOPREFIX },  // rule fails if a prefix has been removed | 
													
												
											
												
													
														|  |  |  |  |  |  | 
													
												
													
														|  |  | int group3_ix; |  |  | int group3_ix; | 
													
												
													
														|  |  | } RGROUP; |  |  | } RGROUP; | 
													
												
													
														|  |  | 
 |  |  | 
 | 
													
												
													
														|  |  |  |  |  | typedef enum | 
													
												
													
														|  |  |  |  |  | { | 
													
												
													
														|  |  |  |  |  | LINE_PARSER_WORD = 0, | 
													
												
													
														|  |  |  |  |  | LINE_PARSER_END_OF_WORD = 1, | 
													
												
													
														|  |  |  |  |  | LINE_PARSER_MULTIPLE_WORDS = 2, | 
													
												
													
														|  |  |  |  |  | LINE_PARSER_END_OF_WORDS = 3, | 
													
												
													
														|  |  |  |  |  | LINE_PARSER_PRONUNCIATION = 4, | 
													
												
													
														|  |  |  |  |  | LINE_PARSER_END_OF_PRONUNCIATION = 5, | 
													
												
													
														|  |  |  |  |  | } LINE_PARSER_STATES; | 
													
												
													
														|  |  |  |  |  | 
 | 
													
												
													
														|  |  |  |  |  | typedef struct { | 
													
												
													
														|  |  |  |  |  | FILE *f_log; | 
													
												
													
														|  |  |  |  |  | 
 | 
													
												
													
														|  |  |  |  |  | char word_phonemes[N_WORD_PHONEMES];    // a word translated into phoneme codes | 
													
												
													
														|  |  |  |  |  | 
 | 
													
												
													
														|  |  |  |  |  | int linenum; | 
													
												
													
														|  |  |  |  |  | int error_count; | 
													
												
													
														|  |  |  |  |  | bool text_mode; | 
													
												
													
														|  |  |  |  |  | int debug_flag; | 
													
												
													
														|  |  |  |  |  | int error_need_dictionary; | 
													
												
													
														|  |  |  |  |  | 
 | 
													
												
													
														|  |  |  |  |  | // A hash chain is a linked-list of hash chain entry objects: | 
													
												
													
														|  |  |  |  |  | //     struct hash_chain_entry { | 
													
												
													
														|  |  |  |  |  | //         hash_chain_entry *next_entry; | 
													
												
													
														|  |  |  |  |  | //         // dict_line output from compile_line: | 
													
												
													
														|  |  |  |  |  | //         uint8_t length; | 
													
												
													
														|  |  |  |  |  | //         char contents[length]; | 
													
												
													
														|  |  |  |  |  | //     }; | 
													
												
													
														|  |  |  |  |  | char *hash_chains[N_HASH_DICT]; | 
													
												
													
														|  |  |  |  |  | 
 | 
													
												
													
														|  |  |  |  |  | char letterGroupsDefined[N_LETTER_GROUPS]; | 
													
												
													
														|  |  |  |  |  | 
 | 
													
												
													
														|  |  |  |  |  | char rule_cond[80]; | 
													
												
													
														|  |  |  |  |  | char rule_pre[80]; | 
													
												
													
														|  |  |  |  |  | char rule_post[80]; | 
													
												
													
														|  |  |  |  |  | char rule_match[80]; | 
													
												
													
														|  |  |  |  |  | char rule_phonemes[80]; | 
													
												
													
														|  |  |  |  |  | char group_name[LEN_GROUP_NAME+1]; | 
													
												
													
														|  |  |  |  |  | int group3_ix; | 
													
												
													
														|  |  |  |  |  | } CompileContext; | 
													
												
													
														|  |  |  |  |  | 
 | 
													
												
													
														|  |  |  |  |  | static void clean_context(CompileContext *ctx) { | 
													
												
													
														|  |  |  |  |  | for (int i = 0; i < N_HASH_DICT; i++) { | 
													
												
													
														|  |  |  |  |  | char *p; | 
													
												
													
														|  |  |  |  |  | while ((p = ctx->hash_chains[i])) { | 
													
												
													
														|  |  |  |  |  | memcpy(&p, ctx->hash_chains[i], sizeof(char*)); | 
													
												
													
														|  |  |  |  |  | free(ctx->hash_chains[i]); | 
													
												
													
														|  |  |  |  |  | ctx->hash_chains[i] = p; | 
													
												
													
														|  |  |  |  |  | } | 
													
												
													
														|  |  |  |  |  | } | 
													
												
													
														|  |  |  |  |  | free(ctx); | 
													
												
													
														|  |  |  |  |  | } | 
													
												
													
														|  |  |  |  |  | 
 | 
													
												
													
														|  |  | void print_dictionary_flags(unsigned int *flags, char *buf, int buf_len) |  |  | void print_dictionary_flags(unsigned int *flags, char *buf, int buf_len) | 
													
												
													
														|  |  | { |  |  | { | 
													
												
													
														|  |  | int stress; |  |  | int stress; | 
													
												
											
												
													
														|  |  |  |  |  |  | 
													
												
													
														|  |  | } |  |  | } | 
													
												
													
														|  |  | } |  |  | } | 
													
												
													
														|  |  | 
 |  |  | 
 | 
													
												
													
														|  |  | char *DecodeRule(const char *group_chars, int group_length, char *rule, int control) |  |  | char *DecodeRule(const char *group_chars, int group_length, char *rule, int control, char *output) | 
													
												
													
													
												
													
														|  |  | { |  |  | { | 
													
												
													
														|  |  | // Convert compiled match template to ascii |  |  | // Convert compiled match template to ascii | 
													
												
													
														|  |  | 
 |  |  | 
 | 
													
												
											
												
													
														|  |  |  |  |  |  | 
													
												
													
														|  |  | char buf[200]; |  |  | char buf[200]; | 
													
												
													
														|  |  | char buf_pre[200]; |  |  | char buf_pre[200]; | 
													
												
													
														|  |  | char suffix[20]; |  |  | char suffix[20]; | 
													
												
													
														|  |  | static char output[80]; |  |  |  | 
													
												
													
														|  |  | 
 |  |  |  | 
													
												
													
														|  |  | MAKE_MEM_UNDEFINED(&output, sizeof(output)); |  |  |  | 
													
												
													
														|  |  | 
 |  |  | 
 | 
													
												
													
														|  |  | static const char symbols[] = { |  |  | static const char symbols[] = { | 
													
												
													
														|  |  | ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', |  |  | ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', | 
													
												
											
												
													
														|  |  |  |  |  |  | 
													
												
													
														|  |  | return output; |  |  | return output; | 
													
												
													
														|  |  | } |  |  | } | 
													
												
													
														|  |  | 
 |  |  | 
 | 
													
												
													
														|  |  | typedef enum |  |  | static int compile_line(CompileContext *ctx, char *linebuf, char *dict_line, int n_dict_line, int *hash) | 
													
												
													
														|  |  | { |  |  |  | 
													
												
													
														|  |  | LINE_PARSER_WORD = 0, |  |  |  | 
													
												
													
														|  |  | LINE_PARSER_END_OF_WORD = 1, |  |  |  | 
													
												
													
														|  |  | LINE_PARSER_MULTIPLE_WORDS = 2, |  |  |  | 
													
												
													
														|  |  | LINE_PARSER_END_OF_WORDS = 3, |  |  |  | 
													
												
													
														|  |  | LINE_PARSER_PRONUNCIATION = 4, |  |  |  | 
													
												
													
														|  |  | LINE_PARSER_END_OF_PRONUNCIATION = 5, |  |  |  | 
													
												
													
														|  |  | } LINE_PARSER_STATES; |  |  |  | 
													
												
													
														|  |  | 
 |  |  |  | 
													
												
													
														|  |  | static int compile_line(char *linebuf, char *dict_line, int n_dict_line, int *hash) |  |  |  | 
													
												
													
													
												
													
														|  |  | { |  |  | { | 
													
												
													
														|  |  | // Compile a line in the language_list file |  |  | // Compile a line in the language_list file | 
													
												
													
														|  |  | unsigned char c; |  |  | unsigned char c; | 
													
												
											
												
													
														|  |  |  |  |  |  | 
													
												
													
														|  |  | char encoded_ph[200]; |  |  | char encoded_ph[200]; | 
													
												
													
														|  |  | char bad_phoneme_str[4]; |  |  | char bad_phoneme_str[4]; | 
													
												
													
														|  |  | int bad_phoneme; |  |  | int bad_phoneme; | 
													
												
													
														|  |  | static char nullstring[] = { 0 }; |  |  | static const char nullstring[] = { 0 }; | 
													
												
													
													
												
													
														|  |  | 
 |  |  | 
 | 
													
												
													
														|  |  | phonetic = word = nullstring; |  |  | phonetic = word = (char*)nullstring; | 
													
												
													
													
												
													
														|  |  | 
 |  |  | 
 | 
													
												
													
														|  |  | p = linebuf; |  |  | p = linebuf; | 
													
												
													
														|  |  | 
 |  |  | 
 | 
													
												
											
												
													
														|  |  |  |  |  |  | 
													
												
													
														|  |  | flagnum = LookupMnem(mnem_flags, mnemptr); |  |  | flagnum = LookupMnem(mnem_flags, mnemptr); | 
													
												
													
														|  |  | if (flagnum > 0) { |  |  | if (flagnum > 0) { | 
													
												
													
														|  |  | if (flagnum == 200) |  |  | if (flagnum == 200) | 
													
												
													
														|  |  | text_mode = true; |  |  | ctx->text_mode = true; | 
													
												
													
													
												
													
														|  |  | else if (flagnum == 201) |  |  | else if (flagnum == 201) | 
													
												
													
														|  |  | text_mode = false; |  |  | ctx->text_mode = false; | 
													
												
													
													
												
													
														|  |  | else if (flagnum == BITNUM_FLAG_TEXTMODE) |  |  | else if (flagnum == BITNUM_FLAG_TEXTMODE) | 
													
												
													
														|  |  | text_not_phonemes = true; |  |  | text_not_phonemes = true; | 
													
												
													
														|  |  | else |  |  | else | 
													
												
													
														|  |  | flag_codes[n_flag_codes++] = flagnum; |  |  | flag_codes[n_flag_codes++] = flagnum; | 
													
												
													
														|  |  | } else { |  |  | } else { | 
													
												
													
														|  |  | fprintf(f_log, "%5d: Unknown keyword: %s\n", linenum, mnemptr); |  |  | fprintf(ctx->f_log, "%5d: Unknown keyword: %s\n", ctx->linenum, mnemptr); | 
													
												
													
														|  |  | error_count++; |  |  | ctx->error_count++; | 
													
												
													
													
												
													
													
												
													
														|  |  | } |  |  | } | 
													
												
													
														|  |  | } |  |  | } | 
													
												
													
														|  |  | 
 |  |  | 
 | 
													
												
											
												
													
														|  |  |  |  |  |  | 
													
												
													
														|  |  | multiple_words = 0; |  |  | multiple_words = 0; | 
													
												
													
														|  |  | step = LINE_PARSER_END_OF_WORDS; |  |  | step = LINE_PARSER_END_OF_WORDS; | 
													
												
													
														|  |  | } else if (word[0] != '_') { |  |  | } else if (word[0] != '_') { | 
													
												
													
														|  |  | fprintf(f_log, "%5d: Missing '('\n", linenum); |  |  | fprintf(ctx->f_log, "%5d: Missing '('\n", ctx->linenum); | 
													
												
													
														|  |  | error_count++; |  |  | ctx->error_count++; | 
													
												
													
													
												
													
													
												
													
														|  |  | step = LINE_PARSER_END_OF_WORDS; |  |  | step = LINE_PARSER_END_OF_WORDS; | 
													
												
													
														|  |  | } |  |  | } | 
													
												
													
														|  |  | } |  |  | } | 
													
												
											
												
													
														|  |  |  |  |  |  | 
													
												
													
														|  |  | if (word[0] == 0) |  |  | if (word[0] == 0) | 
													
												
													
														|  |  | return 0; // blank line |  |  | return 0; // blank line | 
													
												
													
														|  |  | 
 |  |  | 
 | 
													
												
													
														|  |  | if (text_mode) |  |  | if (ctx->text_mode) | 
													
												
													
													
												
													
														|  |  | text_not_phonemes = true; |  |  | text_not_phonemes = true; | 
													
												
													
														|  |  | 
 |  |  | 
 | 
													
												
													
														|  |  | if (text_not_phonemes) { |  |  | if (text_not_phonemes) { | 
													
												
											
												
													
														|  |  |  |  |  |  | 
													
												
													
														|  |  | // condition rules are not applied |  |  | // condition rules are not applied | 
													
												
													
														|  |  | TranslateWord(translator, phonetic, NULL, NULL); |  |  | TranslateWord(translator, phonetic, NULL, NULL); | 
													
												
													
														|  |  | text_not_phonemes = false; |  |  | text_not_phonemes = false; | 
													
												
													
														|  |  | strncpy0(encoded_ph, word_phonemes, N_WORD_BYTES-4); |  |  | strncpy0(encoded_ph, ctx->word_phonemes, N_WORD_BYTES-4); | 
													
												
													
													
												
													
														|  |  | 
 |  |  | 
 | 
													
												
													
														|  |  | if ((word_phonemes[0] == 0) && (error_need_dictionary < 3)) { |  |  | if ((ctx->word_phonemes[0] == 0) && (ctx->error_need_dictionary < 3)) { | 
													
												
													
													
												
													
														|  |  | // the dictionary was not loaded, we need a second attempt |  |  | // the dictionary was not loaded, we need a second attempt | 
													
												
													
														|  |  | error_need_dictionary++; |  |  | ctx->error_need_dictionary++; | 
													
												
													
														|  |  | fprintf(f_log, "%5d: Need to compile dictionary again\n", linenum); |  |  | fprintf(ctx->f_log, "%5d: Need to compile dictionary again\n", ctx->linenum); | 
													
												
													
													
												
													
													
												
													
														|  |  | } |  |  | } | 
													
												
													
														|  |  | } else |  |  | } else | 
													
												
													
														|  |  | // this is replacement text, so don't encode as phonemes. Restrict the length of the replacement word |  |  | // this is replacement text, so don't encode as phonemes. Restrict the length of the replacement word | 
													
												
											
												
													
														|  |  |  |  |  |  | 
													
												
													
														|  |  | if (bad_phoneme != 0) { |  |  | if (bad_phoneme != 0) { | 
													
												
													
														|  |  | // unrecognised phoneme, report error |  |  | // unrecognised phoneme, report error | 
													
												
													
														|  |  | bad_phoneme_str[utf8_out(bad_phoneme, bad_phoneme_str)] = 0; |  |  | bad_phoneme_str[utf8_out(bad_phoneme, bad_phoneme_str)] = 0; | 
													
												
													
														|  |  | fprintf(f_log, "%5d: Bad phoneme [%s] (U+%x) in: %s  %s\n", linenum, bad_phoneme_str, bad_phoneme, word, phonetic); |  |  | fprintf(ctx->f_log, "%5d: Bad phoneme [%s] (U+%x) in: %s  %s\n", ctx->linenum, bad_phoneme_str, bad_phoneme, word, phonetic); | 
													
												
													
														|  |  | error_count++; |  |  | ctx->error_count++; | 
													
												
													
													
												
													
													
												
													
														|  |  | } |  |  | } | 
													
												
													
														|  |  | } |  |  | } | 
													
												
													
														|  |  | 
 |  |  | 
 | 
													
												
											
												
													
														|  |  |  |  |  |  | 
													
												
													
														|  |  | if (length < n_dict_line) { |  |  | if (length < n_dict_line) { | 
													
												
													
														|  |  | strcpy(&dict_line[(len_word)+2], encoded_ph); |  |  | strcpy(&dict_line[(len_word)+2], encoded_ph); | 
													
												
													
														|  |  | } else { |  |  | } else { | 
													
												
													
														|  |  | fprintf(f_log, "%5d: Dictionary line length would overflow the data buffer: %d\n", linenum, length); |  |  | fprintf(ctx->f_log, "%5d: Dictionary line length would overflow the data buffer: %d\n", ctx->linenum, length); | 
													
												
													
														|  |  | error_count++; |  |  | ctx->error_count++; | 
													
												
													
													
												
													
													
												
													
														|  |  | // no phonemes specified. set bit 7 |  |  | // no phonemes specified. set bit 7 | 
													
												
													
														|  |  | dict_line[1] |= 0x80; |  |  | dict_line[1] |= 0x80; | 
													
												
													
														|  |  | length = len_word + 2; |  |  | length = len_word + 2; | 
													
												
											
												
													
														|  |  |  |  |  |  | 
													
												
													
														|  |  | 
 |  |  | 
 | 
													
												
													
														|  |  | if ((multiple_string != NULL) && (multiple_words > 0)) { |  |  | if ((multiple_string != NULL) && (multiple_words > 0)) { | 
													
												
													
														|  |  | if (multiple_words > 10) { |  |  | if (multiple_words > 10) { | 
													
												
													
														|  |  | fprintf(f_log, "%5d: Two many parts in a multi-word entry: %d\n", linenum, multiple_words); |  |  | fprintf(ctx->f_log, "%5d: Two many parts in a multi-word entry: %d\n", ctx->linenum, multiple_words); | 
													
												
													
														|  |  | error_count++; |  |  | ctx->error_count++; | 
													
												
													
													
												
													
													
												
													
														|  |  | } else { |  |  | } else { | 
													
												
													
														|  |  | dict_line[length++] = 80 + multiple_words; |  |  | dict_line[length++] = 80 + multiple_words; | 
													
												
													
														|  |  | ix = multiple_string_end - multiple_string; |  |  | ix = multiple_string_end - multiple_string; | 
													
												
											
												
													
														|  |  |  |  |  |  | 
													
												
													
														|  |  | return length; |  |  | return length; | 
													
												
													
														|  |  | } |  |  | } | 
													
												
													
														|  |  | 
 |  |  | 
 | 
													
												
													
														|  |  | static void compile_dictlist_start(void) |  |  | static void compile_dictlist_start(CompileContext *ctx) | 
													
												
													
													
												
													
														|  |  | { |  |  | { | 
													
												
													
														|  |  | // initialise dictionary list |  |  | // initialise dictionary list | 
													
												
													
														|  |  | int ix; |  |  | int ix; | 
													
												
											
												
													
														|  |  |  |  |  |  | 
													
												
													
														|  |  | char *p2; |  |  | char *p2; | 
													
												
													
														|  |  | 
 |  |  | 
 | 
													
												
													
														|  |  | for (ix = 0; ix < N_HASH_DICT; ix++) { |  |  | for (ix = 0; ix < N_HASH_DICT; ix++) { | 
													
												
													
														|  |  | p = hash_chains[ix]; |  |  | p = ctx->hash_chains[ix]; | 
													
												
													
													
												
													
														|  |  | while (p != NULL) { |  |  | while (p != NULL) { | 
													
												
													
														|  |  | memcpy(&p2, p, sizeof(char *)); |  |  | memcpy(&p2, p, sizeof(char *)); | 
													
												
													
														|  |  | free(p); |  |  | free(p); | 
													
												
													
														|  |  | p = p2; |  |  | p = p2; | 
													
												
													
														|  |  | } |  |  | } | 
													
												
													
														|  |  | hash_chains[ix] = NULL; |  |  | ctx->hash_chains[ix] = NULL; | 
													
												
													
													
												
													
														|  |  | } |  |  | } | 
													
												
													
														|  |  | } |  |  | } | 
													
												
													
														|  |  | 
 |  |  | 
 | 
													
												
													
														|  |  | static void compile_dictlist_end(FILE *f_out) |  |  | static void compile_dictlist_end(CompileContext *ctx, FILE *f_out) | 
													
												
													
													
												
													
														|  |  | { |  |  | { | 
													
												
													
														|  |  | // Write out the compiled dictionary list |  |  | // Write out the compiled dictionary list | 
													
												
													
														|  |  | int hash; |  |  | int hash; | 
													
												
											
												
													
														|  |  |  |  |  |  | 
													
												
													
														|  |  | char *p; |  |  | char *p; | 
													
												
													
														|  |  | 
 |  |  | 
 | 
													
												
													
														|  |  | for (hash = 0; hash < N_HASH_DICT; hash++) { |  |  | for (hash = 0; hash < N_HASH_DICT; hash++) { | 
													
												
													
														|  |  | p = hash_chains[hash]; |  |  | p = ctx->hash_chains[hash]; | 
													
												
													
													
												
													
														|  |  | 
 |  |  | 
 | 
													
												
													
														|  |  | while (p != NULL) { |  |  | while (p != NULL) { | 
													
												
													
														|  |  | length = *(uint8_t *)(p+sizeof(char *)); |  |  | length = *(uint8_t *)(p+sizeof(char *)); | 
													
												
											
												
													
														|  |  |  |  |  |  | 
													
												
													
														|  |  | } |  |  | } | 
													
												
													
														|  |  | } |  |  | } | 
													
												
													
														|  |  | 
 |  |  | 
 | 
													
												
													
														|  |  | static int compile_dictlist_file(const char *path, const char *filename) |  |  | static int compile_dictlist_file(CompileContext *ctx, const char *path, const char *filename) | 
													
												
													
													
												
													
														|  |  | { |  |  | { | 
													
												
													
														|  |  | int length; |  |  | int length; | 
													
												
													
														|  |  | int hash; |  |  | int hash; | 
													
												
											
												
													
														|  |  |  |  |  |  | 
													
												
													
														|  |  | char fname[sizeof(path_home)+45]; |  |  | char fname[sizeof(path_home)+45]; | 
													
												
													
														|  |  | char dict_line[256]; // length is uint8_t, so an entry can't take up more than 256 bytes |  |  | char dict_line[256]; // length is uint8_t, so an entry can't take up more than 256 bytes | 
													
												
													
														|  |  | 
 |  |  | 
 | 
													
												
													
														|  |  | text_mode = false; |  |  | ctx->text_mode = false; | 
													
												
													
													
												
													
														|  |  | 
 |  |  | 
 | 
													
												
													
														|  |  | // try with and without '.txt' extension |  |  | // try with and without '.txt' extension | 
													
												
													
														|  |  | sprintf(fname, "%s%s.txt", path, filename); |  |  | sprintf(fname, "%s%s.txt", path, filename); | 
													
												
											
												
													
														|  |  |  |  |  |  | 
													
												
													
														|  |  | return -1; |  |  | return -1; | 
													
												
													
														|  |  | } |  |  | } | 
													
												
													
														|  |  | 
 |  |  | 
 | 
													
												
													
														|  |  | if (f_log != NULL) |  |  | if (ctx->f_log != NULL) | 
													
												
													
														|  |  | fprintf(f_log, "Compiling: '%s'\n", fname); |  |  | fprintf(ctx->f_log, "Compiling: '%s'\n", fname); | 
													
												
													
													
												
													
													
												
													
														|  |  | 
 |  |  | 
 | 
													
												
													
														|  |  | linenum = 0; |  |  | ctx->linenum = 0; | 
													
												
													
													
												
													
														|  |  | 
 |  |  | 
 | 
													
												
													
														|  |  | while (fgets(buf, sizeof(buf), f_in) != NULL) { |  |  | while (fgets(buf, sizeof(buf), f_in) != NULL) { | 
													
												
													
														|  |  | linenum++; |  |  | ctx->linenum++; | 
													
												
													
													
												
													
														|  |  | 
 |  |  | 
 | 
													
												
													
														|  |  | length = compile_line(buf, dict_line, sizeof(dict_line), &hash); |  |  | length = compile_line(ctx, buf, dict_line, sizeof(dict_line), &hash); | 
													
												
													
													
												
													
														|  |  | if (length == 0)  continue; // blank line |  |  | if (length == 0)  continue; // blank line | 
													
												
													
														|  |  | 
 |  |  | 
 | 
													
												
													
														|  |  | p = (char *)malloc(length+sizeof(char *)); |  |  | p = (char *)malloc(length+sizeof(char *)); | 
													
												
													
														|  |  | if (p == NULL) { |  |  | if (p == NULL) { | 
													
												
													
														|  |  | if (f_log != NULL) { |  |  | if (ctx->f_log != NULL) { | 
													
												
													
														|  |  | fprintf(f_log, "Can't allocate memory\n"); |  |  | fprintf(ctx->f_log, "Can't allocate memory\n"); | 
													
												
													
														|  |  | error_count++; |  |  | ctx->error_count++; | 
													
												
													
													
												
													
													
												
													
													
												
													
														|  |  | } |  |  | } | 
													
												
													
														|  |  | break; |  |  | break; | 
													
												
													
														|  |  | } |  |  | } | 
													
												
													
														|  |  | 
 |  |  | 
 | 
													
												
													
														|  |  | memcpy(p, &hash_chains[hash], sizeof(char *)); |  |  | memcpy(p, &ctx->hash_chains[hash], sizeof(char *)); | 
													
												
													
														|  |  | hash_chains[hash] = p; |  |  | ctx->hash_chains[hash] = p; | 
													
												
													
													
												
													
													
												
													
														|  |  | // NOTE: dict_line[0] is the entry length (0-255) |  |  | // NOTE: dict_line[0] is the entry length (0-255) | 
													
												
													
														|  |  | memcpy(p+sizeof(char *), dict_line, length); |  |  | memcpy(p+sizeof(char *), dict_line, length); | 
													
												
													
														|  |  | count++; |  |  | count++; | 
													
												
													
														|  |  | } |  |  | } | 
													
												
													
														|  |  | 
 |  |  | 
 | 
													
												
													
														|  |  | if (f_log != NULL) |  |  | if (ctx->f_log != NULL) | 
													
												
													
														|  |  | fprintf(f_log, "\t%d entries\n", count); |  |  | fprintf(ctx->f_log, "\t%d entries\n", count); | 
													
												
													
													
												
													
													
												
													
														|  |  | fclose(f_in); |  |  | fclose(f_in); | 
													
												
													
														|  |  | return 0; |  |  | return 0; | 
													
												
													
														|  |  | } |  |  | } | 
													
												
													
														|  |  | 
 |  |  | 
 | 
													
												
													
														|  |  | static char rule_cond[80]; |  |  |  | 
													
												
													
														|  |  | static char rule_pre[80]; |  |  |  | 
													
												
													
														|  |  | static char rule_post[80]; |  |  |  | 
													
												
													
														|  |  | static char rule_match[80]; |  |  |  | 
													
												
													
														|  |  | static char rule_phonemes[80]; |  |  |  | 
													
												
													
														|  |  | static char group_name[LEN_GROUP_NAME+1]; |  |  |  | 
													
												
													
														|  |  | static int group3_ix; |  |  |  | 
													
												
													
														|  |  | 
 |  |  |  | 
													
												
													
														|  |  | #define N_RULES 3000 // max rules for each group |  |  | #define N_RULES 3000 // max rules for each group | 
													
												
													
														|  |  | 
 |  |  | 
 | 
													
												
													
														|  |  | static int isHexDigit(int c) |  |  | static int isHexDigit(int c) | 
													
												
											
												
													
														|  |  |  |  |  |  | 
													
												
													
														|  |  | return -1; |  |  | return -1; | 
													
												
													
														|  |  | } |  |  | } | 
													
												
													
														|  |  | 
 |  |  | 
 | 
													
												
													
														|  |  | static void copy_rule_string(char *string, int *state_out) |  |  | static void copy_rule_string(CompileContext *ctx, char *string, int *state_out) | 
													
												
													
													
												
													
														|  |  | { |  |  | { | 
													
												
													
														|  |  | // state 0: conditional, 1=pre, 2=match, 3=post, 4=phonemes |  |  | // state 0: conditional, 1=pre, 2=match, 3=post, 4=phonemes | 
													
												
													
														|  |  | static char * const outbuf[5] = { rule_cond, rule_pre, rule_match, rule_post, rule_phonemes }; |  |  | char * const outbuf[5] = { ctx->rule_cond, ctx->rule_pre, ctx->rule_match, ctx->rule_post, ctx->rule_phonemes }; | 
													
												
													
													
												
													
														|  |  | static const int next_state[5] = { 2, 2, 4, 4, 4 }; |  |  | static const int next_state[5] = { 2, 2, 4, 4, 4 }; | 
													
												
													
														|  |  | char *output; |  |  | char *output; | 
													
												
													
														|  |  | char *p; |  |  | char *p; | 
													
												
											
												
													
														|  |  |  |  |  |  | 
													
												
													
														|  |  | output = outbuf[state]; |  |  | output = outbuf[state]; | 
													
												
													
														|  |  | if (state == 4) { |  |  | if (state == 4) { | 
													
												
													
														|  |  | // append to any previous phoneme string, i.e. allow spaces in the phoneme string |  |  | // append to any previous phoneme string, i.e. allow spaces in the phoneme string | 
													
												
													
														|  |  | len = strlen(rule_phonemes); |  |  | len = strlen(ctx->rule_phonemes); | 
													
												
													
													
												
													
														|  |  | if (len > 0) |  |  | if (len > 0) | 
													
												
													
														|  |  | rule_phonemes[len++] = ' '; |  |  | ctx->rule_phonemes[len++] = ' '; | 
													
												
													
														|  |  | output = &rule_phonemes[len]; |  |  | output = &ctx->rule_phonemes[len]; | 
													
												
													
													
												
													
													
												
													
														|  |  | } |  |  | } | 
													
												
													
														|  |  | sxflags = 0x808000; // to ensure non-zero bytes |  |  | sxflags = 0x808000; // to ensure non-zero bytes | 
													
												
													
														|  |  | 
 |  |  | 
 | 
													
												
											
												
													
														|  |  |  |  |  |  | 
													
												
													
														|  |  | c = c * 10 + value; |  |  | c = c * 10 + value; | 
													
												
													
														|  |  | if ((value < 0) || (value > 9)) { |  |  | if ((value < 0) || (value > 9)) { | 
													
												
													
														|  |  | c = 0; |  |  | c = 0; | 
													
												
													
														|  |  | fprintf(f_log, "%5d: Expected 2 digits after 'L'\n", linenum); |  |  | fprintf(ctx->f_log, "%5d: Expected 2 digits after 'L'\n", ctx->linenum); | 
													
												
													
														|  |  | error_count++; |  |  | ctx->error_count++; | 
													
												
													
														|  |  | } else if ((c <= 0) || (c >= N_LETTER_GROUPS) || (letterGroupsDefined[(int)c] == 0)) { |  |  | } else if ((c <= 0) || (c >= N_LETTER_GROUPS) || (ctx->letterGroupsDefined[(int)c] == 0)) { | 
													
												
													
														|  |  | fprintf(f_log, "%5d: Letter group L%.2d not defined\n", linenum, c); |  |  | fprintf(ctx->f_log, "%5d: Letter group L%.2d not defined\n", ctx->linenum, c); | 
													
												
													
														|  |  | error_count++; |  |  | ctx->error_count++; | 
													
												
													
													
												
													
													
												
													
													
												
													
													
												
													
													
												
													
														|  |  | } |  |  | } | 
													
												
													
														|  |  | c += 'A'; |  |  | c += 'A'; | 
													
												
													
														|  |  | if (state == 1) { |  |  | if (state == 1) { | 
													
												
											
												
													
														|  |  |  |  |  |  | 
													
												
													
														|  |  | } |  |  | } | 
													
												
													
														|  |  | 
 |  |  | 
 | 
													
												
													
														|  |  | if (value == 0) { |  |  | if (value == 0) { | 
													
												
													
														|  |  | fprintf(f_log, "%5d: $ command not recognized\n", linenum); |  |  | fprintf(ctx->f_log, "%5d: $ command not recognized\n", ctx->linenum); | 
													
												
													
														|  |  | error_count++; |  |  | ctx->error_count++; | 
													
												
													
													
												
													
													
												
													
														|  |  | } |  |  | } | 
													
												
													
														|  |  | break; |  |  | break; | 
													
												
													
														|  |  | case 'P': // Prefix |  |  | case 'P': // Prefix | 
													
												
											
												
													
														|  |  |  |  |  |  | 
													
												
													
														|  |  | *state_out = next_state[state]; |  |  | *state_out = next_state[state]; | 
													
												
													
														|  |  | } |  |  | } | 
													
												
													
														|  |  | 
 |  |  | 
 | 
													
												
													
														|  |  | static char *compile_rule(char *input) |  |  | static char *compile_rule(CompileContext *ctx, char *input) | 
													
												
													
													
												
													
														|  |  | { |  |  | { | 
													
												
													
														|  |  | int ix; |  |  | int ix; | 
													
												
													
														|  |  | unsigned char c; |  |  | unsigned char c; | 
													
												
											
												
													
														|  |  |  |  |  |  | 
													
												
													
														|  |  | char bad_phoneme_str[4]; |  |  | char bad_phoneme_str[4]; | 
													
												
													
														|  |  | 
 |  |  | 
 | 
													
												
													
														|  |  | buf[0] = 0; |  |  | buf[0] = 0; | 
													
												
													
														|  |  | rule_cond[0] = 0; |  |  | ctx->rule_cond[0] = 0; | 
													
												
													
														|  |  | rule_pre[0] = 0; |  |  | ctx->rule_pre[0] = 0; | 
													
												
													
														|  |  | rule_post[0] = 0; |  |  | ctx->rule_post[0] = 0; | 
													
												
													
														|  |  | rule_match[0] = 0; |  |  | ctx->rule_match[0] = 0; | 
													
												
													
														|  |  | rule_phonemes[0] = 0; |  |  | ctx->rule_phonemes[0] = 0; | 
													
												
													
													
												
													
													
												
													
													
												
													
													
												
													
													
												
													
														|  |  | 
 |  |  | 
 | 
													
												
													
														|  |  | p = buf; |  |  | p = buf; | 
													
												
													
														|  |  | 
 |  |  | 
 | 
													
												
											
												
													
														|  |  |  |  |  |  | 
													
												
													
														|  |  | case ')': // end of prefix section |  |  | case ')': // end of prefix section | 
													
												
													
														|  |  | *p = 0; |  |  | *p = 0; | 
													
												
													
														|  |  | state = 1; |  |  | state = 1; | 
													
												
													
														|  |  | copy_rule_string(buf, &state); |  |  | copy_rule_string(ctx, buf, &state); | 
													
												
													
													
												
													
														|  |  | p = buf; |  |  | p = buf; | 
													
												
													
														|  |  | break; |  |  | break; | 
													
												
													
														|  |  | case '(': // start of suffix section |  |  | case '(': // start of suffix section | 
													
												
													
														|  |  | *p = 0; |  |  | *p = 0; | 
													
												
													
														|  |  | state = 2; |  |  | state = 2; | 
													
												
													
														|  |  | copy_rule_string(buf, &state); |  |  | copy_rule_string(ctx, buf, &state); | 
													
												
													
													
												
													
														|  |  | state = 3; |  |  | state = 3; | 
													
												
													
														|  |  | p = buf; |  |  | p = buf; | 
													
												
													
														|  |  | if (input[ix+1] == ' ') { |  |  | if (input[ix+1] == ' ') { | 
													
												
													
														|  |  | fprintf(f_log, "%5d: Syntax error. Space after (, or negative score for previous rule\n", linenum); |  |  | fprintf(ctx->f_log, "%5d: Syntax error. Space after (, or negative score for previous rule\n", ctx->linenum); | 
													
												
													
														|  |  | error_count++; |  |  | ctx->error_count++; | 
													
												
													
													
												
													
													
												
													
														|  |  | } |  |  | } | 
													
												
													
														|  |  | break; |  |  | break; | 
													
												
													
														|  |  | case '\n': // end of line |  |  | case '\n': // end of line | 
													
												
													
														|  |  | case '\r': |  |  | case '\r': | 
													
												
													
														|  |  | case 0:    // end of line |  |  | case 0:    // end of line | 
													
												
													
														|  |  | *p = 0; |  |  | *p = 0; | 
													
												
													
														|  |  | copy_rule_string(buf, &state); |  |  | copy_rule_string(ctx, buf, &state); | 
													
												
													
													
												
													
														|  |  | finish = true; |  |  | finish = true; | 
													
												
													
														|  |  | break; |  |  | break; | 
													
												
													
														|  |  | case '\t': // end of section section |  |  | case '\t': // end of section section | 
													
												
													
														|  |  | case ' ': |  |  | case ' ': | 
													
												
													
														|  |  | *p = 0; |  |  | *p = 0; | 
													
												
													
														|  |  | copy_rule_string(buf, &state); |  |  | copy_rule_string(ctx, buf, &state); | 
													
												
													
													
												
													
														|  |  | p = buf; |  |  | p = buf; | 
													
												
													
														|  |  | break; |  |  | break; | 
													
												
													
														|  |  | case '?': |  |  | case '?': | 
													
												
											
												
													
														|  |  |  |  |  |  | 
													
												
													
														|  |  | } |  |  | } | 
													
												
													
														|  |  | } |  |  | } | 
													
												
													
														|  |  | 
 |  |  | 
 | 
													
												
													
														|  |  | if (strcmp(rule_match, "$group") == 0) |  |  | if (strcmp(ctx->rule_match, "$group") == 0) | 
													
												
													
														|  |  | strcpy(rule_match, group_name); |  |  | strcpy(ctx->rule_match, ctx->group_name); | 
													
												
													
													
												
													
													
												
													
														|  |  | 
 |  |  | 
 | 
													
												
													
														|  |  | if (rule_match[0] == 0) { |  |  | if (ctx->rule_match[0] == 0) { | 
													
												
													
														|  |  | if (rule_post[0] != 0) { |  |  | if (ctx->rule_post[0] != 0) { | 
													
												
													
														|  |  | fprintf(f_log, "%5d: Syntax error\n", linenum); |  |  | fprintf(ctx->f_log, "%5d: Syntax error\n", ctx->linenum); | 
													
												
													
														|  |  | error_count++; |  |  | ctx->error_count++; | 
													
												
													
													
												
													
													
												
													
													
												
													
													
												
													
														|  |  | } |  |  | } | 
													
												
													
														|  |  | return NULL; |  |  | return NULL; | 
													
												
													
														|  |  | } |  |  | } | 
													
												
													
														|  |  | 
 |  |  | 
 | 
													
												
													
														|  |  | EncodePhonemes(rule_phonemes, buf, &bad_phoneme); |  |  | EncodePhonemes(ctx->rule_phonemes, buf, &bad_phoneme); | 
													
												
													
													
												
													
														|  |  | if (bad_phoneme != 0) { |  |  | if (bad_phoneme != 0) { | 
													
												
													
														|  |  | bad_phoneme_str[utf8_out(bad_phoneme, bad_phoneme_str)] = 0; |  |  | bad_phoneme_str[utf8_out(bad_phoneme, bad_phoneme_str)] = 0; | 
													
												
													
														|  |  | fprintf(f_log, "%5d: Bad phoneme [%s] (U+%x) in: %s\n", linenum, bad_phoneme_str, bad_phoneme, input); |  |  | fprintf(ctx->f_log, "%5d: Bad phoneme [%s] (U+%x) in: %s\n", ctx->linenum, bad_phoneme_str, bad_phoneme, input); | 
													
												
													
														|  |  | error_count++; |  |  | ctx->error_count++; | 
													
												
													
													
												
													
													
												
													
														|  |  | } |  |  | } | 
													
												
													
														|  |  | strcpy(output, buf); |  |  | strcpy(output, buf); | 
													
												
													
														|  |  | len = strlen(buf)+1; |  |  | len = strlen(buf)+1; | 
													
												
													
														|  |  | 
 |  |  | 
 | 
													
												
													
														|  |  | len_name = strlen(group_name); |  |  | len_name = strlen(ctx->group_name); | 
													
												
													
														|  |  | if ((len_name > 0) && (memcmp(rule_match, group_name, len_name) != 0)) { |  |  | if ((len_name > 0) && (memcmp(ctx->rule_match, ctx->group_name, len_name) != 0)) { | 
													
												
													
														|  |  | utf8_in(&wc, rule_match); |  |  | utf8_in(&wc, ctx->rule_match); | 
													
												
													
														|  |  | if ((group_name[0] == '9') && IsDigit(wc)) { |  |  | if ((ctx->group_name[0] == '9') && IsDigit(wc)) { | 
													
												
													
													
												
													
													
												
													
													
												
													
													
												
													
														|  |  | // numeric group, rule_match starts with a digit, so OK |  |  | // numeric group, rule_match starts with a digit, so OK | 
													
												
													
														|  |  | } else { |  |  | } else { | 
													
												
													
														|  |  | fprintf(f_log, "%5d: Wrong initial letters '%s' for group '%s'\n", linenum, rule_match, group_name); |  |  | fprintf(ctx->f_log, "%5d: Wrong initial letters '%s' for group '%s'\n", ctx->linenum, ctx->rule_match, ctx->group_name); | 
													
												
													
														|  |  | error_count++; |  |  | ctx->error_count++; | 
													
												
													
													
												
													
													
												
													
														|  |  | } |  |  | } | 
													
												
													
														|  |  | } |  |  | } | 
													
												
													
														|  |  | strcpy(&output[len], rule_match); |  |  | strcpy(&output[len], ctx->rule_match); | 
													
												
													
														|  |  | len += strlen(rule_match); |  |  | len += strlen(ctx->rule_match); | 
													
												
													
													
												
													
													
												
													
														|  |  | 
 |  |  | 
 | 
													
												
													
														|  |  | if (debug_flag) { |  |  | if (ctx->debug_flag) { | 
													
												
													
													
												
													
														|  |  | output[len] = RULE_LINENUM; |  |  | output[len] = RULE_LINENUM; | 
													
												
													
														|  |  | output[len+1] = (linenum % 255) + 1; |  |  | output[len+1] = (ctx->linenum % 255) + 1; | 
													
												
													
														|  |  | output[len+2] = (linenum / 255) + 1; |  |  | output[len+2] = (ctx->linenum / 255) + 1; | 
													
												
													
													
												
													
													
												
													
														|  |  | len += 3; |  |  | len += 3; | 
													
												
													
														|  |  | } |  |  | } | 
													
												
													
														|  |  | 
 |  |  | 
 | 
													
												
													
														|  |  | if (rule_cond[0] != 0) { |  |  | if (ctx->rule_cond[0] != 0) { | 
													
												
													
														|  |  | if (rule_cond[0] == '!') { |  |  | if (ctx->rule_cond[0] == '!') { | 
													
												
													
													
												
													
													
												
													
														|  |  | // allow the rule only if the condition number is NOT set for the voice |  |  | // allow the rule only if the condition number is NOT set for the voice | 
													
												
													
														|  |  | ix = atoi(&rule_cond[1]) + 32; |  |  | ix = atoi(&ctx->rule_cond[1]) + 32; | 
													
												
													
													
												
													
														|  |  | } else { |  |  | } else { | 
													
												
													
														|  |  | // allow the rule only if the condition number is set for the voice |  |  | // allow the rule only if the condition number is set for the voice | 
													
												
													
														|  |  | ix = atoi(rule_cond); |  |  | ix = atoi(ctx->rule_cond); | 
													
												
													
													
												
													
														|  |  | } |  |  | } | 
													
												
													
														|  |  | 
 |  |  | 
 | 
													
												
													
														|  |  | if ((ix > 0) && (ix < 255)) { |  |  | if ((ix > 0) && (ix < 255)) { | 
													
												
													
														|  |  | output[len++] = RULE_CONDITION; |  |  | output[len++] = RULE_CONDITION; | 
													
												
													
														|  |  | output[len++] = ix; |  |  | output[len++] = ix; | 
													
												
													
														|  |  | } else { |  |  | } else { | 
													
												
													
														|  |  | fprintf(f_log, "%5d: bad condition number ?%d\n", linenum, ix); |  |  | fprintf(ctx->f_log, "%5d: bad condition number ?%d\n", ctx->linenum, ix); | 
													
												
													
														|  |  | error_count++; |  |  | ctx->error_count++; | 
													
												
													
													
												
													
													
												
													
														|  |  | } |  |  | } | 
													
												
													
														|  |  | } |  |  | } | 
													
												
													
														|  |  | if (rule_pre[0] != 0) { |  |  | if (ctx->rule_pre[0] != 0) { | 
													
												
													
													
												
													
														|  |  | start = 0; |  |  | start = 0; | 
													
												
													
														|  |  | if (rule_pre[0] == RULE_SPACE) { |  |  | if (ctx->rule_pre[0] == RULE_SPACE) { | 
													
												
													
													
												
													
														|  |  | // omit '_' at the beginning of the pre-string and imply it by using RULE_PRE_ATSTART |  |  | // omit '_' at the beginning of the pre-string and imply it by using RULE_PRE_ATSTART | 
													
												
													
														|  |  | c = RULE_PRE_ATSTART; |  |  | c = RULE_PRE_ATSTART; | 
													
												
													
														|  |  | start = 1; |  |  | start = 1; | 
													
												
											
												
													
														|  |  |  |  |  |  | 
													
												
													
														|  |  | output[len++] = c; |  |  | output[len++] = c; | 
													
												
													
														|  |  | 
 |  |  | 
 | 
													
												
													
														|  |  | // output PRE string in reverse order |  |  | // output PRE string in reverse order | 
													
												
													
														|  |  | for (ix = strlen(rule_pre)-1; ix >= start; ix--) |  |  | for (ix = strlen(ctx->rule_pre)-1; ix >= start; ix--) | 
													
												
													
														|  |  | output[len++] = rule_pre[ix]; |  |  | output[len++] = ctx->rule_pre[ix]; | 
													
												
													
													
												
													
													
												
													
														|  |  | } |  |  | } | 
													
												
													
														|  |  | 
 |  |  | 
 | 
													
												
													
														|  |  | if (rule_post[0] != 0) { |  |  | if (ctx->rule_post[0] != 0) { | 
													
												
													
														|  |  | sprintf(&output[len], "%c%s", RULE_POST, rule_post); |  |  | sprintf(&output[len], "%c%s", RULE_POST, ctx->rule_post); | 
													
												
													
														|  |  | len += (strlen(rule_post)+1); |  |  | len += (strlen(ctx->rule_post)+1); | 
													
												
													
													
												
													
													
												
													
													
												
													
														|  |  | } |  |  | } | 
													
												
													
														|  |  | output[len++] = 0; |  |  | output[len++] = 0; | 
													
												
													
														|  |  | if ((prule = (char *)malloc(len)) != NULL) |  |  | if ((prule = (char *)malloc(len)) != NULL) | 
													
												
											
												
													
														|  |  |  |  |  |  | 
													
												
													
														|  |  | } |  |  | } | 
													
												
													
														|  |  | } |  |  | } | 
													
												
													
														|  |  | 
 |  |  | 
 | 
													
												
													
														|  |  | static int compile_lettergroup(char *input, FILE *f_out) |  |  | static int compile_lettergroup(CompileContext *ctx, char *input, FILE *f_out) | 
													
												
													
													
												
													
														|  |  | { |  |  | { | 
													
												
													
														|  |  | char *p; |  |  | char *p; | 
													
												
													
														|  |  | char *p_start; |  |  | char *p_start; | 
													
												
											
												
													
														|  |  |  |  |  |  | 
													
												
													
														|  |  | 
 |  |  | 
 | 
													
												
													
														|  |  | p = input; |  |  | p = input; | 
													
												
													
														|  |  | if (!IsDigit09(p[0]) || !IsDigit09(p[1])) { |  |  | if (!IsDigit09(p[0]) || !IsDigit09(p[1])) { | 
													
												
													
														|  |  | fprintf(f_log, "%5d: Expected 2 digits after '.L'\n", linenum); |  |  | fprintf(ctx->f_log, "%5d: Expected 2 digits after '.L'\n", ctx->linenum); | 
													
												
													
														|  |  | error_count++; |  |  | ctx->error_count++; | 
													
												
													
													
												
													
													
												
													
														|  |  | return 1; |  |  | return 1; | 
													
												
													
														|  |  | } |  |  | } | 
													
												
													
														|  |  | 
 |  |  | 
 | 
													
												
													
														|  |  | group = atoi(&p[0]); |  |  | group = atoi(&p[0]); | 
													
												
													
														|  |  | if (group >= N_LETTER_GROUPS) { |  |  | if (group >= N_LETTER_GROUPS) { | 
													
												
													
														|  |  | fprintf(f_log, "%5d: lettergroup out of range (01-%.2d)\n", linenum, N_LETTER_GROUPS-1); |  |  | fprintf(ctx->f_log, "%5d: lettergroup out of range (01-%.2d)\n", ctx->linenum, N_LETTER_GROUPS-1); | 
													
												
													
														|  |  | error_count++; |  |  | ctx->error_count++; | 
													
												
													
													
												
													
													
												
													
														|  |  | return 1; |  |  | return 1; | 
													
												
													
														|  |  | } |  |  | } | 
													
												
													
														|  |  | 
 |  |  | 
 | 
													
												
											
												
													
														|  |  |  |  |  |  | 
													
												
													
														|  |  | fputc(RULE_GROUP_START, f_out); |  |  | fputc(RULE_GROUP_START, f_out); | 
													
												
													
														|  |  | fputc(RULE_LETTERGP2, f_out); |  |  | fputc(RULE_LETTERGP2, f_out); | 
													
												
													
														|  |  | fputc(group + 'A', f_out); |  |  | fputc(group + 'A', f_out); | 
													
												
													
														|  |  | if (letterGroupsDefined[group] != 0) { |  |  | if (ctx->letterGroupsDefined[group] != 0) { | 
													
												
													
														|  |  | fprintf(f_log, "%5d: lettergroup L%.2d is already defined\n", linenum, group); |  |  | fprintf(ctx->f_log, "%5d: lettergroup L%.2d is already defined\n", ctx->linenum, group); | 
													
												
													
														|  |  | error_count++; |  |  | ctx->error_count++; | 
													
												
													
													
												
													
													
												
													
													
												
													
														|  |  | } |  |  | } | 
													
												
													
														|  |  | letterGroupsDefined[group] = 1; |  |  | ctx->letterGroupsDefined[group] = 1; | 
													
												
													
													
												
													
														|  |  | 
 |  |  | 
 | 
													
												
													
														|  |  | n_items = 0; |  |  | n_items = 0; | 
													
												
													
														|  |  | while (n_items < N_LETTERGP_ITEMS) { |  |  | while (n_items < N_LETTERGP_ITEMS) { | 
													
												
											
												
													
														|  |  |  |  |  |  | 
													
												
													
														|  |  | } |  |  | } | 
													
												
													
														|  |  | } |  |  | } | 
													
												
													
														|  |  | 
 |  |  | 
 | 
													
												
													
														|  |  | static espeak_ng_STATUS compile_dictrules(FILE *f_in, FILE *f_out, char *fname_temp, espeak_ng_ERROR_CONTEXT *context) |  |  | static espeak_ng_STATUS compile_dictrules(CompileContext *ctx, FILE *f_in, FILE *f_out, char *fname_temp, espeak_ng_ERROR_CONTEXT *context) | 
													
												
													
													
												
													
														|  |  | { |  |  | { | 
													
												
													
														|  |  | char *prule; |  |  | char *prule; | 
													
												
													
														|  |  | unsigned char *p; |  |  | unsigned char *p; | 
													
												
											
												
													
														|  |  |  |  |  |  | 
													
												
													
														|  |  | int n_groups3 = 0; |  |  | int n_groups3 = 0; | 
													
												
													
														|  |  | RGROUP rgroup[N_RULE_GROUP2]; |  |  | RGROUP rgroup[N_RULE_GROUP2]; | 
													
												
													
														|  |  | 
 |  |  | 
 | 
													
												
													
														|  |  | linenum = 0; |  |  | ctx->linenum = 0; | 
													
												
													
														|  |  | group_name[0] = 0; |  |  | ctx->group_name[0] = 0; | 
													
												
													
													
												
													
													
												
													
														|  |  | 
 |  |  | 
 | 
													
												
													
														|  |  | if ((f_temp = fopen(fname_temp, "wb")) == NULL) |  |  | if ((f_temp = fopen(fname_temp, "wb")) == NULL) | 
													
												
													
														|  |  | return create_file_error_context(context, errno, fname_temp); |  |  | return create_file_error_context(context, errno, fname_temp); | 
													
												
													
														|  |  | 
 |  |  | 
 | 
													
												
													
														|  |  | for (;;) { |  |  | for (;;) { | 
													
												
													
														|  |  | linenum++; |  |  | ctx->linenum++; | 
													
												
													
													
												
													
														|  |  | buf = fgets(buf1, sizeof(buf1), f_in); |  |  | buf = fgets(buf1, sizeof(buf1), f_in); | 
													
												
													
														|  |  | if (buf != NULL) { |  |  | if (buf != NULL) { | 
													
												
													
														|  |  | if ((p = (unsigned char *)strstr(buf, "//")) != NULL) |  |  | if ((p = (unsigned char *)strstr(buf, "//")) != NULL) | 
													
												
											
												
													
														|  |  |  |  |  |  | 
													
												
													
														|  |  | // next .group or end of file, write out the previous group |  |  | // next .group or end of file, write out the previous group | 
													
												
													
														|  |  | 
 |  |  | 
 | 
													
												
													
														|  |  | if (n_rules > 0) { |  |  | if (n_rules > 0) { | 
													
												
													
														|  |  | strcpy(rgroup[n_rgroups].name, group_name); |  |  | strcpy(rgroup[n_rgroups].name, ctx->group_name); | 
													
												
													
														|  |  | rgroup[n_rgroups].group3_ix = group3_ix; |  |  | rgroup[n_rgroups].group3_ix = ctx->group3_ix; | 
													
												
													
													
												
													
													
												
													
														|  |  | rgroup[n_rgroups].start = ftell(f_temp); |  |  | rgroup[n_rgroups].start = ftell(f_temp); | 
													
												
													
														|  |  | output_rule_group(f_temp, n_rules, rules, group_name); |  |  | output_rule_group(f_temp, n_rules, rules, ctx->group_name); | 
													
												
													
													
												
													
														|  |  | rgroup[n_rgroups].length = ftell(f_temp) - rgroup[n_rgroups].start; |  |  | rgroup[n_rgroups].length = ftell(f_temp) - rgroup[n_rgroups].start; | 
													
												
													
														|  |  | n_rgroups++; |  |  | n_rgroups++; | 
													
												
													
														|  |  | 
 |  |  | 
 | 
													
												
											
												
													
														|  |  |  |  |  |  | 
													
												
													
														|  |  | if (buf == NULL) break; // end of file |  |  | if (buf == NULL) break; // end of file | 
													
												
													
														|  |  | 
 |  |  | 
 | 
													
												
													
														|  |  | if (memcmp(buf, ".L", 2) == 0) { |  |  | if (memcmp(buf, ".L", 2) == 0) { | 
													
												
													
														|  |  | compile_lettergroup(&buf[2], f_out); |  |  | compile_lettergroup(ctx, &buf[2], f_out); | 
													
												
													
													
												
													
														|  |  | continue; |  |  | continue; | 
													
												
													
														|  |  | } |  |  | } | 
													
												
													
														|  |  | 
 |  |  | 
 | 
													
												
											
												
													
														|  |  |  |  |  |  | 
													
												
													
														|  |  | while ((p[0] == ' ') || (p[0] == '\t')) p++; // Note: Windows isspace(0xe1) gives TRUE ! |  |  | while ((p[0] == ' ') || (p[0] == '\t')) p++; // Note: Windows isspace(0xe1) gives TRUE ! | 
													
												
													
														|  |  | ix = 0; |  |  | ix = 0; | 
													
												
													
														|  |  | while ((*p > ' ') && (ix < LEN_GROUP_NAME)) |  |  | while ((*p > ' ') && (ix < LEN_GROUP_NAME)) | 
													
												
													
														|  |  | group_name[ix++] = *p++; |  |  | ctx->group_name[ix++] = *p++; | 
													
												
													
														|  |  | group_name[ix] = 0; |  |  | ctx->group_name[ix] = 0; | 
													
												
													
														|  |  | group3_ix = 0; |  |  | ctx->group3_ix = 0; | 
													
												
													
													
												
													
													
												
													
													
												
													
														|  |  | 
 |  |  | 
 | 
													
												
													
														|  |  | if (sscanf(group_name, "0x%x", &char_code) == 1) { |  |  | if (sscanf(ctx->group_name, "0x%x", &char_code) == 1) { | 
													
												
													
													
												
													
														|  |  | // group character is given as a character code (max 16 bits) |  |  | // group character is given as a character code (max 16 bits) | 
													
												
													
														|  |  | p = (unsigned char *)group_name; |  |  | p = (unsigned char *)ctx->group_name; | 
													
												
													
													
												
													
														|  |  | 
 |  |  | 
 | 
													
												
													
														|  |  | if (char_code > 0x100) |  |  | if (char_code > 0x100) | 
													
												
													
														|  |  | *p++ = (char_code >> 8); |  |  | *p++ = (char_code >> 8); | 
													
												
											
												
													
														|  |  |  |  |  |  | 
													
												
													
														|  |  | *p = 0; |  |  | *p = 0; | 
													
												
													
														|  |  | } else { |  |  | } else { | 
													
												
													
														|  |  | if (translator->letter_bits_offset > 0) { |  |  | if (translator->letter_bits_offset > 0) { | 
													
												
													
														|  |  | utf8_in(&wc, group_name); |  |  | utf8_in(&wc, ctx->group_name); | 
													
												
													
													
												
													
														|  |  | if (((ix = (wc - translator->letter_bits_offset)) >= 0) && (ix < 128)) |  |  | if (((ix = (wc - translator->letter_bits_offset)) >= 0) && (ix < 128)) | 
													
												
													
														|  |  | group3_ix = ix+1; // not zero |  |  | ctx->group3_ix = ix+1; // not zero | 
													
												
													
													
												
													
														|  |  | } |  |  | } | 
													
												
													
														|  |  | } |  |  | } | 
													
												
													
														|  |  | 
 |  |  | 
 | 
													
												
													
														|  |  | if ((group3_ix == 0) && (strlen(group_name) > 2)) { |  |  | if ((ctx->group3_ix == 0) && (strlen(ctx->group_name) > 2)) { | 
													
												
													
														|  |  | if (utf8_in(&c, group_name) < 2) { |  |  | if (utf8_in(&c, ctx->group_name) < 2) { | 
													
												
													
														|  |  | fprintf(f_log, "%5d: Group name longer than 2 bytes (UTF8)", linenum); |  |  | fprintf(ctx->f_log, "%5d: Group name longer than 2 bytes (UTF8)", ctx->linenum); | 
													
												
													
														|  |  | error_count++; |  |  | ctx->error_count++; | 
													
												
													
													
												
													
													
												
													
													
												
													
													
												
													
														|  |  | } |  |  | } | 
													
												
													
														|  |  | 
 |  |  | 
 | 
													
												
													
														|  |  | group_name[2] = 0; |  |  | ctx->group_name[2] = 0; | 
													
												
													
													
												
													
														|  |  | } |  |  | } | 
													
												
													
														|  |  | } |  |  | } | 
													
												
													
														|  |  | 
 |  |  | 
 | 
													
												
											
												
													
														|  |  |  |  |  |  | 
													
												
													
														|  |  | switch (compile_mode) |  |  | switch (compile_mode) | 
													
												
													
														|  |  | { |  |  | { | 
													
												
													
														|  |  | case 1: //  .group |  |  | case 1: //  .group | 
													
												
													
														|  |  | prule = compile_rule(buf); |  |  | prule = compile_rule(ctx, buf); | 
													
												
													
													
												
													
														|  |  | if (prule != NULL) { |  |  | if (prule != NULL) { | 
													
												
													
														|  |  | if (n_rules < N_RULES) |  |  | if (n_rules < N_RULES) | 
													
												
													
														|  |  | rules[n_rules++] = prule; |  |  | rules[n_rules++] = prule; | 
													
												
													
														|  |  | else { |  |  | else { | 
													
												
													
														|  |  | if (err_n_rules == 0) { |  |  | if (err_n_rules == 0) { | 
													
												
													
														|  |  | fprintf(stderr, "\nExceeded limit of rules (%d) in group '%s'\n", N_RULES, group_name); |  |  | fprintf(stderr, "\nExceeded limit of rules (%d) in group '%s'\n", N_RULES, ctx->group_name); | 
													
												
													
														|  |  | error_count++; |  |  | ctx->error_count++; | 
													
												
													
													
												
													
													
												
													
														|  |  | err_n_rules = 1; |  |  | err_n_rules = 1; | 
													
												
													
														|  |  | } |  |  | } | 
													
												
													
														|  |  | } |  |  | } | 
													
												
											
												
													
														|  |  |  |  |  |  | 
													
												
													
														|  |  | fclose(f_temp); |  |  | fclose(f_temp); | 
													
												
													
														|  |  | remove(fname_temp); |  |  | remove(fname_temp); | 
													
												
													
														|  |  | 
 |  |  | 
 | 
													
												
													
														|  |  | fprintf(f_log, "\t%d rules, %d groups (%d)\n\n", count, n_rgroups, n_groups3); |  |  | fprintf(ctx->f_log, "\t%d rules, %d groups (%d)\n\n", count, n_rgroups, n_groups3); | 
													
												
													
													
												
													
														|  |  | free_rules(rules, n_rules); |  |  | free_rules(rules, n_rules); | 
													
												
													
														|  |  | return ENS_OK; |  |  | return ENS_OK; | 
													
												
													
														|  |  | } |  |  | } | 
													
												
											
												
													
														|  |  |  |  |  |  | 
													
												
													
														|  |  | char fname_temp[sizeof(path_home)+15]; |  |  | char fname_temp[sizeof(path_home)+15]; | 
													
												
													
														|  |  | char path[sizeof(path_home)+40];       // path_dsource+20 |  |  | char path[sizeof(path_home)+40];       // path_dsource+20 | 
													
												
													
														|  |  | 
 |  |  | 
 | 
													
												
													
														|  |  | error_count = 0; |  |  | CompileContext *ctx = calloc(1, sizeof(CompileContext)); | 
													
												
													
														|  |  | error_need_dictionary = 0; |  |  | ctx->error_count = 0; | 
													
												
													
														|  |  | memset(letterGroupsDefined, 0, sizeof(letterGroupsDefined)); |  |  | ctx->error_need_dictionary = 0; | 
													
												
													
													
												
													
													
												
													
													
												
													
													
												
													
														|  |  |  |  |  | memset(ctx->letterGroupsDefined, 0, sizeof(ctx->letterGroupsDefined)); | 
													
												
													
														|  |  | 
 |  |  | 
 | 
													
												
													
														|  |  | debug_flag = flags & 1; |  |  | ctx->debug_flag = flags & 1; | 
													
												
													
													
												
													
														|  |  | 
 |  |  | 
 | 
													
												
													
														|  |  | if (dsource == NULL) |  |  | if (dsource == NULL) | 
													
												
													
														|  |  | dsource = ""; |  |  | dsource = ""; | 
													
												
													
														|  |  | 
 |  |  | 
 | 
													
												
													
														|  |  | f_log = log; |  |  | ctx->f_log = log; | 
													
												
													
														|  |  | if (f_log == NULL) |  |  | if (ctx->f_log == NULL) | 
													
												
													
														|  |  | f_log = stderr; |  |  | ctx->f_log = stderr; | 
													
												
													
													
												
													
													
												
													
													
												
													
														|  |  | 
 |  |  | 
 | 
													
												
													
														|  |  | // try with and without '.txt' extension |  |  | // try with and without '.txt' extension | 
													
												
													
														|  |  | sprintf(path, "%s%s_", dsource, dict_name); |  |  | sprintf(path, "%s%s_", dsource, dict_name); | 
													
												
													
														|  |  | sprintf(fname_in, "%srules.txt", path); |  |  | sprintf(fname_in, "%srules.txt", path); | 
													
												
													
														|  |  | if ((f_in = fopen(fname_in, "r")) == NULL) { |  |  | if ((f_in = fopen(fname_in, "r")) == NULL) { | 
													
												
													
														|  |  | sprintf(fname_in, "%srules", path); |  |  | sprintf(fname_in, "%srules", path); | 
													
												
													
														|  |  | if ((f_in = fopen(fname_in, "r")) == NULL) |  |  | if ((f_in = fopen(fname_in, "r")) == NULL) { | 
													
												
													
													
												
													
														|  |  |  |  |  | clean_context(ctx); | 
													
												
													
														|  |  | return create_file_error_context(context, errno, fname_in); |  |  | return create_file_error_context(context, errno, fname_in); | 
													
												
													
														|  |  |  |  |  | } | 
													
												
													
														|  |  | } |  |  | } | 
													
												
													
														|  |  | 
 |  |  | 
 | 
													
												
													
														|  |  | sprintf(fname_out, "%s%c%s_dict", path_home, PATHSEP, dict_name); |  |  | sprintf(fname_out, "%s%c%s_dict", path_home, PATHSEP, dict_name); | 
													
												
													
														|  |  | if ((f_out = fopen(fname_out, "wb+")) == NULL) { |  |  | if ((f_out = fopen(fname_out, "wb+")) == NULL) { | 
													
												
													
														|  |  | int error = errno; |  |  | int error = errno; | 
													
												
													
														|  |  | fclose(f_in); |  |  | fclose(f_in); | 
													
												
													
														|  |  |  |  |  | clean_context(ctx); | 
													
												
													
														|  |  | return create_file_error_context(context, error, fname_out); |  |  | return create_file_error_context(context, error, fname_out); | 
													
												
													
														|  |  | } |  |  | } | 
													
												
													
														|  |  | /* Use dictionary-specific temp names to allow parallel compilation |  |  | /* Use dictionary-specific temp names to allow parallel compilation | 
													
												
											
												
													
														|  |  |  |  |  |  | 
													
												
													
														|  |  | Write4Bytes(f_out, value); |  |  | Write4Bytes(f_out, value); | 
													
												
													
														|  |  | Write4Bytes(f_out, offset_rules); |  |  | Write4Bytes(f_out, offset_rules); | 
													
												
													
														|  |  | 
 |  |  | 
 | 
													
												
													
														|  |  | compile_dictlist_start(); |  |  | compile_dictlist_start(ctx); | 
													
												
													
													
												
													
														|  |  | 
 |  |  | 
 | 
													
												
													
														|  |  | fprintf(f_log, "Using phonemetable: '%s'\n", phoneme_tab_list[phoneme_tab_number].name); |  |  | fprintf(ctx->f_log, "Using phonemetable: '%s'\n", phoneme_tab_list[phoneme_tab_number].name); | 
													
												
													
														|  |  | compile_dictlist_file(path, "roots"); |  |  | compile_dictlist_file(ctx, path, "roots"); | 
													
												
													
													
												
													
													
												
													
														|  |  | if (translator->langopts.listx) { |  |  | if (translator->langopts.listx) { | 
													
												
													
														|  |  | compile_dictlist_file(path, "list"); |  |  | compile_dictlist_file(ctx, path, "list"); | 
													
												
													
														|  |  | compile_dictlist_file(path, "listx"); |  |  | compile_dictlist_file(ctx, path, "listx"); | 
													
												
													
													
												
													
													
												
													
														|  |  | } else { |  |  | } else { | 
													
												
													
														|  |  | compile_dictlist_file(path, "listx"); |  |  | compile_dictlist_file(ctx, path, "listx"); | 
													
												
													
														|  |  | compile_dictlist_file(path, "list"); |  |  | compile_dictlist_file(ctx, path, "list"); | 
													
												
													
													
												
													
													
												
													
														|  |  | } |  |  | } | 
													
												
													
														|  |  | compile_dictlist_file(path, "emoji"); |  |  | compile_dictlist_file(ctx, path, "emoji"); | 
													
												
													
														|  |  | compile_dictlist_file(path, "extra"); |  |  | compile_dictlist_file(ctx, path, "extra"); | 
													
												
													
													
												
													
													
												
													
														|  |  | 
 |  |  | 
 | 
													
												
													
														|  |  | compile_dictlist_end(f_out); |  |  | compile_dictlist_end(ctx, f_out); | 
													
												
													
													
												
													
														|  |  | offset_rules = ftell(f_out); |  |  | offset_rules = ftell(f_out); | 
													
												
													
														|  |  | 
 |  |  | 
 | 
													
												
													
														|  |  | fprintf(f_log, "Compiling: '%s'\n", fname_in); |  |  | fprintf(ctx->f_log, "Compiling: '%s'\n", fname_in); | 
													
												
													
													
												
													
														|  |  | 
 |  |  | 
 | 
													
												
													
														|  |  | espeak_ng_STATUS status = compile_dictrules(f_in, f_out, fname_temp, context); |  |  | espeak_ng_STATUS status = compile_dictrules(ctx, f_in, f_out, fname_temp, context); | 
													
												
													
													
												
													
														|  |  | fclose(f_in); |  |  | fclose(f_in); | 
													
												
													
														|  |  | 
 |  |  | 
 | 
													
												
													
														|  |  | fseek(f_out, 4, SEEK_SET); |  |  | fseek(f_out, 4, SEEK_SET); | 
													
												
													
														|  |  | Write4Bytes(f_out, offset_rules); |  |  | Write4Bytes(f_out, offset_rules); | 
													
												
													
														|  |  | fclose(f_out); |  |  | fclose(f_out); | 
													
												
													
														|  |  | fflush(f_log); |  |  | fflush(ctx->f_log); | 
													
												
													
													
												
													
														|  |  | 
 |  |  | 
 | 
													
												
													
														|  |  | if (status != ENS_OK) |  |  | if (status != ENS_OK) { | 
													
												
													
													
												
													
														|  |  |  |  |  | clean_context(ctx); | 
													
												
													
														|  |  | return status; |  |  | return status; | 
													
												
													
														|  |  |  |  |  | } | 
													
												
													
														|  |  | 
 |  |  | 
 | 
													
												
													
														|  |  | LoadDictionary(translator, dict_name, 0); |  |  | LoadDictionary(translator, dict_name, 0); | 
													
												
													
														|  |  | 
 |  |  | 
 | 
													
												
													
														|  |  | return error_count > 0 ? ENS_COMPILE_ERROR : ENS_OK; |  |  | status = ctx->error_count > 0 ? ENS_COMPILE_ERROR : ENS_OK; | 
													
												
													
													
												
													
														|  |  |  |  |  | clean_context(ctx); | 
													
												
													
														|  |  |  |  |  | return status; | 
													
												
													
														|  |  | } |  |  | } | 
													
												
													
														|  |  | #pragma GCC visibility pop |  |  | #pragma GCC visibility pop |