Code cleanup: relocate functions & remove unused #defines (branch: master)
| #include <stdlib.h> | #include <stdlib.h> | ||||
| #include <string.h> | #include <string.h> | ||||
| #include <sys/stat.h> | #include <sys/stat.h> | ||||
| #include <wctype.h> | |||||
| #include <espeak-ng/espeak_ng.h> | #include <espeak-ng/espeak_ng.h> | ||||
| #include <espeak-ng/speak_lib.h> | #include <espeak-ng/speak_lib.h> | ||||
| #include <espeak-ng/encoding.h> | #include <espeak-ng/encoding.h> | ||||
| #include <ucd/ucd.h> | |||||
| #include "common.h" | #include "common.h" | ||||
| #include "translate.h" | |||||
| #pragma GCC visibility push(default) | #pragma GCC visibility push(default) | ||||
| strncpy(to, from, size); | strncpy(to, from, size); | ||||
| to[size-1] = 0; | to[size-1] = 0; | ||||
| } | } | ||||
int utf8_in(int *c, const char *buf)
{
	/* Read one Unicode character from a UTF-8 string, scanning forwards.
	 *
	 * c:   receives the character value, i.e. the payload bits that remain
	 *      after the UTF-8 header bits of each byte are dropped:
	 *        2 bytes  "ā"  c4 81        -> 0x0101
	 *        3 bytes  "ꙅ"  ea 99 85     -> 0xA645
	 *        4 bytes  "𠜎"  f0 a0 9c 8e  -> 0x2070e
	 * buf: the UTF-8 text to read from
	 *
	 * Returns the number of UTF-8 bytes used.
	 * All the work is done by utf8_in2() with its 'backwards' flag cleared.
	 */
	const int backwards = 0;

	return utf8_in2(c, buf, backwards);
}
| #pragma GCC visibility pop | #pragma GCC visibility pop | ||||
int utf8_out(unsigned int c, char *buf)
{
	// Encode the character value c as UTF-8 into buf.
	// Returns the number of bytes written (1 to 4); no terminator is added.
	// Values of 0x110000 and above are written as a single space.
	static const unsigned char lead_bits[4] = { 0, 0xc0, 0xe0, 0xf0 };
	int trailing; // number of continuation bytes after the lead byte
	int pos;

	if (c < 0x80) {
		buf[0] = c;
		return 1;
	}
	if (c >= 0x110000) {
		buf[0] = ' '; // out of range character code
		return 1;
	}

	trailing = (c < 0x0800) ? 1 : ((c < 0x10000) ? 2 : 3);

	// lead byte: length marker plus the most significant payload bits
	buf[0] = lead_bits[trailing] | (c >> (6 * trailing));

	// continuation bytes: 10xxxxxx, six payload bits each, high bits first
	for (pos = 1; pos <= trailing; pos++)
		buf[pos] = 0x80 + ((c >> (6 * (trailing - pos))) & 0x3f);

	return trailing + 1;
}
int utf8_in2(int *c, const char *buf, int backwards)
{
	// Read one Unicode character from a UTF-8 string.
	// c:         receives the integer value of the (possibly multi-byte) character
	// buf:       where to start reading; if it points at a continuation byte,
	//            the scan first moves to a lead byte
	// backwards: non-zero to look for that lead byte towards the start of the string
	// Returns the number of UTF-8 bytes used.
	static const unsigned char lead_mask[4] = { 0xff, 0x1f, 0x0f, 0x07 };
	const int step = backwards ? -1 : 1;
	int value;
	int expected = 0; // continuation bytes announced by the lead byte
	int got = 0;      // continuation bytes actually consumed

	// skip over non-initial bytes of a multi-byte utf8 character
	while ((*buf & 0xc0) == 0x80)
		buf += step;

	value = *buf++;
	if (value & 0x80) {
		if ((value & 0xe0) == 0xc0)
			expected = 1;
		else if ((value & 0xf0) == 0xe0)
			expected = 2;
		else if ((value & 0xf8) == 0xf0)
			expected = 3;

		value &= lead_mask[expected];

		// stop early if the string ends inside the character (truncated input)
		while ((got < expected) && (*buf != 0)) {
			value = (value << 6) + (*buf++ & 0x3f);
			got++;
		}
	}

	*c = value;
	return got + 1;
}
int IsAlpha(unsigned int c)
{
	// Replacement for iswalpha() which also accepts some in-word symbols
	// (combining marks, vowel signs, ...).
	static const unsigned short extra_indic_alphas[] = {
		0xa70, 0xa71, // Gurmukhi: tippi, addak
		0
	};
	// Inclusive ranges accepted outside the Indic block handled below
	static const struct {
		unsigned int first;
		unsigned int last;
	} alpha_ranges[] = {
		{ 0x0300, 0x036f }, // combining accents
		{ 0x05b0, 0x05c2 }, // Hebrew vowel marks
		{ 0x0605, 0x0605 },
		{ 0x064b, 0x065e }, // arabic vowel marks
		{ 0x0670, 0x0670 }, // arabic vowel mark
		{ 0x0f40, 0x0fbc }, // tibetan
		{ 0x1100, 0x11ff }, // Korean jamo
		{ 0x2800, 0x28ff }, // braille
		// Chinese/Japanese. Should never be needed, but Mac OS 10.4's
		// iswalpha seems to be broken, so just make sure
		{ 0x3041, 0xa700 },
	};
	unsigned int ix;

	if (iswalpha(c))
		return 1;
	if (c < 0x300)
		return 0;

	if ((c >= 0x901) && (c <= 0xdf7)) {
		// Indic scripts: Devanagari, Tamil, etc
		return ((c & 0x7f) < 0x64)
		       || (lookupwchar(extra_indic_alphas, c) != 0)
		       || ((c >= 0xd7a) && (c <= 0xd7f)); // malayalam chillu characters
	}

	for (ix = 0; ix < sizeof(alpha_ranges) / sizeof(alpha_ranges[0]); ix++) {
		if ((c >= alpha_ranges[ix].first) && (c <= alpha_ranges[ix].last))
			return 1;
	}
	return 0;
}
// Bracket and quote characters. The range 0x2014 to 0x201f is tested
// directly by IsBracket(), so it does not need to be in this list.
static const unsigned short brackets[] = {
	'(', ')', '[', ']', '{', '}', '<', '>', '"', '\'', '`',
	0xab, 0xbb,     // double angle brackets
	0x300a, 0x300b, // double angle brackets (ideograph)
	0xe000+'<',     // private usage area
	0
};

int IsBracket(int c)
{
	// Returns 1 for 0x2014..0x201f, otherwise the result of looking c up in brackets[]
	if ((c < 0x2014) || (c > 0x201f))
		return lookupwchar(brackets, c);
	return 1;
}
int IsDigit09(unsigned int c)
{
	// 1 for the ASCII digits '0' to '9' only, otherwise 0
	return (c >= '0') && (c <= '9');
}
int IsDigit(unsigned int c)
{
	// 1 if iswdigit() accepts c, or c lies in 0x966..0x96f (Devanagari digits); otherwise 0
	const int in_0966_block = (c >= 0x966) && (c <= 0x96f);

	return (iswdigit(c) || in_0966_block) ? 1 : 0;
}
int IsSpace(unsigned int c)
{
	// Like iswspace(), but also treats box drawing characters and the
	// unicode specials as space, and never reports 0 as space.
	const int box_drawing = (c >= 0x2500) && (c < 0x25a0);
	const int specials = (c >= 0xfff9) && (c <= 0xffff);

	if (c == 0)
		return 0;
	if (box_drawing || specials)
		return 1;
	return iswspace(c);
}
int isspace2(unsigned int c)
{
	// Returns 1 for the character codes 1 to ' ' (0x20) inclusive, otherwise 0.
	// can't use isspace() because on Windows, isspace(0xe1) gives TRUE !
	//
	// The previous version masked c with 0xff into a local that was never
	// read afterwards; for every c that is not above ' ' the mask is a no-op,
	// so the test reduces to the range check below.
	return (c != 0) && (c <= ' ');
}
int is_str_totally_null(const char* str, int size) {
	// Tests if the first 'size' bytes of str are all null.
	// Returns 1 if they are (also when size <= 0, as there is nothing to
	// check), otherwise 0.
	//
	// Works byte by byte on purpose: str may be unaligned, so this must not
	// be reimplemented by reading wider integers through a cast pointer.
	// A plain loop also avoids passing size-1 to memcmp(), which wrapped to
	// a huge length when size was 0 or negative.
	int ix;

	for (ix = 0; ix < size; ix++) {
		if (str[ix] != 0)
			return 0;
	}
	return 1;
}
int Read4Bytes(FILE *f)
{
	// Read 4 bytes (least significant first) from f and combine them into a word.
	//
	// The word is assembled in an unsigned accumulator: shifting a byte
	// >= 0x80 left by 24 as a (promoted) signed int would overflow, which
	// is undefined behaviour.
	// As before, end-of-file is not reported: fgetc()'s EOF is masked to
	// 0xff and read as an ordinary byte.
	unsigned int acc = 0;
	int ix;

	for (ix = 0; ix < 4; ix++) {
		unsigned int byte = (unsigned int)fgetc(f) & 0xff;
		acc |= byte << (ix * 8);
	}
	return (int)acc;
}
| int towlower2(unsigned int c, Translator *translator) | |||||
| { | |||||
| // check for non-standard upper to lower case conversions | |||||
| if (c == 'I' && translator->langopts.dotless_i) | |||||
| return 0x131; // I -> ı | |||||
| return ucd_tolower(c); | |||||
| } | |||||
| #define ESPEAK_NG_COMMON_H | #define ESPEAK_NG_COMMON_H | ||||
| #include "espeak-ng/espeak_ng.h" | #include "espeak-ng/espeak_ng.h" | ||||
| #include "translate.h" | |||||
| extern ESPEAK_NG_API int GetFileLength(const char *filename); | extern ESPEAK_NG_API int GetFileLength(const char *filename); | ||||
| extern ESPEAK_NG_API void strncpy0(char *to, const char *from, int size); | extern ESPEAK_NG_API void strncpy0(char *to, const char *from, int size); | ||||
| int IsAlpha(unsigned int c); | |||||
| int IsBracket(int c); | |||||
| int IsDigit(unsigned int c); | |||||
| int IsDigit09(unsigned int c); | |||||
| int IsSpace(unsigned int c); | |||||
| int isspace2(unsigned int c); | |||||
| int is_str_totally_null(const char* str, int size); // Tests if all bytes of str up to size are null | |||||
| int Read4Bytes(FILE *f); | |||||
| int towlower2(unsigned int c, Translator *translator); // Supports Turkish I | |||||
| ESPEAK_NG_API int utf8_in(int *c, const char *buf); | |||||
| int utf8_in2(int *c, const char *buf, int backwards); | |||||
| int utf8_out(unsigned int c, char *buf); | |||||
| #ifdef __cplusplus | #ifdef __cplusplus | ||||
| } | } | ||||
| #endif | #endif |
| #include <espeak-ng/speak_lib.h> | #include <espeak-ng/speak_lib.h> | ||||
| #include <espeak-ng/encoding.h> | #include <espeak-ng/encoding.h> | ||||
| #include "common.h" // for GetFileLength, strncpy0 | |||||
| #include "common.h" // for GetFileLength, strncpy0, ... | |||||
| #include "error.h" // for create_file_error_context | #include "error.h" // for create_file_error_context | ||||
| #include "mnemonics.h" // for LookupMnemName, MNEM_TAB | #include "mnemonics.h" // for LookupMnemName, MNEM_TAB | ||||
| #include "phoneme.h" // for PHONEME_TAB, PHONEME_TAB_LIST | #include "phoneme.h" // for PHONEME_TAB, PHONEME_TAB_LIST | ||||
| #include "readclause.h" // for Read4Bytes | |||||
| #include "spect.h" // for SpectFrame, peak_t, SpectSeq | #include "spect.h" // for SpectFrame, peak_t, SpectSeq | ||||
| #include "speech.h" // for path_home, GetFileLength | #include "speech.h" // for path_home, GetFileLength | ||||
| #include "synthdata.h" // for LoadPhData | #include "synthdata.h" // for LoadPhData |
| #include "error.h" // for create_file_error_context | #include "error.h" // for create_file_error_context | ||||
| #include "mnemonics.h" // for LookupMnemName, MNEM_TAB | #include "mnemonics.h" // for LookupMnemName, MNEM_TAB | ||||
| #include "phoneme.h" // for PHONEME_TAB_LIST, phonSWITCH, phone... | #include "phoneme.h" // for PHONEME_TAB_LIST, phonSWITCH, phone... | ||||
| #include "readclause.h" // for towlower2 | |||||
| #include "speech.h" // for path_home | #include "speech.h" // for path_home | ||||
| #include "synthesize.h" // for Write4Bytes | #include "synthesize.h" // for Write4Bytes | ||||
| #include "translate.h" // for isspace2, IsDigit09, utf8_in, utf8_out | |||||
| static FILE *f_log = NULL; | static FILE *f_log = NULL; | ||||
| #include "dictionary.h" | #include "dictionary.h" | ||||
| #include "numbers.h" // for LookupAccentedLetter, Look... | #include "numbers.h" // for LookupAccentedLetter, Look... | ||||
| #include "phoneme.h" // for PHONEME_TAB, phVOWEL, phon... | #include "phoneme.h" // for PHONEME_TAB, phVOWEL, phon... | ||||
| #include "readclause.h" // for WordToString2, is_str_tota... | |||||
| #include "readclause.h" // for WordToString2 | |||||
| #include "speech.h" // for path_home | #include "speech.h" // for path_home | ||||
| #include "compiledict.h" // for DecodeRule | #include "compiledict.h" // for DecodeRule | ||||
| #include "synthdata.h" // for PhonemeCode, InterpretPhoneme | #include "synthdata.h" // for PhonemeCode, InterpretPhoneme | ||||
| #include "synthesize.h" // for STRESS_IS_PRIMARY, phoneme... | #include "synthesize.h" // for STRESS_IS_PRIMARY, phoneme... | ||||
| #include "translate.h" // for Translator, utf8_in, LANGU... | #include "translate.h" // for Translator, utf8_in, LANGU... | ||||
| static int LookupFlags(Translator *tr, const char *word, unsigned int **flags_out); | |||||
| typedef struct { | typedef struct { | ||||
| int points; | int points; | ||||
| const char *phonemes; | const char *phonemes; | ||||
| return IsLetter(tr, letter, LETTERGP_VOWEL2); | return IsLetter(tr, letter, LETTERGP_VOWEL2); | ||||
| } | } | ||||
| static int Unpronouncable2(Translator *tr, char *word) | |||||
| { | |||||
| int c; | |||||
| int end_flags; | |||||
| char ph_buf[N_WORD_PHONEMES]; | |||||
| ph_buf[0] = 0; | |||||
| c = word[-1]; | |||||
| word[-1] = ' '; // ensure there is a space before the "word" | |||||
| end_flags = TranslateRules(tr, word, ph_buf, sizeof(ph_buf), NULL, FLAG_UNPRON_TEST, NULL); | |||||
| word[-1] = c; | |||||
| if ((end_flags == 0) || (end_flags & SUFX_UNPRON)) | |||||
| return 1; | |||||
| return 0; | |||||
| } | |||||
| int Unpronouncable(Translator *tr, char *word, int posn) | |||||
| { | |||||
| /* Determines whether a word in 'unpronouncable', i.e. whether it should | |||||
| be spoken as individual letters. | |||||
| This function may be language specific. This is a generic version. | |||||
| */ | |||||
| int c; | |||||
| int c1 = 0; | |||||
| int vowel_posn = 9; | |||||
| int index; | |||||
| int count; | |||||
| ALPHABET *alphabet; | |||||
| utf8_in(&c, word); | |||||
| if ((tr->letter_bits_offset > 0) && (c < 0x241)) { | |||||
| // Latin characters for a language with a non-latin alphabet | |||||
| return 0; // so we can re-translate the word as English | |||||
| } | |||||
| if (((alphabet = AlphabetFromChar(c)) != NULL) && (alphabet->offset != tr->letter_bits_offset)) { | |||||
| // Character is not in our alphabet | |||||
| return 0; | |||||
| } | |||||
| if (tr->langopts.param[LOPT_UNPRONOUNCABLE] == 1) | |||||
| return 0; | |||||
| if (((c = *word) == ' ') || (c == 0) || (c == '\'')) | |||||
| return 0; | |||||
| index = 0; | |||||
| count = 0; | |||||
| for (;;) { | |||||
| index += utf8_in(&c, &word[index]); | |||||
| if ((c == 0) || (c == ' ')) | |||||
| break; | |||||
| if ((c == '\'') && ((count > 1) || (posn > 0))) | |||||
| break; // "tv'" but not "l'" | |||||
| if (count == 0) | |||||
| c1 = c; | |||||
| if ((c == '\'') && (tr->langopts.param[LOPT_UNPRONOUNCABLE] == 3)) { | |||||
| // don't count apostrophe | |||||
| } else | |||||
| count++; | |||||
| if (IsVowel(tr, c)) { | |||||
| vowel_posn = count; // position of the first vowel | |||||
| break; | |||||
| } | |||||
| if ((c != '\'') && !iswalpha(c)) | |||||
| return 0; | |||||
| } | |||||
| if ((vowel_posn > 2) && (tr->langopts.param[LOPT_UNPRONOUNCABLE] == 2)) { | |||||
| // Lookup unpronounable rules in *_rules | |||||
| return Unpronouncable2(tr, word); | |||||
| } | |||||
| if (c1 == tr->langopts.param[LOPT_UNPRONOUNCABLE]) | |||||
| vowel_posn--; // disregard this as the initial letter when counting | |||||
| if (vowel_posn > (tr->langopts.max_initial_consonants+1)) | |||||
| return 1; // no vowel, or no vowel in first few letters | |||||
| return 0; | |||||
| } | |||||
| static int GetVowelStress(Translator *tr, unsigned char *phonemes, signed char *vowel_stress, int *vowel_count, int *stressed_syllable, int control) | |||||
| int GetVowelStress(Translator *tr, unsigned char *phonemes, signed char *vowel_stress, int *vowel_count, int *stressed_syllable, int control) | |||||
| { | { | ||||
| // control = 1, set stress to 1 for forced unstressed vowels | // control = 1, set stress to 1 for forced unstressed vowels | ||||
| unsigned char phcode; | unsigned char phcode; | ||||
| return max_stress; | return max_stress; | ||||
| } | } | ||||
| static char stress_phonemes[] = { | |||||
| const char stress_phonemes[] = { | |||||
| phonSTRESS_D, phonSTRESS_U, phonSTRESS_2, phonSTRESS_3, | phonSTRESS_D, phonSTRESS_U, phonSTRESS_2, phonSTRESS_3, | ||||
| phonSTRESS_P, phonSTRESS_P2, phonSTRESS_TONIC | phonSTRESS_P, phonSTRESS_P2, phonSTRESS_TONIC | ||||
| }; | }; | ||||
| void ChangeWordStress(Translator *tr, char *word, int new_stress) | |||||
| { | |||||
| int ix; | |||||
| unsigned char *p; | |||||
| int max_stress; | |||||
| int vowel_count; // num of vowels + 1 | |||||
| int stressed_syllable = 0; // position of stressed syllable | |||||
| unsigned char phonetic[N_WORD_PHONEMES]; | |||||
| signed char vowel_stress[N_WORD_PHONEMES/2]; | |||||
| strcpy((char *)phonetic, word); | |||||
| max_stress = GetVowelStress(tr, phonetic, vowel_stress, &vowel_count, &stressed_syllable, 0); | |||||
| if (new_stress >= STRESS_IS_PRIMARY) { | |||||
| // promote to primary stress | |||||
| for (ix = 1; ix < vowel_count; ix++) { | |||||
| if (vowel_stress[ix] >= max_stress) { | |||||
| vowel_stress[ix] = new_stress; | |||||
| break; | |||||
| } | |||||
| } | |||||
| } else { | |||||
| // remove primary stress | |||||
| for (ix = 1; ix < vowel_count; ix++) { | |||||
| if (vowel_stress[ix] > new_stress) // >= allows for diminished stress (=1) | |||||
| vowel_stress[ix] = new_stress; | |||||
| } | |||||
| } | |||||
| // write out phonemes | |||||
| ix = 1; | |||||
| p = phonetic; | |||||
| while (*p != 0) { | |||||
| if ((phoneme_tab[*p]->type == phVOWEL) && !(phoneme_tab[*p]->phflags & phNONSYLLABIC)) { | |||||
| if ((vowel_stress[ix] == STRESS_IS_DIMINISHED) || (vowel_stress[ix] > STRESS_IS_UNSTRESSED)) | |||||
| *word++ = stress_phonemes[(unsigned char)vowel_stress[ix]]; | |||||
| ix++; | |||||
| } | |||||
| *word++ = *p++; | |||||
| } | |||||
| *word = 0; | |||||
| } | |||||
| void SetWordStress(Translator *tr, char *output, unsigned int *dictionary_flags, int tonic, int control) | void SetWordStress(Translator *tr, char *output, unsigned int *dictionary_flags, int tonic, int control) | ||||
| { | { | ||||
| /* Guess stress pattern of word. This is language specific | /* Guess stress pattern of word. This is language specific | ||||
| if (letter == 0xe000+'(') { | if (letter == 0xe000+'(') { | ||||
| if (pre_pause < tr->langopts.param[LOPT_BRACKET_PAUSE_ANNOUNCED]) | if (pre_pause < tr->langopts.param[LOPT_BRACKET_PAUSE_ANNOUNCED]) | ||||
| pre_pause = tr->langopts.param[LOPT_BRACKET_PAUSE_ANNOUNCED]; // a bracket, already spoken by AnnouncePunctuation() | pre_pause = tr->langopts.param[LOPT_BRACKET_PAUSE_ANNOUNCED]; // a bracket, already spoken by AnnouncePunctuation() | ||||
| } | |||||
| } | |||||
| if (IsBracket(letter)) { | if (IsBracket(letter)) { | ||||
| if (pre_pause < tr->langopts.param[LOPT_BRACKET_PAUSE]) | if (pre_pause < tr->langopts.param[LOPT_BRACKET_PAUSE]) | ||||
| pre_pause = tr->langopts.param[LOPT_BRACKET_PAUSE]; | pre_pause = tr->langopts.param[LOPT_BRACKET_PAUSE]; | ||||
| return 0; | return 0; | ||||
| } | } | ||||
| void ApplySpecialAttribute2(Translator *tr, char *phonemes, int dict_flags) | |||||
| { | |||||
| // apply after the translation is complete | |||||
| int ix; | |||||
| int len; | |||||
| char *p; | |||||
| len = strlen(phonemes); | |||||
| if (tr->langopts.param[LOPT_ALT] & 2) { | |||||
| for (ix = 0; ix < (len-1); ix++) { | |||||
| if (phonemes[ix] == phonSTRESS_P) { | |||||
| p = &phonemes[ix+1]; | |||||
| if ((dict_flags & FLAG_ALT2_TRANS) != 0) { | |||||
| if (*p == PhonemeCode('E')) | |||||
| *p = PhonemeCode('e'); | |||||
| if (*p == PhonemeCode('O')) | |||||
| *p = PhonemeCode('o'); | |||||
| } else { | |||||
| if (*p == PhonemeCode('e')) | |||||
| *p = PhonemeCode('E'); | |||||
| if (*p == PhonemeCode('o')) | |||||
| *p = PhonemeCode('O'); | |||||
| } | |||||
| break; | |||||
| } | |||||
| } | |||||
| } | |||||
| } | |||||
| int TransposeAlphabet(Translator *tr, char *text) | int TransposeAlphabet(Translator *tr, char *text) | ||||
| { | { | ||||
| // transpose cyrillic alphabet (for example) into ascii (single byte) character codes | // transpose cyrillic alphabet (for example) into ascii (single byte) character codes | ||||
| return 0; | return 0; | ||||
| } | } | ||||
static int utf8_nbytes(const char *buf)
{
	// Returns the number of bytes for the first UTF-8 character in buf,
	// judged from its first byte only:
	//   below 0x80 -> 1, below 0xe0 -> 2, below 0xf0 -> 3, otherwise 4
	const unsigned char lead = (unsigned char)buf[0];

	return 1 + (lead >= 0x80) + (lead >= 0xe0) + (lead >= 0xf0);
}
| /* Lookup a specified word in the word dictionary. | /* Lookup a specified word in the word dictionary. | ||||
| Returns phonetic data in 'phonetic' and bits in 'flags' | Returns phonetic data in 'phonetic' and bits in 'flags' | ||||
| return flags0; | return flags0; | ||||
| } | } | ||||
| int LookupFlags(Translator *tr, const char *word, unsigned int **flags_out) | |||||
| static int LookupFlags(Translator *tr, const char *word, unsigned int **flags_out) | |||||
| { | { | ||||
| char buf[100]; | char buf[100]; | ||||
| static unsigned int flags[2]; | static unsigned int flags[2]; |
| { | { | ||||
| #endif | #endif | ||||
| extern const char stress_phonemes[]; | |||||
| int LoadDictionary(Translator *tr, const char *name, int no_error); | int LoadDictionary(Translator *tr, const char *name, int no_error); | ||||
| int HashDictionary(const char *string); | int HashDictionary(const char *string); | ||||
| const char *EncodePhonemes(const char *p, char *outptr, int *bad_phoneme); | const char *EncodePhonemes(const char *p, char *outptr, int *bad_phoneme); | ||||
| void DecodePhonemes(const char *inptr, char *outptr); | void DecodePhonemes(const char *inptr, char *outptr); | ||||
| char *WritePhMnemonic(char *phon_out, PHONEME_TAB *ph, PHONEME_LIST *plist, int use_ipa, int *flags); | char *WritePhMnemonic(char *phon_out, PHONEME_TAB *ph, PHONEME_LIST *plist, int use_ipa, int *flags); | ||||
| const char *GetTranslatedPhonemeString(int phoneme_mode); | const char *GetTranslatedPhonemeString(int phoneme_mode); | ||||
| int GetVowelStress(Translator *tr, unsigned char *phonemes, signed char *vowel_stress, int *vowel_count, int *stressed_syllable, int control); | |||||
| int IsVowel(Translator *tr, int letter); | int IsVowel(Translator *tr, int letter); | ||||
| int Unpronouncable(Translator *tr, char *word, int posn); | |||||
| void ChangeWordStress(Translator *tr, char *word, int new_stress); | |||||
| void SetWordStress(Translator *tr, char *output, unsigned int *dictionary_flags, int tonic, int control); | void SetWordStress(Translator *tr, char *output, unsigned int *dictionary_flags, int tonic, int control); | ||||
| void AppendPhonemes(Translator *tr, char *string, int size, const char *ph); | void AppendPhonemes(Translator *tr, char *string, int size, const char *ph); | ||||
| int TranslateRules(Translator *tr, char *p_start, char *phonemes, int ph_size, char *end_phonemes, int word_flags, unsigned int *dict_flags); | int TranslateRules(Translator *tr, char *p_start, char *phonemes, int ph_size, char *end_phonemes, int word_flags, unsigned int *dict_flags); | ||||
| int TransposeAlphabet(Translator *tr, char *text); | int TransposeAlphabet(Translator *tr, char *text); | ||||
| int Lookup(Translator *tr, const char *word, char *ph_out); | int Lookup(Translator *tr, const char *word, char *ph_out); | ||||
| int LookupDictList(Translator *tr, char **wordptr, char *ph_out, unsigned int *flags, int end_flags, WORD_TAB *wtab); | int LookupDictList(Translator *tr, char **wordptr, char *ph_out, unsigned int *flags, int end_flags, WORD_TAB *wtab); | ||||
| int LookupFlags(Translator *tr, const char *word, unsigned int **flags_out); | |||||
| int RemoveEnding(Translator *tr, char *word, int end_type, char *word_copy); | int RemoveEnding(Translator *tr, char *word, int end_type, char *word_copy); | ||||
| #ifdef __cplusplus | #ifdef __cplusplus |
| #define Rnpp 10 | #define Rnpp 10 | ||||
| #define R1p 11 | #define R1p 11 | ||||
| #define R2p 12 | #define R2p 12 | ||||
| #define R3p 13 | |||||
| #define R4p 14 | |||||
| #define R5p 15 | |||||
| #define R6p 16 | #define R6p 16 | ||||
| #define RGL 17 | #define RGL 17 |
| #include <espeak-ng/encoding.h> | #include <espeak-ng/encoding.h> | ||||
| #include "numbers.h" | #include "numbers.h" | ||||
| #include "common.h" | |||||
| #include "dictionary.h" // for Lookup, TranslateRules, EncodePhonemes, Look... | #include "dictionary.h" // for Lookup, TranslateRules, EncodePhonemes, Look... | ||||
| #include "phoneme.h" // for phonSWITCH, PHONEME_TAB, phonEND_WORD, phonP... | #include "phoneme.h" // for phonSWITCH, PHONEME_TAB, phonEND_WORD, phonP... | ||||
| #include "readclause.h" // for WordToString2, towlower2 | |||||
| #include "readclause.h" // for WordToString2 | |||||
| #include "synthdata.h" // for SelectPhonemeTable | #include "synthdata.h" // for SelectPhonemeTable | ||||
| #include "synthesize.h" // for phoneme_tab | #include "synthesize.h" // for phoneme_tab | ||||
| #include "translate.h" // for Translator, LANGUAGE_OPTIONS, IsDigit09, WOR... | |||||
| #include "translate.h" // for Translator, LANGUAGE_OPTIONS, WOR... | |||||
| #include "voice.h" // for voice, voice_t | #include "voice.h" // for voice, voice_t | ||||
| #define M_LIGATURE 0x8000 | #define M_LIGATURE 0x8000 |
| return CLAUSE_NONE; | return CLAUSE_NONE; | ||||
| } | } | ||||
int is_str_totally_null(const char* str, int size) {
	// Tests if the first 'size' bytes of str are all null.
	// Returns 1 if they are (also when size <= 0, as there is nothing to
	// check), otherwise 0.
	//
	// Works byte by byte on purpose: str may be unaligned, so this must not
	// be reimplemented by reading wider integers through a cast pointer.
	// A plain loop also avoids passing size-1 to memcmp(), which wrapped to
	// a huge length when size was 0 or negative.
	int ix;

	for (ix = 0; ix < size; ix++) {
		if (str[ix] != 0)
			return 0;
	}
	return 1;
}
| int towlower2(unsigned int c, Translator *translator) | |||||
| { | |||||
| // check for non-standard upper to lower case conversions | |||||
| if (c == 'I' && translator->langopts.dotless_i) | |||||
| return 0x131; // I -> ı | |||||
| return ucd_tolower(c); | |||||
| } | |||||
| static int IsRomanU(unsigned int c) | static int IsRomanU(unsigned int c) | ||||
| { | { | ||||
| if ((c == 'I') || (c == 'V') || (c == 'X') || (c == 'L')) | if ((c == 'I') || (c == 'V') || (c == 'X') || (c == 'L')) | ||||
| return buf; | return buf; | ||||
| } | } | ||||
int Read4Bytes(FILE *f)
{
	// Read 4 bytes (least significant first) from f and combine them into a word.
	//
	// The word is assembled in an unsigned accumulator: shifting a byte
	// >= 0x80 left by 24 as a (promoted) signed int would overflow, which
	// is undefined behaviour.
	// As before, end-of-file is not reported: fgetc()'s EOF is masked to
	// 0xff and read as an ordinary byte.
	unsigned int acc = 0;
	int ix;

	for (ix = 0; ix < 4; ix++) {
		unsigned int byte = (unsigned int)fgetc(f) & 0xff;
		acc |= byte << (ix * 8);
	}
	return (int)acc;
}
| static int AnnouncePunctuation(Translator *tr, int c1, int *c2_ptr, char *output, int *bufix, int end_clause) | static int AnnouncePunctuation(Translator *tr, int c1, int *c2_ptr, char *output, int *bufix, int end_clause) | ||||
| { | { | ||||
| // announce punctuation names | // announce punctuation names |
| extern PARAM_STACK param_stack[]; | extern PARAM_STACK param_stack[]; | ||||
| // Tests if all bytes of str up to size are null | |||||
| int is_str_totally_null(const char* str, int size); | |||||
| int clause_type_from_codepoint(uint32_t c); | int clause_type_from_codepoint(uint32_t c); | ||||
| int towlower2(unsigned int c, Translator *translator); // Supports Turkish I | |||||
| int Eof(void); | int Eof(void); | ||||
| const char *WordToString2(unsigned int word); | const char *WordToString2(unsigned int word); | ||||
| int Read4Bytes(FILE *f); | |||||
| int AddNameData(const char *name, | int AddNameData(const char *name, | ||||
| int wide); | int wide); | ||||
| int ReadClause(Translator *tr, | int ReadClause(Translator *tr, |
| #include "soundicon.h" | #include "soundicon.h" | ||||
| #include "common.h" // for GetFileLength | #include "common.h" // for GetFileLength | ||||
| #include "error.h" // for create_file_error_context | #include "error.h" // for create_file_error_context | ||||
| #include "readclause.h" // for Read4Bytes | |||||
| #include "speech.h" // for path_home, PATHSEP | #include "speech.h" // for path_home, PATHSEP | ||||
| #include "synthesize.h" // for samplerate | #include "synthesize.h" // for samplerate | ||||
| #define FRAME_WIDTH 1000 // max width for 8000kHz frame | #define FRAME_WIDTH 1000 // max width for 8000kHz frame | ||||
| #define MAX_DISPLAY_FREQ 9500 | #define MAX_DISPLAY_FREQ 9500 | ||||
| #define FRAME_HEIGHT 240 | |||||
| #define T_AMPLITUDE 308 | #define T_AMPLITUDE 308 | ||||
| #define T_AV 312 | #define T_AV 312 |
| #include "readclause.h" // for PARAM_STACK, param_stack, AddNameData | #include "readclause.h" // for PARAM_STACK, param_stack, AddNameData | ||||
| #include "soundicon.h" // for LoadSoundFile2 | #include "soundicon.h" // for LoadSoundFile2 | ||||
| #include "synthesize.h" // for SPEED_FACTORS, speed | #include "synthesize.h" // for SPEED_FACTORS, speed | ||||
| #include "translate.h" // for CTRL_EMBEDDED, IsDigit09, utf8_out | |||||
| #include "translate.h" // for CTRL_EMBEDDED | |||||
| #include "voice.h" // for SelectVoice, SelectVoiceByName | #include "voice.h" // for SelectVoice, SelectVoiceByName | ||||
| #include "speech.h" // for MAKE_MEM_UNDEFINED | #include "speech.h" // for MAKE_MEM_UNDEFINED | ||||
| #include "dictionary.h" | #include "dictionary.h" | ||||
| #include "mbrola.h" | #include "mbrola.h" | ||||
| #include "readclause.h" | |||||
| #include "setlengths.h" | #include "setlengths.h" | ||||
| #include "synthdata.h" | #include "synthdata.h" | ||||
| #include "wavegen.h" | #include "wavegen.h" |
| #include <espeak-ng/speak_lib.h> | #include <espeak-ng/speak_lib.h> | ||||
| #include <espeak-ng/encoding.h> | #include <espeak-ng/encoding.h> | ||||
| #include "common.h" | |||||
| #include "setlengths.h" // for SetLengthMods | #include "setlengths.h" // for SetLengthMods | ||||
| #include "translate.h" // for Translator, LANGUAGE_OPTIONS, L, NUM... | #include "translate.h" // for Translator, LANGUAGE_OPTIONS, L, NUM... | ||||
| #include <espeak-ng/encoding.h> | #include <espeak-ng/encoding.h> | ||||
| #include "translate.h" | #include "translate.h" | ||||
| #include "common.h" | |||||
| #include "dictionary.h" // for TranslateRules, LookupDictList, Cha... | #include "dictionary.h" // for TranslateRules, LookupDictList, Cha... | ||||
| #include "phoneme.h" // for phonSWITCH, PHONEME_TAB, phonPAUSE_... | #include "phoneme.h" // for phonSWITCH, PHONEME_TAB, phonPAUSE_... | ||||
| #include "phonemelist.h" // for MakePhonemeList | #include "phonemelist.h" // for MakePhonemeList | ||||
| int n_replace_phonemes; | int n_replace_phonemes; | ||||
| REPLACE_PHONEMES replace_phonemes[N_REPLACE_PHONEMES]; | REPLACE_PHONEMES replace_phonemes[N_REPLACE_PHONEMES]; | ||||
| // brackets, also 0x2014 to 0x201f which don't need to be in this list | |||||
| static const unsigned short brackets[] = { | |||||
| '(', ')', '[', ']', '{', '}', '<', '>', '"', '\'', '`', | |||||
| 0xab, 0xbb, // double angle brackets | |||||
| 0x300a, 0x300b, // double angle brackets (ideograph) | |||||
| 0xe000+'<', // private usage area | |||||
| 0 | |||||
| }; | |||||
| // other characters which break a word, but don't produce a pause | // other characters which break a word, but don't produce a pause | ||||
| static const unsigned short breaks[] = { '_', 0 }; | static const unsigned short breaks[] = { '_', 0 }; | ||||
int IsAlpha(unsigned int c)
{
	// Replacement for iswalpha() which also accepts some in-word symbols
	// (combining marks, vowel signs, ...).
	static const unsigned short extra_indic_alphas[] = {
		0xa70, 0xa71, // Gurmukhi: tippi, addak
		0
	};
	// Inclusive ranges accepted outside the Indic block handled below
	static const struct {
		unsigned int first;
		unsigned int last;
	} alpha_ranges[] = {
		{ 0x0300, 0x036f }, // combining accents
		{ 0x05b0, 0x05c2 }, // Hebrew vowel marks
		{ 0x0605, 0x0605 },
		{ 0x064b, 0x065e }, // arabic vowel marks
		{ 0x0670, 0x0670 }, // arabic vowel mark
		{ 0x0f40, 0x0fbc }, // tibetan
		{ 0x1100, 0x11ff }, // Korean jamo
		{ 0x2800, 0x28ff }, // braille
		// Chinese/Japanese. Should never be needed, but Mac OS 10.4's
		// iswalpha seems to be broken, so just make sure
		{ 0x3041, 0xa700 },
	};
	unsigned int ix;

	if (iswalpha(c))
		return 1;
	if (c < 0x300)
		return 0;

	if ((c >= 0x901) && (c <= 0xdf7)) {
		// Indic scripts: Devanagari, Tamil, etc
		return ((c & 0x7f) < 0x64)
		       || (lookupwchar(extra_indic_alphas, c) != 0)
		       || ((c >= 0xd7a) && (c <= 0xd7f)); // malayalam chillu characters
	}

	for (ix = 0; ix < sizeof(alpha_ranges) / sizeof(alpha_ranges[0]); ix++) {
		if ((c >= alpha_ranges[ix].first) && (c <= alpha_ranges[ix].last))
			return 1;
	}
	return 0;
}
int IsDigit09(unsigned int c)
{
	// 1 only for the ASCII decimal digits '0'..'9', otherwise 0
	return ((c >= '0') && (c <= '9')) ? 1 : 0;
}
int IsDigit(unsigned int c)
{
	// 1 for anything iswdigit() accepts and for U+0966..U+096F
	// (the Devanagari digits), otherwise 0
	const int in_extra_range = (c >= 0x966) && (c <= 0x96f);

	return (iswdigit(c) || in_extra_range) ? 1 : 0;
}
static int IsSpace(unsigned int c)
{
	// Extended iswspace(): additionally treats two non-space ranges as
	// spaces. NUL is never a space. For any other character the raw
	// iswspace() result is returned (non-zero means "space").
	int is_box_drawing;
	int is_special;

	if (c == 0)
		return 0;

	is_box_drawing = (c >= 0x2500) && (c < 0x25a0);  // box drawing characters
	is_special     = (c >= 0xfff9) && (c <= 0xffff); // unicode specials
	if (is_box_drawing || is_special)
		return 1;

	return iswspace(c);
}
int isspace2(unsigned int c)
{
	// can't use isspace() because on Windows, isspace(0xe1) gives TRUE !
	// Returns 1 for the control/space codes 1..' ' and 0 for everything
	// else, including NUL.
	if (c > ' ')
		return 0;
	return ((c & 0xff) != 0) ? 1 : 0;
}
| void DeleteTranslator(Translator *tr) | void DeleteTranslator(Translator *tr) | ||||
| { | { | ||||
| if (!tr) return; | if (!tr) return; | ||||
| return 0; | return 0; | ||||
| } | } | ||||
| int IsBracket(int c) | |||||
| { | |||||
| if ((c >= 0x2014) && (c <= 0x201f)) | |||||
| return 1; | |||||
| return lookupwchar(brackets, c); | |||||
| } | |||||
int utf8_nbytes(const char *buf)
{
	// Returns the number of bytes for the first UTF-8 character in buf,
	// decided from the first byte only: one byte, plus one for each of the
	// thresholds 0x80, 0xe0 and 0xf0 that the byte reaches.
	const unsigned char lead = (unsigned char)buf[0];
	int count = 1;

	if (lead >= 0x80)
		count++;
	if (lead >= 0xe0)
		count++;
	if (lead >= 0xf0)
		count++;
	return count;
}
int utf8_in2(int *c, const char *buf, int backwards)
{
	// Decodes one character from a UTF8 string.
	// c:         receives the decoded character value
	// buf:       where to start reading; if it points at a continuation
	//            byte, the scan first moves to the nearest lead byte
	// backwards: non-zero to search for that lead byte towards lower
	//            addresses, zero to search forwards
	// Returns the number of bytes of the decoded character (lead byte plus
	// the continuation bytes actually read); skipped bytes are not counted.
	static const unsigned char lead_mask[4] = { 0xff, 0x1f, 0x0f, 0x07 };
	const int step = backwards ? -1 : 1;
	int value;
	int expected = 0;
	int consumed = 0;

	// skip over non-initial bytes of a multi-byte utf8 character
	while ((*buf & 0xc0) == 0x80)
		buf += step;

	value = *buf++;
	if (value & 0x80) {
		// the lead byte tells how many continuation bytes should follow
		if ((value & 0xe0) == 0xc0)
			expected = 1;
		else if ((value & 0xf0) == 0xe0)
			expected = 2;
		else if ((value & 0xf8) == 0xf0)
			expected = 3;

		value &= lead_mask[expected];

		// stop early if the string ends inside the sequence (truncated)
		while ((consumed < expected) && (*buf != 0)) {
			value = (value << 6) + (*buf++ & 0x3f);
			consumed++;
		}
	}

	*c = value;
	return consumed + 1;
}
| #pragma GCC visibility push(default) | |||||
| int utf8_in(int *c, const char *buf) | |||||
| { | |||||
| /* Read a unicode character from a UTF8 string, reading forwards. | |||||
| * Thin wrapper: forwards to utf8_in2() with backwards = 0. | |||||
| * Returns the number of UTF8 bytes used. | |||||
| * buf: start of the UTF-8 text; it is passed by value, so the caller's | |||||
| * pointer is not advanced (use the return value to step on) | |||||
| * c: receives the Unicode code point (not a UTF-16 encoding: the | |||||
| * 4-byte example below yields 0x02070e), obtained by | |||||
| * skipping UTF-8 header bits of bytes in following way: | |||||
| * 2-byte character "ā": | |||||
| * hex binary | |||||
| * c481 1100010010000001 | |||||
| * | 11000100 000001 | |||||
| * V \ \ | | | |||||
| * 0101 0000000100000001 | |||||
| * 3-byte character "ꙅ": | |||||
| * ea9985 111010101001100110000101 | |||||
| * 1010 011001 000101 | |||||
| * | + +--.\ \ | | | |||||
| * V `--. \`. `.| | | |||||
| * A645 1010011001000101 | |||||
| * 4-byte character "𠜎": | |||||
| * f0a09c8e 11110000101000001001110010001110 | |||||
| * V 000 100000 011100 001110 | |||||
| * 02070e 000000100000011100001110 | |||||
| */ | |||||
| return utf8_in2(c, buf, 0); | |||||
| } | |||||
| #pragma GCC visibility pop | |||||
int utf8_out(unsigned int c, char *buf)
{
	// Encodes the character code c into buf as UTF-8 (no terminating NUL
	// is written) and returns the number of bytes written (1 to 4).
	// Codes of 0x110000 and above are out of range and are written as a
	// single space.
	static const unsigned char lead_prefix[4] = { 0, 0xc0, 0xe0, 0xf0 };
	char *out = buf;
	int trailing; // number of continuation bytes after the lead byte
	int pos;

	if (c < 0x80) {
		*out = c;
		return 1;
	}
	if (c >= 0x110000) {
		*out = ' '; // out of range character code
		return 1;
	}

	trailing = (c < 0x0800) ? 1 : ((c < 0x10000) ? 2 : 3);

	// lead byte: length prefix plus the most significant payload bits
	*out++ = lead_prefix[trailing] | (c >> (6 * trailing));

	// continuation bytes: six payload bits each, most significant first
	for (pos = trailing - 1; pos >= 0; pos--)
		*out++ = 0x80 + ((c >> (6 * pos)) & 0x3f);

	return trailing + 1;
}
| char *strchr_w(const char *s, int c) | char *strchr_w(const char *s, int c) | ||||
| { | { | ||||
| // return NULL for any non-ascii character | // return NULL for any non-ascii character |
| #define FLAG_ALT_TRANS 0x8000 // language specific | #define FLAG_ALT_TRANS 0x8000 // language specific | ||||
| #define FLAG_ALT2_TRANS 0x10000 // language specific | #define FLAG_ALT2_TRANS 0x10000 // language specific | ||||
| #define FLAG_ALT3_TRANS 0x20000 // language specific | #define FLAG_ALT3_TRANS 0x20000 // language specific | ||||
| #define FLAG_ALT4_TRANS 0x40000 // language specific | |||||
| #define FLAG_ALT5_TRANS 0x80000 // language specific | |||||
| #define FLAG_ALT6_TRANS 0x100000 // language specific | |||||
| #define FLAG_ALT7_TRANS 0x200000 // language specific | #define FLAG_ALT7_TRANS 0x200000 // language specific | ||||
| #define FLAG_COMBINE 0x800000 // combine with the next word | #define FLAG_COMBINE 0x800000 // combine with the next word | ||||
| #define LEADING_2_BITS 0xC0 // 0b11000000 | #define LEADING_2_BITS 0xC0 // 0b11000000 | ||||
| #define UTF8_TAIL_BITS 0x80 // 0b10000000 | #define UTF8_TAIL_BITS 0x80 // 0b10000000 | ||||
| ESPEAK_NG_API int utf8_in(int *c, const char *buf); | |||||
| int utf8_in2(int *c, const char *buf, int backwards); | |||||
| int utf8_out(unsigned int c, char *buf); | |||||
| int utf8_nbytes(const char *buf); | |||||
| int lookupwchar(const unsigned short *list, int c); | int lookupwchar(const unsigned short *list, int c); | ||||
| char *strchr_w(const char *s, int c); | char *strchr_w(const char *s, int c); | ||||
| int IsBracket(int c); | |||||
| void InitNamedata(void); | void InitNamedata(void); | ||||
| void InitText(int flags); | void InitText(int flags); | ||||
| void InitText2(void); | void InitText2(void); | ||||
| int IsDigit(unsigned int c); | |||||
| int IsDigit09(unsigned int c); | |||||
| int IsAlpha(unsigned int c); | |||||
| int isspace2(unsigned int c); | |||||
| ALPHABET *AlphabetFromChar(int c); | ALPHABET *AlphabetFromChar(int c); | ||||
| Translator *SelectTranslator(const char *name); | Translator *SelectTranslator(const char *name); | ||||
| void print_dictionary_flags(unsigned int *flags, char *buf, int buf_len); | void print_dictionary_flags(unsigned int *flags, char *buf, int buf_len); | ||||
| void ApplySpecialAttribute2(Translator *tr, char *phonemes, int dict_flags); | |||||
| int TranslateWord(Translator *tr, char *word1, WORD_TAB *wtab, char *word_out); | int TranslateWord(Translator *tr, char *word1, WORD_TAB *wtab, char *word_out); | ||||
| void TranslateClause(Translator *tr, int *tone, char **voice_change); | void TranslateClause(Translator *tr, int *tone, char **voice_change); | ||||
| #include "translate.h" | #include "translate.h" | ||||
| #include "translateword.h" | #include "translateword.h" | ||||
| #include "common.h" // for strncpy0 | #include "common.h" // for strncpy0 | ||||
| #include "dictionary.h" // for TranslateRules, LookupDictList, Cha... | |||||
| #include "dictionary.h" // for TranslateRules, LookupDictList | |||||
| #include "numbers.h" // for SetSpellingStress, ... | #include "numbers.h" // for SetSpellingStress, ... | ||||
| #include "phoneme.h" // for phonSWITCH, PHONEME_TAB, phonPAUSE_... | #include "phoneme.h" // for phonSWITCH, PHONEME_TAB, phonPAUSE_... | ||||
| #include "readclause.h" // for towlower2 | #include "readclause.h" // for towlower2 | ||||
| static void addPluralSuffixes(int flags, Translator *tr, char last_char, char *word_phonemes); | static void addPluralSuffixes(int flags, Translator *tr, char last_char, char *word_phonemes); | ||||
| static void ApplySpecialAttribute2(Translator *tr, char *phonemes, int dict_flags); | |||||
| static void ChangeWordStress(Translator *tr, char *word, int new_stress); | |||||
| static int CheckDottedAbbrev(char *word1); | static int CheckDottedAbbrev(char *word1); | ||||
| static int NonAsciiNumber(int letter); | static int NonAsciiNumber(int letter); | ||||
| static char *SpeakIndividualLetters(Translator *tr, char *word, char *phonemes, int spell_word, ALPHABET *current_alphabet, char word_phonemes[]); | static char *SpeakIndividualLetters(Translator *tr, char *word, char *phonemes, int spell_word, ALPHABET *current_alphabet, char word_phonemes[]); | ||||
| static int TranslateLetter(Translator *tr, char *word, char *phonemes, int control, ALPHABET *current_alphabet); | static int TranslateLetter(Translator *tr, char *word, char *phonemes, int control, ALPHABET *current_alphabet); | ||||
| static int Unpronouncable(Translator *tr, char *word, int posn); | |||||
| static int Unpronouncable2(Translator *tr, char *word); | |||||
| int TranslateWord3(Translator *tr, char *word_start, WORD_TAB *wtab, char *word_out, bool *any_stressed_words, ALPHABET *current_alphabet, char word_phonemes[], size_t size_word_phonemes) | int TranslateWord3(Translator *tr, char *word_start, WORD_TAB *wtab, char *word_out, bool *any_stressed_words, ALPHABET *current_alphabet, char word_phonemes[], size_t size_word_phonemes) | ||||
| { | { | ||||
| } | } | ||||
| void ApplySpecialAttribute2(Translator *tr, char *phonemes, int dict_flags) | |||||
| { | |||||
| // apply after the translation is complete | |||||
| int ix; | |||||
| int len; | |||||
| char *p; | |||||
| len = strlen(phonemes); | |||||
| if (tr->langopts.param[LOPT_ALT] & 2) { | |||||
| for (ix = 0; ix < (len-1); ix++) { | |||||
| if (phonemes[ix] == phonSTRESS_P) { | |||||
| p = &phonemes[ix+1]; | |||||
| if ((dict_flags & FLAG_ALT2_TRANS) != 0) { | |||||
| if (*p == PhonemeCode('E')) | |||||
| *p = PhonemeCode('e'); | |||||
| if (*p == PhonemeCode('O')) | |||||
| *p = PhonemeCode('o'); | |||||
| } else { | |||||
| if (*p == PhonemeCode('e')) | |||||
| *p = PhonemeCode('E'); | |||||
| if (*p == PhonemeCode('o')) | |||||
| *p = PhonemeCode('O'); | |||||
| } | |||||
| break; | |||||
| } | |||||
| } | |||||
| } | |||||
| } | |||||
| static void ChangeWordStress(Translator *tr, char *word, int new_stress) | |||||
| { | |||||
| int ix; | |||||
| unsigned char *p; | |||||
| int max_stress; | |||||
| int vowel_count; // num of vowels + 1 | |||||
| int stressed_syllable = 0; // position of stressed syllable | |||||
| unsigned char phonetic[N_WORD_PHONEMES]; | |||||
| signed char vowel_stress[N_WORD_PHONEMES/2]; | |||||
| strcpy((char *)phonetic, word); | |||||
| max_stress = GetVowelStress(tr, phonetic, vowel_stress, &vowel_count, &stressed_syllable, 0); | |||||
| if (new_stress >= STRESS_IS_PRIMARY) { | |||||
| // promote to primary stress | |||||
| for (ix = 1; ix < vowel_count; ix++) { | |||||
| if (vowel_stress[ix] >= max_stress) { | |||||
| vowel_stress[ix] = new_stress; | |||||
| break; | |||||
| } | |||||
| } | |||||
| } else { | |||||
| // remove primary stress | |||||
| for (ix = 1; ix < vowel_count; ix++) { | |||||
| if (vowel_stress[ix] > new_stress) // >= allows for diminished stress (=1) | |||||
| vowel_stress[ix] = new_stress; | |||||
| } | |||||
| } | |||||
| // write out phonemes | |||||
| ix = 1; | |||||
| p = phonetic; | |||||
| while (*p != 0) { | |||||
| if ((phoneme_tab[*p]->type == phVOWEL) && !(phoneme_tab[*p]->phflags & phNONSYLLABIC)) { | |||||
| if ((vowel_stress[ix] == STRESS_IS_DIMINISHED) || (vowel_stress[ix] > STRESS_IS_UNSTRESSED)) | |||||
| *word++ = stress_phonemes[(unsigned char)vowel_stress[ix]]; | |||||
| ix++; | |||||
| } | |||||
| *word++ = *p++; | |||||
| } | |||||
| *word = 0; | |||||
| } | |||||
| static char *SpeakIndividualLetters(Translator *tr, char *word, char *phonemes, int spell_word, ALPHABET *current_alphabet, char word_phonemes[]) | static char *SpeakIndividualLetters(Translator *tr, char *word, char *phonemes, int spell_word, ALPHABET *current_alphabet, char word_phonemes[]) | ||||
| { | { | ||||
| int posn = 0; | int posn = 0; | ||||
| } | } | ||||
| return -1; | return -1; | ||||
| } | } | ||||
| static int Unpronouncable(Translator *tr, char *word, int posn) | |||||
| { | |||||
| /* Determines whether a word is 'unpronouncable', i.e. whether it should | |||||
| be spoken as individual letters. | |||||
| This function may be language specific. This is a generic version. | |||||
| tr: translator; its letter_bits_offset, langopts.max_initial_consonants | |||||
| and langopts.param[LOPT_UNPRONOUNCABLE] drive the decision | |||||
| word: UTF-8 text, scanned with utf8_in() up to a NUL or a space | |||||
| posn: when > 0, an apostrophe ends the scan even on the first letter | |||||
| Returns 1 if the first vowel comes later than max_initial_consonants+1 | |||||
| counted letters (or no vowel was found), otherwise 0. | |||||
| When LOPT_UNPRONOUNCABLE is 2 and the first vowel is later than the | |||||
| second counted letter, the result of Unpronouncable2() is returned. | |||||
| */ | |||||
| int c; | |||||
| int c1 = 0; | |||||
| int vowel_posn = 9; | |||||
| int index; | |||||
| int count; | |||||
| ALPHABET *alphabet; | |||||
| utf8_in(&c, word); | |||||
| if ((tr->letter_bits_offset > 0) && (c < 0x241)) { | |||||
| // Latin characters for a language with a non-latin alphabet | |||||
| return 0; // so we can re-translate the word as English | |||||
| } | |||||
| if (((alphabet = AlphabetFromChar(c)) != NULL) && (alphabet->offset != tr->letter_bits_offset)) { | |||||
| // Character is not in our alphabet | |||||
| return 0; | |||||
| } | |||||
| if (tr->langopts.param[LOPT_UNPRONOUNCABLE] == 1) | |||||
| return 0; | |||||
| if (((c = *word) == ' ') || (c == 0) || (c == '\'')) | |||||
| return 0; | |||||
| index = 0; | |||||
| count = 0; | |||||
| for (;;) { | |||||
| index += utf8_in(&c, &word[index]); | |||||
| if ((c == 0) || (c == ' ')) | |||||
| break; | |||||
| if ((c == '\'') && ((count > 1) || (posn > 0))) | |||||
| break; // "tv'" but not "l'" | |||||
| if (count == 0) | |||||
| c1 = c; | |||||
| if ((c == '\'') && (tr->langopts.param[LOPT_UNPRONOUNCABLE] == 3)) { | |||||
| // don't count apostrophe | |||||
| } else | |||||
| count++; | |||||
| if (IsVowel(tr, c)) { | |||||
| vowel_posn = count; // position of the first vowel (stays 9 if none is found) | |||||
| break; | |||||
| } | |||||
| if ((c != '\'') && !iswalpha(c)) | |||||
| return 0; | |||||
| } | |||||
| if ((vowel_posn > 2) && (tr->langopts.param[LOPT_UNPRONOUNCABLE] == 2)) { | |||||
| // Lookup unpronouncable rules in *_rules | |||||
| return Unpronouncable2(tr, word); | |||||
| } | |||||
| if (c1 == tr->langopts.param[LOPT_UNPRONOUNCABLE]) | |||||
| vowel_posn--; // disregard this as the initial letter when counting | |||||
| if (vowel_posn > (tr->langopts.max_initial_consonants+1)) | |||||
| return 1; // no vowel, or no vowel in first few letters | |||||
| return 0; | |||||
| } | |||||
| static int Unpronouncable2(Translator *tr, char *word) | |||||
| { | |||||
| int c; | |||||
| int end_flags; | |||||
| char ph_buf[N_WORD_PHONEMES]; | |||||
| ph_buf[0] = 0; | |||||
| c = word[-1]; | |||||
| word[-1] = ' '; // ensure there is a space before the "word" | |||||
| end_flags = TranslateRules(tr, word, ph_buf, sizeof(ph_buf), NULL, FLAG_UNPRON_TEST, NULL); | |||||
| word[-1] = c; | |||||
| if ((end_flags == 0) || (end_flags & SUFX_UNPRON)) | |||||
| return 1; | |||||
| return 0; | |||||
| } |