Code cleanup: relocate functions & remove unused #defines (master)
#include <stdlib.h> | #include <stdlib.h> | ||||
#include <string.h> | #include <string.h> | ||||
#include <sys/stat.h> | #include <sys/stat.h> | ||||
#include <wctype.h> | |||||
#include <espeak-ng/espeak_ng.h> | #include <espeak-ng/espeak_ng.h> | ||||
#include <espeak-ng/speak_lib.h> | #include <espeak-ng/speak_lib.h> | ||||
#include <espeak-ng/encoding.h> | #include <espeak-ng/encoding.h> | ||||
#include <ucd/ucd.h> | |||||
#include "common.h" | #include "common.h" | ||||
#include "translate.h" | |||||
#pragma GCC visibility push(default) | #pragma GCC visibility push(default) | ||||
strncpy(to, from, size); | strncpy(to, from, size); | ||||
to[size-1] = 0; | to[size-1] = 0; | ||||
} | } | ||||
int utf8_in(int *c, const char *buf)
{
	/* Read one Unicode character from a UTF-8 string, scanning forwards.
	 * Thin wrapper: calls utf8_in2() with backwards = 0.
	 *
	 * c:   receives the decoded character value
	 * buf: UTF-8 bytes to decode. The pointer is passed by value, so the
	 *      caller's pointer is not moved; use the return value to advance it.
	 * Returns the number of UTF-8 bytes used.
	 *
	 * The value stored in c is the code point (not UTF-16 code units),
	 * formed by dropping the UTF-8 header bits of each byte and
	 * concatenating the remaining payload bits:
	 *
	 *   2-byte character "ā":
	 *     bytes    c4 81
	 *     binary   110|00100 10|000001
	 *     payload  00100 000001                  -> 0x0101
	 *   3-byte character "ꙅ":
	 *     bytes    ea 99 85
	 *     binary   1110|1010 10|011001 10|000101
	 *     payload  1010 011001 000101            -> 0xA645
	 *   4-byte character "𠜎":
	 *     bytes    f0 a0 9c 8e
	 *     binary   11110|000 10|100000 10|011100 10|001110
	 *     payload  000 100000 011100 001110      -> 0x02070E
	 */
	return utf8_in2(c, buf, 0);
}
#pragma GCC visibility pop | #pragma GCC visibility pop | ||||
int utf8_out(unsigned int c, char *buf)
{
	// Encode the character code c as UTF-8 into buf (no terminating NUL is added).
	// Returns the number of bytes written (1 to 4).
	// Codes of 0x110000 and above are out of range and are written as a single space.
	static const unsigned char lead_bits[4] = { 0, 0xc0, 0xe0, 0xf0 };
	int n_trailing; // number of continuation bytes after the lead byte
	int pos;

	if (c >= 0x110000) {
		buf[0] = ' ';
		return 1;
	}
	if (c < 0x80) {
		buf[0] = c; // plain ASCII
		return 1;
	}

	n_trailing = (c < 0x0800) ? 1 : ((c < 0x10000) ? 2 : 3);

	// lead byte: length marker plus the most significant payload bits
	buf[0] = lead_bits[n_trailing] | (c >> (6 * n_trailing));

	// continuation bytes: 6 payload bits each, most significant group first
	for (pos = 1; pos <= n_trailing; pos++)
		buf[pos] = 0x80 + ((c >> (6 * (n_trailing - pos))) & 0x3f);

	return n_trailing + 1;
}
int utf8_in2(int *c, const char *buf, int backwards)
{
	// Decode one character from a UTF-8 string.
	// c:         receives the decoded character value
	// buf:       where to start reading; if it points at a continuation byte,
	//            the start of a character is searched for first
	// backwards: non-zero to search towards lower addresses, zero to search forwards
	// Returns the number of bytes decoded (lead byte plus continuation bytes).
	// Bytes skipped while searching for the lead byte are not counted.
	static const unsigned char lead_mask[4] = { 0xff, 0x1f, 0x0f, 0x07 };
	const int step = backwards ? -1 : 1;
	int value;
	int expected = 0; // continuation bytes announced by the lead byte
	int consumed = 0; // continuation bytes actually read

	// skip over non-initial bytes of a multi-byte character
	while ((*buf & 0xc0) == 0x80)
		buf += step;

	value = *buf++;
	if (value & 0x80) {
		if ((value & 0xe0) == 0xc0)
			expected = 1;
		else if ((value & 0xf0) == 0xe0)
			expected = 2;
		else if ((value & 0xf8) == 0xf0)
			expected = 3;

		// keep only the payload bits of the lead byte
		value &= lead_mask[expected];

		// stop early if the string ends in the middle of the sequence
		while ((consumed < expected) && (*buf != 0)) {
			value = (value << 6) + (*buf++ & 0x3f);
			consumed++;
		}
	}

	*c = value;
	return consumed + 1;
}
int IsAlpha(unsigned int c)
{
	// Replacement for iswalpha() which also accepts some in-word symbols
	// (vowel marks, combining accents, ...). Returns 1 or 0.
	static const unsigned short extra_indic_alphas[] = {
		0xa70, 0xa71, // Gurmukhi: tippi, addak
		0
	};
	// inclusive ranges, above the Latin area, which count as alphabetic
	static const struct {
		unsigned int first;
		unsigned int last;
	} extra_ranges[] = {
		{ 0x5b0, 0x5c2 },   // Hebrew vowel marks
		{ 0x605, 0x605 },
		{ 0x670, 0x670 },   // Arabic vowel marks
		{ 0x64b, 0x65e },   // Arabic vowel marks
		{ 0x300, 0x36f },   // combining accents
		{ 0xf40, 0xfbc },   // Tibetan
		{ 0x1100, 0x11ff }, // Korean jamo
		{ 0x2800, 0x28ff }, // braille
		{ 0x3041, 0xa700 }, // Chinese/Japanese. Should never get here, but Mac OS 10.4's iswalpha seems to be broken, so just make sure
	};
	unsigned int ix;

	if (iswalpha(c))
		return 1;
	if (c < 0x300)
		return 0;

	if ((c >= 0x901) && (c <= 0xdf7)) {
		// Indic scripts: Devanagari, Tamil, etc
		// This range is decided here; the table below is not consulted.
		return ((c & 0x7f) < 0x64)
		       || (lookupwchar(extra_indic_alphas, c) != 0)
		       || ((c >= 0xd7a) && (c <= 0xd7f)); // Malayalam chillu characters
	}

	for (ix = 0; ix < sizeof(extra_ranges) / sizeof(extra_ranges[0]); ix++) {
		if ((c >= extra_ranges[ix].first) && (c <= extra_ranges[ix].last))
			return 1;
	}
	return 0;
}
// brackets, also 0x2014 to 0x201f which don't need to be in this list
static const unsigned short brackets[] = { | |||||
'(', ')', '[', ']', '{', '}', '<', '>', '"', '\'', '`', | |||||
0xab, 0xbb, // double angle brackets | |||||
0x300a, 0x300b, // double angle brackets (ideograph) | |||||
0xe000+'<', // private usage area | |||||
0 | |||||
}; | |||||
int IsBracket(int c) | |||||
{ | |||||
if ((c >= 0x2014) && (c <= 0x201f)) | |||||
return 1; | |||||
return lookupwchar(brackets, c); | |||||
} | |||||
int IsDigit09(unsigned int c)
{
	// 1 if c is one of the ASCII digits '0'..'9', otherwise 0
	return (c >= '0') && (c <= '9');
}
int IsDigit(unsigned int c)
{
	// 1 if iswdigit() accepts c, or if c is in 0x966..0x96f (Devanagari digits); otherwise 0
	const int extra_digit = (c >= 0x966) && (c <= 0x96f);

	return iswdigit(c) || extra_digit;
}
int IsSpace(unsigned int c)
{
	// Non-zero if c should be treated as white space.
	// Besides what iswspace() accepts, box drawing characters and the
	// unicode specials 0xfff9..0xffff count as space. 0 is never a space.
	const int box_drawing = (c >= 0x2500) && (c < 0x25a0);
	const int specials = (c >= 0xfff9) && (c <= 0xffff);

	if (c == 0)
		return 0;
	return (box_drawing || specials) ? 1 : iswspace(c);
}
int isspace2(unsigned int c)
{
	// can't use isspace() because on Windows, isspace(0xe1) gives TRUE !
	// Returns 1 for the codes 1..' ' (control characters and space), 0 for everything else.
	// Any c up to ' ' fits in the low byte, so testing that byte for zero is the same as testing c for zero.
	return (c != 0) && (c <= ' ');
}
int is_str_totally_null(const char* str, int size) {
	// Tests if all bytes of str are null up to size.
	// Returns 1 if the first 'size' bytes are all zero, otherwise 0.
	// A size of zero or less examines no bytes and returns 1; the previous
	// memcmp() form passed size-1 as a huge unsigned length in that case.
	// This should never be reimplemented with integers, because
	// this function has to work with unaligned char*
	// (casting to int when unaligned may result in unguaranteed behaviors)
	int ix;

	for (ix = 0; ix < size; ix++) {
		if (str[ix] != 0)
			return 0;
	}
	return 1;
}
int Read4Bytes(FILE *f)
{
	// Read 4 bytes (least significant first) from f into a word.
	// The result of fgetc() is masked to 8 bits without an EOF check, so
	// each byte missing at end of file is read as 0xff.
	unsigned int acc = 0;
	int ix;

	for (ix = 0; ix < 4; ix++) {
		unsigned int byte = (unsigned int)fgetc(f) & 0xff;
		// Shift as unsigned: moving a set bit of a promoted int into the
		// sign position (byte >= 0x80 shifted by 24) is undefined behaviour.
		acc |= byte << (ix * 8);
	}
	return (int)acc;
}
int towlower2(unsigned int c, Translator *translator)
{
	// Lower-case conversion which allows for non-standard upper to lower case mappings.
	// When the translator's dotless_i option is set, 'I' becomes 0x131 (I -> ı);
	// everything else is passed to ucd_tolower().
	if ((c != 'I') || !translator->langopts.dotless_i)
		return ucd_tolower(c);

	return 0x131;
}
#define ESPEAK_NG_COMMON_H | #define ESPEAK_NG_COMMON_H | ||||
#include "espeak-ng/espeak_ng.h" | #include "espeak-ng/espeak_ng.h" | ||||
#include "translate.h" | |||||
extern ESPEAK_NG_API int GetFileLength(const char *filename); | extern ESPEAK_NG_API int GetFileLength(const char *filename); | ||||
extern ESPEAK_NG_API void strncpy0(char *to, const char *from, int size); | extern ESPEAK_NG_API void strncpy0(char *to, const char *from, int size); | ||||
int IsAlpha(unsigned int c); | |||||
int IsBracket(int c); | |||||
int IsDigit(unsigned int c); | |||||
int IsDigit09(unsigned int c); | |||||
int IsSpace(unsigned int c); | |||||
int isspace2(unsigned int c); | |||||
int is_str_totally_null(const char* str, int size); // Tests if all bytes of str up to size are null | |||||
int Read4Bytes(FILE *f); | |||||
int towlower2(unsigned int c, Translator *translator); // Supports Turkish I | |||||
ESPEAK_NG_API int utf8_in(int *c, const char *buf); | |||||
int utf8_in2(int *c, const char *buf, int backwards); | |||||
int utf8_out(unsigned int c, char *buf); | |||||
#ifdef __cplusplus | #ifdef __cplusplus | ||||
} | } | ||||
#endif | #endif |
#include <espeak-ng/speak_lib.h> | #include <espeak-ng/speak_lib.h> | ||||
#include <espeak-ng/encoding.h> | #include <espeak-ng/encoding.h> | ||||
#include "common.h" // for GetFileLength, strncpy0 | |||||
#include "common.h" // for GetFileLength, strncpy0, ... | |||||
#include "error.h" // for create_file_error_context | #include "error.h" // for create_file_error_context | ||||
#include "mnemonics.h" // for LookupMnemName, MNEM_TAB | #include "mnemonics.h" // for LookupMnemName, MNEM_TAB | ||||
#include "phoneme.h" // for PHONEME_TAB, PHONEME_TAB_LIST | #include "phoneme.h" // for PHONEME_TAB, PHONEME_TAB_LIST | ||||
#include "readclause.h" // for Read4Bytes | |||||
#include "spect.h" // for SpectFrame, peak_t, SpectSeq | #include "spect.h" // for SpectFrame, peak_t, SpectSeq | ||||
#include "speech.h" // for path_home, GetFileLength | #include "speech.h" // for path_home, GetFileLength | ||||
#include "synthdata.h" // for LoadPhData | #include "synthdata.h" // for LoadPhData |
#include "error.h" // for create_file_error_context | #include "error.h" // for create_file_error_context | ||||
#include "mnemonics.h" // for LookupMnemName, MNEM_TAB | #include "mnemonics.h" // for LookupMnemName, MNEM_TAB | ||||
#include "phoneme.h" // for PHONEME_TAB_LIST, phonSWITCH, phone... | #include "phoneme.h" // for PHONEME_TAB_LIST, phonSWITCH, phone... | ||||
#include "readclause.h" // for towlower2 | |||||
#include "speech.h" // for path_home | #include "speech.h" // for path_home | ||||
#include "synthesize.h" // for Write4Bytes | #include "synthesize.h" // for Write4Bytes | ||||
#include "translate.h" // for isspace2, IsDigit09, utf8_in, utf8_out | |||||
static FILE *f_log = NULL; | static FILE *f_log = NULL; | ||||
#include "dictionary.h" | #include "dictionary.h" | ||||
#include "numbers.h" // for LookupAccentedLetter, Look... | #include "numbers.h" // for LookupAccentedLetter, Look... | ||||
#include "phoneme.h" // for PHONEME_TAB, phVOWEL, phon... | #include "phoneme.h" // for PHONEME_TAB, phVOWEL, phon... | ||||
#include "readclause.h" // for WordToString2, is_str_tota... | |||||
#include "readclause.h" // for WordToString2 | |||||
#include "speech.h" // for path_home | #include "speech.h" // for path_home | ||||
#include "compiledict.h" // for DecodeRule | #include "compiledict.h" // for DecodeRule | ||||
#include "synthdata.h" // for PhonemeCode, InterpretPhoneme | #include "synthdata.h" // for PhonemeCode, InterpretPhoneme | ||||
#include "synthesize.h" // for STRESS_IS_PRIMARY, phoneme... | #include "synthesize.h" // for STRESS_IS_PRIMARY, phoneme... | ||||
#include "translate.h" // for Translator, utf8_in, LANGU... | #include "translate.h" // for Translator, utf8_in, LANGU... | ||||
static int LookupFlags(Translator *tr, const char *word, unsigned int **flags_out); | |||||
typedef struct { | typedef struct { | ||||
int points; | int points; | ||||
const char *phonemes; | const char *phonemes; | ||||
return IsLetter(tr, letter, LETTERGP_VOWEL2); | return IsLetter(tr, letter, LETTERGP_VOWEL2); | ||||
} | } | ||||
static int Unpronouncable2(Translator *tr, char *word)
{
	// Run the translation rules on word with FLAG_UNPRON_TEST.
	// Returns 1 if TranslateRules() gives 0 or a result with SUFX_UNPRON set, otherwise 0.
	// Note: word[-1] is temporarily overwritten, so the byte before word must be writable.
	char ph_buf[N_WORD_PHONEMES];
	char saved;
	int end_flags;

	ph_buf[0] = 0;

	saved = word[-1];
	word[-1] = ' '; // ensure there is a space before the "word"
	end_flags = TranslateRules(tr, word, ph_buf, sizeof(ph_buf), NULL, FLAG_UNPRON_TEST, NULL);
	word[-1] = saved; // restore the caller's buffer

	return (end_flags == 0) || ((end_flags & SUFX_UNPRON) != 0);
}
int Unpronouncable(Translator *tr, char *word, int posn)
{
	/* Determines whether a word is 'unpronouncable', i.e. whether it should
	   be spoken as individual letters.

	   This function may be language specific. This is a generic version.
	   It counts the letters before the first vowel and compares that
	   with langopts.max_initial_consonants.

	   Returns 1 if the word is unpronouncable, otherwise 0.
	 */
	int ch;
	int first_letter = 0;
	int first_vowel = 9; // stays at 9 when no vowel is found
	int offset = 0;
	int n_letters = 0;
	ALPHABET *alphabet;

	utf8_in(&ch, word);
	if ((tr->letter_bits_offset > 0) && (ch < 0x241)) {
		// Latin characters for a language with a non-latin alphabet
		return 0; // so we can re-translate the word as English
	}

	alphabet = AlphabetFromChar(ch);
	if ((alphabet != NULL) && (alphabet->offset != tr->letter_bits_offset)) {
		// Character is not in our alphabet
		return 0;
	}

	if (tr->langopts.param[LOPT_UNPRONOUNCABLE] == 1)
		return 0;

	ch = *word;
	if ((ch == ' ') || (ch == 0) || (ch == '\''))
		return 0;

	while (1) {
		offset += utf8_in(&ch, &word[offset]);

		if ((ch == 0) || (ch == ' '))
			break;
		if ((ch == '\'') && ((n_letters > 1) || (posn > 0)))
			break; // "tv'" but not "l'"

		if (n_letters == 0)
			first_letter = ch;

		// with option value 3 an apostrophe is not counted
		if ((ch != '\'') || (tr->langopts.param[LOPT_UNPRONOUNCABLE] != 3))
			n_letters++;

		if (IsVowel(tr, ch)) {
			first_vowel = n_letters; // position of the first vowel
			break;
		}

		if ((ch != '\'') && !iswalpha(ch))
			return 0;
	}

	if ((first_vowel > 2) && (tr->langopts.param[LOPT_UNPRONOUNCABLE] == 2)) {
		// Lookup unpronouncable rules in *_rules
		return Unpronouncable2(tr, word);
	}

	if (first_letter == tr->langopts.param[LOPT_UNPRONOUNCABLE])
		first_vowel--; // disregard this as the initial letter when counting

	// no vowel, or no vowel in first few letters
	return first_vowel > (tr->langopts.max_initial_consonants+1);
}
static int GetVowelStress(Translator *tr, unsigned char *phonemes, signed char *vowel_stress, int *vowel_count, int *stressed_syllable, int control) | |||||
int GetVowelStress(Translator *tr, unsigned char *phonemes, signed char *vowel_stress, int *vowel_count, int *stressed_syllable, int control) | |||||
{ | { | ||||
// control = 1, set stress to 1 for forced unstressed vowels | // control = 1, set stress to 1 for forced unstressed vowels | ||||
unsigned char phcode; | unsigned char phcode; | ||||
return max_stress; | return max_stress; | ||||
} | } | ||||
static char stress_phonemes[] = { | |||||
const char stress_phonemes[] = { | |||||
phonSTRESS_D, phonSTRESS_U, phonSTRESS_2, phonSTRESS_3, | phonSTRESS_D, phonSTRESS_U, phonSTRESS_2, phonSTRESS_3, | ||||
phonSTRESS_P, phonSTRESS_P2, phonSTRESS_TONIC | phonSTRESS_P, phonSTRESS_P2, phonSTRESS_TONIC | ||||
}; | }; | ||||
void ChangeWordStress(Translator *tr, char *word, int new_stress)
{
	// Rewrite the phoneme string in word (in place) with a changed stress level.
	// new_stress >= STRESS_IS_PRIMARY: the first vowel which has the word's
	//   maximum stress is given new_stress.
	// otherwise: every vowel with a stress above new_stress is reduced to new_stress.
	unsigned char phonetic[N_WORD_PHONEMES];
	signed char vowel_stress[N_WORD_PHONEMES/2];
	int vowel_count; // num of vowels + 1
	int stressed_syllable = 0; // position of stressed syllable
	int max_stress;
	int v;
	unsigned char *src;

	// work from a copy, the result is written back over word
	strcpy((char *)phonetic, word);
	max_stress = GetVowelStress(tr, phonetic, vowel_stress, &vowel_count, &stressed_syllable, 0);

	if (new_stress < STRESS_IS_PRIMARY) {
		// remove primary stress
		for (v = 1; v < vowel_count; v++) {
			if (vowel_stress[v] > new_stress)
				vowel_stress[v] = new_stress;
		}
	} else {
		// promote to primary stress
		for (v = 1; v < vowel_count; v++) {
			if (vowel_stress[v] >= max_stress) {
				vowel_stress[v] = new_stress;
				break;
			}
		}
	}

	// write out phonemes, with a stress phoneme before each syllabic vowel which needs one
	v = 1;
	for (src = phonetic; *src != 0; src++) {
		if ((phoneme_tab[*src]->type == phVOWEL) && !(phoneme_tab[*src]->phflags & phNONSYLLABIC)) {
			if ((vowel_stress[v] == STRESS_IS_DIMINISHED) || (vowel_stress[v] > STRESS_IS_UNSTRESSED))
				*word++ = stress_phonemes[(unsigned char)vowel_stress[v]];
			v++;
		}
		*word++ = *src;
	}
	*word = 0;
}
void SetWordStress(Translator *tr, char *output, unsigned int *dictionary_flags, int tonic, int control) | void SetWordStress(Translator *tr, char *output, unsigned int *dictionary_flags, int tonic, int control) | ||||
{ | { | ||||
/* Guess stress pattern of word. This is language specific | /* Guess stress pattern of word. This is language specific | ||||
if (letter == 0xe000+'(') { | if (letter == 0xe000+'(') { | ||||
if (pre_pause < tr->langopts.param[LOPT_BRACKET_PAUSE_ANNOUNCED]) | if (pre_pause < tr->langopts.param[LOPT_BRACKET_PAUSE_ANNOUNCED]) | ||||
pre_pause = tr->langopts.param[LOPT_BRACKET_PAUSE_ANNOUNCED]; // a bracket, already spoken by AnnouncePunctuation() | pre_pause = tr->langopts.param[LOPT_BRACKET_PAUSE_ANNOUNCED]; // a bracket, already spoken by AnnouncePunctuation() | ||||
} | |||||
} | |||||
if (IsBracket(letter)) { | if (IsBracket(letter)) { | ||||
if (pre_pause < tr->langopts.param[LOPT_BRACKET_PAUSE]) | if (pre_pause < tr->langopts.param[LOPT_BRACKET_PAUSE]) | ||||
pre_pause = tr->langopts.param[LOPT_BRACKET_PAUSE]; | pre_pause = tr->langopts.param[LOPT_BRACKET_PAUSE]; | ||||
return 0; | return 0; | ||||
} | } | ||||
void ApplySpecialAttribute2(Translator *tr, char *phonemes, int dict_flags)
{
	// apply after the translation is complete
	// Only active when bit 1 (value 2) of the LOPT_ALT option is set.
	// The phoneme following the first primary stress marker is switched
	// between E/e and O/o: to 'e'/'o' when dict_flags has FLAG_ALT2_TRANS,
	// to 'E'/'O' otherwise. Only the first marker is considered.
	int pos;
	int len;
	char *target;

	if ((tr->langopts.param[LOPT_ALT] & 2) == 0)
		return;

	len = strlen(phonemes);

	// a marker in the last position has no following phoneme, so stop one short
	for (pos = 0; pos + 1 < len; pos++) {
		if (phonemes[pos] != phonSTRESS_P)
			continue;

		target = &phonemes[pos+1];
		if ((dict_flags & FLAG_ALT2_TRANS) == 0) {
			if (*target == PhonemeCode('e'))
				*target = PhonemeCode('E');
			if (*target == PhonemeCode('o'))
				*target = PhonemeCode('O');
		} else {
			if (*target == PhonemeCode('E'))
				*target = PhonemeCode('e');
			if (*target == PhonemeCode('O'))
				*target = PhonemeCode('o');
		}
		return;
	}
}
int TransposeAlphabet(Translator *tr, char *text) | int TransposeAlphabet(Translator *tr, char *text) | ||||
{ | { | ||||
// transpose cyrillic alphabet (for example) into ascii (single byte) character codes | // transpose cyrillic alphabet (for example) into ascii (single byte) character codes | ||||
return 0; | return 0; | ||||
} | } | ||||
static int utf8_nbytes(const char *buf)
{
	// Returns the number of bytes for the first UTF-8 character in buf,
	// judged from the first byte only:
	//   below 0x80 -> 1, below 0xe0 -> 2, below 0xf0 -> 3, otherwise 4
	const unsigned char lead = (unsigned char)buf[0];

	return 1 + (lead >= 0x80) + (lead >= 0xe0) + (lead >= 0xf0);
}
/* Lookup a specified word in the word dictionary. | /* Lookup a specified word in the word dictionary. | ||||
Returns phonetic data in 'phonetic' and bits in 'flags' | Returns phonetic data in 'phonetic' and bits in 'flags' | ||||
return flags0; | return flags0; | ||||
} | } | ||||
int LookupFlags(Translator *tr, const char *word, unsigned int **flags_out) | |||||
static int LookupFlags(Translator *tr, const char *word, unsigned int **flags_out) | |||||
{ | { | ||||
char buf[100]; | char buf[100]; | ||||
static unsigned int flags[2]; | static unsigned int flags[2]; |
{ | { | ||||
#endif | #endif | ||||
extern const char stress_phonemes[]; | |||||
int LoadDictionary(Translator *tr, const char *name, int no_error); | int LoadDictionary(Translator *tr, const char *name, int no_error); | ||||
int HashDictionary(const char *string); | int HashDictionary(const char *string); | ||||
const char *EncodePhonemes(const char *p, char *outptr, int *bad_phoneme); | const char *EncodePhonemes(const char *p, char *outptr, int *bad_phoneme); | ||||
void DecodePhonemes(const char *inptr, char *outptr); | void DecodePhonemes(const char *inptr, char *outptr); | ||||
char *WritePhMnemonic(char *phon_out, PHONEME_TAB *ph, PHONEME_LIST *plist, int use_ipa, int *flags); | char *WritePhMnemonic(char *phon_out, PHONEME_TAB *ph, PHONEME_LIST *plist, int use_ipa, int *flags); | ||||
const char *GetTranslatedPhonemeString(int phoneme_mode); | const char *GetTranslatedPhonemeString(int phoneme_mode); | ||||
int GetVowelStress(Translator *tr, unsigned char *phonemes, signed char *vowel_stress, int *vowel_count, int *stressed_syllable, int control); | |||||
int IsVowel(Translator *tr, int letter); | int IsVowel(Translator *tr, int letter); | ||||
int Unpronouncable(Translator *tr, char *word, int posn); | |||||
void ChangeWordStress(Translator *tr, char *word, int new_stress); | |||||
void SetWordStress(Translator *tr, char *output, unsigned int *dictionary_flags, int tonic, int control); | void SetWordStress(Translator *tr, char *output, unsigned int *dictionary_flags, int tonic, int control); | ||||
void AppendPhonemes(Translator *tr, char *string, int size, const char *ph); | void AppendPhonemes(Translator *tr, char *string, int size, const char *ph); | ||||
int TranslateRules(Translator *tr, char *p_start, char *phonemes, int ph_size, char *end_phonemes, int word_flags, unsigned int *dict_flags); | int TranslateRules(Translator *tr, char *p_start, char *phonemes, int ph_size, char *end_phonemes, int word_flags, unsigned int *dict_flags); | ||||
int TransposeAlphabet(Translator *tr, char *text); | int TransposeAlphabet(Translator *tr, char *text); | ||||
int Lookup(Translator *tr, const char *word, char *ph_out); | int Lookup(Translator *tr, const char *word, char *ph_out); | ||||
int LookupDictList(Translator *tr, char **wordptr, char *ph_out, unsigned int *flags, int end_flags, WORD_TAB *wtab); | int LookupDictList(Translator *tr, char **wordptr, char *ph_out, unsigned int *flags, int end_flags, WORD_TAB *wtab); | ||||
int LookupFlags(Translator *tr, const char *word, unsigned int **flags_out); | |||||
int RemoveEnding(Translator *tr, char *word, int end_type, char *word_copy); | int RemoveEnding(Translator *tr, char *word, int end_type, char *word_copy); | ||||
#ifdef __cplusplus | #ifdef __cplusplus |
#define Rnpp 10 | #define Rnpp 10 | ||||
#define R1p 11 | #define R1p 11 | ||||
#define R2p 12 | #define R2p 12 | ||||
#define R3p 13 | |||||
#define R4p 14 | |||||
#define R5p 15 | |||||
#define R6p 16 | #define R6p 16 | ||||
#define RGL 17 | #define RGL 17 |
#include <espeak-ng/encoding.h> | #include <espeak-ng/encoding.h> | ||||
#include "numbers.h" | #include "numbers.h" | ||||
#include "common.h" | |||||
#include "dictionary.h" // for Lookup, TranslateRules, EncodePhonemes, Look... | #include "dictionary.h" // for Lookup, TranslateRules, EncodePhonemes, Look... | ||||
#include "phoneme.h" // for phonSWITCH, PHONEME_TAB, phonEND_WORD, phonP... | #include "phoneme.h" // for phonSWITCH, PHONEME_TAB, phonEND_WORD, phonP... | ||||
#include "readclause.h" // for WordToString2, towlower2 | |||||
#include "readclause.h" // for WordToString2 | |||||
#include "synthdata.h" // for SelectPhonemeTable | #include "synthdata.h" // for SelectPhonemeTable | ||||
#include "synthesize.h" // for phoneme_tab | #include "synthesize.h" // for phoneme_tab | ||||
#include "translate.h" // for Translator, LANGUAGE_OPTIONS, IsDigit09, WOR... | |||||
#include "translate.h" // for Translator, LANGUAGE_OPTIONS, WOR... | |||||
#include "voice.h" // for voice, voice_t | #include "voice.h" // for voice, voice_t | ||||
#define M_LIGATURE 0x8000 | #define M_LIGATURE 0x8000 |
return CLAUSE_NONE; | return CLAUSE_NONE; | ||||
} | } | ||||
int is_str_totally_null(const char* str, int size) {
	// Tests if all bytes of str are null up to size.
	// Returns 1 if the first 'size' bytes are all zero, otherwise 0.
	// A size of zero or less examines no bytes and returns 1; the previous
	// memcmp() form passed size-1 as a huge unsigned length in that case.
	// This should never be reimplemented with integers, because
	// this function has to work with unaligned char*
	// (casting to int when unaligned may result in unguaranteed behaviors)
	int ix;

	for (ix = 0; ix < size; ix++) {
		if (str[ix] != 0)
			return 0;
	}
	return 1;
}
int towlower2(unsigned int c, Translator *translator)
{
	// Lower-case conversion which allows for non-standard upper to lower case mappings.
	// When the translator's dotless_i option is set, 'I' becomes 0x131 (I -> ı);
	// everything else is passed to ucd_tolower().
	if ((c != 'I') || !translator->langopts.dotless_i)
		return ucd_tolower(c);

	return 0x131;
}
static int IsRomanU(unsigned int c) | static int IsRomanU(unsigned int c) | ||||
{ | { | ||||
if ((c == 'I') || (c == 'V') || (c == 'X') || (c == 'L')) | if ((c == 'I') || (c == 'V') || (c == 'X') || (c == 'L')) | ||||
return buf; | return buf; | ||||
} | } | ||||
int Read4Bytes(FILE *f)
{
	// Read 4 bytes (least significant first) from f into a word.
	// The result of fgetc() is masked to 8 bits without an EOF check, so
	// each byte missing at end of file is read as 0xff.
	unsigned int acc = 0;
	int ix;

	for (ix = 0; ix < 4; ix++) {
		unsigned int byte = (unsigned int)fgetc(f) & 0xff;
		// Shift as unsigned: moving a set bit of a promoted int into the
		// sign position (byte >= 0x80 shifted by 24) is undefined behaviour.
		acc |= byte << (ix * 8);
	}
	return (int)acc;
}
static int AnnouncePunctuation(Translator *tr, int c1, int *c2_ptr, char *output, int *bufix, int end_clause) | static int AnnouncePunctuation(Translator *tr, int c1, int *c2_ptr, char *output, int *bufix, int end_clause) | ||||
{ | { | ||||
// announce punctuation names | // announce punctuation names |
extern PARAM_STACK param_stack[]; | extern PARAM_STACK param_stack[]; | ||||
// Tests if all bytes of str up to size are null | |||||
int is_str_totally_null(const char* str, int size); | |||||
int clause_type_from_codepoint(uint32_t c); | int clause_type_from_codepoint(uint32_t c); | ||||
int towlower2(unsigned int c, Translator *translator); // Supports Turkish I | |||||
int Eof(void); | int Eof(void); | ||||
const char *WordToString2(unsigned int word); | const char *WordToString2(unsigned int word); | ||||
int Read4Bytes(FILE *f); | |||||
int AddNameData(const char *name, | int AddNameData(const char *name, | ||||
int wide); | int wide); | ||||
int ReadClause(Translator *tr, | int ReadClause(Translator *tr, |
#include "soundicon.h" | #include "soundicon.h" | ||||
#include "common.h" // for GetFileLength | #include "common.h" // for GetFileLength | ||||
#include "error.h" // for create_file_error_context | #include "error.h" // for create_file_error_context | ||||
#include "readclause.h" // for Read4Bytes | |||||
#include "speech.h" // for path_home, PATHSEP | #include "speech.h" // for path_home, PATHSEP | ||||
#include "synthesize.h" // for samplerate | #include "synthesize.h" // for samplerate | ||||
#define FRAME_WIDTH 1000 // max width for 8000kHz frame | #define FRAME_WIDTH 1000 // max width for 8000kHz frame | ||||
#define MAX_DISPLAY_FREQ 9500 | #define MAX_DISPLAY_FREQ 9500 | ||||
#define FRAME_HEIGHT 240 | |||||
#define T_AMPLITUDE 308 | #define T_AMPLITUDE 308 | ||||
#define T_AV 312 | #define T_AV 312 |
#include "readclause.h" // for PARAM_STACK, param_stack, AddNameData | #include "readclause.h" // for PARAM_STACK, param_stack, AddNameData | ||||
#include "soundicon.h" // for LoadSoundFile2 | #include "soundicon.h" // for LoadSoundFile2 | ||||
#include "synthesize.h" // for SPEED_FACTORS, speed | #include "synthesize.h" // for SPEED_FACTORS, speed | ||||
#include "translate.h" // for CTRL_EMBEDDED, IsDigit09, utf8_out | |||||
#include "translate.h" // for CTRL_EMBEDDED | |||||
#include "voice.h" // for SelectVoice, SelectVoiceByName | #include "voice.h" // for SelectVoice, SelectVoiceByName | ||||
#include "speech.h" // for MAKE_MEM_UNDEFINED | #include "speech.h" // for MAKE_MEM_UNDEFINED | ||||
#include "dictionary.h" | #include "dictionary.h" | ||||
#include "mbrola.h" | #include "mbrola.h" | ||||
#include "readclause.h" | |||||
#include "setlengths.h" | #include "setlengths.h" | ||||
#include "synthdata.h" | #include "synthdata.h" | ||||
#include "wavegen.h" | #include "wavegen.h" |
#include <espeak-ng/speak_lib.h> | #include <espeak-ng/speak_lib.h> | ||||
#include <espeak-ng/encoding.h> | #include <espeak-ng/encoding.h> | ||||
#include "common.h" | |||||
#include "setlengths.h" // for SetLengthMods | #include "setlengths.h" // for SetLengthMods | ||||
#include "translate.h" // for Translator, LANGUAGE_OPTIONS, L, NUM... | #include "translate.h" // for Translator, LANGUAGE_OPTIONS, L, NUM... | ||||
#include <espeak-ng/encoding.h> | #include <espeak-ng/encoding.h> | ||||
#include "translate.h" | #include "translate.h" | ||||
#include "common.h" | |||||
#include "dictionary.h" // for TranslateRules, LookupDictList, Cha... | #include "dictionary.h" // for TranslateRules, LookupDictList, Cha... | ||||
#include "phoneme.h" // for phonSWITCH, PHONEME_TAB, phonPAUSE_... | #include "phoneme.h" // for phonSWITCH, PHONEME_TAB, phonPAUSE_... | ||||
#include "phonemelist.h" // for MakePhonemeList | #include "phonemelist.h" // for MakePhonemeList | ||||
int n_replace_phonemes; | int n_replace_phonemes; | ||||
REPLACE_PHONEMES replace_phonemes[N_REPLACE_PHONEMES]; | REPLACE_PHONEMES replace_phonemes[N_REPLACE_PHONEMES]; | ||||
// brackets, also 0x2014 to 0x201f which don't need to be in this list
static const unsigned short brackets[] = { | |||||
'(', ')', '[', ']', '{', '}', '<', '>', '"', '\'', '`', | |||||
0xab, 0xbb, // double angle brackets | |||||
0x300a, 0x300b, // double angle brackets (ideograph) | |||||
0xe000+'<', // private usage area | |||||
0 | |||||
}; | |||||
// other characters which break a word, but don't produce a pause | // other characters which break a word, but don't produce a pause | ||||
static const unsigned short breaks[] = { '_', 0 }; | static const unsigned short breaks[] = { '_', 0 }; | ||||
int IsAlpha(unsigned int c)
{
	// Replacement for iswalpha() which also accepts some in-word symbols
	// (vowel marks, combining accents, ...). Returns 1 or 0.
	static const unsigned short extra_indic_alphas[] = {
		0xa70, 0xa71, // Gurmukhi: tippi, addak
		0
	};
	// inclusive ranges, above the Latin area, which count as alphabetic
	static const struct {
		unsigned int first;
		unsigned int last;
	} extra_ranges[] = {
		{ 0x5b0, 0x5c2 },   // Hebrew vowel marks
		{ 0x605, 0x605 },
		{ 0x670, 0x670 },   // Arabic vowel marks
		{ 0x64b, 0x65e },   // Arabic vowel marks
		{ 0x300, 0x36f },   // combining accents
		{ 0xf40, 0xfbc },   // Tibetan
		{ 0x1100, 0x11ff }, // Korean jamo
		{ 0x2800, 0x28ff }, // braille
		{ 0x3041, 0xa700 }, // Chinese/Japanese. Should never get here, but Mac OS 10.4's iswalpha seems to be broken, so just make sure
	};
	unsigned int ix;

	if (iswalpha(c))
		return 1;
	if (c < 0x300)
		return 0;

	if ((c >= 0x901) && (c <= 0xdf7)) {
		// Indic scripts: Devanagari, Tamil, etc
		// This range is decided here; the table below is not consulted.
		return ((c & 0x7f) < 0x64)
		       || (lookupwchar(extra_indic_alphas, c) != 0)
		       || ((c >= 0xd7a) && (c <= 0xd7f)); // Malayalam chillu characters
	}

	for (ix = 0; ix < sizeof(extra_ranges) / sizeof(extra_ranges[0]); ix++) {
		if ((c >= extra_ranges[ix].first) && (c <= extra_ranges[ix].last))
			return 1;
	}
	return 0;
}
int IsDigit09(unsigned int c)
{
	// 1 if c is one of the ASCII digits '0'..'9', otherwise 0
	return (c >= '0') && (c <= '9');
}
int IsDigit(unsigned int c)
{
	// 1 if iswdigit() accepts c, or if c is in 0x966..0x96f (Devanagari digits); otherwise 0
	const int extra_digit = (c >= 0x966) && (c <= 0x96f);

	return iswdigit(c) || extra_digit;
}
static int IsSpace(unsigned int c)
{
	// Non-zero if c should be treated as white space.
	// Besides what iswspace() accepts, box drawing characters and the
	// unicode specials 0xfff9..0xffff count as space. 0 is never a space.
	const int box_drawing = (c >= 0x2500) && (c < 0x25a0);
	const int specials = (c >= 0xfff9) && (c <= 0xffff);

	if (c == 0)
		return 0;
	return (box_drawing || specials) ? 1 : iswspace(c);
}
int isspace2(unsigned int c)
{
	// can't use isspace() because on Windows, isspace(0xe1) gives TRUE !
	// Returns 1 for the codes 1..' ' (control characters and space), 0 for everything else.
	// Any c up to ' ' fits in the low byte, so testing that byte for zero is the same as testing c for zero.
	return (c != 0) && (c <= ' ');
}
void DeleteTranslator(Translator *tr) | void DeleteTranslator(Translator *tr) | ||||
{ | { | ||||
if (!tr) return; | if (!tr) return; | ||||
return 0; | return 0; | ||||
} | } | ||||
int IsBracket(int c) | |||||
{ | |||||
if ((c >= 0x2014) && (c <= 0x201f)) | |||||
return 1; | |||||
return lookupwchar(brackets, c); | |||||
} | |||||
int utf8_nbytes(const char *buf)
{
	// Returns the length in bytes (1..4) of the UTF-8 sequence that starts at
	// buf, judged from the first byte only:
	//   below 0x80 -> 1, below 0xe0 -> 2, below 0xf0 -> 3, otherwise 4.
	const unsigned char lead = (unsigned char)*buf;
	int length = 1;

	if (lead >= 0x80)
		length++;
	if (lead >= 0xe0)
		length++;
	if (lead >= 0xf0)
		length++;
	return length;
}
int utf8_in2(int *c, const char *buf, int backwards)
{
	// Decodes one character from a UTF-8 string.
	//   c:         receives the decoded character as an integer
	//   buf:       where to start looking; if it points into the middle of a
	//              multi-byte sequence, continuation bytes (10xxxxxx) are
	//              skipped first
	//   backwards: non-zero to skip continuation bytes towards the start of
	//              the string, zero to skip towards the end
	// Returns the number of UTF-8 bytes consumed by the decoded character
	// (lead byte plus the continuation bytes actually read).
	static const unsigned char lead_mask[4] = { 0xff, 0x1f, 0x0f, 0x07 };
	const int step = backwards ? -1 : 1;
	int value;
	int n_tail = 0; // continuation bytes actually consumed

	// locate the first byte of the next/previous character
	while ((*buf & 0xc0) == 0x80)
		buf += step;

	value = *buf++;
	if (value & 0x80) {
		// multi-byte sequence: the lead byte tells how many bytes should follow
		int n_expected = 0;

		if ((value & 0xe0) == 0xc0)
			n_expected = 1;
		else if ((value & 0xf0) == 0xe0)
			n_expected = 2;
		else if ((value & 0xf8) == 0xf0)
			n_expected = 3;

		// keep only the payload bits of the lead byte
		value &= lead_mask[n_expected];

		// append 6 payload bits per continuation byte; stop early if the
		// string ends in the middle of the sequence (truncated input)
		while ((n_tail < n_expected) && (*buf != 0)) {
			value = (value << 6) + (*buf++ & 0x3f);
			n_tail++;
		}
	}

	*c = value;
	return n_tail + 1;
}
#pragma GCC visibility push(default) | |||||
int utf8_in(int *c, const char *buf)
{
	/* Read one character from a UTF-8 string, scanning forwards.
	 *
	 *   c:   receives the decoded value, i.e. the payload bits of each byte
	 *        with the UTF-8 header bits stripped and the rest concatenated
	 *   buf: start of the UTF-8 data to decode
	 *
	 * Returns the number of UTF-8 bytes used.
	 *
	 * Examples of the bit stripping:
	 *   2-byte "ā":  c4 81        110 00100 | 10 000001
	 *                              -> 00100 000001               = 0x0101
	 *   3-byte "ꙅ":  ea 99 85     1110 1010 | 10 011001 | 10 000101
	 *                              -> 1010 011001 000101         = 0xA645
	 *   4-byte "𠜎":  f0 a0 9c 8e  11110 000 | 10 100000 | 10 011100 | 10 001110
	 *                              -> 000 100000 011100 001110   = 0x02070e
	 *
	 * This is the forward-only entry point; the work is done by utf8_in2().
	 */
	const int forwards_only = 0;

	return utf8_in2(c, buf, forwards_only);
}
#pragma GCC visibility pop | |||||
int utf8_out(unsigned int c, char *buf)
{
	// Encodes the character code c as UTF-8 into buf.
	//   c:   character code to encode; values >= 0x110000 are out of range
	//        and are written as a single space
	//   buf: destination, must have room for up to 4 bytes; no terminating
	//        zero is written
	// Returns the number of bytes written (1..4).
	static const unsigned char lead_marker[4] = { 0, 0xc0, 0xe0, 0xf0 };
	char *out = buf;
	int n_tail; // number of continuation bytes after the lead byte
	int bits;

	if (c < 0x80) {
		*out = c;
		return 1;
	}
	if (c >= 0x110000) {
		*out = ' '; // out of range character code
		return 1;
	}

	n_tail = (c < 0x0800) ? 1 : ((c < 0x10000) ? 2 : 3);

	// lead byte: length marker plus the topmost payload bits
	*out++ = lead_marker[n_tail] | (c >> (6 * n_tail));

	// continuation bytes: 6 payload bits each, most significant group first
	for (bits = 6 * (n_tail - 1); bits >= 0; bits -= 6)
		*out++ = 0x80 + ((c >> bits) & 0x3f);

	return n_tail + 1;
}
char *strchr_w(const char *s, int c) | char *strchr_w(const char *s, int c) | ||||
{ | { | ||||
// return NULL for any non-ascii character | // return NULL for any non-ascii character |
#define FLAG_ALT_TRANS 0x8000 // language specific | #define FLAG_ALT_TRANS 0x8000 // language specific | ||||
#define FLAG_ALT2_TRANS 0x10000 // language specific | #define FLAG_ALT2_TRANS 0x10000 // language specific | ||||
#define FLAG_ALT3_TRANS 0x20000 // language specific | #define FLAG_ALT3_TRANS 0x20000 // language specific | ||||
#define FLAG_ALT4_TRANS 0x40000 // language specific | |||||
#define FLAG_ALT5_TRANS 0x80000 // language specific | |||||
#define FLAG_ALT6_TRANS 0x100000 // language specific | |||||
#define FLAG_ALT7_TRANS 0x200000 // language specific | #define FLAG_ALT7_TRANS 0x200000 // language specific | ||||
#define FLAG_COMBINE 0x800000 // combine with the next word | #define FLAG_COMBINE 0x800000 // combine with the next word | ||||
#define LEADING_2_BITS 0xC0 // 0b11000000 | #define LEADING_2_BITS 0xC0 // 0b11000000 | ||||
#define UTF8_TAIL_BITS 0x80 // 0b10000000 | #define UTF8_TAIL_BITS 0x80 // 0b10000000 | ||||
ESPEAK_NG_API int utf8_in(int *c, const char *buf); | |||||
int utf8_in2(int *c, const char *buf, int backwards); | |||||
int utf8_out(unsigned int c, char *buf); | |||||
int utf8_nbytes(const char *buf); | |||||
int lookupwchar(const unsigned short *list, int c); | int lookupwchar(const unsigned short *list, int c); | ||||
char *strchr_w(const char *s, int c); | char *strchr_w(const char *s, int c); | ||||
int IsBracket(int c); | |||||
void InitNamedata(void); | void InitNamedata(void); | ||||
void InitText(int flags); | void InitText(int flags); | ||||
void InitText2(void); | void InitText2(void); | ||||
int IsDigit(unsigned int c); | |||||
int IsDigit09(unsigned int c); | |||||
int IsAlpha(unsigned int c); | |||||
int isspace2(unsigned int c); | |||||
ALPHABET *AlphabetFromChar(int c); | ALPHABET *AlphabetFromChar(int c); | ||||
Translator *SelectTranslator(const char *name); | Translator *SelectTranslator(const char *name); | ||||
void print_dictionary_flags(unsigned int *flags, char *buf, int buf_len); | void print_dictionary_flags(unsigned int *flags, char *buf, int buf_len); | ||||
void ApplySpecialAttribute2(Translator *tr, char *phonemes, int dict_flags); | |||||
int TranslateWord(Translator *tr, char *word1, WORD_TAB *wtab, char *word_out); | int TranslateWord(Translator *tr, char *word1, WORD_TAB *wtab, char *word_out); | ||||
void TranslateClause(Translator *tr, int *tone, char **voice_change); | void TranslateClause(Translator *tr, int *tone, char **voice_change); | ||||
#include "translate.h" | #include "translate.h" | ||||
#include "translateword.h" | #include "translateword.h" | ||||
#include "common.h" // for strncpy0 | #include "common.h" // for strncpy0 | ||||
#include "dictionary.h" // for TranslateRules, LookupDictList, Cha... | |||||
#include "dictionary.h" // for TranslateRules, LookupDictList | |||||
#include "numbers.h" // for SetSpellingStress, ... | #include "numbers.h" // for SetSpellingStress, ... | ||||
#include "phoneme.h" // for phonSWITCH, PHONEME_TAB, phonPAUSE_... | #include "phoneme.h" // for phonSWITCH, PHONEME_TAB, phonPAUSE_... | ||||
#include "readclause.h" // for towlower2 | #include "readclause.h" // for towlower2 | ||||
static void addPluralSuffixes(int flags, Translator *tr, char last_char, char *word_phonemes); | static void addPluralSuffixes(int flags, Translator *tr, char last_char, char *word_phonemes); | ||||
static void ApplySpecialAttribute2(Translator *tr, char *phonemes, int dict_flags); | |||||
static void ChangeWordStress(Translator *tr, char *word, int new_stress); | |||||
static int CheckDottedAbbrev(char *word1); | static int CheckDottedAbbrev(char *word1); | ||||
static int NonAsciiNumber(int letter); | static int NonAsciiNumber(int letter); | ||||
static char *SpeakIndividualLetters(Translator *tr, char *word, char *phonemes, int spell_word, ALPHABET *current_alphabet, char word_phonemes[]); | static char *SpeakIndividualLetters(Translator *tr, char *word, char *phonemes, int spell_word, ALPHABET *current_alphabet, char word_phonemes[]); | ||||
static int TranslateLetter(Translator *tr, char *word, char *phonemes, int control, ALPHABET *current_alphabet); | static int TranslateLetter(Translator *tr, char *word, char *phonemes, int control, ALPHABET *current_alphabet); | ||||
static int Unpronouncable(Translator *tr, char *word, int posn); | |||||
static int Unpronouncable2(Translator *tr, char *word); | |||||
int TranslateWord3(Translator *tr, char *word_start, WORD_TAB *wtab, char *word_out, bool *any_stressed_words, ALPHABET *current_alphabet, char word_phonemes[], size_t size_word_phonemes) | int TranslateWord3(Translator *tr, char *word_start, WORD_TAB *wtab, char *word_out, bool *any_stressed_words, ALPHABET *current_alphabet, char word_phonemes[], size_t size_word_phonemes) | ||||
{ | { | ||||
} | } | ||||
void ApplySpecialAttribute2(Translator *tr, char *phonemes, int dict_flags)
{
	// Post-processing step, applied after the translation is complete.
	// Only active when bit 1 (value 2) of the LOPT_ALT language option is set.
	// Looks for the first phonSTRESS_P marker in phonemes and rewrites the
	// phoneme that follows it, in place:
	//   FLAG_ALT2_TRANS set in dict_flags:  'E' -> 'e',  'O' -> 'o'
	//   otherwise:                          'e' -> 'E',  'o' -> 'O'
	// Nothing after that first marker is touched.
	const int n_phonemes = strlen(phonemes);
	int pos;
	char *next;

	if ((tr->langopts.param[LOPT_ALT] & 2) == 0)
		return;

	for (pos = 0; pos < (n_phonemes-1); pos++) {
		if (phonemes[pos] != phonSTRESS_P)
			continue;

		next = &phonemes[pos+1];
		if (dict_flags & FLAG_ALT2_TRANS) {
			if (*next == PhonemeCode('E'))
				*next = PhonemeCode('e');
			if (*next == PhonemeCode('O'))
				*next = PhonemeCode('o');
		} else {
			if (*next == PhonemeCode('e'))
				*next = PhonemeCode('E');
			if (*next == PhonemeCode('o'))
				*next = PhonemeCode('O');
		}
		return; // only the first stress marker is handled
	}
}
static void ChangeWordStress(Translator *tr, char *word, int new_stress)
{
	// Rewrites the stress markers of a phoneme string in place.
	//   word:       zero-terminated phoneme string; overwritten with the result
	//   new_stress: stress level to apply
	// If new_stress is STRESS_IS_PRIMARY or higher, the first vowel whose stress
	// is at least the maximum reported by GetVowelStress() gets new_stress.
	// Otherwise every vowel with a stress above new_stress is lowered to it.
	// The string is then written out again from a copy, with a stress phoneme
	// inserted before each syllabic vowel that needs one.
	unsigned char phonetic[N_WORD_PHONEMES];     // working copy of word
	signed char vowel_stress[N_WORD_PHONEMES/2]; // per-vowel stress, used from index 1
	int vowel_count;                             // num of vowels + 1
	int stressed_syllable = 0;                   // position of stressed syllable
	int max_stress;
	int v;
	const unsigned char *src;

	strcpy((char *)phonetic, word);
	max_stress = GetVowelStress(tr, phonetic, vowel_stress, &vowel_count, &stressed_syllable, 0);

	if (new_stress >= STRESS_IS_PRIMARY) {
		// promote: find the first vowel carrying the maximum stress
		v = 1;
		while ((v < vowel_count) && (vowel_stress[v] < max_stress))
			v++;
		if (v < vowel_count)
			vowel_stress[v] = new_stress;
	} else {
		// demote: cap every vowel at new_stress
		for (v = 1; v < vowel_count; v++) {
			if (vowel_stress[v] > new_stress)
				vowel_stress[v] = new_stress;
		}
	}

	// write the phonemes back, re-inserting stress markers before vowels
	v = 1;
	for (src = phonetic; *src != 0; src++) {
		const int syllabic_vowel = (phoneme_tab[*src]->type == phVOWEL) && !(phoneme_tab[*src]->phflags & phNONSYLLABIC);

		if (syllabic_vowel) {
			const signed char level = vowel_stress[v];

			if ((level == STRESS_IS_DIMINISHED) || (level > STRESS_IS_UNSTRESSED))
				*word++ = stress_phonemes[(unsigned char)level];
			v++;
		}
		*word++ = *src;
	}
	*word = 0;
}
static char *SpeakIndividualLetters(Translator *tr, char *word, char *phonemes, int spell_word, ALPHABET *current_alphabet, char word_phonemes[]) | static char *SpeakIndividualLetters(Translator *tr, char *word, char *phonemes, int spell_word, ALPHABET *current_alphabet, char word_phonemes[]) | ||||
{ | { | ||||
int posn = 0; | int posn = 0; | ||||
} | } | ||||
return -1; | return -1; | ||||
} | } | ||||
static int Unpronouncable(Translator *tr, char *word, int posn)
{
	/* Decides whether a word is 'unpronouncable', i.e. whether it should be
	   spoken as individual letters. Returns 1 if so, 0 otherwise.

	   This is a generic version; the decision may be language specific and is
	   steered by the LOPT_UNPRONOUNCABLE language option:
	     1     never unpronouncable
	     2     when no vowel is found within the first two letters, defer to
	           the unpronouncable rules (Unpronouncable2)
	     3     apostrophes are not counted as letters
	     other compared against the first letter; if equal, that letter is
	           disregarded when counting initial consonants

	   word: UTF-8 text of the word
	   posn: a value > 0 makes any apostrophe end the scan
	*/
	ALPHABET *alphabet;
	int ch;
	int first_letter = 0;
	int first_vowel = 9; // position of the first vowel; 9 = none found
	int offset = 0;
	int n_letters = 0;

	utf8_in(&ch, word);
	if ((tr->letter_bits_offset > 0) && (ch < 0x241)) {
		// Latin characters for a language with a non-latin alphabet:
		// report pronouncable so we can re-translate the word as English
		return 0;
	}

	alphabet = AlphabetFromChar(ch);
	if ((alphabet != NULL) && (alphabet->offset != tr->letter_bits_offset)) {
		// character is not in our alphabet
		return 0;
	}

	if (tr->langopts.param[LOPT_UNPRONOUNCABLE] == 1)
		return 0;

	ch = *word;
	if ((ch == ' ') || (ch == 0) || (ch == '\''))
		return 0;

	// scan letters until the first vowel or the end of the word
	for (;;) {
		int is_apostrophe;

		offset += utf8_in(&ch, &word[offset]);
		if ((ch == 0) || (ch == ' '))
			break;

		is_apostrophe = (ch == '\'');
		if (is_apostrophe && ((n_letters > 1) || (posn > 0)))
			break; // "tv'" but not "l'"

		if (n_letters == 0)
			first_letter = ch;

		// count this character, unless it is an apostrophe and option 3 is set
		if (!is_apostrophe || (tr->langopts.param[LOPT_UNPRONOUNCABLE] != 3))
			n_letters++;

		if (IsVowel(tr, ch)) {
			first_vowel = n_letters;
			break;
		}

		if (!is_apostrophe && !iswalpha(ch))
			return 0;
	}

	if ((first_vowel > 2) && (tr->langopts.param[LOPT_UNPRONOUNCABLE] == 2)) {
		// look up the unpronouncable rules in *_rules
		return Unpronouncable2(tr, word);
	}

	if (first_letter == tr->langopts.param[LOPT_UNPRONOUNCABLE])
		first_vowel--; // disregard this as the initial letter when counting

	// no vowel at all, or no vowel within the first few letters
	return (first_vowel > (tr->langopts.max_initial_consonants+1)) ? 1 : 0;
}
static int Unpronouncable2(Translator *tr, char *word)
{
	// Runs the translation rules on word in FLAG_UNPRON_TEST mode and reports
	// the word as unpronouncable (1) when the rules return 0 or a value with
	// SUFX_UNPRON set; otherwise returns 0.
	// The byte just before word is temporarily replaced by a space, so word
	// must not point at the very start of its buffer.
	char ph_buf[N_WORD_PHONEMES];
	const char saved = word[-1];
	int end_flags;

	ph_buf[0] = 0;

	word[-1] = ' '; // ensure there is a space before the "word"
	end_flags = TranslateRules(tr, word, ph_buf, sizeof(ph_buf), NULL, FLAG_UNPRON_TEST, NULL);
	word[-1] = saved;

	return ((end_flags == 0) || (end_flags & SUFX_UNPRON)) ? 1 : 0;
}