git-svn-id: https://espeak.svn.sourceforge.net/svnroot/espeak/trunk@110 d46cf337-b52f-0410-862d-fd96e6ae7743master
i i. i[ iou o o- ou u | i i. i[ iou o o- ou u | ||||
uai y | uai y | ||||
f j k kh l m n N | |||||
p ph R R2 R3 s s. S; | |||||
t th ts ts. ts. tS; tS; tsh | |||||
v w x z z. | |||||
: f j k kh l m n | |||||
N p ph R R2 R3 s s. | |||||
S; t th ts ts. ts. tS; tS; | |||||
tsh v w x z z. | |||||
Dictionary hbs_dict | Dictionary hbs_dict |
pedalo pEd@loU | pedalo pEd@loU | ||||
pejorative p@dZ0r@tIv | pejorative p@dZ0r@tIv | ||||
penal pi:n@L | penal pi:n@L | ||||
?4 penchant p0nS0n | |||||
penchant p0nS0n | |||||
peninsula p@n'InsjUl@ | peninsula p@n'InsjUl@ | ||||
penis pi:nI2s | penis pi:nI2s | ||||
perfectly p3:fEktlI2 | perfectly p3:fEktlI2 | ||||
politic p0l@tIk | politic p0l@tIk | ||||
poll poUl | poll poUl | ||||
polls poUlz | polls poUlz | ||||
penchant p0nS0n | |||||
portend po@t'End | portend po@t'End | ||||
posse p0sI | |||||
postage poUstIdZ | postage poUstIdZ | ||||
practise praktIs | practise praktIs | ||||
prayer pre@ | prayer pre@ | ||||
already O:lr,EdI2 $verbf $strend2 | already O:lr,EdI2 $verbf $strend2 | ||||
not ,n0t $verbextend $only $strend | not ,n0t $verbextend $only $strend | ||||
not n0t $capital $abbrev // don't spell "NOT"; speak it with stress | |||||
(not have) n0ta2v $pastf $u1 | (not have) n0ta2v $pastf $u1 | ||||
(not have) n0thav $pastf $atend | (not have) n0thav $pastf $atend | ||||
(not have to) ,n0thavt@5 $pastf $strend | (not have to) ,n0thavt@5 $pastf $strend | ||||
(not a) ,n0t@ $nounf | (not a) ,n0t@ $nounf | ||||
not n0t $allcaps $verbextend // don't spell "NOT"; speak it with stress | |||||
only oUnlI2 $verbextend | only oUnlI2 $verbextend | ||||
any ,EnI2 $nounf $only | any ,EnI2 $nounf $only |
de (lph dE | de (lph dE | ||||
_) de (lt dE | _) de (lt dE | ||||
delu (g dElju: | delu (g dElju: | ||||
de (lv dE | |||||
de (lv+ dE | |||||
de (ma dI2 | de (ma dI2 | ||||
de (mea dI2 | de (mea dI2 | ||||
de (men dI2 | de (men dI2 |
nehé) z (sé z | nehé) z (sé z | ||||
pén) z z | pén) z z | ||||
szá) z (sz z | szá) z (sz z | ||||
tí) z (sz z | |||||
tű) z (sz z | tű) z (sz z | ||||
ví) z (sz z | ví) z (sz z | ||||
tor) z (szülött z | tor) z (szülött z |
_?? @11 // unrecognized character | |||||
_?? @:11 // unrecognized character | |||||
//numbers | //numbers | ||||
_0 liN35 | _0 liN35 |
phoneme '' // Primary stress (priority) | phoneme '' // Primary stress (priority) | ||||
stress | stress | ||||
length 5 | |||||
length 6 | |||||
endphoneme | endphoneme | ||||
phoneme = // Place stress on previous vowel | phoneme = // Place stress on previous vowel |
{"$unstressend",13}, /* reduce stress at end of clause */ | {"$unstressend",13}, /* reduce stress at end of clause */ | ||||
{"$atend", 14}, /* use this pronunciation if at end of clause */ | {"$atend", 14}, /* use this pronunciation if at end of clause */ | ||||
{"$capital", 15}, /* use this pronunciation if initial letter is upper case */ | |||||
{"$dot", 16}, /* ignore '.' after this word (abbreviation) */ | {"$dot", 16}, /* ignore '.' after this word (abbreviation) */ | ||||
{"$abbrev", 17}, /* use this pronuciation rather than split into letters */ | {"$abbrev", 17}, /* use this pronuciation rather than split into letters */ | ||||
{"$stem", 18}, // must have a suffix | {"$stem", 18}, // must have a suffix | ||||
{"$double", 19}, // IT double the initial consonant of next word | {"$double", 19}, // IT double the initial consonant of next word | ||||
{"$alt", 20}, // use alternative pronunciation | {"$alt", 20}, // use alternative pronunciation | ||||
{"$alt2", 21}, | {"$alt2", 21}, | ||||
{"$verbf", 22}, /* verb follows */ | |||||
{"$verbsf", 23}, /* verb follows, allow -s suffix */ | |||||
{"$nounf", 24}, /* noun follows */ | |||||
{"$verb", 25}, /* use this pronunciation when its a verb */ | |||||
{"$past", 26}, /* use this pronunciation when its past tense */ | |||||
{"$pastf", 27}, /* past tense follows */ | |||||
{"$verbextend",28}, /* extend influence of 'verb follows' */ | |||||
{"$brk", 28}, // a shorter $pause | |||||
{"$text", 29}, // word translates to replcement text, not phonemes | {"$text", 29}, // word translates to replcement text, not phonemes | ||||
{"$brk", 30}, // a shorter $pause | |||||
// flags in dictionary word 2 | |||||
{"$verbf", 0x20}, /* verb follows */ | |||||
{"$verbsf", 0x21}, /* verb follows, allow -s suffix */ | |||||
{"$nounf", 0x22}, /* noun follows */ | |||||
{"$pastf", 0x23}, /* past tense follows */ | |||||
{"$verb", 0x24}, /* use this pronunciation when its a verb */ | |||||
{"$noun", 0x25}, /* use this pronunciation when its a noun */ | |||||
{"$past", 0x26}, /* use this pronunciation when its past tense */ | |||||
{"$verbextend",0x28}, /* extend influence of 'verb follows' */ | |||||
{"$capital", 0x29}, /* use this pronunciation if initial letter is upper case */ | |||||
{"$allcaps", 0x2a}, /* use this pronunciation if initial letter is upper case */ | |||||
// doesn't set dictionary_flags | // doesn't set dictionary_flags | ||||
{"$?", 100}, // conditional rule, followed by byte giving the condition number | {"$?", 100}, // conditional rule, followed by byte giving the condition number | ||||
} | } | ||||
else | else | ||||
{ | { | ||||
dict_line[length++] = 40 + multiple_words; | |||||
dict_line[length++] = 80 + multiple_words; | |||||
ix = multiple_string_end - multiple_string; | ix = multiple_string_end - multiple_string; | ||||
memcpy(&dict_line[length],multiple_string,ix); | memcpy(&dict_line[length],multiple_string,ix); | ||||
length += ix; | length += ix; |
static int GetVowelStress(Translator *tr, unsigned char *phonemes, unsigned char *vowel_stress, int &vowel_count, int &stressed_syllable) | |||||
{//====================================================================================================================================== | |||||
static int GetVowelStress(Translator *tr, unsigned char *phonemes, unsigned char *vowel_stress, int &vowel_count, int &stressed_syllable, int control) | |||||
{//==================================================================================================================================================== | |||||
// control = 1, set stress to 1 for forced unstressed vowels | |||||
unsigned char phcode; | unsigned char phcode; | ||||
PHONEME_TAB *ph; | PHONEME_TAB *ph; | ||||
unsigned char *ph_out = phonemes; | unsigned char *ph_out = phonemes; | ||||
max_stress = stress; | max_stress = stress; | ||||
} | } | ||||
if((stress == 0) && (ph->phflags & phUNSTRESSED)) | |||||
if((stress == 0) && (control & 1) && (ph->phflags & phUNSTRESSED)) | |||||
vowel_stress[count] = 1; /* weak vowel, must be unstressed */ | vowel_stress[count] = 1; /* weak vowel, must be unstressed */ | ||||
count++; | count++; | ||||
{ | { | ||||
// previous consonant phoneme is syllablic | // previous consonant phoneme is syllablic | ||||
vowel_stress[count] = (char)stress; | vowel_stress[count] = (char)stress; | ||||
if(stress == 0) | |||||
if((stress == 0) && (control & 1)) | |||||
vowel_stress[count++] = 1; // syllabic consonant, usually unstressed | vowel_stress[count++] = 1; // syllabic consonant, usually unstressed | ||||
} | } | ||||
unsigned char vowel_stress[N_WORD_PHONEMES/2]; | unsigned char vowel_stress[N_WORD_PHONEMES/2]; | ||||
strcpy((char *)phonetic,word); | strcpy((char *)phonetic,word); | ||||
max_stress = GetVowelStress(tr, phonetic,vowel_stress,vowel_count,stressed_syllable); | |||||
max_stress = GetVowelStress(tr, phonetic, vowel_stress, vowel_count, stressed_syllable, 0); | |||||
if(new_stress >= 4) | if(new_stress >= 4) | ||||
{ | { | ||||
// promote to primary stress | // promote to primary stress | ||||
for(ix=1; ix<vowel_count; ix++) | for(ix=1; ix<vowel_count; ix++) | ||||
{ | { | ||||
if(vowel_stress[ix] == max_stress) | |||||
if(vowel_stress[ix] >= max_stress) | |||||
{ | { | ||||
vowel_stress[ix] = new_stress; | vowel_stress[ix] = new_stress; | ||||
break; | break; | ||||
// remove primary stress | // remove primary stress | ||||
for(ix=1; ix<vowel_count; ix++) | for(ix=1; ix<vowel_count; ix++) | ||||
{ | { | ||||
if(vowel_stress[ix] > new_stress) | |||||
if(vowel_stress[ix] > new_stress) // >= allows for diminished stress (=1) | |||||
vowel_stress[ix] = new_stress; | vowel_stress[ix] = new_stress; | ||||
} | } | ||||
} | } | ||||
unstressed_word = 1; | unstressed_word = 1; | ||||
} | } | ||||
max_stress = GetVowelStress(this,phonetic,vowel_stress,vowel_count,stressed_syllable); | |||||
max_stress = GetVowelStress(this, phonetic, vowel_stress, vowel_count, stressed_syllable, 1); | |||||
// heavy or light syllables | // heavy or light syllables | ||||
ix = 1; | ix = 1; | ||||
char *print_dflags(int flags) | |||||
{//========================== | |||||
char *print_dflags(unsigned int *flags) | |||||
{//==================================== | |||||
static char buf[20]; | static char buf[20]; | ||||
sprintf(buf,"%s 0x%x",LookupMnem(mnem_flags,(flags & 0xf)+0x40), flags); | |||||
sprintf(buf,"%s 0x%x/%x",LookupMnem(mnem_flags,(flags[0] & 0xf)+0x40), flags[0], flags[1]); | |||||
return(buf); | return(buf); | ||||
} | } | ||||
const char *Translator::LookupDict2(const char *word, const char *word2, char *phonetic, unsigned int *flags, int end_flags) | |||||
//==================================================================================================================== | |||||
const char *Translator::LookupDict2(const char *word, const char *word2, char *phonetic, unsigned int *flags, int end_flags, WORD_TAB *wtab) | |||||
//========================================================================================================================================== | |||||
/* Find an entry in the word_dict file for a specified word. | /* Find an entry in the word_dict file for a specified word. | ||||
Returns NULL if no match, else returns 'word_end' | Returns NULL if no match, else returns 'word_end' | ||||
int wlen; | int wlen; | ||||
unsigned char flag; | unsigned char flag; | ||||
unsigned int dictionary_flags; | unsigned int dictionary_flags; | ||||
unsigned int dictionary_flags2; | |||||
int condition_failed=0; | int condition_failed=0; | ||||
int n_chars; | int n_chars; | ||||
int no_phonemes; | int no_phonemes; | ||||
int skipwords; | |||||
int ix; | |||||
const char *word_end; | const char *word_end; | ||||
const char *word1; | const char *word1; | ||||
char word_buf[N_WORD_BYTES]; | char word_buf[N_WORD_BYTES]; | ||||
word_end = word2; | word_end = word2; | ||||
dictionary_flags = 0; | dictionary_flags = 0; | ||||
dictionary_flags2 = 0; | |||||
no_phonemes = p[1] & 0x80; | no_phonemes = p[1] & 0x80; | ||||
p += ((p[1] & 0x3f) + 2); | p += ((p[1] & 0x3f) + 2); | ||||
} | } | ||||
} | } | ||||
else | else | ||||
if(flag > 64) | |||||
if(flag > 80) | |||||
{ | { | ||||
// stressed syllable information, put in bits 0-3 | |||||
dictionary_flags = (dictionary_flags & ~0xf) | (flag & 0xf); | |||||
if((flag & 0xc) == 0xc) | |||||
dictionary_flags |= FLAG_STRESS_END; | |||||
} | |||||
else | |||||
if(flag > 40) | |||||
{ | |||||
// flags 41 to 50 match more than one word | |||||
// flags 81 to 90 match more than one word | |||||
// This comes after the other flags | // This comes after the other flags | ||||
n_chars = next - p; | n_chars = next - p; | ||||
if(memcmp(word2,p,n_chars)==0) | |||||
skipwords = flag - 80; | |||||
// don't use the contraction if any of the words are emphasized | |||||
for(ix=0; ix <= skipwords; ix++) | |||||
{ | { | ||||
dictionary_flags |= FLAG_SKIPWORDS; | |||||
dictionary_skipwords = (flag - 40); | |||||
p = next; | |||||
word_end = word2 + n_chars; | |||||
if((wtab != NULL) && (wtab[ix].flags & FLAG_EMPHASIZED)) | |||||
{ | |||||
condition_failed = 1; | |||||
} | |||||
} | } | ||||
else | |||||
if(memcmp(word2,p,n_chars) != 0) | |||||
condition_failed = 1; | |||||
if(condition_failed) | |||||
{ | { | ||||
p = next; | p = next; | ||||
condition_failed = 1; | |||||
break; | break; | ||||
} | } | ||||
dictionary_flags |= FLAG_SKIPWORDS; | |||||
dictionary_skipwords = skipwords; | |||||
p = next; | |||||
word_end = word2 + n_chars; | |||||
} | |||||
else | |||||
if(flag > 64) | |||||
{ | |||||
// stressed syllable information, put in bits 0-3 | |||||
dictionary_flags = (dictionary_flags & ~0xf) | (flag & 0xf); | |||||
if((flag & 0xc) == 0xc) | |||||
dictionary_flags |= FLAG_STRESS_END; | |||||
} | |||||
else | |||||
if(flag >= 32) | |||||
{ | |||||
dictionary_flags2 |= (1L << (flag-32)); | |||||
} | } | ||||
else | else | ||||
{ | { | ||||
} | } | ||||
} | } | ||||
if(dictionary_flags & FLAG_CAPITAL) | |||||
if(dictionary_flags2 & FLAG_CAPITAL) | |||||
{ | { | ||||
if(!(word_flags & FLAG_FIRST_UPPER)) | if(!(word_flags & FLAG_FIRST_UPPER)) | ||||
{ | { | ||||
continue; | continue; | ||||
} | } | ||||
} | } | ||||
if(dictionary_flags2 & FLAG_ALLCAPS) | |||||
{ | |||||
if(!(word_flags & FLAG_ALL_UPPER)) | |||||
{ | |||||
continue; | |||||
} | |||||
} | |||||
if((dictionary_flags & FLAG_ATEND) && (word_end < clause_end)) | if((dictionary_flags & FLAG_ATEND) && (word_end < clause_end)) | ||||
{ | { | ||||
continue; | continue; | ||||
} | } | ||||
if(dictionary_flags & FLAG_VERB) | |||||
if(dictionary_flags2 & FLAG_VERB) | |||||
{ | { | ||||
// this is a verb-form pronunciation | // this is a verb-form pronunciation | ||||
continue; | continue; | ||||
} | } | ||||
} | } | ||||
if(dictionary_flags & FLAG_PAST) | |||||
if(dictionary_flags2 & FLAG_PAST) | |||||
{ | { | ||||
if(!expect_past) | if(!expect_past) | ||||
{ | { | ||||
continue; | continue; | ||||
} | } | ||||
} | } | ||||
if(dictionary_flags2 & FLAG_NOUN) | |||||
{ | |||||
if(!expect_noun) | |||||
{ | |||||
/* don't use the 'noun' pronunciation unless we are | |||||
expecting a noun */ | |||||
continue; | |||||
} | |||||
} | |||||
if(flags != NULL) | if(flags != NULL) | ||||
*flags = dictionary_flags; | |||||
{ | |||||
flags[0] = dictionary_flags | FLAG_FOUND_ATTRIBUTES; | |||||
flags[1] = dictionary_flags2; | |||||
} | |||||
if(phoneme_len == 0) | if(phoneme_len == 0) | ||||
{ | { | ||||
if(option_phonemes == 2) | if(option_phonemes == 2) | ||||
{ | { | ||||
fprintf(f_trans,"Flags: %s %s\n",word1,print_dflags(*flags)); | |||||
fprintf(f_trans,"Flags: %s %s\n",word1,print_dflags(flags)); | |||||
} | } | ||||
return(0); // no phoneme translation found here, only flags. So use rules | return(0); // no phoneme translation found here, only flags. So use rules | ||||
} | } | ||||
if(flags != NULL) | if(flags != NULL) | ||||
*flags |= FLAG_FOUND; // this flag indicates word was found in dictionary | |||||
flags[0] |= FLAG_FOUND; // this flag indicates word was found in dictionary | |||||
if(option_phonemes == 2) | if(option_phonemes == 2) | ||||
{ | { | ||||
DecodePhonemes(phonetic,ph_decoded); | DecodePhonemes(phonetic,ph_decoded); | ||||
if(flags != NULL) | if(flags != NULL) | ||||
flags1 = *flags; | |||||
flags1 = flags[0]; | |||||
if((dictionary_flags & FLAG_TEXTMODE) == 0) | if((dictionary_flags & FLAG_TEXTMODE) == 0) | ||||
textmode = 0; | textmode = 0; | ||||
if(textmode == translator->langopts.textmode) | if(textmode == translator->langopts.textmode) | ||||
{ | { | ||||
// only show this line if the word translates to phonemes, not replacement text | // only show this line if the word translates to phonemes, not replacement text | ||||
fprintf(f_trans,"Found: %s [%s] %s\n",word1,ph_decoded,print_dflags(flags1)); | |||||
fprintf(f_trans,"Found: %s [%s] %s\n",word1,ph_decoded,print_dflags(flags)); | |||||
} | } | ||||
} | } | ||||
return(word_end); | return(word_end); | ||||
int Translator::LookupDictList(char **wordptr, char *ph_out, unsigned int *flags, int end_flags) | |||||
//============================================================================================== | |||||
int Translator::LookupDictList(char **wordptr, char *ph_out, unsigned int *flags, int end_flags, WORD_TAB *wtab) | |||||
//============================================================================================================== | |||||
/* Lookup a specified word in the word dictionary. | /* Lookup a specified word in the word dictionary. | ||||
Returns phonetic data in 'phonetic' and bits in 'flags' | Returns phonetic data in 'phonetic' and bits in 'flags' | ||||
nbytes = utf8_in(&c2,word2,0); | nbytes = utf8_in(&c2,word2,0); | ||||
memcpy(&word[length],word2,nbytes); | memcpy(&word[length],word2,nbytes); | ||||
word[length+nbytes] = 0; | word[length+nbytes] = 0; | ||||
found = LookupDict2(word,word2,ph_out,flags,end_flags); | |||||
found = LookupDict2(word, word2, ph_out, flags, end_flags, wtab); | |||||
if(found) | if(found) | ||||
{ | { | ||||
*flags = *flags & ~(7 << 5) | (length << 5); | *flags = *flags & ~(7 << 5) | (length << 5); | ||||
} | } | ||||
word[length] = 0; | word[length] = 0; | ||||
found = LookupDict2(word,word1,ph_out,flags,end_flags); | |||||
found = LookupDict2(word, word1, ph_out, flags, end_flags, wtab); | |||||
if(found == 0) | if(found == 0) | ||||
{ | { | ||||
{ | { | ||||
// try removing an 'e' which has been added by RemoveEnding | // try removing an 'e' which has been added by RemoveEnding | ||||
word[length-1] = 0; | word[length-1] = 0; | ||||
found = LookupDict2(word,word1,ph_out,flags,end_flags); | |||||
found = LookupDict2(word, word1, ph_out, flags, end_flags, wtab); | |||||
} | } | ||||
else | else | ||||
if((end_flags & SUFX_D) && (word[length-1] == word[length-2])) | if((end_flags & SUFX_D) && (word[length-1] == word[length-2])) | ||||
{ | { | ||||
// try removing a double letter | // try removing a double letter | ||||
word[length-1] = 0; | word[length-1] = 0; | ||||
found = LookupDict2(word,word1,ph_out,flags,end_flags); | |||||
found = LookupDict2(word, word1, ph_out, flags, end_flags, wtab); | |||||
} | } | ||||
} | } | ||||
int Translator::Lookup(const char *word, char *ph_out) | int Translator::Lookup(const char *word, char *ph_out) | ||||
{//=================================================== | {//=================================================== | ||||
unsigned int flags; | |||||
unsigned int flags[2]; | |||||
char *word1 = (char *)word; | char *word1 = (char *)word; | ||||
return(LookupDictList(&word1,ph_out,&flags,0)); | |||||
return(LookupDictList(&word1, ph_out, flags, 0, NULL)); | |||||
} | } | ||||
/* all pitches given in Hz above pitch_base */ | /* all pitches given in Hz above pitch_base */ | ||||
// pitch change during the main part of the clause | // pitch change during the main part of the clause | ||||
static int drops_0[8] = {0x400,0x400,0x700,0x700,0x700,0xa00,0x0e00,0x0e00}; | |||||
static int drops_0[8] = {0x400,0x400,0x700,0x700,0x700,0xa00,0x1800,0x0e00}; | |||||
static int drops_1[8] = {0x400,0x400,0x600,0x600,0xc00,0xc00,0x0e00,0x0e00}; | static int drops_1[8] = {0x400,0x400,0x600,0x600,0xc00,0xc00,0x0e00,0x0e00}; | ||||
static int drops_2[8] = {0x400,0x400,0x600,0x600,-0x800,0xc00,0x0e00,0x0e00}; | static int drops_2[8] = {0x400,0x400,0x600,0x600,-0x800,0xc00,0x0e00,0x0e00}; | ||||
unsigned char tonic_max1; | unsigned char tonic_max1; | ||||
unsigned char tonic_min1; | unsigned char tonic_min1; | ||||
unsigned char emph_level; | |||||
unsigned char emph_next; | |||||
unsigned char pre_start; | unsigned char pre_start; | ||||
unsigned char pre_end; | unsigned char pre_end; | ||||
static TONE_TABLE tone_table[N_TONE_TABLE] = { | static TONE_TABLE tone_table[N_TONE_TABLE] = { | ||||
{PITCHfall, 30, 5, PITCHfall, 30, 7, // statement | {PITCHfall, 30, 5, PITCHfall, 30, 7, // statement | ||||
12, 10, | |||||
20, 25, 34, 22, drops_0, 3, 3, 12, 8, 0}, | 20, 25, 34, 22, drops_0, 3, 3, 12, 8, 0}, | ||||
{PITCHfrise, 38,10, PITCHfrise2, 36,10, // comma, or question | {PITCHfrise, 38,10, PITCHfrise2, 36,10, // comma, or question | ||||
30, 20, | |||||
20, 25, 34, 20, drops_0, 3, 3, 15, 25, 0}, | 20, 25, 34, 20, drops_0, 3, 3, 15, 25, 0}, | ||||
{PITCHdrop, 38, 1, PITCHdrop, 42,25, // exclamation | {PITCHdrop, 38, 1, PITCHdrop, 42,25, // exclamation | ||||
30, 20, | |||||
20, 25, 34, 22, drops_0, 3, 3, 12, 8, 0}, | 20, 25, 34, 22, drops_0, 3, 3, 12, 8, 0}, | ||||
#ifdef deleted | |||||
{PITCHfall, 30, 5, PITCHfall, 30, 7, // statement | {PITCHfall, 30, 5, PITCHfall, 30, 7, // statement | ||||
20, 25, 34, 22, drops_1, 3, 3, 12, 8, 0}, | 20, 25, 34, 22, drops_1, 3, 3, 12, 8, 0}, | ||||
{PITCHfall, 36, 6, PITCHfall, 36, 8, | {PITCHfall, 36, 6, PITCHfall, 36, 8, | ||||
30, 20, 18, 34, drops_0, 3, 3, 12, 8, 0}, | 30, 20, 18, 34, drops_0, 3, 3, 12, 8, 0}, | ||||
#endif | |||||
}; | }; | ||||
/* indexed by stress */ | /* indexed by stress */ | ||||
static int min_drop[] = {0x300,0x300,0x300,0x300,0x300,0x500,0xc00,0xc00}; | |||||
static int min_drop[] = {0x300,0x300,0x400,0x400,0x900,0x900,0x900,0x900}; | |||||
#define SECONDARY 3 | #define SECONDARY 3 | ||||
#define PRIMARY 4 | #define PRIMARY 4 | ||||
#define PRIMARY_STRESSED 5 | |||||
#define PRIMARY_MARKED 6 | #define PRIMARY_MARKED 6 | ||||
#define BODY_RESET 7 | |||||
#define FIRST_TONE 8 /* first of the tone types */ | |||||
#define PRIMARY_LAST 7 | |||||
static int number_pre; | static int number_pre; | ||||
{ | { | ||||
int ix; | int ix; | ||||
int stress; | int stress; | ||||
int stage=0; /* 0=pre, 1=body, 2=tail */ | |||||
int max_stress = 0; | int max_stress = 0; | ||||
int max_stress_posn = 0; | int max_stress_posn = 0; | ||||
int tone_type_marker = 0; | |||||
int marked_stress_count = 0; | |||||
number_pre=0; /* number of vowels before 1st primary stress */ | |||||
number_body=0; | |||||
number_tail=0; /* number between tonic syllable and next primary */ | |||||
number_pre = -1; /* number of vowels before 1st primary stress */ | |||||
number_body = 0; | |||||
number_tail = 0; /* number between tonic syllable and next primary */ | |||||
last_primary = 0; | |||||
for(ix=vowel_ix; ix<vowel_ix_top; ix++) | for(ix=vowel_ix; ix<vowel_ix_top; ix++) | ||||
{ | { | ||||
} | } | ||||
if(stress >= PRIMARY) | if(stress >= PRIMARY) | ||||
{ | { | ||||
if(stress > PRIMARY) | |||||
{ | |||||
marked_stress_count++; | |||||
} | |||||
if(number_pre < 0) | |||||
number_pre = ix; | |||||
last_primary = ix; | last_primary = ix; | ||||
} | } | ||||
switch(stage) | |||||
{ | |||||
case 0: | |||||
if(stress < PRIMARY) | |||||
number_pre++; | |||||
else | |||||
{ | |||||
stage = 1; | |||||
ix = ix-1; | |||||
} | |||||
break; | |||||
} | |||||
case 1: | |||||
if(stress >= FIRST_TONE) | |||||
{ | |||||
tone_type_marker = stress; | |||||
tone_posn = ix; | |||||
stage = 2; | |||||
} | |||||
break; | |||||
if(number_pre < 0) | |||||
number_pre = 0; | |||||
case 2: | |||||
if(stress < PRIMARY) | |||||
number_tail++; | |||||
else | |||||
stage = 3; | |||||
break; | |||||
} | |||||
} | |||||
number_tail = vowel_ix_top - max_stress_posn - 1; | |||||
tone_posn = max_stress_posn; | |||||
tone_posn = last_primary; | |||||
if(no_tonic) | if(no_tonic) | ||||
{ | { | ||||
tone_posn = vowel_ix_top; | tone_posn = vowel_ix_top; | ||||
} | } | ||||
else | else | ||||
if((tone_type_marker >= FIRST_TONE) && (tone_type_marker < (N_TONE_TABLE + FIRST_TONE))) | |||||
{ | |||||
tone_type = tone_type_marker - FIRST_TONE; | |||||
} | |||||
else | |||||
// if((last_primary - max_stress_posn) > 2) | |||||
{ | { | ||||
/* no tonic syllable found, use highest stress */ | |||||
vowel_tab[max_stress_posn] = FIRST_TONE; | |||||
number_tail = vowel_ix_top - max_stress_posn - 1; | |||||
tone_posn = max_stress_posn; | |||||
if(vowel_tab[last_primary] < PRIMARY_MARKED) | |||||
vowel_tab[last_primary] = PRIMARY_LAST; | |||||
} | } | ||||
if(marked_stress_count > 1) | |||||
annotation = 1; | |||||
else | |||||
annotation = 0; | |||||
} /* end of count_pitch_vowels */ | } /* end of count_pitch_vowels */ | ||||
while(ix < end_ix) | while(ix < end_ix) | ||||
{ | { | ||||
stress = vowel_tab[ix++] & 0x3f; | stress = vowel_tab[ix++] & 0x3f; | ||||
if(stress >= BODY_RESET) | |||||
break; | |||||
else | |||||
// if(stress >= PRIMARY_MARKED) | |||||
// break; | |||||
if(stress >= min_stress) | if(stress >= min_stress) | ||||
count++; | count++; | ||||
} | } | ||||
int n_primary=0; | int n_primary=0; | ||||
int initial; | int initial; | ||||
int overflow=0; | int overflow=0; | ||||
int marking=0; | |||||
int *drops; | int *drops; | ||||
static char overflow_tab[5] = {0, 5, 3, 1, 0}; | |||||
static char overflow_tab[5] = {0, 20, 12, 4, 0}; | |||||
drops = t->body_drops; | drops = t->body_drops; | ||||
{ | { | ||||
stress = vowel_tab[ix] & 0x3f; | stress = vowel_tab[ix] & 0x3f; | ||||
if(stress == BODY_RESET) | |||||
initial = 1; | |||||
// if(stress == PRIMARY_MARKED) | |||||
// initial = 1; // reset the intonation pattern | |||||
if(initial || (stress >= min_stress)) | if(initial || (stress >= min_stress)) | ||||
{ | { | ||||
// a primary stress | |||||
if(initial) | if(initial) | ||||
{ | { | ||||
initial = 0; | initial = 0; | ||||
pitch += increment; | pitch += increment; | ||||
else | else | ||||
{ | { | ||||
pitch = (t->body_end << 8) - (increment * overflow_tab[overflow++])/4; | |||||
pitch = (t->body_end << 8) - (increment * overflow_tab[overflow++])/16; | |||||
if(overflow > 4) overflow = 0; | if(overflow > 4) overflow = 0; | ||||
} | } | ||||
} | } | ||||
if(stress == PRIMARY_MARKED) | |||||
{ | |||||
pitch = (t->emph_level << 8); | |||||
marking = 2; | |||||
n_primary = marking+1; // move into overflow region | |||||
} | |||||
else | |||||
if(marking > 0) | |||||
{ | |||||
marking--; | |||||
pitch = (t->emph_next << 8); | |||||
} | |||||
n_primary--; | n_primary--; | ||||
} | } | ||||
if(((annotation==0) && (stress >= PRIMARY)) || (stress >= PRIMARY_MARKED)) | |||||
if((annotation==0) && (stress >= PRIMARY)) | |||||
{ | |||||
vowel_tab[ix] = PRIMARY_STRESSED; | |||||
set_pitch(ix,pitch,drops[stress]); | |||||
} | |||||
if(stress >= PRIMARY_MARKED) | |||||
{ | { | ||||
vowel_tab[ix] = PRIMARY_MARKED; | vowel_tab[ix] = PRIMARY_MARKED; | ||||
set_pitch(ix,pitch,drops[stress]); | set_pitch(ix,pitch,drops[stress]); |
const char *Translator::LookupSpecial(const char *string) | const char *Translator::LookupSpecial(const char *string) | ||||
{//====================================================== | {//====================================================== | ||||
unsigned int flags; | |||||
unsigned int flags[2]; | |||||
char phonemes[55]; | char phonemes[55]; | ||||
char phonemes2[55]; | char phonemes2[55]; | ||||
static char buf[60]; | static char buf[60]; | ||||
char *string1 = (char *)string; | char *string1 = (char *)string; | ||||
if(LookupDictList(&string1,phonemes,&flags,0)) | |||||
if(LookupDictList(&string1,phonemes,flags,0,NULL)) | |||||
{ | { | ||||
SetWordStress(phonemes,flags,-1,0); | |||||
SetWordStress(phonemes,flags[0],-1,0); | |||||
DecodePhonemes(phonemes,phonemes2); | DecodePhonemes(phonemes,phonemes2); | ||||
sprintf(buf,"[[%s]] ",phonemes2); | sprintf(buf,"[[%s]] ",phonemes2); | ||||
option_phoneme_input = 1; | option_phoneme_input = 1; |
if(length_mod < 8) | if(length_mod < 8) | ||||
length_mod = 8; // restrict how much lengths can be reduced | length_mod = 8; // restrict how much lengths can be reduced | ||||
if(stress >= 7) | |||||
if(stress >= 6) | |||||
{ | { | ||||
// tonic syllable, include a constant component so it doesn't decrease directly with speed | // tonic syllable, include a constant component so it doesn't decrease directly with speed | ||||
length_mod += 20; | length_mod += 20; | ||||
} | } | ||||
length_mod = (length_mod * stress_lengths[stress])/128; | |||||
if((len = stress_lengths[stress]) == 0) | |||||
len = stress_lengths[6]; | |||||
length_mod = (length_mod * len)/128; | |||||
if(end_of_clause == 2) | if(end_of_clause == 2) | ||||
{ | { |
exit(1); | exit(1); | ||||
} | } | ||||
{ | |||||
espeak_VOICE *v; | |||||
v = espeak_GetCurrentVoice(); | |||||
printf("Voice id='%s' name='%s' langs={%x %x %x %x}\n",v->identifier,v->name,v->languages[0],v->languages[1],v->languages[2],v->languages[3]); | |||||
} | |||||
if(option_waveout || quiet) | if(option_waveout || quiet) | ||||
{ | { | ||||
if(quiet) | if(quiet) |
#include "translate.h" | #include "translate.h" | ||||
#include "wave.h" | #include "wave.h" | ||||
const char *version_string = "1.29.23 01.Dec.07"; | |||||
const int version_phdata = 0x012901; | |||||
const char *version_string = "1.29.24 12.Dec.07"; | |||||
const int version_phdata = 0x012924; | |||||
int option_device_number = -1; | int option_device_number = -1; | ||||
Translator_English::Translator_English() : Translator() | Translator_English::Translator_English() : Translator() | ||||
{//=================================== | {//=================================== | ||||
// static int stress_lengths2[8] = {182,140, 220,220, 220,240, 248,270}; | // static int stress_lengths2[8] = {182,140, 220,220, 220,240, 248,270}; | ||||
static const short stress_lengths2[8] = {182,140, 220,220, 0,0, 248,275}; | |||||
static const short stress_lengths2[8] = {182,140, 220,220, 248,248, 275,275}; | |||||
memcpy(stress_lengths,stress_lengths2,sizeof(stress_lengths)); | memcpy(stress_lengths,stress_lengths2,sizeof(stress_lengths)); | ||||
langopts.stress_rule = 0; | langopts.stress_rule = 0; |
case L('c','y'): // Welsh | case L('c','y'): // Welsh | ||||
{ | { | ||||
static const short stress_lengths_cy[8] = {170,230, 190,190, 0, 0, 250,270}; | |||||
static const unsigned char stress_amps_cy[8] = {16,15, 18,18, 0,0, 24,22 }; // 'diminished' is used to mark a quieter, final unstressed syllable | |||||
static const short stress_lengths_cy[8] = {170,230, 190,190, 250, 250, 270,270}; | |||||
static const unsigned char stress_amps_cy[8] = {16,15, 18,18, 20,24, 24,21 }; // 'diminished' is used to mark a quieter, final unstressed syllable | |||||
tr = new Translator(); | tr = new Translator(); | ||||
SetupTranslator(tr,stress_lengths_cy,stress_amps_cy); | SetupTranslator(tr,stress_lengths_cy,stress_amps_cy); | ||||
case L('d','e'): | case L('d','e'): | ||||
{ | { | ||||
static const short stress_lengths_de[8] = {150,130, 190,190, 0, 0, 260,275}; | |||||
static const short stress_lengths_de[8] = {150,130, 190,190, 260, 260, 275,275}; | |||||
tr = new Translator(); | tr = new Translator(); | ||||
tr->langopts.stress_rule = 0; | tr->langopts.stress_rule = 0; | ||||
tr->langopts.word_gap = 0x8; // don't use linking phonemes | tr->langopts.word_gap = 0x8; // don't use linking phonemes | ||||
case L('e','l'): // Greek | case L('e','l'): // Greek | ||||
case L_grc: // Ancient Greek | case L_grc: // Ancient Greek | ||||
{ | { | ||||
static const short stress_lengths_el[8] = {155, 180, 210, 210, 0, 0, 270, 300}; | |||||
static const unsigned char stress_amps_el[8] = {15,12, 20,20, 20,24, 24,22 }; // 'diminished' is used to mark a quieter, final unstressed syllable | |||||
static const short stress_lengths_el[8] = {155, 180, 210, 210, 270, 270, 300, 300}; | |||||
static const unsigned char stress_amps_el[8] = {15,12, 20,20, 20,24, 24,21 }; // 'diminished' is used to mark a quieter, final unstressed syllable | |||||
// character codes offset by 0x380 | // character codes offset by 0x380 | ||||
static const char el_vowels[] = {0x10,0x2c,0x2d,0x2e,0x2f,0x30,0x31,0x35,0x37,0x39,0x3f,0x45,0x49,0x4a,0x4b,0x4c,0x4d,0x4e,0x4f,0}; | static const char el_vowels[] = {0x10,0x2c,0x2d,0x2e,0x2f,0x30,0x31,0x35,0x37,0x39,0x3f,0x45,0x49,0x4a,0x4b,0x4c,0x4d,0x4e,0x4f,0}; | ||||
case L('e','o'): | case L('e','o'): | ||||
{ | { | ||||
static const short stress_lengths_eo[8] = {145, 180, 200, 190, 0, 0, 300, 320}; | |||||
static const unsigned char stress_amps_eo[] = {16,14, 20,20, 20,24, 24,22 }; | |||||
static const short stress_lengths_eo[8] = {145, 180, 200, 190, 300, 300, 320, 320}; | |||||
static const unsigned char stress_amps_eo[] = {16,14, 20,20, 20,24, 24,21 }; | |||||
static const wchar_t eo_char_apostrophe[2] = {'l',0}; | static const wchar_t eo_char_apostrophe[2] = {'l',0}; | ||||
tr = new Translator(); | tr = new Translator(); | ||||
case L('e','s'): // Spanish | case L('e','s'): // Spanish | ||||
{ | { | ||||
static const short stress_lengths_es[8] = {170, 200, 180, 180, 0, 0, 220, 250}; | |||||
static const unsigned char stress_amps_es[8] = {16,12, 18,18, 20,22, 22,22 }; // 'diminished' is used to mark a quieter, final unstressed syllable | |||||
static const short stress_lengths_es[8] = {170, 200, 180, 180, 220, 220, 250, 250}; | |||||
static const unsigned char stress_amps_es[8] = {16,12, 18,18, 20,22, 22,21 }; // 'diminished' is used to mark a quieter, final unstressed syllable | |||||
tr = new Translator(); | tr = new Translator(); | ||||
SetupTranslator(tr,stress_lengths_es,stress_amps_es); | SetupTranslator(tr,stress_lengths_es,stress_amps_es); | ||||
case L('f','i'): // Finnish | case L('f','i'): // Finnish | ||||
{ | { | ||||
static const unsigned char stress_amps_fi[8] = {18,16, 22,22, 20,22, 22,22 }; | |||||
static const short stress_lengths_fi[8] = {150,170, 200,200, 0,0, 210,250}; | |||||
static const short stress_lengths_fi[8] = {150,170, 200,200, 210,210, 250,240}; | |||||
static const unsigned char stress_amps_fi[8] = {18,16, 22,22, 20,22, 22,21 }; | |||||
tr = new Translator(); | tr = new Translator(); | ||||
SetupTranslator(tr,stress_lengths_fi,stress_amps_fi); | SetupTranslator(tr,stress_lengths_fi,stress_amps_fi); | ||||
case L('f','r'): // french | case L('f','r'): // french | ||||
{ | { | ||||
static const short stress_lengths_fr[8] = {190, 170, 190, 200, 0, 0, 235, 235}; | |||||
static const short stress_lengths_fr[8] = {190, 170, 190, 200, 235, 235, 235, 235}; | |||||
static const unsigned char stress_amps_fr[8] = {16,14, 20,20, 20,24, 24,22 }; | static const unsigned char stress_amps_fr[8] = {16,14, 20,20, 20,24, 24,22 }; | ||||
tr = new Translator(); | tr = new Translator(); | ||||
case L('h','i'): | case L('h','i'): | ||||
{ | { | ||||
static const char dev_consonants2[] = {0x02,0x03,0x58,0x59,0x5a,0x5b,0x5c,0x5d,0x5e,0x5f}; | static const char dev_consonants2[] = {0x02,0x03,0x58,0x59,0x5a,0x5b,0x5c,0x5d,0x5e,0x5f}; | ||||
static const short stress_lengths_hi[8] = {190, 190, 210, 210, 0, 0, 230, 250}; | |||||
static const unsigned char stress_amps_hi[8] = {17,14, 20,19, 20,24, 24,22 }; | |||||
static const short stress_lengths_hi[8] = {190, 190, 210, 210, 230, 230, 250, 250}; | |||||
static const unsigned char stress_amps_hi[8] = {17,14, 20,19, 20,24, 24,21 }; | |||||
tr = new Translator(); | tr = new Translator(); | ||||
SetupTranslator(tr,stress_lengths_hi,stress_amps_hi); | SetupTranslator(tr,stress_lengths_hi,stress_amps_hi); | ||||
case L('b','s'): // Bosnian | case L('b','s'): // Bosnian | ||||
case L('s','r'): // Serbian | case L('s','r'): // Serbian | ||||
{ | { | ||||
static const unsigned char stress_amps_hr[8] = {16,16, 20,20, 20,24, 24,22 }; | |||||
static const short stress_lengths_hr[8] = {180,160, 200,200, 0,0, 220,230}; | |||||
static const unsigned char stress_amps_hr[8] = {16,16, 20,20, 20,24, 24,21 }; | |||||
static const short stress_lengths_hr[8] = {180,160, 200,200, 220,220, 230,230}; | |||||
tr = new Translator(); | tr = new Translator(); | ||||
SetupTranslator(tr,stress_lengths_hr,stress_amps_hr); | SetupTranslator(tr,stress_lengths_hr,stress_amps_hr); | ||||
case L('h','u'): // Hungarian | case L('h','u'): // Hungarian | ||||
{ | { | ||||
static const unsigned char stress_amps_hu[8] = {17,17, 19,19, 20,24, 24,22 }; | |||||
static const short stress_lengths_hu[8] = {185,195, 195,190, 0,0, 210,220}; | |||||
static const unsigned char stress_amps_hu[8] = {17,17, 19,19, 20,24, 24,21 }; | |||||
static const short stress_lengths_hu[8] = {185,195, 195,190, 210,210, 220,220}; | |||||
tr = new Translator(); | tr = new Translator(); | ||||
SetupTranslator(tr,stress_lengths_hu,stress_amps_hu); | SetupTranslator(tr,stress_lengths_hu,stress_amps_hu); | ||||
case L('i','s'): // Icelandic | case L('i','s'): // Icelandic | ||||
{ | { | ||||
static const unsigned char stress_amps_is[] = {16,16, 20,20, 20,24, 24,22 }; | |||||
static const short stress_lengths_is[8] = {180,160, 200,200, 0,0, 240,250}; | |||||
static const unsigned char stress_amps_is[] = {16,16, 20,20, 20,24, 24,21 }; | |||||
static const short stress_lengths_is[8] = {180,160, 200,200, 240,240, 250,250}; | |||||
static const wchar_t is_lettergroup_B[] = {'c','f','h','k','p','t','x',0xfe,0}; // voiceless conants, including 'þ' ?? 's' | static const wchar_t is_lettergroup_B[] = {'c','f','h','k','p','t','x',0xfe,0}; // voiceless conants, including 'þ' ?? 's' | ||||
tr = new Translator(); | tr = new Translator(); | ||||
case L('i','t'): // Italian | case L('i','t'): // Italian | ||||
{ | { | ||||
static const short stress_lengths_it[8] = {150, 140, 180, 180, 0, 0, 270, 320}; | |||||
static const unsigned char stress_amps_it[8] = {15,14, 19,19, 20,24, 24,22 }; | |||||
static const short stress_lengths_it[8] = {150, 140, 180, 180, 270, 270, 320, 320}; | |||||
static const unsigned char stress_amps_it[8] = {15,14, 19,19, 20,24, 24,21 }; | |||||
tr = new Translator(); | tr = new Translator(); | ||||
SetupTranslator(tr,stress_lengths_it,stress_amps_it); | SetupTranslator(tr,stress_lengths_it,stress_amps_it); | ||||
{ | { | ||||
static wchar_t vowels_cyrillic[] = {0x440, // also include 'р' [R] | static wchar_t vowels_cyrillic[] = {0x440, // also include 'р' [R] | ||||
0x430,0x435,0x438,0x439,0x43e,0x443,0x44b,0x44d,0x44e,0x44f,0x450,0x451,0x456,0x457,0x45d,0x45e,0}; | 0x430,0x435,0x438,0x439,0x43e,0x443,0x44b,0x44d,0x44e,0x44f,0x450,0x451,0x456,0x457,0x45d,0x45e,0}; | ||||
static const unsigned char stress_amps_mk[8] = {16,16, 20,20, 20,24, 24,22 }; | |||||
static const short stress_lengths_mk[8] = {180,160, 200,200, 0,0, 220,230}; | |||||
static const unsigned char stress_amps_mk[8] = {16,16, 20,20, 20,24, 24,21 }; | |||||
static const short stress_lengths_mk[8] = {180,160, 200,200, 220,220, 230,230}; | |||||
tr = new Translator(); | tr = new Translator(); | ||||
SetupTranslator(tr,stress_lengths_mk,stress_amps_mk); | SetupTranslator(tr,stress_lengths_mk,stress_amps_mk); | ||||
case L('n','l'): // Dutch | case L('n','l'): // Dutch | ||||
{ | { | ||||
static const short stress_lengths_nl[8] = {160,135, 210,210, 0, 0, 260,280}; | |||||
static const short stress_lengths_nl[8] = {160,135, 210,210, 260, 260, 280,280}; | |||||
tr = new Translator(); | tr = new Translator(); | ||||
tr->langopts.stress_rule = 0; | tr->langopts.stress_rule = 0; | ||||
case L('n','o'): // Norwegian | case L('n','o'): // Norwegian | ||||
{ | { | ||||
static const unsigned char stress_amps_no[] = {16,16, 20,20, 20,24, 24,22 }; | |||||
static const short stress_lengths_no[8] = {160,140, 200,190, 0,0, 220,240}; | |||||
static const short stress_lengths_no[8] = {160,140, 200,190, 220,220, 240,240}; | |||||
static const unsigned char stress_amps_no[] = {16,16, 20,20, 20,24, 24,21 }; | |||||
tr = new Translator(); | tr = new Translator(); | ||||
SetupTranslator(tr,stress_lengths_no,stress_amps_no); | SetupTranslator(tr,stress_lengths_no,stress_amps_no); | ||||
case L('p','l'): // Polish | case L('p','l'): // Polish | ||||
{ | { | ||||
static const short stress_lengths_pl[8] = {160, 180, 175, 175, 0, 0, 260, 290}; | |||||
static const unsigned char stress_amps_pl[8] = {17,14, 19,19, 20,24, 24,22 }; // 'diminished' is used to mark a quieter, final unstressed syllable | |||||
static const short stress_lengths_pl[8] = {160, 180, 175, 175, 260, 260, 290, 290}; | |||||
static const unsigned char stress_amps_pl[8] = {17,14, 19,19, 20,24, 24,21 }; // 'diminished' is used to mark a quieter, final unstressed syllable | |||||
tr = new Translator(); | tr = new Translator(); | ||||
SetupTranslator(tr,stress_lengths_pl,stress_amps_pl); | SetupTranslator(tr,stress_lengths_pl,stress_amps_pl); | ||||
case L('p','t'): // Portuguese | case L('p','t'): // Portuguese | ||||
{ | { | ||||
static const short stress_lengths_pt[8] = {180, 125, 210, 210, 0, 0, 270, 295}; | |||||
static const unsigned char stress_amps_pt[8] = {16,13, 19,19, 20,24, 24,22 }; // 'diminished' is used to mark a quieter, final unstressed syllable | |||||
static const short stress_lengths_pt[8] = {180, 125, 210, 210, 270, 270, 295, 295}; | |||||
static const unsigned char stress_amps_pt[8] = {16,13, 19,19, 20,24, 24,21 }; // 'diminished' is used to mark a quieter, final unstressed syllable | |||||
tr = new Translator(); | tr = new Translator(); | ||||
SetupTranslator(tr,stress_lengths_pt,stress_amps_pt); | SetupTranslator(tr,stress_lengths_pt,stress_amps_pt); | ||||
tr->langopts.length_mods0 = tr->langopts.length_mods; // don't lengthen vowels in the last syllable | tr->langopts.length_mods0 = tr->langopts.length_mods; // don't lengthen vowels in the last syllable | ||||
case L('r','o'): // Romanian | case L('r','o'): // Romanian | ||||
{ | { | ||||
static const short stress_lengths_ro[8] = {170, 170, 180, 180, 0, 0, 240, 260}; | |||||
static const unsigned char stress_amps_ro[8] = {15,13, 18,18, 20,22, 22,22 }; | |||||
static const short stress_lengths_ro[8] = {170, 170, 180, 180, 240, 240, 260, 260}; | |||||
static const unsigned char stress_amps_ro[8] = {15,13, 18,18, 20,22, 22,21 }; | |||||
tr = new Translator(); | tr = new Translator(); | ||||
SetupTranslator(tr,stress_lengths_ro,stress_amps_ro); | SetupTranslator(tr,stress_lengths_ro,stress_amps_ro); | ||||
case L('s','k'): // Slovak | case L('s','k'): // Slovak | ||||
case L('c','s'): // Czech | case L('c','s'): // Czech | ||||
{ | { | ||||
static const short stress_lengths_sk[8] = {190,190, 210,210, 210,210, 210,210}; | |||||
static const unsigned char stress_amps_sk[8] = {16,16, 20,20, 20,24, 24,22 }; | static const unsigned char stress_amps_sk[8] = {16,16, 20,20, 20,24, 24,22 }; | ||||
static const short stress_lengths_sk[8] = {190,190, 210,210, 0,0, 210,210}; | |||||
static const char *sk_voiced = "bdgjlmnrvwzaeiouy"; | static const char *sk_voiced = "bdgjlmnrvwzaeiouy"; | ||||
tr = new Translator(); | tr = new Translator(); | ||||
case L('s','v'): // Swedish | case L('s','v'): // Swedish | ||||
{ | { | ||||
static const short stress_lengths_sv[8] = {160,135, 220,220, 250,250, 250,280}; | |||||
static const unsigned char stress_amps_sv[] = {16,16, 20,20, 20,24, 24,22 }; | static const unsigned char stress_amps_sv[] = {16,16, 20,20, 20,24, 24,22 }; | ||||
static const short stress_lengths_sv[8] = {160,135, 220,220, 0,0, 250,280}; | |||||
tr = new Translator(); | tr = new Translator(); | ||||
SetupTranslator(tr,stress_lengths_sv,stress_amps_sv); | SetupTranslator(tr,stress_lengths_sv,stress_amps_sv); | ||||
tr->langopts.stress_rule = 0; | tr->langopts.stress_rule = 0; | ||||
SetLetterVowel(tr,'y'); | SetLetterVowel(tr,'y'); | ||||
// SetLetterBits(tr,6,"eiyäö"); // soft vowels NOTE accented letters don't work in SetLetterBits | |||||
tr->langopts.numbers = 0x1109; | tr->langopts.numbers = 0x1109; | ||||
} | } | ||||
break; | break; | ||||
case L('s','w'): // Swahili | case L('s','w'): // Swahili | ||||
{ | { | ||||
static const short stress_lengths_sw[8] = {160, 170, 200, 200, 0, 0, 320, 340}; | |||||
static const short stress_lengths_sw[8] = {150, 160, 190, 190, 300, 300, 320, 320}; | |||||
static const unsigned char stress_amps_sw[] = {16,12, 19,19, 20,24, 24,22 }; | static const unsigned char stress_amps_sw[] = {16,12, 19,19, 20,24, 24,22 }; | ||||
tr = new Translator(); | tr = new Translator(); | ||||
case L('t','r'): // Turkish | case L('t','r'): // Turkish | ||||
{ | { | ||||
static const unsigned char stress_amps_tr[8] = {16,16, 20,20, 20,24, 24,22 }; | |||||
static const short stress_lengths_tr[8] = {180,150, 200,180, 0,0, 230,270}; | |||||
static const short stress_lengths_tr[8] = {180,150, 200,180, 230,230, 270,270}; | |||||
static const unsigned char stress_amps_tr[8] = {16,16, 20,20, 20,24, 24,21 }; | |||||
tr = new Translator(); | tr = new Translator(); | ||||
SetupTranslator(tr,stress_lengths_tr,stress_amps_tr); | SetupTranslator(tr,stress_lengths_tr,stress_amps_tr); | ||||
case L('v','i'): // Vietnamese | case L('v','i'): // Vietnamese | ||||
{ | { | ||||
static const short stress_lengths_vi[8] = {150, 150, 180, 180, 210, 220, 220, 280}; | |||||
static const short stress_lengths_vi[8] = {150, 150, 180, 180, 220, 220, 280, 280}; | |||||
static const unsigned char stress_amps_vi[] = {16,16, 16,16, 24,24, 24,22 }; | static const unsigned char stress_amps_vi[] = {16,16, 16,16, 24,24, 24,22 }; | ||||
static wchar_t vowels_vi[] = { | static wchar_t vowels_vi[] = { | ||||
0x61, 0xe0, 0xe1, 0x1ea3, 0xe3, 0x1ea1, // a | 0x61, 0xe0, 0xe1, 0x1ea3, 0xe3, 0x1ea1, // a | ||||
case L('z','h'): | case L('z','h'): | ||||
case L_zhy: | case L_zhy: | ||||
{ | { | ||||
static const short stress_lengths_zh[8] = {230,150, 230,230, 230,0, 230,250}; | |||||
static const short stress_lengths_zh[8] = {230,150, 230,230, 230,230, 250,250}; | |||||
static const unsigned char stress_amps_zh[] = {22,16, 22,22, 22,22, 22,22 }; | static const unsigned char stress_amps_zh[] = {22,16, 22,22, 22,22, 22,22 }; | ||||
tr = new Translator; | tr = new Translator; | ||||
Translator_Russian::Translator_Russian() : Translator() | Translator_Russian::Translator_Russian() : Translator() | ||||
{//=================================== | {//=================================== | ||||
static const short stress_lengths_ru[8] = {150,140, 220,220, 260,260, 280,280}; | |||||
static const unsigned char stress_amps_ru[] = {16,16, 18,18, 20,24, 24,22 }; | static const unsigned char stress_amps_ru[] = {16,16, 18,18, 20,24, 24,22 }; | ||||
static const short stress_lengths_ru[8] = {150,140, 220,220, 0,0, 260,280}; | |||||
// character codes offset by 0x420 | // character codes offset by 0x420 | ||||
{//========================================= | {//========================================= | ||||
// Initialise options for this language | // Initialise options for this language | ||||
static const short stress_lengths2[8] = {170,140, 220,220, 0, 0, 250,270}; | |||||
static const short stress_lengths2[8] = {170,140, 220,220, 250, 250, 270,270}; | |||||
langopts.stress_rule = 0; | langopts.stress_rule = 0; | ||||
langopts.vowel_pause = 0x30; | langopts.vowel_pause = 0x30; | ||||
langopts.param[LOPT_DIERESES] = 1; | langopts.param[LOPT_DIERESES] = 1; |
int option_punctuation = 0; | int option_punctuation = 0; | ||||
int option_sayas = 0; | int option_sayas = 0; | ||||
int option_sayas2 = 0; // used in translate_clause() | int option_sayas2 = 0; // used in translate_clause() | ||||
int option_emphasis = 0; | |||||
int option_emphasis = 0; // 0=normal, 1=normal, 2=weak, 3=moderate, 4=strong | |||||
int word_emphasis = 0; // set if emphasis level 3 or 4 | |||||
int option_emphasize_allcaps = 0; | |||||
int option_ssml = 0; | int option_ssml = 0; | ||||
int option_phoneme_input = 1; // allow [[phonemes]] in input | int option_phoneme_input = 1; // allow [[phonemes]] in input | ||||
int option_phoneme_variants = 0; // 0= don't display phoneme variant mnemonics | int option_phoneme_variants = 0; // 0= don't display phoneme variant mnemonics | ||||
{//===================== | {//===================== | ||||
int ix; | int ix; | ||||
static const unsigned char stress_amps2[] = {16,16, 20,20, 20,24, 24,21 }; | static const unsigned char stress_amps2[] = {16,16, 20,20, 20,24, 24,21 }; | ||||
static const short stress_lengths2[8] = {182,140, 220,220, 220,240, 260,280}; | |||||
static const short stress_lengths2[8] = {182,140, 220,220, 250,260, 280,280}; | |||||
static const wchar_t empty_wstring[1] = {0}; | static const wchar_t empty_wstring[1] = {0}; | ||||
charset_a0 = charsets[1]; // ISO-8859-1, this is for when the input is not utf8 | charset_a0 = charsets[1]; // ISO-8859-1, this is for when the input is not utf8 | ||||
int word_length; | int word_length; | ||||
int ix; | int ix; | ||||
int posn; | int posn; | ||||
unsigned int dictionary_flags=0; | |||||
unsigned int dictionary_flags2=0; | |||||
unsigned int dictionary_flags[2]; | |||||
unsigned int dictionary_flags2[2]; | |||||
int end_type=0; | int end_type=0; | ||||
int prefix_type=0; | int prefix_type=0; | ||||
char *wordx; | char *wordx; | ||||
int prefix_flags = 0; | int prefix_flags = 0; | ||||
int confirm_prefix; | int confirm_prefix; | ||||
int spell_word; | int spell_word; | ||||
int emphasize_allcaps = 0; | |||||
int wflags = wtab->flags; | int wflags = wtab->flags; | ||||
int wmark = wtab->wmark; | int wmark = wtab->wmark; | ||||
static char word_iz[4] = {0,'i','z',0}; | static char word_iz[4] = {0,'i','z',0}; | ||||
static char word_ss[4] = {0,'s','s',0}; | static char word_ss[4] = {0,'s','s',0}; | ||||
dictionary_flags[0] = 0; | |||||
dictionary_flags[1] = 0; | |||||
dictionary_flags2[0] = 0; | |||||
dictionary_flags2[1] = 0; | |||||
dictionary_skipwords = 0; | |||||
prefix_phonemes[0] = 0; | prefix_phonemes[0] = 0; | ||||
end_phonemes[0] = 0; | end_phonemes[0] = 0; | ||||
ph_limit = &phonemes[N_WORD_PHONEMES]; | ph_limit = &phonemes[N_WORD_PHONEMES]; | ||||
else | else | ||||
{ | { | ||||
spell_word = 0; | spell_word = 0; | ||||
found = LookupDictList(&word1,phonemes,&dictionary_flags,FLAG_ALLOW_TEXTMODE | wflags << 16); // the original word | |||||
found = LookupDictList(&word1, phonemes, dictionary_flags, FLAG_ALLOW_TEXTMODE, wtab); // the original word | |||||
if(phonemes[0] == phonSWITCH) | if(phonemes[0] == phonSWITCH) | ||||
{ | { | ||||
if((wmark > 0) && (wmark < 8)) | if((wmark > 0) && (wmark < 8)) | ||||
{ | { | ||||
// the stressed syllable has been specified in the text (TESTING) | // the stressed syllable has been specified in the text (TESTING) | ||||
dictionary_flags = (dictionary_flags & ~0xf) | wmark; | |||||
dictionary_flags[0] = (dictionary_flags[0] & ~0xf) | wmark; | |||||
} | } | ||||
if(!found && (dictionary_flags & FLAG_ABBREV)) | |||||
if(!found && (dictionary_flags[0] & FLAG_ABBREV)) | |||||
{ | { | ||||
// the word has $abbrev flag, but no pronunciation specified. Speak as individual letters | // the word has $abbrev flag, but no pronunciation specified. Speak as individual letters | ||||
spell_word = 1; | spell_word = 1; | ||||
if(word_phonemes[0] == phonSWITCH) | if(word_phonemes[0] == phonSWITCH) | ||||
return(0); | return(0); | ||||
found = TranslateNumber(word1,phonemes,&dictionary_flags,wflags); | |||||
found = TranslateNumber(word1,phonemes,dictionary_flags,wflags); | |||||
} | } | ||||
if(!found & ((word_flags & FLAG_UPPERS) != FLAG_FIRST_UPPER)) | if(!found & ((word_flags & FLAG_UPPERS) != FLAG_FIRST_UPPER)) | ||||
if((langopts.numbers & NUM_ROMAN) || ((langopts.numbers & NUM_ROMAN_UC) && (word_flags & FLAG_ALL_UPPER))) | if((langopts.numbers & NUM_ROMAN) || ((langopts.numbers & NUM_ROMAN_UC) && (word_flags & FLAG_ALL_UPPER))) | ||||
{ | { | ||||
if((found = TranslateRoman(word1,phonemes)) != 0) | if((found = TranslateRoman(word1,phonemes)) != 0) | ||||
dictionary_flags |= FLAG_ABBREV; // don't spell capital Roman numbers as individual letters | |||||
dictionary_flags[0] |= FLAG_ABBREV; // prevent emphasis if capitals | |||||
} | } | ||||
} | } | ||||
if((wflags & FLAG_ALL_UPPER) && (clause_upper_count <= clause_lower_count) && | |||||
!(dictionary_flags & (FLAG_ABBREV | FLAG_SKIPWORDS)) && (word_length>1) && (word_length<4) && iswalpha(first_char)) | |||||
if((wflags & FLAG_ALL_UPPER) && (word_length > 1) && (clause_lower_count > 3) && iswalpha(first_char)) | |||||
{ | { | ||||
// An upper case word in a lower case clause. This could be an abbreviation. | |||||
spell_word = 1; | |||||
if((option_emphasize_allcaps) && !(dictionary_flags[0] & FLAG_ABBREV)) | |||||
{ | |||||
// emphasize words which are in capitals | |||||
emphasize_allcaps = FLAG_EMPHASIZED; | |||||
} | |||||
else | |||||
if(!found && !(dictionary_flags[0] & FLAG_SKIPWORDS) && (word_length<4) && (clause_upper_count <= clause_lower_count)) | |||||
{ | |||||
// An upper case word in a lower case clause. This could be an abbreviation. | |||||
spell_word = 1; | |||||
} | |||||
} | } | ||||
} | } | ||||
char *p; | char *p; | ||||
// This word looks "unpronouncable", so speak letters individually until we | // This word looks "unpronouncable", so speak letters individually until we | ||||
// find a remainder that we can pronounce. | // find a remainder that we can pronounce. | ||||
emphasize_allcaps = 0; | |||||
wordx += TranslateLetter(wordx,phonemes,0); | wordx += TranslateLetter(wordx,phonemes,0); | ||||
if(phonemes[0] == phonSWITCH) | if(phonemes[0] == phonSWITCH) | ||||
{ | { | ||||
{ | { | ||||
// Translate the stem | // Translate the stem | ||||
unpron_length = strlen(phonemes); | unpron_length = strlen(phonemes); | ||||
end_type = TranslateRules(wordx, phonemes, N_WORD_PHONEMES, end_phonemes, wflags, dictionary_flags); | |||||
end_type = TranslateRules(wordx, phonemes, N_WORD_PHONEMES, end_phonemes, wflags, dictionary_flags[0]); | |||||
if(phonemes[0] == phonSWITCH) | if(phonemes[0] == phonSWITCH) | ||||
{ | { | ||||
// remove any standard suffix and confirm that the prefix is still recognised | // remove any standard suffix and confirm that the prefix is still recognised | ||||
phonemes2[0] = 0; | phonemes2[0] = 0; | ||||
end2 = TranslateRules(wordx, phonemes2, N_WORD_PHONEMES, end_phonemes2, wflags|FLAG_NO_PREFIX|FLAG_NO_TRACE, dictionary_flags); | |||||
end2 = TranslateRules(wordx, phonemes2, N_WORD_PHONEMES, end_phonemes2, wflags|FLAG_NO_PREFIX|FLAG_NO_TRACE, dictionary_flags[0]); | |||||
if(end2) | if(end2) | ||||
{ | { | ||||
RemoveEnding(wordx,end2,word_copy); | RemoveEnding(wordx,end2,word_copy); | ||||
end_type = TranslateRules(wordx, phonemes, N_WORD_PHONEMES, end_phonemes, wflags|FLAG_NO_TRACE, dictionary_flags); | |||||
end_type = TranslateRules(wordx, phonemes, N_WORD_PHONEMES, end_phonemes, wflags|FLAG_NO_TRACE, dictionary_flags[0]); | |||||
memcpy(wordx,word_copy,strlen(word_copy)); | memcpy(wordx,word_copy,strlen(word_copy)); | ||||
if((end_type & SUFX_P) == 0) | if((end_type & SUFX_P) == 0) | ||||
{ | { | ||||
confirm_prefix = 1; | confirm_prefix = 1; | ||||
end_type = 0; | end_type = 0; | ||||
found = LookupDictList(&wordx,phonemes,&dictionary_flags2,SUFX_P | (wflags << 16)); // without prefix | |||||
if(dictionary_flags==0) | |||||
dictionary_flags = dictionary_flags2; | |||||
found = LookupDictList(&wordx, phonemes, dictionary_flags2, SUFX_P, wtab); // without prefix | |||||
if(dictionary_flags[0]==0) | |||||
{ | |||||
dictionary_flags[0] = dictionary_flags2[0]; | |||||
dictionary_flags[1] = dictionary_flags2[1]; | |||||
} | |||||
else | else | ||||
prefix_flags = 1; | prefix_flags = 1; | ||||
if(found == 0) | if(found == 0) | ||||
{ | { | ||||
end_type = TranslateRules(wordx, phonemes, N_WORD_PHONEMES, end_phonemes, 0, dictionary_flags); | |||||
end_type = TranslateRules(wordx, phonemes, N_WORD_PHONEMES, end_phonemes, 0, dictionary_flags[0]); | |||||
if(phonemes[0] == phonSWITCH) | if(phonemes[0] == phonSWITCH) | ||||
{ | { | ||||
{ | { | ||||
// lookup the stem without the prefix removed | // lookup the stem without the prefix removed | ||||
wordx[-1] = c_temp; | wordx[-1] = c_temp; | ||||
found = LookupDictList(&word1,phonemes_ptr,&dictionary_flags2,end_flags | (wflags << 16)); // include prefix, but not suffix | |||||
found = LookupDictList(&word1, phonemes_ptr, dictionary_flags2, end_flags, wtab); // include prefix, but not suffix | |||||
wordx[-1] = ' '; | wordx[-1] = ' '; | ||||
if(dictionary_flags==0) | |||||
dictionary_flags = dictionary_flags2; | |||||
if(dictionary_flags[0]==0) | |||||
{ | |||||
dictionary_flags[0] = dictionary_flags2[0]; | |||||
dictionary_flags[1] = dictionary_flags2[1]; | |||||
} | |||||
if(found) | if(found) | ||||
prefix_phonemes[0] = 0; // matched whole word, don't need prefix now | prefix_phonemes[0] = 0; // matched whole word, don't need prefix now | ||||
if((found==0) && (dictionary_flags2 != 0)) | |||||
if((found==0) && (dictionary_flags2[0] != 0)) | |||||
prefix_flags = 1; | prefix_flags = 1; | ||||
} | } | ||||
if(found == 0) | if(found == 0) | ||||
{ | { | ||||
found = LookupDictList(&wordx,phonemes_ptr,&dictionary_flags2,end_flags | (wflags << 16)); // without prefix and suffix | |||||
found = LookupDictList(&wordx, phonemes_ptr, dictionary_flags2, end_flags, wtab); // without prefix and suffix | |||||
if(phonemes_ptr[0] == phonSWITCH) | if(phonemes_ptr[0] == phonSWITCH) | ||||
{ | { | ||||
// change to another language in order to translate this word | // change to another language in order to translate this word | ||||
strcpy(word_phonemes,phonemes_ptr); | strcpy(word_phonemes,phonemes_ptr); | ||||
return(0); | return(0); | ||||
} | } | ||||
if(dictionary_flags==0) | |||||
dictionary_flags = dictionary_flags2; | |||||
if(dictionary_flags[0]==0) | |||||
{ | |||||
dictionary_flags[0] = dictionary_flags2[0]; | |||||
dictionary_flags[1] = dictionary_flags2[1]; | |||||
} | |||||
} | } | ||||
if(found == 0) | if(found == 0) | ||||
{ | { | ||||
strcpy(phonemes,phonemes2); | strcpy(phonemes,phonemes2); | ||||
// language specific changes | // language specific changes | ||||
ApplySpecialAttribute(phonemes,dictionary_flags); | |||||
ApplySpecialAttribute(phonemes,dictionary_flags[0]); | |||||
} | } | ||||
else | else | ||||
{ | { | ||||
if(end_flags & FLAG_SUFX) | if(end_flags & FLAG_SUFX) | ||||
TranslateRules(wordx, phonemes, N_WORD_PHONEMES, NULL,wflags | FLAG_SUFFIX_REMOVED, dictionary_flags); | |||||
TranslateRules(wordx, phonemes, N_WORD_PHONEMES, NULL,wflags | FLAG_SUFFIX_REMOVED, dictionary_flags[0]); | |||||
else | else | ||||
TranslateRules(wordx, phonemes, N_WORD_PHONEMES, NULL,wflags,dictionary_flags); | |||||
TranslateRules(wordx, phonemes, N_WORD_PHONEMES, NULL,wflags,dictionary_flags[0]); | |||||
if(phonemes[0] == phonSWITCH) | if(phonemes[0] == phonSWITCH) | ||||
{ | { | ||||
TranslateRules(&word_iz[1], phonemes, N_WORD_PHONEMES, NULL, 0, 0); | TranslateRules(&word_iz[1], phonemes, N_WORD_PHONEMES, NULL, 0, 0); | ||||
} | } | ||||
wflags |= emphasize_allcaps; | |||||
/* determine stress pattern for this word */ | /* determine stress pattern for this word */ | ||||
/******************************************/ | /******************************************/ | ||||
{ | { | ||||
char *p; | char *p; | ||||
// German, keep a secondary stress on the stem | // German, keep a secondary stress on the stem | ||||
SetWordStress(phonemes,dictionary_flags,3,0); | |||||
SetWordStress(phonemes,dictionary_flags[0],3,0); | |||||
// reduce all but the first primary stress | // reduce all but the first primary stress | ||||
ix=0; | ix=0; | ||||
} | } | ||||
strcpy(word_phonemes,prefix_phonemes); | strcpy(word_phonemes,prefix_phonemes); | ||||
strcat(word_phonemes,phonemes); | strcat(word_phonemes,phonemes); | ||||
SetWordStress(word_phonemes,dictionary_flags,-1,0); | |||||
SetWordStress(word_phonemes,dictionary_flags[0],-1,0); | |||||
} | } | ||||
else | else | ||||
{ | { | ||||
// stress position affects the whole word, including prefix | // stress position affects the whole word, including prefix | ||||
strcpy(word_phonemes,prefix_phonemes); | strcpy(word_phonemes,prefix_phonemes); | ||||
strcat(word_phonemes,phonemes); | strcat(word_phonemes,phonemes); | ||||
SetWordStress(word_phonemes,dictionary_flags,-1,prev_last_stress); | |||||
SetWordStress(word_phonemes,dictionary_flags[0],-1,prev_last_stress); | |||||
} | } | ||||
} | } | ||||
else | else | ||||
{ | { | ||||
if(prefix_phonemes[0] == 0) | if(prefix_phonemes[0] == 0) | ||||
SetWordStress(phonemes,dictionary_flags,-1,prev_last_stress); | |||||
SetWordStress(phonemes,dictionary_flags[0],-1,prev_last_stress); | |||||
else | else | ||||
SetWordStress(phonemes,dictionary_flags,-1,0); | |||||
SetWordStress(phonemes,dictionary_flags[0],-1,0); | |||||
strcpy(word_phonemes,prefix_phonemes); | strcpy(word_phonemes,prefix_phonemes); | ||||
strcat(word_phonemes,phonemes); | strcat(word_phonemes,phonemes); | ||||
} | } | ||||
strcat(word_phonemes,end_phonemes); | strcat(word_phonemes,end_phonemes); | ||||
} | } | ||||
// if(next_pause > 2) | |||||
ix = ((dictionary_flags >> 5) & 7); // dictionary indicates skip next word(s) | |||||
if(wtab[ix].flags & FLAG_LAST_WORD) | |||||
{ | |||||
// the word has attribute to stress or unstress when at end of clause | |||||
if(dictionary_flags & (FLAG_STRESS_END | FLAG_STRESS_END2)) | |||||
ChangeWordStress(this,word_phonemes,4); | |||||
// SetWordStress(word_phonemes,0,4,prev_last_stress); | |||||
else | |||||
if(dictionary_flags & FLAG_UNSTRESS_END) | |||||
ChangeWordStress(this,word_phonemes,3); | |||||
// SetWordStress(word_phonemes,0,3,prev_last_stress); | |||||
} | |||||
if(wflags & FLAG_STRESSED_WORD) | |||||
if(wflags & FLAG_EMPHASIZED) | |||||
{ | { | ||||
// A word is indicated in the source text as stressed | // A word is indicated in the source text as stressed | ||||
// we need to improve the intonation module to deal better with a clauses tonic | // we need to improve the intonation module to deal better with a clauses tonic | ||||
// stress being early in the clause, before enabling this | // stress being early in the clause, before enabling this | ||||
//SetWordStress(word_phonemes,0,5,prev_last_stress); | |||||
ChangeWordStress(this,word_phonemes,6); | |||||
} | |||||
else | |||||
if(wtab[dictionary_skipwords].flags & FLAG_LAST_WORD) | |||||
{ | |||||
// the word has attribute to stress or unstress when at end of clause | |||||
if(dictionary_flags[0] & (FLAG_STRESS_END | FLAG_STRESS_END2)) | |||||
ChangeWordStress(this,word_phonemes,4); | |||||
else | |||||
if(dictionary_flags[0] & FLAG_UNSTRESS_END) | |||||
ChangeWordStress(this,word_phonemes,3); | |||||
} | } | ||||
// dictionary flags for this word give a clue about which alternative pronunciations of | // dictionary flags for this word give a clue about which alternative pronunciations of | ||||
expect_verb_s = 2; | expect_verb_s = 2; | ||||
} | } | ||||
if(dictionary_flags & FLAG_PASTF) | |||||
if(dictionary_flags[1] & FLAG_PASTF) | |||||
{ | { | ||||
/* expect perfect tense in next two words */ | /* expect perfect tense in next two words */ | ||||
expect_past = 3; | expect_past = 3; | ||||
expect_verb = 0; | expect_verb = 0; | ||||
expect_noun = 0; | |||||
} | } | ||||
else | else | ||||
if(dictionary_flags & FLAG_VERBF) | |||||
if(dictionary_flags[1] & FLAG_VERBF) | |||||
{ | { | ||||
/* expect a verb in the next word */ | /* expect a verb in the next word */ | ||||
expect_verb = 2; | expect_verb = 2; | ||||
expect_verb_s = 0; /* verb won't have -s suffix */ | expect_verb_s = 0; /* verb won't have -s suffix */ | ||||
expect_noun = 0; | |||||
} | } | ||||
else | else | ||||
if(dictionary_flags & FLAG_VERBSF) | |||||
if(dictionary_flags[1] & FLAG_VERBSF) | |||||
{ | { | ||||
// expect a verb, must have a -s suffix | // expect a verb, must have a -s suffix | ||||
expect_verb = 0; | expect_verb = 0; | ||||
expect_verb_s = 2; | expect_verb_s = 2; | ||||
expect_past = 0; | expect_past = 0; | ||||
expect_noun = 0; | |||||
} | } | ||||
else | else | ||||
if(dictionary_flags & FLAG_NOUNF) | |||||
if(dictionary_flags[1] & FLAG_NOUNF) | |||||
{ | { | ||||
/* not expecting a verb next */ | /* not expecting a verb next */ | ||||
expect_noun = 3; | |||||
expect_verb = 0; | expect_verb = 0; | ||||
expect_verb_s = 0; | expect_verb_s = 0; | ||||
expect_past = 0; | expect_past = 0; | ||||
} | } | ||||
if((wordx[0] != 0) && (!(dictionary_flags & FLAG_VERB_EXT))) | |||||
if((wordx[0] != 0) && (!(dictionary_flags[1] & FLAG_VERB_EXT))) | |||||
{ | { | ||||
if(expect_verb > 0) | if(expect_verb > 0) | ||||
expect_verb -= 1; | |||||
expect_verb--; | |||||
if(expect_verb_s > 0) | if(expect_verb_s > 0) | ||||
expect_verb_s -= 1; | |||||
expect_verb_s--; | |||||
if(expect_noun >0) | |||||
expect_noun--; | |||||
if(expect_past > 0) | if(expect_past > 0) | ||||
expect_past -= 1; | |||||
expect_past--; | |||||
} | } | ||||
if((word_length == 1) && iswalpha(first_char) && (first_char != 'i')) | if((word_length == 1) && iswalpha(first_char) && (first_char != 'i')) | ||||
{ | { | ||||
// English Specific !!!! | // English Specific !!!! | ||||
// any single letter before a dot is an abbreviation, except 'I' | // any single letter before a dot is an abbreviation, except 'I' | ||||
dictionary_flags |= FLAG_DOT; | |||||
dictionary_flags[0] |= FLAG_DOT; | |||||
} | } | ||||
return(dictionary_flags); | |||||
return(dictionary_flags[0]); | |||||
} // end of TranslateWord | } // end of TranslateWord | ||||
option_sayas2 = value; | option_sayas2 = value; | ||||
count_sayas_digits = 0; | count_sayas_digits = 0; | ||||
} | } | ||||
if(cmd == EMBED_F) | |||||
{ | |||||
if(value >= 3) | |||||
word_emphasis = FLAG_EMPHASIZED; | |||||
else | |||||
word_emphasis = 0; | |||||
} | |||||
embedded_list[embedded_ix++] = cmd + sign + (value << 8); | embedded_list[embedded_ix++] = cmd + sign + (value << 8); | ||||
return(1); | return(1); | ||||
} // end of EmbeddedCommand | } // end of EmbeddedCommand | ||||
int SubstituteChar(Translator *tr, unsigned int c, unsigned int next_in, int *insert) | int SubstituteChar(Translator *tr, unsigned int c, unsigned int next_in, int *insert) | ||||
{//================================================================================== | {//================================================================================== | ||||
int ix; | int ix; | ||||
word_flags = 0; | word_flags = 0; | ||||
next_word_flags = 0; | next_word_flags = 0; | ||||
expect_verb=0; | expect_verb=0; | ||||
expect_noun=0; | |||||
expect_past=0; | expect_past=0; | ||||
expect_verb_s=0; | expect_verb_s=0; | ||||
end_stressed_vowel=0; | end_stressed_vowel=0; | ||||
embedded_count = 0; | embedded_count = 0; | ||||
} | } | ||||
words[word_count].pre_pause = pre_pause; | words[word_count].pre_pause = pre_pause; | ||||
words[word_count].flags |= (all_upper_case | word_flags); | |||||
words[word_count].flags |= (all_upper_case | word_flags | word_emphasis); | |||||
words[word_count].wmark = word_mark; | words[word_count].wmark = word_mark; | ||||
if(pre_pause > 0) | if(pre_pause > 0) | ||||
option_sayas = 0; | option_sayas = 0; | ||||
option_sayas2 = 0; | option_sayas2 = 0; | ||||
option_emphasis = 0; | option_emphasis = 0; | ||||
word_emphasis = 0; | |||||
InitText2(); | InitText2(); | ||||
#define N_LETTER_GROUPS 20 | #define N_LETTER_GROUPS 20 | ||||
/* flags from word dictionary */ | |||||
/* dictionary flags, word 1 */ | |||||
// bits 0-3 stressed syllable, 7=unstressed | // bits 0-3 stressed syllable, 7=unstressed | ||||
#define FLAG_SKIPWORDS 0x80 | #define FLAG_SKIPWORDS 0x80 | ||||
#define FLAG_PREPAUSE 0x100 | #define FLAG_PREPAUSE 0x100 | ||||
#define FLAG_UNSTRESS_END 0x2000 /* reduce stress at end of clause */ | #define FLAG_UNSTRESS_END 0x2000 /* reduce stress at end of clause */ | ||||
#define FLAG_ATEND 0x4000 /* use this pronunciation if at end of clause */ | #define FLAG_ATEND 0x4000 /* use this pronunciation if at end of clause */ | ||||
#define FLAG_CAPITAL 0x8000 /* pronunciation if initial letter is upper case */ | |||||
#define FLAG_DOT 0x10000 /* ignore '.' after word (abbreviation) */ | #define FLAG_DOT 0x10000 /* ignore '.' after word (abbreviation) */ | ||||
#define FLAG_ABBREV 0x20000 // spell as letters, even with a vowel, OR use specified pronunciation rather than split into letters | #define FLAG_ABBREV 0x20000 // spell as letters, even with a vowel, OR use specified pronunciation rather than split into letters | ||||
#define FLAG_STEM 0x40000 // must have a suffix | #define FLAG_STEM 0x40000 // must have a suffix | ||||
#define FLAG_ALT_TRANS 0x100000 // language specific | #define FLAG_ALT_TRANS 0x100000 // language specific | ||||
#define FLAG_ALT2_TRANS 0x200000 // language specific | #define FLAG_ALT2_TRANS 0x200000 // language specific | ||||
#define FLAG_VERBF 0x400000 /* verb follows */ | |||||
#define FLAG_VERBSF 0x800000 /* verb follows, may have -s suffix */ | |||||
#define FLAG_NOUNF 0x1000000 /* noun follows */ | |||||
#define FLAG_VERB 0x2000000 /* pronunciation for verb */ | |||||
#define FLAG_PAST 0x4000000 /* pronunciation for past tense */ | |||||
#define FLAG_PASTF 0x8000000 /* past tense follows */ | |||||
#define FLAG_VERB_EXT 0x10000000 /* extend the 'verb follows' */ | |||||
#define FLAG_PAUSE1 0x10000000 // shorter prepause | |||||
#define FLAG_TEXTMODE 0x20000000 // word translates to replacement text, not phonemes | #define FLAG_TEXTMODE 0x20000000 // word translates to replacement text, not phonemes | ||||
#define BITNUM_FLAG_TEXTMODE 29 | #define BITNUM_FLAG_TEXTMODE 29 | ||||
#define FLAG_PAUSE1 0x40000000 // shorter prepause | |||||
#define FLAG_FOUND 0x80000000 /* pronunciation was found in the dictionary list */ | |||||
#define FLAG_FOUND_ATTRIBUTES 0x40000000 // word was found in the dictionary list (has attributes) | |||||
#define FLAG_FOUND 0x80000000 // pronunciation was found in the dictionary list | |||||
// dictionary flags, word 2 | |||||
#define FLAG_VERBF 0x1 /* verb follows */ | |||||
#define FLAG_VERBSF 0x2 /* verb follows, may have -s suffix */ | |||||
#define FLAG_NOUNF 0x4 /* noun follows */ | |||||
#define FLAG_PASTF 0x8 /* past tense follows */ | |||||
#define FLAG_VERB 0x10 /* pronunciation for verb */ | |||||
#define FLAG_NOUN 0x20 /* pronunciation for noun */ | |||||
#define FLAG_PAST 0x40 /* pronunciation for past tense */ | |||||
#define FLAG_VERB_EXT 0x100 /* extend the 'verb follows' */ | |||||
#define FLAG_CAPITAL 0x200 /* pronunciation if initial letter is upper case */ | |||||
#define FLAG_ALLCAPS 0x400 // only if the word is all capitals | |||||
// wordflags, flags in source word | // wordflags, flags in source word | ||||
#define FLAG_ALL_UPPER 0x1 /* no lower case letters in the word */ | #define FLAG_ALL_UPPER 0x1 /* no lower case letters in the word */ | ||||
#define FLAG_HAS_PLURAL 0x4 /* upper-case word with s or 's lower-case ending */ | #define FLAG_HAS_PLURAL 0x4 /* upper-case word with s or 's lower-case ending */ | ||||
#define FLAG_PHONEMES 0x8 /* word is phonemes */ | #define FLAG_PHONEMES 0x8 /* word is phonemes */ | ||||
#define FLAG_LAST_WORD 0x10 /* last word in clause */ | #define FLAG_LAST_WORD 0x10 /* last word in clause */ | ||||
#define FLAG_STRESSED_WORD 0x20 /* this word has explicit stress */ | |||||
//#define FLAG_STRESSED_WORD 0x20 /* this word has explicit stress */ | |||||
#define FLAG_EMBEDDED 0x40 /* word is preceded by embedded commands */ | #define FLAG_EMBEDDED 0x40 /* word is preceded by embedded commands */ | ||||
#define FLAG_HYPHEN 0x80 | #define FLAG_HYPHEN 0x80 | ||||
#define FLAG_NOSPACE 0x100 // word is not separated from previous word by a space | #define FLAG_NOSPACE 0x100 // word is not separated from previous word by a space | ||||
#define FLAG_DONT_SWITCH_TRANSLATOR 0x1000 | #define FLAG_DONT_SWITCH_TRANSLATOR 0x1000 | ||||
#define FLAG_SUFFIX_REMOVED 0x2000 | #define FLAG_SUFFIX_REMOVED 0x2000 | ||||
#define FLAG_HYPHEN_AFTER 0x4000 | #define FLAG_HYPHEN_AFTER 0x4000 | ||||
#define FLAG_NO_PREFIX 0x8000 | |||||
#define FLAG_NO_TRACE 0x10000 | |||||
#define FLAG_EMPHASIZED 0x8000 | |||||
#define FLAG_NO_TRACE 0x10000 // passed to TranslateRules() to suppress dictionary lookup printout | |||||
#define FLAG_NO_PREFIX 0x20000 | |||||
// prefix/suffix flags (bits 8 to 14, bits 16 to 22) don't use 0x8000, 0x800000 | // prefix/suffix flags (bits 8 to 14, bits 16 to 22) don't use 0x8000, 0x800000 | ||||
#define SUFX_E 0x0100 // e may have been added | #define SUFX_E 0x0100 // e may have been added | ||||
int ReadClause(FILE *f_in, char *buf, unsigned short *charix, int n_buf); | int ReadClause(FILE *f_in, char *buf, unsigned short *charix, int n_buf); | ||||
int AnnouncePunctuation(int c1, int c2, char *buf, int ix); | int AnnouncePunctuation(int c1, int c2, char *buf, int ix); | ||||
const char *LookupDict2(const char *word, const char *word2, char *phonetic, unsigned int *flags, int end_flags); | |||||
const char *LookupDict2(const char *word, const char *word2, char *phonetic, unsigned int *flags, int end_flags, WORD_TAB *wtab); | |||||
const char *LookupSpecial(const char *string); | const char *LookupSpecial(const char *string); | ||||
const char *LookupCharName(int c); | const char *LookupCharName(int c); | ||||
int LookupNum2(int value, int control, char *ph_out); | int LookupNum2(int value, int control, char *ph_out); | ||||
virtual int ChangePhonemes(PHONEME_LIST2 *phlist, int n_ph, int index, PHONEME_TAB *ph, CHANGEPH *ch); | virtual int ChangePhonemes(PHONEME_LIST2 *phlist, int n_ph, int index, PHONEME_TAB *ph, CHANGEPH *ch); | ||||
int IsVowel(int letter); | int IsVowel(int letter); | ||||
int LookupDictList(char **wordptr, char *ph_out, unsigned int *flags, int end_flags); | |||||
int LookupDictList(char **wordptr, char *ph_out, unsigned int *flags, int end_flags, WORD_TAB *wtab); | |||||
int Lookup(const char *word, char *ph_out); | int Lookup(const char *word, char *ph_out); | ||||
int expect_verb; | int expect_verb; | ||||
int expect_past; // expect past tense | int expect_past; // expect past tense | ||||
int expect_verb_s; | int expect_verb_s; | ||||
int expect_noun; | |||||
int word_flags; // word is all upper case | int word_flags; // word is all upper case | ||||
int prev_last_stress; | int prev_last_stress; | ||||
int prepause_timeout; | int prepause_timeout; |
voice_t voicedata; | voice_t voicedata; | ||||
voice_t *voice = &voicedata; | voice_t *voice = &voicedata; | ||||
char *fgets_strip(char *buf, int size, FILE *f_in)
{//===============================================
// Read one line from f_in into buf (at most size-1 characters), truncate it
// at the first "//" comment marker, then strip trailing whitespace
// (including the newline left by fgets).
// Returns buf, or NULL when fgets() returns NULL (end of file or read error).
	size_t len;
	char *p;

	if(fgets(buf,size,f_in) == NULL)
		return(NULL);

	// Cut the comment first, so that spaces between the value and the "//"
	// are removed by the whitespace stripping below.
	if((p = strstr(buf,"//")) != NULL)
		*p = 0;

	len = strlen(buf);
	// Strip right down to an empty string for blank lines.
	// The cast is needed because passing a negative char to isspace() is undefined.
	while((len > 0) && isspace((unsigned char)buf[len-1]))
		buf[--len] = 0;

	return(buf);
}
void SetToneAdjust(voice_t *voice, int *tone_pts) | void SetToneAdjust(voice_t *voice, int *tone_pts) | ||||
{//============================================== | {//============================================== | ||||
vgender[0] = 0; | vgender[0] = 0; | ||||
age = 0; | age = 0; | ||||
while(fgets(linebuf,sizeof(linebuf),f_in) != NULL) | |||||
while(fgets_strip(linebuf,sizeof(linebuf),f_in) != NULL) | |||||
{ | { | ||||
linebuf[strlen(linebuf)-1] = 0; | |||||
if(memcmp(linebuf,"name",4)==0) | if(memcmp(linebuf,"name",4)==0) | ||||
{ | { | ||||
p = &linebuf[4]; | p = &linebuf[4]; | ||||
SelectPhonemeTableName(phonemes_name); // set up phoneme_tab | SelectPhonemeTableName(phonemes_name); // set up phoneme_tab | ||||
while((f_voice != NULL) && (fgets(buf,sizeof(buf),f_voice) != NULL)) | |||||
while((f_voice != NULL) && (fgets_strip(buf,sizeof(buf),f_voice) != NULL)) | |||||
{ | { | ||||
if((p = strstr(buf,"//")) != NULL) | |||||
*p = 0; | |||||
// isolate the attribute name | // isolate the attribute name | ||||
for(p=buf; (*p != 0) && !isspace(*p); p++); | for(p=buf; (*p != 0) && !isspace(*p); p++); | ||||
*p++ = 0; | *p++ = 0; |
int amp; | int amp; | ||||
// normal, none, reduced, moderate, strong | // normal, none, reduced, moderate, strong | ||||
static const unsigned char amp_emphasis[5] = {16, 16, 8, 24, 32}; | |||||
static const unsigned char amp_emphasis[5] = {16, 16, 8, 16, 26}; | |||||
amp = (embedded_value[EMBED_A])*60/100; | amp = (embedded_value[EMBED_A])*60/100; | ||||
general_amplitude = amp * amp_emphasis[embedded_value[EMBED_F]] / 16; | general_amplitude = amp * amp_emphasis[embedded_value[EMBED_F]] / 16; |