Lang eo: use .replace in eo_rules for "cx" etc. git-svn-id: https://espeak.svn.sourceforge.net/svnroot/espeak/trunk@94 d46cf337-b52f-0410-862d-fd96e6ae7743master
dinosaur daIn@sO@ | dinosaur daIn@sO@ | ||||
diode daIoUd | diode daIoUd | ||||
diplomacy dIpl'oUm@sI2 | diplomacy dIpl'oUm@sI2 | ||||
dipole daIpoUl | |||||
disconsolate dIsk'0ns@l@t | disconsolate dIsk'0ns@l@t | ||||
disc dIsk // for discs | disc dIsk // for discs | ||||
disciple dIs'aIp@L | disciple dIs'aIp@L | ||||
fishnet fISnEt | fishnet fISnEt | ||||
fiord fi:O@d | fiord fi:O@d | ||||
fix fIks // for fixer | fix fIks // for fixer | ||||
fjord fIO@d | |||||
flagellum fla2dZEl@m | flagellum fla2dZEl@m | ||||
flexible flEksIb@L | flexible flEksIb@L | ||||
flier flaI3 | flier flaI3 | ||||
Phoebe fi:bi: | Phoebe fi:bi: | ||||
Rachel reItS@L | Rachel reItS@L | ||||
Rhys ri:s | Rhys ri:s | ||||
Robert r0b3t | |||||
Roderick r0drIk | Roderick r0drIk | ||||
Samantha s@manT@ | Samantha s@manT@ | ||||
Sarah se@r@ | Sarah se@r@ |
// ?7 Scottish | // ?7 Scottish | ||||
// ?8 Use full vowel, not schwa in some word endings | // ?8 Use full vowel, not schwa in some word endings | ||||
.replace | |||||
ff f // ligature | |||||
fi fi | |||||
fl fl | |||||
.group a | .group a | ||||
_) a (_ a2 | _) a (_ a2 | ||||
l) egion i:dZ@n | l) egion i:dZ@n | ||||
egy (pt i:dZI | egy (pt i:dZI | ||||
n) e (gro i: | n) e (gro i: | ||||
eh (_ eI | |||||
ei eI | ei eI | ||||
_) ei aI | _) ei aI | ||||
c) ei i: | c) ei i: | ||||
r) iv (el Iv | r) iv (el Iv | ||||
r) iv (en Iv | r) iv (en Iv | ||||
r) iv (et Iv | r) iv (et Iv | ||||
_l) iv (el aIv | |||||
_l) iv (eB aIv | |||||
XC) iv (el Iv | XC) iv (el Iv | ||||
_g) iv (e Iv | _g) iv (e Iv | ||||
_l) iv (e Iv | _l) iv (e Iv | ||||
o (bliga 0 | o (bliga 0 | ||||
o (bligato %0 | o (bligato %0 | ||||
o (blo 0 | o (blo 0 | ||||
_r) o (ber 0 | |||||
r) o (bot oU | r) o (bot oU | ||||
_) o (bs %0 | _) o (bs %0 | ||||
_) obs (er %0bz | _) obs (er %0bz | ||||
€ jU@roUz | € jU@roUz | ||||
♯ SA@p | ♯ SA@p | ||||
♭ flat | ♭ flat | ||||
ff f | |||||
fi fI | |||||
fl fl |
_. punkto | _. punkto | ||||
v vo // not Roman numbers | |||||
x ikso | |||||
t.e t'e,e | t.e t'e,e | ||||
uk $abbrev | uk $abbrev | ||||
// includes both accented characters and ascii digraph alternatives (cx, gx, ux, etc) | |||||
l' la | l' la | ||||
la %la | la %la | ||||
ne $u+ // negative | ne $u+ // negative | ||||
cxu $u // interrogative particle | |||||
cxi $u | |||||
ĉu $u // cxi interrogative particle | |||||
ĉi $u // cxi interrogative particle | |||||
ĉu $u // interrogative particle | |||||
ĉi $u // interrogative particle | |||||
ia $u // any | ia $u // any | ||||
// prepositions | // prepositions | ||||
de $u+ $brk | de $u+ $brk | ||||
(de la) dela $u $brk | (de la) dela $u $brk | ||||
antaux $u+ $pause | |||||
antaŭ $u+ $pause | antaŭ $u+ $pause | ||||
apud $u+ $pause | apud $u+ $pause | ||||
ol $u+ $brk | ol $u+ $brk | ||||
kun $u+ $pause | kun $u+ $pause | ||||
laŭ $u+ $brk | laŭ $u+ $brk | ||||
laux $u+ $brk | |||||
per $u+ $brk | per $u+ $brk | ||||
preter $u+ $brk | preter $u+ $brk | ||||
pri $u+ $pause | pri $u+ $pause | ||||
mi $u | mi $u | ||||
ci $u | ci $u | ||||
li $u | li $u | ||||
sxi $u | |||||
ŝi $u | ŝi $u | ||||
gxi $u | |||||
ĝi $u | ĝi $u | ||||
ni $u | ni $u | ||||
vi $u | |||||
vi vi $u | |||||
ili $u | ili $u | ||||
oni $u | oni $u | ||||
si $u | si $u | ||||
mia $u+ | mia $u+ | ||||
cia $u+ | cia $u+ | ||||
lia $u+ | lia $u+ | ||||
sxia $u+ | |||||
ŝia $u+ | ŝia $u+ | ||||
gxia $u+ | |||||
ĝia $u+ | ĝia $u+ | ||||
nia $u+ | nia $u+ | ||||
via $u+ | via $u+ | ||||
miaj $u+ | miaj $u+ | ||||
ciaj $u+ | ciaj $u+ | ||||
liaj $u+ | liaj $u+ | ||||
sxiaj $u+ | |||||
ŝiaj $u+ | ŝiaj $u+ | ||||
gxiaj $u+ | |||||
ĝiaj $u+ | ĝiaj $u+ | ||||
niaj $u+ | niaj $u+ | ||||
viaj $u+ | viaj $u+ | ||||
// conjunctions | // conjunctions | ||||
ke $u $pause // that (conj) | ke $u $pause // that (conj) | ||||
aux $u $pause | |||||
aŭ $u $pause // aux | |||||
cxar $u $pause | |||||
aŭ $u $pause | |||||
ĉar $u $pause | ĉar $u $pause | ||||
kaj $u $pause | kaj $u $pause | ||||
sed $u $pause | sed $u $pause |
// includes both accented characters and ascii digraph alternatives (cx, gx, ux, etc) | // includes both accented characters and ascii digraph alternatives (cx, gx, ux, etc) | ||||
.replace | |||||
cx ĉ | |||||
gx ĝ | |||||
hx ĥ | |||||
jx ĵ | |||||
sx ŝ | |||||
ux ŭ | |||||
.group a | .group a | ||||
_) a(_ a | _) a(_ a | ||||
a a | a a | ||||
aj aI | aj aI | ||||
a (jx a | |||||
aux aU | |||||
aŭ aU | aŭ aU | ||||
D_-_) a (_ %a //'a' after a number | D_-_) a (_ %a //'a' after a number | ||||
D_-_) an (_ %an //'an' after a number | D_-_) an (_ %an //'an' after a number | ||||
.group c | .group c | ||||
_) c(_ tso | _) c(_ tso | ||||
_) ch(_ tSo | _) ch(_ tSo | ||||
_) cx(_ tSo | |||||
c ts | c ts | ||||
ch (K tS // foreign words | ch (K tS // foreign words | ||||
K) ch tS // foreign words | K) ch tS // foreign words | ||||
cx tS | |||||
.group d | .group d | ||||
_) e(_ e | _) e(_ e | ||||
e e | e e | ||||
ej eI | ej eI | ||||
e (jx e | |||||
eux eU | |||||
eŭ eU | eŭ eU | ||||
.group f | .group f | ||||
.group g | .group g | ||||
_) g(_ go | _) g(_ go | ||||
_) gx(_ dZo | |||||
g g | g g | ||||
gh (K dZ | gh (K dZ | ||||
gx dZ | |||||
.group h | .group h | ||||
_) h(_ ho | _) h(_ ho | ||||
_) hx(_ xo | |||||
h h | h h | ||||
hh x | hh x | ||||
hx x | |||||
.group i | .group i | ||||
.group j | .group j | ||||
_) j(_ jo | _) j(_ jo | ||||
_) jx(_ Zo | |||||
j j | j j | ||||
jh (C Z | jh (C Z | ||||
jx Z | |||||
.group k | .group k | ||||
_) k(_ ko | _) k(_ ko | ||||
_) o(_ o | _) o(_ o | ||||
o o | o o | ||||
oj OI | oj OI | ||||
o (jx o | |||||
.group p | .group p | ||||
_) p(_ po | _) p(_ po | ||||
.group s | .group s | ||||
_) s(_ so | _) s(_ so | ||||
_) sh(_ So | _) sh(_ So | ||||
_) sx(_ So | |||||
s s | s s | ||||
sx S | |||||
sh (K S // foreign words | sh (K S // foreign words | ||||
K) sh S // foreign words | K) sh S // foreign words | ||||
.group u | .group u | ||||
_) u(_ u | _) u(_ u | ||||
_) ux(_ wo | |||||
u u | u u | ||||
uj uI | uj uI | ||||
u (jx u | |||||
ux w | |||||
.group v | .group v |
</pre> | </pre> | ||||
then if "to" is at the end of the clause, we get [tu:], if not then we get [t@]. | then if "to" is at the end of the clause, we get [tu:], if not then we get [t@]. | ||||
<p> | |||||
<h4>4.4.4 Translating a Word with another Word</h4> | |||||
Rather than specifying the pronunciation of a word by a phoneme string, you can specify another "sounds like" word.<p>Use the attribute <b>$text</b> eg.<p> | |||||
<pre> | |||||
cough coff $text | |||||
</pre> | |||||
Alternatively, use the command <b>$textmode</b> on a line by itself to turn this on for all subsequent entries in the file, until it's turned off by <b>$phonememode</b>. eg.<p> | |||||
<pre> | |||||
$textmode | |||||
cough coff | |||||
through threw | |||||
$phonememode | |||||
</pre> | |||||
This feature cannot be used for the special entries in the <b>_list</b> files which start with an underscore, such as numbers.<p> | |||||
Currently "textmode" entries are only recognized for complete words, and not for stems from which a prefix or suffix has been removed (eg. the word "coughs" would not match the example above). | |||||
<p> | |||||
<p> <hr> | <p> <hr> | ||||
<h3>4.5 Conditional Rules</h3> | <h3>4.5 Conditional Rules</h3> | ||||
Rules in a <b>_rules</b> file and entries in a <b>_list</b> file can be made conditional. They apply only to some voices. This can be useful to specify different pronunciations for different variants of a language (dialects or accents).<p> | Rules in a <b>_rules</b> file and entries in a <b>_list</b> file can be made conditional. They apply only to some voices. This can be useful to specify different pronunciations for different variants of a language (dialects or accents).<p> | ||||
</tbody></table> | </tbody></table> | ||||
</ul> | </ul> | ||||
<p> <hr> | |||||
<h3>4.7 Character Substitution</h3> | |||||
Character substitutions can be specified by using a <b> .replace </b> section at the start of the <b> _rules </b> file. Each line specifies either one or two alphabetic characters to be replaced by another one or two alphabetic characters. This substitution is done to a word before it is translated using the spelling-to-phoneme rules. Only the lower-case version of the characters needs to be specified. eg.<p> | |||||
.replace<br> | |||||
ô ő // (Hungarian) allow the use of o-circumflex instead of o-double-acute<br> | |||||
û ű<p> | |||||
cx ĉ // (Esperanto) allow "cx" as an alternative to c-circumflex<p> | |||||
fi fi // replace a single character ligature by two characters | |||||
<p> | |||||
</body> | </body> | ||||
</html> | </html> |
vowelin f1=0 f2=1700 -300 300 f3=-100 80 | vowelin f1=0 f2=1700 -300 300 f3=-100 80 | ||||
vowelout f1=0 f2=1700 -300 250 f3=-100 80 rms=20 | vowelout f1=0 f2=1700 -300 250 f3=-100 80 rms=20 | ||||
lengthmod 2 | lengthmod 2 | ||||
wave ustop/t | |||||
wave ustop/t%90 | |||||
before _ ustop/t_ | before _ ustop/t_ | ||||
before @- ustop/t_dnt%50 | before @- ustop/t_dnt%50 | ||||
before r ustop/tr | before r ustop/tr |
vowelin f1=0 f2=1700 -300 300 f3=-100 80 | vowelin f1=0 f2=1700 -300 300 f3=-100 80 | ||||
vowelout f1=0 f2=1700 -300 300 f3=-100 80 | vowelout f1=0 f2=1700 -300 300 f3=-100 80 | ||||
lengthmod 5 | lengthmod 5 | ||||
wave x/d%90 | |||||
wave x/d%80 | |||||
endphoneme | endphoneme | ||||
vowelin f1=0 f2=1700 -300 300 f3=-100 80 | vowelin f1=0 f2=1700 -300 300 f3=-100 80 | ||||
vowelout f1=0 f2=1700 -300 250 f3=-100 80 rms=20 | vowelout f1=0 f2=1700 -300 250 f3=-100 80 rms=20 | ||||
lengthmod 2 | lengthmod 2 | ||||
wave ustop/t | |||||
wave ustop/t%90 | |||||
before _ ustop/t_ | before _ ustop/t_ | ||||
before @- ustop/t_dnt%50 | before @- ustop/t_dnt%50 | ||||
before r ustop/tr | before r ustop/tr |
static int transpose_offset; // transpose character range for LookupDictList() | static int transpose_offset; // transpose character range for LookupDictList() | ||||
static int transpose_min; | static int transpose_min; | ||||
static int transpose_max; | static int transpose_max; | ||||
static int text_mode = 0; | |||||
int hash_counts[N_HASH_DICT]; | int hash_counts[N_HASH_DICT]; | ||||
char *hash_chains[N_HASH_DICT]; | char *hash_chains[N_HASH_DICT]; | ||||
{"$pastf", 27}, /* past tense follows */ | {"$pastf", 27}, /* past tense follows */ | ||||
{"$verbextend",28}, /* extend influence of 'verb follows' */ | {"$verbextend",28}, /* extend influence of 'verb follows' */ | ||||
{"$brk", 30}, /* a shorter $pause */ | |||||
{"$text", 29}, // word translates to replcement text, not phonemes | |||||
{"$brk", 30}, // a shorter $pause | |||||
// doesn't set dictionary_flags | // doesn't set dictionary_flags | ||||
{"$?", 100}, // conditional rule, followed by byte giving the condition number | {"$?", 100}, // conditional rule, followed by byte giving the condition number | ||||
{"$textmode", 200}, | |||||
{"$phonememode", 201}, | |||||
{NULL, -1} | {NULL, -1} | ||||
}; | }; | ||||
int len_word; | int len_word; | ||||
int len_phonetic; | int len_phonetic; | ||||
int text_not_phonemes; // this word specifies replacement text, not phonemes | |||||
char *mnemptr; | char *mnemptr; | ||||
char *comment; | char *comment; | ||||
unsigned char flag_codes[100]; | unsigned char flag_codes[100]; | ||||
char encoded_ph[200]; | char encoded_ph[200]; | ||||
unsigned char bad_phoneme[4]; | unsigned char bad_phoneme[4]; | ||||
p = linebuf; | |||||
comment = NULL; | comment = NULL; | ||||
text_not_phonemes = 0; | |||||
phonetic = word = ""; | phonetic = word = ""; | ||||
p = linebuf; | |||||
// while(isspace2(*p)) p++; | |||||
#ifdef deleted | |||||
if(*p == '$') | |||||
{ | |||||
if(memcmp(p,"$textmode",9) == 0) | |||||
{ | |||||
text_mode = 1; | |||||
return(0); | |||||
} | |||||
if(memcmp(p,"$phonememode",12) == 0) | |||||
{ | |||||
text_mode = 0; | |||||
return(0); | |||||
} | |||||
} | |||||
#endif | |||||
step = 0; | step = 0; | ||||
c = 0; | c = 0; | ||||
ix = LookupMnem(mnem_flags,mnemptr); | ix = LookupMnem(mnem_flags,mnemptr); | ||||
if(ix > 0) | if(ix > 0) | ||||
flag_codes[n_flag_codes++] = ix; | |||||
{ | |||||
if(ix == 200) | |||||
{ | |||||
text_mode = 1; | |||||
} | |||||
else | |||||
if(ix == 201) | |||||
{ | |||||
text_mode = 0; | |||||
} | |||||
else | |||||
if(ix == BITNUM_FLAG_DICTTEXT) | |||||
{ | |||||
text_not_phonemes = 1; | |||||
} | |||||
else | |||||
{ | |||||
flag_codes[n_flag_codes++] = ix; | |||||
} | |||||
} | |||||
else | else | ||||
{ | { | ||||
fprintf(f_log,"%5d: Unknown keyword: %s\n",linenum,mnemptr); | fprintf(f_log,"%5d: Unknown keyword: %s\n",linenum,mnemptr); | ||||
#endif | #endif | ||||
return(0); /* blank line */ | return(0); /* blank line */ | ||||
} | } | ||||
EncodePhonemes(phonetic,encoded_ph,bad_phoneme); | |||||
if(strchr(encoded_ph,phonSWITCH) != 0) | |||||
if(text_not_phonemes || text_mode) | |||||
{ | { | ||||
flag_codes[n_flag_codes++] = BITNUM_FLAG_ONLY_S; | |||||
strcpy(encoded_ph,phonetic); // this is replacement text, so don't encode as phonemes | |||||
flag_codes[n_flag_codes++] = BITNUM_FLAG_DICTTEXT; | |||||
} | } | ||||
for(ix=0; ix<255; ix++) | |||||
else | |||||
{ | { | ||||
c = encoded_ph[ix]; | |||||
if(c == 0) break; | |||||
if(c == 255) | |||||
EncodePhonemes(phonetic,encoded_ph,bad_phoneme); | |||||
if(strchr(encoded_ph,phonSWITCH) != 0) | |||||
{ | { | ||||
/* unrecognised phoneme, report error */ | |||||
fprintf(f_log,"%5d: Bad phoneme [%c] (0x%x) in: %s %s\n",linenum,bad_phoneme[0],bad_phoneme[0],word,phonetic); | |||||
error_count++; | |||||
flag_codes[n_flag_codes++] = BITNUM_FLAG_ONLY_S; // don't match on suffixes (except 's') when switching languages | |||||
} | |||||
// check for errors in the phonemes codes | |||||
for(ix=0; ix<sizeof(encoded_ph); ix++) | |||||
{ | |||||
c = encoded_ph[ix]; | |||||
if(c == 0) break; | |||||
if(c == 255) | |||||
{ | |||||
/* unrecognised phoneme, report error */ | |||||
fprintf(f_log,"%5d: Bad phoneme [%c] (0x%x) in: %s %s\n",linenum,bad_phoneme[0],bad_phoneme[0],word,phonetic); | |||||
error_count++; | |||||
} | |||||
} | } | ||||
} | } | ||||
if((word[0] & 0x80)==0) // 7 bit ascii only | if((word[0] & 0x80)==0) // 7 bit ascii only | ||||
{ | { | ||||
// If first letter is uppercase, convert to lower case. (Only if it's 7bit ascii) | // If first letter is uppercase, convert to lower case. (Only if it's 7bit ascii) | ||||
char buf[sizeof(path_home)+45]; | char buf[sizeof(path_home)+45]; | ||||
char dict_line[128]; | char dict_line[128]; | ||||
text_mode = 0; | |||||
sprintf(buf,"%s%s",path,filename); | sprintf(buf,"%s%s",path,filename); | ||||
if((f_in = fopen(buf,"r")) == NULL) | if((f_in = fopen(buf,"r")) == NULL) | ||||
return(-1); | return(-1); |
DecodePhonemes(phonetic,ph_decoded); | DecodePhonemes(phonetic,ph_decoded); | ||||
if(flags != NULL) | if(flags != NULL) | ||||
flags1 = *flags; | flags1 = *flags; | ||||
fprintf(f_trans,"Found: %s [%s] %s\n",word1,ph_decoded,print_dflags(flags1)); | |||||
if((dictionary_flags & FLAG_DICTTEXT) == 0) | |||||
{ | |||||
fprintf(f_trans,"Found: %s [%s] %s\n",word1,ph_decoded,print_dflags(flags1)); | |||||
} | |||||
} | } | ||||
return(1); | return(1); | ||||
int Translator::LookupDictList(char *word1, char *ph_out, unsigned int *flags, int end_flags) | |||||
//=========================================================================================== | |||||
int Translator::LookupDictList(char **wordptr, char *ph_out, unsigned int *flags, int end_flags) | |||||
//============================================================================================== | |||||
/* Lookup a specified word in the word dictionary. | /* Lookup a specified word in the word dictionary. | ||||
Returns phonetic data in 'phonetic' and bits in 'flags' | Returns phonetic data in 'phonetic' and bits in 'flags' | ||||
{ | { | ||||
int length; | int length; | ||||
int found; | int found; | ||||
char *word1; | |||||
char *word2; | char *word2; | ||||
unsigned char c; | unsigned char c; | ||||
int nbytes; | int nbytes; | ||||
int c2; | int c2; | ||||
char word[N_WORD_BYTES]; | char word[N_WORD_BYTES]; | ||||
static char word_replacement[N_WORD_BYTES]; | |||||
length = 0; | length = 0; | ||||
word2 = word1; | |||||
word2 = word1 = *wordptr; | |||||
while((word2[nbytes = utf8_in(&c2,word2,0)]==' ') && (word2[nbytes+1]=='.')) | while((word2[nbytes = utf8_in(&c2,word2,0)]==' ') && (word2[nbytes+1]=='.')) | ||||
{ | { | ||||
found = LookupDict2(word,word1,ph_out,flags,end_flags); | found = LookupDict2(word,word1,ph_out,flags,end_flags); | ||||
if(found) return(1); | |||||
ph_out[0] = 0; | |||||
// try modifications to find a recognised word | |||||
if((end_flags & FLAG_SUFX_E_ADDED) && (word[length-1] == 'e')) | |||||
if(found == 0) | |||||
{ | { | ||||
// try removing an 'e' which has been added by RemoveEnding | |||||
word[length-1] = 0; | |||||
found = LookupDict2(word,word1,ph_out,flags,end_flags); | |||||
if(found) return(1); | |||||
ph_out[0] = 0; | |||||
// try modifications to find a recognised word | |||||
if((end_flags & FLAG_SUFX_E_ADDED) && (word[length-1] == 'e')) | |||||
{ | |||||
// try removing an 'e' which has been added by RemoveEnding | |||||
word[length-1] = 0; | |||||
found = LookupDict2(word,word1,ph_out,flags,end_flags); | |||||
} | |||||
else | |||||
if((end_flags & SUFX_D) && (word[length-1] == word[length-2])) | |||||
{ | |||||
// try removing a double letter | |||||
word[length-1] = 0; | |||||
found = LookupDict2(word,word1,ph_out,flags,end_flags); | |||||
} | |||||
} | } | ||||
if((end_flags & SUFX_D) && (word[length-1] == word[length-2])) | |||||
if(found) | |||||
{ | { | ||||
// try removing a double letter | |||||
word[length-1] = 0; | |||||
found = LookupDict2(word,word1,ph_out,flags,end_flags); | |||||
if(found) return(1); | |||||
if(*flags & FLAG_DICTTEXT) | |||||
{ | |||||
// the word translates to replacement text, not to phonemes | |||||
if(end_flags & FLAG_ALLOW_DICTTEXT) | |||||
{ | |||||
// only use replacement text if this is the original word, not if a prefix or suffix has been removed | |||||
word_replacement[0] = 0; | |||||
word_replacement[1] = ' '; | |||||
strcpy(&word_replacement[2],ph_out); // replacement word, preceded by zerochar and space | |||||
*wordptr = &word_replacement[2]; | |||||
if(option_phonemes == 2) | |||||
{ | |||||
fprintf(f_trans,"Replace: %s %s\n",word,*wordptr); | |||||
} | |||||
} | |||||
ph_out[0] = 0; | |||||
return(0); | |||||
} | |||||
return(1); | |||||
} | } | ||||
ph_out[0] = 0; | ph_out[0] = 0; | ||||
int Translator::Lookup(char *word, char *ph_out) | int Translator::Lookup(char *word, char *ph_out) | ||||
{//============================================= | {//============================================= | ||||
unsigned int flags; | unsigned int flags; | ||||
return(LookupDictList(word,ph_out,&flags,0)); | |||||
return(LookupDictList(&word,ph_out,&flags,0)); | |||||
} | } | ||||
char phonemes2[55]; | char phonemes2[55]; | ||||
static char buf[60]; | static char buf[60]; | ||||
if(LookupDictList(string,phonemes,&flags,0)) | |||||
if(LookupDictList(&string,phonemes,&flags,0)) | |||||
{ | { | ||||
SetWordStress(phonemes,flags,-1,0); | SetWordStress(phonemes,flags,-1,0); | ||||
DecodePhonemes(phonemes,phonemes2); | DecodePhonemes(phonemes,phonemes2); |
#include "translate.h" | #include "translate.h" | ||||
#include "wave.h" | #include "wave.h" | ||||
const char *version_string = "1.29.11 23.Oct.07"; | |||||
const char *version_string = "1.29.12 29.Oct.07"; | |||||
const int version_phdata = 0x012901; | const int version_phdata = 0x012901; | ||||
int option_device_number = -1; | int option_device_number = -1; |
tr->langopts.unstressed_wd1 = 1; | tr->langopts.unstressed_wd1 = 1; | ||||
tr->langopts.unstressed_wd2 = 2; | tr->langopts.unstressed_wd2 = 2; | ||||
tr->langopts.numbers = 0x1409; | |||||
tr->langopts.numbers = 0x1c09 + NUM_ROMAN; | |||||
} | } | ||||
break; | break; | ||||
unsigned int dictionary_flags2=0; | unsigned int dictionary_flags2=0; | ||||
int end_type=0; | int end_type=0; | ||||
int prefix_type=0; | int prefix_type=0; | ||||
char *word; | |||||
char *wordx; | |||||
char phonemes[N_WORD_PHONEMES]; | char phonemes[N_WORD_PHONEMES]; | ||||
char *ph_limit; | char *ph_limit; | ||||
char *phonemes_ptr; | char *phonemes_ptr; | ||||
static char word_iz[4] = {0,'i','z',0}; | static char word_iz[4] = {0,'i','z',0}; | ||||
static char word_ss[4] = {0,'s','s',0}; | static char word_ss[4] = {0,'s','s',0}; | ||||
word = word1; | |||||
prefix_phonemes[0] = 0; | prefix_phonemes[0] = 0; | ||||
end_phonemes[0] = 0; | end_phonemes[0] = 0; | ||||
ph_limit = &phonemes[N_WORD_PHONEMES]; | ph_limit = &phonemes[N_WORD_PHONEMES]; | ||||
// count the length of the word | // count the length of the word | ||||
utf8_in(&first_char,word,0); | |||||
wordx = word1; | |||||
utf8_in(&first_char,wordx,0); | |||||
word_length = 0; | word_length = 0; | ||||
while((*word != 0) && (*word != ' ')) | |||||
while((*wordx != 0) && (*wordx != ' ')) | |||||
{ | { | ||||
word += utf8_in(&last_char,word,0); | |||||
wordx += utf8_in(&last_char,wordx,0); | |||||
word_length++; | word_length++; | ||||
} | } | ||||
word = word1; | |||||
// try an initial lookup in the dictionary list, we may find a pronunciation specified, or | // try an initial lookup in the dictionary list, we may find a pronunciation specified, or | ||||
// we may just find some flags | // we may just find some flags | ||||
else | else | ||||
{ | { | ||||
spell_word = 0; | spell_word = 0; | ||||
found = LookupDictList(word,phonemes,&dictionary_flags,wflags << 16); | |||||
found = LookupDictList(&word1,phonemes,&dictionary_flags,FLAG_ALLOW_DICTTEXT | wflags << 16); // the original word | |||||
if(phonemes[0] == phonSWITCH) | if(phonemes[0] == phonSWITCH) | ||||
{ | { | ||||
// change to another language in order to translate this word | // change to another language in order to translate this word | ||||
if(word_phonemes[0] == phonSWITCH) | if(word_phonemes[0] == phonSWITCH) | ||||
return(0); | return(0); | ||||
found = TranslateNumber(word,phonemes,&dictionary_flags,wflags); | |||||
found = TranslateNumber(word1,phonemes,&dictionary_flags,wflags); | |||||
} | } | ||||
if(!found & ((word_flags & FLAG_UPPERS) != FLAG_FIRST_UPPER)) | if(!found & ((word_flags & FLAG_UPPERS) != FLAG_FIRST_UPPER)) | ||||
if((langopts.numbers & NUM_ROMAN) || ((langopts.numbers & NUM_ROMAN_UC) && (word_flags & FLAG_ALL_UPPER))) | if((langopts.numbers & NUM_ROMAN) || ((langopts.numbers & NUM_ROMAN_UC) && (word_flags & FLAG_ALL_UPPER))) | ||||
{ | { | ||||
if((found = TranslateRoman(word,phonemes)) != 0) | |||||
if((found = TranslateRoman(word1,phonemes)) != 0) | |||||
dictionary_flags |= FLAG_ABBREV; // don't spell capital Roman numbers as individual letters | dictionary_flags |= FLAG_ABBREV; // don't spell capital Roman numbers as individual letters | ||||
} | } | ||||
} | } | ||||
if(spell_word > 0) | if(spell_word > 0) | ||||
{ | { | ||||
// Speak as individual letters | // Speak as individual letters | ||||
word = word1; | |||||
wordx = word1; | |||||
posn = 0; | posn = 0; | ||||
phonemes[0] = 0; | phonemes[0] = 0; | ||||
end_type = 0; | end_type = 0; | ||||
while(*word != ' ') | |||||
while(*wordx != ' ') | |||||
{ | { | ||||
word += TranslateLetter(word, phonemes,spell_word); | |||||
wordx += TranslateLetter(wordx, phonemes,spell_word); | |||||
if(phonemes[0] == phonSWITCH) | if(phonemes[0] == phonSWITCH) | ||||
{ | { | ||||
// change to another language in order to translate this word | // change to another language in order to translate this word | ||||
posn = 0; | posn = 0; | ||||
length = 999; | length = 999; | ||||
while(((length < 3) && (length > 0))|| (word_length > 1 && Unpronouncable(word))) | |||||
wordx = word1; | |||||
while(((length < 3) && (length > 0))|| (word_length > 1 && Unpronouncable(wordx))) | |||||
{ | { | ||||
char *p; | char *p; | ||||
// This word looks "unpronouncable", so speak letters individually until we | // This word looks "unpronouncable", so speak letters individually until we | ||||
// find a remainder that we can pronounce. | // find a remainder that we can pronounce. | ||||
word += TranslateLetter(word,phonemes,0); | |||||
wordx += TranslateLetter(wordx,phonemes,0); | |||||
if(phonemes[0] == phonSWITCH) | if(phonemes[0] == phonSWITCH) | ||||
{ | { | ||||
// change to another language in order to translate this word | // change to another language in order to translate this word | ||||
return(0); | return(0); | ||||
} | } | ||||
p = &word[word_length-3]; | |||||
p = &wordx[word_length-3]; | |||||
if(memcmp(p,"'s ",3) == 0) | if(memcmp(p,"'s ",3) == 0) | ||||
{ | { | ||||
// remove a 's suffix and pronounce this separately (not as an individual letter) | // remove a 's suffix and pronounce this separately (not as an individual letter) | ||||
} | } | ||||
length=0; | length=0; | ||||
while(word[length] != ' ') length++; | |||||
while(wordx[length] != ' ') length++; | |||||
if(length > 0) | if(length > 0) | ||||
word[-1] = ' '; // prevent this affecting the pronunciation of the pronuncable part | |||||
wordx[-1] = ' '; // prevent this affecting the pronunciation of the pronuncable part | |||||
} | } | ||||
SetSpellingStress(phonemes,0); | SetSpellingStress(phonemes,0); | ||||
// anything left ? | // anything left ? | ||||
if(*word != ' ') | |||||
if(*wordx != ' ') | |||||
{ | { | ||||
// Translate the stem | // Translate the stem | ||||
unpron_length = strlen(phonemes); | unpron_length = strlen(phonemes); | ||||
end_type = TranslateRules(word, phonemes, N_WORD_PHONEMES, end_phonemes, wflags, dictionary_flags); | |||||
end_type = TranslateRules(wordx, phonemes, N_WORD_PHONEMES, end_phonemes, wflags, dictionary_flags); | |||||
if(phonemes[0] == phonSWITCH) | if(phonemes[0] == phonSWITCH) | ||||
{ | { | ||||
return(0); | return(0); | ||||
} | } | ||||
c_temp = word[-1]; | |||||
c_temp = wordx[-1]; | |||||
found = 0; | found = 0; | ||||
confirm_prefix = 1; | confirm_prefix = 1; | ||||
// remove any standard suffix and confirm that the prefix is still recognised | // remove any standard suffix and confirm that the prefix is still recognised | ||||
phonemes2[0] = 0; | phonemes2[0] = 0; | ||||
end2 = TranslateRules(word, phonemes2, N_WORD_PHONEMES, end_phonemes2, wflags|FLAG_NO_PREFIX|FLAG_NO_TRACE, dictionary_flags); | |||||
end2 = TranslateRules(wordx, phonemes2, N_WORD_PHONEMES, end_phonemes2, wflags|FLAG_NO_PREFIX|FLAG_NO_TRACE, dictionary_flags); | |||||
if(end2) | if(end2) | ||||
{ | { | ||||
RemoveEnding(word,end2,word_copy); | |||||
end_type = TranslateRules(word, phonemes, N_WORD_PHONEMES, end_phonemes, wflags|FLAG_NO_TRACE, dictionary_flags); | |||||
memcpy(word,word_copy,strlen(word_copy)); | |||||
RemoveEnding(wordx,end2,word_copy); | |||||
end_type = TranslateRules(wordx, phonemes, N_WORD_PHONEMES, end_phonemes, wflags|FLAG_NO_TRACE, dictionary_flags); | |||||
memcpy(wordx,word_copy,strlen(word_copy)); | |||||
if((end_type & SUFX_P) == 0) | if((end_type & SUFX_P) == 0) | ||||
{ | { | ||||
// after removing the suffix, the prefix is no longer recognised. | // after removing the suffix, the prefix is no longer recognised. | ||||
expect_verb = 1; // use the verb form of the word | expect_verb = 1; // use the verb form of the word | ||||
} | } | ||||
word[-1] = c_temp; | |||||
wordx[-1] = c_temp; | |||||
for(ix=(prefix_type & 0xf); ix>0; ix--) // num. of characters to remove | for(ix=(prefix_type & 0xf); ix>0; ix--) // num. of characters to remove | ||||
{ | { | ||||
word++; | |||||
while((*word & 0xc0) == 0x80) word++; // for multibyte characters | |||||
wordx++; | |||||
while((*wordx & 0xc0) == 0x80) wordx++; // for multibyte characters | |||||
} | } | ||||
c_temp = word[-1]; | |||||
word[-1] = ' '; | |||||
c_temp = wordx[-1]; | |||||
wordx[-1] = ' '; | |||||
confirm_prefix = 1; | confirm_prefix = 1; | ||||
end_type = 0; | end_type = 0; | ||||
found = LookupDictList(word,phonemes,&dictionary_flags2,SUFX_P | (wflags << 16)); | |||||
found = LookupDictList(&wordx,phonemes,&dictionary_flags2,SUFX_P | (wflags << 16)); // without prefix | |||||
if(dictionary_flags==0) | if(dictionary_flags==0) | ||||
dictionary_flags = dictionary_flags2; | dictionary_flags = dictionary_flags2; | ||||
else | else | ||||
prefix_flags = 1; | prefix_flags = 1; | ||||
if(found == 0) | if(found == 0) | ||||
{ | { | ||||
end_type = TranslateRules(word, phonemes, N_WORD_PHONEMES, end_phonemes, 0, dictionary_flags); | |||||
end_type = TranslateRules(wordx, phonemes, N_WORD_PHONEMES, end_phonemes, 0, dictionary_flags); | |||||
if(phonemes[0] == phonSWITCH) | if(phonemes[0] == phonSWITCH) | ||||
{ | { | ||||
// change to another language in order to translate this word | // change to another language in order to translate this word | ||||
word[-1] = c_temp; | |||||
wordx[-1] = c_temp; | |||||
strcpy(word_phonemes,phonemes); | strcpy(word_phonemes,phonemes); | ||||
return(0); | return(0); | ||||
} | } | ||||
strcpy(phonemes2,phonemes); | strcpy(phonemes2,phonemes); | ||||
// The word has a standard ending, re-translate without this ending | // The word has a standard ending, re-translate without this ending | ||||
end_flags = RemoveEnding(word,end_type,word_copy); | |||||
end_flags = RemoveEnding(wordx,end_type,word_copy); | |||||
phonemes_ptr = &phonemes[unpron_length]; | phonemes_ptr = &phonemes[unpron_length]; | ||||
phonemes_ptr[0] = 0; | phonemes_ptr[0] = 0; | ||||
if(prefix_phonemes[0] != 0) | if(prefix_phonemes[0] != 0) | ||||
{ | { | ||||
// lookup the stem without the prefix removed | // lookup the stem without the prefix removed | ||||
word[-1] = c_temp; | |||||
found = LookupDictList(word1,phonemes_ptr,&dictionary_flags2,end_flags | (wflags << 16)); | |||||
word[-1] = ' '; | |||||
wordx[-1] = c_temp; | |||||
found = LookupDictList(&word1,phonemes_ptr,&dictionary_flags2,end_flags | (wflags << 16)); // include prefix, but not suffix | |||||
wordx[-1] = ' '; | |||||
if(dictionary_flags==0) | if(dictionary_flags==0) | ||||
dictionary_flags = dictionary_flags2; | dictionary_flags = dictionary_flags2; | ||||
if(found) | if(found) | ||||
prefix_phonemes[0] = 0; // matched whole word, don't need prefix now | prefix_phonemes[0] = 0; // matched whole word, don't need prefix now | ||||
// if(found || (dictionary_flags2 != 0)) | |||||
// prefix_flags = 1; // ?? this looks wrong | |||||
if((found==0) && (dictionary_flags2 != 0)) | if((found==0) && (dictionary_flags2 != 0)) | ||||
prefix_flags = 1; | prefix_flags = 1; | ||||
} | } | ||||
if(found == 0) | if(found == 0) | ||||
{ | { | ||||
found = LookupDictList(word,phonemes_ptr,&dictionary_flags2,end_flags | (wflags << 16)); | |||||
found = LookupDictList(&wordx,phonemes_ptr,&dictionary_flags2,end_flags | (wflags << 16)); // without prefix and suffix | |||||
if(phonemes_ptr[0] == phonSWITCH) | if(phonemes_ptr[0] == phonSWITCH) | ||||
{ | { | ||||
// change to another language in order to translate this word | // change to another language in order to translate this word | ||||
memcpy(word,word_copy,strlen(word_copy)); | |||||
memcpy(wordx,word_copy,strlen(word_copy)); | |||||
strcpy(word_phonemes,phonemes_ptr); | strcpy(word_phonemes,phonemes_ptr); | ||||
return(0); | return(0); | ||||
} | } | ||||
else | else | ||||
{ | { | ||||
if(end_flags & FLAG_SUFX) | if(end_flags & FLAG_SUFX) | ||||
TranslateRules(word, phonemes, N_WORD_PHONEMES, NULL,wflags | FLAG_SUFFIX_REMOVED, dictionary_flags); | |||||
TranslateRules(wordx, phonemes, N_WORD_PHONEMES, NULL,wflags | FLAG_SUFFIX_REMOVED, dictionary_flags); | |||||
else | else | ||||
TranslateRules(word, phonemes, N_WORD_PHONEMES, NULL,wflags,dictionary_flags); | |||||
TranslateRules(wordx, phonemes, N_WORD_PHONEMES, NULL,wflags,dictionary_flags); | |||||
if(phonemes[0] == phonSWITCH) | if(phonemes[0] == phonSWITCH) | ||||
{ | { | ||||
// change to another language in order to translate this word | // change to another language in order to translate this word | ||||
strcpy(word_phonemes,phonemes); | strcpy(word_phonemes,phonemes); | ||||
memcpy(word,word_copy,strlen(word_copy)); | |||||
word[-1] = c_temp; | |||||
memcpy(wordx,word_copy,strlen(word_copy)); | |||||
wordx[-1] = c_temp; | |||||
return(0); | return(0); | ||||
} | } | ||||
} | } | ||||
end_phonemes[0] = 0; | end_phonemes[0] = 0; | ||||
} | } | ||||
} | } | ||||
word[-1] = c_temp; | |||||
wordx[-1] = c_temp; | |||||
} | } | ||||
} | } | ||||
expect_past = 0; | expect_past = 0; | ||||
} | } | ||||
if((word[0] != 0) && (!(dictionary_flags & FLAG_VERB_EXT))) | |||||
if((wordx[0] != 0) && (!(dictionary_flags & FLAG_VERB_EXT))) | |||||
{ | { | ||||
if(expect_verb > 0) | if(expect_verb > 0) | ||||
expect_verb -= 1; | expect_verb -= 1; |
// bits 0-3 stressed syllable, 7=unstressed | // bits 0-3 stressed syllable, 7=unstressed | ||||
#define FLAG_SKIPWORDS 0x80 | #define FLAG_SKIPWORDS 0x80 | ||||
#define FLAG_PREPAUSE 0x100 | #define FLAG_PREPAUSE 0x100 | ||||
#define FLAG_ONLY 0x200 | #define FLAG_ONLY 0x200 | ||||
#define FLAG_ONLY_S 0x400 | |||||
#define BITNUM_FLAG_ONLY 9 // bit 9 is set | #define BITNUM_FLAG_ONLY 9 // bit 9 is set | ||||
#define BITNUM_FLAG_ONLY_S 10 // bit 10 is set | #define BITNUM_FLAG_ONLY_S 10 // bit 10 is set | ||||
#define FLAG_ONLY_S 0x400 | |||||
#define FLAG_STRESS_END 0x800 /* full stress if at end of clause */ | #define FLAG_STRESS_END 0x800 /* full stress if at end of clause */ | ||||
#define FLAG_STRESS_END2 0x1000 /* full stress if at end of clause, or only followed by unstressed */ | #define FLAG_STRESS_END2 0x1000 /* full stress if at end of clause, or only followed by unstressed */ | ||||
#define FLAG_UNSTRESS_END 0x2000 /* reduce stress at end of clause */ | #define FLAG_UNSTRESS_END 0x2000 /* reduce stress at end of clause */ | ||||
#define FLAG_PASTF 0x8000000 /* past tense follows */ | #define FLAG_PASTF 0x8000000 /* past tense follows */ | ||||
#define FLAG_VERB_EXT 0x10000000 /* extend the 'verb follows' */ | #define FLAG_VERB_EXT 0x10000000 /* extend the 'verb follows' */ | ||||
#define FLAG_DICTTEXT 0x20000000 // word translates to replacement text, not phonemes | |||||
#define BITNUM_FLAG_DICTTEXT 29 | |||||
#define FLAG_PAUSE1 0x40000000 // shorter prepause | #define FLAG_PAUSE1 0x40000000 // shorter prepause | ||||
#define FLAG_FOUND 0x80000000 /* pronunciation was found in the dictionary list */ | #define FLAG_FOUND 0x80000000 /* pronunciation was found in the dictionary list */ | ||||
#define SUFX_Q 0x4000 // don't retranslate | #define SUFX_Q 0x4000 // don't retranslate | ||||
#define SUFX_T 0x10000 // don't affect the stress position in the stem | #define SUFX_T 0x10000 // don't affect the stress position in the stem | ||||
#define FLAG_ALLOW_DICTTEXT 0x02 // allow dictionary to translate to text rather than phonemes | |||||
#define FLAG_SUFX 0x04 | #define FLAG_SUFX 0x04 | ||||
#define FLAG_SUFX_S 0x08 | #define FLAG_SUFX_S 0x08 | ||||
#define FLAG_SUFX_E_ADDED 0x10 | #define FLAG_SUFX_E_ADDED 0x10 | ||||
virtual int ChangePhonemes(PHONEME_LIST2 *phlist, int n_ph, int index, PHONEME_TAB *ph, CHANGEPH *ch); | virtual int ChangePhonemes(PHONEME_LIST2 *phlist, int n_ph, int index, PHONEME_TAB *ph, CHANGEPH *ch); | ||||
int IsVowel(int letter); | int IsVowel(int letter); | ||||
int LookupDictList(char *word1, char *ph_out, unsigned int *flags, int end_flags); | |||||
int LookupDictList(char **wordptr, char *ph_out, unsigned int *flags, int end_flags); | |||||
int Lookup(char *word, char *ph_out); | int Lookup(char *word, char *ph_out); | ||||