Lang eo: use .replace in eo_rules for "cx" etc. git-svn-id: https://espeak.svn.sourceforge.net/svnroot/espeak/trunk@94 d46cf337-b52f-0410-862d-fd96e6ae7743master
@@ -725,6 +725,7 @@ dingy dIndZI2 | |||
dinosaur daIn@sO@ | |||
diode daIoUd | |||
diplomacy dIpl'oUm@sI2 | |||
dipole daIpoUl | |||
disconsolate dIsk'0ns@l@t | |||
disc dIsk // for discs | |||
disciple dIs'aIp@L | |||
@@ -857,6 +858,7 @@ finite faInaIt | |||
fishnet fISnEt | |||
fiord fi:O@d | |||
fix fIks // for fixer | |||
fjord fIO@d | |||
flagellum fla2dZEl@m | |||
flexible flEksIb@L | |||
flier flaI3 | |||
@@ -2255,7 +2257,6 @@ Penelope p@nEl@pI2 | |||
Phoebe fi:bi: | |||
Rachel reItS@L | |||
Rhys ri:s | |||
Robert r0b3t | |||
Roderick r0drIk | |||
Samantha s@manT@ | |||
Sarah se@r@ |
@@ -29,6 +29,11 @@ | |||
// ?7 Scottish | |||
// ?8 Use full vowel, not schwa in some word endings | |||
.replace | |||
ff f // ligature | |||
fi fi | |||
fl fl | |||
.group a | |||
_) a (_ a2 | |||
@@ -1653,6 +1658,7 @@ | |||
l) egion i:dZ@n | |||
egy (pt i:dZI | |||
n) e (gro i: | |||
eh (_ eI | |||
ei eI | |||
_) ei aI | |||
c) ei i: | |||
@@ -3092,7 +3098,7 @@ | |||
r) iv (el Iv | |||
r) iv (en Iv | |||
r) iv (et Iv | |||
_l) iv (el aIv | |||
_l) iv (eB aIv | |||
XC) iv (el Iv | |||
_g) iv (e Iv | |||
_l) iv (e Iv | |||
@@ -3377,6 +3383,7 @@ | |||
o (bliga 0 | |||
o (bligato %0 | |||
o (blo 0 | |||
_r) o (ber 0 | |||
r) o (bot oU | |||
_) o (bs %0 | |||
_) obs (er %0bz | |||
@@ -5223,6 +5230,3 @@ | |||
€ jU@roUz | |||
♯ SA@p | |||
♭ flat | |||
ff f | |||
fi fI | |||
fl fl |
@@ -32,6 +32,9 @@ _dpt komo_ | |||
_. punkto | |||
v vo // not Roman numbers | |||
x ikso | |||
t.e t'e,e | |||
@@ -54,22 +57,18 @@ uea $abbrev | |||
uk $abbrev | |||
// includes both accented characters and ascii digraph alternatives (cx, gx, ux, etc) | |||
l' la | |||
la %la | |||
ne $u+ // negative | |||
cxu $u // interrogative particle | |||
cxi $u | |||
ĉu $u // cxi interrogative particle | |||
ĉi $u // cxi interrogative particle | |||
ĉu $u // interrogative particle | |||
ĉi $u // interrogative particle | |||
ia $u // any | |||
// prepositions | |||
de $u+ $brk | |||
(de la) dela $u $brk | |||
antaux $u+ $pause | |||
antaŭ $u+ $pause | |||
apud $u+ $pause | |||
ol $u+ $brk | |||
@@ -87,7 +86,6 @@ je $u $brk | |||
kun $u+ $pause | |||
laŭ $u+ $brk | |||
laux $u+ $brk | |||
per $u+ $brk | |||
preter $u+ $brk | |||
pri $u+ $pause | |||
@@ -109,12 +107,10 @@ eĉ $brk | |||
mi $u | |||
ci $u | |||
li $u | |||
sxi $u | |||
ŝi $u | |||
gxi $u | |||
ĝi $u | |||
ni $u | |||
vi $u | |||
vi vi $u | |||
ili $u | |||
oni $u | |||
si $u | |||
@@ -122,9 +118,7 @@ si $u | |||
mia $u+ | |||
cia $u+ | |||
lia $u+ | |||
sxia $u+ | |||
ŝia $u+ | |||
gxia $u+ | |||
ĝia $u+ | |||
nia $u+ | |||
via $u+ | |||
@@ -134,9 +128,7 @@ sia $u+ | |||
miaj $u+ | |||
ciaj $u+ | |||
liaj $u+ | |||
sxiaj $u+ | |||
ŝiaj $u+ | |||
gxiaj $u+ | |||
ĝiaj $u+ | |||
niaj $u+ | |||
viaj $u+ | |||
@@ -193,9 +185,7 @@ tiam $brk | |||
// conjunctions | |||
ke $u $pause // that (conj) | |||
aux $u $pause | |||
aŭ $u $pause // aux | |||
cxar $u $pause | |||
aŭ $u $pause | |||
ĉar $u $pause | |||
kaj $u $pause | |||
sed $u $pause |
@@ -3,12 +3,19 @@ | |||
// includes both accented characters and ascii digraph alternatives (cx, gx, ux, etc) | |||
.replace | |||
cx ĉ | |||
gx ĝ | |||
hx ĥ | |||
jx ĵ | |||
sx ŝ | |||
ux ŭ | |||
.group a | |||
_) a(_ a | |||
a a | |||
aj aI | |||
a (jx a | |||
aux aU | |||
aŭ aU | |||
D_-_) a (_ %a //'a' after a number | |||
D_-_) an (_ %an //'an' after a number | |||
@@ -22,11 +29,9 @@ | |||
.group c | |||
_) c(_ tso | |||
_) ch(_ tSo | |||
_) cx(_ tSo | |||
c ts | |||
ch (K tS // foreign words | |||
K) ch tS // foreign words | |||
cx tS | |||
.group d | |||
@@ -40,8 +45,6 @@ | |||
_) e(_ e | |||
e e | |||
ej eI | |||
e (jx e | |||
eux eU | |||
eŭ eU | |||
.group f | |||
@@ -53,17 +56,13 @@ | |||
.group g | |||
_) g(_ go | |||
_) gx(_ dZo | |||
g g | |||
gh (K dZ | |||
gx dZ | |||
.group h | |||
_) h(_ ho | |||
_) hx(_ xo | |||
h h | |||
hh x | |||
hx x | |||
.group i | |||
@@ -73,10 +72,8 @@ | |||
.group j | |||
_) j(_ jo | |||
_) jx(_ Zo | |||
j j | |||
jh (C Z | |||
jx Z | |||
.group k | |||
_) k(_ ko | |||
@@ -106,7 +103,6 @@ | |||
_) o(_ o | |||
o o | |||
oj OI | |||
o (jx o | |||
.group p | |||
_) p(_ po | |||
@@ -131,9 +127,7 @@ | |||
.group s | |||
_) s(_ so | |||
_) sh(_ So | |||
_) sx(_ So | |||
s s | |||
sx S | |||
sh (K S // foreign words | |||
K) sh S // foreign words | |||
@@ -149,11 +143,8 @@ | |||
.group u | |||
_) u(_ u | |||
_) ux(_ wo | |||
u u | |||
uj uI | |||
u (jx u | |||
ux w | |||
.group v |
@@ -528,6 +528,23 @@ The dictionary list is searched from bottom to top. The first match that satisf | |||
</pre> | |||
then if "to" is at the end of the clause, we get [tu:], if not then we get [t@]. | |||
<p> | |||
<h4>4.4.4 Translating a Word with another Word</h4> | |||
Rather than specifying the pronunciation of a word by a phoneme string, you can specify another "sounds like" word.<p>Use the attribute <b>$text</b> eg.<p> | |||
<pre> | |||
cough coff $text | |||
</pre> | |||
Alternatively, use the command <b>$textmode</b> on a line by itself to turn this on for all subsequent entries in the file, until it's turned off by <b>$phonememode</b>. eg.<p> | |||
<pre> | |||
$textmode | |||
cough coff | |||
through threw | |||
$phonememode | |||
</pre> | |||
This feature cannot be used for the special entries in the <b>_list</b> files which start with an underscore, such as numbers.<p> | |||
Currently "textmode" entries are only recognized for complete words, and not for stems from which a prefix or suffix has been removed (eg. the word "coughs" would not match the example above). | |||
<p> | |||
<p> <hr> | |||
<h3>4.5 Conditional Rules</h3> | |||
Rules in a <b>_rules</b> file and entries in a <b>_list</b> file can be made conditional. They apply only to some voices. This can be useful to specify different pronunciations for different variants of a language (dialects or accents).<p> | |||
@@ -572,6 +589,17 @@ _0 to _9 | |||
</tbody></table> | |||
</ul> | |||
<p> <hr> | |||
<h3>4.7 Character Substitution</h3> | |||
Character substitutions can be specified by using a <b> .replace </b> section at the start of the <b> _rules </b> file. Each line specifies either one or two alphabetic characters to be replaced by another one or two alphabetic characters. This substitution is done to a word before it is translated using the spelling-to-phoneme rules. Only the lower-case version of the characters needs to be specified. eg.<p> | |||
.replace<br> | |||
ô ő // (Hungarian) allow the use of o-circumflex instead of o-double-acute<br> | |||
û ű<p> | |||
cx ĉ // (Esperanto) allow "cx" as an alternative to c-circumflex<p> | |||
fi fi // replace a single character ligature by two characters | |||
<p> | |||
</body> | |||
</html> |
@@ -18,7 +18,7 @@ phoneme t2 // [t] which doesn't reduce | |||
vowelin f1=0 f2=1700 -300 300 f3=-100 80 | |||
vowelout f1=0 f2=1700 -300 250 f3=-100 80 rms=20 | |||
lengthmod 2 | |||
wave ustop/t | |||
wave ustop/t%90 | |||
before _ ustop/t_ | |||
before @- ustop/t_dnt%50 | |||
before r ustop/tr |
@@ -304,7 +304,7 @@ phoneme t# // reduced [t] as in "city" | |||
vowelin f1=0 f2=1700 -300 300 f3=-100 80 | |||
vowelout f1=0 f2=1700 -300 300 f3=-100 80 | |||
lengthmod 5 | |||
wave x/d%90 | |||
wave x/d%80 | |||
endphoneme | |||
@@ -895,7 +895,7 @@ phoneme t | |||
vowelin f1=0 f2=1700 -300 300 f3=-100 80 | |||
vowelout f1=0 f2=1700 -300 250 f3=-100 80 rms=20 | |||
lengthmod 2 | |||
wave ustop/t | |||
wave ustop/t%90 | |||
before _ ustop/t_ | |||
before @- ustop/t_dnt%50 | |||
before r ustop/tr |
@@ -44,6 +44,7 @@ static int error_count; | |||
static int transpose_offset; // transpose character range for LookupDictList() | |||
static int transpose_min; | |||
static int transpose_max; | |||
static int text_mode = 0; | |||
int hash_counts[N_HASH_DICT]; | |||
char *hash_chains[N_HASH_DICT]; | |||
@@ -94,9 +95,13 @@ MNEM_TAB mnem_flags[] = { | |||
{"$pastf", 27}, /* past tense follows */ | |||
{"$verbextend",28}, /* extend influence of 'verb follows' */ | |||
{"$brk", 30}, /* a shorter $pause */ | |||
{"$text", 29}, // word translates to replcement text, not phonemes | |||
{"$brk", 30}, // a shorter $pause | |||
// doesn't set dictionary_flags | |||
{"$?", 100}, // conditional rule, followed by byte giving the condition number | |||
{"$textmode", 200}, | |||
{"$phonememode", 201}, | |||
{NULL, -1} | |||
}; | |||
@@ -169,17 +174,37 @@ int compile_line(char *linebuf, char *dict_line, int *hash) | |||
int len_word; | |||
int len_phonetic; | |||
int text_not_phonemes; // this word specifies replacement text, not phonemes | |||
char *mnemptr; | |||
char *comment; | |||
unsigned char flag_codes[100]; | |||
char encoded_ph[200]; | |||
unsigned char bad_phoneme[4]; | |||
p = linebuf; | |||
comment = NULL; | |||
text_not_phonemes = 0; | |||
phonetic = word = ""; | |||
p = linebuf; | |||
// while(isspace2(*p)) p++; | |||
#ifdef deleted | |||
if(*p == '$') | |||
{ | |||
if(memcmp(p,"$textmode",9) == 0) | |||
{ | |||
text_mode = 1; | |||
return(0); | |||
} | |||
if(memcmp(p,"$phonememode",12) == 0) | |||
{ | |||
text_mode = 0; | |||
return(0); | |||
} | |||
} | |||
#endif | |||
step = 0; | |||
c = 0; | |||
@@ -224,7 +249,26 @@ int compile_line(char *linebuf, char *dict_line, int *hash) | |||
ix = LookupMnem(mnem_flags,mnemptr); | |||
if(ix > 0) | |||
flag_codes[n_flag_codes++] = ix; | |||
{ | |||
if(ix == 200) | |||
{ | |||
text_mode = 1; | |||
} | |||
else | |||
if(ix == 201) | |||
{ | |||
text_mode = 0; | |||
} | |||
else | |||
if(ix == BITNUM_FLAG_DICTTEXT) | |||
{ | |||
text_not_phonemes = 1; | |||
} | |||
else | |||
{ | |||
flag_codes[n_flag_codes++] = ix; | |||
} | |||
} | |||
else | |||
{ | |||
fprintf(f_log,"%5d: Unknown keyword: %s\n",linenum,mnemptr); | |||
@@ -325,25 +369,35 @@ int compile_line(char *linebuf, char *dict_line, int *hash) | |||
#endif | |||
return(0); /* blank line */ | |||
} | |||
EncodePhonemes(phonetic,encoded_ph,bad_phoneme); | |||
if(strchr(encoded_ph,phonSWITCH) != 0) | |||
if(text_not_phonemes || text_mode) | |||
{ | |||
flag_codes[n_flag_codes++] = BITNUM_FLAG_ONLY_S; | |||
strcpy(encoded_ph,phonetic); // this is replacement text, so don't encode as phonemes | |||
flag_codes[n_flag_codes++] = BITNUM_FLAG_DICTTEXT; | |||
} | |||
for(ix=0; ix<255; ix++) | |||
else | |||
{ | |||
c = encoded_ph[ix]; | |||
if(c == 0) break; | |||
if(c == 255) | |||
EncodePhonemes(phonetic,encoded_ph,bad_phoneme); | |||
if(strchr(encoded_ph,phonSWITCH) != 0) | |||
{ | |||
/* unrecognised phoneme, report error */ | |||
fprintf(f_log,"%5d: Bad phoneme [%c] (0x%x) in: %s %s\n",linenum,bad_phoneme[0],bad_phoneme[0],word,phonetic); | |||
error_count++; | |||
flag_codes[n_flag_codes++] = BITNUM_FLAG_ONLY_S; // don't match on suffixes (except 's') when switching languages | |||
} | |||
// check for errors in the phonemes codes | |||
for(ix=0; ix<sizeof(encoded_ph); ix++) | |||
{ | |||
c = encoded_ph[ix]; | |||
if(c == 0) break; | |||
if(c == 255) | |||
{ | |||
/* unrecognised phoneme, report error */ | |||
fprintf(f_log,"%5d: Bad phoneme [%c] (0x%x) in: %s %s\n",linenum,bad_phoneme[0],bad_phoneme[0],word,phonetic); | |||
error_count++; | |||
} | |||
} | |||
} | |||
if((word[0] & 0x80)==0) // 7 bit ascii only | |||
{ | |||
// If first letter is uppercase, convert to lower case. (Only if it's 7bit ascii) | |||
@@ -504,6 +558,8 @@ int compile_dictlist_file(const char *path, const char* filename) | |||
char buf[sizeof(path_home)+45]; | |||
char dict_line[128]; | |||
text_mode = 0; | |||
sprintf(buf,"%s%s",path,filename); | |||
if((f_in = fopen(buf,"r")) == NULL) | |||
return(-1); |
@@ -2795,7 +2795,11 @@ int Translator::LookupDict2(char *word, char *word2, char *phonetic, unsigned in | |||
DecodePhonemes(phonetic,ph_decoded); | |||
if(flags != NULL) | |||
flags1 = *flags; | |||
fprintf(f_trans,"Found: %s [%s] %s\n",word1,ph_decoded,print_dflags(flags1)); | |||
if((dictionary_flags & FLAG_DICTTEXT) == 0) | |||
{ | |||
fprintf(f_trans,"Found: %s [%s] %s\n",word1,ph_decoded,print_dflags(flags1)); | |||
} | |||
} | |||
return(1); | |||
@@ -2805,8 +2809,8 @@ int Translator::LookupDict2(char *word, char *word2, char *phonetic, unsigned in | |||
int Translator::LookupDictList(char *word1, char *ph_out, unsigned int *flags, int end_flags) | |||
//=========================================================================================== | |||
int Translator::LookupDictList(char **wordptr, char *ph_out, unsigned int *flags, int end_flags) | |||
//============================================================================================== | |||
/* Lookup a specified word in the word dictionary. | |||
Returns phonetic data in 'phonetic' and bits in 'flags' | |||
@@ -2815,14 +2819,16 @@ int Translator::LookupDictList(char *word1, char *ph_out, unsigned int *flags, i | |||
{ | |||
int length; | |||
int found; | |||
char *word1; | |||
char *word2; | |||
unsigned char c; | |||
int nbytes; | |||
int c2; | |||
char word[N_WORD_BYTES]; | |||
static char word_replacement[N_WORD_BYTES]; | |||
length = 0; | |||
word2 = word1; | |||
word2 = word1 = *wordptr; | |||
while((word2[nbytes = utf8_in(&c2,word2,0)]==' ') && (word2[nbytes+1]=='.')) | |||
{ | |||
@@ -2857,25 +2863,52 @@ int Translator::LookupDictList(char *word1, char *ph_out, unsigned int *flags, i | |||
found = LookupDict2(word,word1,ph_out,flags,end_flags); | |||
if(found) return(1); | |||
ph_out[0] = 0; | |||
// try modifications to find a recognised word | |||
if((end_flags & FLAG_SUFX_E_ADDED) && (word[length-1] == 'e')) | |||
if(found == 0) | |||
{ | |||
// try removing an 'e' which has been added by RemoveEnding | |||
word[length-1] = 0; | |||
found = LookupDict2(word,word1,ph_out,flags,end_flags); | |||
if(found) return(1); | |||
ph_out[0] = 0; | |||
// try modifications to find a recognised word | |||
if((end_flags & FLAG_SUFX_E_ADDED) && (word[length-1] == 'e')) | |||
{ | |||
// try removing an 'e' which has been added by RemoveEnding | |||
word[length-1] = 0; | |||
found = LookupDict2(word,word1,ph_out,flags,end_flags); | |||
} | |||
else | |||
if((end_flags & SUFX_D) && (word[length-1] == word[length-2])) | |||
{ | |||
// try removing a double letter | |||
word[length-1] = 0; | |||
found = LookupDict2(word,word1,ph_out,flags,end_flags); | |||
} | |||
} | |||
if((end_flags & SUFX_D) && (word[length-1] == word[length-2])) | |||
if(found) | |||
{ | |||
// try removing a double letter | |||
word[length-1] = 0; | |||
found = LookupDict2(word,word1,ph_out,flags,end_flags); | |||
if(found) return(1); | |||
if(*flags & FLAG_DICTTEXT) | |||
{ | |||
// the word translates to replacement text, not to phonemes | |||
if(end_flags & FLAG_ALLOW_DICTTEXT) | |||
{ | |||
// only use replacement text if this is the original word, not if a prefix or suffix has been removed | |||
word_replacement[0] = 0; | |||
word_replacement[1] = ' '; | |||
strcpy(&word_replacement[2],ph_out); // replacement word, preceded by zerochar and space | |||
*wordptr = &word_replacement[2]; | |||
if(option_phonemes == 2) | |||
{ | |||
fprintf(f_trans,"Replace: %s %s\n",word,*wordptr); | |||
} | |||
} | |||
ph_out[0] = 0; | |||
return(0); | |||
} | |||
return(1); | |||
} | |||
ph_out[0] = 0; | |||
@@ -2887,7 +2920,7 @@ int Translator::LookupDictList(char *word1, char *ph_out, unsigned int *flags, i | |||
int Translator::Lookup(char *word, char *ph_out) | |||
{//============================================= | |||
unsigned int flags; | |||
return(LookupDictList(word,ph_out,&flags,0)); | |||
return(LookupDictList(&word,ph_out,&flags,0)); | |||
} | |||
@@ -470,7 +470,7 @@ const char *Translator::LookupSpecial(char *string) | |||
char phonemes2[55]; | |||
static char buf[60]; | |||
if(LookupDictList(string,phonemes,&flags,0)) | |||
if(LookupDictList(&string,phonemes,&flags,0)) | |||
{ | |||
SetWordStress(phonemes,flags,-1,0); | |||
DecodePhonemes(phonemes,phonemes2); |
@@ -35,7 +35,7 @@ | |||
#include "translate.h" | |||
#include "wave.h" | |||
const char *version_string = "1.29.11 23.Oct.07"; | |||
const char *version_string = "1.29.12 29.Oct.07"; | |||
const int version_phdata = 0x012901; | |||
int option_device_number = -1; |
@@ -219,7 +219,7 @@ Translator *SelectTranslator(const char *name) | |||
tr->langopts.unstressed_wd1 = 1; | |||
tr->langopts.unstressed_wd2 = 2; | |||
tr->langopts.numbers = 0x1409; | |||
tr->langopts.numbers = 0x1c09 + NUM_ROMAN; | |||
} | |||
break; | |||
@@ -592,7 +592,7 @@ int Translator::TranslateWord(char *word1, int next_pause, WORD_TAB *wtab) | |||
unsigned int dictionary_flags2=0; | |||
int end_type=0; | |||
int prefix_type=0; | |||
char *word; | |||
char *wordx; | |||
char phonemes[N_WORD_PHONEMES]; | |||
char *ph_limit; | |||
char *phonemes_ptr; | |||
@@ -618,20 +618,19 @@ int Translator::TranslateWord(char *word1, int next_pause, WORD_TAB *wtab) | |||
static char word_iz[4] = {0,'i','z',0}; | |||
static char word_ss[4] = {0,'s','s',0}; | |||
word = word1; | |||
prefix_phonemes[0] = 0; | |||
end_phonemes[0] = 0; | |||
ph_limit = &phonemes[N_WORD_PHONEMES]; | |||
// count the length of the word | |||
utf8_in(&first_char,word,0); | |||
wordx = word1; | |||
utf8_in(&first_char,wordx,0); | |||
word_length = 0; | |||
while((*word != 0) && (*word != ' ')) | |||
while((*wordx != 0) && (*wordx != ' ')) | |||
{ | |||
word += utf8_in(&last_char,word,0); | |||
wordx += utf8_in(&last_char,wordx,0); | |||
word_length++; | |||
} | |||
word = word1; | |||
// try an initial lookup in the dictionary list, we may find a pronunciation specified, or | |||
// we may just find some flags | |||
@@ -643,7 +642,8 @@ int Translator::TranslateWord(char *word1, int next_pause, WORD_TAB *wtab) | |||
else | |||
{ | |||
spell_word = 0; | |||
found = LookupDictList(word,phonemes,&dictionary_flags,wflags << 16); | |||
found = LookupDictList(&word1,phonemes,&dictionary_flags,FLAG_ALLOW_DICTTEXT | wflags << 16); // the original word | |||
if(phonemes[0] == phonSWITCH) | |||
{ | |||
// change to another language in order to translate this word | |||
@@ -669,7 +669,7 @@ if((wmark > 0) && (wmark < 8)) | |||
if(word_phonemes[0] == phonSWITCH) | |||
return(0); | |||
found = TranslateNumber(word,phonemes,&dictionary_flags,wflags); | |||
found = TranslateNumber(word1,phonemes,&dictionary_flags,wflags); | |||
} | |||
if(!found & ((word_flags & FLAG_UPPERS) != FLAG_FIRST_UPPER)) | |||
@@ -678,7 +678,7 @@ if((wmark > 0) && (wmark < 8)) | |||
if((langopts.numbers & NUM_ROMAN) || ((langopts.numbers & NUM_ROMAN_UC) && (word_flags & FLAG_ALL_UPPER))) | |||
{ | |||
if((found = TranslateRoman(word,phonemes)) != 0) | |||
if((found = TranslateRoman(word1,phonemes)) != 0) | |||
dictionary_flags |= FLAG_ABBREV; // don't spell capital Roman numbers as individual letters | |||
} | |||
} | |||
@@ -694,14 +694,14 @@ if((wmark > 0) && (wmark < 8)) | |||
if(spell_word > 0) | |||
{ | |||
// Speak as individual letters | |||
word = word1; | |||
wordx = word1; | |||
posn = 0; | |||
phonemes[0] = 0; | |||
end_type = 0; | |||
while(*word != ' ') | |||
while(*wordx != ' ') | |||
{ | |||
word += TranslateLetter(word, phonemes,spell_word); | |||
wordx += TranslateLetter(wordx, phonemes,spell_word); | |||
if(phonemes[0] == phonSWITCH) | |||
{ | |||
// change to another language in order to translate this word | |||
@@ -719,12 +719,14 @@ if((wmark > 0) && (wmark < 8)) | |||
posn = 0; | |||
length = 999; | |||
while(((length < 3) && (length > 0))|| (word_length > 1 && Unpronouncable(word))) | |||
wordx = word1; | |||
while(((length < 3) && (length > 0))|| (word_length > 1 && Unpronouncable(wordx))) | |||
{ | |||
char *p; | |||
// This word looks "unpronouncable", so speak letters individually until we | |||
// find a remainder that we can pronounce. | |||
word += TranslateLetter(word,phonemes,0); | |||
wordx += TranslateLetter(wordx,phonemes,0); | |||
if(phonemes[0] == phonSWITCH) | |||
{ | |||
// change to another language in order to translate this word | |||
@@ -732,7 +734,7 @@ if((wmark > 0) && (wmark < 8)) | |||
return(0); | |||
} | |||
p = &word[word_length-3]; | |||
p = &wordx[word_length-3]; | |||
if(memcmp(p,"'s ",3) == 0) | |||
{ | |||
// remove a 's suffix and pronounce this separately (not as an individual letter) | |||
@@ -743,18 +745,18 @@ if((wmark > 0) && (wmark < 8)) | |||
} | |||
length=0; | |||
while(word[length] != ' ') length++; | |||
while(wordx[length] != ' ') length++; | |||
if(length > 0) | |||
word[-1] = ' '; // prevent this affecting the pronunciation of the pronuncable part | |||
wordx[-1] = ' '; // prevent this affecting the pronunciation of the pronuncable part | |||
} | |||
SetSpellingStress(phonemes,0); | |||
// anything left ? | |||
if(*word != ' ') | |||
if(*wordx != ' ') | |||
{ | |||
// Translate the stem | |||
unpron_length = strlen(phonemes); | |||
end_type = TranslateRules(word, phonemes, N_WORD_PHONEMES, end_phonemes, wflags, dictionary_flags); | |||
end_type = TranslateRules(wordx, phonemes, N_WORD_PHONEMES, end_phonemes, wflags, dictionary_flags); | |||
if(phonemes[0] == phonSWITCH) | |||
{ | |||
@@ -763,7 +765,7 @@ if((wmark > 0) && (wmark < 8)) | |||
return(0); | |||
} | |||
c_temp = word[-1]; | |||
c_temp = wordx[-1]; | |||
found = 0; | |||
confirm_prefix = 1; | |||
@@ -779,12 +781,12 @@ if((wmark > 0) && (wmark < 8)) | |||
// remove any standard suffix and confirm that the prefix is still recognised | |||
phonemes2[0] = 0; | |||
end2 = TranslateRules(word, phonemes2, N_WORD_PHONEMES, end_phonemes2, wflags|FLAG_NO_PREFIX|FLAG_NO_TRACE, dictionary_flags); | |||
end2 = TranslateRules(wordx, phonemes2, N_WORD_PHONEMES, end_phonemes2, wflags|FLAG_NO_PREFIX|FLAG_NO_TRACE, dictionary_flags); | |||
if(end2) | |||
{ | |||
RemoveEnding(word,end2,word_copy); | |||
end_type = TranslateRules(word, phonemes, N_WORD_PHONEMES, end_phonemes, wflags|FLAG_NO_TRACE, dictionary_flags); | |||
memcpy(word,word_copy,strlen(word_copy)); | |||
RemoveEnding(wordx,end2,word_copy); | |||
end_type = TranslateRules(wordx, phonemes, N_WORD_PHONEMES, end_phonemes, wflags|FLAG_NO_TRACE, dictionary_flags); | |||
memcpy(wordx,word_copy,strlen(word_copy)); | |||
if((end_type & SUFX_P) == 0) | |||
{ | |||
// after removing the suffix, the prefix is no longer recognised. | |||
@@ -812,30 +814,30 @@ if((wmark > 0) && (wmark < 8)) | |||
expect_verb = 1; // use the verb form of the word | |||
} | |||
word[-1] = c_temp; | |||
wordx[-1] = c_temp; | |||
for(ix=(prefix_type & 0xf); ix>0; ix--) // num. of characters to remove | |||
{ | |||
word++; | |||
while((*word & 0xc0) == 0x80) word++; // for multibyte characters | |||
wordx++; | |||
while((*wordx & 0xc0) == 0x80) wordx++; // for multibyte characters | |||
} | |||
c_temp = word[-1]; | |||
word[-1] = ' '; | |||
c_temp = wordx[-1]; | |||
wordx[-1] = ' '; | |||
confirm_prefix = 1; | |||
end_type = 0; | |||
found = LookupDictList(word,phonemes,&dictionary_flags2,SUFX_P | (wflags << 16)); | |||
found = LookupDictList(&wordx,phonemes,&dictionary_flags2,SUFX_P | (wflags << 16)); // without prefix | |||
if(dictionary_flags==0) | |||
dictionary_flags = dictionary_flags2; | |||
else | |||
prefix_flags = 1; | |||
if(found == 0) | |||
{ | |||
end_type = TranslateRules(word, phonemes, N_WORD_PHONEMES, end_phonemes, 0, dictionary_flags); | |||
end_type = TranslateRules(wordx, phonemes, N_WORD_PHONEMES, end_phonemes, 0, dictionary_flags); | |||
if(phonemes[0] == phonSWITCH) | |||
{ | |||
// change to another language in order to translate this word | |||
word[-1] = c_temp; | |||
wordx[-1] = c_temp; | |||
strcpy(word_phonemes,phonemes); | |||
return(0); | |||
} | |||
@@ -848,7 +850,7 @@ char phonemes2[N_WORD_PHONEMES]; | |||
strcpy(phonemes2,phonemes); | |||
// The word has a standard ending, re-translate without this ending | |||
end_flags = RemoveEnding(word,end_type,word_copy); | |||
end_flags = RemoveEnding(wordx,end_type,word_copy); | |||
phonemes_ptr = &phonemes[unpron_length]; | |||
phonemes_ptr[0] = 0; | |||
@@ -856,26 +858,24 @@ strcpy(phonemes2,phonemes); | |||
if(prefix_phonemes[0] != 0) | |||
{ | |||
// lookup the stem without the prefix removed | |||
word[-1] = c_temp; | |||
found = LookupDictList(word1,phonemes_ptr,&dictionary_flags2,end_flags | (wflags << 16)); | |||
word[-1] = ' '; | |||
wordx[-1] = c_temp; | |||
found = LookupDictList(&word1,phonemes_ptr,&dictionary_flags2,end_flags | (wflags << 16)); // include prefix, but not suffix | |||
wordx[-1] = ' '; | |||
if(dictionary_flags==0) | |||
dictionary_flags = dictionary_flags2; | |||
if(found) | |||
prefix_phonemes[0] = 0; // matched whole word, don't need prefix now | |||
// if(found || (dictionary_flags2 != 0)) | |||
// prefix_flags = 1; // ?? this looks wrong | |||
if((found==0) && (dictionary_flags2 != 0)) | |||
prefix_flags = 1; | |||
} | |||
if(found == 0) | |||
{ | |||
found = LookupDictList(word,phonemes_ptr,&dictionary_flags2,end_flags | (wflags << 16)); | |||
found = LookupDictList(&wordx,phonemes_ptr,&dictionary_flags2,end_flags | (wflags << 16)); // without prefix and suffix | |||
if(phonemes_ptr[0] == phonSWITCH) | |||
{ | |||
// change to another language in order to translate this word | |||
memcpy(word,word_copy,strlen(word_copy)); | |||
memcpy(wordx,word_copy,strlen(word_copy)); | |||
strcpy(word_phonemes,phonemes_ptr); | |||
return(0); | |||
} | |||
@@ -895,16 +895,16 @@ strcpy(phonemes2,phonemes); | |||
else | |||
{ | |||
if(end_flags & FLAG_SUFX) | |||
TranslateRules(word, phonemes, N_WORD_PHONEMES, NULL,wflags | FLAG_SUFFIX_REMOVED, dictionary_flags); | |||
TranslateRules(wordx, phonemes, N_WORD_PHONEMES, NULL,wflags | FLAG_SUFFIX_REMOVED, dictionary_flags); | |||
else | |||
TranslateRules(word, phonemes, N_WORD_PHONEMES, NULL,wflags,dictionary_flags); | |||
TranslateRules(wordx, phonemes, N_WORD_PHONEMES, NULL,wflags,dictionary_flags); | |||
if(phonemes[0] == phonSWITCH) | |||
{ | |||
// change to another language in order to translate this word | |||
strcpy(word_phonemes,phonemes); | |||
memcpy(word,word_copy,strlen(word_copy)); | |||
word[-1] = c_temp; | |||
memcpy(wordx,word_copy,strlen(word_copy)); | |||
wordx[-1] = c_temp; | |||
return(0); | |||
} | |||
} | |||
@@ -917,7 +917,7 @@ strcpy(phonemes2,phonemes); | |||
end_phonemes[0] = 0; | |||
} | |||
} | |||
word[-1] = c_temp; | |||
wordx[-1] = c_temp; | |||
} | |||
} | |||
@@ -1047,7 +1047,7 @@ strcpy(phonemes2,phonemes); | |||
expect_past = 0; | |||
} | |||
if((word[0] != 0) && (!(dictionary_flags & FLAG_VERB_EXT))) | |||
if((wordx[0] != 0) && (!(dictionary_flags & FLAG_VERB_EXT))) | |||
{ | |||
if(expect_verb > 0) | |||
expect_verb -= 1; |
@@ -36,10 +36,12 @@ | |||
// bits 0-3 stressed syllable, 7=unstressed | |||
#define FLAG_SKIPWORDS 0x80 | |||
#define FLAG_PREPAUSE 0x100 | |||
#define FLAG_ONLY 0x200 | |||
#define FLAG_ONLY_S 0x400 | |||
#define BITNUM_FLAG_ONLY 9 // bit 9 is set | |||
#define BITNUM_FLAG_ONLY_S 10 // bit 10 is set | |||
#define FLAG_ONLY_S 0x400 | |||
#define FLAG_STRESS_END 0x800 /* full stress if at end of clause */ | |||
#define FLAG_STRESS_END2 0x1000 /* full stress if at end of clause, or only followed by unstressed */ | |||
#define FLAG_UNSTRESS_END 0x2000 /* reduce stress at end of clause */ | |||
@@ -62,6 +64,9 @@ | |||
#define FLAG_PASTF 0x8000000 /* past tense follows */ | |||
#define FLAG_VERB_EXT 0x10000000 /* extend the 'verb follows' */ | |||
#define FLAG_DICTTEXT 0x20000000 // word translates to replacement text, not phonemes | |||
#define BITNUM_FLAG_DICTTEXT 29 | |||
#define FLAG_PAUSE1 0x40000000 // shorter prepause | |||
#define FLAG_FOUND 0x80000000 /* pronunciation was found in the dictionary list */ | |||
@@ -92,6 +97,7 @@ | |||
#define SUFX_Q 0x4000 // don't retranslate | |||
#define SUFX_T 0x10000 // don't affect the stress position in the stem | |||
#define FLAG_ALLOW_DICTTEXT 0x02 // allow dictionary to translate to text rather than phonemes | |||
#define FLAG_SUFX 0x04 | |||
#define FLAG_SUFX_S 0x08 | |||
#define FLAG_SUFX_E_ADDED 0x10 | |||
@@ -417,7 +423,7 @@ protected: | |||
virtual int ChangePhonemes(PHONEME_LIST2 *phlist, int n_ph, int index, PHONEME_TAB *ph, CHANGEPH *ch); | |||
int IsVowel(int letter); | |||
int LookupDictList(char *word1, char *ph_out, unsigned int *flags, int end_flags); | |||
int LookupDictList(char **wordptr, char *ph_out, unsigned int *flags, int end_flags); | |||
int Lookup(char *word, char *ph_out); | |||