Browse Source

[1.29.12] *_list files can translate words to "sounds like" words as well as phonemes. $textmode, $phonememode.

Lang eo: use .replace in eo_rules for "cx" etc.


git-svn-id: https://espeak.svn.sourceforge.net/svnroot/espeak/trunk@94 d46cf337-b52f-0410-862d-fd96e6ae7743
master
jonsd 17 years ago
parent
commit
9543dcc00d

+ 2
- 1
dictsource/en_list View File

@@ -725,6 +725,7 @@ dingy dIndZI2
dinosaur daIn@sO@
diode daIoUd
diplomacy dIpl'oUm@sI2
dipole daIpoUl
disconsolate dIsk'0ns@l@t
disc dIsk // for discs
disciple dIs'aIp@L
@@ -857,6 +858,7 @@ finite faInaIt
fishnet fISnEt
fiord fi:O@d
fix fIks // for fixer
fjord fIO@d
flagellum fla2dZEl@m
flexible flEksIb@L
flier flaI3
@@ -2255,7 +2257,6 @@ Penelope p@nEl@pI2
Phoebe fi:bi:
Rachel reItS@L
Rhys ri:s
Robert r0b3t
Roderick r0drIk
Samantha s@manT@
Sarah se@r@

+ 8
- 4
dictsource/en_rules View File

@@ -29,6 +29,11 @@
// ?7 Scottish
// ?8 Use full vowel, not schwa in some word endings

.replace
ff f // ligature
fi fi
fl fl


.group a
_) a (_ a2
@@ -1653,6 +1658,7 @@
l) egion i:dZ@n
egy (pt i:dZI
n) e (gro i:
eh (_ eI
ei eI
_) ei aI
c) ei i:
@@ -3092,7 +3098,7 @@
r) iv (el Iv
r) iv (en Iv
r) iv (et Iv
_l) iv (el aIv
_l) iv (eB aIv
XC) iv (el Iv
_g) iv (e Iv
_l) iv (e Iv
@@ -3377,6 +3383,7 @@
o (bliga 0
o (bligato %0
o (blo 0
_r) o (ber 0
r) o (bot oU
_) o (bs %0
_) obs (er %0bz
@@ -5223,6 +5230,3 @@
€ jU@roUz
♯ SA@p
♭ flat
ff f
fi fI
fl fl

+ 7
- 17
dictsource/eo_list View File

@@ -32,6 +32,9 @@ _dpt komo_

_. punkto

v vo // not Roman numbers
x ikso

t.e t'e,e


@@ -54,22 +57,18 @@ uea $abbrev
uk $abbrev


// includes both accented characters and ascii digraph alternatives (cx, gx, ux, etc)

l' la
la %la
ne $u+ // negative
cxu $u // interrogative particle
cxi $u
ĉu $u // cxi interrogative particle
ĉi $u // cxi interrogative particle
ĉu $u // interrogative particle
ĉi $u // interrogative particle
ia $u // any

// prepositions
de $u+ $brk
(de la) dela $u $brk

antaux $u+ $pause
antaŭ $u+ $pause
apud $u+ $pause
ol $u+ $brk
@@ -87,7 +86,6 @@ je $u $brk

kun $u+ $pause
laŭ $u+ $brk
laux $u+ $brk
per $u+ $brk
preter $u+ $brk
pri $u+ $pause
@@ -109,12 +107,10 @@ eĉ $brk
mi $u
ci $u
li $u
sxi $u
ŝi $u
gxi $u
ĝi $u
ni $u
vi $u
vi vi $u
ili $u
oni $u
si $u
@@ -122,9 +118,7 @@ si $u
mia $u+
cia $u+
lia $u+
sxia $u+
ŝia $u+
gxia $u+
ĝia $u+
nia $u+
via $u+
@@ -134,9 +128,7 @@ sia $u+
miaj $u+
ciaj $u+
liaj $u+
sxiaj $u+
ŝiaj $u+
gxiaj $u+
ĝiaj $u+
niaj $u+
viaj $u+
@@ -193,9 +185,7 @@ tiam $brk

// conjunctions
ke $u $pause // that (conj)
aux $u $pause
aŭ $u $pause // aux
cxar $u $pause
aŭ $u $pause
ĉar $u $pause
kaj $u $pause
sed $u $pause

+ 9
- 18
dictsource/eo_rules View File

@@ -3,12 +3,19 @@

// includes both accented characters and ascii digraph alternatives (cx, gx, ux, etc)

.replace
cx ĉ
gx ĝ
hx ĥ
jx ĵ
sx ŝ
ux ŭ


.group a
_) a(_ a
a a
aj aI
a (jx a
aux aU
aŭ aU
D_-_) a (_ %a //'a' after a number
D_-_) an (_ %an //'an' after a number
@@ -22,11 +29,9 @@
.group c
_) c(_ tso
_) ch(_ tSo
_) cx(_ tSo
c ts
ch (K tS // foreign words
K) ch tS // foreign words
cx tS
.group d
@@ -40,8 +45,6 @@
_) e(_ e
e e
ej eI
e (jx e
eux eU
eŭ eU
.group f
@@ -53,17 +56,13 @@
.group g
_) g(_ go
_) gx(_ dZo
g g
gh (K dZ
gx dZ
.group h
_) h(_ ho
_) hx(_ xo
h h
hh x
hx x
.group i
@@ -73,10 +72,8 @@
.group j
_) j(_ jo
_) jx(_ Zo
j j
jh (C Z
jx Z
.group k
_) k(_ ko
@@ -106,7 +103,6 @@
_) o(_ o
o o
oj OI
o (jx o
.group p
_) p(_ po
@@ -131,9 +127,7 @@
.group s
_) s(_ so
_) sh(_ So
_) sx(_ So
s s
sx S
sh (K S // foreign words
K) sh S // foreign words

@@ -149,11 +143,8 @@
.group u
_) u(_ u
_) ux(_ wo
u u
uj uI
u (jx u
ux w
.group v

+ 28
- 0
docs/dictionary.html View File

@@ -528,6 +528,23 @@ The dictionary list is searched from bottom to top. The first match that satisf
</pre>
then if "to" is at the end of the clause, we get [tu:], if not then we get [t@].

<p>
<h4>4.4.4 Translating a Word with another Word</h4>
Rather than specifying the pronunciation of a word by a phoneme string, you can specify another "sounds like" word.<p>Use the attribute <b>$text</b> eg.<p>
<pre>
cough coff $text
</pre>
Alternatively, use the command <b>$textmode</b> on a line by itself to turn this on for all subsequent entries in the file, until it's turned off by <b>$phonememode</b>. eg.<p>
<pre>
$textmode
cough coff
through threw
$phonememode
</pre>
This feature cannot be used for the special entries in the <b>_list</b> files which start with an underscore, such as numbers.<p>
Currently "textmode" entries are only recognized for complete words, and not for stems from which a prefix or suffix has been removed (eg. the word "coughs" would not match the example above).
<p>

<p>&nbsp;<hr>
<h3>4.5 Conditional Rules</h3>
Rules in a <b>_rules</b> file and entries in a <b>_list</b> file can be made conditional. They apply only to some voices. This can be useful to specify different pronunciations for different variants of a language (dialects or accents).<p>
@@ -572,6 +589,17 @@ _0 to _9 &nbsp;
</tbody></table>
</ul>

<p>&nbsp;<hr>
<h3>4.7 Character Substitution</h3>
Character substitutions can be specified by using a <b> .replace </b> section at the start of the <b> _rules </b> file. Each line specifies either one or two alphabetic characters to be replaced by another one or two alphabetic characters. This substitution is done to a word before it is translated using the spelling-to-phoneme rules. Only the lower-case version of the characters needs to be specified. eg.<p>
&nbsp; .replace<br>
&nbsp; &nbsp; &#xf4; &nbsp; &#x151; &nbsp; // (Hungarian) allow the use of o-circumflex instead of o-double-acute<br>
&nbsp; &nbsp; &#xfb; &nbsp; &#x171;<p>
&nbsp; &nbsp; cx &nbsp; &#x109; &nbsp; // (Esperanto) allow "cx" as an alternative to c-circumflex<p>

&nbsp; &nbsp; &#xfb01; &nbsp; fi &nbsp; // replace a single character ligature by two characters
<p>


</body>
</html>

+ 1
- 1
phsource/ph_english View File

@@ -18,7 +18,7 @@ phoneme t2 // [t] which doesn't reduce
vowelin f1=0 f2=1700 -300 300 f3=-100 80
vowelout f1=0 f2=1700 -300 250 f3=-100 80 rms=20
lengthmod 2
wave ustop/t
wave ustop/t%90
before _ ustop/t_
before @- ustop/t_dnt%50
before r ustop/tr

+ 1
- 1
phsource/ph_english_us View File

@@ -304,7 +304,7 @@ phoneme t# // reduced [t] as in "city"
vowelin f1=0 f2=1700 -300 300 f3=-100 80
vowelout f1=0 f2=1700 -300 300 f3=-100 80
lengthmod 5
wave x/d%90
wave x/d%80
endphoneme



+ 1
- 1
phsource/phonemes View File

@@ -895,7 +895,7 @@ phoneme t
vowelin f1=0 f2=1700 -300 300 f3=-100 80
vowelout f1=0 f2=1700 -300 250 f3=-100 80 rms=20
lengthmod 2
wave ustop/t
wave ustop/t%90
before _ ustop/t_
before @- ustop/t_dnt%50
before r ustop/tr

+ 73
- 17
src/compiledict.cpp View File

@@ -44,6 +44,7 @@ static int error_count;
static int transpose_offset; // transpose character range for LookupDictList()
static int transpose_min;
static int transpose_max;
static int text_mode = 0;

int hash_counts[N_HASH_DICT];
char *hash_chains[N_HASH_DICT];
@@ -94,9 +95,13 @@ MNEM_TAB mnem_flags[] = {
{"$pastf", 27}, /* past tense follows */
{"$verbextend",28}, /* extend influence of 'verb follows' */

{"$brk", 30}, /* a shorter $pause */
{"$text", 29}, // word translates to replacement text, not phonemes
{"$brk", 30}, // a shorter $pause
// doesn't set dictionary_flags
{"$?", 100}, // conditional rule, followed by byte giving the condition number

{"$textmode", 200},
{"$phonememode", 201},
{NULL, -1}
};

@@ -169,17 +174,37 @@ int compile_line(char *linebuf, char *dict_line, int *hash)
int len_word;
int len_phonetic;
int text_not_phonemes; // this word specifies replacement text, not phonemes
char *mnemptr;
char *comment;
unsigned char flag_codes[100];
char encoded_ph[200];
unsigned char bad_phoneme[4];
p = linebuf;

comment = NULL;
text_not_phonemes = 0;
phonetic = word = "";

p = linebuf;
// while(isspace2(*p)) p++;

#ifdef deleted
if(*p == '$')
{
if(memcmp(p,"$textmode",9) == 0)
{
text_mode = 1;
return(0);
}
if(memcmp(p,"$phonememode",12) == 0)
{
text_mode = 0;
return(0);
}
}
#endif

step = 0;
c = 0;
@@ -224,7 +249,26 @@ int compile_line(char *linebuf, char *dict_line, int *hash)
ix = LookupMnem(mnem_flags,mnemptr);
if(ix > 0)
flag_codes[n_flag_codes++] = ix;
{
if(ix == 200)
{
text_mode = 1;
}
else
if(ix == 201)
{
text_mode = 0;
}
else
if(ix == BITNUM_FLAG_DICTTEXT)
{
text_not_phonemes = 1;
}
else
{
flag_codes[n_flag_codes++] = ix;
}
}
else
{
fprintf(f_log,"%5d: Unknown keyword: %s\n",linenum,mnemptr);
@@ -325,25 +369,35 @@ int compile_line(char *linebuf, char *dict_line, int *hash)
#endif
return(0); /* blank line */
}
EncodePhonemes(phonetic,encoded_ph,bad_phoneme);
if(strchr(encoded_ph,phonSWITCH) != 0)
if(text_not_phonemes || text_mode)
{
flag_codes[n_flag_codes++] = BITNUM_FLAG_ONLY_S;
strcpy(encoded_ph,phonetic); // this is replacement text, so don't encode as phonemes
flag_codes[n_flag_codes++] = BITNUM_FLAG_DICTTEXT;
}
for(ix=0; ix<255; ix++)
else
{
c = encoded_ph[ix];
if(c == 0) break;
if(c == 255)
EncodePhonemes(phonetic,encoded_ph,bad_phoneme);
if(strchr(encoded_ph,phonSWITCH) != 0)
{
/* unrecognised phoneme, report error */
fprintf(f_log,"%5d: Bad phoneme [%c] (0x%x) in: %s %s\n",linenum,bad_phoneme[0],bad_phoneme[0],word,phonetic);
error_count++;
flag_codes[n_flag_codes++] = BITNUM_FLAG_ONLY_S; // don't match on suffixes (except 's') when switching languages
}

// check for errors in the phonemes codes
for(ix=0; ix<sizeof(encoded_ph); ix++)
{
c = encoded_ph[ix];
if(c == 0) break;
if(c == 255)
{
/* unrecognised phoneme, report error */
fprintf(f_log,"%5d: Bad phoneme [%c] (0x%x) in: %s %s\n",linenum,bad_phoneme[0],bad_phoneme[0],word,phonetic);
error_count++;
}
}
}
if((word[0] & 0x80)==0) // 7 bit ascii only
{
// If first letter is uppercase, convert to lower case. (Only if it's 7bit ascii)
@@ -504,6 +558,8 @@ int compile_dictlist_file(const char *path, const char* filename)
char buf[sizeof(path_home)+45];
char dict_line[128];
text_mode = 0;

sprintf(buf,"%s%s",path,filename);
if((f_in = fopen(buf,"r")) == NULL)
return(-1);

+ 53
- 20
src/dictionary.cpp View File

@@ -2795,7 +2795,11 @@ int Translator::LookupDict2(char *word, char *word2, char *phonetic, unsigned in
DecodePhonemes(phonetic,ph_decoded);
if(flags != NULL)
flags1 = *flags;
fprintf(f_trans,"Found: %s [%s] %s\n",word1,ph_decoded,print_dflags(flags1));

if((dictionary_flags & FLAG_DICTTEXT) == 0)
{
fprintf(f_trans,"Found: %s [%s] %s\n",word1,ph_decoded,print_dflags(flags1));
}
}
return(1);

@@ -2805,8 +2809,8 @@ int Translator::LookupDict2(char *word, char *word2, char *phonetic, unsigned in



int Translator::LookupDictList(char *word1, char *ph_out, unsigned int *flags, int end_flags)
//===========================================================================================
int Translator::LookupDictList(char **wordptr, char *ph_out, unsigned int *flags, int end_flags)
//==============================================================================================
/* Lookup a specified word in the word dictionary.
Returns phonetic data in 'phonetic' and bits in 'flags'

@@ -2815,14 +2819,16 @@ int Translator::LookupDictList(char *word1, char *ph_out, unsigned int *flags, i
{
int length;
int found;
char *word1;
char *word2;
unsigned char c;
int nbytes;
int c2;
char word[N_WORD_BYTES];
static char word_replacement[N_WORD_BYTES];

length = 0;
word2 = word1;
word2 = word1 = *wordptr;

while((word2[nbytes = utf8_in(&c2,word2,0)]==' ') && (word2[nbytes+1]=='.'))
{
@@ -2857,25 +2863,52 @@ int Translator::LookupDictList(char *word1, char *ph_out, unsigned int *flags, i

found = LookupDict2(word,word1,ph_out,flags,end_flags);

if(found) return(1);
ph_out[0] = 0;

// try modifications to find a recognised word

if((end_flags & FLAG_SUFX_E_ADDED) && (word[length-1] == 'e'))
if(found == 0)
{
// try removing an 'e' which has been added by RemoveEnding
word[length-1] = 0;
found = LookupDict2(word,word1,ph_out,flags,end_flags);
if(found) return(1);
ph_out[0] = 0;
// try modifications to find a recognised word
if((end_flags & FLAG_SUFX_E_ADDED) && (word[length-1] == 'e'))
{
// try removing an 'e' which has been added by RemoveEnding
word[length-1] = 0;
found = LookupDict2(word,word1,ph_out,flags,end_flags);
}
else
if((end_flags & SUFX_D) && (word[length-1] == word[length-2]))
{
// try removing a double letter
word[length-1] = 0;
found = LookupDict2(word,word1,ph_out,flags,end_flags);
}
}

if((end_flags & SUFX_D) && (word[length-1] == word[length-2]))
if(found)
{
// try removing a double letter
word[length-1] = 0;
found = LookupDict2(word,word1,ph_out,flags,end_flags);
if(found) return(1);
if(*flags & FLAG_DICTTEXT)
{
// the word translates to replacement text, not to phonemes

if(end_flags & FLAG_ALLOW_DICTTEXT)
{
// only use replacement text if this is the original word, not if a prefix or suffix has been removed
word_replacement[0] = 0;
word_replacement[1] = ' ';
strcpy(&word_replacement[2],ph_out); // replacement word, preceded by zerochar and space
*wordptr = &word_replacement[2];

if(option_phonemes == 2)
{
fprintf(f_trans,"Replace: %s %s\n",word,*wordptr);
}
}

ph_out[0] = 0;
return(0);
}

return(1);
}

ph_out[0] = 0;
@@ -2887,7 +2920,7 @@ int Translator::LookupDictList(char *word1, char *ph_out, unsigned int *flags, i
int Translator::Lookup(char *word, char *ph_out)
{//=============================================
unsigned int flags;
return(LookupDictList(word,ph_out,&flags,0));
return(LookupDictList(&word,ph_out,&flags,0));
}



+ 1
- 1
src/readclause.cpp View File

@@ -470,7 +470,7 @@ const char *Translator::LookupSpecial(char *string)
char phonemes2[55];
static char buf[60];

if(LookupDictList(string,phonemes,&flags,0))
if(LookupDictList(&string,phonemes,&flags,0))
{
SetWordStress(phonemes,flags,-1,0);
DecodePhonemes(phonemes,phonemes2);

+ 1
- 1
src/synthdata.cpp View File

@@ -35,7 +35,7 @@
#include "translate.h"
#include "wave.h"

const char *version_string = "1.29.11 23.Oct.07";
const char *version_string = "1.29.12 29.Oct.07";
const int version_phdata = 0x012901;

int option_device_number = -1;

+ 1
- 1
src/tr_languages.cpp View File

@@ -219,7 +219,7 @@ Translator *SelectTranslator(const char *name)
tr->langopts.unstressed_wd1 = 1;
tr->langopts.unstressed_wd2 = 2;

tr->langopts.numbers = 0x1409;
tr->langopts.numbers = 0x1c09 + NUM_ROMAN;
}
break;


+ 46
- 46
src/translate.cpp View File

@@ -592,7 +592,7 @@ int Translator::TranslateWord(char *word1, int next_pause, WORD_TAB *wtab)
unsigned int dictionary_flags2=0;
int end_type=0;
int prefix_type=0;
char *word;
char *wordx;
char phonemes[N_WORD_PHONEMES];
char *ph_limit;
char *phonemes_ptr;
@@ -618,20 +618,19 @@ int Translator::TranslateWord(char *word1, int next_pause, WORD_TAB *wtab)
static char word_iz[4] = {0,'i','z',0};
static char word_ss[4] = {0,'s','s',0};

word = word1;
prefix_phonemes[0] = 0;
end_phonemes[0] = 0;
ph_limit = &phonemes[N_WORD_PHONEMES];

// count the length of the word
utf8_in(&first_char,word,0);
wordx = word1;
utf8_in(&first_char,wordx,0);
word_length = 0;
while((*word != 0) && (*word != ' '))
while((*wordx != 0) && (*wordx != ' '))
{
word += utf8_in(&last_char,word,0);
wordx += utf8_in(&last_char,wordx,0);
word_length++;
}
word = word1;

// try an initial lookup in the dictionary list, we may find a pronunciation specified, or
// we may just find some flags
@@ -643,7 +642,8 @@ int Translator::TranslateWord(char *word1, int next_pause, WORD_TAB *wtab)
else
{
spell_word = 0;
found = LookupDictList(word,phonemes,&dictionary_flags,wflags << 16);
found = LookupDictList(&word1,phonemes,&dictionary_flags,FLAG_ALLOW_DICTTEXT | wflags << 16); // the original word

if(phonemes[0] == phonSWITCH)
{
// change to another language in order to translate this word
@@ -669,7 +669,7 @@ if((wmark > 0) && (wmark < 8))
if(word_phonemes[0] == phonSWITCH)
return(0);

found = TranslateNumber(word,phonemes,&dictionary_flags,wflags);
found = TranslateNumber(word1,phonemes,&dictionary_flags,wflags);
}

if(!found & ((word_flags & FLAG_UPPERS) != FLAG_FIRST_UPPER))
@@ -678,7 +678,7 @@ if((wmark > 0) && (wmark < 8))

if((langopts.numbers & NUM_ROMAN) || ((langopts.numbers & NUM_ROMAN_UC) && (word_flags & FLAG_ALL_UPPER)))
{
if((found = TranslateRoman(word,phonemes)) != 0)
if((found = TranslateRoman(word1,phonemes)) != 0)
dictionary_flags |= FLAG_ABBREV; // don't spell capital Roman numbers as individual letters
}
}
@@ -694,14 +694,14 @@ if((wmark > 0) && (wmark < 8))
if(spell_word > 0)
{
// Speak as individual letters
word = word1;
wordx = word1;
posn = 0;
phonemes[0] = 0;
end_type = 0;

while(*word != ' ')
while(*wordx != ' ')
{
word += TranslateLetter(word, phonemes,spell_word);
wordx += TranslateLetter(wordx, phonemes,spell_word);
if(phonemes[0] == phonSWITCH)
{
// change to another language in order to translate this word
@@ -719,12 +719,14 @@ if((wmark > 0) && (wmark < 8))

posn = 0;
length = 999;
while(((length < 3) && (length > 0))|| (word_length > 1 && Unpronouncable(word)))
wordx = word1;

while(((length < 3) && (length > 0))|| (word_length > 1 && Unpronouncable(wordx)))
{
char *p;
// This word looks "unpronouncable", so speak letters individually until we
// find a remainder that we can pronounce.
word += TranslateLetter(word,phonemes,0);
wordx += TranslateLetter(wordx,phonemes,0);
if(phonemes[0] == phonSWITCH)
{
// change to another language in order to translate this word
@@ -732,7 +734,7 @@ if((wmark > 0) && (wmark < 8))
return(0);
}

p = &word[word_length-3];
p = &wordx[word_length-3];
if(memcmp(p,"'s ",3) == 0)
{
// remove a 's suffix and pronounce this separately (not as an individual letter)
@@ -743,18 +745,18 @@ if((wmark > 0) && (wmark < 8))
}

length=0;
while(word[length] != ' ') length++;
while(wordx[length] != ' ') length++;
if(length > 0)
word[-1] = ' '; // prevent this affecting the pronunciation of the pronuncable part
wordx[-1] = ' '; // prevent this affecting the pronunciation of the pronuncable part
}
SetSpellingStress(phonemes,0);

// anything left ?
if(*word != ' ')
if(*wordx != ' ')
{
// Translate the stem
unpron_length = strlen(phonemes);
end_type = TranslateRules(word, phonemes, N_WORD_PHONEMES, end_phonemes, wflags, dictionary_flags);
end_type = TranslateRules(wordx, phonemes, N_WORD_PHONEMES, end_phonemes, wflags, dictionary_flags);

if(phonemes[0] == phonSWITCH)
{
@@ -763,7 +765,7 @@ if((wmark > 0) && (wmark < 8))
return(0);
}

c_temp = word[-1];
c_temp = wordx[-1];

found = 0;
confirm_prefix = 1;
@@ -779,12 +781,12 @@ if((wmark > 0) && (wmark < 8))

// remove any standard suffix and confirm that the prefix is still recognised
phonemes2[0] = 0;
end2 = TranslateRules(word, phonemes2, N_WORD_PHONEMES, end_phonemes2, wflags|FLAG_NO_PREFIX|FLAG_NO_TRACE, dictionary_flags);
end2 = TranslateRules(wordx, phonemes2, N_WORD_PHONEMES, end_phonemes2, wflags|FLAG_NO_PREFIX|FLAG_NO_TRACE, dictionary_flags);
if(end2)
{
RemoveEnding(word,end2,word_copy);
end_type = TranslateRules(word, phonemes, N_WORD_PHONEMES, end_phonemes, wflags|FLAG_NO_TRACE, dictionary_flags);
memcpy(word,word_copy,strlen(word_copy));
RemoveEnding(wordx,end2,word_copy);
end_type = TranslateRules(wordx, phonemes, N_WORD_PHONEMES, end_phonemes, wflags|FLAG_NO_TRACE, dictionary_flags);
memcpy(wordx,word_copy,strlen(word_copy));
if((end_type & SUFX_P) == 0)
{
// after removing the suffix, the prefix is no longer recognised.
@@ -812,30 +814,30 @@ if((wmark > 0) && (wmark < 8))
expect_verb = 1; // use the verb form of the word
}

word[-1] = c_temp;
wordx[-1] = c_temp;
for(ix=(prefix_type & 0xf); ix>0; ix--) // num. of characters to remove
{
word++;
while((*word & 0xc0) == 0x80) word++; // for multibyte characters
wordx++;
while((*wordx & 0xc0) == 0x80) wordx++; // for multibyte characters
}
c_temp = word[-1];
word[-1] = ' ';
c_temp = wordx[-1];
wordx[-1] = ' ';
confirm_prefix = 1;

end_type = 0;
found = LookupDictList(word,phonemes,&dictionary_flags2,SUFX_P | (wflags << 16));
found = LookupDictList(&wordx,phonemes,&dictionary_flags2,SUFX_P | (wflags << 16)); // without prefix
if(dictionary_flags==0)
dictionary_flags = dictionary_flags2;
else
prefix_flags = 1;
if(found == 0)
{
end_type = TranslateRules(word, phonemes, N_WORD_PHONEMES, end_phonemes, 0, dictionary_flags);
end_type = TranslateRules(wordx, phonemes, N_WORD_PHONEMES, end_phonemes, 0, dictionary_flags);

if(phonemes[0] == phonSWITCH)
{
// change to another language in order to translate this word
word[-1] = c_temp;
wordx[-1] = c_temp;
strcpy(word_phonemes,phonemes);
return(0);
}
@@ -848,7 +850,7 @@ char phonemes2[N_WORD_PHONEMES];
strcpy(phonemes2,phonemes);

// The word has a standard ending, re-translate without this ending
end_flags = RemoveEnding(word,end_type,word_copy);
end_flags = RemoveEnding(wordx,end_type,word_copy);

phonemes_ptr = &phonemes[unpron_length];
phonemes_ptr[0] = 0;
@@ -856,26 +858,24 @@ strcpy(phonemes2,phonemes);
if(prefix_phonemes[0] != 0)
{
// lookup the stem without the prefix removed
word[-1] = c_temp;
found = LookupDictList(word1,phonemes_ptr,&dictionary_flags2,end_flags | (wflags << 16));
word[-1] = ' ';
wordx[-1] = c_temp;
found = LookupDictList(&word1,phonemes_ptr,&dictionary_flags2,end_flags | (wflags << 16)); // include prefix, but not suffix
wordx[-1] = ' ';
if(dictionary_flags==0)
dictionary_flags = dictionary_flags2;
if(found)
prefix_phonemes[0] = 0; // matched whole word, don't need prefix now

// if(found || (dictionary_flags2 != 0))
// prefix_flags = 1; // ?? this looks wrong
if((found==0) && (dictionary_flags2 != 0))
prefix_flags = 1;
}
if(found == 0)
{
found = LookupDictList(word,phonemes_ptr,&dictionary_flags2,end_flags | (wflags << 16));
found = LookupDictList(&wordx,phonemes_ptr,&dictionary_flags2,end_flags | (wflags << 16)); // without prefix and suffix
if(phonemes_ptr[0] == phonSWITCH)
{
// change to another language in order to translate this word
memcpy(word,word_copy,strlen(word_copy));
memcpy(wordx,word_copy,strlen(word_copy));
strcpy(word_phonemes,phonemes_ptr);
return(0);
}
@@ -895,16 +895,16 @@ strcpy(phonemes2,phonemes);
else
{
if(end_flags & FLAG_SUFX)
TranslateRules(word, phonemes, N_WORD_PHONEMES, NULL,wflags | FLAG_SUFFIX_REMOVED, dictionary_flags);
TranslateRules(wordx, phonemes, N_WORD_PHONEMES, NULL,wflags | FLAG_SUFFIX_REMOVED, dictionary_flags);
else
TranslateRules(word, phonemes, N_WORD_PHONEMES, NULL,wflags,dictionary_flags);
TranslateRules(wordx, phonemes, N_WORD_PHONEMES, NULL,wflags,dictionary_flags);

if(phonemes[0] == phonSWITCH)
{
// change to another language in order to translate this word
strcpy(word_phonemes,phonemes);
memcpy(word,word_copy,strlen(word_copy));
word[-1] = c_temp;
memcpy(wordx,word_copy,strlen(word_copy));
wordx[-1] = c_temp;
return(0);
}
}
@@ -917,7 +917,7 @@ strcpy(phonemes2,phonemes);
end_phonemes[0] = 0;
}
}
word[-1] = c_temp;
wordx[-1] = c_temp;
}
}

@@ -1047,7 +1047,7 @@ strcpy(phonemes2,phonemes);
expect_past = 0;
}

if((word[0] != 0) && (!(dictionary_flags & FLAG_VERB_EXT)))
if((wordx[0] != 0) && (!(dictionary_flags & FLAG_VERB_EXT)))
{
if(expect_verb > 0)
expect_verb -= 1;

+ 8
- 2
src/translate.h View File

@@ -36,10 +36,12 @@
// bits 0-3 stressed syllable, 7=unstressed
#define FLAG_SKIPWORDS 0x80
#define FLAG_PREPAUSE 0x100

#define FLAG_ONLY 0x200
#define FLAG_ONLY_S 0x400
#define BITNUM_FLAG_ONLY 9 // bit 9 is set
#define BITNUM_FLAG_ONLY_S 10 // bit 10 is set
#define FLAG_ONLY_S 0x400
#define FLAG_STRESS_END 0x800 /* full stress if at end of clause */
#define FLAG_STRESS_END2 0x1000 /* full stress if at end of clause, or only followed by unstressed */
#define FLAG_UNSTRESS_END 0x2000 /* reduce stress at end of clause */
@@ -62,6 +64,9 @@
#define FLAG_PASTF 0x8000000 /* past tense follows */
#define FLAG_VERB_EXT 0x10000000 /* extend the 'verb follows' */

#define FLAG_DICTTEXT 0x20000000 // word translates to replacement text, not phonemes
#define BITNUM_FLAG_DICTTEXT 29

#define FLAG_PAUSE1 0x40000000 // shorter prepause
#define FLAG_FOUND 0x80000000 /* pronunciation was found in the dictionary list */

@@ -92,6 +97,7 @@
#define SUFX_Q 0x4000 // don't retranslate
#define SUFX_T 0x10000 // don't affect the stress position in the stem

#define FLAG_ALLOW_DICTTEXT 0x02 // allow dictionary to translate to text rather than phonemes
#define FLAG_SUFX 0x04
#define FLAG_SUFX_S 0x08
#define FLAG_SUFX_E_ADDED 0x10
@@ -417,7 +423,7 @@ protected:
virtual int ChangePhonemes(PHONEME_LIST2 *phlist, int n_ph, int index, PHONEME_TAB *ph, CHANGEPH *ch);

int IsVowel(int letter);
int LookupDictList(char *word1, char *ph_out, unsigned int *flags, int end_flags);
int LookupDictList(char **wordptr, char *ph_out, unsigned int *flags, int end_flags);
int Lookup(char *word, char *ph_out);


Loading…
Cancel
Save