Browse Source

[1.41.10]

Fixes for ordinal numbers (lang=hu).


git-svn-id: https://espeak.svn.sourceforge.net/svnroot/espeak/trunk@225 d46cf337-b52f-0410-862d-fd96e6ae7743
master
jonsd 15 years ago
parent
commit
646f5aead0

+ 13
- 0
dictsource/dict_phonemes View File

@@ -589,3 +589,16 @@ a e i o u
g h j J k l m n
N n^ p q R s S S;
t tS v w x z


Dictionary pa_dict

a a~ e E e~ E~ i I
i: i~ I~ o O o: O~ o~
r- U u u~ U~ V V~

: b bh c ch d d. d.h
dh f g gh H j J Jh
k kh l l. m n N n.
n^ p ph Q R s S t
t. t.h th v x z

+ 3
- 2
dictsource/en_list View File

@@ -262,8 +262,8 @@ U+32f n,0nsI2l'abI2k

// numeric

//_0 zero $text // TEST
_0 z'i@roU
_0 zero $text // TEST
//_0 z'i@roU
_1 w'0n
?6 _1 w'Vn
_2 t'u:
@@ -323,6 +323,7 @@ _70o s'Ev@nti@
_80o 'eIti@
_90o n'aInti@
_0Co h'Vndr@dT
_0M1o T'aUz@ndT


// ABBREVIATIONS

+ 15
- 2
dictsource/hu_list View File

@@ -54,8 +54,9 @@ _0Z4 ti:zEzR2Ed
// ordinal numbers

_ord Edik
_0o n'ullAdik
_1ox ElSY: // number = '1' only
_2ox ma:Sodik // number = '2' only
_2ox ma:Sodik // number = '2' only
_1o EJ:Edik
_2o kEt:Edik
_3o hAR2_mAdik
@@ -72,6 +73,9 @@ _60o hAtvAnAdik
_80o n^oltsvAnAdik
_0Co sa:zAdik
_2Co ke:tsa:zAdik
_0M1o EzR2Edik
_1M1o EzR2Edik
_2M1o ke:tEzR2Edik

// These words mean that a dot after number, immediately preceding, does not mean an ordinal number
január $alt
@@ -100,6 +104,15 @@ okt $alt
now $alt
dec $alt

// the following 7 lines are the 7 Hungarian day names
hétfő $alt
kedd $alt
szerda $alt
csütörtök $alt
péntek $alt
szombat $alt
vasárnap $alt

// accent names
_lig ligAtu:R2A
_acu e:lES
@@ -129,7 +142,7 @@ mm milime:tER2
a.m An^n^i||m'int $dot
dr doktoR2 $dot
gpu $abbrev
mvgyosz $abbrev
mvgyosz Emve:Je:oEs
OTP $abbrev
id idY:SEb: $dot
ill illEtvE $dot

+ 35
- 6
dictsource/hu_rules View File

@@ -21,7 +21,7 @@ _) alattvaló _!'AlAtvAlo:

.group b
b b
// bb b:
bb b:

biz (A b'iz, // bizalmas etc.
biedermeier bi:dER2ma:jER2
@@ -32,7 +32,8 @@ _) alattvaló _!'AlAtvAlo:
ccs tS:
!) cz ts // in names which start with a capital letter
ar) csz (e ts
anar) ch (i C
harmin) c ts
anar) ch (i C
hierar) ch (i C
me) ch (a C
te) ch (n C
@@ -71,13 +72,16 @@ pá) c ts

.group d
d d
dd d:
dts tS:
dt t:
a) dsz ts:
cselé) d d
enge) dsz ts:
engedelmeske) dsz ts:
hazu) dsz ts:
mara) dsz ts:
tu) dsz ts:
kére) dz ts
// dd d:
dz dz
@@ -98,6 +102,7 @@ kére) dz ts
dj J:
min) dny n^


.group e
e E
D_-_) es (_ %ES
@@ -115,7 +120,7 @@ _) egyezség _!'EJ:ESSe:g

.group g
g g
// gg g:
gg g:
gy J
ggy J:

@@ -125,7 +130,7 @@ _) egyezség _!'EJ:ESSe:g
A) gysz (A Js:
C) gyj J
A) gyj (A J:
ha) gyj J:
_e) gy (es J:
_e) gy (et_ J:
_e) gy (etlen J:
@@ -175,6 +180,7 @@ _) kétség ke:tS:e:g

.group l
l l
lj j
ly j
lly jj

@@ -182,9 +188,12 @@ _) kétség ke:tS:e:g
á) ll (j j
beszé) lj jj
bére) lj jj
fáj) l l
fájla) l (j jj
gondo) lj jjj
ke) lj jj
sajná) lj jj
sajná) lj jjj
llj jjj
C) ly (_ li
_kéth) ly li
szamue) lly lli
@@ -259,7 +268,7 @@ r R2
sz s
ssz ss2
szts stS:
föld) s (ánc S
s (színű S
hel) s (inki z
ki) s (ebb SS
@@ -296,18 +305,27 @@ pénze) s S
tc ts:
tt t:
tt (C tt
ttn t:n
ty c
// s) ty c: //
// z) ty c: // keztyű
ttj tc:
tty c:
apá) ts (ág tS:
becsüle) t t
cson) t t
cson) tj c
ezüs) t t
éle) t t

felej) ts tS
já) tsz (ani ts:
tsz ts:
szorí) ts (a tS:
ürí) ts (e tS:
mula) ts tS:
nemze) ts tS:
néme) ts (ég tS:
min) t t
tse tSE
tso tSo
@@ -315,6 +333,7 @@ tsa tSA
tsá tSa:
tsd tSd
tsé tSe:
t (cs t
ttsé tS:e:
C) tj c
A) tj (A c:
@@ -324,12 +343,14 @@ C) tj c
_ka) ty (n ti

vörösmar) ty ti
á) t (sza t
á) t (jár t
á) t (sző t
á) t (szú t
á) t (szű t
bizo) tts (ág tS:
állí) ts tS:
állapo) t (sor t
bará) ts tS:
dön) ts (ön tS
elhivato) tts (ág tS:
@@ -337,10 +358,16 @@ folyama) t (jel t
kiál) ts tS
köve) ts (ég tS:
kür) t t
kür) tj c
korlátozo) tts tS:
neve) ts (ég tS:
on) ts (u tS
szen)t (szék t
szé) t t
szöve) ts (ég tS:
pillan) ts tS
romlo) tts tS:
tar) ts tS
á) t (sor t
á) t (sé t
ne) t (c t
@@ -358,6 +385,7 @@ lé) t (szám t
ké) t (száz t
ö) t (száz t
ha) t (száz t
hé) t (száz t
vé) ts (ég tS:


@@ -399,6 +427,7 @@ befeje) z (te s
bi) z (tons s
bi) z (tos s
bron) z (sz z
csontvá) z z
e) z (t s
ho) z (ta s
ho) z (tá s

+ 13
- 9
dictsource/ta_list View File

@@ -1,4 +1,5 @@
//_xx விழுக்காடு $text // TESTING doesn't reduce vowels

// This file is UTF8 encoded
// Spelling to phoneme words and exceptions for Tamil

@@ -76,8 +77,8 @@ _! a:ctS:Vr,ijVkk,URi
U+bf9 ru:ba:j

// abbreviations
ரூ ru:ba:j $dot
Rs ru:ba:j $dot
ரூ ru:ba:j $dot
Rs ru:pi:z $dot

// numbers
_0 suz.ijVm // சுழியம்
@@ -139,20 +140,23 @@ _1M1x a:jirVm
_0M1 a:jirVttU
_1M1 a:jirVttU

_0M2x lVd.tSVm
_0M2x lVd.tSVm // 100,000
_1M2x orUlVd.tSVm
_0M2 lVd.tSVttU
_1M2 orUlVd.tSVttU

_0M3x ko:d.i
_0M3x ko:d.i // 10,000,000
_1M3x orUko:d.i
_0M3 ko:d.ie:
_1M3 orUko:d.ie:

_0M4 nu:RUko:d.i // not correct, but should be understandable
_1M4 nu:RUko:d.i
_0M5 patta:jiRUmko:d.i
_1M5 patta:jiRUmko:d.i
_0M4x a:jirVmko:d.i // 10,000,000,000
_1M4x a:jirVmko:d.i
_0M4 a:jirVmko:d.ie:
_1M4 a:jirVmko:d.ie:

_0M5 a:jirVma:jirVmko:d.i // 10,000,000,000,000
_1M5 a:jirVma:jirVmko:d.i

_dpt _pul.l.i

+ 11
- 5
dictsource/ta_rules View File

@@ -16,8 +16,11 @@
௭ 7
௮ 8
௯ 9
ொ ொ
ோ ோ
ௌ ௌ


.
.group 0xe0ae // characters which start with UTF-8 bytes: [e0 ae]

ஂ // anusvara
@@ -39,7 +42,7 @@
எ e
_) எ ;e // add a short [j] sound at start of word ?

ஏ e::
ஏ e:
_) ஏ ;e:: // add a short [j] sound at start of word ?

ஐ aI
@@ -65,12 +68,14 @@
ங NV
ங (B N

zV
ச (B z
sV // ?? [z]
ச (B s
_) ச sa
_) ச (B s
ச்ச tS:V
ச்ச (B tS:
ற்ச tS:
ற்ச (B tS:
ட்) ச tSV
ட்) ச (B tS
ஞ்) ச dZV
@@ -112,7 +117,7 @@
ப்ப ppV
ப்ப (B pp
ட்) ப pV
ட்) ப (B pV
ட்) ப (B p
ற்) ப pV
ற்) ப (B p
ஃ) ப fV
@@ -184,6 +189,7 @@

ௌ aU


் // virama

ௗ : // aU length mark

+ 26
- 10
phsource/compile_report View File

@@ -1,4 +1,4 @@
60 phoneme tables
61 phoneme tables
new total
base 103 103
base2 26 124
@@ -17,8 +17,8 @@
fi 40 134
fr 55 141
fr_ca 11 141
hi 60 149
ta 20 152
hi 62 151
ta 20 154
hu 23 119
lv 29 126
nl 28 126
@@ -53,13 +53,14 @@
hy 24 119
da 21 118
rw 15 131
ml 13 151
kn 15 151
bn 59 155
ne 18 157
mr 12 149
ml 13 153
kn 15 153
bn 59 157
ne 18 159
mr 12 151
eu 6 125
mn 15 114
pa 12 152

Data file Used by
b/b [b] base
@@ -1341,7 +1342,8 @@ vnasal/e_n [e~] af
vnasal/i_n [i~] pt
[i~] bn
[i:~] bn
vnasal/i_n2 [i~] hi
vnasal/i_n2 [I~] hi
[i~] hi
vnasal/m- [m-] sw
vnasal/n- [n-] sw
vnasal/nn- [N-] sw
@@ -1356,7 +1358,8 @@ vnasal/oo_n2 [O~] hi
[o] zh
[O~] bn
vnasal/oo_n3 [O~] pl
vnasal/u_n [u~] hi
vnasal/u_n [U~] hi
[u~] hi
[u~] pt
[u] zh
[u~] bn
@@ -1634,6 +1637,7 @@ vowel/aa_9 [a] fi
[a:] hi
[a:] bn
[a] ne
[a] pa
vowel/a_en [A] fr
vowel/@_bck [@] hi
[@/] hi
@@ -1641,6 +1645,7 @@ vowel/@_bck [@] hi
[@] bn
[V] ne
[@/] ne
[@] pa
vowel/e [e] base2
[e:] en
[eI] en_n
@@ -1681,6 +1686,7 @@ vowel/e_2 [eI] en_sc
[e:] no
[e] bn
[e:] bn
[e] pa
vowel/e_3 [i] en_n
[e:] hu
[e] ku
@@ -1707,6 +1713,7 @@ vowel/ee_2 [E] en
[E] zh
[E#] ku
[&] da
[E] pa
vowel/ee#_2 [E-] sv
[E#] sq
vowel/ee_3 [&] af
@@ -1833,6 +1840,7 @@ vowel/i_fnt [i:] en_wi
[i] bn
[i:] bn
[i:] mr
[i] pa
vowel/ii [I] en_n
[I2] en_n
[I] en_rp
@@ -1864,6 +1872,7 @@ vowel/ii_3 [I] cy
[I] no
[I] tr
[I] bn
[I] pa
vowel/ii#_3 [I2] en_us
vowel/ii_4 [I] en
[I2] en
@@ -1901,6 +1910,8 @@ vowel/@_low [3] en_rp
[@/] ne
[@] mr
[V] mr
[@] pa
[V] pa
vowel/@_low2 [@/] en_us
[@2] en_us
vowel/o [o] base2
@@ -1937,6 +1948,7 @@ vowel/o_2 [o:] cy
[o] vi
[o] da
[o:] bn
[o:] pa
vowel/o-_2 [V] en_n
[V] en_wm
vowel/o_3 [oU] en_sc
@@ -2005,6 +2017,7 @@ vowel/oo_4 [O] base2
[O:] hi
[O] it
[O] bn
[O] pa
vowel/oo_5 [O] pl
[O] is
[O] sq
@@ -2095,6 +2108,7 @@ vowel/u_bck [u] base2
[U] mr
[u:] mr
[u] mn
[u] pa
vowel/u_bck2 [u] fr
[u:] fr
[u:] la
@@ -2120,6 +2134,7 @@ vowel/uu_bck [U] en_wi
[u] zhy
[U] bn
[U] mn
[U] pa
vowel/V [3] en_sc
vowel/V_2 [V] en
[a] af
@@ -2130,6 +2145,7 @@ vowel/V_3 [V] en_rp
[V] hi
[V] ta
[V] bn
[V] pa
vowel/V_4 [V] en_sc
[V] da
vowel/V_6 [V] en_us

+ 13
- 1
phsource/ph_hindi View File

@@ -166,6 +166,12 @@ phoneme i~
formants vnasal/i_n2
endphoneme

phoneme I~
vowel starttype (i) endtype (i)
length 170
formants vnasal/i_n2
endphoneme

phoneme e~
vowel long starttype (e) endtype (e)
length 220
@@ -173,7 +179,7 @@ phoneme e~
endphoneme

phoneme E~
vowel starttype (e) endtype (e)
vowel long starttype (e) endtype (e)
length 230
formants vnasal/ee_n2
endphoneme
@@ -208,6 +214,12 @@ phoneme u~
formants vnasal/u_n
endphoneme

phoneme U~
vowel starttype (u) endtype (u)
length 170
formants vnasal/u_n
endphoneme


phoneme r-
vowel starttype (@) endtype (@)

+ 1
- 1
phsource/ph_tamil View File

@@ -39,7 +39,7 @@ endphoneme

phoneme e:
vowel starttype (e) endtype (e)
length 270
length 250
formants vowel/e
endphoneme


+ 2
- 0
phsource/phonemes View File

@@ -1369,3 +1369,5 @@ include ph_basque
phonemetable mn base
include ph_mongolian

phonemetable pa hi
include ph_punjabi

+ 32
- 11
src/compiledict.cpp View File

@@ -49,6 +49,7 @@ static int transpose_min;
static int transpose_max;
static int text_mode = 0;
static int debug_flag = 0;
static int error_need_dictionary = 0;

static int hash_counts[N_HASH_DICT];
static char *hash_chains[N_HASH_DICT];
@@ -223,7 +224,6 @@ static int compile_line(char *linebuf, char *dict_line, int *hash)
static char nullstring[] = {0};

WORD_TAB winfo;
char decoded_phonemes[128];

comment = NULL;
text_not_phonemes = 0;
@@ -432,23 +432,37 @@ step=1; // TEST
if(text_mode)
text_not_phonemes = 1;

if(text_not_phonemes != translator->langopts.textmode)
{
flag_codes[n_flag_codes++] = BITNUM_FLAG_TEXTMODE;
}

if(text_not_phonemes)
{
if(word[0] == '_')
{
// This is a special word, used by eSpeak. Translate this into phonemes now
// memset(&winfo,0,sizeof(winfo));
// TranslateWord(translator,phonetic,0,&winfo); // but *_dict is not loaded ?
// DecodePhonemes(word_phonemes,decoded_phonemes);
memset(&winfo,0,sizeof(winfo));
strcat(phonetic, " "); // need a space to indicate word-boundary

// PROBLEM vowel reductions are not applied to the translated phonemes
// condition rules are not applied
TranslateWord(translator,phonetic,0,&winfo);
text_not_phonemes = 0;
strncpy0(encoded_ph, word_phonemes, N_WORD_BYTES-4);

if((word_phonemes[0] == 0) && (error_need_dictionary < 3))
{
// the dictionary was not loaded, we need a second attempt
error_need_dictionary++;
fprintf(f_log,"%5d: Need to compile dictionary again\n",linenum);
}
{
//char decoded_phonemes[128];
//DecodePhonemes(word_phonemes,decoded_phonemes);
//printf("Translator %x %s [%s] [%s]\n",translator->translator_name,word,phonetic,decoded_phonemes);
}
}
else
{
// this is replacement text, so don't encode as phonemes. Restrict the length of the replacement word
strncpy0(encoded_ph,phonetic,N_WORD_BYTES-4);
}
// this is replacement text, so don't encode as phonemes. Restrict the length of the replacement word
strncpy0(encoded_ph,phonetic,N_WORD_BYTES-4);
}
else
{
@@ -473,6 +487,12 @@ step=1; // TEST
}
}

if(text_not_phonemes != translator->langopts.textmode)
{
flag_codes[n_flag_codes++] = BITNUM_FLAG_TEXTMODE;
}


if(sscanf(word,"U+%x",&wc) == 1)
{
// Character code
@@ -1599,6 +1619,7 @@ int CompileDictionary(const char *dsource, const char *dict_name, FILE *log, cha
char path[sizeof(path_home)+40]; // path_dsource+20

error_count = 0;
error_need_dictionary = 0;
memset(letterGroupsDefined,0,sizeof(letterGroupsDefined));

debug_flag = flags & 1;

+ 0
- 3
src/dictionary.cpp View File

@@ -241,9 +241,6 @@ int LoadDictionary(Translator *tr, const char *name, int no_error)

strcpy(dictionary_name,name); // currently loaded dictionary name

if(no_error) // don't load dictionary, just set the dictionary_name
return(1);

// Load a pronunciation data file into memory
// bytes 0-3: offset to rules data
// bytes 4-7: number of hash table entries

+ 3
- 2
src/espeakedit.cpp View File

@@ -45,12 +45,12 @@


#ifdef deleted
static const char *about_string = "espeakedit: %s\nAuthor: Jonathan Duddington (c) 2007\n\n"
static const char *about_string = "espeakedit: %s\nAuthor: Jonathan Duddington (c) 2009\n\n"
"Licensed under GNU General Public License version 3\n"
"http://espeak.sourceforge.net/";
#endif

static const char *about_string = "<font size=0><b>espeakedit </b> %s<br>Author: Jonathan Duddington (c) 2007<br>"
static const char *about_string = "<font size=0><b>espeakedit </b> %s<br>Author: Jonathan Duddington (c) 2009<br>"
"<a href=\"http://espeak.sourceforge.net/\">http://espeak.sourceforge.net</a><br>"
"Licensed under <a href=\"http://espeak.sourceforge.net/license.html\">GNU General Public License version 3</a></font>";

@@ -585,6 +585,7 @@ void MyFrame::OnTools(wxCommandEvent& event)
sprintf(fname_log,"%s%s",path_dsource,"dict_log");
log = fopen(fname_log,"w");

LoadDictionary(translator, dictionary_name, 0);
if((err = CompileDictionary(path_dsource,dictionary_name,log,err_fname,debug_flag)) < 0)
{
wxLogError(_T("Can't access file:\n")+wxString(err_fname,wxConvLocal));

+ 65
- 34
src/numbers.cpp View File

@@ -576,7 +576,7 @@ void LookupLetter(Translator *tr, unsigned int letter, int next_byte, char *ph_b
ph_stress[0] = phonSTRESS_P;
ph_stress[1] = 0;

for(p=(unsigned char *)ph_buf3; *p != 0; p++)
for(p=(unsigned char *)ph_buf3; (*p != 0) && (phoneme_tab[*p] != NULL); p++)
{
if(phoneme_tab[*p]->type == phSTRESS)
ph_stress[0] = 0; // stress is already marked
@@ -865,6 +865,7 @@ static const char *M_Variant(int value)

static int LookupThousands(Translator *tr, int value, int thousandplex, int thousands_exact, char *ph_out)
{//=======================================================================================================
// thousands_exact: bit 0 no hundreds,tens,or units, bit 1 ordinal number
int found;
int found_value=0;
char string[12];
@@ -874,11 +875,20 @@ static int LookupThousands(Translator *tr, int value, int thousandplex, int thou
ph_of[0] = 0;

// first look for a match with the exact value of thousands
if(thousands_exact)
if(thousands_exact & 1)
{
// is there a different pronunciation if there are no hundreds,tens,or units ? (LANG=ta)
sprintf(string,"_%dM%dx",value,thousandplex);
found_value = Lookup(tr, string, ph_thousands);
if(thousands_exact & 2)
{
// ordinal number
sprintf(string,"_%dM%do",value,thousandplex);
found_value = Lookup(tr, string, ph_thousands);
}
if(!found_value)
{
// is there a different pronunciation if there are no hundreds,tens,or units ? (LANG=ta)
sprintf(string,"_%dM%dx",value,thousandplex);
found_value = Lookup(tr, string, ph_thousands);
}
}
if(found_value == 0)
{
@@ -894,11 +904,20 @@ static int LookupThousands(Translator *tr, int value, int thousandplex, int thou
}

found = 0;
if(thousands_exact)
if(thousands_exact & 1)
{
// is there a different pronunciation if there are no hundreds,tens,or units ?
sprintf(string,"_%s%dx",M_Variant(value), thousandplex);
found = Lookup(tr, string, ph_thousands);
if(thousands_exact & 2)
{
// ordinal number
sprintf(string,"_%s%do",M_Variant(value), thousandplex);
found = Lookup(tr, string, ph_thousands);
}
if(!found)
{
// is there a different pronunciation if there are no hundreds,tens,or units ?
sprintf(string,"_%s%dx",M_Variant(value), thousandplex);
found = Lookup(tr, string, ph_thousands);
}
}
if(found == 0)
{
@@ -994,7 +1013,7 @@ static int LookupNum2(Translator *tr, int value, int control, char *ph_out)
{
units = (value % 10);

if((control & 1) && ((units == 0) || (tr->langopts.numbers & 0x10)))
if((control & 1) && ((units == 0) || (tr->langopts.numbers & NUM_SWAP_TENS)))
{
sprintf(string,"_%dXo",value / 10);
if(Lookup(tr, string, ph_tens) != 0)
@@ -1026,7 +1045,7 @@ static int LookupNum2(Translator *tr, int value, int control, char *ph_out)
sprintf(string,"_%df",units);
found = Lookup(tr, string, ph_digits);
}
if((control & 1) && ((tr->langopts.numbers & 0x10) == 0))
if((control & 1) && ((tr->langopts.numbers & NUM_SWAP_TENS) == 0))
{
// ordinal
sprintf(string,"_%do",units);
@@ -1046,16 +1065,16 @@ static int LookupNum2(Translator *tr, int value, int control, char *ph_out)

if((control & 1) && (found_ordinal == 0) && (ph_ordinal[0] == 0))
{
if((value >= 20) && (((value % 10) == 0) || (tr->langopts.numbers & 0x10)))
if((value >= 20) && (((value % 10) == 0) || (tr->langopts.numbers & NUM_SWAP_TENS)))
Lookup(tr, "_ord20", ph_ordinal);
if(ph_ordinal[0] == 0)
Lookup(tr, "_ord", ph_ordinal);
}

if((tr->langopts.numbers & 0x30) && (ph_tens[0] != 0) && (ph_digits[0] != 0))
if((tr->langopts.numbers & (NUM_SWAP_TENS | NUM_AND_UNITS)) && (ph_tens[0] != 0) && (ph_digits[0] != 0))
{
Lookup(tr, "_0and", ph_and);
if(tr->langopts.numbers & 0x10)
if(tr->langopts.numbers & NUM_SWAP_TENS)
sprintf(ph_out,"%s%s%s%s",ph_digits, ph_and, ph_tens, ph_ordinal);
else
sprintf(ph_out,"%s%s%s%s",ph_tens, ph_and, ph_digits, ph_ordinal);
@@ -1063,7 +1082,7 @@ static int LookupNum2(Translator *tr, int value, int control, char *ph_out)
}
else
{
if(tr->langopts.numbers & 0x200)
if(tr->langopts.numbers & NUM_SINGLE_VOWEL)
{
// remove vowel from the end of tens if units starts with a vowel (LANG=Italian)
if(((ix = strlen(ph_tens)-1) >= 0) && (ph_digits[0] != 0))
@@ -1078,7 +1097,7 @@ static int LookupNum2(Translator *tr, int value, int control, char *ph_out)
sprintf(ph_out,"%s%s%s",ph_tens, ph_digits, ph_ordinal);
}

if(tr->langopts.numbers & 0x100)
if(tr->langopts.numbers & NUM_SINGLE_STRESS)
{
// only one primary stress
found = 0;
@@ -1107,6 +1126,7 @@ static int LookupNum3(Translator *tr, int value, char *ph_out, int suppress_null
int tensunits;
int x;
int exact;
int ordinal;
char string[12]; // for looking up entries in **_list
char buf1[100];
char buf2[100];
@@ -1117,6 +1137,7 @@ static int LookupNum3(Translator *tr, int value, char *ph_out, int suppress_null
char ph_hundred_and[12];
char ph_thousand_and[12];
ordinal = control & 2;
hundreds = value / 100;
tensunits = value % 100;
buf1[0] = 0;
@@ -1127,7 +1148,7 @@ static int LookupNum3(Translator *tr, int value, char *ph_out, int suppress_null
ph_thousand_and[0] = 0;

found = 0;
if((control & 2) && (tensunits == 0))
if(ordinal && (tensunits == 0))
{
// ordinal number, with no tens or units
found = Lookup(tr, "_0Co", ph_100);
@@ -1137,7 +1158,7 @@ static int LookupNum3(Translator *tr, int value, char *ph_out, int suppress_null
Lookup(tr, "_0C", ph_100);
}

if(((tr->langopts.numbers & 0x0800) != 0) && (hundreds == 19))
if(((tr->langopts.numbers & NUM_1900) != 0) && (hundreds == 19))
{
// speak numbers such as 1984 as years: nineteen-eighty-four
// ph_100[0] = 0; // don't say "hundred", we also need to suppress "and"
@@ -1151,7 +1172,7 @@ static int LookupNum3(Translator *tr, int value, char *ph_out, int suppress_null
if ((value % 1000) == 0)
exact = 1;

if(LookupThousands(tr, hundreds / 10, thousandplex+1, exact, ph_10T) == 0)
if(LookupThousands(tr, hundreds / 10, thousandplex+1, exact | ordinal, ph_10T) == 0)
{
x = 0;
if(tr->langopts.numbers2 & (1 << (thousandplex+1)))
@@ -1173,7 +1194,7 @@ static int LookupNum3(Translator *tr, int value, char *ph_out, int suppress_null
ph_digits[0] = 0;
if(hundreds > 0)
{
if((tr->langopts.numbers & 0x100000) && ((control & 1) || (ph_thousands[0] != 0)))
if((tr->langopts.numbers & NUM_AND_HUNDRED) && ((control & 1) || (ph_thousands[0] != 0)))
{
Lookup(tr, "_0and", ph_thousand_and);
}
@@ -1184,8 +1205,18 @@ static int LookupNum3(Translator *tr, int value, char *ph_out, int suppress_null
if(tensunits == 0)
{
// is there a special pronunciation for exactly n00 ?
sprintf(string,"_%dC0",hundreds);
found = Lookup(tr, string, ph_digits);

if(ordinal)
{
// ordinal number
sprintf(string, "_%dCo", hundreds);
found = Lookup(tr, string, ph_digits);
}
if(!found)
{
sprintf(string,"_%dC0",hundreds);
found = Lookup(tr, string, ph_digits);
}
}
if(!found)
{
@@ -1210,7 +1241,7 @@ static int LookupNum3(Translator *tr, int value, char *ph_out, int suppress_null
}

ph_hundred_and[0] = 0;
if((tr->langopts.numbers & 0x40) && (tensunits != 0))
if((tr->langopts.numbers & NUM_HUNDRED_AND) && (tensunits != 0))
{
if((value > 100) || ((control & 1) && (thousandplex==0)))
{
@@ -1227,7 +1258,7 @@ static int LookupNum3(Translator *tr, int value, char *ph_out, int suppress_null
if(thousandplex==0)
{
x = 2; // allow "eins" for 1 rather than "ein"
if(control & 2)
if(ordinal)
x = 3; // ordinal number
if((value < 100) && !(control & 1))
x |= 4; // tens and units only, no higher digits
@@ -1240,7 +1271,7 @@ static int LookupNum3(Translator *tr, int value, char *ph_out, int suppress_null

if(LookupNum2(tr, tensunits, x, buf2) != 0)
{
if(tr->langopts.numbers & 0x80)
if(tr->langopts.numbers & NUM_SINGLE_AND)
ph_hundred_and[0] = 0; // don't put 'and' after 'hundred' if there's 'and' between tens and units
}
}
@@ -1293,7 +1324,7 @@ static int TranslateNumber_1(Translator *tr, char *word, char *ph_out, unsigned
value = this_value = atoi(word);

ph_ordinal2[0] = 0;
if((tr->langopts.numbers & 0x10000) && (word[ix] == '.') && !isdigit(word[ix+2]))
if((tr->langopts.numbers & NUM_ORDINAL_DOT) && (word[ix] == '.') && !isdigit(word[ix+2]))
{
// ordinal number is indicated by dot after the number
ordinal = 2;
@@ -1352,7 +1383,7 @@ static int TranslateNumber_1(Translator *tr, char *word, char *ph_out, unsigned
prev_thousands = 1;
}
else
if((tr->langopts.thousands_sep == ' ') || (tr->langopts.numbers & 0x1000))
if((tr->langopts.thousands_sep == ' ') || (tr->langopts.numbers & NUM_ALLOW_SPACE))
{
// thousands groups can be separated by spaces
if((n_digits == 3) && isdigit(word[-2]))
@@ -1373,7 +1404,7 @@ static int TranslateNumber_1(Translator *tr, char *word, char *ph_out, unsigned
}
}

if((tr->langopts.numbers & 0x1000) && (word[n_digits] == ' '))
if((tr->langopts.numbers & NUM_ALLOW_SPACE) && (word[n_digits] == ' '))
thousands_inc = 1;
else
if(word[n_digits] == tr->langopts.thousands_sep)
@@ -1462,9 +1493,9 @@ static int TranslateNumber_1(Translator *tr, char *word, char *ph_out, unsigned
max_decimal_count = 2;
switch(decimal_mode = (tr->langopts.numbers & 0xe000))
{
case 0x8000:
case NUM_DFRACTION_4:
max_decimal_count = 5;
case 0x4000:
case NUM_DFRACTION_2:
// French/Polish decimal fraction
while(word[n_digits] == '0')
{
@@ -1481,8 +1512,8 @@ static int TranslateNumber_1(Translator *tr, char *word, char *ph_out, unsigned
}
break;

case 0x2000: // italian, say "hundredths" is leading zero
case 0xa000: // hungarian, always say "tenths" etc.
case NUM_DFRACTION_1: // italian, say "hundredths" is leading zero
case NUM_DFRACTION_5: // hungarian, always say "tenths" etc.
if(decimal_count <= 4)
{
LookupNum3(tr, atoi(&word[n_digits]), ph_buf, 0,0,0);
@@ -1500,7 +1531,7 @@ static int TranslateNumber_1(Translator *tr, char *word, char *ph_out, unsigned
}
break;

case 0x6000:
case NUM_DFRACTION_3:
// Romanian decimal fractions
if((decimal_count <= 4) && (word[n_digits] != '0'))
{
@@ -1560,7 +1591,7 @@ int TranslateNumber(Translator *tr, char *word1, char *ph_out, unsigned int *fla
if(option_sayas == SAYAS_DIGITS1)
return(0); // speak digits individually

if((tr->langopts.numbers & 0x3) == 1)
if(tr->langopts.numbers != 0)
return(TranslateNumber_1(tr, word1, ph_out, flags, wflags));

return(0);

+ 48
- 28
src/readclause.cpp View File

@@ -67,8 +67,10 @@ static const char *punct_stop = ".:!?"; // pitch fall if followed by space
static const char *punct_close = ")]}>;'\""; // always pitch fall unless followed by alnum

// alter tone for announce punctuation or capitals
static const char *tone_punct_on = "\0016T"; // add reverberation, lower pitch
static const char *tone_punct_off = "\001T";
//static const char *tone_punct_on = "\0016T"; // add reverberation, lower pitch
//static const char *tone_punct_off = "\001T\001P";
static const char *tone_punct_on = ""; // add reverberation, lower pitch TEST apply no effect
static const char *tone_punct_off = "";

// ignore these characters
static const unsigned short chars_ignore[] = {
@@ -1903,6 +1905,7 @@ int ReadClause(Translator *tr, FILE *f_in, char *buf, short *charix, int *charix
int c1=' '; // current character
int c2; // next character
int cprev=' '; // previous character
int cprev2=' ';
int parag;
int ix = 0;
int j;
@@ -1916,6 +1919,8 @@ int ReadClause(Translator *tr, FILE *f_in, char *buf, short *charix, int *charix
int any_alnum = 0;
int self_closing;
int punct_data;
int is_punctuation;
int save_c2;
int stressed_word = 0;
const char *p;
wchar_t xml_buf[N_XML_BUF+1];
@@ -1975,6 +1980,7 @@ f_input = f_in; // for GetC etc
}
}

cprev2 = cprev;
cprev = c1;
c1 = c2;

@@ -2324,9 +2330,9 @@ if(option_ssml) parag=1;
if((iswspace(c2) || (punct_data & 0x8000) || IsBracket(c2) || (c2=='?') || (c2=='-') || Eof()))
{
// note: (c2='?') is for when a smart-quote has been replaced by '?'
buf[ix] = ' ';
buf[ix+1] = 0;
is_punctuation = 1;
if((c1 == '.') && (cprev == '.'))
{
c1 = 0x2026;
@@ -2334,45 +2340,59 @@ if(option_ssml) parag=1;
}
nl_count = 0;
while(!Eof() && iswspace(c2))
{
if(c2 == '\n')
nl_count++;
c2 = GetC(); // skip past space(s)
}
if(!Eof())
save_c2 = c2;

if(iswspace(c2))
{
UngetC(c2);
while(!Eof() && iswspace(c2))
{
if(c2 == '\n')
nl_count++;
c2 = GetC(); // skip past space(s)
}
if(!Eof())
{
UngetC(c2);
}
}
if((nl_count==0) && (c1 == '.'))
{
if(iswdigit(cprev) && (tr->langopts.numbers & 0x10000) && islower(c2))
// if(iswdigit(cprev) && (tr->langopts.numbers & NUM_ORDINAL_DOT) && islower(c2))
if(iswdigit(cprev) && (tr->langopts.numbers & NUM_ORDINAL_DOT))
{
// dot after a number indicates an ordinal number
c2 = '.';
continue;
is_punctuation = 0;
}
else
if(iswlower(c2))
{
c2 = ' ';
continue; // next word has no capital letter, this dot is probably from an abbreviation
// next word has no capital letter, this dot is probably from an abbreviation
c1 = ' ';
is_punctuation = 0;
}
if(any_alnum==0)
{
c2 = ' '; // no letters or digits yet, so probably not a sentence terminator
continue;
c1 = ' '; // no letters or digits yet, so probably not a sentence terminator
is_punctuation = 0;
}
}
punct_data = punct_attributes[punct];
if(nl_count > 1)
c2 = save_c2;
if(is_punctuation)
{
if((punct_data == CLAUSE_QUESTION) || (punct_data == CLAUSE_EXCLAMATION))
return(punct_data + 35); // with a longer pause
return(CLAUSE_PARAGRAPH);
buf[ix] = ' ';
buf[ix+1] = 0;
punct_data = punct_attributes[punct];
if(nl_count > 1)
{
if((punct_data == CLAUSE_QUESTION) || (punct_data == CLAUSE_EXCLAMATION))
return(punct_data + 35); // with a longer pause
return(CLAUSE_PARAGRAPH);
}
return(punct_data); // only recognise punctuation if followed by a blank or bracket/quote
}
return(punct_data); // only recognise punctuation if followed by a blank or bracket/quote
}
}


+ 1
- 1
src/synthdata.cpp View File

@@ -35,7 +35,7 @@
#include "translate.h"
#include "wave.h"

const char *version_string = "1.41.08 04.Oct.09";
const char *version_string = "1.41.11 09.Oct.09";
const int version_phdata = 0x014100;

int option_device_number = -1;

+ 53
- 44
src/tr_languages.cpp View File

@@ -48,6 +48,7 @@
#define OFFSET_ARMENIAN 0x530
#define OFFSET_DEVANAGARI 0x900
#define OFFSET_BENGALI 0x980
#define OFFSET_GURMUKHI 0xa00
#define OFFSET_TAMIL 0xb80
#define OFFSET_KANNADA 0xc80
#define OFFSET_MALAYALAM 0xd00
@@ -165,6 +166,7 @@ static Translator* NewTranslator(void)
tr->langopts.max_roman = 49;
tr->langopts.thousands_sep = ',';
tr->langopts.decimal_sep = '.';
tr->langopts.break_numbers = BREAK_THOUSANDS; // 1000, 1000,000 1,000,000 etc

memcpy(tr->punct_to_tone, punctuation_to_tone, sizeof(tr->punct_to_tone));

@@ -263,7 +265,7 @@ Translator *SelectTranslator(const char *name)
tr->langopts.param[LOPT_PREFIXES] = 1;
SetLetterVowel(tr,'y'); // add 'y' to vowels
tr->langopts.numbers = 0x8d1 + NUM_ROMAN;
tr->langopts.numbers = NUM_SWAP_TENS | NUM_HUNDRED_AND | NUM_SINGLE_AND | NUM_ROMAN | NUM_1900;
tr->langopts.accents = 1;
}
break;
@@ -283,7 +285,7 @@ Translator *SelectTranslator(const char *name)
SetLetterBitsRange(tr,LETTERGP_F,0x3e,0x4c); // vowel signs, but not virama

tr->langopts.numbers = 0x1;
tr->langopts.numbers2 = NUM2_100000;
tr->langopts.break_numbers = 0x24924aa8; // for languages which have numbers for 100,000 and 100,00,000, eg Hindi
}
break;

@@ -305,7 +307,7 @@ Translator *SelectTranslator(const char *name)
tr->langopts.unstressed_wd2 = 2;
tr->langopts.param[LOPT_SONORANT_MIN] = 120; // limit the shortening of sonorants before short vowels

tr->langopts.numbers = 0x401;
tr->langopts.numbers = NUM_OMIT_1_HUNDRED;

SetLetterVowel(tr,'w'); // add letter to vowels and remove from consonants
SetLetterVowel(tr,'y');
@@ -319,7 +321,7 @@ Translator *SelectTranslator(const char *name)

tr->langopts.stress_rule = 0;
SetLetterVowel(tr,'y');
tr->langopts.numbers = 0x10c59;
tr->langopts.numbers = NUM_DECIMAL_COMMA | NUM_SWAP_TENS | NUM_HUNDRED_AND | NUM_OMIT_1_HUNDRED | NUM_ORDINAL_DOT | NUM_1900;
}
break;

@@ -333,7 +335,7 @@ Translator *SelectTranslator(const char *name)
tr->langopts.param[LOPT_PREFIXES] = 1;
memcpy(tr->stress_lengths,stress_lengths_de,sizeof(tr->stress_lengths));
tr->langopts.numbers = 0x11419 + NUM_ROMAN;
tr->langopts.numbers = NUM_DECIMAL_COMMA | NUM_SWAP_TENS | NUM_OMIT_1_HUNDRED | NUM_ALLOW_SPACE | NUM_ORDINAL_DOT | NUM_ROMAN;
SetLetterVowel(tr,'y');
}
break;
@@ -344,7 +346,7 @@ Translator *SelectTranslator(const char *name)
SetupTranslator(tr,stress_lengths_en,NULL);

tr->langopts.stress_rule = 0;
tr->langopts.numbers = 0x841 + NUM_ROMAN;
tr->langopts.numbers = NUM_HUNDRED_AND | NUM_ROMAN | NUM_1900;
tr->langopts.param[LOPT_COMBINE_WORDS] = 2; // allow "mc" to cmbine with the following word
}
break;
@@ -381,7 +383,7 @@ Translator *SelectTranslator(const char *name)
tr->langopts.unstressed_wd2 = 2;
tr->langopts.param[LOPT_SONORANT_MIN] = 130; // limit the shortening of sonorants before short vowels

tr->langopts.numbers = 0x109;
tr->langopts.numbers = NUM_SINGLE_STRESS | NUM_DECIMAL_COMMA;
tr->langopts.numbers2 = 0x2; // variant form of numbers before thousands

if(name2 == L_grc)
@@ -410,7 +412,7 @@ Translator *SelectTranslator(const char *name)
tr->langopts.unstressed_wd1 = 3;
tr->langopts.unstressed_wd2 = 2;

tr->langopts.numbers = 0x1409 + NUM_ROMAN;
tr->langopts.numbers = NUM_DECIMAL_COMMA | NUM_OMIT_1_HUNDRED | NUM_ALLOW_SPACE | NUM_ROMAN;
}
break;

@@ -435,7 +437,7 @@ Translator *SelectTranslator(const char *name)
tr->langopts.unstressed_wd2 = 2;
tr->langopts.param[LOPT_SONORANT_MIN] = 120; // limit the shortening of sonorants before short vowels

tr->langopts.numbers = 0x529 + NUM_ROMAN + NUM_ROMAN_AFTER;
tr->langopts.numbers = NUM_SINGLE_STRESS | NUM_DECIMAL_COMMA | NUM_AND_UNITS | NUM_OMIT_1_HUNDRED | NUM_ROMAN | NUM_ROMAN_AFTER;

if(name2 == L('c','a'))
{
@@ -457,7 +459,7 @@ Translator *SelectTranslator(const char *name)
static const unsigned char stress_amps_eu[8] = {16,16, 18,18, 18,18, 18,18 };
SetupTranslator(tr,stress_lengths_eu,stress_amps_eu);
tr->langopts.stress_rule = 1; // ?? second syllable ??
tr->langopts.numbers = 0x569 + NUM_VIGESIMAL;
tr->langopts.numbers = NUM_SINGLE_STRESS + NUM_DECIMAL_COMMA | NUM_AND_UNITS | NUM_HUNDRED_AND | NUM_OMIT_1_HUNDRED | NUM_VIGESIMAL;
}
break;

@@ -474,7 +476,7 @@ Translator *SelectTranslator(const char *name)
tr->langopts.param[LOPT_IT_DOUBLING] = 1;
tr->langopts.long_stop = 130;

tr->langopts.numbers = 0x1009;
tr->langopts.numbers = NUM_DECIMAL_COMMA + NUM_ALLOW_SPACE;
SetLetterVowel(tr,'y');
// tr->langopts.max_initial_consonants = 2; // BUT foreign words may have 3
tr->langopts.spelling_stress = 1;
@@ -493,7 +495,7 @@ Translator *SelectTranslator(const char *name)
tr->langopts.stress_flags = 0x0024; // don't use secondary stress
tr->langopts.param[LOPT_IT_LENGTHEN] = 1; // remove lengthen indicator from unstressed syllables

tr->langopts.numbers = 0x1509 + 0x8000 + NUM_NOPAUSE | NUM_ROMAN | NUM_VIGESIMAL;
tr->langopts.numbers = NUM_SINGLE_STRESS + NUM_DECIMAL_COMMA | NUM_ALLOW_SPACE | NUM_OMIT_1_HUNDRED | NUM_NOPAUSE | NUM_ROMAN | NUM_VIGESIMAL | NUM_DFRACTION_4;
SetLetterVowel(tr,'y');
}
break;
@@ -508,6 +510,7 @@ Translator *SelectTranslator(const char *name)

case L('h','i'): // Hindi
case L('n','e'): // Nepali
case L('p','a'): // Punjabi
{
static const short stress_lengths_hi[8] = {190, 190, 210, 210, 0, 0, 230, 250};
static const unsigned char stress_amps_hi[8] = {17,14, 20,19, 20,22, 22,21 };
@@ -518,9 +521,15 @@ Translator *SelectTranslator(const char *name)

tr->langopts.stress_rule = 6; // stress on last heaviest syllable, excluding final syllable
tr->langopts.stress_flags = 0x10004; // use 'diminished' for unstressed final syllable
tr->langopts.numbers = 0x011;
tr->langopts.numbers2 = NUM2_100000;
tr->langopts.numbers = NUM_SWAP_TENS;
tr->langopts.break_numbers = 0x24924aa8; // for languages which have numbers for 100,000 and 100,00,000, eg Hindi
tr->letter_bits_offset = OFFSET_DEVANAGARI;

if(name2 == L('p','a'))
{
tr->langopts.numbers = 0; // no number rules yet
tr->letter_bits_offset = OFFSET_GURMUKHI;
}
SetIndicLetters(tr);
}
break;
@@ -547,7 +556,7 @@ Translator *SelectTranslator(const char *name)
tr->langopts.spelling_stress = 1;
tr->langopts.accents = 1;

tr->langopts.numbers = 0x140d + 0x4000 + NUM_ROMAN_UC;
tr->langopts.numbers = NUM_SINGLE_STRESS + NUM_HUNDRED_AND | NUM_DECIMAL_COMMA | NUM_THOUS_SPACE | NUM_DFRACTION_2 | NUM_ROMAN_UC;
tr->langopts.numbers2 = 0x4a; // variant numbers before thousands,milliards
tr->langopts.replace_chars = replace_cyrillic_latin;

@@ -573,7 +582,7 @@ Translator *SelectTranslator(const char *name)
tr->langopts.param[LOPT_IT_DOUBLING] = 1;
tr->langopts.param[LOPT_COMBINE_WORDS] = 99; // combine some prepositions with the following word

tr->langopts.numbers = 0x1009 + 0xa000 + NUM_ROMAN + NUM_ROMAN_ORDINAL + NUM_ORDINAL_DOT + NUM_OMIT_1_HUNDRED;
tr->langopts.numbers = NUM_DECIMAL_COMMA | NUM_ALLOW_SPACE | NUM_DFRACTION_5 | NUM_ROMAN | NUM_ROMAN_ORDINAL | NUM_ORDINAL_DOT | NUM_OMIT_1_HUNDRED;
SetLetterVowel(tr,'y');
tr->langopts.spelling_stress = 1;
SetLengthMods(tr,3); // all equal
@@ -595,7 +604,7 @@ SetLengthMods(tr,3); // all equal
SetLetterBits(tr,LETTERGP_A,hy_vowels);
SetLetterBits(tr,LETTERGP_C,hy_consonants);
tr->langopts.max_initial_consonants = 6;
tr->langopts.numbers = 0x409;
tr->langopts.numbers = NUM_DECIMAL_COMMA | NUM_OMIT_1_HUNDRED;
// tr->langopts.param[LOPT_UNPRONOUNCABLE] = 1; // disable check for unpronouncable words
}
break;
@@ -607,7 +616,7 @@ SetLengthMods(tr,3); // all equal

SetupTranslator(tr,stress_lengths_id,stress_amps_id);
tr->langopts.stress_rule = 2;
tr->langopts.numbers = 0x1009 + NUM_ROMAN;
tr->langopts.numbers = NUM_DECIMAL_COMMA | NUM_ALLOW_SPACE | NUM_ROMAN;
tr->langopts.stress_flags = 0x6 | 0x10;
tr->langopts.accents = 2; // "capital" after letter name
}
@@ -629,7 +638,7 @@ SetLengthMods(tr,3); // all equal
SetLetterBits(tr,3,"jvr"); // Letter group H
tr->letter_groups[1] = is_lettergroup_B;
SetLetterVowel(tr,'y');
tr->langopts.numbers = 0x8e9;
tr->langopts.numbers = NUM_DECIMAL_COMMA | NUM_SINGLE_AND | NUM_HUNDRED_AND | NUM_AND_UNITS | NUM_1900;
tr->langopts.numbers2 = 0x2;
}
break;
@@ -652,7 +661,7 @@ SetLengthMods(tr,3); // all equal
tr->langopts.param[LOPT_SONORANT_MIN] = 130; // limit the shortening of sonorants before short vowels
tr->langopts.param[LOPT_REDUCE] = 1; // reduce vowels even if phonemes are specified in it_list
tr->langopts.param[LOPT_ALT] = 2; // call ApplySpecialAttributes2() if a word has $alt or $alt2
tr->langopts.numbers = 0x2709 + NUM_ROMAN;
tr->langopts.numbers = NUM_SINGLE_VOWEL | NUM_OMIT_1_HUNDRED |NUM_DECIMAL_COMMA | NUM_ROMAN | NUM_DFRACTION_1;
tr->langopts.accents = 2; // Say "Capital" after the letter.
SetLetterVowel(tr,'y');
}
@@ -686,7 +695,7 @@ SetLengthMods(tr,3); // all equal

tr->langopts.stress_rule = 8; // ?? 1st syllable if it is heavy, else 2nd syllable
tr->langopts.param[LOPT_UNPRONOUNCABLE] = 1; // disable check for unpronouncable words
tr->langopts.numbers = 0x0401;
tr->langopts.numbers = NUM_OMIT_1_HUNDRED;
}
break;

@@ -700,7 +709,7 @@ SetLengthMods(tr,3); // all equal

tr->langopts.stress_rule = 7; // stress on the last syllable, before any explicitly unstressed syllable

tr->langopts.numbers = 0x100461;
tr->langopts.numbers = NUM_HUNDRED_AND | NUM_AND_UNITS | NUM_OMIT_1_HUNDRED | NUM_AND_HUNDRED;
tr->langopts.max_initial_consonants = 2;
}
break;
@@ -713,7 +722,7 @@ SetLengthMods(tr,3); // all equal
tr->langopts.unstressed_wd1 = 0;
tr->langopts.unstressed_wd2 = 2;
tr->langopts.param[LOPT_DIERESES] = 1;
tr->langopts.numbers = 0x1 + NUM_ROMAN;
tr->langopts.numbers = NUM_ROMAN;
tr->langopts.max_roman = 5000;
}
break;
@@ -728,7 +737,7 @@ SetLengthMods(tr,3); // all equal
tr->langopts.stress_rule = 0;
tr->langopts.spelling_stress = 1;
tr->charset_a0 = charsets[4]; // ISO-8859-4
tr->langopts.numbers = 0x409 + 0x8000 + 0x10000;
tr->langopts.numbers = NUM_DECIMAL_COMMA | NUM_OMIT_1_HUNDRED | NUM_DFRACTION_4 | NUM_ORDINAL_DOT;
tr->langopts.stress_flags = 0x16 + 0x40000;
}
break;
@@ -745,7 +754,7 @@ SetLengthMods(tr,3); // all equal
tr->letter_groups[0] = vowels_cyrillic;

tr->langopts.stress_rule = 4; // antipenultimate
tr->langopts.numbers = 0x0429 + 0x4000;
tr->langopts.numbers = NUM_DECIMAL_COMMA | NUM_AND_UNITS | NUM_OMIT_1_HUNDRED | NUM_DFRACTION_2;
tr->langopts.numbers2 = 0x8a; // variant numbers before thousands,milliards
}
break;
@@ -761,7 +770,7 @@ SetLengthMods(tr,3); // all equal
tr->langopts.param[LOPT_PREFIXES] = 1;
SetLetterVowel(tr,'y');
tr->langopts.numbers = 0x11c19;
tr->langopts.numbers = NUM_DECIMAL_COMMA | NUM_SWAP_TENS | NUM_OMIT_1_HUNDRED | NUM_ALLOW_SPACE | NUM_1900 | NUM_ORDINAL_DOT;
memcpy(tr->stress_lengths,stress_lengths_nl,sizeof(tr->stress_lengths));
}
break;
@@ -773,7 +782,7 @@ SetLengthMods(tr,3); // all equal
SetupTranslator(tr,stress_lengths_no,NULL);
tr->langopts.stress_rule = 0;
SetLetterVowel(tr,'y');
tr->langopts.numbers = 0x11849;
tr->langopts.numbers = NUM_DECIMAL_COMMA | NUM_HUNDRED_AND | NUM_ALLOW_SPACE | NUM_1900 + NUM_ORDINAL_DOT;
}
break;

@@ -784,7 +793,7 @@ SetLengthMods(tr,3); // all equal

SetupTranslator(tr,stress_lengths_om,stress_amps_om);
tr->langopts.stress_rule = 2;
tr->langopts.stress_flags = 0x16 + 0x80000;
tr->langopts.stress_flags = 2 + NUM_SWAP_TENS | NUM_THOUS_SPACE | NUM_NOPAUSE; //??
}
break;

@@ -800,8 +809,8 @@ SetLengthMods(tr,3); // all equal
tr->langopts.stress_flags = 0x6; // mark unstressed final syllables as diminished
tr->langopts.param[LOPT_REGRESSIVE_VOICING] = 0x8;
tr->langopts.max_initial_consonants = 7; // for example: wchrzczony :)
tr->langopts.numbers=0x1009 + 0x4000;
tr->langopts.numbers2=0x40;
tr->langopts.numbers = NUM_DECIMAL_COMMA | NUM_ALLOW_SPACE | NUM_DFRACTION_2;
tr->langopts.numbers2 = 0x40;
tr->langopts.param[LOPT_COMBINE_WORDS] = 4 + 0x100; // combine 'nie' (marked with $alt2) with some 1-syllable (and 2-syllable) words (marked with $alt)
SetLetterVowel(tr,'y');
}
@@ -817,7 +826,7 @@ SetLengthMods(tr,3); // all equal

tr->langopts.stress_rule = 3; // stress on final syllable
tr->langopts.stress_flags = 0x6 | 0x10 | 0x20000;
tr->langopts.numbers = 0x069 + 0x4000 + NUM_ROMAN;
tr->langopts.numbers = NUM_DECIMAL_COMMA | NUM_DFRACTION_2 | NUM_HUNDRED_AND | NUM_AND_UNITS | NUM_ROMAN;
SetLetterVowel(tr,'y');
ResetLetterBits(tr,0x2);
SetLetterBits(tr,1,"bcdfgjkmnpqstvxz"); // B hard consonants, excluding h,l,r,w,y
@@ -835,7 +844,7 @@ SetLengthMods(tr,3); // all equal
tr->langopts.stress_flags = 0x100 + 0x6;

tr->charset_a0 = charsets[2]; // ISO-8859-2
tr->langopts.numbers = 0x1029+0x6000 + NUM_ROMAN;
tr->langopts.numbers = NUM_DECIMAL_COMMA | NUM_ALLOW_SPACE | NUM_DFRACTION_3 | NUM_AND_UNITS | NUM_ROMAN;
tr->langopts.numbers2 = 0x1e; // variant numbers before all thousandplex
}
break;
@@ -850,7 +859,7 @@ SetLengthMods(tr,3); // all equal
tr->langopts.stress_flags = 0x16;
tr->langopts.length_mods0 = tr->langopts.length_mods; // don't lengthen vowels in the last syllable

tr->langopts.numbers = 0x61 + 0x100000 + 0x4000;
tr->langopts.numbers = NUM_HUNDRED_AND | NUM_AND_UNITS | NUM_DFRACTION_2 | NUM_AND_HUNDRED;
tr->langopts.numbers2 = 0x200; // say "thousands" before its number
}
break;
@@ -870,7 +879,7 @@ SetLengthMods(tr,3); // all equal
tr->langopts.spelling_stress = 1;
tr->langopts.param[LOPT_COMBINE_WORDS] = 4; // combine some prepositions with the following word

tr->langopts.numbers = 0x0401 + 0x4000 + NUM_ROMAN;
tr->langopts.numbers = NUM_OMIT_1_HUNDRED | NUM_DFRACTION_2 | NUM_ROMAN;
tr->langopts.numbers2 = 0x100;
tr->langopts.thousands_sep = 0; //no thousands separator
tr->langopts.decimal_sep = ',';
@@ -897,7 +906,7 @@ SetLengthMods(tr,3); // all equal
tr->langopts.stress_rule = 2;
tr->langopts.stress_flags = 0x16 + 0x100;
SetLetterVowel(tr,'y');
tr->langopts.numbers = 0x69 + 0x8000;
tr->langopts.numbers = NUM_DECIMAL_COMMA | NUM_HUNDRED_AND | NUM_AND_UNITS | NUM_DFRACTION_4;
tr->langopts.accents = 2; // "capital" after letter name
}
break;
@@ -911,7 +920,7 @@ SetLengthMods(tr,3); // all equal

tr->langopts.stress_rule = 0;
SetLetterVowel(tr,'y');
tr->langopts.numbers = 0x1909;
tr->langopts.numbers = NUM_SINGLE_STRESS + NUM_DECIMAL_COMMA | NUM_ALLOW_SPACE | NUM_1900;
tr->langopts.accents = 1;
}
break;
@@ -928,8 +937,8 @@ SetLengthMods(tr,3); // all equal
tr->langopts.stress_rule = 2;
tr->langopts.stress_flags = 0x6 | 0x10;

tr->langopts.numbers = 0x4e1;
tr->langopts.numbers2 = NUM2_100000a;
tr->langopts.numbers = NUM_AND_UNITS | NUM_HUNDRED_AND | NUM_SINGLE_AND | NUM_OMIT_1_HUNDRED;
tr->langopts.break_numbers = 0x49249268; // for languages which have numbers for 100,000 and 1,000,000
}
break;

@@ -946,7 +955,7 @@ SetLengthMods(tr,3); // all equal

tr->langopts.stress_rule = 0;
tr->langopts.stress_flags = 0x10004; // use 'diminished' for unstressed final syllable
tr->langopts.numbers2 = NUM2_100000;
tr->langopts.break_numbers = 0x24a8; // 1000, 100,000 10,000,000

if(name2 == L('t','a'))
{
@@ -1002,7 +1011,7 @@ SetLengthMods(tr,3); // all equal
tr->langopts.stress_rule = 7; // stress on the last syllable, before any explicitly unstressed syllable
tr->langopts.stress_flags = 0x20; //no automatic secondary stress

tr->langopts.numbers = 0x1509 + 0x4000;
tr->langopts.numbers = NUM_SINGLE_STRESS + NUM_DECIMAL_COMMA | NUM_ALLOW_SPACE | NUM_OMIT_1_HUNDRED | NUM_DFRACTION_2;
tr->langopts.max_initial_consonants = 2;
}
break;
@@ -1034,7 +1043,7 @@ SetLengthMods(tr,3); // all equal
tr->letter_groups[0] = vowels_vi;
tr->langopts.tone_language = 1; // Tone language, use CalcPitches_Tone() rather than CalcPitches()
tr->langopts.unstressed_wd1 = 2;
tr->langopts.numbers = 0x0049 + 0x8000;
tr->langopts.numbers = NUM_DECIMAL_COMMA | NUM_HUNDRED_AND | NUM_DFRACTION_4;

}
break;
@@ -1070,13 +1079,13 @@ SetLengthMods(tr,3); // all equal

tr->translator_name = name2;

if(tr->langopts.numbers & 0x8)
if(tr->langopts.numbers & NUM_DECIMAL_COMMA)
{
// use . and , for thousands and decimal separators
tr->langopts.thousands_sep = '.';
tr->langopts.decimal_sep = ',';
}
if(tr->langopts.numbers & 0x4)
if(tr->langopts.numbers & NUM_THOUS_SPACE)
{
tr->langopts.thousands_sep = 0; // don't allow thousands separator, except space
}
@@ -1128,7 +1137,7 @@ static void Translator_Russian(Translator *tr)
tr->langopts.stress_rule = 5;
tr->langopts.stress_flags = 0x0020; // waas 0x1010

tr->langopts.numbers = 0x0409;
tr->langopts.numbers = NUM_DECIMAL_COMMA | NUM_OMIT_1_HUNDRED;
tr->langopts.numbers2 = 0xc2; // variant numbers before thousands
tr->langopts.phoneme_change = 1;
tr->langopts.testing = 2;

+ 10
- 16
src/translate.cpp View File

@@ -366,6 +366,10 @@ int IsAlpha(unsigned int c)
{//========================
// Replacement for iswalpha() which also checks for some in-word symbols

const unsigned short extra_indic_alphas[] = {
0xa70,0xa71, // Gurmukhi: tippi, addak
0 };

if(iswalpha(c))
return(1);

@@ -374,6 +378,8 @@ int IsAlpha(unsigned int c)
// Indic scripts: Devanagari, Tamil, etc
if((c & 0x7f) < 0x64)
return(1);
if(lookupwchar(extra_indic_alphas, c) != 0)
return(1);
return(0);
}

@@ -2570,10 +2576,6 @@ if((c == '/') && (tr->langopts.testing & 2) && IsDigit09(next_in) && IsAlpha(pre
int c_temp;
char *pn;
char *pw;
static unsigned int break_numbers1 = 0x49249248;
static unsigned int break_numbers2 = 0x24924aa8; // for languages which have numbers for 100,000 and 100,00,000, eg Hindi
static unsigned int break_numbers3 = 0x49249268; // for languages which have numbers for 100,000 and 1,000,000
unsigned int break_numbers;
char number_buf[80];

// start speaking at a specified word position in the text?
@@ -2591,7 +2593,7 @@ if((c == '/') && (tr->langopts.testing & 2) && IsDigit09(next_in) && IsAlpha(pre
// digits should have been converted to Latin alphabet ('0' to '9')
word = pw = &sbuf[words[ix].start];

if(iswdigit(word[0]) && (tr->langopts.numbers2 & NUM2_100000))
if(iswdigit(word[0]) && (tr->langopts.break_numbers != BREAK_THOUSANDS))
{
// Languages with 100000 numbers. Remove thousands separators so that we can insert them again later
pn = number_buf;
@@ -2628,34 +2630,26 @@ if((c == '/') && (tr->langopts.testing & 2) && IsDigit09(next_in) && IsAlpha(pre
pn = &number_buf[1];
nx = n_digits;

if((tr->langopts.numbers2 & NUM2_100000a) == NUM2_100000a)
break_numbers = break_numbers3;
else
if(tr->langopts.numbers2 & NUM2_100000)
break_numbers = break_numbers2;
else
break_numbers = break_numbers1;

while(pn < &number_buf[sizeof(number_buf)-3])
{
if(!isdigit(c = *pw++) && (c != tr->langopts.decimal_sep))
break;

*pn++ = c;
if((--nx > 0) && (break_numbers & (1 << nx)))
if((--nx > 0) && (tr->langopts.break_numbers & (1 << nx)))
{
if(tr->langopts.thousands_sep != ' ')
{
*pn++ = tr->langopts.thousands_sep;
}
*pn++ = ' ';
if(break_numbers & (1 << (nx-1)))
if(tr->langopts.break_numbers & (1 << (nx-1)))
{
// the next group only has 1 digit (per the break_numbers mask), make it three
*pn++ = '0';
*pn++ = '0';
}
if(break_numbers & (1 << (nx-2)))
if(tr->langopts.break_numbers & (1 << (nx-2)))
{
// the next group only has 2 digits (per the break_numbers mask), make it three
*pn++ = '0';

+ 18
- 5
src/translate.h View File

@@ -328,12 +328,27 @@ typedef struct {
unsigned char *length_mods;
unsigned char *length_mods0;

#define NUM_THOUS_SPACE 0x4
#define NUM_DECIMAL_COMMA 0x8
#define NUM_SWAP_TENS 0x10
#define NUM_AND_UNITS 0x20
#define NUM_HUNDRED_AND 0x40
#define NUM_SINGLE_AND 0x80
#define NUM_SINGLE_STRESS 0x100
#define NUM_SINGLE_VOWEL 0x200
#define NUM_OMIT_1_HUNDRED 0x400
#define NUM_19_HUNDRED 0x800
#define NUM_1900 0x800
#define NUM_ALLOW_SPACE 0x1000
#define NUM_DFRACTION_1 0x2000
#define NUM_DFRACTION_2 0x4000
#define NUM_DFRACTION_3 0x6000
#define NUM_DFRACTION_4 0x8000
#define NUM_DFRACTION_5 0xa000
#define NUM_ORDINAL_DOT 0x10000
#define NUM_ROMAN 0x20000
#define NUM_ROMAN_UC 0x40000
#define NUM_NOPAUSE 0x80000
#define NUM_AND_HUNDRED 0x100000
#define NUM_ROMAN_AFTER 0x200000
#define NUM_VIGESIMAL 0x400000
#define NUM_ROMAN_ORDINAL 0x800000
@@ -361,17 +376,15 @@ typedef struct {
// bit23=Roman numbers are ordinal numbers
int numbers;

#define NUM2_100000 0x800 // numbers for 100,000 and 10,000,000
#define NUM2_100000a 0xc00 // numbers for 100,000 and 1,000,000
// bits 1-4 use variant form of numbers before thousands,millions,etc.
// bit6=(LANG=pl) two forms of plural, M or MA
// bit7=(LANG-ru) use MB for 1 thousand, million, etc
// bit8=(LANG=cs,sk) two forms of plural, M or MA
// bit9=(LANG=rw) say "thousand" and "million" before its number, not after
// bit10=(LANG=sw) special word for 100,000 and 1,000,000
// bit11=(LANG=hi) special word for 100,000 and 10,000,000
int numbers2;

#define BREAK_THOUSANDS 0x49249248
int break_numbers; // which digits to break the number into thousands, millions, etc (Hindi has 100,000 not 1,000,000)
int max_roman;
int thousands_sep;
int decimal_sep;

+ 4
- 1
src/wavegen.cpp View File

@@ -794,12 +794,15 @@ static void WavegenSetEcho(void)
amp = embedded_value[EMBED_H];
delay = 130;
}
#ifdef deleted
if(embedded_value[EMBED_T] > 0)
{
// announcing punctuation
// announcing punctuation, add a small echo
// This seems unpopular
amp = embedded_value[EMBED_T] * 8;
delay = 60;
}
#endif

if(delay == 0)
amp = 0;

Loading…
Cancel
Save