Browse Source

Numbers: Support "myriad" (10^4) based number systems (lang=ko).

Lang=pt: fix ordinal numbers 11-19.
Lang=en-us: minor vowel changes.


git-svn-id: https://espeak.svn.sourceforge.net/svnroot/espeak/trunk@291 d46cf337-b52f-0410-862d-fd96e6ae7743
master
jonsd 12 years ago
parent
commit
736e6deb28

+ 1
- 1
dictsource/dict_phonemes View File

@@ -825,7 +825,7 @@ R s S S; t w x z
Z Z;


Dictionary ko_dict 2012-09-14
Dictionary ko_dict 2012-09-15

@ a e E i o u u-


+ 55
- 59
dictsource/ko_list View File

@@ -1,68 +1,64 @@
$textmode
// eSpeak Korean rules. Changed pronunciation, as numbers were not announced.

// Number forms:
_0 ᅧᆼ
_1 ᅵᆯ
_2 ᅵ
_3 삼
_4 사
_5 ᅩ
_6 ᅲᆨ
_7 칠
_8 팔
_9 구
_1X 십
_2X ᅵ십
_3X 삼십
_4X 사십
_5X ᅩ십
_6X ᅲᆨ십
_7X 칠십
_8X 팔십
_9X 구십
_0C 백
_0M1 천
_1M1 천

$phonememode
_0M2 m'an // 10^4
_1M2 m'an
_0M3 tSh-@n||m'an // 10^7
_1M3 tSh-@n||m'an
_0M4 '@q // 10^8
_1M4 '@q
_0M5 tSh-@n||'@q // 10^11
_1M5 tSh-@n||'@q
_0M6 tS;'o // 10^12
_1M6 tS;'o
_0M7 tSh-@n||tS;'o
_1M7 tSh-@n||tS;'o
_0M8 gj'@nN
_1M8 gj'@nN
$textmode
// Number forms:
_0 j'@N_!
_1 'iR_!
_2 'i_!
_3 s'am_!
_4 s'a_!
_5 'o_!
_6 j'uq_!
_7 tS'h'iR_!
_8 ph'aR_!
_9 g'u_!
_1X s'ip_!
_2X 'i_!||s'ip_!
_3X s'am_!||s'ip_!
_4X s'a_!||s'ip_!
_5X 'o_!||s'ip_!
_6X j'uq_!||s'ip_!
_7X tS'h'iR_!s'ip_!
_8X ph'aR_!||s'ip_!
_9X g'u_!||s'ip_!
_0C p'Eq_!

// Larger numbers
_0M0 tS'h-@n // 10^3
_1M0 tS'h-@n
_0M1 m'an // 10^4
_1M1 m'an
_0M2 '@q // 10^8
_1M2 '@q
_0M3 tS;'o // 10^12
_1M3 tS;'o
_0M4 gj'@N
_1M4 gj'@N

// Character names:
ㄱ ㄱㅣㅇㅕㄱ
ㄲ ㅆㅏㅇㄱㅣㅇㅕㄱ
ㄴ ㄴㅣㅇㅡㄴ
ㄷ ㄷㅣㄱㅡㄷ
ㄸ ㅆㅏㅇㄷㅣㄱㅡㄷ
ㄹ ㄹㅣㅇㅡㄹ
ㅁ ㅁㅣㅇㅡㅁ
ㅂ ㅂㅣㅇㅡㅂ
ㅃ ㅆㅏㅇㅂㅣㅇㅡㅂ
ㅅ ㅅㅣㅇㅗㅅ
ㅆ ㅆㅏㅇㅅㅣㅇㅗㅅ
ㅇ ㅇㅣㅇㅡㅇ
ㅈ ㅈㅣㅇㅡㅈ
ㅉ ㅆㅏㅇㅈㅣㅇㅡㅈ
ㅊ ㅊㅣㅇㅡㅊ
ㅋ ㅋㅣㅇㅡㅋ
ㅌ ㅌㅣㅇㅡㅌ
ㅍ ㅍㅣㅇㅡㅍ
ㅎ ㅎㅣㅇㅡㅎ
ᄀ gij'@q
ᄁ 's-aNq,ij@q
ᄂ ni;'u-n
ᄃ diq'u-d-
ᄄ 's-aNd,iqu-d-
ᄅ *i;'u-rr
ᄆ mi;'u-m
ᄇ pi;'u-p
ᄈ 's-aNb,i;u-p
ᄉ si;'ot-
ᄊ 's-aNs,i;ot-
ᄋ i;'u-N
ᄌ tS;i;'u-t
ᄍ 's-aNdZ;,i;u-t
ᄎ tSh-'i;u-t
ᄏ khi;'u-k
ᄐ thi;'u-t-
ᄑ phi;'u-p
ᄒ hi;'u-t-
// end of character names.


$textmode
// Misc:
// 사ᅵ시ᅩᆺ
고랫재 고랟째

+ 13
- 1
dictsource/ko_rules View File

@@ -1,5 +1,14 @@
// eSpeak Korean rules
// This file is UTF8 encoded

// char names (mostly to catch "ng" sound):
.group ᄋ
ᄋ i;'u-N





.replace

// Note: These specific range of "normalized" unicode characters of Korean are
@@ -15,6 +24,9 @@
// h-finals: ㅎ ㄶㅀ
.L03 ᇂ ᆭ ᆶ

// Char names:


// Initials
// Order: ㄱㄲㅋ ㄷㄸㅌ ㅂㅃㅍ ㅅㅆㅈㅉㅊ ㄴㅁ ㄹ ㅎ

@@ -241,7 +253,7 @@
ᆷ m

.group ᆼ // ㅇ
nN
ᆼ N

.group ᇂ // ㅎ
ᇂ t-

+ 1
- 1
dictsource/pt_list View File

@@ -217,7 +217,7 @@ _6o s'est
_7o s'EtSim
_8o oIt'av
_9o n'on
_10o d'Esim
_1Xo d'Esim
_2Xo viZ'Ezim
_3Xo tRiZ'Ezim
_4Xo kwad*aZ'Ezim

+ 6
- 5
docs/phonemes.html View File

@@ -24,8 +24,8 @@ Phoneme mnemonics can be used directly in the text input to <strong>espeak</stro
<table>
<tbody valign=top>
<tr>
<td width=25><code>[p]</code><td width=80>
<td width=25><code>[b]</code><td width=80>
<td width=25><code>[p]</code><td width=150>
<td width=25><code>[b]</code><td width=150>
<tr>
<td><code>[t]</code><td>
<td><code>[d]</code><td>
@@ -92,7 +92,7 @@ In rhotic accents, such as General American, the phonemes <code>[3:], [A@], [e@]
<table>
<tbody valign=top>
<tr><td width=25><code>[@]</code>
<td width=60>alph<b>a</b><td width=80>schwa
<td width=60>alph<b>a</b><td width=400>schwa

<tr><td><code>[3]</code>
<td>bett<b>er</b><td>rhotic schwa. In British English this is the same as <code>[@]</code>, but it includes 'r' colouring in American and other rhotic accents. In these cases a separate <code>[r]</code> should not be included unless it is followed immediately by another vowel.
@@ -105,7 +105,7 @@ In rhotic accents, such as General American, the phonemes <code>[3:], [A@], [e@]

<tr><td><code>[a]</code><td>tr<b>a</b>p
<tr><td><code>[aa]</code><td>b<b>a</b>th<td>This is <code>[a]</code> in some accents, <code>[A:]</code> in others.
<tr><td><code>[a2]</code><td><b>a</b>bout<td>This may be <code>[@]</code> or may be a more open schwa.
<tr><td><code>[a#]</code><td><b>a</b>bout<td>This may be <code>[@]</code> or may be a more open schwa.
<tr><td><code>[A:]</code><td>p<b>al</b>m
<tr><td><code>[A@]</code><td>st<b>ar</b>t
<tr><td><p>
@@ -161,7 +161,8 @@ Other languages will have their own vowel definitions, eg:

</tbody>
</table>

<p>
<code> [:] </code> can be used to lengthen a vowel, eg <code> [e:]</code>

</body>
</html>

+ 8
- 8
phsource/compile_report View File

@@ -417,7 +417,7 @@ l/l_ [l] base
[l/] fr
l/l_@ [l/3] base
[l/] fr
l/l@ [h] base
l/l@ [hÖ{] base
[l#] base
[l] fr
[l/2] fr
@@ -451,7 +451,7 @@ l/L2_oL [l/2] base
l/L2_uL [l/2] base
l/l_3 [l/] de
l/l_4 [ll] sq
l/la [h] base
l/la [hÖ{] base
[l#] base
[l] fr
[l/2] fr
@@ -459,7 +459,7 @@ l/la [h1
[K] tn
l/l_a [l/3] base
[l/] fr
l/le [h] base
l/le [hÖ{] base
[l#] base
[l] fr
[l/2] fr
@@ -471,7 +471,7 @@ l/L_eL_af [&] af
[&:] af
l/l_front [L] sq
l/l_front_ [l/4] sq
l/li [h] base
l/li [hÖ{] base
[l#] base
[l] fr
[l/2] fr
@@ -485,7 +485,7 @@ ll/ll [L] base
ll/_ll [L] base
l/l_long [l] base
[l] fr
l/lo [h] base
l/lo [hÖ{] base
[l#] base
[l/2] fr
[K] nso
@@ -496,7 +496,7 @@ l^/l_rfx [l.] base
[l;] lt
[l] ru
[l^] ru
l/lu [h] base
l/lu [hÖ{] base
[l#] base
[l] fr
[l/2] fr
@@ -1088,11 +1088,11 @@ vdiph2/uu@ [U@] en
[U@] en-n
[U@] en-wm
[u@] vi
vdiph2/uw [u:] en-us
vdiph2/uw_2 [u:] en
vdiph2/uw_3 [yU] ro
vdiph2/uw_4 [u:] en-n
vdiph2/uw_6 [u#] kk
vdiph2/uw_6 [u:] en-us
[u#] kk
vdiph2/y@ [y@] zh
vdiph2/y#@ [Y@] af
vdiph2/ye [yE] sq

+ 1
- 1
phsource/ph_english_us View File

@@ -235,7 +235,7 @@ endphoneme
phoneme u:
vowel starttype #u endtype #u
length 190
FMT(vdiph2/uw)
FMT(vdiph2/uw_6)
endphoneme



BIN
phsource/vdiph/aae View File


BIN
phsource/vowel/V_6 View File


BIN
phsource/vowel/aa_8 View File


BIN
phsource/vwl_en_us/3_us View File


BIN
phsource/vwl_en_us/3_us2 View File


BIN
phsource/vwl_en_us/ee View File


+ 64
- 18
src/numbers.cpp View File

@@ -1137,8 +1137,8 @@ static int LookupThousands(Translator *tr, int value, int thousandplex, int thou
} // end f LookupThousands


static int LookupNum2(Translator *tr, int value, int control, char *ph_out)
{//========================================================================
static int LookupNum2(Translator *tr, int value, const int control, char *ph_out)
{//=============================================================================
// Lookup a 2 digit number
// control bit 0: ordinal number
// control bit 1: final tens and units (not number of thousands) (use special form of '1', LANG=de "eins")
@@ -1151,6 +1151,7 @@ static int LookupNum2(Translator *tr, int value, int control, char *ph_out)
int ix;
int units;
int tens;
int is_ordinal;
int used_and=0;
int found_ordinal = 0;
int next_phtype;
@@ -1175,6 +1176,8 @@ static int LookupNum2(Translator *tr, int value, int control, char *ph_out)
ord_type = 'q';
}

is_ordinal = control & 1;

if((control & 2) && (n_digit_lookup == 2))
{
// pronunciation of the final 2 digits has already been found
@@ -1187,11 +1190,12 @@ static int LookupNum2(Translator *tr, int value, int control, char *ph_out)
// is there a special pronunciation for this 2-digit number
if(control & 8)
{
// is there a feminine form?
sprintf(string,"_%df",value);
found = Lookup(tr, string, ph_digits);
}
else
if(control & 1)
if(is_ordinal)
{
strcpy(ph_ordinal, ph_ordinal2);
@@ -1229,8 +1233,15 @@ static int LookupNum2(Translator *tr, int value, int control, char *ph_out)

if(!found)
{
sprintf(string,"_%d",value);
found = Lookup(tr, string, ph_digits);
if((is_ordinal) && (tr->langopts.numbers2 & NUM2_NO_TEEN_ORDINALS))
{
// don't use numbers 10-99 to make ordinals, always use _1Xo etc (lang=pt)
}
else
{
sprintf(string,"_%d",value);
found = Lookup(tr, string, ph_digits);
}
}
}
}
@@ -1251,7 +1262,7 @@ static int LookupNum2(Translator *tr, int value, int control, char *ph_out)
else
{
if((control & 1) &&
if((is_ordinal) &&
((units == 0) || (tr->langopts.numbers & NUM_SWAP_TENS) || (tr->langopts.numbers2 & NUM2_MULTIPLE_ORDINAL)))
{
sprintf(string,"_%dX%c", tens, ord_type);
@@ -1300,7 +1311,7 @@ static int LookupNum2(Translator *tr, int value, int control, char *ph_out)
sprintf(string,"_%df",units);
found = Lookup(tr, string, ph_digits);
}
if((control & 1) && ((tr->langopts.numbers & NUM_SWAP_TENS) == 0))
if((is_ordinal) && ((tr->langopts.numbers & NUM_SWAP_TENS) == 0))
{
// ordinal
sprintf(string,"_%d%c",units,ord_type);
@@ -1335,7 +1346,7 @@ static int LookupNum2(Translator *tr, int value, int control, char *ph_out)
}
}
if((control & 1) && (found_ordinal == 0) && (ph_ordinal[0] == 0))
if((is_ordinal) && (found_ordinal == 0) && (ph_ordinal[0] == 0))
{
if((value >= 20) && (((value % 10) == 0) || (tr->langopts.numbers & NUM_SWAP_TENS)))
Lookup(tr, "_ord20", ph_ordinal);
@@ -1347,7 +1358,7 @@ static int LookupNum2(Translator *tr, int value, int control, char *ph_out)
{
Lookup(tr, "_0and", ph_and);

if((control & 1) && (tr->langopts.numbers2 & NUM2_MULTIPLE_ORDINAL))
if((is_ordinal) && (tr->langopts.numbers2 & NUM2_MULTIPLE_ORDINAL))
ph_and[0] = 0;

if(tr->langopts.numbers & NUM_SWAP_TENS)
@@ -1406,6 +1417,7 @@ static int LookupNum3(Translator *tr, int value, char *ph_out, int suppress_null
int ix;
int exact;
int ordinal;
int tplex;
int say_zero_hundred=0;
char string[12]; // for looking up entries in **_list
char buf1[100];
@@ -1465,10 +1477,16 @@ static int LookupNum3(Translator *tr, int value, char *ph_out, int suppress_null
if ((value % 1000) == 0)
exact = 1;

if(LookupThousands(tr, hundreds / 10, thousandplex+1, exact | ordinal, ph_10T) == 0)
tplex = thousandplex+1;
if(tr->langopts.numbers2 & NUM2_MYRIADS)
{
tplex = 0;
}

if(LookupThousands(tr, hundreds / 10, tplex, exact | ordinal, ph_10T) == 0)
{
x = 0;
if(tr->langopts.numbers2 & (1 << (thousandplex+1)))
if(tr->langopts.numbers2 & (1 << tplex))
x = 8; // use variant (feminine) for before thousands and millions
LookupNum2(tr, hundreds/10, x, ph_digits);
}
@@ -1615,6 +1633,23 @@ static int LookupNum3(Translator *tr, int value, char *ph_out, int suppress_null
} // end of LookupNum3


bool CheckThousandsGroup(char *word, int group_len)
{//================================================
// Is this a group of exactly 'group_len' digits which looks like a thousands group?
// (group_len is 3 for thousands; callers may pass 4 for myriad-based number systems.)
//
// word:      points at the first character of the candidate group. word[-1] is
//            also examined, so 'word' must not point at the very start of its buffer.
// group_len: the number of digits that make up one group.
//
// Returns true only if word[0..group_len-1] are all digits and the group is not
// part of a longer run of digits (no digit immediately before or after it).
	int ix;

	// The character before the group must not be a digit.
	// Cast to unsigned char: passing a negative char (eg. a UTF-8 byte) to isdigit() is undefined.
	if(isdigit((unsigned char)word[-1]))
		return(false);

	// Check the digits first. This stops at a string terminator, so word[group_len]
	// is only read when all the preceding characters are known to exist.
	for(ix=0; ix < group_len; ix++)
	{
		if(!isdigit((unsigned char)word[ix]))
			return(false);
	}

	// The character after the group must not be a digit.
	if(isdigit((unsigned char)word[group_len]))
		return(false);

	return(true);
}


static int TranslateNumber_1(Translator *tr, char *word, char *ph_out, unsigned int *flags, WORD_TAB *wtab, int control)
{//=====================================================================================================================
// Number translation with various options
@@ -1640,6 +1675,7 @@ static int TranslateNumber_1(Translator *tr, char *word, char *ph_out, unsigned
int decimal_mode;
int suffix_ix;
int skipwords = 0;
int group_len;
char *p;
char string[32]; // for looking up entries in **_list
char buf1[100];
@@ -1662,8 +1698,12 @@ static int TranslateNumber_1(Translator *tr, char *word, char *ph_out, unsigned
n_digits = ix;
value = this_value = atoi(word);

group_len = 3;
if(tr->langopts.numbers2 & NUM2_MYRIADS)
group_len = 4;

// is there a previous thousands part (as a previous "word") ?
if((n_digits == 3) && (word[-2] == tr->langopts.thousands_sep) && isdigit(word[-3]))
if((n_digits == group_len) && (word[-2] == tr->langopts.thousands_sep) && isdigit(word[-3]))
{
prev_thousands = 1;
}
@@ -1777,14 +1817,19 @@ static int TranslateNumber_1(Translator *tr, char *word, char *ph_out, unsigned
// a "thousand"/"million" suffix to this one
digix = n_digits + thousands_inc;

while(((wtab[thousandplex+1].flags & FLAG_MULTIPLE_SPACES) == 0) &&
isdigit(word[digix]) && isdigit(word[digix+1]) && isdigit(word[digix+2]) && !isdigit(word[digix+3]) && !isdigit(word[digix-1]))
while(((wtab[thousandplex+1].flags & FLAG_MULTIPLE_SPACES) == 0) && CheckThousandsGroup(&word[digix], group_len))
{
if((word[digix] != '0') || (word[digix+1] != '0') || (word[digix+2] != '0'))
thousands_exact = 0;
for(ix=0; ix<group_len; ix++)
{
if(word[digix+ix] != '0')
{
thousands_exact = 0;
break;
}
}

thousandplex++;
digix += 3;
digix += group_len;
if((word[digix] == tr->langopts.thousands_sep) || ((tr->langopts.numbers & NUM_ALLOW_SPACE) && (word[digix] == ' ')))
{
suffix_ix = digix+2;
@@ -1820,7 +1865,8 @@ static int TranslateNumber_1(Translator *tr, char *word, char *ph_out, unsigned
{
if(thousands_inc > 0)
{
if((thousandplex > 0) && (value < 1000))
if(thousandplex > 0)
// if((thousandplex > 0) && (value < 1000))
{
if((suppress_null == 0) && (LookupThousands(tr,value,thousandplex, thousands_exact, ph_append)))
{

+ 34
- 16
src/synthdata.cpp View File

@@ -881,39 +881,57 @@ static void SwitchOnVowelType(PHONEME_LIST *plist, PHONEME_DATA *phdata, USHORT
} // end of SwitchVowelType


static int NumInstnWords(USHORT *prog)
{//===================================
int NumInstnWords(USHORT *prog)
{//============================
int instn;
int instn2;
int instn_type;
int n;
static const char n_words[16] = {1,1,0,0,1,1,1,1,1,2,4,0,0,0,0,0};
int type2;
static const char n_words[16] = {0,1,0,0,1,1,0,1,1,2,4,0,0,0,0,0};

instn = *prog;
instn_type = instn >> 12;
if((n = n_words[instn_type]) > 0)
return(n);

if(instn_type < 4)
switch(instn_type)
{
case 0:
if(((instn & 0xf00) >> 8) == i_IPA_NAME)
{
n = ((instn & 0xff) + 1) / 2;
return(n+1);
}
return(1);;

case 6:
type2 = (instn & 0xf00) >> 9;
if((type2 == 5) || (type2 == 6))
return(12); // switch on vowel type
return(1);

case 2:
case 3:
// a condition, check for a 2-word instruction
if(((n = instn & 0x0f00) == 0x600) || (n == 0x0d00))
return(2);
return(1);
}

// instn_type 11 to 15, 2 words
instn2 = prog[2];
if((instn2 >> 12) == 0xf)
{
// This instruction is followed by addWav(), 2 more words
return(4);
}
if(instn2 == i_CONTINUE)
{
return(3);
default:
// instn_type 11 to 15, 2 words
instn2 = prog[2];
if((instn2 >> 12) == 0xf)
{
// This instruction is followed by addWav(), 2 more words
return(4);
}
if(instn2 == i_CONTINUE)
{
return(3);
}
return(2);
}
return(2);
} // end of NumInstnWords



+ 1
- 0
src/synthesize.h View File

@@ -571,6 +571,7 @@ int DoSpect2(PHONEME_TAB *this_ph, int which, FMT_PARAMS *fmt_params, PHONEME_L
int PauseLength(int pause, int control);
int LookupPhonemeTable(const char *name);
unsigned char *GetEnvelope(int index);
int NumInstnWords(USHORT *prog);

void InitBreath(void);


+ 3
- 2
src/tr_languages.cpp View File

@@ -957,7 +957,8 @@ SetLengthMods(tr,3); // all equal
tr->langopts.stress_rule = 8; // ?? 1st syllable if it is heavy, else 2nd syllable
tr->langopts.param[LOPT_UNPRONOUNCABLE] = 1; // disable check for unpronouncable words
tr->langopts.numbers = NUM_OMIT_1_HUNDRED;
tr->langopts.break_numbers = 0x9999998;
tr->langopts.numbers2 = NUM2_MYRIADS;
tr->langopts.break_numbers = 0x1111110;
tr->langopts.max_digits = 20;
}
break;
@@ -1115,7 +1116,7 @@ SetLengthMods(tr,3); // all equal
tr->langopts.stress_rule = STRESSPOSN_1R; // stress on final syllable
tr->langopts.stress_flags = 0x6 | 0x10 | 0x2000 | 0x20000;
tr->langopts.numbers = NUM_DECIMAL_COMMA | NUM_DFRACTION_2 | NUM_HUNDRED_AND | NUM_AND_UNITS | NUM_ROMAN;
tr->langopts.numbers2 = NUM2_MULTIPLE_ORDINAL;
tr->langopts.numbers2 = NUM2_MULTIPLE_ORDINAL | NUM2_NO_TEEN_ORDINALS;
SetLetterVowel(tr,'y');
ResetLetterBits(tr,0x2);
SetLetterBits(tr,1,"bcdfgjkmnpqstvxz"); // B hard consonants, excluding h,l,r,w,y

+ 6
- 2
src/translate.h View File

@@ -445,7 +445,9 @@ typedef struct {
int numbers;

#define NUM2_MULTIPLE_ORDINAL 0x1000
#define NUM2_ENGLISH_NUMERALS 0x2000
#define NUM2_NO_TEEN_ORDINALS 0x2000
#define NUM2_MYRIADS 0x4000
#define NUM2_ENGLISH_NUMERALS 0x8000
#define NUM2_THOUSANDS_VAR1 0x40
#define NUM2_THOUSANDS_VAR2 0x80
#define NUM2_THOUSANDS_VAR3 0xc0
@@ -455,7 +457,9 @@ typedef struct {
// bits 6-8 use different forms of thousand, million, etc (M MA MB)
// bit9=(LANG=rw) say "thousand" and "million" before its number, not after
// bit12=(LANG=el,es) use ordinal form of hundreds and tens as well as units
// bit13=(LANG=ne) speak (non-replaced) English numerals in English
// bit13=(LANG=pt) don't use 11-19 numbers to make ordinals
// bit14=(LANG=ko) use myriads (groups of 4 digits) not thousands (groups of 3)
// bit15=(LANG=ne) speak (non-replaced) English numerals in English
int numbers2;

#define BREAK_THOUSANDS 0x49249248

+ 3
- 28
src/vowelchart.cpp View File

@@ -525,42 +525,17 @@ static void FindVowelFmt(int prog_start, int length)
prog_end = prog_start + length;
n_vowelfmt_addr = 0;

for(prog = &phoneme_index[prog_start]; prog < &phoneme_index[prog_end]; prog++)
for(prog = &phoneme_index[prog_start]; prog < &phoneme_index[prog_end]; prog += NumInstnWords(prog))
{
instn = *prog;
switch(instn >> 12)
if((instn >> 12) == 11)
{
case 2:
case 3:
// conditions
while((instn & 0xe000) == 0x2000)
{
instn = *(++prog);
}
prog--;
break;

case 10: // Vowelin, Vowelout
prog += 3;
break;

case 9:
case 12: // WAV
case 13: // VowelStart
case 14: // VowelEnd
case 15: // addWav
prog++;
break;

case 11: // FMT
// FMT instruction
if(n_vowelfmt_addr < N_VOWELFMT_ADDR)
{
vowelfmt_addr[n_vowelfmt_addr++] = ((instn & 0xf) << 18) + (prog[1] << 2);
}
prog++;

break;
}
}
} // end of FindVowelFmt

Loading…
Cancel
Save