Browse Source

[1.31.20]

Speak accented letter names as base-letter name plus accent name, using a table of Unicode characters up to U+17F.
Added $accent attribute for *_list files, meaning "speak as base-letter name plus accent name".
Added accent names to several languages *_list files.
Added language option to allow vowels to merge between words (used for lang=TA).


git-svn-id: https://espeak.svn.sourceforge.net/svnroot/espeak/trunk@153 d46cf337-b52f-0410-862d-fd96e6ae7743
master
jonsd 17 years ago
parent
commit
58fbde3153

+ 22
- 0
dictsource/af_list View File

@@ -60,6 +60,28 @@ z zEd
û kapi?y
ü de@lte@k@n?y

_cap h'o@flEt@r
_?? s@mb'o@l
_?A l'Et@r

// accent names
_lig d'Wb@llEt@r
_acu ak'yt
_ac2 d'Wb@lakyt
_brv br'i:v
_ced s'e@dIla
_cir k'api
_dia d'e@lte@k@n
_dot p'Wnt
_grv x2r'afIs
_hac k'A:rOn
_mcn m'akrOn
_ogo o@x2o@n,&k
_rng 'A:nb&l
_stk str'e@p
_tld t'Ild@


// numeric
_0 'nWl
_1 _'e@n

+ 25
- 2
dictsource/cs_list View File

@@ -51,6 +51,28 @@ _z zet

_#9 t'ap
_#32 m'ezeRa // space
_?? simbol
_?A znak
_cap m'ajuskule

// accent names
_lig l'igat,uRa
_acu tS'a:Rka
_ac2 dvj'etS'a:Rki
_brv 'obloUtSek
_ced ts'eJila
_cir stR^'i:Ska
_dia tR'ema
_dot t'etSka
_grv c'eSki:;'aktsent
_hac h'a:tSek
_mcn m'akRon
_ogo 'ogonek
_rng kR'oUZek
_stk S'ikma:tS'a:Rka
_tld c'ilda



_$ dola:R
_' apostRof
@@ -72,8 +94,6 @@ _} sl'oZena:||z'avR^i:t
_< menSi:
_> vjetSi:
_| sv'isla:||tS'a:Ra
_?? simbol
_?A znak


// Numbers
@@ -258,6 +278,9 @@ z zet $atend
// Abbreviations
// if a word has no vowels (or "r") then it will
// automatically be spoken as individual letters

e.g napR^

arc $abbrev
arj $abbrev
atd $abbrev

+ 17
- 0
dictsource/de_list View File

@@ -44,6 +44,23 @@ _#9 t,abu:l'Ato:*
_#32 l'e:*ts'aIC@n
_?? zymb'o:l

// accent names
_lig l'i:gat,u:*
_acu ak'u:t
_ac2 d'Op@l,aku:t
_brv b@-*'e:v@
_ced tse:d'Il@
_cir ts'I*kUmflEks
_dia 'UmlaUt
_dot p'Unkt
_grv g@-*'AvIs
_hac h'atSe:k
_mcn m'ak@-*o:n
_ogo 'o:go:n,e:k
_rng k@-*'u:Ze:k
_stk S@-*'Egst@-*IC
_tld t'Ild@

// Not Roman numbers
v faU
x Iks

+ 18
- 17
dictsource/dict_phonemes View File

@@ -21,10 +21,10 @@ Dictionary cs_dict
i i: l- o o: oU r- u
u:

* b c d dZ f g h
j J k l l^ m n N
n^ p r R R^ R^/ s S
t tS ts v x z Z
* ; b c d dZ f g
h j J k l l^ m n
N n^ p r R R^ R^/ s
S t tS ts v x z Z


Dictionary cy_dict
@@ -149,12 +149,12 @@ v z Z
Dictionary it_dict

@- a a/ aI aU e E i
o O oI u
I o O oI u

* : b d dZ f g h
j k l l^ m n N n^
p r R s S t tS ts
v w w2 z
* : ; b d dZ f g
h j k l l^ m n N
n^ p r R s S t tS
ts v w w2 z


Dictionary nl_dict
@@ -213,10 +213,11 @@ Dictionary sk_dict
l- l: o o: oU r- r: u
u:

* : b d d; dZ f g
h j k l l^ m n N
n^ p r R R^ R^/ s S
t t; tS ts v x z Z
* : ; b d d; dZ f
g h j k l l^ m n
N n^ p r R R^ R^/ s
S t t; tS ts v x z
Z


Dictionary sv_dict
@@ -371,10 +372,10 @@ Dictionary hbs_dict
E e e: i I i: l- O
o o: oU r* r- u U u:

* b d dZ dZ; f g h
j k l L l^ m n N
n^ p r R s S t tS
ts tS; v x z Z
* ; b d dZ dZ; f g
h j k l L l^ m n
N n^ p r R s S t
tS ts tS; v x z Z


Dictionary id_dict

+ 37
- 5
dictsource/en_list View File

@@ -47,7 +47,38 @@ y waI
z zEd
?3 z zi:

é i:a2k'ju:t
// sounds are specified for these accented characters
// use the $accent attribute here to force the use of the
// accent table when speaking the name of the character.
_é $accent
ê $accent
_ä $accent
_ë $accent
_ï $accent
_ö $accent
_ü $accent
_č $accent
_š $accent
_ş $accent
_ž $accent
_ñ $accent

// accent names
_lig l,Iga2tS@
_acu a2kj'u:t
_ac2 dVb@la2kj'u:t
_brv br'i:v
_ced s@d'Ila2
_cir s'3:kVmfl,Eks
_dia 'UmlaUt
_dot wI2Dd'0ta2bVv
_grv gr'A:v
_hac h'atS,Ek
_mcn m'akr,0n
_ogo '0g0n,Ek
_rng r'INg
_stk str'oUk
_tld t'Ild@


// character names
@@ -73,7 +104,6 @@ _+ plVs
_, k0m@
_- h,aIf@n
_. d0t
_/ stroUk
_: koUl@n
_; sEmIk'oUl0n
_< ElaNg@L
@@ -82,7 +112,6 @@ _> A@aNg@L
_? kwEstS@n
_@ at|saIn
_[ lEftskwe@
_\ bakstroUk
_] raItskwe@
_^ s3:kVmfl,Eks
?5 _^ sIRkVmfl,Eks
@@ -162,11 +191,14 @@ _₠ jU@roU
¬ n0t_|saIn
ə SwA:
æ eI'i:
ɛ oUp@n'i:
ɔ oUp@n'oU
þ TO@n
ŋ ENg
ð ED
ʃ ES
ʒ EZ
ĸ krA:
ɛ oUp@n'i:
ɔ oUp@n'oU

// numeric


+ 1
- 0
dictsource/en_rules View File

@@ -5154,6 +5154,7 @@
.group

// non-ascii characters with specified pronunciations
// List the accented characters in en_list with the $accent attribute
é eI
_) é I2
ê E:

+ 18
- 0
dictsource/es_list View File

@@ -7,6 +7,24 @@ _cap m'aJ^us
_?? s'imbOlo
_#32 Esp'aTjo

// accent names
_lig liQaD'u**a
_acu aQ'uDo
_ac2 d'Oble||aQ'uDo
_brv b**'eBe
_ced TeD'iJ^a
_cir TiRkumfl'exo
_dia dj'E**esis
_dac d'Oble||aQ'uDo
_dot p'unto // ??
_grv g**'aBe
_hac ka**'On
_mcn mak**'On
_ogo kol'ita
_rng an'iJ^o
_stk b'a*Ra // ??
_tld t'ilde


// names of symbols
_. punto

+ 3
- 0
dictsource/es_rules View File

@@ -145,6 +145,9 @@
_) r R
A) r (A **
C) r (A **
l) r (A R
m) r (A R
n) r (A R
rr *R



+ 41
- 22
dictsource/fr_list View File

@@ -5,7 +5,7 @@

// 2006-11-18 Gilles Casse <[email protected]>
//
// Updated 2008-02-24 Michel Such <[email protected]>
// Updated 2008-02-29 Michel Such <[email protected]>
//
// * Numbers, a few abbreviations and exceptions.
//
@@ -16,42 +16,42 @@
// "letter" name, then include the letter name here, with the letter
// prefixed by a _ character.

a:aksA~gRav
â a:aksA~siRkO~flEks
ä a:tRema
$accent // speak as base-letter name + accent name
â $accent
ä $accent
b be
c se
ç sesedij
ç $accent
d de
e @
ë @:tRema
é @:aksA~Egy
è @:aksA~gRav
ê @:aksA~siRkO~flEks
ë @:tRema
ë $accent
é $accent
è $accent
ê $accent
ë $accent
f Ef
g Ze
h aS
i i
ï i:tRema
ï $accent
j Zi
k ka
l El
m Em
n En
ñ Entilde
ñ $accent
o o
ô o:aksA~siRkO~flEks
ö o:tRema
ô $accent
ö $accent
p pe
q ky
r ER
s Es
t te
u y
ù y:aksA~gRav
û y:aksA~siRkO~flEks
ü y:tRema
ù $accent
û $accent
ü $accent
v ve
w dubl@v'e
x iks
@@ -60,15 +60,33 @@ _y i:gR'Ek



// character names
// accent names
_lig ligat'yr
_acu aksA~tEg'y
_ac2 dublaksA~tEg'y
_brv br'Ev
_ced sed'ij
_cir aksA~siRkO~fl'Eks
_dia tRem'a
_dot pw'E~syskr'i
_grv aksA~gR'av
_hac kar'O~
_mcn makr'O~
_ogo OgOn'Ek
_rng rO~t2A~S'Ef
_stk b'aR
_tld t'ild


// character names

//_cap k,ap@-t@L
_?A lEt@
_cap maZysk'yl
_?A l'Etr
_?? sE~b'Oll
_#9 tabylasjO~
_#32 Espas


_" gijmE
_# djEz
_' apOstROf
@@ -383,8 +401,8 @@ tout t'ut2 $u

// Letters which can be words
//===========================
à a:aksA~gRav $atend
y i:gR'Ek $atend
à $atend $accent
y $atend $accent



@@ -651,3 +669,4 @@ vincent vE~sA~





+ 20
- 0
dictsource/hbs_list View File

@@ -41,9 +41,29 @@
z z@
ž Z@

// accent names
_lig l'ig&t,UR&
_acu 'akUt
_ac2 dv'ostr**UkI;'akUt
//_brv
_ced ts'EdIl&
_cir ts'iRkUmflEks
_dia 'uml&Ut
_dot t'otSk&
//_grv
_hac kv'atSIts&
_mcn m'akr**on
_ogo 'ogonEk
_rng r**'iNg
_stk kr**'oz
_tld t'ild&


// symbols
_?? znak // unknown symbol
_?A slovo // unknown letter
_cap k'apIt&l // ?? use English until I find the correct word

© 'aUtoRsk&||pr*av&
% p'osto
+ plus

+ 17
- 0
dictsource/hu_list View File

@@ -45,6 +45,23 @@ _1M4 billio:
_2M4 ke:tbillio:
_dpt ||_vEss2Y:_

// accent names
_lig ligAtu:R2A
_acu e:lES
_ac2 kEt:Y:S
_brv fe:lkYR2
//_ced // cedilla
_cir tsiR2kumflEks
_dia tR2e:mA
//_dot // dot above
_grv tompA
_hac pipA
_mcn mAkR2on // ?? macron
_ogo hoR2gok
_rng kYR2
_stk a:thu:za:S
_tld tildE


// Abbreviations
km kilo:me:tER2

+ 22
- 2
dictsource/it_list View File

@@ -4,8 +4,28 @@


// character names
_cap k'apital
_?? s'imbolo
_cap k'apital
_?? s'imbolo
_?A let:'e:Ra


// accent names
_lig l,egat'u:Ra
_acu atS:'ento_|ak'u:to
_ac2 d'op:i;o_|atS:'ento_|ak'u:to
_brv b@-*'e:ve
_ced tSed'il^a
_cir tSi;@-*konfl'esso
_dia djeR'e:zI
_dot p'unto||s,ov@-*ask@-*'it:o
_grv atS:'entog@-*'a:ve
_hac h'atSek
_mcn m'ak@-*on
_ogo og'o:nek
_rng an'ello
_stk b'aR*a
_tld t'ilde


_! p'untoesklamat'ivo
_" viRgolet:e

+ 20
- 0
dictsource/ro_list View File

@@ -48,6 +48,26 @@ z zet
î ,ydin'i $atend

_. punkt
_?? ka*akt'er
_cap maZusk'ul@

// accent names
_lig ligat'ur@
_acu aktS'entaskuts'it
//_ac2
_brv k@tS'ul@
_ced sed'il@
_cir tSirkumfl'eks
_dia t@-*'em@
_dot punkt
_grv aktS'entg@-*'av
_hac hatS'ek
_mcn mak@-*'on
_ogo ogon'ek
//_rng
_stk sl'eS
_tld t'ild@



// numeric

+ 19
- 1
dictsource/sk_list View File

@@ -43,10 +43,28 @@ _v ve:
w dv'ojite:,ve:
x iks
y ipsilon
ý d,l-he:'i:
ý d,l-he:'ipsilon
_z zet
ž Zet

// accent names
_lig l'igat,u:Rov,ane:
_acu s_d'l:Zn^om
//_ac2
_brv bR'eve
_ced s_ts'ed;illoU
_cir s'ostRi;,eSkoU
_dia s_pR'ehla:skoU
_dot s_b'otkoU
_grv s_'obRa:t;,eni:md'l:Zn^om
_hac s_m'ektSen^om
_mcn s_m'akRonom
_ogo s_'ogon^ek
_rng s_kR'u:Skom
_stk S'ikmi: d'l:Zen^
_tld s_v'l-novkoU


// character names
_cap vel^ke:
_?? simbol

+ 36
- 36
dictsource/ta_rules View File

@@ -38,7 +38,7 @@

எ e

ஏ e:
ஏ e::

ஐ E:

@@ -50,99 +50,99 @@

// consonants

க ga // inter-vocalic, unless there is virama before or after
க gV // inter-vocalic, unless there is virama before or after
க (B g
க (் g
்) க ga
்) க gV
்) க (B g
_) க ka
_) க kV
_) க (B k
க்க k:a
க்க k:V
க்க (B k:

ங Na
ங NV
ங (B N

ச dZa
ச dZV
ச (B dZ
_) ச sa
_) ச (B s
ச்ச tS:a
ச்ச tS:V
ச்ச (B tS:

ஜ dZa
ஜ dZV
ஜ (B dZ

ஞ n^a
ஞ n^V
ஞ (B n^

ட d.a
ட d.V
ட (B d.
_) ட t.a
_) ட t.V
_) ட (B t.
ட்ட t.a
ட்ட t.V
ட்ட (B t.

ண n.a
ண n.V
ண (B n.

த da
த dV
த (B d
_) த ta
_) த tV
_) த (B t
த்த t:a
த்த (B t:
த்த ttV
த்த (B tt

ந na
ந nV
ந (B n

ன na
ன nV
ன (B n

ப ba
ப bV
ப (B b
_) ப pa
_) ப pV
_) ப (B p
ப்ப p:a
ப்ப p:V
ப்ப (B p:
ஃ) ப fa
ஃ) ப (B f

ம ma
ம mV
ம (B m

ய ja
ய jV
ய (B j

ர ra
ர rV
ர (B r

ற Ra
ற RV
ற (B R
ற் (ற t. // RR -> t.R

ல la
ல lV
ல (B l

ள l.a
ள l.V
ள (B l.

ழ z.a
ழ z.V
ழ (B z.

வ va
வ vV
வ (B v

ஶ Sa
ஶ SV
ஶ (B S

ஷ s.a
ஷ s.V
ஷ (B s.

ஸ sa
ஸ sV
ஸ (B s

ஹ ha
ஹ hV
ஹ (B h


@@ -176,4 +176,4 @@
ௗ : // aU length mark

.group
$ dola
$ dolV

+ 1
- 1
phsource/compile_report View File

@@ -18,7 +18,7 @@
fr 44 124
fr_ca 11 124
hi 51 135
ta 16 138
ta 17 138
hu 23 114
nl 28 121
pl 15 109

+ 7
- 2
phsource/ph_tamil View File

@@ -77,7 +77,7 @@ endphoneme

phoneme U
vowel starttype (u) endtype (u)
length 150
length 130
formants vowel/u#_3
endphoneme

@@ -89,7 +89,7 @@ endphoneme

phoneme U:
vowel starttype (u) endtype (u)
length 270
length 240
formants vowel/u#_3
endphoneme

@@ -113,3 +113,8 @@ phoneme v
switchvoicing f
endphoneme

phoneme : // Lengthen previous vowel by "length"
virtual
length 50
endphoneme


+ 1
- 0
src/compiledict.cpp View File

@@ -103,6 +103,7 @@ MNEM_TAB mnem_flags[] = {
{"$verbextend",0x28}, /* extend influence of 'verb follows' */
{"$capital", 0x29}, /* use this pronunciation if initial letter is upper case */
{"$allcaps", 0x2a}, /* use this pronunciation only if the word is all capitals */
{"$accent", 0x2b}, // character name is base-character name + accent name

// doesn't set dictionary_flags
{"$?", 100}, // conditional rule, followed by byte giving the condition number

+ 14
- 3
src/dictionary.cpp View File

@@ -2390,15 +2390,15 @@ int Translator::TranslateRules(char *p_start, char *phonemes, int ph_size, char
// no group for this letter, use default group
MatchRule(&p, "", groups1[0], &match1, word_flags, dict_flags);

if(match1.points == 0)
if((match1.points == 0) && ((option_sayas & 0x10) == 0))
{
// no match, try removing the accent and re-translating the word
n = utf8_in(&letter,p-1,0)-1;
if((letter >= 0xc0) && (letter <= 0x241))
if((letter >= 0xc0) && (letter <= 0x241) && ((ix = remove_accent[letter-0xc0]) != 0))
{
// within range of the remove_accent table
p2 = p-1;
p[-1] = remove_accent[letter-0xc0];
p[-1] = ix;
while((p[0] = p[n]) != ' ') p++;
while(n-- > 0) *p++ = ' '; // replacement character must be no longer than original

@@ -3020,6 +3020,16 @@ int Translator::LookupDictList(char **wordptr, char *ph_out, unsigned int *flags

found = LookupDict2(word, word1, ph_out, flags, end_flags, wtab);

if((found == 0) && (flags[1] & FLAG_ACCENT))
{
int letter;
word2 = word;
if(*word2 == '_') word2++;
len = utf8_in(&letter, word2, 0);
LookupAccentedLetter(letter, ph_out);
found = word2 + len;
}

if(found == 0)
{
ph_out[0] = 0;
@@ -3086,6 +3096,7 @@ int Translator::LookupDictList(char **wordptr, char *ph_out, unsigned int *flags
int Translator::Lookup(const char *word, char *ph_out)
{//===================================================
	// Convenience wrapper round LookupDictList() for callers which only
	// want the phoneme string: the dictionary flags are collected in a
	// zero-initialised scratch array and then discarded.
	unsigned int scratch_flags[2] = {0, 0};
	char *cursor = (char *)word;   // LookupDictList() takes a pointer to a non-const pointer

	return(LookupDictList(&cursor, ph_out, scratch_flags, 0, NULL));
}

+ 298
- 1
src/numbers.cpp View File

@@ -36,6 +36,290 @@



// Accent codes used in the accented-letter table.
// Each non-zero value is the index of the accent's entry in accents_tab[].
// M_NAME (0) marks a character which is not described as letter + accent.
enum {
	M_NAME         = 0,
	M_ACUTE        = 1,
	M_BREVE        = 2,
	M_CARON        = 3,
	M_CEDILLA      = 4,
	M_CIRCUMFLEX   = 5,
	M_DIAERESIS    = 6,
	M_DOUBLE_ACUTE = 7,
	M_DOT_ABOVE    = 8,
	M_GRAVE        = 9,
	M_MACRON       = 10,
	M_OGONEK       = 11,
	M_RING         = 12,
	M_STROKE       = 13,
	M_TILDE        = 14,

	M_MIDDLE_DOT   = M_DOT_ABOVE   // duplicate of M_DOT_ABOVE
};

// One entry per accent code (M_ACUTE, M_BREVE, ...); entry 0 is used for ligatures.
typedef struct {
	const char *name;   // token to look up in the language's *_list file (string literal, never written to)
	int flags;
} ACCENTS;

// these are tokens to look up in the *_list file.
// The order must match the numeric values of the M_xxx accent codes,
// because the accent code is used directly as the index into this table.
ACCENTS accents_tab[] = {
	{"_lig", 1},   // 0: ligature
	{"_acu", 0},   // 1: acute
	{"_brv", 0},   // 2: breve
	{"_hac", 0},   // 3: caron/hacek
	{"_ced", 0},   // 4: cedilla
	{"_cir", 0},   // 5: circumflex
	{"_dia", 0},   // 6: diaeresis
	{"_ac2", 0},   // 7: double acute
	{"_dot", 0},   // 8: dot
	{"_grv", 0},   // 9: grave
	{"_mcn", 0},   // 10: macron
	{"_ogo", 0},   // 11: ogonek
	{"_rng", 0},   // 12: ring
	{"_stk", 0},   // 13: stroke
	{"_tld", 0},   // 14: tilde
};


// Encoding of one table entry:
//   bits 0-6   base letter (ASCII)
//   bits 8-14  accent code (M_xxx), or the second letter of a ligature
//   bit  15    set for a ligature
// An entry of 0 (CAPITAL, or a non-letter) means "no letter + accent description".
// The mod2 / mod1 arguments which are not used in the expansion are kept so
// that existing uses of the macros stay valid.
#define CAPITAL 0
#define LETTER(ch,mod1,mod2) ((ch) + ((mod1) << 8))
#define LIGATURE(ch1,ch2,mod1) ((ch1) + ((ch2) << 8) + 0x8000)

// characters U+00e0 to U+017f
// Unsigned, because LIGATURE() sets bit 15 which does not fit in a signed short.
const unsigned short letter_accents_0e0[] = {
	LETTER('a',M_GRAVE,0), // U+00e0
	LETTER('a',M_ACUTE,0),
	LETTER('a',M_CIRCUMFLEX,0),
	LETTER('a',M_TILDE,0),
	LETTER('a',M_DIAERESIS,0),
	LETTER('a',M_RING,0),
	LIGATURE('a','e',0),
	LETTER('c',M_CEDILLA,0),
	LETTER('e',M_GRAVE,0),
	LETTER('e',M_ACUTE,0),
	LETTER('e',M_CIRCUMFLEX,0),
	LETTER('e',M_DIAERESIS,0),
	LETTER('i',M_GRAVE,0),
	LETTER('i',M_ACUTE,0),
	LETTER('i',M_CIRCUMFLEX,0),
	LETTER('i',M_DIAERESIS,0),
	LETTER('d',M_NAME,0), // eth // U+00f0
	LETTER('n',M_TILDE,0),
	LETTER('o',M_GRAVE,0),
	LETTER('o',M_ACUTE,0),
	LETTER('o',M_CIRCUMFLEX,0),
	LETTER('o',M_TILDE,0),
	LETTER('o',M_DIAERESIS,0),
	0, // division sign
	LETTER('o',M_STROKE,0),
	LETTER('u',M_GRAVE,0),
	LETTER('u',M_ACUTE,0),
	LETTER('u',M_CIRCUMFLEX,0),
	LETTER('u',M_DIAERESIS,0),
	LETTER('y',M_ACUTE,0),
	LETTER('t',M_NAME,0), // thorn
	LETTER('y',M_DIAERESIS,0),
	CAPITAL, // U+0100
	LETTER('a',M_MACRON,0),
	CAPITAL,
	LETTER('a',M_BREVE,0),
	CAPITAL,
	LETTER('a',M_OGONEK,0),
	CAPITAL,
	LETTER('c',M_ACUTE,0),
	CAPITAL,
	LETTER('c',M_CIRCUMFLEX,0),
	CAPITAL,
	LETTER('c',M_DOT_ABOVE,0),
	CAPITAL,
	LETTER('c',M_CARON,0),
	CAPITAL,
	LETTER('d',M_CARON,0),
	CAPITAL, // U+0110
	LETTER('d',M_STROKE,0),
	CAPITAL,
	LETTER('e',M_MACRON,0),
	CAPITAL,
	LETTER('e',M_BREVE,0),
	CAPITAL,
	LETTER('e',M_DOT_ABOVE,0),
	CAPITAL,
	LETTER('e',M_OGONEK,0),
	CAPITAL,
	LETTER('e',M_CARON,0),
	CAPITAL,
	LETTER('g',M_CIRCUMFLEX,0),
	CAPITAL,
	LETTER('g',M_BREVE,0),
	CAPITAL, // U+0120
	LETTER('g',M_DOT_ABOVE,0),
	CAPITAL,
	LETTER('g',M_CEDILLA,0),
	CAPITAL,
	LETTER('h',M_CIRCUMFLEX,0),
	CAPITAL,
	LETTER('h',M_STROKE,0),
	CAPITAL,
	LETTER('i',M_TILDE,0),
	CAPITAL,
	LETTER('i',M_MACRON,0),
	CAPITAL,
	LETTER('i',M_BREVE,0),
	CAPITAL,
	LETTER('i',M_OGONEK,0),
	CAPITAL, // U+0130
	LETTER('i',M_NAME,0), // dotless i
	CAPITAL,
	LIGATURE('i','j',0),
	CAPITAL,
	LETTER('j',M_CIRCUMFLEX,0),
	CAPITAL,
	LETTER('k',M_CEDILLA,0),
	LETTER('k',M_NAME,0), // kra
	CAPITAL,
	LETTER('l',M_ACUTE,0),
	CAPITAL,
	LETTER('l',M_CEDILLA,0),
	CAPITAL,
	LETTER('l',M_CARON,0), // U+013e  (base letter is 'l', not the digit '1')
	CAPITAL,
	LETTER('l',M_MIDDLE_DOT,0), // U+0140
	CAPITAL,
	LETTER('l',M_STROKE,0), // U+0142
	CAPITAL,
	LETTER('n',M_ACUTE,0),
	CAPITAL,
	LETTER('n',M_CEDILLA,0),
	CAPITAL,
	LETTER('n',M_CARON,0),
	LETTER('n',M_NAME,0), // apostrophe n
	CAPITAL,
	LETTER('n',M_NAME,0), // eng
	CAPITAL,
	LETTER('o',M_MACRON,0),
	CAPITAL,
	LETTER('o',M_BREVE,0),
	CAPITAL, // U+0150
	LETTER('o',M_DOUBLE_ACUTE,0),
	CAPITAL,
	LIGATURE('o','e',0),
	CAPITAL,
	LETTER('r',M_ACUTE,0),
	CAPITAL,
	LETTER('r',M_CEDILLA,0),
	CAPITAL,
	LETTER('r',M_CARON,0),
	CAPITAL,
	LETTER('s',M_ACUTE,0),
	CAPITAL,
	LETTER('s',M_CIRCUMFLEX,0),
	CAPITAL,
	LETTER('s',M_CEDILLA,0),
	CAPITAL, // U+0160
	LETTER('s',M_CARON,0),
	CAPITAL,
	LETTER('t',M_CEDILLA,0),
	CAPITAL,
	LETTER('t',M_CARON,0),
	CAPITAL,
	LETTER('t',M_STROKE,0),
	CAPITAL,
	LETTER('u',M_TILDE,0),
	CAPITAL,
	LETTER('u',M_MACRON,0),
	CAPITAL,
	LETTER('u',M_BREVE,0),
	CAPITAL,
	LETTER('u',M_RING,0),
	CAPITAL, // U+0170
	LETTER('u',M_DOUBLE_ACUTE,0),
	CAPITAL,
	LETTER('u',M_OGONEK,0),
	CAPITAL,
	LETTER('w',M_CIRCUMFLEX,0),
	CAPITAL,
	LETTER('y',M_CIRCUMFLEX,0),
	CAPITAL, // Y-DIAERESIS
	CAPITAL,
	LETTER('z',M_ACUTE,0),
	CAPITAL,
	LETTER('z',M_DOT_ABOVE,0),
	CAPITAL,
	LETTER('z',M_CARON,0),
	LETTER('s',M_NAME,0), // long-s // U+17f
};



int Translator::LookupLetter2(unsigned int letter, char *ph_buf)
{//=============================================================
	// Get the phonemes for the name of a single letter, writing them to ph_buf.
	// Three sources are tried in turn:
	//   1. Lookup() of the letter prefixed by '_'
	//   2. Lookup() of the letter on its own
	//   3. TranslateRules() on the letter
	// Returns ph_buf[0]; callers treat a non-zero result as "name found".
	char key[10];                 // [0]=0, [1]='_' or ' ', [2...]=letter as UTF-8, then 0
	char *const bare = &key[2];   // the letter without its prefix
	int n_bytes;

	key[0] = 0;
	key[1] = '_';
	n_bytes = utf8_out(letter, bare);
	bare[n_bytes] = 0;

	if(Lookup(&key[1], ph_buf) != 0)
		return(ph_buf[0]);

	// the byte before the letter becomes a space for the remaining attempts
	key[1] = ' ';
	if(Lookup(bare, ph_buf) != 0)
		return(ph_buf[0]);

	TranslateRules(bare, ph_buf, 20, NULL,0,0);
	return(ph_buf[0]);
}


void Translator::LookupAccentedLetter(unsigned int letter, char *ph_buf)
{//=====================================================================
// lookup the character in the accents table
int accent_data;
int accent1;
int basic_letter;
int letter2=0;
char ph_letter1[30];
char ph_letter2[30];
char ph_accent1[30];

if((letter >= 0xe0) && (letter < 0x17f))
{
accent_data = letter_accents_0e0[letter - 0xe0];

basic_letter = accent_data & 0x7f;
if((accent1 = (accent_data >> 8) & 0x7f) != 0)
{
if(accent_data & 0x8000)
{
letter2 = accent1;
accent1 = 0;
}

if(Lookup(accents_tab[accent1].name, ph_accent1) != 0)
{

if(LookupLetter2(basic_letter, ph_letter1) != 0)
{
if(accent1 == 0)
{
//ligature
LookupLetter2(letter2, ph_letter2);
sprintf(ph_buf,"%s%c%s%s",ph_accent1, phonPAUSE_VSHORT, ph_letter1, ph_letter2);
}
else
{
if(langopts.accents & 1)
sprintf(ph_buf,"%s%c%s", ph_accent1, phonPAUSE_VSHORT, ph_letter1);
else
sprintf(ph_buf,"%s%c%s%c", ph_letter1, phonPAUSE_VSHORT, ph_accent1, phonPAUSE_VSHORT);
}
}
}
}
}
} // end of LookupAccentedLetter



void Translator::LookupLetter(unsigned int letter, int next_byte, char *ph_buf1)
{//=============================================================================
@@ -43,7 +327,9 @@ void Translator::LookupLetter(unsigned int letter, int next_byte, char *ph_buf1)
unsigned char *p;
static char single_letter[10] = {0,0};
char ph_stress[2];
char ph_buf3[30];
unsigned int dict_flags[2];
char ph_buf3[40];
char *ptr;

ph_buf1[0] = 0;
len = utf8_out(letter,&single_letter[2]);
@@ -83,6 +369,11 @@ void Translator::LookupLetter(unsigned int letter, int next_byte, char *ph_buf1)
single_letter[3+len] = next_byte; // follow by space-space if the end of the word, or space-0x31

single_letter[1] = '_';

// if the $accent flag is set for this letter, use the accents table (below)
dict_flags[1] = 0;
ptr = &single_letter[1];
if(Lookup(&single_letter[1],ph_buf3) == 0)
{
single_letter[1] = ' ';
@@ -92,6 +383,11 @@ void Translator::LookupLetter(unsigned int letter, int next_byte, char *ph_buf1)
}
}

if(ph_buf3[0] == 0)
{
LookupAccentedLetter(letter, ph_buf3);
}

if(ph_buf3[0] == 0)
{
ph_buf1[0] = 0;
@@ -185,6 +481,7 @@ int Translator::TranslateLetter(char *word, char *phonemes, int control, int wor
for(p2 = hexbuf; *p2 != 0; p2++)
{
pbuf += strlen(pbuf);
*pbuf++ = phonPAUSE_VSHORT;
LookupLetter(*p2, 0, pbuf);
}
}

+ 1
- 0
src/readclause.cpp View File

@@ -517,6 +517,7 @@ const char *Translator::LookupSpecial(const char *string, char* text_out)
const char *Translator::LookupCharName(int c)
{//==========================================
// Find the phoneme string (in ascii) to speak the name of character c
// Used for punctuation characters and symbols

int ix;
unsigned int flags[2];

+ 1
- 1
src/synthdata.cpp View File

@@ -35,7 +35,7 @@
#include "translate.h"
#include "wave.h"

const char *version_string = "1.31.19 28.Feb.08";
const char *version_string = "1.31.20 01.Mar.08";
const int version_phdata = 0x013105;

int option_device_number = -1;

+ 3
- 1
src/synthesize.cpp View File

@@ -1146,7 +1146,9 @@ int Generate(PHONEME_LIST *phoneme_list, int *n_ph, int resume)

if(p->newword)
{
last_frame = NULL;
if(translator->langopts.param[LOPT_WORD_MERGE] == 0)
last_frame = NULL;

sourceix = (p->sourceix & 0x7ff) + clause_start_char;

if(p->newword & 4)

+ 2
- 0
src/tr_languages.cpp View File

@@ -663,6 +663,7 @@ SetLengthMods(tr,3); // all equal
tr->langopts.stress_rule = 1;
tr->langopts.stress_flags = 0x10004; // use 'diminished' for unstressed final syllable
tr->letter_bits_offset = OFFSET_TAMIL;
tr->langopts.param[LOPT_WORD_MERGE] = 1; // don't break vowels between words

memset(tr->letter_bits,0,sizeof(tr->letter_bits));
SetLetterBitsRange(tr,LETTERGP_A,0x05,0x14); // vowel letters
@@ -1055,6 +1056,7 @@ Translator_Afrikaans::Translator_Afrikaans() : Translator()
SetLetterVowel(this,'y'); // add 'y' to vowels

langopts.numbers = 0x8d1 + NUM_ROMAN;
langopts.accents = 1;
memcpy(stress_lengths,stress_lengths2,sizeof(stress_lengths));
}


+ 15
- 4
src/translate.h View File

@@ -74,6 +74,7 @@
#define FLAG_VERB_EXT 0x100 /* extend the 'verb follows' */
#define FLAG_CAPITAL 0x200 /* pronunciation if initial letter is upper case */
#define FLAG_ALLCAPS 0x400 // only if the word is all capitals
#define FLAG_ACCENT 0x800 // character name is base-character name + accent name



@@ -230,7 +231,7 @@ extern const int param_defaults[N_SPEECH_PARAM];



#define N_LOPTS 14
#define N_LOPTS 15
#define LOPT_DIERESES 1
// 1=remove [:] from unstressed syllables, 2= remove from unstressed or non-penultimate syllables
// bit 4=0, if stress < 4, bit 4=1, if not the highest stress in the word
@@ -254,9 +255,8 @@ extern const int param_defaults[N_SPEECH_PARAM];
// increase this to prevent sonorants being shortened before shortened (eg. unstressed) vowels
#define LOPT_SONORANT_MIN 7

// bit 0=Italian "syntactic doubling" of consoants in the word after a word marked with $double attribute
// bit 1=also after a word which ends with a stressed vowel
#define LOPT_IT_DOUBLING 8
// don't break vowels at word boundary
#define LOPT_WORD_MERGE 8

// max. amplitude for vowel at the end of a clause
#define LOPT_MAXAMP_EOC 9
@@ -277,6 +277,11 @@ extern const int param_defaults[N_SPEECH_PARAM];
// stressed syllable is indicated by capitals
#define LOPT_SYLLABLE_CAPS 13

// bit 0=Italian "syntactic doubling" of consonants in the word after a word marked with $double attribute
// bit 1=also after a word which ends with a stressed vowel
#define LOPT_IT_DOUBLING 14



typedef struct {
// bits0-2 separate words with (1=pause_vshort, 2=pause_short, 3=pause, 4=pause_long 5=[?] phoneme)
@@ -341,6 +346,10 @@ typedef struct {
int max_roman;
int thousands_sep;
int decimal_sep;

// bit 0, accent name before the letter name
int accents;

int tone_language; // 1=tone language
int intonation_group;
int long_stop; // extra mS pause for a lengthened stop
@@ -430,6 +439,8 @@ private:
const char *LookupSpecial(const char *string, char *text_out);
const char *LookupCharName(int c);
void LookupLetter(unsigned int letter, int next_byte, char *ph_buf);
int LookupLetter2(unsigned int letter, char *ph_buf);
void LookupAccentedLetter(unsigned int letter, char *ph_buf);
int LookupNum2(int value, int control, char *ph_out);
int LookupNum3(int value, char *ph_out, int suppress_null, int thousandplex, int prev_thousands);
int LookupThousands(int value, int thousandplex, char *ph_out);

Loading…
Cancel
Save