Browse Source

eu: Updated Basque phonetics and stress rule

master
agonzalezd 3 years ago
parent
commit
2fb981855b
4 changed files with 257 additions and 264 deletions
  1. 20
    10
      dictsource/eu_list
  2. 221
    252
      dictsource/eu_rules
  3. 14
    0
      src/libespeak-ng/dictionary.c
  4. 2
    2
      src/libespeak-ng/tr_languages.c

+ 20
- 10
dictsource/eu_list View File

@@ -160,25 +160,32 @@ ra $u
etara $u

// Abbreviations
adib ad'ibidez $hasdot
adib ad'ibid,ez $hasdot

// proper names and surnames
juan $alt
jorge $1 x'orxe
jose $1 $alt
josé $alt
joaquín $alt
joaquín xoak'in
joaquin xoak'in
julian xulj'an
julián xulj'an
jacinto xaT'into
jiménez xim'eneT
genaro xen'aro
gonzález gonT'aleT
san $u $only $only
santo $u
aristoteles $3
hume hjum $1
humer hjum
google g'uQ@l
googler g'uQ@l
moodle m'uD@l
moodler m'uD@l
doodle d'uD@l
doodler m'uD@l
google g'uQel
googler g'uQel
moodle m'uDel
moodler m'uDel
doodle d'uDel
doodler d'uDel
nietzsche n'itSe
ainara $alt3

@@ -213,8 +220,7 @@ akademi $3
aktibist $3
alajain $alt
album $1
[aldez-alde] 'alDez_'alDe
[aldez-alde] 'alDez_'alDe
(aldez-alde) aldezalde $text
alfanumeriko $4
algebra $1 $alt
aljebraiko $3 $alt
@@ -226,3 +232,7 @@ alkaloide $3
alokutibo $4
alproj $alt
alprojakeri $alt
(bat-batean) bapatean $text
ijito $alt
garaje $alt
jertse xerts'e

+ 221
- 252
dictsource/eu_rules View File

@@ -1,252 +1,221 @@
// This file is UTF8 encoded
// Spelling-to-phoneme rules for Basque
// $w_alt j as [x]
// $w_alt2 don't palatalise l
// $w_alt3 don't palatalise n
.L01 b d g
.L02 e i
.group a
a a
ai aI
au aU
a (_S1 a
ak (_S2 ak
ari (_S3 ari
aren (_S4 a**en
arekin (_S6 a**ekin
arentzat (_S8 a**entzat
arentzako (_S9 arentzako
an (_S2 an
atik (_S4 atik
ara (_S3 a**a
arat (_S4 arat
atatik (_S6 atatik
atara (_S5 ata**a
atarat (_S5 atarat
ako (_S3 ako
atako (_S5 atako
araino (_S6 a**ain^o
ataraino (_S8 ata**an^o
arantz (_S6 a**antz
atarantz (_S8 a**antz
agana (_S5 agana
arengana (_S8 a**engana
arenganaino (_s11 a**enganain^o
arengatik (_S9 a**engatik
atzat (_S5 atzat
.group b
b b
_ez_) b (A p'
.group c
c k
c (L02 T
ch tS
.group d
d d
dd J
A) d (A D // ??
_ez_) d (A t'
.group e
e e
ei eI
ehi eI
l) ehi (A eI
eu eU
ek (_S2 ek
ei (_S2 eI
en (_S2 en
ean (_S3 ean
etan (_S4 etan
etatik (_S6 etatik
etara (_S5 eta**a
erat (_S4 erat
etako (_S5 etako
eko (_S3 eko
etarako (_S7 etarako
etarantz (_S8 eta**antz
etaraino (_S8 eta**aino
engana (_S6 engana
entzat (_S6 entzat
entzako (_S7 entzako
engatik (S7 engatik
ekin (_S4 ekin
ekiko (_S5 ekiko
ez (_S2 ez
etaz (_S4 etaz
.group f
f f
.group g
g g
A) g (A Q // ??
_ez_) g (A k'
gana (_S4 gana
go (_S2 go
.group h
h // silent
zi) h (o x
.group i
i i
K) i (_S1 i
le) i (h j
arraz) i (on j
.group j
j J // try [x] [J] [j] [J^] [Z] [S]
j ($w_alt x
.group k
k k
ko (_S2 ko
.group l
l l
ll l^
_ez_) l (A l'
l ($w_alt2 l2 _afi) l (ia l2
.group m
m m
.group n
n n
n (_S1 n
n ($w_alt3 n2 mi) n (is n2
afi) n (i n2
agluti) n (A n2
alpi) n (A n2
.group o
o o
oi oI
.group p
p p
.group q
q k
.group r
r r
_) r R2
rr R2
A) ri (_S2 ri
ren (_S3 ren
ra (_S2 ra
rat (_S3 rat
rantz (_S5 rantz
raino (_S5 raino
rako (_S4 rako
rentzat (_S7 rentzat
rentzako (_S8 rentzako
rekin (_S5 rekin
rengan (_S6 rengan
rengana (_S7 rengana
rengandik (_S9 rengandik
rengatik (_S8 rengatik
.group s
s s
.group t
t t
ts ts
tt c
tx tS
tz tz
tan (_S3 tan
tara (_S4 tara
tarat (_S5 tarat
tik (_S3 tik
tatik (_S5 tatik
taz (_S3 taz
tu (_S2 tu
tuko (_S4 tuko
tzen (_S4 tzen
ten (_S3 ten
.group u
u u
.group v
v b
.group w
w u
.group x
x S
.group y
y jj
.group z
z z
_e) z (_L01 %z
_e) z (_l
z (_S1 z
.group
á ''a
é ''e
í ''i
ó ''o
ú ''u
ç s
ü y
ñ n^
// This file is UTF8 encoded
// Spelling-to-phoneme rules for Basque
// $w_alt j as [x]
// $w_alt2 don't palatalise l
// $w_alt3 don't palatalise n

.L01 b d g
.L02 e i

.group a
a a
ai aI
au aU
a (_S1 a
ak (_S2 ak
ari (_S3 ari
aren (_S4 a**en
arekin (_S6 a**ekin
arentzat (_S8 a**entzat
arentzako (_S9 arentzako
an (_S2 an
atik (_S4 atik
ara (_S3 a**a
arat (_S4 arat
atatik (_S6 atatik
atara (_S5 ata**a
atarat (_S5 atarat
ako (_S3 ako
atako (_S5 atako
araino (_S6 a**ain^o
ataraino (_S8 ata**ain^o
arantz (_S6 a**antz
atarantz (_S8 ata**antz
agana (_S5 agana
arengana (_S8 a**engana
arenganaino (_S11 a**enganain^o
arengatik (_S9 a**engatik
atzat (_S5 atzat

.group b
b b
_ez_) b (A p'

.group c
c k
c (L02 T
ch tS

.group d
d d
dd J
A) d (A D // ??
_ez_) d (A t'

.group e
e e
ei eI
ehi eI
l) ehi (A eI
eu eU
ek (_S2 ek
ei (_S2 eI
en (_S2 en
ean (_S3 ean
etan (_S4 etan
etatik (_S6 etatik
etara (_S5 eta**a
erat (_S4 erat
etako (_S5 etako
eko (_S3 eko
etarako (_S7 etarako
etarantz (_S8 eta**antz
etaraino (_S8 eta**aino
engana (_S6 engana
entzat (_S6 entzat
entzako (_S7 entzako
engatik (_S7 engatik
ekin (_S4 ekin
ekiko (_S5 ekiko
ez (_S2 ez
etaz (_S4 etaz

.group f
f f

.group g
g g
A) g (A Q // ??
_ez_) g (A k'
gana (_S4 gana
go (_S2 go

.group h
h // silent
zi) h (o x

.group i
i i
K) i (_S1 i
le) i (h j

.group j
j j // try [x] [J] [j] [J^] [Z] [S]
j ($w_alt x

.group k
k k
ko (_S2 ko

.group l
l l
i) lh (A l2
ll l^
_ez_) l (A l'
l ($w_alt2 l2
_afi) l (ia l2
_ki) l (o l2

.group m
m m

.group n
n n
n (_S1 n
n ($w_alt3 n2
i) nh (A n2
mi) n (is n2
afi) n (i n2
agluti) n (A n2
alpi) n (A n2

.group o
o o
oi oI

.group p
p p

.group q
q k
qu (L02 k // que, qui
_) qw (A kw // qwerty


.group r
r r
_) r R2
rr R2
A) ri (_S2 ri
ren (_S3 ren
ra (_S2 ra
rat (_S3 rat
rantz (_S5 rantz
raino (_S5 raino
rako (_S4 rako
rentzat (_S7 rentzat
rentzako (_S8 rentzako
rekin (_S5 rekin
rengan (_S6 rengan
rengana (_S7 rengana
rengandik (_S9 rengandik
rengatik (_S8 rengatik

.group s
s s

.group t
t t
ts ts
tt c
tx tS
tz tz
tan (_S3 tan
tara (_S4 tara
tarat (_S5 tarat
tik (_S3 tik
tatik (_S5 tatik
taz (_S3 taz
tu (_S2 tu
tuko (_S4 tuko
tzen (_S4 tzen
ten (_S3 ten

.group u
u u

.group v
v b

.group w
w u

.group x
x S
_se) x (u ks
_o) x (i ks
_ta) x (i ks
_a) x (iom ks
_fle) x (io ks

.group y
y jj

.group z
z z
_e) z (_L01 %z
_e) z (_l
_e) z (_n
_e) z (_zA tz
z (_S1 z

.group
á ''a
é ''e
í ''i
ó ''o
ú ''u
ç s
ü y
ñ n^


+ 14
- 0
src/libespeak-ng/dictionary.c View File

@@ -1327,6 +1327,20 @@ void SetWordStress(Translator *tr, char *output, unsigned int *dictionary_flags,
max_stress = STRESS_IS_PRIMARY;
}
break;
case 15: // LANG=eu. When stressed_syllable is still 0 and vowel_count > 2: primary stress on the second syllable; when vowel_count > 3, also secondary stress on the last syllable.
if ((stressed_syllable == 0) && (vowel_count > 2)) {
for (ix = 1; ix < vowel_count; ix++) {
vowel_stress[ix] = STRESS_IS_DIMINISHED;
}
stressed_syllable = 2;
if (max_stress == STRESS_IS_DIMINISHED)
vowel_stress[stressed_syllable] = STRESS_IS_PRIMARY;
max_stress = STRESS_IS_PRIMARY;
if (vowel_count > 3) {
vowel_stress[vowel_count - 1] = STRESS_IS_SECONDARY;
}
}
break;
}

if ((stressflags & S_FINAL_VOWEL_UNSTRESSED) && ((control & 2) == 0) && (vowel_count > 2) && (max_stress_input < STRESS_IS_SECONDARY) && (vowel_stress[vowel_count - 1] == STRESS_IS_PRIMARY)) {

+ 2
- 2
src/libespeak-ng/tr_languages.c View File

@@ -762,8 +762,8 @@ Translator *SelectTranslator(const char *name)
static const short stress_lengths_eu[8] = { 200, 200, 200, 200, 0, 0, 210, 230 }; // very weak stress
static const unsigned char stress_amps_eu[8] = { 16, 16, 18, 18, 18, 18, 18, 18 };
SetupTranslator(tr, stress_lengths_eu, stress_amps_eu);
tr->langopts.stress_rule = STRESSPOSN_2L; // ?? second syllable, but not on a word-final vowel
tr->langopts.stress_flags = S_FINAL_VOWEL_UNSTRESSED;
tr->langopts.stress_rule = 15;
tr->langopts.stress_flags = S_FINAL_VOWEL_UNSTRESSED | S_MID_DIM;
tr->langopts.param[LOPT_SUFFIX] = 1;
tr->langopts.numbers = NUM_SINGLE_STRESS | NUM_DECIMAL_COMMA | NUM_HUNDRED_AND | NUM_OMIT_1_HUNDRED | NUM_OMIT_1_THOUSAND | NUM_VIGESIMAL;
}

Loading…
Cancel
Save