Browse Source

Belarusian: improvements (#1417)

master
Alexander Epaneshnikov 2 years ago
parent
commit
0efae53f91
No account linked to committer's email address

+ 1
- 1
ChangeLog.md View File

@@ -11,7 +11,7 @@ The espeak-ng project is a fork of the espeak project.

updated languages:
* ba (Bashkir) -- Andiv06
* be (Belarusian) -- Andiv06
* be (Belarusian) -- Andiv06, bespsm, Alaksiej Stankievič
* cmn (Mandarin) -- Cameron Wong
* en (English) -- Bill Dengler
* es (Spanish) -- Sukil Etxenike

+ 74
- 73
dictsource/be_list View File

@@ -2,78 +2,79 @@
// Spelling-to-phoneme words for Belarusian

// Letter names
а a
б bE
в vE
г QE
д dE
е jE
ё jO
ж z.E
з zE
і i
й i||n;EskladOvajE
к ka
л El
м Em
н En
о O
п pE
р Er
с Es
т tE
у u
ў u||n;EskladOvajE
ф Ef
х xa
ц tsE
ч ts.E
ш s.a
ы i"
ь m;ak;:i||znak
э E
ю ju
я ja
' apOstraf
а a $u
б bE $u
в vE $u
г QE $u
д dE $u
е jE $u
ё jO $u
ж z.E $u
з z $u
і i $u
й i||n;Esklad'OvajE
к ka $u
л El $u
м Em $u
н En $u
о O $u
п pE $u
р Er $u
с Es $u
т tE $u
у u $u
ў u||n;Esklad'OvajE
ф Ef $u
х xa $u
ц tsE $u
ч ts.E $u
ш s.a $u
ы i" $u
ь m;'ak;:i||znak
э E $u
ю ju $u
я ja $u

// Numbers
_0 nul;
_1 ad;in
_2 dva
_3 tri"
_4 ts.ati"ri"
_5 p;at;
_6 s.Es;t;
_7 s;Em
_8 vOs;E2m
_9 d;Ev;at;
_10 d;Es;at;
_11 ad;inats:at;
_12 dvanats:at;
_13 tri"nats:at;
_14 ts.ati"rnats:at;
_15 p;atnats:at;
_16 s.asnats:at;
_17 s;amnats:at;
_18 vas;amnats:at;
_19 d;Ev;atnats:at;
_2X dvats:at;
_3X tri"ts:at;
_4X sorak
_5X p;adz;:Es;at
_6X s.Ezdz;:Es;jat
_7X s;Emdz;Es;jat
_8X vos;Emdz;Es;at
_9X d;Ev;anosta
_0C stO
_2C d;v;Es;t;E
_3C tri"sta
_4C ts.ati"ri"sta
_5C p;at;sot
_6C s.Es;t;sot
_7C s;jEmsot
_8C vos;jEmsot
_9C d;Ev;at;sot
_1MA1 ti"s;ats.a
_0MA1 ti"s;ats.i"
_0M1 ti"s;ats.
_0 n'ul;
_1 adz;'in
_1f adna
_2 dv'a
_2f dz;v;'E
_3 tr'i"
_4 ts.at'i"ri"
_5 p;'ats;
_6 s.'Es;ts;
_7 s;'Em
_8 v'Os;Em
_9 dz;'Ev;ats;
_10 dz;'Es;ats;
_11 adz;'inats:ats;
_12 dvan'ats:ats;
_13 tri"n'ats:ats;
_14 ts.at'i"rnats:ats;
_15 p;atn'ats:ats;
_16 s.asn'ats:ats;
_17 s;amn'ats:ats;
_18 vas;amn'ats:ats;
_19 dz;Ev;atn'ats:ats;
_2X dv'ats:ats;||
_3X tr'i"ts:ats;||
_4X s'Orak||
_5X p;adz;:Es;'at||
_6X s.Ezdz;:Es;'at||
_7X s;'Emdz;Es;at||
_8X v'Os;Emdz;Es;at||
_9X dz;Ev;an'Osta||
_0C st'O
_2C dz;v;'Es;ts;E
_3C tr'i"sta
_4C ts.at'i"ri"sta
_5C p;ats;s'Ot
_6C s.Es;ts;s'Ot
_7C s;Ems'Ot
_8C v,Os;Ems'Ot
_9C dz;Ev;ats;s'Ot
_0MB1 t'i"s;ats.a
_0MA1 t'i"s;ats.i"
_0M1 t'i"s;ats. // NOTE: for numbers above 10000 (e.g. 10001) `_!` gets appended to this word

+ 44
- 5
dictsource/be_rules View File

@@ -1,6 +1,23 @@
// This file is UTF8 encoded
// Translation rules for Belarusian

// NOTE: LETTERGP_A doesn't work, so L01 is used as a replacement
// Letters which can carry a vowel phoneme
.L01 а е ё і о у ы э ю я

// This is a more economical way to express the assimilation of sibilants
.replace
сш шш
сч шч
цч чч
шс сс
шц сц
чц цц
зж жж
здж ждж
жз зз
ждз здз

.group а
а a

@@ -15,12 +32,22 @@

.group д
д d
дз dz
дж dz.

.group е
е ;E // ʲɛ
_) е jE
') е jE
ь) е jE
L01) е jE

.group ё
ё ;O // ʲɔ
ё ;'O // ʲ'ɔ
_) ё j'O
') ё j'O
ь) ё j'O
L01) ё j'O

.group ж
ж z. // ʐ
@@ -30,6 +57,7 @@

.group і
і i
C) і ;i

.group й
й j
@@ -47,7 +75,7 @@
н n

.group о
о o
о 'O

.group п
п p
@@ -74,16 +102,19 @@
х x

.group ц
ц ts // t̻͡s
ц ts // t͡s
цц ts:

.group ч
ч ts. // ʈ͡ʂ
чч ts.:

.group ш
ш s. // ʂ
шш s.:

.group ы
ы i" // (ɨ
ы i" // ɨ

.group ь
ь ; // ʲ
@@ -93,9 +124,17 @@

.group ю
ю ;u // ʲu
_) ю ju
') ю ju
ь) ю ju
L01) ю ju

.group я
я ;a // ʲa
_) я ja
') я ja
ь) я ja
L01) я ja

.group '
' j
'

+ 1
- 2
espeak-ng-data/lang/zle/be View File

@@ -1,5 +1,4 @@
name Belarusian
language be
replace 03 a a#
dict_min 20000
dict_min 2000
speed 95

+ 48
- 18
phsource/ph_belarusian View File

@@ -2,48 +2,78 @@
// Belarusian
//====================================================


phoneme a
import_phoneme ru/a
vwl starttype #a endtype #a
ipa a
length 180
FMT(vowel/a)
endphoneme

phoneme O
vwl starttype #o endtype #o
ipa ɔ
length 100
FMT(vwl_ru/8)
phoneme i"
vwl starttype #i endtype #i
ipa ɨ
length 180
FMT(vowel/ii#_2)
endphoneme

phoneme i"
import_phoneme pl/y
phoneme Q
import_phoneme base1/Q"
ipa ɣ
endphoneme

phoneme ts
import_phoneme consonants/ts
ipa t̻͡s
CALL consonants/ts
voicingswitch dz
ipa t͡s
endphoneme

phoneme ts;
import_phoneme pl/ts;
ipa t̻͡s̪ʲ
CALL base1/tS;
voicingswitch dz;
ipa t͡sʲ
endphoneme

phoneme ts.
import_phoneme sr/tS
vls pla afr sib
ipa ʈ͡ʂ
lengthmod 2
voicingswitch dz.
Vowelin f1=0 f2=2300 200 400 f3=-100 80
WAV(ustop/tsh_sr, 50)
endphoneme

phoneme dz
import_phoneme consonants/dz
ipa d̻͡z̪
CALL consonants/dz
voicingswitch ts
ipa d͡z
endphoneme

phoneme dz;
import_phoneme pl/dz;
ipa d̻͡z̪ʲ
CALL base1/dZ;
voicingswitch ts;
ipa d͡zʲ
endphoneme

phoneme dz.
import_phoneme sr/dZ
vcd pla afr sib
ipa ɖ͡ʐ
lengthmod 5
voicingswitch ts.
Vowelin f1=2 f2=2300 200 400 f3=100 80
Vowelout f1=2 f2=2300 250 300 f3=100 80 brk
endphoneme

phoneme r
liquid trl
lengthmod 6
ipa r
length 80
FMT(r3/r_trill2) addWav(r3/r_trill2.wav, 65)
endphoneme

phoneme ;
liquid pzd
lengthmod 0
ipa ʲ
endphoneme

+ 3
- 3
phsource/phonemes View File

@@ -1819,6 +1819,9 @@ include ph_yue

// *** Tables extending base2 ***

phonemetable be base2
include ph_belarusian

phonemetable chr base2
include ph_cherokee

@@ -2047,7 +2050,4 @@ include ph_uzbek
phonemetable qdb en
include ph_langbelta

phonemetable be ru
include ph_belarusian

phonemetable ms id

+ 28
- 0
src/libespeak-ng/tr_languages.c View File

@@ -538,6 +538,34 @@ Translator *SelectTranslator(const char *name)
tr->encoding = ESPEAKNG_ENCODING_ISO_8859_6;
SetArabicLetters(tr);
break;
case L('b', 'e'): // Belarusian
{
	static const unsigned char stress_amps_be[8] = { 12, 10, 8, 8, 0, 0, 16, 17 };
	static const short stress_lengths_be[8] = { 160, 140, 200, 140, 0, 0, 240, 160 };
	// Both letter tables are 0-terminated and are handed to SetLetterBits()
	// through a char pointer with no length, so every entry must occupy
	// exactly one byte. A wchar_t table would expose the zero padding bytes
	// of its first element, which would be taken as the end of the list.
	static const unsigned char vowels_be[] = { // offset by 0x420 -- а е ё о у ы э ю я і
		0x10, 0x15, 0x31, 0x1e, 0x23, 0x2b, 0x2d, 0x2e, 0x2f, 0x36, 0
	};
	static const unsigned char consonants_be[] = { // б в г д ж з й к л м н п р с т ф х ц ч ш ў
		0x11, 0x12, 0x13, 0x14, 0x16, 0x17, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1f, 0x20, 0x21, 0x22, 0x24, 0x25, 0x26, 0x27, 0x28, 0x3e, 0
	};

	tr->langopts.stress_flags = S_NO_AUTO_2 | S_NO_DIM; // don't use secondary stress
	tr->letter_bits_offset = OFFSET_CYRILLIC;
	tr->transpose_min = 0x430; // convert cyrillic from unicode into range 0x01 to 0x2f
	tr->transpose_max = 0x45e;

	// Start from an empty table, then register the Belarusian letter groups.
	memset(tr->letter_bits, 0, sizeof(tr->letter_bits));
	SetLetterBits(tr, LETTERGP_A, (char *)vowels_be);
	SetLetterBits(tr, LETTERGP_C, (char *)consonants_be);
	SetLetterBits(tr, LETTERGP_VOWEL2, (char *)vowels_be);

	SetupTranslator(tr, stress_lengths_be, stress_amps_be);
	tr->encoding = ESPEAKNG_ENCODING_ISO_8859_5;
	tr->langopts.param[LOPT_UNPRONOUNCABLE] = 1; // disable check for unpronounceable words
	tr->langopts.stress_rule = STRESSPOSN_1L;
	tr->langopts.numbers = NUM_DECIMAL_COMMA | NUM_OMIT_1_HUNDRED;
	tr->langopts.numbers2 = NUM2_THOUSANDPLEX_VAR_THOUSANDS | NUM2_THOUSANDS_VAR1; // variant numbers before thousands
}
	break;
case L('b', 'g'): // Bulgarian
{
SetCyrillicLetters(tr);

+ 8
- 0
tests/language-numbers-cardinal.test View File

@@ -4,6 +4,14 @@

# European Style (hundred, thousand, -illion/-illiard)

echo "... be (Belarusian)" # short scale

test_phon be "n'ul; adz;'in dv'a tr'i\" ts.at'i\"ri\" p;'ats; s.'Es;ts; s;'Em v'Os;Em dz;'Ev;ats;" "0 1 2 3 4 5 6 7 8 9" "units"
test_phon be "dz;'Es;ats; adz;'inats:ats; dvan'ats:ats; tri\"n'ats:ats; ts.at'i\"rnats:ats; p;atn'ats:ats; s.asn'ats:ats; s;amn'ats:ats; vas;amn'ats:ats; dz;Ev;atn'ats:ats;" "10 11 12 13 14 15 16 17 18 19" "teens"
test_phon be "dv'ats:ats; adz;'in tr'i\"ts:ats; s'Orak p;adz;:Es;'at s.Ezdz;:Es;'at s;'Emdz;Es;at v'Os;Emdz;Es;at dz;Ev;an'Osta" "21 30 40 50 60 70 80 90" "tens"
test_phon be "st'O adz;'inats:ats; dz;v;'Es;ts;E dv'ats:ats; dv'a tr'i\"sta ts.at'i\"ri\"sta p;ats;s'Ot s.Es;ts;s'Ot s;Ems'Ot v,Os;Ems'Ot dz;Ev;ats;s'Ot" "111 222 300 400 500 600 700 800 900" "hundreds"
test_phon be "adna t'i\"s;ats.a dz;v;'Es;ts;E tr'i\"ts:ats; ts.at'i\"ri\" dz;v;'E t'i\"s;ats.i\" tr'i\"sta s'Orak tr'i\" t'i\"s;ats.i\" ts.at'i\"ri\"sta ts.at'i\"ri\" t'i\"s;ats.i\" p;'ats; t'i\"s;ats. dz;'Es;ats; t'i\"s;ats. dz;'Es;ats; t'i\"s;ats._! adz;'in" "1234 2340 3400 4000 5000 10000 10001" "thousands"

echo "... en (English)" # short scale

test_phon en "z'i@roU w'0n t'u: Tr'i: f'o@ f'aIv s'Iks s'Ev@n 'eIt n'aIn" "0 1 2 3 4 5 6 7 8 9" "units"

+ 1
- 1
tests/language-phonemes.test View File

@@ -22,7 +22,7 @@ test_phwav ar b102bcd61f4a81e22cb7a398691d2703fd5ace46 "ma na pa ta ka qa ?a ba
test_phwav as 05d4cca91fc3447ae8b6acd7892790dd364e8e23 "ma na Na pa ta ka p#a t#a k#a ba da ga b#a d#a g#a sa xa Xa ha tSa tS#a za wa ra ja la _:_ ma mi mu me m& mo mO mV ma~ mi~ mu~ me~ m&~ mo~ mO~ moj mo~j mow mo~w mew maV m@"
test_phwav az 7bdb78c37433a47fb3b15808071b1a916202400c "ma na pa ba ta da tSa dZa tsa dza ca Ja Ca ka ga fa va sa za Sa Za xa Qa ha la ja wa Ra *a _:_ mi me m& my mW mu mo ma m@"
test_phwav ba cf21e5edc1227d3483f49305fbb39e201e5a7f97 "ma na pa ba ta da tSa dZa tsa dza ca Ja Ca ka ga fa va sa za Sa Za xa Qa ha la ja wa Ra *a _:_ mi me m& my mW mu mo ma m@"
test_phwav be 88cd154560ce202e65aa281ac7b62454806c7c6c "a O i\" tse ts;e ts.i dzi dz;o dz.u"
test_phwav be 46e35280b05300c9b7ed2a6dd0434c299689b673 "a i\" Q;E Qu tsO ts;E ts:E ts.u ts.O ts.;E ts.:a dzO dz;a dz.O dz.u"
test_phwav bg 3020acb23ac0f93d5479de3b305b71fc07b5e738 "ma na n^a Na m;a pa ba ta da ka ga p;a b;a t;a d;a ca Ja tsa dza tSa dZa ts;a dz;a fa va sa za Sa Za xa Qa f;a v;a s;a z;a x;a Ra R;a wa ja l/2a la l^a _:_ mi me m@ ma mo mu"
test_phwav bn 7fc6c8c6b347677885c6e973ff4dc6b2b6fa6d59 "ma na Na pa ta t.a tSa ka p#a t#a t.#a tS#a k#a ba da d.a dZa ga b#a d#a d.#a g#a fa Ba sa za Sa Za ha Ha wa la ja ra *a Ra _:_ ma mE mO me mi mi mu m& mV"
test_phwav bpy 7fc6c8c6b347677885c6e973ff4dc6b2b6fa6d59 "ma na Na pa ta t.a tSa ka p#a t#a t.#a tS#a k#a ba da d.a dZa ga b#a d#a d.#a g#a fa Ba sa za Sa Za ha Ha wa la ja ra *a Ra _:_ ma mE mO me mi mi mu m& mV"

+ 5
- 0
tests/language-pronunciation.test View File

@@ -19,6 +19,11 @@ X'ihO~t,Or b'ibek 'a*u b'udd#i 'atS#e 'a*u X'ihO~t,e p'OrOXp,Or b#atr'itbO*,e 'a
test_phon az "byts'yn insanL'aR l&jag'&t v& hygugLa*@n'a J'W*& az'ad b&*ab'&R do:uLuRL'aR
ona*'@n Syu*aLR'@ v& vidZdanLa*'@ v'aR v& b'iRbiRl&*'in& mynasib'&td& gaRdaSL'@x Runhund'a davRanmaL@d@RL'aR" "Bütün insanlar ləyaqət və hüquqlarına görə azad bərabər doğulurlar. Onarın şüuralrı və vicdanları var və bir-birlərinə münasibətdə qardaşlıq runhunda davranmalıdırlar." "Latn"
test_phon ba "m'in h'in 'uL b'eD h'eD 'uLAr b'VL t'eQe" "мин һин ул беҙ һеҙ улар был теге." "Cyrl"
test_phon be "Qr'us.a tsv;'ila ap'Os.n;i Q'Od
'us;E Q'al;ini\" j'ajE
'us;E v;'al;ik;ija ras'Ox;i
d'a ap'Os.n;aQa pr'uts;ika
b'i\"l;i ws'i\"pani\" b'ujni\"m b;'Elaruz.'Ovi\"m tsv;'Etam" "Груша цвіла апошні год. Усе галіны яе, усе вялікія расохі, да апошняга пруціка, былі ўсыпаны буйным бела-ружовым цветам." "Cyrl"
test_phon bg "'ax tS'udna b@Lg'arska z'em;o
pol'uSvaj ts@ft;'aSti: Z'ita" "Ах чудна българска земьо, полюшвай цъфтящи жита." "Cyrl"
test_phon bn "m'alOS,ij 'Obojd#,O Sr'omik,Oder b'i*udd#,e tS'OlOm,an_:_: m'egat#r'i_:_: 'ob#idZ,ane Se d'eSe S'Oto S'Oto 'ob#ib,aSik,e 'at.Ok k'O*etS#,e Se d'eSer 'imigr,eSOn p'uliS

Loading…
Cancel
Save