Browse Source

Belarusian: improvements

- fixed/removed non-working rules in be_list
- added stress to the words in be_list
- fixed multi-thousand transcription
- removed non-working rules in be_rules
- added rules for palatalization and phoneme lengthening
- fixed dropping of [a] at the end of words
- fixed message "Full dictionary is not installed for"
- added configuration in tr_languages.c
- fixed/added phonemes for `Q`, `ts`, `ts;`, `dz`, `dz.`, `;` etc.
master
Sergei B 2 years ago
parent
commit
3199d7889b
6 changed files with 132 additions and 100 deletions
  1. 67
    68
      dictsource/be_list
  2. 12
    22
      dictsource/be_rules
  3. 1
    2
      espeak-ng-data/lang/zle/be
  4. 22
    8
      phsource/ph_belarusian
  5. 2
    0
      phsource/phonemes
  6. 28
    0
      src/libespeak-ng/tr_languages.c

+ 67
- 68
dictsource/be_list View File

@@ -2,80 +2,79 @@
// Spelling-to-phoneme words for Belarusian

// Letter names
а %a
_a a
_б bE
в vE
г QE
д dE
е jE
ё jO
ж z.E
з zE
і %i
_і i
_й i||n;Esklad'OvajE
к ka
л El
м Em
н En
о O
п pE
р ER
с Es
т tE
у u
_ў u||n;Esklad'OvajE
ф Ef
х xa
ц tsE
ч ts.E
ш s.a
ы i"
ь m;'ak;:i||zn'ak
э E
ю ju
я ja
' ap'OstRaf
а a $u
б bE $u
в vE $u
г QE $u
д dE $u
е jE $u
ё jO $u
ж z.E $u
з z $u
і i $u
й i||n;Esklad'OvajE
к ka $u
л El $u
м Em $u
н En $u
о O $u
п pE $u
р Er $u
с Es $u
т tE $u
у u $u
ў u||n;Esklad'OvajE
ф Ef $u
х xa $u
ц tsE $u
ч ts.E $u
ш s.a $u
ы i" $u
ь m;'ak;:i||znak
э E $u
ю ju $u
я ja $u

// Numbers
_0 nul;
_1 ad;in
_2 dva
_3 tRi"
_4 ts.ati"Ri"
_5 p;ats;
_6 s.Es;ts;
_7 s;Em
_0 n'ul;
_1 adz;'in
_1f adna
_2 dv'a
_2f dz;v;'E
_3 tr'i"
_4 ts.at'i"ri"
_5 p;'ats;
_6 s.'Es;ts;
_7 s;'Em
_8 v'Os;Em
_9 dz;Ev;ats;
_10 dz;Es;ats;
_11 adz;inats:ats;
_12 dvanats:ats;
_13 tRi"nats:ats;
_14 ts.ati"Rnats:ats;
_15 p;atnats:ats;
_16 s.asnats:ats;
_17 s;amnats:ats;
_18 vas;amnats:ats;
_19 dz;Ev;atnats:ats;
_2X dvats:ats;
_3X tRi"ts:ats;
_4X s'ORak
_5X p;adz;:Es;at
_6X s.Ezdz;:Es;at
_7X s;Emdz;Es;at
_8X v'Os;Emdz;Es;at
_9X dz;Ev;an'Osta
_9 dz;'Ev;ats;
_10 dz;'Es;ats;
_11 adz;'inats:ats;
_12 dvan'ats:ats;
_13 tri"n'ats:ats;
_14 ts.at'i"rnats:ats;
_15 p;atn'ats:ats;
_16 s.asn'ats:ats;
_17 s;amn'ats:ats;
_18 vas;amn'ats:ats;
_19 dz;Ev;atn'ats:ats;
_2X dv'ats:ats;||
_3X tr'i"ts:ats;||
_4X s'Orak||
_5X p;adz;:Es;'at||
_6X s.Ezdz;:Es;'at||
_7X s;'Emdz;Es;at||
_8X v'Os;Emdz;Es;at||
_9X dz;Ev;an'Osta||
_0C st'O
_2C dz;v;Es;ts;E
_3C tRi"sta
_4C ts.ati"Ri"sta
_2C dz;v;'Es;ts;E
_3C tr'i"sta
_4C ts.at'i"ri"sta
_5C p;ats;s'Ot
_6C s.Es;ts;s'Ot
_7C s;Ems'Ot
_8C v,Os;Ems'Ot
_9C dz;Ev;ats;s'Ot
_1MA1 ti"s;ats.a
_0MA1 ti"s;ats.i"
_0M1 ti"s;ats.
_0MB1 t'i"s;ats.a
_0MA1 t'i"s;ats.i"
_0M1 t'i"s;ats. // NOTE: in 10k > appends `_!`

+ 12
- 22
dictsource/be_rules View File

@@ -1,16 +1,11 @@
// This file is UTF8 encoded
// Translation rules for Belarusian

// NOTE: LETTERGP_A doesn't work, L01 as replacement
//letter which can carry vowel phoneme
.L01 а е ё і о у ы э ю я
//letter which can carry consonant phoneme
.L02 б в г д ж з й к л м н нн п р с сс т ф х ц ч ш
//letter which can carry forward sibilant (like с)
//.L03 з дз с ц
//letter which can carry backward sibilant (like ш)
//.L04 ж дж ш ч

// This is more economic way to express assimilation by sibilation than using L03 and L04

// This is a more economical way to express assimilation by sibilation
.replace
сш шш
сч шч
@@ -36,10 +31,9 @@
г Q // ɣ

.group д
д d[
дз dz // d̻͡z̪
ддз dz:
дж dz. // ɖ͡ʐ
д d
дз dz
дж dz.

.group е
е ;E // ʲɛ
@@ -49,7 +43,7 @@
L01) е jE

.group ё
ё ;'O // ʲɔ
ё ;'O // ʲ'ɔ
_) ё j'O
') ё j'O
ь) ё j'O
@@ -57,14 +51,13 @@

.group ж
ж z. // ʐ
жж z.:

.group з
з z

.group і
і i
L02) і ;i
C) і ;i

.group й
й j
@@ -74,14 +67,12 @@

.group л
л l
лл l:

.group м
м m

.group н
н n
нн n:

.group о
о 'O
@@ -90,14 +81,13 @@
п p

.group р
р R
р r

.group с
с s
сс s:

.group т
т t[
т t

.group у
у u
@@ -112,7 +102,7 @@
х x

.group ц
ц ts // t̻͡s̪
ц ts // t͡s
цц ts:

.group ч
@@ -124,7 +114,7 @@
шш s.:

.group ы
ы i" // (ɨ
ы i" // ɨ

.group ь
ь ; // ʲ

+ 1
- 2
espeak-ng-data/lang/zle/be View File

@@ -1,5 +1,4 @@
name Belarusian
language be
replace 03 a a#
dict_min 20000
dict_min 2000
speed 95

+ 22
- 8
phsource/ph_belarusian View File

@@ -16,16 +16,21 @@ phoneme i"
FMT(vowel/ii#_2)
endphoneme

phoneme Q
import_phoneme base1/Q"
ipa ɣ
endphoneme

phoneme ts
CALL consonants/ts
voicingswitch dz
ipa t̻͡s̪
ipa t͡s
endphoneme

phoneme ts;
CALL base1/tS;
voicingswitch dz;
ipa t̻͡s̪ʲ
ipa t͡sʲ
endphoneme

phoneme ts.
@@ -40,13 +45,14 @@ endphoneme
phoneme dz
CALL consonants/dz
voicingswitch ts
ipa d̻͡z̪
ipa d͡z
endphoneme

phoneme dz;
import_phoneme pl/dz;
CALL base1/dZ;
voicingswitch ts;
ipa d̻͡z̪ʲ
ipa d͡zʲ
endphoneme

phoneme dz.
@@ -56,10 +62,18 @@ phoneme dz.
voicingswitch ts.
Vowelin f1=2 f2=2300 200 400 f3=100 80
Vowelout f1=2 f2=2300 250 300 f3=100 80 brk
endphoneme

IF PreVoicing THEN
FMT(dzh/xdzh)
ENDIF
phoneme r
liquid trl
lengthmod 6
ipa r
length 80
FMT(r3/r_trill2) addWav(r3/r_trill2.wav, 65)
endphoneme

FMT(dzh/dzh) addWav(ufric/sh_pzd2)
phoneme ;
liquid pzd
lengthmod 0
ipa ʲ
endphoneme

+ 2
- 0
phsource/phonemes View File

@@ -2049,3 +2049,5 @@ include ph_uzbek

phonemetable qdb en
include ph_langbelta

phonemetable ms id

+ 28
- 0
src/libespeak-ng/tr_languages.c View File

@@ -538,6 +538,34 @@ Translator *SelectTranslator(const char *name)
tr->encoding = ESPEAKNG_ENCODING_ISO_8859_6;
SetArabicLetters(tr);
break;
case L('b', 'e'): // Belarusian
{
	// Language setup for Belarusian: stress tables, Cyrillic letter classes,
	// input encoding, stress placement and number-reading options.

	// Amplitude and length tables handed to SetupTranslator() below.
	static const unsigned char stress_amps_be[8] = { 12, 10, 8, 8, 0, 0, 16, 17 };
	static const short stress_lengths_be[8] = { 160, 140, 200, 140, 0, 0, 240, 160 };

	// Letter tables are zero-terminated lists of one-byte codes (code point minus 0x420).
	// They must be byte arrays: both are passed to SetLetterBits() through a (char *)
	// cast, so with wchar_t elements the zero padding bytes of the first entry would be
	// taken for the terminator and only the first letter would be registered.
	static const unsigned char vowels_be[] = { // offset by 0x420 -- а е ё о у ы э ю я і
		0x10, 0x15, 0x31, 0x1e, 0x23, 0x2b, 0x2d, 0x2e, 0x2f, 0x36, 0
	};
	static const unsigned char consonants_be[] = { // б в г д ж з й к л м н п р с т ф х ц ч ш ў
		0x11, 0x12, 0x13, 0x14, 0x16, 0x17, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1f, 0x20, 0x21, 0x22, 0x24, 0x25, 0x26, 0x27, 0x28, 0x3e, 0
	};

	tr->langopts.stress_flags = S_NO_AUTO_2 | S_NO_DIM; // don't use secondary stress
	tr->letter_bits_offset = OFFSET_CYRILLIC;
	tr->transpose_min = 0x430; // convert cyrillic from unicode into range 0x01 to 0x2f
	tr->transpose_max = 0x45e;

	// Start from an empty letter-class table, then mark vowels and consonants.
	memset(tr->letter_bits, 0, sizeof(tr->letter_bits));
	SetLetterBits(tr, LETTERGP_A, (char *)vowels_be);
	SetLetterBits(tr, LETTERGP_C, (char *)consonants_be);
	SetLetterBits(tr, LETTERGP_VOWEL2, (char *)vowels_be);

	SetupTranslator(tr, stress_lengths_be, stress_amps_be);
	tr->encoding = ESPEAKNG_ENCODING_ISO_8859_5;
	tr->langopts.param[LOPT_UNPRONOUNCABLE] = 1; // disable check for unpronounceable words
	tr->langopts.stress_rule = STRESSPOSN_1L;
	tr->langopts.numbers = NUM_DECIMAL_COMMA | NUM_OMIT_1_HUNDRED;
	tr->langopts.numbers2 = NUM2_THOUSANDPLEX_VAR_THOUSANDS | NUM2_THOUSANDS_VAR1; // variant numbers before thousands
}
	break;
case L('b', 'g'): // Bulgarian
{
SetCyrillicLetters(tr);

Loading…
Cancel
Save