Browse Source

Split the hbs dictionary into bs, hr and sr.

This is because the emoji are language specific.
master
Reece H. Dunn 8 years ago
parent
commit
c4c80b11d5

+ 28
- 14
Makefile.am View File

@@ -416,7 +416,7 @@ update-emoji:
tools/emoji dictsource/en_emoji az ${CLDR_PATH} > dictsource/az_emoji
tools/emoji dictsource/en_emoji bg ${CLDR_PATH} > dictsource/bg_emoji
tools/emoji dictsource/en_emoji bn ${CLDR_PATH} > dictsource/bn_emoji
tools/emoji dictsource/en_emoji bs ${CLDR_PATH} > dictsource/hbs_emoji
tools/emoji dictsource/en_emoji bs ${CLDR_PATH} > dictsource/bs_emoji
tools/emoji dictsource/en_emoji ca ${CLDR_PATH} > dictsource/ca_emoji
tools/emoji dictsource/en_emoji cs ${CLDR_PATH} > dictsource/cs_emoji
tools/emoji dictsource/en_emoji cy ${CLDR_PATH} > dictsource/cy_emoji
@@ -445,6 +445,7 @@ dictionaries: \
espeak-ng-data/bg_dict \
espeak-ng-data/bn_dict \
espeak-ng-data/bpy_dict \
espeak-ng-data/bs_dict \
espeak-ng-data/ca_dict \
espeak-ng-data/cs_dict \
espeak-ng-data/cy_dict \
@@ -463,8 +464,8 @@ dictionaries: \
espeak-ng-data/gd_dict \
espeak-ng-data/grc_dict \
espeak-ng-data/gu_dict \
espeak-ng-data/hbs_dict \
espeak-ng-data/hi_dict \
espeak-ng-data/hr_dict \
espeak-ng-data/hu_dict \
espeak-ng-data/hy_dict \
espeak-ng-data/ia_dict \
@@ -508,6 +509,7 @@ dictionaries: \
espeak-ng-data/sk_dict \
espeak-ng-data/sl_dict \
espeak-ng-data/sq_dict \
espeak-ng-data/sr_dict \
espeak-ng-data/sv_dict \
espeak-ng-data/sw_dict \
espeak-ng-data/ta_dict \
@@ -574,6 +576,12 @@ dictsource/bpy_extra:
espeak-ng-data/bpy_dict: src/espeak-ng phsource/phonemes.stamp dictsource/bpy_list dictsource/bpy_rules dictsource/bpy_extra
cd dictsource && ESPEAK_DATA_PATH=$(PWD) LD_LIBRARY_PATH=../src:${LD_LIBRARY_PATH} ../src/espeak-ng --compile=bpy && cd ..

# Bosnian (bs) dictionary.
# `bs` is a short alias target for espeak-ng-data/bs_dict.
bs: espeak-ng-data/bs_dict
# No prerequisites: the (empty) extra word list is only created when it does not exist yet.
dictsource/bs_extra:
touch dictsource/bs_extra
# Recompile when the espeak-ng binary, the phoneme stamp or any bs source file changes.
# The compiler is run from inside dictsource with ../src prepended to LD_LIBRARY_PATH.
# NOTE(review): $(PWD) is taken from the environment and may differ from the make
# working directory (e.g. under `make -C`) -- confirm this is intended.
# NOTE(review): the trailing `cd ..` has no effect on later recipe lines, since
# each recipe line runs in its own shell.
espeak-ng-data/bs_dict: src/espeak-ng phsource/phonemes.stamp dictsource/bs_list dictsource/bs_rules dictsource/bs_extra dictsource/bs_emoji
cd dictsource && ESPEAK_DATA_PATH=$(PWD) LD_LIBRARY_PATH=../src:${LD_LIBRARY_PATH} ../src/espeak-ng --compile=bs && cd ..

ca: espeak-ng-data/ca_dict
dictsource/ca_extra:
touch dictsource/ca_extra
@@ -688,18 +696,18 @@ dictsource/gu_extra:
espeak-ng-data/gu_dict: src/espeak-ng phsource/phonemes.stamp dictsource/gu_list dictsource/gu_rules dictsource/gu_extra dictsource/gu_emoji
cd dictsource && ESPEAK_DATA_PATH=$(PWD) LD_LIBRARY_PATH=../src:${LD_LIBRARY_PATH} ../src/espeak-ng --compile=gu && cd ..

# Combined Serbian/Croatian/Bosnian (hbs) dictionary.
# `hbs` is a short alias target for espeak-ng-data/hbs_dict.
hbs: espeak-ng-data/hbs_dict
# No prerequisites: the (empty) extra word list is only created when it does not exist yet.
dictsource/hbs_extra:
touch dictsource/hbs_extra
# Recompile when the espeak-ng binary, the phoneme stamp or any hbs source file
# (list, rules, extra, emoji) changes. The compiler is run from inside dictsource
# with ESPEAK_DATA_PATH=$(PWD) and ../src prepended to LD_LIBRARY_PATH.
espeak-ng-data/hbs_dict: src/espeak-ng phsource/phonemes.stamp dictsource/hbs_list dictsource/hbs_rules dictsource/hbs_extra dictsource/hbs_emoji
cd dictsource && ESPEAK_DATA_PATH=$(PWD) LD_LIBRARY_PATH=../src:${LD_LIBRARY_PATH} ../src/espeak-ng --compile=hbs && cd ..

# `hi` is a short alias target for espeak-ng-data/hi_dict.
hi: espeak-ng-data/hi_dict
# No prerequisites: the (empty) extra word list is only created when it does not exist yet.
dictsource/hi_extra:
touch dictsource/hi_extra
# Recompile when the espeak-ng binary, the phoneme stamp or any hi source file
# (list, rules, extra, emoji) changes. The compiler is run from inside dictsource
# with ESPEAK_DATA_PATH=$(PWD) and ../src prepended to LD_LIBRARY_PATH.
espeak-ng-data/hi_dict: src/espeak-ng phsource/phonemes.stamp dictsource/hi_list dictsource/hi_rules dictsource/hi_extra dictsource/hi_emoji
cd dictsource && ESPEAK_DATA_PATH=$(PWD) LD_LIBRARY_PATH=../src:${LD_LIBRARY_PATH} ../src/espeak-ng --compile=hi && cd ..

# Croatian (hr) dictionary.
# `hr` is a short alias target for espeak-ng-data/hr_dict.
hr: espeak-ng-data/hr_dict
# No prerequisites: the (empty) extra word list is only created when it does not exist yet.
dictsource/hr_extra:
touch dictsource/hr_extra
# Recompile when the espeak-ng binary, the phoneme stamp or any hr source file
# (list, rules, extra) changes; this rule lists no hr_emoji prerequisite.
# The compiler is run from inside dictsource with ESPEAK_DATA_PATH=$(PWD) and
# ../src prepended to LD_LIBRARY_PATH.
espeak-ng-data/hr_dict: src/espeak-ng phsource/phonemes.stamp dictsource/hr_list dictsource/hr_rules dictsource/hr_extra
cd dictsource && ESPEAK_DATA_PATH=$(PWD) LD_LIBRARY_PATH=../src:${LD_LIBRARY_PATH} ../src/espeak-ng --compile=hr && cd ..

hu: espeak-ng-data/hu_dict
dictsource/hu_extra:
touch dictsource/hu_extra
@@ -934,18 +942,18 @@ espeak-ng-data/ru_dict: src/espeak-ng phsource/phonemes.stamp dictsource/ru_list
endif
cd dictsource && ESPEAK_DATA_PATH=$(PWD) LD_LIBRARY_PATH=../src:${LD_LIBRARY_PATH} ../src/espeak-ng --compile=ru && cd ..

# `si` is a short alias target for espeak-ng-data/si_dict.
si: espeak-ng-data/si_dict
# No prerequisites: the (empty) extra word list is only created when it does not exist yet.
dictsource/si_extra:
touch dictsource/si_extra
# Recompile when the espeak-ng binary, the phoneme stamp or any si source file
# (list, rules, extra) changes. The compiler is run from inside dictsource with
# ESPEAK_DATA_PATH=$(PWD) and ../src prepended to LD_LIBRARY_PATH.
espeak-ng-data/si_dict: src/espeak-ng phsource/phonemes.stamp dictsource/si_list dictsource/si_rules dictsource/si_extra
cd dictsource && ESPEAK_DATA_PATH=$(PWD) LD_LIBRARY_PATH=../src:${LD_LIBRARY_PATH} ../src/espeak-ng --compile=si && cd ..

# `sd` is a short alias target for espeak-ng-data/sd_dict.
sd: espeak-ng-data/sd_dict
# No prerequisites: the (empty) extra word list is only created when it does not exist yet.
dictsource/sd_extra:
touch dictsource/sd_extra
# Recompile when the espeak-ng binary, the phoneme stamp or any sd source file
# (list, rules, extra) changes. The compiler is run from inside dictsource with
# ESPEAK_DATA_PATH=$(PWD) and ../src prepended to LD_LIBRARY_PATH.
espeak-ng-data/sd_dict: src/espeak-ng phsource/phonemes.stamp dictsource/sd_list dictsource/sd_rules dictsource/sd_extra
cd dictsource && ESPEAK_DATA_PATH=$(PWD) LD_LIBRARY_PATH=../src:${LD_LIBRARY_PATH} ../src/espeak-ng --compile=sd && cd ..

# `si` is a short alias target for espeak-ng-data/si_dict.
si: espeak-ng-data/si_dict
# No prerequisites: the (empty) extra word list is only created when it does not exist yet.
dictsource/si_extra:
touch dictsource/si_extra
# Recompile when the espeak-ng binary, the phoneme stamp or any si source file
# (list, rules, extra) changes. The compiler is run from inside dictsource with
# ESPEAK_DATA_PATH=$(PWD) and ../src prepended to LD_LIBRARY_PATH.
espeak-ng-data/si_dict: src/espeak-ng phsource/phonemes.stamp dictsource/si_list dictsource/si_rules dictsource/si_extra
cd dictsource && ESPEAK_DATA_PATH=$(PWD) LD_LIBRARY_PATH=../src:${LD_LIBRARY_PATH} ../src/espeak-ng --compile=si && cd ..

sk: espeak-ng-data/sk_dict
dictsource/sk_extra:
touch dictsource/sk_extra
@@ -964,6 +972,12 @@ dictsource/sq_extra:
espeak-ng-data/sq_dict: src/espeak-ng phsource/phonemes.stamp dictsource/sq_list dictsource/sq_rules dictsource/sq_extra
cd dictsource && ESPEAK_DATA_PATH=$(PWD) LD_LIBRARY_PATH=../src:${LD_LIBRARY_PATH} ../src/espeak-ng --compile=sq && cd ..

# Serbian (sr) dictionary.
# `sr` is a short alias target for espeak-ng-data/sr_dict.
sr: espeak-ng-data/sr_dict
# No prerequisites: the (empty) extra word list is only created when it does not exist yet.
dictsource/sr_extra:
touch dictsource/sr_extra
# Recompile when the espeak-ng binary, the phoneme stamp or any sr source file
# (list, rules, extra) changes; this rule lists no sr_emoji prerequisite.
# The compiler is run from inside dictsource with ESPEAK_DATA_PATH=$(PWD) and
# ../src prepended to LD_LIBRARY_PATH.
espeak-ng-data/sr_dict: src/espeak-ng phsource/phonemes.stamp dictsource/sr_list dictsource/sr_rules dictsource/sr_extra
cd dictsource && ESPEAK_DATA_PATH=$(PWD) LD_LIBRARY_PATH=../src:${LD_LIBRARY_PATH} ../src/espeak-ng --compile=sr && cd ..

sv: espeak-ng-data/sv_dict
dictsource/sv_extra:
touch dictsource/sv_extra

dictsource/hbs_emoji → dictsource/bs_emoji View File


dictsource/hbs_list → dictsource/bs_list View File


dictsource/hbs_rules → dictsource/bs_rules View File


+ 687
- 0
dictsource/hr_list View File

@@ -0,0 +1,687 @@

// "hbs" is the ISO 639-3 code for Serbian/Croatian/Bosnian macrolanguage
// This file is UTF-8 encoded

// Conditional rules:
// ?1 Croatian
// ?2 Serbian
// ?3 Bosnian (Bosnia and Herzegovina)
// ?4 Serbian & Bosnian

// $alt1 first vowel is long


// Letters
b b@
c ts@
č tS@
ć tS;@2
d d@
dž dZ@
đ dZ;@2
f f@
g g@
h x@
j j@2
k k@
l l@
lj l^@
m m@
n n@
nj n^@
p p@
q kv@
r R@
_s s@
š S@
t t@
v v@
w dvostr*uko||v@
?2 w duplo||v@
x iks
y ipsilon
z z@
ž Z@
а a
б b@
ц ts@
ћ tS;@2
ч tS@
џ dZ@
ѕ dz@
д d@
ђ dZ;@2
е E
ф f@
г g@
х x@
и i
ј j@2
к k@
љ l^@
л l@
м m@
њ n^@
н n@
о o
п p@
р R@
_с s@
ш S@
т t@
у u
в v@
з z@
ж Z@
ѓ dZ;@2
ќ tS;@2

ё jo
й kr,atko||'i
щ StS;@2
ъ tvR2d'i||zn,ak
ы jeri
ь m'eki||z,nak
э e
ю ju
я ja

_á $accent
_é $accent
_í $accent
_ó $accent
_ú $accent

_ar 'Arapsko
_cyr tS;'iRilitsa


// accent names
_lig l'ig&t,UR&
_acu 'akUt
_ac2 dv'ostr**UkI;'akUt
_brv br**'Ev // ??
_ced ts'EdIl&
_cir ts'iRkUmflEks
_dia 'uml&Ut
_dot t'otSk&
_grv n'agl&s&k
_hac kv'atSIts&
_mcn m'akr**on
_ogo 'ogonEk
_rng r**'iNg
_stk kr**'oz
_tld t'ild&


// symbols
_?? znak // unknown symbol
_?A slovo // unknown letter
_cap k'apIt&l // ?? use English until I find the correct word

© 'aUtoRsk&||pr*av&
* zvj'ezdits& $max3
= _j'edn&ko $max3
% p'osto $max3
+ plus $max3
. totSk& $max3
?4 . tatSk& $max3
€ 'euRo
?4 € 'evr*o
@ _'at
?2 @ tRgovatSkoi
& _'end
! 'usklItSnIk
?2 ! 'uzvItSnIk
: dv'ototSk&
?4 : dv'ot&tSk&
# br*'oj_ $max3
/ kr*'oz_ $max3
\ b'EkslES $max3
^ ts'iRkumfleks
~ t'ild&
¤ v'alut&
§ 'odlom&k
µ m'ikr*o
¶ tSl'an&k
° st'up&n^
?4 ° st'epen
_" n'avodnIk
?2 _" n'avod
_‚ 'otvoReni||n'avodnIk
_„ sp'uSteni||n'avodnIk
?2 _„ z&tvoReni||n'avodnIk
_… tr*'i||t'otSke
?4 _… tr*'i||t'atSke
† kr*'iZ
?2 † kr-'st
‡ dv'ostr*Uki||kr*'iZ
?2 ‡ dv'ostRUki||kR'st
‰ pr*'omil
_‹ 'otvoReni||n'avodnIk
_‘ 'otvoReni||n'avodnIk
_’ j'ednostr*'uki||n'avodnIk
_“ 'otvoReni||n'avodnIk
_” z'atvoReni||n'avodnIk
_• kr*'upna||t'otSk&
?4 _• kr*'upna||t'atSk&
_– En||ts'r-t&
_— Em||ts'r-t&
™ pr*'o'izvodZ&tS
˘ br*'evis
¨ pr*'ijegl&s
?2 ¨ pr*'egl&s
_« 'otvoReni||n'avodnIk
_­ pr*'ivr*emen&||ts'r-t'its&
· p'ut&
¸ sed'il&
_» z'atvoReni||n'avodnIk
® R'egistr*atsIj&
ł l@||s&||ts'r-t'itsom
× p'ut&
÷ p'odijel^,eno
?2 ÷ p'odel^,eno

$ d'ol&R
_- m'inus
_* zvj'ezdits&
?2 _* zv'ezdits&
_@ t'r-g'ov&tSko||'a
?2 _@ et

_( 'otvoRen&||z'agr*ad&
_) z'atvoRen&||z'agr*ad&
_' 'apostr*of
_, z'aRez
?2 _, z'apeta
_- ts'r-t'its&
_. t'otSk&
?4 _. t'atSk&
_; t'otSk&z,a*Ez
?2 _; t'atSk&z,apeta
?3 _; t'atSk&z,a*Ez
_< m'an^i||'od
_= j'edn&ko
_> v'etSi||'od
_? 'upitnIk
_& t'r-g'ov&tSko||'i
_[ 'otvoRen&||'ugl&t&
?2 _[ 'otvoRen&||'ugl&st&
_] z'atvoRen&||'ugl&t&
?2 _] z'atvoRen&||'ugl&st&
__ ts'r-t&
_` gr*'avis
_{ 'otvoRen&||v'ititS&st&
_| 'okomits&
?2 _| 'uspR&vn&
_} z'atvoRen&||v'ititS&st&


// Numbers
_0 n'ul&
_1 j'ed&n
_2 dv'a
_2f dv'ije
?2 _2f dv'E
_3 tr*'i
_4 tS'EtIRI
_5 p'Et
_6 S'Est
_7 s'Ed&m
_8 'os&m
_9 d'EvEt
_10 d'EsEt
_11 j'ed&naIst
_12 dv'anaIst
_13 tr*'inaIst
_14 tS'Etr-naIst
_15 p'EtnaIst
_16 S'EsnaIst
_17 s'Ed&mnaIst
_18 'os&mnaIst
_19 d'EvEtnaIst
?2 _11 j'ed&naEst
?2 _12 dv'anaEst
?2 _13 tr*'inaEst
?2 _14 tS'Etr-naEst
?2 _15 p'EtnaEst
?2 _16 S'EsnaEst
?2 _17 s'Ed&mnaEst
?2 _18 'os&mnaEst
?2 _19 d'EvEtnaEst
_2X dv'adEsEt
_3X tr*'idEsEt
_4X tS'Etr-dEsEt
_5X p'EdEsEt
_6X S'EzdEsEt
_7X s'Ed&mdEsEt
_8X 'os&mdEsEt
_9X d'EvEdEsEt
_0C st'o_
?2 _0C st'O_
_2C dvj'est'o
?2 _2C dv'est'a_
?2 _3C tR'ist'a_
?2 _4C tSetR'ist'o_
_6C S'Est'o

_0M1 t'isUtS;&
_0MA1 t'isUtS;e
_1M1 t'isUtS;U
?2 _0M1 h'il^,&d&
?2 _0MA1 h'il^,&de
?2 _1M1 h'il^,&dU
?2 _1MA1 jedn&||hil^&d&
?3 _0M1 h'il^ad&
?3 _0MA1 h'il^ade
?3 _1M1 h'il^adU

_0M2 m'ilijU:na
_0MA2 m'ilijU:na
_1M2 m'ilijU:n
?4 _0M2 m'ilijO:na
?4 _0MA2 m'ilijO:na
?4 _1M2 m'ilijO:n

_0M3 m'ilia:RdI
_0MA3 m'ilia:Rde
_1M3 m'ilia:RdU
?2 _0M3 milijA:rdI
?2 _0MA3 milijA:rde
?2 _1M3 milijA:rdU

_0M4 b'ilijU:na
_1M4 b'ilijU:n
?4 _0M4 b'ilijO:na
?4 _1M4 b'ilijO:n

_dpt _:z'a*Ez_
_roman r*'imsko

// not Roman numerals
// vi vi // not needed if only all-capitals are spoken as Roman numbers


// Abbreviations
aaa $abbrev
aac $abbrev
ab $only $abbrev $capital
abc $abbrev
abk $only $abbrev
abs $abbrev
acc $only $abbrev
acf $only $abbrev
acfm $abbrev
acg $only $abbrev
acgm $abbrev
acl $only $abbrev
acm $only $abbrev
acw $only $abbrev
add $only $abbrev
adn $only $abbrev
adp $only $abbrev
adsl $abbrev
agm $abbrev
ahk $only $abbrev
aif $only $abbrev
aip $only $abbrev
alg $only $abbrev
alm $only $abbrev
amd $only $abbrev
amf $only $abbrev
amg $only $abbrev
aph $only $abbrev
apj $only $abbrev
apl $only $abbrev
apm $only $abbrev
app $only $abbrev
arg $only $abbrev
arj $only $abbrev
asc $only $abbrev
asf $only $abbrev
asm $only $abbrev
asn $only $abbrev
asp $only $abbrev
asx $only $abbrev
atf $only $abbrev
atp $abbrev
av $only $abbrev
avg $abbrev
avr $only $abbrev
awb $only $abbrev
awt $only $abbrev
brf $abbrev
brl $abbrev
brx $abbrev
cca $abbrev
cda $only $abbrev
cdr $abbrev
cdrw $abbrev
cgi $abbrev
clr $abbrev
cpe $only $abbrev
cpu $only $abbrev
cr $only $abbrev
crt $only $abbrev
ctrl $abbrev
daa $only $abbrev
doo $abbrev
dox $only $abbrev
dqy $abbrev
dsr $only $abbrev
ear $only $abbrev
ecdl $only $abbrev
ecp $only $abbrev
ecs $only $abbrev
elm $only $abbrev
emf $only $abbrev
emh $only $abbrev
eml $only $abbrev
ems $only $abbrev
epf $only $abbrev
eq $only $abbrev
esc $only $abbrev
etd $only $abbrev
eu $only $abbrev
eur $only $abbrev
exc $only $abbrev
faq $abbrev
fbi $abbrev
ffa $only $abbrev
fr $only $abbrev
frm $abbrev
frt $only $abbrev
frx $abbrev
gho $only $abbrev
gr $only $abbrev
grr $abbrev
grrr $abbrev
grp $abbrev
grps $abbrev
grs $only $abbrev
grx $only $abbrev
gui $only $abbrev
hho $abbrev
hr $only $abbrev
hrk $only $abbrev
hrt $only $abbrev $capital
hrv $only $abbrev
hta $only $abbrev
hzzo $abbrev
ibm $abbrev
icc $only $abbrev
icm $only $abbrev
ics $only $abbrev
idb $only $abbrev
idc $only $abbrev
idm $only $abbrev
iff $only $abbrev
ifu $only $abbrev
ilk $only $abbrev
img $only $abbrev
iml $only $abbrev
isdn $abbrev
isl $only $abbrev
isp $only $abbrev
iss $only $abbrev
iq $only $abbrev
iqy $only $abbrev
itw $only $abbrev
itz $only $abbrev
ivf $only $abbrev
jpi $only $abbrev
jrr $abbrev
jse $only $abbrev
lha $abbrev
liq $only $abbrev
maq $only $abbrev
maw $only $abbrev
mda $only $abbrev
mde $only $abbrev
mdi $only $abbrev
mga $only $abbrev
mge $only $abbrev
mgi $only $abbrev
mpa $only $abbrev
mpe $only $abbrev
mpr $only $abbrev
msi $only $abbrev
mup $abbrev $capital
nco $only $abbrev
nfo $only $abbrev
nji $only $abbrev
nr $only $abbrev
nra $only $abbrev
nrb $only $abbrev
nrc $only $abbrev
nre $only $abbrev
nrf $only $abbrev
nrg $abbrev
nrh $only $abbrev
nri $only $abbrev
nrm $only $abbrev
nrq $only $abbrev
nrs $only $abbrev
nru $only $abbrev
nrw $only $abbrev
nsa $only $abbrev
nsi $only $abbrev
nvda $abbrev
obd $only $abbrev
obj $only $abbrev
obt $only $abbrev
obz $only $abbrev
ocr $only $abbrev
ocx $only $abbrev
oem $abbrev
off $only $abbrev
oft $only $abbrev
ofx $only $abbrev
ogc $only $abbrev
ogg $only $abbrev
ogl $only $abbrev
omw $abbrev
oqy $only $abbrev
os $only $abbrev $capital
oss $only $abbrev
otf $only $abbrev
otm $only $abbrev
pda $abbrev
pdi $only $abbrev
pgi $only $abbrev
pho $only $abbrev
pko $only $abbrev
pma $only $abbrev
ppa $only $abbrev
pr $only $abbrev
prf $only $abbrev
prg $only $abbrev
prn $abbrev
prt $only $abbrev
qbr $abbrev
qpr $abbrev
raw $only $abbrev
rb $abbrev
rba $abbrev
rc $only $abbrev
rcc $abbrev
rcd $abbrev
rcs $abbrev
rd $only $abbrev
rds $abbrev
rgb $abbrev
rh $only $abbrev
rk $only $abbrev
rm $only $abbrev
rma $only $abbrev
rme $only $abbrev
rmf $only $abbrev
rmi $only $abbrev
rmt $only $abbrev
rmx $abbrev
rn $only $abbrev
rnk $only $abbrev
rp $only $abbrev
rqy $only $abbrev
rs $abbrev
rss $abbrev
rta $abbrev
rtf $abbrev
rtl $abbrev
rttl $abbrev
rtv $abbrev
rw $abbrev
rwo $only $abbrev
rx $abbrev
sbr $only $abbrev
scr $only $abbrev
sr $only $abbrev
srd $only $abbrev
ssa $only $abbrev
sys $only $abbrev
tpu $only $abbrev
udf $only $abbrev
udl $only $abbrev
udt $abbrev
uhh $abbrev
uif $only $abbrev
uip $only $abbrev
url $only $abbrev
usd $only $abbrev
usr $only $abbrev
uss $only $abbrev
uu $only $abbrev
uue $only $abbrev
uup $only $abbrev
uxd $only $abbrev
vba $only $abbrev
vbe $only $abbrev
vbr $abbrev
vga $abbrev
wax $only $abbrev
wbr $abbrev
wiz $only $abbrev
wma $only $abbrev
wri $only $abbrev
wta $abbrev
xam $only $abbrev
xba $only $abbrev
xiw $only $abbrev
xla $only $abbrev
xpa $only $abbrev
xpi $only $abbrev
xsi $only $abbrev
xxa $only $abbrev
xxe $only $abbrev
xy $abbrev
xyz $abbrev
yu $only $abbrev
zaa $only $abbrev
zoo $only $abbrev

br $dot $abbrev
dr $dot $abbrev
mr $dot $abbrev
mrs $dot $abbrev
itd $dot $abbrev
npr $dot $abbrev
str $only $dot $abbrev

inc $dot
prof $dot
sl $dot
tj $dot
tzv $dot

// Interrogative pronouns
tko $u+
?4 šta $u+
što $u+
koga $pause
čega $pause


// Pronouns
ja $alt1

// Prepositions
o $u $brk
u $u $brk
s s_ $brk


// Conjunctions
a $u $pause // and
i $u $pause // and
ali $pause // but
nego $pause // but
ili $u $pause // or




// Auxiliary verbs
li $u // question

sam $u // to be, short form
si $u
je $u
smo $u
ste $u
su $u

bio $u // to be, past tense
bila $u
bilo $u
bili $u

nisam $u+ // to be, negative
nisi $u+
nije $u+
nismo $u+
niste $u+
nisu $u+

imam $u+ // to have
imaš $u+
ima $u+
imamo $u+
imate $u+
imaju $u+

nemam $u+ // to have, negative
nemaš $u+
nema $u+
nemamo $u+
nemate $u+
nemaju $u+





// treat as letters if at the end of the clause
a a $atend // letter-a
i i $atend // letter-i
o o $atend // letter-o
s s@ $atend
u u $atend


// Exceptions

ctrl tsontRo:l
control tsontRo:l
docx d'ots||'iks
jpeg j@2||p'eg
macx m'ats||'iks
potx p'ot||'iks
sisx s'is||'iks
start st'aRt
utf u||te||ef

?2 ra R&
?2 re Re
?2 ar &R
?2 er eR

+ 206
- 0
dictsource/hr_rules View File

@@ -0,0 +1,206 @@

// "hbs" is the ISO 639-3 code for Serbian/Croatian/Bosnian macrolanguage
// This file is UTF-8 encoded

//The next line gives a voice which is used to find a phoneme table to use to compile this dictionary
//voice=hr

// Conditional rules:
// ?1 Croatian
// ?2 Serbian
// ?3 Bosnian (Bosnia and Herzegovina)
// ?4 Serbian & Bosnian

.group a
a a
X) a ($w_alt1 a:
a (j A
a (r A // don't reduce to [&]
_hrv) a (t a:

.group b
b b

.group c
c ts

.group č
č tS

.group ć
ć tS;

.group d
d d
dz dz
_) dz (_ dz@
dž dZ
_) dž (_ dZ@

.group đ
đ dZ;

.group e
e E
X) e ($w_alt1 E:
j) e e
e (j e
_) exe %ekse // filetype
?2 _) exe %eikse // filetype

.group f
f f

.group g
g g

.group h
h h
_) h x

.group i
i i
X) i ($w_alt1 i:
._) ini %ini // filetype

.group j
j j
XA) j (_ j_ // short pause after final [j] in one-syllable words

.group k
k k

.group l
l l
K) l (K l-
_) lj (_ l^@
lj (+ l^
v) lj lj

.group m
m m

.group n
n n
n (g N
n (k N

nj n^
_) nj (_ n^@

.group o
o o
X) o ($w_alt1 o:
ou oU

.group p
p p

.group q
q k
qu kv
?2 qu ku

.group r
r R
A) r (A R
K) r (A r* // this phoneme expands to [@-*]
rr R
r (_ R

K) r (K R2


.group s
s s

.group š
š S

.group t
t t

.group u
u u
X) u ($w_alt1 u:

.group v
v v

.group w
w v

.group x
x ks

.group y
y j
K) y (K i


.group z
z z

.group ž
ž Z

.group
á a:
é e:
í i:
ó o:
ú u:

ä _^_DE // use German voice
ö _^_DE
ü _^_DE
ß _^_DE

$ d'ol&R

__) - (_D minus
A_) - (_D _
C_) - (_D _


.group 0xce // Greek letters

α 'alf&
ά 'alf&||t'onos
β b'et&
γ g'am&
δ d'elt&
ε 'epsIlon
έ 'epsIlon||t'onos
ζ z'et&
η 'et&
ή 'et&||t'onos
θ tx'et&
ι j'ot&
ί j'ot&||t'onos
κ k'ap&
λ l'amd&
μ m'i
ν n'i
ξ ks'i
ο 'omIkr*on_

.group 0xcf // Greek letters

π p'i
ρ R'o
σ s'igm&
ς gr-tSko_||z'avr-Sno_||s'igm&
τ t'au
υ gr-tSki_||'ipsIlon
ύ gr-tSki||'ipsIlon||t'onos
φ f'i
χ x'i
ψ ps'i
ω 'omeg&
ώ 'omeg&||t'onos
ό 'omIkr*on||t'onos



+ 687
- 0
dictsource/sr_list View File

@@ -0,0 +1,687 @@

// "hbs" is the ISO 639-3 code for Serbian/Croatian/Bosnian macrolanguage
// This file is UTF-8 encoded

// Conditional rules:
// ?1 Croatian
// ?2 Serbian
// ?3 Bosnian (Bosnia and Herzegovina)
// ?4 Serbian & Bosnian

// $alt1 first vowel is long


// Letters
b b@
c ts@
č tS@
ć tS;@2
d d@
dž dZ@
đ dZ;@2
f f@
g g@
h x@
j j@2
k k@
l l@
lj l^@
m m@
n n@
nj n^@
p p@
q kv@
r R@
_s s@
š S@
t t@
v v@
w dvostr*uko||v@
?2 w duplo||v@
x iks
y ipsilon
z z@
ž Z@
а a
б b@
ц ts@
ћ tS;@2
ч tS@
џ dZ@
ѕ dz@
д d@
ђ dZ;@2
е E
ф f@
г g@
х x@
и i
ј j@2
к k@
љ l^@
л l@
м m@
њ n^@
н n@
о o
п p@
р R@
_с s@
ш S@
т t@
у u
в v@
з z@
ж Z@
ѓ dZ;@2
ќ tS;@2

ё jo
й kr,atko||'i
щ StS;@2
ъ tvR2d'i||zn,ak
ы jeri
ь m'eki||z,nak
э e
ю ju
я ja

_á $accent
_é $accent
_í $accent
_ó $accent
_ú $accent

_ar 'Arapsko
_cyr tS;'iRilitsa


// accent names
_lig l'ig&t,UR&
_acu 'akUt
_ac2 dv'ostr**UkI;'akUt
_brv br**'Ev // ??
_ced ts'EdIl&
_cir ts'iRkUmflEks
_dia 'uml&Ut
_dot t'otSk&
_grv n'agl&s&k
_hac kv'atSIts&
_mcn m'akr**on
_ogo 'ogonEk
_rng r**'iNg
_stk kr**'oz
_tld t'ild&


// symbols
_?? znak // unknown symbol
_?A slovo // unknown letter
_cap k'apIt&l // ?? use English until I find the correct word

© 'aUtoRsk&||pr*av&
* zvj'ezdits& $max3
= _j'edn&ko $max3
% p'osto $max3
+ plus $max3
. totSk& $max3
?4 . tatSk& $max3
€ 'euRo
?4 € 'evr*o
@ _'at
?2 @ tRgovatSkoi
& _'end
! 'usklItSnIk
?2 ! 'uzvItSnIk
: dv'ototSk&
?4 : dv'ot&tSk&
# br*'oj_ $max3
/ kr*'oz_ $max3
\ b'EkslES $max3
^ ts'iRkumfleks
~ t'ild&
¤ v'alut&
§ 'odlom&k
µ m'ikr*o
¶ tSl'an&k
° st'up&n^
?4 ° st'epen
_" n'avodnIk
?2 _" n'avod
_‚ 'otvoReni||n'avodnIk
_„ sp'uSteni||n'avodnIk
?2 _„ z&tvoReni||n'avodnIk
_… tr*'i||t'otSke
?4 _… tr*'i||t'atSke
† kr*'iZ
?2 † kr-'st
‡ dv'ostr*Uki||kr*'iZ
?2 ‡ dv'ostRUki||kR'st
‰ pr*'omil
_‹ 'otvoReni||n'avodnIk
_‘ 'otvoReni||n'avodnIk
_’ j'ednostr*'uki||n'avodnIk
_“ 'otvoReni||n'avodnIk
_” z'atvoReni||n'avodnIk
_• kr*'upna||t'otSk&
?4 _• kr*'upna||t'atSk&
_– En||ts'r-t&
_— Em||ts'r-t&
™ pr*'o'izvodZ&tS
˘ br*'evis
¨ pr*'ijegl&s
?2 ¨ pr*'egl&s
_« 'otvoReni||n'avodnIk
_­ pr*'ivr*emen&||ts'r-t'its&
· p'ut&
¸ sed'il&
_» z'atvoReni||n'avodnIk
® R'egistr*atsIj&
ł l@||s&||ts'r-t'itsom
× p'ut&
÷ p'odijel^,eno
?2 ÷ p'odel^,eno

$ d'ol&R
_- m'inus
_* zvj'ezdits&
?2 _* zv'ezdits&
_@ t'r-g'ov&tSko||'a
?2 _@ et

_( 'otvoRen&||z'agr*ad&
_) z'atvoRen&||z'agr*ad&
_' 'apostr*of
_, z'aRez
?2 _, z'apeta
_- ts'r-t'its&
_. t'otSk&
?4 _. t'atSk&
_; t'otSk&z,a*Ez
?2 _; t'atSk&z,apeta
?3 _; t'atSk&z,a*Ez
_< m'an^i||'od
_= j'edn&ko
_> v'etSi||'od
_? 'upitnIk
_& t'r-g'ov&tSko||'i
_[ 'otvoRen&||'ugl&t&
?2 _[ 'otvoRen&||'ugl&st&
_] z'atvoRen&||'ugl&t&
?2 _] z'atvoRen&||'ugl&st&
__ ts'r-t&
_` gr*'avis
_{ 'otvoRen&||v'ititS&st&
_| 'okomits&
?2 _| 'uspR&vn&
_} z'atvoRen&||v'ititS&st&


// Numbers
_0 n'ul&
_1 j'ed&n
_2 dv'a
_2f dv'ije
?2 _2f dv'E
_3 tr*'i
_4 tS'EtIRI
_5 p'Et
_6 S'Est
_7 s'Ed&m
_8 'os&m
_9 d'EvEt
_10 d'EsEt
_11 j'ed&naIst
_12 dv'anaIst
_13 tr*'inaIst
_14 tS'Etr-naIst
_15 p'EtnaIst
_16 S'EsnaIst
_17 s'Ed&mnaIst
_18 'os&mnaIst
_19 d'EvEtnaIst
?2 _11 j'ed&naEst
?2 _12 dv'anaEst
?2 _13 tr*'inaEst
?2 _14 tS'Etr-naEst
?2 _15 p'EtnaEst
?2 _16 S'EsnaEst
?2 _17 s'Ed&mnaEst
?2 _18 'os&mnaEst
?2 _19 d'EvEtnaEst
_2X dv'adEsEt
_3X tr*'idEsEt
_4X tS'Etr-dEsEt
_5X p'EdEsEt
_6X S'EzdEsEt
_7X s'Ed&mdEsEt
_8X 'os&mdEsEt
_9X d'EvEdEsEt
_0C st'o_
?2 _0C st'O_
_2C dvj'est'o
?2 _2C dv'est'a_
?2 _3C tR'ist'a_
?2 _4C tSetR'ist'o_
_6C S'Est'o

_0M1 t'isUtS;&
_0MA1 t'isUtS;e
_1M1 t'isUtS;U
?2 _0M1 h'il^,&d&
?2 _0MA1 h'il^,&de
?2 _1M1 h'il^,&dU
?2 _1MA1 jedn&||hil^&d&
?3 _0M1 h'il^ad&
?3 _0MA1 h'il^ade
?3 _1M1 h'il^adU

_0M2 m'ilijU:na
_0MA2 m'ilijU:na
_1M2 m'ilijU:n
?4 _0M2 m'ilijO:na
?4 _0MA2 m'ilijO:na
?4 _1M2 m'ilijO:n

_0M3 m'ilia:RdI
_0MA3 m'ilia:Rde
_1M3 m'ilia:RdU
?2 _0M3 milijA:rdI
?2 _0MA3 milijA:rde
?2 _1M3 milijA:rdU

_0M4 b'ilijU:na
_1M4 b'ilijU:n
?4 _0M4 b'ilijO:na
?4 _1M4 b'ilijO:n

_dpt _:z'a*Ez_
_roman r*'imsko

// not Roman numerals
// vi vi // not needed if only all-capitals are spoken as Roman numbers


// Abbreviations
aaa $abbrev
aac $abbrev
ab $only $abbrev $capital
abc $abbrev
abk $only $abbrev
abs $abbrev
acc $only $abbrev
acf $only $abbrev
acfm $abbrev
acg $only $abbrev
acgm $abbrev
acl $only $abbrev
acm $only $abbrev
acw $only $abbrev
add $only $abbrev
adn $only $abbrev
adp $only $abbrev
adsl $abbrev
agm $abbrev
ahk $only $abbrev
aif $only $abbrev
aip $only $abbrev
alg $only $abbrev
alm $only $abbrev
amd $only $abbrev
amf $only $abbrev
amg $only $abbrev
aph $only $abbrev
apj $only $abbrev
apl $only $abbrev
apm $only $abbrev
app $only $abbrev
arg $only $abbrev
arj $only $abbrev
asc $only $abbrev
asf $only $abbrev
asm $only $abbrev
asn $only $abbrev
asp $only $abbrev
asx $only $abbrev
atf $only $abbrev
atp $abbrev
av $only $abbrev
avg $abbrev
avr $only $abbrev
awb $only $abbrev
awt $only $abbrev
brf $abbrev
brl $abbrev
brx $abbrev
cca $abbrev
cda $only $abbrev
cdr $abbrev
cdrw $abbrev
cgi $abbrev
clr $abbrev
cpe $only $abbrev
cpu $only $abbrev
cr $only $abbrev
crt $only $abbrev
ctrl $abbrev
daa $only $abbrev
doo $abbrev
dox $only $abbrev
dqy $abbrev
dsr $only $abbrev
ear $only $abbrev
ecdl $only $abbrev
ecp $only $abbrev
ecs $only $abbrev
elm $only $abbrev
emf $only $abbrev
emh $only $abbrev
eml $only $abbrev
ems $only $abbrev
epf $only $abbrev
eq $only $abbrev
esc $only $abbrev
etd $only $abbrev
eu $only $abbrev
eur $only $abbrev
exc $only $abbrev
faq $abbrev
fbi $abbrev
ffa $only $abbrev
fr $only $abbrev
frm $abbrev
frt $only $abbrev
frx $abbrev
gho $only $abbrev
gr $only $abbrev
grr $abbrev
grrr $abbrev
grp $abbrev
grps $abbrev
grs $only $abbrev
grx $only $abbrev
gui $only $abbrev
hho $abbrev
hr $only $abbrev
hrk $only $abbrev
hrt $only $abbrev $capital
hrv $only $abbrev
hta $only $abbrev
hzzo $abbrev
ibm $abbrev
icc $only $abbrev
icm $only $abbrev
ics $only $abbrev
idb $only $abbrev
idc $only $abbrev
idm $only $abbrev
iff $only $abbrev
ifu $only $abbrev
ilk $only $abbrev
img $only $abbrev
iml $only $abbrev
isdn $abbrev
isl $only $abbrev
isp $only $abbrev
iss $only $abbrev
iq $only $abbrev
iqy $only $abbrev
itw $only $abbrev
itz $only $abbrev
ivf $only $abbrev
jpi $only $abbrev
jrr $abbrev
jse $only $abbrev
lha $abbrev
liq $only $abbrev
maq $only $abbrev
maw $only $abbrev
mda $only $abbrev
mde $only $abbrev
mdi $only $abbrev
mga $only $abbrev
mge $only $abbrev
mgi $only $abbrev
mpa $only $abbrev
mpe $only $abbrev
mpr $only $abbrev
msi $only $abbrev
mup $abbrev $capital
nco $only $abbrev
nfo $only $abbrev
nji $only $abbrev
nr $only $abbrev
nra $only $abbrev
nrb $only $abbrev
nrc $only $abbrev
nre $only $abbrev
nrf $only $abbrev
nrg $abbrev
nrh $only $abbrev
nri $only $abbrev
nrm $only $abbrev
nrq $only $abbrev
nrs $only $abbrev
nru $only $abbrev
nrw $only $abbrev
nsa $only $abbrev
nsi $only $abbrev
nvda $abbrev
obd $only $abbrev
obj $only $abbrev
obt $only $abbrev
obz $only $abbrev
ocr $only $abbrev
ocx $only $abbrev
oem $abbrev
off $only $abbrev
oft $only $abbrev
ofx $only $abbrev
ogc $only $abbrev
ogg $only $abbrev
ogl $only $abbrev
omw $abbrev
oqy $only $abbrev
os $only $abbrev $capital
oss $only $abbrev
otf $only $abbrev
otm $only $abbrev
pda $abbrev
pdi $only $abbrev
pgi $only $abbrev
pho $only $abbrev
pko $only $abbrev
pma $only $abbrev
ppa $only $abbrev
pr $only $abbrev
prf $only $abbrev
prg $only $abbrev
prn $abbrev
prt $only $abbrev
qbr $abbrev
qpr $abbrev
raw $only $abbrev
rb $abbrev
rba $abbrev
rc $only $abbrev
rcc $abbrev
rcd $abbrev
rcs $abbrev
rd $only $abbrev
rds $abbrev
rgb $abbrev
rh $only $abbrev
rk $only $abbrev
rm $only $abbrev
rma $only $abbrev
rme $only $abbrev
rmf $only $abbrev
rmi $only $abbrev
rmt $only $abbrev
rmx $abbrev
rn $only $abbrev
rnk $only $abbrev
rp $only $abbrev
rqy $only $abbrev
rs $abbrev
rss $abbrev
rta $abbrev
rtf $abbrev
rtl $abbrev
rttl $abbrev
rtv $abbrev
rw $abbrev
rwo $only $abbrev
rx $abbrev
sbr $only $abbrev
scr $only $abbrev
sr $only $abbrev
srd $only $abbrev
ssa $only $abbrev
sys $only $abbrev
tpu $only $abbrev
udf $only $abbrev
udl $only $abbrev
udt $abbrev
uhh $abbrev
uif $only $abbrev
uip $only $abbrev
url $only $abbrev
usd $only $abbrev
usr $only $abbrev
uss $only $abbrev
uu $only $abbrev
uue $only $abbrev
uup $only $abbrev
uxd $only $abbrev
vba $only $abbrev
vbe $only $abbrev
vbr $abbrev
vga $abbrev
wax $only $abbrev
wbr $abbrev
wiz $only $abbrev
wma $only $abbrev
wri $only $abbrev
wta $abbrev
xam $only $abbrev
xba $only $abbrev
xiw $only $abbrev
xla $only $abbrev
xpa $only $abbrev
xpi $only $abbrev
xsi $only $abbrev
xxa $only $abbrev
xxe $only $abbrev
xy $abbrev
xyz $abbrev
yu $only $abbrev
zaa $only $abbrev
zoo $only $abbrev

br $dot $abbrev
dr $dot $abbrev
mr $dot $abbrev
mrs $dot $abbrev
itd $dot $abbrev
npr $dot $abbrev
str $only $dot $abbrev

inc $dot
prof $dot
sl $dot
tj $dot
tzv $dot

// Interrogative pronouns
tko $u+
?4 šta $u+
što $u+
koga $pause
čega $pause


// Pronouns
ja $alt1

// Prepositions
o $u $brk
u $u $brk
s s_ $brk


// Conjunctions
a $u $pause // and
i $u $pause // and
ali $pause // but
nego $pause // but
ili $u $pause // or




// Auxiliary verbs
li $u // question

sam $u // to be, short form
si $u
je $u
smo $u
ste $u
su $u

bio $u // to be, past tense
bila $u
bilo $u
bili $u

nisam $u+ // to be, negative
nisi $u+
nije $u+
nismo $u+
niste $u+
nisu $u+

imam $u+ // to have
imaš $u+
ima $u+
imamo $u+
imate $u+
imaju $u+

nemam $u+ // to have, negative
nemaš $u+
nema $u+
nemamo $u+
nemate $u+
nemaju $u+





// treat as letters if at the end of the clause
a a $atend // letter-a
i i $atend // letter-i
o o $atend // letter-o
s s@ $atend
u u $atend


// Exceptions

ctrl tsontRo:l
control tsontRo:l
docx d'ots||'iks
jpeg j@2||p'eg
macx m'ats||'iks
potx p'ot||'iks
sisx s'is||'iks
start st'aRt
utf u||te||ef

?2 ra R&
?2 re Re
?2 ar &R
?2 er eR

+ 206
- 0
dictsource/sr_rules View File

@@ -0,0 +1,206 @@

// "hbs" is the ISO 639-3 code for Serbian/Croatian/Bosnian macrolanguage
// This file is UTF-8 encoded

//The next line gives a voice which is used to find a phoneme table to use to compile this dictionary
//voice=hr

// Conditional rules:
// ?1 Croatian
// ?2 Serbian
// ?3 Bosnian (Bosnia and Herzegovina)
// ?4 Serbian & Bosnian

.group a
a a
X) a ($w_alt1 a:
a (j A
a (r A // don't reduce to [&]
_hrv) a (t a:

.group b
b b

.group c
c ts

.group č
č tS

.group ć
ć tS;

.group d
d d
dz dz
_) dz (_ dz@
dž dZ
_) dž (_ dZ@

.group đ
đ dZ;

.group e
e E
X) e ($w_alt1 E:
j) e e
e (j e
_) exe %ekse // filetype
?2 _) exe %eikse // filetype

.group f
f f

.group g
g g

.group h
h h
_) h x

.group i
i i
X) i ($w_alt1 i:
._) ini %ini // filetype

.group j
j j
XA) j (_ j_ // short pause after final [j] in one-syllable words

.group k
k k

.group l
l l
K) l (K l-
_) lj (_ l^@
lj (+ l^
v) lj lj

.group m
m m

.group n
n n
n (g N
n (k N

nj n^
_) nj (_ n^@

.group o
o o
X) o ($w_alt1 o:
ou oU

.group p
p p

.group q
q k
qu kv
?2 qu ku

.group r
r R
A) r (A R
K) r (A r* // this phoneme expands to [@-*]
rr R
r (_ R

K) r (K R2


.group s
s s

.group š
š S

.group t
t t

.group u
u u
X) u ($w_alt1 u:

.group v
v v

.group w
w v

.group x
x ks

.group y
y j
K) y (K i


.group z
z z

.group ž
ž Z

.group
á a:
é e:
í i:
ó o:
ú u:

ä _^_DE // use German voice
ö _^_DE
ü _^_DE
ß _^_DE

$ d'ol&R

__) - (_D minus
A_) - (_D _
C_) - (_D _


.group 0xce // Greek letters

α 'alf&
ά 'alf&||t'onos
β b'et&
γ g'am&
δ d'elt&
ε 'epsIlon
έ 'epsIlon||t'onos
ζ z'et&
η 'et&
ή 'et&||t'onos
θ tx'et&
ι j'ot&
ί j'ot&||t'onos
κ k'ap&
λ l'amd&
μ m'i
ν n'i
ξ ks'i
ο 'omIkr*on_

.group 0xcf // Greek letters

π p'i
ρ R'o
σ s'igm&
ς gr-tSko_||z'avr-Sno_||s'igm&
τ t'au
υ gr-tSki_||'ipsIlon
ύ gr-tSki||'ipsIlon||t'onos
φ f'i
χ x'i
ψ ps'i
ω 'omeg&
ώ 'omeg&||t'onos
ό 'omIkr*on||t'onos



+ 0
- 1
espeak-ng-data/lang/zls/bs View File

@@ -1,7 +1,6 @@
name Bosnian
language bs
phonemes hr
dictionary hbs

pitch 81 120
formant 0 100 100 100

+ 0
- 2
espeak-ng-data/lang/zls/hr View File

@@ -2,8 +2,6 @@ name Croatian
language hr
language hbs

dictionary hbs

// attributes towards !variant3
pitch 81 120
formant 0 100 100 100

+ 0
- 1
espeak-ng-data/lang/zls/sr View File

@@ -1,6 +1,5 @@
name Serbian
language sr
dictionary hbs

// attributes towards !variant3 pitch 80 120
formant 0 100 100 100

Loading…
Cancel
Save