This commit implements support for [Totontepec Mixe](https://en.wikipedia.org/wiki/Totontepec_Mixe). The eSpeak NG rules are based on the phonological inventory, orthographic mappings, and phonetic processes described in two sources: the "Esbozo fonológico" (phonological outline/sketch) chapter of Verónica Guzmán Guzmán's 2012 master's thesis in Indo American Linguistics, awarded by the [Centro de Investigaciones y Estudios Superiores en Antropología Social](https://ciesas.edu.mx/), and *Vocabulario Mixe de Totontepec* (Totontepec Mixe vocabulary), compiled by Alvin Schoenhals and Louise C. Schoenhals and published by the Summer Institute of Linguistics in 1965. This commit was developed as part of a project for [Computational Linguistics](https://jnw.domains.swarthmore.edu/ling073/syllabus.php) at [Swarthmore College](https://swarthmore.edu). We feel that this language is suitable for merging with "testing" status, but further verification and improvements by native speakers would be very helpful.

Co-authored-by: Elizabeth Resendiz <[email protected]>
@@ -488,6 +488,7 @@ dictionaries: \ | |||
espeak-ng-data/mr_dict \ | |||
espeak-ng-data/ms_dict \ | |||
espeak-ng-data/mt_dict \ | |||
espeak-ng-data/mto_dict \ | |||
espeak-ng-data/my_dict \ | |||
espeak-ng-data/nci_dict \ | |||
espeak-ng-data/ne_dict \ | |||
@@ -750,6 +751,9 @@ espeak-ng-data/ms_dict: dictsource/ms_list dictsource/ms_rules dictsource/ms_ext | |||
mt: espeak-ng-data/mt_dict | |||
espeak-ng-data/mt_dict: dictsource/mt_list dictsource/mt_rules dictsource/mt_extra | |||
mto: espeak-ng-data/mto_dict | |||
espeak-ng-data/mto_dict: dictsource/mto_list dictsource/mto_rules | |||
my: espeak-ng-data/my_dict | |||
espeak-ng-data/my_dict: dictsource/my_list dictsource/my_rules dictsource/my_extra dictsource/my_emoji | |||
@@ -0,0 +1,214 @@ | |||
// This file is UTF8 encoded | |||
// letters | |||
_cap m'aJ^us | |||
_?? s'imbolo | |||
_#32 Esp'aTjo | |||
// accent names | |||
_lig liQaD'ura | |||
_acu aQ'uDo | |||
_ac2 d'oble||aQ'uDo | |||
_brv br'eBe | |||
_ced seD'iJ^a | |||
_cir sirkumfl'exo | |||
_dia dj'Eresis | |||
_dac d'oble||aQ'uDo | |||
_dot p'unto | |||
_grv gr'aBe | |||
_hac kar'on | |||
_mcn makr'on | |||
_ogo kol'ita | |||
_rng an'iJ^o | |||
_stk b'aRR2a | |||
_tld t'ilde | |||
_sup supE**'indiTe | |||
_sub suB'indiTe | |||
// names of symbols | |||
° grados | |||
_. punto | |||
_, koma | |||
_; p,untoik'oma | |||
_: d,osp'untos | |||
_! TERR2'araDmiraTj'on | |||
_? TERR2'arintERR2,oQaTj'on | |||
_¡ aBr'iraDmiraTj'on | |||
_¿ aBr'irintERR2,oQaTj'on | |||
_< men'orke | |||
_> maJ^'orke | |||
_' apostr'ofo | |||
ꞌ salt'il^o $only | |||
_" kom'iJ^as | |||
_- gJ^on | |||
__ suBraJ^'aDo | |||
_/ baRR2a | |||
_\ b'aRR2aimbErt'iDa | |||
_` aT'Ento||gr'aBe | |||
_´ aT'Ento||aQ'uDo | |||
_( ,aBrepar'Entesis | |||
_) Tj,ERR2apar'Entesis | |||
_[ ,aBrekortS'ete | |||
_] Tj,ERR2akortS'ete | |||
_{ ,aBreJ^'aBe | |||
_} Tj,ERR2aJ^'aBe | |||
_« kom'iJ^as||iTkJ^'ErDas | |||
_» kom'iJ^as||dEr'EtSas | |||
= iQw'al | |||
+ m'as | |||
# almoaD'iJ^a | |||
* astEr'isko | |||
. punto | |||
^ Tirkumfl'exo | |||
₠ 'eU*o | |||
€ eUro | |||
% porTj'Ento | |||
& ampErs'ant | |||
@ aRR2'oBa | |||
/ baRR2a | |||
© kopiRR2'aIt | |||
£ liBras | |||
¶ p'aRR2afo | |||
§ sEkTj'on | |||
¬ n'ot | |||
· p'unto||m'edjo | |||
// Language names | |||
_cyr Ti*'iliko_ | |||
_hy arm'enjo | |||
_he eB@-*'Eo | |||
_ar 'a*aBe | |||
_hi 'i:ndi | |||
_bn beNg,al'i_ | |||
_ta t'amil | |||
_te tel'ugu | |||
_si TiNgal'es | |||
_th t'a:i | |||
_my birm'ano | |||
_ja x,apon'Es | |||
_zh tS'ino | |||
// numbers | |||
// Units 0-9. | |||
_0 nitija | |||
_1 toU?k# | |||
_2 mahtsk# | |||
_3 toUoUhk# | |||
_4 mAktAAz.k# | |||
_5 mugooz.k# | |||
_6 toUht8k# | |||
_7 vuz.toUht8k# | |||
_8 toUtoUht8k# | |||
_9 tAz.toUht8k# | |||
// Tens: key is _<tens digit>X with an upper-case X. | |||
// The words are built on mAhk# (10), ii?p (20) and v8htkup (40). | |||
_1X mAhk# | |||
// Was "_2x": the lower-case x did not follow the _NX key pattern used by | |||
// every other tens entry, so the word for 20 would not be looked up. | |||
_2X ii?p | |||
_3X mAhk#ii?p | |||
_4X v8htkup | |||
_5X v8htkupmAhk# | |||
_6X toUoUhk#ii?p | |||
_7X toUoUhk#ii?pmAhk# | |||
_8X mAktAAz.k#ii?p | |||
_9X mAktAAz.k#ii?pmAhk# | |||
// Hundred (spelled here as the word for 5 followed by the word for 20). | |||
_0C mugooz.k#ii?p | |||
// Decimal point and the prefix spoken before Roman numerals. | |||
_dpt koma | |||
_roman Rom'ano | |||
// ordinal numbers | |||
_#º o | |||
_#ª a | |||
_ord mu | |||
ºc gr'ados||T'e | |||
ºf gr'ados||'Efe | |||
ºk gr'ados||k'a | |||
// Letters (names taken from Spanish) | |||
// If a letter has a "word" pronunciation which is different from its | |||
// "letter" name, then include the letter name here, with the letter | |||
// prefixed by a _ character. | |||
_b be | |||
_c Te | |||
_d de | |||
_f Efe | |||
_g xe | |||
_h atSe | |||
_j xota | |||
_k ka | |||
_l Ele | |||
_m Eme | |||
_n Ene | |||
_ñ En^e | |||
_p pe | |||
_q ku | |||
_r Ere | |||
_s Ese | |||
_t te | |||
_v uBe | |||
_w uBe||d'oBle | |||
_x Ekis | |||
_z TEta | |||
_a a | |||
_e e | |||
_o o | |||
_y igri'eQa | |||
_á 'a||aTEntw'aDa | |||
_é 'e||aTEntw'aDa | |||
_í 'i||aTEntw'aDa | |||
_ó 'o||aTEntw'aDa | |||
_ú 'u||aTEntw'aDa | |||
_ü 'u||kon||dj'ErEsis | |||
_ç $accent | |||
// Proper Names and countries | |||
amsterdam $3 | |||
bardem $2 | |||
jerusalem $4 | |||
méxico m'Exiko | |||
vietnam $2 | |||
// foreign words | |||
android 'androId | |||
apple 'ap@l | |||
copyright k'opiRR2,aIt | |||
chrome kr'owm | |||
curriculum $2 | |||
diem d'i:em $only | |||
eloquence 'elokwens | |||
english ínglish $text | |||
espeak 'isp'ik | |||
eyes 'aIs | |||
facebook f'eIsbuk | |||
firefox f'aIrfoks | |||
free fr'i | |||
google g'ug@l | |||
hardware h'ardwer | |||
iphone 'aIfon | |||
ipod 'aIpod | |||
jaws dZ'os | |||
jazz dZ'as | |||
linux $1 | |||
live l'aIB | |||
messenger m'esendZer | |||
microsoft m'aIkrosoft | |||
mozilla moT'ila | |||
office 'ofis | |||
platform pl'atfom | |||
power p'awer | |||
service s'erBis | |||
skype sk'aIp | |||
snapshot sn'apS,ot // _^_en | |||
software s'oftwer | |||
spanish sp'aniS | |||
speech sp'itS | |||
thunderbird t'anderbird | |||
twit tw'it | |||
twitter tw'iter | |||
window w'indow | |||
(e speak) 'isp'ik | |||
(i phone) 'aIfon | |||
(i pod) 'aIpod |
@@ -0,0 +1,107 @@ | |||
// Translation rules for mto | |||
// This file is UTF-8 encoded | |||
// Letter groups used as rule contexts in this file: | |||
// L01 = the vowel letters (plain and with diaeresis), L02 = the nasals m and n. | |||
.L01 a e i o u ä ë ï ö ü | |||
.L02 m n | |||
// Vowel letters: one context-free rule per letter. | |||
// Plain letters map to the phoneme of the same name; the diaeresis letters | |||
// map to separate phonemes: ä -> A, ë -> @, ï -> 8, ö -> oU. | |||
.group a | |||
a a | |||
.group ä | |||
ä A | |||
.group e | |||
e e | |||
.group ë | |||
ë @ | |||
.group i | |||
i i | |||
.group ï | |||
ï 8 | |||
.group o | |||
o o | |||
.group ö | |||
ö oU | |||
.group u | |||
u u | |||
// Letter v: | |||
//   - f when followed by a word boundary, | |||
//   - b when preceded by a word boundary and followed by o, | |||
//   - the phoneme string vj when the letter j is adjacent on either side, | |||
//   - v otherwise. | |||
// NOTE(review): the j context matches the LETTER j, which .group j maps to | |||
// phoneme h; phoneme j is produced by the letter y. Confirm that the letter j | |||
// (not y) is the intended trigger for the vj output. | |||
.group v | |||
v(_ f | |||
_)v(o b | |||
j)v vj | |||
v(j vj | |||
v v | |||
// Letter s: always phoneme s. | |||
.group s | |||
s s | |||
// Letter x: | |||
//   - z. after a vowel letter (L01) or a nasal letter (L02), | |||
//   - the phoneme string s.j when the letter j is adjacent on either side, | |||
//   - s. otherwise. | |||
// NOTE(review): the j context is the letter j (phoneme h per .group j), not | |||
// the letter y (phoneme j) -- confirm which one should trigger s.j. | |||
.group x | |||
L01)x z. | |||
L02)x z. | |||
j)x s.j | |||
x(j s.j | |||
x s. | |||
// Letter j: always phoneme h. | |||
.group j | |||
j h | |||
// Digraph ts: | |||
//   - dz after a nasal letter (L02), and after a vowel letter (L01) subject | |||
//     to the % modifier on that rule, | |||
//   - the phoneme string tsj when the letter j is adjacent on either side, | |||
//   - ts otherwise. | |||
// NOTE(review): this is the only rule in the file whose L01 pre-context is | |||
// prefixed with % (compare L01)x, L01)p(L01, L01)t(L01, L01)k(L01). Confirm | |||
// whether the % is intentional or a typo for plain L01)ts. | |||
// NOTE(review): the j context is the letter j (phoneme h per .group j), not | |||
// the letter y (phoneme j) -- confirm which one should trigger tsj. | |||
.group ts | |||
%L01)ts dz | |||
L02)ts dz | |||
j)ts tsj | |||
ts(j tsj | |||
ts ts | |||
// Letter p: | |||
//   - b when preceded by a vowel (L01) or nasal (L02) letter and followed by | |||
//     a vowel letter, | |||
//   - p# when followed by a word boundary, | |||
//   - the phoneme string pj when the letter j is adjacent on either side, | |||
//   - p otherwise. | |||
// NOTE(review): the j context is the letter j (phoneme h per .group j), not | |||
// the letter y (phoneme j) -- confirm which one should trigger pj. | |||
.group p | |||
L01)p(L01 b | |||
L02)p(L01 b | |||
p(_ p# | |||
j)p pj | |||
p(j pj | |||
p p | |||
// Letter t: | |||
//   - d when preceded by a vowel (L01) or nasal (L02) letter and followed by | |||
//     a vowel letter, | |||
//   - the phoneme string tj when the letter j is adjacent on either side, | |||
//   - t# when followed by a word boundary, | |||
//   - t otherwise. | |||
// NOTE(review): the j context is the letter j (phoneme h per .group j), not | |||
// the letter y (phoneme j) -- confirm which one should trigger tj. | |||
.group t | |||
L01)t(L01 d | |||
L02)t(L01 d | |||
j)t tj | |||
t(j tj | |||
t(_ t# | |||
t t | |||
// Letter k: | |||
//   - g when preceded by a vowel (L01) or nasal (L02) letter and followed by | |||
//     a vowel letter, | |||
//   - k# when followed by a word boundary, | |||
//   - the phoneme string kj when the letter j is adjacent on either side, | |||
//   - k otherwise. | |||
// NOTE(review): the j context is the letter j (phoneme h per .group j), not | |||
// the letter y (phoneme j) -- confirm which one should trigger kj. | |||
.group k | |||
L01)k(L01 g | |||
L02)k(L01 g | |||
k(_ k# | |||
j)k kj | |||
k(j kj | |||
k k | |||
// Letter ꞌ (saltillo): always phoneme ? (glottal stop). | |||
.group ꞌ | |||
ꞌ ? | |||
// Letter m: the phoneme string mj when the letter j is adjacent on either | |||
// side, m otherwise. | |||
// NOTE(review): the j context is the letter j (phoneme h per .group j), not | |||
// the letter y (phoneme j) -- confirm which one should trigger mj. | |||
.group m | |||
j)m mj | |||
m(j mj | |||
m m | |||
// Letter n: | |||
//   - m before the letter p and N before the letter k, | |||
//   - the phoneme string nj when the letter j is adjacent on either side, | |||
//   - n otherwise. | |||
// NOTE(review): the j context is the letter j (phoneme h per .group j), not | |||
// the letter y (phoneme j) -- confirm which one should trigger nj. | |||
.group n | |||
n(p m | |||
n(k N | |||
j)n nj | |||
n(j nj | |||
n n | |||
// Letters r, l and y: context-free. r -> r, l -> l, and y -> phoneme j. | |||
.group r | |||
r r | |||
.group l | |||
l l | |||
.group y | |||
y j |
@@ -0,0 +1,8 @@ | |||
name Totontepec Mixe | |||
language mto | |||
maintainer Bill Dengler <[email protected]> and Elizabeth Resendiz <[email protected]> | |||
status testing | |||
lowercaseSentence | |||
tunes s6 c6 q6 e6 |
@@ -0,0 +1,216 @@ | |||
//==================================================== | |||
// Totontepec Mixe | |||
//==================================================== | |||
// Vowel [a]; emitted by the translation rule for the letter a. | |||
phoneme a | |||
ipa a | |||
vwl starttype #a endtype #a | |||
length 190 | |||
FMT(vowel/a_4) | |||
endphoneme | |||
// Vowel [ɑ]; emitted by the translation rule for the letter ä. | |||
// Tagged unrounded / back / low, and given the longest vowel length (220) | |||
// used in this table, shared with oU. | |||
phoneme A | |||
ipa ɑ | |||
vwl starttype #a endtype #a | |||
length 220 | |||
unr bck low | |||
FMT(vowel/aa_8) | |||
endphoneme | |||
// Vowel [e]; emitted by the translation rule for the letter e. | |||
phoneme e | |||
ipa e | |||
vwl starttype #e endtype #e | |||
length 190 | |||
FMT(vowel/e_mid2) | |||
endphoneme | |||
// Vowel [ə]; emitted by the translation rule for the letter ë. | |||
// Tagged unrounded / central / mid. When this phoneme is at the end of a | |||
// word the vowel/@_6 data is used (with argument 90); otherwise vowel/@. | |||
// NOTE(review): the `unstressed` flag is set on this vowel only -- confirm | |||
// that ë should never carry stress in this language. | |||
phoneme @ | |||
ipa ə | |||
vwl starttype #@ endtype #@ | |||
unstressed | |||
length 140 | |||
unr cnt mid | |||
IF thisPh(isWordEnd) THEN | |||
FMT(vowel/@_6, 90) | |||
ENDIF | |||
FMT(vowel/@) | |||
endphoneme | |||
// Vowel [i]; emitted by the translation rule for the letter i. | |||
// IfNextVowelAppend is defined outside this file; presumably it appends the | |||
// `;` phoneme when another vowel follows -- confirm against its definition. | |||
phoneme i | |||
ipa i | |||
vwl starttype #i endtype #i | |||
length 190 | |||
IfNextVowelAppend(;) | |||
FMT(vowel/i) | |||
endphoneme | |||
// Vowel [ɘ]; emitted by the translation rule for the letter ï. | |||
// Uses the same start/end type (#@) as the @ phoneme. | |||
phoneme 8 | |||
ipa ɘ | |||
vwl starttype #@ endtype #@ | |||
length 165 | |||
FMT(vowel/8_7) | |||
endphoneme | |||
// Vowel [o]; emitted by the translation rule for the letter o. | |||
phoneme o | |||
ipa o | |||
vwl starttype #o endtype #o | |||
length 195 | |||
FMT(vowel/oo) | |||
endphoneme | |||
// Diphthong [əʊ] (starts as #@, ends as #u); emitted by the translation | |||
// rule for the letter ö. The formant data file name (@u_en) suggests it is | |||
// shared with English -- TODO confirm this is the intended quality for ö. | |||
phoneme oU | |||
ipa əʊ | |||
vwl starttype #@ endtype #u | |||
length 220 | |||
FMT(vdiph/@u_en) | |||
endphoneme | |||
// Vowel emitted by the translation rule for the letter u. | |||
// Unlike the other vowels defined here it has no explicit `ipa` line; | |||
// presumably the IPA symbol is derived from the phoneme name -- confirm. | |||
phoneme u | |||
vwl starttype #u endtype #u | |||
length 200 | |||
FMT(vowel/u_bck2) | |||
endphoneme | |||
// Velar stops, both imported from the shared consonants table: | |||
// k reuses consonants/k-, k# reuses consonants/k#. | |||
// The translation rules emit k# for the letter k before a word boundary. | |||
phoneme k | |||
import_phoneme consonants/k- | |||
endphoneme | |||
phoneme k# | |||
import_phoneme consonants/k# | |||
endphoneme | |||
// Voiced velar stop [g], paired with k through `voicingswitch`. | |||
// The translation rules emit it for the letter k between a vowel or nasal | |||
// letter and a following vowel letter. | |||
phoneme g | |||
ipa g | |||
vcd vel stp | |||
lengthmod 5 | |||
voicingswitch k | |||
Vowelin f1=2 f2=2300 200 300 f3=-300 80 | |||
Vowelout f1=2 f2=2300 250 300 f3=-300 80 brk | |||
FMT(g/g) addWav(x/g2) // weaker [g] | |||
endphoneme | |||
// t reuses base1/t[ ; T reuses base1/T. | |||
// No translation rule in the mto rules emits T; it appears only in the | |||
// Spanish-derived pronunciations of the dictionary list. | |||
phoneme t | |||
import_phoneme base1/t[ | |||
endphoneme | |||
phoneme T // Used in Spanish words | |||
import_phoneme base1/T | |||
endphoneme | |||
// Voiceless dental stop with IPA [tʰ]; the translation rules emit it for | |||
// the letter t before a word boundary. Paired with d through `voicingswitch`. | |||
// The same ustop/t_dnt sample is played with argument 35 when the next | |||
// phoneme satisfies isPause2, and with argument 50 otherwise. | |||
phoneme t# | |||
ipa tʰ | |||
vls dnt stp | |||
lengthmod 2 | |||
voicingswitch d | |||
Vowelin f1=0 f2=1500 -300 300 f3=-100 80 amp=16 | |||
Vowelout f1=0 f2=1500 -300 250 f3=-100 80 rms=20 | |||
IF nextPh(isPause2) THEN | |||
WAV(ustop/t_dnt, 35) | |||
ENDIF | |||
WAV(ustop/t_dnt, 50) | |||
endphoneme | |||
// Imported stops: d from base2/d, p from consonants/p-, p# from | |||
// consonants/ph, b from base1/b. | |||
// The translation rules emit p# for the letter p before a word boundary, | |||
// and b / d for p / t between a vowel or nasal letter and a vowel letter. | |||
phoneme d | |||
import_phoneme base2/d | |||
endphoneme | |||
phoneme p | |||
import_phoneme consonants/p- | |||
endphoneme | |||
phoneme p# | |||
import_phoneme consonants/ph | |||
endphoneme | |||
phoneme b | |||
import_phoneme base1/b | |||
endphoneme | |||
// Voiced labiodental fricative [v]: formant data voc/v mixed with the | |||
// vocw/v sample (argument 90). | |||
phoneme v | |||
ipa v | |||
vcd lbd frc | |||
FMT(voc/v) addWav(vocw/v, 90) | |||
endphoneme | |||
// Voiceless labiodental fricative [f]; the translation rules emit it for | |||
// the letter v before a word boundary. | |||
phoneme f | |||
ipa f | |||
vls lbd frc | |||
WAV(ufric/f, 80) | |||
endphoneme | |||
// Voiceless alveolar sibilant [s]. | |||
// Sample selection: ufric/s_ (argument 60) before a pause, ufric/s! before | |||
// p, t or k, and ufric/s (argument 80) in every other position. | |||
// NOTE(review): `voicingswitch z` names a phoneme that is not defined in | |||
// this file -- presumably it is inherited from the base table; confirm. | |||
phoneme s | |||
ipa s | |||
vls alv frc sib | |||
lengthmod 3 | |||
voicingswitch z | |||
Vowelin f1=0 f2=1700 -300 300 f3=-100 80 | |||
Vowelout f1=0 f2=1700 -300 250 f3=-100 80 rms=20 | |||
IF nextPh(isPause) THEN | |||
WAV(ufric/s_, 60) // quieter 's' at end of word | |||
ELIF nextPh(p) OR nextPh(t) OR nextPh(k) THEN | |||
WAV(ufric/s!) | |||
ENDIF | |||
WAV(ufric/s, 80) | |||
endphoneme | |||
// Imported sibilants and affricates: s. and z. from base1, ts and dz from | |||
// the shared consonants table. | |||
// The translation rules emit s. / z. for the letter x and ts / dz for the | |||
// digraph ts. | |||
phoneme s. | |||
import_phoneme base1/s. | |||
endphoneme | |||
phoneme z. | |||
import_phoneme base1/z. | |||
endphoneme | |||
phoneme ts | |||
import_phoneme consonants/ts | |||
endphoneme | |||
phoneme dz | |||
import_phoneme consonants/dz | |||
endphoneme | |||
// Nasals, imported from base1/m-, base1/n- and base1/N-. | |||
// The translation rules emit N for the letter n before the letter k. | |||
phoneme m | |||
import_phoneme base1/m- | |||
endphoneme | |||
phoneme n | |||
import_phoneme base1/n- | |||
endphoneme | |||
phoneme N | |||
import_phoneme base1/N- | |||
endphoneme | |||
// Imported from base1: r reuses base1/*, l reuses base1/l, j reuses | |||
// base1/j, C reuses base1/C. | |||
// The translation rules emit j for the letter y. | |||
// NOTE(review): no rule in the mto rules emits C -- confirm it is needed. | |||
phoneme r | |||
import_phoneme base1/* | |||
endphoneme | |||
phoneme l | |||
import_phoneme base1/l | |||
endphoneme | |||
phoneme j | |||
import_phoneme base1/j | |||
endphoneme | |||
phoneme C | |||
import_phoneme base1/C | |||
endphoneme | |||
// Glottal stop [ʔ]; emitted by the translation rule for the letter ꞌ. | |||
// Plays the ustop/null sample and uses the glstop vowel transitions on | |||
// both sides. | |||
phoneme ? | |||
ipa ʔ | |||
vls glt stp | |||
lengthmod 1 // 5? longer preceding vowel | |||
nolink | |||
Vowelin glstop | |||
Vowelout glstop | |||
WAV(ustop/null) | |||
endphoneme | |||
// h reuses base1/h; the translation rules emit it for the letter j. | |||
phoneme h | |||
import_phoneme base1/h | |||
endphoneme |
@@ -1852,6 +1852,9 @@ include ph_kurdish | |||
phonemetable mi base2 | |||
include ph_maori | |||
phonemetable mto base2 | |||
include ph_mixe_mto | |||
phonemetable nci base2 | |||
include ph_nahuatl | |||
@@ -97,6 +97,7 @@ test_phwav mk 072d0a74acf54bea528e7dde427eb04808d38364 "ma na n^a Na pa ta xa k^ | |||
test_phwav mr 5238ba08fba349fea6c00bdd8f1672ede229b8ec "ma na n.a n^a pa t#a t.a tSa ka qa p#a t.#a c#a k#a ba d#a d.a dZa ga b#a d.#a J#a g#a fa sa Sa xa va za Za Qa Ha ra r.a la ja _:_ mI mU me m@ mo mE mV mO ma mi: mu: me: mo: mE: mO: ma: m&: mI~ mi~ mU~ mu~ mU~ me~ mo~ mE~ mV~ mO~ ma~ mAI maU" | |||
test_phwav ms 75a57a020af2b62e3448792d3f6a945a9b2c6b75 "ma na n^a Na pa ba ta da ka ga ?a tSa dZa fa va Ta Da sa za Sa xa Qa ha ja wa la Ra R2a _:_ ma mE mO m@ me mo mi mu maI meI mOI maU m@U" | |||
test_phwav mt 03231022bb750335042309d6d2acd55f214a8967 "ma na Na pa ta ka ?a ba da ga p\`a t\`a k\`a tSa dZa tS\`a Ba sa za Sa Za xa ha la ja wa ra _:_ mi me my ma m@ mo mu" | |||
test_phwav mto bd45ab06741222328c2068f5a14a08b052dbdaa6 "a A e @ i 8 o oU u k k# ga t T t# da p p# b v f s s. z. ts dz m n N ra la j C a?e ha" | |||
test_phwav my 77eeafb213bfd0756319b2766be8364a2bff46ad "na Na la ja pa pha fa ta tha ka kha tS;a tS;ha S;a s.a tsa tsha ts.a ts.ha N-a _:_ ma mA mai mAu m@ m@r mE mei mi mi[ mi. miA miAu miE mio miou mo mo- mou mong mu muA mua muai mu@ mei muo my myu my& myE my@ myi _:_ ma11 ma21 ma214 ma22 ma33 ma35 ma44 ma51 ma53 ma55" | |||
test_phwav nb adbf0b2e74a76ff7bd2463223f648d479515a314 "m#a ma n#a na n^#a n^a N#a Na pa ta ca ka fa va Ta Da sa Ca J^a xa Qa ha l#a la tl#a r#a ra _:_ mi mi: mI mI: mE mE: ma ma: mO mO: mu mu: my my: mW mW: maI maI: meI meI: maU maU: moU moU: mYy mOI myI" | |||
test_phwav nci 8c578e588c4f0a283359d62754fde039b14c8aef "ma na Na pa ta ka ba da ga fa Ta sa Sa xa ha va Da za Za tSa dZa la ra ja wa t2a t#a d#a z#a r-a z/2a w#a m- n- N- _:_ mI mE ma m0 mV mU mi: mA: mO: mu: m3: mA@ mO@ mo@ mU@ mi@3 mIR mVR mi@ me@ mi m@ m3 me# mI# mI2 meI maI mOI moU maU maa mO2 maI@ maI3 maU@" |