This commit adds support for [Totontepec Mixe](https://en.wikipedia.org/wiki/Totontepec_Mixe). The eSpeak NG rules are based on the phonological inventory, orthographic mappings, and phonetic processes described in the "Esbozo fonológico" (phonological outline/sketch) chapter of Verónica Guzmán Guzmán's 2012 master's thesis in Indo-American Linguistics at the [Centro de Investigaciones y Estudios Superiores en Antropología Social](https://ciesas.edu.mx/), and on *Vocabulario Mixe de Totontepec* (Totontepec Mixe vocabulary), compiled by Alvin Schoenhals and Louise C. Schoenhals and published by the Summer Institute of Linguistics in 1965. This commit was developed as part of a project for [Computational Linguistics](https://jnw.domains.swarthmore.edu/ling073/syllabus.php) at [Swarthmore College](https://swarthmore.edu). We feel that this language is suitable for merging with "testing" status, but further verification and improvement by native speakers would be very helpful. Co-authored-by: Elizabeth Resendiz <[email protected]> master
| @@ -488,6 +488,7 @@ dictionaries: \ | |||
| espeak-ng-data/mr_dict \ | |||
| espeak-ng-data/ms_dict \ | |||
| espeak-ng-data/mt_dict \ | |||
| espeak-ng-data/mto_dict \ | |||
| espeak-ng-data/my_dict \ | |||
| espeak-ng-data/nci_dict \ | |||
| espeak-ng-data/ne_dict \ | |||
| @@ -750,6 +751,9 @@ espeak-ng-data/ms_dict: dictsource/ms_list dictsource/ms_rules dictsource/ms_ext | |||
| mt: espeak-ng-data/mt_dict | |||
| espeak-ng-data/mt_dict: dictsource/mt_list dictsource/mt_rules dictsource/mt_extra | |||
| mto: espeak-ng-data/mto_dict | |||
| espeak-ng-data/mto_dict: dictsource/mto_list dictsource/mto_rules | |||
| my: espeak-ng-data/my_dict | |||
| espeak-ng-data/my_dict: dictsource/my_list dictsource/my_rules dictsource/my_extra dictsource/my_emoji | |||
| @@ -0,0 +1,214 @@ | |||
| // This file is UTF8 encoded | |||
| // letters | |||
| _cap m'aJ^us | |||
| _?? s'imbolo | |||
| _#32 Esp'aTjo | |||
| // accent names | |||
| _lig liQaD'ura | |||
| _acu aQ'uDo | |||
| _ac2 d'oble||aQ'uDo | |||
| _brv br'eBe | |||
| _ced seD'iJ^a | |||
| _cir sirkumfl'exo | |||
| _dia dj'Eresis | |||
| _dac d'oble||aQ'uDo | |||
| _dot p'unto | |||
| _grv gr'aBe | |||
| _hac kar'on | |||
| _mcn makr'on | |||
| _ogo kol'ita | |||
| _rng an'iJ^o | |||
| _stk b'aRR2a | |||
| _tld t'ilde | |||
| _sup supE**'indiTe | |||
| _sub suB'indiTe | |||
| // names of symbols | |||
| ° grados | |||
| _. punto | |||
| _, koma | |||
| _; p,untoik'oma | |||
| _: d,osp'untos | |||
| _! TERR2'araDmiraTj'on | |||
| _? TERR2'arintERR2,oQaTj'on | |||
| _¡ aBr'iraDmiraTj'on | |||
| _¿ aBr'irintERR2,oQaTj'on | |||
| _< men'orke | |||
| _> maJ^'orke | |||
| _' apostr'ofo | |||
| ꞌ salt'il^o $only | |||
| _" kom'iJ^as | |||
| _- gJ^on | |||
| __ suBraJ^'aDo | |||
| _/ baRR2a | |||
| _\ b'aRR2aimbErt'iDa | |||
| _` aT'Ento||gr'aBe | |||
| _´ aT'Ento||aQ'uDo | |||
| _( ,aBrepar'Entesis | |||
| _) Tj,ERR2apar'Entesis | |||
| _[ ,aBrekortS'ete | |||
| _] Tj,ERR2akortS'ete | |||
| _{ ,aBreJ^'aBe | |||
| _} Tj,ERR2aJ^'aBe | |||
| _« kom'iJ^as||iTkJ^'ErDas | |||
| _» kom'iJ^as||dEr'EtSas | |||
| = iQw'al | |||
| + m'as | |||
| # almoaD'iJ^a | |||
| * astEr'isko | |||
| . punto | |||
| ^ Tirkumfl'exo | |||
| ₠ 'eU*o | |||
| € eUro | |||
| % porTj'Ento | |||
| & ampErs'ant | |||
| @ aRR2'oBa | |||
| / baRR2a | |||
| © kopiRR2'aIt | |||
| £ liBras | |||
| ¶ p'aRR2afo | |||
| § sEkTj'on | |||
| ¬ n'ot | |||
| · p'unto||m'edjo | |||
| // Language names | |||
| _cyr Ti*'iliko_ | |||
| _hy arm'enjo | |||
| _he eB@-*'Eo | |||
| _ar 'a*aBe | |||
| _hi 'i:ndi | |||
| _bn beNg,al'i_ | |||
| _ta t'amil | |||
| _te tel'ugu | |||
| _si TiNgal'es | |||
| _th t'a:i | |||
| _my birm'ano | |||
| _ja x,apon'Es | |||
| _zh tS'ino | |||
| // numbers | |||
| // _0 .. _9 are the units, _1X .. _9X the tens (upper-case X), _0C the hundred. | |||
| // The tens are built on a base-20 pattern: ii?p (20) and mAhk# (10) recur in them. | |||
| _0 nitija | |||
| _1 toU?k# | |||
| _2 mahtsk# | |||
| _3 toUoUhk# | |||
| _4 mAktAAz.k# | |||
| _5 mugooz.k# | |||
| _6 toUht8k# | |||
| _7 vuz.toUht8k# | |||
| _8 toUtoUht8k# | |||
| _9 tAz.toUht8k# | |||
| _1X mAhk# | |||
| // Key must be _2X like the other tens; a lower-case x is a different key. | |||
| _2X ii?p | |||
| // NOTE(review): _3X puts mAhk# before ii?p, whereas _5X/_7X/_9X append mAhk# | |||
| // after the base -- confirm the element order for 30 with a speaker/source. | |||
| _3X mAhk#ii?p | |||
| _4X v8htkup | |||
| _5X v8htkupmAhk# | |||
| _6X toUoUhk#ii?p | |||
| _7X toUoUhk#ii?pmAhk# | |||
| _8X mAktAAz.k#ii?p | |||
| _9X mAktAAz.k#ii?pmAhk# | |||
| _0C mugooz.k#ii?p | |||
| _dpt koma | |||
| _roman Rom'ano | |||
| // ordinal numbers | |||
| _#º o | |||
| _#ª a | |||
| _ord mu | |||
| ºc gr'ados||T'e | |||
| ºf gr'ados||'Efe | |||
| ºk gr'ados||k'a | |||
| // Letters (names taken from Spanish) | |||
| // If a letter has a "word" pronunciation which is different from its | |||
| // "letter" name, then include the letter name here, with the letter | |||
| // prefixed by a _ character. | |||
| _b be | |||
| _c Te | |||
| _d de | |||
| _f Efe | |||
| _g xe | |||
| _h atSe | |||
| _j xota | |||
| _k ka | |||
| _l Ele | |||
| _m Eme | |||
| _n Ene | |||
| _ñ En^e | |||
| _p pe | |||
| _q ku | |||
| _r Ere | |||
| _s Ese | |||
| _t te | |||
| _v uBe | |||
| _w uBe||d'oBle | |||
| _x Ekis | |||
| _z TEta | |||
| _a a | |||
| _e e | |||
| _o o | |||
| _y igri'eQa | |||
| _á 'a||aTEntw'aDa | |||
| _é 'e||aTEntw'aDa | |||
| _í 'i||aTEntw'aDa | |||
| _ó 'o||aTEntw'aDa | |||
| _ú 'u||aTEntw'aDa | |||
| _ü 'u||kon||dj'ErEsis | |||
| _ç $accent | |||
| // Proper Names and countries | |||
| amsterdam $3 | |||
| bardem $2 | |||
| jerusalem $4 | |||
| méxico m'Exiko | |||
| vietnam $2 | |||
| // foreign words | |||
| android 'androId | |||
| apple 'ap@l | |||
| copyright k'opiRR2,aIt | |||
| chrome kr'owm | |||
| curriculum $2 | |||
| diem d'i:em $only | |||
| eloquence 'elokwens | |||
| english ínglish $text | |||
| espeak 'isp'ik | |||
| eyes 'aIs | |||
| facebook f'eIsbuk | |||
| firefox f'aIrfoks | |||
| free fr'i | |||
| google g'ug@l | |||
| hardware h'ardwer | |||
| iphone 'aIfon | |||
| ipod 'aIpod | |||
| jaws dZ'os | |||
| jazz dZ'as | |||
| linux $1 | |||
| live l'aIB | |||
| messenger m'esendZer | |||
| microsoft m'aIkrosoft | |||
| mozilla moT'ila | |||
| office 'ofis | |||
| platform pl'atfom | |||
| power p'awer | |||
| service s'erBis | |||
| skype sk'aIp | |||
| snapshot sn'apS,ot // _^_en | |||
| software s'oftwer | |||
| spanish sp'aniS | |||
| speech sp'itS | |||
| thunderbird t'anderbird | |||
| twit tw'it | |||
| twitter tw'iter | |||
| window w'indow | |||
| (e speak) 'isp'ik | |||
| (i phone) 'aIfon | |||
| (i pod) 'aIpod | |||
| @@ -0,0 +1,107 @@ | |||
| // Translation rules for mto | |||
| // This file is UTF-8 encoded | |||
| // Letter groups, referenced as L01 / L02 in the rule contexts of this file. | |||
| // L01: the vowel letters, plain and with diaeresis. | |||
| .L01 a e i o u ä ë ï ö ü | |||
| // L02: the nasal consonant letters. | |||
| .L02 m n | |||
| .group a | |||
| a a | |||
| .group ä | |||
| ä A | |||
| .group e | |||
| e e | |||
| .group ë | |||
| ë @ | |||
| .group i | |||
| i i | |||
| .group ï | |||
| ï 8 | |||
| .group o | |||
| o o | |||
| .group ö | |||
| ö oU | |||
| .group u | |||
| u u | |||
| .group v | |||
| // v at the end of a word -> f | |||
| v(_ f | |||
| // v at the start of a word and followed by o -> b | |||
| _)v(o b | |||
| // v with the letter j immediately before or after it -> v followed by j | |||
| j)v vj | |||
| v(j vj | |||
| // default | |||
| v v | |||
| .group s | |||
| s s | |||
| .group x | |||
| // x preceded by a vowel letter (L01) or a nasal letter (L02) -> z. | |||
| L01)x z. | |||
| L02)x z. | |||
| // x with the letter j immediately before or after it -> s. followed by j | |||
| j)x s.j | |||
| x(j s.j | |||
| // default: s. | |||
| x s. | |||
| .group j | |||
| j h | |||
| .group ts | |||
| // ts -> dz after a vowel letter (L01) or a nasal letter (L02). | |||
| // NOTE(review): only this rule carries a % before L01; the sibling groups | |||
| // (x, p, t, k) use a bare L01). In eSpeak NG rule syntax % marks a doubled | |||
| // letter, so this presumably restricts voicing to a doubled (long) vowel -- | |||
| // confirm that is intended and not a typo. | |||
| %L01)ts dz | |||
| L02)ts dz | |||
| // ts with the letter j immediately before or after it -> ts followed by j | |||
| j)ts tsj | |||
| ts(j tsj | |||
| // default | |||
| ts ts | |||
| // Stops p, t, k share one pattern: | |||
| //   - between a vowel/nasal letter (L01/L02) and a vowel letter (L01): voiced (b, d, g) | |||
| //   - at the end of a word: the # variant (p#, t#, k#) | |||
| //   - with the letter j immediately before or after: the stop followed by j | |||
| //   - otherwise: the plain stop | |||
| // NOTE(review): the context letter is orthographic j, which .group j maps to h, | |||
| // while the phoneme j is produced from the letter y -- confirm that j (not y) | |||
| // is the intended trigger for the ...j outputs. | |||
| .group p | |||
| L01)p(L01 b | |||
| L02)p(L01 b | |||
| p(_ p# | |||
| j)p pj | |||
| p(j pj | |||
| p p | |||
| .group t | |||
| L01)t(L01 d | |||
| L02)t(L01 d | |||
| j)t tj | |||
| t(j tj | |||
| t(_ t# | |||
| t t | |||
| .group k | |||
| L01)k(L01 g | |||
| L02)k(L01 g | |||
| k(_ k# | |||
| j)k kj | |||
| k(j kj | |||
| k k | |||
| .group ꞌ | |||
| ꞌ ? | |||
| .group m | |||
| j)m mj | |||
| m(j mj | |||
| m m | |||
| .group n | |||
| // Place assimilation: n before p -> m, n before k -> N | |||
| n(p m | |||
| n(k N | |||
| // n with the letter j immediately before or after it -> n followed by j | |||
| j)n nj | |||
| n(j nj | |||
| // default | |||
| n n | |||
| .group r | |||
| r r | |||
| .group l | |||
| l l | |||
| .group y | |||
| y j | |||
| @@ -0,0 +1,8 @@ | |||
| name Totontepec Mixe | |||
| language mto | |||
| maintainer Bill Dengler <[email protected]> and Elizabeth Resendiz <[email protected]> | |||
| status testing | |||
| lowercaseSentence | |||
| tunes s6 c6 q6 e6 | |||
| @@ -0,0 +1,216 @@ | |||
| //==================================================== | |||
| // Totontepec Mixe | |||
| //==================================================== | |||
| phoneme a | |||
| ipa a | |||
| vwl starttype #a endtype #a | |||
| length 190 | |||
| FMT(vowel/a_4) | |||
| endphoneme | |||
| phoneme A | |||
| ipa ɑ | |||
| vwl starttype #a endtype #a | |||
| length 220 | |||
| unr bck low | |||
| FMT(vowel/aa_8) | |||
| endphoneme | |||
| phoneme e | |||
| ipa e | |||
| vwl starttype #e endtype #e | |||
| length 190 | |||
| FMT(vowel/e_mid2) | |||
| endphoneme | |||
| // Schwa (IPA ə): unrounded, central, mid vowel, flagged unstressed. | |||
| phoneme @ | |||
| ipa ə | |||
| vwl starttype #@ endtype #@ | |||
| unstressed | |||
| length 140 | |||
| unr cnt mid | |||
| // At the end of a word use the vowel/@_6 data, with 90 as second argument. | |||
| IF thisPh(isWordEnd) THEN | |||
| FMT(vowel/@_6, 90) | |||
| ENDIF | |||
| // Fallback for all other positions (presumably not reached after the FMT | |||
| // above, since FMT is expected to end the phoneme program -- confirm). | |||
| FMT(vowel/@) | |||
| endphoneme | |||
| phoneme i | |||
| ipa i | |||
| vwl starttype #i endtype #i | |||
| length 190 | |||
| IfNextVowelAppend(;) | |||
| FMT(vowel/i) | |||
| endphoneme | |||
| phoneme 8 | |||
| ipa ɘ | |||
| vwl starttype #@ endtype #@ | |||
| length 165 | |||
| FMT(vowel/8_7) | |||
| endphoneme | |||
| phoneme o | |||
| ipa o | |||
| vwl starttype #o endtype #o | |||
| length 195 | |||
| FMT(vowel/oo) | |||
| endphoneme | |||
| phoneme oU | |||
| ipa əʊ | |||
| vwl starttype #@ endtype #u | |||
| length 220 | |||
| FMT(vdiph/@u_en) | |||
| endphoneme | |||
| phoneme u | |||
| vwl starttype #u endtype #u | |||
| length 200 | |||
| FMT(vowel/u_bck2) | |||
| endphoneme | |||
| phoneme k | |||
| import_phoneme consonants/k- | |||
| endphoneme | |||
| phoneme k# | |||
| import_phoneme consonants/k# | |||
| endphoneme | |||
| phoneme g | |||
| ipa g | |||
| vcd vel stp | |||
| lengthmod 5 | |||
| voicingswitch k | |||
| Vowelin f1=2 f2=2300 200 300 f3=-300 80 | |||
| Vowelout f1=2 f2=2300 250 300 f3=-300 80 brk | |||
| FMT(g/g) addWav(x/g2) // weaker [g] | |||
| endphoneme | |||
| phoneme t | |||
| import_phoneme base1/t[ | |||
| endphoneme | |||
| phoneme T // Used in Spanish words | |||
| import_phoneme base1/T | |||
| endphoneme | |||
| // Aspirated voiceless dental stop (IPA tʰ); its voiced counterpart for | |||
| // voicing switches is d. | |||
| phoneme t# | |||
| ipa tʰ | |||
| vls dnt stp | |||
| lengthmod 2 | |||
| voicingswitch d | |||
| Vowelin f1=0 f2=1500 -300 300 f3=-100 80 amp=16 | |||
| Vowelout f1=0 f2=1500 -300 250 f3=-100 80 rms=20 | |||
| // Before a pause the same sample is used with 35 instead of 50 as the | |||
| // second argument (presumably a lower amplitude -- confirm). | |||
| IF nextPh(isPause2) THEN | |||
| WAV(ustop/t_dnt, 35) | |||
| ENDIF | |||
| WAV(ustop/t_dnt, 50) | |||
| endphoneme | |||
| phoneme d | |||
| import_phoneme base2/d | |||
| endphoneme | |||
| phoneme p | |||
| import_phoneme consonants/p- | |||
| endphoneme | |||
| phoneme p# | |||
| import_phoneme consonants/ph | |||
| endphoneme | |||
| phoneme b | |||
| import_phoneme base1/b | |||
| endphoneme | |||
| phoneme v | |||
| ipa v | |||
| vcd lbd frc | |||
| FMT(voc/v) addWav(vocw/v, 90) | |||
| endphoneme | |||
| phoneme f | |||
| ipa f | |||
| vls lbd frc | |||
| WAV(ufric/f, 80) | |||
| endphoneme | |||
| // Voiceless alveolar sibilant fricative (IPA s); its voiced counterpart for | |||
| // voicing switches is z. | |||
| phoneme s | |||
| ipa s | |||
| vls alv frc sib | |||
| lengthmod 3 | |||
| voicingswitch z | |||
| Vowelin f1=0 f2=1700 -300 300 f3=-100 80 | |||
| Vowelout f1=0 f2=1700 -300 250 f3=-100 80 rms=20 | |||
| // Sample selection by following phoneme: pause -> ufric/s_ ; p, t or k -> | |||
| // ufric/s! ; anything else -> ufric/s. | |||
| IF nextPh(isPause) THEN | |||
| WAV(ufric/s_, 60) // quieter 's' at end of word | |||
| ELIF nextPh(p) OR nextPh(t) OR nextPh(k) THEN | |||
| WAV(ufric/s!) | |||
| ENDIF | |||
| WAV(ufric/s, 80) | |||
| endphoneme | |||
| phoneme s. | |||
| import_phoneme base1/s. | |||
| endphoneme | |||
| phoneme z. | |||
| import_phoneme base1/z. | |||
| endphoneme | |||
| phoneme ts | |||
| import_phoneme consonants/ts | |||
| endphoneme | |||
| phoneme dz | |||
| import_phoneme consonants/dz | |||
| endphoneme | |||
| phoneme m | |||
| import_phoneme base1/m- | |||
| endphoneme | |||
| phoneme n | |||
| import_phoneme base1/n- | |||
| endphoneme | |||
| phoneme N | |||
| import_phoneme base1/N- | |||
| endphoneme | |||
| phoneme r | |||
| import_phoneme base1/* | |||
| endphoneme | |||
| phoneme l | |||
| import_phoneme base1/l | |||
| endphoneme | |||
| phoneme j | |||
| import_phoneme base1/j | |||
| endphoneme | |||
| phoneme C | |||
| import_phoneme base1/C | |||
| endphoneme | |||
| phoneme ? | |||
| ipa ʔ | |||
| vls glt stp | |||
| lengthmod 1 // 5? longer preceding vowel | |||
| nolink | |||
| Vowelin glstop | |||
| Vowelout glstop | |||
| WAV(ustop/null) | |||
| endphoneme | |||
| phoneme h | |||
| import_phoneme base1/h | |||
| endphoneme | |||
| @@ -1852,6 +1852,9 @@ include ph_kurdish | |||
| phonemetable mi base2 | |||
| include ph_maori | |||
| phonemetable mto base2 | |||
| include ph_mixe_mto | |||
| phonemetable nci base2 | |||
| include ph_nahuatl | |||
| @@ -97,6 +97,7 @@ test_phwav mk 072d0a74acf54bea528e7dde427eb04808d38364 "ma na n^a Na pa ta xa k^ | |||
| test_phwav mr 5238ba08fba349fea6c00bdd8f1672ede229b8ec "ma na n.a n^a pa t#a t.a tSa ka qa p#a t.#a c#a k#a ba d#a d.a dZa ga b#a d.#a J#a g#a fa sa Sa xa va za Za Qa Ha ra r.a la ja _:_ mI mU me m@ mo mE mV mO ma mi: mu: me: mo: mE: mO: ma: m&: mI~ mi~ mU~ mu~ mU~ me~ mo~ mE~ mV~ mO~ ma~ mAI maU" | |||
| test_phwav ms 75a57a020af2b62e3448792d3f6a945a9b2c6b75 "ma na n^a Na pa ba ta da ka ga ?a tSa dZa fa va Ta Da sa za Sa xa Qa ha ja wa la Ra R2a _:_ ma mE mO m@ me mo mi mu maI meI mOI maU m@U" | |||
| test_phwav mt 03231022bb750335042309d6d2acd55f214a8967 "ma na Na pa ta ka ?a ba da ga p\`a t\`a k\`a tSa dZa tS\`a Ba sa za Sa Za xa ha la ja wa ra _:_ mi me my ma m@ mo mu" | |||
| test_phwav mto bd45ab06741222328c2068f5a14a08b052dbdaa6 "a A e @ i 8 o oU u k k# ga t T t# da p p# b v f s s. z. ts dz m n N ra la j C a?e ha" | |||
| test_phwav my 77eeafb213bfd0756319b2766be8364a2bff46ad "na Na la ja pa pha fa ta tha ka kha tS;a tS;ha S;a s.a tsa tsha ts.a ts.ha N-a _:_ ma mA mai mAu m@ m@r mE mei mi mi[ mi. miA miAu miE mio miou mo mo- mou mong mu muA mua muai mu@ mei muo my myu my& myE my@ myi _:_ ma11 ma21 ma214 ma22 ma33 ma35 ma44 ma51 ma53 ma55" | |||
| test_phwav nb adbf0b2e74a76ff7bd2463223f648d479515a314 "m#a ma n#a na n^#a n^a N#a Na pa ta ca ka fa va Ta Da sa Ca J^a xa Qa ha l#a la tl#a r#a ra _:_ mi mi: mI mI: mE mE: ma ma: mO mO: mu mu: my my: mW mW: maI maI: meI meI: maU maU: moU moU: mYy mOI myI" | |||
| test_phwav nci 8c578e588c4f0a283359d62754fde039b14c8aef "ma na Na pa ta ka ba da ga fa Ta sa Sa xa ha va Da za Za tSa dZa la ra ja wa t2a t#a d#a z#a r-a z/2a w#a m- n- N- _:_ mI mE ma m0 mV mU mi: mA: mO: mu: m3: mA@ mO@ mo@ mU@ mi@3 mIR mVR mi@ me@ mi m@ m3 me# mI# mI2 meI maI mOI moU maU maa mO2 maI@ maI3 maU@" | |||