Browse Source

Add support for Totontepec Mixe

This commit implements support for [Totontepec Mixe](https://en.wikipedia.org/wiki/Totontepec_Mixe). The Espeak rules are based on the phonological inventory, orthographic mappings, and phonetic processes described in the "Esbozo fonológico" (phonological outline/sketch) chapter of Verónica Guzmán Guzmán's 2012 master's thesis in Indo American Linguistics awarded by the [Centro de Investigaciones y Estudios Superiores en Antropología Social](https://ciesas.edu.mx/) and *Vocabulario Mixe de Totontepec* (Totontepec Mixe vocabulary), compiled by Alvin Schoenhals and Louise C. Schoenhals and published by the Summer Institute of Linguistics in 1965.

This commit was developed as part of a project for [Computational Linguistics](https://jnw.domains.swarthmore.edu/ling073/syllabus.php) at [Swarthmore College](https://swarthmore.edu). We feel that this language is suitable for merging with "testing" status, but further verification and improvement by native speakers would be very helpful.

co-authored-by: Elizabeth Resendiz <[email protected]>
master
Bill Dengler 3 years ago
parent
commit
254b64939d
7 changed files with 553 additions and 0 deletions
  1. 4
    0
      Makefile.am
  2. 214
    0
      dictsource/mto_list
  3. 107
    0
      dictsource/mto_rules
  4. 8
    0
      espeak-ng-data/lang/miz/mto
  5. 216
    0
      phsource/ph_mixe_mto
  6. 3
    0
      phsource/phonemes
  7. 1
    0
      tests/language-phonemes.test

+ 4
- 0
Makefile.am View File

@@ -488,6 +488,7 @@ dictionaries: \
espeak-ng-data/mr_dict \
espeak-ng-data/ms_dict \
espeak-ng-data/mt_dict \
espeak-ng-data/mto_dict \
espeak-ng-data/my_dict \
espeak-ng-data/nci_dict \
espeak-ng-data/ne_dict \
@@ -750,6 +751,9 @@ espeak-ng-data/ms_dict: dictsource/ms_list dictsource/ms_rules dictsource/ms_ext
mt: espeak-ng-data/mt_dict
espeak-ng-data/mt_dict: dictsource/mt_list dictsource/mt_rules dictsource/mt_extra

# Totontepec Mixe: `mto` is an alias target for the compiled dictionary, which
# depends on the list and rules sources (no _extra file for this language).
mto: espeak-ng-data/mto_dict
espeak-ng-data/mto_dict: dictsource/mto_list dictsource/mto_rules

my: espeak-ng-data/my_dict
espeak-ng-data/my_dict: dictsource/my_list dictsource/my_rules dictsource/my_extra dictsource/my_emoji


+ 214
- 0
dictsource/mto_list View File

@@ -0,0 +1,214 @@
// This file is UTF8 encoded

// letters

_cap m'aJ^us
_?? s'imbolo
_#32 Esp'aTjo

// accent names
_lig liQaD'ura
_acu aQ'uDo
_ac2 d'oble||aQ'uDo
_brv br'eBe
_ced seD'iJ^a
_cir sirkumfl'exo
_dia dj'Eresis
_dac d'oble||aQ'uDo
_dot p'unto
_grv gr'aBe
_hac kar'on
_mcn makr'on
_ogo kol'ita
_rng an'iJ^o
_stk b'aRR2a
_tld t'ilde
_sup supE**'indiTe
_sub suB'indiTe


// names of symbols
° grados
_. punto
_, koma
_; p,untoik'oma
_: d,osp'untos
_! TERR2'araDmiraTj'on
_? TERR2'arintERR2,oQaTj'on
_¡ aBr'iraDmiraTj'on
_¿ aBr'irintERR2,oQaTj'on
_< men'orke
_> maJ^'orke
_' apostr'ofo
ꞌ salt'il^o $only
_" kom'iJ^as
_- gJ^on
__ suBraJ^'aDo
_/ baRR2a
_\ b'aRR2aimbErt'iDa
_` aT'Ento||gr'aBe
_´ aT'Ento||aQ'uDo
_( ,aBrepar'Entesis
_) Tj,ERR2apar'Entesis
_[ ,aBrekortS'ete
_] Tj,ERR2akortS'ete
_{ ,aBreJ^'aBe
_} Tj,ERR2aJ^'aBe
_« kom'iJ^as||iTkJ^'ErDas
_» kom'iJ^as||dEr'EtSas
= iQw'al
+ m'as
# almoaD'iJ^a
* astEr'isko
. punto
^ Tirkumfl'exo
₠ 'eU*o
€ eUro
% porTj'Ento
& ampErs'ant
@ aRR2'oBa
/ baRR2a
© kopiRR2'aIt
£ liBras
¶ p'aRR2afo
§ sEkTj'on
¬ n'ot
· p'unto||m'edjo

// Language names

_cyr Ti*'iliko_
_hy arm'enjo
_he eB@-*'Eo
_ar 'a*aBe
_hi 'i:ndi
_bn beNg,al'i_
_ta t'amil
_te tel'ugu
_si TiNgal'es
_th t'a:i
_my birm'ano
_ja x,apon'Es
_zh tS'ino

// numbers
// Keys follow the eSpeak NG number-entry convention: _N is a units digit,
// _NX is the tens word for N*10 (upper-case X), _0C is "hundred",
// _dpt is the decimal point and _roman the prefix for Roman numerals.
// The tens appear to be built on a base of twenty (ii?p): 60 and 80 are
// "three"/"four" + ii?p, and 50/70/90 append mAhk# (ten) to 40/60/80.
_0 nitija
_1 toU?k#
_2 mahtsk#
_3 toUoUhk#
_4 mAktAAz.k#
_5 mugooz.k#
_6 toUht8k#
_7 vuz.toUht8k#
_8 toUtoUht8k#
_9 tAz.toUht8k#
_1X mAhk#
// Key must be _2X (upper-case X) like the other tens entries, otherwise the
// lookup for "twenty" does not find it.
_2X ii?p
// NOTE(review): here mAhk# (ten) precedes ii?p (twenty), whereas 50/70/90
// place mAhk# last — confirm the word order for thirty against the sources.
_3X mAhk#ii?p
_4X v8htkup
_5X v8htkupmAhk#
_6X toUoUhk#ii?p
_7X toUoUhk#ii?pmAhk#
_8X mAktAAz.k#ii?p
_9X mAktAAz.k#ii?pmAhk#
_0C mugooz.k#ii?p
_dpt koma
_roman Rom'ano

// ordinal numbers
_#º o
_#ª a
_ord mu
ºc gr'ados||T'e
ºf gr'ados||'Efe
ºk gr'ados||k'a

// Letters (names taken from Spanish)
// If a letter has a "word" pronunciation which is different from its
// "letter" name, then include the letter name here, with the letter
// prefixed by a _ character.

_b be
_c Te
_d de
_f Efe
_g xe
_h atSe
_j xota
_k ka
_l Ele
_m Eme
_n Ene
_ñ En^e
_p pe
_q ku
_r Ere
_s Ese
_t te
_v uBe
_w uBe||d'oBle
_x Ekis
_z TEta

_a a
_e e
_o o
_y igri'eQa
_á 'a||aTEntw'aDa
_é 'e||aTEntw'aDa
_í 'i||aTEntw'aDa
_ó 'o||aTEntw'aDa
_ú 'u||aTEntw'aDa
_ü 'u||kon||dj'ErEsis
_ç $accent

// Proper Names and countries
// Entries with a $N flag give no pronunciation of their own: the word is still
// translated by the rules, with primary stress forced onto the Nth syllable.
// "méxico" instead supplies an explicit phoneme string.
amsterdam $3
bardem $2
jerusalem $4
méxico m'Exiko
vietnam $2

// foreign words

android 'androId
apple 'ap@l
copyright k'opiRR2,aIt
chrome kr'owm
curriculum $2
diem d'i:em $only
eloquence 'elokwens
english ínglish $text
espeak 'isp'ik
eyes 'aIs
facebook f'eIsbuk
firefox f'aIrfoks
free fr'i
google g'ug@l
hardware h'ardwer
iphone 'aIfon
ipod 'aIpod
jaws dZ'os
jazz dZ'as
linux $1
live l'aIB
messenger m'esendZer
microsoft m'aIkrosoft
mozilla moT'ila
office 'ofis
platform pl'atfom
power p'awer
service s'erBis
skype sk'aIp
snapshot sn'apS,ot // _^_en
software s'oftwer
spanish sp'aniS
speech sp'itS
thunderbird t'anderbird
twit tw'it
twitter tw'iter
window w'indow

(e speak) 'isp'ik
(i phone) 'aIfon
(i pod) 'aIpod

+ 107
- 0
dictsource/mto_rules View File

@@ -0,0 +1,107 @@
// Translation rules for mto
// This file is UTF-8 encoded
// Letter groups used as rule contexts below:
//   L01 - vowel letters, plain and with diaeresis
//   L02 - nasal consonant letters
.L01 a e i o u ä ë ï ö ü
.L02 m n
// Vowel letters. Each maps unconditionally to a single phoneme defined in
// ph_mixe_mto; the plain letters map to the phoneme of the same name and the
// diaeresis letters map as follows: ä -> A, ë -> @, ï -> 8, ö -> oU.
.group a
a a
.group ä
ä A
.group e
e e
.group ë
ë @
.group i
i i
.group ï
ï 8
.group o
o o
.group ö
ö oU
.group u
u u
// Letter v. Rule form is  pre)match(post  phonemes ; "_" is a word boundary.
//   word-final v            -> f
//   word-initial v before o -> b
//   v next to the letter j  -> v followed by phoneme j
//   otherwise               -> v
.group v
v(_ f
_)v(o b
j)v vj
v(j vj
v v
// Letter s always maps to phoneme s.
.group s
s s
// Letter x: after a vowel (L01) or nasal (L02) it maps to z.; next to the
// letter j it maps to s. followed by phoneme j; otherwise to s. .
.group x
L01)x z.
L02)x z.
j)x s.j
x(j s.j
x s.
// Letter j maps to phoneme h (phoneme j is produced by letter y and by the
// j-context rules in the consonant groups).
// NOTE(review): contexts are not consumed by the consonant rules, so a j that
// triggers e.g. "v(j vj" is presumably still translated here as h as well —
// confirm that the resulting sequence is intended.
.group j
j h
// Digraph ts: maps to dz after a vowel (L01) or nasal (L02), to ts followed by
// phoneme j next to the letter j, and to ts otherwise.
// NOTE(review): the first rule carries a leading "%" (the rule-syntax marker
// for a doubled character) that the parallel "L01)x" rule in group x does not
// have — confirm whether it is intentional.
.group ts
%L01)ts dz
L02)ts dz
j)ts tsj
ts(j tsj
ts ts
// Letter p:
//   between a vowel or nasal and a following vowel -> b
//   word-final                                     -> p#
//   next to the letter j                           -> p followed by phoneme j
//   otherwise                                      -> p
.group p
L01)p(L01 b
L02)p(L01 b
p(_ p#
j)p pj
p(j pj
p p
// Letter t:
//   between a vowel or nasal and a following vowel -> d
//   next to the letter j                           -> t followed by phoneme j
//   word-final                                     -> t#
//   otherwise                                      -> t
// NOTE(review): the word-final rule is listed after the j-context rules here,
// but before them in groups p and k — confirm whether rule order matters when
// both contexts apply.
.group t
L01)t(L01 d
L02)t(L01 d
j)t tj
t(j tj
t(_ t#
t t
// Letter k:
//   between a vowel or nasal and a following vowel -> g
//   word-final                                     -> k#
//   next to the letter j                           -> k followed by phoneme j
//   otherwise                                      -> k
.group k
L01)k(L01 g
L02)k(L01 g
k(_ k#
j)k kj
k(j kj
k k
// Saltillo letter (ꞌ) maps to phoneme ?, which ph_mixe_mto defines as the
// glottal stop.
.group ꞌ
ꞌ ?
// Letter m: next to the letter j it maps to m followed by phoneme j,
// otherwise to m.
.group m
j)m mj
m(j mj
m m
// Letter n takes the place of the following stop: m before p, N before k.
// Next to the letter j it maps to n followed by phoneme j; otherwise to n.
.group n
n(p m
n(k N
j)n nj
n(j nj
n n
// Letters r and l map to the phonemes of the same name; letter y maps to
// phoneme j.
.group r
r r
.group l
l l
.group y
y j

+ 8
- 0
espeak-ng-data/lang/miz/mto View File

@@ -0,0 +1,8 @@
name Totontepec Mixe
language mto

maintainer Bill Dengler <[email protected]> and Elizabeth Resendiz <[email protected]>
status testing

lowercaseSentence
tunes s6 c6 q6 e6

+ 216
- 0
phsource/ph_mixe_mto View File

@@ -0,0 +1,216 @@

//====================================================
// Totontepec Mixe
//====================================================


// Vowel [a]; produced by the letter "a" in mto_rules.
phoneme a
ipa a
vwl starttype #a endtype #a
length 190
FMT(vowel/a_4)
endphoneme

// Vowel [ɑ] (features: unrounded, back, low); produced by the letter "ä".
// Longer than plain a (220 vs 190).
phoneme A
ipa ɑ
vwl starttype #a endtype #a
length 220
unr bck low
FMT(vowel/aa_8)
endphoneme

// Vowel [e]; produced by the letter "e".
phoneme e
ipa e
vwl starttype #e endtype #e
length 190
FMT(vowel/e_mid2)
endphoneme

// Vowel [ə] (features: unrounded, central, mid), marked unstressed; produced
// by the letter "ë".
phoneme @
ipa ə
vwl starttype #@ endtype #@
unstressed
length 140
unr cnt mid
// At the end of a word use the vowel/@_6 formant data with argument 90;
// everywhere else fall through to vowel/@.
IF thisPh(isWordEnd) THEN
FMT(vowel/@_6, 90)
ENDIF
FMT(vowel/@)
endphoneme

// Vowel [i]; produced by the letter "i".
phoneme i
ipa i
vwl starttype #i endtype #i
length 190
// When another vowel follows, the phoneme ";" is appended after this one.
IfNextVowelAppend(;)
FMT(vowel/i)
endphoneme

// Vowel [ɘ]; produced by the letter "ï". Uses the #@ start/end type, the same
// as the phoneme @.
phoneme 8
ipa ɘ
vwl starttype #@ endtype #@
length 165
FMT(vowel/8_7)
endphoneme

// Vowel [o]; produced by the letter "o".
phoneme o
ipa o
vwl starttype #o endtype #o
length 195
FMT(vowel/oo)
endphoneme

// Diphthong [əʊ], starting as type #@ and ending as type #u; produced by the
// letter "ö".
phoneme oU
ipa əʊ
vwl starttype #@ endtype #u
length 220
FMT(vdiph/@u_en)
endphoneme

// Vowel u; produced by the letter "u". Unlike the other vowels in this file
// it has no explicit "ipa" line.
phoneme u
vwl starttype #u endtype #u
length 200
FMT(vowel/u_bck2)
endphoneme

// k and k# reuse existing definitions from the shared consonants set.
// mto_rules emits k# for a word-final "k" and k elsewhere.
phoneme k
import_phoneme consonants/k-
endphoneme

phoneme k#
import_phoneme consonants/k#
endphoneme

// Voiced velar stop [g]; mto_rules emits it for "k" between a vowel or nasal
// and a following vowel. Its voicing-switch counterpart is k.
phoneme g
ipa g
vcd vel stp
lengthmod 5
voicingswitch k
Vowelin f1=2 f2=2300 200 300 f3=-300 80
Vowelout f1=2 f2=2300 250 300 f3=-300 80 brk

FMT(g/g) addWav(x/g2) // weaker [g]
endphoneme

// t and T reuse definitions from the base1 table. T is not produced by
// mto_rules; it appears in the Spanish-derived entries of mto_list.
phoneme t
import_phoneme base1/t[
endphoneme

phoneme T // Used in Spanish words
import_phoneme base1/T
endphoneme

// Aspirated voiceless dental stop [tʰ]; mto_rules emits it for a word-final
// "t". Its voicing-switch counterpart is d.
phoneme t#
ipa tʰ
vls dnt stp
lengthmod 2
voicingswitch d
Vowelin f1=0 f2=1500 -300 300 f3=-100 80 amp=16
Vowelout f1=0 f2=1500 -300 250 f3=-100 80 rms=20
// Same sample in both cases; before a pause it is played with argument 35
// instead of 50.
IF nextPh(isPause2) THEN
WAV(ustop/t_dnt, 35)
ENDIF
WAV(ustop/t_dnt, 50)
endphoneme

// d, p, p# and b reuse existing definitions. In mto_rules, d and b are the
// outputs for "t" and "p" between a vowel or nasal and a following vowel,
// and p# is the output for a word-final "p".
phoneme d
import_phoneme base2/d
endphoneme

phoneme p
import_phoneme consonants/p-
endphoneme

phoneme p#
import_phoneme consonants/ph
endphoneme

phoneme b
import_phoneme base1/b
endphoneme

// Voiced labiodental fricative [v]: formant data from voc/v with the vocw/v
// sample mixed in.
phoneme v
ipa v
vcd lbd frc
FMT(voc/v) addWav(vocw/v, 90)
endphoneme

// Voiceless labiodental fricative [f]; mto_rules emits it for a word-final "v".
phoneme f
ipa f
vls lbd frc
WAV(ufric/f, 80)
endphoneme

// Voiceless alveolar sibilant fricative [s]. Its voicing-switch counterpart
// is z.
phoneme s
ipa s
vls alv frc sib
lengthmod 3
voicingswitch z
Vowelin f1=0 f2=1700 -300 300 f3=-100 80
Vowelout f1=0 f2=1700 -300 250 f3=-100 80 rms=20

// Sample selection: ufric/s_ before a pause, ufric/s! before p, t or k,
// and ufric/s in every other context.
IF nextPh(isPause) THEN
WAV(ufric/s_, 60) // quieter 's' at end of word
ELIF nextPh(p) OR nextPh(t) OR nextPh(k) THEN
WAV(ufric/s!)
ENDIF
WAV(ufric/s, 80)
endphoneme

// s., z., ts and dz reuse existing definitions. In mto_rules, s. and z. are
// the outputs for the letter "x", and ts and dz for the digraph "ts".
phoneme s.
import_phoneme base1/s.
endphoneme

phoneme z.
import_phoneme base1/z.
endphoneme

phoneme ts
import_phoneme consonants/ts
endphoneme

phoneme dz
import_phoneme consonants/dz
endphoneme

// Nasals, reusing base1 definitions. N is the output of mto_rules for "n"
// before "k".
phoneme m
import_phoneme base1/m-
endphoneme

phoneme n
import_phoneme base1/n-
endphoneme

phoneme N
import_phoneme base1/N-
endphoneme

// r, l, j and C reuse base1 definitions; note that r is mapped onto the base1
// phoneme "*". j is the output for the letter "y" and for the j-context rules
// in mto_rules. C is not produced by mto_rules.
phoneme r
import_phoneme base1/*
endphoneme

phoneme l
import_phoneme base1/l
endphoneme

phoneme j
import_phoneme base1/j
endphoneme

phoneme C
import_phoneme base1/C
endphoneme

// Glottal stop [ʔ]; mto_rules emits it for the saltillo letter. It plays the
// ustop/null sample and uses the glstop vowel transitions on both sides.
phoneme ?
ipa ʔ
vls glt stp
lengthmod 1 // 5? longer preceding vowel
nolink
Vowelin glstop
Vowelout glstop
WAV(ustop/null)
endphoneme

// h reuses the base1 definition; mto_rules emits it for the letter "j".
phoneme h
import_phoneme base1/h
endphoneme

+ 3
- 0
phsource/phonemes View File

@@ -1852,6 +1852,9 @@ include ph_kurdish
phonemetable mi base2
include ph_maori

// Totontepec Mixe: table "mto" builds on base2 and adds the definitions in
// ph_mixe_mto.
phonemetable mto base2
include ph_mixe_mto

phonemetable nci base2
include ph_nahuatl


+ 1
- 0
tests/language-phonemes.test View File

@@ -97,6 +97,7 @@ test_phwav mk 072d0a74acf54bea528e7dde427eb04808d38364 "ma na n^a Na pa ta xa k^
test_phwav mr 5238ba08fba349fea6c00bdd8f1672ede229b8ec "ma na n.a n^a pa t#a t.a tSa ka qa p#a t.#a c#a k#a ba d#a d.a dZa ga b#a d.#a J#a g#a fa sa Sa xa va za Za Qa Ha ra r.a la ja _:_ mI mU me m@ mo mE mV mO ma mi: mu: me: mo: mE: mO: ma: m&: mI~ mi~ mU~ mu~ mU~ me~ mo~ mE~ mV~ mO~ ma~ mAI maU"
test_phwav ms 75a57a020af2b62e3448792d3f6a945a9b2c6b75 "ma na n^a Na pa ba ta da ka ga ?a tSa dZa fa va Ta Da sa za Sa xa Qa ha ja wa la Ra R2a _:_ ma mE mO m@ me mo mi mu maI meI mOI maU m@U"
test_phwav mt 03231022bb750335042309d6d2acd55f214a8967 "ma na Na pa ta ka ?a ba da ga p\`a t\`a k\`a tSa dZa tS\`a Ba sa za Sa Za xa ha la ja wa ra _:_ mi me my ma m@ mo mu"
test_phwav mto bd45ab06741222328c2068f5a14a08b052dbdaa6 "a A e @ i 8 o oU u k k# ga t T t# da p p# b v f s s. z. ts dz m n N ra la j C a?e ha"
test_phwav my 77eeafb213bfd0756319b2766be8364a2bff46ad "na Na la ja pa pha fa ta tha ka kha tS;a tS;ha S;a s.a tsa tsha ts.a ts.ha N-a _:_ ma mA mai mAu m@ m@r mE mei mi mi[ mi. miA miAu miE mio miou mo mo- mou mong mu muA mua muai mu@ mei muo my myu my& myE my@ myi _:_ ma11 ma21 ma214 ma22 ma33 ma35 ma44 ma51 ma53 ma55"
test_phwav nb adbf0b2e74a76ff7bd2463223f648d479515a314 "m#a ma n#a na n^#a n^a N#a Na pa ta ca ka fa va Ta Da sa Ca J^a xa Qa ha l#a la tl#a r#a ra _:_ mi mi: mI mI: mE mE: ma ma: mO mO: mu mu: my my: mW mW: maI maI: meI meI: maU maU: moU moU: mYy mOI myI"
test_phwav nci 8c578e588c4f0a283359d62754fde039b14c8aef "ma na Na pa ta ka ba da ga fa Ta sa Sa xa ha va Da za Za tSa dZa la ra ja wa t2a t#a d#a z#a r-a z/2a w#a m- n- N- _:_ mI mE ma m0 mV mU mi: mA: mO: mu: m3: mA@ mO@ mo@ mU@ mi@3 mIR mVR mi@ me@ mi m@ m3 me# mI# mI2 meI maI mOI moU maU maa mO2 maI@ maI3 maU@"

Loading…
Cancel
Save