@@ -11,6 +11,7 @@ The espeak-ng project is a fork of the espeak project. | |||
updated languages: | |||
* chr (Cherokee) -- Michael Conrad | |||
* de (German) -- Karl Eick | |||
* el (Modern Greek) -- Reece Dunn (support for variant Greek letter forms) | |||
* en (English) -- Steven Presser, Ben Talagan |
@@ -309,6 +309,7 @@ phsource/phonemes.stamp: \ | |||
phsource/ph_bengali \ | |||
phsource/ph_bulgarian \ | |||
phsource/ph_catalan \ | |||
phsource/ph_cherokee \ | |||
phsource/ph_consonants \ | |||
phsource/ph_croatian \ | |||
phsource/ph_czech \ | |||
@@ -470,6 +471,7 @@ dictionaries: \ | |||
espeak-ng-data/bpy_dict \ | |||
espeak-ng-data/bs_dict \ | |||
espeak-ng-data/ca_dict \ | |||
espeak-ng-data/chr_dict \ | |||
espeak-ng-data/cs_dict \ | |||
espeak-ng-data/cy_dict \ | |||
espeak-ng-data/da_dict \ | |||
@@ -593,6 +595,9 @@ espeak-ng-data/bs_dict: dictsource/bs_list dictsource/bs_rules dictsource/bs_ext | |||
ca: espeak-ng-data/ca_dict | |||
espeak-ng-data/ca_dict: dictsource/ca_list dictsource/ca_rules dictsource/ca_extra dictsource/ca_emoji | |||
chr: espeak-ng-data/chr_dict | |||
espeak-ng-data/chr_dict: dictsource/chr_list dictsource/chr_rules | |||
cs: espeak-ng-data/cs_dict | |||
espeak-ng-data/cs_dict: dictsource/cs_list dictsource/cs_rules dictsource/cs_extra dictsource/cs_emoji | |||
@@ -0,0 +1,320 @@ | |||
// Letter groups; the rules below refer to them as L01, L02 and L03.
//tone place holders | |||
// (the x-letter sequences that the .replace section substitutes for the superscript tone digits)
.L01 xb xc xcxd xdxc xd xdxf xfxd xf | |||
//nasal consonants | |||
// (used as the pre-context "L02)" to select the nasal vowel phonemes)
.L02 m n | |||
//all consonants | |||
// (used in post-contexts to step over consonants between a vowel and a tone mark or the word end)
.L03 ch d g h j k l m n s t w y | |||
// Each superscript tone digit is rewritten as a two-letter placeholder
// ("x" + letter) so that tones can be matched by ordinary letter rules
// (group x and the L01 letter group).
.replace | |||
¹ xb | |||
² xc | |||
³ xd | |||
⁴ xf | |||
//CED pp xi-xii | |||
// Translates the tone placeholders produced by .replace into tone phonemes.
// NOTE(review): ph_cherokee defines tone phonemes 1, 2, 3, 4, 23, 32 and 43 only.
// "21" and "34" have no phoneme of their own, and the word-final vowel rules
// (e.g. "axb (_ A~1") map xb to "1". Confirm that "xb 21" and "xdxf 34"
// produce the intended tones.
.group x | |||
xb 21 | |||
xc 2 | |||
xcxd 23 | |||
xdxc 32 | |||
xd 3 | |||
xdxf 34 | |||
xfxd 43 | |||
xf 4 | |||
//CED page x | |||
.group ɂ | |||
ɂ ? | |||
//CED - none of the vowels are diphthongs, even though the pronunciation key uses diphthongs. pg ix - footnote | |||
//CED pg ix | |||
.group a | |||
// Output phonemes: "A:" is the plain oral a (see ph_cherokee), an extra ":" lengthens it,
// "A~" is the nasal a, and a trailing number is a tone phoneme.
// Pre-context "L02)" = preceded by m or n. Post-context "_" = word end.
//long | |||
a A:: | |||
L02) a A~: | |||
//final word vowels are short, nasal, and have a highfall tone | |||
a (_ A~43 | |||
L02) a (_ A~43 | |||
// word-final vowel with an explicit tone mark: that tone is used instead of 43
axb (_ A~1 | |||
axc (_ A~2 | |||
axd (_ A~3 | |||
axf (_ A~4 | |||
// last vowel of the word when one to three consonants follow before the word end
a (L03_ A~43 | |||
a (L03L03_ A~43 | |||
a (L03L03L03_ A~43 | |||
// one to three consonants and then a tone placeholder follow: no extra lengthening
a (L03L01 A: | |||
a (L03L03L01 A: | |||
a (L03L03L03L01 A: | |||
L02) a (L03L01 A~ | |||
L02) a (L03L03L01 A~ | |||
L02) a (L03L03L03L01 A~ | |||
//short followed by s has an h sound added, see CED pg x, footnote. | |||
as (L01 A:hs | |||
as (L03L01 A:hs | |||
as (L03L03L01 A:hs | |||
as (L03L03L03L01 A:hs | |||
L02) as (L01 A~hs | |||
L02) as (L03L01 A~hs | |||
L02) as (L03L03L01 A~hs | |||
L02) as (L03L03L03L01 A~hs | |||
.group e | |||
// "E" is the oral vowel, "E~" the nasal one; ":" lengthens; a trailing number is a tone phoneme.
// default: long vowel (nasal after m or n)
e E: | |||
L02) e E~: | |||
// last vowel of the word when one to four consonants follow before the word end
// NOTE(review): these emit the oral E43, whereas the matching rules in group a emit
// the nasal A~43 — confirm which is intended.
e (L03_ E43 | |||
e (L03L03_ E43 | |||
e (L03L03L03_ E43 | |||
e (L03L03L03L03_ E43 | |||
//final word vowels are short, nasal, and have a highfall tone | |||
e (_ E~43 | |||
L02) e (_ E~43 | |||
// word-final vowel with an explicit tone mark: that tone is used instead of 43
exb (_ E~1 | |||
exc (_ E~2 | |||
exd (_ E~3 | |||
exf (_ E~4 | |||
// one to three consonants and then a tone placeholder follow: no lengthening
e (L03L01 E | |||
e (L03L03L01 E | |||
e (L03L03L03L01 E | |||
L02) e (L03L01 E~ | |||
L02) e (L03L03L01 E~ | |||
L02) e (L03L03L03L01 E~ | |||
// unlengthened vowel before s: an h is inserted before the s
es (L01 Ehs | |||
es (L03L01 Ehs | |||
es (L03L03L01 Ehs | |||
es (L03L03L03L01 Ehs | |||
L02) es (L01 E~hs | |||
L02) es (L03L01 E~hs | |||
L02) es (L03L03L01 E~hs | |||
L02) es (L03L03L03L01 E~hs | |||
.group i | |||
// "I" is the oral vowel, "I~" the nasal one; ":" lengthens; a trailing number is a tone phoneme.
// default: long vowel (nasal after m or n)
i I: | |||
L02) i I~: | |||
//final word vowels are short, nasal, and have a highfall tone | |||
i (_ I~43 | |||
L02) i (_ I~43 | |||
// word-final vowel with an explicit tone mark: that tone is used instead of 43
ixb (_ I~1 | |||
ixc (_ I~2 | |||
ixd (_ I~3 | |||
ixf (_ I~4 | |||
// last vowel of the word when one to three consonants follow before the word end
i (L03_ I43 | |||
i (L03L03_ I43 | |||
i (L03L03L03_ I43 | |||
// one to three consonants and then a tone placeholder follow: no lengthening
i (L03L01 I | |||
i (L03L03L01 I | |||
i (L03L03L03L01 I | |||
L02) i (L03L01 I~ | |||
L02) i (L03L03L01 I~ | |||
L02) i (L03L03L03L01 I~ | |||
// unlengthened vowel before s: an h is inserted before the s
is (L01 Ihs | |||
is (L03L01 Ihs | |||
is (L03L03L01 Ihs | |||
is (L03L03L03L01 Ihs | |||
L02) is (L01 I~hs | |||
L02) is (L03L01 I~hs | |||
L02) is (L03L03L01 I~hs | |||
L02) is (L03L03L03L01 I~hs | |||
.group o | |||
// "o" is the oral vowel, "o~" the nasal one; ":" lengthens; a trailing number is a tone phoneme.
// default: long vowel (nasal after m or n)
o o: | |||
L02) o o~: | |||
// last vowel of the word when one to four consonants follow before the word end
o (L03_ o43 | |||
o (L03L03_ o43 | |||
o (L03L03L03_ o43 | |||
o (L03L03L03L03_ o43 | |||
//final word vowels are short, nasal, and have a highfall tone | |||
o (_ o~43 | |||
L02) o (_ o~43 | |||
// word-final vowel with an explicit tone mark: that tone is used instead of 43
oxb (_ o~1 | |||
oxc (_ o~2 | |||
oxd (_ o~3 | |||
oxf (_ o~4 | |||
// one to three consonants and then a tone placeholder follow: no lengthening
o (L03L01 o | |||
o (L03L03L01 o | |||
o (L03L03L03L01 o | |||
L02) o (L03L01 o~ | |||
L02) o (L03L03L01 o~ | |||
L02) o (L03L03L03L01 o~ | |||
// unlengthened vowel before s: an h is inserted before the s
// NOTE(review): groups a, e and i also have an "(L03L03L03L01" variant of these
// rules; it is absent here — confirm whether that is intentional.
os (L01 ohs | |||
os (L03L01 ohs | |||
os (L03L03L01 ohs | |||
L02) os (L01 o~hs | |||
L02) os (L03L01 o~hs | |||
L02) os (L03L03L01 o~hs | |||
.group u | |||
// "u" is the oral vowel, "u~" the nasal one; ":" lengthens; a trailing number is a tone phoneme.
// default: long vowel (nasal after m or n)
u u: | |||
L02) u u~: | |||
// last vowel of the word when one to four consonants follow before the word end
u (L03_ u43 | |||
u (L03L03_ u43 | |||
u (L03L03L03_ u43 | |||
u (L03L03L03L03_ u43 | |||
//final word vowels are short, nasal, and have a highfall tone | |||
u (_ u~43 | |||
L02) u (_ u~43 | |||
// word-final vowel with an explicit tone mark: that tone is used instead of 43
uxb (_ u~1 | |||
uxc (_ u~2 | |||
uxd (_ u~3 | |||
uxf (_ u~4 | |||
// one to three consonants and then a tone placeholder follow: no lengthening
u (L03L01 u | |||
u (L03L03L01 u | |||
u (L03L03L03L01 u | |||
L02) u (L03L01 u~ | |||
L02) u (L03L03L01 u~ | |||
L02) u (L03L03L03L01 u~ | |||
// unlengthened vowel before s: an h is inserted before the s
// NOTE(review): groups a, e and i also have an "(L03L03L03L01" variant of these
// rules; it is absent here — confirm whether that is intentional.
us (L01 uhs | |||
us (L03L01 uhs | |||
us (L03L03L01 uhs | |||
L02) us (L01 u~hs | |||
L02) us (L03L01 u~hs | |||
L02) us (L03L03L01 u~hs | |||
.group v | |||
// Every rule in this group emits the nasal phoneme W~, so there are no
// separate "L02)" (after m or n) variants.
// default: long vowel
v W~: | |||
// last vowel of the word when one to three consonants follow before the word end
v (L03_ W~43 | |||
v (L03L03_ W~43 | |||
v (L03L03L03_ W~43 | |||
// word-final vowel: 43 tone, or the explicitly marked tone
v (_ W~43 | |||
vxb (_ W~1 | |||
vxc (_ W~2 | |||
vxd (_ W~3 | |||
vxf (_ W~4 | |||
// one to three consonants and then a tone placeholder follow: no lengthening
v (L03L01 W~ | |||
v (L03L03L01 W~ | |||
v (L03L03L03L01 W~ | |||
// unlengthened vowel before s: an h is inserted before the s
vs (L01 W~hs | |||
vs (L03L01 W~hs | |||
vs (L03L03L01 W~hs | |||
.group ạ | |||
// Dotted (explicitly short) a. "A:" is the base oral a phoneme, "A~" its nasal counterpart.
ạ A: | |||
// nasal after m or n
L02) ạ A~ | |||
// word-final: nasal with the 43 (high fall) tone
ạ (_ A~43 | |||
// before s an h is inserted; the vowel stays oral unless m or n precedes it
// (same pattern as "as (L01 A:hs" and "ẹs Ehs")
ạs A:hs | |||
L02) ạs A~hs | |||
.group ẹ | |||
// Dotted e: never lengthened. Oral by default, nasal after m or n ("L02)"),
// nasal with the 43 tone at the word end, and an h is inserted before s.
ẹ E | |||
L02) ẹ E~ | |||
ẹ (_ E~43 | |||
ẹs Ehs | |||
L02) ẹs E~hs | |||
.group ị | |||
// Dotted i: never lengthened. Oral by default, nasal after m or n ("L02)"),
// nasal with the 43 tone at the word end, and an h is inserted before s.
ị I | |||
L02) ị I~ | |||
ị (_ I~43 | |||
ịs Ihs | |||
L02) ịs I~hs | |||
.group ọ | |||
// Dotted o: never lengthened. Oral by default, nasal after m or n ("L02)"),
// nasal with the 43 tone at the word end, and an h is inserted before s.
ọ o | |||
L02) ọ o~ | |||
ọ (_ o~43 | |||
ọs ohs | |||
L02) ọs o~hs | |||
.group ụ | |||
// Dotted u: never lengthened. Oral by default, nasal after m or n ("L02)"),
// nasal with the 43 tone at the word end, and an h is inserted before s.
ụ u | |||
L02) ụ u~ | |||
ụ (_ u~43 | |||
ụs uhs | |||
L02) ụs u~hs | |||
.group ṿ | |||
// Dotted (explicitly short) v; this vowel is always the nasal W~.
ṿ W~ | |||
// word-final only: 43 (high fall) tone. The "(_" post-context keeps this rule from
// matching the same input as the default rule above (compare "ẹ (_ E~43").
ṿ (_ W~43 | |||
// an h is inserted before s
ṿs W~hs | |||
.group ch | |||
ch tS | |||
.group d | |||
d d | |||
.group g | |||
g g | |||
.group h | |||
h h | |||
//if word ends in hn or hy, transpose the last two sounds. CED pg x footnote. | |||
// The "(L01_" variants cover a tone placeholder between the consonants and the word end.
// "y" is pronounced as phoneme j, hence "hy" -> "jh".
hn (_ nh | |||
hn (L01_ nh | |||
hy (_ jh | |||
hy (L01_ jh | |||
.group j | |||
j dZ | |||
.group k | |||
k k | |||
.group l | |||
l l | |||
// "ls" is pronounced with an h inserted before the l
ls hls | |||
.group m | |||
m m | |||
.group n | |||
n n | |||
.group s | |||
s s | |||
.group t | |||
t t | |||
.group w | |||
w w | |||
// "wh" followed by a tone placeholder (L01) or a consonant (L03): the h is dropped
wh (L01 w | |||
wh (L03 w | |||
.group y | |||
// the letter y maps to phoneme j
y j | |||
// "yh" followed by a tone placeholder (L01) or a consonant (L03): the h is dropped
yh (L01 j | |||
yh (L03 j |
@@ -23,6 +23,8 @@ | |||
- [Conlang X-SAMPA Transcription Scheme](phonemes/cxs.md) | |||
- Pronunciation Guides | |||
- [English](languages/gmw/en.md) | |||
- Other Miscellaneous Information | |||
- Notes on the [Cherokee](languages/iro/chr.md) implementation. | |||
- [License](../COPYING) | |||
---------- |
@@ -8,7 +8,7 @@ and dialects, | |||
[private-use extensions](https://raw.githubusercontent.com/espeak-ng/bcp47-data/master/bcp47-extensions) | |||
have been used. | |||
The 112 supported languages and accents are: | |||
The 113 supported languages and accents are: | |||
| Family Code | Identifier | Language Family | Language | Accent/Dialect | | |||
|-------------|-------------------|-----------------------|-----------------------------|------------------------| | |||
@@ -29,6 +29,7 @@ The 112 supported languages and accents are: | |||
| `zls` | `bg` | South Slavic | Bulgarian | | | |||
| `sit` | `my` | Sino-Tibetan | Burmese | | | |||
| `roa` | `ca` | Romance | Catalan | | | |||
| `iro` | `chr` | Iroquoian | Cherokee <sup>\[4\]</sup> | Western/C.E.D. | | |||
| `sit` | `yue` | Sino-Tibetan | Chinese | Cantonese | | |||
| `sit` | `hak` | Sino-Tibetan | Chinese | Hakka | | |||
| `map` | `haw` | Austronesian | Hawaiian | | | |||
@@ -131,3 +132,5 @@ The 112 supported languages and accents are: | |||
\[2\] Farsi/Persian written using English (Latin) characters. | |||
\[3\] Currently, only Hiragana and Katakana are supported. | |||
\[4\] Only Cherokee-English Dictionary fully annotated UTF-8 pronunciations are supported. Syllabary is not supported. |
@@ -0,0 +1,28 @@ | |||
# Western Cherokee | |||
Cherokee is a tonal language which does not use stress. | |||
This is an initial attempt at supporting Cherokee "DF" phonetics. (See Cherokee English Dictionary - Feeling - 1972). It does not support Syllabary or transliterated/romanized texts, neither of which indicate tone or cadence. | |||
The language file is configured to use stress on all syllables to mimic having no stress by having the stress equal across all syllables. | |||
Generally, long vowels are twice as long as their short counterparts. | |||
Trying to get espeak-ng's output to have the correct range on the various pitch contours has been difficult as documentation on how to do this is scarce. | |||
Note: The final "High Fall" tone generally found at the end of words doesn't always start high enough and never falls low enough. My attempts at increasing the range of the pitch contour for final word vowels show that specifying too large a range in a phoneme's tone statement causes some sort of disconnect between what one is trying to specify and what is actually generated (at least according to analysis using the Praat software). | |||
I have also attempted to enforce a zero intonation sentence structure to try and prevent any built-in intonation rules from slaughtering the pitch contours of syllables because of their locations. | |||
In general, the pitch contours seem to come out sounding "flat" and "constricted" when compared with the pitch contours from actual human speech. I do not know how to fix this. | |||
This language should be considered as alpha level *testing* and is **not** production ready. | |||
TODO: | |||
* Devoice short vowels when followed by an 'h'. | |||
* Fix where the 'm' and 'n' consonants are hard to perceive. | |||
* Add rules to handle the C.E.D. like phonetics as used by the Raven Rock Dictionary. | |||
* Add a basic Eastern Cherokee dialect to account for differences of pronunciations of 's vs sh' and 'j/ch vs ts'. | |||
* Figure out why ',' is treated as part of a word and not a short pause and fix it. |
@@ -0,0 +1,17 @@ | |||
name Cherokee | |||
language chr | |||
maintainer Michael Conrad <[email protected]> | |||
status testing | |||
voicing 100 | |||
speed 60 | |||
words 0 2 | |||
// phoneme table declared as "phonemetable chr base2" in the master phonemes file
phonemes chr | |||
//stress on all syllables to simulate stress on no syllables | |||
stressRule 9 | |||
intonation 3 // Less intonation, and comma does not raise the pitch. | |||
pitch 30 90 | |||
// Four all-zero tunes from the intonation file; the language notes describe this as an
// attempt to stop clause intonation from distorting the per-syllable tone contours.
tunes chrs chrc chrq chre | |||
stressAmp 8 8 8 8 8 8 8 8 //stress all vowels the same | |||
stressLength 180 180 180 180 180 180 180 180 //stress all vowels the same |
@@ -266,3 +266,47 @@ headextend 16 82 50 32 16 | |||
nucleus0 fall 92 8 | |||
nucleus fall 90 77 76 8 | |||
endtune | |||
// Flat tune for the Cherokee voice: every pitch parameter is zero.
// First entry of the chr voice's "tunes" line — presumably the statement tune; confirm.
tune chrs | |||
prehead 0 0 | |||
headenv fall 0 | |||
head 1 0 0 0 0 | |||
headextend 0 0 0 0 0 0 0 0 | |||
nucleus fall 0 0 0 0 | |||
nucleus0 fall 0 0 | |||
onset 0 0 0 | |||
headlast 0 0 0 | |||
endtune | |||
// Flat tune for the Cherokee voice: every pitch parameter is zero.
// Second entry of the chr voice's "tunes" line — presumably the comma tune; confirm.
tune chrc | |||
prehead 0 0 | |||
headenv fall 0 | |||
head 1 0 0 0 0 | |||
headextend 0 0 0 0 0 0 0 0 | |||
nucleus fall 0 0 0 0 | |||
nucleus0 fall 0 0 | |||
onset 0 0 0 | |||
headlast 0 0 0 | |||
endtune | |||
// Flat tune for the Cherokee voice: every pitch parameter is zero.
// Third entry of the chr voice's "tunes" line — presumably the question tune; confirm.
tune chrq | |||
prehead 0 0 | |||
headenv fall 0 | |||
head 1 0 0 0 0 | |||
headextend 0 0 0 0 0 0 0 0 | |||
nucleus fall 0 0 0 0 | |||
nucleus0 fall 0 0 | |||
onset 0 0 0 | |||
headlast 0 0 0 | |||
endtune | |||
// Flat tune for the Cherokee voice: every pitch parameter is zero.
// Fourth entry of the chr voice's "tunes" line — presumably the exclamation tune; confirm.
tune chre | |||
prehead 0 0 | |||
headenv fall 0 | |||
head 1 0 0 0 0 | |||
headextend 0 0 0 0 0 0 0 0 | |||
nucleus fall 0 0 0 0 | |||
nucleus0 fall 0 0 | |||
onset 0 0 0 | |||
headlast 0 0 0 | |||
endtune |
@@ -0,0 +1,210 @@ | |||
//******************************************************************* | |||
// TONES | |||
//******************************************************************* | |||
//******************************************************************* | |||
// REFERENCE MATERIALS | |||
//******************************************************************* | |||
// Collaborative Documentation and Revitalization of Cherokee Tone | |||
// https://scholarspace.manoa.hawaii.edu/bitstream/handle/10125/24630/herrick.pdf | |||
// Tones fall within a range of 80–140 Hz (pg 21, footnote) | |||
// the mid tone (2) starts at 104 Hz (P1) and falls approximately 2 Hz at each of the next four points, ending at 96 Hz at P5. | |||
// | |||
// Building Tone Resources for Second Language Learners from Phonetic Documentation | |||
// https://scholarspace.manoa.hawaii.edu/bitstream/10125/24737/hirata-edds_herrick.pdf | |||
// | |||
// Pitch contours taken from Praat chart, pg 295 | |||
// | |||
// (¹) low fall: 106, 102, 97, 92, 88 | |||
// (²) low/mid: 104, 100, 98, 97, 96 | |||
// (³) high: 108, 109, 110, 111, 112 | |||
// (⁴) super high: 109, 113, 118, 122, 127 | |||
// (²³) rising: 97, 98, 101, 104, 109 | |||
// (³²) falling: 122, 120, 115, 106, 97 | |||
// A REFERENCE GRAMMAR OF OKLAHOMA CHEROKEE (2008) | |||
// Short vowels preceding /h/ are devoiced, pg 45 (how to do this in espeak-ng?) | |||
// Word final vowels are nasalized | |||
// Vowels are nasalized after a nasal consonant | |||
// Vowels are either long or short; a long vowel takes | |||
// approximately twice as long to pronounce as its | |||
// short counterpart., pg 46 | |||
//int pitch1 = 0; | |||
//int pitch2 = 0; | |||
//int pitch_env = 0; | |||
//int amp_env = 0; | |||
//IF (pitch1 >= pitch2) the pitch values get swapped (???) | |||
phoneme 1 // low fall (¹) | |||
stress | |||
ipa ˨˩ | |||
// Reference contour from the Praat chart listed in the file header: 106 ... 88.
// The commented-out line uses those values directly; the active line ends lower.
// Presumably adjusted so the synthesised output measures close to the reference — confirm.
//Tone(106, 88, envelope/p_fall, NULL) | |||
Tone(106, 85, envelope/p_fall, NULL) //need a pitch contour of 106-88 for a | |||
endphoneme | |||
phoneme 2 // low (²) | |||
stress | |||
ipa ˨ | |||
// Reference contour from the Praat chart listed in the file header: 104 ... 96.
// The commented-out line uses those values directly; the active line has a wider range.
//Tone(104, 96, envelope/p_fall, NULL) | |||
//DF voice based | |||
Tone(105, 93, envelope/p_fall, NULL) //need a pitch contour of 104-96 for a | |||
endphoneme | |||
phoneme 3 // high (³) | |||
stress | |||
ipa ˧ | |||
// Reference contour from the Praat chart listed in the file header: 108 ... 112.
// The commented-out line uses those values directly; the active line has a wider range.
//Tone(108, 112, envelope/p_rise, NULL) | |||
Tone(109, 116, envelope/p_rise, NULL) //need a pitch contour of 108-112 for a | |||
endphoneme | |||
phoneme 4 // extra high rise (⁴) | |||
stress | |||
ipa ˧˦ | |||
// Reference contour from the Praat chart listed in the file header ("super high"): 109 ... 127.
// The values below span a wider range than the reference.
Tone(112, 134, envelope/p_rise, NULL) //need a pitch contour of 109-127 for a | |||
endphoneme | |||
phoneme 23 // rising (²³) | |||
stress | |||
ipa ˨˧ | |||
// Reference contour from the Praat chart listed in the file header: 97 ... 109.
Tone(96, 111, envelope/p_rise, NULL) //need a pitch contour of 97-109 for a | |||
endphoneme | |||
phoneme 32 // falling (³²) | |||
stress | |||
ipa ˧˨ | |||
// Reference contour from the Praat chart listed in the file header: 122 ... 97.
Tone(128, 96, envelope/p_fall, NULL) //need a pitch contour of 122-97 for a | |||
endphoneme | |||
phoneme 43 // extra high fall (⁴³) - end of word tone | |||
stress | |||
ipa ˦˧ | |||
// This is the tone the chr rules attach to word-final vowels (e.g. "a (_ A~43").
// The Praat chart in the file header has no row for it; the target below is the author's.
//need a pitch contour of 122-75 for a~ | |||
//but can't seem to get espeak-ng to generate it when analysed via praat | |||
Tone(127, 71, envelope/p_fall, NULL) | |||
//length 150 | |||
endphoneme | |||
//******************************************************************* | |||
// LANGUAGE SPECIFIC CONSONANTS (PLOSIVES/STOPS) | |||
//******************************************************************* | |||
//none. | |||
//******************************************************************* | |||
// LANGUAGE SPECIFIC VOWELS | |||
//******************************************************************* | |||
//from phonemes | |||
// Length mark: the chr rules append ":" to a vowel phoneme to lengthen it (e.g. "E:", "A::").
phoneme : // lengthen previous vowel by "length" | |||
virtual | |||
length 175 | |||
endphoneme | |||
// ạ, a | |||
//from ph_en | |||
// Despite the ":" in its name this is the base (unlengthened) oral a: it has the same
// length 175 as the other vowels in this file, and the chr rules write "A::" for the long vowel.
phoneme A: | |||
vwl | |||
length 175 | |||
FMT(vowel/aa_2) | |||
endphoneme | |||
// ạ, a, end of word vowel | |||
//from ph_english | |||
phoneme A~ | |||
vwl | |||
length 175 | |||
FMT(vnasal/aa_n2) | |||
endphoneme | |||
// e, ẹ | |||
//from ph_base2 | |||
phoneme E | |||
vwl | |||
length 175 | |||
FMT(vowel/ee_1) | |||
endphoneme | |||
// e, ẹ, final word vowel | |||
//from ph_konkani | |||
phoneme E~ | |||
vwl | |||
length 175 | |||
FMT(vnasal/ee_n2) | |||
endphoneme | |||
// i, ị | |||
//from ph_en | |||
phoneme I | |||
vwl | |||
length 175 | |||
FMT(vowel/ii_2) | |||
endphoneme | |||
// i, ị, final word vowel | |||
//from ph_konkani | |||
phoneme I~ | |||
vwl | |||
length 175 | |||
FMT(vnasal/ii_n) | |||
endphoneme | |||
// o, ọ | |||
//from ph_base2 | |||
phoneme o | |||
vwl | |||
length 175 | |||
FMT(vowel/o) | |||
endphoneme | |||
// o, ọ, final word vowel | |||
phoneme o~ | |||
vwl | |||
length 175 | |||
FMT(vnasal/o_n) | |||
endphoneme | |||
// u, ụ | |||
//from ph_base2 | |||
phoneme u | |||
vwl | |||
length 175 | |||
FMT(vowel/u_bck) | |||
endphoneme | |||
// u, ụ, final word vowel | |||
phoneme u~ | |||
vwl | |||
length 175 | |||
FMT(vnasal/u_n) | |||
endphoneme | |||
//ṿ, v | |||
//from ph_french | |||
phoneme W~ | |||
vwl | |||
length 175 | |||
FMT(vnasal/W_n) | |||
endphoneme | |||
// fix specific consonants to last long enough to be heard | |||
// Each phoneme below is imported from the base2 table and then given its own length.
phoneme l | |||
import_phoneme base2/l | |||
length 150 | |||
endphoneme | |||
// NOTE(review): the language notes still list 'm' and 'n' as hard to perceive,
// so the m and n lengths below may need further tuning.
phoneme m | |||
import_phoneme base2/m | |||
length 400 | |||
endphoneme | |||
phoneme n | |||
import_phoneme base2/n | |||
length 200 | |||
endphoneme | |||
phoneme w | |||
import_phoneme base2/w | |||
length 100 | |||
endphoneme |
@@ -1824,6 +1824,9 @@ include ph_zh_yue | |||
// *** Tables extending base2 *** | |||
phonemetable chr base2 | |||
include ph_cherokee | |||
phonemetable el base2 | |||
include ph_greek | |||