Browse Source

Merge pull request #748

master
Valdis Vitolins 5 years ago
parent
commit
1af7ec6e07
11 changed files with 634 additions and 1 deletions
  1. 1
    0
      CHANGELOG.md
  2. 5
    0
      Makefile.am
  3. 0
    0
      dictsource/chr_list
  4. 320
    0
      dictsource/chr_rules
  5. 2
    0
      docs/index.md
  6. 4
    1
      docs/languages.md
  7. 28
    0
      docs/languages/iro/chr.md
  8. 17
    0
      espeak-ng-data/lang/iro/chr
  9. 44
    0
      phsource/intonation
  10. 210
    0
      phsource/ph_cherokee
  11. 3
    0
      phsource/phonemes

+ 1
- 0
CHANGELOG.md View File

@@ -11,6 +11,7 @@ The espeak-ng project is a fork of the espeak project.

updated languages:

* chr (Cherokee) -- Michael Conrad
* de (German) -- Karl Eick
* el (Modern Greek) -- Reece Dunn (support for variant Greek letter forms)
* en (English) -- Steven Presser, Ben Talagan

+ 5
- 0
Makefile.am View File

@@ -309,6 +309,7 @@ phsource/phonemes.stamp: \
phsource/ph_bengali \
phsource/ph_bulgarian \
phsource/ph_catalan \
phsource/ph_cherokee \
phsource/ph_consonants \
phsource/ph_croatian \
phsource/ph_czech \
@@ -470,6 +471,7 @@ dictionaries: \
espeak-ng-data/bpy_dict \
espeak-ng-data/bs_dict \
espeak-ng-data/ca_dict \
espeak-ng-data/chr_dict \
espeak-ng-data/cs_dict \
espeak-ng-data/cy_dict \
espeak-ng-data/da_dict \
@@ -593,6 +595,9 @@ espeak-ng-data/bs_dict: dictsource/bs_list dictsource/bs_rules dictsource/bs_ext
ca: espeak-ng-data/ca_dict
espeak-ng-data/ca_dict: dictsource/ca_list dictsource/ca_rules dictsource/ca_extra dictsource/ca_emoji

chr: espeak-ng-data/chr_dict
espeak-ng-data/chr_dict: dictsource/chr_list dictsource/chr_rules

cs: espeak-ng-data/cs_dict
espeak-ng-data/cs_dict: dictsource/cs_list dictsource/cs_rules dictsource/cs_extra dictsource/cs_emoji


+ 0
- 0
dictsource/chr_list View File


+ 320
- 0
dictsource/chr_rules View File

@@ -0,0 +1,320 @@

// Letter groups. Each ".Lnn" line names a set of strings that the rules refer to
// as "Lnn", either before the match ("L02) a") or after it ("a (L03L01").
//tone place holders
.L01 xb xc xcxd xdxc xd xdxf xfxd xf
//nasal consonants
.L02 m n
//all consonants
.L03 ch d g h j k l m n s t w y


// Rewrite the superscript tone digits as the ASCII place holders (xb, xc, xd, xf)
// that the L01 letter group and the "x" rule group match on.
.replace

¹ xb
² xc
³ xd
⁴ xf

//CED pp xi-xii
// Tone place holders -> tone digits. A single place holder or a pair of them is
// turned into the digit string that names the tone.
// NOTE(review): ph_cherokee defines tone phonemes 1, 2, 3, 4, 23, 32 and 43 only.
// "21" (xb) and "34" (xdxf) have no phoneme of that name there, and the word-final
// vowel rules emit "1" for xb rather than "21" - confirm these resolve as intended.
.group x
xb 21
xc 2
xcxd 23
xdxc 32
xd 3
xdxf 34
xfxd 43
xf 4

//CED page x
// The glottal-stop letter is emitted as the phoneme "?".
.group ɂ
ɂ ?
//CED - none of the vowels are diphthongs, even though the pronunciation key uses diphthongs. pg ix - footnote

//CED pg ix
// Contexts used below: "L02)" = preceded by a nasal consonant, "(_" = followed by
// the end of the word, "L03" = a consonant, "L01" = a tone place holder.
.group a
//long
a A::
L02) a A~:
//final word vowels are short, nasal, and have a highfall tone
a (_ A~43
L02) a (_ A~43
// word-final vowel carrying its own tone mark: emit that tone instead of 43
axb (_ A~1
axc (_ A~2
axd (_ A~3
axf (_ A~4
// vowel + one to three consonants + end of word
// NOTE(review): this group emits the nasal A~43 here, while the e/i/o/u groups emit
// their non-nasal vowel (E43, I43, ...) in the same context - confirm which is intended.
a (L03_ A~43
a (L03L03_ A~43
a (L03L03L03_ A~43
// vowel + one to three consonants + tone place holder: no extra ":" lengthener
a (L03L01 A:
a (L03L03L01 A:
a (L03L03L03L01 A:
L02) a (L03L01 A~
L02) a (L03L03L01 A~
L02) a (L03L03L03L01 A~
//short followed by s has an h sound added, see CED pg x, footnote.
as (L01 A:hs
as (L03L01 A:hs
as (L03L03L01 A:hs
as (L03L03L03L01 A:hs
L02) as (L01 A~hs
L02) as (L03L01 A~hs
L02) as (L03L03L01 A~hs
L02) as (L03L03L03L01 A~hs
// Rules for "e". "L02)" = preceded by a nasal consonant, "(_" = followed by the end
// of the word, "L03" = a consonant, "L01" = a tone place holder.
.group e
// default: vowel followed by the ":" lengthener (nasal form after m/n)
e E:
L02) e E~:
// vowel + one to four consonants + end of word: tone 43 on the non-nasal vowel
e (L03_ E43
e (L03L03_ E43
e (L03L03L03_ E43
e (L03L03L03L03_ E43

//final word vowels are short, nasal, and have a highfall tone
e (_ E~43
L02) e (_ E~43
// word-final vowel carrying its own tone mark: emit that tone instead of 43
exb (_ E~1
exc (_ E~2
exd (_ E~3
exf (_ E~4
// vowel + one to three consonants + tone place holder: no ":" lengthener
e (L03L01 E
e (L03L03L01 E
e (L03L03L03L01 E

L02) e (L03L01 E~
L02) e (L03L03L01 E~
L02) e (L03L03L03L01 E~
// "es" before a tone place holder (optionally with consonants between): insert h before s
es (L01 Ehs
es (L03L01 Ehs
es (L03L03L01 Ehs
es (L03L03L03L01 Ehs
L02) es (L01 E~hs
L02) es (L03L01 E~hs
L02) es (L03L03L01 E~hs
L02) es (L03L03L03L01 E~hs

// Rules for "i". "L02)" = preceded by a nasal consonant, "(_" = followed by the end
// of the word, "L03" = a consonant, "L01" = a tone place holder.
.group i
// default: vowel followed by the ":" lengthener (nasal form after m/n)
i I:
L02) i I~:
//final word vowels are short, nasal, and have a highfall tone
i (_ I~43
L02) i (_ I~43
// word-final vowel carrying its own tone mark: emit that tone instead of 43
ixb (_ I~1
ixc (_ I~2
ixd (_ I~3
ixf (_ I~4
// vowel + one to three consonants + end of word: tone 43 on the non-nasal vowel
i (L03_ I43
i (L03L03_ I43
i (L03L03L03_ I43
// vowel + one to three consonants + tone place holder: no ":" lengthener
i (L03L01 I
i (L03L03L01 I
i (L03L03L03L01 I
L02) i (L03L01 I~
L02) i (L03L03L01 I~
L02) i (L03L03L03L01 I~
// "is" before a tone place holder (optionally with consonants between): insert h before s
is (L01 Ihs
is (L03L01 Ihs
is (L03L03L01 Ihs
is (L03L03L03L01 Ihs
L02) is (L01 I~hs
L02) is (L03L01 I~hs
L02) is (L03L03L01 I~hs
L02) is (L03L03L03L01 I~hs
// Rules for "o". "L02)" = preceded by a nasal consonant, "(_" = followed by the end
// of the word, "L03" = a consonant, "L01" = a tone place holder.
.group o
// default: vowel followed by the ":" lengthener (nasal form after m/n)
o o:
L02) o o~:
// vowel + one to four consonants + end of word: tone 43 on the non-nasal vowel
o (L03_ o43
o (L03L03_ o43
o (L03L03L03_ o43
o (L03L03L03L03_ o43

//final word vowels are short, nasal, and have a highfall tone
o (_ o~43
L02) o (_ o~43
// word-final vowel carrying its own tone mark: emit that tone instead of 43
oxb (_ o~1
oxc (_ o~2
oxd (_ o~3
oxf (_ o~4
// vowel + one to three consonants + tone place holder: no ":" lengthener
o (L03L01 o
o (L03L03L01 o
o (L03L03L01 o
o (L03L03L03L01 o
L02) o (L03L01 o~
L02) o (L03L03L01 o~
L02) o (L03L03L03L01 o~
// "os" before a tone place holder: insert h before s
// NOTE(review): the a/e/i groups also cover three intervening consonants
// (L03L03L03L01); this group stops at two - confirm whether that is deliberate.
os (L01 ohs
os (L03L01 ohs
os (L03L03L01 ohs
L02) os (L01 o~hs
L02) os (L03L01 o~hs
L02) os (L03L03L01 o~hs
// Rules for "u". "L02)" = preceded by a nasal consonant, "(_" = followed by the end
// of the word, "L03" = a consonant, "L01" = a tone place holder.
.group u
// default: vowel followed by the ":" lengthener (nasal form after m/n)
u u:
L02) u u~:
// vowel + one to four consonants + end of word: tone 43 on the non-nasal vowel
u (L03_ u43
u (L03L03_ u43
u (L03L03L03_ u43
u (L03L03L03L03_ u43
//final word vowels are short, nasal, and have a highfall tone
u (_ u~43
L02) u (_ u~43
// word-final vowel carrying its own tone mark: emit that tone instead of 43
uxb (_ u~1
uxc (_ u~2
uxd (_ u~3
uxf (_ u~4
// vowel + one to three consonants + tone place holder: no ":" lengthener
u (L03L01 u
u (L03L03L01 u
u (L03L03L03L01 u
L02) u (L03L01 u~
L02) u (L03L03L01 u~
L02) u (L03L03L03L01 u~
// "us" before a tone place holder: insert h before s
// NOTE(review): the a/e/i groups also cover three intervening consonants
// (L03L03L03L01); this group stops at two - confirm whether that is deliberate.
us (L01 uhs
us (L03L01 uhs
us (L03L03L01 uhs
L02) us (L01 u~hs
L02) us (L03L01 u~hs
L02) us (L03L03L01 u~hs
// Rules for "v". Every rule emits the nasal phoneme W~, so there are no separate
// "L02)" (after a nasal consonant) variants as in the other vowel groups.
// "(_" = followed by the end of the word, "L03" = a consonant, "L01" = a tone place holder.
.group v
// default: vowel followed by the ":" lengthener
v W~:
// vowel + one to three consonants + end of word, or vowel at end of word: tone 43
v (L03_ W~43
v (L03L03_ W~43
v (L03L03L03_ W~43
v (_ W~43
// word-final vowel carrying its own tone mark: emit that tone instead of 43
vxb (_ W~1
vxc (_ W~2
vxd (_ W~3
vxf (_ W~4
// vowel + one to three consonants + tone place holder: no ":" lengthener
v (L03L01 W~
v (L03L03L01 W~
v (L03L03L03L01 W~
// "vs" before a tone place holder (up to two consonants between): insert h before s
vs (L01 W~hs
vs (L03L01 W~hs
vs (L03L03L01 W~hs

// Rules for the dotted vowel "ạ". "L02)" = preceded by a nasal consonant (m, n),
// "(_" = followed by the end of the word.
.group ạ
ạ A:
L02) ạ A~
// word-final: nasal vowel with tone 43
ạ (_ A~43
// before s an h is inserted; the plain rule keeps the non-nasal vowel (as "as" and
// the ẹ/ị/ọ/ụ groups do) and only the L02) rule emits the nasal one
ạs A:hs
L02) ạs A~hs

// Dotted vowels ẹ, ị, ọ, ụ. All four groups have the same five rules:
//   plain vowel            -> vowel without the ":" lengthener
//   after a nasal (L02)    -> nasal vowel
//   at the end of the word -> nasal vowel with tone 43
//   before s               -> h inserted between vowel and s (nasal vowel after L02)
.group ẹ
ẹ E
L02) ẹ E~
ẹ (_ E~43
ẹs Ehs
L02) ẹs E~hs
.group ị
ị I
L02) ị I~
ị (_ I~43
ịs Ihs
L02) ịs I~hs
.group ọ
ọ o
L02) ọ o~
ọ (_ o~43
ọs ohs
L02) ọs o~hs
.group ụ
ụ u
L02) ụ u~
ụ (_ u~43
ụs uhs
L02) ụs u~hs
// Rules for the dotted vowel "ṿ"; every rule emits the nasal phoneme W~.
.group ṿ
ṿ W~
// at the end of the word (_) add tone 43, as the other dotted-vowel groups do;
// without the "(_" context this rule would match the same text as the plain one
ṿ (_ W~43
// before s an h is inserted
ṿs W~hs

// Consonants. Most letters map to one phoneme; the exceptions are commented below.
// "(_" = followed by the end of the word, "L01" = a tone place holder, "L03" = a consonant.
.group ch
ch tS
.group d
d d
.group g
g g

.group h
h h
//if word ends in hn or hy, transpose the last two sounds. CED pg x footnote.
hn (_ nh
hn (L01_ nh
hy (_ jh
hy (L01_ jh
.group j
j dZ
.group k
k k
.group l
l l
// "ls" gets an h inserted before the l
ls hls
.group m
m m
.group n
n n
.group s
s s
.group t
t t
.group w
w w
// "wh" before a tone place holder or a consonant: the h is dropped
wh (L01 w
wh (L03 w
.group y
y j
// "yh" before a tone place holder or a consonant: the h is dropped
yh (L01 j
yh (L03 j

+ 2
- 0
docs/index.md View File

@@ -23,6 +23,8 @@
- [Conlang X-SAMPA Transcription Scheme](phonemes/cxs.md)
- Pronunciation Guides
- [English](languages/gmw/en.md)
- Other Miscellaneous Information
- Notes on the [Cherokee](languages/iro/chr.md) implementation.
- [License](../COPYING)

----------

+ 4
- 1
docs/languages.md View File

@@ -8,7 +8,7 @@ and dialects,
[private-use extensions](https://raw.githubusercontent.com/espeak-ng/bcp47-data/master/bcp47-extensions)
have been used.

The 112 supported languages and accents are:
The 113 supported languages and accents are:

| Family Code | Identifier | Language Family | Language | Accent/Dialect |
|-------------|-------------------|-----------------------|-----------------------------|------------------------|
@@ -29,6 +29,7 @@ The 112 supported languages and accents are:
| `zls` | `bg` | South Slavic | Bulgarian | |
| `sit` | `my` | Sino-Tibetan | Burmese | |
| `roa` | `ca` | Romance | Catalan | |
| `iro` | `chr` | Iroquoian | Cherokee <sup>\[4\]</sup> | Western/C.E.D. |
| `sit` | `yue` | Sino-Tibetan | Chinese | Cantonese |
| `sit` | `hak` | Sino-Tibetan | Chinese | Hakka |
| `map` | `haw` | Austronesian | Hawaiian | |
@@ -131,3 +132,5 @@ The 112 supported languages and accents are:
\[2\] Farsi/Persian written using English (Latin) characters.

\[3\] Currently, only Hiragana and Katakana are supported.

\[4\] Only Cherokee-English Dictionary fully annotated UTF-8 pronunciations are supported. Syllabary is not supported.

+ 28
- 0
docs/languages/iro/chr.md View File

@@ -0,0 +1,28 @@

# Western Cherokee

Cherokee is a tonal language which does not use stress.

This is an initial attempt at supporting Cherokee "DF" phonetics. (See Cherokee English Dictionary - Feeling - 1972). It does not support Syllabary or transliterated/romanized texts, neither of which indicate tone or cadence.

The language file is configured to use stress on all syllables to mimic having no stress by having the stress equal across all syllables.

Generally, long vowels are twice as long as their short counterparts.

Trying to get espeak-ng's output to have the correct range on the various pitch contours has been difficult as documentation on how to do this is scarce.

Note: The final "High Fall" tone generally found at the end of words doesn't always start high enough and never falls low enough. My attempts at increasing the range of the pitch contour for final word vowels show that specifying too large a range in a phoneme's tone statement causes some sort of disconnect between what one is trying to specify and what is actually generated. (At least by analysis using the Praat software).

I have also attempted to enforce a zero intonation sentence structure to try and prevent any built-in intonation rules from slaughtering the pitch contours of syllables because of their locations.

In general, the pitch contours seem to come out sounding "flat" and "constricted" when compared with the pitch contours from actual human speech. I do not know how to fix this.

This language should be considered as alpha level *testing* and is **not** production ready.

TODO:

* Devoice short vowels when followed by an 'h'.
* Fix where the 'm' and 'n' consonants are hard to perceive.
* Add rules to handle the C.E.D. like phonetics as used by the Raven Rock Dictionary.
* Add a basic Eastern Cherokee dialect to account for differences of pronunciations of 's vs sh' and 'j/ch vs ts'.
* Figure out why ',' is treated as part of a word and not a short pause and fix it.

+ 17
- 0
espeak-ng-data/lang/iro/chr View File

@@ -0,0 +1,17 @@
name Cherokee
language chr
maintainer Michael Conrad <[email protected]>
status testing
voicing 100
speed 60
words 0 2
//use the "chr" phoneme table (declared in phsource/phonemes, defined in ph_cherokee)
phonemes chr
//stress on all syllables to simulate stress on no syllables
stressRule 9
intonation 3 // Less intonation, and comma does not raise the pitch.
pitch 30 90

//custom tunes from phsource/intonation; their contour fields are zeroed so that
//sentence intonation does not disturb the per-syllable tone contours
tunes chrs chrc chrq chre

stressAmp 8 8 8 8 8 8 8 8 //stress all vowels the same
stressLength 180 180 180 180 180 180 180 180 //stress all vowels the same

+ 44
- 0
phsource/intonation View File

@@ -266,3 +266,47 @@ headextend 16 82 50 32 16
nucleus0 fall 92 8
nucleus fall 90 77 76 8
endtune

// Cherokee tune "chrs", first of the four tunes named on the voice file's "tunes" line
// (presumably the statement tune - confirm against the tunes option order).
// Every numeric field is 0 except the first field of "head", so the tune adds no
// sentence-level pitch movement on top of the per-syllable tones.
tune chrs
prehead 0 0
headenv fall 0
head 1 0 0 0 0
headextend 0 0 0 0 0 0 0 0
nucleus fall 0 0 0 0
nucleus0 fall 0 0
onset 0 0 0
headlast 0 0 0
endtune

// Cherokee tune "chrc", second of the four tunes named on the voice file's "tunes" line
// (presumably the comma tune - confirm against the tunes option order).
// Every numeric field is 0 except the first field of "head", so the tune adds no
// sentence-level pitch movement on top of the per-syllable tones.
tune chrc
prehead 0 0
headenv fall 0
head 1 0 0 0 0
headextend 0 0 0 0 0 0 0 0
nucleus fall 0 0 0 0
nucleus0 fall 0 0
onset 0 0 0
headlast 0 0 0
endtune

// Cherokee tune "chrq", third of the four tunes named on the voice file's "tunes" line
// (presumably the question tune - confirm against the tunes option order).
// Every numeric field is 0 except the first field of "head", so the tune adds no
// sentence-level pitch movement on top of the per-syllable tones.
tune chrq
prehead 0 0
headenv fall 0
head 1 0 0 0 0
headextend 0 0 0 0 0 0 0 0
nucleus fall 0 0 0 0
nucleus0 fall 0 0
onset 0 0 0
headlast 0 0 0
endtune

// Cherokee tune "chre", last of the four tunes named on the voice file's "tunes" line
// (presumably the exclamation tune - confirm against the tunes option order).
// Every numeric field is 0 except the first field of "head", so the tune adds no
// sentence-level pitch movement on top of the per-syllable tones.
tune chre
prehead 0 0
headenv fall 0
head 1 0 0 0 0
headextend 0 0 0 0 0 0 0 0
nucleus fall 0 0 0 0
nucleus0 fall 0 0
onset 0 0 0
headlast 0 0 0
endtune

+ 210
- 0
phsource/ph_cherokee View File

@@ -0,0 +1,210 @@
//*******************************************************************
// TONES
//*******************************************************************

//*******************************************************************
// REFERENCE MATERIALS
//*******************************************************************
// Collaborative Documentation and Revitalization of Cherokee Tone
// https://scholarspace.manoa.hawaii.edu/bitstream/handle/10125/24630/herrick.pdf
// Tones fall within a range of 80–140 Hz (pg 21, footnote)
// the mid tone (2) starts at 104 Hz (P1) and falls approximately 2 Hz at each of the next four points, ending at 96 Hz at P5.
//
// Building Tone Resources for Second Language Learners from Phonetic Documentation
// https://scholarspace.manoa.hawaii.edu/bitstream/10125/24737/hirata-edds_herrick.pdf
//
// Pitch contours taken from Praat chart, pg 295
//
// (¹) low fall: 106, 102, 97, 92, 88
// (²) low/mid: 104, 100, 98, 97, 96
// (³) high: 108, 109, 110, 111, 112
// (⁴) super high: 109, 113, 118, 122, 127
// (²³) rising: 97, 98, 101, 104, 109
// (³²) falling: 122, 120, 115, 106, 97

// A REFERENCE GRAMMAR OF OKLAHOMA CHEROKEE (2008)
// Short vowels preceding /h/ are devoiced, pg 45 (how to do this in espeak-ng?)
// Word final vowels are nasalized
// Vowels are nasalized after a nasal consonant
// Vowels are either long or short; a long vowel takes
// approximately twice as long to pronounce as its
// short counterpart., pg 46

//int pitch1 = 0;
//int pitch2 = 0;
//int pitch_env = 0;
//int amp_env = 0;
//IF (pitch1 >= pitch2) the pitch values get swapped (???)

// Tone phonemes. Each is named after the tone digits that chr_rules emits and sets a
// pitch contour with Tone(pitch1, pitch2, pitch_env, amp_env).
// The trailing "need a pitch contour of ..." comments give the measured target; the
// values actually passed to Tone() differ from those targets.
// NOTE(review): the arguments look like Hz readings taken from the Praat chart -
// confirm the unit and range that Tone() actually expects.
phoneme 1 // low fall (¹)
stress
ipa ˨˩
//Tone(106, 88, envelope/p_fall, NULL)
Tone(106, 85, envelope/p_fall, NULL) //need a pitch contour of 106-88 for a
endphoneme


phoneme 2 // low (²)
stress
ipa ˨
//Tone(104, 96, envelope/p_fall, NULL)
//DF voice based
Tone(105, 93, envelope/p_fall, NULL) //need a pitch contour of 104-96 for a
endphoneme

phoneme 3 // high (³)
stress
ipa ˧
//Tone(108, 112, envelope/p_rise, NULL)
Tone(109, 116, envelope/p_rise, NULL) //need a pitch contour of 108-112 for a
endphoneme

phoneme 4 // extra high rise (⁴)
stress
ipa ˧˦
Tone(112, 134, envelope/p_rise, NULL) //need a pitch contour of 109-127 for a
endphoneme

phoneme 23 // rising (²³)
stress
ipa ˨˧
Tone(96, 111, envelope/p_rise, NULL) //need a pitch contour of 97-109 for a
endphoneme

phoneme 32 // falling (³²)
stress
ipa ˧˨
Tone(128, 96, envelope/p_fall, NULL) //need a pitch contour of 122-97 for a
endphoneme

// Tone 43 is the one chr_rules attaches to word-final vowels.
phoneme 43 // extra high fall (⁴³) - end of word tone
stress
ipa ˦˧
//need a pitch contour of 122-75 for a~
//but can't seem to get espeak-ng to generate it when analysed via praat
Tone(127, 71, envelope/p_fall, NULL)
//length 150
endphoneme

//*******************************************************************
// LANGUAGE SPECIFIC CONSONANTS (PLOSIVES/STOPS)
//*******************************************************************
//none.

//*******************************************************************
// LANGUAGE SPECIFIC VOWELS
//*******************************************************************

// Every vowel below is declared with "vwl" and the same "length 175". The ":" phoneme
// (also length 175) is what chr_rules appends after a vowel to mark it long.
// Phonemes whose name ends in "~" use the vnasal/ formant data; the others use vowel/.
// The "from ph_..." comments record where each definition was copied from.

//from phonemes
phoneme : // lengthen previous vowel by "length"
virtual
length 175
endphoneme

// ạ, a
//from ph_en
phoneme A:
vwl
length 175
FMT(vowel/aa_2)
endphoneme

// ạ, a, end of word vowel
//from ph_english
phoneme A~
vwl
length 175
FMT(vnasal/aa_n2)
endphoneme

// e, ẹ
//from ph_base2
phoneme E
vwl
length 175
FMT(vowel/ee_1)
endphoneme

// e, ẹ, final word vowel
//from ph_konkani
phoneme E~
vwl
length 175
FMT(vnasal/ee_n2)
endphoneme

// i, ị
//from ph_en
phoneme I
vwl
length 175
FMT(vowel/ii_2)
endphoneme

// i, ị, final word vowel
//from ph_konkani
phoneme I~
vwl
length 175
FMT(vnasal/ii_n)
endphoneme

// o, ọ
//from ph_base2
phoneme o
vwl
length 175
FMT(vowel/o)
endphoneme

// o, ọ, final word vowel
phoneme o~
vwl
length 175
FMT(vnasal/o_n)
endphoneme

// u, ụ
//from ph_base2
phoneme u
vwl
length 175
FMT(vowel/u_bck)
endphoneme

// u, ụ, final word vowel
phoneme u~
vwl
length 175
FMT(vnasal/u_n)
endphoneme

//ṿ, v
//from ph_french
// chr_rules maps both v and ṿ to this nasal phoneme only; there is no non-nasal form.
phoneme W~
vwl
length 175
FMT(vnasal/W_n)
endphoneme

// fix specific consonants to last long enough to be heard
// Each phoneme below is imported unchanged from base2 and then given its own length.
// NOTE(review): the lengths differ widely (m 400, n 200, l 150, w 100) and the
// language notes still list m/n as hard to perceive - confirm these values by ear.
phoneme l
import_phoneme base2/l
length 150
endphoneme

phoneme m
import_phoneme base2/m
length 400
endphoneme

phoneme n
import_phoneme base2/n
length 200
endphoneme

phoneme w
import_phoneme base2/w
length 100
endphoneme

+ 3
- 0
phsource/phonemes View File

@@ -1824,6 +1824,9 @@ include ph_zh_yue

// *** Tables extending base2 ***

// Cherokee: extends base2 with the tone, vowel and consonant definitions in ph_cherokee
phonemetable chr base2
include ph_cherokee

phonemetable el base2
include ph_greek


Loading…
Cancel
Save