5 years ago · 1af7ec6e07
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -11,6 +11,7 @@ The espeak-ng project is a fork of the espeak project.

 updated languages:

 *  chr (Cherokee) -- Michael Conrad
 *  de (German) -- Karl Eick
 *  el (Modern Greek) -- Reece Dunn (support for variant Greek letter forms)
 *  en (English) -- Steven Presser, Ben Talagan
--- a/Makefile.am
+++ b/Makefile.am
@@ -309,6 +309,7 @@ phsource/phonemes.stamp: \
 	phsource/ph_bengali \
 	phsource/ph_bulgarian \
 	phsource/ph_catalan \
 	phsource/ph_cherokee \
 	phsource/ph_consonants \
 	phsource/ph_croatian \
 	phsource/ph_czech \
@@ -470,6 +471,7 @@ dictionaries: \
 	espeak-ng-data/bpy_dict \
 	espeak-ng-data/bs_dict \
 	espeak-ng-data/ca_dict \
 	espeak-ng-data/chr_dict \
 	espeak-ng-data/cs_dict \
 	espeak-ng-data/cy_dict \
 	espeak-ng-data/da_dict \
@@ -593,6 +595,9 @@ espeak-ng-data/bs_dict: dictsource/bs_list dictsource/bs_rules dictsource/bs_ext
 ca: espeak-ng-data/ca_dict
 espeak-ng-data/ca_dict: dictsource/ca_list dictsource/ca_rules dictsource/ca_extra dictsource/ca_emoji

 chr: espeak-ng-data/chr_dict
 espeak-ng-data/chr_dict: dictsource/chr_list dictsource/chr_rules

 cs: espeak-ng-data/cs_dict
 espeak-ng-data/cs_dict: dictsource/cs_list dictsource/cs_rules dictsource/cs_extra dictsource/cs_emoji

--- a/dictsource/chr_list
+++ b/dictsource/chr_list
--- a/dictsource/chr_rules
+++ b/dictsource/chr_rules
@@ -0,0 +1,320 @@

 // Letter groups referenced from the rule contexts in this file as L01, L02 and L03.
 //tone place holders
 // (the ASCII stand-ins that .replace substitutes for the superscript tone
 //  digits, including the two-mark combinations handled by .group x)
 .L01	xb xc xcxd xdxc xd xdxf xfxd xf
 //nasal consonants
 // (used as the pre-context "L02)" that selects the nasal vowel phonemes)
 .L02	m n
 //all consonants
 // (used in post-contexts to look past one or more consonants after a vowel)
 .L03	ch d g h j k l m n s t w y


 // Rewrite each superscript tone digit as a two-letter ASCII placeholder
 // (xb, xc, xd, xf). The placeholders are what .L01 and the rules match on.
 .replace

 ¹		xb
 ²		xc
 ³		xd
 ⁴		xf

 //CED pp xi-xii
 // Tone placeholders (produced by .replace from the superscript tone digits)
 // that were not consumed by a vowel rule are translated to tone phonemes here.
 // NOTE(review): ph_cherokee defines tone phonemes 1, 2, 3, 4, 23, 32 and 43,
 // but none named 21 or 34 - confirm how the "21" and "34" outputs are parsed.
 .group x
 	xb	21
 	
 	xc	2
 	
 	xcxd	23
 	xdxc	32
 	
 	xd	3
 	
 	xdxf	34
 	xfxd	43
 	
 	xf	4

 //CED page x
 // The letter ɂ is emitted as the phoneme "?".
 // NOTE(review): "?" is not defined in ph_cherokee; presumably it is the
 // glottal stop inherited from the base phoneme tables - confirm.
 .group ɂ
    ɂ ?
    
 //CED - none of the vowels are diphthongs, even though the pronunciation key uses diphthongs. pg ix - footnote

 //CED pg ix
 // Vowel "a". A: is the base vowel phoneme and A~ its nasal counterpart; a
 // further ':' is the length phoneme. "L02)" rules apply after m or n.
 .group a
 	//long
    // Default: base vowel followed by the ':' length phoneme.
    a A::
    L02) a A~:
    
    //final word vowels are short, nasal, and have a highfall tone
    a (_ A~43
    L02) a (_ A~43
    // Word-final vowel carrying an explicit tone placeholder: the placeholder
    // is consumed by the rule and its tone (1-4) replaces the default 43.
    axb (_	A~1
    axc (_	A~2
    axd (_	A~3
    axf (_	A~4
    
    // Followed by one to three consonants and then the end of the word.
    // NOTE(review): the e/i/o/u groups use the non-nasal vowel in these
    // rules, whereas this group uses nasal A~ - confirm this is intended.
    a	(L03_	A~43
    a	(L03L03_	A~43
    a	(L03L03L03_	A~43
    
    // Followed by one to three consonants and then a tone placeholder:
    // the vowel is emitted without the ':' length phoneme.
    a	(L03L01 A:
    a	(L03L03L01 A:
    a	(L03L03L03L01 A:
    
    // Same contexts after a nasal consonant: nasal vowel.
    L02)	a	(L03L01 A~
    L02)	a	(L03L03L01 A~
    L02)	a	(L03L03L03L01 A~
    
    //short followed by s has an h sound added, see CED pg x, footnote.
    as	(L01 A:hs
    as	(L03L01 A:hs
    as	(L03L03L01 A:hs
    as	(L03L03L03L01 A:hs    
    
    L02)	as	(L01 A~hs
    L02)	as	(L03L01 A~hs
    L02)	as	(L03L03L01 A~hs
    L02)	as	(L03L03L03L01 A~hs
    
 // Vowel "e". E is the base vowel phoneme and E~ its nasal counterpart; ':' is
 // the length phoneme. "L02)" rules apply after m or n.
 .group e
    // Default: vowel followed by the ':' length phoneme (long).
    e	E:
    L02)	e	E~:
    
    // Followed by one to four consonants and then the end of the word:
    // no length phoneme, high-fall tone 43.
    e	(L03_	E43
    e	(L03L03_	E43
    e	(L03L03L03_	E43
    e	(L03L03L03L03_	E43

 	//final word vowels are short, nasal, and have a highfall tone
    e	(_	E~43
    L02)	e	(_	E~43
    // Word-final vowel carrying an explicit tone placeholder: the placeholder
    // is consumed by the rule and its tone (1-4) replaces the default 43.
    exb	(_	E~1
    exc	(_	E~2
    exd	(_	E~3
    exf	(_	E~4
    
    // Followed by one to three consonants and then a tone placeholder:
    // the vowel is emitted without the ':' length phoneme.
    e	(L03L01 E
    e	(L03L03L01 E
    e	(L03L03L03L01 E

 	L02)	e	(L03L01 E~
    L02)	e	(L03L03L01 E~
    L02)	e	(L03L03L01 E~
    
    // Short vowel before "s": an "h" phoneme is inserted ahead of the "s".
    es	(L01 Ehs
    es	(L03L01 Ehs
    es	(L03L03L01 Ehs
    es	(L03L03L03L01 Ehs
    
    L02)	es	(L01 E~hs
    L02)	es	(L03L01 E~hs
    L02)	es	(L03L03L01 E~hs
    L02)	es	(L03L03L03L01 E~hs

 // Vowel "i". I is the base vowel phoneme and I~ its nasal counterpart; ':' is
 // the length phoneme. "L02)" rules apply after m or n.
 .group i
    // Default: vowel followed by the ':' length phoneme (long).
    i	I:
    L02)	i	I~:
    
 	//final word vowels are short, nasal, and have a highfall tone    
    i	(_	I~43
    L02)	i	(_	I~43
    // Word-final vowel carrying an explicit tone placeholder: the placeholder
    // is consumed by the rule and its tone (1-4) replaces the default 43.
    ixb	(_	I~1
    ixc	(_	I~2
    ixd	(_	I~3
    ixf	(_	I~4
    
    
    // Followed by one to three consonants and then the end of the word:
    // no length phoneme, high-fall tone 43.
    i	(L03_	I43
    i	(L03L03_	I43
    i	(L03L03L03_	I43
    
    // Followed by one to three consonants and then a tone placeholder:
    // the vowel is emitted without the ':' length phoneme.
    i	(L03L01 I
    i	(L03L03L01 I
    i	(L03L03L03L01 I
    
    L02)	i	(L03L01 I~
    L02)	i	(L03L03L01 I~
    L02)	i	(L03L03L03L01 I~
    
    // Short vowel before "s": an "h" phoneme is inserted ahead of the "s".
    is	(L01 Ihs
    is	(L03L01 Ihs
    is	(L03L03L01 Ihs
    is	(L03L03L03L01 Ihs
    
    L02)	is	(L01 I~hs
    L02)	is	(L03L01 I~hs
    L02)	is	(L03L03L01 I~hs
    L02)	is	(L03L03L03L01 I~hs
    
 // Vowel "o". o is the base vowel phoneme and o~ its nasal counterpart; ':' is
 // the length phoneme. "L02)" rules apply after m or n.
 .group o
    // Default: vowel followed by the ':' length phoneme (long).
    o	o:
    L02)	o	o~:
    
    // Followed by one to four consonants and then the end of the word:
    // no length phoneme, high-fall tone 43.
    o	(L03_	o43
    o	(L03L03_	o43
    o	(L03L03L03_	o43
    o	(L03L03L03L03_	o43

 	//final word vowels are short, nasal, and have a highfall tone    
    o	(_	o~43
    L02)	o	(_	o~43
    // Word-final vowel carrying an explicit tone placeholder: the placeholder
    // is consumed by the rule and its tone (1-4) replaces the default 43.
    oxb	(_	o~1
    oxc	(_	o~2
    oxd	(_	o~3
    oxf	(_	o~4
    
    // Followed by one to three consonants and then a tone placeholder:
    // the vowel is emitted without the ':' length phoneme.
    o	(L03L01 o
    o	(L03L03L01 o
    o	(L03L03L03L01 o
    
    L02)	o	(L03L01 o~
    L02)	o	(L03L03L01 o~
    L02)	o	(L03L03L03L01 o~
    
    // Short vowel before "s": an "h" phoneme is inserted ahead of the "s".
    os	(L01 ohs
    os	(L03L01 ohs
    os	(L03L03L01 ohs
    
    L02)	os	(L01 o~hs
    L02)	os	(L03L01 o~hs
    L02)	os	(L03L03L01 o~hs
    
 // Vowel "u". u is the base vowel phoneme and u~ its nasal counterpart; ':' is
 // the length phoneme. "L02)" rules apply after m or n.
 .group u
    // Default: vowel followed by the ':' length phoneme (long).
    u	u:
    L02)	u	u~:
    
    // Followed by one to four consonants and then the end of the word:
    // no length phoneme, high-fall tone 43.
    u	(L03_	u43
    u	(L03L03_	u43
    u	(L03L03L03_	u43
    u	(L03L03L03L03_	u43
    
 	//final word vowels are short, nasal, and have a highfall tone    
    u	(_	u~43
    L02)	u	(_	u~43
    // Word-final vowel carrying an explicit tone placeholder: the placeholder
    // is consumed by the rule and its tone (1-4) replaces the default 43.
    uxb	(_	u~1
    uxc	(_	u~2
    uxd	(_	u~3
    uxf	(_	u~4
    
    // Followed by one to three consonants and then a tone placeholder:
    // the vowel is emitted without the ':' length phoneme.
    u	(L03L01 u
    u	(L03L03L01 u
    u	(L03L03L03L01 u
    
    L02)	u	(L03L01 u~
    L02)	u	(L03L03L01 u~
    L02)	u	(L03L03L03L01 u~
    
    // Short vowel before "s": an "h" phoneme is inserted ahead of the "s".
    us	(L01 uhs
    us	(L03L01 uhs
    us	(L03L03L01 uhs
    
    L02)	us	(L01 u~hs
    L02)	us	(L03L01 u~hs
    L02)	us	(L03L03L01 u~hs
    
 // Vowel "v". Every rule emits the nasal phoneme W~, so unlike the other vowel
 // groups there are no separate "L02)" (after m/n) variants.
 .group v
    // Default: vowel followed by the ':' length phoneme (long).
    v	W~:
    
    // Followed by one to three consonants and then the end of the word:
    // no length phoneme, high-fall tone 43.
    v	(L03_	W~43
    v	(L03L03_	W~43
    v	(L03L03L03_	W~43
    
    // Word-final vowel: high-fall tone 43 unless an explicit tone placeholder
    // follows, in which case the placeholder is consumed and its tone is used.
    v	(_	W~43
    vxb	(_	W~1
    vxc	(_	W~2
    vxd	(_	W~3
    vxf	(_	W~4
    
    // Followed by one to three consonants and then a tone placeholder:
    // the vowel is emitted without the ':' length phoneme.
    v	(L03L01 W~
    v	(L03L03L01 W~
    v	(L03L03L03L01 W~
    
    // Short vowel before "s": an "h" phoneme is inserted ahead of the "s".
    vs	(L01 W~hs
    vs	(L03L01 W~hs
    vs	(L03L03L01 W~hs

 // Underdotted "ạ": emitted without the ':' length phoneme (short vowel).
 // A: is the base vowel phoneme; A~ is its nasal counterpart, used after a
 // nasal consonant (L02) and word-finally.
 .group ạ
    ạ A:
    L02) ạ A~
    // Word-final: nasal vowel with the high-fall tone 43.
    ạ (_ A~43
    // Before "s" an "h" phoneme is inserted. The plain rule uses the non-nasal
    // vowel, matching "as ... A:hs" in .group a and the ẹ/ị/ọ/ụ groups; only
    // the L02) variant is nasal.
    ạs A:hs
    L02) ạs A~hs

 // Underdotted "ẹ": emitted without the ':' length phoneme (short vowel).
 // E~ is the nasal counterpart, used after m/n (L02) and word-finally.
 .group ẹ
    ẹ E
    L02) ẹ E~
    // Word-final: nasal vowel with the high-fall tone 43.
    ẹ (_ E~43
    // Before "s" an "h" phoneme is inserted ahead of the "s".
    ẹs Ehs
    L02) ẹs E~hs
    
 // Underdotted "ị": emitted without the ':' length phoneme (short vowel).
 // I~ is the nasal counterpart, used after m/n (L02) and word-finally.
 .group ị
    ị I
    L02) ị I~
    // Word-final: nasal vowel with the high-fall tone 43.
    ị (_ I~43
    // Before "s" an "h" phoneme is inserted ahead of the "s".
    ịs Ihs
    L02) ịs I~hs
    
 // Underdotted "ọ": emitted without the ':' length phoneme (short vowel).
 // o~ is the nasal counterpart, used after m/n (L02) and word-finally.
 .group ọ
    ọ o
    L02) ọ o~
    // Word-final: nasal vowel with the high-fall tone 43.
    ọ (_ o~43
    // Before "s" an "h" phoneme is inserted ahead of the "s".
    ọs ohs
    L02) ọs o~hs
    
 // Underdotted "ụ": emitted without the ':' length phoneme (short vowel).
 // u~ is the nasal counterpart, used after m/n (L02) and word-finally.
 .group ụ
    ụ u
    L02) ụ u~
    // Word-final: nasal vowel with the high-fall tone 43.
    ụ (_ u~43
    // Before "s" an "h" phoneme is inserted ahead of the "s".
    ụs uhs
    L02) ụs u~hs
    
 // Underdotted "ṿ": emitted without the ':' length phoneme (short vowel).
 // W~ is already nasal, so no separate "L02)" (after m/n) rules are needed.
 .group ṿ
    ṿ W~
    // Word-final only: the "(_" post-context restricts the high-fall tone 43
    // to the end of a word, as in every other vowel group. Without it this
    // rule would match the same text as the plain rule above in all positions.
    ṿ (_ W~43
    // Before "s" an "h" phoneme is inserted ahead of the "s".
    ṿs W~hs

 // The digraph "ch" is emitted as the phoneme string tS.
 .group ch
    ch tS
    
 // The letter "d" is emitted as the phoneme d.
 .group d
    d d
    
 // The letter "g" is emitted as the phoneme g.
 .group g
    g g

 // The letter "h" is emitted as the phoneme h, except in the word-final
 // clusters handled below.
 .group h
    h h
    //if word ends in hn or hy, transpose the last two sounds. CED pg x footnote.
    // The "(L01_" variants cover a tone placeholder standing between the
    // cluster and the end of the word.
    hn (_ nh
    hn (L01_ nh
    hy (_ jh
    hy (L01_ jh
    
 // The letter "j" is emitted as the phoneme string dZ.
 .group j
     j dZ
     
 // The letter "k" is emitted as the phoneme k.
 .group k
     k k
     
 // The letter "l" is emitted as the phoneme l.
 .group l
     l l
     // "ls" is emitted as h, l, s.
     // NOTE(review): no source is cited for the inserted "h" - presumably the
     // same CED footnote that adds "h" before "s" after short vowels; confirm.
     ls hls
     
 // The letter "m" is emitted as the phoneme m.
 .group m
     m m
    
 // The letter "n" is emitted as the phoneme n.
 .group n
     n n
     
 // The letter "s" is emitted as the phoneme s.
 .group s
     s s
     
 // The letter "t" is emitted as the phoneme t.
 .group t
     t t
     
 // The letter "w" is emitted as the phoneme w.
 .group w
     w w
     // "wh" directly followed by a tone placeholder (L01) or a consonant (L03)
     // is emitted as plain w; the "h" produces no phoneme of its own.
     wh (L01 w
     wh (L03 w
     
 // The letter "y" is emitted as the phoneme j.
 .group y
     y j
     // "yh" directly followed by a tone placeholder (L01) or a consonant (L03)
     // is emitted as plain j; the "h" produces no phoneme of its own.
     yh (L01 j
     yh (L03 j
--- a/docs/index.md
+++ b/docs/index.md
@@ -23,6 +23,8 @@
    - [Conlang X-SAMPA Transcription Scheme](phonemes/cxs.md)
  - Pronunciation Guides
    - [English](languages/gmw/en.md)
 - Other Miscellaneous Information
  - Notes on the [Cherokee](languages/iro/chr.md) implementation.  
 - [License](../COPYING)

 ----------
--- a/docs/languages.md
+++ b/docs/languages.md
@@ -8,7 +8,7 @@ and dialects,
 [private-use extensions](https://raw.githubusercontent.com/espeak-ng/bcp47-data/master/bcp47-extensions)
 have been used.

 The 112 supported languages and accents are:
 The 113 supported languages and accents are:

 | Family Code | Identifier        | Language Family       | Language                    | Accent/Dialect         |
 |-------------|-------------------|-----------------------|-----------------------------|------------------------|
@@ -29,6 +29,7 @@ The 112 supported languages and accents are:
 | `zls`       | `bg`              | South Slavic          | Bulgarian                   |                        |
 | `sit`       | `my`              | Sino-Tibetan          | Burmese                     |                        |
 | `roa`       | `ca`              | Romance               | Catalan                     |                        |
 | `iro`       | `chr`             | Iroquoian             | Cherokee <sup>\[4\]</sup>   | Western/C.E.D.         |
 | `sit`       | `yue`             | Sino-Tibetan          | Chinese                     | Cantonese              |
 | `sit`       | `hak`             | Sino-Tibetan          | Chinese                     | Hakka                  |
 | `map`       | `haw`             | Austronesian          | Hawaiian                    |                        |
@@ -131,3 +132,5 @@ The 112 supported languages and accents are:
 \[2\] Farsi/Persian written using English (Latin) characters.

 \[3\] Currently, only Hiragana and Katakana are supported.

 \[4\] Only Cherokee-English Dictionary fully annotated UTF-8 pronunciations are supported. Syllabary is not supported.
--- a/docs/languages/iro/chr.md
+++ b/docs/languages/iro/chr.md
@@ -0,0 +1,28 @@

 # Western Cherokee

 Cherokee is a tonal language which does not use stress.

 This is an initial attempt at supporting Cherokee "DF" phonetics. (See Cherokee English Dictionary - Feeling - 1972). It does not support Syllabary or transliterated/romanized texts, neither of which indicate tone or cadence.

 The language file is configured to use stress on all syllables to mimic having no stress by having the stress equal across all syllables.

 Generally, long vowels are twice as long as their short counterparts.

 Trying to get espeak-ng's output to have the correct range on the various pitch contours has been difficult as documentation on how to do this is scarce. 

 Note: The final "High Fall" tone generally found at the end of words doesn't always start high enough and never falls low enough. My attempts at increasing the range of the pitch contour for final word vowels show that specifying too large a range in a phoneme's `Tone` statement causes some sort of disconnect between what one is trying to specify and what is actually generated (at least according to analysis using the Praat software).

 I have also attempted to enforce a zero intonation sentence structure to try and prevent any built-in intonation rules from slaughtering the pitch contours of syllables because of their locations.

 In general, the pitch contours seem to come out sounding "flat" and "constricted" when compared with the pitch contours from actual human speech. I do not know how to fix this.

 This language should be considered as alpha level *testing* and is **not** production ready.

 TODO:

 * Devoice short vowels when followed by an 'h'.
 * Fix where the 'm' and 'n' consonants are hard to perceive.
 * Add rules to handle the C.E.D. like phonetics as used by the Raven Rock Dictionary.
 * Add a basic Eastern Cherokee dialect to account for differences in the pronunciations of 's vs sh' and 'j/ch vs ts'.
 * Figure out why ',' is treated as part of a word and not a short pause and fix it.
--- a/espeak-ng-data/lang/iro/chr
+++ b/espeak-ng-data/lang/iro/chr
@@ -0,0 +1,17 @@
 // Voice definition for the Cherokee (chr) language.
 name Cherokee
 language chr
 maintainer Michael Conrad <[email protected]>
 status testing
 voicing 100
 speed 60
 words 0 2
 // Selects the phoneme table declared as "phonemetable chr" in phsource/phonemes.
 phonemes chr
 //stress on all syllables to simulate stress on no syllables
 stressRule 9 
 intonation 3 // Less intonation, and comma does not raise the pitch.
 pitch 30 90

 // The four chr* tunes are defined in phsource/intonation.
 tunes	chrs chrc chrq chre

 // Eight identical values on each line, so no stress level differs from another.
 stressAmp 8 8 8 8 8 8 8 8  //stress all vowels the same
 stressLength 180 180 180 180 180 180 180 180 //stress all vowels the same
--- a/phsource/intonation
+++ b/phsource/intonation
@@ -266,3 +266,47 @@ headextend 16 82 50 32 16
 nucleus0 fall  92 8
 nucleus fall  90 77 76 8
 endtune

 // Cherokee tune "chrs": the first name on the chr voice file's "tunes" line.
 // Every numeric field is 0 apart from the leading 1 on "head". The Cherokee
 // notes describe this as an attempt at a zero-intonation sentence, so that
 // clause intonation does not disturb the per-syllable tone contours.
 // NOTE(review): presumably the full-stop/statement tune - confirm against
 // the order expected by the voice "tunes" attribute.
 tune chrs
 prehead		0	0
 headenv		fall	0
 head		1	0	0	0	0
 headextend	0	0	0	0	0	0	0	0
 nucleus		fall	0	0	0	0
 nucleus0	fall	0	0
 onset		0	0	0
 headlast	0	0	0
 endtune

 // Cherokee tune "chrc": the second name on the chr voice file's "tunes" line.
 // Its values are identical to the other chr* tunes (all 0 apart from the
 // leading 1 on "head").
 // NOTE(review): presumably the comma tune - confirm against the order
 // expected by the voice "tunes" attribute.
 tune chrc
 prehead		0	0
 headenv		fall	0
 head		1	0	0	0	0
 headextend	0	0	0	0	0	0	0	0
 nucleus		fall	0	0	0	0
 nucleus0	fall	0	0
 onset		0	0	0
 headlast	0	0	0
 endtune

 // Cherokee tune "chrq": the third name on the chr voice file's "tunes" line.
 // Its values are identical to the other chr* tunes (all 0 apart from the
 // leading 1 on "head").
 // NOTE(review): presumably the question tune - confirm against the order
 // expected by the voice "tunes" attribute.
 tune chrq
 prehead		0	0
 headenv		fall	0
 head		1	0	0	0	0
 headextend	0	0	0	0	0	0	0	0
 nucleus		fall	0	0	0	0
 nucleus0	fall	0	0
 onset		0	0	0
 headlast	0	0	0
 endtune

 // Cherokee tune "chre": the fourth name on the chr voice file's "tunes" line.
 // Its values are identical to the other chr* tunes (all 0 apart from the
 // leading 1 on "head").
 // NOTE(review): presumably the exclamation tune - confirm against the order
 // expected by the voice "tunes" attribute.
 tune chre
 prehead		0	0
 headenv		fall	0
 head		1	0	0	0	0
 headextend	0	0	0	0	0	0	0	0
 nucleus		fall	0	0	0	0
 nucleus0	fall	0	0
 onset		0	0	0
 headlast	0	0	0
 endtune
--- a/phsource/ph_cherokee
+++ b/phsource/ph_cherokee
@@ -0,0 +1,210 @@
 //*******************************************************************
 // TONES
 //*******************************************************************

 //*******************************************************************
 // REFERENCE MATERIALS
 //*******************************************************************
 // Collaborative Documentation and Revitalization of Cherokee Tone
 // https://scholarspace.manoa.hawaii.edu/bitstream/handle/10125/24630/herrick.pdf
 // Tones fall within a range of 80–140 Hz (pg 21, footnote)
 // the mid tone (2) starts at 104 Hz (P1) and falls approximately 2 Hz at each of the next four points, ending at 96 Hz at P5.
 //
 // Building Tone Resources for Second Language Learners from Phonetic Documentation
 // https://scholarspace.manoa.hawaii.edu/bitstream/10125/24737/hirata-edds_herrick.pdf
 //
 // Pitch contours taken from Praat chart, pg 295 
 //
 // (¹) low fall: 106, 102, 97, 92, 88
 // (²) low/mid: 104, 100, 98, 97, 96
 // (³) high: 108, 109, 110, 111, 112
 // (⁴) super high: 109, 113, 118, 122, 127
 // (²³) rising: 97, 98, 101, 104, 109
 // (³²) falling: 122, 120, 115, 106, 97

 // A REFERENCE GRAMMAR OF OKLAHOMA CHEROKEE (2008)
 // Short vowels preceding /h/ are devoiced, pg 45 (how to do this in espeak-ng?)
 // Word final vowels are nasalized
 // Vowels are nasalized after a nasal consonant
 // Vowels are either long or short; a long vowel takes
 // approximately twice as long to pronounce as its
 // short counterpart., pg 46

 //int pitch1 = 0;
 //int pitch2 = 0;
 //int pitch_env = 0;
 //int amp_env = 0;
 //IF (pitch1 >= pitch2) the pitch values get swapped (???)

 // Tone phoneme for the CED tone mark ¹ (low fall).
 // Reference contour from the chart cited above: 106, 102, 97, 92, 88.
 phoneme 1 // low fall (¹)
  stress
  ipa ˨˩
  // Reference endpoints, kept for comparison:
  //Tone(106, 88, envelope/p_fall, NULL)
  
  // Active setting: the end value is 85 rather than the reference 88; the
  // trailing comment records the 106-88 contour being aimed for on "a".
  // NOTE(review): the units of the Tone() pitch arguments are not established
  // in this file - confirm against the phoneme table documentation.
 	Tone(106, 85, envelope/p_fall, NULL) //need a pitch contour of 106-88 for a
 endphoneme


 // Tone phoneme for the CED tone mark ² (low/mid).
 // Reference contour from the chart cited above: 104, 100, 98, 97, 96.
 phoneme 2 // low (²)
  stress
  ipa ˨
  // Reference endpoints, kept for comparison:
  //Tone(104, 96, envelope/p_fall, NULL)
  //DF voice based
  // Active setting: slightly wider (105-93) than the reference 104-96 that the
  // trailing comment records as the contour being aimed for on "a".
  Tone(105, 93, envelope/p_fall, NULL) //need a pitch contour of 104-96 for a
 endphoneme

 // Tone phoneme for the CED tone mark ³ (high).
 // Reference contour from the chart cited above: 108, 109, 110, 111, 112.
 phoneme 3 // high (³) 
  stress
  ipa ˧
  // Reference endpoints, kept for comparison:
  //Tone(108, 112, envelope/p_rise, NULL)
  // Active setting: slightly wider (109-116) than the reference 108-112 that
  // the trailing comment records as the contour being aimed for on "a".
  Tone(109, 116, envelope/p_rise, NULL) //need a pitch contour of 108-112 for a
 endphoneme

 // Tone phoneme for the CED tone mark ⁴ (extra high rise).
 // Reference contour from the chart cited above: 109, 113, 118, 122, 127.
 phoneme 4 // extra high rise (⁴)
  stress
  ipa ˧˦
  // Active setting is wider (112-134) than the reference 109-127 that the
  // trailing comment records as the contour being aimed for on "a".
  Tone(112, 134, envelope/p_rise, NULL) //need a pitch contour of 109-127 for a
 endphoneme

 // Tone phoneme for the CED tone combination ²³ (rising).
 // Reference contour from the chart cited above: 97, 98, 101, 104, 109.
 phoneme 23 // rising (²³)
  stress
  ipa ˨˧
  // Active setting is slightly wider (96-111) than the reference 97-109 that
  // the trailing comment records as the contour being aimed for on "a".
  Tone(96, 111, envelope/p_rise, NULL) //need a pitch contour of 97-109 for a
 endphoneme

 // Tone phoneme for the CED tone combination ³² (falling).
 // Reference contour from the chart cited above: 122, 120, 115, 106, 97.
 phoneme 32 // falling (³²)
  stress
  ipa ˧˨
  // Active setting starts higher (128-96) than the reference 122-97 that the
  // trailing comment records as the contour being aimed for on "a".
  Tone(128, 96, envelope/p_fall, NULL) //need a pitch contour of 122-97 for a
 endphoneme

 // Tone phoneme for the word-final high-fall tone (⁴³). The chr_rules attach
 // it to word-final vowels that carry no explicit tone mark.
 phoneme 43 // extra high fall (⁴³) - end of word tone
  stress
  ipa ˦˧
  //need a pitch contour of 122-75 for a~
  //but can't seem to get espeak-ng to generate it when analysed via praat
  // Active setting (127-71) is wider than the 122-75 target noted above.
  Tone(127, 71, envelope/p_fall, NULL)
  // Disabled experiment with an explicit length:
  //length 150
 endphoneme

 //*******************************************************************
 // LANGUAGE SPECIFIC CONSONANTS (PLOSIVES/STOPS)
 //*******************************************************************
 //none.

 //*******************************************************************
 // LANGUAGE SPECIFIC VOWELS
 //*******************************************************************

 //from phonemes
 // Length mark appended by the chr_rules to produce long vowels. Its length
 // (175) equals the length given to every vowel in this file.
 // NOTE(review): presumably chosen so a long vowel lasts about twice as long
 // as a short one, per the reference grammar note above - confirm.
 phoneme : //  lengthen previous vowel by "length"
  virtual
  length 175
 endphoneme

 // ạ, a
 //from ph_en
 // Oral vowel for "a". Despite the colon in its name this is the base
 // (short) vowel: the chr_rules write "A::" for the long form.
 phoneme A:
  vwl
  length 175
  FMT(vowel/aa_2)
 endphoneme

 // ạ, a, end of word vowel
 //from ph_english
 // Nasal counterpart of A: (vnasal formant data). The chr_rules use it after
 // m/n and in word-final position.
 phoneme A~
  vwl
  length 175
  FMT(vnasal/aa_n2)
 endphoneme

 // e, ẹ
 //from ph_base2
 // Oral vowel for "e"; the chr_rules append ':' for the long form.
 phoneme E
  vwl
  length 175
  FMT(vowel/ee_1)
 endphoneme

 // e, ẹ, final word vowel
 //from ph_konkani
 // Nasal counterpart of E (vnasal formant data). The chr_rules use it after
 // m/n and in word-final position.
 phoneme E~
  vwl
  length 175
  FMT(vnasal/ee_n2)
 endphoneme

 // i, ị
 //from ph_en
 // Oral vowel for "i"; the chr_rules append ':' for the long form.
 phoneme I
  vwl
  length 175
  FMT(vowel/ii_2)
 endphoneme

 // i, ị, final word vowel
 //from ph_konkani
 // Nasal counterpart of I (vnasal formant data). The chr_rules use it after
 // m/n and in word-final position.
 phoneme I~
  vwl
  length 175
  FMT(vnasal/ii_n)
 endphoneme

 // o, ọ
 //from ph_base2
 // Oral vowel for "o"; the chr_rules append ':' for the long form.
 phoneme o
  vwl
  length 175
  FMT(vowel/o)
 endphoneme

 // o, ọ, final word vowel
 // Nasal counterpart of o (vnasal formant data). The chr_rules use it after
 // m/n and in word-final position.
 phoneme o~
  vwl
  length 175
  FMT(vnasal/o_n)
 endphoneme

 // u, ụ
 //from ph_base2
 // Oral vowel for "u"; the chr_rules append ':' for the long form.
 phoneme u
  vwl
  length 175
  FMT(vowel/u_bck)
 endphoneme

 // u, ụ, final word vowel
 // Nasal counterpart of u (vnasal formant data). The chr_rules use it after
 // m/n and in word-final position.
 phoneme u~
  vwl
  length 175
  FMT(vnasal/u_n)
 endphoneme

 //ṿ, v
 //from ph_french
 // Nasal vowel used for "v" and "ṿ". The chr_rules emit W~ in every context;
 // no oral counterpart is defined in this file.
 phoneme W~
  vwl
  length 175
  FMT(vnasal/W_n)
 endphoneme

 // fix specific consonants to last long enough to be heard
 // "l": imports the base2 definition, then sets an explicit length of 150.
 phoneme l
 	import_phoneme base2/l
 	length 150
 endphoneme

 // "m": imports the base2 definition, then sets an explicit length of 400,
 // the largest of the four consonant lengths set in this file.
 phoneme m
 	import_phoneme base2/m
 	length 400
 endphoneme

 // "n": imports the base2 definition, then sets an explicit length of 200.
 phoneme n
 	import_phoneme base2/n
 	length 200
 endphoneme

 // "w": imports the base2 definition, then sets an explicit length of 100.
 phoneme w
 	import_phoneme base2/w
 	length 100
 endphoneme
--- a/phsource/phonemes
+++ b/phsource/phonemes
@@ -1824,6 +1824,9 @@ include ph_zh_yue

 // *** Tables extending base2 ***

 phonemetable chr base2
 include ph_cherokee

 phonemetable el base2
 include ph_greek