## Introduction

This fork adds [Luxembourgish](https://en.wikipedia.org/wiki/Luxembourgish) as the 127th language to the eSpeak-NG text-to-speech (TTS) synthesizer (version 1.50.1).

The main purpose of the project is to create a rule-based International Phonetic Alphabet ([IPA](https://en.wikipedia.org/wiki/International_Phonetic_Alphabet)) phonemizer for transcribing Luxembourgish audio utterances.

The phonemized text makes it possible to create and use high-quality synthetic Luxembourgish voices, trained with deep machine learning (ML) speech models based on neural networks.

The speech generated with the associated eSpeak-NG sound engine is based on formant synthesis and is of low quality. The related Luxembourgish voice `Luxi` is intelligible, but I did no sound optimization because my focus is on the phonemization front end.
## Luxembourgish customization

Four files are needed to include Luxembourgish as an additional language in the eSpeak-NG project:

* [phsource/ph_luxembourgish](https://github.com/mbarnig/espeak-ng-lb/blob/lëtzebuergesch/phsource/ph_letzebuergesch)
* [dictsource/lb_rules](https://github.com/mbarnig/espeak-ng-lb/blob/lëtzebuergesch/dictsource/lb_rules)
* [dictsource/lb_list](https://github.com/mbarnig/espeak-ng-lb/blob/lëtzebuergesch/dictsource/lb_list)
* [dictsource/lb_emoji](https://github.com/mbarnig/espeak-ng-lb/blob/lëtzebuergesch/dictsource/lb_emoji)

The customization process can be split into two steps:

1. convert the characters (graphemes) of the text into [phonetic descriptions](https://en.wikipedia.org/wiki/Luxembourgish_phonology) (phoneme-IDs)
2. convert the phoneme-IDs into [IPA phonemes](https://en.wikipedia.org/wiki/Help:IPA/Luxembourgish) and define instructions for generating the sound of each phoneme (see the sketch after this list).
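
For the word `Auto`, which serves as the running example in the following sections, the two steps can be sketched like this (the phoneme-IDs and the IPA result are the ones derived further down):

```
text    :  Auto
step 1  :  lb_rules / lb_list   ->  phoneme-IDs   aU t o:
step 2  :  ph_luxembourgish     ->  IPA  ɑʊtoː  +  formant synthesis instructions
```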
eSpeak-NG is an evolution of the well-known speak program for the [Acorn RISC OS](https://en.wikipedia.org/wiki/RISC_OS) computers, developed in 1995 by Jonathan Duddington. In 2007 the program was renamed [eSpeak](https://en.wikipedia.org/wiki/ESpeak). After the original developer disappeared in early 2015, a new project eSpeak-NG (new generation) was started in December 2015 under the direction of [Reece H. Dunn](https://github.com/rhdunn).

When speak was created, the phoneme-IDs were based on ASCII, the text encoding available at that time. This was never changed in the eSpeak-NG fork, to avoid diverging too much from the original code. A few years ago support for IPA names in the output was added, but this workaround is still laborious compared to a state-of-the-art solution where IPA names are used directly as phoneme-IDs. The current problems with the eSpeak-NG architecture are described in this [roadmap](https://github.com/espeak-ng/espeak-ng/wiki/eSpeak-NG-roadmap) document.

Let's start by describing the `ph_luxembourgish` file, which is the heart of the whole process.
### Phoneme inventory

We distinguish two types of phonemes: [vowels](https://en.wikipedia.org/wiki/Vowel) ([monophthongs](https://en.wikipedia.org/wiki/Monophthong) and [diphthongs](https://en.wikipedia.org/wiki/Diphthong)) and [consonants](https://en.wikipedia.org/wiki/Consonant).

I use the word `Auto`, with the two vowels `au, o` and the consonant `t`, to explain the definition of the related phonemes in the `ph_luxembourgish` file. The code consists of simple text strings. The minimal code for the word `Auto` has the following format:

```
phoneme aU
  vwl starttype #a endtype #u
  ipa ɑʊ
  length 190
  FMT(vdiph/au)
endphoneme

phoneme t
  vls alv stp
  ipa t
  WAV(ustop/t, 90)
endphoneme

phoneme o:
  vwl starttype #o endtype #o
  ipa oː
  length 170
  FMT(vowel/o)
endphoneme
```

The minimal phoneme definition includes five parts:

* the keyword `phoneme` followed by the phoneme-ID
* the description of the articulation
* the IPA symbol
* the parameters for the sound generation (length, FMT, WAV, etc.)
* the closing keyword `endphoneme`

To specify the sound generation for a phoneme, eSpeak-NG provides a whole set of specific parameters and conditions for fine-tuning. A typical, more complex example is shown below:

```
phoneme t
  vls alv stp
  voicingswitch d
  lengthmod 2
  Vowelin f1=0 f2=1700 -300 300 f3=-100 80
  Vowelout f1=0 f2=1700 -300 250 f3=-100 80 rms=20

  IF nextPh(isPause2) THEN
    WAV(ustop/t_)
  ELIF nextPh(r) THEN
    WAV(ustop/t_)
  ELIF nextPh(@-) THEN
    WAV(ustop/t_dnt, 50)
  ENDIF
  WAV(ustop/t, 90)
endphoneme
```

I mentioned in the introduction that perfect sound design is not my goal. Therefore I specified only the strict minimum of parameters for each phoneme. [Interested users](https://github.com/espeak-ng/espeak-ng/issues/1013) can easily modify the code to enhance the speech quality. The official documentation provides the necessary guidance:

* [Phoneme Model](https://github.com/espeak-ng/espeak-ng/blob/master/docs/phoneme_model.md)
* [Phoneme features and IPA](https://github.com/espeak-ng/espeak-ng/blob/master/docs/phonemes.md)
* [Phoneme Tables](https://github.com/espeak-ng/espeak-ng/blob/master/docs/phontab.md)

It is not necessary to specify all the phonemes for a language; they can be inherited from a `master phoneme file` or from another language. Most of the 126 other languages supported by eSpeak-NG use this mechanism. Some languages even rely exclusively on inherited phoneme definitions.
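
The inheritance is declared in the header of a language's phoneme table. For Luxembourgish, the declaration (shown again in the Integration chapter below) names the master table `base1` as parent, so every phoneme that is not redefined in `ph_luxembourgish` is taken from there:

```
phonemetable lb base1
include ph_luxembourgish
```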
However, to keep full control of the phonemization process and to remain independent from code changes in other languages, I prefer to define all the phonemes used for Luxembourgish myself.

Speech models based on deep ML are usually trained with about a hundred hours of audio recordings and the related transcriptions. To train a deep neural network with a small dataset, which is the case for Luxembourgish, it is necessary to use a low number of different phonemes to obtain valid results. Based on my earlier experience with ML-TTS models, I selected the following sets of phonemes (56 in total) for this first release of the project:

```
Monophthongs (20)
IPA phoneme symbols   : ɑ, aː, ɛː, e, æ, eː, ə, ɐ, i, iː, o, oː, u, uː, y, yː, ɑ̃ː, ɛ̃ː, õː, œː
my eSpeak phoneme-IDs : a, a:, aE, e, E, e:, @E, eR, i, i:, o, o:, u, u:, y, y:, eA, iA, oA, OU

Diphthongs (9)
IPA phoneme symbols   : æːɪ, ɑʊ, æːʊ, ɑɪ, ɜɪ, oɪ, iə, əʊ, uə
my eSpeak phoneme-IDs : aI, aU, AU, eI, OI, eU, iE, oU, uE

Consonants (27)
IPA phoneme symbols   : b, c, d, f, g, h, j, k, l, m, n, ŋ, p, ʀ, s, t, v, w, z, ʒ, ʁ, ʑ, ʦ, ʃ, dʒ, X, ɕ
my eSpeak phoneme-IDs : b, c, d, f, g, h, j, k, l, m, n, N, p, r, s, t, v, w, z, J, rR, Z, TS, S, dZ, x, X
```

To add more phonemes, if required in the future, it's easy to modify the `ph_luxembourgish` file and to adapt the related files `lb_rules`, `lb_list` and `lb_emoji`.

In the next chapter I describe the process of linking letters (characters, graphemes) to phoneme-IDs.
### Text to Phoneme translation

#### lb_rules

The main file defining the translation (correspondence) between letters and phoneme-IDs is [lb_rules](https://github.com/mbarnig/espeak-ng-lb/blob/lëtzebuergesch/dictsource/lb_rules). It is essentially a large table with the following format, starting with a `.group` line:

```
.group <one character>

or

.group <two characters>

then

previous letter sequence ) current letter sequence ( next letter sequence   phoneme-ID sequence   // optional comment
```

Let's take the `Auto` example again to explain the rule code. The word starts with the diphthong `au`, followed by the consonant `t`. The word ends with the monophthong `o`, preceded by the consonant `t`. The consonant `t` itself is preceded by the diphthong `au` and followed by the monophthong `o`.

```
.group aU
_) au (t     aU

.group o
t) o (_      o:

.group t
au) t (o     t
```

Again, eSpeak-NG provides a large set of specific commands and parameters to define the translation rules in a very detailed and comprehensive manner. A simple example is the capital letter A, which stands for any vowel, and the capital letter C, which stands for any consonant. If we use A and C in the above example, we can easily define combined rules that phonemize both words `Aula` and `Auto`:

```
.group aU
_) au (C     aU

.group o
C) o (_      o:

.group a
C) a (_      a:

.group t
A) t (A      t

.group l
A) l (A      l
```

Another way of coding the same result is:

```
.group aU
_) au (C     aU

.group to
A) to (_     to:

.group la
A) la (_     la:
```

To find the pronunciation of a word, the rules are searched and every rule that matches the letters at the current position in the word is given a score, depending on how many letters it matches. The pronunciation from the best-matching rule is chosen. The position pointer inside the word is then advanced past the matched letters, and the process is repeated until all the letters of the word have been processed.
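
As a rough illustration, this is how the word `Auto` would be matched against the A/C rules defined above:

```
position "au" : rule  _) au (C   matches (word start, 't' is a consonant)  ->  aU
position "t"  : rule  A) t (A    matches ('u' and 'o' are vowels)          ->  t
position "o"  : rule  C) o (_    matches ('t' is a consonant, word end)    ->  o:

result : aU t o:   (IPA ɑʊtoː)
```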
There are numerous possibilities to code the phonemization rules with previous, current and next letter sequences (as well as placeholders, symbols, options, letter groups sharing common features, etc.).

There are also some restrictions. The first letter of a two-letter group-ID must be a 7-bit ASCII character (code point below 0x80). For a one-letter group-ID, extended characters such as `è ö î` are accepted.

The guide [Text to Phoneme Translation](https://github.com/espeak-ng/espeak-ng/blob/master/docs/dictionary.md) describes all available options to set up a complete and complex translation table. I opted, however, for a few simple rules, because I prefer to include a Luxembourgish pronunciation lexicon in the file `lb_list` described below. This takes into account that Luxembourgish contains numerous loanwords from German, French and English, which makes the definition of global rules more difficult.

An interested user can easily extend and adapt the rules in the `lb_rules` file to their own needs.
#### lb_list

Usually a text to synthesize does not only contain common words of a specific language, but also punctuation, numbers, symbols, personal names, abbreviations, loanwords and possibly [Speech Synthesis Markup Language](https://en.wikipedia.org/wiki/Speech_Synthesis_Markup_Language) (SSML) code. These exceptions are handled in the [lb_list](https://github.com/mbarnig/espeak-ng-lb/blob/lëtzebuergesch/dictsource/lb_list) file. It's a simple text lexicon with entries of the following forms:

```
letter-name     phoneme-ID or $accent
word            phoneme-ID sequence
word            other similar word $text
word            (stressed, unstressed, ..) $u   // intonation
abbreviation    phoneme-ID sequence or $abbrev
number          phoneme-ID sequence
symbol          phoneme-ID sequence
```

Here is a small excerpt from the Luxembourgish `lb_list` file to show the principle:

```
// letter names
_a  a:
_e  e:
ä   $accent
û   $accent

// intonation
de  $u   // unstress
den $u

// numbers
_0   nul
_1   e:nt
_2   TSve:
_0C  honert
_0M1 daUz@End

// punctuation
_.  punkt
_,  koma

// symbols
+   pluz
&   an
€   eUro
CSV   $abbrev
OGBL  $abbrev
asbl  $abbrev
ASCII azki

// personal names
Marco  marko:
Barnig barniS
Simone zimon
```

The flag `$accent` tells the processor to say the letter name followed by the name of the [diacritic](https://en.wikipedia.org/wiki/Diacritic). The flag `$abbrev` indicates that the letter names are spoken in sequence. Speaking the names of punctuation characters only works if the option `--punct` is specified at inference time.
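
A hedged example of how this could look on the command line (the file name `text.txt` is just a placeholder; `--punct` without a value speaks the names of all punctuation characters, `--punct="<characters>"` restricts it to the listed ones):

```
# speak a text file and pronounce the punctuation names as well
espeak-ng -v lb --punct -f text.txt

# only announce commas and full stops
espeak-ng -v lb --punct=",." -f text.txt
```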
Again, eSpeak-NG offers many options to fine-tune the list entries. My `lb_list` file for this first version of the project is a work in progress; I will update it progressively according to my needs. An interested user can do the same.

Note: the supported SSML tags are documented in [SSML and HTML Support](https://github.com/mbarnig/espeak-ng-lb/blob/lëtzebuergesch/docs/markup.md).
#### lb_emoji

Today, graphics play an increasingly important role in text. For this reason eSpeak-NG replaced the former file `language_extra` with a file called `language_emoji`. [Emojis](https://en.wikipedia.org/wiki/Emoji) are small pictures which can be represented as encoded characters. Originating on Japanese mobile phones in 1997, emojis became increasingly popular worldwide in the 2010s after being added to several mobile operating systems. To ensure interoperability between browsers, mobile phones and messaging systems, emojis have been standardized since 2014 by the [Unicode Consortium](https://en.wikipedia.org/wiki/Unicode_Consortium). Every year additional emojis are added to the standard based on public proposals.

The [full list of emojis](https://unicode.org/emoji/charts/full-emoji-list.html) is available on the website of the Unicode Consortium.

Here are two examples of how to include emojis (graphics) in a Luxembourgish sentence submitted to the eSpeak-NG phonemizer:

An der 🕰 hunn sech den 🧭💨 an d’🌞 gestridden, wie vun hinnen zwee wuel méi 💪 wier, wéi e 🚶, deen an ee waarme 🧥 agepak war, iwwert de 🛤 koum.

Haut sinn ☝ mat mengen Enkelkanner 🧑‍🤝‍🧑 , 👦 , 👧 , an 👩 an den 🎪 gaangen. Do hunn mer e 🦍, eng 🦒, en 🐘 an en 🦏 gesinn.

To phonemize the above sentences I added the following entries to the [lb_emoji](https://github.com/mbarnig/espeak-ng-lb/blob/lëtzebuergesch/dictsource/lb_emoji) file:

```
🕰 TSaIt
🧭 nort
🌬 vant
🌞 zon
💪 Sta:rk
🚶 vand@EreR
🧥 mant@El
🛤 ve:
☝ eS
🧑‍🤝‍🧑 Sa:rel, toma:
👦 asto:r
👧 kapuzin
👩 tessa
🎪 TSirkus
🦍 gorila:
🦒 giraf
🐘 elefant
🦏 rino:zerus
```

A more comfortable option is to insert the emojis from the popup menu of the text editor:



Here are the results of the phonemization:

```
ɑn dɐ ‚ʦæ:ɪt / hun zeɕ dən ’noʀtvɑnt ɑn ‚dzon gə’ʃtʀidən / viə fun hinən ‚ʦve: vuəl ‚meɪ ʃta:ʀk viɐ / veɪ eː ‚vɑndəʀɐ / de:n ɑn eː ‚va:ʀmə ‚mɑntəl ‚a:ɡəpa:k va:ʀ / ivɐt də ‚veː kəʊm //

haUt sin eS mat mengen enkelkaneR Sa:rel / toma: / asto:r / kapuzin an tessa an den TSirkus ga:ngen // do: hun meR e gorila: / eN giraf / en elefant an e rino:zerus gesin //
```

For convenience I moved the symbols from the `lb_list` file to the `lb_emoji` file.
## Integration

In the last step the four Luxembourgish files are embedded into the source code of the eSpeak-NG project. The following source files are modified:

* [Makefile.am](https://github.com/mbarnig/espeak-ng-lb/blob/master/Makefile.am)
* [phsource/phonemes](https://github.com/mbarnig/espeak-ng-lb/blob/master/phsource/phonemes)
* [docs/languages.md](https://github.com/mbarnig/espeak-ng-lb/blob/master/docs/languages.md)

In the `Makefile.am` file I added the following lines:

At line 392:

```
phsource/ph_luxembourgish \
```

At line 570:

```
espeak-ng-data/lb_dict \
```

At line 792:

```
lb: espeak-ng-data/lb_dict
espeak-ng-data/lb_dict: dictsource/lb_list dictsource/lb_rules dictsource/lb_emoji
```

In the `phsource/phonemes` file I added these lines at line 1763:

```
phonemetable lb base1
include ph_luxembourgish
```

In the `docs/languages.md` file I added this line after the Latin language:

`gmw` | `lb` | West Germanic | Lëtzebuergesch

Two additional files must be created:

* [espeak-ng-data/lang/gmw/lb](https://github.com/mbarnig/espeak-ng-lb/blob/lëtzebuergesch/espeak-ng-data/lang/gmw/lb)
* [espeak-ng-data/voices/!v/Luxi](https://github.com/mbarnig/espeak-ng-lb/blob/lëtzebuergesch/espeak-ng-data/voices/!v/Luxi)

Both files are very simple if we include only the strict minimum.

Here is the content of the language file `lb`:

```
name Lëtzebuergesch
language lb
```

Here is the voice file `Luxi`:

```
name Luxi
language lb
maintainer mbarnig
```

As usual, eSpeak-NG provides numerous options to customize these files. Please read the guide [Voice and Language files](https://github.com/mbarnig/espeak-ng-lb/blob/lëtzebuergesch/docs/voices.md) for detailed documentation about all available features.

When the project is built for the first time, an additional file `lb_dict` is created inside the [espeak-ng-data](https://github.com/mbarnig/espeak-ng-lb/tree/l%C3%ABtzebuergesch/espeak-ng-data) folder; it is a compiled binary combination of the three files `lb_rules`, `lb_list` and `lb_emoji`.
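
If one of the three dictsource files is edited later, the dictionary can be recompiled without rebuilding the whole project. A minimal sketch, assuming eSpeak-NG is already installed and the command is run from the repository:

```
# recompile the Luxembourgish dictionary (lb_rules, lb_list, lb_emoji) into lb_dict
cd dictsource
espeak-ng --compile=lb
```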
## Build and use the project

Now the forked eSpeak-NG source code is ready for compilation, hopefully without problems. Compilation is easy if you have a computer with a well-configured development environment and all the tools required for C compilation. On my Ubuntu 20.04 system this is the case, and I build and install the project with four commands:

```
./autogen.sh
./configure
make
sudo make install
```



The [building guide](https://github.com/mbarnig/espeak-ng-lb/blob/master/docs/building.md) provides detailed information on how to compile and build eSpeak-NG from source on different operating systems. If you need help, please visit the [issues section](https://github.com/espeak-ng/espeak-ng/issues) of the upstream eSpeak-NG project. Currently there are 333 open and 311 closed issues, so another user has probably already found a solution for your problem.

To use the project, I need only one command:

`espeak-ng -v lb -q --ipa -f <text file>`

The content of the text file is phonemized with the Luxembourgish rules (option `-v lb`) into IPA symbols (option `--ipa`), without producing speech (option `-q`). All the optional configuration flags are explained in the [--help document](https://github.com/mbarnig/espeak-ng-lb/blob/lëtzebuergesch/src/espeak-ng.1.ronn).
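
A few hedged variations of the same call (the file names are only placeholders):

```
# output eSpeak's internal phoneme mnemonics instead of IPA symbols
espeak-ng -v lb -q -x -f text.txt

# synthesize the text with the formant engine and save it as a WAV file
espeak-ng -v lb -w output.wav -f text.txt
```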
A more comfortable option is to use the great tool [espeak-phonemizer](https://github.com/rhasspy/espeak-phonemizer), created by [Michael Hansen](https://www.linkedin.com/in/michael-hansen-9885b2105/) (alias [synesthesiam](https://synesthesiam.com)), which transforms the output of eSpeak-NG into a format ready to be used as input for training a deep machine learning Luxembourgish TTS model based on neural networks.

My favorite GitHub projects for my Luxembourgish ML-TTS experiments are:

* [Rhasspy/Larynx](https://github.com/rhasspy/larynx)
* [Coqui-TTS](https://github.com/coqui-ai/TTS)
* [Implementations by Keon Lee](https://github.com/keonlee9420)
* [Implementations by Jaehyeon Kim](https://github.com/jaywalnut310)
## References

* [The best of two breeds](https://www.web3.lu/the-best-of-two-breeds/), 2021, mbarnig
* [Synthèse de la parole](https://www.amazon.fr/Synthèse-parole-électrique-électronique-informatique/dp/2322238600/), 2020, Marco Barnig
* [Synthèse vocale](https://www.web3.lu/synthese-vocale/), 2019, mbarnig
* [Emojis et Unicode](https://www.web3.lu/timeline/emojis-et-unicode/), 2018, mbarnig
* [Evolution of character encoding](https://www.web3.lu/evolution-of-character-encoding/), 2016, mbarnig
* [Speech Corpora for TTS](https://www.web3.lu/speech-corpora-tts/), 2015, mbarnig
* [Festival Text-to-Speech Package](https://www.web3.lu/festival-text-speech-package/), 2015, mbarnig
* [eSpeak Formant Synthesizer](https://www.web3.lu/espeak-formant-synthesizer/), 2014, mbarnig
* [Mary TTS (Text To Speech)](https://www.web3.lu/marytts-text-speech/), 2014, mbarnig
* [Language: fr, de, en, lb, eo](https://www.web3.lu/languages/), 2014, mbarnig
* [Spectrograms and speech processing](https://www.web3.lu/spectrogram-speech-processing/), 2014, mbarnig
* [Phonemes, phones, graphemes and visemes](https://www.web3.lu/phonemes-phones-graphemes-visemes/), 2014, mbarnig
* [FreeTTS: a Java speech synthesizer](https://www.web3.lu/freetts-a-java-speech-synthesizer/), 2005, mbarnig