4 years ago · 7a25816336
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -22,6 +22,7 @@ updated languages:
 *  ia (Interlingua) -- nesrad
 *  it (Italian) -- Christian Leo
 *  ja (Japanese) -- fukuen
 *  jbo (Lojban) -- Juho Hiltunen, xunsku
 *  mi (Māori) -- boracasli98
 *  shn (Shan Tay Yai) -- ronaldaug
 *  tr (Turkish) -- boracasli98
--- a/dictsource/jbo_list
+++ b/dictsource/jbo_list
@@ -18,28 +18,18 @@ _9	so
 _dpt	pi_


 _a	abu
 b	b@
 c	S@
 d	d@
 _e	ebu
 f	f@
 g	g@
 _i	ibu
 j	Z@
 k	k@
 l	l@
 m	m@	// should "words" l,m,n,r say the syllable consonant?
 n	n@
 _o	obu
 p	p@
 r	R@
 s	s@
 t	t@
 _u	ubu
 v	v@
 x	x@
 y	'@bu
 z	z@

 h	@h'@bu
@@ -48,24 +38,34 @@ w	v'@bu


 // stressed cmavo
 cai	S'aI_!
 cai	S'aI__!
 cu'i	S'uhi_!
 pei	p'eI_!
 pei	p'eI__!
 ru'e	R'uhe_!
 sai	s'aI_!
 nai	n'aI_!
 na	n'a_!
 sai	s'aI__!
 nai	n'aI__!
 na	n'a__!
 ja'a	Z'aha_!

 // question cmavo
 ma	m'a__ // a longer pause to avoid resegmentation
 mo	m'o__
 xo	x'o__
 cu'e	S'uhe_!

 // emphasizing prefixes
 ba'e $u+
 za'e $u+


 // unstressed words. Note single-syllable cmavo are all unstressed in jbo_rules
 e'o	$u+	// request
 po'e	$u+	// of
 zo'e	$u+	// pronoun
 jo'u	$u+ $brk	// and
 ce'u	$u+ $brk	// lambda-it
 ke'a	$u+ $brk	// relative-it


 i	i_:	$u	// sentence break (recognised by eSpeak program). Try a short pause after ".i"

 // end-of-clause [_;_] before these
 noi	_;_noI	$u
 poi	_;_poI	$u
@@ -73,21 +73,50 @@ no'u	_;_nohu	$u
 po'u	_;_pohu	$u
 goi	_;_goI	$u

 ija	_;_iZa		// should this series be unstressed?
 ijanai	_;_iZan'aI
 ije	_;_iZe
 ije'i	_;_iZehi
 ijenai	_;_iZenaI
 ijo	_;_iZo
 ijonai	_;_iZon'aI
 inaja	_;_inaZa

 // sentence connectives
 i	__;__i
 ija	_;__iZa
 ijanai	_;__iZan'aI
 ije	_;__iZe
 ije'i	_;__iZehi
 ijenai	_;__iZenaI
 ijo	_;__iZo
 ijonai	_;__iZon'aI
 inaja	_;__inaZa


 // connectives
 a	$brk
 e	$brk
 o	$brk
 u	$brk
 ja	$brk
 je	$brk
 ji	$brk
 jo	$brk
 ju	$brk
 joi	$brk


 // terminators and openers
 cu	$brk
 gi	$pause

 to	_::to	$u	// start parenthesis, pause but don't raise intonation

 // terminators (include a pause after)
 ku	ku_::	$u
 boi	boI_::	$u
 lu	lu_:	$u $brk
 tu'e t,uhe_:	$u $brk

 ku	ku_:	$u
 boi	boI_:	$u

 ge'u	g,ehu_:	$u $brk
 lu'u	l,uhu_:	$u $brk

 li'u	l,ihu_::	$u $brk
 tu'u	t,uhu_::	$u $brk
 vau	vaU_::	$u
 kei	keI_::	$u
 ku'o	k,uho_::	$u
 toi	toI_::	$u

--- a/dictsource/jbo_rules
+++ b/dictsource/jbo_rules
@@ -119,6 +119,7 @@
 	'	h
     _) ' (_    '@h@

 	D) . (D	||p%i||,	// decimal point
 	.	_!	// dot
     _)	. (@P1	_!      // remove . prefix
 	. (_S1  _!	// remove . suffix
--- a/src/libespeak-ng/readclause.c
+++ b/src/libespeak-ng/readclause.c
@@ -59,7 +59,6 @@ char *namedata = NULL;
 static int ungot_char2 = 0;
 espeak_ng_TEXT_DECODER *p_decoder = NULL;
 static int ungot_char;
 static const char *ungot_word = NULL;

 static bool ignore_text = false; // set during <sub> ... </sub>  to ignore text which has been replaced by an alias
 static bool audio_text = false; // set during <audio> ... </audio>
@@ -656,12 +655,6 @@ int ReadClause(Translator *tr, char *buf, short *charix, int *charix_top, int n_
 	*tone_type = 0;
 	*voice_change = 0;

 	if (ungot_word != NULL) {
 		strcpy(buf, ungot_word);
 		ix += strlen(ungot_word);
 		ungot_word = NULL;
 	}

 	if (ungot_char2 != 0)
 		c2 = ungot_char2;
 	else
@@ -844,26 +837,6 @@ int ReadClause(Translator *tr, char *buf, short *charix, int *charix_top, int n_
 			if (c1 == 0xf0b)
 				c1 = ' '; // Tibet inter-syllabic mark, ?? replace by space ??

 			if (iswspace(c1)) {
 				char *p_word;

 				if (tr->translator_name == 0x6a626f) {
 					// language jbo : lojban
 					// treat "i" or ".i" as end-of-sentence
 					p_word = &buf[ix-1];
 					if (p_word[0] == 'i') {
 						if (p_word[-1] == '.')
 							p_word--;
 						if (p_word[-1] == ' ') {
 							ungot_word = "i ";
 							UngetC(c2);
 							p_word[0] = 0;
 							return CLAUSE_PERIOD;
 						}
 					}
 				}
 			}

 			if (c1 == 0xd4d) {
 				// Malayalam virama, check if next character is Zero-width-joiner
 				if (c2 == 0x200d)
--- a/src/libespeak-ng/tr_languages.c
+++ b/src/libespeak-ng/tr_languages.c
@@ -1063,7 +1063,7 @@ Translator *SelectTranslator(const char *name)
 		tr->langopts.stress_rule = STRESSPOSN_2R;
 		tr->langopts.vowel_pause = 0x20c; // pause before a word which starts with a vowel, or after a word which ends in a consonant
 		tr->punct_within_word = jbo_punct_within_word;
 		tr->langopts.param[LOPT_CAPS_IN_WORD] = 2; // capitals indicate stressed syllables
 		tr->langopts.param[LOPT_CAPS_IN_WORD] = 1; // capitals indicate stressed syllables
 		SetLetterVowel(tr, 'y');
 		tr->langopts.max_lengthmod = 368;
 	}
--- a/src/libespeak-ng/translate.c
+++ b/src/libespeak-ng/translate.c
@@ -2323,8 +2323,8 @@ void TranslateClause(Translator *tr, int *tone_out, char **voice_change)
 				if (iswupper(c)) {
 					c = towlower2(c, tr);

 					if ((j = tr->langopts.param[LOPT_CAPS_IN_WORD]) > 0) {
 						if ((j == 2) && (syllable_marked == false)) {
 					if (tr->langopts.param[LOPT_CAPS_IN_WORD]) {
 						if (syllable_marked == false) {
 							char_inserted = c;
 							c = 0x2c8; // stress marker
 							syllable_marked = true;
--- a/src/libespeak-ng/translate.h
+++ b/src/libespeak-ng/translate.h
@@ -331,8 +331,7 @@ typedef struct {
 // change [t] when followed by unstressed vowel
 #define LOPT_REDUCE_T 12

 // 1 = allow capitals inside a word
 // 2 = stressed syllable is indicated by capitals
 // 1 = stressed syllable is indicated by capitals
 #define LOPT_CAPS_IN_WORD 13

 // bit 0=Italian "syntactic doubling" of consonants in the word after a word marked with $double attribute
--- a/tests/language-pronunciation.test
+++ b/tests/language-pronunciation.test
@@ -126,9 +126,11 @@ t_s,unenar\`'amu
 k,ep\\uko'ete
 ,asak,ijumem'is\\i
 w,eCimos'esuN\\" "イロハニホヘト. チリヌルヲ. ワカヨタレソ. ツネナラム. ウヰノオクヤマ. ケフコエテ. アサキユメミシ. ヱヒモセスン."
 test_phonemes jbo "Latn" "R,o R'emna S,u s,e Z'inzi S,o z'ifRe Z,e simd'uhi b,e l,e R@ n,ilsels'iha _!'eleI R@ s'elS_!Ru
 i_: R@ s,e m'enli _;_g'ihe s,e sezm'aRde
 i_: _!,eI Z,esekih'ubo R@ s,im@z'uhe t'ahi l,e t'unba" "ro remna cu se jinzi co zifre je simdu'i be le ry. nilselsi'a .elei ry. selcru .i ry. se menli gi'e se sezmarde .i .ei jeseki'ubo ry. simyzu'e ta'i le tunba"
 test_phonemes jbo "Latn" "l,o R,o R'emna_! S,u s,e Z'inzi l,o k,a_! S,ehu z'ifRe_! Z,e l,o k,a S_!k,in@duns'ihu g,e l,o k,a_! S,ehu_! S,ehu s,e n,ilsels'inma_:_: g,i l,o k,a_! S,ehu_! S,ehu s,e kR'ali _!__;__'i R,a z'aNkaI l,o k,a s'uho d,a_! S,ehu m'enli_! Z,e l,o k,a s'uho d,a_! S,ehu m'aRde keI_:: _;_g'ihe b'ilga l,o k,a_! S,ehu_! S,ehu f'aRtRa s'uho Z,aI s,e t'asmi b,e l,o n,u R,a t,unb@s'ihu" "lo ro remna cu se jinzi lo ka ce'u zifre je lo ka ckinydunsi'u ge lo ka ce'u ce'u se nilselsinma gi lo ka ce'u ce'u se krali .i ra zankai lo ka su'o da ce'u menli je lo ka su'o da ce'u marde kei gi'e bilga lo ka ce'u ce'u fartra su'o jai se tasmi be lo nu ra tunbysi'u"
 test_phonemes jbo "Latn" "s'uho S'ilSe _!R'atat,aR_! S,u tolS'anSi _!__;__'i m,i Rivl'iha f'aha l,a _!S_!f,aRtsen'ek_!" "su'o cilce .RAtatar. cu tolcanci .i mi rivli'a fa'a la .cfartseNEK."



 test_phonemes ky "Cyrl" "bard'Iq adamd'ar 'Oz b,edelind'e dZan'a ,uquqtarInd'a erk'in dZan'a t'eN ,uquqt'u: bol'up dZaral'at
 alard'In 'aNs,ezim'i men'en ab,ijir'i b'ar dZan'a bir'ib,irin'e b'ir t,u:Gand'Iq mamil'e q,Ilu:G'a tij'iS" "Бардык адамдар өз беделинде жана укуктарында эркин жана тең укуктуу болуп жаралат. Алардын аң-сезими менен абийири бар жана бири-бирине бир туугандык мамиле кылууга тийиш."
 test_phonemes kl "Latn" "inu'itS; tam'armik in,uNN'orput nammin,E:rs,inn,a:ssusEq'arl#utsik ,assiQ,i:mm'il#u at,aqqinassusEq,arl#uts'il#u pis,inn,a:tsit,a:ffEq'arl#utsik