7 years ago · ddaf267aa9
--- a/.gitignore
+++ b/.gitignore
@@ -96,6 +96,7 @@ src/speak-ng

 tests/*.test
 !tests/languages.test
 !tests/phoneme-output.test

 espeak-ng.pc

--- a/Makefile.am
+++ b/Makefile.am
@@ -250,6 +250,7 @@ tests_api_test_SOURCES = tests/api.c
 check:	tests/encoding.check \
 	tests/readclause.check \
 	tests/api.check \
 	tests/phoneme-output.check \
 	tests/languages.check

 ##### phoneme data:
--- a/dictsource/ka_emoji
+++ b/dictsource/ka_emoji
@@ -926,7 +926,8 @@ $textmode
 😖	შეცბუნებული სახე						// [1F616]
 😗	კოცნის გამომხატველი სახე					// [1F617]
 😘	სახე, რომელიც კოცნას აგზავნის					// [1F618]
 😙	კოცნის გამომხატველი სახე მომღიმარე თვალებით			// [1F619]
 😗	kissing face							// [1F617]
 //😙	კოცნის გამომხატველი სახე მომღიმარე თვალებით			// [1F619] TODO: This breaks speaking "7".
 😚	კოცნის გამომხატველი სახე დახუჭული თვალებით			// [1F61A]
 😛	სახე გამოყოფილი ენით						// [1F61B]
 😜	სახე გამოყოფილი ენითა და ჩაკრული თვალით				// [1F61C]
@@ -938,7 +939,8 @@ $textmode
 😢	მტირალა სახე							// [1F622]
 😣	შეუპოვარი სახე							// [1F623]
 😤	სახე ცხვირიდან გამომავალი ორთქლით				// [1F624]
 😥	იმედგაცრუებული, მაგრამ შვების გამომხატველი სახე			// [1F625]
 😥	disappointed but relieved face					// [1F625]
 //😥	იმედგაცრუებული, მაგრამ შვების გამომხატველი სახე			// [1F625] TODO: This breaks speaking "3".
 😦	შეჭმუხნილი სახე ღია პირით					// [1F626]
 😧	ტანჯული სახე							// [1F627]
 😨	შეშინებული სახე							// [1F628]
--- a/src/libespeak-ng/dictionary.c
+++ b/src/libespeak-ng/dictionary.c
@@ -2394,7 +2394,7 @@ int TranslateRules(Translator *tr, char *p_start, char *phonemes, int ph_size, c
 						match1.end_type |= p - p_start;
 					}
 					strcpy(end_phonemes, match1.phonemes);
 					memcpy(p_start, word_copy, strlen(word_copy));
 					strcpy(p_start, word_copy);
 					return match1.end_type;
 				}
 			}
@@ -2404,7 +2404,7 @@ int TranslateRules(Translator *tr, char *p_start, char *phonemes, int ph_size, c
 		}
 	}

 	memcpy(p_start, word_copy, strlen(word_copy));
 	strcpy(p_start, word_copy);

 	return 0;
 }
--- a/src/libespeak-ng/tr_languages.c
+++ b/src/libespeak-ng/tr_languages.c
@@ -158,19 +158,44 @@ static void SetLetterBitsRange(Translator *tr, int group, int first, int last)

 // Default table of characters to ignore.
 // The table is a flat list of (code point, value) pairs closed by a 0, 0 pair.
 // NOTE(review): every listed character carries the value 1, which presumably
 // means "drop this character" - confirm against the code that reads the table.
 // NOTE(review): the entries appear twice in this span (a compact form first,
 // then an annotated form in which U+200D is no longer ignored); confirm which
 // set is current before relying on it.
 static const unsigned short chars_ignore_default[] = {
 	0xad,    1, // soft hyphen
 	0x200c,  1, // zero width non-joiner
 	0x200d,  1, // zero width joiner
 	0, 0
 	// U+00AD SOFT HYPHEN
 	//     Used to mark hyphenation points in words for where to split a
 	//     word at the end of a line to provide readable justified text.
 	0xad,   1,
 	// U+200C ZERO WIDTH NON-JOINER
 	//     Used to prevent combined ligatures being displayed in their
 	//     combined form.
 	0x200c, 1,
 	// U+200D ZERO WIDTH JOINER
 	//     Used to indicate an alternative connected form made up of the
 	//     characters surrounding the ZWJ in Devanagari, Kannada, Malayalam
 	//     and Emoji.
//	0x200d, 1, // Not ignored.
 	// End of the ignored character list.
 	0,      0
 };

 // Alternative table (lang-fa): ignores the same kind of characters as the
 // default table, but maps the zero-width non-joiner to a hyphen instead.
 // The table is a flat list of (code point, value) pairs closed by a 0, 0 pair.
 // NOTE(review): a value of 1 presumably means "drop this character", while any
 // other value ('-' here) is the replacement character - confirm against the
 // code that reads the table.
 // NOTE(review): the entries appear twice in this span (a compact form first,
 // then an annotated form in which U+200D is no longer ignored); confirm which
 // set is current before relying on it.
 static const unsigned short chars_ignore_zwnj_hyphen[] = {
 	0xad,   1,   // soft hyphen
 	0x640,  1,   // ignore Arabic Tatweel (lang=FA)
 	0x200c, '-', // zero width non-joiner, replace with hyphen
 	0x200d, 1,   // zero width joiner
 	0, 0
 	// U+00AD SOFT HYPHEN
 	//     Used to mark hyphenation points in words for where to split a
 	//     word at the end of a line to provide readable justified text.
 	0xad,   1,
 	// U+0640 TATWEEL (KASHIDA)
 	//     Used in Arabic scripts to stretch characters for justifying
 	//     the text.
 	0x640,  1,
 	// U+200C ZERO WIDTH NON-JOINER
 	//     Used to prevent combined ligatures being displayed in their
 	//     combined form.
 	0x200c, '-',
 	// U+200D ZERO WIDTH JOINER
 	//     Used to indicate an alternative connected form made up of the
 	//     characters surrounding the ZWJ in Devanagari, Kannada, Malayalam
 	//     and Emoji.
//	0x200d, 1, // Not ignored.
 	// End of the ignored character list.
 	0,      0
 };

 // Masculine ordinal character encoded as UTF-8, stored as a NUL-terminated
 // byte string.
 const unsigned char utf8_ordinal[] = {
 	0xc2, 0xba, // the two UTF-8 bytes of the character
 	0x00        // string terminator
 };
--- a/src/libespeak-ng/translate.c
+++ b/src/libespeak-ng/translate.c
@@ -795,7 +795,7 @@ static int TranslateWord3(Translator *tr, char *word_start, WORD_TAB *wtab, char
 					if (end2) {
 						RemoveEnding(tr, wordx, end2, word_copy);
 						end_type = TranslateRules(tr, wordx, phonemes, N_WORD_PHONEMES, end_phonemes, wflags|FLAG_NO_TRACE, dictionary_flags);
 						memcpy(wordx, word_copy, strlen(word_copy));
 						strcpy(wordx, word_copy);
 						if ((end_type & SUFX_P) == 0) {
 							// after removing the suffix, the prefix is no longer recognised.
 							// Keep the suffix, but don't use the prefix
@@ -902,7 +902,7 @@ static int TranslateWord3(Translator *tr, char *word_start, WORD_TAB *wtab, char
 						wordx[-1] = ' ';
 						if (phonemes[0] == phonSWITCH) {
 							// change to another language in order to translate this word
 							memcpy(wordx, word_copy, strlen(word_copy));
 							strcpy(wordx, word_copy);
 							strcpy(word_phonemes, phonemes);
 							return 0;
 						}
@@ -920,7 +920,7 @@ static int TranslateWord3(Translator *tr, char *word_start, WORD_TAB *wtab, char
 						found = LookupDictList(tr, &wordx, phonemes, dictionary_flags2, end_flags, wtab);  // without prefix and suffix
 						if (phonemes[0] == phonSWITCH) {
 							// change to another language in order to translate this word
 							memcpy(wordx, word_copy, strlen(word_copy));
 							strcpy(wordx, word_copy);
 							strcpy(word_phonemes, phonemes);
 							return 0;
 						}
@@ -960,7 +960,7 @@ static int TranslateWord3(Translator *tr, char *word_start, WORD_TAB *wtab, char
 							if (phonemes[0] == phonSWITCH) {
 								// change to another language in order to translate this word
 								strcpy(word_phonemes, phonemes);
 								memcpy(wordx, word_copy, strlen(word_copy));
 								strcpy(wordx, word_copy);
 								wordx[-1] = c_temp;
 								return 0;
 							}
@@ -974,7 +974,7 @@ static int TranslateWord3(Translator *tr, char *word_start, WORD_TAB *wtab, char
 					AppendPhonemes(tr, phonemes, N_WORD_PHONEMES, end_phonemes);
 					end_phonemes[0] = 0;
 				}
 				memcpy(wordx, word_copy, strlen(word_copy));
 				strcpy(wordx, word_copy);
 			}

 			wordx[-1] = c_temp;
@@ -1139,7 +1139,7 @@ int TranslateWord(Translator *tr, char *word_start, WORD_TAB *wtab, char *word_o
 		char word[N_WORD_BYTES+1];
 		word[0] = 0;
 		word[1] = ' ';
 		memcpy(word+2, word_out, strlen(word_out));
 		strcpy(word+2, word_out);
 		word_out = word+2;

 		while (*word_out && available > 1) {
--- a/tests/phoneme-output.test
+++ b/tests/phoneme-output.test
@@ -0,0 +1,21 @@
 #!/bin/sh

 # test_phonemes LANG EXPECTED TEXT
 #
 # Runs the locally built src/espeak-ng with the options -xq and voice LANG
 # on TEXT, writes its output to actual.txt, writes EXPECTED to expected.txt
 # and compares the two files with diff.
 # Exits the whole script with status 1 on the first mismatch.
 # Both text files are left behind in the current directory.
 test_phonemes() {
 	TEST_LANG=$1
 	EXPECTED=$2
 	TEST_TEXT=$3

 	echo "testing ${TEST_LANG} \"${TEST_TEXT}\""
 	# Append the caller's LD_LIBRARY_PATH only when it is non-empty: a bare
 	# trailing ':' would add an empty entry, which the dynamic loader treats
 	# as the current working directory.
 	ESPEAK_DATA_PATH="$(pwd)" \
 	LD_LIBRARY_PATH="src${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}}" \
 		src/espeak-ng -xq -v "${TEST_LANG}" "${TEST_TEXT}" > actual.txt
 	# printf writes the expected text verbatim; echo may interpret backslashes
 	# depending on the shell.
 	printf '%s\n' "${EXPECTED}" > expected.txt
 	diff expected.txt actual.txt || exit 1
 }

 # Plain word: checks the phoneme output for ordinary English text.
 test_phonemes en " h@l'oU" "hello"

 # Emoji

 # ED-3 - emoji_character [http://www.unicode.org/reports/tr51/tr51-12.html#def_emoji_character]
 # A single emoji character on its own.
 test_phonemes en " Ekskla#m'eIS@N kw'EstS@n m'A@k" "⁉"
 # The same emoji character followed by a space and a second emoji.
 test_phonemes en " Ekskla#m'eIS@N kw'EstS@n m'A@k r'eInboU" "⁉ 🌈"