tests/*.test | tests/*.test | ||||
!tests/languages.test | !tests/languages.test | ||||
!tests/phoneme-output.test | |||||
espeak-ng.pc | espeak-ng.pc | ||||
check: tests/encoding.check \ | check: tests/encoding.check \ | ||||
tests/readclause.check \ | tests/readclause.check \ | ||||
tests/api.check \ | tests/api.check \ | ||||
tests/phoneme-output.check \ | |||||
tests/languages.check | tests/languages.check | ||||
##### phoneme data: | ##### phoneme data: |
๐ แจแแชแแฃแแแแฃแแ แกแแฎแ // [1F616] | ๐ แจแแชแแฃแแแแฃแแ แกแแฎแ // [1F616] | ||||
๐ แแแชแแแก แแแแแแฎแแขแแแแ แกแแฎแ // [1F617] | ๐ แแแชแแแก แแแแแแฎแแขแแแแ แกแแฎแ // [1F617] | ||||
๐ แกแแฎแ, แ แแแแแแช แแแชแแแก แแแแแแแแก // [1F618] | ๐ แกแแฎแ, แ แแแแแแช แแแชแแแก แแแแแแแแก // [1F618] | ||||
๐ แแแชแแแก แแแแแแฎแแขแแแแ แกแแฎแ แแแแฆแแแแ แ แแแแแแแแ // [1F619] | |||||
😗 kissing face // [1F617] | |||||
//๐ แแแชแแแก แแแแแแฎแแขแแแแ แกแแฎแ แแแแฆแแแแ แ แแแแแแแแ // [1F619] TODO: This breaks speaking "7". | |||||
๐ แแแชแแแก แแแแแแฎแแขแแแแ แกแแฎแ แแแฎแฃแญแฃแแ แแแแแแแแ // [1F61A] | ๐ แแแชแแแก แแแแแแฎแแขแแแแ แกแแฎแ แแแฎแฃแญแฃแแ แแแแแแแแ // [1F61A] | ||||
๐ แกแแฎแ แแแแแงแแคแแแ แแแแ // [1F61B] | ๐ แกแแฎแ แแแแแงแแคแแแ แแแแ // [1F61B] | ||||
๐ แกแแฎแ แแแแแงแแคแแแ แแแแแ แแ แฉแแแ แฃแแ แแแแแแ // [1F61C] | ๐ แกแแฎแ แแแแแงแแคแแแ แแแแแ แแ แฉแแแ แฃแแ แแแแแแ // [1F61C] | ||||
๐ข แแขแแ แแแ แกแแฎแ // [1F622] | ๐ข แแขแแ แแแ แกแแฎแ // [1F622] | ||||
๐ฃ แจแแฃแแแแแ แ แกแแฎแ // [1F623] | ๐ฃ แจแแฃแแแแแ แ แกแแฎแ // [1F623] | ||||
๐ค แกแแฎแ แชแฎแแแ แแแแ แแแแแแแแแแ แแ แแฅแแแ // [1F624] | ๐ค แกแแฎแ แชแฎแแแ แแแแ แแแแแแแแแแ แแ แแฅแแแ // [1F624] | ||||
๐ฅ แแแแแแแชแ แฃแแแฃแแ, แแแแ แแ แจแแแแแก แแแแแแฎแแขแแแแ แกแแฎแ // [1F625] | |||||
😥 disappointed but relieved face // [1F625] | |||||
//๐ฅ แแแแแแแชแ แฃแแแฃแแ, แแแแ แแ แจแแแแแก แแแแแแฎแแขแแแแ แกแแฎแ // [1F625] TODO: This breaks speaking "3". | |||||
๐ฆ แจแแญแแฃแฎแแแแ แกแแฎแ แฆแแ แแแ แแ // [1F626] | ๐ฆ แจแแญแแฃแฎแแแแ แกแแฎแ แฆแแ แแแ แแ // [1F626] | ||||
๐ง แขแแแฏแฃแแ แกแแฎแ // [1F627] | ๐ง แขแแแฏแฃแแ แกแแฎแ // [1F627] | ||||
๐จ แจแแจแแแแแฃแแ แกแแฎแ // [1F628] | ๐จ แจแแจแแแแแฃแแ แกแแฎแ // [1F628] |
match1.end_type |= p - p_start; | match1.end_type |= p - p_start; | ||||
} | } | ||||
strcpy(end_phonemes, match1.phonemes); | strcpy(end_phonemes, match1.phonemes); | ||||
memcpy(p_start, word_copy, strlen(word_copy)); | |||||
strcpy(p_start, word_copy); | |||||
return match1.end_type; | return match1.end_type; | ||||
} | } | ||||
} | } | ||||
} | } | ||||
} | } | ||||
memcpy(p_start, word_copy, strlen(word_copy)); | |||||
strcpy(p_start, word_copy); | |||||
return 0; | return 0; | ||||
} | } |
// ignore these characters | // ignore these characters | ||||
static const unsigned short chars_ignore_default[] = { | static const unsigned short chars_ignore_default[] = { | ||||
0xad, 1, // soft hyphen | |||||
0x200c, 1, // zero width non-joiner | |||||
0x200d, 1, // zero width joiner | |||||
0, 0 | |||||
// U+00AD SOFT HYPHEN | |||||
// Used to mark hyphenation points in words for where to split a | |||||
// word at the end of a line to provide readable justified text. | |||||
0xad, 1, | |||||
// U+200C ZERO WIDTH NON-JOINER | |||||
// Used to prevent combined ligatures being displayed in their | |||||
// combined form. | |||||
0x200c, 1, | |||||
// U+200D ZERO WIDTH JOINER | |||||
// Used to indicate an alternative connected form made up of the | |||||
// characters surrounding the ZWJ in Devanagari, Kannada, Malayalam | |||||
// and Emoji. | |||||
// 0x200d, 1, // Not ignored. | |||||
// End of the ignored character list. | |||||
0, 0 | |||||
}; | }; | ||||
// alternatively, ignore characters but allow zero-width-non-joiner (lang-fa) | // alternatively, ignore characters but allow zero-width-non-joiner (lang-fa) | ||||
static const unsigned short chars_ignore_zwnj_hyphen[] = { | static const unsigned short chars_ignore_zwnj_hyphen[] = { | ||||
0xad, 1, // soft hyphen | |||||
0x640, 1, // ignore Arabic Tatweel (lang=FA) | |||||
0x200c, '-', // zero width non-joiner, replace with hyphen | |||||
0x200d, 1, // zero width joiner | |||||
0, 0 | |||||
// U+00AD SOFT HYPHEN | |||||
// Used to mark hyphenation points in words for where to split a | |||||
// word at the end of a line to provide readable justified text. | |||||
0xad, 1, | |||||
// U+0640 TATWEEL (KASHIDA) | |||||
// Used in Arabic scripts to stretch characters for justifying | |||||
// the text. | |||||
0x640, 1, | |||||
// U+200C ZERO WIDTH NON-JOINER | |||||
// Used to prevent combined ligatures being displayed in their | |||||
// combined form. | |||||
0x200c, '-', | |||||
// U+200D ZERO WIDTH JOINER | |||||
// Used to indicate an alternative connected form made up of the | |||||
// characters surrounding the ZWJ in Devanagari, Kannada, Malayalam | |||||
// and Emoji. | |||||
// 0x200d, 1, // Not ignored. | |||||
// End of the ignored character list. | |||||
0, 0 | |||||
}; | }; | ||||
const unsigned char utf8_ordinal[] = { 0xc2, 0xba, 0 }; // masculine ordinal character, UTF-8 | const unsigned char utf8_ordinal[] = { 0xc2, 0xba, 0 }; // masculine ordinal character, UTF-8 |
if (end2) { | if (end2) { | ||||
RemoveEnding(tr, wordx, end2, word_copy); | RemoveEnding(tr, wordx, end2, word_copy); | ||||
end_type = TranslateRules(tr, wordx, phonemes, N_WORD_PHONEMES, end_phonemes, wflags|FLAG_NO_TRACE, dictionary_flags); | end_type = TranslateRules(tr, wordx, phonemes, N_WORD_PHONEMES, end_phonemes, wflags|FLAG_NO_TRACE, dictionary_flags); | ||||
memcpy(wordx, word_copy, strlen(word_copy)); | |||||
strcpy(wordx, word_copy); | |||||
if ((end_type & SUFX_P) == 0) { | if ((end_type & SUFX_P) == 0) { | ||||
// after removing the suffix, the prefix is no longer recognised. | // after removing the suffix, the prefix is no longer recognised. | ||||
// Keep the suffix, but don't use the prefix | // Keep the suffix, but don't use the prefix | ||||
wordx[-1] = ' '; | wordx[-1] = ' '; | ||||
if (phonemes[0] == phonSWITCH) { | if (phonemes[0] == phonSWITCH) { | ||||
// change to another language in order to translate this word | // change to another language in order to translate this word | ||||
memcpy(wordx, word_copy, strlen(word_copy)); | |||||
strcpy(wordx, word_copy); | |||||
strcpy(word_phonemes, phonemes); | strcpy(word_phonemes, phonemes); | ||||
return 0; | return 0; | ||||
} | } | ||||
found = LookupDictList(tr, &wordx, phonemes, dictionary_flags2, end_flags, wtab); // without prefix and suffix | found = LookupDictList(tr, &wordx, phonemes, dictionary_flags2, end_flags, wtab); // without prefix and suffix | ||||
if (phonemes[0] == phonSWITCH) { | if (phonemes[0] == phonSWITCH) { | ||||
// change to another language in order to translate this word | // change to another language in order to translate this word | ||||
memcpy(wordx, word_copy, strlen(word_copy)); | |||||
strcpy(wordx, word_copy); | |||||
strcpy(word_phonemes, phonemes); | strcpy(word_phonemes, phonemes); | ||||
return 0; | return 0; | ||||
} | } | ||||
if (phonemes[0] == phonSWITCH) { | if (phonemes[0] == phonSWITCH) { | ||||
// change to another language in order to translate this word | // change to another language in order to translate this word | ||||
strcpy(word_phonemes, phonemes); | strcpy(word_phonemes, phonemes); | ||||
memcpy(wordx, word_copy, strlen(word_copy)); | |||||
strcpy(wordx, word_copy); | |||||
wordx[-1] = c_temp; | wordx[-1] = c_temp; | ||||
return 0; | return 0; | ||||
} | } | ||||
AppendPhonemes(tr, phonemes, N_WORD_PHONEMES, end_phonemes); | AppendPhonemes(tr, phonemes, N_WORD_PHONEMES, end_phonemes); | ||||
end_phonemes[0] = 0; | end_phonemes[0] = 0; | ||||
} | } | ||||
memcpy(wordx, word_copy, strlen(word_copy)); | |||||
strcpy(wordx, word_copy); | |||||
} | } | ||||
wordx[-1] = c_temp; | wordx[-1] = c_temp; | ||||
char word[N_WORD_BYTES+1]; | char word[N_WORD_BYTES+1]; | ||||
word[0] = 0; | word[0] = 0; | ||||
word[1] = ' '; | word[1] = ' '; | ||||
memcpy(word+2, word_out, strlen(word_out)); | |||||
strcpy(word+2, word_out); | |||||
word_out = word+2; | word_out = word+2; | ||||
while (*word_out && available > 1) { | while (*word_out && available > 1) { |
#!/bin/sh | |||||
test_phonemes() { | |||||
TEST_LANG=$1 | |||||
EXPECTED=$2 | |||||
TEST_TEXT=$3 | |||||
echo "testing ${TEST_LANG} \"${TEST_TEXT}\"" | |||||
ESPEAK_DATA_PATH=`pwd` LD_LIBRARY_PATH=src:${LD_LIBRARY_PATH} \ | |||||
src/espeak-ng -xq -v ${TEST_LANG} "${TEST_TEXT}" > actual.txt | |||||
echo "${EXPECTED}" > expected.txt | |||||
diff expected.txt actual.txt || exit 1 | |||||
} | |||||
test_phonemes en " h@l'oU" "hello" | |||||
# Emoji | |||||
# ED-3 - emoji_character [http://www.unicode.org/reports/tr51/tr51-12.html#def_emoji_character] | |||||
test_phonemes en " Ekskla#m'eIS@N kw'EstS@n m'A@k" "⁉" | |||||
test_phonemes en " Ekskla#m'eIS@N kw'EstS@n m'A@k r'eInboU" "⁉ 🌈" | |||||