| tests/*.test | tests/*.test | ||||
| !tests/languages.test | !tests/languages.test | ||||
| !tests/phoneme-output.test | |||||
| espeak-ng.pc | espeak-ng.pc | ||||
| check: tests/encoding.check \ | check: tests/encoding.check \ | ||||
| tests/readclause.check \ | tests/readclause.check \ | ||||
| tests/api.check \ | tests/api.check \ | ||||
| tests/phoneme-output.check \ | |||||
| tests/languages.check | tests/languages.check | ||||
| ##### phoneme data: | ##### phoneme data: |
| ๐ แจแแชแแฃแแแแฃแแ แกแแฎแ // [1F616] | ๐ แจแแชแแฃแแแแฃแแ แกแแฎแ // [1F616] | ||||
| ๐ แแแชแแแก แแแแแแฎแแขแแแแ แกแแฎแ // [1F617] | ๐ แแแชแแแก แแแแแแฎแแขแแแแ แกแแฎแ // [1F617] | ||||
| ๐ แกแแฎแ, แ แแแแแแช แแแชแแแก แแแแแแแแก // [1F618] | ๐ แกแแฎแ, แ แแแแแแช แแแชแแแก แแแแแแแแก // [1F618] | ||||
| ๐ แแแชแแแก แแแแแแฎแแขแแแแ แกแแฎแ แแแแฆแแแแ แ แแแแแแแแ // [1F619] | |||||
| ๐ kissing face // [1F617] | |||||
| //๐ แแแชแแแก แแแแแแฎแแขแแแแ แกแแฎแ แแแแฆแแแแ แ แแแแแแแแ // [1F619] TODO: This breaks speaking "7". | |||||
| ๐ แแแชแแแก แแแแแแฎแแขแแแแ แกแแฎแ แแแฎแฃแญแฃแแ แแแแแแแแ // [1F61A] | ๐ แแแชแแแก แแแแแแฎแแขแแแแ แกแแฎแ แแแฎแฃแญแฃแแ แแแแแแแแ // [1F61A] | ||||
| ๐ แกแแฎแ แแแแแงแแคแแแ แแแแ // [1F61B] | ๐ แกแแฎแ แแแแแงแแคแแแ แแแแ // [1F61B] | ||||
| ๐ แกแแฎแ แแแแแงแแคแแแ แแแแแ แแ แฉแแแ แฃแแ แแแแแแ // [1F61C] | ๐ แกแแฎแ แแแแแงแแคแแแ แแแแแ แแ แฉแแแ แฃแแ แแแแแแ // [1F61C] | ||||
| ๐ข แแขแแ แแแ แกแแฎแ // [1F622] | ๐ข แแขแแ แแแ แกแแฎแ // [1F622] | ||||
| ๐ฃ แจแแฃแแแแแ แ แกแแฎแ // [1F623] | ๐ฃ แจแแฃแแแแแ แ แกแแฎแ // [1F623] | ||||
| ๐ค แกแแฎแ แชแฎแแแ แแแแ แแแแแแแแแแ แแ แแฅแแแ // [1F624] | ๐ค แกแแฎแ แชแฎแแแ แแแแ แแแแแแแแแแ แแ แแฅแแแ // [1F624] | ||||
| ๐ฅ แแแแแแแชแ แฃแแแฃแแ, แแแแ แแ แจแแแแแก แแแแแแฎแแขแแแแ แกแแฎแ // [1F625] | |||||
| ๐ฅ disappointed but relieved face // [1F625] | |||||
| //๐ฅ แแแแแแแชแ แฃแแแฃแแ, แแแแ แแ แจแแแแแก แแแแแแฎแแขแแแแ แกแแฎแ // [1F625] TODO: This breaks speaking "3". | |||||
| ๐ฆ แจแแญแแฃแฎแแแแ แกแแฎแ แฆแแ แแแ แแ // [1F626] | ๐ฆ แจแแญแแฃแฎแแแแ แกแแฎแ แฆแแ แแแ แแ // [1F626] | ||||
| ๐ง แขแแแฏแฃแแ แกแแฎแ // [1F627] | ๐ง แขแแแฏแฃแแ แกแแฎแ // [1F627] | ||||
| ๐จ แจแแจแแแแแฃแแ แกแแฎแ // [1F628] | ๐จ แจแแจแแแแแฃแแ แกแแฎแ // [1F628] |
| match1.end_type |= p - p_start; | match1.end_type |= p - p_start; | ||||
| } | } | ||||
| strcpy(end_phonemes, match1.phonemes); | strcpy(end_phonemes, match1.phonemes); | ||||
| memcpy(p_start, word_copy, strlen(word_copy)); | |||||
| strcpy(p_start, word_copy); | |||||
| return match1.end_type; | return match1.end_type; | ||||
| } | } | ||||
| } | } | ||||
| } | } | ||||
| } | } | ||||
| memcpy(p_start, word_copy, strlen(word_copy)); | |||||
| strcpy(p_start, word_copy); | |||||
| return 0; | return 0; | ||||
| } | } |
| // ignore these characters | // ignore these characters | ||||
| static const unsigned short chars_ignore_default[] = { | static const unsigned short chars_ignore_default[] = { | ||||
| 0xad, 1, // soft hyphen | |||||
| 0x200c, 1, // zero width non-joiner | |||||
| 0x200d, 1, // zero width joiner | |||||
| 0, 0 | |||||
| // U+00AD SOFT HYPHEN | |||||
| // Used to mark hyphenation points in words for where to split a | |||||
| // word at the end of a line to provide readable justified text. | |||||
| 0xad, 1, | |||||
| // U+200C ZERO WIDTH NON-JOINER | |||||
| // Used to prevent combined ligatures being displayed in their | |||||
| // combined form. | |||||
| 0x200c, 1, | |||||
| // U+200D ZERO WIDTH JOINER | |||||
| // Used to indicate an alternative connected form made up of the | |||||
| // characters surrounding the ZWJ in Devanagari, Kannada, Malayalam | |||||
| // and Emoji. | |||||
| // 0x200d, 1, // Not ignored. | |||||
| // End of the ignored character list. | |||||
| 0, 0 | |||||
| }; | }; | ||||
| // alternatively, ignore characters but allow zero-width-non-joiner (lang-fa) | // alternatively, ignore characters but allow zero-width-non-joiner (lang-fa) | ||||
| static const unsigned short chars_ignore_zwnj_hyphen[] = { | static const unsigned short chars_ignore_zwnj_hyphen[] = { | ||||
| 0xad, 1, // soft hyphen | |||||
| 0x640, 1, // igniore Arabic Tatweel (lang=FA) | |||||
| 0x200c, '-', // zero width non-joiner, replace with hyphen | |||||
| 0x200d, 1, // zero width joiner | |||||
| 0, 0 | |||||
| // U+00AD SOFT HYPHEN | |||||
| // Used to mark hyphenation points in words for where to split a | |||||
| // word at the end of a line to provide readable justified text. | |||||
| 0xad, 1, | |||||
| // U+0640 TATWEEL (KASHIDA) | |||||
| // Used in Arabic scripts to stretch characters for justifying | |||||
| // the text. | |||||
| 0x640, 1, | |||||
| // U+200C ZERO WIDTH NON-JOINER | |||||
| // Used to prevent combined ligatures being displayed in their | |||||
| // combined form. | |||||
| 0x200c, '-', | |||||
| // U+200D ZERO WIDTH JOINER | |||||
| // Used to indicate an alternative connected form made up of the | |||||
| // characters surrounding the ZWJ in Devanagari, Kannada, Malayalam | |||||
| // and Emoji. | |||||
| // 0x200d, 1, // Not ignored. | |||||
| // End of the ignored character list. | |||||
| 0, 0 | |||||
| }; | }; | ||||
| const unsigned char utf8_ordinal[] = { 0xc2, 0xba, 0 }; // masculine ordinal character, UTF-8 | const unsigned char utf8_ordinal[] = { 0xc2, 0xba, 0 }; // masculine ordinal character, UTF-8 |
| if (end2) { | if (end2) { | ||||
| RemoveEnding(tr, wordx, end2, word_copy); | RemoveEnding(tr, wordx, end2, word_copy); | ||||
| end_type = TranslateRules(tr, wordx, phonemes, N_WORD_PHONEMES, end_phonemes, wflags|FLAG_NO_TRACE, dictionary_flags); | end_type = TranslateRules(tr, wordx, phonemes, N_WORD_PHONEMES, end_phonemes, wflags|FLAG_NO_TRACE, dictionary_flags); | ||||
| memcpy(wordx, word_copy, strlen(word_copy)); | |||||
| strcpy(wordx, word_copy); | |||||
| if ((end_type & SUFX_P) == 0) { | if ((end_type & SUFX_P) == 0) { | ||||
| // after removing the suffix, the prefix is no longer recognised. | // after removing the suffix, the prefix is no longer recognised. | ||||
| // Keep the suffix, but don't use the prefix | // Keep the suffix, but don't use the prefix | ||||
| wordx[-1] = ' '; | wordx[-1] = ' '; | ||||
| if (phonemes[0] == phonSWITCH) { | if (phonemes[0] == phonSWITCH) { | ||||
| // change to another language in order to translate this word | // change to another language in order to translate this word | ||||
| memcpy(wordx, word_copy, strlen(word_copy)); | |||||
| strcpy(wordx, word_copy); | |||||
| strcpy(word_phonemes, phonemes); | strcpy(word_phonemes, phonemes); | ||||
| return 0; | return 0; | ||||
| } | } | ||||
| found = LookupDictList(tr, &wordx, phonemes, dictionary_flags2, end_flags, wtab); // without prefix and suffix | found = LookupDictList(tr, &wordx, phonemes, dictionary_flags2, end_flags, wtab); // without prefix and suffix | ||||
| if (phonemes[0] == phonSWITCH) { | if (phonemes[0] == phonSWITCH) { | ||||
| // change to another language in order to translate this word | // change to another language in order to translate this word | ||||
| memcpy(wordx, word_copy, strlen(word_copy)); | |||||
| strcpy(wordx, word_copy); | |||||
| strcpy(word_phonemes, phonemes); | strcpy(word_phonemes, phonemes); | ||||
| return 0; | return 0; | ||||
| } | } | ||||
| if (phonemes[0] == phonSWITCH) { | if (phonemes[0] == phonSWITCH) { | ||||
| // change to another language in order to translate this word | // change to another language in order to translate this word | ||||
| strcpy(word_phonemes, phonemes); | strcpy(word_phonemes, phonemes); | ||||
| memcpy(wordx, word_copy, strlen(word_copy)); | |||||
| strcpy(wordx, word_copy); | |||||
| wordx[-1] = c_temp; | wordx[-1] = c_temp; | ||||
| return 0; | return 0; | ||||
| } | } | ||||
| AppendPhonemes(tr, phonemes, N_WORD_PHONEMES, end_phonemes); | AppendPhonemes(tr, phonemes, N_WORD_PHONEMES, end_phonemes); | ||||
| end_phonemes[0] = 0; | end_phonemes[0] = 0; | ||||
| } | } | ||||
| memcpy(wordx, word_copy, strlen(word_copy)); | |||||
| strcpy(wordx, word_copy); | |||||
| } | } | ||||
| wordx[-1] = c_temp; | wordx[-1] = c_temp; | ||||
| char word[N_WORD_BYTES+1]; | char word[N_WORD_BYTES+1]; | ||||
| word[0] = 0; | word[0] = 0; | ||||
| word[1] = ' '; | word[1] = ' '; | ||||
| memcpy(word+2, word_out, strlen(word_out)); | |||||
| strcpy(word+2, word_out); | |||||
| word_out = word+2; | word_out = word+2; | ||||
| while (*word_out && available > 1) { | while (*word_out && available > 1) { |
| #!/bin/sh | |||||
| test_phonemes() { | |||||
| TEST_LANG=$1 | |||||
| EXPECTED=$2 | |||||
| TEST_TEXT=$3 | |||||
| echo "testing ${TEST_LANG} \"${TEST_TEXT}\"" | |||||
| ESPEAK_DATA_PATH=`pwd` LD_LIBRARY_PATH=src:${LD_LIBRARY_PATH} \ | |||||
| src/espeak-ng -xq -v ${TEST_LANG} "${TEST_TEXT}" > actual.txt | |||||
| echo "${EXPECTED}" > expected.txt | |||||
| diff expected.txt actual.txt || exit 1 | |||||
| } | |||||
| test_phonemes en " h@l'oU" "hello" | |||||
| # Emoji | |||||
| # ED-3 - emoji_character [http://www.unicode.org/reports/tr51/tr51-12.html#def_emoji_character] | |||||
| test_phonemes en " Ekskla#m'eIS@N kw'EstS@n m'A@k" "โ" | |||||
| test_phonemes en " Ekskla#m'eIS@N kw'EstS@n m'A@k r'eInboU" "โ ๐" |