Juho Hiltunen 7 years ago
parent
commit
ddaf267aa9

+ 1
- 0
.gitignore View File



tests/*.test tests/*.test
!tests/languages.test !tests/languages.test
!tests/phoneme-output.test


espeak-ng.pc espeak-ng.pc



+ 1
- 0
Makefile.am View File

check: tests/encoding.check \ check: tests/encoding.check \
tests/readclause.check \ tests/readclause.check \
tests/api.check \ tests/api.check \
tests/phoneme-output.check \
tests/languages.check tests/languages.check


##### phoneme data: ##### phoneme data:

+ 4
- 2
dictsource/ka_emoji View File

😖 შეცბუნებული სახე // [1F616] 😖 შეცბუნებული სახე // [1F616]
😗 კოცნის გამომხატველი სახე // [1F617] 😗 კოცნის გამომხატველი სახე // [1F617]
😘 სახე, რომელიც კოცნას აგზავნის // [1F618] 😘 სახე, რომელიც კოცნას აგზავნის // [1F618]
😙 კოცნის გამომხატველი სახე მომღიმარე თვალებით // [1F619]
😗 kissing face // [1F617]
//😙 კოცნის გამომხატველი სახე მომღიმარე თვალებით // [1F619] TODO: This breaks speaking "7".
😚 კოცნის გამომხატველი სახე დახუჭული თვალებით // [1F61A] 😚 კოცნის გამომხატველი სახე დახუჭული თვალებით // [1F61A]
😛 სახე გამოყოფილი ენით // [1F61B] 😛 სახე გამოყოფილი ენით // [1F61B]
😜 სახე გამოყოფილი ენითა და ჩაკრული თვალით // [1F61C] 😜 სახე გამოყოფილი ენითა და ჩაკრული თვალით // [1F61C]
😢 მტირალა სახე // [1F622] 😢 მტირალა სახე // [1F622]
😣 შეუპოვარი სახე // [1F623] 😣 შეუპოვარი სახე // [1F623]
😤 სახე ცხვირიდან გამომავალი ორთქლით // [1F624] 😤 სახე ცხვირიდან გამომავალი ორთქლით // [1F624]
😥 იმედგაცრუებული, მაგრამ შვების გამომხატველი სახე // [1F625]
😥 disappointed but relieved face // [1F625]
//😥 იმედგაცრუებული, მაგრამ შვების გამომხატველი სახე // [1F625] TODO: This breaks speaking "3".
😦 შეჭმუხნილი სახე ღია პირით // [1F626] 😦 შეჭმუხნილი სახე ღია პირით // [1F626]
😧 ტანჯული სახე // [1F627] 😧 ტანჯული სახე // [1F627]
😨 შეშინებული სახე // [1F628] 😨 შეშინებული სახე // [1F628]

+ 2
- 2
src/libespeak-ng/dictionary.c View File

match1.end_type |= p - p_start; match1.end_type |= p - p_start;
} }
strcpy(end_phonemes, match1.phonemes); strcpy(end_phonemes, match1.phonemes);
memcpy(p_start, word_copy, strlen(word_copy));
strcpy(p_start, word_copy);
return match1.end_type; return match1.end_type;
} }
} }
} }
} }


memcpy(p_start, word_copy, strlen(word_copy));
strcpy(p_start, word_copy);


return 0; return 0;
} }

+ 34
- 9
src/libespeak-ng/tr_languages.c View File



// ignore these characters // ignore these characters
static const unsigned short chars_ignore_default[] = { static const unsigned short chars_ignore_default[] = {
0xad, 1, // soft hyphen
0x200c, 1, // zero width non-joiner
0x200d, 1, // zero width joiner
0, 0
// U+00AD SOFT HYPHEN
// Used to mark hyphenation points in words for where to split a
// word at the end of a line to provide readable justified text.
0xad, 1,
// U+200C ZERO WIDTH NON-JOINER
// Used to prevent combined ligatures being displayed in their
// combined form.
0x200c, 1,
// U+200D ZERO WIDTH JOINER
// Used to indicate an alternative connected form made up of the
// characters surrounding the ZWJ in Devanagari, Kannada, Malayalam
// and Emoji.
// 0x200d, 1, // Not ignored.
// End of the ignored character list.
0, 0
}; };


// alternatively, ignore characters but allow zero-width-non-joiner (lang-fa) // alternatively, ignore characters but allow zero-width-non-joiner (lang-fa)
static const unsigned short chars_ignore_zwnj_hyphen[] = { static const unsigned short chars_ignore_zwnj_hyphen[] = {
0xad, 1, // soft hyphen
0x640, 1, // igniore Arabic Tatweel (lang=FA)
0x200c, '-', // zero width non-joiner, replace with hyphen
0x200d, 1, // zero width joiner
0, 0
// U+00AD SOFT HYPHEN
// Used to mark hyphenation points in words for where to split a
// word at the end of a line to provide readable justified text.
0xad, 1,
// U+0640 TATWEEL (KASHIDA)
// Used in Arabic scripts to stretch characters for justifying
// the text.
0x640, 1,
// U+200C ZERO WIDTH NON-JOINER
// Used to prevent combined ligatures being displayed in their
// combined form.
0x200c, '-',
// U+200D ZERO WIDTH JOINER
// Used to indicate an alternative connected form made up of the
// characters surrounding the ZWJ in Devanagari, Kannada, Malayalam
// and Emoji.
// 0x200d, 1, // Not ignored.
// End of the ignored character list.
0, 0
}; };


const unsigned char utf8_ordinal[] = { 0xc2, 0xba, 0 }; // masculine ordinal character, UTF-8 const unsigned char utf8_ordinal[] = { 0xc2, 0xba, 0 }; // masculine ordinal character, UTF-8

+ 6
- 6
src/libespeak-ng/translate.c View File

if (end2) { if (end2) {
RemoveEnding(tr, wordx, end2, word_copy); RemoveEnding(tr, wordx, end2, word_copy);
end_type = TranslateRules(tr, wordx, phonemes, N_WORD_PHONEMES, end_phonemes, wflags|FLAG_NO_TRACE, dictionary_flags); end_type = TranslateRules(tr, wordx, phonemes, N_WORD_PHONEMES, end_phonemes, wflags|FLAG_NO_TRACE, dictionary_flags);
memcpy(wordx, word_copy, strlen(word_copy));
strcpy(wordx, word_copy);
if ((end_type & SUFX_P) == 0) { if ((end_type & SUFX_P) == 0) {
// after removing the suffix, the prefix is no longer recognised. // after removing the suffix, the prefix is no longer recognised.
// Keep the suffix, but don't use the prefix // Keep the suffix, but don't use the prefix
wordx[-1] = ' '; wordx[-1] = ' ';
if (phonemes[0] == phonSWITCH) { if (phonemes[0] == phonSWITCH) {
// change to another language in order to translate this word // change to another language in order to translate this word
memcpy(wordx, word_copy, strlen(word_copy));
strcpy(wordx, word_copy);
strcpy(word_phonemes, phonemes); strcpy(word_phonemes, phonemes);
return 0; return 0;
} }
found = LookupDictList(tr, &wordx, phonemes, dictionary_flags2, end_flags, wtab); // without prefix and suffix found = LookupDictList(tr, &wordx, phonemes, dictionary_flags2, end_flags, wtab); // without prefix and suffix
if (phonemes[0] == phonSWITCH) { if (phonemes[0] == phonSWITCH) {
// change to another language in order to translate this word // change to another language in order to translate this word
memcpy(wordx, word_copy, strlen(word_copy));
strcpy(wordx, word_copy);
strcpy(word_phonemes, phonemes); strcpy(word_phonemes, phonemes);
return 0; return 0;
} }
if (phonemes[0] == phonSWITCH) { if (phonemes[0] == phonSWITCH) {
// change to another language in order to translate this word // change to another language in order to translate this word
strcpy(word_phonemes, phonemes); strcpy(word_phonemes, phonemes);
memcpy(wordx, word_copy, strlen(word_copy));
strcpy(wordx, word_copy);
wordx[-1] = c_temp; wordx[-1] = c_temp;
return 0; return 0;
} }
AppendPhonemes(tr, phonemes, N_WORD_PHONEMES, end_phonemes); AppendPhonemes(tr, phonemes, N_WORD_PHONEMES, end_phonemes);
end_phonemes[0] = 0; end_phonemes[0] = 0;
} }
memcpy(wordx, word_copy, strlen(word_copy));
strcpy(wordx, word_copy);
} }


wordx[-1] = c_temp; wordx[-1] = c_temp;
char word[N_WORD_BYTES+1]; char word[N_WORD_BYTES+1];
word[0] = 0; word[0] = 0;
word[1] = ' '; word[1] = ' ';
memcpy(word+2, word_out, strlen(word_out));
strcpy(word+2, word_out);
word_out = word+2; word_out = word+2;


while (*word_out && available > 1) { while (*word_out && available > 1) {

+ 21
- 0
tests/phoneme-output.test View File

#!/bin/sh

# test_phonemes LANG EXPECTED TEXT
#
# Runs src/espeak-ng in quiet phoneme-output mode (-x -q) with voice LANG on
# TEXT, writes the result to actual.txt and EXPECTED to expected.txt, then
# diffs the two. On any difference the diff is printed and the whole script
# exits with status 1.
#
#   $1  voice/language name passed to -v
#   $2  expected phoneme string (one line, compared verbatim)
#   $3  text to speak
test_phonemes() {
	TEST_LANG=$1
	EXPECTED=$2
	TEST_TEXT=$3

	echo "testing ${TEST_LANG} \"${TEST_TEXT}\""
	# Run against the in-tree data and library. Only append the caller's
	# LD_LIBRARY_PATH when it is non-empty, so no trailing empty entry
	# (which the dynamic loader treats as the current directory) is added.
	ESPEAK_DATA_PATH="$(pwd)" \
	LD_LIBRARY_PATH="src${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}}" \
		src/espeak-ng -xq -v "${TEST_LANG}" "${TEST_TEXT}" > actual.txt
	# printf instead of echo: echo's handling of backslashes in its
	# argument is implementation-defined, printf '%s\n' is not.
	printf '%s\n' "${EXPECTED}" > expected.txt
	diff expected.txt actual.txt || exit 1
}

# Baseline: a plain ASCII word.
test_phonemes en " h@l'oU" "hello"

# Emoji

# ED-3 - emoji_character [http://www.unicode.org/reports/tr51/tr51-12.html#def_emoji_character]
# Inputs are U+2049 EXCLAMATION QUESTION MARK on its own, and the same
# character followed by U+1F308 RAINBOW.
test_phonemes en " Ekskla#m'eIS@N kw'EstS@n m'A@k" "⁉"
test_phonemes en " Ekskla#m'eIS@N kw'EstS@n m'A@k r'eInboU" "⁉ 🌈"

Loading…
Cancel
Save