Juho Hiltunen 7 years ago
parent
commit
ddaf267aa9

+ 1
- 0
.gitignore View File

@@ -96,6 +96,7 @@ src/speak-ng

tests/*.test
!tests/languages.test
!tests/phoneme-output.test

espeak-ng.pc


+ 1
- 0
Makefile.am View File

@@ -250,6 +250,7 @@ tests_api_test_SOURCES = tests/api.c
check: tests/encoding.check \
tests/readclause.check \
tests/api.check \
tests/phoneme-output.check \
tests/languages.check

##### phoneme data:

+ 4
- 2
dictsource/ka_emoji View File

@@ -926,7 +926,8 @@ $textmode
๐Ÿ˜– แƒจแƒ”แƒชแƒ‘แƒฃแƒœแƒ”แƒ‘แƒฃแƒšแƒ˜ แƒกแƒแƒฎแƒ” // [1F616]
๐Ÿ˜— แƒ™แƒแƒชแƒœแƒ˜แƒก แƒ’แƒแƒ›แƒแƒ›แƒฎแƒแƒขแƒ•แƒ”แƒšแƒ˜ แƒกแƒแƒฎแƒ” // [1F617]
๐Ÿ˜˜ แƒกแƒแƒฎแƒ”, แƒ แƒแƒ›แƒ”แƒšแƒ˜แƒช แƒ™แƒแƒชแƒœแƒแƒก แƒแƒ’แƒ–แƒแƒ•แƒœแƒ˜แƒก // [1F618]
๐Ÿ˜™ แƒ™แƒแƒชแƒœแƒ˜แƒก แƒ’แƒแƒ›แƒแƒ›แƒฎแƒแƒขแƒ•แƒ”แƒšแƒ˜ แƒกแƒแƒฎแƒ” แƒ›แƒแƒ›แƒฆแƒ˜แƒ›แƒแƒ แƒ” แƒ—แƒ•แƒแƒšแƒ”แƒ‘แƒ˜แƒ— // [1F619]
😗 kissing face // [1F617]
//๐Ÿ˜™ แƒ™แƒแƒชแƒœแƒ˜แƒก แƒ’แƒแƒ›แƒแƒ›แƒฎแƒแƒขแƒ•แƒ”แƒšแƒ˜ แƒกแƒแƒฎแƒ” แƒ›แƒแƒ›แƒฆแƒ˜แƒ›แƒแƒ แƒ” แƒ—แƒ•แƒแƒšแƒ”แƒ‘แƒ˜แƒ— // [1F619] TODO: This breaks speaking "7".
๐Ÿ˜š แƒ™แƒแƒชแƒœแƒ˜แƒก แƒ’แƒแƒ›แƒแƒ›แƒฎแƒแƒขแƒ•แƒ”แƒšแƒ˜ แƒกแƒแƒฎแƒ” แƒ“แƒแƒฎแƒฃแƒญแƒฃแƒšแƒ˜ แƒ—แƒ•แƒแƒšแƒ”แƒ‘แƒ˜แƒ— // [1F61A]
๐Ÿ˜› แƒกแƒแƒฎแƒ” แƒ’แƒแƒ›แƒแƒงแƒแƒคแƒ˜แƒšแƒ˜ แƒ”แƒœแƒ˜แƒ— // [1F61B]
๐Ÿ˜œ แƒกแƒแƒฎแƒ” แƒ’แƒแƒ›แƒแƒงแƒแƒคแƒ˜แƒšแƒ˜ แƒ”แƒœแƒ˜แƒ—แƒ แƒ“แƒ แƒฉแƒแƒ™แƒ แƒฃแƒšแƒ˜ แƒ—แƒ•แƒแƒšแƒ˜แƒ— // [1F61C]
@@ -938,7 +939,8 @@ $textmode
๐Ÿ˜ข แƒ›แƒขแƒ˜แƒ แƒแƒšแƒ แƒกแƒแƒฎแƒ” // [1F622]
๐Ÿ˜ฃ แƒจแƒ”แƒฃแƒžแƒแƒ•แƒแƒ แƒ˜ แƒกแƒแƒฎแƒ” // [1F623]
๐Ÿ˜ค แƒกแƒแƒฎแƒ” แƒชแƒฎแƒ•แƒ˜แƒ แƒ˜แƒ“แƒแƒœ แƒ’แƒแƒ›แƒแƒ›แƒแƒ•แƒแƒšแƒ˜ แƒแƒ แƒ—แƒฅแƒšแƒ˜แƒ— // [1F624]
๐Ÿ˜ฅ แƒ˜แƒ›แƒ”แƒ“แƒ’แƒแƒชแƒ แƒฃแƒ”แƒ‘แƒฃแƒšแƒ˜, แƒ›แƒแƒ’แƒ แƒแƒ› แƒจแƒ•แƒ”แƒ‘แƒ˜แƒก แƒ’แƒแƒ›แƒแƒ›แƒฎแƒแƒขแƒ•แƒ”แƒšแƒ˜ แƒกแƒแƒฎแƒ” // [1F625]
😥 disappointed but relieved face // [1F625]
//๐Ÿ˜ฅ แƒ˜แƒ›แƒ”แƒ“แƒ’แƒแƒชแƒ แƒฃแƒ”แƒ‘แƒฃแƒšแƒ˜, แƒ›แƒแƒ’แƒ แƒแƒ› แƒจแƒ•แƒ”แƒ‘แƒ˜แƒก แƒ’แƒแƒ›แƒแƒ›แƒฎแƒแƒขแƒ•แƒ”แƒšแƒ˜ แƒกแƒแƒฎแƒ” // [1F625] TODO: This breaks speaking "3".
๐Ÿ˜ฆ แƒจแƒ”แƒญแƒ›แƒฃแƒฎแƒœแƒ˜แƒšแƒ˜ แƒกแƒแƒฎแƒ” แƒฆแƒ˜แƒ แƒžแƒ˜แƒ แƒ˜แƒ— // [1F626]
๐Ÿ˜ง แƒขแƒแƒœแƒฏแƒฃแƒšแƒ˜ แƒกแƒแƒฎแƒ” // [1F627]
๐Ÿ˜จ แƒจแƒ”แƒจแƒ˜แƒœแƒ”แƒ‘แƒฃแƒšแƒ˜ แƒกแƒแƒฎแƒ” // [1F628]

+ 2
- 2
src/libespeak-ng/dictionary.c View File

@@ -2394,7 +2394,7 @@ int TranslateRules(Translator *tr, char *p_start, char *phonemes, int ph_size, c
match1.end_type |= p - p_start;
}
strcpy(end_phonemes, match1.phonemes);
memcpy(p_start, word_copy, strlen(word_copy));
strcpy(p_start, word_copy);
return match1.end_type;
}
}
@@ -2404,7 +2404,7 @@ int TranslateRules(Translator *tr, char *p_start, char *phonemes, int ph_size, c
}
}

memcpy(p_start, word_copy, strlen(word_copy));
strcpy(p_start, word_copy);

return 0;
}

+ 34
- 9
src/libespeak-ng/tr_languages.c View File

@@ -158,19 +158,44 @@ static void SetLetterBitsRange(Translator *tr, int group, int first, int last)

// ignore these characters
static const unsigned short chars_ignore_default[] = {
0xad, 1, // soft hyphen
0x200c, 1, // zero width non-joiner
0x200d, 1, // zero width joiner
0, 0
// U+00AD SOFT HYPHEN
// Used to mark hyphenation points in words for where to split a
// word at the end of a line to provide readable justified text.
0xad, 1,
// U+200C ZERO WIDTH NON-JOINER
// Used to prevent combined ligatures being displayed in their
// combined form.
0x200c, 1,
// U+200D ZERO WIDTH JOINER
// Used to indicate an alternative connected form made up of the
// characters surrounding the ZWJ in Devanagari, Kannada, Malayalam
// and Emoji.
// 0x200d, 1, // Not ignored.
// End of the ignored character list.
0, 0
};

// alternatively, ignore characters but allow zero-width-non-joiner (lang-fa)
static const unsigned short chars_ignore_zwnj_hyphen[] = {
0xad, 1, // soft hyphen
0x640, 1, // ignore Arabic Tatweel (lang=FA)
0x200c, '-', // zero width non-joiner, replace with hyphen
0x200d, 1, // zero width joiner
0, 0
// U+00AD SOFT HYPHEN
// Used to mark hyphenation points in words for where to split a
// word at the end of a line to provide readable justified text.
0xad, 1,
// U+0640 TATWEEL (KASHIDA)
// Used in Arabic scripts to stretch characters for justifying
// the text.
0x640, 1,
// U+200C ZERO WIDTH NON-JOINER
// Used to prevent combined ligatures being displayed in their
// combined form.
0x200c, '-',
// U+200D ZERO WIDTH JOINER
// Used to indicate an alternative connected form made up of the
// characters surrounding the ZWJ in Devanagari, Kannada, Malayalam
// and Emoji.
// 0x200d, 1, // Not ignored.
// End of the ignored character list.
0, 0
};

// UTF-8 byte sequence for U+00BA (masculine ordinal character), NUL-terminated
const unsigned char utf8_ordinal[] = { 0xC2, 0xBA, 0x00 };

+ 6
- 6
src/libespeak-ng/translate.c View File

@@ -795,7 +795,7 @@ static int TranslateWord3(Translator *tr, char *word_start, WORD_TAB *wtab, char
if (end2) {
RemoveEnding(tr, wordx, end2, word_copy);
end_type = TranslateRules(tr, wordx, phonemes, N_WORD_PHONEMES, end_phonemes, wflags|FLAG_NO_TRACE, dictionary_flags);
memcpy(wordx, word_copy, strlen(word_copy));
strcpy(wordx, word_copy);
if ((end_type & SUFX_P) == 0) {
// after removing the suffix, the prefix is no longer recognised.
// Keep the suffix, but don't use the prefix
@@ -902,7 +902,7 @@ static int TranslateWord3(Translator *tr, char *word_start, WORD_TAB *wtab, char
wordx[-1] = ' ';
if (phonemes[0] == phonSWITCH) {
// change to another language in order to translate this word
memcpy(wordx, word_copy, strlen(word_copy));
strcpy(wordx, word_copy);
strcpy(word_phonemes, phonemes);
return 0;
}
@@ -920,7 +920,7 @@ static int TranslateWord3(Translator *tr, char *word_start, WORD_TAB *wtab, char
found = LookupDictList(tr, &wordx, phonemes, dictionary_flags2, end_flags, wtab); // without prefix and suffix
if (phonemes[0] == phonSWITCH) {
// change to another language in order to translate this word
memcpy(wordx, word_copy, strlen(word_copy));
strcpy(wordx, word_copy);
strcpy(word_phonemes, phonemes);
return 0;
}
@@ -960,7 +960,7 @@ static int TranslateWord3(Translator *tr, char *word_start, WORD_TAB *wtab, char
if (phonemes[0] == phonSWITCH) {
// change to another language in order to translate this word
strcpy(word_phonemes, phonemes);
memcpy(wordx, word_copy, strlen(word_copy));
strcpy(wordx, word_copy);
wordx[-1] = c_temp;
return 0;
}
@@ -974,7 +974,7 @@ static int TranslateWord3(Translator *tr, char *word_start, WORD_TAB *wtab, char
AppendPhonemes(tr, phonemes, N_WORD_PHONEMES, end_phonemes);
end_phonemes[0] = 0;
}
memcpy(wordx, word_copy, strlen(word_copy));
strcpy(wordx, word_copy);
}

wordx[-1] = c_temp;
@@ -1139,7 +1139,7 @@ int TranslateWord(Translator *tr, char *word_start, WORD_TAB *wtab, char *word_o
char word[N_WORD_BYTES+1];
word[0] = 0;
word[1] = ' ';
memcpy(word+2, word_out, strlen(word_out));
strcpy(word+2, word_out);
word_out = word+2;

while (*word_out && available > 1) {

+ 21
- 0
tests/phoneme-output.test View File

@@ -0,0 +1,21 @@
#!/bin/sh

# Run espeak-ng on a piece of text and compare its output with an expected
# phoneme string.
#   $1  voice/language name, passed to `espeak-ng -v`
#   $2  expected output (must match exactly; compared with diff)
#   $3  text handed to espeak-ng
# Writes expected.txt and actual.txt in the current directory and exits the
# whole script with status 1 on the first mismatch.
test_phonemes() {
	TEST_LANG=$1
	EXPECTED=$2
	TEST_TEXT=$3

	echo "testing ${TEST_LANG} \"${TEST_TEXT}\""
	# Quote every expansion so a value containing spaces or glob characters
	# is passed through as a single, literal argument.
	ESPEAK_DATA_PATH="$(pwd)" LD_LIBRARY_PATH="src:${LD_LIBRARY_PATH}" \
		src/espeak-ng -xq -v "${TEST_LANG}" "${TEST_TEXT}" > actual.txt
	# printf, unlike echo, never interprets backslashes or a leading "-"
	# in the expected string.
	printf '%s\n' "${EXPECTED}" > expected.txt
	diff expected.txt actual.txt || exit 1
}

test_phonemes en " h@l'oU" "hello"

# Emoji

# ED-3 - emoji_character [http://www.unicode.org/reports/tr51/tr51-12.html#def_emoji_character]
# U+2049 EXCLAMATION QUESTION MARK
test_phonemes en " Ekskla#m'eIS@N kw'EstS@n m'A@k" "⁉"
# U+2049 EXCLAMATION QUESTION MARK followed by U+1F308 RAINBOW
test_phonemes en " Ekskla#m'eIS@N kw'EstS@n m'A@k r'eInboU" "⁉ 🌈"

Loading…
Cancel
Save