When set in a language file, a period followed by a lowercase letter is detected as the end of a sentence. The default behavior is to require a capital letter.
master
!tests/cmd_options.test | !tests/cmd_options.test | ||||
!tests/windows-data.test | !tests/windows-data.test | ||||
!tests/windows-installer.test | !tests/windows-installer.test | ||||
!tests/voices.test | |||||
espeak-ng.pc | espeak-ng.pc | ||||
* Add more tests to check the various parts of espeak-ng. | * Add more tests to check the various parts of espeak-ng. | ||||
* Various changes to clean up the codebase. | * Various changes to clean up the codebase. | ||||
* Restructure "option brackets" language option to "brackets" and "bracketsAnnounced". | * Restructure "option brackets" language option to "brackets" and "bracketsAnnounced". | ||||
* New Language option: "lowercaseSentence" for ending a sentence if a period is followed by a lower case letter. | |||||
* Added voice variants | * Added voice variants | ||||
documentation: | documentation: | ||||
* grc (Ancient Greek) -- Reece Dunn (support for variant Greek letter forms) | * grc (Ancient Greek) -- Reece Dunn (support for variant Greek letter forms) | ||||
* hak (Hakka Chinese) -- Juho Hiltunen | * hak (Hakka Chinese) -- Juho Hiltunen | ||||
* haw (Hawaiian) -- Juho Hiltunen | * haw (Hawaiian) -- Juho Hiltunen | ||||
* ka (Georgian) -- Juho Hiltunen | |||||
* kok (Konkani) -- Juho Hiltunen | * kok (Konkani) -- Juho Hiltunen | ||||
* nb (Norwegian Bokmål) -- Juho Hiltunen | * nb (Norwegian Bokmål) -- Juho Hiltunen | ||||
* nci (Classical Nahuatl) -- Juho Hiltunen | * nci (Classical Nahuatl) -- Juho Hiltunen |
tests/variants.check \ | tests/variants.check \ | ||||
tests/windows-installer.check \ | tests/windows-installer.check \ | ||||
tests/bom.check \ | tests/bom.check \ | ||||
tests/voices.check \ | |||||
tests/cmd_options.check | tests/cmd_options.check | ||||
##### fuzzer: | ##### fuzzer: |
- [phonemes](#phonemes) | - [phonemes](#phonemes) | ||||
- [dictionary](#dictionary) | - [dictionary](#dictionary) | ||||
- [dictrules](#dictrules) | - [dictrules](#dictrules) | ||||
- [lowercaseSentence](#lowercaseSentence) | |||||
- [replace](#replace) | - [replace](#replace) | ||||
- [stressRule](#stressrule) | - [stressRule](#stressrule) | ||||
- [stressLength](#stresslength) | - [stressLength](#stresslength) | ||||
dictionary file and also its `*_list` exceptions list. See | dictionary file and also its `*_list` exceptions list. See | ||||
[Text to Phoneme Translation](dictionary.md#conditional-rules). | [Text to Phoneme Translation](dictionary.md#conditional-rules). | ||||
### lowercaseSentence | |||||
lowercaseSentence <no arguments> | |||||
By default, a sentence end is detected if a period `.` is followed by an uppercase letter. | |||||
When lowercaseSentence is set, a period followed by a lowercase letter is also handled as end of sentence. | |||||
Note that other conditions, such as abbreviations, might override this setting. | |||||
### replace | ### replace | ||||
replace <flags> <phoneme> <replacement phoneme> | replace <flags> <phoneme> <replacement phoneme> |
name Georgian | name Georgian | ||||
language ka | language ka | ||||
lowercaseSentence // A period followed by a lowercase letter is considered the end of a sentence |
else if (iswlower(c_next) || (c_next == '-')) // hyphen is needed for lang-hu (eg. 2.-kal) | else if (iswlower(c_next) || (c_next == '-')) // hyphen is needed for lang-hu (eg. 2.-kal) | ||||
is_end_clause = false; // only if followed by lower-case, (or if there is a XML tag) | is_end_clause = false; // only if followed by lower-case, (or if there is a XML tag) | ||||
} | } | ||||
if (iswlower(c_next)) { | |||||
if (iswlower(c_next) && tr->langopts.lowercase_sentence == false) { | |||||
// next word has no capital letter, this dot is probably from an abbreviation | // next word has no capital letter, this dot is probably from an abbreviation | ||||
is_end_clause = 0; | |||||
is_end_clause = false; | |||||
} | } | ||||
if (any_alnum == false) { | if (any_alnum == false) { | ||||
// no letters or digits yet, so probably not a sentence terminator | // no letters or digits yet, so probably not a sentence terminator |
tr->langopts.replace_chars = NULL; | tr->langopts.replace_chars = NULL; | ||||
tr->langopts.alt_alphabet_lang = L('e', 'n'); | tr->langopts.alt_alphabet_lang = L('e', 'n'); | ||||
tr->langopts.roman_suffix = utf8_null; | tr->langopts.roman_suffix = utf8_null; | ||||
tr->langopts.lowercase_sentence = false; | |||||
SetLengthMods(tr, 201); | SetLengthMods(tr, 201); | ||||
int max_lengthmod; | int max_lengthmod; | ||||
int lengthen_tonic; // lengthen the tonic syllable | int lengthen_tonic; // lengthen the tonic syllable | ||||
int suffix_add_e; // replace a suffix (which has the SUFX_E flag) with this character | int suffix_add_e; // replace a suffix (which has the SUFX_E flag) with this character | ||||
bool lowercase_sentence; // when true, a period . causes a sentence stop even if next character is lowercase | |||||
} LANGUAGE_OPTIONS; | } LANGUAGE_OPTIONS; | ||||
typedef struct { | typedef struct { | ||||
int end_stressed_vowel; // word ends with stressed vowel | int end_stressed_vowel; // word ends with stressed vowel | ||||
int prev_dict_flags[2]; // dictionary flags from previous word | int prev_dict_flags[2]; // dictionary flags from previous word | ||||
int clause_terminator; | int clause_terminator; | ||||
} Translator; | } Translator; | ||||
#define OPTION_EMPHASIZE_ALLCAPS 0x100 | #define OPTION_EMPHASIZE_ALLCAPS 0x100 |
V_BREATHW, | V_BREATHW, | ||||
// these override defaults set by the translator | // these override defaults set by the translator | ||||
V_LOWERCASE_SENTENCE, | |||||
V_WORDGAP, | V_WORDGAP, | ||||
V_INTONATION, | V_INTONATION, | ||||
V_TUNES, | V_TUNES, | ||||
{ "maintainer", V_MAINTAINER }, | { "maintainer", V_MAINTAINER }, | ||||
{ "status", V_STATUS }, | { "status", V_STATUS }, | ||||
{ "lowercaseSentence", V_LOWERCASE_SENTENCE }, | |||||
{ "variants", V_VARIANTS }, | { "variants", V_VARIANTS }, | ||||
{ "formant", V_FORMANT }, | { "formant", V_FORMANT }, | ||||
{ "pitch", V_PITCH }, | { "pitch", V_PITCH }, | ||||
case V_FORMANT: | case V_FORMANT: | ||||
VoiceFormant(p); | VoiceFormant(p); | ||||
break; | break; | ||||
case V_LOWERCASE_SENTENCE: { | |||||
if (langopts) | |||||
langopts->lowercase_sentence = true; | |||||
else | |||||
fprintf(stderr, "Cannot set lowercaseSentence: language not set, or is invalid.\n"); | |||||
break; | |||||
} | |||||
case V_PITCH: | case V_PITCH: | ||||
// default is pitch 82 118 | // default is pitch 82 118 | ||||
if (sscanf(p, "%d %d", &pitch1, &pitch2) == 2) { | if (sscanf(p, "%d %d", &pitch1, &pitch2) == 2) { |
#!/bin/sh | |||||
# test_voices LANG MESSAGE EXPECTED TEXT [broken]
#
# Runs src/espeak-ng on TEXT with voice LANG, stores its standard output in
# actual.txt, stores EXPECTED in expected.txt, and diffs the two files.
#
#   $1  LANG      voice name passed to -v
#   $2  MESSAGE   label appended to the progress line; "-" prints no label
#   $3  EXPECTED  output the run is expected to produce
#   $4  TEXT      text handed to espeak-ng
#   $5  optional; when it is the word "broken", a mismatch is reported but
#       does not abort the script (defaults to "false")
#
# On a mismatch that is not marked broken, the whole script exits with 1.
test_voices() {
	TEST_LANG=$1
	MESSAGE=$2
	EXPECTED=$3
	TEST_TEXT=$4
	TEST_BROKEN=${5:-false}

	if [ "x$MESSAGE" = x- ] ; then
		echo "testing ${TEST_LANG}"
	else
		echo "testing ${TEST_LANG} ($MESSAGE)"
	fi

	# ${OPTIONS} is intentionally left unquoted so that it may expand to
	# several arguments or to nothing.
	# NOTE(review): confirm callers rely on this word splitting.
	ESPEAK_DATA_PATH="$(pwd)" LD_LIBRARY_PATH="src:${LD_LIBRARY_PATH}" \
		src/espeak-ng ${OPTIONS} -xq -v "${TEST_LANG}" "${TEST_TEXT}" > actual.txt
	echo "${EXPECTED}" > expected.txt

	# Quote the operand so an empty or multi-word value cannot break the test.
	if [ "x$TEST_BROKEN" = xbroken ] ; then
		# Known-broken case: show the diff but keep going.
		diff expected.txt actual.txt || echo "... ignoring error (broken)"
	else
		diff expected.txt actual.txt || exit 1
	fi
}
##### Voice options | |||||
test_voices ka "lowercaseSentence" "s'asi,amovn,oa t#k#v'eni g'ats#noba | |||||
r'ogor brdz'andebit#" "სასიამოვნოა თქვენი გაცნობა. როგორ ბრძანდებით" #872 |