When set in a language file, a period followed by a lowercase letter is detected as the end of a sentence. Normal behavior is to require a capital letter.
master
| !tests/cmd_options.test | !tests/cmd_options.test | ||||
| !tests/windows-data.test | !tests/windows-data.test | ||||
| !tests/windows-installer.test | !tests/windows-installer.test | ||||
| !tests/voices.test | |||||
| espeak-ng.pc | espeak-ng.pc | ||||
| * Add more tests to check the various parts of espeak-ng. | * Add more tests to check the various parts of espeak-ng. | ||||
| * Various changes to clean up the codebase. | * Various changes to clean up the codebase. | ||||
| * Restructure "option brackets" language option to "brackets" and "bracketsAnnounced". | * Restructure "option brackets" language option to "brackets" and "bracketsAnnounced". | ||||
| * New Language option: "lowercaseSentence" for ending a sentence if a period is followed by a lower case letter. | |||||
| * Added voice variants | * Added voice variants | ||||
| documentation: | documentation: | ||||
| * grc (Ancient Greek) -- Reece Dunn (support for variant Greek letter forms) | * grc (Ancient Greek) -- Reece Dunn (support for variant Greek letter forms) | ||||
| * hak (Hakka Chinese) -- Juho Hiltunen | * hak (Hakka Chinese) -- Juho Hiltunen | ||||
| * haw (Hawaiian) -- Juho Hiltunen | * haw (Hawaiian) -- Juho Hiltunen | ||||
| * ka (Georgian) -- Juho Hiltunen | |||||
| * kok (Konkani) -- Juho Hiltunen | * kok (Konkani) -- Juho Hiltunen | ||||
| * nb (Norwegian Bokmål) -- Juho Hiltunen | * nb (Norwegian Bokmål) -- Juho Hiltunen | ||||
| * nci (Classical Nahuatl) -- Juho Hiltunen | * nci (Classical Nahuatl) -- Juho Hiltunen |
| tests/variants.check \ | tests/variants.check \ | ||||
| tests/windows-installer.check \ | tests/windows-installer.check \ | ||||
| tests/bom.check \ | tests/bom.check \ | ||||
| tests/voices.check \ | |||||
| tests/cmd_options.check | tests/cmd_options.check | ||||
| ##### fuzzer: | ##### fuzzer: |
| - [phonemes](#phonemes) | - [phonemes](#phonemes) | ||||
| - [dictionary](#dictionary) | - [dictionary](#dictionary) | ||||
| - [dictrules](#dictrules) | - [dictrules](#dictrules) | ||||
| - [lowercaseSentence](#lowercasesentence) | |||||
| - [replace](#replace) | - [replace](#replace) | ||||
| - [stressRule](#stressrule) | - [stressRule](#stressrule) | ||||
| - [stressLength](#stresslength) | - [stressLength](#stresslength) | ||||
| dictionary file and also its `*_list` exceptions list. See | dictionary file and also its `*_list` exceptions list. See | ||||
| [Text to Phoneme Translation](dictionary.md#conditional-rules). | [Text to Phoneme Translation](dictionary.md#conditional-rules). | ||||
| ### lowercaseSentence | |||||
| lowercaseSentence <no arguments> | |||||
| By default, a sentence end is detected if a period `.` is followed by an uppercase letter. | |||||
| When lowercaseSentence is set, a period followed by a lowercase letter is also handled as end of sentence. | |||||
| Note that other conditions, such as abbreviations, might override this setting. | |||||
| ### replace | ### replace | ||||
| replace <flags> <phoneme> <replacement phoneme> | replace <flags> <phoneme> <replacement phoneme> |
| name Georgian | name Georgian | ||||
| language ka | language ka | ||||
| lowercaseSentence // A period followed by a lowercase letter is considered a sentence |
| else if (iswlower(c_next) || (c_next == '-')) // hyphen is needed for lang-hu (eg. 2.-kal) | else if (iswlower(c_next) || (c_next == '-')) // hyphen is needed for lang-hu (eg. 2.-kal) | ||||
| is_end_clause = false; // only if followed by lower-case, (or if there is a XML tag) | is_end_clause = false; // only if followed by lower-case, (or if there is a XML tag) | ||||
| } | } | ||||
| if (iswlower(c_next)) { | |||||
| if (iswlower(c_next) && tr->langopts.lowercase_sentence == false) { | |||||
| // next word has no capital letter, this dot is probably from an abbreviation | // next word has no capital letter, this dot is probably from an abbreviation | ||||
| is_end_clause = 0; | |||||
| is_end_clause = false; | |||||
| } | } | ||||
| if (any_alnum == false) { | if (any_alnum == false) { | ||||
| // no letters or digits yet, so probably not a sentence terminator | // no letters or digits yet, so probably not a sentence terminator |
| tr->langopts.replace_chars = NULL; | tr->langopts.replace_chars = NULL; | ||||
| tr->langopts.alt_alphabet_lang = L('e', 'n'); | tr->langopts.alt_alphabet_lang = L('e', 'n'); | ||||
| tr->langopts.roman_suffix = utf8_null; | tr->langopts.roman_suffix = utf8_null; | ||||
| tr->langopts.lowercase_sentence = false; | |||||
| SetLengthMods(tr, 201); | SetLengthMods(tr, 201); | ||||
| int max_lengthmod; | int max_lengthmod; | ||||
| int lengthen_tonic; // lengthen the tonic syllable | int lengthen_tonic; // lengthen the tonic syllable | ||||
| int suffix_add_e; // replace a suffix (which has the SUFX_E flag) with this character | int suffix_add_e; // replace a suffix (which has the SUFX_E flag) with this character | ||||
| bool lowercase_sentence; // when true, a period . causes a sentence stop even if next character is lowercase | |||||
| } LANGUAGE_OPTIONS; | } LANGUAGE_OPTIONS; | ||||
| typedef struct { | typedef struct { | ||||
| int end_stressed_vowel; // word ends with stressed vowel | int end_stressed_vowel; // word ends with stressed vowel | ||||
| int prev_dict_flags[2]; // dictionary flags from previous word | int prev_dict_flags[2]; // dictionary flags from previous word | ||||
| int clause_terminator; | int clause_terminator; | ||||
| } Translator; | } Translator; | ||||
| #define OPTION_EMPHASIZE_ALLCAPS 0x100 | #define OPTION_EMPHASIZE_ALLCAPS 0x100 |
| V_BREATHW, | V_BREATHW, | ||||
| // these override defaults set by the translator | // these override defaults set by the translator | ||||
| V_LOWERCASE_SENTENCE, | |||||
| V_WORDGAP, | V_WORDGAP, | ||||
| V_INTONATION, | V_INTONATION, | ||||
| V_TUNES, | V_TUNES, | ||||
| { "maintainer", V_MAINTAINER }, | { "maintainer", V_MAINTAINER }, | ||||
| { "status", V_STATUS }, | { "status", V_STATUS }, | ||||
| { "lowercaseSentence", V_LOWERCASE_SENTENCE }, | |||||
| { "variants", V_VARIANTS }, | { "variants", V_VARIANTS }, | ||||
| { "formant", V_FORMANT }, | { "formant", V_FORMANT }, | ||||
| { "pitch", V_PITCH }, | { "pitch", V_PITCH }, | ||||
| case V_FORMANT: | case V_FORMANT: | ||||
| VoiceFormant(p); | VoiceFormant(p); | ||||
| break; | break; | ||||
| case V_LOWERCASE_SENTENCE: { | |||||
| if (langopts) | |||||
| langopts->lowercase_sentence = true; | |||||
| else | |||||
| fprintf(stderr, "Cannot set lowercaseSentence: language not set, or is invalid.\n"); | |||||
| break; | |||||
| } | |||||
| case V_PITCH: | case V_PITCH: | ||||
| // default is pitch 82 118 | // default is pitch 82 118 | ||||
| if (sscanf(p, "%d %d", &pitch1, &pitch2) == 2) { | if (sscanf(p, "%d %d", &pitch1, &pitch2) == 2) { |
| #!/bin/sh | |||||
| # Run espeak-ng on one text with one voice and compare what it prints with the expected output. | |||||
| #   $1 TEST_LANG    voice name passed to -v | |||||
| #   $2 MESSAGE      label shown in the progress line; "-" prints the voice name only | |||||
| #   $3 EXPECTED     expected output; written to expected.txt for the diff | |||||
| #   $4 TEST_TEXT    input text handed to espeak-ng | |||||
| #   $5 TEST_BROKEN  optional, defaults to "false"; "broken" reports a mismatch without failing | |||||
| # Also reads ${OPTIONS} from the environment (expanded unquoted, so it may carry several flags). | |||||
| # Side effects: overwrites actual.txt and expected.txt in the current directory. | |||||
| test_voices() { | |||||
| TEST_LANG=$1 | |||||
| MESSAGE=$2 | |||||
| EXPECTED=$3 | |||||
| TEST_TEXT=$4 | |||||
| TEST_BROKEN=${5:-false} | |||||
| if [ "x$MESSAGE" = x- ] ; then | |||||
| echo "testing ${TEST_LANG}" | |||||
| else | |||||
| echo "testing ${TEST_LANG} ($MESSAGE)" | |||||
| fi | |||||
| # Use the data directory and the library from the build tree (current directory and ./src). | |||||
| # NOTE(review): -xq presumably selects phoneme output with no audio; confirm against the espeak-ng CLI docs. | |||||
| ESPEAK_DATA_PATH=`pwd` LD_LIBRARY_PATH=src:${LD_LIBRARY_PATH} \ | |||||
| src/espeak-ng ${OPTIONS} -xq -v ${TEST_LANG} "${TEST_TEXT}" > actual.txt | |||||
| echo "${EXPECTED}" > expected.txt | |||||
| if [ x$TEST_BROKEN = xbroken ] ; then | |||||
| # Known-broken case: show the diff but keep the overall status successful. | |||||
| diff expected.txt actual.txt || (echo "... ignoring error (broken)" && true) | |||||
| else | |||||
| # Any difference terminates the whole script with status 1 (exit, not return). | |||||
| diff expected.txt actual.txt || exit 1 | |||||
| fi | |||||
| } | |||||
| ##### Voice options | |||||
| # The ka voice sets lowercaseSentence; the expected output is two lines for input containing one period. | |||||
| # NOTE(review): the trailing #872 looks like an issue/PR reference; confirm. | |||||
| test_voices ka "lowercaseSentence" "s'asi,amovn,oa t#k#v'eni g'ats#noba | |||||
| r'ogor brdz'andebit#" "სასიამოვნოა თქვენი გაცნობა. როგორ ბრძანდებით" #872