When set in a language file, a period followed by a lowercase letter is detected as the end of a sentence. The normal behavior is to require a capital letter.
master
| @@ -118,6 +118,7 @@ tests/*.check | |||
| !tests/cmd_options.test | |||
| !tests/windows-data.test | |||
| !tests/windows-installer.test | |||
| !tests/voices.test | |||
| espeak-ng.pc | |||
| @@ -13,6 +13,7 @@ The espeak-ng project is a fork of the espeak project. | |||
| * Add more tests to check the various parts of espeak-ng. | |||
| * Various changes to clean up the codebase. | |||
| * Restructure "option brackets" language option to "brackets" and "bracketsAnnounced". | |||
| * New language option: "lowercaseSentence", which ends a sentence when a period is followed by a lowercase letter. | |||
| * Added voice variants | |||
| documentation: | |||
| @@ -29,6 +30,7 @@ updated languages: | |||
| * grc (Ancient Greek) -- Reece Dunn (support for variant Greek letter forms) | |||
| * hak (Hakka Chinese) -- Juho Hiltunen | |||
| * haw (Hawaiian) -- Juho Hiltunen | |||
| * ka (Georgian) -- Juho Hiltunen | |||
| * kok (Konkani) -- Juho Hiltunen | |||
| * nb (Norwegian Bokmål) -- Juho Hiltunen | |||
| * nci (Classical Nahuatl) -- Juho Hiltunen | |||
| @@ -275,6 +275,7 @@ check: tests/encoding.check \ | |||
| tests/variants.check \ | |||
| tests/windows-installer.check \ | |||
| tests/bom.check \ | |||
| tests/voices.check \ | |||
| tests/cmd_options.check | |||
| ##### fuzzer: | |||
| @@ -26,6 +26,7 @@ | |||
| - [phonemes](#phonemes) | |||
| - [dictionary](#dictionary) | |||
| - [dictrules](#dictrules) | |||
| - [lowercaseSentence](#lowercasesentence) | |||
| - [replace](#replace) | |||
| - [stressRule](#stressrule) | |||
| - [stressLength](#stresslength) | |||
| @@ -385,6 +386,16 @@ language dictionary. They apply to rules in the language's `*_rules` | |||
| dictionary file and also its `*_list` exceptions list. See | |||
| [Text to Phoneme Translation](dictionary.md#conditional-rules). | |||
| ### lowercaseSentence | |||
| lowercaseSentence <no arguments> | |||
| By default, a sentence end is detected if a period `.` is followed by an uppercase letter. | |||
| When lowercaseSentence is set, a period followed by a lowercase letter is also treated as the end of a sentence. | |||
| Note that other conditions, such as abbreviations, might override this setting. | |||
| ### replace | |||
| replace <flags> <phoneme> <replacement phoneme> | |||
| @@ -1,2 +1,3 @@ | |||
| name Georgian | |||
| language ka | |||
| lowercaseSentence // A period followed by a lowercase letter is considered the end of a sentence | |||
| @@ -876,9 +876,9 @@ int ReadClause(Translator *tr, char *buf, short *charix, int *charix_top, int n_ | |||
| else if (iswlower(c_next) || (c_next == '-')) // hyphen is needed for lang-hu (eg. 2.-kal) | |||
| is_end_clause = false; // only if followed by lower-case, (or if there is a XML tag) | |||
| } | |||
| if (iswlower(c_next)) { | |||
| if (iswlower(c_next) && tr->langopts.lowercase_sentence == false) { | |||
| // next word has no capital letter, this dot is probably from an abbreviation | |||
| is_end_clause = 0; | |||
| is_end_clause = false; | |||
| } | |||
| if (any_alnum == false) { | |||
| // no letters or digits yet, so probably not a sentence terminator | |||
| @@ -295,6 +295,7 @@ static Translator *NewTranslator(void) | |||
| tr->langopts.replace_chars = NULL; | |||
| tr->langopts.alt_alphabet_lang = L('e', 'n'); | |||
| tr->langopts.roman_suffix = utf8_null; | |||
| tr->langopts.lowercase_sentence = false; | |||
| SetLengthMods(tr, 201); | |||
| @@ -540,6 +540,7 @@ typedef struct { | |||
| int max_lengthmod; | |||
| int lengthen_tonic; // lengthen the tonic syllable | |||
| int suffix_add_e; // replace a suffix (which has the SUFX_E flag) with this character | |||
| bool lowercase_sentence; // when true, a period . causes a sentence stop even if next character is lowercase | |||
| } LANGUAGE_OPTIONS; | |||
| typedef struct { | |||
| @@ -608,6 +609,7 @@ typedef struct { | |||
| int end_stressed_vowel; // word ends with stressed vowel | |||
| int prev_dict_flags[2]; // dictionary flags from previous word | |||
| int clause_terminator; | |||
| } Translator; | |||
| #define OPTION_EMPHASIZE_ALLCAPS 0x100 | |||
| @@ -92,6 +92,7 @@ enum { | |||
| V_BREATHW, | |||
| // these override defaults set by the translator | |||
| V_LOWERCASE_SENTENCE, | |||
| V_WORDGAP, | |||
| V_INTONATION, | |||
| V_TUNES, | |||
| @@ -122,6 +123,8 @@ static MNEM_TAB keyword_tab[] = { | |||
| { "maintainer", V_MAINTAINER }, | |||
| { "status", V_STATUS }, | |||
| { "lowercaseSentence", V_LOWERCASE_SENTENCE }, | |||
| { "variants", V_VARIANTS }, | |||
| { "formant", V_FORMANT }, | |||
| { "pitch", V_PITCH }, | |||
| @@ -654,6 +657,14 @@ voice_t *LoadVoice(const char *vname, int control) | |||
| case V_FORMANT: | |||
| VoiceFormant(p); | |||
| break; | |||
| case V_LOWERCASE_SENTENCE: { | |||
| if (langopts) | |||
| langopts->lowercase_sentence = true; | |||
| else | |||
| fprintf(stderr, "Cannot set lowercaseSentence: language not set, or is invalid.\n"); | |||
| break; | |||
| } | |||
| case V_PITCH: | |||
| // default is pitch 82 118 | |||
| if (sscanf(p, "%d %d", &pitch1, &pitch2) == 2) { | |||
| @@ -0,0 +1,27 @@ | |||
| #!/bin/sh | |||
| test_voices() { | |||
| TEST_LANG=$1 | |||
| MESSAGE=$2 | |||
| EXPECTED=$3 | |||
| TEST_TEXT=$4 | |||
| TEST_BROKEN=${5:-false} | |||
| if [ "x$MESSAGE" = x- ] ; then | |||
| echo "testing ${TEST_LANG}" | |||
| else | |||
| echo "testing ${TEST_LANG} ($MESSAGE)" | |||
| fi | |||
| ESPEAK_DATA_PATH=`pwd` LD_LIBRARY_PATH=src:${LD_LIBRARY_PATH} \ | |||
| src/espeak-ng ${OPTIONS} -xq -v ${TEST_LANG} "${TEST_TEXT}" > actual.txt | |||
| echo "${EXPECTED}" > expected.txt | |||
| if [ x$TEST_BROKEN = xbroken ] ; then | |||
| diff expected.txt actual.txt || (echo "... ignoring error (broken)" && true) | |||
| else | |||
| diff expected.txt actual.txt || exit 1 | |||
| fi | |||
| } | |||
| ##### Voice options | |||
| test_voices ka "lowercaseSentence" "s'asi,amovn,oa t#k#v'eni g'ats#noba | |||
| r'ogor brdz'andebit#" "სასიამოვნოა თქვენი გაცნობა. როგორ ბრძანდებით" #872 | |||