When set in a language file, a period followed by a lower case letter is detected as end of sentence. Normal behavior is to require a capital letter.

4 years ago · c4740b3053
--- a/.gitignore
+++ b/.gitignore
 !tests/cmd_options.test
 !tests/windows-data.test
 !tests/windows-installer.test
 !tests/voices.test
 espeak-ng.pc
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
 *  Add more tests to check the various parts of espeak-ng.
 *  Various changes to clean up the codebase.
 *  Restructure "option brackets" language option to "brackets" and "bracketsAnnounced".
 *  New Language option: "lowercaseSentence" for ending a sentence if a period is followed by a lower case letter.
 *  Added voice variants
 documentation:
 *  grc (Ancient Greek) -- Reece Dunn (support for variant Greek letter forms)
 *  hak (Hakka Chinese) -- Juho Hiltunen
 *  haw (Hawaiian) -- Juho Hiltunen
 *  ka (Georgian) -- Juho Hiltunen
 *  kok (Konkani) -- Juho Hiltunen
 *  nb (Norwegian Bokmål) -- Juho Hiltunen
 *  nci (Classical Nahuatl) -- Juho Hiltunen
--- a/Makefile.am
+++ b/Makefile.am
 	tests/variants.check \
 	tests/windows-installer.check \
 	tests/bom.check \
 	tests/voices.check \
 	tests/cmd_options.check
 ##### fuzzer:
--- a/docs/voices.md
+++ b/docs/voices.md
  - [phonemes](#phonemes)
  - [dictionary](#dictionary)
  - [dictrules](#dictrules)
  - [lowercaseSentence](#lowercasesentence)
  - [replace](#replace)
  - [stressRule](#stressrule)
  - [stressLength](#stresslength)
 dictionary file and also its `*_list` exceptions list. See
 [Text to Phoneme Translation](dictionary.md#conditional-rules).
 ### lowercaseSentence
 	lowercaseSentence <no arguments>
 By default, a sentence end is detected if a period `.` is followed by an uppercase letter.
 When lowercaseSentence is set, a period followed by a lowercase letter is also handled as end of sentence.
 Note that other conditions, such as abbreviations, might override this setting.
 ### replace
 	replace <flags> <phoneme> <replacement phoneme>
--- a/espeak-ng-data/lang/ccs/ka
+++ b/espeak-ng-data/lang/ccs/ka
 name Georgian
 language ka
 lowercaseSentence	// A period followed by a lowercase letter is treated as the end of a sentence
--- a/src/libespeak-ng/readclause.c
+++ b/src/libespeak-ng/readclause.c
 							else if (iswlower(c_next) || (c_next == '-')) // hyphen is needed for lang-hu (eg. 2.-kal)
 								is_end_clause = false; // only if followed by lower-case, (or if there is a XML tag)
 						} 
 						if (iswlower(c_next)) {
 						if (iswlower(c_next) && tr->langopts.lowercase_sentence == false) {
 							// next word has no capital letter, this dot is probably from an abbreviation
 							is_end_clause = 0;
 							is_end_clause = false;
 						}
 						if (any_alnum == false) {
 							// no letters or digits yet, so probably not a sentence terminator
--- a/src/libespeak-ng/tr_languages.c
+++ b/src/libespeak-ng/tr_languages.c
 	tr->langopts.replace_chars = NULL;
 	tr->langopts.alt_alphabet_lang = L('e', 'n');
 	tr->langopts.roman_suffix = utf8_null;
 	tr->langopts.lowercase_sentence = false;
 	SetLengthMods(tr, 201);
--- a/src/libespeak-ng/translate.h
+++ b/src/libespeak-ng/translate.h
 	int max_lengthmod;
 	int lengthen_tonic;   // lengthen the tonic syllable
 	int suffix_add_e;      // replace a suffix (which has the SUFX_E flag) with this character
 	bool lowercase_sentence;	// when true, a period . causes a sentence stop even if next character is lowercase
 } LANGUAGE_OPTIONS;
 typedef struct {
 	int end_stressed_vowel;  // word ends with stressed vowel
 	int prev_dict_flags[2];     // dictionary flags from previous word
 	int clause_terminator;
 } Translator;
 #define OPTION_EMPHASIZE_ALLCAPS  0x100
--- a/src/libespeak-ng/voices.c
+++ b/src/libespeak-ng/voices.c
 	V_BREATHW,
 	// these override defaults set by the translator
 	V_LOWERCASE_SENTENCE,
 	V_WORDGAP,
 	V_INTONATION,
 	V_TUNES,
 	{ "maintainer",   V_MAINTAINER },
 	{ "status",       V_STATUS },
 	{ "lowercaseSentence",	V_LOWERCASE_SENTENCE },
 	{ "variants",     V_VARIANTS },
 	{ "formant",      V_FORMANT },
 	{ "pitch",        V_PITCH },
 		case V_FORMANT:
 			VoiceFormant(p);
 			break;
 		case V_LOWERCASE_SENTENCE: {
 			if (langopts)
 				langopts->lowercase_sentence = true;
 			else
 				fprintf(stderr, "Cannot set lowercaseSentence: language not set, or is invalid.\n");
 			break;
 			}
 		case V_PITCH:
 			// default is  pitch 82 118
 			if (sscanf(p, "%d %d", &pitch1, &pitch2) == 2) {
--- a/tests/voices.test
+++ b/tests/voices.test
 #!/bin/sh
 # Run espeak-ng on one piece of text and compare its output with the
 # expected transcription.
 #
 # Arguments:
 #   $1  voice name handed to `espeak-ng -v`
 #   $2  short description shown in the progress line, or "-" for none
 #   $3  expected output of `espeak-ng -xq` (may span several lines)
 #   $4  text to translate
 #   $5  optional; when it is "broken" a mismatch is reported but tolerated
 # Globals:
 #   OPTIONS  extra espeak-ng arguments, read only; expanded unquoted on
 #            purpose so that it can carry several options
 # Files:
 #   writes actual.txt and expected.txt in the current directory
 # Exit status:
 #   exits the script with status 1 on a mismatch unless $5 is "broken"
 test_voices() {
 	TEST_LANG=$1
 	MESSAGE=$2
 	EXPECTED=$3
 	TEST_TEXT=$4
 	TEST_BROKEN=${5:-false}

 	if [ "x$MESSAGE" = x- ] ; then
 		echo "testing ${TEST_LANG}"
 	else
 		echo "testing ${TEST_LANG} ($MESSAGE)"
 	fi

 	# The voice name and the text are quoted so each reaches espeak-ng as a
 	# single argument; only OPTIONS is meant to be word-split.
 	# shellcheck disable=SC2086
 	ESPEAK_DATA_PATH=$(pwd) LD_LIBRARY_PATH=src:${LD_LIBRARY_PATH} \
 		src/espeak-ng ${OPTIONS} -xq -v "${TEST_LANG}" "${TEST_TEXT}" > actual.txt

 	# printf writes the expected text literally; echo may interpret
 	# backslashes or a leading "-n" depending on the shell.
 	printf '%s\n' "${EXPECTED}" > expected.txt

 	if [ "x$TEST_BROKEN" = xbroken ] ; then
 		# Known-broken test: show the diff but keep going.
 		diff expected.txt actual.txt || echo "... ignoring error (broken)"
 	else
 		diff expected.txt actual.txt || exit 1
 	fi
 }
 ##### Voice options
 # lowercaseSentence: the Georgian input below contains a period in the middle
 # of the text, and the expected transcription (second string argument) is
 # split over two lines at that point.
 # NOTE(review): the trailing "#872" is a shell comment, presumably an issue or
 # pull-request number -- confirm.
 test_voices ka "lowercaseSentence" "s'asi,amovn,oa t#k#v'eni g'ats#noba
 r'ogor brdz'andebit#" "სასიამოვნოა თქვენი გაცნობა. როგორ ბრძანდებით" #872