Browse Source

New language option lowercaseSentence

When set in a language file, a period followed by a lower case letter is
detected as end of sentence. Normal behavior is to require a capital
letter.
master
Juho Hiltunen 4 years ago
parent
commit
c4740b3053

+ 1
- 0
.gitignore View File

!tests/cmd_options.test !tests/cmd_options.test
!tests/windows-data.test !tests/windows-data.test
!tests/windows-installer.test !tests/windows-installer.test
!tests/voices.test


espeak-ng.pc espeak-ng.pc



+ 2
- 0
CHANGELOG.md View File

* Add more tests to check the various parts of espeak-ng. * Add more tests to check the various parts of espeak-ng.
* Various changes to clean up the codebase. * Various changes to clean up the codebase.
* Restructure "option brackets" language option to "brackets" and "bracketsAnnounced". * Restructure "option brackets" language option to "brackets" and "bracketsAnnounced".
* New Language option: "lowercaseSentence" for ending a sentence if a period is followed by a lower case letter.
* Added voice variants * Added voice variants


documentation: documentation:
* grc (Ancient Greek) -- Reece Dunn (support for variant Greek letter forms) * grc (Ancient Greek) -- Reece Dunn (support for variant Greek letter forms)
* hak (Hakka Chinese) -- Juho Hiltunen * hak (Hakka Chinese) -- Juho Hiltunen
* haw (Hawaiian) -- Juho Hiltunen * haw (Hawaiian) -- Juho Hiltunen
* ka (Georgian) -- Juho Hiltunen
* kok (Konkani) -- Juho Hiltunen * kok (Konkani) -- Juho Hiltunen
* nb (Norwegian Bokmål) -- Juho Hiltunen * nb (Norwegian Bokmål) -- Juho Hiltunen
* nci (Classical Nahuatl) -- Juho Hiltunen * nci (Classical Nahuatl) -- Juho Hiltunen

+ 1
- 0
Makefile.am View File

tests/variants.check \ tests/variants.check \
tests/windows-installer.check \ tests/windows-installer.check \
tests/bom.check \ tests/bom.check \
tests/voices.check \
tests/cmd_options.check tests/cmd_options.check


##### fuzzer: ##### fuzzer:

+ 11
- 0
docs/voices.md View File

- [phonemes](#phonemes) - [phonemes](#phonemes)
- [dictionary](#dictionary) - [dictionary](#dictionary)
- [dictrules](#dictrules) - [dictrules](#dictrules)
- [lowercaseSentence](#lowercasesentence)
- [replace](#replace) - [replace](#replace)
- [stressRule](#stressrule) - [stressRule](#stressrule)
- [stressLength](#stresslength) - [stressLength](#stresslength)
dictionary file and also its `*_list` exceptions list. See dictionary file and also its `*_list` exceptions list. See
[Text to Phoneme Translation](dictionary.md#conditional-rules). [Text to Phoneme Translation](dictionary.md#conditional-rules).


### lowercaseSentence

lowercaseSentence <no arguments>

By default, the end of a sentence is detected only if a period `.` is followed by an uppercase letter.
When `lowercaseSentence` is set, a period followed by a lowercase letter is also treated as the end of a sentence.

Note that other conditions, such as abbreviations, might override this setting.


### replace ### replace


replace <flags> <phoneme> <replacement phoneme> replace <flags> <phoneme> <replacement phoneme>

+ 1
- 0
espeak-ng-data/lang/ccs/ka View File

name Georgian name Georgian
language ka language ka
lowercaseSentence // A period followed by a lowercase letter is considered the end of a sentence

+ 2
- 2
src/libespeak-ng/readclause.c View File

else if (iswlower(c_next) || (c_next == '-')) // hyphen is needed for lang-hu (eg. 2.-kal) else if (iswlower(c_next) || (c_next == '-')) // hyphen is needed for lang-hu (eg. 2.-kal)
is_end_clause = false; // only if followed by lower-case, (or if there is a XML tag) is_end_clause = false; // only if followed by lower-case, (or if there is a XML tag)
} }
if (iswlower(c_next)) {
if (iswlower(c_next) && tr->langopts.lowercase_sentence == false) {
// next word has no capital letter, this dot is probably from an abbreviation // next word has no capital letter, this dot is probably from an abbreviation
is_end_clause = 0;
is_end_clause = false;
} }
if (any_alnum == false) { if (any_alnum == false) {
// no letters or digits yet, so probably not a sentence terminator // no letters or digits yet, so probably not a sentence terminator

+ 1
- 0
src/libespeak-ng/tr_languages.c View File

tr->langopts.replace_chars = NULL; tr->langopts.replace_chars = NULL;
tr->langopts.alt_alphabet_lang = L('e', 'n'); tr->langopts.alt_alphabet_lang = L('e', 'n');
tr->langopts.roman_suffix = utf8_null; tr->langopts.roman_suffix = utf8_null;
tr->langopts.lowercase_sentence = false;


SetLengthMods(tr, 201); SetLengthMods(tr, 201);



+ 2
- 0
src/libespeak-ng/translate.h View File

int max_lengthmod; int max_lengthmod;
int lengthen_tonic; // lengthen the tonic syllable int lengthen_tonic; // lengthen the tonic syllable
int suffix_add_e; // replace a suffix (which has the SUFX_E flag) with this character int suffix_add_e; // replace a suffix (which has the SUFX_E flag) with this character
bool lowercase_sentence; // when true, a period . causes a sentence stop even if next character is lowercase
} LANGUAGE_OPTIONS; } LANGUAGE_OPTIONS;


typedef struct { typedef struct {
int end_stressed_vowel; // word ends with stressed vowel int end_stressed_vowel; // word ends with stressed vowel
int prev_dict_flags[2]; // dictionary flags from previous word int prev_dict_flags[2]; // dictionary flags from previous word
int clause_terminator; int clause_terminator;

} Translator; } Translator;


#define OPTION_EMPHASIZE_ALLCAPS 0x100 #define OPTION_EMPHASIZE_ALLCAPS 0x100

+ 11
- 0
src/libespeak-ng/voices.c View File

V_BREATHW, V_BREATHW,


// these override defaults set by the translator // these override defaults set by the translator
V_LOWERCASE_SENTENCE,
V_WORDGAP, V_WORDGAP,
V_INTONATION, V_INTONATION,
V_TUNES, V_TUNES,
{ "maintainer", V_MAINTAINER }, { "maintainer", V_MAINTAINER },
{ "status", V_STATUS }, { "status", V_STATUS },



{ "lowercaseSentence", V_LOWERCASE_SENTENCE },
{ "variants", V_VARIANTS }, { "variants", V_VARIANTS },
{ "formant", V_FORMANT }, { "formant", V_FORMANT },
{ "pitch", V_PITCH }, { "pitch", V_PITCH },
case V_FORMANT: case V_FORMANT:
VoiceFormant(p); VoiceFormant(p);
break; break;
case V_LOWERCASE_SENTENCE: {
if (langopts)
langopts->lowercase_sentence = true;
else
fprintf(stderr, "Cannot set lowercaseSentence: language not set, or is invalid.\n");
break;
}

case V_PITCH: case V_PITCH:
// default is pitch 82 118 // default is pitch 82 118
if (sscanf(p, "%d %d", &pitch1, &pitch2) == 2) { if (sscanf(p, "%d %d", &pitch1, &pitch2) == 2) {

+ 27
- 0
tests/voices.test View File

#!/bin/sh

# Run src/espeak-ng on one text sample and compare what it prints with the
# expected transcription.
#
# Arguments:
#   $1 - voice/language name, passed to `espeak-ng -v`
#   $2 - label shown next to the language in the progress line; "-" = no label
#   $3 - expected output (may span several lines); written to expected.txt
#   $4 - input text handed to espeak-ng
#   $5 - optional; the literal word "broken" makes a mismatch non-fatal
#        (defaults to "false")
# Globals:
#   OPTIONS          (read)    extra espeak-ng command-line flags
#   LD_LIBRARY_PATH  (read)    extended with "src" for the espeak-ng call only
#   TEST_LANG, MESSAGE, EXPECTED, TEST_TEXT, TEST_BROKEN (written)
# Outputs:
#   A "testing ..." line and any diff on stdout; actual.txt and expected.txt
#   are (re)written in the current directory.
# Exit status:
#   Terminates the calling shell with status 1 when the outputs differ,
#   unless $5 is "broken".
test_voices() {
  TEST_LANG=$1
  MESSAGE=$2
  EXPECTED=$3
  TEST_TEXT=$4
  TEST_BROKEN=${5:-false}

  if [ "x$MESSAGE" = x- ]; then
    echo "testing ${TEST_LANG}"
  else
    echo "testing ${TEST_LANG} ($MESSAGE)"
  fi

  # OPTIONS is deliberately left unquoted: it may carry several flags that
  # have to be split into separate words.
  # shellcheck disable=SC2086
  ESPEAK_DATA_PATH=$(pwd) LD_LIBRARY_PATH="src:${LD_LIBRARY_PATH}" \
    src/espeak-ng ${OPTIONS} -xq -v "${TEST_LANG}" "${TEST_TEXT}" > actual.txt

  # printf instead of echo: echo would swallow an expected value such as
  # "-n" and, in some /bin/sh implementations, rewrite backslash sequences.
  printf '%s\n' "${EXPECTED}" > expected.txt

  if [ "x$TEST_BROKEN" = xbroken ]; then
    # Known-broken test: show the diff but keep going.
    if ! diff expected.txt actual.txt; then
      echo "... ignoring error (broken)"
    fi
  else
    diff expected.txt actual.txt || exit 1
  fi
}

##### Voice options
# Checks the "lowercaseSentence" voice option using the Georgian (ka) voice.
# The input text contains a period followed directly by further text; the
# expected output below is split over two lines at that point (presumably one
# line per detected sentence -- confirm against espeak-ng's -x output format).
# NOTE(review): the trailing "#872" looks like an issue/PR reference -- confirm.
test_voices ka "lowercaseSentence" "s'asi,amovn,oa t#k#v'eni g'ats#noba
r'ogor brdz'andebit#" "სასიამოვნოა თქვენი გაცნობა. როგორ ბრძანდებით" #872

Loading…
Cancel
Save