Fixes: #1823, #1824, #1825, #1826, #1827 - Add crash test and vectors provided by @SEU-SSL - Disallow dummy/null voice load (that causes incorrect translator initialization) - Fix empty `phondata` file load (that causes uninitialized memory access) - Limit max word length for RemoveEnding (causes buffer overflow) - Limit punctlist initialization from embedded commands (buffer overflow) - Fix uninitialized pitch in wavegen (DBZ and indexing problems) - Properly zeroize stack variables before use in TranslateClause and SetWordStress TODO (in next PR): add & fix more vectors from fuzzer.

1 year ago · 58f1e0b6a4
--- a/src/libespeak-ng/dictionary.c
+++ b/src/libespeak-ng/dictionary.c
@@ -927,6 +927,9 @@ void SetWordStress(Translator *tr, char *output, unsigned int *dictionary_flags,

 	static const char consonant_types[16] = { 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0 };

 	memset(syllable_weight, 0, sizeof(syllable_weight));
 	memset(vowel_length, 0, sizeof(vowel_length));

 	stressflags = tr->langopts.stress_flags;

 	if (dictionary_flags != NULL)
@@ -2897,6 +2900,7 @@ int RemoveEnding(Translator *tr, char *word, int end_type, char *word_copy)
 			*word_end = 'e';
 	}
 	i = word_end - word;
 	if (i >= N_WORD_BYTES) i = N_WORD_BYTES-1;

 	if (word_copy != NULL) {
 		memcpy(word_copy, word, i);
--- a/src/libespeak-ng/readclause.c
+++ b/src/libespeak-ng/readclause.c
@@ -664,7 +664,7 @@ int ReadClause(Translator *tr, char *buf, short *charix, int *charix_top, int n_
 					if (c2 != '1') {
 						// a list of punctuation characters to be spoken, terminated by space
 						j = 0;
 						while (!Eof() && !iswspace(c2)) {
 						while (!Eof() && !iswspace(c2) && (j < N_PUNCTLIST-1)) {
 							option_punctlist[j++] = c2;
 							c2 = GetC();
 							buf[ix++] = ' ';
--- a/src/libespeak-ng/synthdata.c
+++ b/src/libespeak-ng/synthdata.c
@@ -79,8 +79,15 @@ static espeak_ng_STATUS ReadPhFile(void **ptr, const char *fname, int *size, esp
 	if ((f_in = fopen(buf, "rb")) == NULL)
 		return create_file_error_context(context, errno, buf);

 	if (*ptr != NULL)
 	if (*ptr != NULL) {
 		free(*ptr);
 		*ptr = NULL;
 	}
 	
 	if (length == 0) {
 		*ptr = NULL;
 		return 0;
 	}

 	if ((*ptr = malloc(length)) == NULL) {
 		fclose(f_in);
@@ -90,6 +97,7 @@ static espeak_ng_STATUS ReadPhFile(void **ptr, const char *fname, int *size, esp
 		int error = errno;
 		fclose(f_in);
 		free(*ptr);
 		*ptr = NULL;
 		return create_file_error_context(context, error, buf);
 	}

@@ -122,9 +130,11 @@ espeak_ng_STATUS LoadPhData(int *srate, espeak_ng_ERROR_CONTEXT *context)
 	// read the version number and sample rate from the first 8 bytes of phondata
 	version = 0; // bytes 0-3, version number
 	rate = 0;    // bytes 4-7, sample rate
 	for (ix = 0; ix < 4; ix++) {
 		version += (wavefile_data[ix] << (ix*8));
 		rate += (wavefile_data[ix+4] << (ix*8));
 	if (wavefile_data) {
 		for (ix = 0; ix < 4; ix++) {
 			version += (wavefile_data[ix] << (ix*8));
 			rate += (wavefile_data[ix+4] << (ix*8));
 		}
 	}

 	if (version != version_phdata)
--- a/src/libespeak-ng/translate.c
+++ b/src/libespeak-ng/translate.c
@@ -1601,6 +1601,7 @@ void TranslateClause(Translator *tr, int *tone_out, char **voice_change)
 			if (dict_flags & FLAG_SPELLWORD) {
 				// redo the word, speaking single letters
 				for (pw = word; *pw != ' ';) {
 					memset(number_buf, 0, sizeof(number_buf));
 					memset(number_buf, ' ', 9);
 					nx = utf8_in(&c_temp, pw);
 					memcpy(&number_buf[2], pw, nx);
--- a/src/libespeak-ng/voices.c
+++ b/src/libespeak-ng/voices.c
@@ -442,6 +442,10 @@ voice_t *LoadVoice(const char *vname, int control)
 		MAKE_MEM_UNDEFINED(&voice_languages, sizeof(voice_languages));
 	}

 	if ((vname == NULL || vname[0] == 0) && !(control & 8)) {
 		return NULL;
 	}

 	strncpy0(voicename, vname, sizeof(voicename));
 	if (control & 0x10) {
 		strcpy(buf, vname);
@@ -702,14 +706,14 @@ voice_t *LoadVoice(const char *vname, int control)

 	if (!tone_only) {
 		if (!!(control & 8/*compiling phonemes*/)) {
                        /* Set by espeak_ng_CompilePhonemeDataPath when it
                         * calls LoadVoice("", 8) to set up a dummy(?) voice.
                         * As phontab may not yet exist this avoids the spurious
                         * error message and guarantees consistent results by
                         * not actually reading a potentially bogus phontab...
                         */
                        ix = 0;
                } else if ((ix = SelectPhonemeTableName(phonemes_name)) < 0) {
 			/* Set by espeak_ng_CompilePhonemeDataPath when it
 				* calls LoadVoice("", 8) to set up a dummy(?) voice.
 				* As phontab may not yet exist this avoids the spurious
 				* error message and guarantees consistent results by
 				* not actually reading a potentially bogus phontab...
 				*/
 			ix = 0;
 		} else if ((ix = SelectPhonemeTableName(phonemes_name)) < 0) {
 			fprintf(stderr, "Unknown phoneme table: '%s'\n", phonemes_name);
 			ix = 0;
 		}
--- a/src/libespeak-ng/wavegen.c
+++ b/src/libespeak-ng/wavegen.c
@@ -540,14 +540,14 @@ static void AdvanceParameters(void)
 	if (wvoice == NULL)
 		return;

 	int x;
 	int x = 0;
 	int ix;
 	static int Flutter_ix = 0;

 	// advance the pitch
 	wdata.pitch_ix += wdata.pitch_inc;
 	if ((ix = wdata.pitch_ix>>8) > 127) ix = 127;
 	x = wdata.pitch_env[ix] * wdata.pitch_range;
 	if (wdata.pitch_env) x = wdata.pitch_env[ix] * wdata.pitch_range;
 	wdata.pitch = (x>>8) + wdata.pitch_base;
 	
 	
@@ -563,7 +563,7 @@ static void AdvanceParameters(void)
 	
 	if(const_f0)
 		wdata.pitch = (const_f0<<12);
 	

 	if (wdata.pitch < 102400)
 		wdata.pitch = 102400; // min pitch, 25 Hz  (25 << 12)

@@ -1265,6 +1265,9 @@ static int WavegenFill2(void)
 	static bool resume = false;
 	static int echo_complete = 0;

 	if (wdata.pitch < 102400)
 		wdata.pitch = 102400; // min pitch, 25 Hz  (25 << 12)

 	while (out_ptr < out_end) {
 		if (WcmdqUsed() <= 0) {
 			if (echo_complete > 0) {
--- a/tests/CMakeLists.txt
+++ b/tests/CMakeLists.txt
@@ -61,6 +61,7 @@ shell_test(ssml)
 shell_test(translate)
 shell_test(variants)
 shell_test(voices)
 shell_test(crash)

 # shell_test(windows-data)
 # shell_test(windows-installer)
--- a/tests/crash.test
+++ b/tests/crash.test
@@ -0,0 +1,17 @@
 #!/bin/sh
 # include common script
 . "`dirname $0`/common"

 test_crash() { # $1 = crash vector name; feeds crash_vectors/$1.txt to espeak-ng and exits the script with 1 on failure
 	TEST_NAME=$1

 	echo "testing CVE-${TEST_NAME}"
 	ESPEAK_DATA_PATH="$(pwd)" LD_LIBRARY_PATH="src:${LD_LIBRARY_PATH}" \
 		$VALGRIND src/espeak-ng -f "$(dirname "$0")/crash_vectors/${TEST_NAME}.txt" -w /dev/null || exit 1 # "$0" is quoted so a script path containing spaces still resolves; $VALGRIND stays unquoted so it can expand to nothing or to several words
 }

 test_crash cve-2023-49990
 test_crash cve-2023-49991
 test_crash cve-2023-49992
 test_crash cve-2023-49993
 test_crash cve-2023-49994
--- a/tests/crash_vectors/cve-2023-49990.txt
+++ b/tests/crash_vectors/cve-2023-49990.txt
@@ -0,0 +1 @@
 ã¦à»Vñ€¦ñ
€¦V
€äVñ€ãÂà¦æsññâñþâññà¶æØØsññâñþâññeeeeeeeeseee€ññûñ
--- a/tests/crash_vectors/cve-2023-49991.txt
+++ b/tests/crash_vectors/cve-2023-49991.txt
@@ -0,0 +1 @@
 €¦Vń
€ńVđŐhńůâ˙ńVDíZ»»ŐöÖÖÖÖÖÖÖÖÖě»»ş»ÖľÖÖÖÖÖÖ´ÖÖÖ»ţţ÷ÜÖÖÖ»»ş»ŐŞ»»®î˙˙€ę`v
--- a/tests/crash_vectors/cve-2023-49992.txt
+++ b/tests/crash_vectors/cve-2023-49992.txt
--- a/tests/crash_vectors/cve-2023-49993.txt
+++ b/tests/crash_vectors/cve-2023-49993.txt
--- a/tests/crash_vectors/cve-2023-49994.txt
+++ b/tests/crash_vectors/cve-2023-49994.txt
@@ -0,0 +1 @@
 "[[-#,-	-1-2.
r--Ş#--O)C--!˙E-1‹@5-!-V-1--
			@@ -0,0 +1 @@
			ã¦à»Vñ€¦ñ €¦V €äVñ€ãÂà¦æsññâñþâññà¶æØØsññâñþâññeeeeeeeeseee€ññûñ
			@@ -0,0 +1 @@
			€¦Vń €ńVđŐhńůâ˙ńVDíZ»»ŐöÖÖÖÖÖÖÖÖÖě»»ş»ÖľÖÖÖÖÖÖ´ÖÖÖ»ţţ÷ÜÖÖÖ»»ş»ŐŞ»»®î˙˙€ę`v