Fixes: #1823, #1824, #1825, #1826, #1827 - Add crash test and vectors provided by @SEU-SSL - Disallow dummy/null voice load (that causes incorrect translator initialization) - Fix empty `phondata` file load (that causes uninitialized memory access) - Limit max word length for RemoveEnding (causes buffer overflow) - Limit punctlist initialization from embedded commands (buffer overflow) - Fix uninitialized pitch in wavegen (division-by-zero and indexing problems) - Properly zeroize stack variables before use in TranslateClause and SetWordStress TODO (in the next PR): add & fix more vectors from fuzzer.

1 year ago · 58f1e0b6a4
--- a/src/libespeak-ng/dictionary.c
+++ b/src/libespeak-ng/dictionary.c
 	static const char consonant_types[16] = { 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0 };
 	memset(syllable_weight, 0, sizeof(syllable_weight));
 	memset(vowel_length, 0, sizeof(vowel_length));
 	stressflags = tr->langopts.stress_flags;
 	if (dictionary_flags != NULL)
 			*word_end = 'e';
 	}
 	i = word_end - word;
 	if (i >= N_WORD_BYTES) i = N_WORD_BYTES-1;
 	if (word_copy != NULL) {
 		memcpy(word_copy, word, i);
--- a/src/libespeak-ng/readclause.c
+++ b/src/libespeak-ng/readclause.c
 					if (c2 != '1') {
 						// a list of punctuation characters to be spoken, terminated by space
 						j = 0;
 						while (!Eof() && !iswspace(c2)) {
 						while (!Eof() && !iswspace(c2) && (j < N_PUNCTLIST-1)) {
 							option_punctlist[j++] = c2;
 							c2 = GetC();
 							buf[ix++] = ' ';
--- a/src/libespeak-ng/synthdata.c
+++ b/src/libespeak-ng/synthdata.c
 	if ((f_in = fopen(buf, "rb")) == NULL)
 		return create_file_error_context(context, errno, buf);
 	if (*ptr != NULL)
 	if (*ptr != NULL) {
 		free(*ptr);
 		*ptr = NULL;
 	}
 	if (length == 0) {
 		*ptr = NULL;
 		return 0;
 	}
 	if ((*ptr = malloc(length)) == NULL) {
 		fclose(f_in);
 		int error = errno;
 		fclose(f_in);
 		free(*ptr);
 		*ptr = NULL;
 		return create_file_error_context(context, error, buf);
 	}
 	// read the version number and sample rate from the first 8 bytes of phondata
 	version = 0; // bytes 0-3, version number
 	rate = 0;    // bytes 4-7, sample rate
 	for (ix = 0; ix < 4; ix++) {
 		version += (wavefile_data[ix] << (ix*8));
 		rate += (wavefile_data[ix+4] << (ix*8));
 	if (wavefile_data) {
 		for (ix = 0; ix < 4; ix++) {
 			version += (wavefile_data[ix] << (ix*8));
 			rate += (wavefile_data[ix+4] << (ix*8));
 		}
 	}
 	if (version != version_phdata)
--- a/src/libespeak-ng/translate.c
+++ b/src/libespeak-ng/translate.c
 			if (dict_flags & FLAG_SPELLWORD) {
 				// redo the word, speaking single letters
 				for (pw = word; *pw != ' ';) {
 					memset(number_buf, 0, sizeof(number_buf));
 					memset(number_buf, ' ', 9);
 					nx = utf8_in(&c_temp, pw);
 					memcpy(&number_buf[2], pw, nx);
--- a/src/libespeak-ng/voices.c
+++ b/src/libespeak-ng/voices.c
 		MAKE_MEM_UNDEFINED(&voice_languages, sizeof(voice_languages));
 	}
 	if ((vname == NULL || vname[0] == 0) && !(control & 8)) {
 		return NULL;
 	}
 	strncpy0(voicename, vname, sizeof(voicename));
 	if (control & 0x10) {
 		strcpy(buf, vname);
 	if (!tone_only) {
 		if (!!(control & 8/*compiling phonemes*/)) {
                        /* Set by espeak_ng_CompilePhonemeDataPath when it
                         * calls LoadVoice("", 8) to set up a dummy(?) voice.
                         * As phontab may not yet exist this avoids the spurious
                         * error message and guarantees consistent results by
                         * not actually reading a potentially bogus phontab...
                         */
                        ix = 0;
                } else if ((ix = SelectPhonemeTableName(phonemes_name)) < 0) {
 			/* Set by espeak_ng_CompilePhonemeDataPath when it
 				* calls LoadVoice("", 8) to set up a dummy(?) voice.
 				* As phontab may not yet exist this avoids the spurious
 				* error message and guarantees consistent results by
 				* not actually reading a potentially bogus phontab...
 				*/
 			ix = 0;
 		} else if ((ix = SelectPhonemeTableName(phonemes_name)) < 0) {
 			fprintf(stderr, "Unknown phoneme table: '%s'\n", phonemes_name);
 			ix = 0;
 		}
--- a/src/libespeak-ng/wavegen.c
+++ b/src/libespeak-ng/wavegen.c
 	if (wvoice == NULL)
 		return;
 	int x;
 	int x = 0;
 	int ix;
 	static int Flutter_ix = 0;
 	// advance the pitch
 	wdata.pitch_ix += wdata.pitch_inc;
 	if ((ix = wdata.pitch_ix>>8) > 127) ix = 127;
 	x = wdata.pitch_env[ix] * wdata.pitch_range;
 	if (wdata.pitch_env) x = wdata.pitch_env[ix] * wdata.pitch_range;
 	wdata.pitch = (x>>8) + wdata.pitch_base;
 	if(const_f0)
 		wdata.pitch = (const_f0<<12);
 	if (wdata.pitch < 102400)
 		wdata.pitch = 102400; // min pitch, 25 Hz  (25 << 12)
 	static bool resume = false;
 	static int echo_complete = 0;
 	if (wdata.pitch < 102400)
 		wdata.pitch = 102400; // min pitch, 25 Hz  (25 << 12)
 	while (out_ptr < out_end) {
 		if (WcmdqUsed() <= 0) {
 			if (echo_complete > 0) {
--- a/tests/CMakeLists.txt
+++ b/tests/CMakeLists.txt
 shell_test(translate)
 shell_test(variants)
 shell_test(voices)
 shell_test(crash)
 # shell_test(windows-data)
 # shell_test(windows-installer)
--- a/tests/crash.test
+++ b/tests/crash.test
 #!/bin/sh
 # include common script
 . "`dirname $0`/common"
 # Replay one crash vector through espeak-ng; abort the whole script with
 # status 1 if the command fails.
 #   $1 - vector name; input is read from crash_vectors/<name>.txt located
 #        next to this script.
 test_crash() {
 	TEST_NAME="$1"
 	echo "testing CVE-${TEST_NAME}"
 	# "$0" is quoted so that a checkout path containing whitespace is not
 	# word-split before dirname sees it.
 	# $VALGRIND is intentionally left unquoted: an empty value vanishes and a
 	# value carrying options splits into separate words (presumably set by
 	# the sourced common script -- confirm there).
 	ESPEAK_DATA_PATH="$(pwd)" LD_LIBRARY_PATH="src:${LD_LIBRARY_PATH}" \
 		$VALGRIND src/espeak-ng -f "$(dirname "$0")/crash_vectors/${TEST_NAME}.txt" -w /dev/null || exit 1
 }
 # Run every known crash vector; each name maps to crash_vectors/<name>.txt.
 test_crash cve-2023-49990
 test_crash cve-2023-49991
 test_crash cve-2023-49992
 test_crash cve-2023-49993
 test_crash cve-2023-49994
--- a/tests/crash_vectors/cve-2023-49990.txt
+++ b/tests/crash_vectors/cve-2023-49990.txt
 ã¦à»Vñ€¦ñ
€¦V
€äVñ€ãÂà¦æsññâñþâññà¶æØØsññâñþâññeeeeeeeeseee€ññûñ
--- a/tests/crash_vectors/cve-2023-49991.txt
+++ b/tests/crash_vectors/cve-2023-49991.txt
 €¦Vń
€ńVđŐhńůâ˙ńVDíZ»»ŐöÖÖÖÖÖÖÖÖÖě»»ş»ÖľÖÖÖÖÖÖ´ÖÖÖ»ţţ÷ÜÖÖÖ»»ş»ŐŞ»»®î˙˙€ę`v
--- a/tests/crash_vectors/cve-2023-49992.txt
+++ b/tests/crash_vectors/cve-2023-49992.txt
--- a/tests/crash_vectors/cve-2023-49993.txt
+++ b/tests/crash_vectors/cve-2023-49993.txt
--- a/tests/crash_vectors/cve-2023-49994.txt
+++ b/tests/crash_vectors/cve-2023-49994.txt
 "[[-#,-	-1-2.
r--Ş#--O)C--!˙E-1‹@5-!-V-1--

					ã¦à»Vñ€¦ñ €¦V €äVñ€ãÂà¦æsññâñþâññà¶æØØsññâñþâññeeeeeeeeseee€ññûñ

					€¦Vń €ńVđŐhńůâ˙ńVDíZ»»ŐöÖÖÖÖÖÖÖÖÖě»»ş»ÖľÖÖÖÖÖÖ´ÖÖÖ»ţţ÷ÜÖÖÖ»»ş»ŐŞ»»®î˙˙€ę`v