The correction uses the substitute character (0x001A) as a workaround. This control character is the "older brother" of the replacement character 0xFFFD. The workaround is necessary because the decoder already uses the replacement character (0xFFFD) as a flag to indicate an error or EOF. The substitute character (0x001A) is therefore returned as an alternative flag, indicating that the input character itself is a replacement character (0xFFFD).
master
@@ -562,6 +562,8 @@ string_decoder_getc_utf_8(espeak_ng_TEXT_DECODER *decoder) | |||
ret = (ret << 6) + (c & 0x3F); | |||
if (((c = *decoder->current++) & LEADING_2_BITS) != UTF8_TAIL_BITS) goto error; | |||
ret = (ret << 6) + (c & 0x3F); | |||
// fix the "I umlaut a half" bug | |||
if (ret == 0xFFFD) return 0x001A; | |||
return ret; | |||
// 4-byte UTF-8 sequence | |||
case 0xF0: |