Special characters such as N, S1, etc. do not actually consume any
characters. Handling them should thus *not* update pre_ptr and post_ptr,
otherwise those pointers would underflow/overflow, e.g. in the case
@) s (_NS1 [z]
this would overflow. This is noticeable, for instance, with the memory sanitizer:
ESPEAK_DATA_PATH=$PWD ./src/espeak-ng -qX "capitals"
Translate 'capitals'
1 c [k]
1 a [a]
1 p [p]
1 i [I]
1 t [t]
1 a [a]
1 l [l]
20 l (C [l]
==2837201==WARNING: MemorySanitizer: use-of-uninitialized-value
#0 0x7f7f4422744b in utf8_in2 /home/samy/brl/speech/espeak-ng-git/src/libespeak-ng/translate.c:281:2
#1 0x7f7f442281bc in utf8_in /home/samy/brl/speech/espeak-ng-git/src/libespeak-ng/translate.c:332:9
#2 0x7f7f440e0d31 in MatchRule /home/samy/brl/speech/espeak-ng-git/src/libespeak-ng/dictionary.c:1767:21
#3 0x7f7f440d937f in TranslateRules /home/samy/brl/speech/espeak-ng-git/src/libespeak-ng/dictionary.c:2320:6
#4 0x7f7f44230e5f in TranslateWord3 /home/samy/brl/speech/espeak-ng-git/src/libespeak-ng/translate.c:733:15
#5 0x7f7f44229844 in TranslateWord /home/samy/brl/speech/espeak-ng-git/src/libespeak-ng/translate.c:1100:14
#6 0x7f7f44256e50 in TranslateWord2 /home/samy/brl/speech/espeak-ng-git/src/libespeak-ng/translate.c:1361:11
#7 0x7f7f4424d6cc in TranslateClause /home/samy/brl/speech/espeak-ng-git/src/libespeak-ng/translate.c:2623:17
#8 0x7f7f44213359 in SpeakNextClause /home/samy/brl/speech/espeak-ng-git/src/libespeak-ng/synthesize.c:1569:2
#9 0x7f7f441a9f56 in Synthesize /home/samy/brl/speech/espeak-ng-git/src/libespeak-ng/speech.c:457:2
#10 0x7f7f441a9023 in sync_espeak_Synth /home/samy/brl/speech/espeak-ng-git/src/libespeak-ng/speech.c:570:29
#11 0x7f7f441ad59f in espeak_ng_Synthesize /home/samy/brl/speech/espeak-ng-git/src/libespeak-ng/speech.c:678:10
#12 0x7f7f4410b3f4 in espeak_Synth /home/samy/brl/speech/espeak-ng-git/src/libespeak-ng/espeak_api.c:90:32
#13 0x4a8be3 in main /home/samy/brl/speech/espeak-ng-git/src/espeak-ng.c:691:3
#14 0x7f7f43a2e7fc in __libc_start_main csu/../csu/libc-start.c:332:16
#15 0x421449 in _start (/home/samy/ens/projet/1/speech/espeak-ng-git/src/.libs/espeak-ng+0x421449)
Uninitialized value was created by an allocation of 'sbuf' in the stack frame of function 'TranslateClause'
#0 0x7f7f4423a1f0 in TranslateClause /home/samy/brl/speech/espeak-ng-git/src/libespeak-ng/translate.c:1941
While trying to match _NS1, MatchRule overflows the buffer.
As it happens, this had not usually been a problem, because rules usually
have these non-consuming special characters last in the rule, and thus it did
not matter that post_ptr was pointing outside the valid text.
master
| failed = 1; | failed = 1; | ||||
| break; | break; | ||||
| case RULE_DOLLAR: | case RULE_DOLLAR: | ||||
| post_ptr--; | |||||
| command = *rule++; | command = *rule++; | ||||
| if (command == DOLLAR_UNPR) | if (command == DOLLAR_UNPR) | ||||
| match.end_type = SUFX_UNPRON; // $unpron | match.end_type = SUFX_UNPRON; // $unpron | ||||
| } | } | ||||
| break; | break; | ||||
| case RULE_INC_SCORE: | case RULE_INC_SCORE: | ||||
| post_ptr--; | |||||
| add_points = 20; // force an increase in points | add_points = 20; // force an increase in points | ||||
| break; | break; | ||||
| case RULE_DEC_SCORE: | case RULE_DEC_SCORE: | ||||
| post_ptr--; | |||||
| add_points = -20; // force an decrease in points | add_points = -20; // force an decrease in points | ||||
| break; | break; | ||||
| case RULE_DEL_FWD: | case RULE_DEL_FWD: | ||||
| case RULE_NO_SUFFIX: | case RULE_NO_SUFFIX: | ||||
| if (word_flags & FLAG_SUFFIX_REMOVED) | if (word_flags & FLAG_SUFFIX_REMOVED) | ||||
| failed = 1; // a suffix has been removed | failed = 1; // a suffix has been removed | ||||
| else | |||||
| else { | |||||
| post_ptr--; | |||||
| add_points = 1; | add_points = 1; | ||||
| } | |||||
| break; | break; | ||||
| default: | default: | ||||
| if (letter == rb) { | if (letter == rb) { | ||||
| failed = 1; | failed = 1; | ||||
| break; | break; | ||||
| case RULE_DOLLAR: | case RULE_DOLLAR: | ||||
| pre_ptr++; | |||||
| command = *rule++; | command = *rule++; | ||||
| if ((command == DOLLAR_LIST) || ((command & 0xf0) == 0x20)) { | if ((command == DOLLAR_LIST) || ((command & 0xf0) == 0x20)) { | ||||
| // $list or $p_alt | // $list or $p_alt | ||||
| failed = 1; | failed = 1; | ||||
| break; | break; | ||||
| case RULE_STRESSED: | case RULE_STRESSED: | ||||
| pre_ptr++; | |||||
| if (tr->word_stressed_count > 0) | if (tr->word_stressed_count > 0) | ||||
| add_points = 19; | add_points = 19; | ||||
| else | else | ||||
| } | } | ||||
| break; | break; | ||||
| case RULE_IFVERB: | case RULE_IFVERB: | ||||
| pre_ptr++; | |||||
| if (tr->expect_verb) | if (tr->expect_verb) | ||||
| add_points = 1; | add_points = 1; | ||||
| else | else | ||||
| failed = 1; | failed = 1; | ||||
| break; | break; | ||||
| case RULE_CAPITAL: | case RULE_CAPITAL: | ||||
| pre_ptr++; | |||||
| if (word_flags & FLAG_FIRST_UPPER) | if (word_flags & FLAG_FIRST_UPPER) | ||||
| add_points = 1; | add_points = 1; | ||||
| else | else |