Browse Source

Properly maintain margin in ph_list2

TranslateWord2 uses phonemes in ph_list2. Apart from the breakable loops, it
may statically require up to 7 phonemes. Then TranslateClause always
uses 2 phonemes. We thus have to keep these margins along the loops to
avoid any overflow.

Fixes #1073
master
Samuel Thibault 3 years ago
parent
commit
26a675543c
2 changed files with 31 additions and 6 deletions
  1. 29
    4
      src/libespeak-ng/translate.c
  2. 2
    2
      tests/translate.test

+ 29
- 4
src/libespeak-ng/translate.c View File

@@ -1276,6 +1276,11 @@ static int TranslateWord2(Translator *tr, char *word, WORD_TAB *wtab, int pre_pa
Word_EmbeddedCmd();
}

if (n_ph_list2 >= N_PHONEME_LIST-2) {
// No room, can't translate anything
return 0;
}

if ((word[0] == 0) || (word_flags & FLAG_DELETE_WORD)) {
// nothing to translate. Add a dummy phoneme to carry any embedded commands
if (embedded_flag) {
@@ -1292,6 +1297,11 @@ static int TranslateWord2(Translator *tr, char *word, WORD_TAB *wtab, int pre_pa
return 0;
}

if (n_ph_list2 >= N_PHONEME_LIST-7-2) {
// We may require up to 7 phonemes, plus the 2 phonemes from the caller, can't translate safely
return 0;
}

// after a $pause word attribute, ignore a $pause attribute on the next two words
if (tr->prepause_timeout > 0)
tr->prepause_timeout--;
@@ -1487,7 +1497,10 @@ static int TranslateWord2(Translator *tr, char *word, WORD_TAB *wtab, int pre_pa
if ((flags & FLAG_FOUND) && !(flags & FLAG_TEXTMODE))
found_dict_flag = SFLAG_DICTIONARY;

while ((pre_pause > 0) && (n_ph_list2 < N_PHONEME_LIST-4)) {
// Each iteration may require up to 1 phoneme
// and after this loop we may require up to 7 phonemes
// and our caller requires 2 phonemes
while ((pre_pause > 0) && (n_ph_list2 < N_PHONEME_LIST-7-2)) {
// add pause phonemes here. Either because of punctuation (brackets or quotes) in the
// text, or because the word is marked in the dictionary lookup as a conjunction
if (pre_pause > 1) {
@@ -1502,7 +1515,9 @@ static int TranslateWord2(Translator *tr, char *word, WORD_TAB *wtab, int pre_pa
tr->prev_dict_flags[1] = 0;
}
plist2 = &ph_list2[n_ph_list2];
// From here we may require up to 4+1+3 phonemes

// This may require up to 4 phonemes
if ((option_capitals == 1) && (word_flags & FLAG_FIRST_UPPER)) {
SetPlist2(&ph_list2[n_ph_list2++], phonPAUSE_SHORT);
SetPlist2(&ph_list2[n_ph_list2++], phonCAPITAL);
@@ -1513,6 +1528,7 @@ static int TranslateWord2(Translator *tr, char *word, WORD_TAB *wtab, int pre_pa
}
}

// This may require up to 1 phoneme
if (switch_phonemes >= 0) {
if ((p[0] == phonPAUSE) && (p[1] == phonSWITCH)) {
// the new word starts with a phoneme table switch, so there's no need to switch before it.
@@ -1541,7 +1557,10 @@ static int TranslateWord2(Translator *tr, char *word, WORD_TAB *wtab, int pre_pa
p[1] = 0;
}

while (((ph_code = *p++) != 0) && (n_ph_list2 < N_PHONEME_LIST-4)) {
// Each iteration may require up to 1 phoneme
// and after this loop we may require up to 3 phonemes
// and our caller requires 2 phonemes
while (((ph_code = *p++) != 0) && (n_ph_list2 < N_PHONEME_LIST-3-2)) {
if (ph_code == 255)
continue; // unknown phoneme

@@ -1633,7 +1652,9 @@ static int TranslateWord2(Translator *tr, char *word, WORD_TAB *wtab, int pre_pa
first_phoneme = false;
}
}
// From here, we may require up to 3 phonemes

// This may require up to 1 phoneme
if (word_flags & FLAG_COMMA_AFTER)
SetPlist2(&ph_list2[n_ph_list2++], phonPAUSE_CLAUSE);

@@ -1645,6 +1666,7 @@ static int TranslateWord2(Translator *tr, char *word, WORD_TAB *wtab, int pre_pa
if ((stress >= 4) && (phoneme_tab[ph_list2[n_ph_list2-1].phcode]->type == phVOWEL))
tr->end_stressed_vowel = 1; // word ends with a stressed vowel

// This may require up to 1 phoneme
if (switch_phonemes >= 0) {
// this word uses a different phoneme table, now switch back
strcpy(dictionary_name, old_dictionary_name);
@@ -1654,6 +1676,7 @@ static int TranslateWord2(Translator *tr, char *word, WORD_TAB *wtab, int pre_pa
}


// This may require up to 1 phoneme
if (pitch_raised > 0) {
embedded_list[embedded_ix++] = EMBED_P+0x60+0x80 + (pitch_raised << 8); // lower pitch
SetPlist2(&ph_list2[n_ph_list2], phonPAUSE_SHORT);
@@ -2060,7 +2083,7 @@ void TranslateClause(Translator *tr, int *tone_out, char **voice_change)
}
words[0].length = k;

while (!finished && (ix < (int)sizeof(sbuf) - 1) && (n_ph_list2 < N_PHONEME_LIST-4)) {
while (!finished && (ix < (int)sizeof(sbuf) - 1)) {
prev_out2 = prev_out;
utf8_in2(&prev_out, &sbuf[ix-1], 1);

@@ -2488,7 +2511,9 @@ void TranslateClause(Translator *tr, int *tone_out, char **voice_change)
}
words[0].flags |= FLAG_FIRST_WORD;

for (ix = 0; ix < word_count; ix++) {
// Each TranslateWord2 may require up to 7 phonemes
// and after this loop we require 2 phonemes
for (ix = 0; ix < word_count && (n_ph_list2 < N_PHONEME_LIST-7-2); ix++) {
int nx;
int c_temp;
char *pn;

+ 2
- 2
tests/translate.test View File

@@ -34,8 +34,8 @@ test_phon hi "dUk'a:n" "दुकान"
test_phon hi "ka:n'u:n" "कानून"

# bug: https://github.com/espeak-ng/espeak-ng/issues/824
test_phon ru "(en)s'i:(ru) n'ojl t'otS;ka v'os;E2md;E2s;ats;'ejm m;,IlI;'onof_! p;,It;s'ot SE2z;d;d;Is;'jatd;'evI3t; t'ys;VtS;_! dv;'es;t;I p;Vd;d;Is;'jattR;'i p@-rats'Ent dv;'es;t;I p@-rats'Ent dv;'es;t;I p@-rats'Ent dv;'es;t;I ojd;'in p@-rats'Ent dv;'es;t;I t'otS;ka t@-r;'ittsat;S'Es;t; t'ys;VtS;_! vOs;Ims'ot s;Imn'AttsVt; p@-rats'Ent dv'A (en)s'i:(ru) n'ojl t'otS;ka dv;'e t'ys;VdZ; d;E2v;Itn'AttsVt; p@-rats'Ent dv;'es;t;I ojd;'in t'otS;ka n'ojl tR;'iv'os;E#m_!s;'ejm d;'evI3t;S'Es;t;_!tR;'i p@-rats'Ent dv'A (en)s'i:(ru) ojd;'in t'otS;ka n'ojl tR;'iv'os;E#m_!s;'ejm d;'evI3t;S'Es;t;_!tR;'i p@-rats'Ent dv;'es;t;I p@-rats'Ent dv;'es;t;I p@-rats'Ent dv;'es;t;I ojd;'in p@-rats'Ent dv;'es;t;I t'otS;ka tS;It'yr;E2sta d;E2v;Itn'AttsVt; t'ys;VtS;_! S,E#s;t;s'ot t@-r;'ittsat;s;'ejm p@-rats'Ent dv'A (en)s'i:(ru) n'ojl t'otS;ka n'ojldv'A_! tR;'is;'ejm_!p;'jat; p@-rats'Ent dv'AttsVt; (en)s'i:(ru) p@-rats'Ent dv;'es;t;I t'otS;ka t@-r;'ittsVt; t'ys;VtS;_! v'os;E2md;E2s;ats;'ejm p@-rats'Ent dv'A (en)s'i:(ru) n'ojl t'otS;ka n'ojltS;It'yR;I_! p;'jat;Vjd;'in_!tS;It'yR;I p@-rats'Ent dv;'es;t;I t'otS;ka p;,It;s'ot v'os;E2md;E2s;atdv;'e t'ys;VtS;_! s;,Ims'ot t@-r;'ittsat;d;'evI3t; p@-rats'Ent dv'A (en)s'i:(ru) n'ojl t'otS;ka dv;'es;t;I SE#stn'AttsVt; t'ys;VtS;_! dev;ats'ot s'o@-*Okdv'A p@-rats'Ent dv;'es;t;I t'otS;ka s;,Ims'ot p;Vd;d;Is;'jat t'ys;VtS;_! p;,It;s'ot d;E2v;In'ostOtR;'i p@-rats'Ent dv'A (en)s'i:(ru) n'ojl t'otS;ka tS;It'yr;E2sta s;'emd;E2s;Vt t'ys;VtS;_! t@-r;'ista v'os;E2m p@-rats'Ent dv;'es;t;I t'otS;ka n'ojld;'evI3t;_! p;'jat;n'ojl_!dv'A p@-rats'Ent dv'A (en)s'i:(ru) n'ojl t'otS;ka st'o s'o@-*Okdv;'e t'ys;VtS;_! p;,It;s'ot s;Imn'AttsVt; p@-rats'Ent
dv'A (en)s'i:(ru) v'os;E2m t'otS;ka vOs;Ims'ot dv'Attsat;d;'evI3t; t'ys;VtS;_! s;,Ims'ot SE2z;d;d;Is;'jatv'os;E2m p@-rats'Ent dv'AttsVt; (en)s'i:(ru) p@-rats'Ent dv;'es;t;I p@-rats'Ent dv'A (en)s'i:(ru) p;'jat; t'otS;ka S,E#s;t;s'ot SE2z;d;d;Is;'jatd;'evI3t; t'ys;VtS;_! s'o@-*Okojd;'in p@-rats'Ent dv'AttsVt; tS;It'yR;I t'otS;ka p;Vd;d;Is;'jatd;'evI3t; t'ys;VtS;_! S,E#s;t;s'ot d;E2v;In'ostOd;'evI3t; p@-rats'Ent dv'A (en)s'i:(ru) d;'es;It; t'otS;ka dv'A m;,IlI;'onof_! S,E#s;t;s'ot p;Vd;d;Is;'jatdv;'e t'ys;VtS;_! t@-r;'ista d;E2v;In'ostOojd;'in p@-rats'Ent dv'AttsVt; d;'es;It; t'otS;ka dv;'es;t;I SE2z;d;d;Is;'jatp;'jat; t'ys;VtS;_! dv;'es;t;I t@-r;'ittsat;v'os;E2m p@-rats'Ent dv'A (en)s'i:(ru) d;'es;It; t'otS;ka dv'A m;,IlI;'onof_! S,E#s;t;s'ot p;Vd;d;Is;'jatdv;'e t'ys;VtS;_! t@-r;'ista d;E2v;In'ostOojd;'in p@-rats'Ent dv'AttsVt; (en)z'Ed(ru) p@-rats'Ent dv'AttsVt; (en)'Em(ru) p@-rats'Ent dv;'e t'ys;VdZ; d;'es;It; t'otS;ka n'ojltR;'i_!s;'ejm v'os;E#mS'Es;t;_!p;'jat; p@-rats'Ent dv'A (en)s'i:(ru) d;E2v;Itn'AttsVt; t'otS;ka s;,Ims'ot d;E2v;In'ostOv'os;E2m t'ys;VtS;_! p;,It;s'ot t@-r;'ittsat;s;'ejm p@-rats'Ent dv'AttsVt; (en)v'i:(ru) p@-rats'Ent dv;'es;t;I ojd;'in t'otS;ka st'o s;'emd;E2s;VttS;It'yR;I t'ys;VtS;_! st'o v'os;E2md;E2s;atv'os;E2m p@-rats'Ent dv'AttsVt; (en)'eI(ru) p@-rats'Ent dv;'es;t;I ojd;'in t'otS;ka tS;It'yr;E2sta v'os;E2md;E2s;atv'os;E2m t'ys;VtS;_! p;,It;s'ot d;E2v;Itn'AttsVt; p@-rats'Ent dv'A (en)s'i:(ru) ojd;'in t'otS;ka tS;It'yr;E2sta v'os;E2md;E2s;atv'os;E2m t'ys;VtS;_! p;,It;s'ot d;E2v;Itn'AttsVt; p@-rats'Ent dv;'es;t;I p@-rats'Ent dv;'es;t;I p@-rats'Ent dv;'es;t;I ojd;'in p@-rats'Ent dv;'es;t;I t'otS;ka vOs;Ims'ot p;Vd;d;Is;'jats;'ejm t'ys;VtS;" "C0.87569253%200%200%201%200.36817%2C-0.2019%201.0387963%2C1.0387963%200%200%201%200.419637%2C-0.02375%20c%200.30087%2C0.04514%200.582739%2C0.216942%200.750593%2C0.470308%200.09502%2C0.142517%200.153603%2C0.308788%200.18844%2C0.478226%200.03484%2C0.168646%200.0475%2C0.340459%200.05701%2C0.513064%200.03167%2C0.601741%200.03167%2C1.205067%200.01426%2C1.808392%20-0.01426%2C0.526524%20-0.04355%2C1.0673%20-0.253366%2C1.549486%20-0.271575%2C0.619159%20-0.817101%2C1.08155%20-1.405383%2C1.414092%20a%205.5835296%2C5.5835296%200%200%201%20-1.257323%2C0.512272%20c%200.38163%2C1.219319%200.580363%2C2.56532%200.580363%2C3.93349%20a%2013.935071%2C13.935071%200%200%201%20-0.106901%2C1.682498%2010.264446%2C10.264446%200%200%200%205.054631%2C-8.829768%20c%200%2C-5.669041%20-4.59699%2C-10.2652391%20-10.265238%2C-10.2652391%20z%20M%2010.037865%2C19.798537%20v%201.174188%20a%201.488519%2C1.488519%200%200%201%200.857482%2C0.286619%201.3760882%2C1.3760882%200%200%201%200.440222%2C0.538402%20c%200.0966%2C0.213775%200.131432%2C0.456056%200.09184%2C0.687252%20a%201.1821057%2C1.1821057%200%200%201%20-0.262867%2C0.560568%201.3040376%2C1.3040376%200%200%201%20-0.502772%2C0.36263%201.3760882%2C1.3760882%200%200%201%20-0.623119%2C0.0966%20v%202.953287%20l%206.145683%2C-3.33175%20-6.145683%2C-3," "ru sum strings"
test_phon ru "(en)s'i:(ru) n'ojl t'otS;ka# v'os;E2md;E2s;ats;'ejm m;,IlI;'onof_! p;,It;s'ot SE2z;d;d;Is;'jatd;'evI3t; t'ys;VtS;_! dv;'es;t;I p;Vd;d;Is;'jattR;'i p@-rats'Ent dv;'es;t;I p@-rats'Ent dv;'es;t;I p@-rats'Ent dv;'es;t;I ojd;'in p@-rats'Ent dv;'es;t;I t'otS;ka# t@-r;'ittsat;S'Es;t; t'ys;VtS;_! vOs;Ims'ot s;Imn'AttsVt; p@-rats'Ent dv'A (en)s'i:(ru) n'ojl t'otS;ka# dv;'e t'ys;VdZ; d;E2v;Itn'AttsVt; p@-rats'Ent dv;'es;t;I ojd;'in t'otS;ka# n'ojl tR;'iv'os;E#m_!s;'ejm d;'evI3t;S'Es;t;_!tR;'i p@-rats'Ent dv'A (en)s'i:(ru) ojd;'in t'otS;ka# n'ojl tR;'iv'os;E#m_!s;'ejm d;'evI3t;S'Es;t;_!tR;'i p@-rats'Ent dv;'es;t;I p@-rats'Ent dv;'es;t;I p@-rats'Ent dv;'es;t;I ojd;'in p@-rats'Ent dv;'es;t;I t'otS;ka# tS;It'yr;E2sta# d;E2v;Itn'AttsVt; t'ys;VtS;_! S,E#s;t;s'ot t@-r;'ittsat;s;'ejm p@-rats'Ent dv'A (en)s'i:(ru) n'ojl t'otS;ka# n'ojldv'A_! tR;'is;'ejm_!p;'jat; p@-rats'Ent dv'AttsVt; (en)s'i:(ru) p@-rats'Ent dv;'es;t;I t'otS;ka# t@-r;'ittsVt; t'ys;VtS;_! v'os;E2md;E2s;ats;'ejm p@-rats'Ent dv'A (en)s'i:(ru) n'ojl t'otS;ka# n'ojltS;It'yR;I_! p;'jat;Vjd;'in_!tS;It'yR;I p@-rats'Ent dv;'es;t;I t'otS;ka# p;,It;s'ot v'os;E2md;E2s;atdv;'e t'ys;VtS;_! s;,Ims'ot t@-r;'ittsat;d;'evI3t; p@-rats'Ent dv'A (en)s'i:(ru) n'ojl t'otS;ka# dv;'es;t;I SE#stn'AttsVt; t'ys;VtS;_! dev;ats'ot s'o@-*Okdv'A p@-rats'Ent dv;'es;t;I t'otS;ka# s;,Ims'ot p;Vd;d;Is;'jat t'ys;VtS;_! p;,It;s'ot d;E2v;In'ostOtR;'i p@-rats'Ent dv'A (en)s'i:(ru) n'ojl t'otS;ka# tS;It'yr;E2sta# s;'emd;E2s;Vt t'ys;VtS;_! t@-r;'ista# v'os;E2m p@-rats'Ent dv;'es;t;I t'otS;ka# n'ojld;'evI3t;_! p;'jat;n'ojl_!dv'A p@-rats'Ent dv'A (en)s'i:(ru) n'ojl t'otS;ka# st'o s'o@-*Okdv;'e t'ys;VtS;_! p;,It;s'ot s;Imn'AttsVt; p@-rats'Ent
dv'A (en)s'i:(ru) v'os;E2m t'otS;ka# vOs;Ims'ot dv'Attsat;d;'evI3t; t'ys;VtS;_! s;,Ims'ot SE2z;d;d;Is;'jatv'os;E2m p@-rats'Ent dv'AttsVt; (en)s'i:(ru) p@-rats'Ent dv;'es;t;I p@-rats'Ent dv'A (en)s'i:(ru) p;'jat; t'otS;ka# S,E#s;t;s'ot SE2z;d;d;Is;'jatd;'evI3t; t'ys;VtS;_! s'o@-*Okojd;'in p@-rats'Ent dv'AttsVt; tS;It'yR;I t'otS;ka# p;Vd;d;Is;'jatd;'evI3t; t'ys;VtS;_! S,E#s;t;s'ot d;E2v;In'ostOd;'evI3t; p@-rats'Ent dv'A (en)s'i:(ru) d;'es;It; t'otS;ka# dv'A m;,IlI;'onof_! S,E#s;t;s'ot p;Vd;d;Is;'jatdv;'e t'ys;VtS;_! t@-r;'ista# d;E2v;In'ostOojd;'in p@-rats'Ent dv'AttsVt; d;'es;It; t'otS;ka# dv;'es;t;I SE2z;d;d;Is;'jatp;'jat; t'ys;VtS;_! dv;'es;t;I t@-r;'ittsat;v'os;E2m p@-rats'Ent dv'A (en)s'i:(ru) d;'es;It; t'otS;ka# dv'A m;,IlI;'onof_! S,E#s;t;s'ot p;Vd;d;Is;'jatdv;'e t'ys;VtS;_! t@-r;'ista# d;E2v;In'ostOojd;'in p@-rats'Ent dv'AttsVt; (en)z'Ed(ru) p@-rats'Ent dv'AttsVt; (en)'Em(ru) p@-rats'Ent dv;'e t'ys;VdZ; d;'es;It; t'otS;ka# n'ojltR;'i_!s;'ejm v'os;E#mS'Es;t;_!p;'jat; p@-rats'Ent dv'A (en)s'i:(ru) d;E2v;Itn'AttsVt; t'otS;ka# s;,Ims'ot d;E2v;In'ostOv'os;E2m t'ys;VtS;_! p;,It;s'ot t@-r;'ittsat;s;'ejm p@-rats'Ent dv'AttsVt; (en)v'i:(ru) p@-rats'Ent dv;'es;t;I ojd;'in t'otS;ka# st'o s;'emd;E2s;VttS;It'yR;I t'ys;VtS;_! st'o v'os;E2md;E2s;atv'os;E2m p@-rats'Ent dv'AttsVt; (en)'eI(ru) p@-rats'Ent dv;'es;t;I ojd;'in t'otS;ka# tS;It'yr;E2sta# v'os;E2md;E2s;atv'os;E2m t'ys;VtS;_! p;,It;s'ot d;E2v;Itn'AttsVt; p@-rats'Ent dv'A (en)s'i:(ru) ojd;'in t'otS;ka# tS;It'yr;E2sta# v'os;E2md;E2s;atv'os;E2m t'ys;VtS;_! p;,It;s'ot d;E2v;Itn'AttsVt; p@-rats'Ent dv;'es;t;I p@-rats'Ent dv;'es;t;I p@-rats'Ent dv;'es;t;I ojd;'in p@-rats'Ent dv;'es;t;I t'otS;ka# vOs;Ims'ot p;Vd;d;Is;'jats;'ejm t'ys;VtS;" "C0.87569253%200%200%201%200.36817%2C-0.2019%201.0387963%2C1.0387963%200%200%201%200.419637%2C-0.02375%20c%200.30087%2C0.04514%200.582739%2C0.216942%200.750593%2C0.470308%200.09502%2C0.142517%200.153603%2C0.308788%200.18844%2C0.478226%200.03484%2C0.168646%200.0475%2C0.340459%200.05701%2C0.513064%200.03167%2C0.601741%200.03167%2C1.205067%200.01426%2C1.808392%20-0.01426%2C0.526524%20-0.04355%2C1.0673%20-0.253366%2C1.549486%20-0.271575%2C0.619159%20-0.817101%2C1.08155%20-1.405383%2C1.414092%20a%205.5835296%2C5.5835296%200%200%201%20-1.257323%2C0.512272%20c%200.38163%2C1.219319%200.580363%2C2.56532%200.580363%2C3.93349%20a%2013.935071%2C13.935071%200%200%201%20-0.106901%2C1.682498%2010.264446%2C10.264446%200%200%200%205.054631%2C-8.829768%20c%200%2C-5.669041%20-4.59699%2C-10.2652391%20-10.265238%2C-10.2652391%20z%20M%2010.037865%2C19.798537%20v%201.174188%20a%201.488519%2C1.488519%200%200%201%200.857482%2C0.286619%201.3760882%2C1.3760882%200%200%201%200.440222%2C0.538402%20c%200.0966%2C0.213775%200.131432%2C0.456056%200.09184%2C0.687252%20a%201.1821057%2C1.1821057%200%200%201%20-0.262867%2C0.560568%201.3040376%2C1.3040376%200%200%201%20-0.502772%2C0.36263%201.3760882%2C1.3760882%200%200%201%20-0.623119%2C0.0966%20v%202.953287%20l%206.145683%2C-3.33175%20-6.145683%2C-3," "ru sum strings"

# A deleted phoneme at the start of a word should preserve the sourceix property.
test_phon en-GB-x-gbcwmd "aI 'av" "I have"

Loading…
Cancel
Save