|
|
@@ -1864,6 +1864,157 @@ static int SubstituteChar(Translator *tr, unsigned int c, unsigned int next_in, |
|
|
|
return new_c; |
|
|
|
} |
|
|
|
|
|
|
|
static bool GetNextTranslateChars(const int **from, const int **to, bool next_step)
{
	// Step through a replacement list laid out as
	//     from SP to SP from SP to ... NUL
	// where every group is a run of character codes and groups are separated
	// by one or more spaces (' ').
	//
	// from       in:  start of the current 'from' group (or of the whole list
	//                 on the first call)
	//            out: start of the 'from' group selected by this call
	// to         out: start of the matching 'to' group
	// next_step  false on the first call (use the pair at *from),
	//            true on later calls (advance past the current pair first)
	//
	// Returns true if a complete from/to pair was found, false at the end of
	// the list or when the last 'from' group has no 'to' group.
	// When the list ends before a 'from' group is found, *to is left untouched.

	if (from == NULL || to == NULL || *from == NULL)
		return false;

	const int *p = *from;

	if (next_step) {
		// Walk over the previous pair token by token, so that a run of
		// several spaces counts as a single separator.
		while (*p != 0 && *p != ' ') // previous 'from' group
			p++;
		while (*p == ' ')
			p++;
		while (*p != 0 && *p != ' ') // previous 'to' group
			p++;
	}

	// Separator before the next pair, or leading spaces on the first call
	// (otherwise the caller would get an empty 'from' group).
	while (*p == ' ')
		p++;

	*from = p;
	if (*p == 0)
		return false;

	while (*p != 0 && *p != ' ') // skip the 'from' group itself
		p++;
	while (*p == ' ') // and the separator(s) in front of 'to'
		p++;

	*to = p;
	return *p != 0;
}
|
|
|
|
|
|
|
static void ReplaceMatchingChars(char **matchstart, const int *from, const int *to, int upper_case) |
|
|
|
{ |
|
|
|
// Replace characters from 'from' to 'to' group in source clause, starting |
|
|
|
// with place indicated in matchstart. |
|
|
|
// Rewrites remaining part of source, if length of 'from' and 'to' groups differs. |
|
|
|
|
|
|
|
char tmpsource[N_TR_SOURCE + 40]; // Prepare working copy of source |
|
|
|
memcpy(&tmpsource, *matchstart, N_TR_SOURCE + 40); |
|
|
|
|
|
|
|
// prepare from part |
|
|
|
char charfrom[N_WORD_BYTES + 1] = { 0 }; |
|
|
|
int usedfrom = 0; |
|
|
|
int const *pintfrom = from; |
|
|
|
char *pcharfrom = charfrom; |
|
|
|
while (*pintfrom != ' ') { |
|
|
|
usedfrom += utf8_out(*pintfrom, pcharfrom + usedfrom); |
|
|
|
pintfrom++; |
|
|
|
} |
|
|
|
|
|
|
|
// prepare to part |
|
|
|
char charto[N_WORD_BYTES + 1] = { 0 }; |
|
|
|
int usedto = 0; |
|
|
|
int const *pintto = to; |
|
|
|
char *pcharto = charto; |
|
|
|
while (*pintto != 0 && *pintto != ' ') { |
|
|
|
usedto += utf8_out(*pintto, pcharto + usedto); |
|
|
|
pintto++; |
|
|
|
} |
|
|
|
|
|
|
|
// Set up replacement in temporary buffer |
|
|
|
char *psource = *matchstart; |
|
|
|
char *ptmpsource = tmpsource; |
|
|
|
pcharto = charto; |
|
|
|
|
|
|
|
// Do replacement writing 'to' part in buffer |
|
|
|
while (*pcharto != 0 ) { |
|
|
|
if(upper_case) { // TODO: should check for capital letter from source |
|
|
|
*ptmpsource = toupper(*pcharto); |
|
|
|
upper_case = 0; // capitalize only first character if necessary (will work in most cases) |
|
|
|
} |
|
|
|
else |
|
|
|
*ptmpsource = *pcharto; |
|
|
|
pcharto++; |
|
|
|
ptmpsource++; |
|
|
|
} |
|
|
|
|
|
|
|
// If 'to' part is different than 'from' part, fix remaining part of the buffer. |
|
|
|
// (Read after 'from' in source, write after 'to' in buffer. |
|
|
|
// Don't touch last 40 bytes of source, which could contain control data. |
|
|
|
if (usedfrom != usedto) { |
|
|
|
psource = *matchstart + usedfrom; |
|
|
|
while (*psource != 0 && psource < &source[N_TR_SOURCE] |
|
|
|
&& ptmpsource < &tmpsource[N_TR_SOURCE]) { |
|
|
|
*ptmpsource = *psource; |
|
|
|
ptmpsource++; |
|
|
|
psource++; |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
// Write temporary buffer back to source |
|
|
|
psource = *matchstart; |
|
|
|
ptmpsource = tmpsource; |
|
|
|
while (*ptmpsource != 0 && ptmpsource < &tmpsource[N_TR_SOURCE] |
|
|
|
&& psource < &source[N_TR_SOURCE]) { |
|
|
|
*psource = *ptmpsource; |
|
|
|
ptmpsource++; |
|
|
|
psource++; |
|
|
|
} |
|
|
|
|
|
|
|
// if requested, print trace |
|
|
|
if (option_phonemes & espeakPHONEMES_TRACE) |
|
|
|
fprintf(f_trans, "Replace %s > %s\n", charfrom, charto); |
|
|
|
} |
|
|
|
|
|
|
|
static void TranslateChars(Translator *tr, char *source)
{
	// Apply the replacement pairs of tr->langopts.replace_chars to the
	// NUL-terminated UTF-8 text in 'source'.
	//
	// Every from/to pair delivered by GetNextTranslateChars() is applied in
	// its own pass over the whole text. Matching is done on the lower-case
	// form of the text; if the first matched character was upper-case, this
	// is passed on to ReplaceMatchingChars() so it can capitalize the
	// replacement.
	//
	// Characters below ' ' (control characters) are never part of a match and
	// also end a match in progress: ReplaceMatchingChars() derives the length
	// of the replaced text from the 'from' group alone, so a match spanning a
	// control byte would overwrite the wrong bytes.

	const int *from;
	const int *to;

	if (tr == NULL || source == NULL)
		return;
	if ((from = tr->langopts.replace_chars) == NULL)
		return;

	bool next_step = false; // false only for the first pair of the list
	while (GetNextTranslateChars(&from, &to, next_step)) {
		next_step = true;

		if (*from == ' ' || *from == 0)
			continue; // empty 'from' group: nothing to search for

		const int *curfrom = from; // next character of 'from' to compare
		char *curchar = source;    // current position in the text
		char *startmatch = NULL;   // start of the match in progress, if any
		bool upper_case = false;   // case of the first character of that match

		for (;;) {
			int curint = 0; // character code decoded at curchar
			int used = utf8_in(&curint, curchar);

			if (curint <= 0 || used < 1)
				break; // end of text

			if (curint < ' ') {
				// control character: skip it and drop any partial match
				startmatch = NULL;
				curfrom = from;
				curchar += used;
				continue;
			}

			bool is_upper = iswupper(curint) != 0;
			if (is_upper)
				curint = ucd_tolower(curint);

			if (curint != *curfrom) {
				if (startmatch != NULL) {
					// A partial match failed. Retry from the character
					// after its start, so that overlapping candidates
					// ("ab" inside "aab") are not missed.
					int skipped;
					curchar = startmatch + utf8_in(&skipped, startmatch);
					startmatch = NULL;
					curfrom = from;
				} else {
					curchar += used;
				}
				continue;
			}

			if (startmatch == NULL) {
				startmatch = curchar;
				upper_case = is_upper; // remember case of the first character
			}
			curfrom++;
			curchar += used;

			if (*curfrom == ' ') { // full match: replace
				ReplaceMatchingChars(&startmatch, from, to, upper_case);

				// Continue behind the replacement: the text may have
				// changed its length, and the inserted characters must
				// not be matched again by the same rule.
				curchar = startmatch;
				for (const int *p = to; *p != 0 && *p != ' ' && *curchar != 0; p++) {
					int skipped;
					curchar += utf8_in(&skipped, curchar);
				}

				startmatch = NULL;
				curfrom = from;
			}
		}
	}
}
|
|
|
|
|
|
|
static int TranslateChar(Translator *tr, char *ptr, int prev_in, unsigned int c, unsigned int next_in, int *insert, int *wordflags) |
|
|
|
{ |
|
|
|
// To allow language specific examination and replacement of characters |
|
|
@@ -1930,8 +2081,7 @@ static int TranslateChar(Translator *tr, char *ptr, int prev_in, unsigned int c, |
|
|
|
} |
|
|
|
break; |
|
|
|
} |
|
|
|
// handle .replace rule in xx_rules file |
|
|
|
return SubstituteChar(tr, c, next_in, insert, wordflags); |
|
|
|
return c; |
|
|
|
} |
|
|
|
|
|
|
|
static const char *UCase_ga[] = { "bp", "bhf", "dt", "gc", "hA", "mb", "nd", "ng", "ts", "tA", "nA", NULL }; |
|
|
@@ -2104,6 +2254,9 @@ void TranslateClause(Translator *tr, int *tone_out, char **voice_change) |
|
|
|
} |
|
|
|
words[0].length = k; |
|
|
|
|
|
|
|
// handle .replace rule of xx_rules file |
|
|
|
TranslateChars(tr, source); |
|
|
|
|
|
|
|
while (!finished && (ix < (int)sizeof(sbuf) - 1) && (n_ph_list2 < N_PHONEME_LIST-4)) { |
|
|
|
prev_out2 = prev_out; |
|
|
|
utf8_in2(&prev_out, &sbuf[ix-1], 1); |