@@ -14,6 +14,8 @@ The espeak-ng project is a fork of the espeak project. | |||
BCP 47 language subtag registry making the newly registered `hyw` language code the | |||
preferred value for `hy-arevmda`. This change keeps support for detecting the | |||
`hy-arevela` and `hy-arevmda` language tags. | |||
* Support replacement rule strings of any length for the source part of the rule (the | |||
'source' string that is replaced by the 'target' string). | |||
* Add more tests to check the various parts of espeak-ng. | |||
* Various changes to clean up the codebase. | |||
* Fix various compiler warnings (`-Wuninitialized`, `-Wmissing-prototypes`, `-Wreturn-type`, |
@@ -509,11 +509,11 @@ each language. The number fragments are given in the `*_list` file. | |||
## Character Substitution | |||
Character substitutions can be specified by using a `.replace` section | |||
at the start of the `*_rules` file. In each line one character can be | |||
replaced by one or two characters. (Source and target of replacement can consume | |||
up to four bytes.) This substitution is done to a word _before_ the word is searched | |||
in the `*_list` or `*_listx` file and translated using the spelling-to-phoneme rules. | |||
Only the lower-case version of the characters needs to be specified. e.g.: | |||
at the start of the `*_rules` file. In each line multiple _source_ characters | |||
can be replaced by one or two characters. This substitution is done to a word | |||
_before_ the word is searched in the `*_list` or `*_listx` file and translated using | |||
the spelling-to-phoneme rules. Only the lower-case version of the characters | |||
needs to be specified. e.g.: | |||
.replace | |||
ô ő // (Hungarian) allow the use of o-circumflex instead of o-double-acute |
@@ -1804,12 +1804,22 @@ static const char *FindReplacementChars(Translator *tr, const char **pfrom, unsi | |||
if (nc == fc) { | |||
if (*from == 0) return from + 1; | |||
from += utf8_in((int *)&fc, from); | |||
match_next += utf8_in((int *)&nc, match_next); | |||
bool matched = true; | |||
int nmatched = 0; | |||
while (*from != 0) { | |||
from += utf8_in((int *)&fc, from); | |||
nc = towlower2(nc, tr); | |||
if (*from == 0 && nc == fc) { | |||
*ignore_next_n = 1; | |||
match_next += utf8_in((int *)&nc, match_next); | |||
nc = towlower2(nc, tr); | |||
if (nc != fc) | |||
matched = false; | |||
else | |||
nmatched++; | |||
} | |||
if (*from == 0 && matched) { | |||
*ignore_next_n = nmatched; | |||
return from + 1; | |||
} | |||
} |