@@ -26,6 +26,7 @@ | |||
#include <stdio.h> | |||
#include <stdlib.h> | |||
#include <string.h> | |||
#include <wctype.h> | |||
#include <espeak-ng/espeak_ng.h> | |||
#include <espeak-ng/speak_lib.h> | |||
@@ -585,7 +586,7 @@ static int compile_line(char *linebuf, char *dict_line, int *hash) | |||
ix = utf8_in(&c2, p); | |||
if (c2 == 0) | |||
break; | |||
if (iswupper2(c2)) | |||
if (iswupper(c2)) | |||
utf8_out(towlower2(c2), p); | |||
else | |||
all_upper_case = 0; |
@@ -24,6 +24,7 @@ | |||
#include <stdio.h> | |||
#include <stdlib.h> | |||
#include <string.h> | |||
#include <wctype.h> | |||
#include <wchar.h> | |||
#include <espeak-ng/espeak_ng.h> | |||
@@ -614,7 +615,7 @@ const char *GetTranslatedPhonemeString(int phoneme_mode) | |||
p += utf8_in(&c, p); | |||
if (use_tie != 0) { | |||
// look for non-inital alphabetic character, but not diacritic, superscript etc. | |||
if ((count > 0) && !(flags & (1 << (count-1))) && ((c < 0x2b0) || (c > 0x36f)) && iswalpha2(c)) | |||
if ((count > 0) && !(flags & (1 << (count-1))) && ((c < 0x2b0) || (c > 0x36f)) && iswalpha(c)) | |||
buf += utf8_out(use_tie, buf); | |||
} | |||
buf += utf8_out(c, buf); | |||
@@ -832,7 +833,7 @@ int Unpronouncable(Translator *tr, char *word, int posn) | |||
break; | |||
} | |||
if ((c != '\'') && !iswalpha2(c)) | |||
if ((c != '\'') && !iswalpha(c)) | |||
return 0; | |||
} | |||
@@ -1787,7 +1788,7 @@ static void MatchRule(Translator *tr, char *word[], char *word_start, int group_ | |||
failed = 1; | |||
break; | |||
case RULE_NONALPHA: | |||
if (!iswalpha2(letter_w)) { | |||
if (!iswalpha(letter_w)) { | |||
add_points = (21-distance_right); | |||
post_ptr += letter_xbytes; | |||
} else | |||
@@ -1996,7 +1997,7 @@ static void MatchRule(Translator *tr, char *word[], char *word_start, int group_ | |||
failed = 1; | |||
break; | |||
case RULE_NONALPHA: | |||
if (!iswalpha2(letter_w)) { | |||
if (!iswalpha(letter_w)) { | |||
add_points = (21-distance_right); | |||
pre_ptr -= letter_xbytes; | |||
} else | |||
@@ -2300,7 +2301,7 @@ int TranslateRules(Translator *tr, char *p_start, char *phonemes, int ph_size, c | |||
if (tr->letter_bits_offset > 0) { | |||
// not a Latin alphabet, switch to the default Latin alphabet language | |||
if ((letter <= 0x241) && iswalpha2(letter)) { | |||
if ((letter <= 0x241) && iswalpha(letter)) { | |||
sprintf(phonemes, "%c%s", phonSWITCH, tr->langopts.ascii_language); | |||
return 0; | |||
} |
@@ -701,7 +701,7 @@ int TranslateLetter(Translator *tr, char *word, char *phonemes, int control) | |||
if (control & 2) { | |||
// include CAPITAL information | |||
if (iswupper2(letter)) | |||
if (iswupper(letter)) | |||
Lookup(tr, "_cap", capital); | |||
} | |||
letter = towlower2(letter); | |||
@@ -845,7 +845,7 @@ int TranslateLetter(Translator *tr, char *word, char *phonemes, int control) | |||
if (ph_buf[0] == 0) { | |||
speak_letter_number = 1; | |||
if (!(al_flags & AL_NO_SYMBOL)) { | |||
if (iswalpha2(letter)) | |||
if (iswalpha(letter)) | |||
Lookup(translator, "_?A", ph_buf); | |||
if ((ph_buf[0] == 0) && !iswspace(letter)) | |||
@@ -2055,7 +2055,7 @@ static int TranslateNumber_1(Translator *tr, char *word, char *ph_out, unsigned | |||
if ((tr->langopts.numbers & NUM_NOPAUSE) && (next_char == ' ')) | |||
utf8_in(&next_char, p); | |||
if (!iswalpha2(next_char) && (thousands_exact == 0)) | |||
if (!iswalpha(next_char) && (thousands_exact == 0)) | |||
strcat(ph_out, str_pause); // don't add pause for 100s, 6th, etc. | |||
} | |||
@@ -289,88 +289,13 @@ static const short wchar_toupper[] = { | |||
0, 0 | |||
}; | |||
// use internal data for iswalpha up to U+024F | |||
// iswalpha() on Windows is unreliable (U+AA, U+BA). | |||
int iswalpha2(int c) | |||
{ | |||
if (c < 0x80) | |||
return isalpha(c); | |||
if ((c > 0x3040) && (c <= 0xa700)) | |||
return 1; // japanese, chinese characters | |||
if (c > MAX_WALPHA) | |||
return iswalpha(c); | |||
return walpha_tab[c-0x80]; | |||
} | |||
int iswlower2(int c) | |||
{ | |||
if (c < 0x80) | |||
return islower(c); | |||
if (c > MAX_WALPHA) | |||
return iswlower(c); | |||
if (walpha_tab[c-0x80] == 0xff) | |||
return 1; | |||
return 0; | |||
} | |||
int iswupper2(int c) | |||
{ | |||
int x; | |||
if (c < 0x80) | |||
return isupper(c); | |||
if (c > MAX_WALPHA) | |||
return iswupper(c); | |||
if (((x = walpha_tab[c-0x80]) > 0) && (x < 0xfe)) | |||
return 1; | |||
return 0; | |||
} | |||
int towlower2(unsigned int c) | |||
{ | |||
int x; | |||
int ix; | |||
// check for non-standard upper to lower case conversions | |||
if (c == 'I') { | |||
if (translator->langopts.dotless_i) | |||
c = 0x131; // I -> ı | |||
} | |||
if (c < 0x80) | |||
return tolower(c); | |||
if (c == 'I' && translator->langopts.dotless_i) | |||
return 0x131; // I -> ı | |||
if (c > MAX_WALPHA) | |||
return towlower(c); | |||
if ((x = walpha_tab[c-0x80]) >= 0xfe) | |||
return c; // this is not an upper case letter | |||
if (x == 0xfd) { | |||
// special cases, lookup translation table | |||
for (ix = 0; wchar_tolower[ix] != 0; ix += 2) { | |||
if (wchar_tolower[ix] == (int)c) | |||
return wchar_tolower[ix+1]; | |||
} | |||
} | |||
return c + x; // convert to lower case | |||
} | |||
int towupper2(unsigned int c) | |||
{ | |||
int ix; | |||
if (c > MAX_WALPHA) | |||
return towupper(c); | |||
// check whether a previous character code is the upper-case equivalent of this character | |||
if (towlower2(c-32) == (int)c) | |||
return c-32; // yes, use it | |||
if (towlower2(c-1) == (int)c) | |||
return c-1; | |||
for (ix = 0; wchar_toupper[ix] != 0; ix += 2) { | |||
if (wchar_toupper[ix] == (int)c) | |||
return wchar_toupper[ix+1]; | |||
} | |||
return c; // no | |||
return towlower(c); | |||
} | |||
static int IsRomanU(unsigned int c) | |||
@@ -2015,7 +1940,7 @@ int ReadClause(Translator *tr, FILE *f_in, char *buf, short *charix, int *charix | |||
while (!Eof() && (c1 != '>')) | |||
c1 = GetC(); | |||
c2 = ' '; | |||
} else if ((c2 == '/') || iswalpha2(c2)) { | |||
} else if ((c2 == '/') || iswalpha(c2)) { | |||
// check for space in the output buffer for embedded commands produced by the SSML tag | |||
if (ix > (n_buf - 20)) { | |||
// Perhaps not enough room, end the clause before the SSML tag | |||
@@ -2167,9 +2092,9 @@ int ReadClause(Translator *tr, FILE *f_in, char *buf, short *charix, int *charix | |||
} | |||
} | |||
if (iswupper2(c1)) { | |||
if (iswupper(c1)) { | |||
tr->clause_upper_count++; | |||
if ((option_capitals == 2) && (sayas_mode == 0) && !iswupper2(cprev)) { | |||
if ((option_capitals == 2) && (sayas_mode == 0) && !iswupper(cprev)) { | |||
char text_buf[40]; | |||
char text_buf2[30]; | |||
if (LookupSpecial(tr, "_cap", text_buf2) != NULL) { | |||
@@ -2181,7 +2106,7 @@ int ReadClause(Translator *tr, FILE *f_in, char *buf, short *charix, int *charix | |||
} | |||
} | |||
} | |||
} else if (iswalpha2(c1)) | |||
} else if (iswalpha(c1)) | |||
tr->clause_lower_count++; | |||
if (option_phoneme_input) { | |||
@@ -2238,7 +2163,7 @@ int ReadClause(Translator *tr, FILE *f_in, char *buf, short *charix, int *charix | |||
// i.e. is dot followed by an upper-case letter? | |||
if (!iswspace(c1)) { | |||
if (!IsAlpha(c1) || !iswlower2(c1)) { | |||
if (!IsAlpha(c1) || !iswlower(c1)) { | |||
UngetC(c2); | |||
ungot_char2 = c1; | |||
buf[end_clause_index] = ' '; // delete the end-clause punctuation | |||
@@ -2320,7 +2245,7 @@ int ReadClause(Translator *tr, FILE *f_in, char *buf, short *charix, int *charix | |||
punct_data |= CLAUSE_DOT; | |||
if (nl_count == 0) { | |||
if ((c1 == ',') && (cprev == '.') && (tr->translator_name == L('h', 'u')) && iswdigit(cprev2) && (iswdigit(c_next) || (iswlower2(c_next)))) { | |||
if ((c1 == ',') && (cprev == '.') && (tr->translator_name == L('h', 'u')) && iswdigit(cprev2) && (iswdigit(c_next) || (iswlower(c_next)))) { | |||
// lang=hu, fix for ordinal numbers, eg: "december 2., szerda", ignore ',' after ordinal number | |||
c1 = CHAR_COMMA_BREAK; | |||
is_end_clause = 0; | |||
@@ -2332,11 +2257,11 @@ int ReadClause(Translator *tr, FILE *f_in, char *buf, short *charix, int *charix | |||
// dot after a number indicates an ordinal number | |||
if (!iswdigit(cprev)) | |||
is_end_clause = 0; // Roman number followed by dot | |||
else if (iswlower2(c_next) || (c_next == '-')) // hyphen is needed for lang-hu (eg. 2.-kal) | |||
else if (iswlower(c_next) || (c_next == '-')) // hyphen is needed for lang-hu (eg. 2.-kal) | |||
is_end_clause = 0; // only if followed by lower-case, (or if there is a XML tag) | |||
} else if (c_next == '\'') | |||
is_end_clause = 0; // eg. u.s.a.'s | |||
if (iswlower2(c_next)) { | |||
if (iswlower(c_next)) { | |||
// next word has no capital letter, this dot is probably from an abbreviation | |||
is_end_clause = 0; | |||
} |
@@ -363,7 +363,7 @@ int IsAlpha(unsigned int c) | |||
0 | |||
}; | |||
if (iswalpha2(c)) | |||
if (iswalpha(c)) | |||
return 1; | |||
if (c < 0x300) | |||
@@ -610,7 +610,7 @@ int IsAllUpper(const char *word) | |||
int c; | |||
while ((*word != 0) && !isspace2(*word)) { | |||
word += utf8_in(&c, word); | |||
if (!iswupper2(c)) | |||
if (!iswupper(c)) | |||
return 0; | |||
} | |||
return 1; | |||
@@ -904,7 +904,7 @@ int TranslateWord(Translator *tr, char *word_start, WORD_TAB *wtab, char *word_o | |||
} | |||
} | |||
if ((wflags & FLAG_ALL_UPPER) && (word_length > 1) && iswalpha2(first_char)) { | |||
if ((wflags & FLAG_ALL_UPPER) && (word_length > 1) && iswalpha(first_char)) { | |||
if ((option_tone_flags & OPTION_EMPHASIZE_ALLCAPS) && !(dictionary_flags[0] & FLAG_ABBREV)) { | |||
// emphasize words which are in capitals | |||
emphasize_allcaps = FLAG_EMPHASIZED; | |||
@@ -1331,7 +1331,7 @@ int TranslateWord(Translator *tr, char *word_start, WORD_TAB *wtab, char *word_o | |||
tr->expect_past--; | |||
} | |||
if ((word_length == 1) && (tr->translator_name == L('e', 'n')) && iswalpha2(first_char) && (first_char != 'i')) { | |||
if ((word_length == 1) && (tr->translator_name == L('e', 'n')) && iswalpha(first_char) && (first_char != 'i')) { | |||
// English Specific !!!! | |||
// any single letter before a dot is an abbreviation, except 'I' | |||
dictionary_flags[0] |= FLAG_ALLOW_DOT; | |||
@@ -1587,7 +1587,7 @@ static int TranslateWord2(Translator *tr, char *word, WORD_TAB *wtab, int pre_pa | |||
while (*p2 != ' ') p2++; | |||
utf8_in(&c_word2, p2+1); // first character of the next word; | |||
if (!iswalpha2(c_word2)) | |||
if (!iswalpha(c_word2)) | |||
ok = 0; | |||
if (ok != 0) { | |||
@@ -1957,7 +1957,7 @@ static int SubstituteChar(Translator *tr, unsigned int c, unsigned int next_in, | |||
// there is a list of character codes to be substituted with alternative codes | |||
if (iswupper2(c_lower = c)) { | |||
if (iswupper(c_lower = c)) { | |||
c_lower = towlower2(c); | |||
upper_case = 1; | |||
} | |||
@@ -1984,14 +1984,14 @@ static int SubstituteChar(Translator *tr, unsigned int c, unsigned int next_in, | |||
// there is a second character to be inserted | |||
// don't convert the case of the second character unless the next letter is also upper case | |||
c2 = new_c >> 16; | |||
if (upper_case && iswupper2(next_in)) | |||
c2 = towupper2(c2); | |||
if (upper_case && iswupper(next_in)) | |||
c2 = towupper(c2); | |||
*insert = c2; | |||
new_c &= 0xffff; | |||
} | |||
if (upper_case) | |||
new_c = towupper2(new_c); | |||
new_c = towupper(new_c); | |||
*wordflags |= FLAG_CHAR_REPLACED; | |||
return new_c; | |||
@@ -2046,7 +2046,7 @@ static int TranslateChar(Translator *tr, char *ptr, int prev_in, unsigned int c, | |||
case L('n', 'l'): | |||
// look for 'n and replace by a special character (unicode: schwa) | |||
if (!iswalpha2(prev_in)) { | |||
if (!iswalpha(prev_in)) { | |||
utf8_in(&next2, &ptr[1]); | |||
if ((c == '\'') && IsSpace(next2)) { | |||
@@ -2400,7 +2400,7 @@ void *TranslateClause(Translator *tr, FILE *f_text, const void *vp_input, int *t | |||
if (!IsBracket(prev_out)) // ?? perhaps only set FLAG_NOSPACE for . - / (hyphenated words, URLs, etc) | |||
next_word_flags |= FLAG_NOSPACE; | |||
} else { | |||
if (iswupper2(c)) | |||
if (iswupper(c)) | |||
word_flags |= FLAG_FIRST_UPPER; | |||
if ((prev_out == ' ') && iswdigit(sbuf[ix-2]) && !iswdigit(prev_in)) { | |||
@@ -2430,7 +2430,7 @@ void *TranslateClause(Translator *tr, FILE *f_text, const void *vp_input, int *t | |||
} | |||
} | |||
if (iswupper2(c)) { | |||
if (iswupper(c)) { | |||
c = towlower2(c); | |||
if ((j = tr->langopts.param[LOPT_CAPS_IN_WORD]) > 0) { | |||
@@ -2440,7 +2440,7 @@ void *TranslateClause(Translator *tr, FILE *f_text, const void *vp_input, int *t | |||
syllable_marked = 1; | |||
} | |||
} else { | |||
if (iswlower2(prev_in)) { | |||
if (iswlower(prev_in)) { | |||
// lower case followed by upper case in a word | |||
if (UpperCaseInWord(tr, &sbuf[ix], c) == 1) { | |||
// convert to lower case and continue | |||
@@ -2450,7 +2450,7 @@ void *TranslateClause(Translator *tr, FILE *f_text, const void *vp_input, int *t | |||
space_inserted = 1; | |||
prev_in_save = c; | |||
} | |||
} else if ((c != ' ') && iswupper2(prev_in) && iswlower2(next_in)) { | |||
} else if ((c != ' ') && iswupper(prev_in) && iswlower(next_in)) { | |||
int next2_in; | |||
utf8_in(&next2_in, &source[source_index + next_in_nbytes]); | |||
@@ -724,12 +724,8 @@ int IsDigit09(unsigned int c); | |||
int IsAlpha(unsigned int c); | |||
int IsVowel(Translator *tr, int c); | |||
int IsSuperscript(int letter); | |||
int iswalpha2(int c); | |||
int isspace2(unsigned int c); | |||
int iswlower2(int c); | |||
int iswupper2(int c); | |||
int towlower2(unsigned int c); | |||
int towupper2(unsigned int c); | |||
int towlower2(unsigned int c); // Supports Turkish I | |||
const char *GetTranslatedPhonemeString(int phoneme_mode); | |||
const char *WordToString2(unsigned int word); | |||
ALPHABET *AlphabetFromChar(int c); |