| @@ -26,6 +26,7 @@ | |||
| #include <stdio.h> | |||
| #include <stdlib.h> | |||
| #include <string.h> | |||
| #include <wctype.h> | |||
| #include <espeak-ng/espeak_ng.h> | |||
| #include <espeak-ng/speak_lib.h> | |||
| @@ -585,7 +586,7 @@ static int compile_line(char *linebuf, char *dict_line, int *hash) | |||
| ix = utf8_in(&c2, p); | |||
| if (c2 == 0) | |||
| break; | |||
| if (iswupper2(c2)) | |||
| if (iswupper(c2)) | |||
| utf8_out(towlower2(c2), p); | |||
| else | |||
| all_upper_case = 0; | |||
| @@ -24,6 +24,7 @@ | |||
| #include <stdio.h> | |||
| #include <stdlib.h> | |||
| #include <string.h> | |||
| #include <wctype.h> | |||
| #include <wchar.h> | |||
| #include <espeak-ng/espeak_ng.h> | |||
| @@ -614,7 +615,7 @@ const char *GetTranslatedPhonemeString(int phoneme_mode) | |||
| p += utf8_in(&c, p); | |||
| if (use_tie != 0) { | |||
| // look for non-initial alphabetic character, but not diacritic, superscript etc. | |||
| if ((count > 0) && !(flags & (1 << (count-1))) && ((c < 0x2b0) || (c > 0x36f)) && iswalpha2(c)) | |||
| if ((count > 0) && !(flags & (1 << (count-1))) && ((c < 0x2b0) || (c > 0x36f)) && iswalpha(c)) | |||
| buf += utf8_out(use_tie, buf); | |||
| } | |||
| buf += utf8_out(c, buf); | |||
| @@ -832,7 +833,7 @@ int Unpronouncable(Translator *tr, char *word, int posn) | |||
| break; | |||
| } | |||
| if ((c != '\'') && !iswalpha2(c)) | |||
| if ((c != '\'') && !iswalpha(c)) | |||
| return 0; | |||
| } | |||
| @@ -1787,7 +1788,7 @@ static void MatchRule(Translator *tr, char *word[], char *word_start, int group_ | |||
| failed = 1; | |||
| break; | |||
| case RULE_NONALPHA: | |||
| if (!iswalpha2(letter_w)) { | |||
| if (!iswalpha(letter_w)) { | |||
| add_points = (21-distance_right); | |||
| post_ptr += letter_xbytes; | |||
| } else | |||
| @@ -1996,7 +1997,7 @@ static void MatchRule(Translator *tr, char *word[], char *word_start, int group_ | |||
| failed = 1; | |||
| break; | |||
| case RULE_NONALPHA: | |||
| if (!iswalpha2(letter_w)) { | |||
| if (!iswalpha(letter_w)) { | |||
| add_points = (21-distance_right); | |||
| pre_ptr -= letter_xbytes; | |||
| } else | |||
| @@ -2300,7 +2301,7 @@ int TranslateRules(Translator *tr, char *p_start, char *phonemes, int ph_size, c | |||
| if (tr->letter_bits_offset > 0) { | |||
| // not a Latin alphabet, switch to the default Latin alphabet language | |||
| if ((letter <= 0x241) && iswalpha2(letter)) { | |||
| if ((letter <= 0x241) && iswalpha(letter)) { | |||
| sprintf(phonemes, "%c%s", phonSWITCH, tr->langopts.ascii_language); | |||
| return 0; | |||
| } | |||
| @@ -701,7 +701,7 @@ int TranslateLetter(Translator *tr, char *word, char *phonemes, int control) | |||
| if (control & 2) { | |||
| // include CAPITAL information | |||
| if (iswupper2(letter)) | |||
| if (iswupper(letter)) | |||
| Lookup(tr, "_cap", capital); | |||
| } | |||
| letter = towlower2(letter); | |||
| @@ -845,7 +845,7 @@ int TranslateLetter(Translator *tr, char *word, char *phonemes, int control) | |||
| if (ph_buf[0] == 0) { | |||
| speak_letter_number = 1; | |||
| if (!(al_flags & AL_NO_SYMBOL)) { | |||
| if (iswalpha2(letter)) | |||
| if (iswalpha(letter)) | |||
| Lookup(translator, "_?A", ph_buf); | |||
| if ((ph_buf[0] == 0) && !iswspace(letter)) | |||
| @@ -2055,7 +2055,7 @@ static int TranslateNumber_1(Translator *tr, char *word, char *ph_out, unsigned | |||
| if ((tr->langopts.numbers & NUM_NOPAUSE) && (next_char == ' ')) | |||
| utf8_in(&next_char, p); | |||
| if (!iswalpha2(next_char) && (thousands_exact == 0)) | |||
| if (!iswalpha(next_char) && (thousands_exact == 0)) | |||
| strcat(ph_out, str_pause); // don't add pause for 100s, 6th, etc. | |||
| } | |||
| @@ -289,88 +289,13 @@ static const short wchar_toupper[] = { | |||
| 0, 0 | |||
| }; | |||
| // iswalpha2: alphabetic test for character code c; uses the internal walpha_tab for codes from 0x80 up to MAX_WALPHA (the original note says U+024F). | |||
| // Rationale given by the original author: iswalpha() on Windows is unreliable (U+AA, U+BA). | |||
| int iswalpha2(int c) /* returns non-zero when c is treated as alphabetic, 0 otherwise */ | |||
| { | |||
| if (c < 0x80) /* below 0x80: defer to the <ctype.h> classifier. NOTE(review): negative c also takes this path; confirm callers never pass negative codes */ | |||
| return isalpha(c); | |||
| if ((c > 0x3040) && (c <= 0xa700)) | |||
| return 1; // Japanese and Chinese characters: always counted as alphabetic | |||
| if (c > MAX_WALPHA) /* beyond the end of the table: defer to the C library's iswalpha() */ | |||
| return iswalpha(c); | |||
| return walpha_tab[c-0x80]; /* raw table entry, not normalised to 0/1; the table is indexed from code 0x80 */ | |||
| } | |||
| int iswlower2(int c) /* lower-case test for character code c; returns the islower()/iswlower() result outside the table range, otherwise 1 or 0 */ | |||
| { | |||
| if (c < 0x80) /* below 0x80: defer to the <ctype.h> classifier */ | |||
| return islower(c); | |||
| if (c > MAX_WALPHA) /* beyond the end of walpha_tab: defer to the C library's iswlower() */ | |||
| return iswlower(c); | |||
| if (walpha_tab[c-0x80] == 0xff) /* only a table entry of exactly 0xff is reported as lower case */ | |||
| return 1; | |||
| return 0; | |||
| } | |||
| int iswupper2(int c) /* upper-case test for character code c; returns the isupper()/iswupper() result outside the table range, otherwise 1 or 0 */ | |||
| { | |||
| int x; /* holds the walpha_tab entry for c */ | |||
| if (c < 0x80) /* below 0x80: defer to the <ctype.h> classifier */ | |||
| return isupper(c); | |||
| if (c > MAX_WALPHA) /* beyond the end of walpha_tab: defer to the C library's iswupper() */ | |||
| return iswupper(c); | |||
| if (((x = walpha_tab[c-0x80]) > 0) && (x < 0xfe)) /* table entries in the range 1..0xfd are reported as upper case */ | |||
| return 1; | |||
| return 0; | |||
| } | |||
| int towlower2(unsigned int c) /* lower-case conversion of character code c, with a special case for 'I' when langopts.dotless_i is set */ | |||
| { | |||
| int x; /* walpha_tab entry for c */ | |||
| int ix; /* index into the wchar_tolower pair list */ | |||
| // check for non-standard upper to lower case conversions | |||
| if (c == 'I') { /* reads `translator`, which is not a parameter of this function */ | |||
| if (translator->langopts.dotless_i) | |||
| c = 0x131; // I -> ı | |||
| } | |||
| if (c < 0x80) /* below 0x80: defer to tolower() */ | |||
| return tolower(c); | |||
| if (c == 'I' && translator->langopts.dotless_i) /* NOTE(review): cannot be true as rendered ('I' was already remapped or returned above); looks like the post-change line of this diff hunk interleaved with the old body - confirm */ | |||
| return 0x131; // I -> ı | |||
| if (c > MAX_WALPHA) /* beyond the end of walpha_tab: defer to the C library's towlower() */ | |||
| return towlower(c); | |||
| if ((x = walpha_tab[c-0x80]) >= 0xfe) | |||
| return c; // table entry >= 0xfe: this is not an upper case letter, return it unchanged | |||
| if (x == 0xfd) { | |||
| // entry 0xfd marks a special case: search the wchar_tolower list, read as (from, to) pairs ending at a 0 entry | |||
| for (ix = 0; wchar_tolower[ix] != 0; ix += 2) { | |||
| if (wchar_tolower[ix] == (int)c) | |||
| return wchar_tolower[ix+1]; | |||
| } | |||
| } | |||
| return c + x; // otherwise the table entry is added to c as an offset to the lower case code (also reached when a 0xfd entry has no match in the list) | |||
| } | |||
| int towupper2(unsigned int c) /* upper-case conversion of character code c; returns c unchanged when no upper-case form is found */ | |||
| { | |||
| int ix; /* index into the wchar_toupper pair list */ | |||
| if (c > MAX_WALPHA) /* above MAX_WALPHA: defer to the C library's towupper() */ | |||
| return towupper(c); | |||
| // check whether a previous character code is the upper-case equivalent of this character | |||
| if (towlower2(c-32) == (int)c) /* candidate 32 codes below c; c is unsigned, so c-32 wraps for c < 32 */ | |||
| return c-32; // yes, use it | |||
| if (towlower2(c-1) == (int)c) /* candidate immediately below c */ | |||
| return c-1; | |||
| for (ix = 0; wchar_toupper[ix] != 0; ix += 2) { /* list is read as (from, to) pairs ending at a 0 entry */ | |||
| if (wchar_toupper[ix] == (int)c) | |||
| return wchar_toupper[ix+1]; | |||
| } | |||
| return c; // no upper-case equivalent found | |||
| return towlower(c); /* NOTE(review): unreachable as rendered (follows an unconditional return); likely the tail of the revised towlower2 from this diff hunk - confirm */ | |||
| } | |||
| static int IsRomanU(unsigned int c) | |||
| @@ -2015,7 +1940,7 @@ int ReadClause(Translator *tr, FILE *f_in, char *buf, short *charix, int *charix | |||
| while (!Eof() && (c1 != '>')) | |||
| c1 = GetC(); | |||
| c2 = ' '; | |||
| } else if ((c2 == '/') || iswalpha2(c2)) { | |||
| } else if ((c2 == '/') || iswalpha(c2)) { | |||
| // check for space in the output buffer for embedded commands produced by the SSML tag | |||
| if (ix > (n_buf - 20)) { | |||
| // Perhaps not enough room, end the clause before the SSML tag | |||
| @@ -2167,9 +2092,9 @@ int ReadClause(Translator *tr, FILE *f_in, char *buf, short *charix, int *charix | |||
| } | |||
| } | |||
| if (iswupper2(c1)) { | |||
| if (iswupper(c1)) { | |||
| tr->clause_upper_count++; | |||
| if ((option_capitals == 2) && (sayas_mode == 0) && !iswupper2(cprev)) { | |||
| if ((option_capitals == 2) && (sayas_mode == 0) && !iswupper(cprev)) { | |||
| char text_buf[40]; | |||
| char text_buf2[30]; | |||
| if (LookupSpecial(tr, "_cap", text_buf2) != NULL) { | |||
| @@ -2181,7 +2106,7 @@ int ReadClause(Translator *tr, FILE *f_in, char *buf, short *charix, int *charix | |||
| } | |||
| } | |||
| } | |||
| } else if (iswalpha2(c1)) | |||
| } else if (iswalpha(c1)) | |||
| tr->clause_lower_count++; | |||
| if (option_phoneme_input) { | |||
| @@ -2238,7 +2163,7 @@ int ReadClause(Translator *tr, FILE *f_in, char *buf, short *charix, int *charix | |||
| // i.e. is dot followed by an upper-case letter? | |||
| if (!iswspace(c1)) { | |||
| if (!IsAlpha(c1) || !iswlower2(c1)) { | |||
| if (!IsAlpha(c1) || !iswlower(c1)) { | |||
| UngetC(c2); | |||
| ungot_char2 = c1; | |||
| buf[end_clause_index] = ' '; // delete the end-clause punctuation | |||
| @@ -2320,7 +2245,7 @@ int ReadClause(Translator *tr, FILE *f_in, char *buf, short *charix, int *charix | |||
| punct_data |= CLAUSE_DOT; | |||
| if (nl_count == 0) { | |||
| if ((c1 == ',') && (cprev == '.') && (tr->translator_name == L('h', 'u')) && iswdigit(cprev2) && (iswdigit(c_next) || (iswlower2(c_next)))) { | |||
| if ((c1 == ',') && (cprev == '.') && (tr->translator_name == L('h', 'u')) && iswdigit(cprev2) && (iswdigit(c_next) || (iswlower(c_next)))) { | |||
| // lang=hu, fix for ordinal numbers, eg: "december 2., szerda", ignore ',' after ordinal number | |||
| c1 = CHAR_COMMA_BREAK; | |||
| is_end_clause = 0; | |||
| @@ -2332,11 +2257,11 @@ int ReadClause(Translator *tr, FILE *f_in, char *buf, short *charix, int *charix | |||
| // dot after a number indicates an ordinal number | |||
| if (!iswdigit(cprev)) | |||
| is_end_clause = 0; // Roman number followed by dot | |||
| else if (iswlower2(c_next) || (c_next == '-')) // hyphen is needed for lang-hu (eg. 2.-kal) | |||
| else if (iswlower(c_next) || (c_next == '-')) // hyphen is needed for lang-hu (eg. 2.-kal) | |||
| is_end_clause = 0; // only if followed by lower-case, (or if there is a XML tag) | |||
| } else if (c_next == '\'') | |||
| is_end_clause = 0; // eg. u.s.a.'s | |||
| if (iswlower2(c_next)) { | |||
| if (iswlower(c_next)) { | |||
| // next word has no capital letter, this dot is probably from an abbreviation | |||
| is_end_clause = 0; | |||
| } | |||
| @@ -363,7 +363,7 @@ int IsAlpha(unsigned int c) | |||
| 0 | |||
| }; | |||
| if (iswalpha2(c)) | |||
| if (iswalpha(c)) | |||
| return 1; | |||
| if (c < 0x300) | |||
| @@ -610,7 +610,7 @@ int IsAllUpper(const char *word) | |||
| int c; | |||
| while ((*word != 0) && !isspace2(*word)) { | |||
| word += utf8_in(&c, word); | |||
| if (!iswupper2(c)) | |||
| if (!iswupper(c)) | |||
| return 0; | |||
| } | |||
| return 1; | |||
| @@ -904,7 +904,7 @@ int TranslateWord(Translator *tr, char *word_start, WORD_TAB *wtab, char *word_o | |||
| } | |||
| } | |||
| if ((wflags & FLAG_ALL_UPPER) && (word_length > 1) && iswalpha2(first_char)) { | |||
| if ((wflags & FLAG_ALL_UPPER) && (word_length > 1) && iswalpha(first_char)) { | |||
| if ((option_tone_flags & OPTION_EMPHASIZE_ALLCAPS) && !(dictionary_flags[0] & FLAG_ABBREV)) { | |||
| // emphasize words which are in capitals | |||
| emphasize_allcaps = FLAG_EMPHASIZED; | |||
| @@ -1331,7 +1331,7 @@ int TranslateWord(Translator *tr, char *word_start, WORD_TAB *wtab, char *word_o | |||
| tr->expect_past--; | |||
| } | |||
| if ((word_length == 1) && (tr->translator_name == L('e', 'n')) && iswalpha2(first_char) && (first_char != 'i')) { | |||
| if ((word_length == 1) && (tr->translator_name == L('e', 'n')) && iswalpha(first_char) && (first_char != 'i')) { | |||
| // English Specific !!!! | |||
| // any single letter before a dot is an abbreviation, except 'I' | |||
| dictionary_flags[0] |= FLAG_ALLOW_DOT; | |||
| @@ -1587,7 +1587,7 @@ static int TranslateWord2(Translator *tr, char *word, WORD_TAB *wtab, int pre_pa | |||
| while (*p2 != ' ') p2++; | |||
| utf8_in(&c_word2, p2+1); // first character of the next word; | |||
| if (!iswalpha2(c_word2)) | |||
| if (!iswalpha(c_word2)) | |||
| ok = 0; | |||
| if (ok != 0) { | |||
| @@ -1957,7 +1957,7 @@ static int SubstituteChar(Translator *tr, unsigned int c, unsigned int next_in, | |||
| // there is a list of character codes to be substituted with alternative codes | |||
| if (iswupper2(c_lower = c)) { | |||
| if (iswupper(c_lower = c)) { | |||
| c_lower = towlower2(c); | |||
| upper_case = 1; | |||
| } | |||
| @@ -1984,14 +1984,14 @@ static int SubstituteChar(Translator *tr, unsigned int c, unsigned int next_in, | |||
| // there is a second character to be inserted | |||
| // don't convert the case of the second character unless the next letter is also upper case | |||
| c2 = new_c >> 16; | |||
| if (upper_case && iswupper2(next_in)) | |||
| c2 = towupper2(c2); | |||
| if (upper_case && iswupper(next_in)) | |||
| c2 = towupper(c2); | |||
| *insert = c2; | |||
| new_c &= 0xffff; | |||
| } | |||
| if (upper_case) | |||
| new_c = towupper2(new_c); | |||
| new_c = towupper(new_c); | |||
| *wordflags |= FLAG_CHAR_REPLACED; | |||
| return new_c; | |||
| @@ -2046,7 +2046,7 @@ static int TranslateChar(Translator *tr, char *ptr, int prev_in, unsigned int c, | |||
| case L('n', 'l'): | |||
| // look for 'n and replace by a special character (unicode: schwa) | |||
| if (!iswalpha2(prev_in)) { | |||
| if (!iswalpha(prev_in)) { | |||
| utf8_in(&next2, &ptr[1]); | |||
| if ((c == '\'') && IsSpace(next2)) { | |||
| @@ -2400,7 +2400,7 @@ void *TranslateClause(Translator *tr, FILE *f_text, const void *vp_input, int *t | |||
| if (!IsBracket(prev_out)) // ?? perhaps only set FLAG_NOSPACE for . - / (hyphenated words, URLs, etc) | |||
| next_word_flags |= FLAG_NOSPACE; | |||
| } else { | |||
| if (iswupper2(c)) | |||
| if (iswupper(c)) | |||
| word_flags |= FLAG_FIRST_UPPER; | |||
| if ((prev_out == ' ') && iswdigit(sbuf[ix-2]) && !iswdigit(prev_in)) { | |||
| @@ -2430,7 +2430,7 @@ void *TranslateClause(Translator *tr, FILE *f_text, const void *vp_input, int *t | |||
| } | |||
| } | |||
| if (iswupper2(c)) { | |||
| if (iswupper(c)) { | |||
| c = towlower2(c); | |||
| if ((j = tr->langopts.param[LOPT_CAPS_IN_WORD]) > 0) { | |||
| @@ -2440,7 +2440,7 @@ void *TranslateClause(Translator *tr, FILE *f_text, const void *vp_input, int *t | |||
| syllable_marked = 1; | |||
| } | |||
| } else { | |||
| if (iswlower2(prev_in)) { | |||
| if (iswlower(prev_in)) { | |||
| // lower case followed by upper case in a word | |||
| if (UpperCaseInWord(tr, &sbuf[ix], c) == 1) { | |||
| // convert to lower case and continue | |||
| @@ -2450,7 +2450,7 @@ void *TranslateClause(Translator *tr, FILE *f_text, const void *vp_input, int *t | |||
| space_inserted = 1; | |||
| prev_in_save = c; | |||
| } | |||
| } else if ((c != ' ') && iswupper2(prev_in) && iswlower2(next_in)) { | |||
| } else if ((c != ' ') && iswupper(prev_in) && iswlower(next_in)) { | |||
| int next2_in; | |||
| utf8_in(&next2_in, &source[source_index + next_in_nbytes]); | |||
| @@ -724,12 +724,8 @@ int IsDigit09(unsigned int c); | |||
| int IsAlpha(unsigned int c); | |||
| int IsVowel(Translator *tr, int c); | |||
| int IsSuperscript(int letter); | |||
| int iswalpha2(int c); | |||
| int isspace2(unsigned int c); | |||
| int iswlower2(int c); | |||
| int iswupper2(int c); | |||
| int towlower2(unsigned int c); | |||
| int towupper2(unsigned int c); | |||
| int towlower2(unsigned int c); // Supports Turkish I | |||
| const char *GetTranslatedPhonemeString(int phoneme_mode); | |||
| const char *WordToString2(unsigned int word); | |||
| ALPHABET *AlphabetFromChar(int c); | |||