Browse Source

Replace the *2 wchar/wctype APIs with their standard versions. These will be fixed using compatibility headers.

master
Reece H. Dunn 8 years ago
parent
commit
5975f07095

+ 2
- 1
src/libespeak-ng/compiledict.c View File

#include <stdio.h> #include <stdio.h>
#include <stdlib.h> #include <stdlib.h>
#include <string.h> #include <string.h>
#include <wctype.h>


#include <espeak-ng/espeak_ng.h> #include <espeak-ng/espeak_ng.h>
#include <espeak-ng/speak_lib.h> #include <espeak-ng/speak_lib.h>
ix = utf8_in(&c2, p); ix = utf8_in(&c2, p);
if (c2 == 0) if (c2 == 0)
break; break;
if (iswupper2(c2))
if (iswupper(c2))
utf8_out(towlower2(c2), p); utf8_out(towlower2(c2), p);
else else
all_upper_case = 0; all_upper_case = 0;

+ 6
- 5
src/libespeak-ng/dictionary.c View File

#include <stdio.h> #include <stdio.h>
#include <stdlib.h> #include <stdlib.h>
#include <string.h> #include <string.h>
#include <wctype.h>
#include <wchar.h> #include <wchar.h>


#include <espeak-ng/espeak_ng.h> #include <espeak-ng/espeak_ng.h>
p += utf8_in(&c, p); p += utf8_in(&c, p);
if (use_tie != 0) { if (use_tie != 0) {
// look for non-initial alphabetic character, but not diacritic, superscript etc. // look for non-initial alphabetic character, but not diacritic, superscript etc.
if ((count > 0) && !(flags & (1 << (count-1))) && ((c < 0x2b0) || (c > 0x36f)) && iswalpha2(c))
if ((count > 0) && !(flags & (1 << (count-1))) && ((c < 0x2b0) || (c > 0x36f)) && iswalpha(c))
buf += utf8_out(use_tie, buf); buf += utf8_out(use_tie, buf);
} }
buf += utf8_out(c, buf); buf += utf8_out(c, buf);
break; break;
} }


if ((c != '\'') && !iswalpha2(c))
if ((c != '\'') && !iswalpha(c))
return 0; return 0;
} }


failed = 1; failed = 1;
break; break;
case RULE_NONALPHA: case RULE_NONALPHA:
if (!iswalpha2(letter_w)) {
if (!iswalpha(letter_w)) {
add_points = (21-distance_right); add_points = (21-distance_right);
post_ptr += letter_xbytes; post_ptr += letter_xbytes;
} else } else
failed = 1; failed = 1;
break; break;
case RULE_NONALPHA: case RULE_NONALPHA:
if (!iswalpha2(letter_w)) {
if (!iswalpha(letter_w)) {
add_points = (21-distance_right); add_points = (21-distance_right);
pre_ptr -= letter_xbytes; pre_ptr -= letter_xbytes;
} else } else


if (tr->letter_bits_offset > 0) { if (tr->letter_bits_offset > 0) {
// not a Latin alphabet, switch to the default Latin alphabet language // not a Latin alphabet, switch to the default Latin alphabet language
if ((letter <= 0x241) && iswalpha2(letter)) {
if ((letter <= 0x241) && iswalpha(letter)) {
sprintf(phonemes, "%c%s", phonSWITCH, tr->langopts.ascii_language); sprintf(phonemes, "%c%s", phonSWITCH, tr->langopts.ascii_language);
return 0; return 0;
} }

+ 3
- 3
src/libespeak-ng/numbers.c View File



if (control & 2) { if (control & 2) {
// include CAPITAL information // include CAPITAL information
if (iswupper2(letter))
if (iswupper(letter))
Lookup(tr, "_cap", capital); Lookup(tr, "_cap", capital);
} }
letter = towlower2(letter); letter = towlower2(letter);
if (ph_buf[0] == 0) { if (ph_buf[0] == 0) {
speak_letter_number = 1; speak_letter_number = 1;
if (!(al_flags & AL_NO_SYMBOL)) { if (!(al_flags & AL_NO_SYMBOL)) {
if (iswalpha2(letter))
if (iswalpha(letter))
Lookup(translator, "_?A", ph_buf); Lookup(translator, "_?A", ph_buf);


if ((ph_buf[0] == 0) && !iswspace(letter)) if ((ph_buf[0] == 0) && !iswspace(letter))
if ((tr->langopts.numbers & NUM_NOPAUSE) && (next_char == ' ')) if ((tr->langopts.numbers & NUM_NOPAUSE) && (next_char == ' '))
utf8_in(&next_char, p); utf8_in(&next_char, p);


if (!iswalpha2(next_char) && (thousands_exact == 0))
if (!iswalpha(next_char) && (thousands_exact == 0))
strcat(ph_out, str_pause); // don't add pause for 100s, 6th, etc. strcat(ph_out, str_pause); // don't add pause for 100s, 6th, etc.
} }



+ 11
- 86
src/libespeak-ng/readclause.c View File

0, 0 0, 0
}; };


// use internal data for iswalpha up to U+024F
// iswalpha() on Windows is unreliable (U+AA, U+BA).
//
// c: character code to classify.
// Returns non-zero if c is treated as alphabetic, 0 otherwise.
//   - negative values are never alphabetic
//   - values below 0x80 are classified by isalpha()
//   - values in (0x3040, 0xa700] are always reported as alphabetic
//   - other values above MAX_WALPHA are classified by iswalpha()
//   - everything else is looked up in walpha_tab (indexed from 0x80)
int iswalpha2(int c)
{
	if (c < 0)
		return 0; // passing a negative value (other than EOF) to isalpha() is undefined
	if (c < 0x80)
		return isalpha(c);
	if ((c > 0x3040) && (c <= 0xa700))
		return 1; // japanese, chinese characters
	if (c > MAX_WALPHA)
		return iswalpha(c);
	return walpha_tab[c-0x80];
}

// Lower-case test that uses walpha_tab for codes from 0x80 up to MAX_WALPHA.
//
// c: character code to classify.
// Returns non-zero if c is treated as lower case, 0 otherwise.
//   - negative values are never lower case
//   - values below 0x80 are classified by islower()
//   - values above MAX_WALPHA are classified by iswlower()
//   - otherwise c is lower case only when its walpha_tab entry equals 0xff
int iswlower2(int c)
{
	if (c < 0)
		return 0; // passing a negative value (other than EOF) to islower() is undefined
	if (c < 0x80)
		return islower(c);
	if (c > MAX_WALPHA)
		return iswlower(c);
	return (walpha_tab[c-0x80] == 0xff) ? 1 : 0;
}

// Upper-case test that uses walpha_tab for codes from 0x80 up to MAX_WALPHA.
//
// c: character code to classify.
// Returns non-zero if c is treated as upper case, 0 otherwise.
//   - negative values are never upper case
//   - values below 0x80 are classified by isupper()
//   - values above MAX_WALPHA are classified by iswupper()
//   - otherwise c is upper case when its walpha_tab entry is greater than 0
//     and less than 0xfe
int iswupper2(int c)
{
	int entry;

	if (c < 0)
		return 0; // passing a negative value (other than EOF) to isupper() is undefined
	if (c < 0x80)
		return isupper(c);
	if (c > MAX_WALPHA)
		return iswupper(c);

	entry = walpha_tab[c-0x80];
	return ((entry > 0) && (entry < 0xfe)) ? 1 : 0;
}

int towlower2(unsigned int c) int towlower2(unsigned int c)
{ {
int x;
int ix;

// check for non-standard upper to lower case conversions // check for non-standard upper to lower case conversions
if (c == 'I') {
if (translator->langopts.dotless_i)
c = 0x131; // I -> ı
}

if (c < 0x80)
return tolower(c);
if (c == 'I' && translator->langopts.dotless_i)
return 0x131; // I -> ı


if (c > MAX_WALPHA)
return towlower(c);

if ((x = walpha_tab[c-0x80]) >= 0xfe)
return c; // this is not an upper case letter

if (x == 0xfd) {
// special cases, lookup translation table
for (ix = 0; wchar_tolower[ix] != 0; ix += 2) {
if (wchar_tolower[ix] == (int)c)
return wchar_tolower[ix+1];
}
}
return c + x; // convert to lower case
}

int towupper2(unsigned int c)
{
int ix;
if (c > MAX_WALPHA)
return towupper(c);

// check whether a previous character code is the upper-case equivalent of this character
if (towlower2(c-32) == (int)c)
return c-32; // yes, use it
if (towlower2(c-1) == (int)c)
return c-1;
for (ix = 0; wchar_toupper[ix] != 0; ix += 2) {
if (wchar_toupper[ix] == (int)c)
return wchar_toupper[ix+1];
}
return c; // no
return towlower(c);
} }


static int IsRomanU(unsigned int c) static int IsRomanU(unsigned int c)
while (!Eof() && (c1 != '>')) while (!Eof() && (c1 != '>'))
c1 = GetC(); c1 = GetC();
c2 = ' '; c2 = ' ';
} else if ((c2 == '/') || iswalpha2(c2)) {
} else if ((c2 == '/') || iswalpha(c2)) {
// check for space in the output buffer for embedded commands produced by the SSML tag // check for space in the output buffer for embedded commands produced by the SSML tag
if (ix > (n_buf - 20)) { if (ix > (n_buf - 20)) {
// Perhaps not enough room, end the clause before the SSML tag // Perhaps not enough room, end the clause before the SSML tag
} }
} }


if (iswupper2(c1)) {
if (iswupper(c1)) {
tr->clause_upper_count++; tr->clause_upper_count++;
if ((option_capitals == 2) && (sayas_mode == 0) && !iswupper2(cprev)) {
if ((option_capitals == 2) && (sayas_mode == 0) && !iswupper(cprev)) {
char text_buf[40]; char text_buf[40];
char text_buf2[30]; char text_buf2[30];
if (LookupSpecial(tr, "_cap", text_buf2) != NULL) { if (LookupSpecial(tr, "_cap", text_buf2) != NULL) {
} }
} }
} }
} else if (iswalpha2(c1))
} else if (iswalpha(c1))
tr->clause_lower_count++; tr->clause_lower_count++;


if (option_phoneme_input) { if (option_phoneme_input) {
// i.e. is dot followed by an upper-case letter? // i.e. is dot followed by an upper-case letter?


if (!iswspace(c1)) { if (!iswspace(c1)) {
if (!IsAlpha(c1) || !iswlower2(c1)) {
if (!IsAlpha(c1) || !iswlower(c1)) {
UngetC(c2); UngetC(c2);
ungot_char2 = c1; ungot_char2 = c1;
buf[end_clause_index] = ' '; // delete the end-clause punctuation buf[end_clause_index] = ' '; // delete the end-clause punctuation
punct_data |= CLAUSE_DOT; punct_data |= CLAUSE_DOT;


if (nl_count == 0) { if (nl_count == 0) {
if ((c1 == ',') && (cprev == '.') && (tr->translator_name == L('h', 'u')) && iswdigit(cprev2) && (iswdigit(c_next) || (iswlower2(c_next)))) {
if ((c1 == ',') && (cprev == '.') && (tr->translator_name == L('h', 'u')) && iswdigit(cprev2) && (iswdigit(c_next) || (iswlower(c_next)))) {
// lang=hu, fix for ordinal numbers, eg: "december 2., szerda", ignore ',' after ordinal number // lang=hu, fix for ordinal numbers, eg: "december 2., szerda", ignore ',' after ordinal number
c1 = CHAR_COMMA_BREAK; c1 = CHAR_COMMA_BREAK;
is_end_clause = 0; is_end_clause = 0;
// dot after a number indicates an ordinal number // dot after a number indicates an ordinal number
if (!iswdigit(cprev)) if (!iswdigit(cprev))
is_end_clause = 0; // Roman number followed by dot is_end_clause = 0; // Roman number followed by dot
else if (iswlower2(c_next) || (c_next == '-')) // hyphen is needed for lang-hu (eg. 2.-kal)
else if (iswlower(c_next) || (c_next == '-')) // hyphen is needed for lang-hu (eg. 2.-kal)
is_end_clause = 0; // only if followed by lower-case, (or if there is a XML tag) is_end_clause = 0; // only if followed by lower-case, (or if there is a XML tag)
} else if (c_next == '\'') } else if (c_next == '\'')
is_end_clause = 0; // eg. u.s.a.'s is_end_clause = 0; // eg. u.s.a.'s
if (iswlower2(c_next)) {
if (iswlower(c_next)) {
// next word has no capital letter, this dot is probably from an abbreviation // next word has no capital letter, this dot is probably from an abbreviation
is_end_clause = 0; is_end_clause = 0;
} }

+ 14
- 14
src/libespeak-ng/translate.c View File

0 0
}; };


if (iswalpha2(c))
if (iswalpha(c))
return 1; return 1;


if (c < 0x300) if (c < 0x300)
int c; int c;
while ((*word != 0) && !isspace2(*word)) { while ((*word != 0) && !isspace2(*word)) {
word += utf8_in(&c, word); word += utf8_in(&c, word);
if (!iswupper2(c))
if (!iswupper(c))
return 0; return 0;
} }
return 1; return 1;
} }
} }


if ((wflags & FLAG_ALL_UPPER) && (word_length > 1) && iswalpha2(first_char)) {
if ((wflags & FLAG_ALL_UPPER) && (word_length > 1) && iswalpha(first_char)) {
if ((option_tone_flags & OPTION_EMPHASIZE_ALLCAPS) && !(dictionary_flags[0] & FLAG_ABBREV)) { if ((option_tone_flags & OPTION_EMPHASIZE_ALLCAPS) && !(dictionary_flags[0] & FLAG_ABBREV)) {
// emphasize words which are in capitals // emphasize words which are in capitals
emphasize_allcaps = FLAG_EMPHASIZED; emphasize_allcaps = FLAG_EMPHASIZED;
tr->expect_past--; tr->expect_past--;
} }


if ((word_length == 1) && (tr->translator_name == L('e', 'n')) && iswalpha2(first_char) && (first_char != 'i')) {
if ((word_length == 1) && (tr->translator_name == L('e', 'n')) && iswalpha(first_char) && (first_char != 'i')) {
// English Specific !!!! // English Specific !!!!
// any single letter before a dot is an abbreviation, except 'I' // any single letter before a dot is an abbreviation, except 'I'
dictionary_flags[0] |= FLAG_ALLOW_DOT; dictionary_flags[0] |= FLAG_ALLOW_DOT;
while (*p2 != ' ') p2++; while (*p2 != ' ') p2++;


utf8_in(&c_word2, p2+1); // first character of the next word; utf8_in(&c_word2, p2+1); // first character of the next word;
if (!iswalpha2(c_word2))
if (!iswalpha(c_word2))
ok = 0; ok = 0;


if (ok != 0) { if (ok != 0) {


// there is a list of character codes to be substituted with alternative codes // there is a list of character codes to be substituted with alternative codes


if (iswupper2(c_lower = c)) {
if (iswupper(c_lower = c)) {
c_lower = towlower2(c); c_lower = towlower2(c);
upper_case = 1; upper_case = 1;
} }
// there is a second character to be inserted // there is a second character to be inserted
// don't convert the case of the second character unless the next letter is also upper case // don't convert the case of the second character unless the next letter is also upper case
c2 = new_c >> 16; c2 = new_c >> 16;
if (upper_case && iswupper2(next_in))
c2 = towupper2(c2);
if (upper_case && iswupper(next_in))
c2 = towupper(c2);
*insert = c2; *insert = c2;
new_c &= 0xffff; new_c &= 0xffff;
} }


if (upper_case) if (upper_case)
new_c = towupper2(new_c);
new_c = towupper(new_c);


*wordflags |= FLAG_CHAR_REPLACED; *wordflags |= FLAG_CHAR_REPLACED;
return new_c; return new_c;
case L('n', 'l'): case L('n', 'l'):
// look for 'n and replace by a special character (unicode: schwa) // look for 'n and replace by a special character (unicode: schwa)


if (!iswalpha2(prev_in)) {
if (!iswalpha(prev_in)) {
utf8_in(&next2, &ptr[1]); utf8_in(&next2, &ptr[1]);


if ((c == '\'') && IsSpace(next2)) { if ((c == '\'') && IsSpace(next2)) {
if (!IsBracket(prev_out)) // ?? perhaps only set FLAG_NOSPACE for . - / (hyphenated words, URLs, etc) if (!IsBracket(prev_out)) // ?? perhaps only set FLAG_NOSPACE for . - / (hyphenated words, URLs, etc)
next_word_flags |= FLAG_NOSPACE; next_word_flags |= FLAG_NOSPACE;
} else { } else {
if (iswupper2(c))
if (iswupper(c))
word_flags |= FLAG_FIRST_UPPER; word_flags |= FLAG_FIRST_UPPER;


if ((prev_out == ' ') && iswdigit(sbuf[ix-2]) && !iswdigit(prev_in)) { if ((prev_out == ' ') && iswdigit(sbuf[ix-2]) && !iswdigit(prev_in)) {
} }
} }


if (iswupper2(c)) {
if (iswupper(c)) {
c = towlower2(c); c = towlower2(c);


if ((j = tr->langopts.param[LOPT_CAPS_IN_WORD]) > 0) { if ((j = tr->langopts.param[LOPT_CAPS_IN_WORD]) > 0) {
syllable_marked = 1; syllable_marked = 1;
} }
} else { } else {
if (iswlower2(prev_in)) {
if (iswlower(prev_in)) {
// lower case followed by upper case in a word // lower case followed by upper case in a word
if (UpperCaseInWord(tr, &sbuf[ix], c) == 1) { if (UpperCaseInWord(tr, &sbuf[ix], c) == 1) {
// convert to lower case and continue // convert to lower case and continue
space_inserted = 1; space_inserted = 1;
prev_in_save = c; prev_in_save = c;
} }
} else if ((c != ' ') && iswupper2(prev_in) && iswlower2(next_in)) {
} else if ((c != ' ') && iswupper(prev_in) && iswlower(next_in)) {
int next2_in; int next2_in;
utf8_in(&next2_in, &source[source_index + next_in_nbytes]); utf8_in(&next2_in, &source[source_index + next_in_nbytes]);



+ 1
- 5
src/libespeak-ng/translate.h View File

int IsAlpha(unsigned int c); int IsAlpha(unsigned int c);
int IsVowel(Translator *tr, int c); int IsVowel(Translator *tr, int c);
int IsSuperscript(int letter); int IsSuperscript(int letter);
int iswalpha2(int c);
int isspace2(unsigned int c); int isspace2(unsigned int c);
int iswlower2(int c);
int iswupper2(int c);
int towlower2(unsigned int c);
int towupper2(unsigned int c);
int towlower2(unsigned int c); // Supports Turkish I
const char *GetTranslatedPhonemeString(int phoneme_mode); const char *GetTranslatedPhonemeString(int phoneme_mode);
const char *WordToString2(unsigned int word); const char *WordToString2(unsigned int word);
ALPHABET *AlphabetFromChar(int c); ALPHABET *AlphabetFromChar(int c);

Loading…
Cancel
Save