git-svn-id: https://espeak.svn.sourceforge.net/svnroot/espeak/trunk@123 d46cf337-b52f-0410-862d-fd96e6ae7743master
const char *word2; | const char *word2; | ||||
unsigned char c; | unsigned char c; | ||||
int nbytes; | int nbytes; | ||||
int c2; | |||||
int len; | int len; | ||||
char word[N_WORD_BYTES]; | char word[N_WORD_BYTES]; | ||||
static char word_replacement[N_WORD_BYTES]; | static char word_replacement[N_WORD_BYTES]; | ||||
length = 0; | length = 0; | ||||
word2 = word1 = *wordptr; | word2 = word1 = *wordptr; | ||||
while((word2[nbytes = utf8_in(&c2,word2,0)]==' ') && (word2[nbytes+1]=='.')) | |||||
while((word2[nbytes = utf8_nbytes(word2)]==' ') && (word2[nbytes+1]=='.')) | |||||
{ | { | ||||
// look for an abbreviation of the form a.b.c | // look for an abbreviation of the form a.b.c | ||||
// try removing the spaces between the dots and looking for a match | // try removing the spaces between the dots and looking for a match | ||||
if(length > 0) | if(length > 0) | ||||
{ | { | ||||
// found an abbreviation containing dots | // found an abbreviation containing dots | ||||
nbytes = utf8_in(&c2,word2,0); | |||||
nbytes = utf8_nbytes(word2); | |||||
memcpy(&word[length],word2,nbytes); | memcpy(&word[length],word2,nbytes); | ||||
word[length+nbytes] = 0; | word[length+nbytes] = 0; | ||||
found = LookupDict2(word, word2, ph_out, flags, end_flags, wtab); | found = LookupDict2(word, word2, ph_out, flags, end_flags, wtab); | ||||
if(found) | if(found) | ||||
{ | { | ||||
*flags = *flags & ~(7 << 5) | (length << 5); | |||||
// set the skip words flag | |||||
flags[0] |= FLAG_SKIPWORDS; | |||||
dictionary_skipwords = length; | |||||
return(1); | return(1); | ||||
} | } | ||||
} | } |
event_list[0].unique_identifier = unique_identifier; | event_list[0].unique_identifier = unique_identifier; | ||||
event_list[0].user_data = user_data; | event_list[0].user_data = user_data; | ||||
event_list[1].type = espeakEVENT_LIST_TERMINATED; | event_list[1].type = espeakEVENT_LIST_TERMINATED; | ||||
event_list[1].unique_identifier = unique_identifier; | |||||
event_list[1].user_data = user_data; | |||||
if (my_mode==AUDIO_OUTPUT_PLAYBACK) | if (my_mode==AUDIO_OUTPUT_PLAYBACK) | ||||
{ | { |
} // end of utf8_out | } // end of utf8_out | ||||
int utf8_nbytes(const char *buf)
{//=============================
// Returns the number of bytes occupied by the first UTF-8 character in buf.
//
// Only the first byte of buf is examined; the length is derived from its
// value alone and the following bytes are neither read nor validated:
//   0x00..0x7f  -> 1   (includes the terminating NUL byte)
//   0x80..0xdf  -> 2   (a stray continuation byte 0x80..0xbf also lands here)
//   0xe0..0xef  -> 3
//   0xf0..0xff  -> 4
//
// buf must point to at least one readable byte.
	// Go through unsigned char so bytes >= 0x80 compare as 128..255 even
	// where plain char is signed.
	unsigned char c = (unsigned char)buf[0];

	if(c < 0x80)
		return(1);
	if(c < 0xe0)
		return(2);
	if(c < 0xf0)
		return(3);
	return(4);
}
int utf8_in(int *c, const char *buf, int backwards) | int utf8_in(int *c, const char *buf, int backwards) | ||||
{//================================================ | {//================================================ | ||||
// Read a unicode character from a UTF8 string | |||||
// Returns the number of UTF8 bytes used. | |||||
// backwards: set if we are moving backwards through the UTF8 string | |||||
int c1; | int c1; | ||||
int n_bytes; | int n_bytes; | ||||
int ix; | int ix; | ||||
// try an initial lookup in the dictionary list, we may find a pronunciation specified, or | // try an initial lookup in the dictionary list, we may find a pronunciation specified, or | ||||
// we may just find some flags | // we may just find some flags | ||||
if((option_sayas & 0xf0) == 0x10) | |||||
spell_word = 0; | |||||
if(option_sayas == SAYAS_KEY) | |||||
{ | |||||
if(word_length == 1) | |||||
spell_word = 4; | |||||
} | |||||
if(option_sayas & 0x10) | |||||
{ | { | ||||
// SAYAS_CHARS, SAYAS_GLYPHS, or SAYAS_SINGLE_CHARS | // SAYAS_CHARS, SAYAS_GLYPHS, or SAYAS_SINGLE_CHARS | ||||
spell_word = option_sayas & 0xf; // 2,3,4 | spell_word = option_sayas & 0xf; // 2,3,4 | ||||
} | } | ||||
else | else | ||||
{ | { | ||||
spell_word = 0; | |||||
found = LookupDictList(&word1, phonemes, dictionary_flags, FLAG_ALLOW_TEXTMODE, wtab); // the original word | found = LookupDictList(&word1, phonemes, dictionary_flags, FLAG_ALLOW_TEXTMODE, wtab); // the original word | ||||
// if textmode, LookupDictList() replaces word1 by the new text and returns found=0 | // if textmode, LookupDictList() replaces word1 by the new text and returns found=0 | ||||
} | } | ||||
} | } | ||||
if(option_sayas2 == SAYAS_KEY) | |||||
{ | |||||
if(((c == '_') || (c == '-')) && IsAlpha(prev_in)) | |||||
{ | |||||
c = ' '; | |||||
} | |||||
c = towlower(c); | |||||
} | |||||
if(phoneme_mode) | if(phoneme_mode) | ||||
{ | { | ||||
all_upper_case = FLAG_PHONEMES; | all_upper_case = FLAG_PHONEMES; | ||||
} | } | ||||
} | } | ||||
else | else | ||||
if((option_sayas2 & 0xf0) != 0x10) | |||||
if((option_sayas2 & 0x30) == 0) | |||||
{ | { | ||||
// speak as words | // speak as words | ||||
#define SAYAS_CHARS 0x12 | #define SAYAS_CHARS 0x12 | ||||
#define SAYAS_GLYPHS 0x13 | #define SAYAS_GLYPHS 0x13 | ||||
#define SAYAS_SINGLE_CHARS 0x14 | #define SAYAS_SINGLE_CHARS 0x14 | ||||
#define SAYAS_KEY 0x20 | |||||
#define SAYAS_DIGITS1 0x21 | |||||
#define SAYAS_DIGITS 0x30 // + number of digits | |||||
#define SAYAS_KEY 0x24 | |||||
#define SAYAS_DIGITS 0x40 // + number of digits | |||||
#define SAYAS_DIGITS1 0xc1 | |||||
// Rule: | // Rule: | ||||
// [4] [match] [1 pre] [2 post] [3 phonemes] 0 | // [4] [match] [1 pre] [2 post] [3 phonemes] 0 | ||||
int TransposeAlphabet(char *text, int offset, int min, int max); | int TransposeAlphabet(char *text, int offset, int min, int max); | ||||
int utf8_in(int *c, const char *buf, int backwards); | int utf8_in(int *c, const char *buf, int backwards); | ||||
int utf8_out(unsigned int c, char *buf); | int utf8_out(unsigned int c, char *buf); | ||||
int utf8_nbytes(const char *buf); | |||||
int lookupwchar(const unsigned short *list,int c); | int lookupwchar(const unsigned short *list,int c); | ||||
int Eof(void); | int Eof(void); | ||||
char *strchr_w(const char *s, int c); | char *strchr_w(const char *s, int c); |