| #include <stdlib.h> | #include <stdlib.h> | ||||
| #include <string.h> | #include <string.h> | ||||
| #include <sys/stat.h> | #include <sys/stat.h> | ||||
| #include <wctype.h> | |||||
| #include <espeak-ng/espeak_ng.h> | #include <espeak-ng/espeak_ng.h> | ||||
| #include <espeak-ng/speak_lib.h> | #include <espeak-ng/speak_lib.h> | ||||
| } | } | ||||
/*
 * Replacement for iswalpha() which also accepts some in-word symbols
 * (vowel marks, combining accents and similar).
 * Returns 1 when c is treated as alphabetic, otherwise 0.
 */
int IsAlpha(unsigned int c)
{
	static const unsigned short extra_indic_alphas[] = {
		0xa70, 0xa71, // Gurmukhi: tippi, addak
		0
	};
	// Inclusive code point ranges accepted in addition to iswalpha()
	static const struct {
		unsigned int first;
		unsigned int last;
	} extra_ranges[] = {
		{ 0x300,  0x36f  }, // combining accents
		{ 0x5b0,  0x5c2  }, // Hebrew vowel marks
		{ 0x605,  0x605  },
		{ 0x64b,  0x65e  }, // Arabic vowel marks
		{ 0x670,  0x670  }, // Arabic vowel mark
		{ 0xf40,  0xfbc  }, // Tibetan
		{ 0x1100, 0x11ff }, // Korean jamo
		{ 0x2800, 0x28ff }, // braille
		// Chinese/Japanese. Should never be needed, but Mac OS 10.4's
		// iswalpha seems to be broken, so just make sure
		{ 0x3041, 0xa700 },
	};
	unsigned int ix;

	if (iswalpha(c))
		return 1;

	// Nothing below 0x300 is accepted beyond what iswalpha() reports
	if (c < 0x300)
		return 0;

	if ((c >= 0x901) && (c <= 0xdf7)) {
		// Indic scripts: Devanagari, Tamil, etc
		if ((c & 0x7f) < 0x64)
			return 1;
		if (lookupwchar(extra_indic_alphas, c) != 0)
			return 1;
		// Malayalam chillu characters
		return ((c >= 0xd7a) && (c <= 0xd7f)) ? 1 : 0;
	}

	for (ix = 0; ix < sizeof(extra_ranges) / sizeof(extra_ranges[0]); ix++) {
		if ((c >= extra_ranges[ix].first) && (c <= extra_ranges[ix].last))
			return 1;
	}
	return 0;
}
// Bracket and quotation characters. The range 0x2014 to 0x201f is tested
// directly in IsBracket(), so it does not need to be in this list.
static const unsigned short brackets[] = {
	'(', ')',
	'[', ']',
	'{', '}',
	'<', '>',
	'"', '\'', '`',
	0xab, 0xbb,     // double angle brackets
	0x300a, 0x300b, // double angle brackets (ideograph)
	0xe000 + '<',   // private usage area
	0
};
| int IsBracket(int c) | |||||
| { | |||||
| if ((c >= 0x2014) && (c <= 0x201f)) | |||||
| return 1; | |||||
| return lookupwchar(brackets, c); | |||||
| } | |||||
// Returns 1 if c is one of the ASCII digits '0' to '9', otherwise 0.
int IsDigit09(unsigned int c)
{
	return ((c < '0') || (c > '9')) ? 0 : 1;
}
/*
 * Returns 1 if iswdigit() accepts c, or if c lies in 0x966..0x96f
 * (the Devanagari digits in Unicode); otherwise 0.
 */
int IsDigit(unsigned int c)
{
	int in_extra_range = (c >= 0x966) && (c <= 0x96f);

	return (iswdigit(c) || in_extra_range) ? 1 : 0;
}
/*
 * Extended whitespace test.
 * Returns 0 for c == 0, 1 for box drawing characters and unicode
 * specials, and otherwise whatever iswspace() returns for c.
 */
int IsSpace(unsigned int c)
{
	if (c != 0) {
		if (((c >= 0x2500) && (c < 0x25a0))      // box drawing characters
		    || ((c >= 0xfff9) && (c <= 0xffff))) // unicode specials
			return 1;
		return iswspace(c);
	}
	return 0;
}
/*
 * Returns 1 if c is a non-zero value no greater than ' ' (0x20),
 * otherwise 0.
 *
 * isspace() can't be used because on Windows, isspace(0xe1) gives TRUE !
 */
int isspace2(unsigned int c)
{
	// Any value whose low byte is zero is either 0 itself or above ' ',
	// so comparing c directly covers every case.
	return ((c != 0) && (c <= ' ')) ? 1 : 0;
}
| int is_str_totally_null(const char* str, int size) { | int is_str_totally_null(const char* str, int size) { | ||||
| // Tests if all bytes of str are null up to size | // Tests if all bytes of str are null up to size | ||||
| // This should never be reimplemented with integers, because | // This should never be reimplemented with integers, because |
| extern ESPEAK_NG_API int GetFileLength(const char *filename); | extern ESPEAK_NG_API int GetFileLength(const char *filename); | ||||
| extern ESPEAK_NG_API void strncpy0(char *to, const char *from, int size); | extern ESPEAK_NG_API void strncpy0(char *to, const char *from, int size); | ||||
// Character classification helpers:
//   IsAlpha   - iswalpha() replacement which also accepts some in-word symbols
//   IsBracket - 1 for 0x2014..0x201f, otherwise a lookup in the brackets list
//   IsDigit   - iswdigit() plus the code points 0x966..0x96f
//   IsDigit09 - 1 only for the ASCII digits '0'..'9'
//   IsSpace   - iswspace() plus box drawing characters and unicode specials; 0 for c == 0
//   isspace2  - 1 for non-zero values up to and including ' '
| int IsAlpha(unsigned int c); | |||||
| int IsBracket(int c); | |||||
| int IsDigit(unsigned int c); | |||||
| int IsDigit09(unsigned int c); | |||||
| int IsSpace(unsigned int c); | |||||
| int isspace2(unsigned int c); | |||||
| int is_str_totally_null(const char* str, int size); // Tests if all bytes of str up to size are null | int is_str_totally_null(const char* str, int size); // Tests if all bytes of str up to size are null | ||||
| int Read4Bytes(FILE *f); | int Read4Bytes(FILE *f); | ||||
| int towlower2(unsigned int c, Translator *translator); // Supports Turkish I | int towlower2(unsigned int c, Translator *translator); // Supports Turkish I |
| #include "phoneme.h" // for PHONEME_TAB_LIST, phonSWITCH, phone... | #include "phoneme.h" // for PHONEME_TAB_LIST, phonSWITCH, phone... | ||||
| #include "speech.h" // for path_home | #include "speech.h" // for path_home | ||||
| #include "synthesize.h" // for Write4Bytes | #include "synthesize.h" // for Write4Bytes | ||||
| #include "translate.h" // for isspace2, IsDigit09, utf8_in, utf8_out | |||||
| static FILE *f_log = NULL; | static FILE *f_log = NULL; | ||||
| #include "readclause.h" // for WordToString2 | #include "readclause.h" // for WordToString2 | ||||
| #include "synthdata.h" // for SelectPhonemeTable | #include "synthdata.h" // for SelectPhonemeTable | ||||
| #include "synthesize.h" // for phoneme_tab | #include "synthesize.h" // for phoneme_tab | ||||
| #include "translate.h" // for Translator, LANGUAGE_OPTIONS, IsDigit09, WOR... | |||||
| #include "translate.h" // for Translator, LANGUAGE_OPTIONS, WOR... | |||||
| #include "voice.h" // for voice, voice_t | #include "voice.h" // for voice, voice_t | ||||
| #define M_LIGATURE 0x8000 | #define M_LIGATURE 0x8000 |
| #include "readclause.h" // for PARAM_STACK, param_stack, AddNameData | #include "readclause.h" // for PARAM_STACK, param_stack, AddNameData | ||||
| #include "soundicon.h" // for LoadSoundFile2 | #include "soundicon.h" // for LoadSoundFile2 | ||||
| #include "synthesize.h" // for SPEED_FACTORS, speed | #include "synthesize.h" // for SPEED_FACTORS, speed | ||||
| #include "translate.h" // for CTRL_EMBEDDED, IsDigit09, utf8_out | |||||
| #include "translate.h" // for CTRL_EMBEDDED | |||||
| #include "voice.h" // for SelectVoice, SelectVoiceByName | #include "voice.h" // for SelectVoice, SelectVoiceByName | ||||
| #include "speech.h" // for MAKE_MEM_UNDEFINED | #include "speech.h" // for MAKE_MEM_UNDEFINED | ||||
// NOTE(review): presumably n_replace_phonemes counts the entries in use in
// replace_phonemes[] - confirm against the code that fills the table.
| int n_replace_phonemes; | int n_replace_phonemes; | ||||
| REPLACE_PHONEMES replace_phonemes[N_REPLACE_PHONEMES]; | REPLACE_PHONEMES replace_phonemes[N_REPLACE_PHONEMES]; | ||||
// Bracket and quotation characters. The range 0x2014 to 0x201f is tested
// directly in IsBracket(), so it does not need to be in this list.
static const unsigned short brackets[] = {
	'(', ')',
	'[', ']',
	'{', '}',
	'<', '>',
	'"', '\'', '`',
	0xab, 0xbb,     // double angle brackets
	0x300a, 0x300b, // double angle brackets (ideograph)
	0xe000 + '<',   // private usage area
	0
};
// Other characters which break a word, but don't produce a pause
static const unsigned short breaks[] = {
	'_',
	0
};
/*
 * Replacement for iswalpha() which also accepts some in-word symbols
 * (vowel marks, combining accents and similar).
 * Returns 1 when c is treated as alphabetic, otherwise 0.
 */
int IsAlpha(unsigned int c)
{
	static const unsigned short extra_indic_alphas[] = {
		0xa70, 0xa71, // Gurmukhi: tippi, addak
		0
	};
	// Inclusive code point ranges accepted in addition to iswalpha()
	static const struct {
		unsigned int first;
		unsigned int last;
	} extra_ranges[] = {
		{ 0x300,  0x36f  }, // combining accents
		{ 0x5b0,  0x5c2  }, // Hebrew vowel marks
		{ 0x605,  0x605  },
		{ 0x64b,  0x65e  }, // Arabic vowel marks
		{ 0x670,  0x670  }, // Arabic vowel mark
		{ 0xf40,  0xfbc  }, // Tibetan
		{ 0x1100, 0x11ff }, // Korean jamo
		{ 0x2800, 0x28ff }, // braille
		// Chinese/Japanese. Should never be needed, but Mac OS 10.4's
		// iswalpha seems to be broken, so just make sure
		{ 0x3041, 0xa700 },
	};
	unsigned int ix;

	if (iswalpha(c))
		return 1;

	// Nothing below 0x300 is accepted beyond what iswalpha() reports
	if (c < 0x300)
		return 0;

	if ((c >= 0x901) && (c <= 0xdf7)) {
		// Indic scripts: Devanagari, Tamil, etc
		if ((c & 0x7f) < 0x64)
			return 1;
		if (lookupwchar(extra_indic_alphas, c) != 0)
			return 1;
		// Malayalam chillu characters
		return ((c >= 0xd7a) && (c <= 0xd7f)) ? 1 : 0;
	}

	for (ix = 0; ix < sizeof(extra_ranges) / sizeof(extra_ranges[0]); ix++) {
		if ((c >= extra_ranges[ix].first) && (c <= extra_ranges[ix].last))
			return 1;
	}
	return 0;
}
// Returns 1 if c is one of the ASCII digits '0' to '9', otherwise 0.
int IsDigit09(unsigned int c)
{
	return ((c < '0') || (c > '9')) ? 0 : 1;
}
/*
 * Returns 1 if iswdigit() accepts c, or if c lies in 0x966..0x96f
 * (the Devanagari digits in Unicode); otherwise 0.
 */
int IsDigit(unsigned int c)
{
	int in_extra_range = (c >= 0x966) && (c <= 0x96f);

	return (iswdigit(c) || in_extra_range) ? 1 : 0;
}
/*
 * Extended whitespace test.
 * Returns 0 for c == 0, 1 for box drawing characters and unicode
 * specials, and otherwise whatever iswspace() returns for c.
 */
static int IsSpace(unsigned int c)
{
	if (c != 0) {
		if (((c >= 0x2500) && (c < 0x25a0))      // box drawing characters
		    || ((c >= 0xfff9) && (c <= 0xffff))) // unicode specials
			return 1;
		return iswspace(c);
	}
	return 0;
}
/*
 * Returns 1 if c is a non-zero value no greater than ' ' (0x20),
 * otherwise 0.
 *
 * isspace() can't be used because on Windows, isspace(0xe1) gives TRUE !
 */
int isspace2(unsigned int c)
{
	// Any value whose low byte is zero is either 0 itself or above ' ',
	// so comparing c directly covers every case.
	return ((c != 0) && (c <= ' ')) ? 1 : 0;
}
| void DeleteTranslator(Translator *tr) | void DeleteTranslator(Translator *tr) | ||||
| { | { | ||||
| if (!tr) return; | if (!tr) return; | ||||
| return 0; | return 0; | ||||
| } | } | ||||
| int IsBracket(int c) | |||||
| { | |||||
| if ((c >= 0x2014) && (c <= 0x201f)) | |||||
| return 1; | |||||
| return lookupwchar(brackets, c); | |||||
| } | |||||
| char *strchr_w(const char *s, int c) | char *strchr_w(const char *s, int c) | ||||
| { | { | ||||
| // return NULL for any non-ascii character | // return NULL for any non-ascii character |
| int lookupwchar(const unsigned short *list, int c); | int lookupwchar(const unsigned short *list, int c); | ||||
// strchr_w: returns NULL for any non-ascii character
| char *strchr_w(const char *s, int c); | char *strchr_w(const char *s, int c); | ||||
// IsBracket: 1 for 0x2014..0x201f, otherwise a lookup in the brackets list
| int IsBracket(int c); | |||||
| void InitNamedata(void); | void InitNamedata(void); | ||||
| void InitText(int flags); | void InitText(int flags); | ||||
| void InitText2(void); | void InitText2(void); | ||||
// Character classification helpers:
//   IsDigit   - iswdigit() plus the code points 0x966..0x96f
//   IsDigit09 - 1 only for the ASCII digits '0'..'9'
//   IsAlpha   - iswalpha() replacement which also accepts some in-word symbols
//   isspace2  - 1 for non-zero values up to and including ' '
| int IsDigit(unsigned int c); | |||||
| int IsDigit09(unsigned int c); | |||||
| int IsAlpha(unsigned int c); | |||||
| int isspace2(unsigned int c); | |||||
| ALPHABET *AlphabetFromChar(int c); | ALPHABET *AlphabetFromChar(int c); | ||||
| Translator *SelectTranslator(const char *name); | Translator *SelectTranslator(const char *name); |