@@ -28,6 +28,7 @@ | |||
#include <stdlib.h> | |||
#include <string.h> | |||
#include <sys/stat.h> | |||
#include <wctype.h> | |||
#include <espeak-ng/espeak_ng.h> | |||
#include <espeak-ng/speak_lib.h> | |||
@@ -168,6 +169,115 @@ int utf8_in2(int *c, const char *buf, int backwards) | |||
} | |||
int IsAlpha(unsigned int c)
{
	// Extended replacement for iswalpha(): in addition to whatever the C
	// library classifies as alphabetic, this accepts a number of marks and
	// symbols that occur inside words.
	// Returns 1 if c counts as a letter, 0 otherwise.

	// Code points inside the Indic range that the general mask test below
	// rejects but which must still count as letters (0-terminated list).
	static const unsigned short extra_indic_alphas[] = {
		0xa70, 0xa71, // Gurmukhi: tippi, addak
		0
	};

	// Inclusive code point ranges, outside the Indic range, that are
	// always treated as alphabetic.
	static const struct {
		unsigned int first;
		unsigned int last;
	} in_word_ranges[] = {
		{ 0x05b0, 0x05c2 }, // Hebrew vowel marks
		{ 0x0605, 0x0605 },
		{ 0x0670, 0x0670 }, // Arabic vowel mark
		{ 0x064b, 0x065e }, // Arabic vowel marks
		{ 0x0300, 0x036f }, // combining accents
		{ 0x0f40, 0x0fbc }, // Tibetan
		{ 0x1100, 0x11ff }, // Korean jamo
		{ 0x2800, 0x28ff }, // braille
		// Chinese/Japanese. Should never be needed, but Mac OS 10.4's
		// iswalpha seems to be broken, so just make sure.
		{ 0x3041, 0xa700 },
	};
	unsigned int ix;

	if (iswalpha(c))
		return 1;

	// Nothing below the combining accents gets special treatment.
	if (c < 0x300)
		return 0;

	if ((c >= 0x901) && (c <= 0xdf7)) {
		// Indic scripts: Devanagari, Tamil, etc.
		// The decision is final for this range: the table below is not consulted.
		int is_indic_letter = ((c & 0x7f) < 0x64)
			|| (lookupwchar(extra_indic_alphas, c) != 0)
			|| ((c >= 0xd7a) && (c <= 0xd7f)); // Malayalam chillu characters
		return is_indic_letter ? 1 : 0;
	}

	for (ix = 0; ix < sizeof(in_word_ranges) / sizeof(in_word_ranges[0]); ix++) {
		if ((c >= in_word_ranges[ix].first) && (c <= in_word_ranges[ix].last))
			return 1;
	}
	return 0;
}
// Characters treated as brackets (0-terminated list).
// The range 0x2014 to 0x201f is tested directly in IsBracket(), so those
// code points don't need to be in this list.
static const unsigned short brackets[] = {
	'(', ')',
	'[', ']',
	'{', '}',
	'<', '>',
	'"', '\'', '`',
	0xab, 0xbb,     // double angle brackets
	0x300a, 0x300b, // double angle brackets (ideograph)
	0xe000 + '<',   // private usage area
	0
};
int IsBracket(int c) | |||
{ | |||
if ((c >= 0x2014) && (c <= 0x201f)) | |||
return 1; | |||
return lookupwchar(brackets, c); | |||
} | |||
int IsDigit09(unsigned int c)
{
	// Returns 1 only for the ASCII digits '0'..'9', otherwise 0.
	return ((c >= '0') && (c <= '9')) ? 1 : 0;
}
int IsDigit(unsigned int c)
{
	// Returns 1 if the C library classifies c as a digit, or if c lies in
	// 0x966..0x96f (the Devanagari digits in the Unicode charts);
	// returns 0 otherwise.
	int in_extra_range = (c >= 0x966) && (c <= 0x96f);

	return (iswdigit(c) || in_extra_range) ? 1 : 0;
}
int IsSpace(unsigned int c)
{
	// Whitespace test that additionally treats box drawing characters and
	// the Unicode specials as spaces. NUL is never a space.
	// Returns 0 for "not a space"; a non-zero value otherwise (1 for the
	// extra ranges, or the value from iswspace()).
	if (c == 0)
		return 0;

	if (((c >= 0x2500) && (c < 0x25a0))      // box drawing characters
	    || ((c >= 0xfff9) && (c <= 0xffff))) // unicode specials
		return 1;

	return iswspace(c);
}
int isspace2(unsigned int c)
{
	// Returns 1 if c is a non-zero value no greater than ' ' (0x20),
	// i.e. an ASCII control character or space; returns 0 otherwise.
	//
	// can't use isspace() because on Windows, isspace(0xe1) gives TRUE !
	//
	// The previous form stored (c & 0xff) in a local that was never read and
	// tested that byte against zero. Any c that is not greater than ' ' fits
	// in the low byte, so that test is the same as c != 0.
	return ((c != 0) && (c <= ' ')) ? 1 : 0;
}
int is_str_totally_null(const char* str, int size) { | |||
// Tests if all bytes of str are null up to size | |||
// This should never be reimplemented with integers, because |
@@ -26,6 +26,12 @@ | |||
extern ESPEAK_NG_API int GetFileLength(const char *filename); | |||
extern ESPEAK_NG_API void strncpy0(char *to, const char *from, int size); | |||
int IsAlpha(unsigned int c); | |||
int IsBracket(int c); | |||
int IsDigit(unsigned int c); | |||
int IsDigit09(unsigned int c); | |||
int IsSpace(unsigned int c); | |||
int isspace2(unsigned int c); | |||
int is_str_totally_null(const char* str, int size); // Tests if all bytes of str up to size are null | |||
int Read4Bytes(FILE *f); | |||
int towlower2(unsigned int c, Translator *translator); // Supports Turkish I |
@@ -41,7 +41,6 @@ | |||
#include "phoneme.h" // for PHONEME_TAB_LIST, phonSWITCH, phone... | |||
#include "speech.h" // for path_home | |||
#include "synthesize.h" // for Write4Bytes | |||
#include "translate.h" // for isspace2, IsDigit09, utf8_in, utf8_out | |||
static FILE *f_log = NULL; | |||
@@ -38,7 +38,7 @@ | |||
#include "readclause.h" // for WordToString2 | |||
#include "synthdata.h" // for SelectPhonemeTable | |||
#include "synthesize.h" // for phoneme_tab | |||
#include "translate.h" // for Translator, LANGUAGE_OPTIONS, IsDigit09, WOR... | |||
#include "translate.h" // for Translator, LANGUAGE_OPTIONS, WOR... | |||
#include "voice.h" // for voice, voice_t | |||
#define M_LIGATURE 0x8000 |
@@ -45,7 +45,7 @@ | |||
#include "readclause.h" // for PARAM_STACK, param_stack, AddNameData | |||
#include "soundicon.h" // for LoadSoundFile2 | |||
#include "synthesize.h" // for SPEED_FACTORS, speed | |||
#include "translate.h" // for CTRL_EMBEDDED, IsDigit09, utf8_out | |||
#include "translate.h" // for CTRL_EMBEDDED | |||
#include "voice.h" // for SelectVoice, SelectVoiceByName | |||
#include "speech.h" // for MAKE_MEM_UNDEFINED | |||
@@ -105,111 +105,9 @@ static char source[N_TR_SOURCE+40]; // extra space for embedded command & voice | |||
int n_replace_phonemes; | |||
REPLACE_PHONEMES replace_phonemes[N_REPLACE_PHONEMES]; | |||
// Characters treated as brackets (0-terminated list).
// The range 0x2014 to 0x201f is tested directly in IsBracket(), so those
// code points don't need to be in this list.
static const unsigned short brackets[] = {
	'(', ')',
	'[', ']',
	'{', '}',
	'<', '>',
	'"', '\'', '`',
	0xab, 0xbb,     // double angle brackets
	0x300a, 0x300b, // double angle brackets (ideograph)
	0xe000 + '<',   // private usage area
	0
};
// Other characters which break a word, but don't produce a pause
// (0-terminated list).
static const unsigned short breaks[] = {
	'_',
	0
};
int IsAlpha(unsigned int c)
{
	// Extended replacement for iswalpha(): in addition to whatever the C
	// library classifies as alphabetic, this accepts a number of marks and
	// symbols that occur inside words.
	// Returns 1 if c counts as a letter, 0 otherwise.

	// Code points inside the Indic range that the general mask test below
	// rejects but which must still count as letters (0-terminated list).
	static const unsigned short extra_indic_alphas[] = {
		0xa70, 0xa71, // Gurmukhi: tippi, addak
		0
	};

	// Inclusive code point ranges, outside the Indic range, that are
	// always treated as alphabetic.
	static const struct {
		unsigned int first;
		unsigned int last;
	} in_word_ranges[] = {
		{ 0x05b0, 0x05c2 }, // Hebrew vowel marks
		{ 0x0605, 0x0605 },
		{ 0x0670, 0x0670 }, // Arabic vowel mark
		{ 0x064b, 0x065e }, // Arabic vowel marks
		{ 0x0300, 0x036f }, // combining accents
		{ 0x0f40, 0x0fbc }, // Tibetan
		{ 0x1100, 0x11ff }, // Korean jamo
		{ 0x2800, 0x28ff }, // braille
		// Chinese/Japanese. Should never be needed, but Mac OS 10.4's
		// iswalpha seems to be broken, so just make sure.
		{ 0x3041, 0xa700 },
	};
	unsigned int ix;

	if (iswalpha(c))
		return 1;

	// Nothing below the combining accents gets special treatment.
	if (c < 0x300)
		return 0;

	if ((c >= 0x901) && (c <= 0xdf7)) {
		// Indic scripts: Devanagari, Tamil, etc.
		// The decision is final for this range: the table below is not consulted.
		int is_indic_letter = ((c & 0x7f) < 0x64)
			|| (lookupwchar(extra_indic_alphas, c) != 0)
			|| ((c >= 0xd7a) && (c <= 0xd7f)); // Malayalam chillu characters
		return is_indic_letter ? 1 : 0;
	}

	for (ix = 0; ix < sizeof(in_word_ranges) / sizeof(in_word_ranges[0]); ix++) {
		if ((c >= in_word_ranges[ix].first) && (c <= in_word_ranges[ix].last))
			return 1;
	}
	return 0;
}
int IsDigit09(unsigned int c)
{
	// Returns 1 only for the ASCII digits '0'..'9', otherwise 0.
	return ((c >= '0') && (c <= '9')) ? 1 : 0;
}
int IsDigit(unsigned int c)
{
	// Returns 1 if the C library classifies c as a digit, or if c lies in
	// 0x966..0x96f (the Devanagari digits in the Unicode charts);
	// returns 0 otherwise.
	int in_extra_range = (c >= 0x966) && (c <= 0x96f);

	return (iswdigit(c) || in_extra_range) ? 1 : 0;
}
static int IsSpace(unsigned int c)
{
	// Whitespace test that additionally treats box drawing characters and
	// the Unicode specials as spaces. NUL is never a space.
	// Returns 0 for "not a space"; a non-zero value otherwise (1 for the
	// extra ranges, or the value from iswspace()).
	if (c == 0)
		return 0;

	if (((c >= 0x2500) && (c < 0x25a0))      // box drawing characters
	    || ((c >= 0xfff9) && (c <= 0xffff))) // unicode specials
		return 1;

	return iswspace(c);
}
int isspace2(unsigned int c)
{
	// Returns 1 if c is a non-zero value no greater than ' ' (0x20),
	// i.e. an ASCII control character or space; returns 0 otherwise.
	//
	// can't use isspace() because on Windows, isspace(0xe1) gives TRUE !
	//
	// The previous form stored (c & 0xff) in a local that was never read and
	// tested that byte against zero. Any c that is not greater than ' ' fits
	// in the low byte, so that test is the same as c != 0.
	return ((c != 0) && (c <= ' ')) ? 1 : 0;
}
void DeleteTranslator(Translator *tr) | |||
{ | |||
if (!tr) return; | |||
@@ -231,13 +129,6 @@ int lookupwchar(const unsigned short *list, int c) | |||
return 0; | |||
} | |||
int IsBracket(int c) | |||
{ | |||
if ((c >= 0x2014) && (c <= 0x201f)) | |||
return 1; | |||
return lookupwchar(brackets, c); | |||
} | |||
char *strchr_w(const char *s, int c) | |||
{ | |||
// return NULL for any non-ascii character |
@@ -663,14 +663,9 @@ extern int (*phoneme_callback)(const char *); | |||
int lookupwchar(const unsigned short *list, int c); | |||
char *strchr_w(const char *s, int c); | |||
int IsBracket(int c); | |||
void InitNamedata(void); | |||
void InitText(int flags); | |||
void InitText2(void); | |||
int IsDigit(unsigned int c); | |||
int IsDigit09(unsigned int c); | |||
int IsAlpha(unsigned int c); | |||
int isspace2(unsigned int c); | |||
ALPHABET *AlphabetFromChar(int c); | |||
Translator *SelectTranslator(const char *name); |