Speak names of characters U+2800 to U+28FF, Braille dot symbols. Language option to speak numbers which use characters '0' to '9' with the English voice. Add some more characters to the vowel and consonant lists for Indian languages. git-svn-id: https://espeak.svn.sourceforge.net/svnroot/espeak/trunk@284 d46cf337-b52f-0410-862d-fd96e6ae7743master
@@ -665,6 +665,7 @@ numero $1 | |||
oboe o:b'o:@ | |||
orgie $alt | |||
ok o:k'e: | |||
ödem Y:dEm | |||
paradies $3 | |||
passage pas'aZ@ |
@@ -102,7 +102,7 @@ p Q r R s s; t T | |||
ts v x z | |||
Dictionary en_dict 2012-04-07 | |||
Dictionary en_dict 2012-06-15 | |||
0 3 3: @ @- @2 @5 @L | |||
a a# A: A@ aa aI aI3 aI@ | |||
@@ -202,7 +202,7 @@ q r r. s S s. t T | |||
t. th th. v w x z | |||
Dictionary ta_dict 2012-06-06 | |||
Dictionary ta_dict 2012-06-27 | |||
a a: aI aU e E e: i | |||
I i: o o: u U u2 u: | |||
@@ -797,3 +797,15 @@ o u U u# Y | |||
k l m n N p q r | |||
s S t tS v w X z | |||
Z | |||
Dictionary ga_dict 2012-06-27 | |||
0 @ a A: e E e: i | |||
I i: o O o: u U u1 | |||
u: V | |||
: ; b c C d d[ dZ | |||
f g h j k l m n | |||
p r R s S t t[ tS | |||
v w x X z |
@@ -103,6 +103,7 @@ _hok h'Uk | |||
_cap k,ap@-t@L | |||
_?A l,Et3 | |||
_?? sImb@L | |||
_braille br'eIl | |||
_#9 tab | |||
_#32 speIs | |||
//… _::d%0td,0t // for ellipsis or ... while reading |
@@ -118,12 +118,17 @@ U+bf9 ru:ba:j | |||
(பி . ஏ) bije: $dot | |||
(கி . மீ) kilo:mi:t.t.Vr $dot | |||
aud ostr'eIlIVn||d'olVz | |||
cny tS'aIni:z||j'uVn | |||
eur ju:ro:z | |||
gbp br'itiS||p'aUndz | |||
inr 'IndIVn||r'u:ba:j | |||
inr 'IndIVn||r'u:pi:z | |||
jpy dZ'a:pVni:z||j'en | |||
rub r'VSVn||r'u:bVlz | |||
usd j'ues||d'olVz | |||
// numbers | |||
_0 suz.ijVm // சுழியம் | |||
_1 onRU |
@@ -502,6 +502,7 @@ _மன்மதக்) க (ுகை g | |||
//endsort | |||
//sort | |||
ஷங்) க (ர kV | |||
_கோபி) க kV# | |||
_ஸ்டா) க kV# | |||
_ஜமாய்) க kV# |
@@ -79,7 +79,7 @@ consonants 10 116 | |||
bo 10 152 | |||
kk 20 117 | |||
fa 9 110 | |||
ga 24 124 | |||
ga 25 125 | |||
Data file Used by | |||
b/b [b] base | |||
@@ -407,7 +407,7 @@ l/l_ [l] base | |||
[l/] fr | |||
l/l_@ [l/3] base | |||
[l/] fr | |||
l/l@ [hÑù] base | |||
l/l@ [h–q] base | |||
[l#] base | |||
[l] fr | |||
[l/2] fr | |||
@@ -441,7 +441,7 @@ l/L2_oL [l/2] base | |||
l/L2_uL [l/2] base | |||
l/l_3 [l/] de | |||
l/l_4 [ll] sq | |||
l/la [hÑù] base | |||
l/la [h–q] base | |||
[l#] base | |||
[l] fr | |||
[l/2] fr | |||
@@ -449,7 +449,7 @@ l/la [h | |||
[K] tn | |||
l/l_a [l/3] base | |||
[l/] fr | |||
l/le [hÑù] base | |||
l/le [h–q] base | |||
[l#] base | |||
[l] fr | |||
[l/2] fr | |||
@@ -461,7 +461,7 @@ l/L_eL_af [&] af | |||
[&:] af | |||
l/l_front [L] sq | |||
l/l_front_ [l/4] sq | |||
l/li [hÑù] base | |||
l/li [h–q] base | |||
[l#] base | |||
[l] fr | |||
[l/2] fr | |||
@@ -475,7 +475,7 @@ ll/ll [L] bg | |||
ll/_ll [L] bg | |||
l/l_long [l] base | |||
[l] fr | |||
l/lo [hÑù] base | |||
l/lo [h–q] base | |||
[l#] base | |||
[l/2] fr | |||
[K] nso | |||
@@ -485,7 +485,7 @@ l/l_o [l/3] base | |||
l^/l_rfx [l.] base | |||
[l] ru | |||
[l^] ru | |||
l/lu [hÑù] base | |||
l/lu [h–q] base | |||
[l#] base | |||
[l] fr | |||
[l/2] fr | |||
@@ -2143,6 +2143,7 @@ vowel/u [u] base | |||
[u] ak | |||
[u:] wo | |||
[u] bo | |||
[u1] ga | |||
[u:] ga | |||
vowel/u# [u:] en-sc | |||
[Y] tr |
@@ -347,8 +347,8 @@ int HashDictionary(const char *string) | |||
char *EncodePhonemes(char *p, char *outptr, unsigned char *bad_phoneme) | |||
/*********************************************************************/ | |||
const char *EncodePhonemes(const char *p, char *outptr, unsigned char *bad_phoneme) | |||
/***************************************************************************/ | |||
/* Translate a phoneme string from ascii mnemonics to internal phoneme numbers, | |||
from 'p' up to next blank . | |||
Returns advanced 'p' | |||
@@ -364,7 +364,8 @@ char *EncodePhonemes(char *p, char *outptr, unsigned char *bad_phoneme) | |||
int consumed; | |||
unsigned int mnemonic_word; | |||
bad_phoneme[0] = 0; | |||
if(bad_phoneme != NULL) | |||
bad_phoneme[0] = 0; | |||
// skip initial blanks | |||
while(isspace(*p)) | |||
@@ -424,8 +425,11 @@ char *EncodePhonemes(char *p, char *outptr, unsigned char *bad_phoneme) | |||
if(max_ph == 0) | |||
{ | |||
// not recognised, report and ignore | |||
bad_phoneme[0] = *p; | |||
bad_phoneme[1] = 0; | |||
if(bad_phoneme != NULL) | |||
{ | |||
bad_phoneme[0] = *p; | |||
bad_phoneme[1] = 0; | |||
} | |||
*outptr++ = 0; | |||
return(p+1); | |||
} |
@@ -590,12 +590,13 @@ int TranslateLetter(Translator *tr, char *word, char *phonemes, int control) | |||
int n_bytes; | |||
int letter; | |||
int len; | |||
int ix; | |||
int save_option_phonemes; | |||
char *p2; | |||
char *pbuf; | |||
char capital[20]; | |||
char ph_buf[60]; | |||
char ph_buf2[60]; | |||
char ph_buf[80]; | |||
char ph_buf2[80]; | |||
char hexbuf[6]; | |||
ph_buf[0] = 0; | |||
@@ -646,22 +647,49 @@ int TranslateLetter(Translator *tr, char *word, char *phonemes, int control) | |||
if(ph_buf[0] == 0) | |||
{ | |||
// character name not found | |||
if(iswalpha(letter)) | |||
Lookup(tr, "_?A", ph_buf); | |||
if((letter >= 0x2800) && (letter <= 0x28ff)) | |||
{ | |||
// braille dots symbol | |||
Lookup(tr, "_braille", ph_buf); | |||
if(ph_buf[0] == 0) | |||
{ | |||
EncodePhonemes("br'e:l", ph_buf, NULL); | |||
} | |||
if((ph_buf[0]==0) && !iswspace(letter)) | |||
Lookup(tr, "_??", ph_buf); | |||
if(ph_buf[0] != 0) | |||
{ | |||
pbuf = ph_buf + strlen(ph_buf); | |||
for(ix=0; ix<8; ix++) | |||
{ | |||
if(letter & (1 << ix)) | |||
{ | |||
*pbuf++ = phonPAUSE_VSHORT; | |||
LookupLetter(tr, '1'+ix, 0, pbuf, 1); | |||
pbuf += strlen(pbuf); | |||
} | |||
} | |||
} | |||
} | |||
if(ph_buf[0] != 0) | |||
if(ph_buf[0]== 0) | |||
{ | |||
// speak the hexadecimal number of the character code | |||
sprintf(hexbuf,"%x",letter); | |||
pbuf = ph_buf; | |||
for(p2 = hexbuf; *p2 != 0; p2++) | |||
if(iswalpha(letter)) | |||
Lookup(tr, "_?A", ph_buf); | |||
if((ph_buf[0]==0) && !iswspace(letter)) | |||
Lookup(tr, "_??", ph_buf); | |||
if(ph_buf[0] != 0) | |||
{ | |||
pbuf += strlen(pbuf); | |||
*pbuf++ = phonPAUSE_VSHORT; | |||
LookupLetter(tr, *p2, 0, pbuf, 1); | |||
// speak the hexadecimal number of the character code | |||
sprintf(hexbuf,"%x",letter); | |||
pbuf = ph_buf; | |||
for(p2 = hexbuf; *p2 != 0; p2++) | |||
{ | |||
pbuf += strlen(pbuf); | |||
*pbuf++ = phonPAUSE_VSHORT; | |||
LookupLetter(tr, *p2, 0, pbuf, 1); | |||
} | |||
} | |||
} | |||
} | |||
@@ -1951,8 +1979,9 @@ int TranslateNumber(Translator *tr, char *word1, char *ph_out, unsigned int *fla | |||
return(0); // speak digits individually | |||
if(tr->langopts.numbers != 0) | |||
{ | |||
return(TranslateNumber_1(tr, word1, ph_out, flags, wtab, control)); | |||
} | |||
return(0); | |||
} // end of TranslateNumber | |||
@@ -161,7 +161,7 @@ typedef struct { | |||
int LookupPhonemeString(const char *string); | |||
int PhonemeCode(unsigned int mnem); | |||
char *EncodePhonemes(char *p, char *outptr, unsigned char *bad_phoneme); | |||
const char *EncodePhonemes(const char *p, char *outptr, unsigned char *bad_phoneme); | |||
void DecodePhonemes(const char *inptr, char *outptr); | |||
extern const char *WordToString(unsigned int word); |
@@ -35,7 +35,7 @@ | |||
#include "translate.h" | |||
#include "wave.h" | |||
const char *version_string = "1.46.17 05.May.12"; | |||
const char *version_string = "1.46.19 27.Jun.12"; | |||
const int version_phdata = 0x014600; | |||
int option_device_number = -1; |
@@ -308,22 +308,23 @@ static void SetCyrillicLetters(Translator *tr) | |||
void SetIndicLetters(Translator *tr) | |||
{//================================= | |||
// Set letter types for Indic scripts, Devanagari, Tamil, etc | |||
static const char dev_consonants2[] = {0x02,0x03,0x58,0x59,0x5a,0x5b,0x5c,0x5d,0x5e,0x5f}; | |||
static const char dev_consonants2[] = {0x02,0x03,0x58,0x59,0x5a,0x5b,0x5c,0x5d,0x5e,0x5f,0x7b,0x7c,0x7e,0x7f,0}; | |||
static const char dev_vowels2[] = {0x60,0x61, 0x55,0x56,0x57,0x62,0x63,0}; // non-consecutive vowels and vowel-signs | |||
memset(tr->letter_bits,0,sizeof(tr->letter_bits)); | |||
SetLetterBitsRange(tr,LETTERGP_A,0x04,0x14); // vowel letters | |||
SetLetterBitsRange(tr,LETTERGP_A,0x3e,0x4d); // + vowel signs, and virama | |||
SetLetterBitsRange(tr,LETTERGP_A,0x55,0x57); // + vowel signs | |||
SetLetterBits(tr,LETTERGP_A, dev_vowels2); // + extra vowels and vowel signs | |||
SetLetterBitsRange(tr,LETTERGP_B,0x3e,0x4d); // vowel signs, and virama | |||
SetLetterBitsRange(tr,LETTERGP_B,0x55,0x57); // + vowel signs | |||
SetLetterBits(tr,LETTERGP_B, dev_vowels2); // + extra vowels and vowel signs | |||
SetLetterBitsRange(tr,LETTERGP_C,0x15,0x39); // the main consonant range | |||
SetLetterBits(tr,LETTERGP_C,dev_consonants2); // + additional consonants | |||
SetLetterBitsRange(tr,LETTERGP_Y,0x04,0x14); // vowel letters | |||
SetLetterBitsRange(tr,LETTERGP_Y,0x3e,0x4c); // + vowel signs | |||
SetLetterBitsRange(tr,LETTERGP_Y,0x55,0x57); // + vowel signs | |||
SetLetterBits(tr,LETTERGP_Y, dev_vowels2); // + extra vowels and vowel signs | |||
tr->langopts.param[LOPT_UNPRONOUNCABLE] = 1; // disable check for unpronouncable words | |||
tr->langopts.suffix_add_e = tr->letter_bits_offset + 0x4d; //virama | |||
@@ -711,6 +712,12 @@ Translator *SelectTranslator(const char *name) | |||
{ | |||
tr->letter_bits_offset = OFFSET_GURMUKHI; | |||
} | |||
if(name2 == L('n','e')) | |||
{ | |||
tr->langopts.break_numbers = 0x2aaaa8; | |||
tr->langopts.max_digits = 22; | |||
tr->langopts.numbers2 |= NUM2_ENGLISH_NUMERALS; | |||
} | |||
SetIndicLetters(tr); | |||
} | |||
break; |
@@ -407,6 +407,9 @@ int IsAlpha(unsigned int c) | |||
if((c >= 0x1100) && (c <= 0x11ff)) | |||
return(1); //Korean jamo | |||
if((c >= 0x2800) && (c <= 0x28ff)) | |||
return(1); // braille | |||
if((c > 0x3040) && (c <= 0xa700)) | |||
return(1); // Chinese/Japanese. Should never get here, but Mac OS 10.4's iswalpha seems to be broken, so just make sure | |||
@@ -981,6 +984,13 @@ if((wmark > 0) && (wmark < 8)) | |||
if(word_phonemes[0] == phonSWITCH) | |||
return(0); | |||
if((tr->langopts.numbers2 & NUM2_ENGLISH_NUMERALS) && !(wtab->flags & FLAG_CHAR_REPLACED)) | |||
{ | |||
// for this language, speak English numerals (0-9) with the English voice | |||
sprintf(word_phonemes,"%c",phonSWITCH); | |||
return(0); | |||
} | |||
found = TranslateNumber(tr, word1, phonemes, dictionary_flags, wtab, 0); | |||
} | |||
@@ -2223,7 +2233,7 @@ static int EmbeddedCommand(unsigned int &source_index) | |||
static int SubstituteChar(Translator *tr, unsigned int c, unsigned int next_in, int *insert) | |||
static int SubstituteChar(Translator *tr, unsigned int c, unsigned int next_in, int *insert, int *wordflags) | |||
{//========================================================================================= | |||
int ix; | |||
unsigned int word; | |||
@@ -2285,12 +2295,14 @@ static int SubstituteChar(Translator *tr, unsigned int c, unsigned int next_in, | |||
if(upper_case) | |||
new_c = towupper(new_c); | |||
*wordflags |= FLAG_CHAR_REPLACED; | |||
return(new_c); | |||
} | |||
static int TranslateChar(Translator *tr, char *ptr, int prev_in, unsigned int c, unsigned int next_in, int *insert) | |||
static int TranslateChar(Translator *tr, char *ptr, int prev_in, unsigned int c, unsigned int next_in, int *insert, int *wordflags) | |||
{//================================================================================================================ | |||
// To allow language specific examination and replacement of characters | |||
@@ -2368,7 +2380,7 @@ static int TranslateChar(Translator *tr, char *ptr, int prev_in, unsigned int c, | |||
} | |||
break; | |||
} | |||
return(SubstituteChar(tr,c,next_in,insert)); | |||
return(SubstituteChar(tr, c, next_in, insert, wordflags)); | |||
} | |||
@@ -2680,7 +2692,7 @@ if((c == '/') && (tr->langopts.testing & 2) && IsDigit09(next_in) && IsAlpha(pre | |||
word_flags |= FLAG_COMMA_AFTER; | |||
} | |||
c = TranslateChar(tr, &source[source_index], prev_in,c, next_in, &char_inserted); // optional language specific function | |||
c = TranslateChar(tr, &source[source_index], prev_in,c, next_in, &char_inserted, &word_flags); // optional language specific function | |||
if(c == 8) | |||
continue; // ignore this character | |||
@@ -110,6 +110,7 @@ | |||
#define FLAG_MULTIPLE_SPACES 0x40000 // word is preceded by multiple spaces, newline, or tab | |||
#define FLAG_INDIVIDUAL_DIGITS 0x80000 // speak number as individual digits | |||
#define FLAG_DELETE_WORD 0x100000 // don't speak this word, it has been spoken as part of the previous word | |||
#define FLAG_CHAR_REPLACED 0x200000 // characters have been replaced by .replace in the *_rules | |||
#define FLAG_SUFFIX_VOWEL 0x08000000 // remember an initial vowel from the suffix | |||
#define FLAG_NO_TRACE 0x10000000 // passed to TranslateRules() to suppress dictionary lookup printout | |||
@@ -444,12 +445,14 @@ typedef struct { | |||
int numbers; | |||
#define NUM2_MULTIPLE_ORDINAL 0x1000 | |||
#define NUM2_ENGLISH_NUMERALS 0x2000 | |||
// bits 1-4 use variant form of numbers before thousands,millions,etc. | |||
// bit6=(LANG=pl) two forms of plural, M or MA | |||
// bit7=(LANG=ru) use MB for 1 thousand, million, etc | |||
// bit8=(LANG=cs,sk) two forms of plural, M or MA | |||
// bit9=(LANG=rw) say "thousand" and "million" before its number, not after | |||
// bit12=(LANG=el,es) use ordinal form of hundreds and tens as well as units | |||
// bit13=(LANG=ne) speak (non-replaced) English numerals in English | |||
int numbers2; | |||
#define BREAK_THOUSANDS 0x49249248 |