Speak the names of characters U+2800 to U+28FF (Braille dot symbols). Add a language option to speak numbers which use the characters '0' to '9' with the English voice. Add some more characters to the vowel and consonant lists for Indian languages.
git-svn-id: https://espeak.svn.sourceforge.net/svnroot/espeak/trunk@284 d46cf337-b52f-0410-862d-fd96e6ae7743
master
oboe o:b'o:@ | oboe o:b'o:@ | ||||
orgie $alt | orgie $alt | ||||
ok o:k'e: | ok o:k'e: | ||||
ödem Y:dEm | |||||
paradies $3 | paradies $3 | ||||
passage pas'aZ@ | passage pas'aZ@ |
ts v x z | ts v x z | ||||
Dictionary en_dict 2012-04-07 | |||||
Dictionary en_dict 2012-06-15 | |||||
0 3 3: @ @- @2 @5 @L | 0 3 3: @ @- @2 @5 @L | ||||
a a# A: A@ aa aI aI3 aI@ | a a# A: A@ aa aI aI3 aI@ | ||||
t. th th. v w x z | t. th th. v w x z | ||||
Dictionary ta_dict 2012-06-06 | |||||
Dictionary ta_dict 2012-06-27 | |||||
a a: aI aU e E e: i | a a: aI aU e E e: i | ||||
I i: o o: u U u2 u: | I i: o o: u U u2 u: | ||||
k l m n N p q r | k l m n N p q r | ||||
s S t tS v w X z | s S t tS v w X z | ||||
Z | Z | ||||
Dictionary ga_dict 2012-06-27 | |||||
0 @ a A: e E e: i | |||||
I i: o O o: u U u1 | |||||
u: V | |||||
: ; b c C d d[ dZ | |||||
f g h j k l m n | |||||
p r R s S t t[ tS | |||||
v w x X z |
_cap k,ap@-t@L | _cap k,ap@-t@L | ||||
_?A l,Et3 | _?A l,Et3 | ||||
_?? sImb@L | _?? sImb@L | ||||
_braille br'eIl | |||||
_#9 tab | _#9 tab | ||||
_#32 speIs | _#32 speIs | ||||
//… _::d%0td,0t // for ellipsis or ... while reading | //… _::d%0td,0t // for ellipsis or ... while reading |
(பி . ஏ) bije: $dot | (பி . ஏ) bije: $dot | ||||
(கி . மீ) kilo:mi:t.t.Vr $dot | (கி . மீ) kilo:mi:t.t.Vr $dot | ||||
aud ostr'eIlIVn||d'olVz | aud ostr'eIlIVn||d'olVz | ||||
cny tS'aIni:z||j'uVn | |||||
eur ju:ro:z | |||||
gbp br'itiS||p'aUndz | gbp br'itiS||p'aUndz | ||||
inr 'IndIVn||r'u:ba:j | |||||
inr 'IndIVn||r'u:pi:z | |||||
jpy dZ'a:pVni:z||j'en | |||||
rub r'VSVn||r'u:bVlz | |||||
usd j'ues||d'olVz | usd j'ues||d'olVz | ||||
// numbers | // numbers | ||||
_0 suz.ijVm // சுழியம் | _0 suz.ijVm // சுழியம் | ||||
_1 onRU | _1 onRU |
//endsort | //endsort | ||||
//sort | //sort | ||||
ஷங்) க (ர kV | |||||
_கோபி) க kV# | _கோபி) க kV# | ||||
_ஸ்டா) க kV# | _ஸ்டா) க kV# | ||||
_ஜமாய்) க kV# | _ஜமாய்) க kV# |
bo 10 152 | bo 10 152 | ||||
kk 20 117 | kk 20 117 | ||||
fa 9 110 | fa 9 110 | ||||
ga 24 124 | |||||
ga 25 125 | |||||
Data file Used by | Data file Used by | ||||
b/b [b] base | b/b [b] base | ||||
[l/] fr | [l/] fr | ||||
l/l_@ [l/3] base | l/l_@ [l/3] base | ||||
[l/] fr | [l/] fr | ||||
l/l@ [hÑù] base | |||||
l/l@ [h–q] base | |||||
[l#] base | [l#] base | ||||
[l] fr | [l] fr | ||||
[l/2] fr | [l/2] fr | ||||
l/L2_uL [l/2] base | l/L2_uL [l/2] base | ||||
l/l_3 [l/] de | l/l_3 [l/] de | ||||
l/l_4 [ll] sq | l/l_4 [ll] sq | ||||
l/la [hÑù] base | |||||
l/la [h–q] base | |||||
[l#] base | [l#] base | ||||
[l] fr | [l] fr | ||||
[l/2] fr | [l/2] fr | ||||
[K] tn | [K] tn | ||||
l/l_a [l/3] base | l/l_a [l/3] base | ||||
[l/] fr | [l/] fr | ||||
l/le [hÑù] base | |||||
l/le [h–q] base | |||||
[l#] base | [l#] base | ||||
[l] fr | [l] fr | ||||
[l/2] fr | [l/2] fr | ||||
[&:] af | [&:] af | ||||
l/l_front [L] sq | l/l_front [L] sq | ||||
l/l_front_ [l/4] sq | l/l_front_ [l/4] sq | ||||
l/li [hÑù] base | |||||
l/li [h–q] base | |||||
[l#] base | [l#] base | ||||
[l] fr | [l] fr | ||||
[l/2] fr | [l/2] fr | ||||
ll/_ll [L] bg | ll/_ll [L] bg | ||||
l/l_long [l] base | l/l_long [l] base | ||||
[l] fr | [l] fr | ||||
l/lo [hÑù] base | |||||
l/lo [h–q] base | |||||
[l#] base | [l#] base | ||||
[l/2] fr | [l/2] fr | ||||
[K] nso | [K] nso | ||||
l^/l_rfx [l.] base | l^/l_rfx [l.] base | ||||
[l] ru | [l] ru | ||||
[l^] ru | [l^] ru | ||||
l/lu [hÑù] base | |||||
l/lu [h–q] base | |||||
[l#] base | [l#] base | ||||
[l] fr | [l] fr | ||||
[l/2] fr | [l/2] fr | ||||
[u] ak | [u] ak | ||||
[u:] wo | [u:] wo | ||||
[u] bo | [u] bo | ||||
[u1] ga | |||||
[u:] ga | [u:] ga | ||||
vowel/u# [u:] en-sc | vowel/u# [u:] en-sc | ||||
[Y] tr | [Y] tr |
char *EncodePhonemes(char *p, char *outptr, unsigned char *bad_phoneme) | |||||
/*********************************************************************/ | |||||
const char *EncodePhonemes(const char *p, char *outptr, unsigned char *bad_phoneme) | |||||
/***************************************************************************/ | |||||
/* Translate a phoneme string from ascii mnemonics to internal phoneme numbers, | /* Translate a phoneme string from ascii mnemonics to internal phoneme numbers, | ||||
from 'p' up to next blank . | from 'p' up to next blank . | ||||
Returns advanced 'p' | Returns advanced 'p' | ||||
int consumed; | int consumed; | ||||
unsigned int mnemonic_word; | unsigned int mnemonic_word; | ||||
bad_phoneme[0] = 0; | |||||
if(bad_phoneme != NULL) | |||||
bad_phoneme[0] = 0; | |||||
// skip initial blanks | // skip initial blanks | ||||
while(isspace(*p)) | while(isspace(*p)) | ||||
if(max_ph == 0) | if(max_ph == 0) | ||||
{ | { | ||||
// not recognised, report and ignore | // not recognised, report and ignore | ||||
bad_phoneme[0] = *p; | |||||
bad_phoneme[1] = 0; | |||||
if(bad_phoneme != NULL) | |||||
{ | |||||
bad_phoneme[0] = *p; | |||||
bad_phoneme[1] = 0; | |||||
} | |||||
*outptr++ = 0; | *outptr++ = 0; | ||||
return(p+1); | return(p+1); | ||||
} | } |
int n_bytes; | int n_bytes; | ||||
int letter; | int letter; | ||||
int len; | int len; | ||||
int ix; | |||||
int save_option_phonemes; | int save_option_phonemes; | ||||
char *p2; | char *p2; | ||||
char *pbuf; | char *pbuf; | ||||
char capital[20]; | char capital[20]; | ||||
char ph_buf[60]; | |||||
char ph_buf2[60]; | |||||
char ph_buf[80]; | |||||
char ph_buf2[80]; | |||||
char hexbuf[6]; | char hexbuf[6]; | ||||
ph_buf[0] = 0; | ph_buf[0] = 0; | ||||
if(ph_buf[0] == 0) | if(ph_buf[0] == 0) | ||||
{ | { | ||||
// character name not found | // character name not found | ||||
if(iswalpha(letter)) | |||||
Lookup(tr, "_?A", ph_buf); | |||||
if((letter >= 0x2800) && (letter <= 0x28ff)) | |||||
{ | |||||
// braille dots symbol | |||||
Lookup(tr, "_braille", ph_buf); | |||||
if(ph_buf[0] == 0) | |||||
{ | |||||
EncodePhonemes("br'e:l", ph_buf, NULL); | |||||
} | |||||
if((ph_buf[0]==0) && !iswspace(letter)) | |||||
Lookup(tr, "_??", ph_buf); | |||||
if(ph_buf[0] != 0) | |||||
{ | |||||
pbuf = ph_buf + strlen(ph_buf); | |||||
for(ix=0; ix<8; ix++) | |||||
{ | |||||
if(letter & (1 << ix)) | |||||
{ | |||||
*pbuf++ = phonPAUSE_VSHORT; | |||||
LookupLetter(tr, '1'+ix, 0, pbuf, 1); | |||||
pbuf += strlen(pbuf); | |||||
} | |||||
} | |||||
} | |||||
} | |||||
if(ph_buf[0] != 0) | |||||
if(ph_buf[0]== 0) | |||||
{ | { | ||||
// speak the hexadecimal number of the character code | |||||
sprintf(hexbuf,"%x",letter); | |||||
pbuf = ph_buf; | |||||
for(p2 = hexbuf; *p2 != 0; p2++) | |||||
if(iswalpha(letter)) | |||||
Lookup(tr, "_?A", ph_buf); | |||||
if((ph_buf[0]==0) && !iswspace(letter)) | |||||
Lookup(tr, "_??", ph_buf); | |||||
if(ph_buf[0] != 0) | |||||
{ | { | ||||
pbuf += strlen(pbuf); | |||||
*pbuf++ = phonPAUSE_VSHORT; | |||||
LookupLetter(tr, *p2, 0, pbuf, 1); | |||||
// speak the hexadecimal number of the character code | |||||
sprintf(hexbuf,"%x",letter); | |||||
pbuf = ph_buf; | |||||
for(p2 = hexbuf; *p2 != 0; p2++) | |||||
{ | |||||
pbuf += strlen(pbuf); | |||||
*pbuf++ = phonPAUSE_VSHORT; | |||||
LookupLetter(tr, *p2, 0, pbuf, 1); | |||||
} | |||||
} | } | ||||
} | } | ||||
} | } | ||||
return(0); // speak digits individually | return(0); // speak digits individually | ||||
if(tr->langopts.numbers != 0) | if(tr->langopts.numbers != 0) | ||||
{ | |||||
return(TranslateNumber_1(tr, word1, ph_out, flags, wtab, control)); | return(TranslateNumber_1(tr, word1, ph_out, flags, wtab, control)); | ||||
} | |||||
return(0); | return(0); | ||||
} // end of TranslateNumber | } // end of TranslateNumber | ||||
int LookupPhonemeString(const char *string); | int LookupPhonemeString(const char *string); | ||||
int PhonemeCode(unsigned int mnem); | int PhonemeCode(unsigned int mnem); | ||||
char *EncodePhonemes(char *p, char *outptr, unsigned char *bad_phoneme); | |||||
const char *EncodePhonemes(const char *p, char *outptr, unsigned char *bad_phoneme); | |||||
void DecodePhonemes(const char *inptr, char *outptr); | void DecodePhonemes(const char *inptr, char *outptr); | ||||
extern const char *WordToString(unsigned int word); | extern const char *WordToString(unsigned int word); |
#include "translate.h" | #include "translate.h" | ||||
#include "wave.h" | #include "wave.h" | ||||
const char *version_string = "1.46.17 05.May.12"; | |||||
const char *version_string = "1.46.19 27.Jun.12"; | |||||
const int version_phdata = 0x014600; | const int version_phdata = 0x014600; | ||||
int option_device_number = -1; | int option_device_number = -1; |
void SetIndicLetters(Translator *tr) | void SetIndicLetters(Translator *tr) | ||||
{//================================= | {//================================= | ||||
// Set letter types for Indic scripts, Devanagari, Tamil, etc | // Set letter types for Indic scripts, Devanagari, Tamil, etc | ||||
static const char dev_consonants2[] = {0x02,0x03,0x58,0x59,0x5a,0x5b,0x5c,0x5d,0x5e,0x5f}; | |||||
static const char dev_consonants2[] = {0x02,0x03,0x58,0x59,0x5a,0x5b,0x5c,0x5d,0x5e,0x5f,0x7b,0x7c,0x7e,0x7f,0}; | |||||
static const char dev_vowels2[] = {0x60,0x61, 0x55,0x56,0x57,0x62,0x63,0}; // non-consecutive vowels and vowel-signs | |||||
memset(tr->letter_bits,0,sizeof(tr->letter_bits)); | memset(tr->letter_bits,0,sizeof(tr->letter_bits)); | ||||
SetLetterBitsRange(tr,LETTERGP_A,0x04,0x14); // vowel letters | SetLetterBitsRange(tr,LETTERGP_A,0x04,0x14); // vowel letters | ||||
SetLetterBitsRange(tr,LETTERGP_A,0x3e,0x4d); // + vowel signs, and virama | SetLetterBitsRange(tr,LETTERGP_A,0x3e,0x4d); // + vowel signs, and virama | ||||
SetLetterBitsRange(tr,LETTERGP_A,0x55,0x57); // + vowel signs | |||||
SetLetterBits(tr,LETTERGP_A, dev_vowels2); // + extra vowels and vowel signs | |||||
SetLetterBitsRange(tr,LETTERGP_B,0x3e,0x4d); // vowel signs, and virama | SetLetterBitsRange(tr,LETTERGP_B,0x3e,0x4d); // vowel signs, and virama | ||||
SetLetterBitsRange(tr,LETTERGP_B,0x55,0x57); // + vowel signs | |||||
SetLetterBits(tr,LETTERGP_B, dev_vowels2); // + extra vowels and vowel signs | |||||
SetLetterBitsRange(tr,LETTERGP_C,0x15,0x39); // the main consonant range | SetLetterBitsRange(tr,LETTERGP_C,0x15,0x39); // the main consonant range | ||||
SetLetterBits(tr,LETTERGP_C,dev_consonants2); // + additional consonants | SetLetterBits(tr,LETTERGP_C,dev_consonants2); // + additional consonants | ||||
SetLetterBitsRange(tr,LETTERGP_Y,0x04,0x14); // vowel letters | SetLetterBitsRange(tr,LETTERGP_Y,0x04,0x14); // vowel letters | ||||
SetLetterBitsRange(tr,LETTERGP_Y,0x3e,0x4c); // + vowel signs | SetLetterBitsRange(tr,LETTERGP_Y,0x3e,0x4c); // + vowel signs | ||||
SetLetterBitsRange(tr,LETTERGP_Y,0x55,0x57); // + vowel signs | |||||
SetLetterBits(tr,LETTERGP_Y, dev_vowels2); // + extra vowels and vowel signs | |||||
tr->langopts.param[LOPT_UNPRONOUNCABLE] = 1; // disable check for unpronounceable words | tr->langopts.param[LOPT_UNPRONOUNCABLE] = 1; // disable check for unpronounceable words | ||||
tr->langopts.suffix_add_e = tr->letter_bits_offset + 0x4d; //virama | tr->langopts.suffix_add_e = tr->letter_bits_offset + 0x4d; //virama | ||||
{ | { | ||||
tr->letter_bits_offset = OFFSET_GURMUKHI; | tr->letter_bits_offset = OFFSET_GURMUKHI; | ||||
} | } | ||||
if(name2 == L('n','e')) | |||||
{ | |||||
tr->langopts.break_numbers = 0x2aaaa8; | |||||
tr->langopts.max_digits = 22; | |||||
tr->langopts.numbers2 |= NUM2_ENGLISH_NUMERALS; | |||||
} | |||||
SetIndicLetters(tr); | SetIndicLetters(tr); | ||||
} | } | ||||
break; | break; |
if((c >= 0x1100) && (c <= 0x11ff)) | if((c >= 0x1100) && (c <= 0x11ff)) | ||||
return(1); //Korean jamo | return(1); //Korean jamo | ||||
if((c >= 0x2800) && (c <= 0x28ff)) | |||||
return(1); // braille | |||||
if((c > 0x3040) && (c <= 0xa700)) | if((c > 0x3040) && (c <= 0xa700)) | ||||
return(1); // Chinese/Japanese. Should never get here, but Mac OS 10.4's iswalpha seems to be broken, so just make sure | return(1); // Chinese/Japanese. Should never get here, but Mac OS 10.4's iswalpha seems to be broken, so just make sure | ||||
if(word_phonemes[0] == phonSWITCH) | if(word_phonemes[0] == phonSWITCH) | ||||
return(0); | return(0); | ||||
if((tr->langopts.numbers2 & NUM2_ENGLISH_NUMERALS) && !(wtab->flags & FLAG_CHAR_REPLACED)) | |||||
{ | |||||
// for this language, speak English numerals (0-9) with the English voice | |||||
sprintf(word_phonemes,"%c",phonSWITCH); | |||||
return(0); | |||||
} | |||||
found = TranslateNumber(tr, word1, phonemes, dictionary_flags, wtab, 0); | found = TranslateNumber(tr, word1, phonemes, dictionary_flags, wtab, 0); | ||||
} | } | ||||
static int SubstituteChar(Translator *tr, unsigned int c, unsigned int next_in, int *insert) | |||||
static int SubstituteChar(Translator *tr, unsigned int c, unsigned int next_in, int *insert, int *wordflags) | |||||
{//========================================================================================= | {//========================================================================================= | ||||
int ix; | int ix; | ||||
unsigned int word; | unsigned int word; | ||||
if(upper_case) | if(upper_case) | ||||
new_c = towupper(new_c); | new_c = towupper(new_c); | ||||
*wordflags |= FLAG_CHAR_REPLACED; | |||||
return(new_c); | return(new_c); | ||||
} | } | ||||
static int TranslateChar(Translator *tr, char *ptr, int prev_in, unsigned int c, unsigned int next_in, int *insert) | |||||
static int TranslateChar(Translator *tr, char *ptr, int prev_in, unsigned int c, unsigned int next_in, int *insert, int *wordflags) | |||||
{//================================================================================================================ | {//================================================================================================================ | ||||
// To allow language specific examination and replacement of characters | // To allow language specific examination and replacement of characters | ||||
} | } | ||||
break; | break; | ||||
} | } | ||||
return(SubstituteChar(tr,c,next_in,insert)); | |||||
return(SubstituteChar(tr, c, next_in, insert, wordflags)); | |||||
} | } | ||||
word_flags |= FLAG_COMMA_AFTER; | word_flags |= FLAG_COMMA_AFTER; | ||||
} | } | ||||
c = TranslateChar(tr, &source[source_index], prev_in,c, next_in, &char_inserted); // optional language specific function | |||||
c = TranslateChar(tr, &source[source_index], prev_in,c, next_in, &char_inserted, &word_flags); // optional language specific function | |||||
if(c == 8) | if(c == 8) | ||||
continue; // ignore this character | continue; // ignore this character | ||||
#define FLAG_MULTIPLE_SPACES 0x40000 // word is preceded by multiple spaces, newline, or tab | #define FLAG_MULTIPLE_SPACES 0x40000 // word is preceded by multiple spaces, newline, or tab | ||||
#define FLAG_INDIVIDUAL_DIGITS 0x80000 // speak number as individual digits | #define FLAG_INDIVIDUAL_DIGITS 0x80000 // speak number as individual digits | ||||
#define FLAG_DELETE_WORD 0x100000 // don't speak this word, it has been spoken as part of the previous word | #define FLAG_DELETE_WORD 0x100000 // don't speak this word, it has been spoken as part of the previous word | ||||
#define FLAG_CHAR_REPLACED 0x200000 // characters have been replaced by .replace in the *_rules | |||||
#define FLAG_SUFFIX_VOWEL 0x08000000 // remember an initial vowel from the suffix | #define FLAG_SUFFIX_VOWEL 0x08000000 // remember an initial vowel from the suffix | ||||
#define FLAG_NO_TRACE 0x10000000 // passed to TranslateRules() to suppress dictionary lookup printout | #define FLAG_NO_TRACE 0x10000000 // passed to TranslateRules() to suppress dictionary lookup printout | ||||
int numbers; | int numbers; | ||||
#define NUM2_MULTIPLE_ORDINAL 0x1000 | #define NUM2_MULTIPLE_ORDINAL 0x1000 | ||||
#define NUM2_ENGLISH_NUMERALS 0x2000 | |||||
// bits 1-4 use variant form of numbers before thousands,millions,etc. | // bits 1-4 use variant form of numbers before thousands,millions,etc. | ||||
// bit6=(LANG=pl) two forms of plural, M or MA | // bit6=(LANG=pl) two forms of plural, M or MA | ||||
// bit7=(LANG-ru) use MB for 1 thousand, million, etc | // bit7=(LANG-ru) use MB for 1 thousand, million, etc | ||||
// bit8=(LANG=cs,sk) two forms of plural, M or MA | // bit8=(LANG=cs,sk) two forms of plural, M or MA | ||||
// bit9=(LANG=rw) say "thousand" and "million" before its number, not after | // bit9=(LANG=rw) say "thousand" and "million" before its number, not after | ||||
// bit12=(LANG=el,es) use ordinal form of hundreds and tens as well as units | // bit12=(LANG=el,es) use ordinal form of hundreds and tens as well as units | ||||
// bit13=(LANG=ne) speak (non-replaced) English numerals in English | |||||
int numbers2; | int numbers2; | ||||
#define BREAK_THOUSANDS 0x49249248 | #define BREAK_THOUSANDS 0x49249248 |