Browse Source

[1.46.19]

Speak names of characters U+2800 to U+28FF, Braille dot symbols.
Language option to speak numbers which use characters '0' to '9' with the English voice.
Add some more characters to the vowel and consonant lists for Indian languages.


git-svn-id: https://espeak.svn.sourceforge.net/svnroot/espeak/trunk@284 d46cf337-b52f-0410-862d-fd96e6ae7743
master
jonsd 13 years ago
parent
commit
084584ee16

+ 1
- 0
dictsource/de_list View File

@@ -665,6 +665,7 @@ numero $1
oboe o:b'o:@
orgie $alt
ok o:k'e:
ödem Y:dEm

paradies $3
passage pas'aZ@

+ 14
- 2
dictsource/dict_phonemes View File

@@ -102,7 +102,7 @@ p Q r R s s; t T
ts v x z


Dictionary en_dict 2012-04-07
Dictionary en_dict 2012-06-15

0 3 3: @ @- @2 @5 @L
a a# A: A@ aa aI aI3 aI@
@@ -202,7 +202,7 @@ q r r. s S s. t T
t. th th. v w x z


Dictionary ta_dict 2012-06-06
Dictionary ta_dict 2012-06-27

a a: aI aU e E e: i
I i: o o: u U u2 u:
@@ -797,3 +797,15 @@ o u U u# Y
k l m n N p q r
s S t tS v w X z
Z


Dictionary ga_dict 2012-06-27

0 @ a A: e E e: i
I i: o O o: u U u1
u: V

: ; b c C d d[ dZ
f g h j k l m n
p r R s S t t[ tS
v w x X z

+ 1
- 0
dictsource/en_list View File

@@ -103,6 +103,7 @@ _hok h'Uk
_cap k,ap@-t@L
_?A l,Et3
_?? sImb@L
_braille br'eIl
_#9 tab
_#32 speIs
//… _::d%0td,0t // for ellipsis or ... while reading

+ 6
- 1
dictsource/ta_list View File

@@ -118,12 +118,17 @@ U+bf9 ru:ba:j
(பி . ஏ) bije: $dot
(கி . மீ) kilo:mi:t.t.Vr $dot
aud ostr'eIlIVn||d'olVz
cny tS'aIni:z||j'uVn
eur ju:ro:z
gbp br'itiS||p'aUndz
inr 'IndIVn||r'u:ba:j
inr 'IndIVn||r'u:pi:z
jpy dZ'a:pVni:z||j'en
rub r'VSVn||r'u:bVlz
usd j'ues||d'olVz




// numbers
_0 suz.ijVm // சுழியம்
_1 onRU

+ 1
- 0
dictsource/ta_rules View File

@@ -502,6 +502,7 @@ _மன்மதக்) க (ுகை g
//endsort

//sort
ஷங்) க (ர kV
_கோபி) க kV#
_ஸ்டா) க kV#
_ஜமாய்) க kV#

+ 8
- 7
phsource/compile_report View File

@@ -79,7 +79,7 @@ consonants 10 116
bo 10 152
kk 20 117
fa 9 110
ga 24 124
ga 25 125

Data file Used by
b/b [b] base
@@ -407,7 +407,7 @@ l/l_ [l] base
[l/] fr
l/l_@ [l/3] base
[l/] fr
l/l@ [hÑù] base
l/l@ [h–q] base
[l#] base
[l] fr
[l/2] fr
@@ -441,7 +441,7 @@ l/L2_oL [l/2] base
l/L2_uL [l/2] base
l/l_3 [l/] de
l/l_4 [ll] sq
l/la [hÑù] base
l/la [h–q] base
[l#] base
[l] fr
[l/2] fr
@@ -449,7 +449,7 @@ l/la [h
[K] tn
l/l_a [l/3] base
[l/] fr
l/le [hÑù] base
l/le [h–q] base
[l#] base
[l] fr
[l/2] fr
@@ -461,7 +461,7 @@ l/L_eL_af [&] af
[&:] af
l/l_front [L] sq
l/l_front_ [l/4] sq
l/li [hÑù] base
l/li [h–q] base
[l#] base
[l] fr
[l/2] fr
@@ -475,7 +475,7 @@ ll/ll [L] bg
ll/_ll [L] bg
l/l_long [l] base
[l] fr
l/lo [hÑù] base
l/lo [h–q] base
[l#] base
[l/2] fr
[K] nso
@@ -485,7 +485,7 @@ l/l_o [l/3] base
l^/l_rfx [l.] base
[l] ru
[l^] ru
l/lu [hÑù] base
l/lu [h–q] base
[l#] base
[l] fr
[l/2] fr
@@ -2143,6 +2143,7 @@ vowel/u [u] base
[u] ak
[u:] wo
[u] bo
[u1] ga
[u:] ga
vowel/u# [u:] en-sc
[Y] tr

+ 9
- 5
src/dictionary.cpp View File

@@ -347,8 +347,8 @@ int HashDictionary(const char *string)



char *EncodePhonemes(char *p, char *outptr, unsigned char *bad_phoneme)
/*********************************************************************/
const char *EncodePhonemes(const char *p, char *outptr, unsigned char *bad_phoneme)
/***************************************************************************/
/* Translate a phoneme string from ascii mnemonics to internal phoneme numbers,
from 'p' up to the next blank.
Returns advanced 'p'
@@ -364,7 +364,8 @@ char *EncodePhonemes(char *p, char *outptr, unsigned char *bad_phoneme)
int consumed;
unsigned int mnemonic_word;

bad_phoneme[0] = 0;
if(bad_phoneme != NULL)
bad_phoneme[0] = 0;

// skip initial blanks
while(isspace(*p))
@@ -424,8 +425,11 @@ char *EncodePhonemes(char *p, char *outptr, unsigned char *bad_phoneme)
if(max_ph == 0)
{
// not recognised, report and ignore
bad_phoneme[0] = *p;
bad_phoneme[1] = 0;
if(bad_phoneme != NULL)
{
bad_phoneme[0] = *p;
bad_phoneme[1] = 0;
}
*outptr++ = 0;
return(p+1);
}

+ 44
- 15
src/numbers.cpp View File

@@ -590,12 +590,13 @@ int TranslateLetter(Translator *tr, char *word, char *phonemes, int control)
int n_bytes;
int letter;
int len;
int ix;
int save_option_phonemes;
char *p2;
char *pbuf;
char capital[20];
char ph_buf[60];
char ph_buf2[60];
char ph_buf[80];
char ph_buf2[80];
char hexbuf[6];

ph_buf[0] = 0;
@@ -646,22 +647,49 @@ int TranslateLetter(Translator *tr, char *word, char *phonemes, int control)
if(ph_buf[0] == 0)
{
// character name not found
if(iswalpha(letter))
Lookup(tr, "_?A", ph_buf);
if((letter >= 0x2800) && (letter <= 0x28ff))
{
// braille dots symbol
Lookup(tr, "_braille", ph_buf);
if(ph_buf[0] == 0)
{
EncodePhonemes("br'e:l", ph_buf, NULL);
}

if((ph_buf[0]==0) && !iswspace(letter))
Lookup(tr, "_??", ph_buf);
if(ph_buf[0] != 0)
{
pbuf = ph_buf + strlen(ph_buf);
for(ix=0; ix<8; ix++)
{
if(letter & (1 << ix))
{
*pbuf++ = phonPAUSE_VSHORT;
LookupLetter(tr, '1'+ix, 0, pbuf, 1);
pbuf += strlen(pbuf);
}
}
}
}

if(ph_buf[0] != 0)
if(ph_buf[0]== 0)
{
// speak the hexadecimal number of the character code
sprintf(hexbuf,"%x",letter);
pbuf = ph_buf;
for(p2 = hexbuf; *p2 != 0; p2++)
if(iswalpha(letter))
Lookup(tr, "_?A", ph_buf);

if((ph_buf[0]==0) && !iswspace(letter))
Lookup(tr, "_??", ph_buf);
if(ph_buf[0] != 0)
{
pbuf += strlen(pbuf);
*pbuf++ = phonPAUSE_VSHORT;
LookupLetter(tr, *p2, 0, pbuf, 1);
// speak the hexadecimal number of the character code
sprintf(hexbuf,"%x",letter);
pbuf = ph_buf;
for(p2 = hexbuf; *p2 != 0; p2++)
{
pbuf += strlen(pbuf);
*pbuf++ = phonPAUSE_VSHORT;
LookupLetter(tr, *p2, 0, pbuf, 1);
}
}
}
}
@@ -1951,8 +1979,9 @@ int TranslateNumber(Translator *tr, char *word1, char *ph_out, unsigned int *fla
return(0); // speak digits individually

if(tr->langopts.numbers != 0)
{
return(TranslateNumber_1(tr, word1, ph_out, flags, wtab, control));
}
return(0);
} // end of TranslateNumber


+ 1
- 1
src/phoneme.h View File

@@ -161,7 +161,7 @@ typedef struct {
int LookupPhonemeString(const char *string);
int PhonemeCode(unsigned int mnem);

char *EncodePhonemes(char *p, char *outptr, unsigned char *bad_phoneme);
const char *EncodePhonemes(const char *p, char *outptr, unsigned char *bad_phoneme);
void DecodePhonemes(const char *inptr, char *outptr);

extern const char *WordToString(unsigned int word);

+ 1
- 1
src/synthdata.cpp View File

@@ -35,7 +35,7 @@
#include "translate.h"
#include "wave.h"

const char *version_string = "1.46.17 05.May.12";
const char *version_string = "1.46.19 27.Jun.12";
const int version_phdata = 0x014600;

int option_device_number = -1;

+ 11
- 4
src/tr_languages.cpp View File

@@ -308,22 +308,23 @@ static void SetCyrillicLetters(Translator *tr)
void SetIndicLetters(Translator *tr)
{//=================================
// Set letter types for Indic scripts, Devanagari, Tamil, etc
static const char dev_consonants2[] = {0x02,0x03,0x58,0x59,0x5a,0x5b,0x5c,0x5d,0x5e,0x5f};
static const char dev_consonants2[] = {0x02,0x03,0x58,0x59,0x5a,0x5b,0x5c,0x5d,0x5e,0x5f,0x7b,0x7c,0x7e,0x7f,0};
static const char dev_vowels2[] = {0x60,0x61, 0x55,0x56,0x57,0x62,0x63,0}; // non-consecutive vowels and vowel-signs

memset(tr->letter_bits,0,sizeof(tr->letter_bits));
SetLetterBitsRange(tr,LETTERGP_A,0x04,0x14); // vowel letters
SetLetterBitsRange(tr,LETTERGP_A,0x3e,0x4d); // + vowel signs, and virama
SetLetterBitsRange(tr,LETTERGP_A,0x55,0x57); // + vowel signs
SetLetterBits(tr,LETTERGP_A, dev_vowels2); // + extra vowels and vowel signs

SetLetterBitsRange(tr,LETTERGP_B,0x3e,0x4d); // vowel signs, and virama
SetLetterBitsRange(tr,LETTERGP_B,0x55,0x57); // + vowel signs
SetLetterBits(tr,LETTERGP_B, dev_vowels2); // + extra vowels and vowel signs

SetLetterBitsRange(tr,LETTERGP_C,0x15,0x39); // the main consonant range
SetLetterBits(tr,LETTERGP_C,dev_consonants2); // + additional consonants

SetLetterBitsRange(tr,LETTERGP_Y,0x04,0x14); // vowel letters
SetLetterBitsRange(tr,LETTERGP_Y,0x3e,0x4c); // + vowel signs
SetLetterBitsRange(tr,LETTERGP_Y,0x55,0x57); // + vowel signs
SetLetterBits(tr,LETTERGP_Y, dev_vowels2); // + extra vowels and vowel signs

tr->langopts.param[LOPT_UNPRONOUNCABLE] = 1; // disable check for unpronouncable words
tr->langopts.suffix_add_e = tr->letter_bits_offset + 0x4d; //virama
@@ -711,6 +712,12 @@ Translator *SelectTranslator(const char *name)
{
tr->letter_bits_offset = OFFSET_GURMUKHI;
}
if(name2 == L('n','e'))
{
tr->langopts.break_numbers = 0x2aaaa8;
tr->langopts.max_digits = 22;
tr->langopts.numbers2 |= NUM2_ENGLISH_NUMERALS;
}
SetIndicLetters(tr);
}
break;

+ 16
- 4
src/translate.cpp View File

@@ -407,6 +407,9 @@ int IsAlpha(unsigned int c)
if((c >= 0x1100) && (c <= 0x11ff))
return(1); //Korean jamo

if((c >= 0x2800) && (c <= 0x28ff))
return(1); // braille

if((c > 0x3040) && (c <= 0xa700))
return(1); // Chinese/Japanese. Should never get here, but Mac OS 10.4's iswalpha seems to be broken, so just make sure

@@ -981,6 +984,13 @@ if((wmark > 0) && (wmark < 8))
if(word_phonemes[0] == phonSWITCH)
return(0);

if((tr->langopts.numbers2 & NUM2_ENGLISH_NUMERALS) && !(wtab->flags & FLAG_CHAR_REPLACED))
{
// for this language, speak English numerals (0-9) with the English voice
sprintf(word_phonemes,"%c",phonSWITCH);
return(0);
}

found = TranslateNumber(tr, word1, phonemes, dictionary_flags, wtab, 0);
}

@@ -2223,7 +2233,7 @@ static int EmbeddedCommand(unsigned int &source_index)



static int SubstituteChar(Translator *tr, unsigned int c, unsigned int next_in, int *insert)
static int SubstituteChar(Translator *tr, unsigned int c, unsigned int next_in, int *insert, int *wordflags)
{//=========================================================================================
int ix;
unsigned int word;
@@ -2285,12 +2295,14 @@ static int SubstituteChar(Translator *tr, unsigned int c, unsigned int next_in,

if(upper_case)
new_c = towupper(new_c);

*wordflags |= FLAG_CHAR_REPLACED;
return(new_c);

}


static int TranslateChar(Translator *tr, char *ptr, int prev_in, unsigned int c, unsigned int next_in, int *insert)
static int TranslateChar(Translator *tr, char *ptr, int prev_in, unsigned int c, unsigned int next_in, int *insert, int *wordflags)
{//================================================================================================================
// To allow language specific examination and replacement of characters

@@ -2368,7 +2380,7 @@ static int TranslateChar(Translator *tr, char *ptr, int prev_in, unsigned int c,
}
break;
}
return(SubstituteChar(tr,c,next_in,insert));
return(SubstituteChar(tr, c, next_in, insert, wordflags));
}


@@ -2680,7 +2692,7 @@ if((c == '/') && (tr->langopts.testing & 2) && IsDigit09(next_in) && IsAlpha(pre
word_flags |= FLAG_COMMA_AFTER;
}

c = TranslateChar(tr, &source[source_index], prev_in,c, next_in, &char_inserted); // optional language specific function
c = TranslateChar(tr, &source[source_index], prev_in,c, next_in, &char_inserted, &word_flags); // optional language specific function
if(c == 8)
continue; // ignore this character


+ 3
- 0
src/translate.h View File

@@ -110,6 +110,7 @@
#define FLAG_MULTIPLE_SPACES 0x40000 // word is preceded by multiple spaces, newline, or tab
#define FLAG_INDIVIDUAL_DIGITS 0x80000 // speak number as individual digits
#define FLAG_DELETE_WORD 0x100000 // don't speak this word, it has been spoken as part of the previous word
#define FLAG_CHAR_REPLACED 0x200000 // characters have been replaced by .replace in the *_rules

#define FLAG_SUFFIX_VOWEL 0x08000000 // remember an initial vowel from the suffix
#define FLAG_NO_TRACE 0x10000000 // passed to TranslateRules() to suppress dictionary lookup printout
@@ -444,12 +445,14 @@ typedef struct {
int numbers;

#define NUM2_MULTIPLE_ORDINAL 0x1000
#define NUM2_ENGLISH_NUMERALS 0x2000
// bits 1-4 use variant form of numbers before thousands,millions,etc.
// bit6=(LANG=pl) two forms of plural, M or MA
// bit7=(LANG=ru) use MB for 1 thousand, million, etc
// bit8=(LANG=cs,sk) two forms of plural, M or MA
// bit9=(LANG=rw) say "thousand" and "million" before its number, not after
// bit12=(LANG=el,es) use ordinal form of hundreds and tens as well as units
// bit13=(LANG=ne) speak (non-replaced) English numerals in English
int numbers2;

#define BREAK_THOUSANDS 0x49249248

Loading…
Cancel
Save