Speak names of characters U+2800 to U+28FF, Braille dot symbols. Language option to speak numbers which use characters '0' to '9' with the English voice. Add some more characters to the vowel and consonant lists for Indian languages. git-svn-id: https://espeak.svn.sourceforge.net/svnroot/espeak/trunk@284 d46cf337-b52f-0410-862d-fd96e6ae7743

13 years ago · 084584ee16
--- a/dictsource/de_list
+++ b/dictsource/de_list
@@ -665,6 +665,7 @@ numero           $1
 oboe            o:b'o:@
 orgie            $alt
 ok              o:k'e:
 ödem		Y:dEm

 paradies         $3
 passage         pas'aZ@
--- a/dictsource/dict_phonemes
+++ b/dictsource/dict_phonemes
@@ -102,7 +102,7 @@ p    Q    r    R    s    s;   t    T
 ts   v    x    z    


 Dictionary en_dict  2012-04-07
 Dictionary en_dict  2012-06-15

 0    3    3:   @    @-   @2   @5   @L   
 a    a#   A:   A@   aa   aI   aI3  aI@  
@@ -202,7 +202,7 @@ q    r    r.   s    S    s.   t    T
 t.   th   th.  v    w    x    z    


 Dictionary ta_dict  2012-06-06
 Dictionary ta_dict  2012-06-27

 a    a:   aI   aU   e    E    e:   i    
 I    i:   o    o:   u    U    u2   u:   
@@ -797,3 +797,15 @@ o    u    U    u#   Y
 k    l    m    n    N    p    q    r    
 s    S    t    tS   v    w    X    z    
 Z    


 Dictionary ga_dict  2012-06-27

 0    @    a    A:   e    E    e:   i    
 I    i:   o    O    o:   u    U    u1   
 u:   V    

 :    ;    b    c    C    d    d[   dZ   
 f    g    h    j    k    l    m    n    
 p    r    R    s    S    t    t[   tS   
 v    w    x    X    z    
--- a/dictsource/en_list
+++ b/dictsource/en_list
@@ -103,6 +103,7 @@ _hok	h'Uk
 _cap	k,ap@-t@L
 _?A	l,Et3
 _??	sImb@L
 _braille br'eIl
 _#9	tab
 _#32	speIs
 //…	_::d%0td,0t     // for ellipsis or ...  while reading
--- a/dictsource/ta_list
+++ b/dictsource/ta_list
@@ -118,12 +118,17 @@ U+bf9	ru:ba:j
 (பி . ஏ)	bije: $dot
 (கி . மீ)	kilo:mi:t.t.Vr $dot
 aud	ostr'eIlIVn||d'olVz
 cny	tS'aIni:z||j'uVn
 eur	ju:ro:z
 gbp	br'itiS||p'aUndz
 inr	'IndIVn||r'u:ba:j
 inr	'IndIVn||r'u:pi:z
 jpy	dZ'a:pVni:z||j'en
 rub	r'VSVn||r'u:bVlz
 usd     j'ues||d'olVz




 // numbers
 _0	suz.ijVm    // சுழியம்
 _1	onRU
--- a/dictsource/ta_rules
+++ b/dictsource/ta_rules
@@ -502,6 +502,7 @@ _மன்மதக்) க (ுகை		g
 //endsort

 //sort
   ஷங்) க (ர		kV
 _கோபி) க		kV#
 _ஸ்டா) க		kV#
 _ஜமாய்) க		kV#
--- a/phsource/compile_report
+++ b/phsource/compile_report
@@ -79,7 +79,7 @@ consonants  10  116
      bo  10  152
      kk  20  117
      fa   9  110
      ga  24  124
      ga  25  125

 Data file      Used by
 b/b             [b] base
@@ -407,7 +407,7 @@ l/l_            [l] base
                [l/] fr
 l/l_@           [l/3] base
                [l/] fr
 l/l@            [hÑù] base
 l/l@            [h–q] base
                [l#] base
                [l] fr
                [l/2] fr
@@ -441,7 +441,7 @@ l/L2_oL         [l/2] base
 l/L2_uL         [l/2] base
 l/l_3           [l/] de
 l/l_4           [ll] sq
 l/la            [hÑù] base
 l/la            [h–q] base
                [l#] base
                [l] fr
                [l/2] fr
@@ -449,7 +449,7 @@ l/la            [h
                [K] tn
 l/l_a           [l/3] base
                [l/] fr
 l/le            [hÑù] base
 l/le            [h–q] base
                [l#] base
                [l] fr
                [l/2] fr
@@ -461,7 +461,7 @@ l/L_eL_af       [&] af
                [&:] af
 l/l_front       [L] sq
 l/l_front_      [l/4] sq
 l/li            [hÑù] base
 l/li            [h–q] base
                [l#] base
                [l] fr
                [l/2] fr
@@ -475,7 +475,7 @@ ll/ll           [L] bg
 ll/_ll          [L] bg
 l/l_long        [l] base
                [l] fr
 l/lo            [hÑù] base
 l/lo            [h–q] base
                [l#] base
                [l/2] fr
                [K] nso
@@ -485,7 +485,7 @@ l/l_o           [l/3] base
 l^/l_rfx        [l.] base
                [l] ru
                [l^] ru
 l/lu            [hÑù] base
 l/lu            [h–q] base
                [l#] base
                [l] fr
                [l/2] fr
@@ -2143,6 +2143,7 @@ vowel/u         [u] base
                [u] ak
                [u:] wo
                [u] bo
                [u1] ga
                [u:] ga
 vowel/u#        [u:] en-sc
                [Y] tr
--- a/src/dictionary.cpp
+++ b/src/dictionary.cpp
@@ -347,8 +347,8 @@ int HashDictionary(const char *string)



 char *EncodePhonemes(char *p, char *outptr, unsigned char *bad_phoneme)
 /*********************************************************************/
 const char *EncodePhonemes(const char *p, char *outptr, unsigned char *bad_phoneme)
 /***************************************************************************/
 /* Translate a phoneme string from ascii mnemonics to internal phoneme numbers,
   from 'p' up to next blank .
   Returns advanced 'p'
@@ -364,7 +364,8 @@ char *EncodePhonemes(char *p, char *outptr, unsigned char *bad_phoneme)
 	int  consumed;
 	unsigned int  mnemonic_word;

 	bad_phoneme[0] = 0;
 	if(bad_phoneme != NULL)
 		bad_phoneme[0] = 0;

 	// skip initial blanks
 	while(isspace(*p))
@@ -424,8 +425,11 @@ char *EncodePhonemes(char *p, char *outptr, unsigned char *bad_phoneme)
 			if(max_ph == 0)
 			{
 				// not recognised, report and ignore
 				bad_phoneme[0] = *p;
 				bad_phoneme[1] = 0;
 				if(bad_phoneme != NULL)
 				{
 					bad_phoneme[0] = *p;
 					bad_phoneme[1] = 0;
 				}
 				*outptr++ = 0;
 				return(p+1);
 			}
--- a/src/numbers.cpp
+++ b/src/numbers.cpp
@@ -590,12 +590,13 @@ int TranslateLetter(Translator *tr, char *word, char *phonemes, int control)
 	int n_bytes;
 	int letter;
 	int len;
 	int ix;
 	int save_option_phonemes;
 	char *p2;
 	char *pbuf;
 	char capital[20];
 	char ph_buf[60];
 	char ph_buf2[60];
 	char ph_buf[80];
 	char ph_buf2[80];
 	char hexbuf[6];

 	ph_buf[0] = 0;
@@ -646,22 +647,49 @@ int TranslateLetter(Translator *tr, char *word, char *phonemes, int control)
 	if(ph_buf[0] == 0)
 	{
 		// character name not found
 		if(iswalpha(letter))
 			Lookup(tr, "_?A", ph_buf);
 		if((letter >= 0x2800) && (letter <= 0x28ff))
 		{
 			// braille dots symbol
 			Lookup(tr, "_braille", ph_buf);
 			if(ph_buf[0] == 0)
 			{
 				EncodePhonemes("br'e:l", ph_buf, NULL);
 			}

 		if((ph_buf[0]==0) && !iswspace(letter))
 			Lookup(tr, "_??", ph_buf);
 			if(ph_buf[0] != 0)
 			{
 				pbuf = ph_buf + strlen(ph_buf);
 				for(ix=0; ix<8; ix++)
 				{
 					if(letter & (1 << ix))
 					{
 						*pbuf++ = phonPAUSE_VSHORT;
 						LookupLetter(tr, '1'+ix, 0, pbuf, 1);
 						pbuf += strlen(pbuf);
 					}
 				}
 			}
 		}

 		if(ph_buf[0] != 0)
 		if(ph_buf[0]== 0)
 		{
 			// speak the hexadecimal number of the character code
 			sprintf(hexbuf,"%x",letter);
 			pbuf = ph_buf;
 			for(p2 = hexbuf; *p2 != 0; p2++)
 			if(iswalpha(letter))
 				Lookup(tr, "_?A", ph_buf);

 			if((ph_buf[0]==0) && !iswspace(letter))
 				Lookup(tr, "_??", ph_buf);
 	
 			if(ph_buf[0] != 0)
 			{
 				pbuf += strlen(pbuf);
 				*pbuf++ = phonPAUSE_VSHORT;
 				LookupLetter(tr, *p2, 0, pbuf, 1);
 				// speak the hexadecimal number of the character code
 				sprintf(hexbuf,"%x",letter);
 				pbuf = ph_buf;
 				for(p2 = hexbuf; *p2 != 0; p2++)
 				{
 					pbuf += strlen(pbuf);
 					*pbuf++ = phonPAUSE_VSHORT;
 					LookupLetter(tr, *p2, 0, pbuf, 1);
 				}
 			}
 		}
 	}
@@ -1951,8 +1979,9 @@ int TranslateNumber(Translator *tr, char *word1, char *ph_out, unsigned int *fla
 		return(0);  // speak digits individually

 	if(tr->langopts.numbers != 0)
 	{
 		return(TranslateNumber_1(tr, word1, ph_out, flags, wtab, control));

 	}
 	return(0);
 }  // end of TranslateNumber

--- a/src/phoneme.h
+++ b/src/phoneme.h
@@ -161,7 +161,7 @@ typedef struct {
 int LookupPhonemeString(const char *string);
 int PhonemeCode(unsigned int mnem);

 char *EncodePhonemes(char *p, char *outptr, unsigned char *bad_phoneme);
 const char *EncodePhonemes(const char *p, char *outptr, unsigned char *bad_phoneme);
 void DecodePhonemes(const char *inptr, char *outptr);

 extern const char *WordToString(unsigned int word);
--- a/src/synthdata.cpp
+++ b/src/synthdata.cpp
@@ -35,7 +35,7 @@
 #include "translate.h"
 #include "wave.h"

 const char *version_string = "1.46.17  05.May.12";
 const char *version_string = "1.46.19  27.Jun.12";
 const int version_phdata  = 0x014600;

 int option_device_number = -1;
--- a/src/tr_languages.cpp
+++ b/src/tr_languages.cpp
@@ -308,22 +308,23 @@ static void SetCyrillicLetters(Translator *tr)
 void SetIndicLetters(Translator *tr)
 {//=================================
 	// Set letter types for Indic scripts, Devanagari, Tamil, etc
 	static const char dev_consonants2[] = {0x02,0x03,0x58,0x59,0x5a,0x5b,0x5c,0x5d,0x5e,0x5f};
 	static const char dev_consonants2[] = {0x02,0x03,0x58,0x59,0x5a,0x5b,0x5c,0x5d,0x5e,0x5f,0x7b,0x7c,0x7e,0x7f,0};
 	static const char dev_vowels2[] = {0x60,0x61, 0x55,0x56,0x57,0x62,0x63,0};  // non-consecutive vowels and vowel-signs

 	memset(tr->letter_bits,0,sizeof(tr->letter_bits));
 	SetLetterBitsRange(tr,LETTERGP_A,0x04,0x14);   // vowel letters
 	SetLetterBitsRange(tr,LETTERGP_A,0x3e,0x4d);   // + vowel signs, and virama
 	SetLetterBitsRange(tr,LETTERGP_A,0x55,0x57);   // + vowel signs
 	SetLetterBits(tr,LETTERGP_A, dev_vowels2);     // + extra vowels and vowel signs

 	SetLetterBitsRange(tr,LETTERGP_B,0x3e,0x4d);   // vowel signs, and virama
 	SetLetterBitsRange(tr,LETTERGP_B,0x55,0x57);   // + vowel signs
 	SetLetterBits(tr,LETTERGP_B, dev_vowels2);     // + extra vowels and vowel signs

 	SetLetterBitsRange(tr,LETTERGP_C,0x15,0x39);   // the main consonant range
 	SetLetterBits(tr,LETTERGP_C,dev_consonants2);  // + additional consonants

 	SetLetterBitsRange(tr,LETTERGP_Y,0x04,0x14);   // vowel letters
 	SetLetterBitsRange(tr,LETTERGP_Y,0x3e,0x4c);   // + vowel signs
 	SetLetterBitsRange(tr,LETTERGP_Y,0x55,0x57);   // + vowel signs
 	SetLetterBits(tr,LETTERGP_Y, dev_vowels2);     // + extra vowels and vowel signs

 	tr->langopts.param[LOPT_UNPRONOUNCABLE] = 1;   // disable check for unpronouncable words
 	tr->langopts.suffix_add_e = tr->letter_bits_offset + 0x4d;   //virama
@@ -711,6 +712,12 @@ Translator *SelectTranslator(const char *name)
 			{
 				tr->letter_bits_offset = OFFSET_GURMUKHI;
 			}
 			if(name2 == L('n','e'))
 			{
 				tr->langopts.break_numbers = 0x2aaaa8;
 				tr->langopts.max_digits = 22;
 				tr->langopts.numbers2 |= NUM2_ENGLISH_NUMERALS;
 			}
 			SetIndicLetters(tr);
 		}
 		break;
--- a/src/translate.cpp
+++ b/src/translate.cpp
@@ -407,6 +407,9 @@ int IsAlpha(unsigned int c)
 	if((c >= 0x1100) && (c <= 0x11ff))
 		return(1);  //Korean jamo

 	if((c >= 0x2800) && (c <= 0x28ff))
 		return(1);  // braille

 	if((c > 0x3040) && (c <= 0xa700))
 		return(1); // Chinese/Japanese.  Should never get here, but Mac OS 10.4's iswalpha seems to be broken, so just make sure

@@ -981,6 +984,13 @@ if((wmark > 0) && (wmark < 8))
 			if(word_phonemes[0] == phonSWITCH)
 				return(0);

 			if((tr->langopts.numbers2 & NUM2_ENGLISH_NUMERALS) && !(wtab->flags & FLAG_CHAR_REPLACED))
 			{
 				// for this language, speak English numerals (0-9) with the English voice
 				sprintf(word_phonemes,"%c",phonSWITCH);
 				return(0);
 			}

 			found = TranslateNumber(tr, word1, phonemes, dictionary_flags, wtab, 0);
 		}

@@ -2223,7 +2233,7 @@ static int EmbeddedCommand(unsigned int &source_index)



 static int SubstituteChar(Translator *tr, unsigned int c, unsigned int next_in, int *insert)
 static int SubstituteChar(Translator *tr, unsigned int c, unsigned int next_in, int *insert, int *wordflags)
 {//=========================================================================================
 	int ix;
 	unsigned int word;
@@ -2285,12 +2295,14 @@ static int SubstituteChar(Translator *tr, unsigned int c, unsigned int next_in,

 	if(upper_case)
 		new_c = towupper(new_c);

 	*wordflags |= FLAG_CHAR_REPLACED;
 	return(new_c);

 }


 static int TranslateChar(Translator *tr, char *ptr, int prev_in, unsigned int c, unsigned int next_in, int *insert)
 static int TranslateChar(Translator *tr, char *ptr, int prev_in, unsigned int c, unsigned int next_in, int *insert, int *wordflags)
 {//================================================================================================================
 	// To allow language specific examination and replacement of characters

@@ -2368,7 +2380,7 @@ static int TranslateChar(Translator *tr, char *ptr, int prev_in, unsigned int c,
 		}
 		break;
 	}
 	return(SubstituteChar(tr,c,next_in,insert));
 	return(SubstituteChar(tr, c, next_in, insert, wordflags));
 }


@@ -2680,7 +2692,7 @@ if((c == '/') && (tr->langopts.testing & 2) && IsDigit09(next_in) && IsAlpha(pre
 				word_flags |= FLAG_COMMA_AFTER;
 			}

 			c = TranslateChar(tr, &source[source_index], prev_in,c, next_in, &char_inserted);  // optional language specific function
 			c = TranslateChar(tr, &source[source_index], prev_in,c, next_in, &char_inserted, &word_flags);  // optional language specific function
 			if(c == 8)
 				continue;  // ignore this character

--- a/src/translate.h
+++ b/src/translate.h
@@ -110,6 +110,7 @@
 #define FLAG_MULTIPLE_SPACES 0x40000  // word is preceded by multiple spaces, newline, or tab
 #define FLAG_INDIVIDUAL_DIGITS 0x80000  // speak number as individual digits
 #define FLAG_DELETE_WORD     0x100000   // don't speak this word, it has been spoken as part of the previous word
 #define FLAG_CHAR_REPLACED   0x200000   // characters have been replaced by .replace in the *_rules 

 #define FLAG_SUFFIX_VOWEL  0x08000000   // remember an initial vowel from the suffix
 #define FLAG_NO_TRACE      0x10000000   // passed to TranslateRules() to suppress dictionary lookup printout
@@ -444,12 +445,14 @@ typedef struct {
 	int numbers;

 #define NUM2_MULTIPLE_ORDINAL   0x1000
 #define NUM2_ENGLISH_NUMERALS   0x2000
 	// bits 1-4  use variant form of numbers before thousands,millions,etc.
 	// bit6=(LANG=pl) two forms of plural, M or MA
 	// bit7=(LANG-ru) use MB for 1 thousand, million, etc
 	// bit8=(LANG=cs,sk) two forms of plural, M or MA
 	// bit9=(LANG=rw) say "thousand" and "million" before its number, not after
 	// bit12=(LANG=el,es) use ordinal form of hundreds and tens as well as units
 	// bit13=(LANG=ne)  speak (non-replaced) English numerals in English
 	int numbers2;

 #define BREAK_THOUSANDS   0x49249248