Speak names of characters U+2800 to U+28FF, Braille dot symbols. Language option to speak numbers which use characters '0' to '9' with the English voice. Add some more characters to the vowel and consonant lists for Indian languages. git-svn-id: https://espeak.svn.sourceforge.net/svnroot/espeak/trunk@284 d46cf337-b52f-0410-862d-fd96e6ae7743

13 years ago · 084584ee16
--- a/dictsource/de_list
+++ b/dictsource/de_list
 oboe            o:b'o:@
 orgie            $alt
 ok              o:k'e:
 ödem		Y:dEm
 paradies         $3
 passage         pas'aZ@
--- a/dictsource/dict_phonemes
+++ b/dictsource/dict_phonemes
 ts   v    x    z    
 Dictionary en_dict  2012-04-07
 Dictionary en_dict  2012-06-15
 0    3    3:   @    @-   @2   @5   @L   
 a    a#   A:   A@   aa   aI   aI3  aI@  
 t.   th   th.  v    w    x    z    
 Dictionary ta_dict  2012-06-06
 Dictionary ta_dict  2012-06-27
 a    a:   aI   aU   e    E    e:   i    
 I    i:   o    o:   u    U    u2   u:   
 k    l    m    n    N    p    q    r    
 s    S    t    tS   v    w    X    z    
 Z    
 Dictionary ga_dict  2012-06-27
 0    @    a    A:   e    E    e:   i    
 I    i:   o    O    o:   u    U    u1   
 u:   V    
 :    ;    b    c    C    d    d[   dZ   
 f    g    h    j    k    l    m    n    
 p    r    R    s    S    t    t[   tS   
 v    w    x    X    z    
--- a/dictsource/en_list
+++ b/dictsource/en_list
 _cap	k,ap@-t@L
 _?A	l,Et3
 _??	sImb@L
 _braille br'eIl
 _#9	tab
 _#32	speIs
 //…	_::d%0td,0t     // for ellipsis or ...  while reading
--- a/dictsource/ta_list
+++ b/dictsource/ta_list
 (பி . ஏ)	bije: $dot
 (கி . மீ)	kilo:mi:t.t.Vr $dot
 aud	ostr'eIlIVn||d'olVz
 cny	tS'aIni:z||j'uVn
 eur	ju:ro:z
 gbp	br'itiS||p'aUndz
 inr	'IndIVn||r'u:ba:j
 inr	'IndIVn||r'u:pi:z
 jpy	dZ'a:pVni:z||j'en
 rub	r'VSVn||r'u:bVlz
 usd     j'ues||d'olVz
 // numbers
 _0	suz.ijVm    // சுழியம்
 _1	onRU
--- a/dictsource/ta_rules
+++ b/dictsource/ta_rules
 //endsort
 //sort
   ஷங்) க (ர		kV
 _கோபி) க		kV#
 _ஸ்டா) க		kV#
 _ஜமாய்) க		kV#
--- a/phsource/compile_report
+++ b/phsource/compile_report
      bo  10  152
      kk  20  117
      fa   9  110
      ga  24  124
      ga  25  125
 Data file      Used by
 b/b             [b] base
                [l/] fr
 l/l_@           [l/3] base
                [l/] fr
 l/l@            [hÑù] base
 l/l@            [h–q] base
                [l#] base
                [l] fr
                [l/2] fr
 l/L2_uL         [l/2] base
 l/l_3           [l/] de
 l/l_4           [ll] sq
 l/la            [hÑù] base
 l/la            [h–q] base
                [l#] base
                [l] fr
                [l/2] fr
                [K] tn
 l/l_a           [l/3] base
                [l/] fr
 l/le            [hÑù] base
 l/le            [h–q] base
                [l#] base
                [l] fr
                [l/2] fr
                [&:] af
 l/l_front       [L] sq
 l/l_front_      [l/4] sq
 l/li            [hÑù] base
 l/li            [h–q] base
                [l#] base
                [l] fr
                [l/2] fr
 ll/_ll          [L] bg
 l/l_long        [l] base
                [l] fr
 l/lo            [hÑù] base
 l/lo            [h–q] base
                [l#] base
                [l/2] fr
                [K] nso
 l^/l_rfx        [l.] base
                [l] ru
                [l^] ru
 l/lu            [hÑù] base
 l/lu            [h–q] base
                [l#] base
                [l] fr
                [l/2] fr
                [u] ak
                [u:] wo
                [u] bo
                [u1] ga
                [u:] ga
 vowel/u#        [u:] en-sc
                [Y] tr
--- a/src/dictionary.cpp
+++ b/src/dictionary.cpp
 char *EncodePhonemes(char *p, char *outptr, unsigned char *bad_phoneme)
 /*********************************************************************/
 const char *EncodePhonemes(const char *p, char *outptr, unsigned char *bad_phoneme)
 /***************************************************************************/
 /* Translate a phoneme string from ascii mnemonics to internal phoneme numbers,
   from 'p' up to the next blank.
   Returns advanced 'p'
 	int  consumed;
 	unsigned int  mnemonic_word;
 	bad_phoneme[0] = 0;
 	if(bad_phoneme != NULL)
 		bad_phoneme[0] = 0;
 	// skip initial blanks
 	while(isspace(*p))
 			if(max_ph == 0)
 			{
 				// not recognised, report and ignore
 				bad_phoneme[0] = *p;
 				bad_phoneme[1] = 0;
 				if(bad_phoneme != NULL)
 				{
 					bad_phoneme[0] = *p;
 					bad_phoneme[1] = 0;
 				}
 				*outptr++ = 0;
 				return(p+1);
 			}
--- a/src/numbers.cpp
+++ b/src/numbers.cpp
 	int n_bytes;
 	int letter;
 	int len;
 	int ix;
 	int save_option_phonemes;
 	char *p2;
 	char *pbuf;
 	char capital[20];
 	char ph_buf[60];
 	char ph_buf2[60];
 	char ph_buf[80];
 	char ph_buf2[80];
 	char hexbuf[6];
 	ph_buf[0] = 0;
 	if(ph_buf[0] == 0)
 	{
 		// character name not found
 		if(iswalpha(letter))
 			Lookup(tr, "_?A", ph_buf);
 		if((letter >= 0x2800) && (letter <= 0x28ff))
 		{
 			// braille dots symbol
 			Lookup(tr, "_braille", ph_buf);
 			if(ph_buf[0] == 0)
 			{
 				EncodePhonemes("br'e:l", ph_buf, NULL);
 			}
 		if((ph_buf[0]==0) && !iswspace(letter))
 			Lookup(tr, "_??", ph_buf);
 			if(ph_buf[0] != 0)
 			{
 				pbuf = ph_buf + strlen(ph_buf);
 				for(ix=0; ix<8; ix++)
 				{
 					if(letter & (1 << ix))
 					{
 						*pbuf++ = phonPAUSE_VSHORT;
 						LookupLetter(tr, '1'+ix, 0, pbuf, 1);
 						pbuf += strlen(pbuf);
 					}
 				}
 			}
 		}
 		if(ph_buf[0] != 0)
 		if(ph_buf[0]== 0)
 		{
 			// speak the hexadecimal number of the character code
 			sprintf(hexbuf,"%x",letter);
 			pbuf = ph_buf;
 			for(p2 = hexbuf; *p2 != 0; p2++)
 			if(iswalpha(letter))
 				Lookup(tr, "_?A", ph_buf);
 			if((ph_buf[0]==0) && !iswspace(letter))
 				Lookup(tr, "_??", ph_buf);
 			if(ph_buf[0] != 0)
 			{
 				pbuf += strlen(pbuf);
 				*pbuf++ = phonPAUSE_VSHORT;
 				LookupLetter(tr, *p2, 0, pbuf, 1);
 				// speak the hexadecimal number of the character code
 				sprintf(hexbuf,"%x",letter);
 				pbuf = ph_buf;
 				for(p2 = hexbuf; *p2 != 0; p2++)
 				{
 					pbuf += strlen(pbuf);
 					*pbuf++ = phonPAUSE_VSHORT;
 					LookupLetter(tr, *p2, 0, pbuf, 1);
 				}
 			}
 		}
 	}
 		return(0);  // speak digits individually
 	if(tr->langopts.numbers != 0)
 	{
 		return(TranslateNumber_1(tr, word1, ph_out, flags, wtab, control));
 	}
 	return(0);
 }  // end of TranslateNumber
--- a/src/phoneme.h
+++ b/src/phoneme.h
 int LookupPhonemeString(const char *string);
 int PhonemeCode(unsigned int mnem);
 char *EncodePhonemes(char *p, char *outptr, unsigned char *bad_phoneme);
 const char *EncodePhonemes(const char *p, char *outptr, unsigned char *bad_phoneme);
 void DecodePhonemes(const char *inptr, char *outptr);
 extern const char *WordToString(unsigned int word);
--- a/src/synthdata.cpp
+++ b/src/synthdata.cpp
 #include "translate.h"
 #include "wave.h"
 const char *version_string = "1.46.17  05.May.12";
 const char *version_string = "1.46.19  27.Jun.12";
 const int version_phdata  = 0x014600;
 int option_device_number = -1;
--- a/src/tr_languages.cpp
+++ b/src/tr_languages.cpp
 void SetIndicLetters(Translator *tr)
 {//=================================
 	// Set letter types for Indic scripts, Devanagari, Tamil, etc
 	static const char dev_consonants2[] = {0x02,0x03,0x58,0x59,0x5a,0x5b,0x5c,0x5d,0x5e,0x5f};
 	static const char dev_consonants2[] = {0x02,0x03,0x58,0x59,0x5a,0x5b,0x5c,0x5d,0x5e,0x5f,0x7b,0x7c,0x7e,0x7f,0};
 	static const char dev_vowels2[] = {0x60,0x61, 0x55,0x56,0x57,0x62,0x63,0};  // non-consecutive vowels and vowel-signs
 	memset(tr->letter_bits,0,sizeof(tr->letter_bits));
 	SetLetterBitsRange(tr,LETTERGP_A,0x04,0x14);   // vowel letters
 	SetLetterBitsRange(tr,LETTERGP_A,0x3e,0x4d);   // + vowel signs, and virama
 	SetLetterBitsRange(tr,LETTERGP_A,0x55,0x57);   // + vowel signs
 	SetLetterBits(tr,LETTERGP_A, dev_vowels2);     // + extra vowels and vowel signs
 	SetLetterBitsRange(tr,LETTERGP_B,0x3e,0x4d);   // vowel signs, and virama
 	SetLetterBitsRange(tr,LETTERGP_B,0x55,0x57);   // + vowel signs
 	SetLetterBits(tr,LETTERGP_B, dev_vowels2);     // + extra vowels and vowel signs
 	SetLetterBitsRange(tr,LETTERGP_C,0x15,0x39);   // the main consonant range
 	SetLetterBits(tr,LETTERGP_C,dev_consonants2);  // + additional consonants
 	SetLetterBitsRange(tr,LETTERGP_Y,0x04,0x14);   // vowel letters
 	SetLetterBitsRange(tr,LETTERGP_Y,0x3e,0x4c);   // + vowel signs
 	SetLetterBitsRange(tr,LETTERGP_Y,0x55,0x57);   // + vowel signs
 	SetLetterBits(tr,LETTERGP_Y, dev_vowels2);     // + extra vowels and vowel signs
 	tr->langopts.param[LOPT_UNPRONOUNCABLE] = 1;   // disable check for unpronouncable words
 	tr->langopts.suffix_add_e = tr->letter_bits_offset + 0x4d;   //virama
 			{
 				tr->letter_bits_offset = OFFSET_GURMUKHI;
 			}
 			if(name2 == L('n','e'))
 			{
 				tr->langopts.break_numbers = 0x2aaaa8;
 				tr->langopts.max_digits = 22;
 				tr->langopts.numbers2 |= NUM2_ENGLISH_NUMERALS;
 			}
 			SetIndicLetters(tr);
 		}
 		break;
--- a/src/translate.cpp
+++ b/src/translate.cpp
 	if((c >= 0x1100) && (c <= 0x11ff))
 		return(1);  //Korean jamo
 	if((c >= 0x2800) && (c <= 0x28ff))
 		return(1);  // braille
 	if((c > 0x3040) && (c <= 0xa700))
 		return(1); // Chinese/Japanese.  Should never get here, but Mac OS 10.4's iswalpha seems to be broken, so just make sure
 			if(word_phonemes[0] == phonSWITCH)
 				return(0);
 			if((tr->langopts.numbers2 & NUM2_ENGLISH_NUMERALS) && !(wtab->flags & FLAG_CHAR_REPLACED))
 			{
 				// for this language, speak English numerals (0-9) with the English voice
 				sprintf(word_phonemes,"%c",phonSWITCH);
 				return(0);
 			}
 			found = TranslateNumber(tr, word1, phonemes, dictionary_flags, wtab, 0);
 		}
 static int SubstituteChar(Translator *tr, unsigned int c, unsigned int next_in, int *insert)
 static int SubstituteChar(Translator *tr, unsigned int c, unsigned int next_in, int *insert, int *wordflags)
 {//=========================================================================================
 	int ix;
 	unsigned int word;
 	if(upper_case)
 		new_c = towupper(new_c);
 	*wordflags |= FLAG_CHAR_REPLACED;
 	return(new_c);
 }
 static int TranslateChar(Translator *tr, char *ptr, int prev_in, unsigned int c, unsigned int next_in, int *insert)
 static int TranslateChar(Translator *tr, char *ptr, int prev_in, unsigned int c, unsigned int next_in, int *insert, int *wordflags)
 {//================================================================================================================
 	// To allow language specific examination and replacement of characters
 		}
 		break;
 	}
 	return(SubstituteChar(tr,c,next_in,insert));
 	return(SubstituteChar(tr, c, next_in, insert, wordflags));
 }
 				word_flags |= FLAG_COMMA_AFTER;
 			}
 			c = TranslateChar(tr, &source[source_index], prev_in,c, next_in, &char_inserted);  // optional language specific function
 			c = TranslateChar(tr, &source[source_index], prev_in,c, next_in, &char_inserted, &word_flags);  // optional language specific function
 			if(c == 8)
 				continue;  // ignore this character
--- a/src/translate.h
+++ b/src/translate.h
 #define FLAG_MULTIPLE_SPACES 0x40000  // word is preceded by multiple spaces, newline, or tab
 #define FLAG_INDIVIDUAL_DIGITS 0x80000  // speak number as individual digits
 #define FLAG_DELETE_WORD     0x100000   // don't speak this word, it has been spoken as part of the previous word
 #define FLAG_CHAR_REPLACED   0x200000   // characters have been replaced by .replace in the *_rules 
 #define FLAG_SUFFIX_VOWEL  0x08000000   // remember an initial vowel from the suffix
 #define FLAG_NO_TRACE      0x10000000   // passed to TranslateRules() to suppress dictionary lookup printout
 	int numbers;
 #define NUM2_MULTIPLE_ORDINAL   0x1000
 #define NUM2_ENGLISH_NUMERALS   0x2000
 	// bits 1-4  use variant form of numbers before thousands,millions,etc.
 	// bit6=(LANG=pl) two forms of plural, M or MA
 	// bit7=(LANG=ru) use MB for 1 thousand, million, etc
 	// bit8=(LANG=cs,sk) two forms of plural, M or MA
 	// bit9=(LANG=rw) say "thousand" and "million" before its number, not after
 	// bit12=(LANG=el,es) use ordinal form of hundreds and tens as well as units
 	// bit13=(LANG=ne)  speak (non-replaced) English numerals in English
 	int numbers2;
 #define BREAK_THOUSANDS   0x49249248