lang=pt Reduce phoneme [&~] to [&] in unstressed syllables. git-svn-id: https://espeak.svn.sourceforge.net/svnroot/espeak/trunk@30 d46cf337-b52f-0410-862d-fd96e6ae7743

18 years ago · 85cb0ec378
--- a/dictsource/de_rules
+++ b/dictsource/de_rules
          &) schaft (_S6  Saft
             schein (lich S'aIn
             schwer       Sve:*
          _) selbst (@@P6 z'Elpst
             shop         _^_EN
          _) sky          _^_EN
             soldat       z%OldA:t
--- a/dictsource/dict_phonemes
+++ b/dictsource/dict_phonemes
 *    :    ;    b    C    d    dZ   f    
 g    h    j    k    l    l^   m    n    
 N    n^   p    Q    r    R    s    S    
 s#   s;   t    T    tS   ts   v    w    
 x    z    Z    
 N    n^   p    Q    r    R    r-   s    
 S    s#   s;   t    T    tS   ts   v    
 w    x    z    Z    
 Dictionary ru_dict
--- a/dictsource/hu_list
+++ b/dictsource/hu_list
 ill	illEdvE  $dot
 stb	SAtYb:i  $dot
 vö	vEZd _!'Yss2E
 vö	v'EZd||_'Yss2E
 pl	pe:lda:ul  $dot
 mta	$abbrev
 a.m	An^n^i||m'int  $dot
 b	be:
 c	tse:
 d	de:
 f	Ef
 f	Eff
 g	ge:
 h	ha:
 j	je:
 k	ka:
 l	El
 m	Em
 n	En
 l	Ell
 m	Emm
 n	Enn
 p	pe:
 q	ku
 r	ER
 r	ERR
 s	S_
 s	ES  $atend
 _s	ES
--- a/dictsource/is_rules
+++ b/dictsource/is_rules
 // Spelling-to-phoneme rules for Icelandic
 // This file is UTF8 encoded.
 // letter group L08  voiceless consonants
 // letter group B  voiceless consonants c,f,h,k,p,t,x,þ
 // letter group F  p,t,k,s
 // letter group H  j,r,v,
 .group ð
        ð        D
        ð (L08   T
        ð (B     T
        ð (__    T    // before pause
 .group l
        l        l
        l (_     l#
        l (L08X  l#
        l (BX    l#
        ll       dl
        ll (L08  tl#
        ll (B8   tl#
        ll (_    tl#  // ??
 .group m
        m        m
        mm (K    m
   L08) m        hm#
        m (L08   m#
     B) m        hm#
        m (B     m#
 .group n
        n        n
   L08) n        hn#
        n (L08   n#
     B) n        hn#
        n (B     n#
        n (g     N
        ng (_    Ng
        ngt (K   N#d
 .group p
     _) p        p
        p        p
   L08) p        b
     B) p        b
     s) p        b
        p (K     b
        p (s     f
     _) r        R2
     C) r        @-*
     A) r (A     R
   L08) r        r#
        r (L08   r#
     B) r        r#
        r (B     r#
        r (_     r#
        r        R
        rl       dl
 .group t
     _) t        t
        t        t
   L08) t        d
     B) t        d
     s) t        d
        t (K     d
        tt       hd
--- a/dictsource/pt_rules
+++ b/dictsource/pt_rules
          a (r_      'a
 ?1     p) a (d_      =E
          a (CC_     &
 ?1        al (_      'Al // eg: Portugal, capital, etc.
 ?1        a (lK      ,A   // Algarve, alto, etc...
 ?1        a (l_      'A       // eg: Portugal, capital, etc.
 ?1        a (lK      ,A       // Algarve, Almerinda, etc...
 ?2        al (K      aU
 ?2        alh        alj
          e (cem_    E
          e (ces_    E
 e (stA_    E
          e (stA_    E
          e (stAm_   E
          e (stAs_   E
        e (strA_        E
          e (strA_        E
          e (rnA_    E
          e (rnAm_    E
          e (xA_     E
          e (xAm_    E
          e (xAs_    E
 ?2        _n) e (t        E
 ?2    _n) e (t       E
          em (C      eIm
          en (K      eIN
       l) r          x
       n) r          x
       s) r          x
 ?1     A) r (_       r // this letter is imperfect.
 ?1     A) r (_       r-       // [r-] is English linking-r
 .group s
--- a/dictsource/ru_rules
+++ b/dictsource/ru_rules
 // G  voiced: б в г д ж з
 // H  hard consonant:  ъ ж ц ш
 // Y  iotated vowel, softsign:  ь е ё и ю я
 // L08 кпстфх
 // L09 бгджзлмнр
 .group а
--- a/phsource/compile_report
+++ b/phsource/compile_report
      ru  36  123
      it  25  114
      es   6  114
      pt  34  137
   pt_pt  20  137
      pt  28  131
   pt_pt  20  131
      ro  36  138
      el   8  114
      sv  25  117
  2  vowel/@_4      nl hr
  3  vowel/8_2      en_us hr sv
  1  vowel/8_3      zh_yue
 12  vowel/a        en_n cy de hu nl pl sk hr pt
 11  vowel/a        en_n cy de hu nl pl sk hr
  4  vowel/a#       en_sc it pt
  6  vowel/a_2      eo it pt pt_pt ro vi
  5  vowel/a#_2     hr pt sv is sw
  7  vowel/a_3      en_sc cs pt is
  4  vowel/a#_2     hr sv is sw
  6  vowel/a_3      en_sc cs is
 12  vowel/a#_3     en en_n en_us en_wm de hi ru pt_pt vi zh_yue
  4  vowel/a_4      en_wm el vi
  3  vowel/a_5      pt sv sw
  2  vowel/a_5      sv sw
  7  vowel/aa       en_us fi fr_ca no zh_yue
  1  vowel/aa#      fi
  3  vowel/aa_2     en cy
  2  vowel/aa_4     sv vi
  2  vowel/aa_5     en_n
  1  vowel/aa_6     de
  2  vowel/aa_7     nl pt
  1  vowel/aa_7     nl
  4  vowel/a_en     en fr
  1  vowel/@_bck    hi
 13  vowel/e        en en_n af cy eo fr hu hr it pt pt_pt vi
  2  vowel/V        en en_sc
  3  vowel/V_2      af ru
  3  vowel/V_3      en_rp hi vi
  2  vowel/V_4      en_sc pt
  1  vowel/V_4      en_sc
  6  vowel/y        en de fi hu nl zh_yue
 10  vowel/y#       en en_wm de fi fr hu nl ro vi zh_yue
  1  vowel/y##      is
--- a/phsource/ph_af
+++ b/phsource/ph_af
 phoneme r
  liquid
  liquid  starttype #r endtype #r
  length 60
  vowelin  f1=2  f2=2700 -300 -200  f3=-1300 80  
  vowelout f1=2  f2=1700 -300 -200  f3=-1300 80
--- a/phsource/ph_dutch
+++ b/phsource/ph_dutch
 phoneme r    // from Afrikaans
  liquid
  liquid  starttype #r endtype #r
  length 60
  vowelin  f1=2  f2=2700 -300 -200  f3=-1300 80  
  vowelout f1=2  f2=1700 -300 -200  f3=-1300 80
--- a/phsource/ph_pt_brazil
+++ b/phsource/ph_pt_brazil
  vowel  starttype (@) endtype (@)
  length 200
  formants vnasal/a#_n
  reduceto &  4
 endphoneme
 phoneme &U~
 endphoneme
 phoneme A            // for "al" in European Portuguese
  vowel starttype (a) endtype (a)
  length 200
  formants vowel/a_3
 endphoneme
 phoneme A2            // for "al" in European Portuguese
  vowel starttype (a) endtype (a)
  length 200
  formants vowel/aa_7
 endphoneme
 phoneme A3            // for "al" in European Portuguese
  vowel starttype (a) endtype (a)
  length 200
  formants vowel/a_5
 endphoneme
 phoneme A4            // for "al" in European Portuguese
 phoneme A            // for "al" in European Portuguese, don't reduce to [&]
  vowel starttype (a) endtype (a)
  length 200
  formants vowel/a_2
 endphoneme
 phoneme A5            // for "al" in European Portuguese
  vowel starttype (a) endtype (a)
  length 200
  formants vowel/a
 endphoneme
 phoneme A6            // for "al" in European Portuguese
  vowel starttype (a) endtype (a)
  length 200
  formants vowel/V_4
 endphoneme
 phoneme A7            // for "al" in European Portuguese
  vowel starttype (a) endtype (a)
  length 200
  formants vowel/a#_2
 endphoneme
 phoneme E
--- a/phsource/phonemes
+++ b/phsource/phonemes
 endphoneme
 phoneme R3  // Afrikaans
  liquid
  liquid  starttype #r endtype #r
  length 60
  vowelin  f1=2  f2=2700 -300 -200  f3=-1300 80  
  vowelout f1=2  f2=1700 -300 -200  f3=-1300 80
--- a/phsource/vowel/a_3
+++ b/phsource/vowel/a_3
--- a/phsource/vowel/e#
+++ b/phsource/vowel/e#
--- a/phsource/vowel/oo_2
+++ b/phsource/vowel/oo_2
--- a/src/compiledata.cpp
+++ b/src/compiledata.cpp
 	{
 		item_string[ix++] = c;
 		c = fgetc(f_in);
 		if(feof(f_in))
 			break;
 		if(item_string[ix-1] == '=')
 			break;
 	}
 	ungetc(c,f_in);
 	item_string[ix] = 0;
 	if(feof(f_in)) return(-1);
 	if(!feof(f_in))
 		ungetc(c,f_in);
 	keyword = -1;
--- a/src/compiledict.cpp
+++ b/src/compiledict.cpp
 					c = *p++ - '0';
 					value = *p++ - '0';
 					c = c * 10 + value;
 					if((value < 0) || (value > 9) || (c <= 0) || (c >= N_LETTER_TYPES))
 					if((value < 0) || (value > 9) || (c <= 0) || (c >= N_LETTER_GROUPS))
 					{
 						c = 0;
 						fprintf(f_log,"%5d: Expected 2 digits after 'L'",linenum);
 					{
 						// pre-rule, put the group number before the RULE_LETTERGP command
 						output[ix++] = c;
 						c = RULE_LETTERGP;
 						c = RULE_LETTERGP2;
 					}
 					else
 					{
 						output[ix++] = RULE_LETTERGP;
 						output[ix++] = RULE_LETTERGP2;
 					}
 					break;
 int compile_dictrules(FILE *f_in, FILE *f_out, char *fname_temp)
 {//=============================================================
 // Compile one letter-group definition line from a rules file.
 //   input:  the text immediately after the ".L" prefix; it must start with a
 //           two-digit group number, followed by whitespace-separated strings.
 //   f_out:  the compiled rules output file.
 // Writes RULE_GROUP_START, RULE_LETTERGP2, (group + 'A'), then each string
 // followed by a 0 byte, and finally RULE_GROUP_END.
 // Returns 0 on success, 1 if the two digits are missing or the group number
 // is not below N_LETTER_GROUPS (nothing is written in that case).
 static int compile_lettergroup(char *input, FILE *f_out)
 {//=====================================================
 	char *p;
 	int group;
 	p = input;
 	// cast: passing a negative char (e.g. a UTF-8 byte) to isdigit() is undefined
 	if(!isdigit((unsigned char)p[0]) || !isdigit((unsigned char)p[1]))
 	{
 		return(1);
 	}
 	// parse from the FIRST digit; starting at p[1] dropped the tens digit,
 	// so ".L12" was silently compiled as group 2
 	group = atoi(p);
 	if(group >= N_LETTER_GROUPS)
 		return(1);
 	// step over the group-number token, but never past the end of the line
 	while((*p != 0) && !isspace2(*p)) p++;
 	fputc(RULE_GROUP_START,f_out);
 	fputc(RULE_LETTERGP2,f_out);
 	fputc(group + 'A', f_out);
 	for(;;)
 	{
 		while(isspace2(*p)) p++;
 		if(*p == 0)
 			break;
 		// copy one string (any bytes above space, so UTF-8 passes through) and terminate it
 		while((*p & 0xff) > ' ')
 		{
 			fputc(*p++, f_out);
 		}
 		fputc(0,f_out);
 	}
 	fputc(RULE_GROUP_END,f_out);
 	return(0);
 }
 static int compile_dictrules(FILE *f_in, FILE *f_out, char *fname_temp)
 {//====================================================================
 	char *prule;
 	unsigned char *p;
 	int ix;
 		buf = fgets(buf1,sizeof(buf1),f_in);
 		if((buf != NULL) && (buf[0] == '\r')) buf++;  // ignore extra \r in \r\n 
 		if((buf != NULL) && (memcmp(buf,".L",2)==0))
 		{
 			if(compile_lettergroup(&buf[2], f_out) != 0)
 			{
 				fprintf(f_log,"%5d: Bad lettergroup\n",linenum);
 				error_count++;
 			}
 			continue;
 		}
 		if((buf == NULL) || (memcmp(buf,".group",6)==0))
 		{
 			// next .group or end of file, write out the previous group
--- a/src/dictionary.cpp
+++ b/src/dictionary.cpp
 		groups2_count[ix]=0;
 		groups2_start[ix]=255;  // indicates "not set"
 	}
 	memset(letterGroups,0,sizeof(letterGroups));
 	p = data_dictrules;
 	while(*p != 0)
 			break;
 		}
 		p++;
 		len = strlen(p);
 		p_name = p;
 		c = p_name[0];
 		p += (len+1);
 		if(len == 1)
 		if(p[0] == RULE_LETTERGP2)
 		{
 			groups1[c] = p;
 			ix = p[1] - 'A';
 			p += 2;
 			if((ix >= 0) && (ix < N_LETTER_GROUPS))
 			{
 				letterGroups[ix] = p;
 			}
 		}
 		else
 		if(len == 0)
 		{
 			groups1[0] = p;
 			len = strlen(p);
 			p_name = p;
 			c = p_name[0];
 			p += (len+1);
 			if(len == 1)
 			{
 				groups1[c] = p;
 			}
 			else
 			if(len == 0)
 			{
 				groups1[0] = p;
 			}
 			else
 			{
 				if(groups2_start[c] == 255)
 					groups2_start[c] = n_groups2;
 				groups2_count[c]++;
 				groups2[n_groups2] = p;
 				c2 = p_name[1];
 				groups2_name[n_groups2++] = (c + (c2 << 8));
 			}
 		}
 		else
 		{
 			if(groups2_start[c] == 255)
 				groups2_start[c] = n_groups2;
 			groups2_count[c]++;
 			groups2[n_groups2] = p;
 			c2 = p_name[1];
 			groups2_name[n_groups2++] = (c + (c2 << 8));
 		}
 		// skip over all the rules in this group
 		rule_count = 0;
 		while(*p != RULE_GROUP_END)
 int Translator::IsLetterGroup(char *word, int group)
 {//=================================================
 	// Match the start of 'word' against the list of utf-8 strings stored for
 	// letter group 'group'.
 	// Returns the number of bytes of 'word' covered by the first string that
 	// matches completely, or 0 if none matches, the group index is out of
 	// range, or the group has no string list.
 	// NOTE(review): the scan below stops at an empty string, but
 	// compile_lettergroup() appears to end the list with RULE_GROUP_END rather
 	// than an extra 0 byte - confirm the terminator against the compiled format.
 	char *p;
 	char *w;
 	if((group < 0) || (group >= (int)(sizeof(letterGroups)/sizeof(letterGroups[0]))))
 		return(0);
 	p = letterGroups[group];
 	if(p == NULL)
 		return(0);   // table entry was never filled in for this group
 	while(*p != 0)
 	{
 		w = word;
 		// stop at the end of the word too, otherwise two terminators compare
 		// equal and the scan runs on past both strings
 		while((*p == *w) && (*w != 0))
 		{
 			w++;
 			p++;
 		}
 		if(*p == 0)
 			return(w-word);   // matched a complete string
 		while(*p++ != 0);  // skip to end of string
 	}
 	return(0);
 }
 int Translator::IsLetter(int letter, int group)
 {//============================================
 	if(letter_groups[group] != NULL)
 	static char output[60];
 	static char symbols[] = {' ',' ',' ',' ',' ',' ',' ',' ',' ',
 			'@','&','%','+','#','S','D','Z','A','B','C','H','F','G','Y','N','K','V','L','T','X','?','W'};
 			'@','&','%','+','#','S','D','Z','A','L',' ',' ',' ',' ',' ','N','K','V',' ','T','X','?','W'};
 	static char symbols_lg[] = {'A','B','C','H','F','G','Y'};
 	match_type = 0;
   buf_pre[0] = 0;
 		}
 		else
 		if(rb == RULE_LETTERGP)
 		{
 			c = symbols_lg[*rule++ - 'A'];
 		}
 		else
 		if(rb == RULE_LETTERGP2)
 		{
 			value = *rule++ - 'A';
 			if(value >= 8)
 			{
 				p[0] = 'L';
 				p[1] = (value / 10) + '0';
 				c = (value % 10) + '0';
 			p[0] = 'L';
 			p[1] = (value / 10) + '0';
 			c = (value % 10) + '0';
 				if(match_type == RULE_PRE)
 				{
 					p[0] = c;
 					c = 'L';
 				}
 				p+=2;
 			}
 			else
 			if(match_type == RULE_PRE)
 			{
 				c = symbols[value + RULE_LETTER_GROUPS];
 				p[0] = c;
 				c = 'L';
 			}
 			p+=2;
 		}
 		else
 		if(rb <= RULE_LAST_RULE)
 	int  distance_right;
 	int  distance_left;
 	int  lg_pts;
 	int  n_bytes;
 	MatchRecord match;
 	static MatchRecord best;
 	if(rule == NULL)
 	{
 		match_out->points = 0;
 		(*word)++;
 		return;
 	}
 						failed = 1;
 					break;
 				case RULE_LETTERGP2:   // match against a list of utf-t strings
 					letter_group = *rule++ - 'A';
 					if((n_bytes = IsLetterGroup(post_ptr-1,letter_group)) >0)
 					{
 						match.points += (20-distance_right);
 						post_ptr += (n_bytes-1);
 					}
 					else
 						failed =1;
 					break;
 				case RULE_NOTVOWEL:
 					if(!IsLetter(letter_w,0))
 					{
--- a/src/phonemelist.cpp
+++ b/src/phonemelist.cpp
 }
 #endif
 		if((ph->reduce_to != 0) && (ph->type != phVOWEL) && !(plist2->synthflags & SFLAG_DICTIONARY))
 		{
 			// reduction for vowels has already been done in SetWordStress
 			int reduce_level;
 			if(next->type == phVOWEL)
 			{
 				reduce_level = (ph->phflags >> 28) & 7;
 				if((&plist2[1])->stress < reduce_level)
 				{
 					// look at the stress of the following vowel
 					ph = phoneme_tab[ph->reduce_to];
 				}
 			}
 		}
 		if((plist2+1)->synthflags & SFLAG_LENGTHEN)
 		{
 			static char types_double[] = {phFRICATIVE,phVFRICATIVE,phNASAL,phLIQUID,0};
--- a/src/synthdata.cpp
+++ b/src/synthdata.cpp
 #include "translate.h"
 #include "wave.h"
 const char *version_string = "1.25.13  30.May.07";
 const int version_phdata  = 0x012501;
 const char *version_string = "1.25.14  31.May.07";
 const int version_phdata  = 0x012514;
 int option_device_number = -1;
--- a/src/tr_languages.cpp
+++ b/src/tr_languages.cpp
 		{
 			static int stress_amps_is[] = {16,16, 20,20, 20,24, 24,22 };
 			static int stress_lengths_is[8] = {180,155, 200,200, 0,0, 240,250};
 			static const wchar_t is_L08[] = {'c','f','h','k','p','t','x',0xfe,0};  // voiceless conants, including 'þ'  ?? 's'
 			static const wchar_t is_lettergroup_B[] = {'c','f','h','k','p','t','x',0xfe,0};  // voiceless conants, including 'þ'  ?? 's'
 			tr = new Translator();
 			SetupTranslator(tr,stress_lengths_is,stress_amps_is);
 			ResetLetterBits(tr,0x18);
 			SetLetterBits(tr,4,"kpst");   // Letter group F
 			SetLetterBits(tr,3,"jvr");    // Letter group H
 			tr->letter_groups[8] = is_L08;
 			tr->letter_groups[1] = is_lettergroup_B;
 			SetLetterVowel(tr,'y');
 			tr->langopts.numbers = 0xe9;
 			tr->langopts.numbers2 = 0x2;
 	static const char ru_voiced[] = {0x11,0x12,0x13,0x14,0x16,0x17,0};    // letter group G  (voiced obstruents)
 	static const char ru_ivowels[] = {0x2c,0x15,0x31,0x18,0x2e,0x2f,0};   // letter group Y  (iotated vowels & soft-sign)
 	// these are unicode character codes
 	static const wchar_t ru_L08[] = {0x43a,0x43f,0x441,0x442,0x444,0x445,0};  // кпстфх
 	static const wchar_t ru_L09[] = {0x431,0x433,0x434,0x436,0x43b,0x43c,0x43d,0x440,0};  // бгджзлмнр
 	SetupTranslator(this,stress_lengths_ru,stress_amps_ru);
 	charset_a0 = charsets[18];   // KOI8-R
 	SetLetterBits(this,6,ru_ivowels);
 	SetLetterBits(this,7,ru_vowels);
 	letter_groups[8] = ru_L08;        // This is  L08  in ru_rules
 	letter_groups[9] = ru_L09;        // This is  L09  in ru_rules
 	langopts.param[LOPT_UNPRONOUNCABLE] = 0x432;    // [v]  don't count this character at start of word
 	langopts.param[LOPT_REGRESSIVE_VOICING] = 1;
 	langopts.param[LOPT_KEEP_UNSTR_VOWEL] = 1;
--- a/src/translate.h
+++ b/src/translate.h
 #define N_RULE_GROUP2    120          // max num of two-letter rule chains
 #define N_HASH_DICT     1024
 #define N_CHARSETS        20
 #define N_LETTER_GROUPS   20
 /* flags from word dictionary */
 // bits 0-3  stressed syllable,  7=unstressed
 #define RULE_ENDING		14
 #define RULE_DIGIT		15   // D digit
 #define RULE_NONALPHA	16   // Z non-alpha
 #define RULE_LETTER_GROUPS 17  // 17 to 23
 #define RULE_LETTER1		17   // A vowels
 #define RULE_LETTER2		18   // B 'hard' consonants 
 #define RULE_LETTER3		19   // C all consonants
 #define RULE_LETTER4		20   // H letter group
 #define RULE_LETTER5    21   // F letter group
 #define RULE_LETTER6		22   // G letter group
 #define RULE_LETTER7    23   // Y letter group
 #define RULE_LETTERGP   17   // A B C H F G Y   letter group number
 #define RULE_LETTERGP2  18   // L + letter group number
 #define RULE_NO_SUFFIX  24   // N
 #define RULE_NOTVOWEL   25   // K
 #define RULE_IFVERB     26   // V
 #define RULE_LETTERGP   27   // L + letter group number
 #define RULE_ALT1       28   // T word has $alt attribute
 #define RULE_NOVOWELS   29   // X no vowels up to word boundary
 #define RULE_SPELLING   31   // W while spelling letter-by-letter
 // holds properties of characters: vowel, consonant, etc for pronunciation rules
 	unsigned char letter_bits[256];
 	int letter_bits_offset;
 #define N_LETTER_TYPES 20
 	const wchar_t *letter_groups[N_LETTER_TYPES];
 	const wchar_t *letter_groups[8];
 	/* index1=option, index2 by 0=. 1=, 2=?, 3=! 4=none */
 	unsigned char punct_to_tone[4][5];
 	void ApplySpecialAttribute(char *phonemes, int dict_flags);
 	int IsLetter(int letter, int group);
 	int IsLetterGroup(char *word, int group);
 	void CalcPitches_Tone(int clause_tone);
 	unsigned char groups2_count[256];    // number of 2 letter groups for this initial letter
 	unsigned char groups2_start[256];    // index into groups2
 	char *letterGroups[N_LETTER_GROUPS];
 	int n_ph_list2;
 	PHONEME_LIST2 ph_list2[N_PHONEME_LIST];	// first stage of text->phonemes
--- a/src/vowelchart.cpp
+++ b/src/vowelchart.cpp
 	int hash;
 	char *p;
 	char *start;
 	char *group;
 	char *next;
 	unsigned char c;
 	int count = 0;
 		}
 		if(*p == RULE_GROUP_START)
 		{
 			group = p;
 			if(p[1] == RULE_LETTERGP2)
 			{
 				while(*p != RULE_GROUP_END) p++;
 				continue;
 			}
 			p += (strlen(p)+1);
 		}