Fix crash with -X option when spelling words (acronyms). Rules files: added special character X meaning "no vowel until the word boundary". Used for lang=no. git-svn-id: https://espeak.svn.sourceforge.net/svnroot/espeak/trunk@14 d46cf337-b52f-0410-862d-fd96e6ae7743

18 years ago · f65bf2072b
--- a/dictsource/en_list
+++ b/dictsource/en_list
 // ABBREVIATIONS
 //**************
 abc              eIbi:s'i:
 ac               eI'si:
 adfs             eIdi:Ef'Es
 a.k.a            aka2_!
 agm              eIdZi:'Em 
 ai               eI;'aI
 api              apI2
 asap             eIEseIpi:
 awol             eIw0l
 cio              si:aI'oU
 ctrl             k0ntr'oUl
 dept             dI2pA@tm@nt
 diy              di:aI'waI
 eg               fO@Egz'aamp@L
 etc              Et'sEtr@
 eur              jU@
 hmm              h@m
 (http ://)       eItSti:ti:'pi:_
 ibm              aIbi:Em
 ie               aIi:_!        $pause
 i.e              aIi:_!        $pause
 irc              aI;A@s'i:
 lbs              paUndz
 ltd              lImItId
 mc               m@k
 oem              oUi:'Em
 ok               oU'keI
 os               oUEs
 riscos           rIskoUEs
 sae              EseI'i:
 seac             si:ak
 st               s@nt
 th               T
 thu              T3:        // Thursday
 ?5 thu           TIR        // Thursday
 ufo              ju:Ef'oU
 ui               ,ju:'aI
 uk               ju:k'eI
 url              ju:A@'El
 usa              ju:Es'eI
 wwii             dVb@Lju:dVb@Lju:t'u:
 xy               EkswaI
 ii               tu:          $abbrev    // roman numerals
 iii              Tri:         $abbrev
 iv               fo@          $abbrev
 abc	$abbrev
 ac	$abbrev
 adfs	$abbrev
 a.k.a	aka2_!
 agm	$abbrev 
 ai	$abbrev
 api	$abbrev
 asap	$abbrev
 awol	eIw0l
 cio	$abbrev
 ctrl	k0ntr'oUl
 dept	dI2pA@tm@nt
 diy	$abbrev
 edt	$abbrev
 eg	fO@Egz'aamp@L
 est	$abbrev
 etc	Et'sEtr@
 eur	jU@
 hmm	h@m
 (http ://)   eItSti:ti:'pi:_
 ibm	$abbrev
 ie	aIi:_!   $pause
 i.e	aIi:_!   $pause
 irc	$abbrev
 lbs	paUndz
 ltd	lImItId
 mc	m@k
 oem	$abbrev
 ok	$abbrev
 os	$abbrev
 riscos	rIskoUEs
 sae	$abbrev
 st	s@nt
 th	T
 thu	T3:        // Thursday
 ?5 thu	TIR        // Thursday
 ufo	$abbrev
 ui	$abbrev
 uk	$abbrev
 url	$abbrev
 usa	$abbrev
 wwii	dVb@Lju:dVb@Lju:t'u:
 xy	$abbrev
 ii	tu:       $abbrev    // roman numerals
 iii	Tri:      $abbrev
 iv	fo@       $abbrev
 nd                $only
 th                $only
 st                $only
 mr               mIst3         $dot
 mrs              mIsIz         $dot
 mr	mIst3     $dot
 mrs	mIsIz     $dot
 //ms               mIz         $dot
 dr               d0kt3         $dot $capital
 lt                             $dot
 prof                           $dot
 rev                            $dot
 st                             $dot
 dr      d0kt3     $dot $capital
 lt                $dot
 prof              $dot
 rev               $dot
 st                $dot
 batman           batman
 belisha          b@l'i:S@
 bely             bI2laI
 beryl            bEr@L
 basal            beIs@L
 best             bEst
 beta             bi:t@
 forever           $2
 forewarn          $2
 formid           fO@m'Id       // formidable
 forte            fo@teI
 foyer            fOIeI
 freelance        fri:laans
 frigate          frIg@t
 pedalo           pEd@loU
 pejorative       p@dZ0r@tIv
 penal            pi:n@L
 ?4 penchant      p0nS0n
 peninsula        p@n'InsjUl@
 penis            pi:nI2s
 perfectly        p3:fEktlI2
 sundial          sVndaI@l
 suite            swi:t         $onlys
 superb           su:p'3:b
 superfluous      su:p'3:flu:@s
 superman         su:p3man
 supplier         s@plaI3
 suppose                        $2
--- a/dictsource/en_rules
+++ b/dictsource/en_rules
         c)  ad (enc      eId
             ad (eq       ad
         _)  ad (i        ad
        gl)  ad (i        ad
             adjec        adZEk
             ad (le       eId
             ad (junct_   ad
         _)  al (i        al
         _)  al (ig       a2l
         _)  ali (Be      a2laI
         &)  al (isC_     @l
         &)  ali (sC_     @li
         &)  al (isCic    @l
         _)  al (k        al
         _)  all (@       a2l
        pl)  ia (nt       'aI@
       ill)  ia (nt       I2@
        al)  ia (nt       I2@
        _p)  ia (nA       i@
             iar          aI@
        @l)  iar (_       i@
             iara (_      I'A:r@
         &)  or (ous      @
             orough       Vr@
       eff)  or (t        3
         f)  orte (_      'o@teI
         w)  or (t_       o@
         w)  or (r        V
         w)  or (n        o@
         _)  tri (vi      trI
             tsch         tS
         _)  two          tu:
             ttu (r       t@
         @)  tur (A       tS@r
         @)  tur (al_     tS=@r
         @)  tur (y       tS@r
--- a/dictsource/hr_rules
+++ b/dictsource/hr_rules
 .group a
        a        a
        aj (K    aI
        a (r     A        // don't reduce to [&]
 .group b
        b        b
        o        o
        ou       oU
        oj (K    oI
        o (r     8
     &) o (r_    8
 .group p
        p        p
--- a/dictsource/no_list
+++ b/dictsource/no_list
 _0C       h'u-:nd@-*e:d@2
 _1C       'Et||h'u-:nd@-*e:d@2
 _0M1      t'u-:s@n
 _0M1      'Et||t'u-:s@n
 _1M1      'Et||t'u-:s@n
 _0M2      m'Illi:;,u:n@r
 _1M2      'e:n||m'Illi:;,u:n
 // pronouns
 jeg	$u+
 jeg	jaI $u+
 du	$u+
 han	$u+
 hun	$u+
 vi	$u+
 dere	$u+
 de	$u+
 de	di: $u+
 meg	$u+
 deg	$u+
 dette	$u+
 disse	$u+
 den	$u+
 det	$u+
 de	$u+
 det	de: $u+
 // possessive adjectives
 av	$u $brk     // of, off, by
 bak     $pause      // behind
 etter	$u $pause   // after
 for	$u $pause   // for
 for	fOr: $u $pause   // for
 foran   $pause      // in front of
 fra	$u $pause   // from
 in	$u $brk     // in
 _i     i:
 i      i:   $atend
 // MAIN WORD DICTIONARY
 //*********************
 kom         kOm
--- a/dictsource/no_rules
+++ b/dictsource/no_rules
        a        A:
        a (C%    A
        a (_     A
     X) a (CCX   A  // single syllable with >= 2 final consonants
        ai       AI
        au       aU
        aa       o:   // å
        aa (C%   O
     X) aa (CCX  O
        ae       a:   // æ
        ae (C%   a
     X) ae (CCX  a
 .group b
        b        b
        d        d
        dd       d:
     r) d (_
     &) det (_   d@2
 .group e
        e        e:
        e (C%    E
     X) e (CCX   E
        ei       aI
        e (rC    a           //  ??
     &) e (_     @2
 .group i
        i        i:
        i (C%    I
     X) i (CCX   I
     &) ig (_    I
 .group j
 .group o
        o        u:
        o (C%    O
     X) o (CCX   O
        oi       OI
        oe       Y:   // ø
        oe (C%   W
     X) oe (CCX  W
        oey      Yy
        o (nd    U
 .group u
        u        u-:
        u (C%    u-
     X) u (CCX   u-
        ui       u-I
 .group v
 .group y
        y        y:
        y (C%    y
     X) y (CCX   y
 .group z
        z        s
 .group å
        å        o:
        å (C%    O
     X) å (CCX   O
 .group æ
        æ        E:
        æ (r     a:
        æ (C%    a
     X) æ (CCX   a
 .group ø
        ø        Y:
        ø (C%    W
     X) ø (CCX   W
        øy       Yy
 .group ä
        ä        E:
        ä (C%    E
     X) ä (CCX   E
 .group ö
        ö        Y:
        ö (C%    W
     X) ö (CCX   W
 .group
        é        'e:
--- a/phsource/compile_report
+++ b/phsource/compile_report
      fi  40  123
      fr  33  115
   fr_ca  11  115
      hi  50  128
      hi  49  127
      hu  24  109
      nl  25  112
      pl  17  103
      sk  25  120
      cs   5  120
      hr  23  129
      hr  24  130
      ru  36  120
      it  28  112
      es   6  112
  2  ufric/x        base vi
  4  ufric/x2       af nl pt
  1  ufric/x_hr     hr
  1  ufric/xx       hr
  1  ustop/c        base
  5  ustop/k        base en fr hi sw
  9  ustop/k_       base en fi fr hi hu it el sw
  1  vdiph/u-i      vi
  1  vdiph/ui_2     af
  2  vdiph/ui_3     cy
  1  vdiph/ui_4     hr
  1  vdiph/Vi       vi
  1  vdiph/Vu       af
  2  vdiph/Vu_2     en_us en_wm
  1  vowel/3_3      en_rp
  3  vowel/3_en     en en_wm af
  2  vowel/@_4      nl hr
  2  vowel/8_2      en_us sv
  3  vowel/8_2      en_us hr sv
  1  vowel/8_3      zh_yue
 10  vowel/a        en_n cy de hu nl pl sk hr
 11  vowel/a        en_n cy de hu nl pl sk hr
  4  vowel/a#       en_sc it pt
  5  vowel/a_2      eo it pt_pt ro vi
  4  vowel/a#_2     hr sv is sw
  6  vowel/ii_en    en en_n
  5  vowel/@_low    hi ro no
  8  vowel/o        en en_wm de hi it pt_pt sv
  1  vowel/o-       hr
  4  vowel/o_2      cy hi hu no
  2  vowel/o-_2     en_n en_wm
  2  vowel/o_3      en_sc
  3  vowelr/r-voc   hi sk
  2  vowelr/V3_r    en en_sc
  1  vowelr/V_r     en
  8  vowel/u        en_n cy de eo fr hi cs
  7  vowel/u        en_n cy de eo fr cs
  5  vowel/u#       en en_sc
  3  vowel/u_2      fi sk ro
  1  vowel/u#_2     sv
--- a/phsource/l/_l
+++ b/phsource/l/_l
--- a/phsource/l/l@
+++ b/phsource/l/l@
--- a/phsource/l/la
+++ b/phsource/l/la
--- a/phsource/l/le
+++ b/phsource/l/le
--- a/phsource/l/li
+++ b/phsource/l/li
--- a/phsource/l/lo
+++ b/phsource/l/lo
--- a/phsource/l/lu
+++ b/phsource/l/lu
--- a/phsource/ph_croatian
+++ b/phsource/ph_croatian
 endphoneme
 phoneme A          // 'a' before 'r', doesn't reduce to [&]
  vowel starttype (a) endtype (a)
  length 160
  formants vowel/a
 endphoneme
 phoneme &
  vowel starttype (a) endtype (a)
  length 140
 phoneme 8
  vowel starttype (o) endtype (o)
  length 140
  formants vowel/o-
  formants vowel/8_2
 endphoneme
 phoneme aI
  vowel starttype (a) endtype (i)
  length 230
  length 250
  formants vdiph/ai
 endphoneme
 phoneme eI
  vowel starttype (e) endtype (i)
  length 220
  length 250
  formants vdiph/ei_2
  linkout ;
 endphoneme
 phoneme oI
  vowel starttype (o) endtype (i)
  length 220
  length 250
  formants vdiph/ooi_3
  linkout ;
 endphoneme
 phoneme uI
  vowel starttype (u) endtype (i)
  length 250
  formants vdiph/ui_4
  linkout ;
 endphoneme
 phoneme p
  vls blb stop
  switchvoicing Q
 endphoneme
 phoneme x2  // TEST
  vls vel frc
  vowelin f1=0  f2=2300 200 400  f3=-100 80
  vowelout f1=0 f2=2300 300 400  f3=-100 80  rms=20
  length 100
  lengthmod 3
  wave ufric/xx%120
  switchvoicing Q
 endphoneme
--- a/phsource/ph_hindi
+++ b/phsource/ph_hindi
 phoneme : //  Lengthen the previous vowel by "length"
  virtual
  length 80
  length 100
 endphoneme
 phoneme @
  length 130
  formants vowel/@_bck
  before H vowel/@_low
  reduceto  NULL 0
  reduceto  @- 0
 endphoneme
 phoneme V
 phoneme i:
  vowel long starttype (i) endtype (i)
  length 220
  length 230
  formants vowel/i_fnt
  linkout ;
 endphoneme
 phoneme i         // as [i:] but not marked as 'long'
  vowel starttype (i) endtype (i)
  length 220
  length 230
  formants vowel/i_fnt
  linkout ;
 endphoneme
 phoneme e:
  vowel long starttype (e) endtype (e)
  length 210
  length 220
  formants vowel/e_2
 endphoneme
 phoneme E:
  vowel long starttype (e) endtype (e)
  length 210
  length 220
  formants vdiph/ee-e
 endphoneme
 phoneme &:
  vowel long starttype (a) endtype (a)
  length 210
  length 220
  formants vowel/&
 endphoneme
 phoneme o:
  vowel long starttype (o) endtype (o)
  length 210
  length 220
  formants vowel/o_2
 endphoneme
 phoneme O:
  vowel long starttype (o) endtype (o)
  length 200
  length 210
  formants vowel/oo_4
 endphoneme
 phoneme u:
  vowel long starttype (u) endtype (u)
  length 220
  length 230
  formants vowel/u_bck
 endphoneme
 phoneme u2:
  vowel long starttype (u) endtype (u)
  length 220
  formants vowel/u
 endphoneme
 phoneme r-
--- a/phsource/vdiph/ei_2
+++ b/phsource/vdiph/ei_2
--- a/phsource/vdiph/ooi_3
+++ b/phsource/vdiph/ooi_3
--- a/phsource/vowel/vowelchart.png
+++ b/phsource/vowel/vowelchart.png
--- a/phsource/w/w2
+++ b/phsource/w/w2
--- a/src/compiledata.cpp
+++ b/src/compiledata.cpp
 extern void Write4Bytes(FILE *f, int value);
 extern void MakeVowelLists(void);
 extern void FindPhonemesUsed(void);
 extern int CompileDictionary(const char *dsource, const char *dict_name, FILE *log, char *fname);
 extern char voice_name[];
 }
 static int ph_sorter(char **a, char **b)
 {//======================================
 	int ix;
 	int t1, t2;
 	char mnem1[6];
 	PHONEME_TAB *p1 = (PHONEME_TAB *)(*a);
 	PHONEME_TAB *p2 = (PHONEME_TAB *)(*b);
 	t1 = p1->type;
 	if(t1 > phVOWEL) t1 = phVOWEL+1;
 	t2 = p2->type;
 	if(t2 > phVOWEL) t2 = phVOWEL+1;
 	if((ix = t1 - t2) != 0)
 		return(ix);
 	strcpy(mnem1,WordToString(p1->mnemonic));
  return(strcasecmp(mnem1,WordToString(p2->mnemonic)));
 }   /* end of ph_sorter */
 void PrintPhonemesUsed(FILE *f, const char *dictname)
 {//==================================================
 	int ix;
 	PHONEME_TAB *ph;
 	PHONEME_TAB *ph_tab[N_PHONEME_TAB];
 	int count = 0;
 	int n_ph = 0;
 	int section = 0;
 	fprintf(f,"\n\nDictionary %s_dict\n",dictname);
 	fflush(f);
 	for(ix=0; (ix<N_PHONEME_TAB) && (phoneme_tab[ix] != NULL); ix++)
 	{
 		if(phoneme_tab_flags[ix] & 2)
 		{
 			ph_tab[n_ph++] = phoneme_tab[ix];
 		}
 	}
 	qsort((void *)ph_tab,n_ph,sizeof(PHONEME_TAB *),(int (*)(const void *,const void *))ph_sorter);
 	for(ix=0; ix<n_ph; ix++)
 	{
 		ph = ph_tab[ix];
 		if(ph->type > 1)
 		{
 			if((ph->type > phVOWEL) && (section == 0))
 			{
 				section = 1;
 				count = 0;
 				fputc('\n',f);
 			}
 			if((count & 0x7) == 0)
 				fputc('\n',f);
 			fprintf(f,"%-4s ",WordToString(ph->mnemonic));
 			count++;
 		}
 	}
 	fputc('\n',f);
 }  // end of  PrintPhonemesUsed
 wxString CompileAllDictionaries()
 {//==============================
 	wxString filename;
 	int errors = 0;
 	int dict_count = 0;
 	FILE *log;
 	FILE *f_phused;
 	char dictname[80];
 	char fname_log[80];
 	char save_voice_name[80];
 	sprintf(fname_log,"%s%s",path_dsource,"dict_log");
 	log = fopen(fname_log,"w");
 	sprintf(fname_log,"%s%s",path_dsource,"dict_phonemes");
 	f_phused = fopen(fname_log,"w");
 	if(f_phused)
 	{
 		fprintf(f_phused,"Phonemes which are used in the *_rules and *_list files\n");
 	}
 	bool cont = dir.GetFirst(&filename, _T("*_rules"), wxDIR_FILES);
 	while ( cont )
 			errors += err;
 		}
 		if(f_phused != NULL)
 		{
 			memset(phoneme_tab_flags,0,sizeof(phoneme_tab_flags));
 			FindPhonemesUsed();
 			PrintPhonemesUsed(f_phused,dictname);
 		}
 		cont = dir.GetNext(&filename);
 	}
 	if(log != NULL)
 		fclose(log);
 	if(f_phused != NULL)
 		fclose(f_phused);
 	LoadVoice(save_voice_name,1);
--- a/src/compiledict.cpp
+++ b/src/compiledict.cpp
 				case 'W':
 					c = RULE_SPELLING;
 					break;
 				case 'X':
 					c = RULE_NOVOWELS;
 					break;
 				case 'L':
 					// expect two digits
 					c = *p++ - '0';
--- a/src/dictionary.cpp
+++ b/src/dictionary.cpp
 				else
 				{
 					// unstressed syllable within a word
 					v_stress = 1;      /* change from 0 (unstressed) to 1 (diminished stress) */
 					if((vowel_stress[v-1] != 1) || ((langopts.stress_flags & 0x10000) == 0))
 					{
 						v_stress = 1;      /* change from 0 (unstressed) to 1 (diminished stress) */
 						vowel_stress[v] = v_stress;
 					}
 				}
 			}
 	static char output[60];
 	static char symbols[] = {' ',' ',' ',' ',' ',' ',' ',' ',' ',
 			'@','&','%','+','#','S','D','Z','A','B','C','H','F','G','Y','N','K','V','L','T'};
 			'@','&','%','+','#','S','D','Z','A','B','C','H','F','G','Y','N','K','V','L','T','X','?','W'};
 	match_type = 0;
 					}
 					break;
 				case RULE_NOVOWELS:
 					{
 						char *p = post_ptr + letter_xbytes;
 						while(letter_w != RULE_SPACE)
 						{
 							if(IsLetter(letter_w,LETTERGP_VOWEL2))
 							{
 								failed = 1;
 								break;
 							}
 							p += utf8_in(&letter_w,p,0);
 						}
 						if(!failed)
 							match.points += (19-distance_right);
 					}
 					break;
 				case RULE_INC_SCORE:
 					match.points += 20;      // force an increase in points
 					break;
 						failed = 1;
 					break;
 				case RULE_NOVOWELS:
 					if(word_vowel_count== 0)
 						match.points += 19;
 					else
 						failed =1;
 					break;
 				case RULE_IFVERB:
 					if(expect_verb)
 						match.points += 1;
 int Translator::Lookup(char *word, char *ph_out)
 {//=============================================
 	// Looks up 'word' with LookupDictList(), passing 'ph_out' through, and
 	// returns that function's result.
 	//
 	// A local flags word is passed instead of NULL.
 	// NOTE(review): presumably LookupDictList() writes through the flags
 	// pointer, so NULL is unsafe here -- confirm against its definition.
 	// The stale "return(...NULL...)" that made this code unreachable has
 	// been removed.
 	unsigned int flags = 0;

 	return(LookupDictList(word,ph_out,&flags,0));
 }
--- a/src/synthdata.cpp
+++ b/src/synthdata.cpp
 #include "translate.h"
 #include "wave.h"
 const char *version_string = "1.24.11  17.May.07";
 const char *version_string = "1.24.12  18.May.07";
 const int version_phdata  = 0x012201;
 int option_device_number = -1;
--- a/src/tr_languages.cpp
+++ b/src/tr_languages.cpp
 			static const wchar_t replace_chars_hi[11] = {0x966,0x967,0x968,0x969,0x96a,0x96b,0x96c,0x96d,0x96e,0x96f,0};  // digits 0-9
 			static const unsigned int replacement_chars_hi[11] = {0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,0x38,0x39,0};
 			static int stress_lengths_hi[8] = {190, 190,  210, 210,  0, 0,  230, 250};
 			static int stress_amps_hi[8] = {17,14, 20,20, 20,24, 24,22 };
 			static int stress_amps_hi[8] = {17,14, 20,19, 20,24, 24,22 };
 			tr = new Translator();
 			SetupTranslator(tr,stress_lengths_hi,stress_amps_hi);
 			tr->langopts.length_mods0 = tr->langopts.length_mods;  // don't lengthen vowels in the last syllable
 			tr->langopts.stress_rule = 6;      // stress on last heaviest syllable
 			tr->langopts.stress_flags =  0x4;   // use 'diminished' for unstressed final syllable
 			tr->langopts.stress_flags =  0x10004;   // use 'diminished' for unstressed final syllable
 			tr->langopts.numbers = 0x811 + 0x40000;
 			tr->letter_bits_offset = OFFSET_DEVANAGARI;
 			tr->langopts.replace_chars = replace_chars_hi;
 	case L('n','o'):  // Norwegian
 		{
 			static int stress_amps_no[] = {16,16, 20,20, 20,24, 24,22 };
 			static int stress_lengths_no[8] = {160,140, 200,200, 0,0, 250,270};
 			static int stress_lengths_no[8] = {160,140, 200,190, 0,0, 220,240};
 			tr = new Translator();
 			SetupTranslator(tr,stress_lengths_no,stress_amps_no);
--- a/src/translate.h
+++ b/src/translate.h
 #define RULE_NOTVOWEL   25   // K
 #define RULE_IFVERB     26   // V
 #define RULE_LETTERGP   27   // L + letter group number
 #define RULE_ALT1       28   // word has $alt attribute
 #define RULE_SPELLING   31   // while spelling letter-by-letter
 #define RULE_LAST_RULE   28
 #define RULE_ALT1       28   // T word has $alt attribute
 #define RULE_NOVOWELS   29   // X no vowels up to word boundary
 #define RULE_SPELLING   31   // W while spelling letter-by-letter
 #define RULE_LAST_RULE   31
 #define LETTERGP_A	0
 #define LETTERGP_B	1
 // bit9=stress last syllable if it doesn't end in vowel or "s" or "n"  LANG=Spanish
 // bit12= In a 2-syllable word, if one has primary stress then give the other secondary stress
 // bit13= If there is only one syllable before the primary stress, give it a secondary stress
 // bit16= Don't diminish consecutive syllables within a word.
 	int stress_flags; 
 	int unstressed_wd1; // stress for $u word of 1 syllable
--- a/src/vowelchart.cpp
+++ b/src/vowelchart.cpp
 static void FindPhonemesUsed(void)
 {//===============================
 void FindPhonemesUsed(void)
 {//========================
 	int hash;
 	char *p;
 	char *start;
 	char *group;
 	char *next;
 	unsigned char c;
 	int count = 0;
 	p = translator->data_dictrules;
 	while(*p != 0)
 	{
 		if(*p == RULE_CONDITION)
 			p+=2;
 		if(*p == RULE_GROUP_END)
 		{
 			p++;
 			if(*p == 0) break;
 		}
 		if(*p == RULE_GROUP_START)
 		{
 			group = p;
 			p += (strlen(p)+1);
 		}
 		while((((c = *p) != RULE_PHONEMES)) && (c != 0)) p++;
 		count++;
 		if(c == RULE_PHONEMES)
 		{
 			start = p;
 			p++;
 			while(*p != 0)
 			{