lang=pt Reduce phoneme [&~] to [&] in unstressed syllables. git-svn-id: https://espeak.svn.sourceforge.net/svnroot/espeak/trunk@30 d46cf337-b52f-0410-862d-fd96e6ae7743master
| @@ -421,6 +421,7 @@ | |||
| &) schaft (_S6 Saft | |||
| schein (lich S'aIn | |||
| schwer Sve:* | |||
| _) selbst (@@P6 z'Elpst | |||
| shop _^_EN | |||
| _) sky _^_EN | |||
| soldat z%OldA:t | |||
| @@ -217,9 +217,9 @@ o~ U u uI u~ y | |||
| * : ; b C d dZ f | |||
| g h j k l l^ m n | |||
| N n^ p Q r R s S | |||
| s# s; t T tS ts v w | |||
| x z Z | |||
| N n^ p Q r R r- s | |||
| S s# s; t T tS ts v | |||
| w x z Z | |||
| Dictionary ru_dict | |||
| @@ -46,7 +46,7 @@ mm milime:tER | |||
| ill illEdvE $dot | |||
| stb SAtYb:i $dot | |||
| vö vEZd _!'Yss2E | |||
| vö v'EZd||_'Yss2E | |||
| pl pe:lda:ul $dot | |||
| mta $abbrev | |||
| a.m An^n^i||m'int $dot | |||
| @@ -200,17 +200,17 @@ a A: $atend // letter "a" | |||
| b be: | |||
| c tse: | |||
| d de: | |||
| f Ef | |||
| f Eff | |||
| g ge: | |||
| h ha: | |||
| j je: | |||
| k ka: | |||
| l El | |||
| m Em | |||
| n En | |||
| l Ell | |||
| m Emm | |||
| n Enn | |||
| p pe: | |||
| q ku | |||
| r ER | |||
| r ERR | |||
| s S_ | |||
| s ES $atend | |||
| _s ES | |||
| @@ -1,7 +1,7 @@ | |||
| // Spelling-to-phoneme rules for Icelandic | |||
| // This file is UTF8 encoded. | |||
| // letter group L08 voiceless consonants | |||
| // letter group B voiceless consonants c,f,h,k,p,t,x,þ | |||
| // letter group F p,t,k,s | |||
| // letter group H j,r,v, | |||
| @@ -43,7 +43,7 @@ | |||
| .group ð | |||
| ð D | |||
| ð (L08 T | |||
| ð (B T | |||
| ð (__ T // before pause | |||
| @@ -154,23 +154,23 @@ | |||
| .group l | |||
| l l | |||
| l (_ l# | |||
| l (L08X l# | |||
| l (BX l# | |||
| ll dl | |||
| ll (L08 tl# | |||
| ll (B8 tl# | |||
| ll (_ tl# // ?? | |||
| .group m | |||
| m m | |||
| mm (K m | |||
| L08) m hm# | |||
| m (L08 m# | |||
| B) m hm# | |||
| m (B m# | |||
| .group n | |||
| n n | |||
| L08) n hn# | |||
| n (L08 n# | |||
| B) n hn# | |||
| n (B n# | |||
| n (g N | |||
| ng (_ Ng | |||
| ngt (K N#d | |||
| @@ -209,7 +209,7 @@ | |||
| .group p | |||
| _) p p | |||
| p p | |||
| L08) p b | |||
| B) p b | |||
| s) p b | |||
| p (K b | |||
| p (s f | |||
| @@ -226,8 +226,8 @@ | |||
| _) r R2 | |||
| C) r @-* | |||
| A) r (A R | |||
| L08) r r# | |||
| r (L08 r# | |||
| B) r r# | |||
| r (B r# | |||
| r (_ r# | |||
| r R | |||
| rl dl | |||
| @@ -244,7 +244,7 @@ | |||
| .group t | |||
| _) t t | |||
| t t | |||
| L08) t d | |||
| B) t d | |||
| s) t d | |||
| t (K d | |||
| tt hd | |||
| @@ -28,8 +28,8 @@ | |||
| a (r_ 'a | |||
| ?1 p) a (d_ =E | |||
| a (CC_ & | |||
| ?1 al (_ 'Al // eg: Portugal, capital, etc. | |||
| ?1 a (lK ,A // Algarve, alto, etc... | |||
| ?1 a (l_ 'A // eg: Portugal, capital, etc. | |||
| ?1 a (lK ,A // Algarve, Almerinda, etc... | |||
| ?2 al (K aU | |||
| ?2 alh alj | |||
| @@ -141,10 +141,10 @@ | |||
| e (cem_ E | |||
| e (ces_ E | |||
| e (stA_ E | |||
| e (stA_ E | |||
| e (stAm_ E | |||
| e (stAs_ E | |||
| e (strA_ E | |||
| e (strA_ E | |||
| e (rnA_ E | |||
| e (rnAm_ E | |||
| @@ -157,7 +157,7 @@ e (stA_ E | |||
| e (xA_ E | |||
| e (xAm_ E | |||
| e (xAs_ E | |||
| ?2 _n) e (t E | |||
| ?2 _n) e (t E | |||
| em (C eIm | |||
| en (K eIN | |||
| @@ -475,7 +475,7 @@ e (stA_ E | |||
| l) r x | |||
| n) r x | |||
| s) r x | |||
| ?1 A) r (_ r // this letter is imperfect. | |||
| ?1 A) r (_ r- // [r-] is English linking-r | |||
| .group s | |||
| @@ -9,8 +9,6 @@ | |||
| // G voiced: б в г д ж з | |||
| // H hard consonant: ъ ж ц ш | |||
| // Y iotated vowel, softsign: ь е ё и ю я | |||
| // L08 кпстфх | |||
| // L09 бгджзлмнр | |||
| .group а | |||
| @@ -24,8 +24,8 @@ | |||
| ru 36 123 | |||
| it 25 114 | |||
| es 6 114 | |||
| pt 34 137 | |||
| pt_pt 20 137 | |||
| pt 28 131 | |||
| pt_pt 20 131 | |||
| ro 36 138 | |||
| el 8 114 | |||
| sv 25 117 | |||
| @@ -418,14 +418,14 @@ | |||
| 2 vowel/@_4 nl hr | |||
| 3 vowel/8_2 en_us hr sv | |||
| 1 vowel/8_3 zh_yue | |||
| 12 vowel/a en_n cy de hu nl pl sk hr pt | |||
| 11 vowel/a en_n cy de hu nl pl sk hr | |||
| 4 vowel/a# en_sc it pt | |||
| 6 vowel/a_2 eo it pt pt_pt ro vi | |||
| 5 vowel/a#_2 hr pt sv is sw | |||
| 7 vowel/a_3 en_sc cs pt is | |||
| 4 vowel/a#_2 hr sv is sw | |||
| 6 vowel/a_3 en_sc cs is | |||
| 12 vowel/a#_3 en en_n en_us en_wm de hi ru pt_pt vi zh_yue | |||
| 4 vowel/a_4 en_wm el vi | |||
| 3 vowel/a_5 pt sv sw | |||
| 2 vowel/a_5 sv sw | |||
| 7 vowel/aa en_us fi fr_ca no zh_yue | |||
| 1 vowel/aa# fi | |||
| 3 vowel/aa_2 en cy | |||
| @@ -433,7 +433,7 @@ | |||
| 2 vowel/aa_4 sv vi | |||
| 2 vowel/aa_5 en_n | |||
| 1 vowel/aa_6 de | |||
| 2 vowel/aa_7 nl pt | |||
| 1 vowel/aa_7 nl | |||
| 4 vowel/a_en en fr | |||
| 1 vowel/@_bck hi | |||
| 13 vowel/e en en_n af cy eo fr hu hr it pt pt_pt vi | |||
| @@ -513,7 +513,7 @@ | |||
| 2 vowel/V en en_sc | |||
| 3 vowel/V_2 af ru | |||
| 3 vowel/V_3 en_rp hi vi | |||
| 2 vowel/V_4 en_sc pt | |||
| 1 vowel/V_4 en_sc | |||
| 6 vowel/y en de fi hu nl zh_yue | |||
| 10 vowel/y# en en_wm de fi fr hu nl ro vi zh_yue | |||
| 1 vowel/y## is | |||
| @@ -255,7 +255,7 @@ endphoneme | |||
| phoneme r | |||
| liquid | |||
| liquid starttype #r endtype #r | |||
| length 60 | |||
| vowelin f1=2 f2=2700 -300 -200 f3=-1300 80 | |||
| vowelout f1=2 f2=1700 -300 -200 f3=-1300 80 | |||
| @@ -139,7 +139,7 @@ endphoneme | |||
| phoneme r // from Afrikaans | |||
| liquid | |||
| liquid starttype #r endtype #r | |||
| length 60 | |||
| vowelin f1=2 f2=2700 -300 -200 f3=-1300 80 | |||
| vowelout f1=2 f2=1700 -300 -200 f3=-1300 80 | |||
| @@ -32,6 +32,7 @@ phoneme &~ | |||
| vowel starttype (@) endtype (@) | |||
| length 200 | |||
| formants vnasal/a#_n | |||
| reduceto & 4 | |||
| endphoneme | |||
| phoneme &U~ | |||
| @@ -63,48 +64,12 @@ phoneme &/ // Used for final "a" when next word starts with "a" | |||
| endphoneme | |||
| phoneme A // for "al" in European Portuguese | |||
| vowel starttype (a) endtype (a) | |||
| length 200 | |||
| formants vowel/a_3 | |||
| endphoneme | |||
| phoneme A2 // for "al" in European Portuguese | |||
| vowel starttype (a) endtype (a) | |||
| length 200 | |||
| formants vowel/aa_7 | |||
| endphoneme | |||
| phoneme A3 // for "al" in European Portuguese | |||
| vowel starttype (a) endtype (a) | |||
| length 200 | |||
| formants vowel/a_5 | |||
| endphoneme | |||
| phoneme A4 // for "al" in European Portuguese | |||
| phoneme A // for "al" in European Portuguese, don't reduce to [&] | |||
| vowel starttype (a) endtype (a) | |||
| length 200 | |||
| formants vowel/a_2 | |||
| endphoneme | |||
| phoneme A5 // for "al" in European Portuguese | |||
| vowel starttype (a) endtype (a) | |||
| length 200 | |||
| formants vowel/a | |||
| endphoneme | |||
| phoneme A6 // for "al" in European Portuguese | |||
| vowel starttype (a) endtype (a) | |||
| length 200 | |||
| formants vowel/V_4 | |||
| endphoneme | |||
| phoneme A7 // for "al" in European Portuguese | |||
| vowel starttype (a) endtype (a) | |||
| length 200 | |||
| formants vowel/a#_2 | |||
| endphoneme | |||
| phoneme E | |||
| @@ -402,7 +402,7 @@ phoneme R2 // this is [R] from Slovak/Czech | |||
| endphoneme | |||
| phoneme R3 // Afrikaans | |||
| liquid | |||
| liquid starttype #r endtype #r | |||
| length 60 | |||
| vowelin f1=2 f2=2700 -300 -200 f3=-1300 80 | |||
| vowelout f1=2 f2=1700 -300 -200 f3=-1300 80 | |||
| @@ -532,13 +532,15 @@ int Compile::NextItem(int type) | |||
| { | |||
| item_string[ix++] = c; | |||
| c = fgetc(f_in); | |||
| if(feof(f_in)) | |||
| break; | |||
| if(item_string[ix-1] == '=') | |||
| break; | |||
| } | |||
| ungetc(c,f_in); | |||
| item_string[ix] = 0; | |||
| if(feof(f_in)) return(-1); | |||
| if(!feof(f_in)) | |||
| ungetc(c,f_in); | |||
| keyword = -1; | |||
| @@ -665,7 +665,7 @@ void copy_rule_string(char *string, int &state) | |||
| c = *p++ - '0'; | |||
| value = *p++ - '0'; | |||
| c = c * 10 + value; | |||
| if((value < 0) || (value > 9) || (c <= 0) || (c >= N_LETTER_TYPES)) | |||
| if((value < 0) || (value > 9) || (c <= 0) || (c >= N_LETTER_GROUPS)) | |||
| { | |||
| c = 0; | |||
| fprintf(f_log,"%5d: Expected 2 digits after 'L'",linenum); | |||
| @@ -676,11 +676,11 @@ void copy_rule_string(char *string, int &state) | |||
| { | |||
| // pre-rule, put the group number before the RULE_LETTERGP command | |||
| output[ix++] = c; | |||
| c = RULE_LETTERGP; | |||
| c = RULE_LETTERGP2; | |||
| } | |||
| else | |||
| { | |||
| output[ix++] = RULE_LETTERGP; | |||
| output[ix++] = RULE_LETTERGP2; | |||
| } | |||
| break; | |||
| @@ -1076,8 +1076,47 @@ void output_rule_group(FILE *f_out, int n_rules, char **rules, char *name) | |||
| int compile_dictrules(FILE *f_in, FILE *f_out, char *fname_temp) | |||
| {//============================================================= | |||
| static int compile_lettergroup(char *input, FILE *f_out) | |||
| {//===================================================== | |||
| char *p; | |||
| int group; | |||
| p = input; | |||
| if(!isdigit(p[0]) || !isdigit(p[1])) | |||
| { | |||
| return(1); | |||
| } | |||
| group = atoi(&p[1]); | |||
| if(group >= N_LETTER_GROUPS) | |||
| return(1); | |||
| while(!isspace2(*p)) p++; | |||
| fputc(RULE_GROUP_START,f_out); | |||
| fputc(RULE_LETTERGP2,f_out); | |||
| fputc(group + 'A', f_out); | |||
| for(;;) | |||
| { | |||
| while(isspace2(*p)) p++; | |||
| if(*p == 0) | |||
| break; | |||
| while((*p & 0xff) > ' ') | |||
| { | |||
| fputc(*p++, f_out); | |||
| } | |||
| fputc(0,f_out); | |||
| } | |||
| fputc(RULE_GROUP_END,f_out); | |||
| return(0); | |||
| } | |||
| static int compile_dictrules(FILE *f_in, FILE *f_out, char *fname_temp) | |||
| {//==================================================================== | |||
| char *prule; | |||
| unsigned char *p; | |||
| int ix; | |||
| @@ -1108,6 +1147,16 @@ int compile_dictrules(FILE *f_in, FILE *f_out, char *fname_temp) | |||
| buf = fgets(buf1,sizeof(buf1),f_in); | |||
| if((buf != NULL) && (buf[0] == '\r')) buf++; // ignore extra \r in \r\n | |||
| if((buf != NULL) && (memcmp(buf,".L",2)==0)) | |||
| { | |||
| if(compile_lettergroup(&buf[2], f_out) != 0) | |||
| { | |||
| fprintf(f_log,"%5d: Bad lettergroup\n",linenum); | |||
| error_count++; | |||
| } | |||
| continue; | |||
| } | |||
| if((buf == NULL) || (memcmp(buf,".group",6)==0)) | |||
| { | |||
| // next .group or end of file, write out the previous group | |||
| @@ -213,6 +213,7 @@ void Translator::InitGroups(void) | |||
| groups2_count[ix]=0; | |||
| groups2_start[ix]=255; // indicates "not set" | |||
| } | |||
| memset(letterGroups,0,sizeof(letterGroups)); | |||
| p = data_dictrules; | |||
| while(*p != 0) | |||
| @@ -223,30 +224,44 @@ void Translator::InitGroups(void) | |||
| break; | |||
| } | |||
| p++; | |||
| len = strlen(p); | |||
| p_name = p; | |||
| c = p_name[0]; | |||
| p += (len+1); | |||
| if(len == 1) | |||
| if(p[0] == RULE_LETTERGP2) | |||
| { | |||
| groups1[c] = p; | |||
| ix = p[1] - 'A'; | |||
| p += 2; | |||
| if((ix >= 0) && (ix < N_LETTER_GROUPS)) | |||
| { | |||
| letterGroups[ix] = p; | |||
| } | |||
| } | |||
| else | |||
| if(len == 0) | |||
| { | |||
| groups1[0] = p; | |||
| len = strlen(p); | |||
| p_name = p; | |||
| c = p_name[0]; | |||
| p += (len+1); | |||
| if(len == 1) | |||
| { | |||
| groups1[c] = p; | |||
| } | |||
| else | |||
| if(len == 0) | |||
| { | |||
| groups1[0] = p; | |||
| } | |||
| else | |||
| { | |||
| if(groups2_start[c] == 255) | |||
| groups2_start[c] = n_groups2; | |||
| groups2_count[c]++; | |||
| groups2[n_groups2] = p; | |||
| c2 = p_name[1]; | |||
| groups2_name[n_groups2++] = (c + (c2 << 8)); | |||
| } | |||
| } | |||
| else | |||
| { | |||
| if(groups2_start[c] == 255) | |||
| groups2_start[c] = n_groups2; | |||
| groups2_count[c]++; | |||
| groups2[n_groups2] = p; | |||
| c2 = p_name[1]; | |||
| groups2_name[n_groups2++] = (c + (c2 << 8)); | |||
| } | |||
| // skip over all the rules in this group | |||
| rule_count = 0; | |||
| while(*p != RULE_GROUP_END) | |||
| @@ -589,6 +604,31 @@ return(0); | |||
| int Translator::IsLetterGroup(char *word, int group) | |||
| {//================================================= | |||
| // match the word against a list of utf-8 strings | |||
| char *p; | |||
| char *w; | |||
| p = letterGroups[group]; | |||
| while(*p != 0) | |||
| { | |||
| w = word; | |||
| while(*p == *w) | |||
| { | |||
| *w++; | |||
| *p++; | |||
| } | |||
| if(*p == 0) | |||
| return(w-word); // matched a complete string | |||
| while(*p++ != 0); // skip to end of string | |||
| } | |||
| return(0); | |||
| } | |||
| int Translator::IsLetter(int letter, int group) | |||
| {//============================================ | |||
| if(letter_groups[group] != NULL) | |||
| @@ -1412,8 +1452,9 @@ char *Translator::DecodeRule(const char *group, char *rule) | |||
| static char output[60]; | |||
| static char symbols[] = {' ',' ',' ',' ',' ',' ',' ',' ',' ', | |||
| '@','&','%','+','#','S','D','Z','A','B','C','H','F','G','Y','N','K','V','L','T','X','?','W'}; | |||
| '@','&','%','+','#','S','D','Z','A','L',' ',' ',' ',' ',' ','N','K','V',' ','T','X','?','W'}; | |||
| static char symbols_lg[] = {'A','B','C','H','F','G','Y'}; | |||
| match_type = 0; | |||
| buf_pre[0] = 0; | |||
| @@ -1465,25 +1506,23 @@ char *Translator::DecodeRule(const char *group, char *rule) | |||
| } | |||
| else | |||
| if(rb == RULE_LETTERGP) | |||
| { | |||
| c = symbols_lg[*rule++ - 'A']; | |||
| } | |||
| else | |||
| if(rb == RULE_LETTERGP2) | |||
| { | |||
| value = *rule++ - 'A'; | |||
| if(value >= 8) | |||
| { | |||
| p[0] = 'L'; | |||
| p[1] = (value / 10) + '0'; | |||
| c = (value % 10) + '0'; | |||
| p[0] = 'L'; | |||
| p[1] = (value / 10) + '0'; | |||
| c = (value % 10) + '0'; | |||
| if(match_type == RULE_PRE) | |||
| { | |||
| p[0] = c; | |||
| c = 'L'; | |||
| } | |||
| p+=2; | |||
| } | |||
| else | |||
| if(match_type == RULE_PRE) | |||
| { | |||
| c = symbols[value + RULE_LETTER_GROUPS]; | |||
| p[0] = c; | |||
| c = 'L'; | |||
| } | |||
| p+=2; | |||
| } | |||
| else | |||
| if(rb <= RULE_LAST_RULE) | |||
| @@ -1611,6 +1650,7 @@ void Translator::MatchRule(char *word[], const char *group, char *rule, MatchRec | |||
| int distance_right; | |||
| int distance_left; | |||
| int lg_pts; | |||
| int n_bytes; | |||
| MatchRecord match; | |||
| static MatchRecord best; | |||
| @@ -1626,6 +1666,7 @@ void Translator::MatchRule(char *word[], const char *group, char *rule, MatchRec | |||
| if(rule == NULL) | |||
| { | |||
| match_out->points = 0; | |||
| (*word)++; | |||
| return; | |||
| } | |||
| @@ -1752,6 +1793,17 @@ void Translator::MatchRule(char *word[], const char *group, char *rule, MatchRec | |||
| failed = 1; | |||
| break; | |||
| case RULE_LETTERGP2: // match against a list of utf-8 strings | |||
| letter_group = *rule++ - 'A'; | |||
| if((n_bytes = IsLetterGroup(post_ptr-1,letter_group)) >0) | |||
| { | |||
| match.points += (20-distance_right); | |||
| post_ptr += (n_bytes-1); | |||
| } | |||
| else | |||
| failed =1; | |||
| break; | |||
| case RULE_NOTVOWEL: | |||
| if(!IsLetter(letter_w,0)) | |||
| { | |||
| @@ -419,6 +419,22 @@ if((ph->mnemonic == 't') && ((prev->type == phVOWEL) || (prev->mnemonic == 'n')) | |||
| } | |||
| #endif | |||
| if((ph->reduce_to != 0) && (ph->type != phVOWEL) && !(plist2->synthflags & SFLAG_DICTIONARY)) | |||
| { | |||
| // reduction for vowels has already been done in SetWordStress | |||
| int reduce_level; | |||
| if(next->type == phVOWEL) | |||
| { | |||
| reduce_level = (ph->phflags >> 28) & 7; | |||
| if((&plist2[1])->stress < reduce_level) | |||
| { | |||
| // look at the stress of the following vowel | |||
| ph = phoneme_tab[ph->reduce_to]; | |||
| } | |||
| } | |||
| } | |||
| if((plist2+1)->synthflags & SFLAG_LENGTHEN) | |||
| { | |||
| static char types_double[] = {phFRICATIVE,phVFRICATIVE,phNASAL,phLIQUID,0}; | |||
| @@ -35,8 +35,8 @@ | |||
| #include "translate.h" | |||
| #include "wave.h" | |||
| const char *version_string = "1.25.13 30.May.07"; | |||
| const int version_phdata = 0x012501; | |||
| const char *version_string = "1.25.14 31.May.07"; | |||
| const int version_phdata = 0x012514; | |||
| int option_device_number = -1; | |||
| @@ -344,7 +344,7 @@ Translator *SelectTranslator(const char *name) | |||
| { | |||
| static int stress_amps_is[] = {16,16, 20,20, 20,24, 24,22 }; | |||
| static int stress_lengths_is[8] = {180,155, 200,200, 0,0, 240,250}; | |||
| static const wchar_t is_L08[] = {'c','f','h','k','p','t','x',0xfe,0}; // voiceless consonants, including 'þ' ?? 's' | |||
| static const wchar_t is_lettergroup_B[] = {'c','f','h','k','p','t','x',0xfe,0}; // voiceless consonants, including 'þ' ?? 's' | |||
| tr = new Translator(); | |||
| SetupTranslator(tr,stress_lengths_is,stress_amps_is); | |||
| @@ -356,7 +356,7 @@ Translator *SelectTranslator(const char *name) | |||
| ResetLetterBits(tr,0x18); | |||
| SetLetterBits(tr,4,"kpst"); // Letter group F | |||
| SetLetterBits(tr,3,"jvr"); // Letter group H | |||
| tr->letter_groups[8] = is_L08; | |||
| tr->letter_groups[1] = is_lettergroup_B; | |||
| SetLetterVowel(tr,'y'); | |||
| tr->langopts.numbers = 0xe9; | |||
| tr->langopts.numbers2 = 0x2; | |||
| @@ -656,10 +656,6 @@ Translator_Russian::Translator_Russian() : Translator() | |||
| static const char ru_voiced[] = {0x11,0x12,0x13,0x14,0x16,0x17,0}; // letter group G (voiced obstruents) | |||
| static const char ru_ivowels[] = {0x2c,0x15,0x31,0x18,0x2e,0x2f,0}; // letter group Y (iotated vowels & soft-sign) | |||
| // these are unicode character codes | |||
| static const wchar_t ru_L08[] = {0x43a,0x43f,0x441,0x442,0x444,0x445,0}; // кпстфх | |||
| static const wchar_t ru_L09[] = {0x431,0x433,0x434,0x436,0x43b,0x43c,0x43d,0x440,0}; // бгджзлмнр | |||
| SetupTranslator(this,stress_lengths_ru,stress_amps_ru); | |||
| charset_a0 = charsets[18]; // KOI8-R | |||
| @@ -678,9 +674,6 @@ Translator_Russian::Translator_Russian() : Translator() | |||
| SetLetterBits(this,6,ru_ivowels); | |||
| SetLetterBits(this,7,ru_vowels); | |||
| letter_groups[8] = ru_L08; // This is L08 in ru_rules | |||
| letter_groups[9] = ru_L09; // This is L09 in ru_rules | |||
| langopts.param[LOPT_UNPRONOUNCABLE] = 0x432; // [v] don't count this character at start of word | |||
| langopts.param[LOPT_REGRESSIVE_VOICING] = 1; | |||
| langopts.param[LOPT_KEEP_UNSTR_VOWEL] = 1; | |||
| @@ -29,6 +29,8 @@ | |||
| #define N_RULE_GROUP2 120 // max num of two-letter rule chains | |||
| #define N_HASH_DICT 1024 | |||
| #define N_CHARSETS 20 | |||
| #define N_LETTER_GROUPS 20 | |||
| /* flags from word dictionary */ | |||
| // bits 0-3 stressed syllable, 7=unstressed | |||
| @@ -110,18 +112,11 @@ | |||
| #define RULE_ENDING 14 | |||
| #define RULE_DIGIT 15 // D digit | |||
| #define RULE_NONALPHA 16 // Z non-alpha | |||
| #define RULE_LETTER_GROUPS 17 // 17 to 23 | |||
| #define RULE_LETTER1 17 // A vowels | |||
| #define RULE_LETTER2 18 // B 'hard' consonants | |||
| #define RULE_LETTER3 19 // C all consonants | |||
| #define RULE_LETTER4 20 // H letter group | |||
| #define RULE_LETTER5 21 // F letter group | |||
| #define RULE_LETTER6 22 // G letter group | |||
| #define RULE_LETTER7 23 // Y letter group | |||
| #define RULE_LETTERGP 17 // A B C H F G Y letter group number | |||
| #define RULE_LETTERGP2 18 // L + letter group number | |||
| #define RULE_NO_SUFFIX 24 // N | |||
| #define RULE_NOTVOWEL 25 // K | |||
| #define RULE_IFVERB 26 // V | |||
| #define RULE_LETTERGP 27 // L + letter group number | |||
| #define RULE_ALT1 28 // T word has $alt attribute | |||
| #define RULE_NOVOWELS 29 // X no vowels up to word boundary | |||
| #define RULE_SPELLING 31 // W while spelling letter-by-letter | |||
| @@ -355,8 +350,7 @@ public: | |||
| // holds properties of characters: vowel, consonant, etc for pronunciation rules | |||
| unsigned char letter_bits[256]; | |||
| int letter_bits_offset; | |||
| #define N_LETTER_TYPES 20 | |||
| const wchar_t *letter_groups[N_LETTER_TYPES]; | |||
| const wchar_t *letter_groups[8]; | |||
| /* index1=option, index2 by 0=. 1=, 2=?, 3=! 4=none */ | |||
| unsigned char punct_to_tone[4][5]; | |||
| @@ -394,6 +388,7 @@ private: | |||
| void ApplySpecialAttribute(char *phonemes, int dict_flags); | |||
| int IsLetter(int letter, int group); | |||
| int IsLetterGroup(char *word, int group); | |||
| void CalcPitches_Tone(int clause_tone); | |||
| @@ -420,6 +415,7 @@ protected: | |||
| unsigned char groups2_count[256]; // number of 2 letter groups for this initial letter | |||
| unsigned char groups2_start[256]; // index into groups2 | |||
| char *letterGroups[N_LETTER_GROUPS]; | |||
| int n_ph_list2; | |||
| PHONEME_LIST2 ph_list2[N_PHONEME_LIST]; // first stage of text->phonemes | |||
| @@ -378,7 +378,6 @@ void FindPhonemesUsed(void) | |||
| int hash; | |||
| char *p; | |||
| char *start; | |||
| char *group; | |||
| char *next; | |||
| unsigned char c; | |||
| int count = 0; | |||
| @@ -397,7 +396,12 @@ void FindPhonemesUsed(void) | |||
| } | |||
| if(*p == RULE_GROUP_START) | |||
| { | |||
| group = p; | |||
| if(p[1] == RULE_LETTERGP2) | |||
| { | |||
| while(*p != RULE_GROUP_END) p++; | |||
| continue; | |||
| } | |||
| p += (strlen(p)+1); | |||
| } | |||