lang=pt Reduce phoneme [&~] to [&] in unstressed syllables.
git-svn-id: https://espeak.svn.sourceforge.net/svnroot/espeak/trunk@30 d46cf337-b52f-0410-862d-fd96e6ae7743
master
| &) schaft (_S6 Saft | &) schaft (_S6 Saft | ||||
| schein (lich S'aIn | schein (lich S'aIn | ||||
| schwer Sve:* | schwer Sve:* | ||||
| _) selbst (@@P6 z'Elpst | |||||
| shop _^_EN | shop _^_EN | ||||
| _) sky _^_EN | _) sky _^_EN | ||||
| soldat z%OldA:t | soldat z%OldA:t |
| * : ; b C d dZ f | * : ; b C d dZ f | ||||
| g h j k l l^ m n | g h j k l l^ m n | ||||
| N n^ p Q r R s S | |||||
| s# s; t T tS ts v w | |||||
| x z Z | |||||
| N n^ p Q r R r- s | |||||
| S s# s; t T tS ts v | |||||
| w x z Z | |||||
| Dictionary ru_dict | Dictionary ru_dict |
| ill illEdvE $dot | ill illEdvE $dot | ||||
| stb SAtYb:i $dot | stb SAtYb:i $dot | ||||
| vö vEZd _!'Yss2E | |||||
| vö v'EZd||_'Yss2E | |||||
| pl pe:lda:ul $dot | pl pe:lda:ul $dot | ||||
| mta $abbrev | mta $abbrev | ||||
| a.m An^n^i||m'int $dot | a.m An^n^i||m'int $dot | ||||
| b be: | b be: | ||||
| c tse: | c tse: | ||||
| d de: | d de: | ||||
| f Ef | |||||
| f Eff | |||||
| g ge: | g ge: | ||||
| h ha: | h ha: | ||||
| j je: | j je: | ||||
| k ka: | k ka: | ||||
| l El | |||||
| m Em | |||||
| n En | |||||
| l Ell | |||||
| m Emm | |||||
| n Enn | |||||
| p pe: | p pe: | ||||
| q ku | q ku | ||||
| r ER | |||||
| r ERR | |||||
| s S_ | s S_ | ||||
| s ES $atend | s ES $atend | ||||
| _s ES | _s ES |
| // Spelling-to-phoneme rules for Icelandic | // Spelling-to-phoneme rules for Icelandic | ||||
| // This file is UTF8 encoded. | // This file is UTF8 encoded. | ||||
| // letter group L08 voiceless consonants | |||||
| // letter group B voiceless consonants c,f,h,k,p,t,x,þ | |||||
| // letter group F p,t,k,s | // letter group F p,t,k,s | ||||
| // letter group H j,r,v, | // letter group H j,r,v, | ||||
| .group ð | .group ð | ||||
| ð D | ð D | ||||
| ð (L08 T | |||||
| ð (B T | |||||
| ð (__ T // before pause | ð (__ T // before pause | ||||
| .group l | .group l | ||||
| l l | l l | ||||
| l (_ l# | l (_ l# | ||||
| l (L08X l# | |||||
| l (BX l# | |||||
| ll dl | ll dl | ||||
| ll (L08 tl# | |||||
| ll (B8 tl# | |||||
| ll (_ tl# // ?? | ll (_ tl# // ?? | ||||
| .group m | .group m | ||||
| m m | m m | ||||
| mm (K m | mm (K m | ||||
| L08) m hm# | |||||
| m (L08 m# | |||||
| B) m hm# | |||||
| m (B m# | |||||
| .group n | .group n | ||||
| n n | n n | ||||
| L08) n hn# | |||||
| n (L08 n# | |||||
| B) n hn# | |||||
| n (B n# | |||||
| n (g N | n (g N | ||||
| ng (_ Ng | ng (_ Ng | ||||
| ngt (K N#d | ngt (K N#d | ||||
| .group p | .group p | ||||
| _) p p | _) p p | ||||
| p p | p p | ||||
| L08) p b | |||||
| B) p b | |||||
| s) p b | s) p b | ||||
| p (K b | p (K b | ||||
| p (s f | p (s f | ||||
| _) r R2 | _) r R2 | ||||
| C) r @-* | C) r @-* | ||||
| A) r (A R | A) r (A R | ||||
| L08) r r# | |||||
| r (L08 r# | |||||
| B) r r# | |||||
| r (B r# | |||||
| r (_ r# | r (_ r# | ||||
| r R | r R | ||||
| rl dl | rl dl | ||||
| .group t | .group t | ||||
| _) t t | _) t t | ||||
| t t | t t | ||||
| L08) t d | |||||
| B) t d | |||||
| s) t d | s) t d | ||||
| t (K d | t (K d | ||||
| tt hd | tt hd |
| a (r_ 'a | a (r_ 'a | ||||
| ?1 p) a (d_ =E | ?1 p) a (d_ =E | ||||
| a (CC_ & | a (CC_ & | ||||
| ?1 al (_ 'Al // eg: Portugal, capital, etc. | |||||
| ?1 a (lK ,A // Algarve, alto, etc... | |||||
| ?1 a (l_ 'A // eg: Portugal, capital, etc. | |||||
| ?1 a (lK ,A // Algarve, Almerinda, etc... | |||||
| ?2 al (K aU | ?2 al (K aU | ||||
| ?2 alh alj | ?2 alh alj | ||||
| e (cem_ E | e (cem_ E | ||||
| e (ces_ E | e (ces_ E | ||||
| e (stA_ E | |||||
| e (stA_ E | |||||
| e (stAm_ E | e (stAm_ E | ||||
| e (stAs_ E | e (stAs_ E | ||||
| e (strA_ E | |||||
| e (strA_ E | |||||
| e (rnA_ E | e (rnA_ E | ||||
| e (rnAm_ E | e (rnAm_ E | ||||
| e (xA_ E | e (xA_ E | ||||
| e (xAm_ E | e (xAm_ E | ||||
| e (xAs_ E | e (xAs_ E | ||||
| ?2 _n) e (t E | |||||
| ?2 _n) e (t E | |||||
| em (C eIm | em (C eIm | ||||
| en (K eIN | en (K eIN | ||||
| l) r x | l) r x | ||||
| n) r x | n) r x | ||||
| s) r x | s) r x | ||||
| ?1 A) r (_ r // this letter is imperfect. | |||||
| ?1 A) r (_ r- // [r-] is English linking-r | |||||
| .group s | .group s |
| // G voiced: б в г д ж з | // G voiced: б в г д ж з | ||||
| // H hard consonant: ъ ж ц ш | // H hard consonant: ъ ж ц ш | ||||
| // Y iotated vowel, softsign: ь е ё и ю я | // Y iotated vowel, softsign: ь е ё и ю я | ||||
| // L08 кпстфх | |||||
| // L09 бгджзлмнр | |||||
| .group а | .group а |
| ru 36 123 | ru 36 123 | ||||
| it 25 114 | it 25 114 | ||||
| es 6 114 | es 6 114 | ||||
| pt 34 137 | |||||
| pt_pt 20 137 | |||||
| pt 28 131 | |||||
| pt_pt 20 131 | |||||
| ro 36 138 | ro 36 138 | ||||
| el 8 114 | el 8 114 | ||||
| sv 25 117 | sv 25 117 | ||||
| 2 vowel/@_4 nl hr | 2 vowel/@_4 nl hr | ||||
| 3 vowel/8_2 en_us hr sv | 3 vowel/8_2 en_us hr sv | ||||
| 1 vowel/8_3 zh_yue | 1 vowel/8_3 zh_yue | ||||
| 12 vowel/a en_n cy de hu nl pl sk hr pt | |||||
| 11 vowel/a en_n cy de hu nl pl sk hr | |||||
| 4 vowel/a# en_sc it pt | 4 vowel/a# en_sc it pt | ||||
| 6 vowel/a_2 eo it pt pt_pt ro vi | 6 vowel/a_2 eo it pt pt_pt ro vi | ||||
| 5 vowel/a#_2 hr pt sv is sw | |||||
| 7 vowel/a_3 en_sc cs pt is | |||||
| 4 vowel/a#_2 hr sv is sw | |||||
| 6 vowel/a_3 en_sc cs is | |||||
| 12 vowel/a#_3 en en_n en_us en_wm de hi ru pt_pt vi zh_yue | 12 vowel/a#_3 en en_n en_us en_wm de hi ru pt_pt vi zh_yue | ||||
| 4 vowel/a_4 en_wm el vi | 4 vowel/a_4 en_wm el vi | ||||
| 3 vowel/a_5 pt sv sw | |||||
| 2 vowel/a_5 sv sw | |||||
| 7 vowel/aa en_us fi fr_ca no zh_yue | 7 vowel/aa en_us fi fr_ca no zh_yue | ||||
| 1 vowel/aa# fi | 1 vowel/aa# fi | ||||
| 3 vowel/aa_2 en cy | 3 vowel/aa_2 en cy | ||||
| 2 vowel/aa_4 sv vi | 2 vowel/aa_4 sv vi | ||||
| 2 vowel/aa_5 en_n | 2 vowel/aa_5 en_n | ||||
| 1 vowel/aa_6 de | 1 vowel/aa_6 de | ||||
| 2 vowel/aa_7 nl pt | |||||
| 1 vowel/aa_7 nl | |||||
| 4 vowel/a_en en fr | 4 vowel/a_en en fr | ||||
| 1 vowel/@_bck hi | 1 vowel/@_bck hi | ||||
| 13 vowel/e en en_n af cy eo fr hu hr it pt pt_pt vi | 13 vowel/e en en_n af cy eo fr hu hr it pt pt_pt vi | ||||
| 2 vowel/V en en_sc | 2 vowel/V en en_sc | ||||
| 3 vowel/V_2 af ru | 3 vowel/V_2 af ru | ||||
| 3 vowel/V_3 en_rp hi vi | 3 vowel/V_3 en_rp hi vi | ||||
| 2 vowel/V_4 en_sc pt | |||||
| 1 vowel/V_4 en_sc | |||||
| 6 vowel/y en de fi hu nl zh_yue | 6 vowel/y en de fi hu nl zh_yue | ||||
| 10 vowel/y# en en_wm de fi fr hu nl ro vi zh_yue | 10 vowel/y# en en_wm de fi fr hu nl ro vi zh_yue | ||||
| 1 vowel/y## is | 1 vowel/y## is |
| phoneme r | phoneme r | ||||
| liquid | |||||
| liquid starttype #r endtype #r | |||||
| length 60 | length 60 | ||||
| vowelin f1=2 f2=2700 -300 -200 f3=-1300 80 | vowelin f1=2 f2=2700 -300 -200 f3=-1300 80 | ||||
| vowelout f1=2 f2=1700 -300 -200 f3=-1300 80 | vowelout f1=2 f2=1700 -300 -200 f3=-1300 80 |
| phoneme r // from Afrikaans | phoneme r // from Afrikaans | ||||
| liquid | |||||
| liquid starttype #r endtype #r | |||||
| length 60 | length 60 | ||||
| vowelin f1=2 f2=2700 -300 -200 f3=-1300 80 | vowelin f1=2 f2=2700 -300 -200 f3=-1300 80 | ||||
| vowelout f1=2 f2=1700 -300 -200 f3=-1300 80 | vowelout f1=2 f2=1700 -300 -200 f3=-1300 80 |
| vowel starttype (@) endtype (@) | vowel starttype (@) endtype (@) | ||||
| length 200 | length 200 | ||||
| formants vnasal/a#_n | formants vnasal/a#_n | ||||
| reduceto & 4 | |||||
| endphoneme | endphoneme | ||||
| phoneme &U~ | phoneme &U~ | ||||
| endphoneme | endphoneme | ||||
| phoneme A // for "al" in European Portuguese | |||||
| vowel starttype (a) endtype (a) | |||||
| length 200 | |||||
| formants vowel/a_3 | |||||
| endphoneme | |||||
| phoneme A2 // for "al" in European Portuguese | |||||
| vowel starttype (a) endtype (a) | |||||
| length 200 | |||||
| formants vowel/aa_7 | |||||
| endphoneme | |||||
| phoneme A3 // for "al" in European Portuguese | |||||
| vowel starttype (a) endtype (a) | |||||
| length 200 | |||||
| formants vowel/a_5 | |||||
| endphoneme | |||||
| phoneme A4 // for "al" in European Portuguese | |||||
| phoneme A // for "al" in European Portuguese, don't reduce to [&] | |||||
| vowel starttype (a) endtype (a) | vowel starttype (a) endtype (a) | ||||
| length 200 | length 200 | ||||
| formants vowel/a_2 | formants vowel/a_2 | ||||
| endphoneme | endphoneme | ||||
| phoneme A5 // for "al" in European Portuguese | |||||
| vowel starttype (a) endtype (a) | |||||
| length 200 | |||||
| formants vowel/a | |||||
| endphoneme | |||||
| phoneme A6 // for "al" in European Portuguese | |||||
| vowel starttype (a) endtype (a) | |||||
| length 200 | |||||
| formants vowel/V_4 | |||||
| endphoneme | |||||
| phoneme A7 // for "al" in European Portuguese | |||||
| vowel starttype (a) endtype (a) | |||||
| length 200 | |||||
| formants vowel/a#_2 | |||||
| endphoneme | |||||
| phoneme E | phoneme E |
| endphoneme | endphoneme | ||||
| phoneme R3 // Afrikaans | phoneme R3 // Afrikaans | ||||
| liquid | |||||
| liquid starttype #r endtype #r | |||||
| length 60 | length 60 | ||||
| vowelin f1=2 f2=2700 -300 -200 f3=-1300 80 | vowelin f1=2 f2=2700 -300 -200 f3=-1300 80 | ||||
| vowelout f1=2 f2=1700 -300 -200 f3=-1300 80 | vowelout f1=2 f2=1700 -300 -200 f3=-1300 80 |
| { | { | ||||
| item_string[ix++] = c; | item_string[ix++] = c; | ||||
| c = fgetc(f_in); | c = fgetc(f_in); | ||||
| if(feof(f_in)) | |||||
| break; | |||||
| if(item_string[ix-1] == '=') | if(item_string[ix-1] == '=') | ||||
| break; | break; | ||||
| } | } | ||||
| ungetc(c,f_in); | |||||
| item_string[ix] = 0; | item_string[ix] = 0; | ||||
| if(feof(f_in)) return(-1); | |||||
| if(!feof(f_in)) | |||||
| ungetc(c,f_in); | |||||
| keyword = -1; | keyword = -1; | ||||
| c = *p++ - '0'; | c = *p++ - '0'; | ||||
| value = *p++ - '0'; | value = *p++ - '0'; | ||||
| c = c * 10 + value; | c = c * 10 + value; | ||||
| if((value < 0) || (value > 9) || (c <= 0) || (c >= N_LETTER_TYPES)) | |||||
| if((value < 0) || (value > 9) || (c <= 0) || (c >= N_LETTER_GROUPS)) | |||||
| { | { | ||||
| c = 0; | c = 0; | ||||
| fprintf(f_log,"%5d: Expected 2 digits after 'L'",linenum); | fprintf(f_log,"%5d: Expected 2 digits after 'L'",linenum); | ||||
| { | { | ||||
| // pre-rule, put the group number before the RULE_LETTERGP command | // pre-rule, put the group number before the RULE_LETTERGP command | ||||
| output[ix++] = c; | output[ix++] = c; | ||||
| c = RULE_LETTERGP; | |||||
| c = RULE_LETTERGP2; | |||||
| } | } | ||||
| else | else | ||||
| { | { | ||||
| output[ix++] = RULE_LETTERGP; | |||||
| output[ix++] = RULE_LETTERGP2; | |||||
| } | } | ||||
| break; | break; | ||||
| int compile_dictrules(FILE *f_in, FILE *f_out, char *fname_temp) | |||||
| {//============================================================= | |||||
| static int compile_lettergroup(char *input, FILE *f_out) | |||||
| {//===================================================== | |||||
| char *p; | |||||
| int group; | |||||
| p = input; | |||||
| if(!isdigit(p[0]) || !isdigit(p[1])) | |||||
| { | |||||
| return(1); | |||||
| } | |||||
| group = atoi(&p[1]); | |||||
| if(group >= N_LETTER_GROUPS) | |||||
| return(1); | |||||
| while(!isspace2(*p)) p++; | |||||
| fputc(RULE_GROUP_START,f_out); | |||||
| fputc(RULE_LETTERGP2,f_out); | |||||
| fputc(group + 'A', f_out); | |||||
| for(;;) | |||||
| { | |||||
| while(isspace2(*p)) p++; | |||||
| if(*p == 0) | |||||
| break; | |||||
| while((*p & 0xff) > ' ') | |||||
| { | |||||
| fputc(*p++, f_out); | |||||
| } | |||||
| fputc(0,f_out); | |||||
| } | |||||
| fputc(RULE_GROUP_END,f_out); | |||||
| return(0); | |||||
| } | |||||
| static int compile_dictrules(FILE *f_in, FILE *f_out, char *fname_temp) | |||||
| {//==================================================================== | |||||
| char *prule; | char *prule; | ||||
| unsigned char *p; | unsigned char *p; | ||||
| int ix; | int ix; | ||||
| buf = fgets(buf1,sizeof(buf1),f_in); | buf = fgets(buf1,sizeof(buf1),f_in); | ||||
| if((buf != NULL) && (buf[0] == '\r')) buf++; // ignore extra \r in \r\n | if((buf != NULL) && (buf[0] == '\r')) buf++; // ignore extra \r in \r\n | ||||
| if((buf != NULL) && (memcmp(buf,".L",2)==0)) | |||||
| { | |||||
| if(compile_lettergroup(&buf[2], f_out) != 0) | |||||
| { | |||||
| fprintf(f_log,"%5d: Bad lettergroup\n",linenum); | |||||
| error_count++; | |||||
| } | |||||
| continue; | |||||
| } | |||||
| if((buf == NULL) || (memcmp(buf,".group",6)==0)) | if((buf == NULL) || (memcmp(buf,".group",6)==0)) | ||||
| { | { | ||||
| // next .group or end of file, write out the previous group | // next .group or end of file, write out the previous group |
| groups2_count[ix]=0; | groups2_count[ix]=0; | ||||
| groups2_start[ix]=255; // indicates "not set" | groups2_start[ix]=255; // indicates "not set" | ||||
| } | } | ||||
| memset(letterGroups,0,sizeof(letterGroups)); | |||||
| p = data_dictrules; | p = data_dictrules; | ||||
| while(*p != 0) | while(*p != 0) | ||||
| break; | break; | ||||
| } | } | ||||
| p++; | p++; | ||||
| len = strlen(p); | |||||
| p_name = p; | |||||
| c = p_name[0]; | |||||
| p += (len+1); | |||||
| if(len == 1) | |||||
| if(p[0] == RULE_LETTERGP2) | |||||
| { | { | ||||
| groups1[c] = p; | |||||
| ix = p[1] - 'A'; | |||||
| p += 2; | |||||
| if((ix >= 0) && (ix < N_LETTER_GROUPS)) | |||||
| { | |||||
| letterGroups[ix] = p; | |||||
| } | |||||
| } | } | ||||
| else | else | ||||
| if(len == 0) | |||||
| { | { | ||||
| groups1[0] = p; | |||||
| len = strlen(p); | |||||
| p_name = p; | |||||
| c = p_name[0]; | |||||
| p += (len+1); | |||||
| if(len == 1) | |||||
| { | |||||
| groups1[c] = p; | |||||
| } | |||||
| else | |||||
| if(len == 0) | |||||
| { | |||||
| groups1[0] = p; | |||||
| } | |||||
| else | |||||
| { | |||||
| if(groups2_start[c] == 255) | |||||
| groups2_start[c] = n_groups2; | |||||
| groups2_count[c]++; | |||||
| groups2[n_groups2] = p; | |||||
| c2 = p_name[1]; | |||||
| groups2_name[n_groups2++] = (c + (c2 << 8)); | |||||
| } | |||||
| } | } | ||||
| else | |||||
| { | |||||
| if(groups2_start[c] == 255) | |||||
| groups2_start[c] = n_groups2; | |||||
| groups2_count[c]++; | |||||
| groups2[n_groups2] = p; | |||||
| c2 = p_name[1]; | |||||
| groups2_name[n_groups2++] = (c + (c2 << 8)); | |||||
| } | |||||
| // skip over all the rules in this group | // skip over all the rules in this group | ||||
| rule_count = 0; | rule_count = 0; | ||||
| while(*p != RULE_GROUP_END) | while(*p != RULE_GROUP_END) | ||||
| int Translator::IsLetterGroup(char *word, int group) | |||||
| {//================================================= | |||||
| // match the word against a list of utf-8 strings | |||||
| char *p; | |||||
| char *w; | |||||
| p = letterGroups[group]; | |||||
| while(*p != 0) | |||||
| { | |||||
| w = word; | |||||
| while(*p == *w) | |||||
| { | |||||
| *w++; | |||||
| *p++; | |||||
| } | |||||
| if(*p == 0) | |||||
| return(w-word); // matched a complete string | |||||
| while(*p++ != 0); // skip to end of string | |||||
| } | |||||
| return(0); | |||||
| } | |||||
| int Translator::IsLetter(int letter, int group) | int Translator::IsLetter(int letter, int group) | ||||
| {//============================================ | {//============================================ | ||||
| if(letter_groups[group] != NULL) | if(letter_groups[group] != NULL) | ||||
| static char output[60]; | static char output[60]; | ||||
| static char symbols[] = {' ',' ',' ',' ',' ',' ',' ',' ',' ', | static char symbols[] = {' ',' ',' ',' ',' ',' ',' ',' ',' ', | ||||
| '@','&','%','+','#','S','D','Z','A','B','C','H','F','G','Y','N','K','V','L','T','X','?','W'}; | |||||
| '@','&','%','+','#','S','D','Z','A','L',' ',' ',' ',' ',' ','N','K','V',' ','T','X','?','W'}; | |||||
| static char symbols_lg[] = {'A','B','C','H','F','G','Y'}; | |||||
| match_type = 0; | match_type = 0; | ||||
| buf_pre[0] = 0; | buf_pre[0] = 0; | ||||
| } | } | ||||
| else | else | ||||
| if(rb == RULE_LETTERGP) | if(rb == RULE_LETTERGP) | ||||
| { | |||||
| c = symbols_lg[*rule++ - 'A']; | |||||
| } | |||||
| else | |||||
| if(rb == RULE_LETTERGP2) | |||||
| { | { | ||||
| value = *rule++ - 'A'; | value = *rule++ - 'A'; | ||||
| if(value >= 8) | |||||
| { | |||||
| p[0] = 'L'; | |||||
| p[1] = (value / 10) + '0'; | |||||
| c = (value % 10) + '0'; | |||||
| p[0] = 'L'; | |||||
| p[1] = (value / 10) + '0'; | |||||
| c = (value % 10) + '0'; | |||||
| if(match_type == RULE_PRE) | |||||
| { | |||||
| p[0] = c; | |||||
| c = 'L'; | |||||
| } | |||||
| p+=2; | |||||
| } | |||||
| else | |||||
| if(match_type == RULE_PRE) | |||||
| { | { | ||||
| c = symbols[value + RULE_LETTER_GROUPS]; | |||||
| p[0] = c; | |||||
| c = 'L'; | |||||
| } | } | ||||
| p+=2; | |||||
| } | } | ||||
| else | else | ||||
| if(rb <= RULE_LAST_RULE) | if(rb <= RULE_LAST_RULE) | ||||
| int distance_right; | int distance_right; | ||||
| int distance_left; | int distance_left; | ||||
| int lg_pts; | int lg_pts; | ||||
| int n_bytes; | |||||
| MatchRecord match; | MatchRecord match; | ||||
| static MatchRecord best; | static MatchRecord best; | ||||
| if(rule == NULL) | if(rule == NULL) | ||||
| { | { | ||||
| match_out->points = 0; | match_out->points = 0; | ||||
| (*word)++; | |||||
| return; | return; | ||||
| } | } | ||||
| failed = 1; | failed = 1; | ||||
| break; | break; | ||||
| case RULE_LETTERGP2: // match against a list of utf-t strings | |||||
| letter_group = *rule++ - 'A'; | |||||
| if((n_bytes = IsLetterGroup(post_ptr-1,letter_group)) >0) | |||||
| { | |||||
| match.points += (20-distance_right); | |||||
| post_ptr += (n_bytes-1); | |||||
| } | |||||
| else | |||||
| failed =1; | |||||
| break; | |||||
| case RULE_NOTVOWEL: | case RULE_NOTVOWEL: | ||||
| if(!IsLetter(letter_w,0)) | if(!IsLetter(letter_w,0)) | ||||
| { | { |
| } | } | ||||
| #endif | #endif | ||||
| if((ph->reduce_to != 0) && (ph->type != phVOWEL) && !(plist2->synthflags & SFLAG_DICTIONARY)) | |||||
| { | |||||
| // reduction for vowels has already been done in SetWordStress | |||||
| int reduce_level; | |||||
| if(next->type == phVOWEL) | |||||
| { | |||||
| reduce_level = (ph->phflags >> 28) & 7; | |||||
| if((&plist2[1])->stress < reduce_level) | |||||
| { | |||||
| // look at the stress of the following vowel | |||||
| ph = phoneme_tab[ph->reduce_to]; | |||||
| } | |||||
| } | |||||
| } | |||||
| if((plist2+1)->synthflags & SFLAG_LENGTHEN) | if((plist2+1)->synthflags & SFLAG_LENGTHEN) | ||||
| { | { | ||||
| static char types_double[] = {phFRICATIVE,phVFRICATIVE,phNASAL,phLIQUID,0}; | static char types_double[] = {phFRICATIVE,phVFRICATIVE,phNASAL,phLIQUID,0}; |
| #include "translate.h" | #include "translate.h" | ||||
| #include "wave.h" | #include "wave.h" | ||||
| const char *version_string = "1.25.13 30.May.07"; | |||||
| const int version_phdata = 0x012501; | |||||
| const char *version_string = "1.25.14 31.May.07"; | |||||
| const int version_phdata = 0x012514; | |||||
| int option_device_number = -1; | int option_device_number = -1; | ||||
| { | { | ||||
| static int stress_amps_is[] = {16,16, 20,20, 20,24, 24,22 }; | static int stress_amps_is[] = {16,16, 20,20, 20,24, 24,22 }; | ||||
| static int stress_lengths_is[8] = {180,155, 200,200, 0,0, 240,250}; | static int stress_lengths_is[8] = {180,155, 200,200, 0,0, 240,250}; | ||||
| static const wchar_t is_L08[] = {'c','f','h','k','p','t','x',0xfe,0}; // voiceless conants, including 'þ' ?? 's' | |||||
| static const wchar_t is_lettergroup_B[] = {'c','f','h','k','p','t','x',0xfe,0}; // voiceless conants, including 'þ' ?? 's' | |||||
| tr = new Translator(); | tr = new Translator(); | ||||
| SetupTranslator(tr,stress_lengths_is,stress_amps_is); | SetupTranslator(tr,stress_lengths_is,stress_amps_is); | ||||
| ResetLetterBits(tr,0x18); | ResetLetterBits(tr,0x18); | ||||
| SetLetterBits(tr,4,"kpst"); // Letter group F | SetLetterBits(tr,4,"kpst"); // Letter group F | ||||
| SetLetterBits(tr,3,"jvr"); // Letter group H | SetLetterBits(tr,3,"jvr"); // Letter group H | ||||
| tr->letter_groups[8] = is_L08; | |||||
| tr->letter_groups[1] = is_lettergroup_B; | |||||
| SetLetterVowel(tr,'y'); | SetLetterVowel(tr,'y'); | ||||
| tr->langopts.numbers = 0xe9; | tr->langopts.numbers = 0xe9; | ||||
| tr->langopts.numbers2 = 0x2; | tr->langopts.numbers2 = 0x2; | ||||
| static const char ru_voiced[] = {0x11,0x12,0x13,0x14,0x16,0x17,0}; // letter group G (voiced obstruents) | static const char ru_voiced[] = {0x11,0x12,0x13,0x14,0x16,0x17,0}; // letter group G (voiced obstruents) | ||||
| static const char ru_ivowels[] = {0x2c,0x15,0x31,0x18,0x2e,0x2f,0}; // letter group Y (iotated vowels & soft-sign) | static const char ru_ivowels[] = {0x2c,0x15,0x31,0x18,0x2e,0x2f,0}; // letter group Y (iotated vowels & soft-sign) | ||||
| // these are unicode character codes | |||||
| static const wchar_t ru_L08[] = {0x43a,0x43f,0x441,0x442,0x444,0x445,0}; // кпстфх | |||||
| static const wchar_t ru_L09[] = {0x431,0x433,0x434,0x436,0x43b,0x43c,0x43d,0x440,0}; // бгджзлмнр | |||||
| SetupTranslator(this,stress_lengths_ru,stress_amps_ru); | SetupTranslator(this,stress_lengths_ru,stress_amps_ru); | ||||
| charset_a0 = charsets[18]; // KOI8-R | charset_a0 = charsets[18]; // KOI8-R | ||||
| SetLetterBits(this,6,ru_ivowels); | SetLetterBits(this,6,ru_ivowels); | ||||
| SetLetterBits(this,7,ru_vowels); | SetLetterBits(this,7,ru_vowels); | ||||
| letter_groups[8] = ru_L08; // This is L08 in ru_rules | |||||
| letter_groups[9] = ru_L09; // This is L09 in ru_rules | |||||
| langopts.param[LOPT_UNPRONOUNCABLE] = 0x432; // [v] don't count this character at start of word | langopts.param[LOPT_UNPRONOUNCABLE] = 0x432; // [v] don't count this character at start of word | ||||
| langopts.param[LOPT_REGRESSIVE_VOICING] = 1; | langopts.param[LOPT_REGRESSIVE_VOICING] = 1; | ||||
| langopts.param[LOPT_KEEP_UNSTR_VOWEL] = 1; | langopts.param[LOPT_KEEP_UNSTR_VOWEL] = 1; |
| #define N_RULE_GROUP2 120 // max num of two-letter rule chains | #define N_RULE_GROUP2 120 // max num of two-letter rule chains | ||||
| #define N_HASH_DICT 1024 | #define N_HASH_DICT 1024 | ||||
| #define N_CHARSETS 20 | #define N_CHARSETS 20 | ||||
| #define N_LETTER_GROUPS 20 | |||||
| /* flags from word dictionary */ | /* flags from word dictionary */ | ||||
| // bits 0-3 stressed syllable, 7=unstressed | // bits 0-3 stressed syllable, 7=unstressed | ||||
| #define RULE_ENDING 14 | #define RULE_ENDING 14 | ||||
| #define RULE_DIGIT 15 // D digit | #define RULE_DIGIT 15 // D digit | ||||
| #define RULE_NONALPHA 16 // Z non-alpha | #define RULE_NONALPHA 16 // Z non-alpha | ||||
| #define RULE_LETTER_GROUPS 17 // 17 to 23 | |||||
| #define RULE_LETTER1 17 // A vowels | |||||
| #define RULE_LETTER2 18 // B 'hard' consonants | |||||
| #define RULE_LETTER3 19 // C all consonants | |||||
| #define RULE_LETTER4 20 // H letter group | |||||
| #define RULE_LETTER5 21 // F letter group | |||||
| #define RULE_LETTER6 22 // G letter group | |||||
| #define RULE_LETTER7 23 // Y letter group | |||||
| #define RULE_LETTERGP 17 // A B C H F G Y letter group number | |||||
| #define RULE_LETTERGP2 18 // L + letter group number | |||||
| #define RULE_NO_SUFFIX 24 // N | #define RULE_NO_SUFFIX 24 // N | ||||
| #define RULE_NOTVOWEL 25 // K | #define RULE_NOTVOWEL 25 // K | ||||
| #define RULE_IFVERB 26 // V | #define RULE_IFVERB 26 // V | ||||
| #define RULE_LETTERGP 27 // L + letter group number | |||||
| #define RULE_ALT1 28 // T word has $alt attribute | #define RULE_ALT1 28 // T word has $alt attribute | ||||
| #define RULE_NOVOWELS 29 // X no vowels up to word boundary | #define RULE_NOVOWELS 29 // X no vowels up to word boundary | ||||
| #define RULE_SPELLING 31 // W while spelling letter-by-letter | #define RULE_SPELLING 31 // W while spelling letter-by-letter | ||||
| // holds properties of characters: vowel, consonant, etc for pronunciation rules | // holds properties of characters: vowel, consonant, etc for pronunciation rules | ||||
| unsigned char letter_bits[256]; | unsigned char letter_bits[256]; | ||||
| int letter_bits_offset; | int letter_bits_offset; | ||||
| #define N_LETTER_TYPES 20 | |||||
| const wchar_t *letter_groups[N_LETTER_TYPES]; | |||||
| const wchar_t *letter_groups[8]; | |||||
| /* index1=option, index2 by 0=. 1=, 2=?, 3=! 4=none */ | /* index1=option, index2 by 0=. 1=, 2=?, 3=! 4=none */ | ||||
| unsigned char punct_to_tone[4][5]; | unsigned char punct_to_tone[4][5]; | ||||
| void ApplySpecialAttribute(char *phonemes, int dict_flags); | void ApplySpecialAttribute(char *phonemes, int dict_flags); | ||||
| int IsLetter(int letter, int group); | int IsLetter(int letter, int group); | ||||
| int IsLetterGroup(char *word, int group); | |||||
| void CalcPitches_Tone(int clause_tone); | void CalcPitches_Tone(int clause_tone); | ||||
| unsigned char groups2_count[256]; // number of 2 letter groups for this initial letter | unsigned char groups2_count[256]; // number of 2 letter groups for this initial letter | ||||
| unsigned char groups2_start[256]; // index into groups2 | unsigned char groups2_start[256]; // index into groups2 | ||||
| char *letterGroups[N_LETTER_GROUPS]; | |||||
| int n_ph_list2; | int n_ph_list2; | ||||
| PHONEME_LIST2 ph_list2[N_PHONEME_LIST]; // first stage of text->phonemes | PHONEME_LIST2 ph_list2[N_PHONEME_LIST]; // first stage of text->phonemes |
| int hash; | int hash; | ||||
| char *p; | char *p; | ||||
| char *start; | char *start; | ||||
| char *group; | |||||
| char *next; | char *next; | ||||
| unsigned char c; | unsigned char c; | ||||
| int count = 0; | int count = 0; | ||||
| } | } | ||||
| if(*p == RULE_GROUP_START) | if(*p == RULE_GROUP_START) | ||||
| { | { | ||||
| group = p; | |||||
| if(p[1] == RULE_LETTERGP2) | |||||
| { | |||||
| while(*p != RULE_GROUP_END) p++; | |||||
| continue; | |||||
| } | |||||
| p += (strlen(p)+1); | p += (strlen(p)+1); | ||||
| } | } | ||||