Lang eo: use .replace in eo_rules for "cx" etc. git-svn-id: https://espeak.svn.sourceforge.net/svnroot/espeak/trunk@94 d46cf337-b52f-0410-862d-fd96e6ae7743master
| dinosaur daIn@sO@ | dinosaur daIn@sO@ | ||||
| diode daIoUd | diode daIoUd | ||||
| diplomacy dIpl'oUm@sI2 | diplomacy dIpl'oUm@sI2 | ||||
| dipole daIpoUl | |||||
| disconsolate dIsk'0ns@l@t | disconsolate dIsk'0ns@l@t | ||||
| disc dIsk // for discs | disc dIsk // for discs | ||||
| disciple dIs'aIp@L | disciple dIs'aIp@L | ||||
| fishnet fISnEt | fishnet fISnEt | ||||
| fiord fi:O@d | fiord fi:O@d | ||||
| fix fIks // for fixer | fix fIks // for fixer | ||||
| fjord fIO@d | |||||
| flagellum fla2dZEl@m | flagellum fla2dZEl@m | ||||
| flexible flEksIb@L | flexible flEksIb@L | ||||
| flier flaI3 | flier flaI3 | ||||
| Phoebe fi:bi: | Phoebe fi:bi: | ||||
| Rachel reItS@L | Rachel reItS@L | ||||
| Rhys ri:s | Rhys ri:s | ||||
| Robert r0b3t | |||||
| Roderick r0drIk | Roderick r0drIk | ||||
| Samantha s@manT@ | Samantha s@manT@ | ||||
| Sarah se@r@ | Sarah se@r@ |
| // ?7 Scottish | // ?7 Scottish | ||||
| // ?8 Use full vowel, not schwa in some word endings | // ?8 Use full vowel, not schwa in some word endings | ||||
| .replace | |||||
| ff f // ligature | |||||
| fi fi | |||||
| fl fl | |||||
| .group a | .group a | ||||
| _) a (_ a2 | _) a (_ a2 | ||||
| l) egion i:dZ@n | l) egion i:dZ@n | ||||
| egy (pt i:dZI | egy (pt i:dZI | ||||
| n) e (gro i: | n) e (gro i: | ||||
| eh (_ eI | |||||
| ei eI | ei eI | ||||
| _) ei aI | _) ei aI | ||||
| c) ei i: | c) ei i: | ||||
| r) iv (el Iv | r) iv (el Iv | ||||
| r) iv (en Iv | r) iv (en Iv | ||||
| r) iv (et Iv | r) iv (et Iv | ||||
| _l) iv (el aIv | |||||
| _l) iv (eB aIv | |||||
| XC) iv (el Iv | XC) iv (el Iv | ||||
| _g) iv (e Iv | _g) iv (e Iv | ||||
| _l) iv (e Iv | _l) iv (e Iv | ||||
| o (bliga 0 | o (bliga 0 | ||||
| o (bligato %0 | o (bligato %0 | ||||
| o (blo 0 | o (blo 0 | ||||
| _r) o (ber 0 | |||||
| r) o (bot oU | r) o (bot oU | ||||
| _) o (bs %0 | _) o (bs %0 | ||||
| _) obs (er %0bz | _) obs (er %0bz | ||||
| € jU@roUz | € jU@roUz | ||||
| ♯ SA@p | ♯ SA@p | ||||
| ♭ flat | ♭ flat | ||||
| ff f | |||||
| fi fI | |||||
| fl fl |
| _. punkto | _. punkto | ||||
| v vo // not Roman numbers | |||||
| x ikso | |||||
| t.e t'e,e | t.e t'e,e | ||||
| uk $abbrev | uk $abbrev | ||||
| // includes both accented characters and ascii digraph alternatives (cx, gx, ux, etc) | |||||
| l' la | l' la | ||||
| la %la | la %la | ||||
| ne $u+ // negative | ne $u+ // negative | ||||
| cxu $u // interogative particle | |||||
| cxi $u | |||||
| ĉu $u // cxi interogative particle | |||||
| ĉi $u // cxi interogative particle | |||||
| ĉu $u // interogative particle | |||||
| ĉi $u // interogative particle | |||||
| ia $u // any | ia $u // any | ||||
| // prepositions | // prepositions | ||||
| de $u+ $brk | de $u+ $brk | ||||
| (de la) dela $u $brk | (de la) dela $u $brk | ||||
| antaux $u+ $pause | |||||
| antaŭ $u+ $pause | antaŭ $u+ $pause | ||||
| apud $u+ $pause | apud $u+ $pause | ||||
| ol $u+ $brk | ol $u+ $brk | ||||
| kun $u+ $pause | kun $u+ $pause | ||||
| laŭ $u+ $brk | laŭ $u+ $brk | ||||
| laux $u+ $brk | |||||
| per $u+ $brk | per $u+ $brk | ||||
| preter $u+ $brk | preter $u+ $brk | ||||
| pri $u+ $pause | pri $u+ $pause | ||||
| mi $u | mi $u | ||||
| ci $u | ci $u | ||||
| li $u | li $u | ||||
| sxi $u | |||||
| ŝi $u | ŝi $u | ||||
| gxi $u | |||||
| ĝi $u | ĝi $u | ||||
| ni $u | ni $u | ||||
| vi $u | |||||
| vi vi $u | |||||
| ili $u | ili $u | ||||
| oni $u | oni $u | ||||
| si $u | si $u | ||||
| mia $u+ | mia $u+ | ||||
| cia $u+ | cia $u+ | ||||
| lia $u+ | lia $u+ | ||||
| sxia $u+ | |||||
| ŝia $u+ | ŝia $u+ | ||||
| gxia $u+ | |||||
| ĝia $u+ | ĝia $u+ | ||||
| nia $u+ | nia $u+ | ||||
| via $u+ | via $u+ | ||||
| miaj $u+ | miaj $u+ | ||||
| ciaj $u+ | ciaj $u+ | ||||
| liaj $u+ | liaj $u+ | ||||
| sxiaj $u+ | |||||
| ŝiaj $u+ | ŝiaj $u+ | ||||
| gxiaj $u+ | |||||
| ĝiaj $u+ | ĝiaj $u+ | ||||
| niaj $u+ | niaj $u+ | ||||
| viaj $u+ | viaj $u+ | ||||
| // conjunctions | // conjunctions | ||||
| ke $u $pause // that (conj) | ke $u $pause // that (conj) | ||||
| aux $u $pause | |||||
| aŭ $u $pause // aux | |||||
| cxar $u $pause | |||||
| aŭ $u $pause | |||||
| ĉar $u $pause | ĉar $u $pause | ||||
| kaj $u $pause | kaj $u $pause | ||||
| sed $u $pause | sed $u $pause |
| // includes both accented characters and ascii digraph alternatives (cx, gx, ux, etc) | // includes both accented characters and ascii digraph alternatives (cx, gx, ux, etc) | ||||
| .replace | |||||
| cx ĉ | |||||
| gx ĝ | |||||
| hx ĥ | |||||
| jx ĵ | |||||
| sx ŝ | |||||
| ux ŭ | |||||
| .group a | .group a | ||||
| _) a(_ a | _) a(_ a | ||||
| a a | a a | ||||
| aj aI | aj aI | ||||
| a (jx a | |||||
| aux aU | |||||
| aŭ aU | aŭ aU | ||||
| D_-_) a (_ %a //'a' after a number | D_-_) a (_ %a //'a' after a number | ||||
| D_-_) an (_ %an //'an' after a number | D_-_) an (_ %an //'an' after a number | ||||
| .group c | .group c | ||||
| _) c(_ tso | _) c(_ tso | ||||
| _) ch(_ tSo | _) ch(_ tSo | ||||
| _) cx(_ tSo | |||||
| c ts | c ts | ||||
| ch (K tS // foreign words | ch (K tS // foreign words | ||||
| K) ch tS // foreign words | K) ch tS // foreign words | ||||
| cx tS | |||||
| .group d | .group d | ||||
| _) e(_ e | _) e(_ e | ||||
| e e | e e | ||||
| ej eI | ej eI | ||||
| e (jx e | |||||
| eux eU | |||||
| eŭ eU | eŭ eU | ||||
| .group f | .group f | ||||
| .group g | .group g | ||||
| _) g(_ go | _) g(_ go | ||||
| _) gx(_ dZo | |||||
| g g | g g | ||||
| gh (K dZ | gh (K dZ | ||||
| gx dZ | |||||
| .group h | .group h | ||||
| _) h(_ ho | _) h(_ ho | ||||
| _) hx(_ xo | |||||
| h h | h h | ||||
| hh x | hh x | ||||
| hx x | |||||
| .group i | .group i | ||||
| .group j | .group j | ||||
| _) j(_ jo | _) j(_ jo | ||||
| _) jx(_ Zo | |||||
| j j | j j | ||||
| jh (C Z | jh (C Z | ||||
| jx Z | |||||
| .group k | .group k | ||||
| _) k(_ ko | _) k(_ ko | ||||
| _) o(_ o | _) o(_ o | ||||
| o o | o o | ||||
| oj OI | oj OI | ||||
| o (jx o | |||||
| .group p | .group p | ||||
| _) p(_ po | _) p(_ po | ||||
| .group s | .group s | ||||
| _) s(_ so | _) s(_ so | ||||
| _) sh(_ So | _) sh(_ So | ||||
| _) sx(_ So | |||||
| s s | s s | ||||
| sx S | |||||
| sh (K S // foreign words | sh (K S // foreign words | ||||
| K) sh S // foreign words | K) sh S // foreign words | ||||
| .group u | .group u | ||||
| _) u(_ u | _) u(_ u | ||||
| _) ux(_ wo | |||||
| u u | u u | ||||
| uj uI | uj uI | ||||
| u (jx u | |||||
| ux w | |||||
| .group v | .group v |
| </pre> | </pre> | ||||
| then if "to" is at the end of the clause, we get [tu:], if not then we get [t@]. | then if "to" is at the end of the clause, we get [tu:], if not then we get [t@]. | ||||
| <p> | |||||
| <h4>4.4.4 Translating a Word with another Word</h4> | |||||
| Rather than specifying the pronunciation of a word by a phoneme string, you can specify another "sounds like" word.<p>Use the attribute <b>$text</b> eg.<p> | |||||
| <pre> | |||||
| cough coff $text | |||||
| </pre> | |||||
| Alternatively, use the command <b>$textmode</b> on a line by itself to turn this on for all subsequent entries in the file, until it's turned off by <b>$phonememode</b>. eg.<p> | |||||
| <pre> | |||||
| $textmode | |||||
| cough coff | |||||
| through threw | |||||
| $phonememode | |||||
| </pre> | |||||
| This feature cannot be used for the special entries in the <b>_list</b> files which start with an underscore, such as numbers.<p> | |||||
| Currently "textmode" entries are only recognized for complete words, and not for stems from which a prefix or suffix has been removed (eg. the word "coughs" would not match the example above). | |||||
| <p> | |||||
| <p> <hr> | <p> <hr> | ||||
| <h3>4.5 Conditional Rules</h3> | <h3>4.5 Conditional Rules</h3> | ||||
| Rules in a <b>_rules</b> file and entries in a <b>_list</b> file can be made conditional. They apply only to some voices. This can be useful to specify different pronunciations for different variants of a language (dialects or accents).<p> | Rules in a <b>_rules</b> file and entries in a <b>_list</b> file can be made conditional. They apply only to some voices. This can be useful to specify different pronunciations for different variants of a language (dialects or accents).<p> | ||||
| </tbody></table> | </tbody></table> | ||||
| </ul> | </ul> | ||||
| <p> <hr> | |||||
| <h3>4.7 Character Substitution</h3> | |||||
| Character substitutions can be specified by using a <b> .replace </b> section at the start of the <b> _rules </b> file. Each line specifies either one or two alphabetic characters to be replaced by another one or two alphabetic characters. This substitution is done to a word before it is translated using the spelling-to-phoneme rules. Only the lower-case version of the characters needs to be specified. eg.<p> | |||||
| .replace<br> | |||||
| ô ő // (Hungarian) allow the use of o-circumflex instead of o-double-acute<br> | |||||
| û ű<p> | |||||
| cx ĉ // (Esperanto) allow "cx" as an alternative to c-circumflex<p> | |||||
| ﬁ fi // replace a single character ligature by two characters | |||||
| <p> | |||||
| </body> | </body> | ||||
| </html> | </html> |
| vowelin f1=0 f2=1700 -300 300 f3=-100 80 | vowelin f1=0 f2=1700 -300 300 f3=-100 80 | ||||
| vowelout f1=0 f2=1700 -300 250 f3=-100 80 rms=20 | vowelout f1=0 f2=1700 -300 250 f3=-100 80 rms=20 | ||||
| lengthmod 2 | lengthmod 2 | ||||
| wave ustop/t | |||||
| wave ustop/t%90 | |||||
| before _ ustop/t_ | before _ ustop/t_ | ||||
| before @- ustop/t_dnt%50 | before @- ustop/t_dnt%50 | ||||
| before r ustop/tr | before r ustop/tr |
| vowelin f1=0 f2=1700 -300 300 f3=-100 80 | vowelin f1=0 f2=1700 -300 300 f3=-100 80 | ||||
| vowelout f1=0 f2=1700 -300 300 f3=-100 80 | vowelout f1=0 f2=1700 -300 300 f3=-100 80 | ||||
| lengthmod 5 | lengthmod 5 | ||||
| wave x/d%90 | |||||
| wave x/d%80 | |||||
| endphoneme | endphoneme | ||||
| vowelin f1=0 f2=1700 -300 300 f3=-100 80 | vowelin f1=0 f2=1700 -300 300 f3=-100 80 | ||||
| vowelout f1=0 f2=1700 -300 250 f3=-100 80 rms=20 | vowelout f1=0 f2=1700 -300 250 f3=-100 80 rms=20 | ||||
| lengthmod 2 | lengthmod 2 | ||||
| wave ustop/t | |||||
| wave ustop/t%90 | |||||
| before _ ustop/t_ | before _ ustop/t_ | ||||
| before @- ustop/t_dnt%50 | before @- ustop/t_dnt%50 | ||||
| before r ustop/tr | before r ustop/tr |
| static int transpose_offset; // transpose character range for LookupDictList() | static int transpose_offset; // transpose character range for LookupDictList() | ||||
| static int transpose_min; | static int transpose_min; | ||||
| static int transpose_max; | static int transpose_max; | ||||
| static int text_mode = 0; | |||||
| int hash_counts[N_HASH_DICT]; | int hash_counts[N_HASH_DICT]; | ||||
| char *hash_chains[N_HASH_DICT]; | char *hash_chains[N_HASH_DICT]; | ||||
| {"$pastf", 27}, /* past tense follows */ | {"$pastf", 27}, /* past tense follows */ | ||||
| {"$verbextend",28}, /* extend influence of 'verb follows' */ | {"$verbextend",28}, /* extend influence of 'verb follows' */ | ||||
| {"$brk", 30}, /* a shorter $pause */ | |||||
| {"$text", 29}, // word translates to replcement text, not phonemes | |||||
| {"$brk", 30}, // a shorter $pause | |||||
| // doesn't set dictionary_flags | // doesn't set dictionary_flags | ||||
| {"$?", 100}, // conditional rule, followed by byte giving the condition number | {"$?", 100}, // conditional rule, followed by byte giving the condition number | ||||
| {"$textmode", 200}, | |||||
| {"$phonememode", 201}, | |||||
| {NULL, -1} | {NULL, -1} | ||||
| }; | }; | ||||
| int len_word; | int len_word; | ||||
| int len_phonetic; | int len_phonetic; | ||||
| int text_not_phonemes; // this word specifies replacement text, not phonemes | |||||
| char *mnemptr; | char *mnemptr; | ||||
| char *comment; | char *comment; | ||||
| unsigned char flag_codes[100]; | unsigned char flag_codes[100]; | ||||
| char encoded_ph[200]; | char encoded_ph[200]; | ||||
| unsigned char bad_phoneme[4]; | unsigned char bad_phoneme[4]; | ||||
| p = linebuf; | |||||
| comment = NULL; | comment = NULL; | ||||
| text_not_phonemes = 0; | |||||
| phonetic = word = ""; | phonetic = word = ""; | ||||
| p = linebuf; | |||||
| // while(isspace2(*p)) p++; | |||||
| #ifdef deleted | |||||
| if(*p == '$') | |||||
| { | |||||
| if(memcmp(p,"$textmode",9) == 0) | |||||
| { | |||||
| text_mode = 1; | |||||
| return(0); | |||||
| } | |||||
| if(memcmp(p,"$phonememode",12) == 0) | |||||
| { | |||||
| text_mode = 0; | |||||
| return(0); | |||||
| } | |||||
| } | |||||
| #endif | |||||
| step = 0; | step = 0; | ||||
| c = 0; | c = 0; | ||||
| ix = LookupMnem(mnem_flags,mnemptr); | ix = LookupMnem(mnem_flags,mnemptr); | ||||
| if(ix > 0) | if(ix > 0) | ||||
| flag_codes[n_flag_codes++] = ix; | |||||
| { | |||||
| if(ix == 200) | |||||
| { | |||||
| text_mode = 1; | |||||
| } | |||||
| else | |||||
| if(ix == 201) | |||||
| { | |||||
| text_mode = 0; | |||||
| } | |||||
| else | |||||
| if(ix == BITNUM_FLAG_DICTTEXT) | |||||
| { | |||||
| text_not_phonemes = 1; | |||||
| } | |||||
| else | |||||
| { | |||||
| flag_codes[n_flag_codes++] = ix; | |||||
| } | |||||
| } | |||||
| else | else | ||||
| { | { | ||||
| fprintf(f_log,"%5d: Unknown keyword: %s\n",linenum,mnemptr); | fprintf(f_log,"%5d: Unknown keyword: %s\n",linenum,mnemptr); | ||||
| #endif | #endif | ||||
| return(0); /* blank line */ | return(0); /* blank line */ | ||||
| } | } | ||||
| EncodePhonemes(phonetic,encoded_ph,bad_phoneme); | |||||
| if(strchr(encoded_ph,phonSWITCH) != 0) | |||||
| if(text_not_phonemes || text_mode) | |||||
| { | { | ||||
| flag_codes[n_flag_codes++] = BITNUM_FLAG_ONLY_S; | |||||
| strcpy(encoded_ph,phonetic); // this is replacement text, so don't encode as phonemes | |||||
| flag_codes[n_flag_codes++] = BITNUM_FLAG_DICTTEXT; | |||||
| } | } | ||||
| for(ix=0; ix<255; ix++) | |||||
| else | |||||
| { | { | ||||
| c = encoded_ph[ix]; | |||||
| if(c == 0) break; | |||||
| if(c == 255) | |||||
| EncodePhonemes(phonetic,encoded_ph,bad_phoneme); | |||||
| if(strchr(encoded_ph,phonSWITCH) != 0) | |||||
| { | { | ||||
| /* unrecognised phoneme, report error */ | |||||
| fprintf(f_log,"%5d: Bad phoneme [%c] (0x%x) in: %s %s\n",linenum,bad_phoneme[0],bad_phoneme[0],word,phonetic); | |||||
| error_count++; | |||||
| flag_codes[n_flag_codes++] = BITNUM_FLAG_ONLY_S; // don't match on suffixes (except 's') when switching languages | |||||
| } | |||||
| // check for errors in the phonemes codes | |||||
| for(ix=0; ix<sizeof(encoded_ph); ix++) | |||||
| { | |||||
| c = encoded_ph[ix]; | |||||
| if(c == 0) break; | |||||
| if(c == 255) | |||||
| { | |||||
| /* unrecognised phoneme, report error */ | |||||
| fprintf(f_log,"%5d: Bad phoneme [%c] (0x%x) in: %s %s\n",linenum,bad_phoneme[0],bad_phoneme[0],word,phonetic); | |||||
| error_count++; | |||||
| } | |||||
| } | } | ||||
| } | } | ||||
| if((word[0] & 0x80)==0) // 7 bit ascii only | if((word[0] & 0x80)==0) // 7 bit ascii only | ||||
| { | { | ||||
| // If first letter is uppercase, convert to lower case. (Only if it's 7bit ascii) | // If first letter is uppercase, convert to lower case. (Only if it's 7bit ascii) | ||||
| char buf[sizeof(path_home)+45]; | char buf[sizeof(path_home)+45]; | ||||
| char dict_line[128]; | char dict_line[128]; | ||||
| text_mode = 0; | |||||
| sprintf(buf,"%s%s",path,filename); | sprintf(buf,"%s%s",path,filename); | ||||
| if((f_in = fopen(buf,"r")) == NULL) | if((f_in = fopen(buf,"r")) == NULL) | ||||
| return(-1); | return(-1); |
| DecodePhonemes(phonetic,ph_decoded); | DecodePhonemes(phonetic,ph_decoded); | ||||
| if(flags != NULL) | if(flags != NULL) | ||||
| flags1 = *flags; | flags1 = *flags; | ||||
| fprintf(f_trans,"Found: %s [%s] %s\n",word1,ph_decoded,print_dflags(flags1)); | |||||
| if((dictionary_flags & FLAG_DICTTEXT) == 0) | |||||
| { | |||||
| fprintf(f_trans,"Found: %s [%s] %s\n",word1,ph_decoded,print_dflags(flags1)); | |||||
| } | |||||
| } | } | ||||
| return(1); | return(1); | ||||
| int Translator::LookupDictList(char *word1, char *ph_out, unsigned int *flags, int end_flags) | |||||
| //=========================================================================================== | |||||
| int Translator::LookupDictList(char **wordptr, char *ph_out, unsigned int *flags, int end_flags) | |||||
| //============================================================================================== | |||||
| /* Lookup a specified word in the word dictionary. | /* Lookup a specified word in the word dictionary. | ||||
| Returns phonetic data in 'phonetic' and bits in 'flags' | Returns phonetic data in 'phonetic' and bits in 'flags' | ||||
| { | { | ||||
| int length; | int length; | ||||
| int found; | int found; | ||||
| char *word1; | |||||
| char *word2; | char *word2; | ||||
| unsigned char c; | unsigned char c; | ||||
| int nbytes; | int nbytes; | ||||
| int c2; | int c2; | ||||
| char word[N_WORD_BYTES]; | char word[N_WORD_BYTES]; | ||||
| static char word_replacement[N_WORD_BYTES]; | |||||
| length = 0; | length = 0; | ||||
| word2 = word1; | |||||
| word2 = word1 = *wordptr; | |||||
| while((word2[nbytes = utf8_in(&c2,word2,0)]==' ') && (word2[nbytes+1]=='.')) | while((word2[nbytes = utf8_in(&c2,word2,0)]==' ') && (word2[nbytes+1]=='.')) | ||||
| { | { | ||||
| found = LookupDict2(word,word1,ph_out,flags,end_flags); | found = LookupDict2(word,word1,ph_out,flags,end_flags); | ||||
| if(found) return(1); | |||||
| ph_out[0] = 0; | |||||
| // try modifications to find a recognised word | |||||
| if((end_flags & FLAG_SUFX_E_ADDED) && (word[length-1] == 'e')) | |||||
| if(found == 0) | |||||
| { | { | ||||
| // try removing an 'e' which has been added by RemoveEnding | |||||
| word[length-1] = 0; | |||||
| found = LookupDict2(word,word1,ph_out,flags,end_flags); | |||||
| if(found) return(1); | |||||
| ph_out[0] = 0; | |||||
| // try modifications to find a recognised word | |||||
| if((end_flags & FLAG_SUFX_E_ADDED) && (word[length-1] == 'e')) | |||||
| { | |||||
| // try removing an 'e' which has been added by RemoveEnding | |||||
| word[length-1] = 0; | |||||
| found = LookupDict2(word,word1,ph_out,flags,end_flags); | |||||
| } | |||||
| else | |||||
| if((end_flags & SUFX_D) && (word[length-1] == word[length-2])) | |||||
| { | |||||
| // try removing a double letter | |||||
| word[length-1] = 0; | |||||
| found = LookupDict2(word,word1,ph_out,flags,end_flags); | |||||
| } | |||||
| } | } | ||||
| if((end_flags & SUFX_D) && (word[length-1] == word[length-2])) | |||||
| if(found) | |||||
| { | { | ||||
| // try removing a double letter | |||||
| word[length-1] = 0; | |||||
| found = LookupDict2(word,word1,ph_out,flags,end_flags); | |||||
| if(found) return(1); | |||||
| if(*flags & FLAG_DICTTEXT) | |||||
| { | |||||
| // the word translates to replacement text, not to phonemes | |||||
| if(end_flags & FLAG_ALLOW_DICTTEXT) | |||||
| { | |||||
| // only use replacement text if this is the original word, not if a prefix or suffix has been removed | |||||
| word_replacement[0] = 0; | |||||
| word_replacement[1] = ' '; | |||||
| strcpy(&word_replacement[2],ph_out); // replacement word, preceded by zerochar and space | |||||
| *wordptr = &word_replacement[2]; | |||||
| if(option_phonemes == 2) | |||||
| { | |||||
| fprintf(f_trans,"Replace: %s %s\n",word,*wordptr); | |||||
| } | |||||
| } | |||||
| ph_out[0] = 0; | |||||
| return(0); | |||||
| } | |||||
| return(1); | |||||
| } | } | ||||
| ph_out[0] = 0; | ph_out[0] = 0; | ||||
| int Translator::Lookup(char *word, char *ph_out) | int Translator::Lookup(char *word, char *ph_out) | ||||
| {//============================================= | {//============================================= | ||||
| unsigned int flags; | unsigned int flags; | ||||
| return(LookupDictList(word,ph_out,&flags,0)); | |||||
| return(LookupDictList(&word,ph_out,&flags,0)); | |||||
| } | } | ||||
| char phonemes2[55]; | char phonemes2[55]; | ||||
| static char buf[60]; | static char buf[60]; | ||||
| if(LookupDictList(string,phonemes,&flags,0)) | |||||
| if(LookupDictList(&string,phonemes,&flags,0)) | |||||
| { | { | ||||
| SetWordStress(phonemes,flags,-1,0); | SetWordStress(phonemes,flags,-1,0); | ||||
| DecodePhonemes(phonemes,phonemes2); | DecodePhonemes(phonemes,phonemes2); |
| #include "translate.h" | #include "translate.h" | ||||
| #include "wave.h" | #include "wave.h" | ||||
| const char *version_string = "1.29.11 23.Oct.07"; | |||||
| const char *version_string = "1.29.12 29.Oct.07"; | |||||
| const int version_phdata = 0x012901; | const int version_phdata = 0x012901; | ||||
| int option_device_number = -1; | int option_device_number = -1; |
| tr->langopts.unstressed_wd1 = 1; | tr->langopts.unstressed_wd1 = 1; | ||||
| tr->langopts.unstressed_wd2 = 2; | tr->langopts.unstressed_wd2 = 2; | ||||
| tr->langopts.numbers = 0x1409; | |||||
| tr->langopts.numbers = 0x1c09 + NUM_ROMAN; | |||||
| } | } | ||||
| break; | break; | ||||
| unsigned int dictionary_flags2=0; | unsigned int dictionary_flags2=0; | ||||
| int end_type=0; | int end_type=0; | ||||
| int prefix_type=0; | int prefix_type=0; | ||||
| char *word; | |||||
| char *wordx; | |||||
| char phonemes[N_WORD_PHONEMES]; | char phonemes[N_WORD_PHONEMES]; | ||||
| char *ph_limit; | char *ph_limit; | ||||
| char *phonemes_ptr; | char *phonemes_ptr; | ||||
| static char word_iz[4] = {0,'i','z',0}; | static char word_iz[4] = {0,'i','z',0}; | ||||
| static char word_ss[4] = {0,'s','s',0}; | static char word_ss[4] = {0,'s','s',0}; | ||||
| word = word1; | |||||
| prefix_phonemes[0] = 0; | prefix_phonemes[0] = 0; | ||||
| end_phonemes[0] = 0; | end_phonemes[0] = 0; | ||||
| ph_limit = &phonemes[N_WORD_PHONEMES]; | ph_limit = &phonemes[N_WORD_PHONEMES]; | ||||
| // count the length of the word | // count the length of the word | ||||
| utf8_in(&first_char,word,0); | |||||
| wordx = word1; | |||||
| utf8_in(&first_char,wordx,0); | |||||
| word_length = 0; | word_length = 0; | ||||
| while((*word != 0) && (*word != ' ')) | |||||
| while((*wordx != 0) && (*wordx != ' ')) | |||||
| { | { | ||||
| word += utf8_in(&last_char,word,0); | |||||
| wordx += utf8_in(&last_char,wordx,0); | |||||
| word_length++; | word_length++; | ||||
| } | } | ||||
| word = word1; | |||||
| // try an initial lookup in the dictionary list, we may find a pronunciation specified, or | // try an initial lookup in the dictionary list, we may find a pronunciation specified, or | ||||
| // we may just find some flags | // we may just find some flags | ||||
| else | else | ||||
| { | { | ||||
| spell_word = 0; | spell_word = 0; | ||||
| found = LookupDictList(word,phonemes,&dictionary_flags,wflags << 16); | |||||
| found = LookupDictList(&word1,phonemes,&dictionary_flags,FLAG_ALLOW_DICTTEXT | wflags << 16); // the original word | |||||
| if(phonemes[0] == phonSWITCH) | if(phonemes[0] == phonSWITCH) | ||||
| { | { | ||||
| // change to another language in order to translate this word | // change to another language in order to translate this word | ||||
| if(word_phonemes[0] == phonSWITCH) | if(word_phonemes[0] == phonSWITCH) | ||||
| return(0); | return(0); | ||||
| found = TranslateNumber(word,phonemes,&dictionary_flags,wflags); | |||||
| found = TranslateNumber(word1,phonemes,&dictionary_flags,wflags); | |||||
| } | } | ||||
| if(!found & ((word_flags & FLAG_UPPERS) != FLAG_FIRST_UPPER)) | if(!found & ((word_flags & FLAG_UPPERS) != FLAG_FIRST_UPPER)) | ||||
| if((langopts.numbers & NUM_ROMAN) || ((langopts.numbers & NUM_ROMAN_UC) && (word_flags & FLAG_ALL_UPPER))) | if((langopts.numbers & NUM_ROMAN) || ((langopts.numbers & NUM_ROMAN_UC) && (word_flags & FLAG_ALL_UPPER))) | ||||
| { | { | ||||
| if((found = TranslateRoman(word,phonemes)) != 0) | |||||
| if((found = TranslateRoman(word1,phonemes)) != 0) | |||||
| dictionary_flags |= FLAG_ABBREV; // don't spell capital Roman numbers as individual letters | dictionary_flags |= FLAG_ABBREV; // don't spell capital Roman numbers as individual letters | ||||
| } | } | ||||
| } | } | ||||
| if(spell_word > 0) | if(spell_word > 0) | ||||
| { | { | ||||
| // Speak as individual letters | // Speak as individual letters | ||||
| word = word1; | |||||
| wordx = word1; | |||||
| posn = 0; | posn = 0; | ||||
| phonemes[0] = 0; | phonemes[0] = 0; | ||||
| end_type = 0; | end_type = 0; | ||||
| while(*word != ' ') | |||||
| while(*wordx != ' ') | |||||
| { | { | ||||
| word += TranslateLetter(word, phonemes,spell_word); | |||||
| wordx += TranslateLetter(wordx, phonemes,spell_word); | |||||
| if(phonemes[0] == phonSWITCH) | if(phonemes[0] == phonSWITCH) | ||||
| { | { | ||||
| // change to another language in order to translate this word | // change to another language in order to translate this word | ||||
| posn = 0; | posn = 0; | ||||
| length = 999; | length = 999; | ||||
| while(((length < 3) && (length > 0))|| (word_length > 1 && Unpronouncable(word))) | |||||
| wordx = word1; | |||||
| while(((length < 3) && (length > 0))|| (word_length > 1 && Unpronouncable(wordx))) | |||||
| { | { | ||||
| char *p; | char *p; | ||||
| // This word looks "unpronouncable", so speak letters individually until we | // This word looks "unpronouncable", so speak letters individually until we | ||||
| // find a remainder that we can pronounce. | // find a remainder that we can pronounce. | ||||
| word += TranslateLetter(word,phonemes,0); | |||||
| wordx += TranslateLetter(wordx,phonemes,0); | |||||
| if(phonemes[0] == phonSWITCH) | if(phonemes[0] == phonSWITCH) | ||||
| { | { | ||||
| // change to another language in order to translate this word | // change to another language in order to translate this word | ||||
| return(0); | return(0); | ||||
| } | } | ||||
| p = &word[word_length-3]; | |||||
| p = &wordx[word_length-3]; | |||||
| if(memcmp(p,"'s ",3) == 0) | if(memcmp(p,"'s ",3) == 0) | ||||
| { | { | ||||
| // remove a 's suffix and pronounce this separately (not as an individual letter) | // remove a 's suffix and pronounce this separately (not as an individual letter) | ||||
| } | } | ||||
| length=0; | length=0; | ||||
| while(word[length] != ' ') length++; | |||||
| while(wordx[length] != ' ') length++; | |||||
| if(length > 0) | if(length > 0) | ||||
| word[-1] = ' '; // prevent this affecting the pronunciation of the pronuncable part | |||||
| wordx[-1] = ' '; // prevent this affecting the pronunciation of the pronuncable part | |||||
| } | } | ||||
| SetSpellingStress(phonemes,0); | SetSpellingStress(phonemes,0); | ||||
| // anything left ? | // anything left ? | ||||
| if(*word != ' ') | |||||
| if(*wordx != ' ') | |||||
| { | { | ||||
| // Translate the stem | // Translate the stem | ||||
| unpron_length = strlen(phonemes); | unpron_length = strlen(phonemes); | ||||
| end_type = TranslateRules(word, phonemes, N_WORD_PHONEMES, end_phonemes, wflags, dictionary_flags); | |||||
| end_type = TranslateRules(wordx, phonemes, N_WORD_PHONEMES, end_phonemes, wflags, dictionary_flags); | |||||
| if(phonemes[0] == phonSWITCH) | if(phonemes[0] == phonSWITCH) | ||||
| { | { | ||||
| return(0); | return(0); | ||||
| } | } | ||||
| c_temp = word[-1]; | |||||
| c_temp = wordx[-1]; | |||||
| found = 0; | found = 0; | ||||
| confirm_prefix = 1; | confirm_prefix = 1; | ||||
| // remove any standard suffix and confirm that the prefix is still recognised | // remove any standard suffix and confirm that the prefix is still recognised | ||||
| phonemes2[0] = 0; | phonemes2[0] = 0; | ||||
| end2 = TranslateRules(word, phonemes2, N_WORD_PHONEMES, end_phonemes2, wflags|FLAG_NO_PREFIX|FLAG_NO_TRACE, dictionary_flags); | |||||
| end2 = TranslateRules(wordx, phonemes2, N_WORD_PHONEMES, end_phonemes2, wflags|FLAG_NO_PREFIX|FLAG_NO_TRACE, dictionary_flags); | |||||
| if(end2) | if(end2) | ||||
| { | { | ||||
| RemoveEnding(word,end2,word_copy); | |||||
| end_type = TranslateRules(word, phonemes, N_WORD_PHONEMES, end_phonemes, wflags|FLAG_NO_TRACE, dictionary_flags); | |||||
| memcpy(word,word_copy,strlen(word_copy)); | |||||
| RemoveEnding(wordx,end2,word_copy); | |||||
| end_type = TranslateRules(wordx, phonemes, N_WORD_PHONEMES, end_phonemes, wflags|FLAG_NO_TRACE, dictionary_flags); | |||||
| memcpy(wordx,word_copy,strlen(word_copy)); | |||||
| if((end_type & SUFX_P) == 0) | if((end_type & SUFX_P) == 0) | ||||
| { | { | ||||
| // after removing the suffix, the prefix is no longer recognised. | // after removing the suffix, the prefix is no longer recognised. | ||||
| expect_verb = 1; // use the verb form of the word | expect_verb = 1; // use the verb form of the word | ||||
| } | } | ||||
| word[-1] = c_temp; | |||||
| wordx[-1] = c_temp; | |||||
| for(ix=(prefix_type & 0xf); ix>0; ix--) // num. of characters to remove | for(ix=(prefix_type & 0xf); ix>0; ix--) // num. of characters to remove | ||||
| { | { | ||||
| word++; | |||||
| while((*word & 0xc0) == 0x80) word++; // for multibyte characters | |||||
| wordx++; | |||||
| while((*wordx & 0xc0) == 0x80) wordx++; // for multibyte characters | |||||
| } | } | ||||
| c_temp = word[-1]; | |||||
| word[-1] = ' '; | |||||
| c_temp = wordx[-1]; | |||||
| wordx[-1] = ' '; | |||||
| confirm_prefix = 1; | confirm_prefix = 1; | ||||
| end_type = 0; | end_type = 0; | ||||
| found = LookupDictList(word,phonemes,&dictionary_flags2,SUFX_P | (wflags << 16)); | |||||
| found = LookupDictList(&wordx,phonemes,&dictionary_flags2,SUFX_P | (wflags << 16)); // without prefix | |||||
| if(dictionary_flags==0) | if(dictionary_flags==0) | ||||
| dictionary_flags = dictionary_flags2; | dictionary_flags = dictionary_flags2; | ||||
| else | else | ||||
| prefix_flags = 1; | prefix_flags = 1; | ||||
| if(found == 0) | if(found == 0) | ||||
| { | { | ||||
| end_type = TranslateRules(word, phonemes, N_WORD_PHONEMES, end_phonemes, 0, dictionary_flags); | |||||
| end_type = TranslateRules(wordx, phonemes, N_WORD_PHONEMES, end_phonemes, 0, dictionary_flags); | |||||
| if(phonemes[0] == phonSWITCH) | if(phonemes[0] == phonSWITCH) | ||||
| { | { | ||||
| // change to another language in order to translate this word | // change to another language in order to translate this word | ||||
| word[-1] = c_temp; | |||||
| wordx[-1] = c_temp; | |||||
| strcpy(word_phonemes,phonemes); | strcpy(word_phonemes,phonemes); | ||||
| return(0); | return(0); | ||||
| } | } | ||||
| strcpy(phonemes2,phonemes); | strcpy(phonemes2,phonemes); | ||||
| // The word has a standard ending, re-translate without this ending | // The word has a standard ending, re-translate without this ending | ||||
| end_flags = RemoveEnding(word,end_type,word_copy); | |||||
| end_flags = RemoveEnding(wordx,end_type,word_copy); | |||||
| phonemes_ptr = &phonemes[unpron_length]; | phonemes_ptr = &phonemes[unpron_length]; | ||||
| phonemes_ptr[0] = 0; | phonemes_ptr[0] = 0; | ||||
| if(prefix_phonemes[0] != 0) | if(prefix_phonemes[0] != 0) | ||||
| { | { | ||||
| // lookup the stem without the prefix removed | // lookup the stem without the prefix removed | ||||
| word[-1] = c_temp; | |||||
| found = LookupDictList(word1,phonemes_ptr,&dictionary_flags2,end_flags | (wflags << 16)); | |||||
| word[-1] = ' '; | |||||
| wordx[-1] = c_temp; | |||||
| found = LookupDictList(&word1,phonemes_ptr,&dictionary_flags2,end_flags | (wflags << 16)); // include prefix, but not suffix | |||||
| wordx[-1] = ' '; | |||||
| if(dictionary_flags==0) | if(dictionary_flags==0) | ||||
| dictionary_flags = dictionary_flags2; | dictionary_flags = dictionary_flags2; | ||||
| if(found) | if(found) | ||||
| prefix_phonemes[0] = 0; // matched whole word, don't need prefix now | prefix_phonemes[0] = 0; // matched whole word, don't need prefix now | ||||
| // if(found || (dictionary_flags2 != 0)) | |||||
| // prefix_flags = 1; // ?? this looks wrong | |||||
| if((found==0) && (dictionary_flags2 != 0)) | if((found==0) && (dictionary_flags2 != 0)) | ||||
| prefix_flags = 1; | prefix_flags = 1; | ||||
| } | } | ||||
| if(found == 0) | if(found == 0) | ||||
| { | { | ||||
| found = LookupDictList(word,phonemes_ptr,&dictionary_flags2,end_flags | (wflags << 16)); | |||||
| found = LookupDictList(&wordx,phonemes_ptr,&dictionary_flags2,end_flags | (wflags << 16)); // without prefix and suffix | |||||
| if(phonemes_ptr[0] == phonSWITCH) | if(phonemes_ptr[0] == phonSWITCH) | ||||
| { | { | ||||
| // change to another language in order to translate this word | // change to another language in order to translate this word | ||||
| memcpy(word,word_copy,strlen(word_copy)); | |||||
| memcpy(wordx,word_copy,strlen(word_copy)); | |||||
| strcpy(word_phonemes,phonemes_ptr); | strcpy(word_phonemes,phonemes_ptr); | ||||
| return(0); | return(0); | ||||
| } | } | ||||
| else | else | ||||
| { | { | ||||
| if(end_flags & FLAG_SUFX) | if(end_flags & FLAG_SUFX) | ||||
| TranslateRules(word, phonemes, N_WORD_PHONEMES, NULL,wflags | FLAG_SUFFIX_REMOVED, dictionary_flags); | |||||
| TranslateRules(wordx, phonemes, N_WORD_PHONEMES, NULL,wflags | FLAG_SUFFIX_REMOVED, dictionary_flags); | |||||
| else | else | ||||
| TranslateRules(word, phonemes, N_WORD_PHONEMES, NULL,wflags,dictionary_flags); | |||||
| TranslateRules(wordx, phonemes, N_WORD_PHONEMES, NULL,wflags,dictionary_flags); | |||||
| if(phonemes[0] == phonSWITCH) | if(phonemes[0] == phonSWITCH) | ||||
| { | { | ||||
| // change to another language in order to translate this word | // change to another language in order to translate this word | ||||
| strcpy(word_phonemes,phonemes); | strcpy(word_phonemes,phonemes); | ||||
| memcpy(word,word_copy,strlen(word_copy)); | |||||
| word[-1] = c_temp; | |||||
| memcpy(wordx,word_copy,strlen(word_copy)); | |||||
| wordx[-1] = c_temp; | |||||
| return(0); | return(0); | ||||
| } | } | ||||
| } | } | ||||
| end_phonemes[0] = 0; | end_phonemes[0] = 0; | ||||
| } | } | ||||
| } | } | ||||
| word[-1] = c_temp; | |||||
| wordx[-1] = c_temp; | |||||
| } | } | ||||
| } | } | ||||
| expect_past = 0; | expect_past = 0; | ||||
| } | } | ||||
| if((word[0] != 0) && (!(dictionary_flags & FLAG_VERB_EXT))) | |||||
| if((wordx[0] != 0) && (!(dictionary_flags & FLAG_VERB_EXT))) | |||||
| { | { | ||||
| if(expect_verb > 0) | if(expect_verb > 0) | ||||
| expect_verb -= 1; | expect_verb -= 1; |
| // bits 0-3 stressed syllable, 7=unstressed | // bits 0-3 stressed syllable, 7=unstressed | ||||
| #define FLAG_SKIPWORDS 0x80 | #define FLAG_SKIPWORDS 0x80 | ||||
| #define FLAG_PREPAUSE 0x100 | #define FLAG_PREPAUSE 0x100 | ||||
| #define FLAG_ONLY 0x200 | #define FLAG_ONLY 0x200 | ||||
| #define FLAG_ONLY_S 0x400 | |||||
| #define BITNUM_FLAG_ONLY 9 // bit 9 is set | #define BITNUM_FLAG_ONLY 9 // bit 9 is set | ||||
| #define BITNUM_FLAG_ONLY_S 10 // bit 10 is set | #define BITNUM_FLAG_ONLY_S 10 // bit 10 is set | ||||
| #define FLAG_ONLY_S 0x400 | |||||
| #define FLAG_STRESS_END 0x800 /* full stress if at end of clause */ | #define FLAG_STRESS_END 0x800 /* full stress if at end of clause */ | ||||
| #define FLAG_STRESS_END2 0x1000 /* full stress if at end of clause, or only followed by unstressed */ | #define FLAG_STRESS_END2 0x1000 /* full stress if at end of clause, or only followed by unstressed */ | ||||
| #define FLAG_UNSTRESS_END 0x2000 /* reduce stress at end of clause */ | #define FLAG_UNSTRESS_END 0x2000 /* reduce stress at end of clause */ | ||||
| #define FLAG_PASTF 0x8000000 /* past tense follows */ | #define FLAG_PASTF 0x8000000 /* past tense follows */ | ||||
| #define FLAG_VERB_EXT 0x10000000 /* extend the 'verb follows' */ | #define FLAG_VERB_EXT 0x10000000 /* extend the 'verb follows' */ | ||||
| #define FLAG_DICTTEXT 0x20000000 // word translates to replacement text, not phonemes | |||||
| #define BITNUM_FLAG_DICTTEXT 29 | |||||
| #define FLAG_PAUSE1 0x40000000 // shorter prepause | #define FLAG_PAUSE1 0x40000000 // shorter prepause | ||||
| #define FLAG_FOUND 0x80000000 /* pronunciation was found in the dictionary list */ | #define FLAG_FOUND 0x80000000 /* pronunciation was found in the dictionary list */ | ||||
| #define SUFX_Q 0x4000 // don't retranslate | #define SUFX_Q 0x4000 // don't retranslate | ||||
| #define SUFX_T 0x10000 // don't affect the stress position in the stem | #define SUFX_T 0x10000 // don't affect the stress position in the stem | ||||
| #define FLAG_ALLOW_DICTTEXT 0x02 // allow dictionary to translate to text rather than phonemes | |||||
| #define FLAG_SUFX 0x04 | #define FLAG_SUFX 0x04 | ||||
| #define FLAG_SUFX_S 0x08 | #define FLAG_SUFX_S 0x08 | ||||
| #define FLAG_SUFX_E_ADDED 0x10 | #define FLAG_SUFX_E_ADDED 0x10 | ||||
| virtual int ChangePhonemes(PHONEME_LIST2 *phlist, int n_ph, int index, PHONEME_TAB *ph, CHANGEPH *ch); | virtual int ChangePhonemes(PHONEME_LIST2 *phlist, int n_ph, int index, PHONEME_TAB *ph, CHANGEPH *ch); | ||||
| int IsVowel(int letter); | int IsVowel(int letter); | ||||
| int LookupDictList(char *word1, char *ph_out, unsigned int *flags, int end_flags); | |||||
| int LookupDictList(char **wordptr, char *ph_out, unsigned int *flags, int end_flags); | |||||
| int Lookup(char *word, char *ph_out); | int Lookup(char *word, char *ph_out); | ||||