Lang eo: use .replace in eo_rules for "cx" etc. git-svn-id: https://espeak.svn.sourceforge.net/svnroot/espeak/trunk@94 d46cf337-b52f-0410-862d-fd96e6ae7743master
| @@ -725,6 +725,7 @@ dingy dIndZI2 | |||
| dinosaur daIn@sO@ | |||
| diode daIoUd | |||
| diplomacy dIpl'oUm@sI2 | |||
| dipole daIpoUl | |||
| disconsolate dIsk'0ns@l@t | |||
| disc dIsk // for discs | |||
| disciple dIs'aIp@L | |||
| @@ -857,6 +858,7 @@ finite faInaIt | |||
| fishnet fISnEt | |||
| fiord fi:O@d | |||
| fix fIks // for fixer | |||
| fjord fIO@d | |||
| flagellum fla2dZEl@m | |||
| flexible flEksIb@L | |||
| flier flaI3 | |||
| @@ -2255,7 +2257,6 @@ Penelope p@nEl@pI2 | |||
| Phoebe fi:bi: | |||
| Rachel reItS@L | |||
| Rhys ri:s | |||
| Robert r0b3t | |||
| Roderick r0drIk | |||
| Samantha s@manT@ | |||
| Sarah se@r@ | |||
| @@ -29,6 +29,11 @@ | |||
| // ?7 Scottish | |||
| // ?8 Use full vowel, not schwa in some word endings | |||
| .replace | |||
| ff f // ligature | |||
| fi fi | |||
| fl fl | |||
| .group a | |||
| _) a (_ a2 | |||
| @@ -1653,6 +1658,7 @@ | |||
| l) egion i:dZ@n | |||
| egy (pt i:dZI | |||
| n) e (gro i: | |||
| eh (_ eI | |||
| ei eI | |||
| _) ei aI | |||
| c) ei i: | |||
| @@ -3092,7 +3098,7 @@ | |||
| r) iv (el Iv | |||
| r) iv (en Iv | |||
| r) iv (et Iv | |||
| _l) iv (el aIv | |||
| _l) iv (eB aIv | |||
| XC) iv (el Iv | |||
| _g) iv (e Iv | |||
| _l) iv (e Iv | |||
| @@ -3377,6 +3383,7 @@ | |||
| o (bliga 0 | |||
| o (bligato %0 | |||
| o (blo 0 | |||
| _r) o (ber 0 | |||
| r) o (bot oU | |||
| _) o (bs %0 | |||
| _) obs (er %0bz | |||
| @@ -5223,6 +5230,3 @@ | |||
| € jU@roUz | |||
| ♯ SA@p | |||
| ♭ flat | |||
| ff f | |||
| fi fI | |||
| fl fl | |||
| @@ -32,6 +32,9 @@ _dpt komo_ | |||
| _. punkto | |||
| v vo // not Roman numbers | |||
| x ikso | |||
| t.e t'e,e | |||
| @@ -54,22 +57,18 @@ uea $abbrev | |||
| uk $abbrev | |||
| // includes both accented characters and ascii digraph alternatives (cx, gx, ux, etc) | |||
| l' la | |||
| la %la | |||
| ne $u+ // negative | |||
| cxu $u // interrogative particle | |||
| cxi $u | |||
| ĉu $u // cxi interrogative particle | |||
| ĉi $u // cxi interrogative particle | |||
| ĉu $u // interrogative particle | |||
| ĉi $u // interrogative particle | |||
| ia $u // any | |||
| // prepositions | |||
| de $u+ $brk | |||
| (de la) dela $u $brk | |||
| antaux $u+ $pause | |||
| antaŭ $u+ $pause | |||
| apud $u+ $pause | |||
| ol $u+ $brk | |||
| @@ -87,7 +86,6 @@ je $u $brk | |||
| kun $u+ $pause | |||
| laŭ $u+ $brk | |||
| laux $u+ $brk | |||
| per $u+ $brk | |||
| preter $u+ $brk | |||
| pri $u+ $pause | |||
| @@ -109,12 +107,10 @@ eĉ $brk | |||
| mi $u | |||
| ci $u | |||
| li $u | |||
| sxi $u | |||
| ŝi $u | |||
| gxi $u | |||
| ĝi $u | |||
| ni $u | |||
| vi $u | |||
| vi vi $u | |||
| ili $u | |||
| oni $u | |||
| si $u | |||
| @@ -122,9 +118,7 @@ si $u | |||
| mia $u+ | |||
| cia $u+ | |||
| lia $u+ | |||
| sxia $u+ | |||
| ŝia $u+ | |||
| gxia $u+ | |||
| ĝia $u+ | |||
| nia $u+ | |||
| via $u+ | |||
| @@ -134,9 +128,7 @@ sia $u+ | |||
| miaj $u+ | |||
| ciaj $u+ | |||
| liaj $u+ | |||
| sxiaj $u+ | |||
| ŝiaj $u+ | |||
| gxiaj $u+ | |||
| ĝiaj $u+ | |||
| niaj $u+ | |||
| viaj $u+ | |||
| @@ -193,9 +185,7 @@ tiam $brk | |||
| // conjunctions | |||
| ke $u $pause // that (conj) | |||
| aux $u $pause | |||
| aŭ $u $pause // aux | |||
| cxar $u $pause | |||
| aŭ $u $pause | |||
| ĉar $u $pause | |||
| kaj $u $pause | |||
| sed $u $pause | |||
| @@ -3,12 +3,19 @@ | |||
| // includes both accented characters and ascii digraph alternatives (cx, gx, ux, etc) | |||
| .replace | |||
| cx ĉ | |||
| gx ĝ | |||
| hx ĥ | |||
| jx ĵ | |||
| sx ŝ | |||
| ux ŭ | |||
| .group a | |||
| _) a(_ a | |||
| a a | |||
| aj aI | |||
| a (jx a | |||
| aux aU | |||
| aŭ aU | |||
| D_-_) a (_ %a //'a' after a number | |||
| D_-_) an (_ %an //'an' after a number | |||
| @@ -22,11 +29,9 @@ | |||
| .group c | |||
| _) c(_ tso | |||
| _) ch(_ tSo | |||
| _) cx(_ tSo | |||
| c ts | |||
| ch (K tS // foreign words | |||
| K) ch tS // foreign words | |||
| cx tS | |||
| .group d | |||
| @@ -40,8 +45,6 @@ | |||
| _) e(_ e | |||
| e e | |||
| ej eI | |||
| e (jx e | |||
| eux eU | |||
| eŭ eU | |||
| .group f | |||
| @@ -53,17 +56,13 @@ | |||
| .group g | |||
| _) g(_ go | |||
| _) gx(_ dZo | |||
| g g | |||
| gh (K dZ | |||
| gx dZ | |||
| .group h | |||
| _) h(_ ho | |||
| _) hx(_ xo | |||
| h h | |||
| hh x | |||
| hx x | |||
| .group i | |||
| @@ -73,10 +72,8 @@ | |||
| .group j | |||
| _) j(_ jo | |||
| _) jx(_ Zo | |||
| j j | |||
| jh (C Z | |||
| jx Z | |||
| .group k | |||
| _) k(_ ko | |||
| @@ -106,7 +103,6 @@ | |||
| _) o(_ o | |||
| o o | |||
| oj OI | |||
| o (jx o | |||
| .group p | |||
| _) p(_ po | |||
| @@ -131,9 +127,7 @@ | |||
| .group s | |||
| _) s(_ so | |||
| _) sh(_ So | |||
| _) sx(_ So | |||
| s s | |||
| sx S | |||
| sh (K S // foreign words | |||
| K) sh S // foreign words | |||
| @@ -149,11 +143,8 @@ | |||
| .group u | |||
| _) u(_ u | |||
| _) ux(_ wo | |||
| u u | |||
| uj uI | |||
| u (jx u | |||
| ux w | |||
| .group v | |||
| @@ -528,6 +528,23 @@ The dictionary list is searched from bottom to top. The first match that satisf | |||
| </pre> | |||
| then if "to" is at the end of the clause, we get [tu:], if not then we get [t@]. | |||
| <p> | |||
| <h4>4.4.4 Translating a Word with another Word</h4> | |||
| Rather than specifying the pronunciation of a word by a phoneme string, you can specify another "sounds like" word.<p>Use the attribute <b>$text</b> eg.<p> | |||
| <pre> | |||
| cough coff $text | |||
| </pre> | |||
| Alternatively, use the command <b>$textmode</b> on a line by itself to turn this on for all subsequent entries in the file, until it's turned off by <b>$phonememode</b>. eg.<p> | |||
| <pre> | |||
| $textmode | |||
| cough coff | |||
| through threw | |||
| $phonememode | |||
| </pre> | |||
| This feature cannot be used for the special entries in the <b>_list</b> files which start with an underscore, such as numbers.<p> | |||
| Currently "textmode" entries are only recognized for complete words, and not for stems from which a prefix or suffix has been removed (eg. the word "coughs" would not match the example above). | |||
| <p> | |||
| <p> <hr> | |||
| <h3>4.5 Conditional Rules</h3> | |||
| Rules in a <b>_rules</b> file and entries in a <b>_list</b> file can be made conditional. They apply only to some voices. This can be useful to specify different pronunciations for different variants of a language (dialects or accents).<p> | |||
| @@ -572,6 +589,17 @@ _0 to _9 | |||
| </tbody></table> | |||
| </ul> | |||
| <p> <hr> | |||
| <h3>4.7 Character Substitution</h3> | |||
| Character substitutions can be specified by using a <b> .replace </b> section at the start of the <b> _rules </b> file. Each line specifies either one or two alphabetic characters to be replaced by another one or two alphabetic characters. This substitution is done to a word before it is translated using the spelling-to-phoneme rules. Only the lower-case version of the characters needs to be specified. eg.<p> | |||
| .replace<br> | |||
| ô ő // (Hungarian) allow the use of o-circumflex instead of o-double-acute<br> | |||
| û ű<p> | |||
| cx ĉ // (Esperanto) allow "cx" as an alternative to c-circumflex<p> | |||
| ﬁ fi // replace a single character ligature by two characters | |||
| <p> | |||
| </body> | |||
| </html> | |||
| @@ -18,7 +18,7 @@ phoneme t2 // [t] which doesn't reduce | |||
| vowelin f1=0 f2=1700 -300 300 f3=-100 80 | |||
| vowelout f1=0 f2=1700 -300 250 f3=-100 80 rms=20 | |||
| lengthmod 2 | |||
| wave ustop/t | |||
| wave ustop/t%90 | |||
| before _ ustop/t_ | |||
| before @- ustop/t_dnt%50 | |||
| before r ustop/tr | |||
| @@ -304,7 +304,7 @@ phoneme t# // reduced [t] as in "city" | |||
| vowelin f1=0 f2=1700 -300 300 f3=-100 80 | |||
| vowelout f1=0 f2=1700 -300 300 f3=-100 80 | |||
| lengthmod 5 | |||
| wave x/d%90 | |||
| wave x/d%80 | |||
| endphoneme | |||
| @@ -895,7 +895,7 @@ phoneme t | |||
| vowelin f1=0 f2=1700 -300 300 f3=-100 80 | |||
| vowelout f1=0 f2=1700 -300 250 f3=-100 80 rms=20 | |||
| lengthmod 2 | |||
| wave ustop/t | |||
| wave ustop/t%90 | |||
| before _ ustop/t_ | |||
| before @- ustop/t_dnt%50 | |||
| before r ustop/tr | |||
| @@ -44,6 +44,7 @@ static int error_count; | |||
| static int transpose_offset; // transpose character range for LookupDictList() | |||
| static int transpose_min; | |||
| static int transpose_max; | |||
| static int text_mode = 0; | |||
| int hash_counts[N_HASH_DICT]; | |||
| char *hash_chains[N_HASH_DICT]; | |||
| @@ -94,9 +95,13 @@ MNEM_TAB mnem_flags[] = { | |||
| {"$pastf", 27}, /* past tense follows */ | |||
| {"$verbextend",28}, /* extend influence of 'verb follows' */ | |||
| {"$brk", 30}, /* a shorter $pause */ | |||
| {"$text", 29}, // word translates to replcement text, not phonemes | |||
| {"$brk", 30}, // a shorter $pause | |||
| // doesn't set dictionary_flags | |||
| {"$?", 100}, // conditional rule, followed by byte giving the condition number | |||
| {"$textmode", 200}, | |||
| {"$phonememode", 201}, | |||
| {NULL, -1} | |||
| }; | |||
| @@ -169,17 +174,37 @@ int compile_line(char *linebuf, char *dict_line, int *hash) | |||
| int len_word; | |||
| int len_phonetic; | |||
| int text_not_phonemes; // this word specifies replacement text, not phonemes | |||
| char *mnemptr; | |||
| char *comment; | |||
| unsigned char flag_codes[100]; | |||
| char encoded_ph[200]; | |||
| unsigned char bad_phoneme[4]; | |||
| p = linebuf; | |||
| comment = NULL; | |||
| text_not_phonemes = 0; | |||
| phonetic = word = ""; | |||
| p = linebuf; | |||
| // while(isspace2(*p)) p++; | |||
| #ifdef deleted | |||
| if(*p == '$') | |||
| { | |||
| if(memcmp(p,"$textmode",9) == 0) | |||
| { | |||
| text_mode = 1; | |||
| return(0); | |||
| } | |||
| if(memcmp(p,"$phonememode",12) == 0) | |||
| { | |||
| text_mode = 0; | |||
| return(0); | |||
| } | |||
| } | |||
| #endif | |||
| step = 0; | |||
| c = 0; | |||
| @@ -224,7 +249,26 @@ int compile_line(char *linebuf, char *dict_line, int *hash) | |||
| ix = LookupMnem(mnem_flags,mnemptr); | |||
| if(ix > 0) | |||
| flag_codes[n_flag_codes++] = ix; | |||
| { | |||
| if(ix == 200) | |||
| { | |||
| text_mode = 1; | |||
| } | |||
| else | |||
| if(ix == 201) | |||
| { | |||
| text_mode = 0; | |||
| } | |||
| else | |||
| if(ix == BITNUM_FLAG_DICTTEXT) | |||
| { | |||
| text_not_phonemes = 1; | |||
| } | |||
| else | |||
| { | |||
| flag_codes[n_flag_codes++] = ix; | |||
| } | |||
| } | |||
| else | |||
| { | |||
| fprintf(f_log,"%5d: Unknown keyword: %s\n",linenum,mnemptr); | |||
| @@ -325,25 +369,35 @@ int compile_line(char *linebuf, char *dict_line, int *hash) | |||
| #endif | |||
| return(0); /* blank line */ | |||
| } | |||
| EncodePhonemes(phonetic,encoded_ph,bad_phoneme); | |||
| if(strchr(encoded_ph,phonSWITCH) != 0) | |||
| if(text_not_phonemes || text_mode) | |||
| { | |||
| flag_codes[n_flag_codes++] = BITNUM_FLAG_ONLY_S; | |||
| strcpy(encoded_ph,phonetic); // this is replacement text, so don't encode as phonemes | |||
| flag_codes[n_flag_codes++] = BITNUM_FLAG_DICTTEXT; | |||
| } | |||
| for(ix=0; ix<255; ix++) | |||
| else | |||
| { | |||
| c = encoded_ph[ix]; | |||
| if(c == 0) break; | |||
| if(c == 255) | |||
| EncodePhonemes(phonetic,encoded_ph,bad_phoneme); | |||
| if(strchr(encoded_ph,phonSWITCH) != 0) | |||
| { | |||
| /* unrecognised phoneme, report error */ | |||
| fprintf(f_log,"%5d: Bad phoneme [%c] (0x%x) in: %s %s\n",linenum,bad_phoneme[0],bad_phoneme[0],word,phonetic); | |||
| error_count++; | |||
| flag_codes[n_flag_codes++] = BITNUM_FLAG_ONLY_S; // don't match on suffixes (except 's') when switching languages | |||
| } | |||
| // check for errors in the phonemes codes | |||
| for(ix=0; ix<sizeof(encoded_ph); ix++) | |||
| { | |||
| c = encoded_ph[ix]; | |||
| if(c == 0) break; | |||
| if(c == 255) | |||
| { | |||
| /* unrecognised phoneme, report error */ | |||
| fprintf(f_log,"%5d: Bad phoneme [%c] (0x%x) in: %s %s\n",linenum,bad_phoneme[0],bad_phoneme[0],word,phonetic); | |||
| error_count++; | |||
| } | |||
| } | |||
| } | |||
| if((word[0] & 0x80)==0) // 7 bit ascii only | |||
| { | |||
| // If first letter is uppercase, convert to lower case. (Only if it's 7bit ascii) | |||
| @@ -504,6 +558,8 @@ int compile_dictlist_file(const char *path, const char* filename) | |||
| char buf[sizeof(path_home)+45]; | |||
| char dict_line[128]; | |||
| text_mode = 0; | |||
| sprintf(buf,"%s%s",path,filename); | |||
| if((f_in = fopen(buf,"r")) == NULL) | |||
| return(-1); | |||
| @@ -2795,7 +2795,11 @@ int Translator::LookupDict2(char *word, char *word2, char *phonetic, unsigned in | |||
| DecodePhonemes(phonetic,ph_decoded); | |||
| if(flags != NULL) | |||
| flags1 = *flags; | |||
| fprintf(f_trans,"Found: %s [%s] %s\n",word1,ph_decoded,print_dflags(flags1)); | |||
| if((dictionary_flags & FLAG_DICTTEXT) == 0) | |||
| { | |||
| fprintf(f_trans,"Found: %s [%s] %s\n",word1,ph_decoded,print_dflags(flags1)); | |||
| } | |||
| } | |||
| return(1); | |||
| @@ -2805,8 +2809,8 @@ int Translator::LookupDict2(char *word, char *word2, char *phonetic, unsigned in | |||
| int Translator::LookupDictList(char *word1, char *ph_out, unsigned int *flags, int end_flags) | |||
| //=========================================================================================== | |||
| int Translator::LookupDictList(char **wordptr, char *ph_out, unsigned int *flags, int end_flags) | |||
| //============================================================================================== | |||
| /* Lookup a specified word in the word dictionary. | |||
| Returns phonetic data in 'phonetic' and bits in 'flags' | |||
| @@ -2815,14 +2819,16 @@ int Translator::LookupDictList(char *word1, char *ph_out, unsigned int *flags, i | |||
| { | |||
| int length; | |||
| int found; | |||
| char *word1; | |||
| char *word2; | |||
| unsigned char c; | |||
| int nbytes; | |||
| int c2; | |||
| char word[N_WORD_BYTES]; | |||
| static char word_replacement[N_WORD_BYTES]; | |||
| length = 0; | |||
| word2 = word1; | |||
| word2 = word1 = *wordptr; | |||
| while((word2[nbytes = utf8_in(&c2,word2,0)]==' ') && (word2[nbytes+1]=='.')) | |||
| { | |||
| @@ -2857,25 +2863,52 @@ int Translator::LookupDictList(char *word1, char *ph_out, unsigned int *flags, i | |||
| found = LookupDict2(word,word1,ph_out,flags,end_flags); | |||
| if(found) return(1); | |||
| ph_out[0] = 0; | |||
| // try modifications to find a recognised word | |||
| if((end_flags & FLAG_SUFX_E_ADDED) && (word[length-1] == 'e')) | |||
| if(found == 0) | |||
| { | |||
| // try removing an 'e' which has been added by RemoveEnding | |||
| word[length-1] = 0; | |||
| found = LookupDict2(word,word1,ph_out,flags,end_flags); | |||
| if(found) return(1); | |||
| ph_out[0] = 0; | |||
| // try modifications to find a recognised word | |||
| if((end_flags & FLAG_SUFX_E_ADDED) && (word[length-1] == 'e')) | |||
| { | |||
| // try removing an 'e' which has been added by RemoveEnding | |||
| word[length-1] = 0; | |||
| found = LookupDict2(word,word1,ph_out,flags,end_flags); | |||
| } | |||
| else | |||
| if((end_flags & SUFX_D) && (word[length-1] == word[length-2])) | |||
| { | |||
| // try removing a double letter | |||
| word[length-1] = 0; | |||
| found = LookupDict2(word,word1,ph_out,flags,end_flags); | |||
| } | |||
| } | |||
| if((end_flags & SUFX_D) && (word[length-1] == word[length-2])) | |||
| if(found) | |||
| { | |||
| // try removing a double letter | |||
| word[length-1] = 0; | |||
| found = LookupDict2(word,word1,ph_out,flags,end_flags); | |||
| if(found) return(1); | |||
| if(*flags & FLAG_DICTTEXT) | |||
| { | |||
| // the word translates to replacement text, not to phonemes | |||
| if(end_flags & FLAG_ALLOW_DICTTEXT) | |||
| { | |||
| // only use replacement text if this is the original word, not if a prefix or suffix has been removed | |||
| word_replacement[0] = 0; | |||
| word_replacement[1] = ' '; | |||
| strcpy(&word_replacement[2],ph_out); // replacement word, preceded by zerochar and space | |||
| *wordptr = &word_replacement[2]; | |||
| if(option_phonemes == 2) | |||
| { | |||
| fprintf(f_trans,"Replace: %s %s\n",word,*wordptr); | |||
| } | |||
| } | |||
| ph_out[0] = 0; | |||
| return(0); | |||
| } | |||
| return(1); | |||
| } | |||
| ph_out[0] = 0; | |||
| @@ -2887,7 +2920,7 @@ int Translator::LookupDictList(char *word1, char *ph_out, unsigned int *flags, i | |||
| int Translator::Lookup(char *word, char *ph_out) | |||
| {//============================================= | |||
| unsigned int flags; | |||
| return(LookupDictList(word,ph_out,&flags,0)); | |||
| return(LookupDictList(&word,ph_out,&flags,0)); | |||
| } | |||
| @@ -470,7 +470,7 @@ const char *Translator::LookupSpecial(char *string) | |||
| char phonemes2[55]; | |||
| static char buf[60]; | |||
| if(LookupDictList(string,phonemes,&flags,0)) | |||
| if(LookupDictList(&string,phonemes,&flags,0)) | |||
| { | |||
| SetWordStress(phonemes,flags,-1,0); | |||
| DecodePhonemes(phonemes,phonemes2); | |||
| @@ -35,7 +35,7 @@ | |||
| #include "translate.h" | |||
| #include "wave.h" | |||
| const char *version_string = "1.29.11 23.Oct.07"; | |||
| const char *version_string = "1.29.12 29.Oct.07"; | |||
| const int version_phdata = 0x012901; | |||
| int option_device_number = -1; | |||
| @@ -219,7 +219,7 @@ Translator *SelectTranslator(const char *name) | |||
| tr->langopts.unstressed_wd1 = 1; | |||
| tr->langopts.unstressed_wd2 = 2; | |||
| tr->langopts.numbers = 0x1409; | |||
| tr->langopts.numbers = 0x1c09 + NUM_ROMAN; | |||
| } | |||
| break; | |||
| @@ -592,7 +592,7 @@ int Translator::TranslateWord(char *word1, int next_pause, WORD_TAB *wtab) | |||
| unsigned int dictionary_flags2=0; | |||
| int end_type=0; | |||
| int prefix_type=0; | |||
| char *word; | |||
| char *wordx; | |||
| char phonemes[N_WORD_PHONEMES]; | |||
| char *ph_limit; | |||
| char *phonemes_ptr; | |||
| @@ -618,20 +618,19 @@ int Translator::TranslateWord(char *word1, int next_pause, WORD_TAB *wtab) | |||
| static char word_iz[4] = {0,'i','z',0}; | |||
| static char word_ss[4] = {0,'s','s',0}; | |||
| word = word1; | |||
| prefix_phonemes[0] = 0; | |||
| end_phonemes[0] = 0; | |||
| ph_limit = &phonemes[N_WORD_PHONEMES]; | |||
| // count the length of the word | |||
| utf8_in(&first_char,word,0); | |||
| wordx = word1; | |||
| utf8_in(&first_char,wordx,0); | |||
| word_length = 0; | |||
| while((*word != 0) && (*word != ' ')) | |||
| while((*wordx != 0) && (*wordx != ' ')) | |||
| { | |||
| word += utf8_in(&last_char,word,0); | |||
| wordx += utf8_in(&last_char,wordx,0); | |||
| word_length++; | |||
| } | |||
| word = word1; | |||
| // try an initial lookup in the dictionary list, we may find a pronunciation specified, or | |||
| // we may just find some flags | |||
| @@ -643,7 +642,8 @@ int Translator::TranslateWord(char *word1, int next_pause, WORD_TAB *wtab) | |||
| else | |||
| { | |||
| spell_word = 0; | |||
| found = LookupDictList(word,phonemes,&dictionary_flags,wflags << 16); | |||
| found = LookupDictList(&word1,phonemes,&dictionary_flags,FLAG_ALLOW_DICTTEXT | wflags << 16); // the original word | |||
| if(phonemes[0] == phonSWITCH) | |||
| { | |||
| // change to another language in order to translate this word | |||
| @@ -669,7 +669,7 @@ if((wmark > 0) && (wmark < 8)) | |||
| if(word_phonemes[0] == phonSWITCH) | |||
| return(0); | |||
| found = TranslateNumber(word,phonemes,&dictionary_flags,wflags); | |||
| found = TranslateNumber(word1,phonemes,&dictionary_flags,wflags); | |||
| } | |||
| if(!found & ((word_flags & FLAG_UPPERS) != FLAG_FIRST_UPPER)) | |||
| @@ -678,7 +678,7 @@ if((wmark > 0) && (wmark < 8)) | |||
| if((langopts.numbers & NUM_ROMAN) || ((langopts.numbers & NUM_ROMAN_UC) && (word_flags & FLAG_ALL_UPPER))) | |||
| { | |||
| if((found = TranslateRoman(word,phonemes)) != 0) | |||
| if((found = TranslateRoman(word1,phonemes)) != 0) | |||
| dictionary_flags |= FLAG_ABBREV; // don't spell capital Roman numbers as individual letters | |||
| } | |||
| } | |||
| @@ -694,14 +694,14 @@ if((wmark > 0) && (wmark < 8)) | |||
| if(spell_word > 0) | |||
| { | |||
| // Speak as individual letters | |||
| word = word1; | |||
| wordx = word1; | |||
| posn = 0; | |||
| phonemes[0] = 0; | |||
| end_type = 0; | |||
| while(*word != ' ') | |||
| while(*wordx != ' ') | |||
| { | |||
| word += TranslateLetter(word, phonemes,spell_word); | |||
| wordx += TranslateLetter(wordx, phonemes,spell_word); | |||
| if(phonemes[0] == phonSWITCH) | |||
| { | |||
| // change to another language in order to translate this word | |||
| @@ -719,12 +719,14 @@ if((wmark > 0) && (wmark < 8)) | |||
| posn = 0; | |||
| length = 999; | |||
| while(((length < 3) && (length > 0))|| (word_length > 1 && Unpronouncable(word))) | |||
| wordx = word1; | |||
| while(((length < 3) && (length > 0))|| (word_length > 1 && Unpronouncable(wordx))) | |||
| { | |||
| char *p; | |||
| // This word looks "unpronouncable", so speak letters individually until we | |||
| // find a remainder that we can pronounce. | |||
| word += TranslateLetter(word,phonemes,0); | |||
| wordx += TranslateLetter(wordx,phonemes,0); | |||
| if(phonemes[0] == phonSWITCH) | |||
| { | |||
| // change to another language in order to translate this word | |||
| @@ -732,7 +734,7 @@ if((wmark > 0) && (wmark < 8)) | |||
| return(0); | |||
| } | |||
| p = &word[word_length-3]; | |||
| p = &wordx[word_length-3]; | |||
| if(memcmp(p,"'s ",3) == 0) | |||
| { | |||
| // remove a 's suffix and pronounce this separately (not as an individual letter) | |||
| @@ -743,18 +745,18 @@ if((wmark > 0) && (wmark < 8)) | |||
| } | |||
| length=0; | |||
| while(word[length] != ' ') length++; | |||
| while(wordx[length] != ' ') length++; | |||
| if(length > 0) | |||
| word[-1] = ' '; // prevent this affecting the pronunciation of the pronuncable part | |||
| wordx[-1] = ' '; // prevent this affecting the pronunciation of the pronuncable part | |||
| } | |||
| SetSpellingStress(phonemes,0); | |||
| // anything left ? | |||
| if(*word != ' ') | |||
| if(*wordx != ' ') | |||
| { | |||
| // Translate the stem | |||
| unpron_length = strlen(phonemes); | |||
| end_type = TranslateRules(word, phonemes, N_WORD_PHONEMES, end_phonemes, wflags, dictionary_flags); | |||
| end_type = TranslateRules(wordx, phonemes, N_WORD_PHONEMES, end_phonemes, wflags, dictionary_flags); | |||
| if(phonemes[0] == phonSWITCH) | |||
| { | |||
| @@ -763,7 +765,7 @@ if((wmark > 0) && (wmark < 8)) | |||
| return(0); | |||
| } | |||
| c_temp = word[-1]; | |||
| c_temp = wordx[-1]; | |||
| found = 0; | |||
| confirm_prefix = 1; | |||
| @@ -779,12 +781,12 @@ if((wmark > 0) && (wmark < 8)) | |||
| // remove any standard suffix and confirm that the prefix is still recognised | |||
| phonemes2[0] = 0; | |||
| end2 = TranslateRules(word, phonemes2, N_WORD_PHONEMES, end_phonemes2, wflags|FLAG_NO_PREFIX|FLAG_NO_TRACE, dictionary_flags); | |||
| end2 = TranslateRules(wordx, phonemes2, N_WORD_PHONEMES, end_phonemes2, wflags|FLAG_NO_PREFIX|FLAG_NO_TRACE, dictionary_flags); | |||
| if(end2) | |||
| { | |||
| RemoveEnding(word,end2,word_copy); | |||
| end_type = TranslateRules(word, phonemes, N_WORD_PHONEMES, end_phonemes, wflags|FLAG_NO_TRACE, dictionary_flags); | |||
| memcpy(word,word_copy,strlen(word_copy)); | |||
| RemoveEnding(wordx,end2,word_copy); | |||
| end_type = TranslateRules(wordx, phonemes, N_WORD_PHONEMES, end_phonemes, wflags|FLAG_NO_TRACE, dictionary_flags); | |||
| memcpy(wordx,word_copy,strlen(word_copy)); | |||
| if((end_type & SUFX_P) == 0) | |||
| { | |||
| // after removing the suffix, the prefix is no longer recognised. | |||
| @@ -812,30 +814,30 @@ if((wmark > 0) && (wmark < 8)) | |||
| expect_verb = 1; // use the verb form of the word | |||
| } | |||
| word[-1] = c_temp; | |||
| wordx[-1] = c_temp; | |||
| for(ix=(prefix_type & 0xf); ix>0; ix--) // num. of characters to remove | |||
| { | |||
| word++; | |||
| while((*word & 0xc0) == 0x80) word++; // for multibyte characters | |||
| wordx++; | |||
| while((*wordx & 0xc0) == 0x80) wordx++; // for multibyte characters | |||
| } | |||
| c_temp = word[-1]; | |||
| word[-1] = ' '; | |||
| c_temp = wordx[-1]; | |||
| wordx[-1] = ' '; | |||
| confirm_prefix = 1; | |||
| end_type = 0; | |||
| found = LookupDictList(word,phonemes,&dictionary_flags2,SUFX_P | (wflags << 16)); | |||
| found = LookupDictList(&wordx,phonemes,&dictionary_flags2,SUFX_P | (wflags << 16)); // without prefix | |||
| if(dictionary_flags==0) | |||
| dictionary_flags = dictionary_flags2; | |||
| else | |||
| prefix_flags = 1; | |||
| if(found == 0) | |||
| { | |||
| end_type = TranslateRules(word, phonemes, N_WORD_PHONEMES, end_phonemes, 0, dictionary_flags); | |||
| end_type = TranslateRules(wordx, phonemes, N_WORD_PHONEMES, end_phonemes, 0, dictionary_flags); | |||
| if(phonemes[0] == phonSWITCH) | |||
| { | |||
| // change to another language in order to translate this word | |||
| word[-1] = c_temp; | |||
| wordx[-1] = c_temp; | |||
| strcpy(word_phonemes,phonemes); | |||
| return(0); | |||
| } | |||
| @@ -848,7 +850,7 @@ char phonemes2[N_WORD_PHONEMES]; | |||
| strcpy(phonemes2,phonemes); | |||
| // The word has a standard ending, re-translate without this ending | |||
| end_flags = RemoveEnding(word,end_type,word_copy); | |||
| end_flags = RemoveEnding(wordx,end_type,word_copy); | |||
| phonemes_ptr = &phonemes[unpron_length]; | |||
| phonemes_ptr[0] = 0; | |||
| @@ -856,26 +858,24 @@ strcpy(phonemes2,phonemes); | |||
| if(prefix_phonemes[0] != 0) | |||
| { | |||
| // lookup the stem without the prefix removed | |||
| word[-1] = c_temp; | |||
| found = LookupDictList(word1,phonemes_ptr,&dictionary_flags2,end_flags | (wflags << 16)); | |||
| word[-1] = ' '; | |||
| wordx[-1] = c_temp; | |||
| found = LookupDictList(&word1,phonemes_ptr,&dictionary_flags2,end_flags | (wflags << 16)); // include prefix, but not suffix | |||
| wordx[-1] = ' '; | |||
| if(dictionary_flags==0) | |||
| dictionary_flags = dictionary_flags2; | |||
| if(found) | |||
| prefix_phonemes[0] = 0; // matched whole word, don't need prefix now | |||
| // if(found || (dictionary_flags2 != 0)) | |||
| // prefix_flags = 1; // ?? this looks wrong | |||
| if((found==0) && (dictionary_flags2 != 0)) | |||
| prefix_flags = 1; | |||
| } | |||
| if(found == 0) | |||
| { | |||
| found = LookupDictList(word,phonemes_ptr,&dictionary_flags2,end_flags | (wflags << 16)); | |||
| found = LookupDictList(&wordx,phonemes_ptr,&dictionary_flags2,end_flags | (wflags << 16)); // without prefix and suffix | |||
| if(phonemes_ptr[0] == phonSWITCH) | |||
| { | |||
| // change to another language in order to translate this word | |||
| memcpy(word,word_copy,strlen(word_copy)); | |||
| memcpy(wordx,word_copy,strlen(word_copy)); | |||
| strcpy(word_phonemes,phonemes_ptr); | |||
| return(0); | |||
| } | |||
| @@ -895,16 +895,16 @@ strcpy(phonemes2,phonemes); | |||
| else | |||
| { | |||
| if(end_flags & FLAG_SUFX) | |||
| TranslateRules(word, phonemes, N_WORD_PHONEMES, NULL,wflags | FLAG_SUFFIX_REMOVED, dictionary_flags); | |||
| TranslateRules(wordx, phonemes, N_WORD_PHONEMES, NULL,wflags | FLAG_SUFFIX_REMOVED, dictionary_flags); | |||
| else | |||
| TranslateRules(word, phonemes, N_WORD_PHONEMES, NULL,wflags,dictionary_flags); | |||
| TranslateRules(wordx, phonemes, N_WORD_PHONEMES, NULL,wflags,dictionary_flags); | |||
| if(phonemes[0] == phonSWITCH) | |||
| { | |||
| // change to another language in order to translate this word | |||
| strcpy(word_phonemes,phonemes); | |||
| memcpy(word,word_copy,strlen(word_copy)); | |||
| word[-1] = c_temp; | |||
| memcpy(wordx,word_copy,strlen(word_copy)); | |||
| wordx[-1] = c_temp; | |||
| return(0); | |||
| } | |||
| } | |||
| @@ -917,7 +917,7 @@ strcpy(phonemes2,phonemes); | |||
| end_phonemes[0] = 0; | |||
| } | |||
| } | |||
| word[-1] = c_temp; | |||
| wordx[-1] = c_temp; | |||
| } | |||
| } | |||
| @@ -1047,7 +1047,7 @@ strcpy(phonemes2,phonemes); | |||
| expect_past = 0; | |||
| } | |||
| if((word[0] != 0) && (!(dictionary_flags & FLAG_VERB_EXT))) | |||
| if((wordx[0] != 0) && (!(dictionary_flags & FLAG_VERB_EXT))) | |||
| { | |||
| if(expect_verb > 0) | |||
| expect_verb -= 1; | |||
| @@ -36,10 +36,12 @@ | |||
| // bits 0-3 stressed syllable, 7=unstressed | |||
| #define FLAG_SKIPWORDS 0x80 | |||
| #define FLAG_PREPAUSE 0x100 | |||
| #define FLAG_ONLY 0x200 | |||
| #define FLAG_ONLY_S 0x400 | |||
| #define BITNUM_FLAG_ONLY 9 // bit 9 is set | |||
| #define BITNUM_FLAG_ONLY_S 10 // bit 10 is set | |||
| #define FLAG_ONLY_S 0x400 | |||
| #define FLAG_STRESS_END 0x800 /* full stress if at end of clause */ | |||
| #define FLAG_STRESS_END2 0x1000 /* full stress if at end of clause, or only followed by unstressed */ | |||
| #define FLAG_UNSTRESS_END 0x2000 /* reduce stress at end of clause */ | |||
| @@ -62,6 +64,9 @@ | |||
| #define FLAG_PASTF 0x8000000 /* past tense follows */ | |||
| #define FLAG_VERB_EXT 0x10000000 /* extend the 'verb follows' */ | |||
| #define FLAG_DICTTEXT 0x20000000 // word translates to replacement text, not phonemes | |||
| #define BITNUM_FLAG_DICTTEXT 29 | |||
| #define FLAG_PAUSE1 0x40000000 // shorter prepause | |||
| #define FLAG_FOUND 0x80000000 /* pronunciation was found in the dictionary list */ | |||
| @@ -92,6 +97,7 @@ | |||
| #define SUFX_Q 0x4000 // don't retranslate | |||
| #define SUFX_T 0x10000 // don't affect the stress position in the stem | |||
| #define FLAG_ALLOW_DICTTEXT 0x02 // allow dictionary to translate to text rather than phonemes | |||
| #define FLAG_SUFX 0x04 | |||
| #define FLAG_SUFX_S 0x08 | |||
| #define FLAG_SUFX_E_ADDED 0x10 | |||
| @@ -417,7 +423,7 @@ protected: | |||
| virtual int ChangePhonemes(PHONEME_LIST2 *phlist, int n_ph, int index, PHONEME_TAB *ph, CHANGEPH *ch); | |||
| int IsVowel(int letter); | |||
| int LookupDictList(char *word1, char *ph_out, unsigned int *flags, int end_flags); | |||
| int LookupDictList(char **wordptr, char *ph_out, unsigned int *flags, int end_flags); | |||
| int Lookup(char *word, char *ph_out); | |||