git-svn-id: https://espeak.svn.sourceforge.net/svnroot/espeak/trunk@93 d46cf337-b52f-0410-862d-fd96e6ae7743master
| @@ -91,21 +91,26 @@ bethlehem bEt_liEm | |||
| brittanje $2 | |||
| ceylon s@lOn | |||
| china S'ina | |||
| (graaff reinet) x2rA:f||r@n'Et | |||
| italië it'A:li;@ | |||
| jerusalem j@r'ysalEm | |||
| kanada kanad%a | |||
| knysna neIsna | |||
| kongo kONgu | |||
| mesopotamië mEsuput'A:mi@ | |||
| mosambiek musamb'ik | |||
| potchefstroom pOtSIfstr'o@m | |||
| (thaba nchu) tab'A:||ntSu | |||
| europa Y@r'o@pa | |||
| outeniekwa @Ut@n'ikwa | |||
| portugal $1 | |||
| potchefstroom pOtSIfstr'o@m | |||
| pretoria prit'o@ria | |||
| stellenbosch st%&l@mbOs | |||
| tunisië $2 | |||
| turkye $2 | |||
| upington apiNt@n | |||
| worcester v'ust@r | |||
| zimbabwe zI:mb'ab_wE | |||
| zimbabwe zI:mb'ab_wE | |||
| @@ -124,16 +129,19 @@ bester b'Est@r | |||
| botha bo@ta | |||
| breytenbach breIt@nbax2 | |||
| carl kA:r@L | |||
| cecilia s@si:lia | |||
| charles _^_EN | |||
| charlie tSA:li | |||
| chopin S%OpA~ | |||
| chris krIs | |||
| christo krIstu | |||
| christian krIstian | |||
| christelle kr@st&l | |||
| christine kr@st'i:n | |||
| coetzee kuts'e@ | |||
| cronjé krOnj'e@ | |||
| debora d@bo@ra | |||
| debussy d@bus'i: | |||
| der d@r | |||
| deventer d'e@v@nt@r | |||
| du $u | |||
| @@ -187,6 +195,7 @@ martha marta | |||
| martin m'A:rt@n | |||
| michelle miS'&l | |||
| michiel $2 | |||
| miriam mIri@m | |||
| mostert m'Ost@rt | |||
| mozart mo@tsart | |||
| naomi na_'o@mi | |||
| @@ -195,11 +204,14 @@ naudé nOd'e@ | |||
| paul p@Ul | |||
| paulus p@UlWs | |||
| philip fIl@p | |||
| phillips fIl@ps | |||
| pierre p'e@r | |||
| petrus pe@trWs | |||
| phoebe fi:bi | |||
| rachmaninoff rax2m'aninOf | |||
| rebekka r@bEka | |||
| renée r@neI | |||
| retief r@tif | |||
| ronel run'&l | |||
| rousseau r@s@U | |||
| roux r'u | |||
| @@ -216,6 +228,7 @@ theron tr'On | |||
| viljoen $2 | |||
| villiers vIli@rs | |||
| violet _^_EN $capital | |||
| william _^_EN | |||
| marais mar'E: | |||
| mandela mand'E:la | |||
| celliers sIlj'e@ | |||
| @@ -292,7 +305,7 @@ alhoewel alhuv'&l $pause | |||
| tensy $2 $pause | |||
| aangesien $pause | |||
| wie $pause | |||
| wanneer $pause | |||
| wanneer $1 $pause | |||
| waar $pause | |||
| waarom $pause | |||
| waarheen $pause | |||
| @@ -376,7 +389,6 @@ bv beIf'o@rbe@lt $dot | |||
| // main word list | |||
| aanbid $2 | |||
| aanstaande $2 | |||
| adagio ad'A:dZi;%@U | |||
| algaande $2 | |||
| @@ -468,7 +480,6 @@ hokaai hOkAI | |||
| hoofsaaklik $2 | |||
| idee $2 | |||
| ietwat itvat | |||
| ignoreer ix2no@r'e@r | |||
| inagneming @nax2ne@m@N | |||
| inkluis $2 | |||
| @@ -476,16 +487,16 @@ intens @nt'Ens | |||
| intensiteit $4 | |||
| jawoord jA:vo@rt | |||
| jupiter jupit@r | |||
| kafee kaf'e@ | |||
| kapitaal kapit'A:l | |||
| kaviaar kavi'A:r | |||
| komaan kOmA:n | |||
| komberse kOmb'&rs@ | |||
| kombers kOmb'&rs | |||
| kopiereg kup'ir&x2 | |||
| kritiek krIt'ik | |||
| kultuur $2 | |||
| kulture $2 | |||
| kunsmatige kWnsm'A:t@x2@ | |||
| kwansuis $2 | |||
| @@ -497,6 +508,7 @@ lekkerste l&k@rst@ | |||
| macaroni makar'o@ni | |||
| madame mad'A:m | |||
| makabere m%ak'A:b@r@ | |||
| maskering mask'e@r@N $only | |||
| meegee me@x2e@ | |||
| memoriam mEm'o@riam | |||
| merlot m&rl'o@ | |||
| @@ -608,6 +620,8 @@ uiteindelik $2 | |||
| uiteraard Yyt@r_'A:rt | |||
| vaarwel fA:rv'&l | |||
| vakant f%ak'ant | |||
| vakante f%ak'ant@ | |||
| vanaf fanaf | |||
| vandat $1 | |||
| vanne fan@ | |||
| @@ -619,7 +633,6 @@ vererger f@r&rg@r | |||
| ver f'&:r | |||
| verg f&rx2 | |||
| vergete f@rx2e@t@ | |||
| vergewe f@rx2e@v@ | |||
| vermy $2 | |||
| verre f&:r@ | |||
| verregaande f&r@x2'A:nd@ | |||
| @@ -127,16 +127,15 @@ tS v w z Z z2 | |||
| Dictionary hi_dict | |||
| @ @- @2 @3 a a: aI aU | |||
| e E e: E: E~ i I i: | |||
| l- o O o: O: o~ O~ r- | |||
| U u: u~ V | |||
| @ @2 @3 a a: aI aU E | |||
| e: E: E~ I i: O o: O: | |||
| O~ r- U u: V | |||
| - : b c ch d d. f | |||
| g h H j J k kh l | |||
| m n N n. n^ p ph Q | |||
| q r s S s. t T t. | |||
| t.h th v w x z | |||
| : b c ch d d. f g | |||
| h H j J k kh l m | |||
| n N n. n^ p ph Q q | |||
| r s S s. t T t. t.h | |||
| th v w x z | |||
| Dictionary hr_dict | |||
| @@ -155,12 +154,12 @@ tS; v x z Z | |||
| Dictionary hu_dict | |||
| A a: E e: i i: o o: | |||
| u u: Y y y: Y: | |||
| u u: Y Y: | |||
| : b c C d dZ f g | |||
| h j J k l l^ m n | |||
| N n^ p R R2 s S s2 | |||
| t tS ts v z Z | |||
| : b c d dZ f g h | |||
| j J k l m n n^ p | |||
| R R2 s S s2 t tS ts | |||
| v z Z | |||
| Dictionary it_dict | |||
| @@ -288,7 +287,7 @@ Dictionary pt_dict | |||
| & &/ &U~ &~ @ @- a A | |||
| aI aU e E eI EI eU EU | |||
| i i/ iU i~ o O oI OI | |||
| e~ i i/ iU o O oI OI | |||
| o~ u U uI u~ y | |||
| * : ; b C d dZ f | |||
| @@ -300,17 +299,14 @@ w x z Z | |||
| Dictionary ro_dict | |||
| @ @- @I @U a aI aU e | |||
| ea eI eo eU i i/ I^ iI | |||
| iU o O Oa oI oU u uI | |||
| y Y yI yU | |||
| @ @- @I a aI aU e ea | |||
| eI eU i I^ iI o Oa oI | |||
| oU u uI y | |||
| * *; b b; c C d d; | |||
| dZ f f; g h j k l | |||
| l; m m; n N n; p p; | |||
| r s S S; t T t; tS | |||
| ts ts; v v; w w2 x z | |||
| Z z; Z; | |||
| * *; b c d dZ f g | |||
| h j k l m m; n p | |||
| r s S S; t tS ts ts; | |||
| v w w2 x z Z | |||
| Dictionary is_dict | |||
| @@ -337,13 +333,16 @@ h j k l m n N p | |||
| r R s t w z | |||
| Dictionary grc_dict | |||
| Dictionary mk_dict | |||
| & @ @- @2 a A a: E | |||
| e e: E~ i I i: l- o | |||
| o: oU r- u u: y | |||
| & @ @- @2 a E e i | |||
| I o r- u | |||
| * b d dZ dZ; f g h | |||
| j k k^ l l^ m n N | |||
| n^ p r R s S t tS | |||
| ts tS; v x z Z | |||
| * b d dZ dZ; f g j | |||
| k k^ l l^ m n n^ p | |||
| r R s S t tS ts v | |||
| x z Z | |||
| @@ -10,6 +10,19 @@ | |||
| // ?3 use diphthong for "au" | |||
| .replace | |||
| ० 0 | |||
| १ 1 | |||
| २ 2 | |||
| ३ 3 | |||
| ४ 4 | |||
| ५ 5 | |||
| ६ 6 | |||
| ७ 7 | |||
| ८ 8 | |||
| ९ 9 | |||
| // Vowels | |||
| .group अ | |||
| @@ -2,6 +2,12 @@ | |||
| // This file is UTF-8 encoded | |||
| .replace | |||
| // allow o,u-circumflex for o,u-double-acute | |||
| ô ő | |||
| û ű | |||
| .group a | |||
| a A | |||
| _) a (_ %A | |||
| @@ -1,6 +1,42 @@ | |||
| // translation rules for Macedonian | |||
| // This file is UTF-8 encoded | |||
| .replace | |||
| a а | |||
| b б | |||
| c ц | |||
| ć ћ | |||
| č ч | |||
| dž џ | |||
| dz ѕ | |||
| d д | |||
| đ ђ | |||
| e е | |||
| f ф | |||
| g г | |||
| h х | |||
| i и | |||
| j ј | |||
| k к | |||
| lj љ | |||
| l л | |||
| m м | |||
| nj њ | |||
| n н | |||
| o о | |||
| p п | |||
| r р | |||
| s с | |||
| š ш | |||
| t т | |||
| u у | |||
| v в | |||
| z з | |||
| ž ж | |||
| đ ѓ | |||
| ć ќ | |||
| .group а | |||
| а a | |||
| @@ -3,6 +3,10 @@ | |||
| // This file is UTF-8 encoded | |||
| // replace s-comma, t-comma by s-cedilla, t-cedilla | |||
| .replace | |||
| ș ş | |||
| ț ţ | |||
| .group a | |||
| @@ -1,7 +1,7 @@ | |||
| 39 phoneme tables | |||
| 40 phoneme tables | |||
| new total | |||
| base 96 96 | |||
| base2 23 114 | |||
| base2 24 115 | |||
| en 53 144 | |||
| en_n 30 144 | |||
| en_us 37 144 | |||
| @@ -25,13 +25,14 @@ | |||
| mk 3 130 | |||
| sr 2 129 | |||
| ru 38 126 | |||
| it 17 117 | |||
| it 17 118 | |||
| la 21 114 | |||
| es 6 114 | |||
| pt 28 131 | |||
| es 6 115 | |||
| pt 27 131 | |||
| pt_pt 20 131 | |||
| ro 36 138 | |||
| el 8 114 | |||
| ro 36 139 | |||
| el 8 115 | |||
| grc 7 120 | |||
| sv 25 118 | |||
| no 28 122 | |||
| is 32 121 | |||
| @@ -215,7 +216,7 @@ | |||
| 30 r/trr base af de fi nl ru ro sv sw | |||
| 11 r/xr base | |||
| 2 ufric/ch base de | |||
| 3 ufric/f base de ro | |||
| 4 ufric/f base de ro grc | |||
| 2 ufric/f_ base ro | |||
| 5 ufric/h_ base fi hi la | |||
| 6 ufric/h@ base fi hi la | |||
| @@ -272,7 +273,7 @@ | |||
| 8 ustop/ts_pzd base hi ru | |||
| 2 ustop/ts_pzd_ hi hu | |||
| 2 ustop/ts_pzd2 hi hu | |||
| 2 vdiph/0i pt vi | |||
| 3 vdiph/0i pt grc vi | |||
| 3 vdiph/0i_2 en_sc no en_wi | |||
| 3 vdiph2/e@ en_sc en_wi | |||
| 1 vdiph2/ea ro | |||
| @@ -323,10 +324,10 @@ | |||
| 1 vdiph/au_3 en_rp | |||
| 6 vdiph/au_4 base2 cy eo sk it is | |||
| 1 vdiph/ee-e hi | |||
| 5 vdiph/eei en de nl pt vi | |||
| 5 vdiph/eei base2 en de nl vi | |||
| 3 vdiph/eei_2 en_us eo fi | |||
| 2 vdiph/eei_3 en_rp sk | |||
| 3 vdiph/eeu pt vi zhy | |||
| 4 vdiph/eeu pt grc vi zhy | |||
| 2 vdiph/eeu_2 la pt_pt | |||
| 2 vdiph/eeu_3 en_n en_wm | |||
| 1 vdiph/eey fi | |||
| @@ -346,7 +347,7 @@ | |||
| 9 vdiph/ooi en en_n en_us cy eo fi no zhy | |||
| 1 vdiph/ooi_2 af | |||
| 2 vdiph/ooi_3 en_rp en_wm | |||
| 1 vdiph/oou cs | |||
| 2 vdiph/oou cs grc | |||
| 2 vdiph/ou fi zhy | |||
| 2 vdiph/ou_2 sk ro | |||
| 2 vdiph/ou_3 is | |||
| @@ -363,7 +364,7 @@ | |||
| 1 vdiph/Vu_2 en_wm | |||
| 1 vdiph/Vu_3 nl | |||
| 2 vdiph/&y fi nl | |||
| 2 vdiph/yi fi no | |||
| 3 vdiph/yi fi grc no | |||
| 1 vdiph/y#i fi | |||
| 1 vdiph/y#i_2 is | |||
| 1 vdiph/yi_fr fr | |||
| @@ -416,7 +417,7 @@ | |||
| 4 vowel/& en_rp fi hi sv | |||
| 4 vowel/0 base2 en hi pt | |||
| 3 vowel/0_2 en_n pt_pt sw | |||
| 5 vowel/0_3 en_us en_sc en_rp en_wm hu | |||
| 4 vowel/0_3 en_us en_sc en_rp hu | |||
| 2 vowel/@_2 fr | |||
| 2 vowel/&_2 en_us | |||
| 6 vowel/@_3 en_sc de hi | |||
| @@ -480,7 +481,7 @@ | |||
| 2 vowel/ii_6 en_wm | |||
| 1 vowel/ii_en en | |||
| 10 vowel/@_low en_rp hi ro no | |||
| 12 vowel/o base2 en en_wm de hi it la pt_pt sv en_wi | |||
| 10 vowel/o base2 en de hi it la pt_pt sv en_wi | |||
| 4 vowel/o_2 cy hi hu no | |||
| 2 vowel/o-_2 en_n en_wm | |||
| 3 vowel/o_3 en_sc fr | |||
| @@ -493,10 +494,10 @@ | |||
| 1 vowel/oe_4 sv | |||
| 2 vowel/o_mid fr hu | |||
| 12 vowel/oo en_sc de eo la es el sv no zhy en_wi | |||
| 10 vowel/oo_1 en_n en_rp en_wm af fi sk hr vi | |||
| 12 vowel/oo_1 en_n en_rp en_wm af fi sk hr vi | |||
| 3 vowel/oo_2 en_sc cy cs | |||
| 1 vowel/oo_3 af | |||
| 5 vowel/oo_4 hi pl it en_wi | |||
| 6 vowel/oo_4 en_wm hi pl it en_wi | |||
| 1 vowel/oo_5 is | |||
| 6 vowel/oo_en en en_n en_rp | |||
| 2 vowelr/aa_r en_sc | |||
| @@ -538,11 +539,11 @@ | |||
| 1 vowel/yy fr_ca | |||
| 1 vowel/yy_2 no | |||
| 1 vowel/yy_3 sv | |||
| 5 vowel/yy_4 de hu la is | |||
| 6 vowel/yy_4 de hu la grc is | |||
| 1 vwl_af/@ af | |||
| 1 vwl_af/I af | |||
| 2 vwl_af/r@ af | |||
| 6 vwl_en/aI@ en en_n en_us en_sc en_rp en_wm | |||
| 5 vwl_en/aI@ en en_n en_us en_sc en_rp | |||
| 2 vwl_en/aI@_2 en_sc | |||
| 5 vwl_en/aU@ en en_n en_us en_sc en_wm | |||
| 12 vwl_en/@L en en_us en_sc en_rp en_wm af | |||
| @@ -550,6 +551,7 @@ | |||
| 1 vwl_en_n/aa_5 en_n | |||
| 2 vwl_en_n/O@ en_n | |||
| 1 vwl_en_n/u_ en_n | |||
| 1 vwl_en/ooi@ en_wm | |||
| 3 vwl_en_rp/aa en_rp | |||
| 1 vwl_en_rp/aU@ en_rp | |||
| 1 vwl_en_rp/e@ en_rp | |||
| @@ -27,3 +27,4 @@ | |||
| 0 @- NULL 0 NULL | |||
| 0 aI@ NULL 60 aI @ | |||
| 0 aU@ NULL 75 aU @ | |||
| 0 x NULL 0 k | |||
| @@ -12,6 +12,7 @@ | |||
| 0 w/ NULL 0 w | |||
| 0 ; NULL 0 NULL | |||
| 0 g- NULL 0 NULL | |||
| 0 x NULL 0 k_h | |||
| 0 @- NULL 0 NULL | |||
| 0 aI@ NULL 60 AI r | |||
| 0 aU@ NULL 75 aU r | |||
| @@ -12,6 +12,7 @@ | |||
| 0 w/ NULL 0 w | |||
| 0 ; NULL 0 NULL | |||
| 0 g- NULL 0 NULL | |||
| 0 x NULL 0 k | |||
| 0 @- NULL 0 NULL | |||
| 0 aI@ NULL 60 AI r | |||
| 0 aU@ NULL 75 aU r | |||
| @@ -107,6 +107,13 @@ phoneme eI | |||
| endphoneme | |||
| phoneme EI | |||
| vowel starttype (e) endtype (i) | |||
| length 230 | |||
| formants vdiph/eei | |||
| endphoneme | |||
| phoneme oI | |||
| vowel starttype (o) endtype (i) | |||
| length 240 | |||
| @@ -92,14 +92,14 @@ endphoneme | |||
| phoneme I | |||
| vowel starttype (e) endtype (e) | |||
| vowel starttype (@) endtype (@) | |||
| length 110 | |||
| formants vowel/e# | |||
| before l/2 vowel/@_3-30+l/L2_@L | |||
| endphoneme | |||
| phoneme I2 | |||
| vowel starttype (e) endtype (e) | |||
| vowel starttype (@) endtype (@) | |||
| unstressed | |||
| length 110 | |||
| formants vowel/e# | |||
| @@ -62,7 +62,7 @@ endphoneme | |||
| phoneme 0 | |||
| vowel starttype (o) endtype (o) | |||
| length 140 | |||
| formants vowel/0_3 | |||
| formants vowel/oo_4 | |||
| reduceto @ 0 | |||
| endphoneme | |||
| @@ -146,7 +146,7 @@ endphoneme | |||
| phoneme O@ | |||
| vowel starttype (o) endtype (@) | |||
| length 240 | |||
| formants vowel/o | |||
| formants vowel/oo_1 | |||
| linkout r- | |||
| endphoneme | |||
| @@ -154,7 +154,7 @@ endphoneme | |||
| phoneme O | |||
| vowel starttype (o) endtype (o) | |||
| length 150 | |||
| formants vowel/o | |||
| formants vowel/oo_1 | |||
| reduceto @ 0 | |||
| endphoneme | |||
| @@ -189,7 +189,7 @@ endphoneme | |||
| phoneme eI | |||
| vowel starttype (@) endtype (i) | |||
| length 210 | |||
| length 230 | |||
| formants vdiph/@i_3 | |||
| endphoneme | |||
| @@ -228,7 +228,7 @@ endphoneme | |||
| phoneme aI@ | |||
| vowel starttype (a) endtype (@) | |||
| length 270 | |||
| formants vwl_en/aI@ | |||
| formants vwl_en/ooi@ | |||
| linkout r- | |||
| endphoneme | |||
| @@ -0,0 +1,44 @@ | |||
| //==================================================== | |||
| // Ancient Greek - based on base2 | |||
| //==================================================== | |||
| phoneme y | |||
| vowel starttype (i) endtype (i) | |||
| length 160 | |||
| formants vowel/yy_4 | |||
| endphoneme | |||
| phoneme EU | |||
| vowel starttype (e) endtype (u) | |||
| length 230 | |||
| formants vdiph/eeu | |||
| endphoneme | |||
| phoneme OI | |||
| vowel starttype (o) endtype (i) | |||
| length 230 | |||
| formants vdiph/0i | |||
| endphoneme | |||
| phoneme OU | |||
| vowel starttype (o) endtype (u) | |||
| length 230 | |||
| formants vdiph/oou | |||
| endphoneme | |||
| phoneme yI | |||
| vowel starttype (i) endtype (i) | |||
| length 230 | |||
| formants vdiph/yi | |||
| endphoneme | |||
| phoneme f // consider this an affricate | |||
| vls blb afr | |||
| vowelout f1=0 f2=1000 -500 -350 f3=-200 80 | |||
| lengthmod 2 | |||
| wave ufric/f // could replace this with a [pf] wav file | |||
| endphoneme | |||
| @@ -138,13 +138,6 @@ phoneme eI | |||
| endphoneme | |||
| phoneme EI | |||
| vowel starttype (e) endtype (i) | |||
| length 230 | |||
| formants vdiph/eei | |||
| endphoneme | |||
| phoneme OI | |||
| vowel starttype (o) endtype (i) | |||
| length 230 | |||
| @@ -1218,6 +1218,9 @@ include ph_romanian | |||
| phonemetable el base2 | |||
| include ph_greek | |||
| phonemetable grc base2 | |||
| include ph_greek_ancient | |||
| phonemetable sv base | |||
| include ph_swedish | |||
| @@ -176,7 +176,6 @@ int compile_line(char *linebuf, char *dict_line, int *hash) | |||
| char encoded_ph[200]; | |||
| unsigned char bad_phoneme[4]; | |||
| p = linebuf; | |||
| comment = NULL; | |||
| phonetic = word = ""; | |||
| @@ -347,7 +346,8 @@ int compile_line(char *linebuf, char *dict_line, int *hash) | |||
| if((word[0] & 0x80)==0) // 7 bit ascii only | |||
| { | |||
| // 1st letter - need to consider utf8 here | |||
| // If first letter is uppercase, convert to lower case. (Only if it's 7bit ascii) | |||
| // ??? need to consider utf8 here | |||
| word[0] = tolower(word[0]); | |||
| } | |||
| @@ -789,8 +789,6 @@ char *compile_rule(char *input) | |||
| for(ix=0; finish==0; ix++) | |||
| { | |||
| c = input[ix]; | |||
| // A "//" pair starts a comment: overwrite its first character with a newline | |||
| if((c=='/') && (input[ix+1]=='/')) | |||
| c = input[ix] = '\n'; // treat comment as end of line | |||
| switch(c = input[ix]) | |||
| { | |||
| @@ -1167,6 +1165,7 @@ static int compile_dictrules(FILE *f_in, FILE *f_out, char *fname_temp) | |||
| int different; | |||
| char *prev_rgroup_name; | |||
| unsigned int char_code; | |||
| int compile_mode=0; | |||
| char *buf; | |||
| char buf1[120]; | |||
| char *rules[N_RULES]; | |||
| @@ -1184,7 +1183,13 @@ static int compile_dictrules(FILE *f_in, FILE *f_out, char *fname_temp) | |||
| { | |||
| linenum++; | |||
| buf = fgets(buf1,sizeof(buf1),f_in); | |||
| if((buf != NULL) && (buf[0] == '\r')) buf++; // ignore extra \r in \r\n | |||
| if(buf != NULL) | |||
| { | |||
| if((p = (unsigned char *)strstr(buf,"//")) != NULL) | |||
| *p = 0; | |||
| if(buf[0] == '\r') buf++; // ignore extra \r in \r\n | |||
| } | |||
| if((buf != NULL) && (memcmp(buf,".L",2)==0)) | |||
| { | |||
| @@ -1196,7 +1201,7 @@ static int compile_dictrules(FILE *f_in, FILE *f_out, char *fname_temp) | |||
| continue; | |||
| } | |||
| if((buf == NULL) || (memcmp(buf,".group",6)==0)) | |||
| if((buf == NULL) || (buf[0] == '.')) | |||
| { | |||
| // next .group or end of file, write out the previous group | |||
| @@ -1212,46 +1217,106 @@ static int compile_dictrules(FILE *f_in, FILE *f_out, char *fname_temp) | |||
| } | |||
| n_rules = 0; | |||
| if(buf == NULL) break; // end of file | |||
| if(compile_mode == 2) | |||
| { | |||
| // end of the character replacements section | |||
| // n_rules is used here as a 4-byte value written to the output file | |||
| fwrite(&n_rules,1,4,f_out); // write a zero word to terminate the replacement list | |||
| } | |||
| p = (unsigned char *)&buf[6]; | |||
| while((p[0]==' ') || (p[0]=='\t')) p++; // Note: Windows isspace(0xe1) gives TRUE ! | |||
| ix = 0; | |||
| while((*p > ' ') && (ix<12)) | |||
| group_name[ix++] = *p++; | |||
| group_name[ix]=0; | |||
| if(buf == NULL) break; // end of file | |||
| if(sscanf(group_name,"0x%x",&char_code)==1) | |||
| if(memcmp(buf,".replace",8)==0) | |||
| { | |||
| // group character is given as a character code (max 16 bits) | |||
| p = (unsigned char *)group_name; | |||
| compile_mode = 2; | |||
| fputc(RULE_GROUP_START,f_out); | |||
| fputc(RULE_REPLACEMENTS,f_out); | |||
| if(char_code > 0x100) | |||
| { | |||
| *p++ = (char_code >> 8); | |||
| } | |||
| *p++ = char_code; | |||
| *p = 0; | |||
| // advance to next word boundary | |||
| while((ftell(f_out) & 3) != 0) | |||
| fputc(0,f_out); | |||
| } | |||
| if(strlen(group_name) > 2) | |||
| if(memcmp(buf,".group",6)==0) | |||
| { | |||
| if(utf8_in(&c,group_name,0) < 2) | |||
| compile_mode = 1; | |||
| p = (unsigned char *)&buf[6]; | |||
| while((p[0]==' ') || (p[0]=='\t')) p++; // Note: Windows isspace(0xe1) gives TRUE ! | |||
| ix = 0; | |||
| while((*p > ' ') && (ix<12)) | |||
| group_name[ix++] = *p++; | |||
| group_name[ix]=0; | |||
| if(sscanf(group_name,"0x%x",&char_code)==1) | |||
| { | |||
| fprintf(f_log,"%5d: Group name longer than 2 bytes (UTF8)",linenum); | |||
| error_count++; | |||
| // group character is given as a character code (max 16 bits) | |||
| p = (unsigned char *)group_name; | |||
| if(char_code > 0x100) | |||
| { | |||
| *p++ = (char_code >> 8); | |||
| } | |||
| *p++ = char_code; | |||
| *p = 0; | |||
| } | |||
| if(strlen(group_name) > 2) | |||
| { | |||
| if(utf8_in(&c,group_name,0) < 2) | |||
| { | |||
| fprintf(f_log,"%5d: Group name longer than 2 bytes (UTF8)",linenum); | |||
| error_count++; | |||
| } | |||
| group_name[2] = 0; | |||
| } | |||
| group_name[2] = 0; | |||
| } | |||
| continue; | |||
| } | |||
| prule = compile_rule(buf); | |||
| if((prule != NULL) && (n_rules < N_RULES)) | |||
| switch(compile_mode) | |||
| { | |||
| rules[n_rules++] = prule; | |||
| case 1: // .group | |||
| prule = compile_rule(buf); | |||
| if((prule != NULL) && (n_rules < N_RULES)) | |||
| { | |||
| rules[n_rules++] = prule; | |||
| } | |||
| break; | |||
| case 2: // .replace | |||
| { | |||
| // One line of a .replace section has the form "<from> <to>". | |||
| // Each side is packed into a single 32-bit word: the first character | |||
| // in bits 0-15 and an optional second character in bits 16-31. | |||
| // A non-empty <from> causes both words to be written to f_out. | |||
| int replace1; | |||
| int replace2; | |||
| int too_long; | |||
| char *p; | |||
| p = buf; | |||
| replace1 = 0; | |||
| replace2 = 0; | |||
| too_long = 0; | |||
| while(isspace2(*p)) p++; | |||
| ix = 0; | |||
| while((unsigned char)(*p) > 0x20) // not space or zero-byte | |||
| { | |||
| p += utf8_in(&c,p,0); | |||
| if(ix < 32) | |||
| { | |||
| replace1 += (c << ix); | |||
| ix += 16; | |||
| } | |||
| else | |||
| too_long = 1; // a third character would need a shift of 32 bits, which is undefined | |||
| } | |||
| while(isspace2(*p)) p++; | |||
| ix = 0; | |||
| while((unsigned char)(*p) > 0x20) | |||
| { | |||
| p += utf8_in(&c,p,0); | |||
| if(ix < 32) | |||
| { | |||
| replace2 += (c << ix); | |||
| ix += 16; | |||
| } | |||
| else | |||
| too_long = 1; | |||
| } | |||
| if(too_long) | |||
| { | |||
| // only the first two characters of each side are kept; report the line | |||
| fprintf(f_log,"%5d: Replacement has more than 2 characters\n",linenum); | |||
| error_count++; | |||
| } | |||
| if(replace1 != 0) | |||
| { | |||
| fwrite(&replace1,1,4,f_out); | |||
| fwrite(&replace2,1,4,f_out); | |||
| } | |||
| } | |||
| break; | |||
| } | |||
| } | |||
| fclose(f_temp); | |||
| @@ -205,9 +205,9 @@ void Translator::InitGroups(void) | |||
| int ix; | |||
| char *p; | |||
| char *p_name; | |||
| unsigned int *pw; | |||
| unsigned char c, c2; | |||
| int len; | |||
| int rule_count; | |||
| n_groups2 = 0; | |||
| for(ix=0; ix<256; ix++) | |||
| @@ -228,6 +228,18 @@ void Translator::InitGroups(void) | |||
| } | |||
| p++; | |||
| if(p[0] == RULE_REPLACEMENTS) | |||
| { | |||
| // Character replacements section: after the RULE_REPLACEMENTS byte the data | |||
| // continues at the next 4-byte boundary with pairs of 32-bit words, | |||
| // ended by a single zero word. | |||
| // The pointer is converted through size_t rather than int, so that the address | |||
| // is not truncated where pointers are wider than int (64-bit builds). | |||
| // NOTE(review): the rounding assumes the rules data itself starts at a 4-byte aligned address - confirm. | |||
| pw = (unsigned int *)(((size_t)p+4) & ~(size_t)3); // advance to next word boundary | |||
| langopts.replace_chars = pw; | |||
| while(pw[0] != 0) | |||
| { | |||
| pw += 2; // find the end of the replacement list, each entry is 2 words. | |||
| } | |||
| p = (char *)(pw+1); // step over the terminating zero word | |||
| continue; | |||
| } | |||
| if(p[0] == RULE_LETTERGP2) | |||
| { | |||
| ix = p[1] - 'A'; | |||
| @@ -266,11 +278,9 @@ void Translator::InitGroups(void) | |||
| } | |||
| // skip over all the rules in this group | |||
| rule_count = 0; | |||
| while(*p != RULE_GROUP_END) | |||
| { | |||
| p += (strlen(p) + 1); | |||
| rule_count++; | |||
| } | |||
| p++; | |||
| } | |||
| @@ -998,6 +998,8 @@ void ConvertToUtf8() | |||
| } // end of ConvertToUtf8 | |||
| //****************************************************************************************************** | |||
| //#define calcspeedtab | |||
| @@ -354,12 +354,14 @@ int Translator::LookupNum2(int value, int control, char *ph_out) | |||
| if(langopts.numbers & 0x200) | |||
| { | |||
| // remove vowel from the end of tens if units starts with a vowel (LANG=Italian) | |||
| ix = strlen(ph_tens)-1; | |||
| if((next_phtype = phoneme_tab[(unsigned int)(ph_digits[0])]->type) == phSTRESS) | |||
| next_phtype = phoneme_tab[(unsigned int)(ph_digits[1])]->type; | |||
| if((phoneme_tab[(unsigned int)(ph_tens[ix])]->type == phVOWEL) && (next_phtype == phVOWEL)) | |||
| ph_tens[ix] = 0; | |||
| if((ix = strlen(ph_tens)-1) >= 0) | |||
| { | |||
| if((next_phtype = phoneme_tab[(unsigned int)(ph_digits[0])]->type) == phSTRESS) | |||
| next_phtype = phoneme_tab[(unsigned int)(ph_digits[1])]->type; | |||
| if((phoneme_tab[(unsigned int)(ph_tens[ix])]->type == phVOWEL) && (next_phtype == phVOWEL)) | |||
| ph_tens[ix] = 0; | |||
| } | |||
| } | |||
| sprintf(ph_out,"%s%s",ph_tens,ph_digits); | |||
| } | |||
| @@ -793,7 +795,7 @@ int Translator::TranslateNumber_1(char *word, char *ph_out, unsigned int *flags, | |||
| decimal_point = 0; | |||
| } | |||
| } | |||
| if(ph_out[0] != 0) | |||
| if((ph_out[0] != 0) && (ph_out[0] != phonSWITCH)) | |||
| { | |||
| int next_char; | |||
| utf8_in(&next_char,&word[n_digits+1],0); | |||
| @@ -35,7 +35,7 @@ | |||
| #include "translate.h" | |||
| #include "wave.h" | |||
| const char *version_string = "1.29.10 16.Oct.07"; | |||
| const char *version_string = "1.29.11 23.Oct.07"; | |||
| const int version_phdata = 0x012901; | |||
| int option_device_number = -1; | |||
| @@ -38,6 +38,7 @@ | |||
| #define L_qa 0x716100 | |||
| #define L_grc 0x677263 // grc Ancient Greek | |||
| #define OFFSET_GREEK 0x380 | |||
| @@ -45,19 +46,41 @@ | |||
| #define OFFSET_DEVANAGARI 0x900 | |||
| static const unsigned int replace_cyrillic[] = | |||
| {0x430,0x431,0x446,0x45b,0x447,0x45f,0x455,0x434,0x452, | |||
| 0x435,0x444,0x433,0x445,0x438,0x458,0x43a,0x459, | |||
| 0x43b,0x43c,0x45a,0x43d,0x43e,0x43f,0x440,0x441, | |||
| 0x448,0x442,0x443,0x432,0x437,0x436, | |||
| 0x453,0x45c,0}; // ѓ ѕ ќ | |||
| static const unsigned int replace_cyrillic_latin[] = | |||
| {'a','b','c',0x107,0x10d,'d'+(0x17e<<16),'d'+('z'<<16),'d',0x111, | |||
| 'e','f','g','h','i','j','k','l'+('j'<<16), | |||
| 'l','m','n'+('j'<<16),'n','o','p','r','s', | |||
| 0x161,'t','u','v','z',0x17e, | |||
| 0x111,0x107,0}; | |||
| // Character substitution table: Cyrillic letters to their Latin equivalents. | |||
| // The entries are read in pairs and the list ends with a single 0: | |||
| //   even entry = character code to match (here a Cyrillic letter) | |||
| //   odd entry  = replacement; where the Latin form is a digraph, the second | |||
| //                letter is packed into bits 16-31, e.g. 'l'+('j'<<16) for "lj" | |||
| // NOTE(review): how the packed second letter is emitted is not visible here - see the code which reads replace_chars. | |||
| static const unsigned int replace_cyrillic_latin[] = | |||
| {0x430,'a', | |||
| 0x431,'b', | |||
| 0x446,'c', | |||
| 0x45b,0x107, | |||
| 0x447,0x10d, | |||
| 0x45f,'d'+(0x17e<<16), | |||
| 0x455,'d'+('z'<<16), // ѕ | |||
| 0x434,'d', | |||
| 0x452,0x111, | |||
| 0x435,'e', | |||
| 0x444,'f', | |||
| 0x433,'g', | |||
| 0x445,'h', | |||
| 0x438,'i', | |||
| 0x458,'j', | |||
| 0x43a,'k', | |||
| 0x459,'l'+('j'<<16), | |||
| 0x43b,'l', | |||
| 0x43c,'m', | |||
| 0x45a,'n'+('j'<<16), | |||
| 0x43d,'n', | |||
| 0x43e,'o', | |||
| 0x43f,'p', | |||
| 0x440,'r', | |||
| 0x441,'s', | |||
| 0x448,0x161, | |||
| 0x442,'t', | |||
| 0x443,'u', | |||
| 0x432,'v', | |||
| 0x437,'z', | |||
| 0x436,0x17e, | |||
| 0x453,0x111, // ѓ | |||
| 0x45c,0x107, // ќ | |||
| 0}; // zero terminator | |||
| void SetupTranslator(Translator *tr, int *lengths, int *amps) | |||
| @@ -132,6 +155,7 @@ Translator *SelectTranslator(const char *name) | |||
| break; | |||
| case L('e','l'): // Greek | |||
| case L_grc: // Ancient Greek | |||
| { | |||
| static int stress_lengths_el[8] = {155, 180, 210, 210, 0, 0, 270, 300}; | |||
| static int stress_amps_el[8] = {15,12, 20,20, 20,24, 24,22 }; // 'diminished' is used to mark a quieter, final unstressed syllable | |||
| @@ -167,6 +191,12 @@ Translator *SelectTranslator(const char *name) | |||
| tr->langopts.numbers = 0xb09; | |||
| tr->langopts.numbers2 = 0x2; // variant form of numbers before thousands | |||
| if(name2 == L_grc) | |||
| { | |||
| // ancient greek | |||
| tr->langopts.param[LOPT_UNPRONOUNCABLE] = 1; | |||
| } | |||
| } | |||
| break; | |||
| @@ -259,8 +289,6 @@ Translator *SelectTranslator(const char *name) | |||
| case L('h','i'): | |||
| { | |||
| static const char dev_consonants2[] = {0x02,0x03,0x58,0x59,0x5a,0x5b,0x5c,0x5d,0x5e,0x5f}; | |||
| static const unsigned int replace_chars_hi[11] = {0x966,0x967,0x968,0x969,0x96a,0x96b,0x96c,0x96d,0x96e,0x96f,0}; // digits 0-9 | |||
| static const unsigned int replacement_chars_hi[11] = {0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,0x38,0x39,0}; | |||
| static int stress_lengths_hi[8] = {190, 190, 210, 210, 0, 0, 230, 250}; | |||
| static int stress_amps_hi[8] = {17,14, 20,19, 20,24, 24,22 }; | |||
| @@ -274,8 +302,6 @@ Translator *SelectTranslator(const char *name) | |||
| tr->langopts.numbers = 0x811; | |||
| tr->langopts.numbers2 = 0x100; | |||
| tr->letter_bits_offset = OFFSET_DEVANAGARI; | |||
| tr->langopts.replace_chars = replace_chars_hi; | |||
| tr->langopts.replacement_chars = replacement_chars_hi; | |||
| memset(tr->letter_bits,0,sizeof(tr->letter_bits)); | |||
| SetLetterBitsRange(tr,LETTERGP_A,0x06,0x14); // vowel letters | |||
| @@ -306,8 +332,7 @@ Translator *SelectTranslator(const char *name) | |||
| tr->langopts.numbers = 0x1c0d + 0x4000 + NUM_ROMAN_UC; | |||
| tr->langopts.numbers2 = 0x4a; // variant numbers before thousands,milliards | |||
| tr->langopts.replace_chars = replace_cyrillic; | |||
| tr->langopts.replacement_chars = replace_cyrillic_latin; | |||
| tr->langopts.replace_chars = replace_cyrillic_latin; | |||
| SetLetterVowel(tr,'y'); | |||
| SetLetterVowel(tr,'r'); | |||
| @@ -319,14 +344,10 @@ Translator *SelectTranslator(const char *name) | |||
| { | |||
| static int stress_amps_hu[8] = {17,17, 19,19, 20,24, 24,22 }; | |||
| static int stress_lengths_hu[8] = {185,195, 195,190, 0,0, 210,220}; | |||
| static const unsigned int replace_chars_hu[] = {0xd4,0xf4,0xdb,0xfb,0}; | |||
| static const unsigned int replacement_chars_hu[] = {0x150,0x151,0x170,0x171,0}; // allow o,u-circumflex for o,u-double-acute | |||
| tr = new Translator(); | |||
| SetupTranslator(tr,stress_lengths_hu,stress_amps_hu); | |||
| tr->charset_a0 = charsets[2]; // ISO-8859-2 | |||
| tr->langopts.replace_chars = replace_chars_hu; | |||
| tr->langopts.replacement_chars = replacement_chars_hu; | |||
| tr->langopts.vowel_pause = 0x20; | |||
| tr->langopts.stress_rule = 0; | |||
| @@ -417,9 +438,6 @@ SetLengthMods(tr,3); // all equal | |||
| tr->langopts.stress_rule = 4; // antipenultimate | |||
| tr->langopts.numbers = 0x0c29 + 0x4000; | |||
| tr->langopts.numbers2 = 0x8a; // variant numbers before thousands,milliards | |||
| tr->langopts.replace_chars = replace_cyrillic_latin; | |||
| tr->langopts.replacement_chars = replace_cyrillic; | |||
| } | |||
| break; | |||
| @@ -495,8 +513,6 @@ SetLengthMods(tr,3); // all equal | |||
| { | |||
| static int stress_lengths_ro[8] = {170, 170, 180, 180, 0, 0, 240, 260}; | |||
| static int stress_amps_ro[8] = {15,13, 18,18, 20,22, 22,22 }; | |||
| static const unsigned int replace_chars_ro[5] = {0x218,0x219,0x21a,0x21b,0}; | |||
| static const unsigned int replacement_chars_ro[5] = {0x15e,0x15f,0x162,0x163,0}; // replace s-comma, t-comma by s-cedilla, t-cedilla | |||
| tr = new Translator(); | |||
| SetupTranslator(tr,stress_lengths_ro,stress_amps_ro); | |||
| @@ -505,8 +521,6 @@ SetLengthMods(tr,3); // all equal | |||
| tr->langopts.stress_flags = 0x100 + 0x6; | |||
| tr->charset_a0 = charsets[2]; // ISO-8859-2 | |||
| tr->langopts.replace_chars = replace_chars_ro; | |||
| tr->langopts.replacement_chars = replacement_chars_ro; | |||
| tr->langopts.numbers = 0x1829+0x6000 + NUM_ROMAN; | |||
| tr->langopts.numbers2 = 0x1e; // variant numbers before all thousandplex | |||
| } | |||
| @@ -665,6 +665,10 @@ if((wmark > 0) && (wmark < 8)) | |||
| if(!found && iswdigit(first_char)) | |||
| { | |||
| Lookup("_0lang",word_phonemes); | |||
| if(word_phonemes[0] == phonSWITCH) | |||
| return(0); | |||
| found = TranslateNumber(word,phonemes,&dictionary_flags,wflags); | |||
| } | |||
| @@ -1560,16 +1564,14 @@ static int EmbeddedCommand(unsigned int &source_index) | |||
| } // end of EmbeddedCommand | |||
| int Translator::TranslateChar(char *ptr, int prev_in, unsigned int c, unsigned int next_in, int *insert) | |||
| {//===================================================================================================== | |||
| // To allow language specific examination and replacement of characters | |||
| int SubstituteChar(Translator *tr, unsigned int c, unsigned int next_in, int *insert) | |||
| {//================================================================================== | |||
| int ix; | |||
| unsigned int word; | |||
| unsigned int new_c, c2, c_lower; | |||
| int upper_case = 0; | |||
| static int ignore_next = 0; | |||
| const unsigned int *replace_chars; | |||
| if(ignore_next) | |||
| { | |||
| @@ -1578,7 +1580,7 @@ int Translator::TranslateChar(char *ptr, int prev_in, unsigned int c, unsigned i | |||
| } | |||
| if(c == 0) return(0); | |||
| if(langopts.replace_chars == NULL) | |||
| if((replace_chars = tr->langopts.replace_chars) == NULL) | |||
| return(c); | |||
| // there is a list of character codes to be substituted with alternative codes | |||
| @@ -1590,18 +1592,18 @@ int Translator::TranslateChar(char *ptr, int prev_in, unsigned int c, unsigned i | |||
| } | |||
| new_c = 0; | |||
| for(ix=0; (word = langopts.replace_chars[ix]) != 0; ix++) | |||
| for(ix=0; (word = replace_chars[ix]) != 0; ix+=2) | |||
| { | |||
| if(c_lower == (word & 0xffff)) | |||
| { | |||
| if((word >> 16) == 0) | |||
| { | |||
| new_c = langopts.replacement_chars[ix]; | |||
| new_c = replace_chars[ix+1]; | |||
| break; | |||
| } | |||
| if((word >> 16) == (unsigned int)tolower(next_in)) | |||
| { | |||
| new_c = langopts.replacement_chars[ix]; | |||
| new_c = replace_chars[ix+1]; | |||
| ignore_next = 1; | |||
| break; | |||
| } | |||
| @@ -1625,6 +1627,14 @@ int Translator::TranslateChar(char *ptr, int prev_in, unsigned int c, unsigned i | |||
| if(upper_case) | |||
| new_c = towupper(new_c); | |||
| return(new_c); | |||
| } | |||
| int Translator::TranslateChar(char *ptr, int prev_in, unsigned int c, unsigned int next_in, int *insert) | |||
| {//===================================================================================================== | |||
| // To allow language specific examination and replacement of characters. | |||
| // This implementation only delegates to SubstituteChar() for this translator, | |||
| // passing on c, next_in and insert and returning its result; ptr and prev_in are not used here. | |||
| return(SubstituteChar(this,c,next_in,insert)); | |||
| } | |||
| @@ -118,6 +118,7 @@ | |||
| #define RULE_LETTERGP 17 // A B C H F G Y letter group number | |||
| #define RULE_LETTERGP2 18 // L + letter group number | |||
| #define RULE_CAPITAL 19 // word starts with a capital letter | |||
| #define RULE_REPLACEMENTS 20 // section for character replacements | |||
| #define RULE_NO_SUFFIX 24 // N | |||
| #define RULE_NOTVOWEL 25 // K | |||
| #define RULE_IFVERB 26 // V | |||
| @@ -278,6 +279,7 @@ typedef struct { | |||
| #define NUM_ROMAN 0x20000 | |||
| #define NUM_ROMAN_UC 0x40000 | |||
| // bits0-1=which numbers routine to use. | |||
| // bit2= thousands separator must be space | |||
| // bit3= , decimal separator, not . | |||
| @@ -294,7 +296,6 @@ typedef struct { | |||
| // bit16=dot after number indicates ordinal | |||
| // bit17=recognize roman numbers | |||
| // bit18=Roman numbers only if upper case | |||
| int numbers; | |||
| // bits 1-4 use variant form of numbers before thousands,millions,etc. | |||
| @@ -302,6 +303,7 @@ typedef struct { | |||
| // bit7=(LANG-ru) use MB for 1 thousand, million, etc | |||
| // bit8=(LANG=sw) special word for 100,000s | |||
| int numbers2; | |||
| int max_roman; | |||
| int thousands_sep; | |||
| int decimal_sep; | |||
| @@ -314,7 +316,6 @@ typedef struct { | |||
| char ideographs; // treat as separate words | |||
| int testing; // testing options: bit 1= specify stressed syllable in the form: "outdoor/2" | |||
| const unsigned int *replace_chars; // characters to be substitutes | |||
| const unsigned int *replacement_chars; // substitutes for replace_chars | |||
| } LANGUAGE_OPTIONS; | |||