git-svn-id: https://espeak.svn.sourceforge.net/svnroot/espeak/trunk@93 d46cf337-b52f-0410-862d-fd96e6ae7743master
| brittanje $2 | brittanje $2 | ||||
| ceylon s@lOn | ceylon s@lOn | ||||
| china S'ina | china S'ina | ||||
| (graaff reinet) x2rA:f||r@n'Et | |||||
| italië it'A:li;@ | |||||
| jerusalem j@r'ysalEm | jerusalem j@r'ysalEm | ||||
| kanada kanad%a | kanada kanad%a | ||||
| knysna neIsna | |||||
| kongo kONgu | kongo kONgu | ||||
| mesopotamië mEsuput'A:mi@ | mesopotamië mEsuput'A:mi@ | ||||
| mosambiek musamb'ik | mosambiek musamb'ik | ||||
| potchefstroom pOtSIfstr'o@m | |||||
| (thaba nchu) tab'A:||ntSu | (thaba nchu) tab'A:||ntSu | ||||
| europa Y@r'o@pa | europa Y@r'o@pa | ||||
| outeniekwa @Ut@n'ikwa | |||||
| portugal $1 | portugal $1 | ||||
| potchefstroom pOtSIfstr'o@m | |||||
| pretoria prit'o@ria | pretoria prit'o@ria | ||||
| stellenbosch st%&l@mbOs | stellenbosch st%&l@mbOs | ||||
| tunisië $2 | |||||
| turkye $2 | |||||
| upington apiNt@n | upington apiNt@n | ||||
| worcester v'ust@r | worcester v'ust@r | ||||
| zimbabwe zI:mb'ab_wE | |||||
| zimbabwe zI:mb'ab_wE | |||||
| botha bo@ta | botha bo@ta | ||||
| breytenbach breIt@nbax2 | breytenbach breIt@nbax2 | ||||
| carl kA:r@L | carl kA:r@L | ||||
| cecilia s@si:lia | |||||
| charles _^_EN | charles _^_EN | ||||
| charlie tSA:li | charlie tSA:li | ||||
| chopin S%OpA~ | chopin S%OpA~ | ||||
| chris krIs | chris krIs | ||||
| christo krIstu | christo krIstu | ||||
| christian krIstian | christian krIstian | ||||
| christelle kr@st&l | |||||
| christine kr@st'i:n | christine kr@st'i:n | ||||
| coetzee kuts'e@ | coetzee kuts'e@ | ||||
| cronjé krOnj'e@ | cronjé krOnj'e@ | ||||
| debora d@bo@ra | debora d@bo@ra | ||||
| debussy d@bus'i: | |||||
| der d@r | der d@r | ||||
| deventer d'e@v@nt@r | deventer d'e@v@nt@r | ||||
| du $u | du $u | ||||
| martin m'A:rt@n | martin m'A:rt@n | ||||
| michelle miS'&l | michelle miS'&l | ||||
| michiel $2 | michiel $2 | ||||
| miriam mIri@m | |||||
| mostert m'Ost@rt | mostert m'Ost@rt | ||||
| mozart mo@tsart | mozart mo@tsart | ||||
| naomi na_'o@mi | naomi na_'o@mi | ||||
| paul p@Ul | paul p@Ul | ||||
| paulus p@UlWs | paulus p@UlWs | ||||
| philip fIl@p | philip fIl@p | ||||
| phillips fIl@ps | |||||
| pierre p'e@r | pierre p'e@r | ||||
| petrus pe@trWs | petrus pe@trWs | ||||
| phoebe fi:bi | phoebe fi:bi | ||||
| rachmaninoff rax2m'aninOf | rachmaninoff rax2m'aninOf | ||||
| rebekka r@bEka | rebekka r@bEka | ||||
| renée r@neI | |||||
| retief r@tif | |||||
| ronel run'&l | ronel run'&l | ||||
| rousseau r@s@U | rousseau r@s@U | ||||
| roux r'u | roux r'u | ||||
| viljoen $2 | viljoen $2 | ||||
| villiers vIli@rs | villiers vIli@rs | ||||
| violet _^_EN $capital | violet _^_EN $capital | ||||
| william _^_EN | |||||
| marais mar'E: | marais mar'E: | ||||
| mandela mand'E:la | mandela mand'E:la | ||||
| celliers sIlj'e@ | celliers sIlj'e@ | ||||
| tensy $2 $pause | tensy $2 $pause | ||||
| aangesien $pause | aangesien $pause | ||||
| wie $pause | wie $pause | ||||
| wanneer $pause | |||||
| wanneer $1 $pause | |||||
| waar $pause | waar $pause | ||||
| waarom $pause | waarom $pause | ||||
| waarheen $pause | waarheen $pause | ||||
| // main word list | // main word list | ||||
| aanbid $2 | |||||
| aanstaande $2 | aanstaande $2 | ||||
| adagio ad'A:dZi;%@U | adagio ad'A:dZi;%@U | ||||
| algaande $2 | algaande $2 | ||||
| hoofsaaklik $2 | hoofsaaklik $2 | ||||
| idee $2 | idee $2 | ||||
| ietwat itvat | |||||
| ignoreer ix2no@r'e@r | ignoreer ix2no@r'e@r | ||||
| inagneming @nax2ne@m@N | inagneming @nax2ne@m@N | ||||
| inkluis $2 | inkluis $2 | ||||
| intensiteit $4 | intensiteit $4 | ||||
| jawoord jA:vo@rt | jawoord jA:vo@rt | ||||
| jupiter jupit@r | |||||
| kafee kaf'e@ | kafee kaf'e@ | ||||
| kapitaal kapit'A:l | |||||
| kaviaar kavi'A:r | |||||
| komaan kOmA:n | komaan kOmA:n | ||||
| komberse kOmb'&rs@ | komberse kOmb'&rs@ | ||||
| kombers kOmb'&rs | kombers kOmb'&rs | ||||
| kopiereg kup'ir&x2 | kopiereg kup'ir&x2 | ||||
| kritiek krIt'ik | kritiek krIt'ik | ||||
| kultuur $2 | |||||
| kulture $2 | |||||
| kunsmatige kWnsm'A:t@x2@ | kunsmatige kWnsm'A:t@x2@ | ||||
| kwansuis $2 | kwansuis $2 | ||||
| macaroni makar'o@ni | macaroni makar'o@ni | ||||
| madame mad'A:m | madame mad'A:m | ||||
| makabere m%ak'A:b@r@ | makabere m%ak'A:b@r@ | ||||
| maskering mask'e@r@N $only | |||||
| meegee me@x2e@ | meegee me@x2e@ | ||||
| memoriam mEm'o@riam | memoriam mEm'o@riam | ||||
| merlot m&rl'o@ | merlot m&rl'o@ | ||||
| uiteraard Yyt@r_'A:rt | uiteraard Yyt@r_'A:rt | ||||
| vaarwel fA:rv'&l | vaarwel fA:rv'&l | ||||
| vakant f%ak'ant | |||||
| vakante f%ak'ant@ | |||||
| vanaf fanaf | vanaf fanaf | ||||
| vandat $1 | vandat $1 | ||||
| vanne fan@ | vanne fan@ | ||||
| ver f'&:r | ver f'&:r | ||||
| verg f&rx2 | verg f&rx2 | ||||
| vergete f@rx2e@t@ | vergete f@rx2e@t@ | ||||
| vergewe f@rx2e@v@ | |||||
| vermy $2 | vermy $2 | ||||
| verre f&:r@ | verre f&:r@ | ||||
| verregaande f&r@x2'A:nd@ | verregaande f&r@x2'A:nd@ |
| Dictionary hi_dict | Dictionary hi_dict | ||||
| @ @- @2 @3 a a: aI aU | |||||
| e E e: E: E~ i I i: | |||||
| l- o O o: O: o~ O~ r- | |||||
| U u: u~ V | |||||
| @ @2 @3 a a: aI aU E | |||||
| e: E: E~ I i: O o: O: | |||||
| O~ r- U u: V | |||||
| - : b c ch d d. f | |||||
| g h H j J k kh l | |||||
| m n N n. n^ p ph Q | |||||
| q r s S s. t T t. | |||||
| t.h th v w x z | |||||
| : b c ch d d. f g | |||||
| h H j J k kh l m | |||||
| n N n. n^ p ph Q q | |||||
| r s S s. t T t. t.h | |||||
| th v w x z | |||||
| Dictionary hr_dict | Dictionary hr_dict | ||||
| Dictionary hu_dict | Dictionary hu_dict | ||||
| A a: E e: i i: o o: | A a: E e: i i: o o: | ||||
| u u: Y y y: Y: | |||||
| u u: Y Y: | |||||
| : b c C d dZ f g | |||||
| h j J k l l^ m n | |||||
| N n^ p R R2 s S s2 | |||||
| t tS ts v z Z | |||||
| : b c d dZ f g h | |||||
| j J k l m n n^ p | |||||
| R R2 s S s2 t tS ts | |||||
| v z Z | |||||
| Dictionary it_dict | Dictionary it_dict | ||||
| & &/ &U~ &~ @ @- a A | & &/ &U~ &~ @ @- a A | ||||
| aI aU e E eI EI eU EU | aI aU e E eI EI eU EU | ||||
| i i/ iU i~ o O oI OI | |||||
| e~ i i/ iU o O oI OI | |||||
| o~ u U uI u~ y | o~ u U uI u~ y | ||||
| * : ; b C d dZ f | * : ; b C d dZ f | ||||
| Dictionary ro_dict | Dictionary ro_dict | ||||
| @ @- @I @U a aI aU e | |||||
| ea eI eo eU i i/ I^ iI | |||||
| iU o O Oa oI oU u uI | |||||
| y Y yI yU | |||||
| @ @- @I a aI aU e ea | |||||
| eI eU i I^ iI o Oa oI | |||||
| oU u uI y | |||||
| * *; b b; c C d d; | |||||
| dZ f f; g h j k l | |||||
| l; m m; n N n; p p; | |||||
| r s S S; t T t; tS | |||||
| ts ts; v v; w w2 x z | |||||
| Z z; Z; | |||||
| * *; b c d dZ f g | |||||
| h j k l m m; n p | |||||
| r s S S; t tS ts ts; | |||||
| v w w2 x z Z | |||||
| Dictionary is_dict | Dictionary is_dict | ||||
| r R s t w z | r R s t w z | ||||
| Dictionary grc_dict | |||||
| Dictionary mk_dict | Dictionary mk_dict | ||||
| & @ @- @2 a A a: E | |||||
| e e: E~ i I i: l- o | |||||
| o: oU r- u u: y | |||||
| & @ @- @2 a E e i | |||||
| I o r- u | |||||
| * b d dZ dZ; f g h | |||||
| j k k^ l l^ m n N | |||||
| n^ p r R s S t tS | |||||
| ts tS; v x z Z | |||||
| * b d dZ dZ; f g j | |||||
| k k^ l l^ m n n^ p | |||||
| r R s S t tS ts v | |||||
| x z Z |
| // ?3 use diphthong for "au" | // ?3 use diphthong for "au" | ||||
| .replace | |||||
| ० 0 | |||||
| १ 1 | |||||
| २ 2 | |||||
| ३ 3 | |||||
| ४ 4 | |||||
| ५ 5 | |||||
| ६ 6 | |||||
| ७ 7 | |||||
| ८ 8 | |||||
| ९ 9 | |||||
| // Vowels | // Vowels | ||||
| .group अ | .group अ |
| // This file is UTF-8 encoded | // This file is UTF-8 encoded | ||||
| .replace | |||||
| // allow o,u-circumflex for o,u-double-acute | |||||
| ô ő | |||||
| û ű | |||||
| .group a | .group a | ||||
| a A | a A | ||||
| _) a (_ %A | _) a (_ %A |
| // translation rules for Macedonian | // translation rules for Macedonian | ||||
| // This file is UTF-8 encoded | // This file is UTF-8 encoded | ||||
| .replace | |||||
| a а | |||||
| b б | |||||
| c ц | |||||
| ć ћ | |||||
| č ч | |||||
| dž џ | |||||
| dz ѕ | |||||
| d д | |||||
| đ ђ | |||||
| e е | |||||
| f ф | |||||
| g г | |||||
| h х | |||||
| i и | |||||
| j ј | |||||
| k к | |||||
| lj љ | |||||
| l л | |||||
| m м | |||||
| nj њ | |||||
| n н | |||||
| o о | |||||
| p п | |||||
| r р | |||||
| s с | |||||
| š ш | |||||
| t т | |||||
| u у | |||||
| v в | |||||
| z з | |||||
| ž ж | |||||
| đ ѓ | |||||
| ć ќ | |||||
| .group а | .group а | ||||
| а a | а a | ||||
| // This file is UTF-8 encoded | // This file is UTF-8 encoded | ||||
| // replace s-comma, t-comma by s-cedilla, t-cedilla | |||||
| .replace | |||||
| ș ş | |||||
| ț ţ | |||||
| .group a | .group a |
| 39 phoneme tables | |||||
| 40 phoneme tables | |||||
| new total | new total | ||||
| base 96 96 | base 96 96 | ||||
| base2 23 114 | |||||
| base2 24 115 | |||||
| en 53 144 | en 53 144 | ||||
| en_n 30 144 | en_n 30 144 | ||||
| en_us 37 144 | en_us 37 144 | ||||
| mk 3 130 | mk 3 130 | ||||
| sr 2 129 | sr 2 129 | ||||
| ru 38 126 | ru 38 126 | ||||
| it 17 117 | |||||
| it 17 118 | |||||
| la 21 114 | la 21 114 | ||||
| es 6 114 | |||||
| pt 28 131 | |||||
| es 6 115 | |||||
| pt 27 131 | |||||
| pt_pt 20 131 | pt_pt 20 131 | ||||
| ro 36 138 | |||||
| el 8 114 | |||||
| ro 36 139 | |||||
| el 8 115 | |||||
| grc 7 120 | |||||
| sv 25 118 | sv 25 118 | ||||
| no 28 122 | no 28 122 | ||||
| is 32 121 | is 32 121 | ||||
| 30 r/trr base af de fi nl ru ro sv sw | 30 r/trr base af de fi nl ru ro sv sw | ||||
| 11 r/xr base | 11 r/xr base | ||||
| 2 ufric/ch base de | 2 ufric/ch base de | ||||
| 3 ufric/f base de ro | |||||
| 4 ufric/f base de ro grc | |||||
| 2 ufric/f_ base ro | 2 ufric/f_ base ro | ||||
| 5 ufric/h_ base fi hi la | 5 ufric/h_ base fi hi la | ||||
| 6 ufric/h@ base fi hi la | 6 ufric/h@ base fi hi la | ||||
| 8 ustop/ts_pzd base hi ru | 8 ustop/ts_pzd base hi ru | ||||
| 2 ustop/ts_pzd_ hi hu | 2 ustop/ts_pzd_ hi hu | ||||
| 2 ustop/ts_pzd2 hi hu | 2 ustop/ts_pzd2 hi hu | ||||
| 2 vdiph/0i pt vi | |||||
| 3 vdiph/0i pt grc vi | |||||
| 3 vdiph/0i_2 en_sc no en_wi | 3 vdiph/0i_2 en_sc no en_wi | ||||
| 3 vdiph2/e@ en_sc en_wi | 3 vdiph2/e@ en_sc en_wi | ||||
| 1 vdiph2/ea ro | 1 vdiph2/ea ro | ||||
| 1 vdiph/au_3 en_rp | 1 vdiph/au_3 en_rp | ||||
| 6 vdiph/au_4 base2 cy eo sk it is | 6 vdiph/au_4 base2 cy eo sk it is | ||||
| 1 vdiph/ee-e hi | 1 vdiph/ee-e hi | ||||
| 5 vdiph/eei en de nl pt vi | |||||
| 5 vdiph/eei base2 en de nl vi | |||||
| 3 vdiph/eei_2 en_us eo fi | 3 vdiph/eei_2 en_us eo fi | ||||
| 2 vdiph/eei_3 en_rp sk | 2 vdiph/eei_3 en_rp sk | ||||
| 3 vdiph/eeu pt vi zhy | |||||
| 4 vdiph/eeu pt grc vi zhy | |||||
| 2 vdiph/eeu_2 la pt_pt | 2 vdiph/eeu_2 la pt_pt | ||||
| 2 vdiph/eeu_3 en_n en_wm | 2 vdiph/eeu_3 en_n en_wm | ||||
| 1 vdiph/eey fi | 1 vdiph/eey fi | ||||
| 9 vdiph/ooi en en_n en_us cy eo fi no zhy | 9 vdiph/ooi en en_n en_us cy eo fi no zhy | ||||
| 1 vdiph/ooi_2 af | 1 vdiph/ooi_2 af | ||||
| 2 vdiph/ooi_3 en_rp en_wm | 2 vdiph/ooi_3 en_rp en_wm | ||||
| 1 vdiph/oou cs | |||||
| 2 vdiph/oou cs grc | |||||
| 2 vdiph/ou fi zhy | 2 vdiph/ou fi zhy | ||||
| 2 vdiph/ou_2 sk ro | 2 vdiph/ou_2 sk ro | ||||
| 2 vdiph/ou_3 is | 2 vdiph/ou_3 is | ||||
| 1 vdiph/Vu_2 en_wm | 1 vdiph/Vu_2 en_wm | ||||
| 1 vdiph/Vu_3 nl | 1 vdiph/Vu_3 nl | ||||
| 2 vdiph/&y fi nl | 2 vdiph/&y fi nl | ||||
| 2 vdiph/yi fi no | |||||
| 3 vdiph/yi fi grc no | |||||
| 1 vdiph/y#i fi | 1 vdiph/y#i fi | ||||
| 1 vdiph/y#i_2 is | 1 vdiph/y#i_2 is | ||||
| 1 vdiph/yi_fr fr | 1 vdiph/yi_fr fr | ||||
| 4 vowel/& en_rp fi hi sv | 4 vowel/& en_rp fi hi sv | ||||
| 4 vowel/0 base2 en hi pt | 4 vowel/0 base2 en hi pt | ||||
| 3 vowel/0_2 en_n pt_pt sw | 3 vowel/0_2 en_n pt_pt sw | ||||
| 5 vowel/0_3 en_us en_sc en_rp en_wm hu | |||||
| 4 vowel/0_3 en_us en_sc en_rp hu | |||||
| 2 vowel/@_2 fr | 2 vowel/@_2 fr | ||||
| 2 vowel/&_2 en_us | 2 vowel/&_2 en_us | ||||
| 6 vowel/@_3 en_sc de hi | 6 vowel/@_3 en_sc de hi | ||||
| 2 vowel/ii_6 en_wm | 2 vowel/ii_6 en_wm | ||||
| 1 vowel/ii_en en | 1 vowel/ii_en en | ||||
| 10 vowel/@_low en_rp hi ro no | 10 vowel/@_low en_rp hi ro no | ||||
| 12 vowel/o base2 en en_wm de hi it la pt_pt sv en_wi | |||||
| 10 vowel/o base2 en de hi it la pt_pt sv en_wi | |||||
| 4 vowel/o_2 cy hi hu no | 4 vowel/o_2 cy hi hu no | ||||
| 2 vowel/o-_2 en_n en_wm | 2 vowel/o-_2 en_n en_wm | ||||
| 3 vowel/o_3 en_sc fr | 3 vowel/o_3 en_sc fr | ||||
| 1 vowel/oe_4 sv | 1 vowel/oe_4 sv | ||||
| 2 vowel/o_mid fr hu | 2 vowel/o_mid fr hu | ||||
| 12 vowel/oo en_sc de eo la es el sv no zhy en_wi | 12 vowel/oo en_sc de eo la es el sv no zhy en_wi | ||||
| 10 vowel/oo_1 en_n en_rp en_wm af fi sk hr vi | |||||
| 12 vowel/oo_1 en_n en_rp en_wm af fi sk hr vi | |||||
| 3 vowel/oo_2 en_sc cy cs | 3 vowel/oo_2 en_sc cy cs | ||||
| 1 vowel/oo_3 af | 1 vowel/oo_3 af | ||||
| 5 vowel/oo_4 hi pl it en_wi | |||||
| 6 vowel/oo_4 en_wm hi pl it en_wi | |||||
| 1 vowel/oo_5 is | 1 vowel/oo_5 is | ||||
| 6 vowel/oo_en en en_n en_rp | 6 vowel/oo_en en en_n en_rp | ||||
| 2 vowelr/aa_r en_sc | 2 vowelr/aa_r en_sc | ||||
| 1 vowel/yy fr_ca | 1 vowel/yy fr_ca | ||||
| 1 vowel/yy_2 no | 1 vowel/yy_2 no | ||||
| 1 vowel/yy_3 sv | 1 vowel/yy_3 sv | ||||
| 5 vowel/yy_4 de hu la is | |||||
| 6 vowel/yy_4 de hu la grc is | |||||
| 1 vwl_af/@ af | 1 vwl_af/@ af | ||||
| 1 vwl_af/I af | 1 vwl_af/I af | ||||
| 2 vwl_af/r@ af | 2 vwl_af/r@ af | ||||
| 6 vwl_en/aI@ en en_n en_us en_sc en_rp en_wm | |||||
| 5 vwl_en/aI@ en en_n en_us en_sc en_rp | |||||
| 2 vwl_en/aI@_2 en_sc | 2 vwl_en/aI@_2 en_sc | ||||
| 5 vwl_en/aU@ en en_n en_us en_sc en_wm | 5 vwl_en/aU@ en en_n en_us en_sc en_wm | ||||
| 12 vwl_en/@L en en_us en_sc en_rp en_wm af | 12 vwl_en/@L en en_us en_sc en_rp en_wm af | ||||
| 1 vwl_en_n/aa_5 en_n | 1 vwl_en_n/aa_5 en_n | ||||
| 2 vwl_en_n/O@ en_n | 2 vwl_en_n/O@ en_n | ||||
| 1 vwl_en_n/u_ en_n | 1 vwl_en_n/u_ en_n | ||||
| 1 vwl_en/ooi@ en_wm | |||||
| 3 vwl_en_rp/aa en_rp | 3 vwl_en_rp/aa en_rp | ||||
| 1 vwl_en_rp/aU@ en_rp | 1 vwl_en_rp/aU@ en_rp | ||||
| 1 vwl_en_rp/e@ en_rp | 1 vwl_en_rp/e@ en_rp |
| 0 @- NULL 0 NULL | 0 @- NULL 0 NULL | ||||
| 0 aI@ NULL 60 aI @ | 0 aI@ NULL 60 aI @ | ||||
| 0 aU@ NULL 75 aU @ | 0 aU@ NULL 75 aU @ | ||||
| 0 x NULL 0 k |
| 0 w/ NULL 0 w | 0 w/ NULL 0 w | ||||
| 0 ; NULL 0 NULL | 0 ; NULL 0 NULL | ||||
| 0 g- NULL 0 NULL | 0 g- NULL 0 NULL | ||||
| 0 x NULL 0 k_h | |||||
| 0 @- NULL 0 NULL | 0 @- NULL 0 NULL | ||||
| 0 aI@ NULL 60 AI r | 0 aI@ NULL 60 AI r | ||||
| 0 aU@ NULL 75 aU r | 0 aU@ NULL 75 aU r |
| 0 w/ NULL 0 w | 0 w/ NULL 0 w | ||||
| 0 ; NULL 0 NULL | 0 ; NULL 0 NULL | ||||
| 0 g- NULL 0 NULL | 0 g- NULL 0 NULL | ||||
| 0 x NULL 0 k | |||||
| 0 @- NULL 0 NULL | 0 @- NULL 0 NULL | ||||
| 0 aI@ NULL 60 AI r | 0 aI@ NULL 60 AI r | ||||
| 0 aU@ NULL 75 aU r | 0 aU@ NULL 75 aU r |
| endphoneme | endphoneme | ||||
| phoneme EI | |||||
| vowel starttype (e) endtype (i) | |||||
| length 230 | |||||
| formants vdiph/eei | |||||
| endphoneme | |||||
| phoneme oI | phoneme oI | ||||
| vowel starttype (o) endtype (i) | vowel starttype (o) endtype (i) | ||||
| length 240 | length 240 |
| phoneme I | phoneme I | ||||
| vowel starttype (e) endtype (e) | |||||
| vowel starttype (@) endtype (@) | |||||
| length 110 | length 110 | ||||
| formants vowel/e# | formants vowel/e# | ||||
| before l/2 vowel/@_3-30+l/L2_@L | before l/2 vowel/@_3-30+l/L2_@L | ||||
| endphoneme | endphoneme | ||||
| phoneme I2 | phoneme I2 | ||||
| vowel starttype (e) endtype (e) | |||||
| vowel starttype (@) endtype (@) | |||||
| unstressed | unstressed | ||||
| length 110 | length 110 | ||||
| formants vowel/e# | formants vowel/e# |
| phoneme 0 | phoneme 0 | ||||
| vowel starttype (o) endtype (o) | vowel starttype (o) endtype (o) | ||||
| length 140 | length 140 | ||||
| formants vowel/0_3 | |||||
| formants vowel/oo_4 | |||||
| reduceto @ 0 | reduceto @ 0 | ||||
| endphoneme | endphoneme | ||||
| phoneme O@ | phoneme O@ | ||||
| vowel starttype (o) endtype (@) | vowel starttype (o) endtype (@) | ||||
| length 240 | length 240 | ||||
| formants vowel/o | |||||
| formants vowel/oo_1 | |||||
| linkout r- | linkout r- | ||||
| endphoneme | endphoneme | ||||
| phoneme O | phoneme O | ||||
| vowel starttype (o) endtype (o) | vowel starttype (o) endtype (o) | ||||
| length 150 | length 150 | ||||
| formants vowel/o | |||||
| formants vowel/oo_1 | |||||
| reduceto @ 0 | reduceto @ 0 | ||||
| endphoneme | endphoneme | ||||
| phoneme eI | phoneme eI | ||||
| vowel starttype (@) endtype (i) | vowel starttype (@) endtype (i) | ||||
| length 210 | |||||
| length 230 | |||||
| formants vdiph/@i_3 | formants vdiph/@i_3 | ||||
| endphoneme | endphoneme | ||||
| phoneme aI@ | phoneme aI@ | ||||
| vowel starttype (a) endtype (@) | vowel starttype (a) endtype (@) | ||||
| length 270 | length 270 | ||||
| formants vwl_en/aI@ | |||||
| formants vwl_en/ooi@ | |||||
| linkout r- | linkout r- | ||||
| endphoneme | endphoneme | ||||
| //==================================================== | |||||
| // Ancient Greek - based on base2 | |||||
| //==================================================== | |||||
| phoneme y | |||||
| vowel starttype (i) endtype (i) | |||||
| length 160 | |||||
| formants vowel/yy_4 | |||||
| endphoneme | |||||
| phoneme EU | |||||
| vowel starttype (e) endtype (u) | |||||
| length 230 | |||||
| formants vdiph/eeu | |||||
| endphoneme | |||||
| phoneme OI | |||||
| vowel starttype (o) endtype (i) | |||||
| length 230 | |||||
| formants vdiph/0i | |||||
| endphoneme | |||||
| phoneme OU | |||||
| vowel starttype (o) endtype (u) | |||||
| length 230 | |||||
| formants vdiph/oou | |||||
| endphoneme | |||||
| phoneme yI | |||||
| vowel starttype (i) endtype (i) | |||||
| length 230 | |||||
| formants vdiph/yi | |||||
| endphoneme | |||||
| phoneme f // consider this an affrictive | |||||
| vls blb afr | |||||
| vowelout f1=0 f2=1000 -500 -350 f3=-200 80 | |||||
| lengthmod 2 | |||||
| wave ufric/f // could replace this with a [pf] wav file | |||||
| endphoneme | |||||
| endphoneme | endphoneme | ||||
| phoneme EI | |||||
| vowel starttype (e) endtype (i) | |||||
| length 230 | |||||
| formants vdiph/eei | |||||
| endphoneme | |||||
| phoneme OI | phoneme OI | ||||
| vowel starttype (o) endtype (i) | vowel starttype (o) endtype (i) | ||||
| length 230 | length 230 |
| phonemetable el base2 | phonemetable el base2 | ||||
| include ph_greek | include ph_greek | ||||
| phonemetable grc base2 | |||||
| include ph_greek_ancient | |||||
| phonemetable sv base | phonemetable sv base | ||||
| include ph_swedish | include ph_swedish | ||||
| char encoded_ph[200]; | char encoded_ph[200]; | ||||
| unsigned char bad_phoneme[4]; | unsigned char bad_phoneme[4]; | ||||
| p = linebuf; | p = linebuf; | ||||
| comment = NULL; | comment = NULL; | ||||
| phonetic = word = ""; | phonetic = word = ""; | ||||
| if((word[0] & 0x80)==0) // 7 bit ascii only | if((word[0] & 0x80)==0) // 7 bit ascii only | ||||
| { | { | ||||
| // 1st letter - need to consider utf8 here | |||||
| // If first letter is uppercase, convert to lower case. (Only if it's 7bit ascii) | |||||
| // ??? need to consider utf8 here | |||||
| word[0] = tolower(word[0]); | word[0] = tolower(word[0]); | ||||
| } | } | ||||
| for(ix=0; finish==0; ix++) | for(ix=0; finish==0; ix++) | ||||
| { | { | ||||
| c = input[ix]; | c = input[ix]; | ||||
| if((c=='/') && (input[ix+1]=='/')) | |||||
| c = input[ix] = '\n'; // treat command as end of line | |||||
| switch(c = input[ix]) | switch(c = input[ix]) | ||||
| { | { | ||||
| int different; | int different; | ||||
| char *prev_rgroup_name; | char *prev_rgroup_name; | ||||
| unsigned int char_code; | unsigned int char_code; | ||||
| int compile_mode=0; | |||||
| char *buf; | char *buf; | ||||
| char buf1[120]; | char buf1[120]; | ||||
| char *rules[N_RULES]; | char *rules[N_RULES]; | ||||
| { | { | ||||
| linenum++; | linenum++; | ||||
| buf = fgets(buf1,sizeof(buf1),f_in); | buf = fgets(buf1,sizeof(buf1),f_in); | ||||
| if((buf != NULL) && (buf[0] == '\r')) buf++; // ignore extra \r in \r\n | |||||
| if(buf != NULL) | |||||
| { | |||||
| if((p = (unsigned char *)strstr(buf,"//")) != NULL) | |||||
| *p = 0; | |||||
| if(buf[0] == '\r') buf++; // ignore extra \r in \r\n | |||||
| } | |||||
| if((buf != NULL) && (memcmp(buf,".L",2)==0)) | if((buf != NULL) && (memcmp(buf,".L",2)==0)) | ||||
| { | { | ||||
| continue; | continue; | ||||
| } | } | ||||
| if((buf == NULL) || (memcmp(buf,".group",6)==0)) | |||||
| if((buf == NULL) || (buf[0] == '.')) | |||||
| { | { | ||||
| // next .group or end of file, write out the previous group | // next .group or end of file, write out the previous group | ||||
| } | } | ||||
| n_rules = 0; | n_rules = 0; | ||||
| if(buf == NULL) break; // end of file | |||||
| if(compile_mode == 2) | |||||
| { | |||||
| // end of the character replacements section | |||||
| fwrite(&n_rules,1,4,f_out); // write a zero word to terminate the replacemenmt list | |||||
| } | |||||
| p = (unsigned char *)&buf[6]; | |||||
| while((p[0]==' ') || (p[0]=='\t')) p++; // Note: Windows isspace(0xe1) gives TRUE ! | |||||
| ix = 0; | |||||
| while((*p > ' ') && (ix<12)) | |||||
| group_name[ix++] = *p++; | |||||
| group_name[ix]=0; | |||||
| if(buf == NULL) break; // end of file | |||||
| if(sscanf(group_name,"0x%x",&char_code)==1) | |||||
| if(memcmp(buf,".replace",8)==0) | |||||
| { | { | ||||
| // group character is given as a character code (max 16 bits) | |||||
| p = (unsigned char *)group_name; | |||||
| compile_mode = 2; | |||||
| fputc(RULE_GROUP_START,f_out); | |||||
| fputc(RULE_REPLACEMENTS,f_out); | |||||
| if(char_code > 0x100) | |||||
| { | |||||
| *p++ = (char_code >> 8); | |||||
| } | |||||
| *p++ = char_code; | |||||
| *p = 0; | |||||
| // advance to next word boundary | |||||
| while((ftell(f_out) & 3) != 0) | |||||
| fputc(0,f_out); | |||||
| } | } | ||||
| if(strlen(group_name) > 2) | |||||
| if(memcmp(buf,".group",6)==0) | |||||
| { | { | ||||
| if(utf8_in(&c,group_name,0) < 2) | |||||
| compile_mode = 1; | |||||
| p = (unsigned char *)&buf[6]; | |||||
| while((p[0]==' ') || (p[0]=='\t')) p++; // Note: Windows isspace(0xe1) gives TRUE ! | |||||
| ix = 0; | |||||
| while((*p > ' ') && (ix<12)) | |||||
| group_name[ix++] = *p++; | |||||
| group_name[ix]=0; | |||||
| if(sscanf(group_name,"0x%x",&char_code)==1) | |||||
| { | { | ||||
| fprintf(f_log,"%5d: Group name longer than 2 bytes (UTF8)",linenum); | |||||
| error_count++; | |||||
| // group character is given as a character code (max 16 bits) | |||||
| p = (unsigned char *)group_name; | |||||
| if(char_code > 0x100) | |||||
| { | |||||
| *p++ = (char_code >> 8); | |||||
| } | |||||
| *p++ = char_code; | |||||
| *p = 0; | |||||
| } | |||||
| if(strlen(group_name) > 2) | |||||
| { | |||||
| if(utf8_in(&c,group_name,0) < 2) | |||||
| { | |||||
| fprintf(f_log,"%5d: Group name longer than 2 bytes (UTF8)",linenum); | |||||
| error_count++; | |||||
| } | |||||
| group_name[2] = 0; | |||||
| } | } | ||||
| group_name[2] = 0; | |||||
| } | } | ||||
| continue; | continue; | ||||
| } | } | ||||
| prule = compile_rule(buf); | |||||
| if((prule != NULL) && (n_rules < N_RULES)) | |||||
| switch(compile_mode) | |||||
| { | { | ||||
| rules[n_rules++] = prule; | |||||
| case 1: // .group | |||||
| prule = compile_rule(buf); | |||||
| if((prule != NULL) && (n_rules < N_RULES)) | |||||
| { | |||||
| rules[n_rules++] = prule; | |||||
| } | |||||
| break; | |||||
| case 2: // .replace | |||||
| { | |||||
| int replace1; | |||||
| int replace2; | |||||
| char *p; | |||||
| p = buf; | |||||
| replace1 = 0; | |||||
| replace2 = 0; | |||||
| while(isspace2(*p)) p++; | |||||
| ix = 0; | |||||
| while((unsigned char)(*p) > 0x20) // not space or zero-byte | |||||
| { | |||||
| p += utf8_in(&c,p,0); | |||||
| replace1 += (c << ix); | |||||
| ix += 16; | |||||
| } | |||||
| while(isspace2(*p)) p++; | |||||
| ix = 0; | |||||
| while((unsigned char)(*p) > 0x20) | |||||
| { | |||||
| p += utf8_in(&c,p,0); | |||||
| replace2 += (c << ix); | |||||
| ix += 16; | |||||
| } | |||||
| if(replace1 != 0) | |||||
| { | |||||
| fwrite(&replace1,1,4,f_out); | |||||
| fwrite(&replace2,1,4,f_out); | |||||
| } | |||||
| } | |||||
| break; | |||||
| } | } | ||||
| } | } | ||||
| fclose(f_temp); | fclose(f_temp); |
| int ix; | int ix; | ||||
| char *p; | char *p; | ||||
| char *p_name; | char *p_name; | ||||
| unsigned int *pw; | |||||
| unsigned char c, c2; | unsigned char c, c2; | ||||
| int len; | int len; | ||||
| int rule_count; | |||||
| n_groups2 = 0; | n_groups2 = 0; | ||||
| for(ix=0; ix<256; ix++) | for(ix=0; ix<256; ix++) | ||||
| } | } | ||||
| p++; | p++; | ||||
| if(p[0] == RULE_REPLACEMENTS) | |||||
| { | |||||
| pw = (unsigned int *)(((int)p+4) & ~3); // advance to next word boundary | |||||
| langopts.replace_chars = pw; | |||||
| while(pw[0] != 0) | |||||
| { | |||||
| pw += 2; // find the end of the replacement list, each entry is 2 words. | |||||
| } | |||||
| p = (char *)(pw+1); | |||||
| continue; | |||||
| } | |||||
| if(p[0] == RULE_LETTERGP2) | if(p[0] == RULE_LETTERGP2) | ||||
| { | { | ||||
| ix = p[1] - 'A'; | ix = p[1] - 'A'; | ||||
| } | } | ||||
| // skip over all the rules in this group | // skip over all the rules in this group | ||||
| rule_count = 0; | |||||
| while(*p != RULE_GROUP_END) | while(*p != RULE_GROUP_END) | ||||
| { | { | ||||
| p += (strlen(p) + 1); | p += (strlen(p) + 1); | ||||
| rule_count++; | |||||
| } | } | ||||
| p++; | p++; | ||||
| } | } |
| } // end of ConvertToItf8 | } // end of ConvertToItf8 | ||||
| //****************************************************************************************************** | |||||
| //#define calcspeedtab | //#define calcspeedtab |
| if(langopts.numbers & 0x200) | if(langopts.numbers & 0x200) | ||||
| { | { | ||||
| // remove vowel from the end of tens if units starts with a vowel (LANG=Italian) | // remove vowel from the end of tens if units starts with a vowel (LANG=Italian) | ||||
| ix = strlen(ph_tens)-1; | |||||
| if((next_phtype = phoneme_tab[(unsigned int)(ph_digits[0])]->type) == phSTRESS) | |||||
| next_phtype = phoneme_tab[(unsigned int)(ph_digits[1])]->type; | |||||
| if((phoneme_tab[(unsigned int)(ph_tens[ix])]->type == phVOWEL) && (next_phtype == phVOWEL)) | |||||
| ph_tens[ix] = 0; | |||||
| if((ix = strlen(ph_tens)-1) >= 0) | |||||
| { | |||||
| if((next_phtype = phoneme_tab[(unsigned int)(ph_digits[0])]->type) == phSTRESS) | |||||
| next_phtype = phoneme_tab[(unsigned int)(ph_digits[1])]->type; | |||||
| if((phoneme_tab[(unsigned int)(ph_tens[ix])]->type == phVOWEL) && (next_phtype == phVOWEL)) | |||||
| ph_tens[ix] = 0; | |||||
| } | |||||
| } | } | ||||
| sprintf(ph_out,"%s%s",ph_tens,ph_digits); | sprintf(ph_out,"%s%s",ph_tens,ph_digits); | ||||
| } | } | ||||
| decimal_point = 0; | decimal_point = 0; | ||||
| } | } | ||||
| } | } | ||||
| if(ph_out[0] != 0) | |||||
| if((ph_out[0] != 0) && (ph_out[0] != phonSWITCH)) | |||||
| { | { | ||||
| int next_char; | int next_char; | ||||
| utf8_in(&next_char,&word[n_digits+1],0); | utf8_in(&next_char,&word[n_digits+1],0); |
| #include "translate.h" | #include "translate.h" | ||||
| #include "wave.h" | #include "wave.h" | ||||
| const char *version_string = "1.29.10 16.Oct.07"; | |||||
| const char *version_string = "1.29.11 23.Oct.07"; | |||||
| const int version_phdata = 0x012901; | const int version_phdata = 0x012901; | ||||
| int option_device_number = -1; | int option_device_number = -1; |
| #define L_qa 0x716100 | #define L_qa 0x716100 | ||||
| #define L_grc 0x677263 // grc Ancient Greek | |||||
| #define OFFSET_GREEK 0x380 | #define OFFSET_GREEK 0x380 | ||||
| #define OFFSET_DEVANAGARI 0x900 | #define OFFSET_DEVANAGARI 0x900 | ||||
| static const unsigned int replace_cyrillic[] = | |||||
| {0x430,0x431,0x446,0x45b,0x447,0x45f,0x455,0x434,0x452, | |||||
| 0x435,0x444,0x433,0x445,0x438,0x458,0x43a,0x459, | |||||
| 0x43b,0x43c,0x45a,0x43d,0x43e,0x43f,0x440,0x441, | |||||
| 0x448,0x442,0x443,0x432,0x437,0x436, | |||||
| 0x453,0x45c,0}; // ѓ ѕ ќ | |||||
| static const unsigned int replace_cyrillic_latin[] = | |||||
| {'a','b','c',0x107,0x10d,'d'+(0x17e<<16),'d'+('z'<<16),'d',0x111, | |||||
| 'e','f','g','h','i','j','k','l'+('j'<<16), | |||||
| 'l','m','n'+('j'<<16),'n','o','p','r','s', | |||||
| 0x161,'t','u','v','z',0x17e, | |||||
| 0x111,0x107,0}; | |||||
| static const unsigned int replace_cyrillic_latin[] = | |||||
| {0x430,'a', | |||||
| 0x431,'b', | |||||
| 0x446,'c', | |||||
| 0x45b,0x107, | |||||
| 0x447,0x10d, | |||||
| 0x45f,'d'+(0x17e<<16), | |||||
| 0x455,'d'+('z'<<16), | |||||
| 0x434,'d', | |||||
| 0x452,0x111, | |||||
| 0x435,'e', | |||||
| 0x444,'f', | |||||
| 0x433,'g', | |||||
| 0x445,'h', | |||||
| 0x438,'i', | |||||
| 0x458,'j', | |||||
| 0x43a,'k', | |||||
| 0x459,'l'+('j'<<16), | |||||
| 0x43b,'l', | |||||
| 0x43c,'m', | |||||
| 0x45a,'n'+('j'<<16), | |||||
| 0x43d,'n', | |||||
| 0x43e,'o', | |||||
| 0x43f,'p', | |||||
| 0x440,'r', | |||||
| 0x441,'s', | |||||
| 0x448,0x161, | |||||
| 0x442,'t', | |||||
| 0x443,'u', | |||||
| 0x432,'v', | |||||
| 0x437,'z', | |||||
| 0x436,0x17e, | |||||
| 0x453,0x111, | |||||
| 0x45c,0x107, | |||||
| 0}; // ѓ ѕ ќ | |||||
| void SetupTranslator(Translator *tr, int *lengths, int *amps) | void SetupTranslator(Translator *tr, int *lengths, int *amps) | ||||
| break; | break; | ||||
| case L('e','l'): // Greek | case L('e','l'): // Greek | ||||
| case L_grc: // Ancient Greek | |||||
| { | { | ||||
| static int stress_lengths_el[8] = {155, 180, 210, 210, 0, 0, 270, 300}; | static int stress_lengths_el[8] = {155, 180, 210, 210, 0, 0, 270, 300}; | ||||
| static int stress_amps_el[8] = {15,12, 20,20, 20,24, 24,22 }; // 'diminished' is used to mark a quieter, final unstressed syllable | static int stress_amps_el[8] = {15,12, 20,20, 20,24, 24,22 }; // 'diminished' is used to mark a quieter, final unstressed syllable | ||||
| tr->langopts.numbers = 0xb09; | tr->langopts.numbers = 0xb09; | ||||
| tr->langopts.numbers2 = 0x2; // variant form of numbers before thousands | tr->langopts.numbers2 = 0x2; // variant form of numbers before thousands | ||||
| if(name2 == L_grc) | |||||
| { | |||||
| // ancient greek | |||||
| tr->langopts.param[LOPT_UNPRONOUNCABLE] = 1; | |||||
| } | |||||
| } | } | ||||
| break; | break; | ||||
| case L('h','i'): | case L('h','i'): | ||||
| { | { | ||||
| static const char dev_consonants2[] = {0x02,0x03,0x58,0x59,0x5a,0x5b,0x5c,0x5d,0x5e,0x5f}; | static const char dev_consonants2[] = {0x02,0x03,0x58,0x59,0x5a,0x5b,0x5c,0x5d,0x5e,0x5f}; | ||||
| static const unsigned int replace_chars_hi[11] = {0x966,0x967,0x968,0x969,0x96a,0x96b,0x96c,0x96d,0x96e,0x96f,0}; // digits 0-9 | |||||
| static const unsigned int replacement_chars_hi[11] = {0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,0x38,0x39,0}; | |||||
| static int stress_lengths_hi[8] = {190, 190, 210, 210, 0, 0, 230, 250}; | static int stress_lengths_hi[8] = {190, 190, 210, 210, 0, 0, 230, 250}; | ||||
| static int stress_amps_hi[8] = {17,14, 20,19, 20,24, 24,22 }; | static int stress_amps_hi[8] = {17,14, 20,19, 20,24, 24,22 }; | ||||
| tr->langopts.numbers = 0x811; | tr->langopts.numbers = 0x811; | ||||
| tr->langopts.numbers2 = 0x100; | tr->langopts.numbers2 = 0x100; | ||||
| tr->letter_bits_offset = OFFSET_DEVANAGARI; | tr->letter_bits_offset = OFFSET_DEVANAGARI; | ||||
| tr->langopts.replace_chars = replace_chars_hi; | |||||
| tr->langopts.replacement_chars = replacement_chars_hi; | |||||
| memset(tr->letter_bits,0,sizeof(tr->letter_bits)); | memset(tr->letter_bits,0,sizeof(tr->letter_bits)); | ||||
| SetLetterBitsRange(tr,LETTERGP_A,0x06,0x14); // vowel letters | SetLetterBitsRange(tr,LETTERGP_A,0x06,0x14); // vowel letters | ||||
| tr->langopts.numbers = 0x1c0d + 0x4000 + NUM_ROMAN_UC; | tr->langopts.numbers = 0x1c0d + 0x4000 + NUM_ROMAN_UC; | ||||
| tr->langopts.numbers2 = 0x4a; // variant numbers before thousands,milliards | tr->langopts.numbers2 = 0x4a; // variant numbers before thousands,milliards | ||||
| tr->langopts.replace_chars = replace_cyrillic; | |||||
| tr->langopts.replacement_chars = replace_cyrillic_latin; | |||||
| tr->langopts.replace_chars = replace_cyrillic_latin; | |||||
| SetLetterVowel(tr,'y'); | SetLetterVowel(tr,'y'); | ||||
| SetLetterVowel(tr,'r'); | SetLetterVowel(tr,'r'); | ||||
| { | { | ||||
| static int stress_amps_hu[8] = {17,17, 19,19, 20,24, 24,22 }; | static int stress_amps_hu[8] = {17,17, 19,19, 20,24, 24,22 }; | ||||
| static int stress_lengths_hu[8] = {185,195, 195,190, 0,0, 210,220}; | static int stress_lengths_hu[8] = {185,195, 195,190, 0,0, 210,220}; | ||||
| static const unsigned int replace_chars_hu[] = {0xd4,0xf4,0xdb,0xfb,0}; | |||||
| static const unsigned int replacement_chars_hu[] = {0x150,0x151,0x170,0x171,0}; // allow o,u-circumflex for o,u-double-acute | |||||
| tr = new Translator(); | tr = new Translator(); | ||||
| SetupTranslator(tr,stress_lengths_hu,stress_amps_hu); | SetupTranslator(tr,stress_lengths_hu,stress_amps_hu); | ||||
| tr->charset_a0 = charsets[2]; // ISO-8859-2 | tr->charset_a0 = charsets[2]; // ISO-8859-2 | ||||
| tr->langopts.replace_chars = replace_chars_hu; | |||||
| tr->langopts.replacement_chars = replacement_chars_hu; | |||||
| tr->langopts.vowel_pause = 0x20; | tr->langopts.vowel_pause = 0x20; | ||||
| tr->langopts.stress_rule = 0; | tr->langopts.stress_rule = 0; | ||||
| tr->langopts.stress_rule = 4; // antipenultimate | tr->langopts.stress_rule = 4; // antipenultimate | ||||
| tr->langopts.numbers = 0x0c29 + 0x4000; | tr->langopts.numbers = 0x0c29 + 0x4000; | ||||
| tr->langopts.numbers2 = 0x8a; // variant numbers before thousands,milliards | tr->langopts.numbers2 = 0x8a; // variant numbers before thousands,milliards | ||||
| tr->langopts.replace_chars = replace_cyrillic_latin; | |||||
| tr->langopts.replacement_chars = replace_cyrillic; | |||||
| } | } | ||||
| break; | break; | ||||
| { | { | ||||
| static int stress_lengths_ro[8] = {170, 170, 180, 180, 0, 0, 240, 260}; | static int stress_lengths_ro[8] = {170, 170, 180, 180, 0, 0, 240, 260}; | ||||
| static int stress_amps_ro[8] = {15,13, 18,18, 20,22, 22,22 }; | static int stress_amps_ro[8] = {15,13, 18,18, 20,22, 22,22 }; | ||||
| static const unsigned int replace_chars_ro[5] = {0x218,0x219,0x21a,0x21b,0}; | |||||
| static const unsigned int replacement_chars_ro[5] = {0x15e,0x15f,0x162,0x163,0}; // replace s-comma, t-comma by s-cedilla, t-cedilla | |||||
| tr = new Translator(); | tr = new Translator(); | ||||
| SetupTranslator(tr,stress_lengths_ro,stress_amps_ro); | SetupTranslator(tr,stress_lengths_ro,stress_amps_ro); | ||||
| tr->langopts.stress_flags = 0x100 + 0x6; | tr->langopts.stress_flags = 0x100 + 0x6; | ||||
| tr->charset_a0 = charsets[2]; // ISO-8859-2 | tr->charset_a0 = charsets[2]; // ISO-8859-2 | ||||
| tr->langopts.replace_chars = replace_chars_ro; | |||||
| tr->langopts.replacement_chars = replacement_chars_ro; | |||||
| tr->langopts.numbers = 0x1829+0x6000 + NUM_ROMAN; | tr->langopts.numbers = 0x1829+0x6000 + NUM_ROMAN; | ||||
| tr->langopts.numbers2 = 0x1e; // variant numbers before all thousandplex | tr->langopts.numbers2 = 0x1e; // variant numbers before all thousandplex | ||||
| } | } |
| if(!found && iswdigit(first_char)) | if(!found && iswdigit(first_char)) | ||||
| { | { | ||||
| Lookup("_0lang",word_phonemes); | |||||
| if(word_phonemes[0] == phonSWITCH) | |||||
| return(0); | |||||
| found = TranslateNumber(word,phonemes,&dictionary_flags,wflags); | found = TranslateNumber(word,phonemes,&dictionary_flags,wflags); | ||||
| } | } | ||||
| } // end of EmbeddedCommand | } // end of EmbeddedCommand | ||||
| int Translator::TranslateChar(char *ptr, int prev_in, unsigned int c, unsigned int next_in, int *insert) | |||||
| {//===================================================================================================== | |||||
| // To allow language specific examination and replacement of characters | |||||
| int SubstituteChar(Translator *tr, unsigned int c, unsigned int next_in, int *insert) | |||||
| {//================================================================================== | |||||
| int ix; | int ix; | ||||
| unsigned int word; | unsigned int word; | ||||
| unsigned int new_c, c2, c_lower; | unsigned int new_c, c2, c_lower; | ||||
| int upper_case = 0; | int upper_case = 0; | ||||
| static int ignore_next = 0; | static int ignore_next = 0; | ||||
| const unsigned int *replace_chars; | |||||
| if(ignore_next) | if(ignore_next) | ||||
| { | { | ||||
| } | } | ||||
| if(c == 0) return(0); | if(c == 0) return(0); | ||||
| if(langopts.replace_chars == NULL) | |||||
| if((replace_chars = tr->langopts.replace_chars) == NULL) | |||||
| return(c); | return(c); | ||||
| // there is a list of character codes to be substituted with alternative codes | // there is a list of character codes to be substituted with alternative codes | ||||
| } | } | ||||
| new_c = 0; | new_c = 0; | ||||
| for(ix=0; (word = langopts.replace_chars[ix]) != 0; ix++) | |||||
| for(ix=0; (word = replace_chars[ix]) != 0; ix+=2) | |||||
| { | { | ||||
| if(c_lower == (word & 0xffff)) | if(c_lower == (word & 0xffff)) | ||||
| { | { | ||||
| if((word >> 16) == 0) | if((word >> 16) == 0) | ||||
| { | { | ||||
| new_c = langopts.replacement_chars[ix]; | |||||
| new_c = replace_chars[ix+1]; | |||||
| break; | break; | ||||
| } | } | ||||
| if((word >> 16) == (unsigned int)tolower(next_in)) | if((word >> 16) == (unsigned int)tolower(next_in)) | ||||
| { | { | ||||
| new_c = langopts.replacement_chars[ix]; | |||||
| new_c = replace_chars[ix+1]; | |||||
| ignore_next = 1; | ignore_next = 1; | ||||
| break; | break; | ||||
| } | } | ||||
| if(upper_case) | if(upper_case) | ||||
| new_c = towupper(new_c); | new_c = towupper(new_c); | ||||
| return(new_c); | return(new_c); | ||||
| } | |||||
| int Translator::TranslateChar(char *ptr, int prev_in, unsigned int c, unsigned int next_in, int *insert) | |||||
| {//===================================================================================================== | |||||
| // To allow language specific examination and replacement of characters | |||||
| return(SubstituteChar(this,c,next_in,insert)); | |||||
| } | } | ||||
| #define RULE_LETTERGP 17 // A B C H F G Y letter group number | #define RULE_LETTERGP 17 // A B C H F G Y letter group number | ||||
| #define RULE_LETTERGP2 18 // L + letter group number | #define RULE_LETTERGP2 18 // L + letter group number | ||||
| #define RULE_CAPITAL 19 // word starts with a capital letter | #define RULE_CAPITAL 19 // word starts with a capital letter | ||||
| #define RULE_REPLACEMENTS 20 // section for character replacements | |||||
| #define RULE_NO_SUFFIX 24 // N | #define RULE_NO_SUFFIX 24 // N | ||||
| #define RULE_NOTVOWEL 25 // K | #define RULE_NOTVOWEL 25 // K | ||||
| #define RULE_IFVERB 26 // V | #define RULE_IFVERB 26 // V | ||||
| #define NUM_ROMAN 0x20000 | #define NUM_ROMAN 0x20000 | ||||
| #define NUM_ROMAN_UC 0x40000 | #define NUM_ROMAN_UC 0x40000 | ||||
| // bits0-1=which numbers routine to use. | // bits0-1=which numbers routine to use. | ||||
| // bit2= thousands separator must be space | // bit2= thousands separator must be space | ||||
| // bit3= , decimal separator, not . | // bit3= , decimal separator, not . | ||||
| // bit16=dot after number indicates ordinal | // bit16=dot after number indicates ordinal | ||||
| // bit17=recognize roman numbers | // bit17=recognize roman numbers | ||||
| // bit18=Roman numbers only if upper case | // bit18=Roman numbers only if upper case | ||||
| int numbers; | int numbers; | ||||
| // bits 1-4 use variant form of numbers before thousands,millions,etc. | // bits 1-4 use variant form of numbers before thousands,millions,etc. | ||||
| // bit7=(LANG-ru) use MB for 1 thousand, million, etc | // bit7=(LANG-ru) use MB for 1 thousand, million, etc | ||||
| // bit8=(LANG=sw) special word for 100,000s | // bit8=(LANG=sw) special word for 100,000s | ||||
| int numbers2; | int numbers2; | ||||
| int max_roman; | int max_roman; | ||||
| int thousands_sep; | int thousands_sep; | ||||
| int decimal_sep; | int decimal_sep; | ||||
| char ideographs; // treat as separate words | char ideographs; // treat as separate words | ||||
| int testing; // testing options: bit 1= specify stressed syllable in the form: "outdoor/2" | int testing; // testing options: bit 1= specify stressed syllable in the form: "outdoor/2" | ||||
| const unsigned int *replace_chars; // characters to be substitutes | const unsigned int *replace_chars; // characters to be substitutes | ||||
| const unsigned int *replacement_chars; // substitutes for replace_chars | |||||
| } LANGUAGE_OPTIONS; | } LANGUAGE_OPTIONS; | ||||