1. Rewrite cmn_rules. A vowel is spoken as Mandarin only when it is followed by a tone number; otherwise it is treated as English. This makes English words translate more correctly. 2. Fix the issue of word items in cmn_listx not taking effect. 3. Fix the dictionary_skipwords bug. It should have been injected when restructuring the method TranslateWord3. master
| @@ -1,4 +1,3 @@ | |||
| _?? @:11 // unrecognized character | |||
| //numbers | |||
| @@ -26,33 +25,34 @@ _0M1 tS;h'iE55n_| // 1,000 | |||
| _0M2 w'A51n_| // 10,000 | |||
| //_0M3 j'i51_| // 100,000,000 | |||
| //Latin letters with Chinese accent | |||
| a ei51 | |||
| b pi51 | |||
| c sei55 | |||
| d ti51 | |||
| e ji51 | |||
| f ai35f | |||
| g ts.i51 | |||
| h ei35ts.h | |||
| i ai51 | |||
| j ts.ei51 | |||
| k khei51 | |||
| l ai35l | |||
| m ai35m | |||
| n @n55 | |||
| o ou55 | |||
| p phi51 | |||
| q khiou55 | |||
| r @r35 | |||
| s ai35s | |||
| t thi51 | |||
| u jiou55 | |||
| v vei55 | |||
| w ta35pliou | |||
| x ai35ks | |||
| y wuai51 | |||
| z zi51 | |||
| // Latin letters with Chinese accent | |||
| // These entries cause single letters within an English sentence to be translated incorrectly, e.g. in "ma is a horse" the "a" would be translated as ei51. | |||
| //a ei51 | |||
| //b pi51 | |||
| //c sei55 | |||
| //d ti51 | |||
| //e ji51 | |||
| //f ai35f | |||
| //g ts.i51 | |||
| //h ei35ts.h | |||
| //i ai51 | |||
| //j ts.ei51 | |||
| //k khei51 | |||
| //l ai35l | |||
| //m ai35m | |||
| //n @n55 | |||
| //o ou55 | |||
| //p phi51 | |||
| //q khiou55 | |||
| //r @r35 | |||
| //s ai35s | |||
| //t thi51 | |||
| //u jiou55 | |||
| //v vei55 | |||
| //w ta35pliou | |||
| //x ai35ks | |||
| //y wuai51 | |||
| //z zi51 | |||
| //bopomofo letters | |||
| ㄅ po55 | |||
| @@ -93,6 +93,8 @@ z zi51 | |||
| ㄨ wu55 | |||
| ㄩ y55 | |||
| $textmode | |||
| // Most frequent pronunciations of the 3799 most common characters (from Unihan database ftp://ftp.unicode.org/Public/UNIDATA/Unihan.zip, kHanyuPinlu field with some corrections) | |||
| 涉 she4 | |||
| 礦 kuang4 | |||
| @@ -11,9 +11,9 @@ | |||
| ˋ 4 | |||
| ā a1 | |||
| á a2 | |||
| ă a3 | |||
| ǎ a3 | |||
| à a4 | |||
| // TODO: àn is not handled | |||
| ō o1 | |||
| ó o2 | |||
| ŏ o3 | |||
| @@ -48,6 +48,9 @@ | |||
| ŝ sh | |||
| .L01 j q x y | |||
| .L02 1 2 3 4 5 // tone number | |||
| .L03 a o e i u v ai ei ui ao ou iu ie ve er an en in un vn ang eng ing ong ua ue uo uai uan uang ia iao ian iang | |||
| // Note: ngK matches "ng" if not followed by a vowel | |||
| // (D will match on a digit and also if no digit is present. (special for tone | |||
| @@ -56,211 +59,159 @@ language). | |||
| // 儿 兒 erhua | |||
| .group a | |||
| ?1 a _^_EN | |||
| a A | |||
| ai ai | |||
| a1i ai55 | |||
| a2i ai35 | |||
| a3i ai214 | |||
| a4i ai51 | |||
| ao Au | |||
| a1o Au55 | |||
| a2o Au35 | |||
| a3o Au214 | |||
| a4o Au51 | |||
| a (DnK a | |||
| ch) a (DngK A | |||
| ch) a (4ngK a | |||
| ch) a (ng4K a | |||
| k) a (DngK A | |||
| _) a (DngK A | |||
| y) a iA | |||
| y) ao Au | |||
| y) a1o Au55 | |||
| y) a2o Au35 | |||
| y) a3o Au214 | |||
| y) a4o Au51 | |||
| y) a (DnK iE | |||
| y) a (DngK iA | |||
| a _^_EN | |||
| a (L02 A | |||
| ai (L02 ai | |||
| ao (L02 Au | |||
| a (nL02 a | |||
| a (ngL02 A | |||
| y) a (L02 iA | |||
| y) a (L02 Au | |||
| y) a (nL02 iE | |||
| y) a (ngL02 iA | |||
| .group b | |||
| ?1 b _^_EN | |||
| b p | |||
| @) b (K _^_EN | |||
| b _^_EN | |||
| b (L03L02 p | |||
| .group c | |||
| ?1 c _^_EN | |||
| c tsh | |||
| ch (+ ts.h | |||
| @) c (K _^_EN | |||
| c _^_EN | |||
| c (L03L02 tsh | |||
| ch (L03L02 ts.h | |||
| .group d | |||
| ?1 d _^_EN | |||
| d t | |||
| @) d (K _^_EN | |||
| d _^_EN | |||
| d (L03L02 t | |||
| .group e | |||
| ?1 e _^_EN | |||
| e o- | |||
| e _^_EN | |||
| e (L02 o- | |||
| d) e @ | |||
| t) e @ | |||
| n) e @ | |||
| l) e @ | |||
| y) e iE | |||
| ea E | |||
| ei ei | |||
| e1i ei55 | |||
| e2i ei35 | |||
| e3i ei214 | |||
| e4i ei51 | |||
| e (Dn @ //en eng | |||
| er (K @r | |||
| e1r (K @r55 | |||
| e2r (K @r35 | |||
| e3r (K @r214 | |||
| e4r (K @r51 | |||
| ei (L02 ei | |||
| e (nL02 @ // en | |||
| e (ngL02 @ // eng | |||
| er (L02 @r | |||
| .group f | |||
| ?1 f _^_EN | |||
| f f | |||
| @) f (K _^_EN | |||
| f _^_EN | |||
| f (L03L02 f | |||
| .group g | |||
| ?1 g _^_EN | |||
| g k | |||
| @) g (K _^_EN | |||
| g _^_EN | |||
| g (L03L02 k | |||
| .group h | |||
| ?1 h _^_EN | |||
| h X | |||
| @) h (K _^_EN | |||
| h _^_EN | |||
| h (L03L02 X | |||
| .group i | |||
| ?1 i _^_EN | |||
| i i //i in ing | |||
| z) i i[ | |||
| c) i i[ | |||
| s) i i[ | |||
| h) i i. //after zh ch sh | |||
| r) i i. | |||
| ia iA | |||
| ia (DnK iE | |||
| ia (DngK iA | |||
| iao jAu | |||
| ia1o jAu55 | |||
| ia2o jAu35 | |||
| ia3o jAu214 | |||
| ia4o jAu51 | |||
| ie iE | |||
| io (DngK y | |||
| q) io (DngK u | |||
| iu iou | |||
| i _^_EN | |||
| i (L02 i | |||
| i (nL02 i // in | |||
| i (ngL02 i // ing | |||
| z) i (L02 i[ | |||
| c) i (L02 i[ | |||
| s) i (L02 i[ | |||
| h) i (L02 i. //after zh ch sh | |||
| r) i (L02 i. | |||
| ia (L02 iA | |||
| ia (nL02 iE | |||
| ia (ngL02 iA | |||
| iao (L02 jAu | |||
| ie (L02 iE | |||
| io (ngL02 y | |||
| q) io (ngL02 u | |||
| iu (L02 iou | |||
| .group j | |||
| ?1 j _^_EN | |||
| j tS; | |||
| @) j (K _^_EN | |||
| j _^_EN | |||
| j (L03L02 tS; | |||
| .group k | |||
| ?1 k _^_EN | |||
| k kh | |||
| @) k (K _^_EN | |||
| k _^_EN | |||
| k (L03L02 kh | |||
| .group l | |||
| ?1 l _^_EN | |||
| l l | |||
| @) l (K _^_EN | |||
| @) l (v l | |||
| l _^_EN | |||
| l (L03L02 l | |||
| .group m | |||
| ?1 m _^_EN | |||
| m m | |||
| @) m (K _^_EN | |||
| m _^_EN | |||
| m (L03L02 m | |||
| .group n | |||
| ?1 n _^_EN | |||
| n n | |||
| ng (K N // consider (ng+vowel) as (n g+vowel) ?? | |||
| n _^_EN | |||
| n (L03L02 n | |||
| n (L02 n | |||
| ng (L02 N // consider (ng+vowel) as (n g+vowel) ?? | |||
| _) ng (K N- // syllablic [N] | |||
| .group o | |||
| ?1 o _^_EN | |||
| o o | |||
| ou ou | |||
| o1u ou55 | |||
| o2u ou35 | |||
| o3u ou214 | |||
| o4u ou51 | |||
| ong (K ong | |||
| o1ng (K ong55 | |||
| o2ng (K ong35 | |||
| o3ng (K ong214 | |||
| o4ng (K ong51 | |||
| y) o io | |||
| y) ou iou | |||
| y) o1u iou55 | |||
| y) o2u iou35 | |||
| y) o3u iou214 | |||
| y) o4u iou51 | |||
| o _^_EN | |||
| o (L02 o | |||
| ou (L02 ou | |||
| ong (L02 ong | |||
| y) o (L02 io | |||
| y) ou (L02 iou | |||
| .group p | |||
| ?1 p _^_EN | |||
| p ph | |||
| @) p (K _^_EN | |||
| p _^_EN | |||
| p (L03L02 ph | |||
| .group q | |||
| ?1 q _^_EN | |||
| q tS;h | |||
| @) q (K _^_EN | |||
| q _^_EN | |||
| q (L03L02 tS;h | |||
| .group r | |||
| ?1 r _^_EN | |||
| r z. | |||
| r (K @r11 | |||
| r _^_EN | |||
| r (L03L02 z. | |||
| .group s | |||
| ?1 s _^_EN | |||
| s s | |||
| sh (+ s. | |||
| @) s (K _^_EN | |||
| s _^_EN | |||
| s (L03L02 s | |||
| sh (L03L02 s. | |||
| .group t | |||
| ?1 t _^_EN | |||
| t th | |||
| @) t (K _^_EN | |||
| t _^_EN | |||
| t (L03L02 th | |||
| .group u | |||
| ?1 u _^_EN | |||
| u u | |||
| ua wA | |||
| ua (DnK ua | |||
| g) ua (DnK wa | |||
| k) ua (DnK wa | |||
| ua (DngK wA | |||
| uai uai | |||
| ua1i uai55 | |||
| ua2i uai35 | |||
| ua3i uai214 | |||
| ua4i uai51 | |||
| ue yE //üe | |||
| ui uei | |||
| sh) ui wei | |||
| u (DnK u@ | |||
| k) u (DnK wu@ | |||
| _h) u (DnK wu@ | |||
| t) u (DnK wu@ | |||
| uo uo | |||
| sh) ui rei | |||
| sh) u (DnK wo- | |||
| sh) uo wo | |||
| u _^_EN | |||
| u (L02 u | |||
| ua (L02 wA | |||
| ua (nL02 ua | |||
| g) ua (nL02 wa | |||
| k) ua (nL02 wa | |||
| ua (ngL02 wA | |||
| uai (L02 uai | |||
| ue (L02 yE // üe | |||
| ui (L02 uei | |||
| sh) ui (L02 wei | |||
| u (nL02 u@ | |||
| k) u (nL02 wu@ | |||
| _h) u (nL02 wu@ | |||
| t) u (nL02 wu@ | |||
| uo (L02 uo | |||
| sh) ui (L02 rei | |||
| sh) u (nL02 wo- | |||
| sh) uo (L02 wo | |||
| L01) u y /// j,q,x,y + u | |||
| L01) u (DnK y@ | |||
| @@ -268,60 +219,44 @@ language). | |||
| L01) ua (DnK y& | |||
| L01) ue yE | |||
| .group ü | |||
| ?1 ü _^_EN | |||
| ü y | |||
| üe yE | |||
| ü _^_EN | |||
| ü (L02 y | |||
| üe (L02 yE | |||
| .group v //variant of ü | |||
| ?1 v _^_EN | |||
| v v // foreign words | |||
| l) v y //ü | |||
| n) v y //ü | |||
| l) ve yE //üe | |||
| n) ve yE //üe | |||
| v _^_EN | |||
| l) v (L02 y // ü | |||
| n) v (L02 y // ü | |||
| l) ve (L02 yE // üe | |||
| n) ve (L02 yE // üe | |||
| .group w | |||
| ?1 w _^_EN | |||
| @) w (K _^_EN | |||
| wa wA //wa wan wang | |||
| wai wai | |||
| wa1i wai55 | |||
| wa2i wai35 | |||
| wa3i wai214 | |||
| wa4i wai51 | |||
| wei wei | |||
| we1i wei55 | |||
| we2i wei35 | |||
| we3i wei214 | |||
| we4i wei51 | |||
| we wu@ //wen weng | |||
| wo wo | |||
| wu wu | |||
| w _^_EN | |||
| wa (L02 wA // wa | |||
| wa (nL02 wA // wan | |||
| wa (ngL02 wA // wang | |||
| wai (L02 wai | |||
| wei (L02 wei | |||
| we (nL02 wu@ // wen | |||
| we (ngL02 wu@ // weng | |||
| wo (L02 wo | |||
| wu (L02 wu | |||
| .group x | |||
| ?1 x _^_EN | |||
| x S; | |||
| @) x (K _^_EN | |||
| x _^_EN | |||
| x (L03L02 S; | |||
| .group y | |||
| ?1 y _^_EN | |||
| y j //before a o e i | |||
| y (u //NULL before u | |||
| y (uK ; | |||
| y (uDn ; | |||
| yo (DngK yu | |||
| y (K i // foreign words | |||
| @) y (K _^_EN | |||
| y _^_EN | |||
| y (L03L02 j | |||
| .group z | |||
| ?1 z _^_EN | |||
| z ts | |||
| zh (+ ts. | |||
| @) z (K _^_EN | |||
| z _^_EN | |||
| z (L03L02 ts | |||
| zh (L03L02 ts. | |||
| //tone number | |||
| // tone number | |||
| .group | |||
| 1 55 | |||
| 2 35 | |||
| @@ -1,6 +1,7 @@ | |||
| //From Unihan database ftp://ftp.unicode.org/Public/UNIDATA/Unihan.zip kMandarin entries (except the ones that have kHanyuPinlu, which are in zh_list) | |||
| //with compounds from CC-CEDICT http://www.mdbg.net/chindict/chindict.php?page=cedict and some corrections | |||
| //21611 single characters plus 36500 compound exceptions (includes 320 added 'yi' and 10721 added 'bu' exceptions, and 9700 extra 2-syllable words for 3rd-tone sandhi blocking) | |||
| $textmode | |||
| 〇 ling2 | |||
| 㐀 qiu1 | |||
| 㐁 tian3 | |||
| @@ -10,8 +10,6 @@ pitch 80 118 | |||
| dict_min 100000 | |||
| dictrules 1 // interpret latin characters as English text | |||
| //for some dialects | |||
| //[en]: replace ng with n | |||
| @@ -1590,14 +1590,14 @@ Translator *SelectTranslator(const char *name) | |||
| tr->langopts.our_alphabet = 0x3100; | |||
| tr->langopts.word_gap = 0x21; // length of a final vowel is less dependent on the next consonant, don't merge consonant with next word | |||
| tr->langopts.textmode = true; | |||
| tr->langopts.listx = 1; // compile *_listx after *_list | |||
| if (name2 == L3('y', 'u', 'e')) { | |||
| tr->langopts.listx = 1; // compile zh_listx after zh_list | |||
| tr->langopts.numbers = NUM_DEFAULT; | |||
| tr->langopts.numbers2 = NUM2_ZERO_TENS; | |||
| tr->langopts.break_numbers = BREAK_INDIVIDUAL; | |||
| } | |||
| break; | |||
| } | |||
| break; | |||
| default: | |||
| tr->langopts.param[LOPT_UNPRONOUNCABLE] = 1; // disable check for unpronouncable words | |||
| break; | |||
| @@ -164,6 +164,10 @@ int TranslateWord(Translator *tr, char *word_start, WORD_TAB *wtab, char *word_o | |||
| wtab->flags &= ~FLAG_FIRST_UPPER; | |||
| } | |||
| // dictionary_skipwords is a global variable, and TranslateWord3 resets it to 0 at the beginning. | |||
| // However, the value of dictionary_skipwords is still needed outside this scope, | |||
| // so we back it up here and restore it at the end of this scope. | |||
| int skipwords = dictionary_skipwords; | |||
| TranslateWord3(tr, word_out, wtab, NULL, &any_stressed_words, current_alphabet, word_phonemes, sizeof(word_phonemes)); | |||
| int n; | |||
| @@ -182,6 +186,7 @@ int TranslateWord(Translator *tr, char *word_start, WORD_TAB *wtab, char *word_o | |||
| while (!isspace(*word_out)) ++word_out; | |||
| while (isspace(*word_out)) ++word_out; | |||
| } | |||
| dictionary_skipwords = skipwords; | |||
| } | |||
| // If the list file contains a text replacement to another | |||
| @@ -101,5 +101,7 @@ test_phon fa "j'ek j'ek" "11" | |||
| # Mandarin and Cantonese Chinese with different Latin character fallback | |||
| test_phon yue "n'ei5_| h'ou2_| m'aa5_| (en)h@1l'oU1_| D'e@1(yue)_|" "你好馬 hello there" | |||
| test_phon yue-latn-jyutping "n'ei5_| h'ou2_| m'aa5_| n'ei5_| h'ou2_| m'aa5_|" "你好馬 nei5 hou2 maa5" | |||
| test_phon cmn "n'i3_| ha11'o3_| m'a3_| (en)h@11loU11_| De@11(cmn)_|" "你好馬 hello there" | |||
| test_phon cmn-latn-pinyin "n'i3_| ha11'o3_| m'a3_| n'i35_| X'Au35_| m'A214_|" "你好馬 ni3 hao3 ma3" | |||
| test_phon cmn "ni35X'Au35_| m'A21_| (en)h@44loU11_| De@11(cmn)_|" "你好馬 hello there" | |||
| test_phon cmn-latn-pinyin "ni35X'Au35_| m'A35_| n'i35_| X'Au35_| m'A214_|" "你好馬 ni3 hao3 ma3" | |||
| test_phon cmn "thiE55nt'i51_|" "天地" | |||