| @@ -5,7 +5,7 @@ LOCAL_CFLAGS = -std=c11 | |||
| # ucd-tools wide-character compatibility support: | |||
| UCDTOOLS_SRC_PATH := ../../ucd-tools/src | |||
| UCDTOOLS_SRC_PATH := ../../src/ucd-tools/src | |||
| UCDTOOLS_SRC_FILES := \ | |||
| $(subst $(LOCAL_PATH)/$(UCDTOOLS_SRC_PATH),$(UCDTOOLS_SRC_PATH),$(wildcard $(LOCAL_PATH)/$(UCDTOOLS_SRC_PATH)/*.c*)) | |||
| @@ -218,6 +218,7 @@ afganistan afg'anistan | |||
| alaska al'aska | |||
| albanië alb'A:ne@:@- | |||
| algerië alx2'e@re@:@- | |||
| alhambra al'ambra | |||
| amanzimtoti $4 | |||
| antwerpen antv&rp@n | |||
| avignon _^_FR | |||
| @@ -333,6 +334,7 @@ oberammergau o@b@r'am@rgaU | |||
| oklahoma @Ukl@h'@Uma | |||
| outeniekwa @Ut@n'ikwa | |||
| oxford _^_EN | |||
| paardeneiland pA:rd@n_'eIlant | |||
| palermo pal'&rmu | |||
| pelindaba p&l@nd'A:ba | |||
| perú p@ru | |||
| @@ -342,6 +344,7 @@ portugal pOrtyx2al | |||
| potchefstroom pOtSIfstr'o@m | |||
| rhône _^_FR | |||
| richardsbaai ritS@dsb'AI | |||
| riversdal r@v@rsdal | |||
| riviersonderend r@fi:rsOn@r_'Ent | |||
| robertson _^_EN | |||
| salvador salvadO:r | |||
| @@ -585,6 +588,7 @@ james _^_EN | |||
| jane _^_EN | |||
| janine dZ@ni:n | |||
| Jean ZA~n $capital | |||
| jeff _^_EN | |||
| jesebel je@s@b&l | |||
| jessica _^_EN | |||
| jimmy _^_EN | |||
| @@ -769,9 +773,11 @@ stockenström stOk@nstro@m | |||
| stuart _^_EN | |||
| suzanne suz'A:n | |||
| suzette suz'Et | |||
| sylvia _^_EN | |||
| tania tanja | |||
| telemann te@l@man | |||
| terblanche t@rblA:nS | |||
| thelma _^_EN | |||
| theo tiu | |||
| theron tr'On | |||
| thessalonicense tEsalo@nis'E:ns@ | |||
| @@ -816,6 +822,7 @@ vivaldi viv'aldi | |||
| viviers v@v@je@ | |||
| wilhelm v@lh&l@-m | |||
| williston _^_EN | |||
| woltemade vOlt@mA:d@ | |||
| khumalo kum'A:lu | |||
| zuma zu:ma | |||
| @@ -825,6 +832,7 @@ alibama $3 | |||
| antares $2 | |||
| beatles _^_EN | |||
| boeing _^_EN | |||
| cadillac _^_EN | |||
| checkers tSEk@rs | |||
| chevrolet _^_FR | |||
| chrysler kraIsl@r | |||
| @@ -1044,7 +1052,8 @@ fides _^_LA | |||
| forma _^_LA | |||
| grata _^_LA | |||
| habitatio _^_LA | |||
| inclusio _^_LA | |||
| inclusio _^_LA | |||
| (in camera) @n||kam@ra | |||
| (in debiti) _^_LA | |||
| (in absentia) _^_LA | |||
| (in extremis) _^_LA | |||
| @@ -1075,6 +1084,7 @@ vivos _^_LA | |||
| // main word list | |||
| aangaande $2 | |||
| aanmerklik $2 | |||
| aanstaande $2 | |||
| (a cappella) a||kap'&la | |||
| adagio ad'A:dZi;%@U | |||
| @@ -1091,6 +1101,7 @@ algaande $2 | |||
| alge alx2@ | |||
| algehele alx2@h,e@l@ | |||
| allegro al'Egru | |||
| allengs alENs | |||
| allergene $3 | |||
| almiskie $3 | |||
| alom al_'Om | |||
| @@ -1116,11 +1127,13 @@ aversie av'&rsi | |||
| babelaas bab@lA:s | |||
| barrikade $3 | |||
| beaming b@_A:m@N | |||
| bedewete be@d@ve@t@ | |||
| beide beId@ | |||
| bekaf b&kaf | |||
| belangriker b@laNr@k,Ir | |||
| bene be@n@ | |||
| beringde b@rINd@ | |||
| beswil bEsv@l | |||
| besnedene b@sne@d@n@ | |||
| bestes bEst@s | |||
| @@ -1130,6 +1143,7 @@ bewe be@v@ | |||
| bewebeen be@v@be@n | |||
| bewend be@v@nt | |||
| bewering b@ve@rIN | |||
| bilharzia b@lharsia | |||
| biopsie bi'Opsi | |||
| bomaat bo@mA:t | |||
| bordegoed bO:rd@x2ut | |||
| @@ -1155,6 +1169,7 @@ cedille s@dIl@ | |||
| charisma kar'Isma | |||
| cinsaut s@nso@ | |||
| cliché kliS'eI: | |||
| clientèle _^_FR | |||
| clivia klIvija | |||
| cognac kOn^ak | |||
| confetti $2 | |||
| @@ -1173,6 +1188,7 @@ dawidster dA:v@tst&r | |||
| déjà _^_FR | |||
| dekade dEk'A:d@ | |||
| dekreling dEkre@l@N | |||
| demensie d@me~nsi | |||
| deurentyd dy@r@nteIt | |||
| deurgaans $1 | |||
| deurkruis $2 | |||
| @@ -1219,12 +1235,14 @@ ewentwil e@v@ntv@l | |||
| exodus Eks'o@dWs | |||
| factotum $2 | |||
| faktotum $2 | |||
| fakture $2 | |||
| fetakaas fEtakA:s | |||
| figuur f@x2yr | |||
| filippense f@l@pEns@ | |||
| finalis $3 | |||
| finaliste $3 | |||
| fluktuasie $3 | |||
| fort fOrt | |||
| forte fOrt@ | |||
| @@ -1269,6 +1287,7 @@ hekke h&k@ | |||
| helaas he@l'A:s | |||
| here he@r@ | |||
| herero hEr'E:ru | |||
| herontmoeting h&r_Ontmut@N | |||
| herrysenis h&r'eIs@n@s | |||
| hierso hi:rsO | |||
| hippie _^_EN | |||
| @@ -1294,6 +1313,7 @@ inkatha iNk'A:ta | |||
| inkluis $2 | |||
| innestel InnEst@l | |||
| insomnia @nsOmnija | |||
| inteling Inte@l@N | |||
| intens @nt'Ens | |||
| intensiteit $4 | |||
| ironieë irun'i:@ | |||
| @@ -1312,6 +1332,7 @@ kaviaar kavi'A:r | |||
| kennisvaardig $1 | |||
| kimono $2 | |||
| klaasvakie $2 | |||
| kliënteel $3 | |||
| klimeid klImeIt | |||
| knapsekêrel knaps@k&:r@l | |||
| kolossense kOl@s'e~ns@ | |||
| @@ -1325,11 +1346,13 @@ kopiereg kup'ir&x2 | |||
| korswel kOrsv@l | |||
| kotiljons kOt@lj'o~ns | |||
| kritiek krIt'ik | |||
| kruisteling krYyste@l@N | |||
| kulture $2 | |||
| kunsmatige kWnsm'A:t@x2@ | |||
| kwansuis $2 | |||
| kweekwal kwe@kval | |||
| landswye lantsveI@ | |||
| lasagne las'anj@ | |||
| legaat l@x2A:t | |||
| legate l@x2A:t@ | |||
| @@ -1532,6 +1555,7 @@ sonore sun'o@r@ | |||
| sonure sOn_yr@ | |||
| sover so@f&r | |||
| staccato $2 | |||
| steekhoudendheid $2 | |||
| sterwens st&rv@ns | |||
| stilswye $1 | |||
| strydros streIt_rOs | |||
| @@ -1555,6 +1579,7 @@ tevrede t@fre@d@ | |||
| thula tu:la | |||
| toegee tux2e@ | |||
| toereken ture@k@n | |||
| toleransie tOl@r'ansi | |||
| tornado $2 | |||
| totale tut'A:l@ | |||
| totsiens $2 | |||
| @@ -1566,6 +1591,7 @@ tsoenami tsun'A:mi | |||
| uitdaging YydA:x2@N | |||
| uiteraard Yyt@r_'A:rt | |||
| uitermate $3 | |||
| uitgeslotene Yytx2@slo@t@n@ | |||
| uniforme $3 | |||
| vaarwel fA:rv'&l | |||
| @@ -73,6 +73,7 @@ | |||
| @@@smokkel) ary %ar%eI // default stress: diamant-/drank-/dwelm-/goud-/kokaïensmokkelary, etc. | |||
| amarula %am%arul%a // fix stress and a sounds: amarula and compounds | |||
| a (ment %a // shorten a sound: perkament/testament and compounds | |||
| _) arendag (CAC %A:r@ntax2 // fix e sound: arendagtig/-e/-heid | |||
| arends A:r@nts_ // fix e sound: arendsoog/-kloue/-nes | |||
| argen (tA %arx2@n // move default stress: Argentinië/Argentyns/-e | |||
| a (riA 'A: // akwarium/barium/estuarium/herbarium/seminaria | |||
| @@ -310,7 +311,7 @@ | |||
| attaché _%at%aSe@ // correct pronunciation: (handels-/inligtings-)attaché | |||
| attrib (u %atr@b // move default stress: attribuut/bute/attributêr | |||
| K) au @U // trauma/-ties/Aucamp/Paul/-a/-us | |||
| auer aU@r //Sauer etc. fixed au and e sounds in compounds. | |||
| auer aU@r //Sauer etc. fix au and e sounds in compounds. | |||
| augustus %Ox2WstWs | |||
| avokado %af%ukA:du // stress and o sounds | |||
| avokade %af%ukA:d@ // variant form of avokado | |||
| @@ -431,7 +432,8 @@ | |||
| bo (grond bo@ // fix 1st o sound: bogronds/-e | |||
| _) bo (kle bo@ // fix o sound: bokleed/boklere | |||
| bokma (kier b%Okm%a // fix stress and a sound: bokmakierie/-s/-tjie | |||
| bom (AA bOm_ // fix a sound, pause: bomaanval/-aanslag/-aard/-eenheid and compounds | |||
| bom (AA bOm_ // fix a sound, pause: bomaanval/-aard/-eenheid and compounds | |||
| bomaans (la bOm_A:ns // fix o sound: bomaanslag/-aanslae | |||
| bomaans bo@mA:ns // but fix O sound: bomaans/-e | |||
| bonde (C bOnd@ // verbondenheid/bondeldraer/gebondene/saambondelend | |||
| boos (aardig b%o@s_ // move default stress: boosaardig/-e/-heid | |||
| @@ -526,6 +528,7 @@ | |||
| bakate (l b%ak@t& // fix stress and vowel sounds: bakatel/-le/-letjie | |||
| baken bA:k@n // fix e sound: afbakening and compounds | |||
| bakte (ri b%akte@ // fix stress and e sound: bakterie/-ë | |||
| balalaika b%al%alaIk%a // fix a sounds and stress: balalaika/-s/-musiek | |||
| _) bam (boes b%am // move default stress: bamboes/-e/bamboesfluit... | |||
| ba (nalA b%a // fix stress and 1st a sound: banale/banaliteit | |||
| band (Alier b,and // fix stress and d sound: band(e/o)lier/-e | |||
| @@ -547,6 +550,7 @@ | |||
| ba (sieli b%a // fix stress and 1st a sound: basielie/-kruit, etc. | |||
| basotho b%asut%u // fix stress and vowel sounds: Basotho/-ponie and similar | |||
| basi (s bA:s@ // basis and compounds | |||
| basilie b%asil%i // fix a sound and stress: basilie/-kruid, etc. | |||
| bastille b%asti:l // Bastille and compounds like Bastilledag | |||
| batal (jon b%at%al // shorten 1st a sound, moved stress: bataljon and compounds | |||
| ba (tik b%a // fix stress and a sound: batik/-doek/-kuns/-werk | |||
| @@ -646,6 +650,8 @@ | |||
| bere (_ be@r@ //tediebere pandabere etc. | |||
| _) ber (C b&r // fix e sound: Bert/Berta/Bertie/Bertus/berke/-boom | |||
| beste (_ bEst@ // fix e sound: beste/allerbeste/naasbeste/-s | |||
| beton (C@ b@tOn // split ng sounds: betongebou/-gietsel/-gruis | |||
| beton (inC@ b@t'On // fix O sound: betoninrigting/-ingenieur | |||
| be (weging b@ // draaibeweging/swaaibeweging | |||
| bewende be@v@nd@ // first e pronounced long | |||
| bewe (rig be@v@ // fix e sound and stress: bewerig/-e/-heid | |||
| @@ -749,6 +755,7 @@ | |||
| dia (C d%i%a // diabeet/diafragma/diagnose/dialek/dialoog/diamant | |||
| diaken d%iA:k@n // diaken and compounds | |||
| diako (nie d%i%ak%u // fix stress and o sound: diakonie/-ë | |||
| diende dind@ // fix last e sound in many compounds of bediende: bediendekamer | |||
| diens (willi d%ins // move default stress: dienswillig/-e/-heid | |||
| dieper (@ dip@r // fix e sound: diepere/dieperliggend/-e | |||
| digi (ta d%ix2%i // move default stress: digitaal/digitale | |||
| @@ -983,6 +990,7 @@ | |||
| @C) e (reekK @ // fix e sound: duine-/rotsereeks | |||
| aai) e (C @ // fix e sound: baaierd/waaierstert/paaiement/compounds starting with baaiers- | |||
| lat) eres @r@s //lateres | |||
| @C) erm (K &r@-m // a catch-all for words ending in -erm(s) | |||
| fp) ers (_ &rs // fix e sound: dof-/gif-/olyf-/tydskrifpers, etc. | |||
| bloup) ers &:rs // fix e sound: bloupers | |||
| iew) ers (ter @rs // fix e sound: (l)iewerster | |||
| @@ -1074,6 +1082,7 @@ | |||
| p) e (talje @ // fix stress and 1st e sound: petalje and compounds | |||
| l) e (moen @ //fix lemoen and compounds | |||
| Cy) e (C+ @ // fix connecting e sound in compounds like byekorf/skilderyemuseum, etc. | |||
| effe (kleur Ef@ // fix stress and 2nd e sound: effekleur(ig/-e) | |||
| eier eI@r // eiergeel/eierwit/leierfiguur/leiergroep | |||
| @C) ei (land _,eI // insert short pause: skiereiland and many -eiland compounds | |||
| @) ei (sen _'eI // (on)veeleisend/spoedeisend | |||
| @@ -1589,6 +1598,7 @@ | |||
| flu (we fl%y // move default stress: fluweel/fluwele and compounds | |||
| _) fok (o fOk_ // fix o sound, insert short break: fokop/fokof | |||
| folio fo@li_u // fix o sounds: folio and compounds | |||
| fondsw fOntsv // fix v sound: fondswerwing & compounds/-waardes | |||
| fondue f%Ondy // fix stress and ue sound: fondue and compounds | |||
| fone (tiek f%o@n@ // fix e sound: fonetiek and compounds | |||
| fone (tie f%une@ // fix stress and o sound: foneties/-e | |||
| @@ -1602,6 +1612,7 @@ | |||
| formi (da f%Orm%i // move default stress: formidabel/-e | |||
| formu (lier f%Orm%y // move default stress: formulier/-e/-boek | |||
| _) for (se_ f'Or // stress back to 1st slb; se rule: _) for (C | |||
| forse (nd fOrs@ // fix e sound: forsend/-e | |||
| fos (fa f%Os // move default stress: fosfaat/fosfate | |||
| fo (ssiel f%O // move default stress: fossiel/-e and compounds | |||
| foto fo@tu | |||
| @@ -1694,6 +1705,7 @@ | |||
| _) gra (na x2r@ // granaat(boom)/granate | |||
| grandi (o x2r%and%i // move default stress: grandioos/-ose | |||
| _) gra (vA x2r%a // fix stress and a sound: gravin/-ne/gravure | |||
| grenadella gr@n@d&la // fix stress and vowels: grenadella and compounds like -heining | |||
| griekwa x2rikwa // fix w sound and a sound in compounds: Griekwa/-land, etc. | |||
| ouCa) gr (ootjie x2r' // move default stress: ouma/oupagrootjie/-s | |||
| _) graad (e x2r%A:t_ // fix stress and d sound: graadeen(tjies)/-elfs | |||
| @@ -1726,6 +1738,7 @@ | |||
| gegesel x2@x2e@s@l // fix middle e sound | |||
| K) gele (_ x2e@l@ // fix stress and e sound: (eier)gele | |||
| _) geler x2e@l@r // stress and 1st e sound: geler/gelerig | |||
| _) gell x2&l | |||
| gemel (d x2@m&l // fix e sound: bo-/bowe-/laas-/meergemeld/-e | |||
| gems x2Ems // fix e sound: (baster)gemsbok/-bul/-ooi, etc. | |||
| gene (_ x2e@n@ // gene/diegene | |||
| @@ -1741,7 +1754,7 @@ | |||
| _) ge (ossP2 x2@ // but ge- prefix: geossilleer/geossifiseer, etc. | |||
| ni) ge (ri x2'e@ //nigeriese etc. | |||
| gese (_ x2'e@s@ //Portugese, and others | |||
| gesp (etjie x2Esp // fix e sounds: gespetjie/-s | |||
| gespe (_ x2Esp@ // gespe and compounds | |||
| gespes (_ x2Esp@s // gespes and compounds | |||
| gewens (g x2e@v@ns // (on)vergewensgesind/-e/-heid | |||
| @@ -1847,6 +1860,7 @@ | |||
| hart (stogte_ h%art // move default stress: hartstogtelik | |||
| hart (stogte_N hart // restore default stress: hartstogte | |||
| ha (we hA: // fix stress and a sound: hawearbeider/lewendehaweafdeling | |||
| _) hef (a hEf_ // fix e sound, insert break: hefapparaat/-arm(s) | |||
| hia (sint h%ij%a // fix stress: hiasint/-e and compounds | |||
| _) hi (bis h%i // move default stress: hibiskus/-se and compounds | |||
| hierna (maal h%i:rnA: // fix stress and a sound: hiernamaals/-e | |||
| @@ -1984,6 +1998,7 @@ | |||
| _) idi (o %id%i // idioom/idiome/idioot | |||
| _) id (A %id // idille/idillies/ideëryk | |||
| &l) iker (C @k@r // fix i sound: menslikerwys/redelikerwyse, etc. | |||
| &l) iker (_ @k@r // fix i sound: afstootliker/(ge)redeliker/onberispeliker | |||
| illumi (nA %il%um%i // fix stress and vowel sounds: illuminasie/illumineer/illuminati | |||
| illu (si %ily // illusie/-s/illusief | |||
| illu (strA %il%W // fix i sound: illustreer/illustrering/illustrasie | |||
| @@ -2114,6 +2129,7 @@ | |||
| _) in (a@P2 In // inakkuraat/inaktief/inaktiwiteit | |||
| _) inbe (lC Inb%& // fix e sound in compounds like inbelprogram | |||
| indone (si @nd%uni: // fix stress and vowel sounds: Indonesië/Indonesiese | |||
| s) in (gestel @n // split n g: compounds with ingesteldheid/winsingestelde | |||
| _) in (oe In_ // fix i sound, pause: inoefen/-oes and derivatives | |||
| invest (eer @nv%Est // fix stress, v and e sounds: investeer/-der | |||
| investe (r@ @nv%Este@ // fix e sound: (kapitaal)investering/investerende | |||
| @@ -2324,10 +2340,11 @@ | |||
| krieketw krik@tv //krieket followed by w in compounds always v | |||
| krokodi (l kr%Ok@dI // (wyfie)krokodil/-le | |||
| kro (niek kr%u // fix stress and o sound: kroniek and many compounds | |||
| kruger kr'Y@@r | |||
| ku (ba@ k%y // move default stress: kubaan/kubane | |||
| ku (biek k%y // move default stress: kubiek/-e/-getal | |||
| _) kuber kyb@r // fixed e sound: kuberruim(te) | |||
| _) kuber kyb@r // fix e sound: kuberruim(te) | |||
| kulin k%Wl%in // Move default stress and fix u sound: kulinêr/-e | |||
| @) kundi (g k'Wnd@ // wiskundige/onoordeelkundigheid and many similar | |||
| kurwe kWrv@ // fix e sound: kurwes/skurwebas/skurwebek/skurwepadda | |||
| @@ -2364,6 +2381,7 @@ | |||
| kafe (te k%af@ // fix stress and vowel sounds: kafeteria and compounds | |||
| kafe (ï k%af%i // fix stress and vowel sounds: kafeïene and compounds | |||
| ka (jak k%a // fix stress and 1st a sound: kajak/-ke/-vaarder | |||
| kakao k%akA:w // fix stress and vowel sounds: kakao and many compounds | |||
| kake (C kA:k@ // kakebeen/skakelaar/skakelbord/skakelfunksie | |||
| kalahari kalah'A:ri // stress: Kalahari/-sand/-woestyn | |||
| _) ka (lAnC k%a // kalender and compounds/kalant/kalander and compounds | |||
| @@ -2544,6 +2562,7 @@ | |||
| kontrasep k%Ontr%asEp // fix stress and e sound: kontrasepsie and derivatives | |||
| kontrover (s k%Ontr%uv&r // o and v sounds: kontroversie/kontroversieel | |||
| kop (o kOp? // fix o sound: koponderstebo/kopomdraai/kopoperasie/gryskoponderwyser/poenskopolifant | |||
| kop (agtig k%Op_ // fix o sound, insert break: hamer-/spinnekop-/penkopagtig(e(s)) | |||
| kopu (lA k%Op%y // fix o sound: kopulasie/kopuleer and derivatives | |||
| _) kor (dA k%Or // kordaat/kordon | |||
| ko (rint k%u // fix stress and o sound: korint/-e and compounds | |||
| @@ -2656,6 +2675,7 @@ | |||
| loboto (mie l%ub%Ot%u // fix stress and o sounds: lobotomie | |||
| lo (ja l%u // fix stress and o sound: lojale/lojaliteit | |||
| lo (kalA l%u // fix stress and o sound: lokale/ontvangslokale, etc. | |||
| loke (t l%ukE // fix stress and vowel sounds: many compounds with loket | |||
| lom (bardA l%Om // move default stress: Lombardies/-e/Lombarde | |||
| _) lore (C@ lo@r@ // fix e sound: verloregaan/verloregoederekantoor/Verlorerivier | |||
| _) losge (@P5 l'Osx2@ | |||
| @@ -2805,6 +2825,7 @@ | |||
| medisyne m@d@seIn@ //medisyne and compounds | |||
| meganies m@x2'A:nis | |||
| _) meege (@P5 m'e@x2@ | |||
| _) meegewe (nd me@x2e@v@ // fix e sounds and stress: meegewend(e) | |||
| me (juf m@ // move default stress and shorten e sound | |||
| me (laats m@ // fix stress and e sound: melaats/-e/-heid | |||
| melancholie (_N m%El%aNk%o@li // fix stress and 1st e sound: melancholie | |||
| @@ -2946,6 +2967,7 @@ | |||
| morf (otomie m%Orf // move default stress: morfotomie | |||
| _) morr (i mOr // restore default stress: morrig/morrie/-doring | |||
| _) mors (@ mOrs // restore default stress: morsaf/morsdood/morsig | |||
| mos (agtig mOs_ // fix o sound, insert break: (kos)mosagtig(e) | |||
| mosam (biek m%o@s%am // move default stress: Mosambiek/-er/-se | |||
| mosa (ïek m%o@s%a // move default stress: mosaïek and compounds | |||
| mo (skee m%O // move default stress: moskee/-s and compounds | |||
| @@ -3035,6 +3057,7 @@ | |||
| ne (anderCa n%i // move default stress: Neander(d/t)al/-ler | |||
| neger (in n%e@x2@r // move default stress: negerin/-ne | |||
| ne (gosie n@ // fix stress and e sound: negosie/-ware, etc. | |||
| nek (_ n&k // catch-all for words ending in -nek: koedoe-/swaannek | |||
| nek (om n&k_ // fix e sound, insert pause: nekom(ge)draai | |||
| nekta (rien n%Ekt%a // fix stress and a sound: nektarien/-perske, etc. | |||
| neo (li n%i%u // fix stress and vowel sounds: neolities/-e/neolitikum | |||
| @@ -3080,6 +3103,7 @@ | |||
| nood (lotti n%o@t // move default stress: noodlottig/-e | |||
| nood (saak n%o@t // move default stress: noodsaaklik/-e/-heid, vs. noodsaak | |||
| nood (saak_N no@t // restore default stress: (ge)noodsaak | |||
| nooien (tjie noIN // remove the e sound: nooientjie(s) and compounds | |||
| noord (oos n%o@rt_ // move default stress: noordoos/-te/-telik/-e | |||
| nor (ma@ n%Or // normaal/normaalweg/normale/abnormaal/-ale | |||
| nostal (gie_N n%Ost%al // stress on last slb.: nostalgie | |||
| @@ -3417,6 +3441,7 @@ | |||
| ooi oI | |||
| ooy oI | |||
| oodjie oIci | |||
| CC) ool (A o@l_ // insert break: skooluur/-ure, steenkooluitvoer, but not: Karoolug | |||
| ootjie oIci | |||
| oontjie oINki | |||
| oondjie oINki | |||
| @@ -3483,6 +3508,7 @@ | |||
| oot (moedig %o@t // move default stress: ootmoedig/-e/-heid | |||
| .group op | |||
| _) opaal %o@pA:l // fix o sound, remove break: opaal and compounds | |||
| opaat up'A:t // homeopaat/psigopaat and similar | |||
| opatie upat'i // homeopatie/neuropatie and similar | |||
| opaties up'A:tis // psigopaties/osteopaties and similar | |||
| @@ -3578,6 +3604,7 @@ | |||
| pol (vy p%Ol // move default stress: polvy/-e and compounds | |||
| pomelo p%ume@l%u // fix stress and o sounds: pomelo(sap/-drankie...) | |||
| _) pon (dok p%On // move default stress: pondok/-ke/-kie | |||
| pop (agtig p%Op_ // fix o sound, insert break: popagtig(e) and compounds | |||
| popu ,pOpy | |||
| _) por (C %pOr //portret portaal etc. | |||
| por (ie p%o@r // move default stress: porie/-ë | |||
| @@ -4118,6 +4145,7 @@ | |||
| sker (muts sk%&r // move default stress: (ge)skermutsel/skermutseling/-e | |||
| skerpi (oen sk%&rp%i // move default stress: skerpioen/-e and compounds | |||
| skilder (y sk@ld@r // move stress to y: skildery and compounds like skilderymuseum | |||
| skim (agtig sk@m_ // fix i sound, insert break: skimagtig(e) | |||
| skisofr (e sk%is%ufr // fix stress and o sound: skisofreen/skisofrene | |||
| skle (rose skl@ // fix stress and e sound: sklerose and compounds | |||
| _) skok (AP4 sk''Ok_ // fix o sound and stress: skokaankondiging/-effek/-insluiting/-onthulling... | |||
| @@ -4252,6 +4280,7 @@ | |||
| _) su (meri s%u // fix stress and u sound: sumeries/-e | |||
| su (mmier s%W // move default stress: summier/-e | |||
| super (A s''yp@r_ // fix stress, break in compounds like superintelligent | |||
| superi (A s%up%e@r%i // fix stress and vowel sounds: superieur/superioriteit | |||
| superintendent s,upr@nt%EndEnt // fix stress and vowel sounds: superintendent and compounds | |||
| surro (ga s%Wr%u // fix stress and o sound: surrogaat/surrogate and compounds | |||
| su (saC s%u // fix stress and u sound: susan/-na/susara | |||
| @@ -4561,7 +4590,7 @@ | |||
| toe (riste@ t%u // move default stress: toeristebedryf/-sentrum and similar | |||
| toer (n t%ur // move default stress: compounds of toernooi | |||
| toere (_ tu:r@ // restore default stress | |||
| toi (let t%OI // move default stress: toilet and compounds | |||
| toilet t%OIlEt // move default stress: toilet and compounds: toiletartikel/-emmer/-opsigter | |||
| tokke (lo t%Ok@ // move default stress: tokkelos/-sie/tokkelok and compounds | |||
| tok (tokk t%Ok // move default stress: toktokkie/-s/-spelery, etc. | |||
| tombola t%Ombo@l%a // move default stress: tombola and compounds | |||
| @@ -4574,6 +4603,7 @@ | |||
| @) toris (_ t'o@r@s // fix stress: pectoris/klitoris | |||
| _) tor (nyn t%Or // move default stress: tornyn/-e and compounds | |||
| _) tos (ka t%Os // move default stress: Toskaanse/Toskane | |||
| tser (tjie ts@r // fix e sound: (skoen)poetsertjie/weerkaatsertjie | |||
| ttel t@l // many compounds of bottel/skottelgoed | |||
| tuberkulose t%yb@rk%ylo@s@ // fix stress; e sound in compounds: tuberkulose/-behandeling | |||
| tug (A tWx2_ // fix u sound: (on)tugondersoek/-oortreding/-ordonnansie | |||
| @@ -4875,6 +4905,7 @@ | |||
| _) vanklik faNkl@k // (on)ontvanklik/-e/-er/-heid | |||
| vanself (spr f%ans%&lf // move default stress: vanselfsprekend/-e/-heid | |||
| vari (A v%ar%i // fix v sound and stress: variasie/-s/varieer | |||
| va (sal v%a // fix stress and v and a sounds: vasal/-le | |||
| vaseline v%as@lin // fix stress, v and vowel sounds: vaseline/-bottel, etc. | |||
| vasste (l fast& // fix e sound: vasstel(ling/-lende) | |||
| _) vat (A@ fat_ // fix a sound: vatafstand/-orgaan | |||
| @@ -4943,9 +4974,11 @@ | |||
| ve (l f& // maagvel, stress on 1st slb | |||
| @) vel (A fe@l // aanbeveling/aanbevelingsbrief | |||
| vel (djie f&l // fix -djie sound: veldjie(s) and compounds | |||
| veld (C f< // fix d sound: veldreuk/-radio/-rantsoen, etc. | |||
| veld (eks f<_ // fix d sound, pause: veldekskursie/-ekspedisie/-s | |||
| veld (o f<_ // fix d sound, pause: veldorgideë/-opsigter/-oppervlakte/-opname, etc. | |||
| veld (t f&l // eliminate double t sound: veldtog and many compounds, Langeveldt, Springveldt | |||
| vele f'e@l@ | |||
| ven (detta v%En // fix stress and v sound: vendetta and compounds | |||
| ven (dusie f@n // fix stress and e sound: vendusie and compounds | |||
| @@ -5048,7 +5081,7 @@ | |||
| vol (kome f%Ol | |||
| vo (llA_ fO // volle/vollê/Volla - exception to: vo (lC f%O | |||
| volle (dig f%Ole@ // fix stress and e sound: (on)volledig/-e/-heid... | |||
| voll (engte fOlE | |||
| volle (ngte fOlE | |||
| vo (ller fO // exception to: vo (lC f%O | |||
| volles (_ fOl@s // fix stress and e sound: volles/passievolles, etc. | |||
| vol (hou_ fOl // exception to: vo (lC f%O | |||
| @@ -2228,7 +2228,7 @@ idevice $alt6 | |||
| idly aIdlI | |||
| idiocy IdI@si | |||
| ifrog $alt6 | |||
| ignoramus Igno@r'eIm@s | |||
| ignoramus IgnO@r'eIm@s | |||
| illiterate $alt2 | |||
| illumine $alt2 | |||
| imagery ImIdZri | |||
| @@ -2716,6 +2716,7 @@ nonsense n0ns@ns | |||
| nonetheless nVnD@l'Es | |||
| nosedive noUzdaIv | |||
| nosir noUs3: | |||
| ?5 nosir noUsIR | |||
| not noUt // for noted, notable, etc | |||
| nots n0ts | |||
| (nôtre dame) noUtr@'dA:m | |||
| @@ -3420,7 +3421,7 @@ sincerest sIns'i@r@st | |||
| sinus saIn@s | |||
| siphon $alt2 | |||
| sir s,3: $only | |||
| ?5 sir s,VR $only | |||
| ?5 sir s,IR $only | |||
| siren saIr@n | |||
| site saIt // for sited | |||
| ski ski: | |||
| @@ -3455,12 +3456,13 @@ sommelier s0m'Eli@ | |||
| son sVn | |||
| sonar soUnA@ | |||
| sonny sVnI | |||
| sooth su:T $only | |||
| sopapilla soUp@p'i:@ | |||
| sope soUpeI | |||
| sorbet sO@beI | |||
| souffle su:fl'eI | |||
| soundbite saUndbaIt | |||
| souvenir su:v@n'i@3 | |||
| sopapilla soUp@p'i:@ | |||
| sorbet sO@beI | |||
| soyabean sOI@bi:n | |||
| specific sp@sIfIk | |||
| specimen spEsI2m@n | |||
| @@ -3568,7 +3570,6 @@ tallyho talI'hoU | |||
| tamale ta#mA:li | |||
| tampon tamp0n | |||
| tangerine tandZ@r'i:n | |||
| taoiseach ti:S@x | |||
| tapestry tapI#stri | |||
| tarantula t@rantS@l@ | |||
| tardis $alt1 | |||
| @@ -3913,6 +3914,7 @@ yer j3 $u+ | |||
| ye ji: $u+ | |||
| yea jeI | |||
| yessir jEss3: | |||
| ?5 yessir jEssIR | |||
| yoghurt j0g3t | |||
| ?3 yoghurt joUg3t | |||
| ?3 yogurt joUg3t | |||
| @@ -4546,8 +4548,6 @@ Annise a#ni:s | |||
| ?!3 Anthony ant@ni | |||
| Anton ant0n | |||
| Anya anj@ | |||
| Aoife i:f@ | |||
| Aoiffe i:f@ | |||
| Aphrodite afr@d'aIti | |||
| Archibald A@tSIbO:ld | |||
| Archie A@tSi | |||
| @@ -4898,7 +4898,6 @@ Sabine sa#b'i:n | |||
| Salman sa#lmA:n | |||
| Samantha sa#manT@ | |||
| (Santa claus) s'ant@||kl'O:z | |||
| Saoirse si@S@ | |||
| Sarah se@r@ | |||
| Sarisa $alt3 | |||
| Seamus SeIm@s | |||
| @@ -4912,8 +4911,8 @@ Sheila Si:l@ | |||
| Simon saIm@n | |||
| Sinead SI2neId | |||
| Sinéad SI2neId | |||
| Siobhan S@vO:n | |||
| Siobhán S@vO:n | |||
| Siobhan SI2vO:n | |||
| Siobhán SI2vO:n | |||
| Sonia s0nj@ | |||
| Sophia soUf'i@ | |||
| Sophie soUfi | |||
| @@ -150,12 +150,16 @@ | |||
| sw) a (m_ a | |||
| sw) a (nk a | |||
| ao eI0 | |||
| mh) ao eI // Irish, e.g. 'mhaol' /weIl/ | |||
| m) ao aU | |||
| p) ao aU | |||
| t) ao aU | |||
| ao (_ =aU | |||
| aois (_ i:S | |||
| aoise (_ i:S@ | |||
| ao (ism aU | |||
| ao (ist aU | |||
| aoi (C i: // Irish, e.g. 'Aoife' /i:f@/ | |||
| aoir (C e@ // Irish, e.g. 'Saoirse' /se@S@/ | |||
| ?3 aoir (C 3: // Irish, e.g. 'Saoirse' /s3:S@/ | |||
| g) ao (l eI@ | |||
| aor eI'o@ | |||
| m) ao (ri aU | |||
| @@ -180,7 +184,8 @@ | |||
| _n) a (tional a | |||
| @) a (tious 'eI | |||
| ell) a (trix @ | |||
| a (triC 'eI | |||
| a (trix 'eI | |||
| a (trice 'eI | |||
| n) a (tur eI | |||
| n) a (tura a | |||
| &) a (ture_ @ | |||
| @@ -512,7 +517,7 @@ | |||
| _m) ag (ell a#dZ | |||
| Cp) age (_ eIdZ | |||
| pp) age (_ I2dZ | |||
| _ant) ag 'ag | |||
| _ant) ag (on 'ag | |||
| enr) ag (e_ 'eIdZ | |||
| outr) ag (e_ eIdZ | |||
| der) ag (e_ eIdZ | |||
| @@ -1463,7 +1468,7 @@ _it_separ) ate (_ @t | |||
| may) be (_ bi: | |||
| _) be (CA bI# | |||
| _) bete (lg bi:t@ | |||
| _) be (C% bE | |||
| _) be (C%+ bE | |||
| _) be (atiC b%i: | |||
| _) be (b bi: | |||
| _) be (cl bI# | |||
| @@ -1477,7 +1482,7 @@ _it_separ) ate (_ @t | |||
| _) be (kn bI# | |||
| _) belarus bEl@r'u:s | |||
| _) be (lC bE | |||
| _) be (llig bI# | |||
| _) be (llig+ bI# | |||
| _) be (re bE | |||
| _) be (sC bI# | |||
| _) be (stia bE | |||
| @@ -2150,6 +2155,8 @@ _it_separ) ate (_ @t | |||
| e (Cical 'E | |||
| e (CiuB i: | |||
| &) e (_ | |||
| aoiC) e (_ @ // Irish, e.g. 'Aoife' /i:f@/ | |||
| aoiCC) e (_ @ // Irish, e.g. 'Saoirse' /se@S@/ | |||
| acB) e (_ %I | |||
| XC) e (_N i: | |||
| vert) e (b I | |||
| @@ -2378,6 +2385,7 @@ _it_separ) ate (_ @t | |||
| y) ed (_S2v d# | |||
| debut) ed (_S2 d# | |||
| edly (_S4m I#dl%i | |||
| eg) edly (_S3m I#dl%i | |||
| c) ed (e_ 'i:d | |||
| p) ed (e_ i:d | |||
| p) edal Ed@L | |||
| @@ -2567,6 +2575,8 @@ _it_separ) ate (_ @t | |||
| en (core 0n | |||
| &) ency (_ @ns%i | |||
| ency (cli %EnsI | |||
| _) en (dg@ %En | |||
| _) en (dp@ %En | |||
| k) en (d_ En | |||
| s) en (d_ En | |||
| t) en (d_ En | |||
| @@ -2771,7 +2781,7 @@ _it_separ) ate (_ @t | |||
| exp) eri (en i@rI2 | |||
| XC) er 3: | |||
| th) er (@ 3: | |||
| h) er (@ %3 | |||
| h) er (nan %3 | |||
| X) er (A E#r | |||
| _h) eretical I#rEtIk@L | |||
| _qu) er 3: | |||
| @@ -2890,6 +2900,7 @@ _it_separ) ate (_ @t | |||
| &z) es (_S2 %I#z | |||
| &C) es (_S1i z | |||
| xus) es (_S2 %I#z | |||
| tamus) es (_S2 %I#z // hippopotamuses | |||
| es (carp I2s | |||
| es (cape %Es | |||
| es (capi %Es | |||
| @@ -3099,7 +3110,7 @@ _it_separ) ate (_ @t | |||
| &) ford (_S4 f3d | |||
| &f) ford (_S4 3d | |||
| for (see f%O@ | |||
| for (ward f'o@ // straightforward | |||
| for (ward f'O@ | |||
| ft (en f | |||
| &) ful (_S3i f@L | |||
| @@ -4288,6 +4299,8 @@ multip) ly laI | |||
| _) metall (ic m@tal | |||
| metabo m@t'ab0 | |||
| _) meta (llu m%Eta | |||
| mh (ao w // Irish, e.g. 'mhaol' /weIl/ | |||
| aoi) mh (e v // Irish, e.g. 'Caoimhe' /ki:v@/ | |||
| &) mobile m@bi:l | |||
| _) mocha moUk@ | |||
| mono (ga m@n'0 | |||
| @@ -4299,6 +4312,7 @@ multip) ly laI | |||
| &) mouth (_ m@T | |||
| &) mouth (_$w_alt1 maUT | |||
| _) multi mVlti | |||
| _) multi (pl m,VltI | |||
| _) multi (@@P5 m,VltI | |||
| .group mi | |||
| @@ -5847,6 +5861,10 @@ multip) ly laI | |||
| Co) s (Er z | |||
| Co) s (En z | |||
| e) s (d z | |||
| aoi) s (e S // Irish, e.g. 'Laoise' | |||
| aoi) seach S@x // Irish, e.g. 'Taoiseach' | |||
| aoi) sigh Si // Irish, e.g. 'Taoisigh' | |||
| aoir) s (e S // Irish, e.g. 'Saoirse' | |||
| _) se (clu sI# | |||
| secur sI#kjU@ | |||
| _) se (duc sI# | |||
| @@ -5878,6 +5896,7 @@ multip) ly laI | |||
| &) s (ic_ z | |||
| n) s (ic_ s | |||
| ss (ic s | |||
| mu) s (e z | |||
| mu) s (ic z | |||
| ea) s (ie z | |||
| ea) s (il z | |||
| @@ -6082,6 +6101,8 @@ multip) ly laI | |||
| th (ill th | |||
| gh) th (A th | |||
| ee) thing DI2N | |||
| soo) th D | |||
| soo) th (say T | |||
| the (_ D | |||
| &) th (L03_ =T | |||
| ou) thed (_ Dd | |||
| @@ -1,5 +1,5 @@ | |||
| // * Farsi Language fa (or Parsi or Persian) fa_list Version 3.133 | |||
| // * This file written by Shadyar Khodayari and Ehsan Esmaili who has managed collecting exceptional words. 05-10-2017 | |||
| // * Farsi Language fa (or Parsi or Persian) fa_list Version 3.134 | |||
| // * This file written by Shadyar Khodayari and Ehsan Esmaili who has managed collecting exceptional words. 06-24-2017 | |||
| //********* | |||
| // * This program is free software; you can redistribute it and/or modify * | |||
| // * it under the terms of the GNU General Public License as published by * | |||
| @@ -554,6 +554,7 @@ _) paRAntezbaste: | |||
| آموخت Amuxt | |||
| آموزد Amuzad | |||
| آموزش AmuzeS | |||
| آمپر AmpeR | |||
| آمپرمتر AmpeRmetR | |||
| آمپلیفایر AmpelifAjeR | |||
| آمپیریسم AmpiRism | |||
| @@ -5055,7 +5056,6 @@ _) paRAntezbaste: | |||
| درایه deRAje | |||
| درایو deRAjv | |||
| درایور deRAjveR | |||
| درباره daR'bAReje: | |||
| دربازکن daRbAzkon | |||
| دربدر daRbedaR | |||
| دربندکشیده daRbandkeSide | |||
| @@ -6809,6 +6809,7 @@ _) paRAntezbaste: | |||
| شدیدا Sadidan | |||
| شدیداللحن Sadidollahn | |||
| شدیم Sodim | |||
| شراادی SA:_d:jA:_R | |||
| شرافت SeRAfat | |||
| شراپنل SeRApnel | |||
| شراکت SeRAkat | |||
| @@ -6816,7 +6817,6 @@ _) paRAntezbaste: | |||
| شربت SaRbat | |||
| شرت 'SoRt | |||
| شرتکات SoRtkAt | |||
| شراادی SA:_d:jA:_R | |||
| شرشر SeRSeR | |||
| شرطه SoRte: | |||
| شرعا SaR?an | |||
| @@ -1,5 +1,5 @@ | |||
| // * Farsi Language fa (or Parsi or Persian) fa_rules Version 3.133 | |||
| // * This file written by Shadyar Khodayari 05-10-2017 | |||
| // * Farsi Language fa (or Parsi or Persian) fa_rules Version 3.134 | |||
| // * This file written by Shadyar Khodayari 06-24-2017 | |||
| //********* | |||
| // * This program is free software; you can redistribute it and/or modify * | |||
| // * it under the terms of the GNU General Public License as published by * | |||
| @@ -4843,7 +4843,7 @@ L09L04) السّادات (_Sm8 ossAdAt | |||
| L09L09L09L09) م (L03L09L09L09_ ma | |||
| // Prefixes م | |||
| _) م (L03L09L03_$noprefixP1@ ma | |||
| _) م (L03L04L03_$noprefixP1@ ma | |||
| _) می (L03L09+$noprefixP2@ mi | |||
| _) می (آL09L09$noprefixP2@ mi | |||
| _) می (وL09L09$noprefixP2@ mi | |||
| @@ -1,4 +1,5 @@ | |||
| .*.swp | |||
| *~ | |||
| # intermediate files: | |||
| @@ -7,11 +7,14 @@ These are eSpeak NG specific modifications to the `ucd-tools` project: | |||
| * `data/espeak-ng` data files for eSpeak NG extended data. | |||
| * espeak-ng PropList property lookup as part of the `ucd_property` API. | |||
| ## 9.0.0.1 - (In Progress) | |||
| ## 10.0.0 - 2017-06-25 | |||
| * Add `iswblank` and `iswxdigit` compatibility. | |||
| * Improve ctype compatibility. | |||
| * PropList property lookup. | |||
| * PropList and emoji-data property lookup. | |||
| * Support building with a C89 compiler. | |||
| * Update to Unicode Character Data 10.0.0. | |||
| * Unicode Emoji 5.0. | |||
| ## 9.0.0 - 2016-12-28 | |||
| @@ -55,34 +55,34 @@ EXTRA_DIST += ChangeLog | |||
| ############################# Unicode Data #################################### | |||
| EMOJI_VERSION=4.0 | |||
| EMOJI_VERSION=5.0 | |||
| UCD_VERSION=@UCD_VERSION@ | |||
| UCD_ROOTDIR=data/ucd | |||
| UCD_SRCDIR=http://www.unicode.org/Public | |||
| data/emoji/emoji-data.txt: | |||
| mkdir -pv data/emoji | |||
| curl ${UCD_SRCDIR}/emoji/${EMOJI_VERSION}/emoji-data.txt > $@ | |||
| curl ${UCD_SRCDIR}/emoji/${EMOJI_VERSION}/emoji-data.txt -o $@ | |||
| data/ucd/PropList.txt: | |||
| mkdir -pv data/ucd | |||
| curl ${UCD_SRCDIR}/${UCD_VERSION}/ucd/PropList.txt > $@ | |||
| curl ${UCD_SRCDIR}/${UCD_VERSION}/ucd/PropList.txt -o $@ | |||
| data/ucd/DerivedCoreProperties.txt: | |||
| mkdir -pv data/ucd | |||
| curl ${UCD_SRCDIR}/${UCD_VERSION}/ucd/DerivedCoreProperties.txt > $@ | |||
| curl ${UCD_SRCDIR}/${UCD_VERSION}/ucd/DerivedCoreProperties.txt -o $@ | |||
| data/ucd/PropertyValueAliases.txt: | |||
| mkdir -pv data/ucd | |||
| curl ${UCD_SRCDIR}/${UCD_VERSION}/ucd/PropertyValueAliases.txt > $@ | |||
| curl ${UCD_SRCDIR}/${UCD_VERSION}/ucd/PropertyValueAliases.txt -o $@ | |||
| data/ucd/Scripts.txt: | |||
| mkdir -pv data/ucd | |||
| curl ${UCD_SRCDIR}/${UCD_VERSION}/ucd/Scripts.txt > $@ | |||
| curl ${UCD_SRCDIR}/${UCD_VERSION}/ucd/Scripts.txt -o $@ | |||
| data/ucd/UnicodeData.txt: | |||
| mkdir -pv data/ucd | |||
| curl ${UCD_SRCDIR}/${UCD_VERSION}/ucd/UnicodeData.txt > $@ | |||
| curl ${UCD_SRCDIR}/${UCD_VERSION}/ucd/UnicodeData.txt -o $@ | |||
| ############################# documentation ################################### | |||
| @@ -1,5 +1,5 @@ | |||
| AC_PREREQ([2.65]) | |||
| AC_INIT([Unicode Character Database Tools], [9.0.0], [https://github.com/rhdunn/ucd-tools/issues], [ucd-tools], [https://github.com/rhdunn/ucd-tools]) | |||
| AC_INIT([Unicode Character Database Tools], [10.0.0], [https://github.com/rhdunn/ucd-tools/issues], [ucd-tools], [https://github.com/rhdunn/ucd-tools]) | |||
| AM_INIT_AUTOMAKE() | |||
| m4_ifdef([AM_SILENT_RULES],[AM_SILENT_RULES]) | |||
| @@ -24,6 +24,7 @@ dnl library checks. | |||
| dnl ================================================================ | |||
| AC_CHECK_HEADERS([stddef.h]) dnl C89 | |||
| AC_CHECK_FUNCS([iswblank]) dnl C99 | |||
| AC_TYPE_UINT8_T | |||
| AC_TYPE_UINT32_T | |||
| @@ -33,10 +34,10 @@ dnl UCD data configuration. | |||
| dnl ================================================================ | |||
| AC_ARG_WITH([unicode-version], | |||
| [AS_HELP_STRING([--with-unicode-version], [Unicode version to support @<:@default=9.0.0@:>@])], | |||
| [AS_HELP_STRING([--with-unicode-version], [Unicode version to support @<:@default=10.0.0@:>@])], | |||
| [AS_IF([test x"$withval" != x], | |||
| [UCD_VERSION="$withval"])], | |||
| [UCD_VERSION="9.0.0"]) | |||
| [UCD_VERSION="10.0.0"]) | |||
| AC_SUBST(UCD_VERSION) | |||
| @@ -18,14 +18,15 @@ | |||
| * along with ucd-tools. If not, see <http://www.gnu.org/licenses/>. | |||
| */ | |||
| // NOTE: This file is automatically generated from the UnicodeData.txt file in | |||
| // the Unicode Character database by the ucd-tools/tools/categories.py script. | |||
| /* NOTE: This file is automatically generated from the UnicodeData.txt file in | |||
| * the Unicode Character database by the ucd-tools/tools/categories.py script. | |||
| */ | |||
| #include "ucd/ucd.h" | |||
| #include <stddef.h> | |||
| // Unicode Character Data 9.0.0 | |||
| /* Unicode Character Data 9.0.0 */ | |||
| struct case_conversion_entry | |||
| { | |||
| @@ -69,16 +69,16 @@ int ucd_isblank(codepoint_t c) | |||
| switch (ucd_lookup_category(c)) | |||
| { | |||
| case UCD_CATEGORY_Zs: | |||
| switch (c) // Exclude characters with the <noBreak> DispositionType | |||
| switch (c) /* Exclude characters with the <noBreak> DispositionType */ | |||
| { | |||
| case 0x00A0: // U+00A0 : NO-BREAK SPACE | |||
| case 0x2007: // U+2007 : FIGURE SPACE | |||
| case 0x202F: // U+202F : NARROW NO-BREAK SPACE | |||
| case 0x00A0: /* U+00A0 : NO-BREAK SPACE */ | |||
| case 0x2007: /* U+2007 : FIGURE SPACE */ | |||
| case 0x202F: /* U+202F : NARROW NO-BREAK SPACE */ | |||
| return 0; | |||
| } | |||
| return 1; | |||
| case UCD_CATEGORY_Cc: | |||
| return c == 0x09; // U+0009 : CHARACTER TABULATION | |||
| return c == 0x09; /* U+0009 : CHARACTER TABULATION */ | |||
| default: | |||
| return 0; | |||
| } | |||
| @@ -91,7 +91,7 @@ int ucd_iscntrl(codepoint_t c) | |||
| int ucd_isdigit(codepoint_t c) | |||
| { | |||
| return (c >= 0x30 && c <= 0x39); // [0-9] | |||
| return (c >= 0x30 && c <= 0x39); /* [0-9] */ | |||
| } | |||
| int ucd_isgraph(codepoint_t c) | |||
| @@ -174,23 +174,23 @@ int ucd_isspace(codepoint_t c) | |||
| case UCD_CATEGORY_Zp: | |||
| return 1; | |||
| case UCD_CATEGORY_Zs: | |||
| switch (c) // Exclude characters with the <noBreak> DispositionType | |||
| switch (c) /* Exclude characters with the <noBreak> DispositionType */ | |||
| { | |||
| case 0x00A0: // U+00A0 : NO-BREAK SPACE | |||
| case 0x2007: // U+2007 : FIGURE SPACE | |||
| case 0x202F: // U+202F : NARROW NO-BREAK SPACE | |||
| case 0x00A0: /* U+00A0 : NO-BREAK SPACE */ | |||
| case 0x2007: /* U+2007 : FIGURE SPACE */ | |||
| case 0x202F: /* U+202F : NARROW NO-BREAK SPACE */ | |||
| return 0; | |||
| } | |||
| return 1; | |||
| case UCD_CATEGORY_Cc: | |||
| switch (c) // Include control characters marked as White_Space | |||
| switch (c) /* Include control characters marked as White_Space */ | |||
| { | |||
| case 0x09: // U+0009 : CHARACTER TABULATION | |||
| case 0x0A: // U+000A : LINE FEED | |||
| case 0x0B: // U+000B : LINE TABULATION | |||
| case 0x0C: // U+000C : FORM FEED | |||
| case 0x0D: // U+000D : CARRIAGE RETURN | |||
| case 0x85: // U+0085 : NEXT LINE | |||
| case 0x09: /* U+0009 : CHARACTER TABULATION */ | |||
| case 0x0A: /* U+000A : LINE FEED */ | |||
| case 0x0B: /* U+000B : LINE TABULATION */ | |||
| case 0x0C: /* U+000C : FORM FEED */ | |||
| case 0x0D: /* U+000D : CARRIAGE RETURN */ | |||
| case 0x85: /* U+0085 : NEXT LINE */ | |||
| return 1; | |||
| } | |||
| default: | |||
| @@ -217,7 +217,7 @@ int ucd_isupper(codepoint_t c) | |||
| int ucd_isxdigit(codepoint_t c) | |||
| { | |||
| return (c >= 0x30 && c <= 0x39) // [0-9] | |||
| || (c >= 0x41 && c <= 0x46) // [A-Z] | |||
| || (c >= 0x61 && c <= 0x66); // [a-z] | |||
| return (c >= 0x30 && c <= 0x39) /* [0-9] */ | |||
| || (c >= 0x41 && c <= 0x46) /* [A-F] */ | |||
| || (c >= 0x61 && c <= 0x66); /* [a-f] */ | |||
| } | |||
| @@ -176,6 +176,7 @@ typedef enum ucd_script_ | |||
| UCD_SCRIPT_Geok, /**< @brief Khutsuri Script */ | |||
| UCD_SCRIPT_Geor, /**< @brief Georgian Script */ | |||
| UCD_SCRIPT_Glag, /**< @brief Glagolitic Script */ | |||
| UCD_SCRIPT_Gonm, /**< @brief Masaram Gondi */ | |||
| UCD_SCRIPT_Goth, /**< @brief Gothic Script */ | |||
| UCD_SCRIPT_Gran, /**< @brief Grantha Script */ | |||
| UCD_SCRIPT_Grek, /**< @brief Greek Script */ | |||
| @@ -273,6 +274,7 @@ typedef enum ucd_script_ | |||
| UCD_SCRIPT_Sind, /**< @brief Sindhi Script */ | |||
| UCD_SCRIPT_Sinh, /**< @brief Sinhala Script */ | |||
| UCD_SCRIPT_Sora, /**< @brief Sora Sompeng Script */ | |||
| UCD_SCRIPT_Soyo, /**< @brief Soyombo */ | |||
| UCD_SCRIPT_Sund, /**< @brief Sundanese Script */ | |||
| UCD_SCRIPT_Sylo, /**< @brief Syloti Nagri Script */ | |||
| UCD_SCRIPT_Syrc, /**< @brief Syriac Script */ | |||
| @@ -302,6 +304,7 @@ typedef enum ucd_script_ | |||
| UCD_SCRIPT_Xpeo, /**< @brief Old Persian Script */ | |||
| UCD_SCRIPT_Xsux, /**< @brief Cuneiform Script */ | |||
| UCD_SCRIPT_Yiii, /**< @brief Yi Script */ | |||
| UCD_SCRIPT_Zanb, /**< @brief Zanabazar Square */ | |||
| UCD_SCRIPT_Zinh, /**< @brief Inherited Script */ | |||
| UCD_SCRIPT_Zmth, /**< @brief Mathematical Notation */ | |||
| UCD_SCRIPT_Zsym, /**< @brief Symbols */ | |||
| @@ -366,6 +369,8 @@ typedef uint64_t ucd_property; | |||
| #define UCD_PROPERTY_EMOJI_PRESENTATION 0x0000000400000000ull /**< @brief Emoji_Presentation */ | |||
| #define UCD_PROPERTY_EMOJI_MODIFIER 0x0000000800000000ull /**< @brief Emoji_Modifier */ | |||
| #define UCD_PROPERTY_EMOJI_MODIFIER_BASE 0x0000001000000000ull /**< @brief Emoji_Modifier_Base */ | |||
| #define UCD_PROPERTY_REGIONAL_INDICATOR 0x0000002000000000ull /**< @brief Regional_Indicator */ | |||
| #define UCD_PROPERTY_EMOJI_COMPONENT 0x0000004000000000ull /**< @brief Emoji_Component */ | |||
| // eSpeak NG extended properties: | |||
| #define ESPEAKNG_PROPERTY_INVERTED_TERMINAL_PUNCTUATION 0x0010000000000000ull /**< @brief Inverted_Terminal_Punctuation */ | |||
| @@ -679,6 +684,7 @@ namespace ucd | |||
| Geok = UCD_SCRIPT_Geok, /**< @brief Khutsuri Script */ | |||
| Geor = UCD_SCRIPT_Geor, /**< @brief Georgian Script */ | |||
| Glag = UCD_SCRIPT_Glag, /**< @brief Glagolitic Script */ | |||
| Gonm = UCD_SCRIPT_Gonm, /**< @brief Masaram Gondi */ | |||
| Goth = UCD_SCRIPT_Goth, /**< @brief Gothic Script */ | |||
| Gran = UCD_SCRIPT_Gran, /**< @brief Grantha Script */ | |||
| Grek = UCD_SCRIPT_Grek, /**< @brief Greek Script */ | |||
| @@ -776,6 +782,7 @@ namespace ucd | |||
| Sind = UCD_SCRIPT_Sind, /**< @brief Sindhi Script */ | |||
| Sinh = UCD_SCRIPT_Sinh, /**< @brief Sinhala Script */ | |||
| Sora = UCD_SCRIPT_Sora, /**< @brief Sora Sompeng Script */ | |||
| Soyo = UCD_SCRIPT_Soyo, /**< @brief Soyombo */ | |||
| Sund = UCD_SCRIPT_Sund, /**< @brief Sundanese Script */ | |||
| Sylo = UCD_SCRIPT_Sylo, /**< @brief Syloti Nagri Script */ | |||
| Syrc = UCD_SCRIPT_Syrc, /**< @brief Syriac Script */ | |||
| @@ -805,6 +812,7 @@ namespace ucd | |||
| Xpeo = UCD_SCRIPT_Xpeo, /**< @brief Old Persian Script */ | |||
| Xsux = UCD_SCRIPT_Xsux, /**< @brief Cuneiform Script */ | |||
| Yiii = UCD_SCRIPT_Yiii, /**< @brief Yi Script */ | |||
| Zanb = UCD_SCRIPT_Zanb, /**< @brief Zanabazar Square */ | |||
| Zinh = UCD_SCRIPT_Zinh, /**< @brief Inherited Script */ | |||
| Zmth = UCD_SCRIPT_Zmth, /**< @brief Mathematical Notation */ | |||
| Zsym = UCD_SCRIPT_Zsym, /**< @brief Symbols */ | |||
| @@ -876,6 +884,8 @@ namespace ucd | |||
| Emoji_Presentation = UCD_PROPERTY_EMOJI_PRESENTATION, /**< @brief Emoji_Presentation */ | |||
| Emoji_Modifier = UCD_PROPERTY_EMOJI_MODIFIER, /**< @brief Emoji_Modifier */ | |||
| Emoji_Modifier_Base = UCD_PROPERTY_EMOJI_MODIFIER_BASE, /**< @brief Emoji_Modifier_Base */ | |||
| Regional_Indicator = UCD_PROPERTY_REGIONAL_INDICATOR, /**< @brief Regional_Indicator */ | |||
| Emoji_Component = UCD_PROPERTY_EMOJI_COMPONENT, /**< @brief Emoji_Component */ | |||
| }; | |||
| /** @brief Return the properties of the specified codepoint. | |||
| @@ -78,9 +78,6 @@ static ucd_property properties_Cn(codepoint_t c) | |||
| case 0x2000: | |||
| if (c == 0x2065) return UCD_PROPERTY_OTHER_DEFAULT_IGNORABLE_CODE_POINT; | |||
| break; | |||
| case 0x2300: | |||
| if (c == 0x23FF) return UCD_PROPERTY_PATTERN_SYNTAX; | |||
| break; | |||
| case 0x2400: | |||
| if (c >= 0x2427 && c <= 0x243F) return UCD_PROPERTY_PATTERN_SYNTAX; | |||
| if (c >= 0x244B && c <= 0x245F) return UCD_PROPERTY_PATTERN_SYNTAX; | |||
| @@ -90,7 +87,7 @@ static ucd_property properties_Cn(codepoint_t c) | |||
| if (c >= 0x2B96 && c <= 0x2B97) return UCD_PROPERTY_PATTERN_SYNTAX; | |||
| if (c >= 0x2BBA && c <= 0x2BBC) return UCD_PROPERTY_PATTERN_SYNTAX; | |||
| if (c == 0x2BC9) return UCD_PROPERTY_PATTERN_SYNTAX; | |||
| if (c >= 0x2BD2 && c <= 0x2BEB) return UCD_PROPERTY_PATTERN_SYNTAX; | |||
| if (c >= 0x2BD3 && c <= 0x2BEB) return UCD_PROPERTY_PATTERN_SYNTAX; | |||
| if (c >= 0x2BF0 && c <= 0x2BFF) return UCD_PROPERTY_PATTERN_SYNTAX; | |||
| break; | |||
| case 0x2E00: | |||
| @@ -148,11 +145,11 @@ static ucd_property properties_Ll(codepoint_t c) | |||
| if (c == 0x029D) return UCD_PROPERTY_SOFT_DOTTED; | |||
| break; | |||
| case 0x0300: | |||
| if (c >= 0x03D0 && c <= 0x03D2) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu | |||
| if (c >= 0x03D0 && c <= 0x03D2) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */ | |||
| if (c == 0x03D5) return UCD_PROPERTY_OTHER_MATH; | |||
| if (c >= 0x03F0 && c <= 0x03F1) return UCD_PROPERTY_OTHER_MATH; | |||
| if (c == 0x03F3) return UCD_PROPERTY_SOFT_DOTTED; | |||
| if (c >= 0x03F4 && c <= 0x03F5) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu | |||
| if (c >= 0x03F4 && c <= 0x03F5) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */ | |||
| break; | |||
| case 0x0400: | |||
| if (c == 0x0456) return UCD_PROPERTY_SOFT_DOTTED; | |||
| @@ -166,12 +163,12 @@ static ucd_property properties_Ll(codepoint_t c) | |||
| if (c == 0x1ECB) return UCD_PROPERTY_SOFT_DOTTED; | |||
| break; | |||
| case 0x2100: | |||
| if (c >= 0x210A && c <= 0x2113) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu | |||
| if (c >= 0x212F && c <= 0x2131) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu | |||
| if (c >= 0x2133 && c <= 0x2134) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu | |||
| if (c >= 0x210A && c <= 0x2113) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */ | |||
| if (c >= 0x212F && c <= 0x2131) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */ | |||
| if (c >= 0x2133 && c <= 0x2134) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */ | |||
| if (c == 0x2139) return UCD_PROPERTY_EMOJI; | |||
| if (c >= 0x213C && c <= 0x213F) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu | |||
| if (c >= 0x2145 && c <= 0x2147) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu | |||
| if (c >= 0x213C && c <= 0x213F) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */ | |||
| if (c >= 0x2145 && c <= 0x2147) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */ | |||
| if (c >= 0x2148 && c <= 0x2149) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_SOFT_DOTTED; | |||
| break; | |||
| case 0xFF00: | |||
| @@ -179,45 +176,45 @@ static ucd_property properties_Ll(codepoint_t c) | |||
| break; | |||
| case 0x01D400: | |||
| if (c >= 0x01D422 && c <= 0x01D423) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_SOFT_DOTTED; | |||
| if (c >= 0x01D400 && c <= 0x01D454) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu | |||
| if (c >= 0x01D400 && c <= 0x01D454) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */ | |||
| if (c >= 0x01D456 && c <= 0x01D457) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_SOFT_DOTTED; | |||
| if (c >= 0x01D48A && c <= 0x01D48B) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_SOFT_DOTTED; | |||
| if (c >= 0x01D458 && c <= 0x01D49C) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu | |||
| if (c >= 0x01D4AE && c <= 0x01D4B9) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu | |||
| if (c >= 0x01D458 && c <= 0x01D49C) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */ | |||
| if (c >= 0x01D4AE && c <= 0x01D4B9) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */ | |||
| if (c == 0x01D4BB) return UCD_PROPERTY_OTHER_MATH; | |||
| if (c >= 0x01D4BE && c <= 0x01D4BF) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_SOFT_DOTTED; | |||
| if (c >= 0x01D4BD && c <= 0x01D4C3) return UCD_PROPERTY_OTHER_MATH; | |||
| if (c >= 0x01D4F2 && c <= 0x01D4F3) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_SOFT_DOTTED; | |||
| if (c >= 0x01D4C5) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu | |||
| if (c >= 0x01D4C5) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */ | |||
| break; | |||
| case 0x01D500: | |||
| if (c <= 0x01D505) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu | |||
| if (c <= 0x01D505) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */ | |||
| if (c >= 0x01D526 && c <= 0x01D527) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_SOFT_DOTTED; | |||
| if (c >= 0x01D51E && c <= 0x01D539) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu | |||
| if (c >= 0x01D51E && c <= 0x01D539) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */ | |||
| if (c >= 0x01D55A && c <= 0x01D55B) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_SOFT_DOTTED; | |||
| if (c >= 0x01D58E && c <= 0x01D58F) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_SOFT_DOTTED; | |||
| if (c >= 0x01D5C2 && c <= 0x01D5C3) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_SOFT_DOTTED; | |||
| if (c >= 0x01D5F6 && c <= 0x01D5F7) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_SOFT_DOTTED; | |||
| if (c >= 0x01D552) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu | |||
| if (c >= 0x01D552) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */ | |||
| break; | |||
| case 0x01D600: | |||
| if (c >= 0x01D62A && c <= 0x01D62B) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_SOFT_DOTTED; | |||
| if (c >= 0x01D65E && c <= 0x01D65F) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_SOFT_DOTTED; | |||
| if (c >= 0x01D692 && c <= 0x01D693) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_SOFT_DOTTED; | |||
| if (c <= 0x01D6A5) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu | |||
| if (c <= 0x01D6A5) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */ | |||
| if (c >= 0x01D6C2 && c <= 0x01D6DA) return UCD_PROPERTY_OTHER_MATH; | |||
| if (c >= 0x01D6DC && c <= 0x01D6FA) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu | |||
| if (c >= 0x01D6DC && c <= 0x01D6FA) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */ | |||
| if (c >= 0x01D6FC) return UCD_PROPERTY_OTHER_MATH; | |||
| break; | |||
| case 0x01D700: | |||
| if (c <= 0x01D714) return UCD_PROPERTY_OTHER_MATH; | |||
| if (c >= 0x01D716 && c <= 0x01D734) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu | |||
| if (c >= 0x01D716 && c <= 0x01D734) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */ | |||
| if (c >= 0x01D736 && c <= 0x01D74E) return UCD_PROPERTY_OTHER_MATH; | |||
| if (c >= 0x01D750 && c <= 0x01D76E) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu | |||
| if (c >= 0x01D750 && c <= 0x01D76E) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */ | |||
| if (c >= 0x01D770 && c <= 0x01D788) return UCD_PROPERTY_OTHER_MATH; | |||
| if (c >= 0x01D78A && c <= 0x01D7A8) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu | |||
| if (c >= 0x01D78A && c <= 0x01D7A8) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */ | |||
| if (c >= 0x01D7AA && c <= 0x01D7C2) return UCD_PROPERTY_OTHER_MATH; | |||
| if (c >= 0x01D7C4 && c <= 0x01D7CB) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu | |||
| if (c >= 0x01D7C4 && c <= 0x01D7CB) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */ | |||
| break; | |||
| } | |||
| return 0; | |||
| @@ -332,7 +329,7 @@ static ucd_property properties_Lm(codepoint_t c) | |||
| break; | |||
| case 0x016F00: | |||
| if (c >= 0x016F93 && c <= 0x016F9F) return UCD_PROPERTY_DIACRITIC; | |||
| if (c == 0x016FE0) return UCD_PROPERTY_EXTENDER; | |||
| if (c >= 0x016FE0 && c <= 0x016FE1) return UCD_PROPERTY_EXTENDER; | |||
| break; | |||
| } | |||
| return 0; | |||
| @@ -407,19 +404,21 @@ static ucd_property properties_Lo_ideographic(codepoint_t c) | |||
| { | |||
| case 0x000000: | |||
| if (c >= 0x3400 && c <= 0x4DB5) return UCD_PROPERTY_IDEOGRAPHIC | UCD_PROPERTY_UNIFIED_IDEOGRAPH; | |||
| if (c >= 0x4E00 && c <= 0x9FD5) return UCD_PROPERTY_IDEOGRAPHIC | UCD_PROPERTY_UNIFIED_IDEOGRAPH; | |||
| if (c >= 0x4E00 && c <= 0x9FEA) return UCD_PROPERTY_IDEOGRAPHIC | UCD_PROPERTY_UNIFIED_IDEOGRAPH; | |||
| if (c >= 0xF900 && c <= 0xFA6D) return UCD_PROPERTY_IDEOGRAPHIC; | |||
| if (c >= 0xFA70 && c <= 0xFAD9) return UCD_PROPERTY_IDEOGRAPHIC; | |||
| break; | |||
| case 0x010000: | |||
| if (c >= 0x017000 && c <= 0x0187EC) return UCD_PROPERTY_IDEOGRAPHIC; | |||
| if (c >= 0x018800 && c <= 0x018AF2) return UCD_PROPERTY_IDEOGRAPHIC; | |||
| if (c >= 0x01B170 && c <= 0x01B2FB) return UCD_PROPERTY_IDEOGRAPHIC; | |||
| break; | |||
| case 0x020000: | |||
| if (c >= 0x020000 && c <= 0x02A6D6) return UCD_PROPERTY_IDEOGRAPHIC | UCD_PROPERTY_UNIFIED_IDEOGRAPH; | |||
| if (c >= 0x02A700 && c <= 0x02B734) return UCD_PROPERTY_IDEOGRAPHIC | UCD_PROPERTY_UNIFIED_IDEOGRAPH; | |||
| if (c >= 0x02B740 && c <= 0x02B81D) return UCD_PROPERTY_IDEOGRAPHIC | UCD_PROPERTY_UNIFIED_IDEOGRAPH; | |||
| if (c >= 0x02B820 && c <= 0x02CEA1) return UCD_PROPERTY_IDEOGRAPHIC | UCD_PROPERTY_UNIFIED_IDEOGRAPH; | |||
| if (c >= 0x02CEB0 && c <= 0x02EBE0) return UCD_PROPERTY_IDEOGRAPHIC | UCD_PROPERTY_UNIFIED_IDEOGRAPH; | |||
| if (c >= 0x02F800 && c <= 0x02FA1D) return UCD_PROPERTY_IDEOGRAPHIC; | |||
| break; | |||
| } | |||
| @@ -434,8 +433,8 @@ static ucd_property properties_Lu(codepoint_t c) | |||
| if (c >= 0x0041 && c <= 0x0046) return UCD_PROPERTY_HEX_DIGIT | UCD_PROPERTY_ASCII_HEX_DIGIT; | |||
| break; | |||
| case 0x0300: | |||
| if (c >= 0x03D0 && c <= 0x03D2) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu | |||
| if (c >= 0x03F4 && c <= 0x03F5) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu | |||
| if (c >= 0x03D0 && c <= 0x03D2) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */ | |||
| if (c >= 0x03F4 && c <= 0x03F5) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */ | |||
| break; | |||
| case 0xFF00: | |||
| if (c >= 0xFF21 && c <= 0xFF26) return UCD_PROPERTY_HEX_DIGIT; | |||
| @@ -443,49 +442,49 @@ static ucd_property properties_Lu(codepoint_t c) | |||
| case 0x2100: | |||
| if (c == 0x2102) return UCD_PROPERTY_OTHER_MATH; | |||
| if (c == 0x2107) return UCD_PROPERTY_OTHER_MATH; | |||
| if (c >= 0x210A && c <= 0x2113) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu | |||
| if (c >= 0x210A && c <= 0x2113) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */ | |||
| if (c == 0x2115) return UCD_PROPERTY_OTHER_MATH; | |||
| if (c >= 0x2119 && c <= 0x211D) return UCD_PROPERTY_OTHER_MATH; | |||
| if (c == 0x2124) return UCD_PROPERTY_OTHER_MATH; | |||
| if (c == 0x2128) return UCD_PROPERTY_OTHER_MATH; | |||
| if (c >= 0x212C && c <= 0x212D) return UCD_PROPERTY_OTHER_MATH; | |||
| if (c >= 0x212F && c <= 0x2131) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu | |||
| if (c >= 0x2133 && c <= 0x2134) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu | |||
| if (c >= 0x213C && c <= 0x213F) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu | |||
| if (c >= 0x2145 && c <= 0x2149) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu | |||
| if (c >= 0x212F && c <= 0x2131) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */ | |||
| if (c >= 0x2133 && c <= 0x2134) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */ | |||
| if (c >= 0x213C && c <= 0x213F) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */ | |||
| if (c >= 0x2145 && c <= 0x2149) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */ | |||
| break; | |||
| case 0x01D400: | |||
| if (c >= 0x01D400 && c <= 0x01D454) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu | |||
| if (c >= 0x01D456 && c <= 0x01D49C) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu | |||
| if (c >= 0x01D400 && c <= 0x01D454) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */ | |||
| if (c >= 0x01D456 && c <= 0x01D49C) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */ | |||
| if (c >= 0x01D49E && c <= 0x01D49F) return UCD_PROPERTY_OTHER_MATH; | |||
| if (c == 0x01D4A2) return UCD_PROPERTY_OTHER_MATH; | |||
| if (c >= 0x01D4A5 && c <= 0x01D4A6) return UCD_PROPERTY_OTHER_MATH; | |||
| if (c >= 0x01D4A9 && c <= 0x01D4AC) return UCD_PROPERTY_OTHER_MATH; | |||
| if (c >= 0x01D4AE && c <= 0x01D4B9) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu | |||
| if (c >= 0x01D4C5) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu | |||
| if (c >= 0x01D4AE && c <= 0x01D4B9) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */ | |||
| if (c >= 0x01D4C5) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */ | |||
| break; | |||
| case 0x01D500: | |||
| if (c <= 0x01D505) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu | |||
| if (c <= 0x01D505) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */ | |||
| if (c >= 0x01D507 && c <= 0x01D50A) return UCD_PROPERTY_OTHER_MATH; | |||
| if (c >= 0x01D50D && c <= 0x01D514) return UCD_PROPERTY_OTHER_MATH; | |||
| if (c >= 0x01D516 && c <= 0x01D51C) return UCD_PROPERTY_OTHER_MATH; | |||
| if (c >= 0x01D51E && c <= 0x01D539) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu | |||
| if (c >= 0x01D51E && c <= 0x01D539) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */ | |||
| if (c >= 0x01D53B && c <= 0x01D53E) return UCD_PROPERTY_OTHER_MATH; | |||
| if (c >= 0x01D540 && c <= 0x01D544) return UCD_PROPERTY_OTHER_MATH; | |||
| if (c == 0x01D546) return UCD_PROPERTY_OTHER_MATH; | |||
| if (c >= 0x01D54A && c <= 0x01D550) return UCD_PROPERTY_OTHER_MATH; | |||
| if (c >= 0x01D552) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu | |||
| if (c >= 0x01D552) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */ | |||
| break; | |||
| case 0x01D600: | |||
| if (c <= 0x01D6A5) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu | |||
| if (c <= 0x01D6A5) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */ | |||
| if (c >= 0x01D6A8 && c <= 0x01D6C0) return UCD_PROPERTY_OTHER_MATH; | |||
| if (c >= 0x01D6DC && c <= 0x01D6FA) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu | |||
| if (c >= 0x01D6DC && c <= 0x01D6FA) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */ | |||
| break; | |||
| case 0x01D700: | |||
| if (c >= 0x01D716 && c <= 0x01D734) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu | |||
| if (c >= 0x01D750 && c <= 0x01D76E) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu | |||
| if (c >= 0x01D78A && c <= 0x01D7A8) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu | |||
| if (c >= 0x01D7C4 && c <= 0x01D7CB) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu | |||
| if (c >= 0x01D716 && c <= 0x01D734) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */ | |||
| if (c >= 0x01D750 && c <= 0x01D76E) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */ | |||
| if (c >= 0x01D78A && c <= 0x01D7A8) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */ | |||
| if (c >= 0x01D7C4 && c <= 0x01D7CB) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */ | |||
| break; | |||
| } | |||
| return 0; | |||
| @@ -613,6 +612,7 @@ static ucd_property properties_Mc(codepoint_t c) | |||
| if (c >= 0x1C34 && c <= 0x1C35) return UCD_PROPERTY_OTHER_ALPHABETIC; | |||
| if (c == 0x1CE1) return UCD_PROPERTY_DIACRITIC; | |||
| if (c >= 0x1CF2 && c <= 0x1CF3) return UCD_PROPERTY_OTHER_ALPHABETIC; | |||
| if (c == 0x1CF7) return UCD_PROPERTY_DIACRITIC; | |||
| break; | |||
| case 0x3000: | |||
| if (c >= 0x302E && c <= 0x302F) return UCD_PROPERTY_DIACRITIC | UCD_PROPERTY_OTHER_GRAPHEME_EXTEND; | |||
| @@ -709,6 +709,12 @@ static ucd_property properties_Mc(codepoint_t c) | |||
| if (c >= 0x011720 && c <= 0x011721) return UCD_PROPERTY_OTHER_ALPHABETIC; | |||
| if (c == 0x011726) return UCD_PROPERTY_OTHER_ALPHABETIC; | |||
| break; | |||
| case 0x011A00: | |||
| if (c >= 0x011A07 && c <= 0x011A08) return UCD_PROPERTY_OTHER_ALPHABETIC; | |||
| if (c == 0x011A39) return UCD_PROPERTY_OTHER_ALPHABETIC; | |||
| if (c >= 0x011A57 && c <= 0x011A58) return UCD_PROPERTY_OTHER_ALPHABETIC; | |||
| if (c == 0x011A97) return UCD_PROPERTY_OTHER_ALPHABETIC; | |||
| break; | |||
| case 0x011C00: | |||
| if (c == 0x011C2F) return UCD_PROPERTY_OTHER_ALPHABETIC; | |||
| if (c == 0x011C3E) return UCD_PROPERTY_OTHER_ALPHABETIC; | |||
| @@ -818,6 +824,8 @@ static ucd_property properties_Mn(codepoint_t c) | |||
| if (c >= 0x0AC7 && c <= 0x0AC8) return UCD_PROPERTY_OTHER_ALPHABETIC; | |||
| if (c == 0x0ACD) return UCD_PROPERTY_DIACRITIC; | |||
| if (c >= 0x0AE2 && c <= 0x0AE3) return UCD_PROPERTY_OTHER_ALPHABETIC; | |||
| if (c >= 0x0AFA && c <= 0x0AFC) return UCD_PROPERTY_OTHER_ALPHABETIC; | |||
| if (c >= 0x0AFD && c <= 0x0AFF) return UCD_PROPERTY_DIACRITIC; | |||
| break; | |||
| case 0x0B00: | |||
| if (c == 0x0B01) return UCD_PROPERTY_OTHER_ALPHABETIC; | |||
| @@ -848,7 +856,8 @@ static ucd_property properties_Mn(codepoint_t c) | |||
| if (c >= 0x0CE2 && c <= 0x0CE3) return UCD_PROPERTY_OTHER_ALPHABETIC; | |||
| break; | |||
| case 0x0D00: | |||
| if (c == 0x0D01) return UCD_PROPERTY_OTHER_ALPHABETIC; | |||
| if (c >= 0x0D00 && c <= 0x0D01) return UCD_PROPERTY_OTHER_ALPHABETIC; | |||
| if (c >= 0x0D3B && c <= 0x0D3C) return UCD_PROPERTY_DIACRITIC; | |||
| if (c >= 0x0D41 && c <= 0x0D44) return UCD_PROPERTY_OTHER_ALPHABETIC; | |||
| if (c == 0x0D4D) return UCD_PROPERTY_DIACRITIC; | |||
| if (c >= 0x0D62 && c <= 0x0D63) return UCD_PROPERTY_OTHER_ALPHABETIC; | |||
| @@ -964,7 +973,7 @@ static ucd_property properties_Mn(codepoint_t c) | |||
| case 0x1D00: | |||
| if (c >= 0x1DC4 && c <= 0x1DCF) return UCD_PROPERTY_DIACRITIC; | |||
| if (c >= 0x1DE7 && c <= 0x1DF4) return UCD_PROPERTY_OTHER_ALPHABETIC; | |||
| if (c == 0x1DF5) return UCD_PROPERTY_DIACRITIC; | |||
| if (c >= 0x1DF5 && c <= 0x1DF9) return UCD_PROPERTY_DIACRITIC; | |||
| if (c >= 0x1DFD && c <= 0x1DFF) return UCD_PROPERTY_DIACRITIC; | |||
| break; | |||
| case 0x2000: | |||
| @@ -1111,6 +1120,16 @@ static ucd_property properties_Mn(codepoint_t c) | |||
| if (c >= 0x011727 && c <= 0x01172A) return UCD_PROPERTY_OTHER_ALPHABETIC; | |||
| if (c == 0x01172B) return UCD_PROPERTY_DIACRITIC; | |||
| break; | |||
| case 0x011A00: | |||
| if (c >= 0x011A01 && c <= 0x011A0A) return UCD_PROPERTY_OTHER_ALPHABETIC; | |||
| if (c == 0x011A34) return UCD_PROPERTY_DIACRITIC; | |||
| if (c >= 0x011A35 && c <= 0x011A3E) return UCD_PROPERTY_OTHER_ALPHABETIC; | |||
| if (c == 0x011A47) return UCD_PROPERTY_DIACRITIC; | |||
| if (c >= 0x011A51 && c <= 0x011A5B) return UCD_PROPERTY_OTHER_ALPHABETIC; | |||
| if (c >= 0x011A8A && c <= 0x011A96) return UCD_PROPERTY_OTHER_ALPHABETIC; | |||
| if (c == 0x011A98) return UCD_PROPERTY_EXTENDER; | |||
| if (c == 0x011A99) return UCD_PROPERTY_DIACRITIC; | |||
| break; | |||
| case 0x011C00: | |||
| if (c >= 0x011C30 && c <= 0x011C36) return UCD_PROPERTY_OTHER_ALPHABETIC; | |||
| if (c >= 0x011C38 && c <= 0x011C3D) return UCD_PROPERTY_OTHER_ALPHABETIC; | |||
| @@ -1120,6 +1139,16 @@ static ucd_property properties_Mn(codepoint_t c) | |||
| if (c >= 0x011CB2 && c <= 0x011CB3) return UCD_PROPERTY_OTHER_ALPHABETIC; | |||
| if (c >= 0x011CB5 && c <= 0x011CB6) return UCD_PROPERTY_OTHER_ALPHABETIC; | |||
| break; | |||
| case 0x011D00: | |||
| if (c >= 0x011D31 && c <= 0x011D36) return UCD_PROPERTY_OTHER_ALPHABETIC; | |||
| if (c == 0x011D3A) return UCD_PROPERTY_OTHER_ALPHABETIC; | |||
| if (c >= 0x011D3C && c <= 0x011D3D) return UCD_PROPERTY_OTHER_ALPHABETIC; | |||
| if (c >= 0x011D3F && c <= 0x011D41) return UCD_PROPERTY_OTHER_ALPHABETIC; | |||
| if (c == 0x011D42) return UCD_PROPERTY_DIACRITIC; | |||
| if (c == 0x011D43) return UCD_PROPERTY_OTHER_ALPHABETIC; | |||
| if (c >= 0x011D44 && c <= 0x011D45) return UCD_PROPERTY_DIACRITIC; | |||
| if (c == 0x011D47) return UCD_PROPERTY_OTHER_ALPHABETIC; | |||
| break; | |||
| case 0x016A00: | |||
| if (c >= 0x016AF0 && c <= 0x016AF4) return UCD_PROPERTY_DIACRITIC; | |||
| break; | |||
| @@ -1165,7 +1194,7 @@ static ucd_property properties_Nd(codepoint_t c) | |||
| switch (c & 0xFFFFFF00) | |||
| { | |||
| case 0x0000: | |||
| if (c >= 0x0030 && c <= 0x0039) return UCD_PROPERTY_HEX_DIGIT | UCD_PROPERTY_ASCII_HEX_DIGIT | UCD_PROPERTY_EMOJI; | |||
| if (c >= 0x0030 && c <= 0x0039) return UCD_PROPERTY_HEX_DIGIT | UCD_PROPERTY_ASCII_HEX_DIGIT | UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_COMPONENT; | |||
| break; | |||
| case 0xFF00: | |||
| if (c >= 0xFF10 && c <= 0xFF19) return UCD_PROPERTY_HEX_DIGIT; | |||
| @@ -1279,10 +1308,10 @@ static ucd_property properties_Pe(codepoint_t c) | |||
| break; | |||
| case 0x2700: | |||
| if (c == 0x27C6) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_PATTERN_SYNTAX; | |||
| if (c >= 0x27E6 && c <= 0x27EF) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_PATTERN_SYNTAX; // Pe|Ps | |||
| if (c >= 0x27E6 && c <= 0x27EF) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_PATTERN_SYNTAX; /* Pe|Ps */ | |||
| return UCD_PROPERTY_PATTERN_SYNTAX; | |||
| case 0x2900: | |||
| return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_PATTERN_SYNTAX; // Pe|Ps | |||
| return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_PATTERN_SYNTAX; /* Pe|Ps */ | |||
| case 0x2E00: | |||
| return UCD_PROPERTY_PATTERN_SYNTAX; | |||
| case 0x3000: | |||
| @@ -1337,9 +1366,9 @@ static ucd_property properties_Po(codepoint_t c) | |||
| case 0x0000: | |||
| if (c == 0x0021) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_SENTENCE_TERMINAL | UCD_PROPERTY_PATTERN_SYNTAX | ESPEAKNG_PROPERTY_EXCLAMATION_MARK; | |||
| if (c == 0x0022) return UCD_PROPERTY_QUOTATION_MARK | UCD_PROPERTY_PATTERN_SYNTAX; | |||
| if (c == 0x0023) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_PATTERN_SYNTAX; | |||
| if (c == 0x0023) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_PATTERN_SYNTAX | UCD_PROPERTY_EMOJI_COMPONENT; | |||
| if (c == 0x0027) return UCD_PROPERTY_QUOTATION_MARK | UCD_PROPERTY_PATTERN_SYNTAX; | |||
| if (c == 0x002A) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_PATTERN_SYNTAX; | |||
| if (c == 0x002A) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_PATTERN_SYNTAX | UCD_PROPERTY_EMOJI_COMPONENT; | |||
| if (c == 0x002C) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_PATTERN_SYNTAX | ESPEAKNG_PROPERTY_COMMA; | |||
| if (c == 0x002E) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_SENTENCE_TERMINAL | UCD_PROPERTY_PATTERN_SYNTAX | ESPEAKNG_PROPERTY_FULL_STOP; | |||
| if (c == 0x003A) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_PATTERN_SYNTAX | ESPEAKNG_PROPERTY_COLON; | |||
| @@ -1609,6 +1638,11 @@ static ucd_property properties_Po(codepoint_t c) | |||
| case 0x11700: | |||
| if (c >= 0x01173C && c <= 0x01173E) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_SENTENCE_TERMINAL; | |||
| break; | |||
| case 0x11A00: | |||
| if (c >= 0x011A42 && c <= 0x011A43) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_SENTENCE_TERMINAL; | |||
| if (c >= 0x011A9B && c <= 0x011A9C) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_SENTENCE_TERMINAL; | |||
| if (c >= 0x011AA1 && c <= 0x011AA2) return UCD_PROPERTY_TERMINAL_PUNCTUATION; | |||
| break; | |||
| case 0x11C00: | |||
| if (c >= 0x011C41 && c <= 0x011C42) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_SENTENCE_TERMINAL; | |||
| if (c == 0x011C43) return UCD_PROPERTY_TERMINAL_PUNCTUATION; | |||
| @@ -1664,7 +1698,7 @@ static ucd_property properties_Ps(codepoint_t c) | |||
| break; | |||
| case 0x2700: | |||
| if (c == 0x27C5) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_PATTERN_SYNTAX; | |||
| if (c >= 0x27E6 && c <= 0x27EF) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_PATTERN_SYNTAX; // Pe|Ps | |||
| if (c >= 0x27E6 && c <= 0x27EF) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_PATTERN_SYNTAX; /* Pe|Ps */ | |||
| return UCD_PROPERTY_PATTERN_SYNTAX; | |||
| case 0x2900: | |||
| return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_PATTERN_SYNTAX; | |||
| @@ -1746,7 +1780,7 @@ static ucd_property properties_Sk(codepoint_t c) | |||
| if (c == 0xFFE3) return UCD_PROPERTY_DIACRITIC; | |||
| break; | |||
| case 0x01F300: | |||
| return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION | UCD_PROPERTY_EMOJI_MODIFIER; | |||
| return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION | UCD_PROPERTY_EMOJI_MODIFIER | UCD_PROPERTY_EMOJI_COMPONENT; | |||
| } | |||
| return 0; | |||
| } | |||
| @@ -1839,7 +1873,7 @@ static ucd_property properties_So(codepoint_t c) | |||
| if (c == 0x23F3) return UCD_PROPERTY_PATTERN_SYNTAX | UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION; | |||
| if (c >= 0x23E9 && c <= 0x23F3) return UCD_PROPERTY_PATTERN_SYNTAX | UCD_PROPERTY_EMOJI; | |||
| if (c >= 0x23F8 && c <= 0x23FA) return UCD_PROPERTY_PATTERN_SYNTAX | UCD_PROPERTY_EMOJI; | |||
| if (c >= 0x23E3 && c <= 0x23FE) return UCD_PROPERTY_PATTERN_SYNTAX; | |||
| if (c >= 0x23E3) return UCD_PROPERTY_PATTERN_SYNTAX; | |||
| break; | |||
| case 0x2400: | |||
| if (c >= 0x2400 && c <= 0x244A) return UCD_PROPERTY_PATTERN_SYNTAX; | |||
| @@ -1979,7 +2013,7 @@ static ucd_property properties_So(codepoint_t c) | |||
| if (c >= 0x01F170 && c <= 0x01F189) return UCD_PROPERTY_OTHER_ALPHABETIC | UCD_PROPERTY_OTHER_UPPERCASE; | |||
| if (c == 0x01F18E) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION; | |||
| if (c >= 0x01F191 && c <= 0x01F19A) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION; | |||
| if (c >= 0x01F1E6) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION; | |||
| if (c >= 0x01F1E6) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION | UCD_PROPERTY_REGIONAL_INDICATOR | UCD_PROPERTY_EMOJI_COMPONENT; | |||
| break; | |||
| case 0x01F200: | |||
| if (c == 0x01F201) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION; | |||
| @@ -2074,23 +2108,29 @@ static ucd_property properties_So(codepoint_t c) | |||
| if (c >= 0x01F6EB && c <= 0x01F6EC) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION; | |||
| if (c == 0x01F6F0) return UCD_PROPERTY_EMOJI; | |||
| if (c == 0x01F6F3) return UCD_PROPERTY_EMOJI; | |||
| if (c >= 0x01F6F4 && c <= 0x01F6F6) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION; | |||
| if (c >= 0x01F6F4 && c <= 0x01F6F8) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION; | |||
| break; | |||
| case 0x01F900: | |||
| if (c <= 0x01F90B) return 0; | |||
| if (c >= 0x01F918 && c <= 0x01F91C) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION | UCD_PROPERTY_EMOJI_MODIFIER_BASE; | |||
| if (c >= 0x01F910 && c <= 0x01F91D) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION; | |||
| if (c == 0x01F91E) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION | UCD_PROPERTY_EMOJI_MODIFIER_BASE; | |||
| if (c >= 0x01F91E && c <= 0x01F91F) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION | UCD_PROPERTY_EMOJI_MODIFIER_BASE; | |||
| if (c == 0x01F926) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION | UCD_PROPERTY_EMOJI_MODIFIER_BASE; | |||
| if (c >= 0x01F920 && c <= 0x01F927) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION; | |||
| if (c == 0x01F930) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION | UCD_PROPERTY_EMOJI_MODIFIER_BASE; | |||
| if (c >= 0x01F920 && c <= 0x01F92F) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION; | |||
| if (c >= 0x01F930 && c <= 0x01F932) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION | UCD_PROPERTY_EMOJI_MODIFIER_BASE; | |||
| if (c == 0x01F93B) return 0; | |||
| if (c >= 0x01F93A && c <= 0x01F93C) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION; | |||
| if (c >= 0x01F933 && c <= 0x01F93E) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION | UCD_PROPERTY_EMOJI_MODIFIER_BASE; | |||
| if (c == 0x01F946) return 0; | |||
| if (c >= 0x01F940 && c <= 0x01F94B) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION; | |||
| if (c == 0x01F94C) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION; | |||
| if (c >= 0x01F950 && c <= 0x01F95E) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION; | |||
| if (c >= 0x01F95F && c <= 0x01F96B) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION; | |||
| if (c >= 0x01F980 && c <= 0x01F991) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION; | |||
| if (c >= 0x01F992 && c <= 0x01F997) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION; | |||
| if (c == 0x01F9C0) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION; | |||
| if (c >= 0x01F9D1 && c <= 0x01F9DD) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION | UCD_PROPERTY_EMOJI_MODIFIER_BASE; | |||
| if (c >= 0x01F9D0 && c <= 0x01F9E6) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION; | |||
| return UCD_PROPERTY_EMOJI; | |||
| } | |||
| return 0; | |||
| @@ -2132,6 +2172,6 @@ ucd_property ucd_properties(codepoint_t c, ucd_category category) | |||
| case UCD_CATEGORY_Zl: return UCD_PROPERTY_WHITE_SPACE | UCD_PROPERTY_PATTERN_WHITE_SPACE; | |||
| case UCD_CATEGORY_Zp: return UCD_PROPERTY_WHITE_SPACE | UCD_PROPERTY_PATTERN_WHITE_SPACE | ESPEAKNG_PROPERTY_PARAGRAPH_SEPARATOR; | |||
| case UCD_CATEGORY_Zs: return properties_Zs(c); | |||
| default: return 0; // Co Cs Ii Lt Me | |||
| default: return 0; /* Co Cs Ii Lt Me */ | |||
| }; | |||
| } | |||
| @@ -120,6 +120,7 @@ const char *ucd_get_script_string(ucd_script s) | |||
| "Geok", | |||
| "Geor", | |||
| "Glag", | |||
| "Gonm", | |||
| "Goth", | |||
| "Gran", | |||
| "Grek", | |||
| @@ -217,6 +218,7 @@ const char *ucd_get_script_string(ucd_script s) | |||
| "Sind", | |||
| "Sinh", | |||
| "Sora", | |||
| "Soyo", | |||
| "Sund", | |||
| "Sylo", | |||
| "Syrc", | |||
| @@ -246,6 +248,7 @@ const char *ucd_get_script_string(ucd_script s) | |||
| "Xpeo", | |||
| "Xsux", | |||
| "Yiii", | |||
| "Zanb", | |||
| "Zinh", | |||
| "Zmth", | |||
| "Zsym", | |||
| @@ -17,6 +17,7 @@ | |||
| * along with ucd-tools. If not, see <http://www.gnu.org/licenses/>. | |||
| */ | |||
| #include "config.h" | |||
| #include "ucd/ucd.h" | |||
| #include <locale.h> | |||
| @@ -25,6 +26,13 @@ | |||
| #include <wchar.h> | |||
| #include <wctype.h> | |||
| #ifndef HAVE_ISWBLANK | |||
| static int iswblank(wint_t c) /* Fallback definition, compiled only when HAVE_ISWBLANK is not defined. */ | |||
| { | |||
| return iswspace(c) && !(c >= 0x0A && c <= 0x0D); /* nonzero for any iswspace() character except LF, VT, FF, CR (0x0A-0x0D) */ | |||
| } | |||
| #endif | |||
| void fput_utf8c(FILE *out, codepoint_t c) | |||
| { | |||
| if (c < 0x80) | |||
| @@ -86,7 +94,7 @@ void uprintf_codepoint(FILE *out, codepoint_t c, char mode) | |||
| { | |||
| switch (mode) | |||
| { | |||
| case 'c': // character | |||
| case 'c': /* character */ | |||
| switch (c) | |||
| { | |||
| case '\t': fputs("\\t", out); break; | |||
| @@ -95,10 +103,10 @@ void uprintf_codepoint(FILE *out, codepoint_t c, char mode) | |||
| default: fput_utf8c(out, c); break; | |||
| } | |||
| break; | |||
| case 'h': // hexadecimal (lower) | |||
| case 'h': /* hexadecimal (lower) */ | |||
| fprintf(out, "%06x", c); | |||
| break; | |||
| case 'H': // hexadecimal (upper) | |||
| case 'H': /* hexadecimal (upper) */ | |||
| fprintf(out, "%06X", c); | |||
| break; | |||
| } | |||
| @@ -108,40 +116,40 @@ void uprintf_is(FILE *out, codepoint_t c, char mode) | |||
| { | |||
| switch (mode) | |||
| { | |||
| case 'A': // alpha-numeric | |||
| case 'A': /* alpha-numeric */ | |||
| fputc(iswalnum(c) ? '1' : '0', out); | |||
| break; | |||
| case 'a': // alpha | |||
| case 'a': /* alpha */ | |||
| fputc(iswalpha(c) ? '1' : '0', out); | |||
| break; | |||
| case 'b': // blank | |||
| case 'b': /* blank */ | |||
| fputc(iswblank(c) ? '1' : '0', out); | |||
| break; | |||
| case 'c': // control | |||
| case 'c': /* control */ | |||
| fputc(iswcntrl(c) ? '1' : '0', out); | |||
| break; | |||
| case 'd': // numeric | |||
| case 'd': /* numeric */ | |||
| fputc(iswdigit(c) ? '1' : '0', out); | |||
| break; | |||
| case 'g': // glyph | |||
| case 'g': /* glyph */ | |||
| fputc(iswgraph(c) ? '1' : '0', out); | |||
| break; | |||
| case 'l': // lower case | |||
| case 'l': /* lower case */ | |||
| fputc(iswlower(c) ? '1' : '0', out); | |||
| break; | |||
| case 'P': // printable | |||
| case 'P': /* printable */ | |||
| fputc(iswprint(c) ? '1' : '0', out); | |||
| break; | |||
| case 'p': // punctuation | |||
| case 'p': /* punctuation */ | |||
| fputc(iswpunct(c) ? '1' : '0', out); | |||
| break; | |||
| case 's': // whitespace | |||
| case 's': /* whitespace */ | |||
| fputc(iswspace(c) ? '1' : '0', out); | |||
| break; | |||
| case 'u': // upper case | |||
| case 'u': /* upper case */ | |||
| fputc(iswupper(c) ? '1' : '0', out); | |||
| break; | |||
| case 'x': // xdigit | |||
| case 'x': /* xdigit */ | |||
| fputc(iswxdigit(c) ? '1' : '0', out); | |||
| break; | |||
| } | |||
| @@ -154,31 +162,31 @@ void uprintf(FILE *out, codepoint_t c, const char *format) | |||
| case '%': | |||
| switch (*++format) | |||
| { | |||
| case 'c': // category | |||
| case 'c': /* category */ | |||
| fputs(ucd_get_category_string(ucd_lookup_category(c)), out); | |||
| break; | |||
| case 'C': // category group | |||
| case 'C': /* category group */ | |||
| fputs(ucd_get_category_group_string(ucd_lookup_category_group(c)), out); | |||
| break; | |||
| case 'p': // codepoint | |||
| case 'p': /* codepoint */ | |||
| uprintf_codepoint(out, c, *++format); | |||
| break; | |||
| case 'P': // properties | |||
| case 'P': /* properties */ | |||
| fprintf(out, "%016llx", ucd_properties(c, ucd_lookup_category(c))); | |||
| break; | |||
| case 'i': // is* | |||
| case 'i': /* is* */ | |||
| uprintf_is(out, c, *++format); | |||
| break; | |||
| case 'L': // lowercase | |||
| case 'L': /* lowercase */ | |||
| uprintf_codepoint(out, towlower(c), *++format); | |||
| break; | |||
| case 's': // script | |||
| case 's': /* script */ | |||
| fputs(ucd_get_script_string(ucd_lookup_script(c)), out); | |||
| break; | |||
| case 'T': // titlecase | |||
| case 'T': /* titlecase */ | |||
| uprintf_codepoint(out, ucd_totitle(c), *++format); | |||
| break; | |||
| case 'U': // uppercase | |||
| case 'U': /* uppercase */ | |||
| uprintf_codepoint(out, towupper(c), *++format); | |||
| break; | |||
| } | |||
| @@ -224,7 +232,8 @@ int main(int argc, char **argv) | |||
| { | |||
| FILE *in = NULL; | |||
| const char *format = NULL; | |||
| for (int argn = 1; argn != argc; ++argn) | |||
| int argn; | |||
| for (argn = 1; argn != argc; ++argn) | |||
| { | |||
| const char *arg = argv[argn]; | |||
| if (!strcmp(arg, "--stdin") || !strcmp(arg, "-")) | |||
| @@ -250,7 +259,8 @@ int main(int argc, char **argv) | |||
| } | |||
| else | |||
| { | |||
| for (codepoint_t c = 0; c <= 0x10FFFF; ++c) | |||
| codepoint_t c; | |||
| for (c = 0; c <= 0x10FFFF; ++c) | |||
| uprintf(stdout, c, format ? format : | |||
| "%pH %s %C %c %UH %LH %TH %id %ix %ic %is %ib %ip %iP %ig %iA %ia %iu %il %P\n"); | |||
| } | |||
| @@ -83,7 +83,7 @@ void uprintf_codepoint(FILE *out, codepoint_t c, char mode) | |||
| { | |||
| switch (mode) | |||
| { | |||
| case 'c': // character | |||
| case 'c': /* character */ | |||
| switch (c) | |||
| { | |||
| case '\t': fputs("\\t", out); break; | |||
| @@ -92,10 +92,10 @@ void uprintf_codepoint(FILE *out, codepoint_t c, char mode) | |||
| default: fput_utf8c(out, c); break; | |||
| } | |||
| break; | |||
| case 'h': // hexadecimal (lower) | |||
| case 'h': /* hexadecimal (lower) */ | |||
| fprintf(out, "%06x", c); | |||
| break; | |||
| case 'H': // hexadecimal (upper) | |||
| case 'H': /* hexadecimal (upper) */ | |||
| fprintf(out, "%06X", c); | |||
| break; | |||
| } | |||
| @@ -105,40 +105,40 @@ void uprintf_is(FILE *out, codepoint_t c, char mode) | |||
| { | |||
| switch (mode) | |||
| { | |||
| case 'A': // alpha-numeric | |||
| case 'A': /* alpha-numeric */ | |||
| fputc(ucd_isalnum(c) ? '1' : '0', out); | |||
| break; | |||
| case 'a': // alpha | |||
| case 'a': /* alpha */ | |||
| fputc(ucd_isalpha(c) ? '1' : '0', out); | |||
| break; | |||
| case 'b': // blank | |||
| case 'b': /* blank */ | |||
| fputc(ucd_isblank(c) ? '1' : '0', out); | |||
| break; | |||
| case 'c': // control | |||
| case 'c': /* control */ | |||
| fputc(ucd_iscntrl(c) ? '1' : '0', out); | |||
| break; | |||
| case 'd': // numeric | |||
| case 'd': /* numeric */ | |||
| fputc(ucd_isdigit(c) ? '1' : '0', out); | |||
| break; | |||
| case 'g': // glyph | |||
| case 'g': /* glyph */ | |||
| fputc(ucd_isgraph(c) ? '1' : '0', out); | |||
| break; | |||
| case 'l': // lower case | |||
| case 'l': /* lower case */ | |||
| fputc(ucd_islower(c) ? '1' : '0', out); | |||
| break; | |||
| case 'P': // printable | |||
| case 'P': /* printable */ | |||
| fputc(ucd_isprint(c) ? '1' : '0', out); | |||
| break; | |||
| case 'p': // punctuation | |||
| case 'p': /* punctuation */ | |||
| fputc(ucd_ispunct(c) ? '1' : '0', out); | |||
| break; | |||
| case 's': // whitespace | |||
| case 's': /* whitespace */ | |||
| fputc(ucd_isspace(c) ? '1' : '0', out); | |||
| break; | |||
| case 'u': // upper case | |||
| case 'u': /* upper case */ | |||
| fputc(ucd_isupper(c) ? '1' : '0', out); | |||
| break; | |||
| case 'x': // xdigit | |||
| case 'x': /* xdigit */ | |||
| fputc(ucd_isxdigit(c) ? '1' : '0', out); | |||
| break; | |||
| } | |||
| @@ -151,31 +151,31 @@ void uprintf(FILE *out, codepoint_t c, const char *format) | |||
| case '%': | |||
| switch (*++format) | |||
| { | |||
| case 'c': // category | |||
| case 'c': /* category */ | |||
| fputs(ucd_get_category_string(ucd_lookup_category(c)), out); | |||
| break; | |||
| case 'C': // category group | |||
| case 'C': /* category group */ | |||
| fputs(ucd_get_category_group_string(ucd_lookup_category_group(c)), out); | |||
| break; | |||
| case 'p': // codepoint | |||
| case 'p': /* codepoint */ | |||
| uprintf_codepoint(out, c, *++format); | |||
| break; | |||
| case 'P': // properties | |||
| case 'P': /* properties */ | |||
| fprintf(out, "%016llx", ucd_properties(c, ucd_lookup_category(c))); | |||
| break; | |||
| case 'i': // is* | |||
| case 'i': /* is* */ | |||
| uprintf_is(out, c, *++format); | |||
| break; | |||
| case 'L': // lowercase | |||
| case 'L': /* lowercase */ | |||
| uprintf_codepoint(out, ucd_tolower(c), *++format); | |||
| break; | |||
| case 's': // script | |||
| case 's': /* script */ | |||
| fputs(ucd_get_script_string(ucd_lookup_script(c)), out); | |||
| break; | |||
| case 'T': // titlecase | |||
| case 'T': /* titlecase */ | |||
| uprintf_codepoint(out, ucd_totitle(c), *++format); | |||
| break; | |||
| case 'U': // uppercase | |||
| case 'U': /* uppercase */ | |||
| uprintf_codepoint(out, ucd_toupper(c), *++format); | |||
| break; | |||
| } | |||
| @@ -221,7 +221,8 @@ int main(int argc, char **argv) | |||
| { | |||
| FILE *in = NULL; | |||
| const char *format = NULL; | |||
| for (int argn = 1; argn != argc; ++argn) | |||
| int argn; | |||
| for (argn = 1; argn != argc; ++argn) | |||
| { | |||
| const char *arg = argv[argn]; | |||
| if (!strcmp(arg, "--stdin") || !strcmp(arg, "-")) | |||
| @@ -245,7 +246,8 @@ int main(int argc, char **argv) | |||
| } | |||
| else | |||
| { | |||
| for (codepoint_t c = 0; c <= 0x10FFFF; ++c) | |||
| codepoint_t c; | |||
| for (c = 0; c <= 0x10FFFF; ++c) | |||
| uprintf(stdout, c, format ? format : | |||
| "%pH %s %C %c %UH %LH %TH %id %ix %ic %is %ib %ip %iP %ig %iA %ia %iu %il %P\n"); | |||
| } | |||
| @@ -51,14 +51,15 @@ if __name__ == '__main__': | |||
| * along with ucd-tools. If not, see <http://www.gnu.org/licenses/>. | |||
| */ | |||
| // NOTE: This file is automatically generated from the UnicodeData.txt file in | |||
| // the Unicode Character database by the ucd-tools/tools/categories.py script. | |||
| /* NOTE: This file is automatically generated from the UnicodeData.txt file in | |||
| * the Unicode Character database by the ucd-tools/tools/categories.py script. | |||
| */ | |||
| #include "ucd/ucd.h" | |||
| #include <stddef.h> | |||
| // Unicode Character Data %s | |||
| /* Unicode Character Data %s */ | |||
| struct case_conversion_entry | |||
| { | |||
| @@ -110,8 +110,9 @@ if __name__ == '__main__': | |||
| * along with ucd-tools. If not, see <http://www.gnu.org/licenses/>. | |||
| */ | |||
| // NOTE: This file is automatically generated from the UnicodeData.txt file in | |||
| // the Unicode Character database by the ucd-tools/tools/categories.py script. | |||
| /* NOTE: This file is automatically generated from the UnicodeData.txt file in | |||
| * the Unicode Character database by the ucd-tools/tools/categories.py script. | |||
| */ | |||
| #include "ucd/ucd.h" | |||
| @@ -149,7 +150,7 @@ if __name__ == '__main__': | |||
| #define Zs UCD_CATEGORY_Zs | |||
| #define Ii UCD_CATEGORY_Ii | |||
| // Unicode Character Data %s | |||
| /* Unicode Character Data %s */ | |||
| """ % ucd_version) | |||
| for category in special_categories: | |||
| @@ -187,7 +188,7 @@ if __name__ == '__main__': | |||
| sys.stdout.write('{\n') | |||
| for codepoint, table in sorted(category_tables[table_index].items()): | |||
| if isinstance(table, str): | |||
| sys.stdout.write('\tcategories_%s, // %s\n' % (table, codepoint)) | |||
| sys.stdout.write('\tcategories_%s, /* %s */\n' % (table, codepoint)) | |||
| else: | |||
| sys.stdout.write('\tcategories_%s,\n' % codepoint) | |||
| sys.stdout.write('};\n') | |||
| @@ -197,14 +198,14 @@ if __name__ == '__main__': | |||
| sys.stdout.write('{\n') | |||
| for codepoints, category, comment in category_sets: | |||
| if category: | |||
| sys.stdout.write('\tif (c <= 0x%s) return %s; // %s : %s\n' % (codepoints.last, category, codepoints, comment)) | |||
| sys.stdout.write('\tif (c <= 0x%s) return %s; /* %s : %s */\n' % (codepoints.last, category, codepoints, comment)) | |||
| else: | |||
| sys.stdout.write('\tif (c <= 0x%s) // %s\n' % (codepoints.last, codepoints)) | |||
| sys.stdout.write('\tif (c <= 0x%s) /* %s */\n' % (codepoints.last, codepoints)) | |||
| sys.stdout.write('\t{\n') | |||
| sys.stdout.write('\t\tconst uint8_t *table = categories_%s_%s[(c - 0x%s) / 256];\n' % (codepoints.first, codepoints.last, codepoints.first)) | |||
| sys.stdout.write('\t\treturn (ucd_category)table[c % 256];\n') | |||
| sys.stdout.write('\t}\n') | |||
| sys.stdout.write('\treturn Ii; // Invalid Unicode Codepoint\n') | |||
| sys.stdout.write('\treturn Ii; /* Invalid Unicode Codepoint */\n') | |||
| sys.stdout.write('}\n') | |||
| sys.stdout.write(""" | |||
| @@ -166,6 +166,8 @@ def properties(data): | |||
| props += (2 ** 34) * data.get('Emoji_Presentation', 0) # emoji-data | |||
| props += (2 ** 35) * data.get('Emoji_Modifier', 0) # emoji-data | |||
| props += (2 ** 36) * data.get('Emoji_Modifier_Base', 0) # emoji-data | |||
| props += (2 ** 37) * data.get('Regional_Indicator', 0) # PropList 10.0.0 | |||
| props += (2 ** 38) * data.get('Emoji_Component', 0) # emoji-data 5.0 | |||
| # eSpeak NG extended properties: | |||
| props += (2 ** 52) * data.get('Inverted_Terminal_Punctuation', 0) | |||
| props += (2 ** 53) * data.get('Punctuation_In_Word', 0) | |||
| @@ -104,8 +104,9 @@ if __name__ == '__main__': | |||
| * along with ucd-tools. If not, see <http://www.gnu.org/licenses/>. | |||
| */ | |||
| // NOTE: This file is automatically generated from the Scripts.txt file in | |||
| // the Unicode Character database by the ucd-tools/tools/scripts.py script. | |||
| /* NOTE: This file is automatically generated from the Scripts.txt file in | |||
| * the Unicode Character database by the ucd-tools/tools/scripts.py script. | |||
| */ | |||
| #include "ucd/ucd.h" | |||
| @@ -152,6 +153,7 @@ if __name__ == '__main__': | |||
| #define Geok UCD_SCRIPT_Geok | |||
| #define Geor UCD_SCRIPT_Geor | |||
| #define Glag UCD_SCRIPT_Glag | |||
| #define Gonm UCD_SCRIPT_Gonm | |||
| #define Goth UCD_SCRIPT_Goth | |||
| #define Gran UCD_SCRIPT_Gran | |||
| #define Grek UCD_SCRIPT_Grek | |||
| @@ -249,6 +251,7 @@ if __name__ == '__main__': | |||
| #define Sind UCD_SCRIPT_Sind | |||
| #define Sinh UCD_SCRIPT_Sinh | |||
| #define Sora UCD_SCRIPT_Sora | |||
| #define Soyo UCD_SCRIPT_Soyo | |||
| #define Sund UCD_SCRIPT_Sund | |||
| #define Sylo UCD_SCRIPT_Sylo | |||
| #define Syrc UCD_SCRIPT_Syrc | |||
| @@ -278,6 +281,7 @@ if __name__ == '__main__': | |||
| #define Xpeo UCD_SCRIPT_Xpeo | |||
| #define Xsux UCD_SCRIPT_Xsux | |||
| #define Yiii UCD_SCRIPT_Yiii | |||
| #define Zanb UCD_SCRIPT_Zanb | |||
| #define Zinh UCD_SCRIPT_Zinh | |||
| #define Zmth UCD_SCRIPT_Zmth | |||
| #define Zsym UCD_SCRIPT_Zsym | |||
| @@ -285,7 +289,7 @@ if __name__ == '__main__': | |||
| #define Zyyy UCD_SCRIPT_Zyyy | |||
| #define Zzzz UCD_SCRIPT_Zzzz | |||
| // Unicode Character Data %s | |||
| /* Unicode Character Data %s */ | |||
| """ % ucd_version) | |||
| for script in special_scripts: | |||
| @@ -323,7 +327,7 @@ if __name__ == '__main__': | |||
| sys.stdout.write('{\n') | |||
| for codepoint, table in sorted(script_tables[table_index].items()): | |||
| if isinstance(table, str): | |||
| sys.stdout.write('\tscripts_%s, // %s\n' % (table, codepoint)) | |||
| sys.stdout.write('\tscripts_%s, /* %s */\n' % (table, codepoint)) | |||
| else: | |||
| sys.stdout.write('\tscripts_%s,\n' % codepoint) | |||
| sys.stdout.write('};\n') | |||
| @@ -333,12 +337,12 @@ if __name__ == '__main__': | |||
| sys.stdout.write('{\n') | |||
| for codepoints, script, comment in script_sets: | |||
| if script: | |||
| sys.stdout.write('\tif (c <= 0x%s) return %s; // %s : %s\n' % (codepoints.last, script, codepoints, comment)) | |||
| sys.stdout.write('\tif (c <= 0x%s) return %s; /* %s : %s */\n' % (codepoints.last, script, codepoints, comment)) | |||
| else: | |||
| sys.stdout.write('\tif (c <= 0x%s) // %s\n' % (codepoints.last, codepoints)) | |||
| sys.stdout.write('\tif (c <= 0x%s) /* %s */\n' % (codepoints.last, codepoints)) | |||
| sys.stdout.write('\t{\n') | |||
| sys.stdout.write('\t\tconst uint8_t *table = scripts_%s_%s[(c - 0x%s) / 256];\n' % (codepoints.first, codepoints.last, codepoints.first)) | |||
| sys.stdout.write('\t\treturn (ucd_script)table[c % 256];\n') | |||
| sys.stdout.write('\t}\n') | |||
| sys.stdout.write('\treturn Zzzz; // Invalid Unicode Codepoint\n') | |||
| sys.stdout.write('\treturn Zzzz; /* Invalid Unicode Codepoint */\n') | |||
| sys.stdout.write('}\n') | |||