# ucd-tools wide-character compatibility support: | # ucd-tools wide-character compatibility support: | ||||
UCDTOOLS_SRC_PATH := ../../ucd-tools/src | |||||
UCDTOOLS_SRC_PATH := ../../src/ucd-tools/src | |||||
UCDTOOLS_SRC_FILES := \ | UCDTOOLS_SRC_FILES := \ | ||||
$(subst $(LOCAL_PATH)/$(UCDTOOLS_SRC_PATH),$(UCDTOOLS_SRC_PATH),$(wildcard $(LOCAL_PATH)/$(UCDTOOLS_SRC_PATH)/*.c*)) | $(subst $(LOCAL_PATH)/$(UCDTOOLS_SRC_PATH),$(UCDTOOLS_SRC_PATH),$(wildcard $(LOCAL_PATH)/$(UCDTOOLS_SRC_PATH)/*.c*)) | ||||
alaska al'aska | alaska al'aska | ||||
albanië alb'A:ne@:@- | albanië alb'A:ne@:@- | ||||
algerië alx2'e@re@:@- | algerië alx2'e@re@:@- | ||||
alhambra al'ambra | |||||
amanzimtoti $4 | amanzimtoti $4 | ||||
antwerpen antv&rp@n | antwerpen antv&rp@n | ||||
avignon _^_FR | avignon _^_FR | ||||
oklahoma @Ukl@h'@Uma | oklahoma @Ukl@h'@Uma | ||||
outeniekwa @Ut@n'ikwa | outeniekwa @Ut@n'ikwa | ||||
oxford _^_EN | oxford _^_EN | ||||
paardeneiland pA:rd@n_'eIlant | |||||
palermo pal'&rmu | palermo pal'&rmu | ||||
pelindaba p&l@nd'A:ba | pelindaba p&l@nd'A:ba | ||||
perú p@ru | perú p@ru | ||||
potchefstroom pOtSIfstr'o@m | potchefstroom pOtSIfstr'o@m | ||||
rhône _^_FR | rhône _^_FR | ||||
richardsbaai ritS@dsb'AI | richardsbaai ritS@dsb'AI | ||||
riversdal r@v@rsdal | |||||
riviersonderend r@fi:rsOn@r_'Ent | riviersonderend r@fi:rsOn@r_'Ent | ||||
robertson _^_EN | robertson _^_EN | ||||
salvador salvadO:r | salvador salvadO:r | ||||
jane _^_EN | jane _^_EN | ||||
janine dZ@ni:n | janine dZ@ni:n | ||||
Jean ZA~n $capital | Jean ZA~n $capital | ||||
jeff _^_EN | |||||
jesebel je@s@b&l | jesebel je@s@b&l | ||||
jessica _^_EN | jessica _^_EN | ||||
jimmy _^_EN | jimmy _^_EN | ||||
stuart _^_EN | stuart _^_EN | ||||
suzanne suz'A:n | suzanne suz'A:n | ||||
suzette suz'Et | suzette suz'Et | ||||
sylvia _^_EN | |||||
tania tanja | tania tanja | ||||
telemann te@l@man | telemann te@l@man | ||||
terblanche t@rblA:nS | terblanche t@rblA:nS | ||||
thelma _^_EN | |||||
theo tiu | theo tiu | ||||
theron tr'On | theron tr'On | ||||
thessalonicense tEsalo@nis'E:ns@ | thessalonicense tEsalo@nis'E:ns@ | ||||
viviers v@v@je@ | viviers v@v@je@ | ||||
wilhelm v@lh&l@-m | wilhelm v@lh&l@-m | ||||
williston _^_EN | williston _^_EN | ||||
woltemade vOlt@mA:d@ | |||||
khumalo kum'A:lu | khumalo kum'A:lu | ||||
zuma zu:ma | zuma zu:ma | ||||
antares $2 | antares $2 | ||||
beatles _^_EN | beatles _^_EN | ||||
boeing _^_EN | boeing _^_EN | ||||
cadillac _^_EN | |||||
checkers tSEk@rs | checkers tSEk@rs | ||||
chevrolet _^_FR | chevrolet _^_FR | ||||
chrysler kraIsl@r | chrysler kraIsl@r | ||||
forma _^_LA | forma _^_LA | ||||
grata _^_LA | grata _^_LA | ||||
habitatio _^_LA | habitatio _^_LA | ||||
inclusio _^_LA | |||||
inclusio _^_LA | |||||
(in camera) @n||kam@ra | |||||
(in debiti) _^_LA | (in debiti) _^_LA | ||||
(in absentia) _^_LA | (in absentia) _^_LA | ||||
(in extremis) _^_LA | (in extremis) _^_LA | ||||
// main word list | // main word list | ||||
aangaande $2 | aangaande $2 | ||||
aanmerklik $2 | |||||
aanstaande $2 | aanstaande $2 | ||||
(a cappella) a||kap'&la | (a cappella) a||kap'&la | ||||
adagio ad'A:dZi;%@U | adagio ad'A:dZi;%@U | ||||
alge alx2@ | alge alx2@ | ||||
algehele alx2@h,e@l@ | algehele alx2@h,e@l@ | ||||
allegro al'Egru | allegro al'Egru | ||||
allengs alENs | |||||
allergene $3 | allergene $3 | ||||
almiskie $3 | almiskie $3 | ||||
alom al_'Om | alom al_'Om | ||||
babelaas bab@lA:s | babelaas bab@lA:s | ||||
barrikade $3 | barrikade $3 | ||||
beaming b@_A:m@N | |||||
bedewete be@d@ve@t@ | bedewete be@d@ve@t@ | ||||
beide beId@ | beide beId@ | ||||
bekaf b&kaf | bekaf b&kaf | ||||
belangriker b@laNr@k,Ir | belangriker b@laNr@k,Ir | ||||
bene be@n@ | bene be@n@ | ||||
beringde b@rINd@ | |||||
beswil bEsv@l | beswil bEsv@l | ||||
besnedene b@sne@d@n@ | besnedene b@sne@d@n@ | ||||
bestes bEst@s | bestes bEst@s | ||||
bewebeen be@v@be@n | bewebeen be@v@be@n | ||||
bewend be@v@nt | bewend be@v@nt | ||||
bewering b@ve@rIN | bewering b@ve@rIN | ||||
bilharzia b@lharsia | |||||
biopsie bi'Opsi | biopsie bi'Opsi | ||||
bomaat bo@mA:t | bomaat bo@mA:t | ||||
bordegoed bO:rd@x2ut | bordegoed bO:rd@x2ut | ||||
charisma kar'Isma | charisma kar'Isma | ||||
cinsaut s@nso@ | cinsaut s@nso@ | ||||
cliché kliS'eI: | cliché kliS'eI: | ||||
clientèle _^_FR | |||||
clivia klIvija | clivia klIvija | ||||
cognac kOn^ak | cognac kOn^ak | ||||
confetti $2 | confetti $2 | ||||
déjà _^_FR | déjà _^_FR | ||||
dekade dEk'A:d@ | dekade dEk'A:d@ | ||||
dekreling dEkre@l@N | dekreling dEkre@l@N | ||||
demensie d@me~nsi | |||||
deurentyd dy@r@nteIt | deurentyd dy@r@nteIt | ||||
deurgaans $1 | deurgaans $1 | ||||
deurkruis $2 | deurkruis $2 | ||||
exodus Eks'o@dWs | exodus Eks'o@dWs | ||||
factotum $2 | factotum $2 | ||||
faktotum $2 | |||||
fakture $2 | fakture $2 | ||||
fetakaas fEtakA:s | fetakaas fEtakA:s | ||||
figuur f@x2yr | figuur f@x2yr | ||||
filippense f@l@pEns@ | filippense f@l@pEns@ | ||||
finalis $3 | finalis $3 | ||||
finaliste $3 | finaliste $3 | ||||
fluktuasie $3 | |||||
fort fOrt | fort fOrt | ||||
forte fOrt@ | forte fOrt@ | ||||
helaas he@l'A:s | helaas he@l'A:s | ||||
here he@r@ | here he@r@ | ||||
herero hEr'E:ru | herero hEr'E:ru | ||||
herontmoeting h&r_Ontmut@N | |||||
herrysenis h&r'eIs@n@s | herrysenis h&r'eIs@n@s | ||||
hierso hi:rsO | hierso hi:rsO | ||||
hippie _^_EN | hippie _^_EN | ||||
inkluis $2 | inkluis $2 | ||||
innestel InnEst@l | innestel InnEst@l | ||||
insomnia @nsOmnija | insomnia @nsOmnija | ||||
inteling Inte@l@N | |||||
intens @nt'Ens | intens @nt'Ens | ||||
intensiteit $4 | intensiteit $4 | ||||
ironieë irun'i:@ | ironieë irun'i:@ | ||||
kennisvaardig $1 | kennisvaardig $1 | ||||
kimono $2 | kimono $2 | ||||
klaasvakie $2 | klaasvakie $2 | ||||
kliënteel $3 | |||||
klimeid klImeIt | klimeid klImeIt | ||||
knapsekêrel knaps@k&:r@l | knapsekêrel knaps@k&:r@l | ||||
kolossense kOl@s'e~ns@ | kolossense kOl@s'e~ns@ | ||||
korswel kOrsv@l | korswel kOrsv@l | ||||
kotiljons kOt@lj'o~ns | kotiljons kOt@lj'o~ns | ||||
kritiek krIt'ik | kritiek krIt'ik | ||||
kruisteling krYyste@l@N | |||||
kulture $2 | kulture $2 | ||||
kunsmatige kWnsm'A:t@x2@ | kunsmatige kWnsm'A:t@x2@ | ||||
kwansuis $2 | kwansuis $2 | ||||
kweekwal kwe@kval | kweekwal kwe@kval | ||||
landswye lantsveI@ | |||||
lasagne las'anj@ | lasagne las'anj@ | ||||
legaat l@x2A:t | legaat l@x2A:t | ||||
legate l@x2A:t@ | legate l@x2A:t@ | ||||
sonure sOn_yr@ | sonure sOn_yr@ | ||||
sover so@f&r | sover so@f&r | ||||
staccato $2 | staccato $2 | ||||
steekhoudendheid $2 | |||||
sterwens st&rv@ns | sterwens st&rv@ns | ||||
stilswye $1 | stilswye $1 | ||||
strydros streIt_rOs | strydros streIt_rOs | ||||
thula tu:la | thula tu:la | ||||
toegee tux2e@ | toegee tux2e@ | ||||
toereken ture@k@n | toereken ture@k@n | ||||
toleransie tOl@r'ansi | |||||
tornado $2 | tornado $2 | ||||
totale tut'A:l@ | totale tut'A:l@ | ||||
totsiens $2 | totsiens $2 | ||||
uitdaging YydA:x2@N | uitdaging YydA:x2@N | ||||
uiteraard Yyt@r_'A:rt | uiteraard Yyt@r_'A:rt | ||||
uitermate $3 | uitermate $3 | ||||
uitgeslotene Yytx2@slo@t@n@ | |||||
uniforme $3 | uniforme $3 | ||||
vaarwel fA:rv'&l | vaarwel fA:rv'&l |
@@@smokkel) ary %ar%eI // default stress: diamant-/drank-/dwelm-/goud-/kokaïensmokkelary, etc. | @@@smokkel) ary %ar%eI // default stress: diamant-/drank-/dwelm-/goud-/kokaïensmokkelary, etc. | ||||
amarula %am%arul%a // fix stress and a sounds: amarula and compounds | amarula %am%arul%a // fix stress and a sounds: amarula and compounds | ||||
a (ment %a // shorten a sound: perkament/testament and compounds | a (ment %a // shorten a sound: perkament/testament and compounds | ||||
_) arendag (CAC %A:r@ntax2 // fix e sound: arendagtig/-e/-heid | |||||
arends A:r@nts_ // fix e sound: arendsoog/-kloue/-nes | arends A:r@nts_ // fix e sound: arendsoog/-kloue/-nes | ||||
argen (tA %arx2@n // move default stress: Argentinië/Argentyns/-e | argen (tA %arx2@n // move default stress: Argentinië/Argentyns/-e | ||||
a (riA 'A: // akwarium/barium/estuarium/herbarium/seminaria | a (riA 'A: // akwarium/barium/estuarium/herbarium/seminaria | ||||
attaché _%at%aSe@ // correct pronunciation: (handels-/inligtings-)attaché | attaché _%at%aSe@ // correct pronunciation: (handels-/inligtings-)attaché | ||||
attrib (u %atr@b // move default stress: attribuut/bute/attributêr | attrib (u %atr@b // move default stress: attribuut/bute/attributêr | ||||
K) au @U // trauma/-ties/Aucamp/Paul/-a/-us | K) au @U // trauma/-ties/Aucamp/Paul/-a/-us | ||||
auer aU@r //Sauer etc. fixed au and e sounds in compounds. | |||||
auer aU@r //Sauer etc. fix au and e sounds in compounds. | |||||
augustus %Ox2WstWs | augustus %Ox2WstWs | ||||
avokado %af%ukA:du // stress and o sounds | avokado %af%ukA:du // stress and o sounds | ||||
avokade %af%ukA:d@ // variant form of avokado | avokade %af%ukA:d@ // variant form of avokado | ||||
bo (grond bo@ // fix 1st o sound: bogronds/-e | bo (grond bo@ // fix 1st o sound: bogronds/-e | ||||
_) bo (kle bo@ // fix o sound: bokleed/boklere | _) bo (kle bo@ // fix o sound: bokleed/boklere | ||||
bokma (kier b%Okm%a // fix stress and a sound: bokmakierie/-s/-tjie | bokma (kier b%Okm%a // fix stress and a sound: bokmakierie/-s/-tjie | ||||
bom (AA bOm_ // fix a sound, pause: bomaanval/-aanslag/-aard/-eenheid and compounds | |||||
bom (AA bOm_ // fix a sound, pause: bomaanval/-aard/-eenheid and compounds | |||||
bomaans (la bOm_A:ns // fix o sound: bomaanslag/-aanslae | |||||
bomaans bo@mA:ns // but fix O sound: bomaans/-e | bomaans bo@mA:ns // but fix O sound: bomaans/-e | ||||
bonde (C bOnd@ // verbondenheid/bondeldraer/gebondene/saambondelend | bonde (C bOnd@ // verbondenheid/bondeldraer/gebondene/saambondelend | ||||
boos (aardig b%o@s_ // move default stress: boosaardig/-e/-heid | boos (aardig b%o@s_ // move default stress: boosaardig/-e/-heid | ||||
bakate (l b%ak@t& // fix stress and vowel sounds: bakatel/-le/-letjie | bakate (l b%ak@t& // fix stress and vowel sounds: bakatel/-le/-letjie | ||||
baken bA:k@n // fix e sound: afbakening and compounds | baken bA:k@n // fix e sound: afbakening and compounds | ||||
bakte (ri b%akte@ // fix stress and e sound: bakterie/-ë | bakte (ri b%akte@ // fix stress and e sound: bakterie/-ë | ||||
balalaika b%al%alaIk%a // fix a sounds and stress: balalaika/-s/-musiek | |||||
_) bam (boes b%am // move default stress: bamboes/-e/bamboesfluit... | _) bam (boes b%am // move default stress: bamboes/-e/bamboesfluit... | ||||
ba (nalA b%a // fix stress and 1st a sound: banale/banaliteit | ba (nalA b%a // fix stress and 1st a sound: banale/banaliteit | ||||
band (Alier b,and // fix stress and d sound: band(e/o)lier/-e | band (Alier b,and // fix stress and d sound: band(e/o)lier/-e | ||||
ba (sieli b%a // fix stress and 1st a sound: basielie/-kruit, etc. | ba (sieli b%a // fix stress and 1st a sound: basielie/-kruit, etc. | ||||
basotho b%asut%u // fix stress and vowel sounds: Basotho/-0ponie and similar | basotho b%asut%u // fix stress and vowel sounds: Basotho/-0ponie and similar | ||||
basi (s bA:s@ // basis and compounds | basi (s bA:s@ // basis and compounds | ||||
basilie b%asil%i // fix a sound and stress: basilie/-kruid, etc. | |||||
bastille b%asti:l // Bastille and compounds like Bastilledag | bastille b%asti:l // Bastille and compounds like Bastilledag | ||||
batal (jon b%at%al // shorten 1st a sound, moved stress: bataljon and compounds | batal (jon b%at%al // shorten 1st a sound, moved stress: bataljon and compounds | ||||
ba (tik b%a // fix stress and a sound: batik/-doek/-kuns/-werk | ba (tik b%a // fix stress and a sound: batik/-doek/-kuns/-werk | ||||
bere (_ be@r@ //tediebere pandabere etc. | bere (_ be@r@ //tediebere pandabere etc. | ||||
_) ber (C b&r // fix e sound: Bert/Berta/Bertie/Bertus/berke/-boom | _) ber (C b&r // fix e sound: Bert/Berta/Bertie/Bertus/berke/-boom | ||||
beste (_ bEst@ // fix e sound: beste/allerbeste/naasbeste/-s | beste (_ bEst@ // fix e sound: beste/allerbeste/naasbeste/-s | ||||
beton (C@ b@tOn // split ng sounds: betongebou/-gietsel/-gruis | |||||
beton (inC@ b@t'On // fix O sound: betoninrigting/-ingenieur | |||||
be (weging b@ // draaibeweging/swaaibeweging | be (weging b@ // draaibeweging/swaaibeweging | ||||
bewende be@v@nd@ // first e pronounced long | bewende be@v@nd@ // first e pronounced long | ||||
bewe (rig be@v@ // fix e sound and stress: bewerig/-e/-heid | bewe (rig be@v@ // fix e sound and stress: bewerig/-e/-heid | ||||
dia (C d%i%a // diabeet/diafragma/diagnose/dialek/dialoog/diamant | dia (C d%i%a // diabeet/diafragma/diagnose/dialek/dialoog/diamant | ||||
diaken d%iA:k@n // diaken and compounds | diaken d%iA:k@n // diaken and compounds | ||||
diako (nie d%i%ak%u // fix stress and o sound: diakonie/-ë | diako (nie d%i%ak%u // fix stress and o sound: diakonie/-ë | ||||
diende dind@ // fix last e sound in many compounds of bediende: bediendekamer | |||||
diens (willi d%ins // move default stress: dienswillig/-e/-heid | diens (willi d%ins // move default stress: dienswillig/-e/-heid | ||||
dieper (@ dip@r // fix e sound: diepere/dieperliggend/-e | dieper (@ dip@r // fix e sound: diepere/dieperliggend/-e | ||||
digi (ta d%ix2%i // move default stress: digitaal/digitale | digi (ta d%ix2%i // move default stress: digitaal/digitale | ||||
@C) e (reekK @ // fix e sound: duine-/rotsereeks | @C) e (reekK @ // fix e sound: duine-/rotsereeks | ||||
aai) e (C @ // fix e sound: baaierd/waaierstert/paaiement/compounds starting with baaiers- | aai) e (C @ // fix e sound: baaierd/waaierstert/paaiement/compounds starting with baaiers- | ||||
lat) eres @r@s //lateres | lat) eres @r@s //lateres | ||||
@C) erm (K &r@-m // a catch-all for words ending in -erm(s) | |||||
fp) ers (_ &rs // fix e sound: dof-/gif-/olyf-/tydskrifpers, etc. | fp) ers (_ &rs // fix e sound: dof-/gif-/olyf-/tydskrifpers, etc. | ||||
bloup) ers &:rs // fix e sound: bloupers | bloup) ers &:rs // fix e sound: bloupers | ||||
iew) ers (ter @rs // fix e sound: (l)iewerster | iew) ers (ter @rs // fix e sound: (l)iewerster | ||||
p) e (talje @ // fix stress and 1st e sound: petalje and compounds | p) e (talje @ // fix stress and 1st e sound: petalje and compounds | ||||
l) e (moen @ //fix lemoen and compounds | l) e (moen @ //fix lemoen and compounds | ||||
Cy) e (C+ @ // fix connecting e sound in compounds like byekorf/skilderyemuseum, etc. | Cy) e (C+ @ // fix connecting e sound in compounds like byekorf/skilderyemuseum, etc. | ||||
effe (kleur Ef@ // fix stress and 2nd e sound: effekleur(ig/-e) | |||||
eier eI@r // eiergeel/eierwit/leierfiguur/leiergroep | eier eI@r // eiergeel/eierwit/leierfiguur/leiergroep | ||||
@C) ei (land _,eI // insert short pause: skiereiland and many -eiland compounds | @C) ei (land _,eI // insert short pause: skiereiland and many -eiland compounds | ||||
@) ei (sen _'eI // (on)veeleisend/spoedeisend | @) ei (sen _'eI // (on)veeleisend/spoedeisend | ||||
flu (we fl%y // move default stress: fluweel/fluwele and compounds | flu (we fl%y // move default stress: fluweel/fluwele and compounds | ||||
_) fok (o fOk_ // fix o sound, insert short break: fokop/fokof | _) fok (o fOk_ // fix o sound, insert short break: fokop/fokof | ||||
folio fo@li_u // fix o sounds: folio and compounds | folio fo@li_u // fix o sounds: folio and compounds | ||||
fondsw fOntsv // fix v sound: fondswerwing & compounds/-waardes | |||||
fondue f%Ondy // fix stress and ue sound: fondue and compounds | fondue f%Ondy // fix stress and ue sound: fondue and compounds | ||||
fone (tiek f%o@n@ // fix e sound: fonetiek and compounds | fone (tiek f%o@n@ // fix e sound: fonetiek and compounds | ||||
fone (tie f%une@ // fix stress and o sound: foneties/-e | fone (tie f%une@ // fix stress and o sound: foneties/-e | ||||
formi (da f%Orm%i // move default stress: formidabel/-e | formi (da f%Orm%i // move default stress: formidabel/-e | ||||
formu (lier f%Orm%y // move default stress: formulier/-e/-boek | formu (lier f%Orm%y // move default stress: formulier/-e/-boek | ||||
_) for (se_ f'Or // stress back to 1st slb; se rule: _) for (C | _) for (se_ f'Or // stress back to 1st slb; se rule: _) for (C | ||||
forse (nd fOrs@ // fix e sound: forsend/-e | |||||
fos (fa f%Os // move default stress: fosfaat/fosfate | fos (fa f%Os // move default stress: fosfaat/fosfate | ||||
fo (ssiel f%O // move default stress: fossiel/-e and compounds | fo (ssiel f%O // move default stress: fossiel/-e and compounds | ||||
foto fo@tu | foto fo@tu | ||||
_) gra (na x2r@ // granaat(boom)/granate | _) gra (na x2r@ // granaat(boom)/granate | ||||
grandi (o x2r%and%i // move default stress: grandioos/-ose | grandi (o x2r%and%i // move default stress: grandioos/-ose | ||||
_) gra (vA x2r%a // fix stress and a sound: gravin/-ne/gravure | _) gra (vA x2r%a // fix stress and a sound: gravin/-ne/gravure | ||||
grenadella gr@n@d&la // fix stress and vowels: grenadella and compounds like -heining | |||||
griekwa x2rikwa // fix w sound and a sound in compounds: Griekwa/-land, etc. | griekwa x2rikwa // fix w sound and a sound in compounds: Griekwa/-land, etc. | ||||
ouCa) gr (ootjie x2r' // move default stress: ouma/oupagrootjie/-s | ouCa) gr (ootjie x2r' // move default stress: ouma/oupagrootjie/-s | ||||
_) graad (e x2r%A:t_ // fix stress and d sound: graadeen(tjies)/-elfs | _) graad (e x2r%A:t_ // fix stress and d sound: graadeen(tjies)/-elfs | ||||
gegesel x2@x2e@s@l // fix middle e sound | gegesel x2@x2e@s@l // fix middle e sound | ||||
K) gele (_ x2e@l@ // fix stress and e sound: (eier)gele | K) gele (_ x2e@l@ // fix stress and e sound: (eier)gele | ||||
_) geler x2e@l@r // stress and 1st e sound: geler/gelerig | _) geler x2e@l@r // stress and 1st e sound: geler/gelerig | ||||
_) gell x2&l | |||||
gemel (d x2@m&l // fix e sound: bo-/bowe-/laas-/meergemeld/-e | gemel (d x2@m&l // fix e sound: bo-/bowe-/laas-/meergemeld/-e | ||||
gems x2Ems // fix e sound: (baster)gemsbok/-bul/-ooi, etc. | gems x2Ems // fix e sound: (baster)gemsbok/-bul/-ooi, etc. | ||||
gene (_ x2e@n@ // gene/diegene | gene (_ x2e@n@ // gene/diegene | ||||
_) ge (ossP2 x2@ // but ge- prefix: geossilleer/geossifiseer, etc. | _) ge (ossP2 x2@ // but ge- prefix: geossilleer/geossifiseer, etc. | ||||
ni) ge (ri x2'e@ //nigeriese etc. | ni) ge (ri x2'e@ //nigeriese etc. | ||||
gese (_ x2'e@s@ //Portugese, and others | gese (_ x2'e@s@ //Portugese, and others | ||||
gesp (etjie x2Esp // fix e sounds: gespetjie/-s | |||||
gespe (_ x2Esp@ // gespe and compounds | gespe (_ x2Esp@ // gespe and compounds | ||||
gespes (_ x2Esp@s // gespes and compounds | gespes (_ x2Esp@s // gespes and compounds | ||||
gewens (g x2e@v@ns // (on)vergewensgesind/-e/-heid | gewens (g x2e@v@ns // (on)vergewensgesind/-e/-heid | ||||
hart (stogte_ h%art // move default stress: hartstogtelik | hart (stogte_ h%art // move default stress: hartstogtelik | ||||
hart (stogte_N hart // restore default stress: hartstogte | hart (stogte_N hart // restore default stress: hartstogte | ||||
ha (we hA: // fix stress and a sound: hawearbeider/lewendehaweafdeling | ha (we hA: // fix stress and a sound: hawearbeider/lewendehaweafdeling | ||||
_) hef (a hEf_ // fix e sound, insert break: hefapparaat/-arm(s) | |||||
hia (sint h%ij%a // fix stress: hiasint/-e and compounds | hia (sint h%ij%a // fix stress: hiasint/-e and compounds | ||||
_) hi (bis h%i // move default stress: hibiskus/-se and compounds | _) hi (bis h%i // move default stress: hibiskus/-se and compounds | ||||
hierna (maal h%i:rnA: // fix stress and a sound: hiernamaals/-e | hierna (maal h%i:rnA: // fix stress and a sound: hiernamaals/-e | ||||
_) idi (o %id%i // idioom/idiome/idioot | _) idi (o %id%i // idioom/idiome/idioot | ||||
_) id (A %id // idille/idillies/ideëryk | _) id (A %id // idille/idillies/ideëryk | ||||
&l) iker (C @k@r // fix i sound: menslikerwys/redelikerwyse, etc. | &l) iker (C @k@r // fix i sound: menslikerwys/redelikerwyse, etc. | ||||
&l) iker (_ @k@r // fix i sound: afstootliker/(ge)redeliker/onberispeliker | |||||
illumi (nA %il%um%i // fix stress and vowel sounds: illuminasie/illumineer/illuminati | illumi (nA %il%um%i // fix stress and vowel sounds: illuminasie/illumineer/illuminati | ||||
illu (si %ily // illusie/-s/illusief | illu (si %ily // illusie/-s/illusief | ||||
illu (strA %il%W // fix i sound: illustreer/illustrering/illustrasie | illu (strA %il%W // fix i sound: illustreer/illustrering/illustrasie | ||||
_) in (a@P2 In // inakkuraat/inaktief/inaktiwiteit | _) in (a@P2 In // inakkuraat/inaktief/inaktiwiteit | ||||
_) inbe (lC Inb%& // fix e sound in compounds like inbelprogram | _) inbe (lC Inb%& // fix e sound in compounds like inbelprogram | ||||
indone (si @nd%uni: // fix stress and vowel sounds: Indonesië/Indonesiese | indone (si @nd%uni: // fix stress and vowel sounds: Indonesië/Indonesiese | ||||
s) in (gestel @n // split n g: compounds with ingesteldheid/winsingestelde | |||||
_) in (oe In_ // fix i sound, pause: inoefen/-oes and derivatives | _) in (oe In_ // fix i sound, pause: inoefen/-oes and derivatives | ||||
invest (eer @nv%Est // fix stress, v and e sounds: investeer/-der | invest (eer @nv%Est // fix stress, v and e sounds: investeer/-der | ||||
investe (r@ @nv%Este@ // fix e sound: (kapitaal)investering/investerende | investe (r@ @nv%Este@ // fix e sound: (kapitaal)investering/investerende | ||||
krieketw krik@tv //krieket followed by w in compounds always v | krieketw krik@tv //krieket followed by w in compounds always v | ||||
krokodi (l kr%Ok@dI // (wyfie)krokodil/-le | krokodi (l kr%Ok@dI // (wyfie)krokodil/-le | ||||
kro (niek kr%u // fix stress and o sound: kroniek and many compounds | |||||
kruger kr'Y@@r | kruger kr'Y@@r | ||||
ku (ba@ k%y // move default stress: kubaan/kubane | ku (ba@ k%y // move default stress: kubaan/kubane | ||||
ku (biek k%y // move default stress: kubiek/-e/-getal | ku (biek k%y // move default stress: kubiek/-e/-getal | ||||
_) kuber kyb@r // fixed e sound: kuberruim(te) | |||||
_) kuber kyb@r // fix e sound: kuberruim(te) | |||||
kulin k%Wl%in // Move default stress and fix u sound: kulinër/-e | kulin k%Wl%in // Move default stress and fix u sound: kulinër/-e | ||||
@) kundi (g k'Wnd@ // wiskundige/onoordeelkundigheid and many similar | @) kundi (g k'Wnd@ // wiskundige/onoordeelkundigheid and many similar | ||||
kurwe kWrv@ // fix e sound: kurwes/skurwebas/skurwebek/skurwepadda | kurwe kWrv@ // fix e sound: kurwes/skurwebas/skurwebek/skurwepadda | ||||
kafe (te k%af@ // fix stress and vowel sounds: kafeteria and compounds | kafe (te k%af@ // fix stress and vowel sounds: kafeteria and compounds | ||||
kafe (ï k%af%i // fix stress and vowel sounds: kafeïene and compounds | kafe (ï k%af%i // fix stress and vowel sounds: kafeïene and compounds | ||||
ka (jak k%a // fix stress and 1st a sound: kajak/-ke/-vaarder | ka (jak k%a // fix stress and 1st a sound: kajak/-ke/-vaarder | ||||
kakao k%akA:w // fix stress and vowel sounds: kakao and many compounds | |||||
kake (C kA:k@ // kakebeen/skakelaar/skakelbord/skakelfunksie | kake (C kA:k@ // kakebeen/skakelaar/skakelbord/skakelfunksie | ||||
kalahari kalah'A:ri // stress: Kalahari/-sand/-woestyn | kalahari kalah'A:ri // stress: Kalahari/-sand/-woestyn | ||||
_) ka (lAnC k%a // kalender and compounds/kalant/kalander and compounds | _) ka (lAnC k%a // kalender and compounds/kalant/kalander and compounds | ||||
kontrasep k%Ontr%asEp // fix stress and e sound: kontrasepsie and derivatives | kontrasep k%Ontr%asEp // fix stress and e sound: kontrasepsie and derivatives | ||||
kontrover (s k%Ontr%uv&r // o and v sounds: kontroversie/kontroversieel | kontrover (s k%Ontr%uv&r // o and v sounds: kontroversie/kontroversieel | ||||
kop (o kOp? // fix o sound: koponderstebo/kopomdraai/kopoperasie/gryskoponderwyser/poenskopolifant | kop (o kOp? // fix o sound: koponderstebo/kopomdraai/kopoperasie/gryskoponderwyser/poenskopolifant | ||||
kop (agtig k%Op_ // fix o sound, insert break: hamer-/spinnekop-/penkopagtig(e(s)) | |||||
kopu (lA k%Op%y // fix o sound: kopulasie/kopuleer and derivatives | kopu (lA k%Op%y // fix o sound: kopulasie/kopuleer and derivatives | ||||
_) kor (dA k%Or // kordaat/kordon | _) kor (dA k%Or // kordaat/kordon | ||||
ko (rint k%u // fix stress and o sound: korint/-e and compounds | ko (rint k%u // fix stress and o sound: korint/-e and compounds | ||||
loboto (mie l%ub%Ot%u // fix stress and o sounds: lobotomie | loboto (mie l%ub%Ot%u // fix stress and o sounds: lobotomie | ||||
lo (ja l%u // fix stress and o sound: lojale/lojaliteit | lo (ja l%u // fix stress and o sound: lojale/lojaliteit | ||||
lo (kalA l%u // fix stress and o sound: lokale/ontvangslokale, etc. | lo (kalA l%u // fix stress and o sound: lokale/ontvangslokale, etc. | ||||
loke (t l%ukE // fix stress and vowel sounds: many compounds with loket | |||||
lom (bardA l%Om // move default stress: Lombardies/-e/Lombarde | lom (bardA l%Om // move default stress: Lombardies/-e/Lombarde | ||||
_) lore (C@ lo@r@ // fix e sound: verloregaan/verloregoederekantoor/Verlorerivier | _) lore (C@ lo@r@ // fix e sound: verloregaan/verloregoederekantoor/Verlorerivier | ||||
_) losge (@P5 l'Osx2@ | _) losge (@P5 l'Osx2@ | ||||
medisyne m@d@seIn@ //medisyne and compounds | medisyne m@d@seIn@ //medisyne and compounds | ||||
meganies m@x2'A:nis | meganies m@x2'A:nis | ||||
_) meege (@P5 m'e@x2@ | _) meege (@P5 m'e@x2@ | ||||
_) meegewe (nd me@x2e@v@ // fix e sounds and stress: meegewend(e) | |||||
me (juf m@ // move default stress and shorten e sound | me (juf m@ // move default stress and shorten e sound | ||||
me (laats m@ // fix stress and e sound: melaats/-e/-heid | me (laats m@ // fix stress and e sound: melaats/-e/-heid | ||||
melancholie (_N m%El%aNk%o@li // fix stress and 1st e sound: melancholie | melancholie (_N m%El%aNk%o@li // fix stress and 1st e sound: melancholie | ||||
morf (otomie m%Orf // move default stress: morfotomie | morf (otomie m%Orf // move default stress: morfotomie | ||||
_) morr (i mOr // restore default stress: morrig/morrie/-doring | _) morr (i mOr // restore default stress: morrig/morrie/-doring | ||||
_) mors (@ mOrs // restore default stress: morsaf/morsdood/morsig | _) mors (@ mOrs // restore default stress: morsaf/morsdood/morsig | ||||
mos (agtig mOs_ // fix o sound, insert break: (kos)mosagtig(e) | |||||
mosam (biek m%o@s%am // move default stress: Mosambiek/-er/-se | mosam (biek m%o@s%am // move default stress: Mosambiek/-er/-se | ||||
mosa (ïek m%o@s%a // move default stress: mosaïek and compounds | mosa (ïek m%o@s%a // move default stress: mosaïek and compounds | ||||
mo (skee m%O // move default stress: moskee/-s and compounds | mo (skee m%O // move default stress: moskee/-s and compounds | ||||
ne (anderCa n%i // move default stress: Neander(d/t)al/-ler | ne (anderCa n%i // move default stress: Neander(d/t)al/-ler | ||||
neger (in n%e@x2@r // move default stress: negerin/-ne | neger (in n%e@x2@r // move default stress: negerin/-ne | ||||
ne (gosie n@ // fix stress and e sound: negosie/-ware, etc. | ne (gosie n@ // fix stress and e sound: negosie/-ware, etc. | ||||
nek (_ n&k // catch-all for words ending in -nek: koedoe-/swaannek | |||||
nek (om n&k_ // fix e sound, insert pause: nekom(ge)draai | nek (om n&k_ // fix e sound, insert pause: nekom(ge)draai | ||||
nekta (rien n%Ekt%a // fix stress and a sound: nektarien/-perske, etc. | nekta (rien n%Ekt%a // fix stress and a sound: nektarien/-perske, etc. | ||||
neo (li n%i%u // fix stress and vowel sounds: neolities/-e/neolitikum | neo (li n%i%u // fix stress and vowel sounds: neolities/-e/neolitikum | ||||
nood (lotti n%o@t // move default stress: noodlottig/-e | nood (lotti n%o@t // move default stress: noodlottig/-e | ||||
nood (saak n%o@t // move default stress: noodsaaklik/-e/-heid, vs. noodsaak | nood (saak n%o@t // move default stress: noodsaaklik/-e/-heid, vs. noodsaak | ||||
nood (saak_N no@t // restore default stress: (ge)noodsaak | nood (saak_N no@t // restore default stress: (ge)noodsaak | ||||
nooien (tjie noIN // remove the e sound: nooientjie(s) and compounds | |||||
noord (oos n%o@rt_ // move default stress: noordoos/-te/-telik/-e | noord (oos n%o@rt_ // move default stress: noordoos/-te/-telik/-e | ||||
nor (ma@ n%Or // normaal/normaalweg/normale/abnormaal/-ale | nor (ma@ n%Or // normaal/normaalweg/normale/abnormaal/-ale | ||||
nostal (gie_N n%Ost%al // stress on last slb.: nostalgie | nostal (gie_N n%Ost%al // stress on last slb.: nostalgie | ||||
ooi oI | ooi oI | ||||
ooy oI | ooy oI | ||||
oodjie oIci | oodjie oIci | ||||
CC) ool (A o@l_ // insert break: skooluur/-ure, steenkooluitvoer, but not: Karoolug | |||||
ootjie oIci | ootjie oIci | ||||
oontjie oINki | oontjie oINki | ||||
oondjie oINki | oondjie oINki | ||||
oot (moedig %o@t // move default stress: ootmoedig/-e/-heid | oot (moedig %o@t // move default stress: ootmoedig/-e/-heid | ||||
.group op | .group op | ||||
_) opaal %o@pA:l // fix o sound, remove break: opaal and compounds | |||||
opaat up'A:t // homeopaat/psigopaat and similar | opaat up'A:t // homeopaat/psigopaat and similar | ||||
opatie upat'i // homeopatie/neuropatie and similar | opatie upat'i // homeopatie/neuropatie and similar | ||||
opaties up'A:tis // psigopaties/osteopaties and similar | opaties up'A:tis // psigopaties/osteopaties and similar | ||||
pol (vy p%Ol // move default stress: polvy/-e and compounds | pol (vy p%Ol // move default stress: polvy/-e and compounds | ||||
pomelo p%ume@l%u // fix stress and o sounds: pomelo(sap/-drankie...) | pomelo p%ume@l%u // fix stress and o sounds: pomelo(sap/-drankie...) | ||||
_) pon (dok p%On // move default stress: pondok/-ke/-kie | _) pon (dok p%On // move default stress: pondok/-ke/-kie | ||||
pop (agtig p%Op_ // fix o sound, insert break: popagtig(e) and compounds | |||||
popu ,pOpy | popu ,pOpy | ||||
_) por (C %pOr //portret portaal etc. | _) por (C %pOr //portret portaal etc. | ||||
por (ie p%o@r // move default stress: porie/-ë | por (ie p%o@r // move default stress: porie/-ë | ||||
sker (muts sk%&r // move default stress: (ge)skermutsel/skermutseling/-e | sker (muts sk%&r // move default stress: (ge)skermutsel/skermutseling/-e | ||||
skerpi (oen sk%&rp%i // move default stress: skerpioen/-e and compounds | skerpi (oen sk%&rp%i // move default stress: skerpioen/-e and compounds | ||||
skilder (y sk@ld@r // move stress to y: skildery and compounds like skilderymuseum | skilder (y sk@ld@r // move stress to y: skildery and compounds like skilderymuseum | ||||
skim (agtig sk@m_ // fix i sound, insert break: skimagtig(e) | |||||
skisofr (e sk%is%ufr // fix stress and o sound: skisofreen/skisofrene | skisofr (e sk%is%ufr // fix stress and o sound: skisofreen/skisofrene | ||||
skle (rose skl@ // fix stress and e sound: sklerose and compounds | skle (rose skl@ // fix stress and e sound: sklerose and compounds | ||||
_) skok (AP4 sk''Ok_ // fix o sound and stress: skokaankondiging/-effek/-insluiting/-onthulling... | _) skok (AP4 sk''Ok_ // fix o sound and stress: skokaankondiging/-effek/-insluiting/-onthulling... | ||||
_) su (meri s%u // fix stress and u sound: sumeries/-e | _) su (meri s%u // fix stress and u sound: sumeries/-e | ||||
su (mmier s%W // move default stress: sumier/-e | su (mmier s%W // move default stress: sumier/-e | ||||
super (A s''yp@r_ // fix stress, break in compounds like superintelligent | super (A s''yp@r_ // fix stress, break in compounds like superintelligent | ||||
superi (A s%up%e@r%i // fix stress and vowel sounds: superieur/superioriteit | |||||
superintendent s,upr@nt%EndEnt // fix stress and vowel sounds: superintendent and compounds | superintendent s,upr@nt%EndEnt // fix stress and vowel sounds: superintendent and compounds | ||||
surro (ga s%Wr%u // fix stress and o sound: surrogaat/surrogate and compounds | surro (ga s%Wr%u // fix stress and o sound: surrogaat/surrogate and compounds | ||||
su (saC s%u // fix stress and u sound: susan/-na/susara | su (saC s%u // fix stress and u sound: susan/-na/susara | ||||
toe (riste@ t%u // move default stress: toeristebedryf/-sentrum and similar | toe (riste@ t%u // move default stress: toeristebedryf/-sentrum and similar | ||||
toer (n t%ur // move default stress: compounds of toernooi | toer (n t%ur // move default stress: compounds of toernooi | ||||
toere (_ tu:r@ // restore default stress | toere (_ tu:r@ // restore default stress | ||||
toi (let t%OI // move default stress: toilet and compounds | |||||
toilet t%OIlEt // move default stress: toilet and compounds: toiletartikel/-emmer/-opsigter | |||||
tokke (lo t%Ok@ // move default stress: tokkelos/-sie/tokkelok and compounds | tokke (lo t%Ok@ // move default stress: tokkelos/-sie/tokkelok and compounds | ||||
tok (tokk t%Ok // move default stress: toktokkie/-s/-spelery, etc. | tok (tokk t%Ok // move default stress: toktokkie/-s/-spelery, etc. | ||||
tombola t%Ombo@l%a // move default stress: tombola and compounds | tombola t%Ombo@l%a // move default stress: tombola and compounds | ||||
@) toris (_ t'o@r@s // fix stress: pectoris/klitoris | @) toris (_ t'o@r@s // fix stress: pectoris/klitoris | ||||
_) tor (nyn t%Or // move default stress: tornyn/-e and compounds | _) tor (nyn t%Or // move default stress: tornyn/-e and compounds | ||||
_) tos (ka t%Os // move default stress: Toskaanse/Toskane | _) tos (ka t%Os // move default stress: Toskaanse/Toskane | ||||
tser (tjie ts@r // fix e sound: (skoen)poetsertjie/weerkaatsertjie | |||||
ttel t@l // many compounds of bottel/skottelgoed | ttel t@l // many compounds of bottel/skottelgoed | ||||
tuberkulose t%yb@rk%ylo@s@ // fix stress; e sound in compounds: tuberkulose/-behandeling | tuberkulose t%yb@rk%ylo@s@ // fix stress; e sound in compounds: tuberkulose/-behandeling | ||||
tug (A tWx2_ // fix u sound: (on)tugondersoek/-oortreding/-ordonnansie | tug (A tWx2_ // fix u sound: (on)tugondersoek/-oortreding/-ordonnansie | ||||
_) vanklik faNkl@k // (on)ontvanklik/-e/-er/-heid | _) vanklik faNkl@k // (on)ontvanklik/-e/-er/-heid | ||||
vanself (spr f%ans%&lf // move default stress: vanselfsprekend/-e/-heid | vanself (spr f%ans%&lf // move default stress: vanselfsprekend/-e/-heid | ||||
vari (A v%ar%i // fix v sound and stress: variasie/-s/varieer | vari (A v%ar%i // fix v sound and stress: variasie/-s/varieer | ||||
va (sal v%a // fix stress and v and a sounds: vasal/-le | |||||
vaseline v%as@lin // fix stress, v and vowel sounds: vaseline/-bottel, etc. | vaseline v%as@lin // fix stress, v and vowel sounds: vaseline/-bottel, etc. | ||||
vasste (l fast& // fix e sound: vasstel(ling/-lende) | vasste (l fast& // fix e sound: vasstel(ling/-lende) | ||||
_) vat (A@ fat_ // fix a sound: vatafstand/-orgaan | _) vat (A@ fat_ // fix a sound: vatafstand/-orgaan | ||||
ve (l f& // maagvel, stress on 1st slb | ve (l f& // maagvel, stress on 1st slb | ||||
@) vel (A fe@l // aanbeveling/aanbevelingsbrief | @) vel (A fe@l // aanbeveling/aanbevelingsbrief | ||||
vel (djie f&l // fix -djie sound: veldjie(s) and compounds | |||||
veld (C f< // fix d sound: veldreuk/-radio/-rantsoen, etc. | veld (C f< // fix d sound: veldreuk/-radio/-rantsoen, etc. | ||||
veld (eks f<_ // fix d sound, pause: veldekskursie/-ekspedisie/-s | veld (eks f<_ // fix d sound, pause: veldekskursie/-ekspedisie/-s | ||||
veld (o f<_ // fix d sound, pause: veldorgideë/-opsigter/-oppervlakte/-opname, etc. | veld (o f<_ // fix d sound, pause: veldorgideë/-opsigter/-oppervlakte/-opname, etc. | ||||
veld (t f&l // eliminate double t sound: veldtog and many compounds, Langeveldt, Springveldt | |||||
vele f'e@l@ | vele f'e@l@ | ||||
ven (detta v%En // fix stress and v sound: vendetta and compounds | ven (detta v%En // fix stress and v sound: vendetta and compounds | ||||
ven (dusie f@n // fix stress and e sound: vendusie and compounds | ven (dusie f@n // fix stress and e sound: vendusie and compounds | ||||
vol (kome f%Ol | vol (kome f%Ol | ||||
vo (llA_ fO // volle/vollê/Volla - exception to: vo (lC f%O | vo (llA_ fO // volle/vollê/Volla - exception to: vo (lC f%O | ||||
volle (dig f%Ole@ // fix stress and e sound: (on)volledig/-e/-heid... | volle (dig f%Ole@ // fix stress and e sound: (on)volledig/-e/-heid... | ||||
voll (engte fOlE | |||||
volle (ngte fOlE | |||||
vo (ller fO // exception to: vo (lC f%O | vo (ller fO // exception to: vo (lC f%O | ||||
volles (_ fOl@s // fix stress and e sound: volles/passievolles, etc. | volles (_ fOl@s // fix stress and e sound: volles/passievolles, etc. | ||||
vol (hou_ fOl // exception to: vo (lC f%O | vol (hou_ fOl // exception to: vo (lC f%O |
idly aIdlI | idly aIdlI | ||||
idiocy IdI@si | idiocy IdI@si | ||||
ifrog $alt6 | ifrog $alt6 | ||||
ignoramus Igno@r'eIm@s | |||||
ignoramus IgnO@r'eIm@s | |||||
illiterate $alt2 | illiterate $alt2 | ||||
illumine $alt2 | illumine $alt2 | ||||
imagery ImIdZri | imagery ImIdZri | ||||
nonetheless nVnD@l'Es | nonetheless nVnD@l'Es | ||||
nosedive noUzdaIv | nosedive noUzdaIv | ||||
nosir noUs3: | nosir noUs3: | ||||
?5 nosir noUsIR | |||||
not noUt // for noted, notable, etc | not noUt // for noted, notable, etc | ||||
nots n0ts | nots n0ts | ||||
(nôtre dame) noUtr@'dA:m | (nôtre dame) noUtr@'dA:m | ||||
sinus saIn@s | sinus saIn@s | ||||
siphon $alt2 | siphon $alt2 | ||||
sir s,3: $only | sir s,3: $only | ||||
?5 sir s,VR $only | |||||
?5 sir s,IR $only | |||||
siren saIr@n | siren saIr@n | ||||
site saIt // for sited | site saIt // for sited | ||||
ski ski: | ski ski: | ||||
son sVn | son sVn | ||||
sonar soUnA@ | sonar soUnA@ | ||||
sonny sVnI | sonny sVnI | ||||
sooth su:T $only | |||||
sopapilla soUp@p'i:@ | |||||
sope soUpeI | sope soUpeI | ||||
sorbet sO@beI | |||||
souffle su:fl'eI | souffle su:fl'eI | ||||
soundbite saUndbaIt | soundbite saUndbaIt | ||||
souvenir su:v@n'i@3 | souvenir su:v@n'i@3 | ||||
sopapilla soUp@p'i:@ | |||||
sorbet sO@beI | |||||
soyabean sOI@bi:n | soyabean sOI@bi:n | ||||
specific sp@sIfIk | specific sp@sIfIk | ||||
specimen spEsI2m@n | specimen spEsI2m@n | ||||
tamale ta#mA:li | tamale ta#mA:li | ||||
tampon tamp0n | tampon tamp0n | ||||
tangerine tandZ@r'i:n | tangerine tandZ@r'i:n | ||||
taoiseach ti:S@x | |||||
tapestry tapI#stri | tapestry tapI#stri | ||||
tarantula t@rantS@l@ | tarantula t@rantS@l@ | ||||
tardis $alt1 | tardis $alt1 | ||||
ye ji: $u+ | ye ji: $u+ | ||||
yea jeI | yea jeI | ||||
yessir jEss3: | yessir jEss3: | ||||
?5 yessir jEssIR | |||||
yoghurt j0g3t | yoghurt j0g3t | ||||
?3 yoghurt joUg3t | ?3 yoghurt joUg3t | ||||
?3 yogurt joUg3t | ?3 yogurt joUg3t | ||||
?!3 Anthony ant@ni | ?!3 Anthony ant@ni | ||||
Anton ant0n | Anton ant0n | ||||
Anya anj@ | Anya anj@ | ||||
Aoife i:f@ | |||||
Aoiffe i:f@ | |||||
Aphrodite afr@d'aIti | Aphrodite afr@d'aIti | ||||
Archibald A@tSIbO:ld | Archibald A@tSIbO:ld | ||||
Archie A@tSi | Archie A@tSi | ||||
Salman sa#lmA:n | Salman sa#lmA:n | ||||
Samantha sa#manT@ | Samantha sa#manT@ | ||||
(Santa claus) s'ant@||kl'O:z | (Santa claus) s'ant@||kl'O:z | ||||
Saoirse si@S@ | |||||
Sarah se@r@ | Sarah se@r@ | ||||
Sarisa $alt3 | Sarisa $alt3 | ||||
Seamus SeIm@s | Seamus SeIm@s | ||||
Simon saIm@n | Simon saIm@n | ||||
Sinead SI2neId | Sinead SI2neId | ||||
Sinéad SI2neId | Sinéad SI2neId | ||||
Siobhan S@vO:n | |||||
Siobhán S@vO:n | |||||
Siobhan SI2vO:n | |||||
Siobhán SI2vO:n | |||||
Sonia s0nj@ | Sonia s0nj@ | ||||
Sophia soUf'i@ | Sophia soUf'i@ | ||||
Sophie soUfi | Sophie soUfi |
sw) a (m_ a | sw) a (m_ a | ||||
sw) a (nk a | sw) a (nk a | ||||
ao eI0 | ao eI0 | ||||
mh) ao eI // Irish, e.g. 'mhaol' /weIl/ | |||||
m) ao aU | m) ao aU | ||||
p) ao aU | p) ao aU | ||||
t) ao aU | t) ao aU | ||||
ao (_ =aU | ao (_ =aU | ||||
aois (_ i:S | |||||
aoise (_ i:S@ | |||||
ao (ism aU | |||||
ao (ist aU | |||||
aoi (C i: // Irish, e.g. 'Aoife' /i:f@/ | |||||
aoir (C e@ // Irish, e.g. 'Saoirse' /se@S@/ | |||||
?3 aoir (C 3: // Irish, e.g. 'Saoirse' /s3:S@/ | |||||
g) ao (l eI@ | g) ao (l eI@ | ||||
aor eI'o@ | aor eI'o@ | ||||
m) ao (ri aU | m) ao (ri aU | ||||
_n) a (tional a | _n) a (tional a | ||||
@) a (tious 'eI | @) a (tious 'eI | ||||
ell) a (trix @ | ell) a (trix @ | ||||
a (triC 'eI | |||||
a (trix 'eI | |||||
a (trice 'eI | |||||
n) a (tur eI | n) a (tur eI | ||||
n) a (tura a | n) a (tura a | ||||
&) a (ture_ @ | &) a (ture_ @ | ||||
_m) ag (ell a#dZ | _m) ag (ell a#dZ | ||||
Cp) age (_ eIdZ | Cp) age (_ eIdZ | ||||
pp) age (_ I2dZ | pp) age (_ I2dZ | ||||
_ant) ag 'ag | |||||
_ant) ag (on 'ag | |||||
enr) ag (e_ 'eIdZ | enr) ag (e_ 'eIdZ | ||||
outr) ag (e_ eIdZ | outr) ag (e_ eIdZ | ||||
der) ag (e_ eIdZ | der) ag (e_ eIdZ | ||||
may) be (_ bi: | may) be (_ bi: | ||||
_) be (CA bI# | _) be (CA bI# | ||||
_) bete (lg bi:t@ | _) bete (lg bi:t@ | ||||
_) be (C% bE | |||||
_) be (C%+ bE | |||||
_) be (atiC b%i: | _) be (atiC b%i: | ||||
_) be (b bi: | _) be (b bi: | ||||
_) be (cl bI# | _) be (cl bI# | ||||
_) be (kn bI# | _) be (kn bI# | ||||
_) belarus bEl@r'u:s | _) belarus bEl@r'u:s | ||||
_) be (lC bE | _) be (lC bE | ||||
_) be (llig bI# | |||||
_) be (llig+ bI# | |||||
_) be (re bE | _) be (re bE | ||||
_) be (sC bI# | _) be (sC bI# | ||||
_) be (stia bE | _) be (stia bE | ||||
e (Cical 'E | e (Cical 'E | ||||
e (CiuB i: | e (CiuB i: | ||||
&) e (_ | &) e (_ | ||||
aoiC) e (_ @ // Irish, e.g. 'Aoife' /i:f@/ | |||||
aoiCC) e (_ @ // Irish, e.g. 'Saoirse' /se@S@/ | |||||
acB) e (_ %I | acB) e (_ %I | ||||
XC) e (_N i: | XC) e (_N i: | ||||
vert) e (b I | vert) e (b I | ||||
y) ed (_S2v d# | y) ed (_S2v d# | ||||
debut) ed (_S2 d# | debut) ed (_S2 d# | ||||
edly (_S4m I#dl%i | edly (_S4m I#dl%i | ||||
eg) edly (_S3m I#dl%i | |||||
c) ed (e_ 'i:d | c) ed (e_ 'i:d | ||||
p) ed (e_ i:d | p) ed (e_ i:d | ||||
p) edal Ed@L | p) edal Ed@L | ||||
en (core 0n | en (core 0n | ||||
&) ency (_ @ns%i | &) ency (_ @ns%i | ||||
ency (cli %EnsI | ency (cli %EnsI | ||||
_) en (dg@ %En | |||||
_) en (dp@ %En | |||||
k) en (d_ En | k) en (d_ En | ||||
s) en (d_ En | s) en (d_ En | ||||
t) en (d_ En | t) en (d_ En | ||||
exp) eri (en i@rI2 | exp) eri (en i@rI2 | ||||
XC) er 3: | XC) er 3: | ||||
th) er (@ 3: | th) er (@ 3: | ||||
h) er (@ %3 | |||||
h) er (nan %3 | |||||
X) er (A E#r | X) er (A E#r | ||||
_h) eretical I#rEtIk@L | _h) eretical I#rEtIk@L | ||||
_qu) er 3: | _qu) er 3: | ||||
&z) es (_S2 %I#z | &z) es (_S2 %I#z | ||||
&C) es (_S1i z | &C) es (_S1i z | ||||
xus) es (_S2 %I#z | xus) es (_S2 %I#z | ||||
tamus) es (_S2 %I#z // hippopotamuses | |||||
es (carp I2s | es (carp I2s | ||||
es (cape %Es | es (cape %Es | ||||
es (capi %Es | es (capi %Es | ||||
&) ford (_S4 f3d | &) ford (_S4 f3d | ||||
&f) ford (_S4 3d | &f) ford (_S4 3d | ||||
for (see f%O@ | for (see f%O@ | ||||
for (ward f'o@ // straightforward | |||||
for (ward f'O@ | |||||
ft (en f | ft (en f | ||||
&) ful (_S3i f@L | &) ful (_S3i f@L | ||||
_) metall (ic m@tal | _) metall (ic m@tal | ||||
metabo m@t'ab0 | metabo m@t'ab0 | ||||
_) meta (llu m%Eta | _) meta (llu m%Eta | ||||
mh (ao w // Irish, e.g. 'mhaol' /weIl/ | |||||
aoi) mh (e v // Irish, e.g. 'Caoimhe' /ki:v@/ | |||||
&) mobile m@bi:l | &) mobile m@bi:l | ||||
_) mocha moUk@ | _) mocha moUk@ | ||||
mono (ga m@n'0 | mono (ga m@n'0 | ||||
&) mouth (_ m@T | &) mouth (_ m@T | ||||
&) mouth (_$w_alt1 maUT | &) mouth (_$w_alt1 maUT | ||||
_) multi mVlti | _) multi mVlti | ||||
_) multi (pl m,VltI | |||||
_) multi (@@P5 m,VltI | _) multi (@@P5 m,VltI | ||||
.group mi | .group mi | ||||
Co) s (Er z | Co) s (Er z | ||||
Co) s (En z | Co) s (En z | ||||
e) s (d z | e) s (d z | ||||
aoi) s (e S // Irish, e.g. 'Laoise' | |||||
aoi) seach S@x // Irish, e.g. 'Taoiseach' | |||||
aoi) sigh Si // Irish, e.g. 'Taoisigh' | |||||
aoir) s (e S // Irish, e.g. 'Saoirse' | |||||
_) se (clu sI# | _) se (clu sI# | ||||
secur sI#kjU@ | secur sI#kjU@ | ||||
_) se (duc sI# | _) se (duc sI# | ||||
&) s (ic_ z | &) s (ic_ z | ||||
n) s (ic_ s | n) s (ic_ s | ||||
ss (ic s | ss (ic s | ||||
mu) s (e z | |||||
mu) s (ic z | mu) s (ic z | ||||
ea) s (ie z | ea) s (ie z | ||||
ea) s (il z | ea) s (il z | ||||
th (ill th | th (ill th | ||||
gh) th (A th | gh) th (A th | ||||
ee) thing DI2N | ee) thing DI2N | ||||
soo) th D | |||||
soo) th (say T | |||||
the (_ D | the (_ D | ||||
&) th (L03_ =T | &) th (L03_ =T | ||||
ou) thed (_ Dd | ou) thed (_ Dd |
// * Farsi Language fa (or Parsi or Persian) fa_list Version 3.133 | |||||
// * This file was written by Shadyar Khodayari and Ehsan Esmaili, who managed the collection of exceptional words. 05-10-2017 | |||||
// * Farsi Language fa (or Parsi or Persian) fa_list Version 3.134 | |||||
// * This file was written by Shadyar Khodayari and Ehsan Esmaili, who managed the collection of exceptional words. 06-24-2017 | |||||
//********* | //********* | ||||
// * This program is free software; you can redistribute it and/or modify * | // * This program is free software; you can redistribute it and/or modify * | ||||
// * it under the terms of the GNU General Public License as published by * | // * it under the terms of the GNU General Public License as published by * | ||||
آموخت Amuxt | آموخت Amuxt | ||||
آموزد Amuzad | آموزد Amuzad | ||||
آموزش AmuzeS | آموزش AmuzeS | ||||
آمپر AmpeR | |||||
آمپرمتر AmpeRmetR | آمپرمتر AmpeRmetR | ||||
آمپلیفایر AmpelifAjeR | آمپلیفایر AmpelifAjeR | ||||
آمپیریسم AmpiRism | آمپیریسم AmpiRism | ||||
درایه deRAje | درایه deRAje | ||||
درایو deRAjv | درایو deRAjv | ||||
درایور deRAjveR | درایور deRAjveR | ||||
درباره daR'bAReje: | |||||
دربازکن daRbAzkon | دربازکن daRbAzkon | ||||
دربدر daRbedaR | دربدر daRbedaR | ||||
دربندکشیده daRbandkeSide | دربندکشیده daRbandkeSide | ||||
شدیدا Sadidan | شدیدا Sadidan | ||||
شدیداللحن Sadidollahn | شدیداللحن Sadidollahn | ||||
شدیم Sodim | شدیم Sodim | ||||
شراادی SA:_d:jA:_R | |||||
شرافت SeRAfat | شرافت SeRAfat | ||||
شراپنل SeRApnel | شراپنل SeRApnel | ||||
شراکت SeRAkat | شراکت SeRAkat | ||||
شربت SaRbat | شربت SaRbat | ||||
شرت 'SoRt | شرت 'SoRt | ||||
شرتکات SoRtkAt | شرتکات SoRtkAt | ||||
شراادی SA:_d:jA:_R | |||||
شرشر SeRSeR | شرشر SeRSeR | ||||
شرطه SoRte: | شرطه SoRte: | ||||
شرعا SaR?an | شرعا SaR?an |
// * Farsi Language fa (or Parsi or Persian) fa_rules Version 3.133 | |||||
// * This file was written by Shadyar Khodayari 05-10-2017 | |||||
// * Farsi Language fa (or Parsi or Persian) fa_rules Version 3.134 | |||||
// * This file was written by Shadyar Khodayari 06-24-2017 | |||||
//********* | //********* | ||||
// * This program is free software; you can redistribute it and/or modify * | // * This program is free software; you can redistribute it and/or modify * | ||||
// * it under the terms of the GNU General Public License as published by * | // * it under the terms of the GNU General Public License as published by * | ||||
L09L09L09L09) م (L03L09L09L09_ ma | L09L09L09L09) م (L03L09L09L09_ ma | ||||
// Prefixes م | // Prefixes م | ||||
_) م (L03L09L03_$noprefixP1@ ma | |||||
_) م (L03L04L03_$noprefixP1@ ma | |||||
_) می (L03L09+$noprefixP2@ mi | _) می (L03L09+$noprefixP2@ mi | ||||
_) می (آL09L09$noprefixP2@ mi | _) می (آL09L09$noprefixP2@ mi | ||||
_) می (وL09L09$noprefixP2@ mi | _) می (وL09L09$noprefixP2@ mi |
.*.swp | .*.swp | ||||
*~ | |||||
# intermediate files: | # intermediate files: | ||||
* `data/espeak-ng` data files for eSpeak NG extended data. | * `data/espeak-ng` data files for eSpeak NG extended data. | ||||
* espeak-ng PropList property lookup as part of the `ucd_property` API. | * espeak-ng PropList property lookup as part of the `ucd_property` API. | ||||
## 9.0.0.1 - (In Progress) | |||||
## 10.0.0 - 2017-06-25 | |||||
* Add `iswblank` and `iswxdigit` compatibility. | * Add `iswblank` and `iswxdigit` compatibility. | ||||
* Improve ctype compatibility. | * Improve ctype compatibility. | ||||
* PropList property lookup. | |||||
* PropList and emoji-data property lookup. | |||||
* Support building with a C89 compiler. | |||||
* Update to Unicode Character Data 10.0.0. | |||||
* Unicode Emoji 5.0. | |||||
## 9.0.0 - 2016-12-28 | ## 9.0.0 - 2016-12-28 | ||||
############################# Unicode Data #################################### | ############################# Unicode Data #################################### | ||||
EMOJI_VERSION=4.0 | |||||
EMOJI_VERSION=5.0 | |||||
UCD_VERSION=@UCD_VERSION@ | UCD_VERSION=@UCD_VERSION@ | ||||
UCD_ROOTDIR=data/ucd | UCD_ROOTDIR=data/ucd | ||||
UCD_SRCDIR=http://www.unicode.org/Public | UCD_SRCDIR=http://www.unicode.org/Public | ||||
data/emoji/emoji-data.txt: | data/emoji/emoji-data.txt: | ||||
mkdir -pv data/emoji | mkdir -pv data/emoji | ||||
curl ${UCD_SRCDIR}/emoji/${EMOJI_VERSION}/emoji-data.txt > $@ | |||||
curl ${UCD_SRCDIR}/emoji/${EMOJI_VERSION}/emoji-data.txt -o $@ | |||||
data/ucd/PropList.txt: | data/ucd/PropList.txt: | ||||
mkdir -pv data/ucd | mkdir -pv data/ucd | ||||
curl ${UCD_SRCDIR}/${UCD_VERSION}/ucd/PropList.txt > $@ | |||||
curl ${UCD_SRCDIR}/${UCD_VERSION}/ucd/PropList.txt -o $@ | |||||
data/ucd/DerivedCoreProperties.txt: | data/ucd/DerivedCoreProperties.txt: | ||||
mkdir -pv data/ucd | mkdir -pv data/ucd | ||||
curl ${UCD_SRCDIR}/${UCD_VERSION}/ucd/DerivedCoreProperties.txt > $@ | |||||
curl ${UCD_SRCDIR}/${UCD_VERSION}/ucd/DerivedCoreProperties.txt -o $@ | |||||
data/ucd/PropertyValueAliases.txt: | data/ucd/PropertyValueAliases.txt: | ||||
mkdir -pv data/ucd | mkdir -pv data/ucd | ||||
curl ${UCD_SRCDIR}/${UCD_VERSION}/ucd/PropertyValueAliases.txt > $@ | |||||
curl ${UCD_SRCDIR}/${UCD_VERSION}/ucd/PropertyValueAliases.txt -o $@ | |||||
data/ucd/Scripts.txt: | data/ucd/Scripts.txt: | ||||
mkdir -pv data/ucd | mkdir -pv data/ucd | ||||
curl ${UCD_SRCDIR}/${UCD_VERSION}/ucd/Scripts.txt > $@ | |||||
curl ${UCD_SRCDIR}/${UCD_VERSION}/ucd/Scripts.txt -o $@ | |||||
data/ucd/UnicodeData.txt: | data/ucd/UnicodeData.txt: | ||||
mkdir -pv data/ucd | mkdir -pv data/ucd | ||||
curl ${UCD_SRCDIR}/${UCD_VERSION}/ucd/UnicodeData.txt > $@ | |||||
curl ${UCD_SRCDIR}/${UCD_VERSION}/ucd/UnicodeData.txt -o $@ | |||||
############################# documentation ################################### | ############################# documentation ################################### | ||||
AC_PREREQ([2.65]) | AC_PREREQ([2.65]) | ||||
AC_INIT([Unicode Character Database Tools], [9.0.0], [https://github.com/rhdunn/ucd-tools/issues], [ucd-tools], [https://github.com/rhdunn/ucd-tools]) | |||||
AC_INIT([Unicode Character Database Tools], [10.0.0], [https://github.com/rhdunn/ucd-tools/issues], [ucd-tools], [https://github.com/rhdunn/ucd-tools]) | |||||
AM_INIT_AUTOMAKE() | AM_INIT_AUTOMAKE() | ||||
m4_ifdef([AM_SILENT_RULES],[AM_SILENT_RULES]) | m4_ifdef([AM_SILENT_RULES],[AM_SILENT_RULES]) | ||||
dnl ================================================================ | dnl ================================================================ | ||||
AC_CHECK_HEADERS([stddef.h]) dnl C89 | AC_CHECK_HEADERS([stddef.h]) dnl C89 | ||||
AC_CHECK_FUNCS([iswblank]) dnl C99 | |||||
AC_TYPE_UINT8_T | AC_TYPE_UINT8_T | ||||
AC_TYPE_UINT32_T | AC_TYPE_UINT32_T | ||||
dnl ================================================================ | dnl ================================================================ | ||||
AC_ARG_WITH([unicode-version], | AC_ARG_WITH([unicode-version], | ||||
[AS_HELP_STRING([--with-unicode-version], [Unicode version to support @<:@default=9.0.0@:>@])], | |||||
[AS_HELP_STRING([--with-unicode-version], [Unicode version to support @<:@default=10.0.0@:>@])], | |||||
[AS_IF([test x"$withval" != x], | [AS_IF([test x"$withval" != x], | ||||
[UCD_VERSION="$withval"])], | [UCD_VERSION="$withval"])], | ||||
[UCD_VERSION="9.0.0"]) | |||||
[UCD_VERSION="10.0.0"]) | |||||
AC_SUBST(UCD_VERSION) | AC_SUBST(UCD_VERSION) | ||||
* along with ucd-tools. If not, see <http://www.gnu.org/licenses/>. | * along with ucd-tools. If not, see <http://www.gnu.org/licenses/>. | ||||
*/ | */ | ||||
// NOTE: This file is automatically generated from the UnicodeData.txt file in | |||||
// the Unicode Character database by the ucd-tools/tools/categories.py script. | |||||
/* NOTE: This file is automatically generated from the UnicodeData.txt file in | |||||
* the Unicode Character database by the ucd-tools/tools/categories.py script. | |||||
*/ | |||||
#include "ucd/ucd.h" | #include "ucd/ucd.h" | ||||
#include <stddef.h> | #include <stddef.h> | ||||
// Unicode Character Data 9.0.0 | |||||
/* Unicode Character Data 9.0.0 */ | |||||
struct case_conversion_entry | struct case_conversion_entry | ||||
{ | { |
switch (ucd_lookup_category(c)) | switch (ucd_lookup_category(c)) | ||||
{ | { | ||||
case UCD_CATEGORY_Zs: | case UCD_CATEGORY_Zs: | ||||
switch (c) // Exclude characters with the <noBreak> DispositionType | |||||
switch (c) /* Exclude characters with the <noBreak> DispositionType */ | |||||
{ | { | ||||
case 0x00A0: // U+00A0 : NO-BREAK SPACE | |||||
case 0x2007: // U+2007 : FIGURE SPACE | |||||
case 0x202F: // U+202F : NARROW NO-BREAK SPACE | |||||
case 0x00A0: /* U+00A0 : NO-BREAK SPACE */ | |||||
case 0x2007: /* U+2007 : FIGURE SPACE */ | |||||
case 0x202F: /* U+202F : NARROW NO-BREAK SPACE */ | |||||
return 0; | return 0; | ||||
} | } | ||||
return 1; | return 1; | ||||
case UCD_CATEGORY_Cc: | case UCD_CATEGORY_Cc: | ||||
return c == 0x09; // U+0009 : CHARACTER TABULATION | |||||
return c == 0x09; /* U+0009 : CHARACTER TABULATION */ | |||||
default: | default: | ||||
return 0; | return 0; | ||||
} | } | ||||
int ucd_isdigit(codepoint_t c) | int ucd_isdigit(codepoint_t c) | ||||
{ | { | ||||
return (c >= 0x30 && c <= 0x39); // [0-9] | |||||
return (c >= 0x30 && c <= 0x39); /* [0-9] */ | |||||
} | } | ||||
int ucd_isgraph(codepoint_t c) | int ucd_isgraph(codepoint_t c) | ||||
case UCD_CATEGORY_Zp: | case UCD_CATEGORY_Zp: | ||||
return 1; | return 1; | ||||
case UCD_CATEGORY_Zs: | case UCD_CATEGORY_Zs: | ||||
switch (c) // Exclude characters with the <noBreak> DispositionType | |||||
switch (c) /* Exclude characters with the <noBreak> DispositionType */ | |||||
{ | { | ||||
case 0x00A0: // U+00A0 : NO-BREAK SPACE | |||||
case 0x2007: // U+2007 : FIGURE SPACE | |||||
case 0x202F: // U+202F : NARROW NO-BREAK SPACE | |||||
case 0x00A0: /* U+00A0 : NO-BREAK SPACE */ | |||||
case 0x2007: /* U+2007 : FIGURE SPACE */ | |||||
case 0x202F: /* U+202F : NARROW NO-BREAK SPACE */ | |||||
return 0; | return 0; | ||||
} | } | ||||
return 1; | return 1; | ||||
case UCD_CATEGORY_Cc: | case UCD_CATEGORY_Cc: | ||||
switch (c) // Include control characters marked as White_Space | |||||
switch (c) /* Include control characters marked as White_Space */ | |||||
{ | { | ||||
case 0x09: // U+0009 : CHARACTER TABULATION | |||||
case 0x0A: // U+000A : LINE FEED | |||||
case 0x0B: // U+000B : LINE TABULATION | |||||
case 0x0C: // U+000C : FORM FEED | |||||
case 0x0D: // U+000D : CARRIAGE RETURN | |||||
case 0x85: // U+0085 : NEXT LINE | |||||
case 0x09: /* U+0009 : CHARACTER TABULATION */ | |||||
case 0x0A: /* U+000A : LINE FEED */ | |||||
case 0x0B: /* U+000B : LINE TABULATION */ | |||||
case 0x0C: /* U+000C : FORM FEED */ | |||||
case 0x0D: /* U+000D : CARRIAGE RETURN */ | |||||
case 0x85: /* U+0085 : NEXT LINE */ | |||||
return 1; | return 1; | ||||
} | } | ||||
default: | default: | ||||
int ucd_isxdigit(codepoint_t c) | int ucd_isxdigit(codepoint_t c) | ||||
{ | { | ||||
return (c >= 0x30 && c <= 0x39) // [0-9] | |||||
|| (c >= 0x41 && c <= 0x46) // [A-F] | |||||
|| (c >= 0x61 && c <= 0x66); // [a-f] | |||||
return (c >= 0x30 && c <= 0x39) /* [0-9] */ | |||||
|| (c >= 0x41 && c <= 0x46) /* [A-F] */ | |||||
|| (c >= 0x61 && c <= 0x66); /* [a-f] */ | |||||
} | } |
UCD_SCRIPT_Geok, /**< @brief Khutsuri Script */ | UCD_SCRIPT_Geok, /**< @brief Khutsuri Script */ | ||||
UCD_SCRIPT_Geor, /**< @brief Georgian Script */ | UCD_SCRIPT_Geor, /**< @brief Georgian Script */ | ||||
UCD_SCRIPT_Glag, /**< @brief Glagolitic Script */ | UCD_SCRIPT_Glag, /**< @brief Glagolitic Script */ | ||||
UCD_SCRIPT_Gonm, /**< @brief Masaram Gondi */ | |||||
UCD_SCRIPT_Goth, /**< @brief Gothic Script */ | UCD_SCRIPT_Goth, /**< @brief Gothic Script */ | ||||
UCD_SCRIPT_Gran, /**< @brief Grantha Script */ | UCD_SCRIPT_Gran, /**< @brief Grantha Script */ | ||||
UCD_SCRIPT_Grek, /**< @brief Greek Script */ | UCD_SCRIPT_Grek, /**< @brief Greek Script */ | ||||
UCD_SCRIPT_Sind, /**< @brief Sindhi Script */ | UCD_SCRIPT_Sind, /**< @brief Sindhi Script */ | ||||
UCD_SCRIPT_Sinh, /**< @brief Sinhala Script */ | UCD_SCRIPT_Sinh, /**< @brief Sinhala Script */ | ||||
UCD_SCRIPT_Sora, /**< @brief Sora Sompeng Script */ | UCD_SCRIPT_Sora, /**< @brief Sora Sompeng Script */ | ||||
UCD_SCRIPT_Soyo, /**< @brief Soyombo */ | |||||
UCD_SCRIPT_Sund, /**< @brief Sundanese Script */ | UCD_SCRIPT_Sund, /**< @brief Sundanese Script */ | ||||
UCD_SCRIPT_Sylo, /**< @brief Syloti Nagri Script */ | UCD_SCRIPT_Sylo, /**< @brief Syloti Nagri Script */ | ||||
UCD_SCRIPT_Syrc, /**< @brief Syriac Script */ | UCD_SCRIPT_Syrc, /**< @brief Syriac Script */ | ||||
UCD_SCRIPT_Xpeo, /**< @brief Old Persian Script */ | UCD_SCRIPT_Xpeo, /**< @brief Old Persian Script */ | ||||
UCD_SCRIPT_Xsux, /**< @brief Cuneiform Script */ | UCD_SCRIPT_Xsux, /**< @brief Cuneiform Script */ | ||||
UCD_SCRIPT_Yiii, /**< @brief Yi Script */ | UCD_SCRIPT_Yiii, /**< @brief Yi Script */ | ||||
UCD_SCRIPT_Zanb, /**< @brief Zanabazar Square */ | |||||
UCD_SCRIPT_Zinh, /**< @brief Inherited Script */ | UCD_SCRIPT_Zinh, /**< @brief Inherited Script */ | ||||
UCD_SCRIPT_Zmth, /**< @brief Mathematical Notation */ | UCD_SCRIPT_Zmth, /**< @brief Mathematical Notation */ | ||||
UCD_SCRIPT_Zsym, /**< @brief Symbols */ | UCD_SCRIPT_Zsym, /**< @brief Symbols */ | ||||
#define UCD_PROPERTY_EMOJI_PRESENTATION 0x0000000400000000ull /**< @brief Emoji_Presentation */ | #define UCD_PROPERTY_EMOJI_PRESENTATION 0x0000000400000000ull /**< @brief Emoji_Presentation */ | ||||
#define UCD_PROPERTY_EMOJI_MODIFIER 0x0000000800000000ull /**< @brief Emoji_Modifier */ | #define UCD_PROPERTY_EMOJI_MODIFIER 0x0000000800000000ull /**< @brief Emoji_Modifier */ | ||||
#define UCD_PROPERTY_EMOJI_MODIFIER_BASE 0x0000001000000000ull /**< @brief Emoji_Modifier_Base */ | #define UCD_PROPERTY_EMOJI_MODIFIER_BASE 0x0000001000000000ull /**< @brief Emoji_Modifier_Base */ | ||||
#define UCD_PROPERTY_REGIONAL_INDICATOR 0x0000002000000000ull /**< @brief Regional_Indicator */ | |||||
#define UCD_PROPERTY_EMOJI_COMPONENT 0x0000004000000000ull /**< @brief Emoji_Component */ | |||||
// eSpeak NG extended properties: | // eSpeak NG extended properties: | ||||
#define ESPEAKNG_PROPERTY_INVERTED_TERMINAL_PUNCTUATION 0x0010000000000000ull /**< @brief Inverted_Terminal_Punctuation */ | #define ESPEAKNG_PROPERTY_INVERTED_TERMINAL_PUNCTUATION 0x0010000000000000ull /**< @brief Inverted_Terminal_Punctuation */ | ||||
Geok = UCD_SCRIPT_Geok, /**< @brief Khutsuri Script */ | Geok = UCD_SCRIPT_Geok, /**< @brief Khutsuri Script */ | ||||
Geor = UCD_SCRIPT_Geor, /**< @brief Geirgian Script */ | Geor = UCD_SCRIPT_Geor, /**< @brief Geirgian Script */ | ||||
Glag = UCD_SCRIPT_Glag, /**< @brief Glagolitic Script */ | Glag = UCD_SCRIPT_Glag, /**< @brief Glagolitic Script */ | ||||
Gonm = UCD_SCRIPT_Gonm, /**< @brief Masaram Gondi */ | |||||
Goth = UCD_SCRIPT_Goth, /**< @brief Gothic Script */ | Goth = UCD_SCRIPT_Goth, /**< @brief Gothic Script */ | ||||
Gran = UCD_SCRIPT_Gran, /**< @brief Grantha Script */ | Gran = UCD_SCRIPT_Gran, /**< @brief Grantha Script */ | ||||
Grek = UCD_SCRIPT_Grek, /**< @brief Greek Script */ | Grek = UCD_SCRIPT_Grek, /**< @brief Greek Script */ | ||||
Sind = UCD_SCRIPT_Sind, /**< @brief Sindhi Script */ | Sind = UCD_SCRIPT_Sind, /**< @brief Sindhi Script */ | ||||
Sinh = UCD_SCRIPT_Sinh, /**< @brief Sinhala Script */ | Sinh = UCD_SCRIPT_Sinh, /**< @brief Sinhala Script */ | ||||
Sora = UCD_SCRIPT_Sora, /**< @brief Sora Sompeng Script */ | Sora = UCD_SCRIPT_Sora, /**< @brief Sora Sompeng Script */ | ||||
Soyo = UCD_SCRIPT_Soyo, /**< @brief Soyombo */ | |||||
Sund = UCD_SCRIPT_Sund, /**< @brief Sundanese Script */ | Sund = UCD_SCRIPT_Sund, /**< @brief Sundanese Script */ | ||||
Sylo = UCD_SCRIPT_Sylo, /**< @brief Syloti Nagri Script */ | Sylo = UCD_SCRIPT_Sylo, /**< @brief Syloti Nagri Script */ | ||||
Syrc = UCD_SCRIPT_Syrc, /**< @brief Syriac Script */ | Syrc = UCD_SCRIPT_Syrc, /**< @brief Syriac Script */ | ||||
Xpeo = UCD_SCRIPT_Xpeo, /**< @brief Old Persian Script */ | Xpeo = UCD_SCRIPT_Xpeo, /**< @brief Old Persian Script */ | ||||
Xsux = UCD_SCRIPT_Xsux, /**< @brief Cuneiform Script */ | Xsux = UCD_SCRIPT_Xsux, /**< @brief Cuneiform Script */ | ||||
Yiii = UCD_SCRIPT_Yiii, /**< @brief Yi Script */ | Yiii = UCD_SCRIPT_Yiii, /**< @brief Yi Script */ | ||||
Zanb = UCD_SCRIPT_Zanb, /**< @brief Zanabazar Square */ | |||||
Zinh = UCD_SCRIPT_Zinh, /**< @brief Inherited Script */ | Zinh = UCD_SCRIPT_Zinh, /**< @brief Inherited Script */ | ||||
Zmth = UCD_SCRIPT_Zmth, /**< @brief Mathematical Notation */ | Zmth = UCD_SCRIPT_Zmth, /**< @brief Mathematical Notation */ | ||||
Zsym = UCD_SCRIPT_Zsym, /**< @brief Symbols */ | Zsym = UCD_SCRIPT_Zsym, /**< @brief Symbols */ | ||||
Emoji_Presentation = UCD_PROPERTY_EMOJI_PRESENTATION, /**< @brief Emoji_Presentation */ | Emoji_Presentation = UCD_PROPERTY_EMOJI_PRESENTATION, /**< @brief Emoji_Presentation */ | ||||
Emoji_Modifier = UCD_PROPERTY_EMOJI_MODIFIER, /**< @brief Emoji_Modifier */ | Emoji_Modifier = UCD_PROPERTY_EMOJI_MODIFIER, /**< @brief Emoji_Modifier */ | ||||
Emoji_Modifier_Base = UCD_PROPERTY_EMOJI_MODIFIER_BASE, /**< @brief Emoji_Modifier_Base */ | Emoji_Modifier_Base = UCD_PROPERTY_EMOJI_MODIFIER_BASE, /**< @brief Emoji_Modifier_Base */ | ||||
Regional_Indicator = UCD_PROPERTY_REGIONAL_INDICATOR, /**< @brief Regional_Indicator */ | |||||
Emoji_Component = UCD_PROPERTY_EMOJI_COMPONENT, /**< @brief Emoji_Component */ | |||||
}; | }; | ||||
/** @brief Return the properties of the specified codepoint. | /** @brief Return the properties of the specified codepoint. |
case 0x2000: | case 0x2000: | ||||
if (c == 0x2065) return UCD_PROPERTY_OTHER_DEFAULT_IGNORABLE_CODE_POINT; | if (c == 0x2065) return UCD_PROPERTY_OTHER_DEFAULT_IGNORABLE_CODE_POINT; | ||||
break; | break; | ||||
case 0x2300: | |||||
if (c == 0x23FF) return UCD_PROPERTY_PATTERN_SYNTAX; | |||||
break; | |||||
case 0x2400: | case 0x2400: | ||||
if (c >= 0x2427 && c <= 0x243F) return UCD_PROPERTY_PATTERN_SYNTAX; | if (c >= 0x2427 && c <= 0x243F) return UCD_PROPERTY_PATTERN_SYNTAX; | ||||
if (c >= 0x244B && c <= 0x245F) return UCD_PROPERTY_PATTERN_SYNTAX; | if (c >= 0x244B && c <= 0x245F) return UCD_PROPERTY_PATTERN_SYNTAX; | ||||
if (c >= 0x2B96 && c <= 0x2B97) return UCD_PROPERTY_PATTERN_SYNTAX; | if (c >= 0x2B96 && c <= 0x2B97) return UCD_PROPERTY_PATTERN_SYNTAX; | ||||
if (c >= 0x2BBA && c <= 0x2BBC) return UCD_PROPERTY_PATTERN_SYNTAX; | if (c >= 0x2BBA && c <= 0x2BBC) return UCD_PROPERTY_PATTERN_SYNTAX; | ||||
if (c == 0x2BC9) return UCD_PROPERTY_PATTERN_SYNTAX; | if (c == 0x2BC9) return UCD_PROPERTY_PATTERN_SYNTAX; | ||||
if (c >= 0x2BD2 && c <= 0x2BEB) return UCD_PROPERTY_PATTERN_SYNTAX; | |||||
if (c >= 0x2BD3 && c <= 0x2BEB) return UCD_PROPERTY_PATTERN_SYNTAX; | |||||
if (c >= 0x2BF0 && c <= 0x2BFF) return UCD_PROPERTY_PATTERN_SYNTAX; | if (c >= 0x2BF0 && c <= 0x2BFF) return UCD_PROPERTY_PATTERN_SYNTAX; | ||||
break; | break; | ||||
case 0x2E00: | case 0x2E00: | ||||
if (c == 0x029D) return UCD_PROPERTY_SOFT_DOTTED; | if (c == 0x029D) return UCD_PROPERTY_SOFT_DOTTED; | ||||
break; | break; | ||||
case 0x0300: | case 0x0300: | ||||
if (c >= 0x03D0 && c <= 0x03D2) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu | |||||
if (c >= 0x03D0 && c <= 0x03D2) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */ | |||||
if (c == 0x03D5) return UCD_PROPERTY_OTHER_MATH; | if (c == 0x03D5) return UCD_PROPERTY_OTHER_MATH; | ||||
if (c >= 0x03F0 && c <= 0x03F1) return UCD_PROPERTY_OTHER_MATH; | if (c >= 0x03F0 && c <= 0x03F1) return UCD_PROPERTY_OTHER_MATH; | ||||
if (c == 0x03F3) return UCD_PROPERTY_SOFT_DOTTED; | if (c == 0x03F3) return UCD_PROPERTY_SOFT_DOTTED; | ||||
if (c >= 0x03F4 && c <= 0x03F5) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu | |||||
if (c >= 0x03F4 && c <= 0x03F5) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */ | |||||
break; | break; | ||||
case 0x0400: | case 0x0400: | ||||
if (c == 0x0456) return UCD_PROPERTY_SOFT_DOTTED; | if (c == 0x0456) return UCD_PROPERTY_SOFT_DOTTED; | ||||
if (c == 0x1ECB) return UCD_PROPERTY_SOFT_DOTTED; | if (c == 0x1ECB) return UCD_PROPERTY_SOFT_DOTTED; | ||||
break; | break; | ||||
case 0x2100: | case 0x2100: | ||||
if (c >= 0x210A && c <= 0x2113) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu | |||||
if (c >= 0x212F && c <= 0x2131) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu | |||||
if (c >= 0x2133 && c <= 0x2134) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu | |||||
if (c >= 0x210A && c <= 0x2113) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */ | |||||
if (c >= 0x212F && c <= 0x2131) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */ | |||||
if (c >= 0x2133 && c <= 0x2134) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */ | |||||
if (c == 0x2139) return UCD_PROPERTY_EMOJI; | if (c == 0x2139) return UCD_PROPERTY_EMOJI; | ||||
if (c >= 0x213C && c <= 0x213F) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu | |||||
if (c >= 0x2145 && c <= 0x2147) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu | |||||
if (c >= 0x213C && c <= 0x213F) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */ | |||||
if (c >= 0x2145 && c <= 0x2147) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */ | |||||
if (c >= 0x2148 && c <= 0x2149) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_SOFT_DOTTED; | if (c >= 0x2148 && c <= 0x2149) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_SOFT_DOTTED; | ||||
break; | break; | ||||
case 0xFF00: | case 0xFF00: | ||||
break; | break; | ||||
case 0x01D400: | case 0x01D400: | ||||
if (c >= 0x01D422 && c <= 0x01D423) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_SOFT_DOTTED; | if (c >= 0x01D422 && c <= 0x01D423) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_SOFT_DOTTED; | ||||
if (c >= 0x01D400 && c <= 0x01D454) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu | |||||
if (c >= 0x01D400 && c <= 0x01D454) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */ | |||||
if (c >= 0x01D456 && c <= 0x01D457) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_SOFT_DOTTED; | if (c >= 0x01D456 && c <= 0x01D457) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_SOFT_DOTTED; | ||||
if (c >= 0x01D48A && c <= 0x01D48B) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_SOFT_DOTTED; | if (c >= 0x01D48A && c <= 0x01D48B) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_SOFT_DOTTED; | ||||
if (c >= 0x01D458 && c <= 0x01D49C) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu | |||||
if (c >= 0x01D4AE && c <= 0x01D4B9) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu | |||||
if (c >= 0x01D458 && c <= 0x01D49C) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */ | |||||
if (c >= 0x01D4AE && c <= 0x01D4B9) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */ | |||||
if (c == 0x01D4BB) return UCD_PROPERTY_OTHER_MATH; | if (c == 0x01D4BB) return UCD_PROPERTY_OTHER_MATH; | ||||
if (c >= 0x01D4BE && c <= 0x01D4BF) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_SOFT_DOTTED; | if (c >= 0x01D4BE && c <= 0x01D4BF) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_SOFT_DOTTED; | ||||
if (c >= 0x01D4BD && c <= 0x01D4C3) return UCD_PROPERTY_OTHER_MATH; | if (c >= 0x01D4BD && c <= 0x01D4C3) return UCD_PROPERTY_OTHER_MATH; | ||||
if (c >= 0x01D4F2 && c <= 0x01D4F3) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_SOFT_DOTTED; | if (c >= 0x01D4F2 && c <= 0x01D4F3) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_SOFT_DOTTED; | ||||
if (c >= 0x01D4C5) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu | |||||
if (c >= 0x01D4C5) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */ | |||||
break; | break; | ||||
case 0x01D500: | case 0x01D500: | ||||
if (c <= 0x01D505) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu | |||||
if (c <= 0x01D505) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */ | |||||
if (c >= 0x01D526 && c <= 0x01D527) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_SOFT_DOTTED; | if (c >= 0x01D526 && c <= 0x01D527) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_SOFT_DOTTED; | ||||
if (c >= 0x01D51E && c <= 0x01D539) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu | |||||
if (c >= 0x01D51E && c <= 0x01D539) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */ | |||||
if (c >= 0x01D55A && c <= 0x01D55B) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_SOFT_DOTTED; | if (c >= 0x01D55A && c <= 0x01D55B) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_SOFT_DOTTED; | ||||
if (c >= 0x01D58E && c <= 0x01D58F) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_SOFT_DOTTED; | if (c >= 0x01D58E && c <= 0x01D58F) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_SOFT_DOTTED; | ||||
if (c >= 0x01D5C2 && c <= 0x01D5C3) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_SOFT_DOTTED; | if (c >= 0x01D5C2 && c <= 0x01D5C3) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_SOFT_DOTTED; | ||||
if (c >= 0x01D5F6 && c <= 0x01D5F7) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_SOFT_DOTTED; | if (c >= 0x01D5F6 && c <= 0x01D5F7) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_SOFT_DOTTED; | ||||
if (c >= 0x01D552) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu | |||||
if (c >= 0x01D552) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */ | |||||
break; | break; | ||||
case 0x01D600: | case 0x01D600: | ||||
if (c >= 0x01D62A && c <= 0x01D62B) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_SOFT_DOTTED; | if (c >= 0x01D62A && c <= 0x01D62B) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_SOFT_DOTTED; | ||||
if (c >= 0x01D65E && c <= 0x01D65F) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_SOFT_DOTTED; | if (c >= 0x01D65E && c <= 0x01D65F) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_SOFT_DOTTED; | ||||
if (c >= 0x01D692 && c <= 0x01D693) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_SOFT_DOTTED; | if (c >= 0x01D692 && c <= 0x01D693) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_SOFT_DOTTED; | ||||
if (c <= 0x01D6A5) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu | |||||
if (c <= 0x01D6A5) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */ | |||||
if (c >= 0x01D6C2 && c <= 0x01D6DA) return UCD_PROPERTY_OTHER_MATH; | if (c >= 0x01D6C2 && c <= 0x01D6DA) return UCD_PROPERTY_OTHER_MATH; | ||||
if (c >= 0x01D6DC && c <= 0x01D6FA) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu | |||||
if (c >= 0x01D6DC && c <= 0x01D6FA) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */ | |||||
if (c >= 0x01D6FC) return UCD_PROPERTY_OTHER_MATH; | if (c >= 0x01D6FC) return UCD_PROPERTY_OTHER_MATH; | ||||
break; | break; | ||||
case 0x01D700: | case 0x01D700: | ||||
if (c <= 0x01D714) return UCD_PROPERTY_OTHER_MATH; | if (c <= 0x01D714) return UCD_PROPERTY_OTHER_MATH; | ||||
if (c >= 0x01D716 && c <= 0x01D734) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu | |||||
if (c >= 0x01D716 && c <= 0x01D734) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */ | |||||
if (c >= 0x01D736 && c <= 0x01D74E) return UCD_PROPERTY_OTHER_MATH; | if (c >= 0x01D736 && c <= 0x01D74E) return UCD_PROPERTY_OTHER_MATH; | ||||
if (c >= 0x01D750 && c <= 0x01D76E) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu | |||||
if (c >= 0x01D750 && c <= 0x01D76E) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */ | |||||
if (c >= 0x01D770 && c <= 0x01D788) return UCD_PROPERTY_OTHER_MATH; | if (c >= 0x01D770 && c <= 0x01D788) return UCD_PROPERTY_OTHER_MATH; | ||||
if (c >= 0x01D78A && c <= 0x01D7A8) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu | |||||
if (c >= 0x01D78A && c <= 0x01D7A8) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */ | |||||
if (c >= 0x01D7AA && c <= 0x01D7C2) return UCD_PROPERTY_OTHER_MATH; | if (c >= 0x01D7AA && c <= 0x01D7C2) return UCD_PROPERTY_OTHER_MATH; | ||||
if (c >= 0x01D7C4 && c <= 0x01D7CB) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu | |||||
if (c >= 0x01D7C4 && c <= 0x01D7CB) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */ | |||||
break; | break; | ||||
} | } | ||||
return 0; | return 0; | ||||
break; | break; | ||||
case 0x016F00: | case 0x016F00: | ||||
if (c >= 0x016F93 && c <= 0x016F9F) return UCD_PROPERTY_DIACRITIC; | if (c >= 0x016F93 && c <= 0x016F9F) return UCD_PROPERTY_DIACRITIC; | ||||
if (c == 0x016FE0) return UCD_PROPERTY_EXTENDER; | |||||
if (c >= 0x016FE0 && c <= 0x016FE1) return UCD_PROPERTY_EXTENDER; | |||||
break; | break; | ||||
} | } | ||||
return 0; | return 0; | ||||
{ | { | ||||
case 0x000000: | case 0x000000: | ||||
if (c >= 0x3400 && c <= 0x4DB5) return UCD_PROPERTY_IDEOGRAPHIC | UCD_PROPERTY_UNIFIED_IDEOGRAPH; | if (c >= 0x3400 && c <= 0x4DB5) return UCD_PROPERTY_IDEOGRAPHIC | UCD_PROPERTY_UNIFIED_IDEOGRAPH; | ||||
if (c >= 0x4E00 && c <= 0x9FD5) return UCD_PROPERTY_IDEOGRAPHIC | UCD_PROPERTY_UNIFIED_IDEOGRAPH; | |||||
if (c >= 0x4E00 && c <= 0x9FEA) return UCD_PROPERTY_IDEOGRAPHIC | UCD_PROPERTY_UNIFIED_IDEOGRAPH; | |||||
if (c >= 0xF900 && c <= 0xFA6D) return UCD_PROPERTY_IDEOGRAPHIC; | if (c >= 0xF900 && c <= 0xFA6D) return UCD_PROPERTY_IDEOGRAPHIC; | ||||
if (c >= 0xFA70 && c <= 0xFAD9) return UCD_PROPERTY_IDEOGRAPHIC; | if (c >= 0xFA70 && c <= 0xFAD9) return UCD_PROPERTY_IDEOGRAPHIC; | ||||
break; | break; | ||||
case 0x010000: | case 0x010000: | ||||
if (c >= 0x017000 && c <= 0x0187EC) return UCD_PROPERTY_IDEOGRAPHIC; | if (c >= 0x017000 && c <= 0x0187EC) return UCD_PROPERTY_IDEOGRAPHIC; | ||||
if (c >= 0x018800 && c <= 0x018AF2) return UCD_PROPERTY_IDEOGRAPHIC; | if (c >= 0x018800 && c <= 0x018AF2) return UCD_PROPERTY_IDEOGRAPHIC; | ||||
if (c >= 0x01B170 && c <= 0x01B2FB) return UCD_PROPERTY_IDEOGRAPHIC; | |||||
break; | break; | ||||
case 0x020000: | case 0x020000: | ||||
if (c >= 0x020000 && c <= 0x02A6D6) return UCD_PROPERTY_IDEOGRAPHIC | UCD_PROPERTY_UNIFIED_IDEOGRAPH; | if (c >= 0x020000 && c <= 0x02A6D6) return UCD_PROPERTY_IDEOGRAPHIC | UCD_PROPERTY_UNIFIED_IDEOGRAPH; | ||||
if (c >= 0x02A700 && c <= 0x02B734) return UCD_PROPERTY_IDEOGRAPHIC | UCD_PROPERTY_UNIFIED_IDEOGRAPH; | if (c >= 0x02A700 && c <= 0x02B734) return UCD_PROPERTY_IDEOGRAPHIC | UCD_PROPERTY_UNIFIED_IDEOGRAPH; | ||||
if (c >= 0x02B740 && c <= 0x02B81D) return UCD_PROPERTY_IDEOGRAPHIC | UCD_PROPERTY_UNIFIED_IDEOGRAPH; | if (c >= 0x02B740 && c <= 0x02B81D) return UCD_PROPERTY_IDEOGRAPHIC | UCD_PROPERTY_UNIFIED_IDEOGRAPH; | ||||
if (c >= 0x02B820 && c <= 0x02CEA1) return UCD_PROPERTY_IDEOGRAPHIC | UCD_PROPERTY_UNIFIED_IDEOGRAPH; | if (c >= 0x02B820 && c <= 0x02CEA1) return UCD_PROPERTY_IDEOGRAPHIC | UCD_PROPERTY_UNIFIED_IDEOGRAPH; | ||||
if (c >= 0x02CEB0 && c <= 0x02EBE0) return UCD_PROPERTY_IDEOGRAPHIC | UCD_PROPERTY_UNIFIED_IDEOGRAPH; | |||||
if (c >= 0x02F800 && c <= 0x02FA1D) return UCD_PROPERTY_IDEOGRAPHIC; | if (c >= 0x02F800 && c <= 0x02FA1D) return UCD_PROPERTY_IDEOGRAPHIC; | ||||
break; | break; | ||||
} | } | ||||
if (c >= 0x0041 && c <= 0x0046) return UCD_PROPERTY_HEX_DIGIT | UCD_PROPERTY_ASCII_HEX_DIGIT; | if (c >= 0x0041 && c <= 0x0046) return UCD_PROPERTY_HEX_DIGIT | UCD_PROPERTY_ASCII_HEX_DIGIT; | ||||
break; | break; | ||||
case 0x0300: | case 0x0300: | ||||
if (c >= 0x03D0 && c <= 0x03D2) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu | |||||
if (c >= 0x03F4 && c <= 0x03F5) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu | |||||
if (c >= 0x03D0 && c <= 0x03D2) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */ | |||||
if (c >= 0x03F4 && c <= 0x03F5) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */ | |||||
break; | break; | ||||
case 0xFF00: | case 0xFF00: | ||||
if (c >= 0xFF21 && c <= 0xFF26) return UCD_PROPERTY_HEX_DIGIT; | if (c >= 0xFF21 && c <= 0xFF26) return UCD_PROPERTY_HEX_DIGIT; | ||||
case 0x2100: | case 0x2100: | ||||
if (c == 0x2102) return UCD_PROPERTY_OTHER_MATH; | if (c == 0x2102) return UCD_PROPERTY_OTHER_MATH; | ||||
if (c == 0x2107) return UCD_PROPERTY_OTHER_MATH; | if (c == 0x2107) return UCD_PROPERTY_OTHER_MATH; | ||||
if (c >= 0x210A && c <= 0x2113) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu | |||||
if (c >= 0x210A && c <= 0x2113) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */ | |||||
if (c == 0x2115) return UCD_PROPERTY_OTHER_MATH; | if (c == 0x2115) return UCD_PROPERTY_OTHER_MATH; | ||||
if (c >= 0x2119 && c <= 0x211D) return UCD_PROPERTY_OTHER_MATH; | if (c >= 0x2119 && c <= 0x211D) return UCD_PROPERTY_OTHER_MATH; | ||||
if (c == 0x2124) return UCD_PROPERTY_OTHER_MATH; | if (c == 0x2124) return UCD_PROPERTY_OTHER_MATH; | ||||
if (c == 0x2128) return UCD_PROPERTY_OTHER_MATH; | if (c == 0x2128) return UCD_PROPERTY_OTHER_MATH; | ||||
if (c >= 0x212C && c <= 0x212D) return UCD_PROPERTY_OTHER_MATH; | if (c >= 0x212C && c <= 0x212D) return UCD_PROPERTY_OTHER_MATH; | ||||
if (c >= 0x212F && c <= 0x2131) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu | |||||
if (c >= 0x2133 && c <= 0x2134) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu | |||||
if (c >= 0x213C && c <= 0x213F) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu | |||||
if (c >= 0x2145 && c <= 0x2149) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu | |||||
if (c >= 0x212F && c <= 0x2131) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */ | |||||
if (c >= 0x2133 && c <= 0x2134) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */ | |||||
if (c >= 0x213C && c <= 0x213F) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */ | |||||
if (c >= 0x2145 && c <= 0x2149) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */ | |||||
break; | break; | ||||
case 0x01D400: | case 0x01D400: | ||||
if (c >= 0x01D400 && c <= 0x01D454) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu | |||||
if (c >= 0x01D456 && c <= 0x01D49C) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu | |||||
if (c >= 0x01D400 && c <= 0x01D454) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */ | |||||
if (c >= 0x01D456 && c <= 0x01D49C) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */ | |||||
if (c >= 0x01D49E && c <= 0x01D49F) return UCD_PROPERTY_OTHER_MATH; | if (c >= 0x01D49E && c <= 0x01D49F) return UCD_PROPERTY_OTHER_MATH; | ||||
if (c == 0x01D4A2) return UCD_PROPERTY_OTHER_MATH; | if (c == 0x01D4A2) return UCD_PROPERTY_OTHER_MATH; | ||||
if (c >= 0x01D4A5 && c <= 0x01D4A6) return UCD_PROPERTY_OTHER_MATH; | if (c >= 0x01D4A5 && c <= 0x01D4A6) return UCD_PROPERTY_OTHER_MATH; | ||||
if (c >= 0x01D4A9 && c <= 0x01D4AC) return UCD_PROPERTY_OTHER_MATH; | if (c >= 0x01D4A9 && c <= 0x01D4AC) return UCD_PROPERTY_OTHER_MATH; | ||||
if (c >= 0x01D4AE && c <= 0x01D4B9) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu | |||||
if (c >= 0x01D4C5) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu | |||||
if (c >= 0x01D4AE && c <= 0x01D4B9) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */ | |||||
if (c >= 0x01D4C5) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */ | |||||
break; | break; | ||||
case 0x01D500: | case 0x01D500: | ||||
if (c <= 0x01D505) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu | |||||
if (c <= 0x01D505) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */ | |||||
if (c >= 0x01D507 && c <= 0x01D50A) return UCD_PROPERTY_OTHER_MATH; | if (c >= 0x01D507 && c <= 0x01D50A) return UCD_PROPERTY_OTHER_MATH; | ||||
if (c >= 0x01D50D && c <= 0x01D514) return UCD_PROPERTY_OTHER_MATH; | if (c >= 0x01D50D && c <= 0x01D514) return UCD_PROPERTY_OTHER_MATH; | ||||
if (c >= 0x01D516 && c <= 0x01D51C) return UCD_PROPERTY_OTHER_MATH; | if (c >= 0x01D516 && c <= 0x01D51C) return UCD_PROPERTY_OTHER_MATH; | ||||
if (c >= 0x01D51E && c <= 0x01D539) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu | |||||
if (c >= 0x01D51E && c <= 0x01D539) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */ | |||||
if (c >= 0x01D53B && c <= 0x01D53E) return UCD_PROPERTY_OTHER_MATH; | if (c >= 0x01D53B && c <= 0x01D53E) return UCD_PROPERTY_OTHER_MATH; | ||||
if (c >= 0x01D540 && c <= 0x01D544) return UCD_PROPERTY_OTHER_MATH; | if (c >= 0x01D540 && c <= 0x01D544) return UCD_PROPERTY_OTHER_MATH; | ||||
if (c == 0x01D546) return UCD_PROPERTY_OTHER_MATH; | if (c == 0x01D546) return UCD_PROPERTY_OTHER_MATH; | ||||
if (c >= 0x01D54A && c <= 0x01D550) return UCD_PROPERTY_OTHER_MATH; | if (c >= 0x01D54A && c <= 0x01D550) return UCD_PROPERTY_OTHER_MATH; | ||||
if (c >= 0x01D552) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu | |||||
if (c >= 0x01D552) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */ | |||||
break; | break; | ||||
case 0x01D600: | case 0x01D600: | ||||
if (c <= 0x01D6A5) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu | |||||
if (c <= 0x01D6A5) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */ | |||||
if (c >= 0x01D6A8 && c <= 0x01D6C0) return UCD_PROPERTY_OTHER_MATH; | if (c >= 0x01D6A8 && c <= 0x01D6C0) return UCD_PROPERTY_OTHER_MATH; | ||||
if (c >= 0x01D6DC && c <= 0x01D6FA) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu | |||||
if (c >= 0x01D6DC && c <= 0x01D6FA) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */ | |||||
break; | break; | ||||
case 0x01D700: | case 0x01D700: | ||||
if (c >= 0x01D716 && c <= 0x01D734) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu | |||||
if (c >= 0x01D750 && c <= 0x01D76E) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu | |||||
if (c >= 0x01D78A && c <= 0x01D7A8) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu | |||||
if (c >= 0x01D7C4 && c <= 0x01D7CB) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu | |||||
if (c >= 0x01D716 && c <= 0x01D734) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */ | |||||
if (c >= 0x01D750 && c <= 0x01D76E) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */ | |||||
if (c >= 0x01D78A && c <= 0x01D7A8) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */ | |||||
if (c >= 0x01D7C4 && c <= 0x01D7CB) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */ | |||||
break; | break; | ||||
} | } | ||||
return 0; | return 0; | ||||
if (c >= 0x1C34 && c <= 0x1C35) return UCD_PROPERTY_OTHER_ALPHABETIC; | if (c >= 0x1C34 && c <= 0x1C35) return UCD_PROPERTY_OTHER_ALPHABETIC; | ||||
if (c == 0x1CE1) return UCD_PROPERTY_DIACRITIC; | if (c == 0x1CE1) return UCD_PROPERTY_DIACRITIC; | ||||
if (c >= 0x1CF2 && c <= 0x1CF3) return UCD_PROPERTY_OTHER_ALPHABETIC; | if (c >= 0x1CF2 && c <= 0x1CF3) return UCD_PROPERTY_OTHER_ALPHABETIC; | ||||
if (c == 0x1CF7) return UCD_PROPERTY_DIACRITIC; | |||||
break; | break; | ||||
case 0x3000: | case 0x3000: | ||||
if (c >= 0x302E && c <= 0x302F) return UCD_PROPERTY_DIACRITIC | UCD_PROPERTY_OTHER_GRAPHEME_EXTEND; | if (c >= 0x302E && c <= 0x302F) return UCD_PROPERTY_DIACRITIC | UCD_PROPERTY_OTHER_GRAPHEME_EXTEND; | ||||
if (c >= 0x011720 && c <= 0x011721) return UCD_PROPERTY_OTHER_ALPHABETIC; | if (c >= 0x011720 && c <= 0x011721) return UCD_PROPERTY_OTHER_ALPHABETIC; | ||||
if (c == 0x011726) return UCD_PROPERTY_OTHER_ALPHABETIC; | if (c == 0x011726) return UCD_PROPERTY_OTHER_ALPHABETIC; | ||||
break; | break; | ||||
case 0x011A00: | |||||
if (c >= 0x011A07 && c <= 0x011A08) return UCD_PROPERTY_OTHER_ALPHABETIC; | |||||
if (c == 0x011A39) return UCD_PROPERTY_OTHER_ALPHABETIC; | |||||
if (c >= 0x011A57 && c <= 0x011A58) return UCD_PROPERTY_OTHER_ALPHABETIC; | |||||
if (c == 0x011A97) return UCD_PROPERTY_OTHER_ALPHABETIC; | |||||
break; | |||||
case 0x011C00: | case 0x011C00: | ||||
if (c == 0x011C2F) return UCD_PROPERTY_OTHER_ALPHABETIC; | if (c == 0x011C2F) return UCD_PROPERTY_OTHER_ALPHABETIC; | ||||
if (c == 0x011C3E) return UCD_PROPERTY_OTHER_ALPHABETIC; | if (c == 0x011C3E) return UCD_PROPERTY_OTHER_ALPHABETIC; | ||||
if (c >= 0x0AC7 && c <= 0x0AC8) return UCD_PROPERTY_OTHER_ALPHABETIC; | if (c >= 0x0AC7 && c <= 0x0AC8) return UCD_PROPERTY_OTHER_ALPHABETIC; | ||||
if (c == 0x0ACD) return UCD_PROPERTY_DIACRITIC; | if (c == 0x0ACD) return UCD_PROPERTY_DIACRITIC; | ||||
if (c >= 0x0AE2 && c <= 0x0AE3) return UCD_PROPERTY_OTHER_ALPHABETIC; | if (c >= 0x0AE2 && c <= 0x0AE3) return UCD_PROPERTY_OTHER_ALPHABETIC; | ||||
if (c >= 0x0AFA && c <= 0x0AFC) return UCD_PROPERTY_OTHER_ALPHABETIC; | |||||
if (c >= 0x0AFD && c <= 0x0AFF) return UCD_PROPERTY_DIACRITIC; | |||||
break; | break; | ||||
case 0x0B00: | case 0x0B00: | ||||
if (c == 0x0B01) return UCD_PROPERTY_OTHER_ALPHABETIC; | if (c == 0x0B01) return UCD_PROPERTY_OTHER_ALPHABETIC; | ||||
if (c >= 0x0CE2 && c <= 0x0CE3) return UCD_PROPERTY_OTHER_ALPHABETIC; | if (c >= 0x0CE2 && c <= 0x0CE3) return UCD_PROPERTY_OTHER_ALPHABETIC; | ||||
break; | break; | ||||
case 0x0D00: | case 0x0D00: | ||||
if (c == 0x0D01) return UCD_PROPERTY_OTHER_ALPHABETIC; | |||||
if (c >= 0x0D00 && c <= 0x0D01) return UCD_PROPERTY_OTHER_ALPHABETIC; | |||||
if (c >= 0x0D3B && c <= 0x0D3C) return UCD_PROPERTY_DIACRITIC; | |||||
if (c >= 0x0D41 && c <= 0x0D44) return UCD_PROPERTY_OTHER_ALPHABETIC; | if (c >= 0x0D41 && c <= 0x0D44) return UCD_PROPERTY_OTHER_ALPHABETIC; | ||||
if (c == 0x0D4D) return UCD_PROPERTY_DIACRITIC; | if (c == 0x0D4D) return UCD_PROPERTY_DIACRITIC; | ||||
if (c >= 0x0D62 && c <= 0x0D63) return UCD_PROPERTY_OTHER_ALPHABETIC; | if (c >= 0x0D62 && c <= 0x0D63) return UCD_PROPERTY_OTHER_ALPHABETIC; | ||||
case 0x1D00: | case 0x1D00: | ||||
if (c >= 0x1DC4 && c <= 0x1DCF) return UCD_PROPERTY_DIACRITIC; | if (c >= 0x1DC4 && c <= 0x1DCF) return UCD_PROPERTY_DIACRITIC; | ||||
if (c >= 0x1DE7 && c <= 0x1DF4) return UCD_PROPERTY_OTHER_ALPHABETIC; | if (c >= 0x1DE7 && c <= 0x1DF4) return UCD_PROPERTY_OTHER_ALPHABETIC; | ||||
if (c == 0x1DF5) return UCD_PROPERTY_DIACRITIC; | |||||
if (c >= 0x1DF5 && c <= 0x1DF9) return UCD_PROPERTY_DIACRITIC; | |||||
if (c >= 0x1DFD && c <= 0x1DFF) return UCD_PROPERTY_DIACRITIC; | if (c >= 0x1DFD && c <= 0x1DFF) return UCD_PROPERTY_DIACRITIC; | ||||
break; | break; | ||||
case 0x2000: | case 0x2000: | ||||
if (c >= 0x011727 && c <= 0x01172A) return UCD_PROPERTY_OTHER_ALPHABETIC; | if (c >= 0x011727 && c <= 0x01172A) return UCD_PROPERTY_OTHER_ALPHABETIC; | ||||
if (c == 0x01172B) return UCD_PROPERTY_DIACRITIC; | if (c == 0x01172B) return UCD_PROPERTY_DIACRITIC; | ||||
break; | break; | ||||
case 0x011A00: | |||||
if (c >= 0x011A01 && c <= 0x011A0A) return UCD_PROPERTY_OTHER_ALPHABETIC; | |||||
if (c == 0x011A34) return UCD_PROPERTY_DIACRITIC; | |||||
if (c >= 0x011A35 && c <= 0x011A3E) return UCD_PROPERTY_OTHER_ALPHABETIC; | |||||
if (c == 0x011A47) return UCD_PROPERTY_DIACRITIC; | |||||
if (c >= 0x011A51 && c <= 0x011A5B) return UCD_PROPERTY_OTHER_ALPHABETIC; | |||||
if (c >= 0x011A8A && c <= 0x011A96) return UCD_PROPERTY_OTHER_ALPHABETIC; | |||||
if (c == 0x011A98) return UCD_PROPERTY_EXTENDER; | |||||
if (c == 0x011A99) return UCD_PROPERTY_DIACRITIC; | |||||
break; | |||||
case 0x011C00: | case 0x011C00: | ||||
if (c >= 0x011C30 && c <= 0x011C36) return UCD_PROPERTY_OTHER_ALPHABETIC; | if (c >= 0x011C30 && c <= 0x011C36) return UCD_PROPERTY_OTHER_ALPHABETIC; | ||||
if (c >= 0x011C38 && c <= 0x011C3D) return UCD_PROPERTY_OTHER_ALPHABETIC; | if (c >= 0x011C38 && c <= 0x011C3D) return UCD_PROPERTY_OTHER_ALPHABETIC; | ||||
if (c >= 0x011CB2 && c <= 0x011CB3) return UCD_PROPERTY_OTHER_ALPHABETIC; | if (c >= 0x011CB2 && c <= 0x011CB3) return UCD_PROPERTY_OTHER_ALPHABETIC; | ||||
if (c >= 0x011CB5 && c <= 0x011CB6) return UCD_PROPERTY_OTHER_ALPHABETIC; | if (c >= 0x011CB5 && c <= 0x011CB6) return UCD_PROPERTY_OTHER_ALPHABETIC; | ||||
break; | break; | ||||
case 0x011D00: | |||||
if (c >= 0x011D31 && c <= 0x011D36) return UCD_PROPERTY_OTHER_ALPHABETIC; | |||||
if (c == 0x011D3A) return UCD_PROPERTY_OTHER_ALPHABETIC; | |||||
if (c >= 0x011D3C && c <= 0x011D3D) return UCD_PROPERTY_OTHER_ALPHABETIC; | |||||
if (c >= 0x011D3F && c <= 0x011D41) return UCD_PROPERTY_OTHER_ALPHABETIC; | |||||
if (c == 0x011D42) return UCD_PROPERTY_DIACRITIC; | |||||
if (c == 0x011D43) return UCD_PROPERTY_OTHER_ALPHABETIC; | |||||
if (c >= 0x011D44 && c <= 0x011D45) return UCD_PROPERTY_DIACRITIC; | |||||
if (c == 0x011D47) return UCD_PROPERTY_OTHER_ALPHABETIC; | |||||
break; | |||||
case 0x016A00: | case 0x016A00: | ||||
if (c >= 0x016AF0 && c <= 0x016AF4) return UCD_PROPERTY_DIACRITIC; | if (c >= 0x016AF0 && c <= 0x016AF4) return UCD_PROPERTY_DIACRITIC; | ||||
break; | break; | ||||
switch (c & 0xFFFFFF00) | switch (c & 0xFFFFFF00) | ||||
{ | { | ||||
case 0x0000: | case 0x0000: | ||||
if (c >= 0x0030 && c <= 0x0039) return UCD_PROPERTY_HEX_DIGIT | UCD_PROPERTY_ASCII_HEX_DIGIT | UCD_PROPERTY_EMOJI; | |||||
if (c >= 0x0030 && c <= 0x0039) return UCD_PROPERTY_HEX_DIGIT | UCD_PROPERTY_ASCII_HEX_DIGIT | UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_COMPONENT; | |||||
break; | break; | ||||
case 0xFF00: | case 0xFF00: | ||||
if (c >= 0xFF10 && c <= 0xFF19) return UCD_PROPERTY_HEX_DIGIT; | if (c >= 0xFF10 && c <= 0xFF19) return UCD_PROPERTY_HEX_DIGIT; | ||||
break; | break; | ||||
case 0x2700: | case 0x2700: | ||||
if (c == 0x27C6) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_PATTERN_SYNTAX; | if (c == 0x27C6) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_PATTERN_SYNTAX; | ||||
if (c >= 0x27E6 && c <= 0x27EF) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_PATTERN_SYNTAX; // Pe|Ps | |||||
if (c >= 0x27E6 && c <= 0x27EF) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_PATTERN_SYNTAX; /* Pe|Ps */ | |||||
return UCD_PROPERTY_PATTERN_SYNTAX; | return UCD_PROPERTY_PATTERN_SYNTAX; | ||||
case 0x2900: | case 0x2900: | ||||
return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_PATTERN_SYNTAX; // Pe|Ps | |||||
return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_PATTERN_SYNTAX; /* Pe|Ps */ | |||||
case 0x2E00: | case 0x2E00: | ||||
return UCD_PROPERTY_PATTERN_SYNTAX; | return UCD_PROPERTY_PATTERN_SYNTAX; | ||||
case 0x3000: | case 0x3000: | ||||
case 0x0000: | case 0x0000: | ||||
if (c == 0x0021) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_SENTENCE_TERMINAL | UCD_PROPERTY_PATTERN_SYNTAX | ESPEAKNG_PROPERTY_EXCLAMATION_MARK; | if (c == 0x0021) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_SENTENCE_TERMINAL | UCD_PROPERTY_PATTERN_SYNTAX | ESPEAKNG_PROPERTY_EXCLAMATION_MARK; | ||||
if (c == 0x0022) return UCD_PROPERTY_QUOTATION_MARK | UCD_PROPERTY_PATTERN_SYNTAX; | if (c == 0x0022) return UCD_PROPERTY_QUOTATION_MARK | UCD_PROPERTY_PATTERN_SYNTAX; | ||||
if (c == 0x0023) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_PATTERN_SYNTAX; | |||||
if (c == 0x0023) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_PATTERN_SYNTAX | UCD_PROPERTY_EMOJI_COMPONENT; | |||||
if (c == 0x0027) return UCD_PROPERTY_QUOTATION_MARK | UCD_PROPERTY_PATTERN_SYNTAX; | if (c == 0x0027) return UCD_PROPERTY_QUOTATION_MARK | UCD_PROPERTY_PATTERN_SYNTAX; | ||||
if (c == 0x002A) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_PATTERN_SYNTAX; | |||||
if (c == 0x002A) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_PATTERN_SYNTAX | UCD_PROPERTY_EMOJI_COMPONENT; | |||||
if (c == 0x002C) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_PATTERN_SYNTAX | ESPEAKNG_PROPERTY_COMMA; | if (c == 0x002C) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_PATTERN_SYNTAX | ESPEAKNG_PROPERTY_COMMA; | ||||
if (c == 0x002E) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_SENTENCE_TERMINAL | UCD_PROPERTY_PATTERN_SYNTAX | ESPEAKNG_PROPERTY_FULL_STOP; | if (c == 0x002E) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_SENTENCE_TERMINAL | UCD_PROPERTY_PATTERN_SYNTAX | ESPEAKNG_PROPERTY_FULL_STOP; | ||||
if (c == 0x003A) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_PATTERN_SYNTAX | ESPEAKNG_PROPERTY_COLON; | if (c == 0x003A) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_PATTERN_SYNTAX | ESPEAKNG_PROPERTY_COLON; | ||||
case 0x11700: | case 0x11700: | ||||
if (c >= 0x01173C && c <= 0x01173E) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_SENTENCE_TERMINAL; | if (c >= 0x01173C && c <= 0x01173E) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_SENTENCE_TERMINAL; | ||||
break; | break; | ||||
case 0x11A00: | |||||
if (c >= 0x011A42 && c <= 0x011A43) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_SENTENCE_TERMINAL; | |||||
if (c >= 0x011A9B && c <= 0x011A9C) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_SENTENCE_TERMINAL; | |||||
if (c >= 0x011AA1 && c <= 0x011AA2) return UCD_PROPERTY_TERMINAL_PUNCTUATION; | |||||
break; | |||||
case 0x11C00: | case 0x11C00: | ||||
if (c >= 0x011C41 && c <= 0x011C42) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_SENTENCE_TERMINAL; | if (c >= 0x011C41 && c <= 0x011C42) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_SENTENCE_TERMINAL; | ||||
if (c == 0x011C43) return UCD_PROPERTY_TERMINAL_PUNCTUATION; | if (c == 0x011C43) return UCD_PROPERTY_TERMINAL_PUNCTUATION; | ||||
break; | break; | ||||
case 0x2700: | case 0x2700: | ||||
if (c == 0x27C5) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_PATTERN_SYNTAX; | if (c == 0x27C5) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_PATTERN_SYNTAX; | ||||
if (c >= 0x27E6 && c <= 0x27EF) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_PATTERN_SYNTAX; // Pe|Ps | |||||
if (c >= 0x27E6 && c <= 0x27EF) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_PATTERN_SYNTAX; /* Pe|Ps */ | |||||
return UCD_PROPERTY_PATTERN_SYNTAX; | return UCD_PROPERTY_PATTERN_SYNTAX; | ||||
case 0x2900: | case 0x2900: | ||||
return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_PATTERN_SYNTAX; | return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_PATTERN_SYNTAX; | ||||
if (c == 0xFFE3) return UCD_PROPERTY_DIACRITIC; | if (c == 0xFFE3) return UCD_PROPERTY_DIACRITIC; | ||||
break; | break; | ||||
case 0x01F300: | case 0x01F300: | ||||
return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION | UCD_PROPERTY_EMOJI_MODIFIER; | |||||
return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION | UCD_PROPERTY_EMOJI_MODIFIER | UCD_PROPERTY_EMOJI_COMPONENT; | |||||
} | } | ||||
return 0; | return 0; | ||||
} | } | ||||
if (c == 0x23F3) return UCD_PROPERTY_PATTERN_SYNTAX | UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION; | if (c == 0x23F3) return UCD_PROPERTY_PATTERN_SYNTAX | UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION; | ||||
if (c >= 0x23E9 && c <= 0x23F3) return UCD_PROPERTY_PATTERN_SYNTAX | UCD_PROPERTY_EMOJI; | if (c >= 0x23E9 && c <= 0x23F3) return UCD_PROPERTY_PATTERN_SYNTAX | UCD_PROPERTY_EMOJI; | ||||
if (c >= 0x23F8 && c <= 0x23FA) return UCD_PROPERTY_PATTERN_SYNTAX | UCD_PROPERTY_EMOJI; | if (c >= 0x23F8 && c <= 0x23FA) return UCD_PROPERTY_PATTERN_SYNTAX | UCD_PROPERTY_EMOJI; | ||||
if (c >= 0x23E3 && c <= 0x23FE) return UCD_PROPERTY_PATTERN_SYNTAX; | |||||
if (c >= 0x23E3) return UCD_PROPERTY_PATTERN_SYNTAX; | |||||
break; | break; | ||||
case 0x2400: | case 0x2400: | ||||
if (c >= 0x2400 && c <= 0x244A) return UCD_PROPERTY_PATTERN_SYNTAX; | if (c >= 0x2400 && c <= 0x244A) return UCD_PROPERTY_PATTERN_SYNTAX; | ||||
if (c >= 0x01F170 && c <= 0x01F189) return UCD_PROPERTY_OTHER_ALPHABETIC | UCD_PROPERTY_OTHER_UPPERCASE; | if (c >= 0x01F170 && c <= 0x01F189) return UCD_PROPERTY_OTHER_ALPHABETIC | UCD_PROPERTY_OTHER_UPPERCASE; | ||||
if (c == 0x01F18E) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION; | if (c == 0x01F18E) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION; | ||||
if (c >= 0x01F191 && c <= 0x01F19A) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION; | if (c >= 0x01F191 && c <= 0x01F19A) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION; | ||||
if (c >= 0x01F1E6) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION; | |||||
if (c >= 0x01F1E6) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION | UCD_PROPERTY_REGIONAL_INDICATOR | UCD_PROPERTY_EMOJI_COMPONENT; | |||||
break; | break; | ||||
case 0x01F200: | case 0x01F200: | ||||
if (c == 0x01F201) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION; | if (c == 0x01F201) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION; | ||||
if (c >= 0x01F6EB && c <= 0x01F6EC) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION; | if (c >= 0x01F6EB && c <= 0x01F6EC) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION; | ||||
if (c == 0x01F6F0) return UCD_PROPERTY_EMOJI; | if (c == 0x01F6F0) return UCD_PROPERTY_EMOJI; | ||||
if (c == 0x01F6F3) return UCD_PROPERTY_EMOJI; | if (c == 0x01F6F3) return UCD_PROPERTY_EMOJI; | ||||
if (c >= 0x01F6F4 && c <= 0x01F6F6) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION; | |||||
if (c >= 0x01F6F4 && c <= 0x01F6F8) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION; | |||||
break; | break; | ||||
case 0x01F900: | case 0x01F900: | ||||
if (c <= 0x01F90B) return 0; | |||||
if (c >= 0x01F918 && c <= 0x01F91C) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION | UCD_PROPERTY_EMOJI_MODIFIER_BASE; | if (c >= 0x01F918 && c <= 0x01F91C) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION | UCD_PROPERTY_EMOJI_MODIFIER_BASE; | ||||
if (c >= 0x01F910 && c <= 0x01F91D) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION; | if (c >= 0x01F910 && c <= 0x01F91D) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION; | ||||
if (c == 0x01F91E) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION | UCD_PROPERTY_EMOJI_MODIFIER_BASE; | |||||
if (c >= 0x01F91E && c <= 0x01F91F) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION | UCD_PROPERTY_EMOJI_MODIFIER_BASE; | |||||
if (c == 0x01F926) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION | UCD_PROPERTY_EMOJI_MODIFIER_BASE; | if (c == 0x01F926) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION | UCD_PROPERTY_EMOJI_MODIFIER_BASE; | ||||
if (c >= 0x01F920 && c <= 0x01F927) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION; | |||||
if (c == 0x01F930) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION | UCD_PROPERTY_EMOJI_MODIFIER_BASE; | |||||
if (c >= 0x01F920 && c <= 0x01F92F) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION; | |||||
if (c >= 0x01F930 && c <= 0x01F932) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION | UCD_PROPERTY_EMOJI_MODIFIER_BASE; | |||||
if (c == 0x01F93B) return 0; | if (c == 0x01F93B) return 0; | ||||
if (c >= 0x01F93A && c <= 0x01F93C) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION; | if (c >= 0x01F93A && c <= 0x01F93C) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION; | ||||
if (c >= 0x01F933 && c <= 0x01F93E) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION | UCD_PROPERTY_EMOJI_MODIFIER_BASE; | if (c >= 0x01F933 && c <= 0x01F93E) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION | UCD_PROPERTY_EMOJI_MODIFIER_BASE; | ||||
if (c == 0x01F946) return 0; | if (c == 0x01F946) return 0; | ||||
if (c >= 0x01F940 && c <= 0x01F94B) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION; | if (c >= 0x01F940 && c <= 0x01F94B) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION; | ||||
if (c == 0x01F94C) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION; | |||||
if (c >= 0x01F950 && c <= 0x01F95E) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION; | if (c >= 0x01F950 && c <= 0x01F95E) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION; | ||||
if (c >= 0x01F95F && c <= 0x01F96B) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION; | |||||
if (c >= 0x01F980 && c <= 0x01F991) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION; | if (c >= 0x01F980 && c <= 0x01F991) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION; | ||||
if (c >= 0x01F992 && c <= 0x01F997) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION; | |||||
if (c == 0x01F9C0) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION; | if (c == 0x01F9C0) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION; | ||||
if (c >= 0x01F9D1 && c <= 0x01F9DD) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION | UCD_PROPERTY_EMOJI_MODIFIER_BASE; | |||||
if (c >= 0x01F9D0 && c <= 0x01F9E6) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION; | |||||
return UCD_PROPERTY_EMOJI; | return UCD_PROPERTY_EMOJI; | ||||
} | } | ||||
return 0; | return 0; | ||||
case UCD_CATEGORY_Zl: return UCD_PROPERTY_WHITE_SPACE | UCD_PROPERTY_PATTERN_WHITE_SPACE; | case UCD_CATEGORY_Zl: return UCD_PROPERTY_WHITE_SPACE | UCD_PROPERTY_PATTERN_WHITE_SPACE; | ||||
case UCD_CATEGORY_Zp: return UCD_PROPERTY_WHITE_SPACE | UCD_PROPERTY_PATTERN_WHITE_SPACE | ESPEAKNG_PROPERTY_PARAGRAPH_SEPARATOR; | case UCD_CATEGORY_Zp: return UCD_PROPERTY_WHITE_SPACE | UCD_PROPERTY_PATTERN_WHITE_SPACE | ESPEAKNG_PROPERTY_PARAGRAPH_SEPARATOR; | ||||
case UCD_CATEGORY_Zs: return properties_Zs(c); | case UCD_CATEGORY_Zs: return properties_Zs(c); | ||||
default: return 0; // Co Cs Ii Lt Me | |||||
default: return 0; /* Co Cs Ii Lt Me */ | |||||
}; | }; | ||||
} | } |
"Geok", | "Geok", | ||||
"Geor", | "Geor", | ||||
"Glag", | "Glag", | ||||
"Gonm", | |||||
"Goth", | "Goth", | ||||
"Gran", | "Gran", | ||||
"Grek", | "Grek", | ||||
"Sind", | "Sind", | ||||
"Sinh", | "Sinh", | ||||
"Sora", | "Sora", | ||||
"Soyo", | |||||
"Sund", | "Sund", | ||||
"Sylo", | "Sylo", | ||||
"Syrc", | "Syrc", | ||||
"Xpeo", | "Xpeo", | ||||
"Xsux", | "Xsux", | ||||
"Yiii", | "Yiii", | ||||
"Zanb", | |||||
"Zinh", | "Zinh", | ||||
"Zmth", | "Zmth", | ||||
"Zsym", | "Zsym", |
* along with ucd-tools. If not, see <http://www.gnu.org/licenses/>. | * along with ucd-tools. If not, see <http://www.gnu.org/licenses/>. | ||||
*/ | */ | ||||
#include "config.h" | |||||
#include "ucd/ucd.h" | #include "ucd/ucd.h" | ||||
#include <locale.h> | #include <locale.h> | ||||
#include <wchar.h> | #include <wchar.h> | ||||
#include <wctype.h> | #include <wctype.h> | ||||
#ifndef HAVE_ISWBLANK | |||||
static int iswblank(wint_t c) | |||||
{ | |||||
return iswspace(c) && !(c >= 0x0A && c <= 0x0D); | |||||
} | |||||
#endif | |||||
void fput_utf8c(FILE *out, codepoint_t c) | void fput_utf8c(FILE *out, codepoint_t c) | ||||
{ | { | ||||
if (c < 0x80) | if (c < 0x80) | ||||
{ | { | ||||
switch (mode) | switch (mode) | ||||
{ | { | ||||
case 'c': // character | |||||
case 'c': /* character */ | |||||
switch (c) | switch (c) | ||||
{ | { | ||||
case '\t': fputs("\\t", out); break; | case '\t': fputs("\\t", out); break; | ||||
default: fput_utf8c(out, c); break; | default: fput_utf8c(out, c); break; | ||||
} | } | ||||
break; | break; | ||||
case 'h': // hexadecimal (lower) | |||||
case 'h': /* hexadecimal (lower) */ | |||||
fprintf(out, "%06x", c); | fprintf(out, "%06x", c); | ||||
break; | break; | ||||
case 'H': // hexadecimal (upper) | |||||
case 'H': /* hexadecimal (upper) */ | |||||
fprintf(out, "%06X", c); | fprintf(out, "%06X", c); | ||||
break; | break; | ||||
} | } | ||||
{ | { | ||||
switch (mode) | switch (mode) | ||||
{ | { | ||||
case 'A': // alpha-numeric | |||||
case 'A': /* alpha-numeric */ | |||||
fputc(iswalnum(c) ? '1' : '0', out); | fputc(iswalnum(c) ? '1' : '0', out); | ||||
break; | break; | ||||
case 'a': // alpha | |||||
case 'a': /* alpha */ | |||||
fputc(iswalpha(c) ? '1' : '0', out); | fputc(iswalpha(c) ? '1' : '0', out); | ||||
break; | break; | ||||
case 'b': // blank | |||||
case 'b': /* blank */ | |||||
fputc(iswblank(c) ? '1' : '0', out); | fputc(iswblank(c) ? '1' : '0', out); | ||||
break; | break; | ||||
case 'c': // control | |||||
case 'c': /* control */ | |||||
fputc(iswcntrl(c) ? '1' : '0', out); | fputc(iswcntrl(c) ? '1' : '0', out); | ||||
break; | break; | ||||
case 'd': // numeric | |||||
case 'd': /* numeric */ | |||||
fputc(iswdigit(c) ? '1' : '0', out); | fputc(iswdigit(c) ? '1' : '0', out); | ||||
break; | break; | ||||
case 'g': // glyph | |||||
case 'g': /* glyph */ | |||||
fputc(iswgraph(c) ? '1' : '0', out); | fputc(iswgraph(c) ? '1' : '0', out); | ||||
break; | break; | ||||
case 'l': // lower case | |||||
case 'l': /* lower case */ | |||||
fputc(iswlower(c) ? '1' : '0', out); | fputc(iswlower(c) ? '1' : '0', out); | ||||
break; | break; | ||||
case 'P': // printable | |||||
case 'P': /* printable */ | |||||
fputc(iswprint(c) ? '1' : '0', out); | fputc(iswprint(c) ? '1' : '0', out); | ||||
break; | break; | ||||
case 'p': // punctuation | |||||
case 'p': /* punctuation */ | |||||
fputc(iswpunct(c) ? '1' : '0', out); | fputc(iswpunct(c) ? '1' : '0', out); | ||||
break; | break; | ||||
case 's': // whitespace | |||||
case 's': /* whitespace */ | |||||
fputc(iswspace(c) ? '1' : '0', out); | fputc(iswspace(c) ? '1' : '0', out); | ||||
break; | break; | ||||
case 'u': // upper case | |||||
case 'u': /* upper case */ | |||||
fputc(iswupper(c) ? '1' : '0', out); | fputc(iswupper(c) ? '1' : '0', out); | ||||
break; | break; | ||||
case 'x': // xdigit | |||||
case 'x': /* xdigit */ | |||||
fputc(iswxdigit(c) ? '1' : '0', out); | fputc(iswxdigit(c) ? '1' : '0', out); | ||||
break; | break; | ||||
} | } | ||||
case '%': | case '%': | ||||
switch (*++format) | switch (*++format) | ||||
{ | { | ||||
case 'c': // category | |||||
case 'c': /* category */ | |||||
fputs(ucd_get_category_string(ucd_lookup_category(c)), out); | fputs(ucd_get_category_string(ucd_lookup_category(c)), out); | ||||
break; | break; | ||||
case 'C': // category group | |||||
case 'C': /* category group */ | |||||
fputs(ucd_get_category_group_string(ucd_lookup_category_group(c)), out); | fputs(ucd_get_category_group_string(ucd_lookup_category_group(c)), out); | ||||
break; | break; | ||||
case 'p': // codepoint | |||||
case 'p': /* codepoint */ | |||||
uprintf_codepoint(out, c, *++format); | uprintf_codepoint(out, c, *++format); | ||||
break; | break; | ||||
case 'P': // properties | |||||
case 'P': /* properties */ | |||||
fprintf(out, "%016llx", ucd_properties(c, ucd_lookup_category(c))); | fprintf(out, "%016llx", ucd_properties(c, ucd_lookup_category(c))); | ||||
break; | break; | ||||
case 'i': // is* | |||||
case 'i': /* is* */ | |||||
uprintf_is(out, c, *++format); | uprintf_is(out, c, *++format); | ||||
break; | break; | ||||
case 'L': // lowercase | |||||
case 'L': /* lowercase */ | |||||
uprintf_codepoint(out, towlower(c), *++format); | uprintf_codepoint(out, towlower(c), *++format); | ||||
break; | break; | ||||
case 's': // script | |||||
case 's': /* script */ | |||||
fputs(ucd_get_script_string(ucd_lookup_script(c)), out); | fputs(ucd_get_script_string(ucd_lookup_script(c)), out); | ||||
break; | break; | ||||
case 'T': // titlecase | |||||
case 'T': /* titlecase */ | |||||
uprintf_codepoint(out, ucd_totitle(c), *++format); | uprintf_codepoint(out, ucd_totitle(c), *++format); | ||||
break; | break; | ||||
case 'U': // uppercase | |||||
case 'U': /* uppercase */ | |||||
uprintf_codepoint(out, towupper(c), *++format); | uprintf_codepoint(out, towupper(c), *++format); | ||||
break; | break; | ||||
} | } | ||||
{ | { | ||||
FILE *in = NULL; | FILE *in = NULL; | ||||
const char *format = NULL; | const char *format = NULL; | ||||
for (int argn = 1; argn != argc; ++argn) | |||||
int argn; | |||||
for (argn = 1; argn != argc; ++argn) | |||||
{ | { | ||||
const char *arg = argv[argn]; | const char *arg = argv[argn]; | ||||
if (!strcmp(arg, "--stdin") || !strcmp(arg, "-")) | if (!strcmp(arg, "--stdin") || !strcmp(arg, "-")) | ||||
} | } | ||||
else | else | ||||
{ | { | ||||
for (codepoint_t c = 0; c <= 0x10FFFF; ++c) | |||||
codepoint_t c; | |||||
for (c = 0; c <= 0x10FFFF; ++c) | |||||
uprintf(stdout, c, format ? format : | uprintf(stdout, c, format ? format : | ||||
"%pH %s %C %c %UH %LH %TH %id %ix %ic %is %ib %ip %iP %ig %iA %ia %iu %il %P\n"); | "%pH %s %C %c %UH %LH %TH %id %ix %ic %is %ib %ip %iP %ig %iA %ia %iu %il %P\n"); | ||||
} | } |
{ | { | ||||
switch (mode) | switch (mode) | ||||
{ | { | ||||
case 'c': // character | |||||
case 'c': /* character */ | |||||
switch (c) | switch (c) | ||||
{ | { | ||||
case '\t': fputs("\\t", out); break; | case '\t': fputs("\\t", out); break; | ||||
default: fput_utf8c(out, c); break; | default: fput_utf8c(out, c); break; | ||||
} | } | ||||
break; | break; | ||||
case 'h': // hexadecimal (lower) | |||||
case 'h': /* hexadecimal (lower) */ | |||||
fprintf(out, "%06x", c); | fprintf(out, "%06x", c); | ||||
break; | break; | ||||
case 'H': // hexadecimal (upper) | |||||
case 'H': /* hexadecimal (upper) */ | |||||
fprintf(out, "%06X", c); | fprintf(out, "%06X", c); | ||||
break; | break; | ||||
} | } | ||||
{ | { | ||||
switch (mode) | switch (mode) | ||||
{ | { | ||||
case 'A': // alpha-numeric | |||||
case 'A': /* alpha-numeric */ | |||||
fputc(ucd_isalnum(c) ? '1' : '0', out); | fputc(ucd_isalnum(c) ? '1' : '0', out); | ||||
break; | break; | ||||
case 'a': // alpha | |||||
case 'a': /* alpha */ | |||||
fputc(ucd_isalpha(c) ? '1' : '0', out); | fputc(ucd_isalpha(c) ? '1' : '0', out); | ||||
break; | break; | ||||
case 'b': // blank | |||||
case 'b': /* blank */ | |||||
fputc(ucd_isblank(c) ? '1' : '0', out); | fputc(ucd_isblank(c) ? '1' : '0', out); | ||||
break; | break; | ||||
case 'c': // control | |||||
case 'c': /* control */ | |||||
fputc(ucd_iscntrl(c) ? '1' : '0', out); | fputc(ucd_iscntrl(c) ? '1' : '0', out); | ||||
break; | break; | ||||
case 'd': // numeric | |||||
case 'd': /* numeric */ | |||||
fputc(ucd_isdigit(c) ? '1' : '0', out); | fputc(ucd_isdigit(c) ? '1' : '0', out); | ||||
break; | break; | ||||
case 'g': // glyph | |||||
case 'g': /* glyph */ | |||||
fputc(ucd_isgraph(c) ? '1' : '0', out); | fputc(ucd_isgraph(c) ? '1' : '0', out); | ||||
break; | break; | ||||
case 'l': // lower case | |||||
case 'l': /* lower case */ | |||||
fputc(ucd_islower(c) ? '1' : '0', out); | fputc(ucd_islower(c) ? '1' : '0', out); | ||||
break; | break; | ||||
case 'P': // printable | |||||
case 'P': /* printable */ | |||||
fputc(ucd_isprint(c) ? '1' : '0', out); | fputc(ucd_isprint(c) ? '1' : '0', out); | ||||
break; | break; | ||||
case 'p': // punctuation | |||||
case 'p': /* punctuation */ | |||||
fputc(ucd_ispunct(c) ? '1' : '0', out); | fputc(ucd_ispunct(c) ? '1' : '0', out); | ||||
break; | break; | ||||
case 's': // whitespace | |||||
case 's': /* whitespace */ | |||||
fputc(ucd_isspace(c) ? '1' : '0', out); | fputc(ucd_isspace(c) ? '1' : '0', out); | ||||
break; | break; | ||||
case 'u': // upper case | |||||
case 'u': /* upper case */ | |||||
fputc(ucd_isupper(c) ? '1' : '0', out); | fputc(ucd_isupper(c) ? '1' : '0', out); | ||||
break; | break; | ||||
case 'x': // xdigit | |||||
case 'x': /* xdigit */ | |||||
fputc(ucd_isxdigit(c) ? '1' : '0', out); | fputc(ucd_isxdigit(c) ? '1' : '0', out); | ||||
break; | break; | ||||
} | } | ||||
case '%': | case '%': | ||||
switch (*++format) | switch (*++format) | ||||
{ | { | ||||
case 'c': // category | |||||
case 'c': /* category */ | |||||
fputs(ucd_get_category_string(ucd_lookup_category(c)), out); | fputs(ucd_get_category_string(ucd_lookup_category(c)), out); | ||||
break; | break; | ||||
case 'C': // category group | |||||
case 'C': /* category group */ | |||||
fputs(ucd_get_category_group_string(ucd_lookup_category_group(c)), out); | fputs(ucd_get_category_group_string(ucd_lookup_category_group(c)), out); | ||||
break; | break; | ||||
case 'p': // codepoint | |||||
case 'p': /* codepoint */ | |||||
uprintf_codepoint(out, c, *++format); | uprintf_codepoint(out, c, *++format); | ||||
break; | break; | ||||
case 'P': // properties | |||||
case 'P': /* properties */ | |||||
fprintf(out, "%016llx", ucd_properties(c, ucd_lookup_category(c))); | fprintf(out, "%016llx", ucd_properties(c, ucd_lookup_category(c))); | ||||
break; | break; | ||||
case 'i': // is* | |||||
case 'i': /* is* */ | |||||
uprintf_is(out, c, *++format); | uprintf_is(out, c, *++format); | ||||
break; | break; | ||||
case 'L': // lowercase | |||||
case 'L': /* lowercase */ | |||||
uprintf_codepoint(out, ucd_tolower(c), *++format); | uprintf_codepoint(out, ucd_tolower(c), *++format); | ||||
break; | break; | ||||
case 's': // script | |||||
case 's': /* script */ | |||||
fputs(ucd_get_script_string(ucd_lookup_script(c)), out); | fputs(ucd_get_script_string(ucd_lookup_script(c)), out); | ||||
break; | break; | ||||
case 'T': // titlecase | |||||
case 'T': /* titlecase */ | |||||
uprintf_codepoint(out, ucd_totitle(c), *++format); | uprintf_codepoint(out, ucd_totitle(c), *++format); | ||||
break; | break; | ||||
case 'U': // uppercase | |||||
case 'U': /* uppercase */ | |||||
uprintf_codepoint(out, ucd_toupper(c), *++format); | uprintf_codepoint(out, ucd_toupper(c), *++format); | ||||
break; | break; | ||||
} | } | ||||
{ | { | ||||
FILE *in = NULL; | FILE *in = NULL; | ||||
const char *format = NULL; | const char *format = NULL; | ||||
for (int argn = 1; argn != argc; ++argn) | |||||
int argn; | |||||
for (argn = 1; argn != argc; ++argn) | |||||
{ | { | ||||
const char *arg = argv[argn]; | const char *arg = argv[argn]; | ||||
if (!strcmp(arg, "--stdin") || !strcmp(arg, "-")) | if (!strcmp(arg, "--stdin") || !strcmp(arg, "-")) | ||||
} | } | ||||
else | else | ||||
{ | { | ||||
for (codepoint_t c = 0; c <= 0x10FFFF; ++c) | |||||
codepoint_t c; | |||||
for (c = 0; c <= 0x10FFFF; ++c) | |||||
uprintf(stdout, c, format ? format : | uprintf(stdout, c, format ? format : | ||||
"%pH %s %C %c %UH %LH %TH %id %ix %ic %is %ib %ip %iP %ig %iA %ia %iu %il %P\n"); | "%pH %s %C %c %UH %LH %TH %id %ix %ic %is %ib %ip %iP %ig %iA %ia %iu %il %P\n"); | ||||
} | } |
* along with ucd-tools. If not, see <http://www.gnu.org/licenses/>. | * along with ucd-tools. If not, see <http://www.gnu.org/licenses/>. | ||||
*/ | */ | ||||
// NOTE: This file is automatically generated from the UnicodeData.txt file in | |||||
// the Unicode Character database by the ucd-tools/tools/categories.py script. | |||||
/* NOTE: This file is automatically generated from the UnicodeData.txt file in | |||||
* the Unicode Character database by the ucd-tools/tools/categories.py script. | |||||
*/ | |||||
#include "ucd/ucd.h" | #include "ucd/ucd.h" | ||||
#include <stddef.h> | #include <stddef.h> | ||||
// Unicode Character Data %s | |||||
/* Unicode Character Data %s */ | |||||
struct case_conversion_entry | struct case_conversion_entry | ||||
{ | { |
* along with ucd-tools. If not, see <http://www.gnu.org/licenses/>. | * along with ucd-tools. If not, see <http://www.gnu.org/licenses/>. | ||||
*/ | */ | ||||
// NOTE: This file is automatically generated from the UnicodeData.txt file in | |||||
// the Unicode Character database by the ucd-tools/tools/categories.py script. | |||||
/* NOTE: This file is automatically generated from the UnicodeData.txt file in | |||||
* the Unicode Character database by the ucd-tools/tools/categories.py script. | |||||
*/ | |||||
#include "ucd/ucd.h" | #include "ucd/ucd.h" | ||||
#define Zs UCD_CATEGORY_Zs | #define Zs UCD_CATEGORY_Zs | ||||
#define Ii UCD_CATEGORY_Ii | #define Ii UCD_CATEGORY_Ii | ||||
// Unicode Character Data %s | |||||
/* Unicode Character Data %s */ | |||||
""" % ucd_version) | """ % ucd_version) | ||||
for category in special_categories: | for category in special_categories: | ||||
sys.stdout.write('{\n') | sys.stdout.write('{\n') | ||||
for codepoint, table in sorted(category_tables[table_index].items()): | for codepoint, table in sorted(category_tables[table_index].items()): | ||||
if isinstance(table, str): | if isinstance(table, str): | ||||
sys.stdout.write('\tcategories_%s, // %s\n' % (table, codepoint)) | |||||
sys.stdout.write('\tcategories_%s, /* %s */\n' % (table, codepoint)) | |||||
else: | else: | ||||
sys.stdout.write('\tcategories_%s,\n' % codepoint) | sys.stdout.write('\tcategories_%s,\n' % codepoint) | ||||
sys.stdout.write('};\n') | sys.stdout.write('};\n') | ||||
sys.stdout.write('{\n') | sys.stdout.write('{\n') | ||||
for codepoints, category, comment in category_sets: | for codepoints, category, comment in category_sets: | ||||
if category: | if category: | ||||
sys.stdout.write('\tif (c <= 0x%s) return %s; // %s : %s\n' % (codepoints.last, category, codepoints, comment)) | |||||
sys.stdout.write('\tif (c <= 0x%s) return %s; /* %s : %s */\n' % (codepoints.last, category, codepoints, comment)) | |||||
else: | else: | ||||
sys.stdout.write('\tif (c <= 0x%s) // %s\n' % (codepoints.last, codepoints)) | |||||
sys.stdout.write('\tif (c <= 0x%s) /* %s */\n' % (codepoints.last, codepoints)) | |||||
sys.stdout.write('\t{\n') | sys.stdout.write('\t{\n') | ||||
sys.stdout.write('\t\tconst uint8_t *table = categories_%s_%s[(c - 0x%s) / 256];\n' % (codepoints.first, codepoints.last, codepoints.first)) | sys.stdout.write('\t\tconst uint8_t *table = categories_%s_%s[(c - 0x%s) / 256];\n' % (codepoints.first, codepoints.last, codepoints.first)) | ||||
sys.stdout.write('\t\treturn (ucd_category)table[c % 256];\n') | sys.stdout.write('\t\treturn (ucd_category)table[c % 256];\n') | ||||
sys.stdout.write('\t}\n') | sys.stdout.write('\t}\n') | ||||
sys.stdout.write('\treturn Ii; // Invalid Unicode Codepoint\n') | |||||
sys.stdout.write('\treturn Ii; /* Invalid Unicode Codepoint */\n') | |||||
sys.stdout.write('}\n') | sys.stdout.write('}\n') | ||||
sys.stdout.write(""" | sys.stdout.write(""" |
props += (2 ** 34) * data.get('Emoji_Presentation', 0) # emoji-data | props += (2 ** 34) * data.get('Emoji_Presentation', 0) # emoji-data | ||||
props += (2 ** 35) * data.get('Emoji_Modifier', 0) # emoji-data | props += (2 ** 35) * data.get('Emoji_Modifier', 0) # emoji-data | ||||
props += (2 ** 36) * data.get('Emoji_Modifier_Base', 0) # emoji-data | props += (2 ** 36) * data.get('Emoji_Modifier_Base', 0) # emoji-data | ||||
props += (2 ** 37) * data.get('Regional_Indicator', 0) # PropList 10.0.0 | |||||
props += (2 ** 38) * data.get('Emoji_Component', 0) # emoji-data 5.0 | |||||
# eSpeak NG extended properties: | # eSpeak NG extended properties: | ||||
props += (2 ** 52) * data.get('Inverted_Terminal_Punctuation', 0) | props += (2 ** 52) * data.get('Inverted_Terminal_Punctuation', 0) | ||||
props += (2 ** 53) * data.get('Punctuation_In_Word', 0) | props += (2 ** 53) * data.get('Punctuation_In_Word', 0) |
* along with ucd-tools. If not, see <http://www.gnu.org/licenses/>. | * along with ucd-tools. If not, see <http://www.gnu.org/licenses/>. | ||||
*/ | */ | ||||
// NOTE: This file is automatically generated from the Scripts.txt file in | |||||
// the Unicode Character database by the ucd-tools/tools/scripts.py script. | |||||
/* NOTE: This file is automatically generated from the Scripts.txt file in | |||||
* the Unicode Character database by the ucd-tools/tools/scripts.py script. | |||||
*/ | |||||
#include "ucd/ucd.h" | #include "ucd/ucd.h" | ||||
#define Geok UCD_SCRIPT_Geok | #define Geok UCD_SCRIPT_Geok | ||||
#define Geor UCD_SCRIPT_Geor | #define Geor UCD_SCRIPT_Geor | ||||
#define Glag UCD_SCRIPT_Glag | #define Glag UCD_SCRIPT_Glag | ||||
#define Gonm UCD_SCRIPT_Gonm | |||||
#define Goth UCD_SCRIPT_Goth | #define Goth UCD_SCRIPT_Goth | ||||
#define Gran UCD_SCRIPT_Gran | #define Gran UCD_SCRIPT_Gran | ||||
#define Grek UCD_SCRIPT_Grek | #define Grek UCD_SCRIPT_Grek | ||||
#define Sind UCD_SCRIPT_Sind | #define Sind UCD_SCRIPT_Sind | ||||
#define Sinh UCD_SCRIPT_Sinh | #define Sinh UCD_SCRIPT_Sinh | ||||
#define Sora UCD_SCRIPT_Sora | #define Sora UCD_SCRIPT_Sora | ||||
#define Soyo UCD_SCRIPT_Soyo | |||||
#define Sund UCD_SCRIPT_Sund | #define Sund UCD_SCRIPT_Sund | ||||
#define Sylo UCD_SCRIPT_Sylo | #define Sylo UCD_SCRIPT_Sylo | ||||
#define Syrc UCD_SCRIPT_Syrc | #define Syrc UCD_SCRIPT_Syrc | ||||
#define Xpeo UCD_SCRIPT_Xpeo | #define Xpeo UCD_SCRIPT_Xpeo | ||||
#define Xsux UCD_SCRIPT_Xsux | #define Xsux UCD_SCRIPT_Xsux | ||||
#define Yiii UCD_SCRIPT_Yiii | #define Yiii UCD_SCRIPT_Yiii | ||||
#define Zanb UCD_SCRIPT_Zanb | |||||
#define Zinh UCD_SCRIPT_Zinh | #define Zinh UCD_SCRIPT_Zinh | ||||
#define Zmth UCD_SCRIPT_Zmth | #define Zmth UCD_SCRIPT_Zmth | ||||
#define Zsym UCD_SCRIPT_Zsym | #define Zsym UCD_SCRIPT_Zsym | ||||
#define Zyyy UCD_SCRIPT_Zyyy | #define Zyyy UCD_SCRIPT_Zyyy | ||||
#define Zzzz UCD_SCRIPT_Zzzz | #define Zzzz UCD_SCRIPT_Zzzz | ||||
// Unicode Character Data %s | |||||
/* Unicode Character Data %s */ | |||||
""" % ucd_version) | """ % ucd_version) | ||||
for script in special_scripts: | for script in special_scripts: | ||||
sys.stdout.write('{\n') | sys.stdout.write('{\n') | ||||
for codepoint, table in sorted(script_tables[table_index].items()): | for codepoint, table in sorted(script_tables[table_index].items()): | ||||
if isinstance(table, str): | if isinstance(table, str): | ||||
sys.stdout.write('\tscripts_%s, // %s\n' % (table, codepoint)) | |||||
sys.stdout.write('\tscripts_%s, /* %s */\n' % (table, codepoint)) | |||||
else: | else: | ||||
sys.stdout.write('\tscripts_%s,\n' % codepoint) | sys.stdout.write('\tscripts_%s,\n' % codepoint) | ||||
sys.stdout.write('};\n') | sys.stdout.write('};\n') | ||||
sys.stdout.write('{\n') | sys.stdout.write('{\n') | ||||
for codepoints, script, comment in script_sets: | for codepoints, script, comment in script_sets: | ||||
if script: | if script: | ||||
sys.stdout.write('\tif (c <= 0x%s) return %s; // %s : %s\n' % (codepoints.last, script, codepoints, comment)) | |||||
sys.stdout.write('\tif (c <= 0x%s) return %s; /* %s : %s */\n' % (codepoints.last, script, codepoints, comment)) | |||||
else: | else: | ||||
sys.stdout.write('\tif (c <= 0x%s) // %s\n' % (codepoints.last, codepoints)) | |||||
sys.stdout.write('\tif (c <= 0x%s) /* %s */\n' % (codepoints.last, codepoints)) | |||||
sys.stdout.write('\t{\n') | sys.stdout.write('\t{\n') | ||||
sys.stdout.write('\t\tconst uint8_t *table = scripts_%s_%s[(c - 0x%s) / 256];\n' % (codepoints.first, codepoints.last, codepoints.first)) | sys.stdout.write('\t\tconst uint8_t *table = scripts_%s_%s[(c - 0x%s) / 256];\n' % (codepoints.first, codepoints.last, codepoints.first)) | ||||
sys.stdout.write('\t\treturn (ucd_script)table[c % 256];\n') | sys.stdout.write('\t\treturn (ucd_script)table[c % 256];\n') | ||||
sys.stdout.write('\t}\n') | sys.stdout.write('\t}\n') | ||||
sys.stdout.write('\treturn Zzzz; // Invalid Unicode Codepoint\n') | |||||
sys.stdout.write('\treturn Zzzz; /* Invalid Unicode Codepoint */\n') | |||||
sys.stdout.write('}\n') | sys.stdout.write('}\n') |