Valdis Vitolins 8 years ago
parent
commit
fb64332f66

+ 1
- 1
android/jni/Android.mk View File



# ucd-tools wide-character compatibility support: # ucd-tools wide-character compatibility support:


UCDTOOLS_SRC_PATH := ../../ucd-tools/src
UCDTOOLS_SRC_PATH := ../../src/ucd-tools/src
UCDTOOLS_SRC_FILES := \ UCDTOOLS_SRC_FILES := \
$(subst $(LOCAL_PATH)/$(UCDTOOLS_SRC_PATH),$(UCDTOOLS_SRC_PATH),$(wildcard $(LOCAL_PATH)/$(UCDTOOLS_SRC_PATH)/*.c*)) $(subst $(LOCAL_PATH)/$(UCDTOOLS_SRC_PATH),$(UCDTOOLS_SRC_PATH),$(wildcard $(LOCAL_PATH)/$(UCDTOOLS_SRC_PATH)/*.c*))



+ 27
- 1
dictsource/af_list View File

alaska al'aska alaska al'aska
albanië alb'A:ne@:@- albanië alb'A:ne@:@-
algerië alx2'e@re@:@- algerië alx2'e@re@:@-
alhambra al'ambra
amanzimtoti $4 amanzimtoti $4
antwerpen antv&rp@n antwerpen antv&rp@n
avignon _^_FR avignon _^_FR
oklahoma @Ukl@h'@Uma oklahoma @Ukl@h'@Uma
outeniekwa @Ut@n'ikwa outeniekwa @Ut@n'ikwa
oxford _^_EN oxford _^_EN
paardeneiland pA:rd@n_'eIlant
palermo pal'&rmu palermo pal'&rmu
pelindaba p&l@nd'A:ba pelindaba p&l@nd'A:ba
perú p@ru perú p@ru
potchefstroom pOtSIfstr'o@m potchefstroom pOtSIfstr'o@m
rhône _^_FR rhône _^_FR
richardsbaai ritS@dsb'AI richardsbaai ritS@dsb'AI
riversdal r@v@rsdal
riviersonderend r@fi:rsOn@r_'Ent riviersonderend r@fi:rsOn@r_'Ent
robertson _^_EN robertson _^_EN
salvador salvadO:r salvador salvadO:r
jane _^_EN jane _^_EN
janine dZ@ni:n janine dZ@ni:n
Jean ZA~n $capital Jean ZA~n $capital
jeff _^_EN
jesebel je@s@b&l jesebel je@s@b&l
jessica _^_EN jessica _^_EN
jimmy _^_EN jimmy _^_EN
stuart _^_EN stuart _^_EN
suzanne suz'A:n suzanne suz'A:n
suzette suz'Et suzette suz'Et
sylvia _^_EN
tania tanja tania tanja
telemann te@l@man telemann te@l@man
terblanche t@rblA:nS terblanche t@rblA:nS
thelma _^_EN
theo tiu theo tiu
theron tr'On theron tr'On
thessalonicense tEsalo@nis'E:ns@ thessalonicense tEsalo@nis'E:ns@
viviers v@v@je@ viviers v@v@je@
wilhelm v@lh&l@-m wilhelm v@lh&l@-m
williston _^_EN williston _^_EN
woltemade vOlt@mA:d@
khumalo kum'A:lu khumalo kum'A:lu
zuma zu:ma zuma zu:ma


antares $2 antares $2
beatles _^_EN beatles _^_EN
boeing _^_EN boeing _^_EN
cadillac _^_EN
checkers tSEk@rs checkers tSEk@rs
chevrolet _^_FR chevrolet _^_FR
chrysler kraIsl@r chrysler kraIsl@r
forma _^_LA forma _^_LA
grata _^_LA grata _^_LA
habitatio _^_LA habitatio _^_LA
inclusio _^_LA
inclusio _^_LA
(in camera) @n||kam@ra
(in debiti) _^_LA (in debiti) _^_LA
(in absentia) _^_LA (in absentia) _^_LA
(in extremis) _^_LA (in extremis) _^_LA
// main word list // main word list


aangaande $2 aangaande $2
aanmerklik $2
aanstaande $2 aanstaande $2
(a cappella) a||kap'&la (a cappella) a||kap'&la
adagio ad'A:dZi;%@U adagio ad'A:dZi;%@U
alge alx2@ alge alx2@
algehele alx2@h,e@l@ algehele alx2@h,e@l@
allegro al'Egru allegro al'Egru
allengs alENs
allergene $3 allergene $3
almiskie $3 almiskie $3
alom al_'Om alom al_'Om


babelaas bab@lA:s babelaas bab@lA:s
barrikade $3 barrikade $3
beaming b@_A:m@N
bedewete be@d@ve@t@ bedewete be@d@ve@t@
beide beId@ beide beId@
bekaf b&kaf bekaf b&kaf
belangriker b@laNr@k,Ir belangriker b@laNr@k,Ir
bene be@n@ bene be@n@
beringde b@rINd@
beswil bEsv@l beswil bEsv@l
besnedene b@sne@d@n@ besnedene b@sne@d@n@
bestes bEst@s bestes bEst@s
bewebeen be@v@be@n bewebeen be@v@be@n
bewend be@v@nt bewend be@v@nt
bewering b@ve@rIN bewering b@ve@rIN
bilharzia b@lharsia
biopsie bi'Opsi biopsie bi'Opsi
bomaat bo@mA:t bomaat bo@mA:t
bordegoed bO:rd@x2ut bordegoed bO:rd@x2ut
charisma kar'Isma charisma kar'Isma
cinsaut s@nso@ cinsaut s@nso@
cliché kliS'eI: cliché kliS'eI:
clientèle _^_FR
clivia klIvija clivia klIvija
cognac kOn^ak cognac kOn^ak
confetti $2 confetti $2
déjà _^_FR déjà _^_FR
dekade dEk'A:d@ dekade dEk'A:d@
dekreling dEkre@l@N dekreling dEkre@l@N
demensie d@me~nsi
deurentyd dy@r@nteIt deurentyd dy@r@nteIt
deurgaans $1 deurgaans $1
deurkruis $2 deurkruis $2
exodus Eks'o@dWs exodus Eks'o@dWs


factotum $2 factotum $2
faktotum $2
fakture $2 fakture $2
fetakaas fEtakA:s fetakaas fEtakA:s
figuur f@x2yr figuur f@x2yr
filippense f@l@pEns@ filippense f@l@pEns@
finalis $3 finalis $3
finaliste $3 finaliste $3
fluktuasie $3
fort fOrt fort fOrt
forte fOrt@ forte fOrt@


helaas he@l'A:s helaas he@l'A:s
here he@r@ here he@r@
herero hEr'E:ru herero hEr'E:ru
herontmoeting h&r_Ontmut@N
herrysenis h&r'eIs@n@s herrysenis h&r'eIs@n@s
hierso hi:rsO hierso hi:rsO
hippie _^_EN hippie _^_EN
inkluis $2 inkluis $2
innestel InnEst@l innestel InnEst@l
insomnia @nsOmnija insomnia @nsOmnija
inteling Inte@l@N
intens @nt'Ens intens @nt'Ens
intensiteit $4 intensiteit $4
ironieë irun'i:@ ironieë irun'i:@
kennisvaardig $1 kennisvaardig $1
kimono $2 kimono $2
klaasvakie $2 klaasvakie $2
kliënteel $3
klimeid klImeIt klimeid klImeIt
knapsekêrel knaps@k&:r@l knapsekêrel knaps@k&:r@l
kolossense kOl@s'e~ns@ kolossense kOl@s'e~ns@
korswel kOrsv@l korswel kOrsv@l
kotiljons kOt@lj'o~ns kotiljons kOt@lj'o~ns
kritiek krIt'ik kritiek krIt'ik
kruisteling krYyste@l@N
kulture $2 kulture $2
kunsmatige kWnsm'A:t@x2@ kunsmatige kWnsm'A:t@x2@
kwansuis $2 kwansuis $2
kweekwal kwe@kval kweekwal kwe@kval


landswye lantsveI@
lasagne las'anj@ lasagne las'anj@
legaat l@x2A:t legaat l@x2A:t
legate l@x2A:t@ legate l@x2A:t@
sonure sOn_yr@ sonure sOn_yr@
sover so@f&r sover so@f&r
staccato $2 staccato $2
steekhoudendheid $2
sterwens st&rv@ns sterwens st&rv@ns
stilswye $1 stilswye $1
strydros streIt_rOs strydros streIt_rOs
thula tu:la thula tu:la
toegee tux2e@ toegee tux2e@
toereken ture@k@n toereken ture@k@n
toleransie tOl@r'ansi
tornado $2 tornado $2
totale tut'A:l@ totale tut'A:l@
totsiens $2 totsiens $2
uitdaging YydA:x2@N uitdaging YydA:x2@N
uiteraard Yyt@r_'A:rt uiteraard Yyt@r_'A:rt
uitermate $3 uitermate $3
uitgeslotene Yytx2@slo@t@n@
uniforme $3 uniforme $3


vaarwel fA:rv'&l vaarwel fA:rv'&l

+ 39
- 6
dictsource/af_rules View File

@@@smokkel) ary %ar%eI // default stress: diamant-/drank-/dwelm-/goud-/kokaïensmokkelary, etc. @@@smokkel) ary %ar%eI // default stress: diamant-/drank-/dwelm-/goud-/kokaïensmokkelary, etc.
amarula %am%arul%a // fix stress and a sounds: amarula and compounds amarula %am%arul%a // fix stress and a sounds: amarula and compounds
a (ment %a // shorten a sound: perkament/testament and compounds a (ment %a // shorten a sound: perkament/testament and compounds
_) arendag (CAC %A:r@ntax2 // fix e sound: arendagtig/-e/-heid
arends A:r@nts_ // fix e sound: arendsoog/-kloue/-nes arends A:r@nts_ // fix e sound: arendsoog/-kloue/-nes
argen (tA %arx2@n // move default stress: Argentinië/Argentyns/-e argen (tA %arx2@n // move default stress: Argentinië/Argentyns/-e
a (riA 'A: // akwarium/barium/estuarium/herbarium/seminaria a (riA 'A: // akwarium/barium/estuarium/herbarium/seminaria
attaché _%at%aSe@ // correct pronunciation: (handels-/inligtings-)attaché attaché _%at%aSe@ // correct pronunciation: (handels-/inligtings-)attaché
attrib (u %atr@b // move default stress: attribuut/bute/attributêr attrib (u %atr@b // move default stress: attribuut/bute/attributêr
K) au @U // trauma/-ties/Aucamp/Paul/-a/-us K) au @U // trauma/-ties/Aucamp/Paul/-a/-us
auer aU@r //Sauer etc. fixed au and e sounds in compounds.
auer aU@r //Sauer etc. fix au and e sounds in compounds.
augustus %Ox2WstWs augustus %Ox2WstWs
avokado %af%ukA:du // stress and o sounds avokado %af%ukA:du // stress and o sounds
avokade %af%ukA:d@ // variant form of avokado avokade %af%ukA:d@ // variant form of avokado
bo (grond bo@ // fix 1st o sound: bogronds/-e bo (grond bo@ // fix 1st o sound: bogronds/-e
_) bo (kle bo@ // fix o sound: bokleed/boklere _) bo (kle bo@ // fix o sound: bokleed/boklere
bokma (kier b%Okm%a // fix stress and a sound: bokmakierie/-s/-tjie bokma (kier b%Okm%a // fix stress and a sound: bokmakierie/-s/-tjie
bom (AA bOm_ // fix a sound, pause: bomaanval/-aanslag/-aard/-eenheid and compounds
bom (AA bOm_ // fix a sound, pause: bomaanval/-aard/-eenheid and compounds
bomaans (la bOm_A:ns // fix o sound: bomaanslag/-aanslae
bomaans bo@mA:ns // but fix O sound: bomaans/-e bomaans bo@mA:ns // but fix O sound: bomaans/-e
bonde (C bOnd@ // verbondenheid/bondeldraer/gebondene/saambondelend bonde (C bOnd@ // verbondenheid/bondeldraer/gebondene/saambondelend
boos (aardig b%o@s_ // move default stress: boosaardig/-e/-heid boos (aardig b%o@s_ // move default stress: boosaardig/-e/-heid
bakate (l b%ak@t& // fix stress and vowel sounds: bakatel/-le/-letjie bakate (l b%ak@t& // fix stress and vowel sounds: bakatel/-le/-letjie
baken bA:k@n // fix e sound: afbakening and compounds baken bA:k@n // fix e sound: afbakening and compounds
bakte (ri b%akte@ // fix stress and e sound: bakterie/-ë bakte (ri b%akte@ // fix stress and e sound: bakterie/-ë
balalaika b%al%alaIk%a // fix a sounds and stress: balalaika/-s/-musiek
_) bam (boes b%am // move default stress: bamboes/-e/bamboesfluit... _) bam (boes b%am // move default stress: bamboes/-e/bamboesfluit...
ba (nalA b%a // fix stress and 1st a sound: banale/banaliteit ba (nalA b%a // fix stress and 1st a sound: banale/banaliteit
band (Alier b,and // fix stress and d sound: band(e/o)lier/-e band (Alier b,and // fix stress and d sound: band(e/o)lier/-e
ba (sieli b%a // fix stress and 1st a sound: basielie/-kruit, etc. ba (sieli b%a // fix stress and 1st a sound: basielie/-kruit, etc.
basotho b%asut%u // fix stress and vowel sounds: Basotho/-0ponie and similar basotho b%asut%u // fix stress and vowel sounds: Basotho/-0ponie and similar
basi (s bA:s@ // basis and compounds basi (s bA:s@ // basis and compounds
basilie b%asil%i // fix a sound and stress: basilie/-kruid, etc.
bastille b%asti:l // Bastille and compounds like Bastilledag bastille b%asti:l // Bastille and compounds like Bastilledag
batal (jon b%at%al // shorten 1st a sound, moved stress: bataljon and compounds batal (jon b%at%al // shorten 1st a sound, moved stress: bataljon and compounds
ba (tik b%a // fix stress and a sound: batik/-doek/-kuns/-werk ba (tik b%a // fix stress and a sound: batik/-doek/-kuns/-werk
bere (_ be@r@ //tediebere pandabere etc. bere (_ be@r@ //tediebere pandabere etc.
_) ber (C b&r // fix e sound: Bert/Berta/Bertie/Bertus/berke/-boom _) ber (C b&r // fix e sound: Bert/Berta/Bertie/Bertus/berke/-boom
beste (_ bEst@ // fix e sound: beste/allerbeste/naasbeste/-s beste (_ bEst@ // fix e sound: beste/allerbeste/naasbeste/-s
beton (C@ b@tOn // split ng sounds: betongebou/-gietsel/-gruis
beton (inC@ b@t'On // fix O sound: betoninrigting/-ingenieur
be (weging b@ // draaibeweging/swaaibeweging be (weging b@ // draaibeweging/swaaibeweging
bewende be@v@nd@ // first e pronounced long bewende be@v@nd@ // first e pronounced long
bewe (rig be@v@ // fix e sound and stress: bewerig/-e/-heid bewe (rig be@v@ // fix e sound and stress: bewerig/-e/-heid
dia (C d%i%a // diabeet/diafragma/diagnose/dialek/dialoog/diamant dia (C d%i%a // diabeet/diafragma/diagnose/dialek/dialoog/diamant
diaken d%iA:k@n // diaken and compounds diaken d%iA:k@n // diaken and compounds
diako (nie d%i%ak%u // fix stress and o sound: diakonie/-ë diako (nie d%i%ak%u // fix stress and o sound: diakonie/-ë
diende dind@ // fix last e sound in many compounds of bediende: bediendekamer
diens (willi d%ins // move default stress: dienswillig/-e/-heid diens (willi d%ins // move default stress: dienswillig/-e/-heid
dieper (@ dip@r // fix e sound: diepere/dieperliggend/-e dieper (@ dip@r // fix e sound: diepere/dieperliggend/-e
digi (ta d%ix2%i // move default stress: digitaal/digitale digi (ta d%ix2%i // move default stress: digitaal/digitale
@C) e (reekK @ // fix e sound: duine-/rotsereeks @C) e (reekK @ // fix e sound: duine-/rotsereeks
aai) e (C @ // fix e sound: baaierd/waaierstert/paaiement/compounds starting with baaiers- aai) e (C @ // fix e sound: baaierd/waaierstert/paaiement/compounds starting with baaiers-
lat) eres @r@s //lateres lat) eres @r@s //lateres
@C) erm (K &r@-m // a catch-all for words ending in -erm(s)
fp) ers (_ &rs // fix e sound: dof-/gif-/olyf-/tydskrifpers, etc. fp) ers (_ &rs // fix e sound: dof-/gif-/olyf-/tydskrifpers, etc.
bloup) ers &:rs // fix e sound: bloupers bloup) ers &:rs // fix e sound: bloupers
iew) ers (ter @rs // fix e sound: (l)iewerster iew) ers (ter @rs // fix e sound: (l)iewerster
p) e (talje @ // fix stress and 1st e sound: petalje and compounds p) e (talje @ // fix stress and 1st e sound: petalje and compounds
l) e (moen @ //fix lemoen and compounds l) e (moen @ //fix lemoen and compounds
Cy) e (C+ @ // fix connecting e sound in compounds like byekorf/skilderyemuseum, etc. Cy) e (C+ @ // fix connecting e sound in compounds like byekorf/skilderyemuseum, etc.
effe (kleur Ef@ // fix stress and 2nd e sound: effekleur(ig/-e)
eier eI@r // eiergeel/eierwit/leierfiguur/leiergroep eier eI@r // eiergeel/eierwit/leierfiguur/leiergroep
@C) ei (land _,eI // insert short pause: skiereiland and many -eiland compounds @C) ei (land _,eI // insert short pause: skiereiland and many -eiland compounds
@) ei (sen _'eI // (on)veeleisend/spoedeisend @) ei (sen _'eI // (on)veeleisend/spoedeisend
flu (we fl%y // move default stress: fluweel/fluwele and compounds flu (we fl%y // move default stress: fluweel/fluwele and compounds
_) fok (o fOk_ // fix o sound, insert short break: fokop/fokof _) fok (o fOk_ // fix o sound, insert short break: fokop/fokof
folio fo@li_u // fix o sounds: folio and compounds folio fo@li_u // fix o sounds: folio and compounds
fondsw fOntsv // fix v sound: fondswerwing & compounds/-waardes
fondue f%Ondy // fix stress and ue sound: fondue and compounds fondue f%Ondy // fix stress and ue sound: fondue and compounds
fone (tiek f%o@n@ // fix e sound: fonetiek and compounds fone (tiek f%o@n@ // fix e sound: fonetiek and compounds
fone (tie f%une@ // fix stress and o sound: foneties/-e fone (tie f%une@ // fix stress and o sound: foneties/-e
formi (da f%Orm%i // move default stress: formidabel/-e formi (da f%Orm%i // move default stress: formidabel/-e
formu (lier f%Orm%y // move default stress: formulier/-e/-boek formu (lier f%Orm%y // move default stress: formulier/-e/-boek
_) for (se_ f'Or // stress back to 1st slb; se rule: _) for (C _) for (se_ f'Or // stress back to 1st slb; se rule: _) for (C
forse (nd fOrs@ // fix e sound: forsend/-e
fos (fa f%Os // move default stress: fosfaat/fosfate fos (fa f%Os // move default stress: fosfaat/fosfate
fo (ssiel f%O // move default stress: fossiel/-e and compounds fo (ssiel f%O // move default stress: fossiel/-e and compounds
foto fo@tu foto fo@tu
_) gra (na x2r@ // granaat(boom)/granate _) gra (na x2r@ // granaat(boom)/granate
grandi (o x2r%and%i // move default stress: grandioos/-ose grandi (o x2r%and%i // move default stress: grandioos/-ose
_) gra (vA x2r%a // fix stress and a sound: gravin/-ne/gravure _) gra (vA x2r%a // fix stress and a sound: gravin/-ne/gravure
grenadella gr@n@d&la // fix stress and vowels: grenadella and compounds like -heining
griekwa x2rikwa // fix w sound and a sound in compounds: Griekwa/-land, etc. griekwa x2rikwa // fix w sound and a sound in compounds: Griekwa/-land, etc.
ouCa) gr (ootjie x2r' // move default stress: ouma/oupagrootjie/-s ouCa) gr (ootjie x2r' // move default stress: ouma/oupagrootjie/-s
_) graad (e x2r%A:t_ // fix stress and d sound: graadeen(tjies)/-elfs _) graad (e x2r%A:t_ // fix stress and d sound: graadeen(tjies)/-elfs
gegesel x2@x2e@s@l // fix middle e sound gegesel x2@x2e@s@l // fix middle e sound
K) gele (_ x2e@l@ // fix stress and e sound: (eier)gele K) gele (_ x2e@l@ // fix stress and e sound: (eier)gele
_) geler x2e@l@r // stress and 1st e sound: geler/gelerig _) geler x2e@l@r // stress and 1st e sound: geler/gelerig
_) gell x2&l
gemel (d x2@m&l // fix e sound: bo-/bowe-/laas-/meergemeld/-e gemel (d x2@m&l // fix e sound: bo-/bowe-/laas-/meergemeld/-e
gems x2Ems // fix e sound: (baster)gemsbok/-bul/-ooi, etc. gems x2Ems // fix e sound: (baster)gemsbok/-bul/-ooi, etc.
gene (_ x2e@n@ // gene/diegene gene (_ x2e@n@ // gene/diegene
_) ge (ossP2 x2@ // but ge- prefix: geossilleer/geossifiseer, etc. _) ge (ossP2 x2@ // but ge- prefix: geossilleer/geossifiseer, etc.
ni) ge (ri x2'e@ //nigeriese etc. ni) ge (ri x2'e@ //nigeriese etc.
gese (_ x2'e@s@ //Portugese, and others gese (_ x2'e@s@ //Portugese, and others
gesp (etjie x2Esp // fix e sounds: gespetjie/-s
gespe (_ x2Esp@ // gespe and compounds gespe (_ x2Esp@ // gespe and compounds
gespes (_ x2Esp@s // gespes and compounds gespes (_ x2Esp@s // gespes and compounds
gewens (g x2e@v@ns // (on)vergewensgesind/-e/-heid gewens (g x2e@v@ns // (on)vergewensgesind/-e/-heid
hart (stogte_ h%art // move default stress: hartstogtelik hart (stogte_ h%art // move default stress: hartstogtelik
hart (stogte_N hart // restore default stress: hartstogte hart (stogte_N hart // restore default stress: hartstogte
ha (we hA: // fix stress and a sound: hawearbeider/lewendehaweafdeling ha (we hA: // fix stress and a sound: hawearbeider/lewendehaweafdeling
_) hef (a hEf_ // fix e sound, insert break: hefapparaat/-arm(s)
hia (sint h%ij%a // fix stress: hiasint/-e and compounds hia (sint h%ij%a // fix stress: hiasint/-e and compounds
_) hi (bis h%i // move default stress: hibiskus/-se and compounds _) hi (bis h%i // move default stress: hibiskus/-se and compounds
hierna (maal h%i:rnA: // fix stress and a sound: hiernamaals/-e hierna (maal h%i:rnA: // fix stress and a sound: hiernamaals/-e
_) idi (o %id%i // idioom/idiome/idioot _) idi (o %id%i // idioom/idiome/idioot
_) id (A %id // idille/idillies/ideëryk _) id (A %id // idille/idillies/ideëryk
&l) iker (C @k@r // fix i sound: menslikerwys/redelikerwyse, etc. &l) iker (C @k@r // fix i sound: menslikerwys/redelikerwyse, etc.
&l) iker (_ @k@r // fix i sound: afstootliker/(ge)redeliker/onberispeliker
illumi (nA %il%um%i // fix stress and vowel sounds: illuminasie/illumineer/illuminati illumi (nA %il%um%i // fix stress and vowel sounds: illuminasie/illumineer/illuminati
illu (si %ily // illusie/-s/illusief illu (si %ily // illusie/-s/illusief
illu (strA %il%W // fix i sound: illustreer/illustrering/illustrasie illu (strA %il%W // fix i sound: illustreer/illustrering/illustrasie
_) in (a@P2 In // inakkuraat/inaktief/inaktiwiteit _) in (a@P2 In // inakkuraat/inaktief/inaktiwiteit
_) inbe (lC Inb%& // fix e sound in compounds like inbelprogram _) inbe (lC Inb%& // fix e sound in compounds like inbelprogram
indone (si @nd%uni: // fix stress and vowel sounds: Indonesië/Indonesiese indone (si @nd%uni: // fix stress and vowel sounds: Indonesië/Indonesiese
s) in (gestel @n // split n g: compounds with ingesteldheid/winsingestelde
_) in (oe In_ // fix i sound, pause: inoefen/-oes and derivatives _) in (oe In_ // fix i sound, pause: inoefen/-oes and derivatives
invest (eer @nv%Est // fix stress, v and e sounds: investeer/-der invest (eer @nv%Est // fix stress, v and e sounds: investeer/-der
investe (r@ @nv%Este@ // fix e sound: (kapitaal)investering/investerende investe (r@ @nv%Este@ // fix e sound: (kapitaal)investering/investerende
krieketw krik@tv //krieket followed by w in compounds always v krieketw krik@tv //krieket followed by w in compounds always v


krokodi (l kr%Ok@dI // (wyfie)krokodil/-le krokodi (l kr%Ok@dI // (wyfie)krokodil/-le
kro (niek kr%u // fix stress and o sound: kroniek and many compounds
kruger kr'Y@@r kruger kr'Y@@r
ku (ba@ k%y // move default stress: kubaan/kubane ku (ba@ k%y // move default stress: kubaan/kubane
ku (biek k%y // move default stress: kubiek/-e/-getal ku (biek k%y // move default stress: kubiek/-e/-getal
_) kuber kyb@r // fixed e sound: kuberruim(te)
_) kuber kyb@r // fix e sound: kuberruim(te)
kulin k%Wl%in // Move default stress and fix u sound: kulinër/-e kulin k%Wl%in // Move default stress and fix u sound: kulinër/-e
@) kundi (g k'Wnd@ // wiskundige/onoordeelkundigheid and many similar @) kundi (g k'Wnd@ // wiskundige/onoordeelkundigheid and many similar
kurwe kWrv@ // fix e sound: kurwes/skurwebas/skurwebek/skurwepadda kurwe kWrv@ // fix e sound: kurwes/skurwebas/skurwebek/skurwepadda
kafe (te k%af@ // fix stress and vowel sounds: kafeteria and compounds kafe (te k%af@ // fix stress and vowel sounds: kafeteria and compounds
kafe (ï k%af%i // fix stress and vowel sounds: kafeïene and compounds kafe (ï k%af%i // fix stress and vowel sounds: kafeïene and compounds
ka (jak k%a // fix stress and 1st a sound: kajak/-ke/-vaarder ka (jak k%a // fix stress and 1st a sound: kajak/-ke/-vaarder
kakao k%akA:w // fix stress and vowel sounds: kakao and many compounds
kake (C kA:k@ // kakebeen/skakelaar/skakelbord/skakelfunksie kake (C kA:k@ // kakebeen/skakelaar/skakelbord/skakelfunksie
kalahari kalah'A:ri // stress: Kalahari/-sand/-woestyn kalahari kalah'A:ri // stress: Kalahari/-sand/-woestyn
_) ka (lAnC k%a // kalender and compounds/kalant/kalander and compounds _) ka (lAnC k%a // kalender and compounds/kalant/kalander and compounds
kontrasep k%Ontr%asEp // fix stress and e sound: kontrasepsie and derivatives kontrasep k%Ontr%asEp // fix stress and e sound: kontrasepsie and derivatives
kontrover (s k%Ontr%uv&r // o and v sounds: kontroversie/kontroversieel kontrover (s k%Ontr%uv&r // o and v sounds: kontroversie/kontroversieel
kop (o kOp? // fix o sound: koponderstebo/kopomdraai/kopoperasie/gryskoponderwyser/poenskopolifant kop (o kOp? // fix o sound: koponderstebo/kopomdraai/kopoperasie/gryskoponderwyser/poenskopolifant
kop (agtig k%Op_ // fix o sound, insert break: hamer-/spinnekop-/penkopagtig(e(s))
kopu (lA k%Op%y // fix o sound: kopulasie/kopuleer and derivatives kopu (lA k%Op%y // fix o sound: kopulasie/kopuleer and derivatives
_) kor (dA k%Or // kordaat/kordon _) kor (dA k%Or // kordaat/kordon
ko (rint k%u // fix stress and o sound: korint/-e and compounds ko (rint k%u // fix stress and o sound: korint/-e and compounds
loboto (mie l%ub%Ot%u // fix stress and o sounds: lobotomie loboto (mie l%ub%Ot%u // fix stress and o sounds: lobotomie
lo (ja l%u // fix stress and o sound: lojale/lojaliteit lo (ja l%u // fix stress and o sound: lojale/lojaliteit
lo (kalA l%u // fix stress and o sound: lokale/ontvangslokale, etc. lo (kalA l%u // fix stress and o sound: lokale/ontvangslokale, etc.
loke (t l%ukE // fix stress and vowel sounds: many compounds with loket
lom (bardA l%Om // move default stress: Lombardies/-e/Lombarde lom (bardA l%Om // move default stress: Lombardies/-e/Lombarde
_) lore (C@ lo@r@ // fix e sound: verloregaan/verloregoederekantoor/Verlorerivier _) lore (C@ lo@r@ // fix e sound: verloregaan/verloregoederekantoor/Verlorerivier
_) losge (@P5 l'Osx2@ _) losge (@P5 l'Osx2@
medisyne m@d@seIn@ //medisyne and compounds medisyne m@d@seIn@ //medisyne and compounds
meganies m@x2'A:nis meganies m@x2'A:nis
_) meege (@P5 m'e@x2@ _) meege (@P5 m'e@x2@
_) meegewe (nd me@x2e@v@ // fix e sounds and stress: meegewend(e)
me (juf m@ // move default stress and shorten e sound me (juf m@ // move default stress and shorten e sound
me (laats m@ // fix stress and e sound: melaats/-e/-heid me (laats m@ // fix stress and e sound: melaats/-e/-heid
melancholie (_N m%El%aNk%o@li // fix stress and 1st e sound: melancholie melancholie (_N m%El%aNk%o@li // fix stress and 1st e sound: melancholie
morf (otomie m%Orf // move default stress: morfotomie morf (otomie m%Orf // move default stress: morfotomie
_) morr (i mOr // restore default stress: morrig/morrie/-doring _) morr (i mOr // restore default stress: morrig/morrie/-doring
_) mors (@ mOrs // restore default stress: morsaf/morsdood/morsig _) mors (@ mOrs // restore default stress: morsaf/morsdood/morsig
mos (agtig mOs_ // fix o sound, insert break: (kos)mosagtig(e)
mosam (biek m%o@s%am // move default stress: Mosambiek/-er/-se mosam (biek m%o@s%am // move default stress: Mosambiek/-er/-se
mosa (ïek m%o@s%a // move default stress: mosaïek and compounds mosa (ïek m%o@s%a // move default stress: mosaïek and compounds
mo (skee m%O // move default stress: moskee/-s and compounds mo (skee m%O // move default stress: moskee/-s and compounds
ne (anderCa n%i // move default stress: Neander(d/t)al/-ler ne (anderCa n%i // move default stress: Neander(d/t)al/-ler
neger (in n%e@x2@r // move default stress: negerin/-ne neger (in n%e@x2@r // move default stress: negerin/-ne
ne (gosie n@ // fix stress and e sound: negosie/-ware, etc. ne (gosie n@ // fix stress and e sound: negosie/-ware, etc.
nek (_ n&k // catch-all for words ending in -nek: koedoe-/swaannek
nek (om n&k_ // fix e sound, insert pause: nekom(ge)draai nek (om n&k_ // fix e sound, insert pause: nekom(ge)draai
nekta (rien n%Ekt%a // fix stress and a sound: nektarien/-perske, etc. nekta (rien n%Ekt%a // fix stress and a sound: nektarien/-perske, etc.
neo (li n%i%u // fix stress and vowel sounds: neolities/-e/neolitikum neo (li n%i%u // fix stress and vowel sounds: neolities/-e/neolitikum
nood (lotti n%o@t // move default stress: noodlottig/-e nood (lotti n%o@t // move default stress: noodlottig/-e
nood (saak n%o@t // move default stress: noodsaaklik/-e/-heid, vs. noodsaak nood (saak n%o@t // move default stress: noodsaaklik/-e/-heid, vs. noodsaak
nood (saak_N no@t // restore default stress: (ge)noodsaak nood (saak_N no@t // restore default stress: (ge)noodsaak
nooien (tjie noIN // remove the e sound: nooientjie(s) and compounds
noord (oos n%o@rt_ // move default stress: noordoos/-te/-telik/-e noord (oos n%o@rt_ // move default stress: noordoos/-te/-telik/-e
nor (ma@ n%Or // normaal/normaalweg/normale/abnormaal/-ale nor (ma@ n%Or // normaal/normaalweg/normale/abnormaal/-ale
nostal (gie_N n%Ost%al // stress on last slb.: nostalgie nostal (gie_N n%Ost%al // stress on last slb.: nostalgie
ooi oI ooi oI
ooy oI ooy oI
oodjie oIci oodjie oIci
CC) ool (A o@l_ // insert break: skooluur/-ure, steenkooluitvoer, but not: Karoolug
ootjie oIci ootjie oIci
oontjie oINki oontjie oINki
oondjie oINki oondjie oINki
oot (moedig %o@t // move default stress: ootmoedig/-e/-heid oot (moedig %o@t // move default stress: ootmoedig/-e/-heid


.group op .group op
_) opaal %o@pA:l // fix o sound, remove break: opaal and compounds
opaat up'A:t // homeopaat/psigopaat and similar opaat up'A:t // homeopaat/psigopaat and similar
opatie upat'i // homeopatie/neuropatie and similar opatie upat'i // homeopatie/neuropatie and similar
opaties up'A:tis // psigopaties/osteopaties and similar opaties up'A:tis // psigopaties/osteopaties and similar
pol (vy p%Ol // move default stress: polvy/-e and compounds pol (vy p%Ol // move default stress: polvy/-e and compounds
pomelo p%ume@l%u // fix stress and o sounds: pomelo(sap/-drankie...) pomelo p%ume@l%u // fix stress and o sounds: pomelo(sap/-drankie...)
_) pon (dok p%On // move default stress: pondok/-ke/-kie _) pon (dok p%On // move default stress: pondok/-ke/-kie
pop (agtig p%Op_ // fix o sound, insert break: popagtig(e) and compounds
popu ,pOpy popu ,pOpy
_) por (C %pOr //portret portaal etc. _) por (C %pOr //portret portaal etc.
por (ie p%o@r // move default stress: porie/-ë por (ie p%o@r // move default stress: porie/-ë
sker (muts sk%&r // move default stress: (ge)skermutsel/skermutseling/-e sker (muts sk%&r // move default stress: (ge)skermutsel/skermutseling/-e
skerpi (oen sk%&rp%i // move default stress: skerpioen/-e and compounds skerpi (oen sk%&rp%i // move default stress: skerpioen/-e and compounds
skilder (y sk@ld@r // move stress to y: skildery and compounds like skilderymuseum skilder (y sk@ld@r // move stress to y: skildery and compounds like skilderymuseum
skim (agtig sk@m_ // fix i sound, insert break: skimagtig(e)
skisofr (e sk%is%ufr // fix stress and o sound: skisofreen/skisofrene skisofr (e sk%is%ufr // fix stress and o sound: skisofreen/skisofrene
skle (rose skl@ // fix stress and e sound: sklerose and compounds skle (rose skl@ // fix stress and e sound: sklerose and compounds
_) skok (AP4 sk''Ok_ // fix o sound and stress: skokaankondiging/-effek/-insluiting/-onthulling... _) skok (AP4 sk''Ok_ // fix o sound and stress: skokaankondiging/-effek/-insluiting/-onthulling...
_) su (meri s%u // fix stress and u sound: sumeries/-e _) su (meri s%u // fix stress and u sound: sumeries/-e
su (mmier s%W // move default stress: sumier/-e su (mmier s%W // move default stress: sumier/-e
super (A s''yp@r_ // fix stress, break in compounds like superintelligent super (A s''yp@r_ // fix stress, break in compounds like superintelligent
superi (A s%up%e@r%i // fix stress and vowel sounds: superieur/superioriteit
superintendent s,upr@nt%EndEnt // fix stress and vowel sounds: superintendent and compounds superintendent s,upr@nt%EndEnt // fix stress and vowel sounds: superintendent and compounds
surro (ga s%Wr%u // fix stress and o sound: surrogaat/surrogate and compounds surro (ga s%Wr%u // fix stress and o sound: surrogaat/surrogate and compounds
su (saC s%u // fix stress and u sound: susan/-na/susara su (saC s%u // fix stress and u sound: susan/-na/susara
toe (riste@ t%u // move default stress: toeristebedryf/-sentrum and similar toe (riste@ t%u // move default stress: toeristebedryf/-sentrum and similar
toer (n t%ur // move default stress: compounds of toernooi toer (n t%ur // move default stress: compounds of toernooi
toere (_ tu:r@ // restore default stress toere (_ tu:r@ // restore default stress
toi (let t%OI // move default stress: toilet and compounds
toilet t%OIlEt // move default stress: toilet and compounds: toiletartikel/-emmer/-opsigter
tokke (lo t%Ok@ // move default stress: tokkelos/-sie/tokkelok and compounds tokke (lo t%Ok@ // move default stress: tokkelos/-sie/tokkelok and compounds
tok (tokk t%Ok // move default stress: toktokkie/-s/-spelery, etc. tok (tokk t%Ok // move default stress: toktokkie/-s/-spelery, etc.
tombola t%Ombo@l%a // move default stress: tombola and compounds tombola t%Ombo@l%a // move default stress: tombola and compounds
@) toris (_ t'o@r@s // fix stress: pectoris/klitoris @) toris (_ t'o@r@s // fix stress: pectoris/klitoris
_) tor (nyn t%Or // move default stress: tornyn/-e and compounds _) tor (nyn t%Or // move default stress: tornyn/-e and compounds
_) tos (ka t%Os // move default stress: Toskaanse/Toskane _) tos (ka t%Os // move default stress: Toskaanse/Toskane
tser (tjie ts@r // fix e sound: (skoen)poetsertjie/weerkaatsertjie
ttel t@l // many compounds of bottel/skottelgoed ttel t@l // many compounds of bottel/skottelgoed
tuberkulose t%yb@rk%ylo@s@ // fix stress; e sound in compounds: tuberkulose/-behandeling tuberkulose t%yb@rk%ylo@s@ // fix stress; e sound in compounds: tuberkulose/-behandeling
tug (A tWx2_ // fix u sound: (on)tugondersoek/-oortreding/-ordonnansie tug (A tWx2_ // fix u sound: (on)tugondersoek/-oortreding/-ordonnansie
_) vanklik faNkl@k // (on)ontvanklik/-e/-er/-heid _) vanklik faNkl@k // (on)ontvanklik/-e/-er/-heid
vanself (spr f%ans%&lf // move default stress: vanselfsprekend/-e/-heid vanself (spr f%ans%&lf // move default stress: vanselfsprekend/-e/-heid
vari (A v%ar%i // fix v sound and stress: variasie/-s/varieer vari (A v%ar%i // fix v sound and stress: variasie/-s/varieer
va (sal v%a // fix stress and v and a sounds: vasal/-le
vaseline v%as@lin // fix stress, v and vowel sounds: vaseline/-bottel, etc. vaseline v%as@lin // fix stress, v and vowel sounds: vaseline/-bottel, etc.
vasste (l fast& // fix e sound: vasstel(ling/-lende) vasste (l fast& // fix e sound: vasstel(ling/-lende)
_) vat (A@ fat_ // fix a sound: vatafstand/-orgaan _) vat (A@ fat_ // fix a sound: vatafstand/-orgaan


ve (l f& // maagvel, stress on 1st slb ve (l f& // maagvel, stress on 1st slb
@) vel (A fe@l // aanbeveling/aanbevelingsbrief @) vel (A fe@l // aanbeveling/aanbevelingsbrief
vel (djie f&l // fix -djie sound: veldjie(s) and compounds
veld (C f&lt // fix d sound: veldreuk/-radio/-rantsoen, etc. veld (C f&lt // fix d sound: veldreuk/-radio/-rantsoen, etc.
veld (eks f&lt_ // fix d sound, pause: veldekskursie/-ekspedisie/-s veld (eks f&lt_ // fix d sound, pause: veldekskursie/-ekspedisie/-s
veld (o f&lt_ // fix d sound, pause: veldorgideë/-opsigter/-oppervlakte/-opname, etc. veld (o f&lt_ // fix d sound, pause: veldorgideë/-opsigter/-oppervlakte/-opname, etc.
veld (t f&l // eliminate double t sound: veldtog and many compounds, Langeveldt, Springveldt
vele f'e@l@ vele f'e@l@
ven (detta v%En // fix stress and v sound: vendetta and compounds ven (detta v%En // fix stress and v sound: vendetta and compounds
ven (dusie f@n // fix stress and e sound: vendusie and compounds ven (dusie f@n // fix stress and e sound: vendusie and compounds
vol (kome f%Ol vol (kome f%Ol
vo (llA_ fO // volle/vollê/Volla - exception to: vo (lC f%O vo (llA_ fO // volle/vollê/Volla - exception to: vo (lC f%O
volle (dig f%Ole@ // fix stress and e sound: (on)volledig/-e/-heid... volle (dig f%Ole@ // fix stress and e sound: (on)volledig/-e/-heid...
voll (engte fOlE
volle (ngte fOlE
vo (ller fO // exception to: vo (lC f%O vo (ller fO // exception to: vo (lC f%O
volles (_ fOl@s // fix stress and e sound: volles/passievolles, etc. volles (_ fOl@s // fix stress and e sound: volles/passievolles, etc.
vol (hou_ fOl // exception to: vo (lC f%O vol (hou_ fOl // exception to: vo (lC f%O

+ 9
- 10
dictsource/en_list View File

idly aIdlI idly aIdlI
idiocy IdI@si idiocy IdI@si
ifrog $alt6 ifrog $alt6
ignoramus Igno@r'eIm@s
ignoramus IgnO@r'eIm@s
illiterate $alt2 illiterate $alt2
illumine $alt2 illumine $alt2
imagery ImIdZri imagery ImIdZri
nonetheless nVnD@l'Es nonetheless nVnD@l'Es
nosedive noUzdaIv nosedive noUzdaIv
nosir noUs3: nosir noUs3:
?5 nosir noUsIR
not noUt // for noted, notable, etc not noUt // for noted, notable, etc
nots n0ts nots n0ts
(nôtre dame) noUtr@'dA:m (nôtre dame) noUtr@'dA:m
sinus saIn@s sinus saIn@s
siphon $alt2 siphon $alt2
sir s,3: $only sir s,3: $only
?5 sir s,VR $only
?5 sir s,IR $only
siren saIr@n siren saIr@n
site saIt // for sited site saIt // for sited
ski ski: ski ski:
son sVn son sVn
sonar soUnA@ sonar soUnA@
sonny sVnI sonny sVnI
sooth su:T $only
sopapilla soUp@p'i:@
sope soUpeI sope soUpeI
sorbet sO@beI
souffle su:fl'eI souffle su:fl'eI
soundbite saUndbaIt soundbite saUndbaIt
souvenir su:v@n'i@3 souvenir su:v@n'i@3
sopapilla soUp@p'i:@
sorbet sO@beI
soyabean sOI@bi:n soyabean sOI@bi:n
specific sp@sIfIk specific sp@sIfIk
specimen spEsI2m@n specimen spEsI2m@n
tamale ta#mA:li tamale ta#mA:li
tampon tamp0n tampon tamp0n
tangerine tandZ@r'i:n tangerine tandZ@r'i:n
taoiseach ti:S@x
tapestry tapI#stri tapestry tapI#stri
tarantula t@rantS@l@ tarantula t@rantS@l@
tardis $alt1 tardis $alt1
ye ji: $u+ ye ji: $u+
yea jeI yea jeI
yessir jEss3: yessir jEss3:
?5 yessir jEssIR
yoghurt j0g3t yoghurt j0g3t
?3 yoghurt joUg3t ?3 yoghurt joUg3t
?3 yogurt joUg3t ?3 yogurt joUg3t
?!3 Anthony ant@ni ?!3 Anthony ant@ni
Anton ant0n Anton ant0n
Anya anj@ Anya anj@
Aoife i:f@
Aoiffe i:f@
Aphrodite afr@d'aIti Aphrodite afr@d'aIti
Archibald A@tSIbO:ld Archibald A@tSIbO:ld
Archie A@tSi Archie A@tSi
Salman sa#lmA:n Salman sa#lmA:n
Samantha sa#manT@ Samantha sa#manT@
(Santa claus) s'ant@||kl'O:z (Santa claus) s'ant@||kl'O:z
Saoirse si@S@
Sarah se@r@ Sarah se@r@
Sarisa $alt3 Sarisa $alt3
Seamus SeIm@s Seamus SeIm@s
Simon saIm@n Simon saIm@n
Sinead SI2neId Sinead SI2neId
Sinéad SI2neId Sinéad SI2neId
Siobhan S@vO:n
Siobhán S@vO:n
Siobhan SI2vO:n
Siobhán SI2vO:n
Sonia s0nj@ Sonia s0nj@
Sophia soUf'i@ Sophia soUf'i@
Sophie soUfi Sophie soUfi

+ 29
- 8
dictsource/en_rules View File

sw) a (m_ a sw) a (m_ a
sw) a (nk a sw) a (nk a
ao eI0 ao eI0
mh) ao eI // Irish, e.g. 'mhaol' /weIl/
m) ao aU m) ao aU
p) ao aU p) ao aU
t) ao aU t) ao aU
ao (_ =aU ao (_ =aU
aois (_ i:S
aoise (_ i:S@
ao (ism aU
ao (ist aU
aoi (C i: // Irish, e.g. 'Aoife' /i:f@/
aoir (C e@ // Irish, e.g. 'Saoirse' /se@S@/
?3 aoir (C 3: // Irish, e.g. 'Saoirse' /s3:S@/
g) ao (l eI@ g) ao (l eI@
aor eI'o@ aor eI'o@
m) ao (ri aU m) ao (ri aU
_n) a (tional a _n) a (tional a
@) a (tious 'eI @) a (tious 'eI
ell) a (trix @ ell) a (trix @
a (triC 'eI
a (trix 'eI
a (trice 'eI
n) a (tur eI n) a (tur eI
n) a (tura a n) a (tura a
&) a (ture_ @ &) a (ture_ @
_m) ag (ell a#dZ _m) ag (ell a#dZ
Cp) age (_ eIdZ Cp) age (_ eIdZ
pp) age (_ I2dZ pp) age (_ I2dZ
_ant) ag 'ag
_ant) ag (on 'ag
enr) ag (e_ 'eIdZ enr) ag (e_ 'eIdZ
outr) ag (e_ eIdZ outr) ag (e_ eIdZ
der) ag (e_ eIdZ der) ag (e_ eIdZ
may) be (_ bi: may) be (_ bi:
_) be (CA bI# _) be (CA bI#
_) bete (lg bi:t@ _) bete (lg bi:t@
_) be (C% bE
_) be (C%+ bE
_) be (atiC b%i: _) be (atiC b%i:
_) be (b bi: _) be (b bi:
_) be (cl bI# _) be (cl bI#
_) be (kn bI# _) be (kn bI#
_) belarus bEl@r'u:s _) belarus bEl@r'u:s
_) be (lC bE _) be (lC bE
_) be (llig bI#
_) be (llig+ bI#
_) be (re bE _) be (re bE
_) be (sC bI# _) be (sC bI#
_) be (stia bE _) be (stia bE
e (Cical 'E e (Cical 'E
e (CiuB i: e (CiuB i:
&) e (_ &) e (_
aoiC) e (_ @ // Irish, e.g. 'Aoife' /i:f@/
aoiCC) e (_ @ // Irish, e.g. 'Saoirse' /se@S@/
acB) e (_ %I acB) e (_ %I
XC) e (_N i: XC) e (_N i:
vert) e (b I vert) e (b I
y) ed (_S2v d# y) ed (_S2v d#
debut) ed (_S2 d# debut) ed (_S2 d#
edly (_S4m I#dl%i edly (_S4m I#dl%i
eg) edly (_S3m I#dl%i
c) ed (e_ 'i:d c) ed (e_ 'i:d
p) ed (e_ i:d p) ed (e_ i:d
p) edal Ed@L p) edal Ed@L
en (core 0n en (core 0n
&) ency (_ @ns%i &) ency (_ @ns%i
ency (cli %EnsI ency (cli %EnsI
_) en (dg@ %En
_) en (dp@ %En
k) en (d_ En k) en (d_ En
s) en (d_ En s) en (d_ En
t) en (d_ En t) en (d_ En
exp) eri (en i@rI2 exp) eri (en i@rI2
XC) er 3: XC) er 3:
th) er (@ 3: th) er (@ 3:
h) er (@ %3
h) er (nan %3
X) er (A E#r X) er (A E#r
_h) eretical I#rEtIk@L _h) eretical I#rEtIk@L
_qu) er 3: _qu) er 3:
&z) es (_S2 %I#z &z) es (_S2 %I#z
&C) es (_S1i z &C) es (_S1i z
xus) es (_S2 %I#z xus) es (_S2 %I#z
tamus) es (_S2 %I#z // hippopotamuses
es (carp I2s es (carp I2s
es (cape %Es es (cape %Es
es (capi %Es es (capi %Es
&) ford (_S4 f3d &) ford (_S4 f3d
&f) ford (_S4 3d &f) ford (_S4 3d
for (see f%O@ for (see f%O@
for (ward f'o@ // straightforward
for (ward f'O@
ft (en f ft (en f
&) ful (_S3i f@L &) ful (_S3i f@L


_) metall (ic m@tal _) metall (ic m@tal
metabo m@t'ab0 metabo m@t'ab0
_) meta (llu m%Eta _) meta (llu m%Eta
mh (ao w // Irish, e.g. 'mhaol' /weIl/
aoi) mh (e v // Irish, e.g. 'Caoimhe' /ki:v@/
&) mobile m@bi:l &) mobile m@bi:l
_) mocha moUk@ _) mocha moUk@
mono (ga m@n'0 mono (ga m@n'0
&) mouth (_ m@T &) mouth (_ m@T
&) mouth (_$w_alt1 maUT &) mouth (_$w_alt1 maUT
_) multi mVlti _) multi mVlti
_) multi (pl m,VltI
_) multi (@@P5 m,VltI _) multi (@@P5 m,VltI


.group mi .group mi
Co) s (Er z Co) s (Er z
Co) s (En z Co) s (En z
e) s (d z e) s (d z
aoi) s (e S // Irish, e.g. 'Laoise'
aoi) seach S@x // Irish, e.g. 'Taoiseach'
aoi) sigh Si // Irish, e.g. 'Taoisigh'
aoir) s (e S // Irish, e.g. 'Saoirse'
_) se (clu sI# _) se (clu sI#
secur sI#kjU@ secur sI#kjU@
_) se (duc sI# _) se (duc sI#
&) s (ic_ z &) s (ic_ z
n) s (ic_ s n) s (ic_ s
ss (ic s ss (ic s
mu) s (e z
mu) s (ic z mu) s (ic z
ea) s (ie z ea) s (ie z
ea) s (il z ea) s (il z
th (ill th th (ill th
gh) th (A th gh) th (A th
ee) thing DI2N ee) thing DI2N
soo) th D
soo) th (say T
the (_ D the (_ D
&) th (L03_ =T &) th (L03_ =T
ou) thed (_ Dd ou) thed (_ Dd

+ 4
- 4
dictsource/fa_list View File

// * Farsi Language fa (or Parsi or Persian) fa_list Version 3.133
// * This file written by Shadyar Khodayari and Ehsan Esmaili who has managed collecting exceptional words. 05-10-2017
// * Farsi Language fa (or Parsi or Persian) fa_list Version 3.134
// * This file written by Shadyar Khodayari and Ehsan Esmaili who has managed collecting exceptional words. 06-24-2017
//********* //*********
// * This program is free software; you can redistribute it and/or modify * // * This program is free software; you can redistribute it and/or modify *
// * it under the terms of the GNU General Public License as published by * // * it under the terms of the GNU General Public License as published by *
آموخت Amuxt آموخت Amuxt
آموزد Amuzad آموزد Amuzad
آموزش AmuzeS آموزش AmuzeS
آمپر AmpeR
آمپرمتر AmpeRmetR آمپرمتر AmpeRmetR
آمپلیفایر AmpelifAjeR آمپلیفایر AmpelifAjeR
آمپیریسم AmpiRism آمپیریسم AmpiRism
درایه deRAje درایه deRAje
درایو deRAjv درایو deRAjv
درایور deRAjveR درایور deRAjveR
درباره daR'bAReje:
دربازکن daRbAzkon دربازکن daRbAzkon
دربدر daRbedaR دربدر daRbedaR
دربندکشیده daRbandkeSide دربندکشیده daRbandkeSide
شدیدا Sadidan شدیدا Sadidan
شدیداللحن Sadidollahn شدیداللحن Sadidollahn
شدیم Sodim شدیم Sodim
شراادی SA:_d:jA:_R
شرافت SeRAfat شرافت SeRAfat
شراپنل SeRApnel شراپنل SeRApnel
شراکت SeRAkat شراکت SeRAkat
شربت SaRbat شربت SaRbat
شرت 'SoRt شرت 'SoRt
شرتکات SoRtkAt شرتکات SoRtkAt
شراادی SA:_d:jA:_R
شرشر SeRSeR شرشر SeRSeR
شرطه SoRte: شرطه SoRte:
شرعا SaR?an شرعا SaR?an

+ 3
- 3
dictsource/fa_rules View File

// * Farsi Language fa (or Parsi or Persian) fa_rules Version 3.133
// * This file written by Shadyar Khodayari 05-10-2017
// * Farsi Language fa (or Parsi or Persian) fa_rules Version 3.134
// * This file written by Shadyar Khodayari 06-24-2017
//********* //*********
// * This program is free software; you can redistribute it and/or modify * // * This program is free software; you can redistribute it and/or modify *
// * it under the terms of the GNU General Public License as published by * // * it under the terms of the GNU General Public License as published by *
L09L09L09L09) م (L03L09L09L09_ ma L09L09L09L09) م (L03L09L09L09_ ma


// Prefixes م // Prefixes م
_) م (L03L09L03_$noprefixP1@ ma
_) م (L03L04L03_$noprefixP1@ ma
_) می (L03L09+$noprefixP2@ mi _) می (L03L09+$noprefixP2@ mi
_) می (آL09L09$noprefixP2@ mi _) می (آL09L09$noprefixP2@ mi
_) می (وL09L09$noprefixP2@ mi _) می (وL09L09$noprefixP2@ mi

+ 1
- 0
src/ucd-tools/.gitignore View File

.*.swp .*.swp
*~


# intermediate files: # intermediate files:



+ 5
- 2
src/ucd-tools/CHANGELOG.md View File

* `data/espeak-ng` data files for eSpeak NG extended data. * `data/espeak-ng` data files for eSpeak NG extended data.
* espeak-ng PropList property lookup as part of the `ucd_property` API. * espeak-ng PropList property lookup as part of the `ucd_property` API.


## 9.0.0.1 - (In Progress)
## 10.0.0 - 2017-06-25


* Add `iswblank` and `iswxdigit` compatibility. * Add `iswblank` and `iswxdigit` compatibility.
* Improve ctype compatibility. * Improve ctype compatibility.
* PropList property lookup.
* PropList and emoji-data property lookup.
* Support building with a C89 compiler.
* Update to Unicode Character Data 10.0.0.
* Unicode Emoji 5.0.


## 9.0.0 - 2016-12-28 ## 9.0.0 - 2016-12-28



+ 7
- 7
src/ucd-tools/Makefile.am View File



############################# Unicode Data #################################### ############################# Unicode Data ####################################


EMOJI_VERSION=4.0
EMOJI_VERSION=5.0
UCD_VERSION=@UCD_VERSION@ UCD_VERSION=@UCD_VERSION@
UCD_ROOTDIR=data/ucd UCD_ROOTDIR=data/ucd
UCD_SRCDIR=http://www.unicode.org/Public UCD_SRCDIR=http://www.unicode.org/Public


data/emoji/emoji-data.txt: data/emoji/emoji-data.txt:
mkdir -pv data/emoji mkdir -pv data/emoji
curl ${UCD_SRCDIR}/emoji/${EMOJI_VERSION}/emoji-data.txt > $@
curl ${UCD_SRCDIR}/emoji/${EMOJI_VERSION}/emoji-data.txt -o $@


data/ucd/PropList.txt: data/ucd/PropList.txt:
mkdir -pv data/ucd mkdir -pv data/ucd
curl ${UCD_SRCDIR}/${UCD_VERSION}/ucd/PropList.txt > $@
curl ${UCD_SRCDIR}/${UCD_VERSION}/ucd/PropList.txt -o $@


data/ucd/DerivedCoreProperties.txt: data/ucd/DerivedCoreProperties.txt:
mkdir -pv data/ucd mkdir -pv data/ucd
curl ${UCD_SRCDIR}/${UCD_VERSION}/ucd/DerivedCoreProperties.txt > $@
curl ${UCD_SRCDIR}/${UCD_VERSION}/ucd/DerivedCoreProperties.txt -o $@


data/ucd/PropertyValueAliases.txt: data/ucd/PropertyValueAliases.txt:
mkdir -pv data/ucd mkdir -pv data/ucd
curl ${UCD_SRCDIR}/${UCD_VERSION}/ucd/PropertyValueAliases.txt > $@
curl ${UCD_SRCDIR}/${UCD_VERSION}/ucd/PropertyValueAliases.txt -o $@


data/ucd/Scripts.txt: data/ucd/Scripts.txt:
mkdir -pv data/ucd mkdir -pv data/ucd
curl ${UCD_SRCDIR}/${UCD_VERSION}/ucd/Scripts.txt > $@
curl ${UCD_SRCDIR}/${UCD_VERSION}/ucd/Scripts.txt -o $@


data/ucd/UnicodeData.txt: data/ucd/UnicodeData.txt:
mkdir -pv data/ucd mkdir -pv data/ucd
curl ${UCD_SRCDIR}/${UCD_VERSION}/ucd/UnicodeData.txt > $@
curl ${UCD_SRCDIR}/${UCD_VERSION}/ucd/UnicodeData.txt -o $@


############################# documentation ################################### ############################# documentation ###################################



+ 4
- 3
src/ucd-tools/configure.ac View File

AC_PREREQ([2.65]) AC_PREREQ([2.65])
AC_INIT([Unicode Character Database Tools], [9.0.0], [https://github.com/rhdunn/ucd-tools/issues], [ucd-tools], [https://github.com/rhdunn/ucd-tools])
AC_INIT([Unicode Character Database Tools], [10.0.0], [https://github.com/rhdunn/ucd-tools/issues], [ucd-tools], [https://github.com/rhdunn/ucd-tools])
AM_INIT_AUTOMAKE() AM_INIT_AUTOMAKE()


m4_ifdef([AM_SILENT_RULES],[AM_SILENT_RULES]) m4_ifdef([AM_SILENT_RULES],[AM_SILENT_RULES])
dnl ================================================================ dnl ================================================================


AC_CHECK_HEADERS([stddef.h]) dnl C89 AC_CHECK_HEADERS([stddef.h]) dnl C89
AC_CHECK_FUNCS([iswblank]) dnl C99


AC_TYPE_UINT8_T AC_TYPE_UINT8_T
AC_TYPE_UINT32_T AC_TYPE_UINT32_T
dnl ================================================================ dnl ================================================================


AC_ARG_WITH([unicode-version], AC_ARG_WITH([unicode-version],
[AS_HELP_STRING([--with-unicode-version], [Unicode version to support @<:@default=9.0.0@:>@])],
[AS_HELP_STRING([--with-unicode-version], [Unicode version to support @<:@default=10.0.0@:>@])],
[AS_IF([test x"$withval" != x], [AS_IF([test x"$withval" != x],
[UCD_VERSION="$withval"])], [UCD_VERSION="$withval"])],
[UCD_VERSION="9.0.0"])
[UCD_VERSION="10.0.0"])


AC_SUBST(UCD_VERSION) AC_SUBST(UCD_VERSION)



+ 4
- 3
src/ucd-tools/src/case.c View File

* along with ucd-tools. If not, see <http://www.gnu.org/licenses/>. * along with ucd-tools. If not, see <http://www.gnu.org/licenses/>.
*/ */


// NOTE: This file is automatically generated from the UnicodeData.txt file in
// the Unicode Character database by the ucd-tools/tools/categories.py script.
/* NOTE: This file is automatically generated from the UnicodeData.txt file in
* the Unicode Character database by the ucd-tools/tools/categories.py script.
*/


#include "ucd/ucd.h" #include "ucd/ucd.h"


#include <stddef.h> #include <stddef.h>


// Unicode Character Data 9.0.0
/* Unicode Character Data 9.0.0 */


struct case_conversion_entry struct case_conversion_entry
{ {

+ 740
- 679
src/ucd-tools/src/categories.c
File diff suppressed because it is too large
View File


+ 20
- 20
src/ucd-tools/src/ctype.c View File

switch (ucd_lookup_category(c)) switch (ucd_lookup_category(c))
{ {
case UCD_CATEGORY_Zs: case UCD_CATEGORY_Zs:
switch (c) // Exclude characters with the <noBreak> DispositionType
switch (c) /* Exclude characters with the <noBreak> DispositionType */
{ {
case 0x00A0: // U+00A0 : NO-BREAK SPACE
case 0x2007: // U+2007 : FIGURE SPACE
case 0x202F: // U+202F : NARROW NO-BREAK SPACE
case 0x00A0: /* U+00A0 : NO-BREAK SPACE */
case 0x2007: /* U+2007 : FIGURE SPACE */
case 0x202F: /* U+202F : NARROW NO-BREAK SPACE */
return 0; return 0;
} }
return 1; return 1;
case UCD_CATEGORY_Cc: case UCD_CATEGORY_Cc:
return c == 0x09; // U+0009 : CHARACTER TABULATION
return c == 0x09; /* U+0009 : CHARACTER TABULATION */
default: default:
return 0; return 0;
} }


int ucd_isdigit(codepoint_t c) int ucd_isdigit(codepoint_t c)
{ {
return (c >= 0x30 && c <= 0x39); // [0-9]
return (c >= 0x30 && c <= 0x39); /* [0-9] */
} }


int ucd_isgraph(codepoint_t c) int ucd_isgraph(codepoint_t c)
case UCD_CATEGORY_Zp: case UCD_CATEGORY_Zp:
return 1; return 1;
case UCD_CATEGORY_Zs: case UCD_CATEGORY_Zs:
switch (c) // Exclude characters with the <noBreak> DispositionType
switch (c) /* Exclude characters with the <noBreak> DispositionType */
{ {
case 0x00A0: // U+00A0 : NO-BREAK SPACE
case 0x2007: // U+2007 : FIGURE SPACE
case 0x202F: // U+202F : NARROW NO-BREAK SPACE
case 0x00A0: /* U+00A0 : NO-BREAK SPACE */
case 0x2007: /* U+2007 : FIGURE SPACE */
case 0x202F: /* U+202F : NARROW NO-BREAK SPACE */
return 0; return 0;
} }
return 1; return 1;
case UCD_CATEGORY_Cc: case UCD_CATEGORY_Cc:
switch (c) // Include control characters marked as White_Space
switch (c) /* Include control characters marked as White_Space */
{ {
case 0x09: // U+0009 : CHARACTER TABULATION
case 0x0A: // U+000A : LINE FEED
case 0x0B: // U+000B : LINE TABULATION
case 0x0C: // U+000C : FORM FEED
case 0x0D: // U+000D : CARRIAGE RETURN
case 0x85: // U+0085 : NEXT LINE
case 0x09: /* U+0009 : CHARACTER TABULATION */
case 0x0A: /* U+000A : LINE FEED */
case 0x0B: /* U+000B : LINE TABULATION */
case 0x0C: /* U+000C : FORM FEED */
case 0x0D: /* U+000D : CARRIAGE RETURN */
case 0x85: /* U+0085 : NEXT LINE */
return 1; return 1;
} }
default: default:


int ucd_isxdigit(codepoint_t c) int ucd_isxdigit(codepoint_t c)
{ {
return (c >= 0x30 && c <= 0x39) // [0-9]
|| (c >= 0x41 && c <= 0x46) // [A-F]
|| (c >= 0x61 && c <= 0x66); // [a-f]
return (c >= 0x30 && c <= 0x39) /* [0-9] */
|| (c >= 0x41 && c <= 0x46) /* [A-F] */
|| (c >= 0x61 && c <= 0x66); /* [a-f] */
} }

+ 10
- 0
src/ucd-tools/src/include/ucd/ucd.h View File

UCD_SCRIPT_Geok, /**< @brief Khutsuri Script */ UCD_SCRIPT_Geok, /**< @brief Khutsuri Script */
UCD_SCRIPT_Geor, /**< @brief Georgian Script */ UCD_SCRIPT_Geor, /**< @brief Georgian Script */
UCD_SCRIPT_Glag, /**< @brief Glagolitic Script */ UCD_SCRIPT_Glag, /**< @brief Glagolitic Script */
UCD_SCRIPT_Gonm, /**< @brief Masaram Gondi */
UCD_SCRIPT_Goth, /**< @brief Gothic Script */ UCD_SCRIPT_Goth, /**< @brief Gothic Script */
UCD_SCRIPT_Gran, /**< @brief Grantha Script */ UCD_SCRIPT_Gran, /**< @brief Grantha Script */
UCD_SCRIPT_Grek, /**< @brief Greek Script */ UCD_SCRIPT_Grek, /**< @brief Greek Script */
UCD_SCRIPT_Sind, /**< @brief Sindhi Script */ UCD_SCRIPT_Sind, /**< @brief Sindhi Script */
UCD_SCRIPT_Sinh, /**< @brief Sinhala Script */ UCD_SCRIPT_Sinh, /**< @brief Sinhala Script */
UCD_SCRIPT_Sora, /**< @brief Sora Sompeng Script */ UCD_SCRIPT_Sora, /**< @brief Sora Sompeng Script */
UCD_SCRIPT_Soyo, /**< @brief Soyombo */
UCD_SCRIPT_Sund, /**< @brief Sundanese Script */ UCD_SCRIPT_Sund, /**< @brief Sundanese Script */
UCD_SCRIPT_Sylo, /**< @brief Syloti Nagri Script */ UCD_SCRIPT_Sylo, /**< @brief Syloti Nagri Script */
UCD_SCRIPT_Syrc, /**< @brief Syriac Script */ UCD_SCRIPT_Syrc, /**< @brief Syriac Script */
UCD_SCRIPT_Xpeo, /**< @brief Old Persian Script */ UCD_SCRIPT_Xpeo, /**< @brief Old Persian Script */
UCD_SCRIPT_Xsux, /**< @brief Cuneiform Script */ UCD_SCRIPT_Xsux, /**< @brief Cuneiform Script */
UCD_SCRIPT_Yiii, /**< @brief Yi Script */ UCD_SCRIPT_Yiii, /**< @brief Yi Script */
UCD_SCRIPT_Zanb, /**< @brief Zanabazar Square */
UCD_SCRIPT_Zinh, /**< @brief Inherited Script */ UCD_SCRIPT_Zinh, /**< @brief Inherited Script */
UCD_SCRIPT_Zmth, /**< @brief Mathematical Notation */ UCD_SCRIPT_Zmth, /**< @brief Mathematical Notation */
UCD_SCRIPT_Zsym, /**< @brief Symbols */ UCD_SCRIPT_Zsym, /**< @brief Symbols */
#define UCD_PROPERTY_EMOJI_PRESENTATION 0x0000000400000000ull /**< @brief Emoji_Presentation */ #define UCD_PROPERTY_EMOJI_PRESENTATION 0x0000000400000000ull /**< @brief Emoji_Presentation */
#define UCD_PROPERTY_EMOJI_MODIFIER 0x0000000800000000ull /**< @brief Emoji_Modifier */ #define UCD_PROPERTY_EMOJI_MODIFIER 0x0000000800000000ull /**< @brief Emoji_Modifier */
#define UCD_PROPERTY_EMOJI_MODIFIER_BASE 0x0000001000000000ull /**< @brief Emoji_Modifier_Base */ #define UCD_PROPERTY_EMOJI_MODIFIER_BASE 0x0000001000000000ull /**< @brief Emoji_Modifier_Base */
#define UCD_PROPERTY_REGIONAL_INDICATOR 0x0000002000000000ull /**< @brief Regional_Indicator */
#define UCD_PROPERTY_EMOJI_COMPONENT 0x0000004000000000ull /**< @brief Emoji_Component */


// eSpeak NG extended properties: // eSpeak NG extended properties:
#define ESPEAKNG_PROPERTY_INVERTED_TERMINAL_PUNCTUATION 0x0010000000000000ull /**< @brief Inverted_Terminal_Punctuation */ #define ESPEAKNG_PROPERTY_INVERTED_TERMINAL_PUNCTUATION 0x0010000000000000ull /**< @brief Inverted_Terminal_Punctuation */
Geok = UCD_SCRIPT_Geok, /**< @brief Khutsuri Script */ Geok = UCD_SCRIPT_Geok, /**< @brief Khutsuri Script */
Geor = UCD_SCRIPT_Geor, /**< @brief Georgian Script */ Geor = UCD_SCRIPT_Geor, /**< @brief Georgian Script */
Glag = UCD_SCRIPT_Glag, /**< @brief Glagolitic Script */ Glag = UCD_SCRIPT_Glag, /**< @brief Glagolitic Script */
Gonm = UCD_SCRIPT_Gonm, /**< @brief Masaram Gondi */
Goth = UCD_SCRIPT_Goth, /**< @brief Gothic Script */ Goth = UCD_SCRIPT_Goth, /**< @brief Gothic Script */
Gran = UCD_SCRIPT_Gran, /**< @brief Grantha Script */ Gran = UCD_SCRIPT_Gran, /**< @brief Grantha Script */
Grek = UCD_SCRIPT_Grek, /**< @brief Greek Script */ Grek = UCD_SCRIPT_Grek, /**< @brief Greek Script */
Sind = UCD_SCRIPT_Sind, /**< @brief Sindhi Script */ Sind = UCD_SCRIPT_Sind, /**< @brief Sindhi Script */
Sinh = UCD_SCRIPT_Sinh, /**< @brief Sinhala Script */ Sinh = UCD_SCRIPT_Sinh, /**< @brief Sinhala Script */
Sora = UCD_SCRIPT_Sora, /**< @brief Sora Sompeng Script */ Sora = UCD_SCRIPT_Sora, /**< @brief Sora Sompeng Script */
Soyo = UCD_SCRIPT_Soyo, /**< @brief Soyombo */
Sund = UCD_SCRIPT_Sund, /**< @brief Sundanese Script */ Sund = UCD_SCRIPT_Sund, /**< @brief Sundanese Script */
Sylo = UCD_SCRIPT_Sylo, /**< @brief Syloti Nagri Script */ Sylo = UCD_SCRIPT_Sylo, /**< @brief Syloti Nagri Script */
Syrc = UCD_SCRIPT_Syrc, /**< @brief Syriac Script */ Syrc = UCD_SCRIPT_Syrc, /**< @brief Syriac Script */
Xpeo = UCD_SCRIPT_Xpeo, /**< @brief Old Persian Script */ Xpeo = UCD_SCRIPT_Xpeo, /**< @brief Old Persian Script */
Xsux = UCD_SCRIPT_Xsux, /**< @brief Cuneiform Script */ Xsux = UCD_SCRIPT_Xsux, /**< @brief Cuneiform Script */
Yiii = UCD_SCRIPT_Yiii, /**< @brief Yi Script */ Yiii = UCD_SCRIPT_Yiii, /**< @brief Yi Script */
Zanb = UCD_SCRIPT_Zanb, /**< @brief Zanabazar Square */
Zinh = UCD_SCRIPT_Zinh, /**< @brief Inherited Script */ Zinh = UCD_SCRIPT_Zinh, /**< @brief Inherited Script */
Zmth = UCD_SCRIPT_Zmth, /**< @brief Mathematical Notation */ Zmth = UCD_SCRIPT_Zmth, /**< @brief Mathematical Notation */
Zsym = UCD_SCRIPT_Zsym, /**< @brief Symbols */ Zsym = UCD_SCRIPT_Zsym, /**< @brief Symbols */
Emoji_Presentation = UCD_PROPERTY_EMOJI_PRESENTATION, /**< @brief Emoji_Presentation */ Emoji_Presentation = UCD_PROPERTY_EMOJI_PRESENTATION, /**< @brief Emoji_Presentation */
Emoji_Modifier = UCD_PROPERTY_EMOJI_MODIFIER, /**< @brief Emoji_Modifier */ Emoji_Modifier = UCD_PROPERTY_EMOJI_MODIFIER, /**< @brief Emoji_Modifier */
Emoji_Modifier_Base = UCD_PROPERTY_EMOJI_MODIFIER_BASE, /**< @brief Emoji_Modifier_Base */ Emoji_Modifier_Base = UCD_PROPERTY_EMOJI_MODIFIER_BASE, /**< @brief Emoji_Modifier_Base */
Regional_Indicator = UCD_PROPERTY_REGIONAL_INDICATOR, /**< @brief Regional_Indicator */
Emoji_Component = UCD_PROPERTY_EMOJI_COMPONENT, /**< @brief Emoji_Component */
}; };


/** @brief Return the properties of the specified codepoint. /** @brief Return the properties of the specified codepoint.

+ 102
- 62
src/ucd-tools/src/proplist.c View File

case 0x2000: case 0x2000:
if (c == 0x2065) return UCD_PROPERTY_OTHER_DEFAULT_IGNORABLE_CODE_POINT; if (c == 0x2065) return UCD_PROPERTY_OTHER_DEFAULT_IGNORABLE_CODE_POINT;
break; break;
case 0x2300:
if (c == 0x23FF) return UCD_PROPERTY_PATTERN_SYNTAX;
break;
case 0x2400: case 0x2400:
if (c >= 0x2427 && c <= 0x243F) return UCD_PROPERTY_PATTERN_SYNTAX; if (c >= 0x2427 && c <= 0x243F) return UCD_PROPERTY_PATTERN_SYNTAX;
if (c >= 0x244B && c <= 0x245F) return UCD_PROPERTY_PATTERN_SYNTAX; if (c >= 0x244B && c <= 0x245F) return UCD_PROPERTY_PATTERN_SYNTAX;
if (c >= 0x2B96 && c <= 0x2B97) return UCD_PROPERTY_PATTERN_SYNTAX; if (c >= 0x2B96 && c <= 0x2B97) return UCD_PROPERTY_PATTERN_SYNTAX;
if (c >= 0x2BBA && c <= 0x2BBC) return UCD_PROPERTY_PATTERN_SYNTAX; if (c >= 0x2BBA && c <= 0x2BBC) return UCD_PROPERTY_PATTERN_SYNTAX;
if (c == 0x2BC9) return UCD_PROPERTY_PATTERN_SYNTAX; if (c == 0x2BC9) return UCD_PROPERTY_PATTERN_SYNTAX;
if (c >= 0x2BD2 && c <= 0x2BEB) return UCD_PROPERTY_PATTERN_SYNTAX;
if (c >= 0x2BD3 && c <= 0x2BEB) return UCD_PROPERTY_PATTERN_SYNTAX;
if (c >= 0x2BF0 && c <= 0x2BFF) return UCD_PROPERTY_PATTERN_SYNTAX; if (c >= 0x2BF0 && c <= 0x2BFF) return UCD_PROPERTY_PATTERN_SYNTAX;
break; break;
case 0x2E00: case 0x2E00:
if (c == 0x029D) return UCD_PROPERTY_SOFT_DOTTED; if (c == 0x029D) return UCD_PROPERTY_SOFT_DOTTED;
break; break;
case 0x0300: case 0x0300:
if (c >= 0x03D0 && c <= 0x03D2) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu
if (c >= 0x03D0 && c <= 0x03D2) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */
if (c == 0x03D5) return UCD_PROPERTY_OTHER_MATH; if (c == 0x03D5) return UCD_PROPERTY_OTHER_MATH;
if (c >= 0x03F0 && c <= 0x03F1) return UCD_PROPERTY_OTHER_MATH; if (c >= 0x03F0 && c <= 0x03F1) return UCD_PROPERTY_OTHER_MATH;
if (c == 0x03F3) return UCD_PROPERTY_SOFT_DOTTED; if (c == 0x03F3) return UCD_PROPERTY_SOFT_DOTTED;
if (c >= 0x03F4 && c <= 0x03F5) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu
if (c >= 0x03F4 && c <= 0x03F5) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */
break; break;
case 0x0400: case 0x0400:
if (c == 0x0456) return UCD_PROPERTY_SOFT_DOTTED; if (c == 0x0456) return UCD_PROPERTY_SOFT_DOTTED;
if (c == 0x1ECB) return UCD_PROPERTY_SOFT_DOTTED; if (c == 0x1ECB) return UCD_PROPERTY_SOFT_DOTTED;
break; break;
case 0x2100: case 0x2100:
if (c >= 0x210A && c <= 0x2113) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu
if (c >= 0x212F && c <= 0x2131) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu
if (c >= 0x2133 && c <= 0x2134) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu
if (c >= 0x210A && c <= 0x2113) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */
if (c >= 0x212F && c <= 0x2131) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */
if (c >= 0x2133 && c <= 0x2134) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */
if (c == 0x2139) return UCD_PROPERTY_EMOJI; if (c == 0x2139) return UCD_PROPERTY_EMOJI;
if (c >= 0x213C && c <= 0x213F) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu
if (c >= 0x2145 && c <= 0x2147) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu
if (c >= 0x213C && c <= 0x213F) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */
if (c >= 0x2145 && c <= 0x2147) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */
if (c >= 0x2148 && c <= 0x2149) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_SOFT_DOTTED; if (c >= 0x2148 && c <= 0x2149) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_SOFT_DOTTED;
break; break;
case 0xFF00: case 0xFF00:
break; break;
case 0x01D400: case 0x01D400:
if (c >= 0x01D422 && c <= 0x01D423) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_SOFT_DOTTED; if (c >= 0x01D422 && c <= 0x01D423) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_SOFT_DOTTED;
if (c >= 0x01D400 && c <= 0x01D454) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu
if (c >= 0x01D400 && c <= 0x01D454) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */
if (c >= 0x01D456 && c <= 0x01D457) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_SOFT_DOTTED; if (c >= 0x01D456 && c <= 0x01D457) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_SOFT_DOTTED;
if (c >= 0x01D48A && c <= 0x01D48B) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_SOFT_DOTTED; if (c >= 0x01D48A && c <= 0x01D48B) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_SOFT_DOTTED;
if (c >= 0x01D458 && c <= 0x01D49C) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu
if (c >= 0x01D4AE && c <= 0x01D4B9) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu
if (c >= 0x01D458 && c <= 0x01D49C) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */
if (c >= 0x01D4AE && c <= 0x01D4B9) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */
if (c == 0x01D4BB) return UCD_PROPERTY_OTHER_MATH; if (c == 0x01D4BB) return UCD_PROPERTY_OTHER_MATH;
if (c >= 0x01D4BE && c <= 0x01D4BF) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_SOFT_DOTTED; if (c >= 0x01D4BE && c <= 0x01D4BF) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_SOFT_DOTTED;
if (c >= 0x01D4BD && c <= 0x01D4C3) return UCD_PROPERTY_OTHER_MATH; if (c >= 0x01D4BD && c <= 0x01D4C3) return UCD_PROPERTY_OTHER_MATH;
if (c >= 0x01D4F2 && c <= 0x01D4F3) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_SOFT_DOTTED; if (c >= 0x01D4F2 && c <= 0x01D4F3) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_SOFT_DOTTED;
if (c >= 0x01D4C5) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu
if (c >= 0x01D4C5) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */
break; break;
case 0x01D500: case 0x01D500:
if (c <= 0x01D505) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu
if (c <= 0x01D505) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */
if (c >= 0x01D526 && c <= 0x01D527) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_SOFT_DOTTED; if (c >= 0x01D526 && c <= 0x01D527) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_SOFT_DOTTED;
if (c >= 0x01D51E && c <= 0x01D539) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu
if (c >= 0x01D51E && c <= 0x01D539) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */
if (c >= 0x01D55A && c <= 0x01D55B) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_SOFT_DOTTED; if (c >= 0x01D55A && c <= 0x01D55B) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_SOFT_DOTTED;
if (c >= 0x01D58E && c <= 0x01D58F) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_SOFT_DOTTED; if (c >= 0x01D58E && c <= 0x01D58F) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_SOFT_DOTTED;
if (c >= 0x01D5C2 && c <= 0x01D5C3) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_SOFT_DOTTED; if (c >= 0x01D5C2 && c <= 0x01D5C3) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_SOFT_DOTTED;
if (c >= 0x01D5F6 && c <= 0x01D5F7) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_SOFT_DOTTED; if (c >= 0x01D5F6 && c <= 0x01D5F7) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_SOFT_DOTTED;
if (c >= 0x01D552) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu
if (c >= 0x01D552) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */
break; break;
case 0x01D600: case 0x01D600:
if (c >= 0x01D62A && c <= 0x01D62B) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_SOFT_DOTTED; if (c >= 0x01D62A && c <= 0x01D62B) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_SOFT_DOTTED;
if (c >= 0x01D65E && c <= 0x01D65F) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_SOFT_DOTTED; if (c >= 0x01D65E && c <= 0x01D65F) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_SOFT_DOTTED;
if (c >= 0x01D692 && c <= 0x01D693) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_SOFT_DOTTED; if (c >= 0x01D692 && c <= 0x01D693) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_SOFT_DOTTED;
if (c <= 0x01D6A5) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu
if (c <= 0x01D6A5) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */
if (c >= 0x01D6C2 && c <= 0x01D6DA) return UCD_PROPERTY_OTHER_MATH; if (c >= 0x01D6C2 && c <= 0x01D6DA) return UCD_PROPERTY_OTHER_MATH;
if (c >= 0x01D6DC && c <= 0x01D6FA) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu
if (c >= 0x01D6DC && c <= 0x01D6FA) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */
if (c >= 0x01D6FC) return UCD_PROPERTY_OTHER_MATH; if (c >= 0x01D6FC) return UCD_PROPERTY_OTHER_MATH;
break; break;
case 0x01D700: case 0x01D700:
if (c <= 0x01D714) return UCD_PROPERTY_OTHER_MATH; if (c <= 0x01D714) return UCD_PROPERTY_OTHER_MATH;
if (c >= 0x01D716 && c <= 0x01D734) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu
if (c >= 0x01D716 && c <= 0x01D734) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */
if (c >= 0x01D736 && c <= 0x01D74E) return UCD_PROPERTY_OTHER_MATH; if (c >= 0x01D736 && c <= 0x01D74E) return UCD_PROPERTY_OTHER_MATH;
if (c >= 0x01D750 && c <= 0x01D76E) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu
if (c >= 0x01D750 && c <= 0x01D76E) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */
if (c >= 0x01D770 && c <= 0x01D788) return UCD_PROPERTY_OTHER_MATH; if (c >= 0x01D770 && c <= 0x01D788) return UCD_PROPERTY_OTHER_MATH;
if (c >= 0x01D78A && c <= 0x01D7A8) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu
if (c >= 0x01D78A && c <= 0x01D7A8) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */
if (c >= 0x01D7AA && c <= 0x01D7C2) return UCD_PROPERTY_OTHER_MATH; if (c >= 0x01D7AA && c <= 0x01D7C2) return UCD_PROPERTY_OTHER_MATH;
if (c >= 0x01D7C4 && c <= 0x01D7CB) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu
if (c >= 0x01D7C4 && c <= 0x01D7CB) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */
break; break;
} }
return 0; return 0;
break; break;
case 0x016F00: case 0x016F00:
if (c >= 0x016F93 && c <= 0x016F9F) return UCD_PROPERTY_DIACRITIC; if (c >= 0x016F93 && c <= 0x016F9F) return UCD_PROPERTY_DIACRITIC;
if (c == 0x016FE0) return UCD_PROPERTY_EXTENDER;
if (c >= 0x016FE0 && c <= 0x016FE1) return UCD_PROPERTY_EXTENDER;
break; break;
} }
return 0; return 0;
{ {
case 0x000000: case 0x000000:
if (c >= 0x3400 && c <= 0x4DB5) return UCD_PROPERTY_IDEOGRAPHIC | UCD_PROPERTY_UNIFIED_IDEOGRAPH; if (c >= 0x3400 && c <= 0x4DB5) return UCD_PROPERTY_IDEOGRAPHIC | UCD_PROPERTY_UNIFIED_IDEOGRAPH;
if (c >= 0x4E00 && c <= 0x9FD5) return UCD_PROPERTY_IDEOGRAPHIC | UCD_PROPERTY_UNIFIED_IDEOGRAPH;
if (c >= 0x4E00 && c <= 0x9FEA) return UCD_PROPERTY_IDEOGRAPHIC | UCD_PROPERTY_UNIFIED_IDEOGRAPH;
if (c >= 0xF900 && c <= 0xFA6D) return UCD_PROPERTY_IDEOGRAPHIC; if (c >= 0xF900 && c <= 0xFA6D) return UCD_PROPERTY_IDEOGRAPHIC;
if (c >= 0xFA70 && c <= 0xFAD9) return UCD_PROPERTY_IDEOGRAPHIC; if (c >= 0xFA70 && c <= 0xFAD9) return UCD_PROPERTY_IDEOGRAPHIC;
break; break;
case 0x010000: case 0x010000:
if (c >= 0x017000 && c <= 0x0187EC) return UCD_PROPERTY_IDEOGRAPHIC; if (c >= 0x017000 && c <= 0x0187EC) return UCD_PROPERTY_IDEOGRAPHIC;
if (c >= 0x018800 && c <= 0x018AF2) return UCD_PROPERTY_IDEOGRAPHIC; if (c >= 0x018800 && c <= 0x018AF2) return UCD_PROPERTY_IDEOGRAPHIC;
if (c >= 0x01B170 && c <= 0x01B2FB) return UCD_PROPERTY_IDEOGRAPHIC;
break; break;
case 0x020000: case 0x020000:
if (c >= 0x020000 && c <= 0x02A6D6) return UCD_PROPERTY_IDEOGRAPHIC | UCD_PROPERTY_UNIFIED_IDEOGRAPH; if (c >= 0x020000 && c <= 0x02A6D6) return UCD_PROPERTY_IDEOGRAPHIC | UCD_PROPERTY_UNIFIED_IDEOGRAPH;
if (c >= 0x02A700 && c <= 0x02B734) return UCD_PROPERTY_IDEOGRAPHIC | UCD_PROPERTY_UNIFIED_IDEOGRAPH; if (c >= 0x02A700 && c <= 0x02B734) return UCD_PROPERTY_IDEOGRAPHIC | UCD_PROPERTY_UNIFIED_IDEOGRAPH;
if (c >= 0x02B740 && c <= 0x02B81D) return UCD_PROPERTY_IDEOGRAPHIC | UCD_PROPERTY_UNIFIED_IDEOGRAPH; if (c >= 0x02B740 && c <= 0x02B81D) return UCD_PROPERTY_IDEOGRAPHIC | UCD_PROPERTY_UNIFIED_IDEOGRAPH;
if (c >= 0x02B820 && c <= 0x02CEA1) return UCD_PROPERTY_IDEOGRAPHIC | UCD_PROPERTY_UNIFIED_IDEOGRAPH; if (c >= 0x02B820 && c <= 0x02CEA1) return UCD_PROPERTY_IDEOGRAPHIC | UCD_PROPERTY_UNIFIED_IDEOGRAPH;
if (c >= 0x02CEB0 && c <= 0x02EBE0) return UCD_PROPERTY_IDEOGRAPHIC | UCD_PROPERTY_UNIFIED_IDEOGRAPH;
if (c >= 0x02F800 && c <= 0x02FA1D) return UCD_PROPERTY_IDEOGRAPHIC; if (c >= 0x02F800 && c <= 0x02FA1D) return UCD_PROPERTY_IDEOGRAPHIC;
break; break;
} }
if (c >= 0x0041 && c <= 0x0046) return UCD_PROPERTY_HEX_DIGIT | UCD_PROPERTY_ASCII_HEX_DIGIT; if (c >= 0x0041 && c <= 0x0046) return UCD_PROPERTY_HEX_DIGIT | UCD_PROPERTY_ASCII_HEX_DIGIT;
break; break;
case 0x0300: case 0x0300:
if (c >= 0x03D0 && c <= 0x03D2) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu
if (c >= 0x03F4 && c <= 0x03F5) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu
if (c >= 0x03D0 && c <= 0x03D2) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */
if (c >= 0x03F4 && c <= 0x03F5) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */
break; break;
case 0xFF00: case 0xFF00:
if (c >= 0xFF21 && c <= 0xFF26) return UCD_PROPERTY_HEX_DIGIT; if (c >= 0xFF21 && c <= 0xFF26) return UCD_PROPERTY_HEX_DIGIT;
case 0x2100: case 0x2100:
if (c == 0x2102) return UCD_PROPERTY_OTHER_MATH; if (c == 0x2102) return UCD_PROPERTY_OTHER_MATH;
if (c == 0x2107) return UCD_PROPERTY_OTHER_MATH; if (c == 0x2107) return UCD_PROPERTY_OTHER_MATH;
if (c >= 0x210A && c <= 0x2113) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu
if (c >= 0x210A && c <= 0x2113) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */
if (c == 0x2115) return UCD_PROPERTY_OTHER_MATH; if (c == 0x2115) return UCD_PROPERTY_OTHER_MATH;
if (c >= 0x2119 && c <= 0x211D) return UCD_PROPERTY_OTHER_MATH; if (c >= 0x2119 && c <= 0x211D) return UCD_PROPERTY_OTHER_MATH;
if (c == 0x2124) return UCD_PROPERTY_OTHER_MATH; if (c == 0x2124) return UCD_PROPERTY_OTHER_MATH;
if (c == 0x2128) return UCD_PROPERTY_OTHER_MATH; if (c == 0x2128) return UCD_PROPERTY_OTHER_MATH;
if (c >= 0x212C && c <= 0x212D) return UCD_PROPERTY_OTHER_MATH; if (c >= 0x212C && c <= 0x212D) return UCD_PROPERTY_OTHER_MATH;
if (c >= 0x212F && c <= 0x2131) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu
if (c >= 0x2133 && c <= 0x2134) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu
if (c >= 0x213C && c <= 0x213F) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu
if (c >= 0x2145 && c <= 0x2149) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu
if (c >= 0x212F && c <= 0x2131) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */
if (c >= 0x2133 && c <= 0x2134) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */
if (c >= 0x213C && c <= 0x213F) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */
if (c >= 0x2145 && c <= 0x2149) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */
break; break;
case 0x01D400: case 0x01D400:
if (c >= 0x01D400 && c <= 0x01D454) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu
if (c >= 0x01D456 && c <= 0x01D49C) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu
if (c >= 0x01D400 && c <= 0x01D454) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */
if (c >= 0x01D456 && c <= 0x01D49C) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */
if (c >= 0x01D49E && c <= 0x01D49F) return UCD_PROPERTY_OTHER_MATH; if (c >= 0x01D49E && c <= 0x01D49F) return UCD_PROPERTY_OTHER_MATH;
if (c == 0x01D4A2) return UCD_PROPERTY_OTHER_MATH; if (c == 0x01D4A2) return UCD_PROPERTY_OTHER_MATH;
if (c >= 0x01D4A5 && c <= 0x01D4A6) return UCD_PROPERTY_OTHER_MATH; if (c >= 0x01D4A5 && c <= 0x01D4A6) return UCD_PROPERTY_OTHER_MATH;
if (c >= 0x01D4A9 && c <= 0x01D4AC) return UCD_PROPERTY_OTHER_MATH; if (c >= 0x01D4A9 && c <= 0x01D4AC) return UCD_PROPERTY_OTHER_MATH;
if (c >= 0x01D4AE && c <= 0x01D4B9) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu
if (c >= 0x01D4C5) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu
if (c >= 0x01D4AE && c <= 0x01D4B9) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */
if (c >= 0x01D4C5) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */
break; break;
case 0x01D500: case 0x01D500:
if (c <= 0x01D505) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu
if (c <= 0x01D505) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */
if (c >= 0x01D507 && c <= 0x01D50A) return UCD_PROPERTY_OTHER_MATH; if (c >= 0x01D507 && c <= 0x01D50A) return UCD_PROPERTY_OTHER_MATH;
if (c >= 0x01D50D && c <= 0x01D514) return UCD_PROPERTY_OTHER_MATH; if (c >= 0x01D50D && c <= 0x01D514) return UCD_PROPERTY_OTHER_MATH;
if (c >= 0x01D516 && c <= 0x01D51C) return UCD_PROPERTY_OTHER_MATH; if (c >= 0x01D516 && c <= 0x01D51C) return UCD_PROPERTY_OTHER_MATH;
if (c >= 0x01D51E && c <= 0x01D539) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu
if (c >= 0x01D51E && c <= 0x01D539) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */
if (c >= 0x01D53B && c <= 0x01D53E) return UCD_PROPERTY_OTHER_MATH; if (c >= 0x01D53B && c <= 0x01D53E) return UCD_PROPERTY_OTHER_MATH;
if (c >= 0x01D540 && c <= 0x01D544) return UCD_PROPERTY_OTHER_MATH; if (c >= 0x01D540 && c <= 0x01D544) return UCD_PROPERTY_OTHER_MATH;
if (c == 0x01D546) return UCD_PROPERTY_OTHER_MATH; if (c == 0x01D546) return UCD_PROPERTY_OTHER_MATH;
if (c >= 0x01D54A && c <= 0x01D550) return UCD_PROPERTY_OTHER_MATH; if (c >= 0x01D54A && c <= 0x01D550) return UCD_PROPERTY_OTHER_MATH;
if (c >= 0x01D552) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu
if (c >= 0x01D552) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */
break; break;
case 0x01D600: case 0x01D600:
if (c <= 0x01D6A5) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu
if (c <= 0x01D6A5) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */
if (c >= 0x01D6A8 && c <= 0x01D6C0) return UCD_PROPERTY_OTHER_MATH; if (c >= 0x01D6A8 && c <= 0x01D6C0) return UCD_PROPERTY_OTHER_MATH;
if (c >= 0x01D6DC && c <= 0x01D6FA) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu
if (c >= 0x01D6DC && c <= 0x01D6FA) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */
break; break;
case 0x01D700: case 0x01D700:
if (c >= 0x01D716 && c <= 0x01D734) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu
if (c >= 0x01D750 && c <= 0x01D76E) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu
if (c >= 0x01D78A && c <= 0x01D7A8) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu
if (c >= 0x01D7C4 && c <= 0x01D7CB) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu
if (c >= 0x01D716 && c <= 0x01D734) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */
if (c >= 0x01D750 && c <= 0x01D76E) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */
if (c >= 0x01D78A && c <= 0x01D7A8) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */
if (c >= 0x01D7C4 && c <= 0x01D7CB) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */
break; break;
} }
return 0; return 0;
if (c >= 0x1C34 && c <= 0x1C35) return UCD_PROPERTY_OTHER_ALPHABETIC; if (c >= 0x1C34 && c <= 0x1C35) return UCD_PROPERTY_OTHER_ALPHABETIC;
if (c == 0x1CE1) return UCD_PROPERTY_DIACRITIC; if (c == 0x1CE1) return UCD_PROPERTY_DIACRITIC;
if (c >= 0x1CF2 && c <= 0x1CF3) return UCD_PROPERTY_OTHER_ALPHABETIC; if (c >= 0x1CF2 && c <= 0x1CF3) return UCD_PROPERTY_OTHER_ALPHABETIC;
if (c == 0x1CF7) return UCD_PROPERTY_DIACRITIC;
break; break;
case 0x3000: case 0x3000:
if (c >= 0x302E && c <= 0x302F) return UCD_PROPERTY_DIACRITIC | UCD_PROPERTY_OTHER_GRAPHEME_EXTEND; if (c >= 0x302E && c <= 0x302F) return UCD_PROPERTY_DIACRITIC | UCD_PROPERTY_OTHER_GRAPHEME_EXTEND;
if (c >= 0x011720 && c <= 0x011721) return UCD_PROPERTY_OTHER_ALPHABETIC; if (c >= 0x011720 && c <= 0x011721) return UCD_PROPERTY_OTHER_ALPHABETIC;
if (c == 0x011726) return UCD_PROPERTY_OTHER_ALPHABETIC; if (c == 0x011726) return UCD_PROPERTY_OTHER_ALPHABETIC;
break; break;
case 0x011A00:
if (c >= 0x011A07 && c <= 0x011A08) return UCD_PROPERTY_OTHER_ALPHABETIC;
if (c == 0x011A39) return UCD_PROPERTY_OTHER_ALPHABETIC;
if (c >= 0x011A57 && c <= 0x011A58) return UCD_PROPERTY_OTHER_ALPHABETIC;
if (c == 0x011A97) return UCD_PROPERTY_OTHER_ALPHABETIC;
break;
case 0x011C00: case 0x011C00:
if (c == 0x011C2F) return UCD_PROPERTY_OTHER_ALPHABETIC; if (c == 0x011C2F) return UCD_PROPERTY_OTHER_ALPHABETIC;
if (c == 0x011C3E) return UCD_PROPERTY_OTHER_ALPHABETIC; if (c == 0x011C3E) return UCD_PROPERTY_OTHER_ALPHABETIC;
if (c >= 0x0AC7 && c <= 0x0AC8) return UCD_PROPERTY_OTHER_ALPHABETIC; if (c >= 0x0AC7 && c <= 0x0AC8) return UCD_PROPERTY_OTHER_ALPHABETIC;
if (c == 0x0ACD) return UCD_PROPERTY_DIACRITIC; if (c == 0x0ACD) return UCD_PROPERTY_DIACRITIC;
if (c >= 0x0AE2 && c <= 0x0AE3) return UCD_PROPERTY_OTHER_ALPHABETIC; if (c >= 0x0AE2 && c <= 0x0AE3) return UCD_PROPERTY_OTHER_ALPHABETIC;
if (c >= 0x0AFA && c <= 0x0AFC) return UCD_PROPERTY_OTHER_ALPHABETIC;
if (c >= 0x0AFD && c <= 0x0AFF) return UCD_PROPERTY_DIACRITIC;
break; break;
case 0x0B00: case 0x0B00:
if (c == 0x0B01) return UCD_PROPERTY_OTHER_ALPHABETIC; if (c == 0x0B01) return UCD_PROPERTY_OTHER_ALPHABETIC;
if (c >= 0x0CE2 && c <= 0x0CE3) return UCD_PROPERTY_OTHER_ALPHABETIC; if (c >= 0x0CE2 && c <= 0x0CE3) return UCD_PROPERTY_OTHER_ALPHABETIC;
break; break;
case 0x0D00: case 0x0D00:
if (c == 0x0D01) return UCD_PROPERTY_OTHER_ALPHABETIC;
if (c >= 0x0D00 && c <= 0x0D01) return UCD_PROPERTY_OTHER_ALPHABETIC;
if (c >= 0x0D3B && c <= 0x0D3C) return UCD_PROPERTY_DIACRITIC;
if (c >= 0x0D41 && c <= 0x0D44) return UCD_PROPERTY_OTHER_ALPHABETIC; if (c >= 0x0D41 && c <= 0x0D44) return UCD_PROPERTY_OTHER_ALPHABETIC;
if (c == 0x0D4D) return UCD_PROPERTY_DIACRITIC; if (c == 0x0D4D) return UCD_PROPERTY_DIACRITIC;
if (c >= 0x0D62 && c <= 0x0D63) return UCD_PROPERTY_OTHER_ALPHABETIC; if (c >= 0x0D62 && c <= 0x0D63) return UCD_PROPERTY_OTHER_ALPHABETIC;
case 0x1D00: case 0x1D00:
if (c >= 0x1DC4 && c <= 0x1DCF) return UCD_PROPERTY_DIACRITIC; if (c >= 0x1DC4 && c <= 0x1DCF) return UCD_PROPERTY_DIACRITIC;
if (c >= 0x1DE7 && c <= 0x1DF4) return UCD_PROPERTY_OTHER_ALPHABETIC; if (c >= 0x1DE7 && c <= 0x1DF4) return UCD_PROPERTY_OTHER_ALPHABETIC;
if (c == 0x1DF5) return UCD_PROPERTY_DIACRITIC;
if (c >= 0x1DF5 && c <= 0x1DF9) return UCD_PROPERTY_DIACRITIC;
if (c >= 0x1DFD && c <= 0x1DFF) return UCD_PROPERTY_DIACRITIC; if (c >= 0x1DFD && c <= 0x1DFF) return UCD_PROPERTY_DIACRITIC;
break; break;
case 0x2000: case 0x2000:
if (c >= 0x011727 && c <= 0x01172A) return UCD_PROPERTY_OTHER_ALPHABETIC; if (c >= 0x011727 && c <= 0x01172A) return UCD_PROPERTY_OTHER_ALPHABETIC;
if (c == 0x01172B) return UCD_PROPERTY_DIACRITIC; if (c == 0x01172B) return UCD_PROPERTY_DIACRITIC;
break; break;
case 0x011A00:
if (c >= 0x011A01 && c <= 0x011A0A) return UCD_PROPERTY_OTHER_ALPHABETIC;
if (c == 0x011A34) return UCD_PROPERTY_DIACRITIC;
if (c >= 0x011A35 && c <= 0x011A3E) return UCD_PROPERTY_OTHER_ALPHABETIC;
if (c == 0x011A47) return UCD_PROPERTY_DIACRITIC;
if (c >= 0x011A51 && c <= 0x011A5B) return UCD_PROPERTY_OTHER_ALPHABETIC;
if (c >= 0x011A8A && c <= 0x011A96) return UCD_PROPERTY_OTHER_ALPHABETIC;
if (c == 0x011A98) return UCD_PROPERTY_EXTENDER;
if (c == 0x011A99) return UCD_PROPERTY_DIACRITIC;
break;
case 0x011C00: case 0x011C00:
if (c >= 0x011C30 && c <= 0x011C36) return UCD_PROPERTY_OTHER_ALPHABETIC; if (c >= 0x011C30 && c <= 0x011C36) return UCD_PROPERTY_OTHER_ALPHABETIC;
if (c >= 0x011C38 && c <= 0x011C3D) return UCD_PROPERTY_OTHER_ALPHABETIC; if (c >= 0x011C38 && c <= 0x011C3D) return UCD_PROPERTY_OTHER_ALPHABETIC;
if (c >= 0x011CB2 && c <= 0x011CB3) return UCD_PROPERTY_OTHER_ALPHABETIC; if (c >= 0x011CB2 && c <= 0x011CB3) return UCD_PROPERTY_OTHER_ALPHABETIC;
if (c >= 0x011CB5 && c <= 0x011CB6) return UCD_PROPERTY_OTHER_ALPHABETIC; if (c >= 0x011CB5 && c <= 0x011CB6) return UCD_PROPERTY_OTHER_ALPHABETIC;
break; break;
case 0x011D00:
if (c >= 0x011D31 && c <= 0x011D36) return UCD_PROPERTY_OTHER_ALPHABETIC;
if (c == 0x011D3A) return UCD_PROPERTY_OTHER_ALPHABETIC;
if (c >= 0x011D3C && c <= 0x011D3D) return UCD_PROPERTY_OTHER_ALPHABETIC;
if (c >= 0x011D3F && c <= 0x011D41) return UCD_PROPERTY_OTHER_ALPHABETIC;
if (c == 0x011D42) return UCD_PROPERTY_DIACRITIC;
if (c == 0x011D43) return UCD_PROPERTY_OTHER_ALPHABETIC;
if (c >= 0x011D44 && c <= 0x011D45) return UCD_PROPERTY_DIACRITIC;
if (c == 0x011D47) return UCD_PROPERTY_OTHER_ALPHABETIC;
break;
case 0x016A00: case 0x016A00:
if (c >= 0x016AF0 && c <= 0x016AF4) return UCD_PROPERTY_DIACRITIC; if (c >= 0x016AF0 && c <= 0x016AF4) return UCD_PROPERTY_DIACRITIC;
break; break;
switch (c & 0xFFFFFF00) switch (c & 0xFFFFFF00)
{ {
case 0x0000: case 0x0000:
if (c >= 0x0030 && c <= 0x0039) return UCD_PROPERTY_HEX_DIGIT | UCD_PROPERTY_ASCII_HEX_DIGIT | UCD_PROPERTY_EMOJI;
if (c >= 0x0030 && c <= 0x0039) return UCD_PROPERTY_HEX_DIGIT | UCD_PROPERTY_ASCII_HEX_DIGIT | UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_COMPONENT;
break; break;
case 0xFF00: case 0xFF00:
if (c >= 0xFF10 && c <= 0xFF19) return UCD_PROPERTY_HEX_DIGIT; if (c >= 0xFF10 && c <= 0xFF19) return UCD_PROPERTY_HEX_DIGIT;
break; break;
case 0x2700: case 0x2700:
if (c == 0x27C6) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_PATTERN_SYNTAX; if (c == 0x27C6) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_PATTERN_SYNTAX;
if (c >= 0x27E6 && c <= 0x27EF) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_PATTERN_SYNTAX; // Pe|Ps
if (c >= 0x27E6 && c <= 0x27EF) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_PATTERN_SYNTAX; /* Pe|Ps */
return UCD_PROPERTY_PATTERN_SYNTAX; return UCD_PROPERTY_PATTERN_SYNTAX;
case 0x2900: case 0x2900:
return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_PATTERN_SYNTAX; // Pe|Ps
return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_PATTERN_SYNTAX; /* Pe|Ps */
case 0x2E00: case 0x2E00:
return UCD_PROPERTY_PATTERN_SYNTAX; return UCD_PROPERTY_PATTERN_SYNTAX;
case 0x3000: case 0x3000:
case 0x0000: case 0x0000:
if (c == 0x0021) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_SENTENCE_TERMINAL | UCD_PROPERTY_PATTERN_SYNTAX | ESPEAKNG_PROPERTY_EXCLAMATION_MARK; if (c == 0x0021) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_SENTENCE_TERMINAL | UCD_PROPERTY_PATTERN_SYNTAX | ESPEAKNG_PROPERTY_EXCLAMATION_MARK;
if (c == 0x0022) return UCD_PROPERTY_QUOTATION_MARK | UCD_PROPERTY_PATTERN_SYNTAX; if (c == 0x0022) return UCD_PROPERTY_QUOTATION_MARK | UCD_PROPERTY_PATTERN_SYNTAX;
if (c == 0x0023) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_PATTERN_SYNTAX;
if (c == 0x0023) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_PATTERN_SYNTAX | UCD_PROPERTY_EMOJI_COMPONENT;
if (c == 0x0027) return UCD_PROPERTY_QUOTATION_MARK | UCD_PROPERTY_PATTERN_SYNTAX; if (c == 0x0027) return UCD_PROPERTY_QUOTATION_MARK | UCD_PROPERTY_PATTERN_SYNTAX;
if (c == 0x002A) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_PATTERN_SYNTAX;
if (c == 0x002A) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_PATTERN_SYNTAX | UCD_PROPERTY_EMOJI_COMPONENT;
if (c == 0x002C) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_PATTERN_SYNTAX | ESPEAKNG_PROPERTY_COMMA; if (c == 0x002C) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_PATTERN_SYNTAX | ESPEAKNG_PROPERTY_COMMA;
if (c == 0x002E) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_SENTENCE_TERMINAL | UCD_PROPERTY_PATTERN_SYNTAX | ESPEAKNG_PROPERTY_FULL_STOP; if (c == 0x002E) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_SENTENCE_TERMINAL | UCD_PROPERTY_PATTERN_SYNTAX | ESPEAKNG_PROPERTY_FULL_STOP;
if (c == 0x003A) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_PATTERN_SYNTAX | ESPEAKNG_PROPERTY_COLON; if (c == 0x003A) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_PATTERN_SYNTAX | ESPEAKNG_PROPERTY_COLON;
case 0x11700: case 0x11700:
if (c >= 0x01173C && c <= 0x01173E) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_SENTENCE_TERMINAL; if (c >= 0x01173C && c <= 0x01173E) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_SENTENCE_TERMINAL;
break; break;
case 0x11A00:
if (c >= 0x011A42 && c <= 0x011A43) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_SENTENCE_TERMINAL;
if (c >= 0x011A9B && c <= 0x011A9C) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_SENTENCE_TERMINAL;
if (c >= 0x011AA1 && c <= 0x011AA2) return UCD_PROPERTY_TERMINAL_PUNCTUATION;
break;
case 0x11C00: case 0x11C00:
if (c >= 0x011C41 && c <= 0x011C42) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_SENTENCE_TERMINAL; if (c >= 0x011C41 && c <= 0x011C42) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_SENTENCE_TERMINAL;
if (c == 0x011C43) return UCD_PROPERTY_TERMINAL_PUNCTUATION; if (c == 0x011C43) return UCD_PROPERTY_TERMINAL_PUNCTUATION;
break; break;
case 0x2700: case 0x2700:
if (c == 0x27C5) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_PATTERN_SYNTAX; if (c == 0x27C5) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_PATTERN_SYNTAX;
if (c >= 0x27E6 && c <= 0x27EF) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_PATTERN_SYNTAX; // Pe|Ps
if (c >= 0x27E6 && c <= 0x27EF) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_PATTERN_SYNTAX; /* Pe|Ps */
return UCD_PROPERTY_PATTERN_SYNTAX; return UCD_PROPERTY_PATTERN_SYNTAX;
case 0x2900: case 0x2900:
return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_PATTERN_SYNTAX; return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_PATTERN_SYNTAX;
if (c == 0xFFE3) return UCD_PROPERTY_DIACRITIC; if (c == 0xFFE3) return UCD_PROPERTY_DIACRITIC;
break; break;
case 0x01F300: case 0x01F300:
return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION | UCD_PROPERTY_EMOJI_MODIFIER;
return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION | UCD_PROPERTY_EMOJI_MODIFIER | UCD_PROPERTY_EMOJI_COMPONENT;
} }
return 0; return 0;
} }
if (c == 0x23F3) return UCD_PROPERTY_PATTERN_SYNTAX | UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION; if (c == 0x23F3) return UCD_PROPERTY_PATTERN_SYNTAX | UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION;
if (c >= 0x23E9 && c <= 0x23F3) return UCD_PROPERTY_PATTERN_SYNTAX | UCD_PROPERTY_EMOJI; if (c >= 0x23E9 && c <= 0x23F3) return UCD_PROPERTY_PATTERN_SYNTAX | UCD_PROPERTY_EMOJI;
if (c >= 0x23F8 && c <= 0x23FA) return UCD_PROPERTY_PATTERN_SYNTAX | UCD_PROPERTY_EMOJI; if (c >= 0x23F8 && c <= 0x23FA) return UCD_PROPERTY_PATTERN_SYNTAX | UCD_PROPERTY_EMOJI;
if (c >= 0x23E3 && c <= 0x23FE) return UCD_PROPERTY_PATTERN_SYNTAX;
if (c >= 0x23E3) return UCD_PROPERTY_PATTERN_SYNTAX;
break; break;
case 0x2400: case 0x2400:
if (c >= 0x2400 && c <= 0x244A) return UCD_PROPERTY_PATTERN_SYNTAX; if (c >= 0x2400 && c <= 0x244A) return UCD_PROPERTY_PATTERN_SYNTAX;
if (c >= 0x01F170 && c <= 0x01F189) return UCD_PROPERTY_OTHER_ALPHABETIC | UCD_PROPERTY_OTHER_UPPERCASE; if (c >= 0x01F170 && c <= 0x01F189) return UCD_PROPERTY_OTHER_ALPHABETIC | UCD_PROPERTY_OTHER_UPPERCASE;
if (c == 0x01F18E) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION; if (c == 0x01F18E) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION;
if (c >= 0x01F191 && c <= 0x01F19A) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION; if (c >= 0x01F191 && c <= 0x01F19A) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION;
if (c >= 0x01F1E6) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION;
if (c >= 0x01F1E6) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION | UCD_PROPERTY_REGIONAL_INDICATOR | UCD_PROPERTY_EMOJI_COMPONENT;
break; break;
case 0x01F200: case 0x01F200:
if (c == 0x01F201) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION; if (c == 0x01F201) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION;
if (c >= 0x01F6EB && c <= 0x01F6EC) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION; if (c >= 0x01F6EB && c <= 0x01F6EC) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION;
if (c == 0x01F6F0) return UCD_PROPERTY_EMOJI; if (c == 0x01F6F0) return UCD_PROPERTY_EMOJI;
if (c == 0x01F6F3) return UCD_PROPERTY_EMOJI; if (c == 0x01F6F3) return UCD_PROPERTY_EMOJI;
if (c >= 0x01F6F4 && c <= 0x01F6F6) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION;
if (c >= 0x01F6F4 && c <= 0x01F6F8) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION;
break; break;
case 0x01F900: case 0x01F900:
if (c <= 0x01F90B) return 0;
if (c >= 0x01F918 && c <= 0x01F91C) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION | UCD_PROPERTY_EMOJI_MODIFIER_BASE; if (c >= 0x01F918 && c <= 0x01F91C) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION | UCD_PROPERTY_EMOJI_MODIFIER_BASE;
if (c >= 0x01F910 && c <= 0x01F91D) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION; if (c >= 0x01F910 && c <= 0x01F91D) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION;
if (c == 0x01F91E) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION | UCD_PROPERTY_EMOJI_MODIFIER_BASE;
if (c >= 0x01F91E && c <= 0x01F91F) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION | UCD_PROPERTY_EMOJI_MODIFIER_BASE;
if (c == 0x01F926) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION | UCD_PROPERTY_EMOJI_MODIFIER_BASE; if (c == 0x01F926) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION | UCD_PROPERTY_EMOJI_MODIFIER_BASE;
if (c >= 0x01F920 && c <= 0x01F927) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION;
if (c == 0x01F930) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION | UCD_PROPERTY_EMOJI_MODIFIER_BASE;
if (c >= 0x01F920 && c <= 0x01F92F) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION;
if (c >= 0x01F930 && c <= 0x01F932) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION | UCD_PROPERTY_EMOJI_MODIFIER_BASE;
if (c == 0x01F93B) return 0; if (c == 0x01F93B) return 0;
if (c >= 0x01F93A && c <= 0x01F93C) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION; if (c >= 0x01F93A && c <= 0x01F93C) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION;
if (c >= 0x01F933 && c <= 0x01F93E) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION | UCD_PROPERTY_EMOJI_MODIFIER_BASE; if (c >= 0x01F933 && c <= 0x01F93E) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION | UCD_PROPERTY_EMOJI_MODIFIER_BASE;
if (c == 0x01F946) return 0; if (c == 0x01F946) return 0;
if (c >= 0x01F940 && c <= 0x01F94B) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION; if (c >= 0x01F940 && c <= 0x01F94B) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION;
if (c == 0x01F94C) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION;
if (c >= 0x01F950 && c <= 0x01F95E) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION; if (c >= 0x01F950 && c <= 0x01F95E) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION;
if (c >= 0x01F95F && c <= 0x01F96B) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION;
if (c >= 0x01F980 && c <= 0x01F991) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION; if (c >= 0x01F980 && c <= 0x01F991) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION;
if (c >= 0x01F992 && c <= 0x01F997) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION;
if (c == 0x01F9C0) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION; if (c == 0x01F9C0) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION;
if (c >= 0x01F9D1 && c <= 0x01F9DD) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION | UCD_PROPERTY_EMOJI_MODIFIER_BASE;
if (c >= 0x01F9D0 && c <= 0x01F9E6) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION;
return UCD_PROPERTY_EMOJI; return UCD_PROPERTY_EMOJI;
} }
return 0; return 0;
case UCD_CATEGORY_Zl: return UCD_PROPERTY_WHITE_SPACE | UCD_PROPERTY_PATTERN_WHITE_SPACE; case UCD_CATEGORY_Zl: return UCD_PROPERTY_WHITE_SPACE | UCD_PROPERTY_PATTERN_WHITE_SPACE;
case UCD_CATEGORY_Zp: return UCD_PROPERTY_WHITE_SPACE | UCD_PROPERTY_PATTERN_WHITE_SPACE | ESPEAKNG_PROPERTY_PARAGRAPH_SEPARATOR; case UCD_CATEGORY_Zp: return UCD_PROPERTY_WHITE_SPACE | UCD_PROPERTY_PATTERN_WHITE_SPACE | ESPEAKNG_PROPERTY_PARAGRAPH_SEPARATOR;
case UCD_CATEGORY_Zs: return properties_Zs(c); case UCD_CATEGORY_Zs: return properties_Zs(c);
default: return 0; // Co Cs Ii Lt Me
default: return 0; /* Co Cs Ii Lt Me */
}; };
} }

+ 765
- 701
src/ucd-tools/src/scripts.c
File diff suppressed because it is too large
View File


+ 3
- 0
src/ucd-tools/src/tostring.c View File

"Geok", "Geok",
"Geor", "Geor",
"Glag", "Glag",
"Gonm",
"Goth", "Goth",
"Gran", "Gran",
"Grek", "Grek",
"Sind", "Sind",
"Sinh", "Sinh",
"Sora", "Sora",
"Soyo",
"Sund", "Sund",
"Sylo", "Sylo",
"Syrc", "Syrc",
"Xpeo", "Xpeo",
"Xsux", "Xsux",
"Yiii", "Yiii",
"Zanb",
"Zinh", "Zinh",
"Zmth", "Zmth",
"Zsym", "Zsym",

+ 36
- 26
src/ucd-tools/tests/printcdata.c View File

* along with ucd-tools. If not, see <http://www.gnu.org/licenses/>. * along with ucd-tools. If not, see <http://www.gnu.org/licenses/>.
*/ */


#include "config.h"
#include "ucd/ucd.h" #include "ucd/ucd.h"


#include <locale.h> #include <locale.h>
#include <wchar.h> #include <wchar.h>
#include <wctype.h> #include <wctype.h>


#ifndef HAVE_ISWBLANK
/*
 * Fallback used when HAVE_ISWBLANK is not defined (presumably set by
 * config.h when the C library provides iswblank -- confirm against the
 * build configuration).
 *
 * Approximates "blank" as any character iswspace() accepts, except the
 * code points 0x0A..0x0D (line feed, vertical tab, form feed, carriage
 * return). Returns 1 for a blank character and 0 otherwise.
 */
static int iswblank(wint_t c)
{
	/* 0x0A..0x0D count as whitespace but never as blank. */
	if (c >= 0x0A && c <= 0x0D)
		return 0;

	/* Normalise iswspace()'s non-zero result to exactly 1. */
	return iswspace(c) != 0;
}
#endif

void fput_utf8c(FILE *out, codepoint_t c) void fput_utf8c(FILE *out, codepoint_t c)
{ {
if (c < 0x80) if (c < 0x80)
{ {
switch (mode) switch (mode)
{ {
case 'c': // character
case 'c': /* character */
switch (c) switch (c)
{ {
case '\t': fputs("\\t", out); break; case '\t': fputs("\\t", out); break;
default: fput_utf8c(out, c); break; default: fput_utf8c(out, c); break;
} }
break; break;
case 'h': // hexadecimal (lower)
case 'h': /* hexadecimal (lower) */
fprintf(out, "%06x", c); fprintf(out, "%06x", c);
break; break;
case 'H': // hexadecimal (upper)
case 'H': /* hexadecimal (upper) */
fprintf(out, "%06X", c); fprintf(out, "%06X", c);
break; break;
} }
{ {
switch (mode) switch (mode)
{ {
case 'A': // alpha-numeric
case 'A': /* alpha-numeric */
fputc(iswalnum(c) ? '1' : '0', out); fputc(iswalnum(c) ? '1' : '0', out);
break; break;
case 'a': // alpha
case 'a': /* alpha */
fputc(iswalpha(c) ? '1' : '0', out); fputc(iswalpha(c) ? '1' : '0', out);
break; break;
case 'b': // blank
case 'b': /* blank */
fputc(iswblank(c) ? '1' : '0', out); fputc(iswblank(c) ? '1' : '0', out);
break; break;
case 'c': // control
case 'c': /* control */
fputc(iswcntrl(c) ? '1' : '0', out); fputc(iswcntrl(c) ? '1' : '0', out);
break; break;
case 'd': // numeric
case 'd': /* numeric */
fputc(iswdigit(c) ? '1' : '0', out); fputc(iswdigit(c) ? '1' : '0', out);
break; break;
case 'g': // glyph
case 'g': /* glyph */
fputc(iswgraph(c) ? '1' : '0', out); fputc(iswgraph(c) ? '1' : '0', out);
break; break;
case 'l': // lower case
case 'l': /* lower case */
fputc(iswlower(c) ? '1' : '0', out); fputc(iswlower(c) ? '1' : '0', out);
break; break;
case 'P': // printable
case 'P': /* printable */
fputc(iswprint(c) ? '1' : '0', out); fputc(iswprint(c) ? '1' : '0', out);
break; break;
case 'p': // punctuation
case 'p': /* punctuation */
fputc(iswpunct(c) ? '1' : '0', out); fputc(iswpunct(c) ? '1' : '0', out);
break; break;
case 's': // whitespace
case 's': /* whitespace */
fputc(iswspace(c) ? '1' : '0', out); fputc(iswspace(c) ? '1' : '0', out);
break; break;
case 'u': // upper case
case 'u': /* upper case */
fputc(iswupper(c) ? '1' : '0', out); fputc(iswupper(c) ? '1' : '0', out);
break; break;
case 'x': // xdigit
case 'x': /* xdigit */
fputc(iswxdigit(c) ? '1' : '0', out); fputc(iswxdigit(c) ? '1' : '0', out);
break; break;
} }
case '%': case '%':
switch (*++format) switch (*++format)
{ {
case 'c': // category
case 'c': /* category */
fputs(ucd_get_category_string(ucd_lookup_category(c)), out); fputs(ucd_get_category_string(ucd_lookup_category(c)), out);
break; break;
case 'C': // category group
case 'C': /* category group */
fputs(ucd_get_category_group_string(ucd_lookup_category_group(c)), out); fputs(ucd_get_category_group_string(ucd_lookup_category_group(c)), out);
break; break;
case 'p': // codepoint
case 'p': /* codepoint */
uprintf_codepoint(out, c, *++format); uprintf_codepoint(out, c, *++format);
break; break;
case 'P': // properties
case 'P': /* properties */
fprintf(out, "%016llx", ucd_properties(c, ucd_lookup_category(c))); fprintf(out, "%016llx", ucd_properties(c, ucd_lookup_category(c)));
break; break;
case 'i': // is*
case 'i': /* is* */
uprintf_is(out, c, *++format); uprintf_is(out, c, *++format);
break; break;
case 'L': // lowercase
case 'L': /* lowercase */
uprintf_codepoint(out, towlower(c), *++format); uprintf_codepoint(out, towlower(c), *++format);
break; break;
case 's': // script
case 's': /* script */
fputs(ucd_get_script_string(ucd_lookup_script(c)), out); fputs(ucd_get_script_string(ucd_lookup_script(c)), out);
break; break;
case 'T': // titlecase
case 'T': /* titlecase */
uprintf_codepoint(out, ucd_totitle(c), *++format); uprintf_codepoint(out, ucd_totitle(c), *++format);
break; break;
case 'U': // uppercase
case 'U': /* uppercase */
uprintf_codepoint(out, towupper(c), *++format); uprintf_codepoint(out, towupper(c), *++format);
break; break;
} }
{ {
FILE *in = NULL; FILE *in = NULL;
const char *format = NULL; const char *format = NULL;
for (int argn = 1; argn != argc; ++argn)
int argn;
for (argn = 1; argn != argc; ++argn)
{ {
const char *arg = argv[argn]; const char *arg = argv[argn];
if (!strcmp(arg, "--stdin") || !strcmp(arg, "-")) if (!strcmp(arg, "--stdin") || !strcmp(arg, "-"))
} }
else else
{ {
for (codepoint_t c = 0; c <= 0x10FFFF; ++c)
codepoint_t c;
for (c = 0; c <= 0x10FFFF; ++c)
uprintf(stdout, c, format ? format : uprintf(stdout, c, format ? format :
"%pH %s %C %c %UH %LH %TH %id %ix %ic %is %ib %ip %iP %ig %iA %ia %iu %il %P\n"); "%pH %s %C %c %UH %LH %TH %id %ix %ic %is %ib %ip %iP %ig %iA %ia %iu %il %P\n");
} }

+ 28
- 26
src/ucd-tools/tests/printucddata.c View File

{ {
switch (mode) switch (mode)
{ {
case 'c': // character
case 'c': /* character */
switch (c) switch (c)
{ {
case '\t': fputs("\\t", out); break; case '\t': fputs("\\t", out); break;
default: fput_utf8c(out, c); break; default: fput_utf8c(out, c); break;
} }
break; break;
case 'h': // hexadecimal (lower)
case 'h': /* hexadecimal (lower) */
fprintf(out, "%06x", c); fprintf(out, "%06x", c);
break; break;
case 'H': // hexadecimal (upper)
case 'H': /* hexadecimal (upper) */
fprintf(out, "%06X", c); fprintf(out, "%06X", c);
break; break;
} }
{ {
switch (mode) switch (mode)
{ {
case 'A': // alpha-numeric
case 'A': /* alpha-numeric */
fputc(ucd_isalnum(c) ? '1' : '0', out); fputc(ucd_isalnum(c) ? '1' : '0', out);
break; break;
case 'a': // alpha
case 'a': /* alpha */
fputc(ucd_isalpha(c) ? '1' : '0', out); fputc(ucd_isalpha(c) ? '1' : '0', out);
break; break;
case 'b': // blank
case 'b': /* blank */
fputc(ucd_isblank(c) ? '1' : '0', out); fputc(ucd_isblank(c) ? '1' : '0', out);
break; break;
case 'c': // control
case 'c': /* control */
fputc(ucd_iscntrl(c) ? '1' : '0', out); fputc(ucd_iscntrl(c) ? '1' : '0', out);
break; break;
case 'd': // numeric
case 'd': /* numeric */
fputc(ucd_isdigit(c) ? '1' : '0', out); fputc(ucd_isdigit(c) ? '1' : '0', out);
break; break;
case 'g': // glyph
case 'g': /* glyph */
fputc(ucd_isgraph(c) ? '1' : '0', out); fputc(ucd_isgraph(c) ? '1' : '0', out);
break; break;
case 'l': // lower case
case 'l': /* lower case */
fputc(ucd_islower(c) ? '1' : '0', out); fputc(ucd_islower(c) ? '1' : '0', out);
break; break;
case 'P': // printable
case 'P': /* printable */
fputc(ucd_isprint(c) ? '1' : '0', out); fputc(ucd_isprint(c) ? '1' : '0', out);
break; break;
case 'p': // punctuation
case 'p': /* punctuation */
fputc(ucd_ispunct(c) ? '1' : '0', out); fputc(ucd_ispunct(c) ? '1' : '0', out);
break; break;
case 's': // whitespace
case 's': /* whitespace */
fputc(ucd_isspace(c) ? '1' : '0', out); fputc(ucd_isspace(c) ? '1' : '0', out);
break; break;
case 'u': // upper case
case 'u': /* upper case */
fputc(ucd_isupper(c) ? '1' : '0', out); fputc(ucd_isupper(c) ? '1' : '0', out);
break; break;
case 'x': // xdigit
case 'x': /* xdigit */
fputc(ucd_isxdigit(c) ? '1' : '0', out); fputc(ucd_isxdigit(c) ? '1' : '0', out);
break; break;
} }
case '%': case '%':
switch (*++format) switch (*++format)
{ {
case 'c': // category
case 'c': /* category */
fputs(ucd_get_category_string(ucd_lookup_category(c)), out); fputs(ucd_get_category_string(ucd_lookup_category(c)), out);
break; break;
case 'C': // category group
case 'C': /* category group */
fputs(ucd_get_category_group_string(ucd_lookup_category_group(c)), out); fputs(ucd_get_category_group_string(ucd_lookup_category_group(c)), out);
break; break;
case 'p': // codepoint
case 'p': /* codepoint */
uprintf_codepoint(out, c, *++format); uprintf_codepoint(out, c, *++format);
break; break;
case 'P': // properties
case 'P': /* properties */
fprintf(out, "%016llx", ucd_properties(c, ucd_lookup_category(c))); fprintf(out, "%016llx", ucd_properties(c, ucd_lookup_category(c)));
break; break;
case 'i': // is*
case 'i': /* is* */
uprintf_is(out, c, *++format); uprintf_is(out, c, *++format);
break; break;
case 'L': // lowercase
case 'L': /* lowercase */
uprintf_codepoint(out, ucd_tolower(c), *++format); uprintf_codepoint(out, ucd_tolower(c), *++format);
break; break;
case 's': // script
case 's': /* script */
fputs(ucd_get_script_string(ucd_lookup_script(c)), out); fputs(ucd_get_script_string(ucd_lookup_script(c)), out);
break; break;
case 'T': // titlecase
case 'T': /* titlecase */
uprintf_codepoint(out, ucd_totitle(c), *++format); uprintf_codepoint(out, ucd_totitle(c), *++format);
break; break;
case 'U': // uppercase
case 'U': /* uppercase */
uprintf_codepoint(out, ucd_toupper(c), *++format); uprintf_codepoint(out, ucd_toupper(c), *++format);
break; break;
} }
{ {
FILE *in = NULL; FILE *in = NULL;
const char *format = NULL; const char *format = NULL;
for (int argn = 1; argn != argc; ++argn)
int argn;
for (argn = 1; argn != argc; ++argn)
{ {
const char *arg = argv[argn]; const char *arg = argv[argn];
if (!strcmp(arg, "--stdin") || !strcmp(arg, "-")) if (!strcmp(arg, "--stdin") || !strcmp(arg, "-"))
} }
else else
{ {
for (codepoint_t c = 0; c <= 0x10FFFF; ++c)
codepoint_t c;
for (c = 0; c <= 0x10FFFF; ++c)
uprintf(stdout, c, format ? format : uprintf(stdout, c, format ? format :
"%pH %s %C %c %UH %LH %TH %id %ix %ic %is %ib %ip %iP %ig %iA %ia %iu %il %P\n"); "%pH %s %C %c %UH %LH %TH %id %ix %ic %is %ib %ip %iP %ig %iA %ia %iu %il %P\n");
} }

+ 4
- 3
src/ucd-tools/tools/case.py View File

* along with ucd-tools. If not, see <http://www.gnu.org/licenses/>. * along with ucd-tools. If not, see <http://www.gnu.org/licenses/>.
*/ */


// NOTE: This file is automatically generated from the UnicodeData.txt file in
// the Unicode Character database by the ucd-tools/tools/categories.py script.
/* NOTE: This file is automatically generated from the UnicodeData.txt file in
* the Unicode Character database by the ucd-tools/tools/categories.py script.
*/


#include "ucd/ucd.h" #include "ucd/ucd.h"


#include <stddef.h> #include <stddef.h>


// Unicode Character Data %s
/* Unicode Character Data %s */


struct case_conversion_entry struct case_conversion_entry
{ {

+ 8
- 7
src/ucd-tools/tools/categories.py View File

* along with ucd-tools. If not, see <http://www.gnu.org/licenses/>. * along with ucd-tools. If not, see <http://www.gnu.org/licenses/>.
*/ */


// NOTE: This file is automatically generated from the UnicodeData.txt file in
// the Unicode Character database by the ucd-tools/tools/categories.py script.
/* NOTE: This file is automatically generated from the UnicodeData.txt file in
* the Unicode Character database by the ucd-tools/tools/categories.py script.
*/


#include "ucd/ucd.h" #include "ucd/ucd.h"


#define Zs UCD_CATEGORY_Zs #define Zs UCD_CATEGORY_Zs
#define Ii UCD_CATEGORY_Ii #define Ii UCD_CATEGORY_Ii


// Unicode Character Data %s
/* Unicode Character Data %s */
""" % ucd_version) """ % ucd_version)


for category in special_categories: for category in special_categories:
sys.stdout.write('{\n') sys.stdout.write('{\n')
for codepoint, table in sorted(category_tables[table_index].items()): for codepoint, table in sorted(category_tables[table_index].items()):
if isinstance(table, str): if isinstance(table, str):
sys.stdout.write('\tcategories_%s, // %s\n' % (table, codepoint))
sys.stdout.write('\tcategories_%s, /* %s */\n' % (table, codepoint))
else: else:
sys.stdout.write('\tcategories_%s,\n' % codepoint) sys.stdout.write('\tcategories_%s,\n' % codepoint)
sys.stdout.write('};\n') sys.stdout.write('};\n')
sys.stdout.write('{\n') sys.stdout.write('{\n')
for codepoints, category, comment in category_sets: for codepoints, category, comment in category_sets:
if category: if category:
sys.stdout.write('\tif (c <= 0x%s) return %s; // %s : %s\n' % (codepoints.last, category, codepoints, comment))
sys.stdout.write('\tif (c <= 0x%s) return %s; /* %s : %s */\n' % (codepoints.last, category, codepoints, comment))
else: else:
sys.stdout.write('\tif (c <= 0x%s) // %s\n' % (codepoints.last, codepoints))
sys.stdout.write('\tif (c <= 0x%s) /* %s */\n' % (codepoints.last, codepoints))
sys.stdout.write('\t{\n') sys.stdout.write('\t{\n')
sys.stdout.write('\t\tconst uint8_t *table = categories_%s_%s[(c - 0x%s) / 256];\n' % (codepoints.first, codepoints.last, codepoints.first)) sys.stdout.write('\t\tconst uint8_t *table = categories_%s_%s[(c - 0x%s) / 256];\n' % (codepoints.first, codepoints.last, codepoints.first))
sys.stdout.write('\t\treturn (ucd_category)table[c % 256];\n') sys.stdout.write('\t\treturn (ucd_category)table[c % 256];\n')
sys.stdout.write('\t}\n') sys.stdout.write('\t}\n')
sys.stdout.write('\treturn Ii; // Invalid Unicode Codepoint\n')
sys.stdout.write('\treturn Ii; /* Invalid Unicode Codepoint */\n')
sys.stdout.write('}\n') sys.stdout.write('}\n')


sys.stdout.write(""" sys.stdout.write("""

+ 2
- 0
src/ucd-tools/tools/printdata.py View File

props += (2 ** 34) * data.get('Emoji_Presentation', 0) # emoji-data props += (2 ** 34) * data.get('Emoji_Presentation', 0) # emoji-data
props += (2 ** 35) * data.get('Emoji_Modifier', 0) # emoji-data props += (2 ** 35) * data.get('Emoji_Modifier', 0) # emoji-data
props += (2 ** 36) * data.get('Emoji_Modifier_Base', 0) # emoji-data props += (2 ** 36) * data.get('Emoji_Modifier_Base', 0) # emoji-data
props += (2 ** 37) * data.get('Regional_Indicator', 0) # PropList 10.0.0
props += (2 ** 38) * data.get('Emoji_Component', 0) # emoji-data 5.0
# eSpeak NG extended properties: # eSpeak NG extended properties:
props += (2 ** 52) * data.get('Inverted_Terminal_Punctuation', 0) props += (2 ** 52) * data.get('Inverted_Terminal_Punctuation', 0)
props += (2 ** 53) * data.get('Punctuation_In_Word', 0) props += (2 ** 53) * data.get('Punctuation_In_Word', 0)

+ 11
- 7
src/ucd-tools/tools/scripts.py View File

* along with ucd-tools. If not, see <http://www.gnu.org/licenses/>. * along with ucd-tools. If not, see <http://www.gnu.org/licenses/>.
*/ */


// NOTE: This file is automatically generated from the Scripts.txt file in
// the Unicode Character database by the ucd-tools/tools/scripts.py script.
/* NOTE: This file is automatically generated from the Scripts.txt file in
* the Unicode Character database by the ucd-tools/tools/scripts.py script.
*/


#include "ucd/ucd.h" #include "ucd/ucd.h"


#define Geok UCD_SCRIPT_Geok #define Geok UCD_SCRIPT_Geok
#define Geor UCD_SCRIPT_Geor #define Geor UCD_SCRIPT_Geor
#define Glag UCD_SCRIPT_Glag #define Glag UCD_SCRIPT_Glag
#define Gonm UCD_SCRIPT_Gonm
#define Goth UCD_SCRIPT_Goth #define Goth UCD_SCRIPT_Goth
#define Gran UCD_SCRIPT_Gran #define Gran UCD_SCRIPT_Gran
#define Grek UCD_SCRIPT_Grek #define Grek UCD_SCRIPT_Grek
#define Sind UCD_SCRIPT_Sind #define Sind UCD_SCRIPT_Sind
#define Sinh UCD_SCRIPT_Sinh #define Sinh UCD_SCRIPT_Sinh
#define Sora UCD_SCRIPT_Sora #define Sora UCD_SCRIPT_Sora
#define Soyo UCD_SCRIPT_Soyo
#define Sund UCD_SCRIPT_Sund #define Sund UCD_SCRIPT_Sund
#define Sylo UCD_SCRIPT_Sylo #define Sylo UCD_SCRIPT_Sylo
#define Syrc UCD_SCRIPT_Syrc #define Syrc UCD_SCRIPT_Syrc
#define Xpeo UCD_SCRIPT_Xpeo #define Xpeo UCD_SCRIPT_Xpeo
#define Xsux UCD_SCRIPT_Xsux #define Xsux UCD_SCRIPT_Xsux
#define Yiii UCD_SCRIPT_Yiii #define Yiii UCD_SCRIPT_Yiii
#define Zanb UCD_SCRIPT_Zanb
#define Zinh UCD_SCRIPT_Zinh #define Zinh UCD_SCRIPT_Zinh
#define Zmth UCD_SCRIPT_Zmth #define Zmth UCD_SCRIPT_Zmth
#define Zsym UCD_SCRIPT_Zsym #define Zsym UCD_SCRIPT_Zsym
#define Zyyy UCD_SCRIPT_Zyyy #define Zyyy UCD_SCRIPT_Zyyy
#define Zzzz UCD_SCRIPT_Zzzz #define Zzzz UCD_SCRIPT_Zzzz


// Unicode Character Data %s
/* Unicode Character Data %s */
""" % ucd_version) """ % ucd_version)


for script in special_scripts: for script in special_scripts:
sys.stdout.write('{\n') sys.stdout.write('{\n')
for codepoint, table in sorted(script_tables[table_index].items()): for codepoint, table in sorted(script_tables[table_index].items()):
if isinstance(table, str): if isinstance(table, str):
sys.stdout.write('\tscripts_%s, // %s\n' % (table, codepoint))
sys.stdout.write('\tscripts_%s, /* %s */\n' % (table, codepoint))
else: else:
sys.stdout.write('\tscripts_%s,\n' % codepoint) sys.stdout.write('\tscripts_%s,\n' % codepoint)
sys.stdout.write('};\n') sys.stdout.write('};\n')
sys.stdout.write('{\n') sys.stdout.write('{\n')
for codepoints, script, comment in script_sets: for codepoints, script, comment in script_sets:
if script: if script:
sys.stdout.write('\tif (c <= 0x%s) return %s; // %s : %s\n' % (codepoints.last, script, codepoints, comment))
sys.stdout.write('\tif (c <= 0x%s) return %s; /* %s : %s */\n' % (codepoints.last, script, codepoints, comment))
else: else:
sys.stdout.write('\tif (c <= 0x%s) // %s\n' % (codepoints.last, codepoints))
sys.stdout.write('\tif (c <= 0x%s) /* %s */\n' % (codepoints.last, codepoints))
sys.stdout.write('\t{\n') sys.stdout.write('\t{\n')
sys.stdout.write('\t\tconst uint8_t *table = scripts_%s_%s[(c - 0x%s) / 256];\n' % (codepoints.first, codepoints.last, codepoints.first)) sys.stdout.write('\t\tconst uint8_t *table = scripts_%s_%s[(c - 0x%s) / 256];\n' % (codepoints.first, codepoints.last, codepoints.first))
sys.stdout.write('\t\treturn (ucd_script)table[c % 256];\n') sys.stdout.write('\t\treturn (ucd_script)table[c % 256];\n')
sys.stdout.write('\t}\n') sys.stdout.write('\t}\n')
sys.stdout.write('\treturn Zzzz; // Invalid Unicode Codepoint\n')
sys.stdout.write('\treturn Zzzz; /* Invalid Unicode Codepoint */\n')
sys.stdout.write('}\n') sys.stdout.write('}\n')

Loading…
Cancel
Save