Valdis Vitolins 8 years ago
parent
commit
fb64332f66

+ 1
- 1
android/jni/Android.mk View File

@@ -5,7 +5,7 @@ LOCAL_CFLAGS = -std=c11

# ucd-tools wide-character compatibility support:

UCDTOOLS_SRC_PATH := ../../ucd-tools/src
UCDTOOLS_SRC_PATH := ../../src/ucd-tools/src
UCDTOOLS_SRC_FILES := \
$(subst $(LOCAL_PATH)/$(UCDTOOLS_SRC_PATH),$(UCDTOOLS_SRC_PATH),$(wildcard $(LOCAL_PATH)/$(UCDTOOLS_SRC_PATH)/*.c*))


+ 27
- 1
dictsource/af_list View File

@@ -218,6 +218,7 @@ afganistan afg'anistan
alaska al'aska
albanië alb'A:ne@:@-
algerië alx2'e@re@:@-
alhambra al'ambra
amanzimtoti $4
antwerpen antv&rp@n
avignon _^_FR
@@ -333,6 +334,7 @@ oberammergau o@b@r'am@rgaU
oklahoma @Ukl@h'@Uma
outeniekwa @Ut@n'ikwa
oxford _^_EN
paardeneiland pA:rd@n_'eIlant
palermo pal'&rmu
pelindaba p&l@nd'A:ba
perú p@ru
@@ -342,6 +344,7 @@ portugal pOrtyx2al
potchefstroom pOtSIfstr'o@m
rhône _^_FR
richardsbaai ritS@dsb'AI
riversdal r@v@rsdal
riviersonderend r@fi:rsOn@r_'Ent
robertson _^_EN
salvador salvadO:r
@@ -585,6 +588,7 @@ james _^_EN
jane _^_EN
janine dZ@ni:n
Jean ZA~n $capital
jeff _^_EN
jesebel je@s@b&l
jessica _^_EN
jimmy _^_EN
@@ -769,9 +773,11 @@ stockenström stOk@nstro@m
stuart _^_EN
suzanne suz'A:n
suzette suz'Et
sylvia _^_EN
tania tanja
telemann te@l@man
terblanche t@rblA:nS
thelma _^_EN
theo tiu
theron tr'On
thessalonicense tEsalo@nis'E:ns@
@@ -816,6 +822,7 @@ vivaldi viv'aldi
viviers v@v@je@
wilhelm v@lh&l@-m
williston _^_EN
woltemade vOlt@mA:d@
khumalo kum'A:lu
zuma zu:ma

@@ -825,6 +832,7 @@ alibama $3
antares $2
beatles _^_EN
boeing _^_EN
cadillac _^_EN
checkers tSEk@rs
chevrolet _^_FR
chrysler kraIsl@r
@@ -1044,7 +1052,8 @@ fides _^_LA
forma _^_LA
grata _^_LA
habitatio _^_LA
inclusio _^_LA
inclusio _^_LA
(in camera) @n||kam@ra
(in debiti) _^_LA
(in absentia) _^_LA
(in extremis) _^_LA
@@ -1075,6 +1084,7 @@ vivos _^_LA
// main word list

aangaande $2
aanmerklik $2
aanstaande $2
(a cappella) a||kap'&la
adagio ad'A:dZi;%@U
@@ -1091,6 +1101,7 @@ algaande $2
alge alx2@
algehele alx2@h,e@l@
allegro al'Egru
allengs alENs
allergene $3
almiskie $3
alom al_'Om
@@ -1116,11 +1127,13 @@ aversie av'&rsi

babelaas bab@lA:s
barrikade $3
beaming b@_A:m@N
bedewete be@d@ve@t@
beide beId@
bekaf b&kaf
belangriker b@laNr@k,Ir
bene be@n@
beringde b@rINd@
beswil bEsv@l
besnedene b@sne@d@n@
bestes bEst@s
@@ -1130,6 +1143,7 @@ bewe be@v@
bewebeen be@v@be@n
bewend be@v@nt
bewering b@ve@rIN
bilharzia b@lharsia
biopsie bi'Opsi
bomaat bo@mA:t
bordegoed bO:rd@x2ut
@@ -1155,6 +1169,7 @@ cedille s@dIl@
charisma kar'Isma
cinsaut s@nso@
cliché kliS'eI:
clientèle _^_FR
clivia klIvija
cognac kOn^ak
confetti $2
@@ -1173,6 +1188,7 @@ dawidster dA:v@tst&r
déjà _^_FR
dekade dEk'A:d@
dekreling dEkre@l@N
demensie d@me~nsi
deurentyd dy@r@nteIt
deurgaans $1
deurkruis $2
@@ -1219,12 +1235,14 @@ ewentwil e@v@ntv@l
exodus Eks'o@dWs

factotum $2
faktotum $2
fakture $2
fetakaas fEtakA:s
figuur f@x2yr
filippense f@l@pEns@
finalis $3
finaliste $3
fluktuasie $3
fort fOrt
forte fOrt@

@@ -1269,6 +1287,7 @@ hekke h&k@
helaas he@l'A:s
here he@r@
herero hEr'E:ru
herontmoeting h&r_Ontmut@N
herrysenis h&r'eIs@n@s
hierso hi:rsO
hippie _^_EN
@@ -1294,6 +1313,7 @@ inkatha iNk'A:ta
inkluis $2
innestel InnEst@l
insomnia @nsOmnija
inteling Inte@l@N
intens @nt'Ens
intensiteit $4
ironieë irun'i:@
@@ -1312,6 +1332,7 @@ kaviaar kavi'A:r
kennisvaardig $1
kimono $2
klaasvakie $2
kliënteel $3
klimeid klImeIt
knapsekêrel knaps@k&:r@l
kolossense kOl@s'e~ns@
@@ -1325,11 +1346,13 @@ kopiereg kup'ir&x2
korswel kOrsv@l
kotiljons kOt@lj'o~ns
kritiek krIt'ik
kruisteling krYyste@l@N
kulture $2
kunsmatige kWnsm'A:t@x2@
kwansuis $2
kweekwal kwe@kval

landswye lantsveI@
lasagne las'anj@
legaat l@x2A:t
legate l@x2A:t@
@@ -1532,6 +1555,7 @@ sonore sun'o@r@
sonure sOn_yr@
sover so@f&r
staccato $2
steekhoudendheid $2
sterwens st&rv@ns
stilswye $1
strydros streIt_rOs
@@ -1555,6 +1579,7 @@ tevrede t@fre@d@
thula tu:la
toegee tux2e@
toereken ture@k@n
toleransie tOl@r'ansi
tornado $2
totale tut'A:l@
totsiens $2
@@ -1566,6 +1591,7 @@ tsoenami tsun'A:mi
uitdaging YydA:x2@N
uiteraard Yyt@r_'A:rt
uitermate $3
uitgeslotene Yytx2@slo@t@n@
uniforme $3

vaarwel fA:rv'&l

+ 39
- 6
dictsource/af_rules View File

@@ -73,6 +73,7 @@
@@@smokkel) ary %ar%eI // default stress: diamant-/drank-/dwelm-/goud-/kokaïensmokkelary, etc.
amarula %am%arul%a // fix stress and a sounds: amarula and compounds
a (ment %a // shorten a sound: perkament/testament and compounds
_) arendag (CAC %A:r@ntax2 // fix e sound: arendagtig/-e/-heid
arends A:r@nts_ // fix e sound: arendsoog/-kloue/-nes
argen (tA %arx2@n // move default stress: Argentinië/Argentyns/-e
a (riA 'A: // akwarium/barium/estuarium/herbarium/seminaria
@@ -310,7 +311,7 @@
attaché _%at%aSe@ // correct pronunciation: (handels-/inligtings-)attaché
attrib (u %atr@b // move default stress: attribuut/bute/attributêr
K) au @U // trauma/-ties/Aucamp/Paul/-a/-us
auer aU@r //Sauer etc. fixed au and e sounds in compounds.
auer aU@r //Sauer etc. fix au and e sounds in compounds.
augustus %Ox2WstWs
avokado %af%ukA:du // stress and o sounds
avokade %af%ukA:d@ // variant form of avokado
@@ -431,7 +432,8 @@
bo (grond bo@ // fix 1st o sound: bogronds/-e
_) bo (kle bo@ // fix o sound: bokleed/boklere
bokma (kier b%Okm%a // fix stress and a sound: bokmakierie/-s/-tjie
bom (AA bOm_ // fix a sound, pause: bomaanval/-aanslag/-aard/-eenheid and compounds
bom (AA bOm_ // fix a sound, pause: bomaanval/-aard/-eenheid and compounds
bomaans (la bOm_A:ns // fix o sound: bomaanslag/-aanslae
bomaans bo@mA:ns // but fix O sound: bomaans/-e
bonde (C bOnd@ // verbondenheid/bondeldraer/gebondene/saambondelend
boos (aardig b%o@s_ // move default stress: boosaardig/-e/-heid
@@ -526,6 +528,7 @@
bakate (l b%ak@t& // fix stress and vowel sounds: bakatel/-le/-letjie
baken bA:k@n // fix e sound: afbakening and compounds
bakte (ri b%akte@ // fix stress and e sound: bakterie/-ë
balalaika b%al%alaIk%a // fix a sounds and stress: balalaika/-s/-musiek
_) bam (boes b%am // move default stress: bamboes/-e/bamboesfluit...
ba (nalA b%a // fix stress and 1st a sound: banale/banaliteit
band (Alier b,and // fix stress and d sound: band(e/o)lier/-e
@@ -547,6 +550,7 @@
ba (sieli b%a // fix stress and 1st a sound: basielie/-kruit, etc.
basotho b%asut%u // fix stress and vowel sounds: Basotho/-0ponie and similar
basi (s bA:s@ // basis and compounds
basilie b%asil%i // fix a sound and stress: basilie/-kruid, etc.
bastille b%asti:l // Bastille and compounds like Bastilledag
batal (jon b%at%al // shorten 1st a sound, moved stress: bataljon and compounds
ba (tik b%a // fix stress and a sound: batik/-doek/-kuns/-werk
@@ -646,6 +650,8 @@
bere (_ be@r@ //tediebere pandabere etc.
_) ber (C b&r // fix e sound: Bert/Berta/Bertie/Bertus/berke/-boom
beste (_ bEst@ // fix e sound: beste/allerbeste/naasbeste/-s
beton (C@ b@tOn // split ng sounds: betongebou/-gietsel/-gruis
beton (inC@ b@t'On // fix O sound: betoninrigting/-ingenieur
be (weging b@ // draaibeweging/swaaibeweging
bewende be@v@nd@ // first e pronounced long
bewe (rig be@v@ // fix e sound and stress: bewerig/-e/-heid
@@ -749,6 +755,7 @@
dia (C d%i%a // diabeet/diafragma/diagnose/dialek/dialoog/diamant
diaken d%iA:k@n // diaken and compounds
diako (nie d%i%ak%u // fix stress and o sound: diakonie/-ë
diende dind@ // fix last e sound in many compounds of bediende: bediendekamer
diens (willi d%ins // move default stress: dienswillig/-e/-heid
dieper (@ dip@r // fix e sound: diepere/dieperliggend/-e
digi (ta d%ix2%i // move default stress: digitaal/digitale
@@ -983,6 +990,7 @@
@C) e (reekK @ // fix e sound: duine-/rotsereeks
aai) e (C @ // fix e sound: baaierd/waaierstert/paaiement/compounds starting with baaiers-
lat) eres @r@s //lateres
@C) erm (K &r@-m // a catch-all for words ending in -erm(s)
fp) ers (_ &rs // fix e sound: dof-/gif-/olyf-/tydskrifpers, etc.
bloup) ers &:rs // fix e sound: bloupers
iew) ers (ter @rs // fix e sound: (l)iewerster
@@ -1074,6 +1082,7 @@
p) e (talje @ // fix stress and 1st e sound: petalje and compounds
l) e (moen @ //fix lemoen and compounds
Cy) e (C+ @ // fix connecting e sound in compounds like byekorf/skilderyemuseum, etc.
effe (kleur Ef@ // fix stress and 2nd e sound: effekleur(ig/-e)
eier eI@r // eiergeel/eierwit/leierfiguur/leiergroep
@C) ei (land _,eI // insert short pause: skiereiland and many -eiland compounds
@) ei (sen _'eI // (on)veeleisend/spoedeisend
@@ -1589,6 +1598,7 @@
flu (we fl%y // move default stress: fluweel/fluwele and compounds
_) fok (o fOk_ // fix o sound, insert short break: fokop/fokof
folio fo@li_u // fix o sounds: folio and compounds
fondsw fOntsv // fix v sound: fondswerwing & compounds/-waardes
fondue f%Ondy // fix stress and ue sound: fondue and compounds
fone (tiek f%o@n@ // fix e sound: fonetiek and compounds
fone (tie f%une@ // fix stress and o sound: foneties/-e
@@ -1602,6 +1612,7 @@
formi (da f%Orm%i // move default stress: formidabel/-e
formu (lier f%Orm%y // move default stress: formulier/-e/-boek
_) for (se_ f'Or // stress back to 1st slb; see rule: _) for (C
forse (nd fOrs@ // fix e sound: forsend/-e
fos (fa f%Os // move default stress: fosfaat/fosfate
fo (ssiel f%O // move default stress: fossiel/-e and compounds
foto fo@tu
@@ -1694,6 +1705,7 @@
_) gra (na x2r@ // granaat(boom)/granate
grandi (o x2r%and%i // move default stress: grandioos/-ose
_) gra (vA x2r%a // fix stress and a sound: gravin/-ne/gravure
grenadella gr@n@d&la // fix stress and vowels: grenadella and compounds like -heining
griekwa x2rikwa // fix w sound and a sound in compounds: Griekwa/-land, etc.
ouCa) gr (ootjie x2r' // move default stress: ouma/oupagrootjie/-s
_) graad (e x2r%A:t_ // fix stress and d sound: graadeen(tjies)/-elfs
@@ -1726,6 +1738,7 @@
gegesel x2@x2e@s@l // fix middle e sound
K) gele (_ x2e@l@ // fix stress and e sound: (eier)gele
_) geler x2e@l@r // stress and 1st e sound: geler/gelerig
_) gell x2&l
gemel (d x2@m&l // fix e sound: bo-/bowe-/laas-/meergemeld/-e
gems x2Ems // fix e sound: (baster)gemsbok/-bul/-ooi, etc.
gene (_ x2e@n@ // gene/diegene
@@ -1741,7 +1754,7 @@
_) ge (ossP2 x2@ // but ge- prefix: geossilleer/geossifiseer, etc.
ni) ge (ri x2'e@ //nigeriese etc.
gese (_ x2'e@s@ //Portugese, and others
gesp (etjie x2Esp // fix e sounds: gespetjie/-s
gespe (_ x2Esp@ // gespe and compounds
gespes (_ x2Esp@s // gespes and compounds
gewens (g x2e@v@ns // (on)vergewensgesind/-e/-heid
@@ -1847,6 +1860,7 @@
hart (stogte_ h%art // move default stress: hartstogtelik
hart (stogte_N hart // restore default stress: hartstogte
ha (we hA: // fix stress and a sound: hawearbeider/lewendehaweafdeling
_) hef (a hEf_ // fix e sound, insert break: hefapparaat/-arm(s)
hia (sint h%ij%a // fix stress: hiasint/-e and compounds
_) hi (bis h%i // move default stress: hibiskus/-se and compounds
hierna (maal h%i:rnA: // fix stress and a sound: hiernamaals/-e
@@ -1984,6 +1998,7 @@
_) idi (o %id%i // idioom/idiome/idioot
_) id (A %id // idille/idillies/ideëryk
&l) iker (C @k@r // fix i sound: menslikerwys/redelikerwyse, etc.
&l) iker (_ @k@r // fix i sound: afstootliker/(ge)redeliker/onberispeliker
illumi (nA %il%um%i // fix stress and vowel sounds: illuminasie/illumineer/illuminati
illu (si %ily // illusie/-s/illusief
illu (strA %il%W // fix i sound: illustreer/illustrering/illustrasie
@@ -2114,6 +2129,7 @@
_) in (a@P2 In // inakkuraat/inaktief/inaktiwiteit
_) inbe (lC Inb%& // fix e sound in compounds like inbelprogram
indone (si @nd%uni: // fix stress and vowel sounds: Indonesië/Indonesiese
s) in (gestel @n // split n g: compounds with ingesteldheid/winsingestelde
_) in (oe In_ // fix i sound, pause: inoefen/-oes and derivatives
invest (eer @nv%Est // fix stress, v and e sounds: investeer/-der
investe (r@ @nv%Este@ // fix e sound: (kapitaal)investering/investerende
@@ -2324,10 +2340,11 @@
krieketw krik@tv //krieket followed by w in compounds always v

krokodi (l kr%Ok@dI // (wyfie)krokodil/-le
kro (niek kr%u // fix stress and o sound: kroniek and many compounds
kruger kr'Y@@r
ku (ba@ k%y // move default stress: kubaan/kubane
ku (biek k%y // move default stress: kubiek/-e/-getal
_) kuber kyb@r // fixed e sound: kuberruim(te)
_) kuber kyb@r // fix e sound: kuberruim(te)
kulin k%Wl%in // Move default stress and fix u sound: kulinër/-e
@) kundi (g k'Wnd@ // wiskundige/onoordeelkundigheid and many similar
kurwe kWrv@ // fix e sound: kurwes/skurwebas/skurwebek/skurwepadda
@@ -2364,6 +2381,7 @@
kafe (te k%af@ // fix stress and vowel sounds: kafeteria and compounds
kafe (ï k%af%i // fix stress and vowel sounds: kafeïene and compounds
ka (jak k%a // fix stress and 1st a sound: kajak/-ke/-vaarder
kakao k%akA:w // fix stress and vowel sounds: kakao and many compounds
kake (C kA:k@ // kakebeen/skakelaar/skakelbord/skakelfunksie
kalahari kalah'A:ri // stress: Kalahari/-sand/-woestyn
_) ka (lAnC k%a // kalender and compounds/kalant/kalander and compounds
@@ -2544,6 +2562,7 @@
kontrasep k%Ontr%asEp // fix stress and e sound: kontrasepsie and derivatives
kontrover (s k%Ontr%uv&r // o and v sounds: kontroversie/kontroversieel
kop (o kOp? // fix o sound: koponderstebo/kopomdraai/kopoperasie/gryskoponderwyser/poenskopolifant
kop (agtig k%Op_ // fix o sound, insert break: hamer-/spinnekop-/penkopagtig(e(s))
kopu (lA k%Op%y // fix o sound: kopulasie/kopuleer and derivatives
_) kor (dA k%Or // kordaat/kordon
ko (rint k%u // fix stress and o sound: korint/-e and compounds
@@ -2656,6 +2675,7 @@
loboto (mie l%ub%Ot%u // fix stress and o sounds: lobotomie
lo (ja l%u // fix stress and o sound: lojale/lojaliteit
lo (kalA l%u // fix stress and o sound: lokale/ontvangslokale, etc.
loke (t l%ukE // fix stress and vowel sounds: many compounds with loket
lom (bardA l%Om // move default stress: Lombardies/-e/Lombarde
_) lore (C@ lo@r@ // fix e sound: verloregaan/verloregoederekantoor/Verlorerivier
_) losge (@P5 l'Osx2@
@@ -2805,6 +2825,7 @@
medisyne m@d@seIn@ //medisyne and compounds
meganies m@x2'A:nis
_) meege (@P5 m'e@x2@
_) meegewe (nd me@x2e@v@ // fix e sounds and stress: meegewend(e)
me (juf m@ // move default stress and shorten e sound
me (laats m@ // fix stress and e sound: melaats/-e/-heid
melancholie (_N m%El%aNk%o@li // fix stress and 1st e sound: melancholie
@@ -2946,6 +2967,7 @@
morf (otomie m%Orf // move default stress: morfotomie
_) morr (i mOr // restore default stress: morrig/morrie/-doring
_) mors (@ mOrs // restore default stress: morsaf/morsdood/morsig
mos (agtig mOs_ // fix o sound, insert break: (kos)mosagtig(e)
mosam (biek m%o@s%am // move default stress: Mosambiek/-er/-se
mosa (ïek m%o@s%a // move default stress: mosaïek and compounds
mo (skee m%O // move default stress: moskee/-s and compounds
@@ -3035,6 +3057,7 @@
ne (anderCa n%i // move default stress: Neander(d/t)al/-ler
neger (in n%e@x2@r // move default stress: negerin/-ne
ne (gosie n@ // fix stress and e sound: negosie/-ware, etc.
nek (_ n&k // catch-all for words ending in -nek: koedoe-/swaannek
nek (om n&k_ // fix e sound, insert pause: nekom(ge)draai
nekta (rien n%Ekt%a // fix stress and a sound: nektarien/-perske, etc.
neo (li n%i%u // fix stress and vowel sounds: neolities/-e/neolitikum
@@ -3080,6 +3103,7 @@
nood (lotti n%o@t // move default stress: noodlottig/-e
nood (saak n%o@t // move default stress: noodsaaklik/-e/-heid, vs. noodsaak
nood (saak_N no@t // restore default stress: (ge)noodsaak
nooien (tjie noIN // remove the e sound: nooientjie(s) and compounds
noord (oos n%o@rt_ // move default stress: noordoos/-te/-telik/-e
nor (ma@ n%Or // normaal/normaalweg/normale/abnormaal/-ale
nostal (gie_N n%Ost%al // stress on last slb.: nostalgie
@@ -3417,6 +3441,7 @@
ooi oI
ooy oI
oodjie oIci
CC) ool (A o@l_ // insert break: skooluur/-ure, steenkooluitvoer, but not: Karoolug
ootjie oIci
oontjie oINki
oondjie oINki
@@ -3483,6 +3508,7 @@
oot (moedig %o@t // move default stress: ootmoedig/-e/-heid

.group op
_) opaal %o@pA:l // fix o sound, remove break: opaal and compounds
opaat up'A:t // homeopaat/psigopaat and similar
opatie upat'i // homeopatie/neuropatie and similar
opaties up'A:tis // psigopaties/osteopaties and similar
@@ -3578,6 +3604,7 @@
pol (vy p%Ol // move default stress: polvy/-e and compounds
pomelo p%ume@l%u // fix stress and o sounds: pomelo(sap/-drankie...)
_) pon (dok p%On // move default stress: pondok/-ke/-kie
pop (agtig p%Op_ // fix o sound, insert break: popagtig(e) and compounds
popu ,pOpy
_) por (C %pOr //portret portaal etc.
por (ie p%o@r // move default stress: porie/-ë
@@ -4118,6 +4145,7 @@
sker (muts sk%&r // move default stress: (ge)skermutsel/skermutseling/-e
skerpi (oen sk%&rp%i // move default stress: skerpioen/-e and compounds
skilder (y sk@ld@r // move stress to y: skildery and compounds like skilderymuseum
skim (agtig sk@m_ // fix i sound, insert break: skimagtig(e)
skisofr (e sk%is%ufr // fix stress and o sound: skisofreen/skisofrene
skle (rose skl@ // fix stress and e sound: sklerose and compounds
_) skok (AP4 sk''Ok_ // fix o sound and stress: skokaankondiging/-effek/-insluiting/-onthulling...
@@ -4252,6 +4280,7 @@
_) su (meri s%u // fix stress and u sound: sumeries/-e
su (mmier s%W // move default stress: summier/-e
super (A s''yp@r_ // fix stress, break in compounds like superintelligent
superi (A s%up%e@r%i // fix stress and vowel sounds: superieur/superioriteit
superintendent s,upr@nt%EndEnt // fix stress and vowel sounds: superintendent and compounds
surro (ga s%Wr%u // fix stress and o sound: surrogaat/surrogate and compounds
su (saC s%u // fix stress and u sound: susan/-na/susara
@@ -4561,7 +4590,7 @@
toe (riste@ t%u // move default stress: toeristebedryf/-sentrum and similar
toer (n t%ur // move default stress: compounds of toernooi
toere (_ tu:r@ // restore default stress
toi (let t%OI // move default stress: toilet and compounds
toilet t%OIlEt // move default stress: toilet and compounds: toiletartikel/-emmer/-opsigter
tokke (lo t%Ok@ // move default stress: tokkelos/-sie/tokkelok and compounds
tok (tokk t%Ok // move default stress: toktokkie/-s/-spelery, etc.
tombola t%Ombo@l%a // move default stress: tombola and compounds
@@ -4574,6 +4603,7 @@
@) toris (_ t'o@r@s // fix stress: pectoris/klitoris
_) tor (nyn t%Or // move default stress: tornyn/-e and compounds
_) tos (ka t%Os // move default stress: Toskaanse/Toskane
tser (tjie ts@r // fix e sound: (skoen)poetsertjie/weerkaatsertjie
ttel t@l // many compounds of bottel/skottelgoed
tuberkulose t%yb@rk%ylo@s@ // fix stress; e sound in compounds: tuberkulose/-behandeling
tug (A tWx2_ // fix u sound: (on)tugondersoek/-oortreding/-ordonnansie
@@ -4875,6 +4905,7 @@
_) vanklik faNkl@k // (on)ontvanklik/-e/-er/-heid
vanself (spr f%ans%&lf // move default stress: vanselfsprekend/-e/-heid
vari (A v%ar%i // fix v sound and stress: variasie/-s/varieer
va (sal v%a // fix stress and v and a sounds: vasal/-le
vaseline v%as@lin // fix stress, v and vowel sounds: vaseline/-bottel, etc.
vasste (l fast& // fix e sound: vasstel(ling/-lende)
_) vat (A@ fat_ // fix a sound: vatafstand/-orgaan
@@ -4943,9 +4974,11 @@

ve (l f& // maagvel, stress on 1st slb
@) vel (A fe@l // aanbeveling/aanbevelingsbrief
vel (djie f&l // fix -djie sound: veldjie(s) and compounds
veld (C f&lt // fix d sound: veldreuk/-radio/-rantsoen, etc.
veld (eks f&lt_ // fix d sound, pause: veldekskursie/-ekspedisie/-s
veld (o f&lt_ // fix d sound, pause: veldorgideë/-opsigter/-oppervlakte/-opname, etc.
veld (t f&l // eliminate double t sound: veldtog and many compounds, Langeveldt, Springveldt
vele f'e@l@
ven (detta v%En // fix stress and v sound: vendetta and compounds
ven (dusie f@n // fix stress and e sound: vendusie and compounds
@@ -5048,7 +5081,7 @@
vol (kome f%Ol
vo (llA_ fO // volle/vollê/Volla - exception to: vo (lC f%O
volle (dig f%Ole@ // fix stress and e sound: (on)volledig/-e/-heid...
voll (engte fOlE
volle (ngte fOlE
vo (ller fO // exception to: vo (lC f%O
volles (_ fOl@s // fix stress and e sound: volles/passievolles, etc.
vol (hou_ fOl // exception to: vo (lC f%O

+ 9
- 10
dictsource/en_list View File

@@ -2228,7 +2228,7 @@ idevice $alt6
idly aIdlI
idiocy IdI@si
ifrog $alt6
ignoramus Igno@r'eIm@s
ignoramus IgnO@r'eIm@s
illiterate $alt2
illumine $alt2
imagery ImIdZri
@@ -2716,6 +2716,7 @@ nonsense n0ns@ns
nonetheless nVnD@l'Es
nosedive noUzdaIv
nosir noUs3:
?5 nosir noUsIR
not noUt // for noted, notable, etc
nots n0ts
(nôtre dame) noUtr@'dA:m
@@ -3420,7 +3421,7 @@ sincerest sIns'i@r@st
sinus saIn@s
siphon $alt2
sir s,3: $only
?5 sir s,VR $only
?5 sir s,IR $only
siren saIr@n
site saIt // for sited
ski ski:
@@ -3455,12 +3456,13 @@ sommelier s0m'Eli@
son sVn
sonar soUnA@
sonny sVnI
sooth su:T $only
sopapilla soUp@p'i:@
sope soUpeI
sorbet sO@beI
souffle su:fl'eI
soundbite saUndbaIt
souvenir su:v@n'i@3
sopapilla soUp@p'i:@
sorbet sO@beI
soyabean sOI@bi:n
specific sp@sIfIk
specimen spEsI2m@n
@@ -3568,7 +3570,6 @@ tallyho talI'hoU
tamale ta#mA:li
tampon tamp0n
tangerine tandZ@r'i:n
taoiseach ti:S@x
tapestry tapI#stri
tarantula t@rantS@l@
tardis $alt1
@@ -3913,6 +3914,7 @@ yer j3 $u+
ye ji: $u+
yea jeI
yessir jEss3:
?5 yessir jEssIR
yoghurt j0g3t
?3 yoghurt joUg3t
?3 yogurt joUg3t
@@ -4546,8 +4548,6 @@ Annise a#ni:s
?!3 Anthony ant@ni
Anton ant0n
Anya anj@
Aoife i:f@
Aoiffe i:f@
Aphrodite afr@d'aIti
Archibald A@tSIbO:ld
Archie A@tSi
@@ -4898,7 +4898,6 @@ Sabine sa#b'i:n
Salman sa#lmA:n
Samantha sa#manT@
(Santa claus) s'ant@||kl'O:z
Saoirse si@S@
Sarah se@r@
Sarisa $alt3
Seamus SeIm@s
@@ -4912,8 +4911,8 @@ Sheila Si:l@
Simon saIm@n
Sinead SI2neId
Sinéad SI2neId
Siobhan S@vO:n
Siobhán S@vO:n
Siobhan SI2vO:n
Siobhán SI2vO:n
Sonia s0nj@
Sophia soUf'i@
Sophie soUfi

+ 29
- 8
dictsource/en_rules View File

@@ -150,12 +150,16 @@
sw) a (m_ a
sw) a (nk a
ao eI0
mh) ao eI // Irish, e.g. 'mhaol' /weIl/
m) ao aU
p) ao aU
t) ao aU
ao (_ =aU
aois (_ i:S
aoise (_ i:S@
ao (ism aU
ao (ist aU
aoi (C i: // Irish, e.g. 'Aoife' /i:f@/
aoir (C e@ // Irish, e.g. 'Saoirse' /se@S@/
?3 aoir (C 3: // Irish, e.g. 'Saoirse' /s3:S@/
g) ao (l eI@
aor eI'o@
m) ao (ri aU
@@ -180,7 +184,8 @@
_n) a (tional a
@) a (tious 'eI
ell) a (trix @
a (triC 'eI
a (trix 'eI
a (trice 'eI
n) a (tur eI
n) a (tura a
&) a (ture_ @
@@ -512,7 +517,7 @@
_m) ag (ell a#dZ
Cp) age (_ eIdZ
pp) age (_ I2dZ
_ant) ag 'ag
_ant) ag (on 'ag
enr) ag (e_ 'eIdZ
outr) ag (e_ eIdZ
der) ag (e_ eIdZ
@@ -1463,7 +1468,7 @@ _it_separ) ate (_ @t
may) be (_ bi:
_) be (CA bI#
_) bete (lg bi:t@
_) be (C% bE
_) be (C%+ bE
_) be (atiC b%i:
_) be (b bi:
_) be (cl bI#
@@ -1477,7 +1482,7 @@ _it_separ) ate (_ @t
_) be (kn bI#
_) belarus bEl@r'u:s
_) be (lC bE
_) be (llig bI#
_) be (llig+ bI#
_) be (re bE
_) be (sC bI#
_) be (stia bE
@@ -2150,6 +2155,8 @@ _it_separ) ate (_ @t
e (Cical 'E
e (CiuB i:
&) e (_
aoiC) e (_ @ // Irish, e.g. 'Aoife' /i:f@/
aoiCC) e (_ @ // Irish, e.g. 'Saoirse' /se@S@/
acB) e (_ %I
XC) e (_N i:
vert) e (b I
@@ -2378,6 +2385,7 @@ _it_separ) ate (_ @t
y) ed (_S2v d#
debut) ed (_S2 d#
edly (_S4m I#dl%i
eg) edly (_S3m I#dl%i
c) ed (e_ 'i:d
p) ed (e_ i:d
p) edal Ed@L
@@ -2567,6 +2575,8 @@ _it_separ) ate (_ @t
en (core 0n
&) ency (_ @ns%i
ency (cli %EnsI
_) en (dg@ %En
_) en (dp@ %En
k) en (d_ En
s) en (d_ En
t) en (d_ En
@@ -2771,7 +2781,7 @@ _it_separ) ate (_ @t
exp) eri (en i@rI2
XC) er 3:
th) er (@ 3:
h) er (@ %3
h) er (nan %3
X) er (A E#r
_h) eretical I#rEtIk@L
_qu) er 3:
@@ -2890,6 +2900,7 @@ _it_separ) ate (_ @t
&z) es (_S2 %I#z
&C) es (_S1i z
xus) es (_S2 %I#z
tamus) es (_S2 %I#z // hippopotamuses
es (carp I2s
es (cape %Es
es (capi %Es
@@ -3099,7 +3110,7 @@ _it_separ) ate (_ @t
&) ford (_S4 f3d
&f) ford (_S4 3d
for (see f%O@
for (ward f'o@ // straightforward
for (ward f'O@
ft (en f
&) ful (_S3i f@L

@@ -4288,6 +4299,8 @@ multip) ly laI
_) metall (ic m@tal
metabo m@t'ab0
_) meta (llu m%Eta
mh (ao w // Irish, e.g. 'mhaol' /weIl/
aoi) mh (e v // Irish, e.g. 'Caoimhe' /ki:v@/
&) mobile m@bi:l
_) mocha moUk@
mono (ga m@n'0
@@ -4299,6 +4312,7 @@ multip) ly laI
&) mouth (_ m@T
&) mouth (_$w_alt1 maUT
_) multi mVlti
_) multi (pl m,VltI
_) multi (@@P5 m,VltI

.group mi
@@ -5847,6 +5861,10 @@ multip) ly laI
Co) s (Er z
Co) s (En z
e) s (d z
aoi) s (e S // Irish, e.g. 'Laoise'
aoi) seach S@x // Irish, e.g. 'Taoiseach'
aoi) sigh Si // Irish, e.g. 'Taoisigh'
aoir) s (e S // Irish, e.g. 'Saoirse'
_) se (clu sI#
secur sI#kjU@
_) se (duc sI#
@@ -5878,6 +5896,7 @@ multip) ly laI
&) s (ic_ z
n) s (ic_ s
ss (ic s
mu) s (e z
mu) s (ic z
ea) s (ie z
ea) s (il z
@@ -6082,6 +6101,8 @@ multip) ly laI
th (ill th
gh) th (A th
ee) thing DI2N
soo) th D
soo) th (say T
the (_ D
&) th (L03_ =T
ou) thed (_ Dd

+ 4
- 4
dictsource/fa_list View File

@@ -1,5 +1,5 @@
// * Farsi Language fa (or Parsi or Persian) fa_list Version 3.133
// * This file written by Shadyar Khodayari and Ehsan Esmaili who has managed collecting exceptional words. 05-10-2017
// * Farsi Language fa (or Parsi or Persian) fa_list Version 3.134
// * This file written by Shadyar Khodayari and Ehsan Esmaili who has managed collecting exceptional words. 06-24-2017
//*********
// * This program is free software; you can redistribute it and/or modify *
// * it under the terms of the GNU General Public License as published by *
@@ -554,6 +554,7 @@ _) paRAntezbaste:
آموخت Amuxt
آموزد Amuzad
آموزش AmuzeS
آمپر AmpeR
آمپرمتر AmpeRmetR
آمپلیفایر AmpelifAjeR
آمپیریسم AmpiRism
@@ -5055,7 +5056,6 @@ _) paRAntezbaste:
درایه deRAje
درایو deRAjv
درایور deRAjveR
درباره daR'bAReje:
دربازکن daRbAzkon
دربدر daRbedaR
دربندکشیده daRbandkeSide
@@ -6809,6 +6809,7 @@ _) paRAntezbaste:
شدیدا Sadidan
شدیداللحن Sadidollahn
شدیم Sodim
شراادی SA:_d:jA:_R
شرافت SeRAfat
شراپنل SeRApnel
شراکت SeRAkat
@@ -6816,7 +6817,6 @@ _) paRAntezbaste:
شربت SaRbat
شرت 'SoRt
شرتکات SoRtkAt
شراادی SA:_d:jA:_R
شرشر SeRSeR
شرطه SoRte:
شرعا SaR?an

+ 3
- 3
dictsource/fa_rules View File

@@ -1,5 +1,5 @@
// * Farsi Language fa (or Parsi or Persian) fa_rules Version 3.133
// * This file written by Shadyar Khodayari 05-10-2017
// * Farsi Language fa (or Parsi or Persian) fa_rules Version 3.134
// * This file written by Shadyar Khodayari 06-24-2017
//*********
// * This program is free software; you can redistribute it and/or modify *
// * it under the terms of the GNU General Public License as published by *
@@ -4843,7 +4843,7 @@ L09L04) السّادات (_Sm8 ossAdAt
L09L09L09L09) م (L03L09L09L09_ ma

// Prefixes م
_) م (L03L09L03_$noprefixP1@ ma
_) م (L03L04L03_$noprefixP1@ ma
_) می (L03L09+$noprefixP2@ mi
_) می (آL09L09$noprefixP2@ mi
_) می (وL09L09$noprefixP2@ mi

+ 1
- 0
src/ucd-tools/.gitignore View File

@@ -1,4 +1,5 @@
.*.swp
*~

# intermediate files:


+ 5
- 2
src/ucd-tools/CHANGELOG.md View File

@@ -7,11 +7,14 @@ These are eSpeak NG specific modifications to the `ucd-tools` project:
* `data/espeak-ng` data files for eSpeak NG extended data.
* espeak-ng PropList property lookup as part of the `ucd_property` API.

## 9.0.0.1 - (In Progress)
## 10.0.0 - 2017-06-25

* Add `iswblank` and `iswxdigit` compatibility.
* Improve ctype compatibility.
* PropList property lookup.
* PropList and emoji-data property lookup.
* Support building with a C89 compiler.
* Update to Unicode Character Data 10.0.0.
* Unicode Emoji 5.0.

## 9.0.0 - 2016-12-28


+ 7
- 7
src/ucd-tools/Makefile.am View File

@@ -55,34 +55,34 @@ EXTRA_DIST += ChangeLog

############################# Unicode Data ####################################

EMOJI_VERSION=4.0
EMOJI_VERSION=5.0
UCD_VERSION=@UCD_VERSION@
UCD_ROOTDIR=data/ucd
UCD_SRCDIR=http://www.unicode.org/Public

data/emoji/emoji-data.txt:
mkdir -pv data/emoji
curl ${UCD_SRCDIR}/emoji/${EMOJI_VERSION}/emoji-data.txt > $@
curl ${UCD_SRCDIR}/emoji/${EMOJI_VERSION}/emoji-data.txt -o $@

data/ucd/PropList.txt:
mkdir -pv data/ucd
curl ${UCD_SRCDIR}/${UCD_VERSION}/ucd/PropList.txt > $@
curl ${UCD_SRCDIR}/${UCD_VERSION}/ucd/PropList.txt -o $@

data/ucd/DerivedCoreProperties.txt:
mkdir -pv data/ucd
curl ${UCD_SRCDIR}/${UCD_VERSION}/ucd/DerivedCoreProperties.txt > $@
curl ${UCD_SRCDIR}/${UCD_VERSION}/ucd/DerivedCoreProperties.txt -o $@

data/ucd/PropertyValueAliases.txt:
mkdir -pv data/ucd
curl ${UCD_SRCDIR}/${UCD_VERSION}/ucd/PropertyValueAliases.txt > $@
curl ${UCD_SRCDIR}/${UCD_VERSION}/ucd/PropertyValueAliases.txt -o $@

data/ucd/Scripts.txt:
mkdir -pv data/ucd
curl ${UCD_SRCDIR}/${UCD_VERSION}/ucd/Scripts.txt > $@
curl ${UCD_SRCDIR}/${UCD_VERSION}/ucd/Scripts.txt -o $@

data/ucd/UnicodeData.txt:
mkdir -pv data/ucd
curl ${UCD_SRCDIR}/${UCD_VERSION}/ucd/UnicodeData.txt > $@
curl ${UCD_SRCDIR}/${UCD_VERSION}/ucd/UnicodeData.txt -o $@

############################# documentation ###################################


+ 4
- 3
src/ucd-tools/configure.ac View File

@@ -1,5 +1,5 @@
AC_PREREQ([2.65])
AC_INIT([Unicode Character Database Tools], [9.0.0], [https://github.com/rhdunn/ucd-tools/issues], [ucd-tools], [https://github.com/rhdunn/ucd-tools])
AC_INIT([Unicode Character Database Tools], [10.0.0], [https://github.com/rhdunn/ucd-tools/issues], [ucd-tools], [https://github.com/rhdunn/ucd-tools])
AM_INIT_AUTOMAKE()

m4_ifdef([AM_SILENT_RULES],[AM_SILENT_RULES])
@@ -24,6 +24,7 @@ dnl library checks.
dnl ================================================================

AC_CHECK_HEADERS([stddef.h]) dnl C89
AC_CHECK_FUNCS([iswblank]) dnl C99

AC_TYPE_UINT8_T
AC_TYPE_UINT32_T
@@ -33,10 +34,10 @@ dnl UCD data configuration.
dnl ================================================================

AC_ARG_WITH([unicode-version],
[AS_HELP_STRING([--with-unicode-version], [Unicode version to support @<:@default=9.0.0@:>@])],
[AS_HELP_STRING([--with-unicode-version], [Unicode version to support @<:@default=10.0.0@:>@])],
[AS_IF([test x"$withval" != x],
[UCD_VERSION="$withval"])],
[UCD_VERSION="9.0.0"])
[UCD_VERSION="10.0.0"])

AC_SUBST(UCD_VERSION)


+ 4
- 3
src/ucd-tools/src/case.c View File

@@ -18,14 +18,15 @@
* along with ucd-tools. If not, see <http://www.gnu.org/licenses/>.
*/

// NOTE: This file is automatically generated from the UnicodeData.txt file in
// the Unicode Character database by the ucd-tools/tools/categories.py script.
/* NOTE: This file is automatically generated from the UnicodeData.txt file in
* the Unicode Character database by the ucd-tools/tools/categories.py script.
*/

#include "ucd/ucd.h"

#include <stddef.h>

// Unicode Character Data 9.0.0
/* Unicode Character Data 9.0.0 */

struct case_conversion_entry
{

+ 740
- 679
src/ucd-tools/src/categories.c
File diff suppressed because it is too large
View File


+ 20
- 20
src/ucd-tools/src/ctype.c View File

@@ -69,16 +69,16 @@ int ucd_isblank(codepoint_t c)
switch (ucd_lookup_category(c))
{
case UCD_CATEGORY_Zs:
switch (c) // Exclude characters with the <noBreak> DispositionType
switch (c) /* Exclude characters with the <noBreak> DispositionType */
{
case 0x00A0: // U+00A0 : NO-BREAK SPACE
case 0x2007: // U+2007 : FIGURE SPACE
case 0x202F: // U+202F : NARROW NO-BREAK SPACE
case 0x00A0: /* U+00A0 : NO-BREAK SPACE */
case 0x2007: /* U+2007 : FIGURE SPACE */
case 0x202F: /* U+202F : NARROW NO-BREAK SPACE */
return 0;
}
return 1;
case UCD_CATEGORY_Cc:
return c == 0x09; // U+0009 : CHARACTER TABULATION
return c == 0x09; /* U+0009 : CHARACTER TABULATION */
default:
return 0;
}
@@ -91,7 +91,7 @@ int ucd_iscntrl(codepoint_t c)

int ucd_isdigit(codepoint_t c)
{
return (c >= 0x30 && c <= 0x39); // [0-9]
return (c >= 0x30 && c <= 0x39); /* [0-9] */
}

int ucd_isgraph(codepoint_t c)
@@ -174,23 +174,23 @@ int ucd_isspace(codepoint_t c)
case UCD_CATEGORY_Zp:
return 1;
case UCD_CATEGORY_Zs:
switch (c) // Exclude characters with the <noBreak> DispositionType
switch (c) /* Exclude characters with the <noBreak> DispositionType */
{
case 0x00A0: // U+00A0 : NO-BREAK SPACE
case 0x2007: // U+2007 : FIGURE SPACE
case 0x202F: // U+202F : NARROW NO-BREAK SPACE
case 0x00A0: /* U+00A0 : NO-BREAK SPACE */
case 0x2007: /* U+2007 : FIGURE SPACE */
case 0x202F: /* U+202F : NARROW NO-BREAK SPACE */
return 0;
}
return 1;
case UCD_CATEGORY_Cc:
switch (c) // Include control characters marked as White_Space
switch (c) /* Include control characters marked as White_Space */
{
case 0x09: // U+0009 : CHARACTER TABULATION
case 0x0A: // U+000A : LINE FEED
case 0x0B: // U+000B : LINE TABULATION
case 0x0C: // U+000C : FORM FEED
case 0x0D: // U+000D : CARRIAGE RETURN
case 0x85: // U+0085 : NEXT LINE
case 0x09: /* U+0009 : CHARACTER TABULATION */
case 0x0A: /* U+000A : LINE FEED */
case 0x0B: /* U+000B : LINE TABULATION */
case 0x0C: /* U+000C : FORM FEED */
case 0x0D: /* U+000D : CARRIAGE RETURN */
case 0x85: /* U+0085 : NEXT LINE */
return 1;
}
default:
@@ -217,7 +217,7 @@ int ucd_isupper(codepoint_t c)

/* Return 1 if c is an ASCII hexadecimal digit (0-9, A-F or a-f), otherwise 0. */
int ucd_isxdigit(codepoint_t c)
{
return (c >= 0x30 && c <= 0x39) // [0-9]
|| (c >= 0x41 && c <= 0x46) // [A-F]
|| (c >= 0x61 && c <= 0x66); // [a-f]
return (c >= 0x30 && c <= 0x39) /* [0-9] */
|| (c >= 0x41 && c <= 0x46) /* [A-F] */
|| (c >= 0x61 && c <= 0x66); /* [a-f] */
}

+ 10
- 0
src/ucd-tools/src/include/ucd/ucd.h View File

@@ -176,6 +176,7 @@ typedef enum ucd_script_
UCD_SCRIPT_Geok, /**< @brief Khutsuri Script */
UCD_SCRIPT_Geor, /**< @brief Geirgian Script */
UCD_SCRIPT_Glag, /**< @brief Glagolitic Script */
UCD_SCRIPT_Gonm, /**< @brief Masaram Gondi */
UCD_SCRIPT_Goth, /**< @brief Gothic Script */
UCD_SCRIPT_Gran, /**< @brief Grantha Script */
UCD_SCRIPT_Grek, /**< @brief Greek Script */
@@ -273,6 +274,7 @@ typedef enum ucd_script_
UCD_SCRIPT_Sind, /**< @brief Sindhi Script */
UCD_SCRIPT_Sinh, /**< @brief Sinhala Script */
UCD_SCRIPT_Sora, /**< @brief Sora Sompeng Script */
UCD_SCRIPT_Soyo, /**< @brief Soyombo */
UCD_SCRIPT_Sund, /**< @brief Sundanese Script */
UCD_SCRIPT_Sylo, /**< @brief Syloti Nagri Script */
UCD_SCRIPT_Syrc, /**< @brief Syriac Script */
@@ -302,6 +304,7 @@ typedef enum ucd_script_
UCD_SCRIPT_Xpeo, /**< @brief Old Persian Script */
UCD_SCRIPT_Xsux, /**< @brief Cuneiform Script */
UCD_SCRIPT_Yiii, /**< @brief Yi Script */
UCD_SCRIPT_Zanb, /**< @brief Zanabazar Square */
UCD_SCRIPT_Zinh, /**< @brief Inherited Script */
UCD_SCRIPT_Zmth, /**< @brief Mathematical Notation */
UCD_SCRIPT_Zsym, /**< @brief Symbols */
@@ -366,6 +369,8 @@ typedef uint64_t ucd_property;
#define UCD_PROPERTY_EMOJI_PRESENTATION 0x0000000400000000ull /**< @brief Emoji_Presentation */
#define UCD_PROPERTY_EMOJI_MODIFIER 0x0000000800000000ull /**< @brief Emoji_Modifier */
#define UCD_PROPERTY_EMOJI_MODIFIER_BASE 0x0000001000000000ull /**< @brief Emoji_Modifier_Base */
#define UCD_PROPERTY_REGIONAL_INDICATOR 0x0000002000000000ull /**< @brief Regional_Indicator */
#define UCD_PROPERTY_EMOJI_COMPONENT 0x0000004000000000ull /**< @brief Emoji_Component */

// eSpeak NG extended properties:
#define ESPEAKNG_PROPERTY_INVERTED_TERMINAL_PUNCTUATION 0x0010000000000000ull /**< @brief Inverted_Terminal_Punctuation */
@@ -679,6 +684,7 @@ namespace ucd
Geok = UCD_SCRIPT_Geok, /**< @brief Khutsuri Script */
Geor = UCD_SCRIPT_Geor, /**< @brief Geirgian Script */
Glag = UCD_SCRIPT_Glag, /**< @brief Glagolitic Script */
Gonm = UCD_SCRIPT_Gonm, /**< @brief Masaram Gondi */
Goth = UCD_SCRIPT_Goth, /**< @brief Gothic Script */
Gran = UCD_SCRIPT_Gran, /**< @brief Grantha Script */
Grek = UCD_SCRIPT_Grek, /**< @brief Greek Script */
@@ -776,6 +782,7 @@ namespace ucd
Sind = UCD_SCRIPT_Sind, /**< @brief Sindhi Script */
Sinh = UCD_SCRIPT_Sinh, /**< @brief Sinhala Script */
Sora = UCD_SCRIPT_Sora, /**< @brief Sora Sompeng Script */
Soyo = UCD_SCRIPT_Soyo, /**< @brief Soyombo */
Sund = UCD_SCRIPT_Sund, /**< @brief Sundanese Script */
Sylo = UCD_SCRIPT_Sylo, /**< @brief Syloti Nagri Script */
Syrc = UCD_SCRIPT_Syrc, /**< @brief Syriac Script */
@@ -805,6 +812,7 @@ namespace ucd
Xpeo = UCD_SCRIPT_Xpeo, /**< @brief Old Persian Script */
Xsux = UCD_SCRIPT_Xsux, /**< @brief Cuneiform Script */
Yiii = UCD_SCRIPT_Yiii, /**< @brief Yi Script */
Zanb = UCD_SCRIPT_Zanb, /**< @brief Zanabazar Square */
Zinh = UCD_SCRIPT_Zinh, /**< @brief Inherited Script */
Zmth = UCD_SCRIPT_Zmth, /**< @brief Mathematical Notation */
Zsym = UCD_SCRIPT_Zsym, /**< @brief Symbols */
@@ -876,6 +884,8 @@ namespace ucd
Emoji_Presentation = UCD_PROPERTY_EMOJI_PRESENTATION, /**< @brief Emoji_Presentation */
Emoji_Modifier = UCD_PROPERTY_EMOJI_MODIFIER, /**< @brief Emoji_Modifier */
Emoji_Modifier_Base = UCD_PROPERTY_EMOJI_MODIFIER_BASE, /**< @brief Emoji_Modifier_Base */
Regional_Indicator = UCD_PROPERTY_REGIONAL_INDICATOR, /**< @brief Regional_Indicator */
Emoji_Component = UCD_PROPERTY_EMOJI_COMPONENT, /**< @brief Emoji_Component */
};

/** @brief Return the properties of the specified codepoint.

+ 102
- 62
src/ucd-tools/src/proplist.c View File

@@ -78,9 +78,6 @@ static ucd_property properties_Cn(codepoint_t c)
case 0x2000:
if (c == 0x2065) return UCD_PROPERTY_OTHER_DEFAULT_IGNORABLE_CODE_POINT;
break;
case 0x2300:
if (c == 0x23FF) return UCD_PROPERTY_PATTERN_SYNTAX;
break;
case 0x2400:
if (c >= 0x2427 && c <= 0x243F) return UCD_PROPERTY_PATTERN_SYNTAX;
if (c >= 0x244B && c <= 0x245F) return UCD_PROPERTY_PATTERN_SYNTAX;
@@ -90,7 +87,7 @@ static ucd_property properties_Cn(codepoint_t c)
if (c >= 0x2B96 && c <= 0x2B97) return UCD_PROPERTY_PATTERN_SYNTAX;
if (c >= 0x2BBA && c <= 0x2BBC) return UCD_PROPERTY_PATTERN_SYNTAX;
if (c == 0x2BC9) return UCD_PROPERTY_PATTERN_SYNTAX;
if (c >= 0x2BD2 && c <= 0x2BEB) return UCD_PROPERTY_PATTERN_SYNTAX;
if (c >= 0x2BD3 && c <= 0x2BEB) return UCD_PROPERTY_PATTERN_SYNTAX;
if (c >= 0x2BF0 && c <= 0x2BFF) return UCD_PROPERTY_PATTERN_SYNTAX;
break;
case 0x2E00:
@@ -148,11 +145,11 @@ static ucd_property properties_Ll(codepoint_t c)
if (c == 0x029D) return UCD_PROPERTY_SOFT_DOTTED;
break;
case 0x0300:
if (c >= 0x03D0 && c <= 0x03D2) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu
if (c >= 0x03D0 && c <= 0x03D2) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */
if (c == 0x03D5) return UCD_PROPERTY_OTHER_MATH;
if (c >= 0x03F0 && c <= 0x03F1) return UCD_PROPERTY_OTHER_MATH;
if (c == 0x03F3) return UCD_PROPERTY_SOFT_DOTTED;
if (c >= 0x03F4 && c <= 0x03F5) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu
if (c >= 0x03F4 && c <= 0x03F5) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */
break;
case 0x0400:
if (c == 0x0456) return UCD_PROPERTY_SOFT_DOTTED;
@@ -166,12 +163,12 @@ static ucd_property properties_Ll(codepoint_t c)
if (c == 0x1ECB) return UCD_PROPERTY_SOFT_DOTTED;
break;
case 0x2100:
if (c >= 0x210A && c <= 0x2113) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu
if (c >= 0x212F && c <= 0x2131) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu
if (c >= 0x2133 && c <= 0x2134) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu
if (c >= 0x210A && c <= 0x2113) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */
if (c >= 0x212F && c <= 0x2131) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */
if (c >= 0x2133 && c <= 0x2134) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */
if (c == 0x2139) return UCD_PROPERTY_EMOJI;
if (c >= 0x213C && c <= 0x213F) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu
if (c >= 0x2145 && c <= 0x2147) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu
if (c >= 0x213C && c <= 0x213F) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */
if (c >= 0x2145 && c <= 0x2147) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */
if (c >= 0x2148 && c <= 0x2149) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_SOFT_DOTTED;
break;
case 0xFF00:
@@ -179,45 +176,45 @@ static ucd_property properties_Ll(codepoint_t c)
break;
case 0x01D400:
if (c >= 0x01D422 && c <= 0x01D423) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_SOFT_DOTTED;
if (c >= 0x01D400 && c <= 0x01D454) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu
if (c >= 0x01D400 && c <= 0x01D454) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */
if (c >= 0x01D456 && c <= 0x01D457) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_SOFT_DOTTED;
if (c >= 0x01D48A && c <= 0x01D48B) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_SOFT_DOTTED;
if (c >= 0x01D458 && c <= 0x01D49C) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu
if (c >= 0x01D4AE && c <= 0x01D4B9) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu
if (c >= 0x01D458 && c <= 0x01D49C) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */
if (c >= 0x01D4AE && c <= 0x01D4B9) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */
if (c == 0x01D4BB) return UCD_PROPERTY_OTHER_MATH;
if (c >= 0x01D4BE && c <= 0x01D4BF) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_SOFT_DOTTED;
if (c >= 0x01D4BD && c <= 0x01D4C3) return UCD_PROPERTY_OTHER_MATH;
if (c >= 0x01D4F2 && c <= 0x01D4F3) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_SOFT_DOTTED;
if (c >= 0x01D4C5) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu
if (c >= 0x01D4C5) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */
break;
case 0x01D500:
if (c <= 0x01D505) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu
if (c <= 0x01D505) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */
if (c >= 0x01D526 && c <= 0x01D527) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_SOFT_DOTTED;
if (c >= 0x01D51E && c <= 0x01D539) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu
if (c >= 0x01D51E && c <= 0x01D539) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */
if (c >= 0x01D55A && c <= 0x01D55B) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_SOFT_DOTTED;
if (c >= 0x01D58E && c <= 0x01D58F) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_SOFT_DOTTED;
if (c >= 0x01D5C2 && c <= 0x01D5C3) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_SOFT_DOTTED;
if (c >= 0x01D5F6 && c <= 0x01D5F7) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_SOFT_DOTTED;
if (c >= 0x01D552) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu
if (c >= 0x01D552) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */
break;
case 0x01D600:
if (c >= 0x01D62A && c <= 0x01D62B) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_SOFT_DOTTED;
if (c >= 0x01D65E && c <= 0x01D65F) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_SOFT_DOTTED;
if (c >= 0x01D692 && c <= 0x01D693) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_SOFT_DOTTED;
if (c <= 0x01D6A5) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu
if (c <= 0x01D6A5) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */
if (c >= 0x01D6C2 && c <= 0x01D6DA) return UCD_PROPERTY_OTHER_MATH;
if (c >= 0x01D6DC && c <= 0x01D6FA) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu
if (c >= 0x01D6DC && c <= 0x01D6FA) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */
if (c >= 0x01D6FC) return UCD_PROPERTY_OTHER_MATH;
break;
case 0x01D700:
if (c <= 0x01D714) return UCD_PROPERTY_OTHER_MATH;
if (c >= 0x01D716 && c <= 0x01D734) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu
if (c >= 0x01D716 && c <= 0x01D734) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */
if (c >= 0x01D736 && c <= 0x01D74E) return UCD_PROPERTY_OTHER_MATH;
if (c >= 0x01D750 && c <= 0x01D76E) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu
if (c >= 0x01D750 && c <= 0x01D76E) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */
if (c >= 0x01D770 && c <= 0x01D788) return UCD_PROPERTY_OTHER_MATH;
if (c >= 0x01D78A && c <= 0x01D7A8) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu
if (c >= 0x01D78A && c <= 0x01D7A8) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */
if (c >= 0x01D7AA && c <= 0x01D7C2) return UCD_PROPERTY_OTHER_MATH;
if (c >= 0x01D7C4 && c <= 0x01D7CB) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu
if (c >= 0x01D7C4 && c <= 0x01D7CB) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */
break;
}
return 0;
@@ -332,7 +329,7 @@ static ucd_property properties_Lm(codepoint_t c)
break;
case 0x016F00:
if (c >= 0x016F93 && c <= 0x016F9F) return UCD_PROPERTY_DIACRITIC;
if (c == 0x016FE0) return UCD_PROPERTY_EXTENDER;
if (c >= 0x016FE0 && c <= 0x016FE1) return UCD_PROPERTY_EXTENDER;
break;
}
return 0;
@@ -407,19 +404,21 @@ static ucd_property properties_Lo_ideographic(codepoint_t c)
{
case 0x000000:
if (c >= 0x3400 && c <= 0x4DB5) return UCD_PROPERTY_IDEOGRAPHIC | UCD_PROPERTY_UNIFIED_IDEOGRAPH;
if (c >= 0x4E00 && c <= 0x9FD5) return UCD_PROPERTY_IDEOGRAPHIC | UCD_PROPERTY_UNIFIED_IDEOGRAPH;
if (c >= 0x4E00 && c <= 0x9FEA) return UCD_PROPERTY_IDEOGRAPHIC | UCD_PROPERTY_UNIFIED_IDEOGRAPH;
if (c >= 0xF900 && c <= 0xFA6D) return UCD_PROPERTY_IDEOGRAPHIC;
if (c >= 0xFA70 && c <= 0xFAD9) return UCD_PROPERTY_IDEOGRAPHIC;
break;
case 0x010000:
if (c >= 0x017000 && c <= 0x0187EC) return UCD_PROPERTY_IDEOGRAPHIC;
if (c >= 0x018800 && c <= 0x018AF2) return UCD_PROPERTY_IDEOGRAPHIC;
if (c >= 0x01B170 && c <= 0x01B2FB) return UCD_PROPERTY_IDEOGRAPHIC;
break;
case 0x020000:
if (c >= 0x020000 && c <= 0x02A6D6) return UCD_PROPERTY_IDEOGRAPHIC | UCD_PROPERTY_UNIFIED_IDEOGRAPH;
if (c >= 0x02A700 && c <= 0x02B734) return UCD_PROPERTY_IDEOGRAPHIC | UCD_PROPERTY_UNIFIED_IDEOGRAPH;
if (c >= 0x02B740 && c <= 0x02B81D) return UCD_PROPERTY_IDEOGRAPHIC | UCD_PROPERTY_UNIFIED_IDEOGRAPH;
if (c >= 0x02B820 && c <= 0x02CEA1) return UCD_PROPERTY_IDEOGRAPHIC | UCD_PROPERTY_UNIFIED_IDEOGRAPH;
if (c >= 0x02CEB0 && c <= 0x02EBE0) return UCD_PROPERTY_IDEOGRAPHIC | UCD_PROPERTY_UNIFIED_IDEOGRAPH;
if (c >= 0x02F800 && c <= 0x02FA1D) return UCD_PROPERTY_IDEOGRAPHIC;
break;
}
@@ -434,8 +433,8 @@ static ucd_property properties_Lu(codepoint_t c)
if (c >= 0x0041 && c <= 0x0046) return UCD_PROPERTY_HEX_DIGIT | UCD_PROPERTY_ASCII_HEX_DIGIT;
break;
case 0x0300:
if (c >= 0x03D0 && c <= 0x03D2) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu
if (c >= 0x03F4 && c <= 0x03F5) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu
if (c >= 0x03D0 && c <= 0x03D2) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */
if (c >= 0x03F4 && c <= 0x03F5) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */
break;
case 0xFF00:
if (c >= 0xFF21 && c <= 0xFF26) return UCD_PROPERTY_HEX_DIGIT;
@@ -443,49 +442,49 @@ static ucd_property properties_Lu(codepoint_t c)
case 0x2100:
if (c == 0x2102) return UCD_PROPERTY_OTHER_MATH;
if (c == 0x2107) return UCD_PROPERTY_OTHER_MATH;
if (c >= 0x210A && c <= 0x2113) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu
if (c >= 0x210A && c <= 0x2113) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */
if (c == 0x2115) return UCD_PROPERTY_OTHER_MATH;
if (c >= 0x2119 && c <= 0x211D) return UCD_PROPERTY_OTHER_MATH;
if (c == 0x2124) return UCD_PROPERTY_OTHER_MATH;
if (c == 0x2128) return UCD_PROPERTY_OTHER_MATH;
if (c >= 0x212C && c <= 0x212D) return UCD_PROPERTY_OTHER_MATH;
if (c >= 0x212F && c <= 0x2131) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu
if (c >= 0x2133 && c <= 0x2134) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu
if (c >= 0x213C && c <= 0x213F) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu
if (c >= 0x2145 && c <= 0x2149) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu
if (c >= 0x212F && c <= 0x2131) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */
if (c >= 0x2133 && c <= 0x2134) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */
if (c >= 0x213C && c <= 0x213F) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */
if (c >= 0x2145 && c <= 0x2149) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */
break;
case 0x01D400:
if (c >= 0x01D400 && c <= 0x01D454) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu
if (c >= 0x01D456 && c <= 0x01D49C) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu
if (c >= 0x01D400 && c <= 0x01D454) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */
if (c >= 0x01D456 && c <= 0x01D49C) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */
if (c >= 0x01D49E && c <= 0x01D49F) return UCD_PROPERTY_OTHER_MATH;
if (c == 0x01D4A2) return UCD_PROPERTY_OTHER_MATH;
if (c >= 0x01D4A5 && c <= 0x01D4A6) return UCD_PROPERTY_OTHER_MATH;
if (c >= 0x01D4A9 && c <= 0x01D4AC) return UCD_PROPERTY_OTHER_MATH;
if (c >= 0x01D4AE && c <= 0x01D4B9) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu
if (c >= 0x01D4C5) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu
if (c >= 0x01D4AE && c <= 0x01D4B9) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */
if (c >= 0x01D4C5) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */
break;
case 0x01D500:
if (c <= 0x01D505) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu
if (c <= 0x01D505) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */
if (c >= 0x01D507 && c <= 0x01D50A) return UCD_PROPERTY_OTHER_MATH;
if (c >= 0x01D50D && c <= 0x01D514) return UCD_PROPERTY_OTHER_MATH;
if (c >= 0x01D516 && c <= 0x01D51C) return UCD_PROPERTY_OTHER_MATH;
if (c >= 0x01D51E && c <= 0x01D539) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu
if (c >= 0x01D51E && c <= 0x01D539) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */
if (c >= 0x01D53B && c <= 0x01D53E) return UCD_PROPERTY_OTHER_MATH;
if (c >= 0x01D540 && c <= 0x01D544) return UCD_PROPERTY_OTHER_MATH;
if (c == 0x01D546) return UCD_PROPERTY_OTHER_MATH;
if (c >= 0x01D54A && c <= 0x01D550) return UCD_PROPERTY_OTHER_MATH;
if (c >= 0x01D552) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu
if (c >= 0x01D552) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */
break;
case 0x01D600:
if (c <= 0x01D6A5) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu
if (c <= 0x01D6A5) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */
if (c >= 0x01D6A8 && c <= 0x01D6C0) return UCD_PROPERTY_OTHER_MATH;
if (c >= 0x01D6DC && c <= 0x01D6FA) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu
if (c >= 0x01D6DC && c <= 0x01D6FA) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */
break;
case 0x01D700:
if (c >= 0x01D716 && c <= 0x01D734) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu
if (c >= 0x01D750 && c <= 0x01D76E) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu
if (c >= 0x01D78A && c <= 0x01D7A8) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu
if (c >= 0x01D7C4 && c <= 0x01D7CB) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu
if (c >= 0x01D716 && c <= 0x01D734) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */
if (c >= 0x01D750 && c <= 0x01D76E) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */
if (c >= 0x01D78A && c <= 0x01D7A8) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */
if (c >= 0x01D7C4 && c <= 0x01D7CB) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */
break;
}
return 0;
@@ -613,6 +612,7 @@ static ucd_property properties_Mc(codepoint_t c)
if (c >= 0x1C34 && c <= 0x1C35) return UCD_PROPERTY_OTHER_ALPHABETIC;
if (c == 0x1CE1) return UCD_PROPERTY_DIACRITIC;
if (c >= 0x1CF2 && c <= 0x1CF3) return UCD_PROPERTY_OTHER_ALPHABETIC;
if (c == 0x1CF7) return UCD_PROPERTY_DIACRITIC;
break;
case 0x3000:
if (c >= 0x302E && c <= 0x302F) return UCD_PROPERTY_DIACRITIC | UCD_PROPERTY_OTHER_GRAPHEME_EXTEND;
@@ -709,6 +709,12 @@ static ucd_property properties_Mc(codepoint_t c)
if (c >= 0x011720 && c <= 0x011721) return UCD_PROPERTY_OTHER_ALPHABETIC;
if (c == 0x011726) return UCD_PROPERTY_OTHER_ALPHABETIC;
break;
case 0x011A00:
if (c >= 0x011A07 && c <= 0x011A08) return UCD_PROPERTY_OTHER_ALPHABETIC;
if (c == 0x011A39) return UCD_PROPERTY_OTHER_ALPHABETIC;
if (c >= 0x011A57 && c <= 0x011A58) return UCD_PROPERTY_OTHER_ALPHABETIC;
if (c == 0x011A97) return UCD_PROPERTY_OTHER_ALPHABETIC;
break;
case 0x011C00:
if (c == 0x011C2F) return UCD_PROPERTY_OTHER_ALPHABETIC;
if (c == 0x011C3E) return UCD_PROPERTY_OTHER_ALPHABETIC;
@@ -818,6 +824,8 @@ static ucd_property properties_Mn(codepoint_t c)
if (c >= 0x0AC7 && c <= 0x0AC8) return UCD_PROPERTY_OTHER_ALPHABETIC;
if (c == 0x0ACD) return UCD_PROPERTY_DIACRITIC;
if (c >= 0x0AE2 && c <= 0x0AE3) return UCD_PROPERTY_OTHER_ALPHABETIC;
if (c >= 0x0AFA && c <= 0x0AFC) return UCD_PROPERTY_OTHER_ALPHABETIC;
if (c >= 0x0AFD && c <= 0x0AFF) return UCD_PROPERTY_DIACRITIC;
break;
case 0x0B00:
if (c == 0x0B01) return UCD_PROPERTY_OTHER_ALPHABETIC;
@@ -848,7 +856,8 @@ static ucd_property properties_Mn(codepoint_t c)
if (c >= 0x0CE2 && c <= 0x0CE3) return UCD_PROPERTY_OTHER_ALPHABETIC;
break;
case 0x0D00:
if (c == 0x0D01) return UCD_PROPERTY_OTHER_ALPHABETIC;
if (c >= 0x0D00 && c <= 0x0D01) return UCD_PROPERTY_OTHER_ALPHABETIC;
if (c >= 0x0D3B && c <= 0x0D3C) return UCD_PROPERTY_DIACRITIC;
if (c >= 0x0D41 && c <= 0x0D44) return UCD_PROPERTY_OTHER_ALPHABETIC;
if (c == 0x0D4D) return UCD_PROPERTY_DIACRITIC;
if (c >= 0x0D62 && c <= 0x0D63) return UCD_PROPERTY_OTHER_ALPHABETIC;
@@ -964,7 +973,7 @@ static ucd_property properties_Mn(codepoint_t c)
case 0x1D00:
if (c >= 0x1DC4 && c <= 0x1DCF) return UCD_PROPERTY_DIACRITIC;
if (c >= 0x1DE7 && c <= 0x1DF4) return UCD_PROPERTY_OTHER_ALPHABETIC;
if (c == 0x1DF5) return UCD_PROPERTY_DIACRITIC;
if (c >= 0x1DF5 && c <= 0x1DF9) return UCD_PROPERTY_DIACRITIC;
if (c >= 0x1DFD && c <= 0x1DFF) return UCD_PROPERTY_DIACRITIC;
break;
case 0x2000:
@@ -1111,6 +1120,16 @@ static ucd_property properties_Mn(codepoint_t c)
if (c >= 0x011727 && c <= 0x01172A) return UCD_PROPERTY_OTHER_ALPHABETIC;
if (c == 0x01172B) return UCD_PROPERTY_DIACRITIC;
break;
case 0x011A00:
if (c >= 0x011A01 && c <= 0x011A0A) return UCD_PROPERTY_OTHER_ALPHABETIC;
if (c == 0x011A34) return UCD_PROPERTY_DIACRITIC;
if (c >= 0x011A35 && c <= 0x011A3E) return UCD_PROPERTY_OTHER_ALPHABETIC;
if (c == 0x011A47) return UCD_PROPERTY_DIACRITIC;
if (c >= 0x011A51 && c <= 0x011A5B) return UCD_PROPERTY_OTHER_ALPHABETIC;
if (c >= 0x011A8A && c <= 0x011A96) return UCD_PROPERTY_OTHER_ALPHABETIC;
if (c == 0x011A98) return UCD_PROPERTY_EXTENDER;
if (c == 0x011A99) return UCD_PROPERTY_DIACRITIC;
break;
case 0x011C00:
if (c >= 0x011C30 && c <= 0x011C36) return UCD_PROPERTY_OTHER_ALPHABETIC;
if (c >= 0x011C38 && c <= 0x011C3D) return UCD_PROPERTY_OTHER_ALPHABETIC;
@@ -1120,6 +1139,16 @@ static ucd_property properties_Mn(codepoint_t c)
if (c >= 0x011CB2 && c <= 0x011CB3) return UCD_PROPERTY_OTHER_ALPHABETIC;
if (c >= 0x011CB5 && c <= 0x011CB6) return UCD_PROPERTY_OTHER_ALPHABETIC;
break;
case 0x011D00:
if (c >= 0x011D31 && c <= 0x011D36) return UCD_PROPERTY_OTHER_ALPHABETIC;
if (c == 0x011D3A) return UCD_PROPERTY_OTHER_ALPHABETIC;
if (c >= 0x011D3C && c <= 0x011D3D) return UCD_PROPERTY_OTHER_ALPHABETIC;
if (c >= 0x011D3F && c <= 0x011D41) return UCD_PROPERTY_OTHER_ALPHABETIC;
if (c == 0x011D42) return UCD_PROPERTY_DIACRITIC;
if (c == 0x011D43) return UCD_PROPERTY_OTHER_ALPHABETIC;
if (c >= 0x011D44 && c <= 0x011D45) return UCD_PROPERTY_DIACRITIC;
if (c == 0x011D47) return UCD_PROPERTY_OTHER_ALPHABETIC;
break;
case 0x016A00:
if (c >= 0x016AF0 && c <= 0x016AF4) return UCD_PROPERTY_DIACRITIC;
break;
@@ -1165,7 +1194,7 @@ static ucd_property properties_Nd(codepoint_t c)
switch (c & 0xFFFFFF00)
{
case 0x0000:
if (c >= 0x0030 && c <= 0x0039) return UCD_PROPERTY_HEX_DIGIT | UCD_PROPERTY_ASCII_HEX_DIGIT | UCD_PROPERTY_EMOJI;
if (c >= 0x0030 && c <= 0x0039) return UCD_PROPERTY_HEX_DIGIT | UCD_PROPERTY_ASCII_HEX_DIGIT | UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_COMPONENT;
break;
case 0xFF00:
if (c >= 0xFF10 && c <= 0xFF19) return UCD_PROPERTY_HEX_DIGIT;
@@ -1279,10 +1308,10 @@ static ucd_property properties_Pe(codepoint_t c)
break;
case 0x2700:
if (c == 0x27C6) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_PATTERN_SYNTAX;
if (c >= 0x27E6 && c <= 0x27EF) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_PATTERN_SYNTAX; // Pe|Ps
if (c >= 0x27E6 && c <= 0x27EF) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_PATTERN_SYNTAX; /* Pe|Ps */
return UCD_PROPERTY_PATTERN_SYNTAX;
case 0x2900:
return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_PATTERN_SYNTAX; // Pe|Ps
return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_PATTERN_SYNTAX; /* Pe|Ps */
case 0x2E00:
return UCD_PROPERTY_PATTERN_SYNTAX;
case 0x3000:
@@ -1337,9 +1366,9 @@ static ucd_property properties_Po(codepoint_t c)
case 0x0000:
if (c == 0x0021) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_SENTENCE_TERMINAL | UCD_PROPERTY_PATTERN_SYNTAX | ESPEAKNG_PROPERTY_EXCLAMATION_MARK;
if (c == 0x0022) return UCD_PROPERTY_QUOTATION_MARK | UCD_PROPERTY_PATTERN_SYNTAX;
if (c == 0x0023) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_PATTERN_SYNTAX;
if (c == 0x0023) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_PATTERN_SYNTAX | UCD_PROPERTY_EMOJI_COMPONENT;
if (c == 0x0027) return UCD_PROPERTY_QUOTATION_MARK | UCD_PROPERTY_PATTERN_SYNTAX;
if (c == 0x002A) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_PATTERN_SYNTAX;
if (c == 0x002A) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_PATTERN_SYNTAX | UCD_PROPERTY_EMOJI_COMPONENT;
if (c == 0x002C) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_PATTERN_SYNTAX | ESPEAKNG_PROPERTY_COMMA;
if (c == 0x002E) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_SENTENCE_TERMINAL | UCD_PROPERTY_PATTERN_SYNTAX | ESPEAKNG_PROPERTY_FULL_STOP;
if (c == 0x003A) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_PATTERN_SYNTAX | ESPEAKNG_PROPERTY_COLON;
@@ -1609,6 +1638,11 @@ static ucd_property properties_Po(codepoint_t c)
case 0x11700:
if (c >= 0x01173C && c <= 0x01173E) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_SENTENCE_TERMINAL;
break;
case 0x11A00:
if (c >= 0x011A42 && c <= 0x011A43) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_SENTENCE_TERMINAL;
if (c >= 0x011A9B && c <= 0x011A9C) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_SENTENCE_TERMINAL;
if (c >= 0x011AA1 && c <= 0x011AA2) return UCD_PROPERTY_TERMINAL_PUNCTUATION;
break;
case 0x11C00:
if (c >= 0x011C41 && c <= 0x011C42) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_SENTENCE_TERMINAL;
if (c == 0x011C43) return UCD_PROPERTY_TERMINAL_PUNCTUATION;
@@ -1664,7 +1698,7 @@ static ucd_property properties_Ps(codepoint_t c)
break;
case 0x2700:
if (c == 0x27C5) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_PATTERN_SYNTAX;
if (c >= 0x27E6 && c <= 0x27EF) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_PATTERN_SYNTAX; // Pe|Ps
if (c >= 0x27E6 && c <= 0x27EF) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_PATTERN_SYNTAX; /* Pe|Ps */
return UCD_PROPERTY_PATTERN_SYNTAX;
case 0x2900:
return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_PATTERN_SYNTAX;
@@ -1746,7 +1780,7 @@ static ucd_property properties_Sk(codepoint_t c)
if (c == 0xFFE3) return UCD_PROPERTY_DIACRITIC;
break;
case 0x01F300:
return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION | UCD_PROPERTY_EMOJI_MODIFIER;
return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION | UCD_PROPERTY_EMOJI_MODIFIER | UCD_PROPERTY_EMOJI_COMPONENT;
}
return 0;
}
@@ -1839,7 +1873,7 @@ static ucd_property properties_So(codepoint_t c)
if (c == 0x23F3) return UCD_PROPERTY_PATTERN_SYNTAX | UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION;
if (c >= 0x23E9 && c <= 0x23F3) return UCD_PROPERTY_PATTERN_SYNTAX | UCD_PROPERTY_EMOJI;
if (c >= 0x23F8 && c <= 0x23FA) return UCD_PROPERTY_PATTERN_SYNTAX | UCD_PROPERTY_EMOJI;
if (c >= 0x23E3 && c <= 0x23FE) return UCD_PROPERTY_PATTERN_SYNTAX;
if (c >= 0x23E3) return UCD_PROPERTY_PATTERN_SYNTAX;
break;
case 0x2400:
if (c >= 0x2400 && c <= 0x244A) return UCD_PROPERTY_PATTERN_SYNTAX;
@@ -1979,7 +2013,7 @@ static ucd_property properties_So(codepoint_t c)
if (c >= 0x01F170 && c <= 0x01F189) return UCD_PROPERTY_OTHER_ALPHABETIC | UCD_PROPERTY_OTHER_UPPERCASE;
if (c == 0x01F18E) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION;
if (c >= 0x01F191 && c <= 0x01F19A) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION;
if (c >= 0x01F1E6) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION;
if (c >= 0x01F1E6) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION | UCD_PROPERTY_REGIONAL_INDICATOR | UCD_PROPERTY_EMOJI_COMPONENT;
break;
case 0x01F200:
if (c == 0x01F201) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION;
@@ -2074,23 +2108,29 @@ static ucd_property properties_So(codepoint_t c)
if (c >= 0x01F6EB && c <= 0x01F6EC) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION;
if (c == 0x01F6F0) return UCD_PROPERTY_EMOJI;
if (c == 0x01F6F3) return UCD_PROPERTY_EMOJI;
if (c >= 0x01F6F4 && c <= 0x01F6F6) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION;
if (c >= 0x01F6F4 && c <= 0x01F6F8) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION;
break;
case 0x01F900:
if (c <= 0x01F90B) return 0;
if (c >= 0x01F918 && c <= 0x01F91C) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION | UCD_PROPERTY_EMOJI_MODIFIER_BASE;
if (c >= 0x01F910 && c <= 0x01F91D) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION;
if (c == 0x01F91E) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION | UCD_PROPERTY_EMOJI_MODIFIER_BASE;
if (c >= 0x01F91E && c <= 0x01F91F) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION | UCD_PROPERTY_EMOJI_MODIFIER_BASE;
if (c == 0x01F926) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION | UCD_PROPERTY_EMOJI_MODIFIER_BASE;
if (c >= 0x01F920 && c <= 0x01F927) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION;
if (c == 0x01F930) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION | UCD_PROPERTY_EMOJI_MODIFIER_BASE;
if (c >= 0x01F920 && c <= 0x01F92F) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION;
if (c >= 0x01F930 && c <= 0x01F932) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION | UCD_PROPERTY_EMOJI_MODIFIER_BASE;
if (c == 0x01F93B) return 0;
if (c >= 0x01F93A && c <= 0x01F93C) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION;
if (c >= 0x01F933 && c <= 0x01F93E) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION | UCD_PROPERTY_EMOJI_MODIFIER_BASE;
if (c == 0x01F946) return 0;
if (c >= 0x01F940 && c <= 0x01F94B) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION;
if (c == 0x01F94C) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION;
if (c >= 0x01F950 && c <= 0x01F95E) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION;
if (c >= 0x01F95F && c <= 0x01F96B) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION;
if (c >= 0x01F980 && c <= 0x01F991) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION;
if (c >= 0x01F992 && c <= 0x01F997) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION;
if (c == 0x01F9C0) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION;
if (c >= 0x01F9D1 && c <= 0x01F9DD) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION | UCD_PROPERTY_EMOJI_MODIFIER_BASE;
if (c >= 0x01F9D0 && c <= 0x01F9E6) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION;
return UCD_PROPERTY_EMOJI;
}
return 0;
@@ -2132,6 +2172,6 @@ ucd_property ucd_properties(codepoint_t c, ucd_category category)
case UCD_CATEGORY_Zl: return UCD_PROPERTY_WHITE_SPACE | UCD_PROPERTY_PATTERN_WHITE_SPACE;
case UCD_CATEGORY_Zp: return UCD_PROPERTY_WHITE_SPACE | UCD_PROPERTY_PATTERN_WHITE_SPACE | ESPEAKNG_PROPERTY_PARAGRAPH_SEPARATOR;
case UCD_CATEGORY_Zs: return properties_Zs(c);
default: return 0; // Co Cs Ii Lt Me
default: return 0; /* Co Cs Ii Lt Me */
};
}

+ 765
- 701
src/ucd-tools/src/scripts.c
File diff suppressed because it is too large
View File


+ 3
- 0
src/ucd-tools/src/tostring.c View File

@@ -120,6 +120,7 @@ const char *ucd_get_script_string(ucd_script s)
"Geok",
"Geor",
"Glag",
"Gonm",
"Goth",
"Gran",
"Grek",
@@ -217,6 +218,7 @@ const char *ucd_get_script_string(ucd_script s)
"Sind",
"Sinh",
"Sora",
"Soyo",
"Sund",
"Sylo",
"Syrc",
@@ -246,6 +248,7 @@ const char *ucd_get_script_string(ucd_script s)
"Xpeo",
"Xsux",
"Yiii",
"Zanb",
"Zinh",
"Zmth",
"Zsym",

+ 36
- 26
src/ucd-tools/tests/printcdata.c View File

@@ -17,6 +17,7 @@
* along with ucd-tools. If not, see <http://www.gnu.org/licenses/>.
*/

#include "config.h"
#include "ucd/ucd.h"

#include <locale.h>
@@ -25,6 +26,13 @@
#include <wchar.h>
#include <wctype.h>

#ifndef HAVE_ISWBLANK
/* Fallback definition compiled only when HAVE_ISWBLANK is not defined
 * (presumably provided by config.h when the C library has iswblank --
 * TODO confirm against the build configuration).
 *
 * Returns 1 when iswspace() accepts c and c is not in 0x0A..0x0D
 * (LINE FEED, LINE TABULATION, FORM FEED, CARRIAGE RETURN); otherwise 0.
 * NOTE(review): any other character that iswspace() accepts in the active
 * locale is therefore reported as blank -- confirm that this matches the
 * expected iswblank() results for the test data.
 */
static int iswblank(wint_t c)
{
return iswspace(c) && !(c >= 0x0A && c <= 0x0D);
}
#endif

void fput_utf8c(FILE *out, codepoint_t c)
{
if (c < 0x80)
@@ -86,7 +94,7 @@ void uprintf_codepoint(FILE *out, codepoint_t c, char mode)
{
switch (mode)
{
case 'c': // character
case 'c': /* character */
switch (c)
{
case '\t': fputs("\\t", out); break;
@@ -95,10 +103,10 @@ void uprintf_codepoint(FILE *out, codepoint_t c, char mode)
default: fput_utf8c(out, c); break;
}
break;
case 'h': // hexadecimal (lower)
case 'h': /* hexadecimal (lower) */
fprintf(out, "%06x", c);
break;
case 'H': // hexadecimal (upper)
case 'H': /* hexadecimal (upper) */
fprintf(out, "%06X", c);
break;
}
@@ -108,40 +116,40 @@ void uprintf_is(FILE *out, codepoint_t c, char mode)
{
switch (mode)
{
case 'A': // alpha-numeric
case 'A': /* alpha-numeric */
fputc(iswalnum(c) ? '1' : '0', out);
break;
case 'a': // alpha
case 'a': /* alpha */
fputc(iswalpha(c) ? '1' : '0', out);
break;
case 'b': // blank
case 'b': /* blank */
fputc(iswblank(c) ? '1' : '0', out);
break;
case 'c': // control
case 'c': /* control */
fputc(iswcntrl(c) ? '1' : '0', out);
break;
case 'd': // numeric
case 'd': /* numeric */
fputc(iswdigit(c) ? '1' : '0', out);
break;
case 'g': // glyph
case 'g': /* glyph */
fputc(iswgraph(c) ? '1' : '0', out);
break;
case 'l': // lower case
case 'l': /* lower case */
fputc(iswlower(c) ? '1' : '0', out);
break;
case 'P': // printable
case 'P': /* printable */
fputc(iswprint(c) ? '1' : '0', out);
break;
case 'p': // punctuation
case 'p': /* punctuation */
fputc(iswpunct(c) ? '1' : '0', out);
break;
case 's': // whitespace
case 's': /* whitespace */
fputc(iswspace(c) ? '1' : '0', out);
break;
case 'u': // upper case
case 'u': /* upper case */
fputc(iswupper(c) ? '1' : '0', out);
break;
case 'x': // xdigit
case 'x': /* xdigit */
fputc(iswxdigit(c) ? '1' : '0', out);
break;
}
@@ -154,31 +162,31 @@ void uprintf(FILE *out, codepoint_t c, const char *format)
case '%':
switch (*++format)
{
case 'c': // category
case 'c': /* category */
fputs(ucd_get_category_string(ucd_lookup_category(c)), out);
break;
case 'C': // category group
case 'C': /* category group */
fputs(ucd_get_category_group_string(ucd_lookup_category_group(c)), out);
break;
case 'p': // codepoint
case 'p': /* codepoint */
uprintf_codepoint(out, c, *++format);
break;
case 'P': // properties
case 'P': /* properties */
fprintf(out, "%016llx", ucd_properties(c, ucd_lookup_category(c)));
break;
case 'i': // is*
case 'i': /* is* */
uprintf_is(out, c, *++format);
break;
case 'L': // lowercase
case 'L': /* lowercase */
uprintf_codepoint(out, towlower(c), *++format);
break;
case 's': // script
case 's': /* script */
fputs(ucd_get_script_string(ucd_lookup_script(c)), out);
break;
case 'T': // titlecase
case 'T': /* titlecase */
uprintf_codepoint(out, ucd_totitle(c), *++format);
break;
case 'U': // uppercase
case 'U': /* uppercase */
uprintf_codepoint(out, towupper(c), *++format);
break;
}
@@ -224,7 +232,8 @@ int main(int argc, char **argv)
{
FILE *in = NULL;
const char *format = NULL;
for (int argn = 1; argn != argc; ++argn)
int argn;
for (argn = 1; argn != argc; ++argn)
{
const char *arg = argv[argn];
if (!strcmp(arg, "--stdin") || !strcmp(arg, "-"))
@@ -250,7 +259,8 @@ int main(int argc, char **argv)
}
else
{
for (codepoint_t c = 0; c <= 0x10FFFF; ++c)
codepoint_t c;
for (c = 0; c <= 0x10FFFF; ++c)
uprintf(stdout, c, format ? format :
"%pH %s %C %c %UH %LH %TH %id %ix %ic %is %ib %ip %iP %ig %iA %ia %iu %il %P\n");
}

+ 28
- 26
src/ucd-tools/tests/printucddata.c View File

@@ -83,7 +83,7 @@ void uprintf_codepoint(FILE *out, codepoint_t c, char mode)
{
switch (mode)
{
case 'c': // character
case 'c': /* character */
switch (c)
{
case '\t': fputs("\\t", out); break;
@@ -92,10 +92,10 @@ void uprintf_codepoint(FILE *out, codepoint_t c, char mode)
default: fput_utf8c(out, c); break;
}
break;
case 'h': // hexadecimal (lower)
case 'h': /* hexadecimal (lower) */
fprintf(out, "%06x", c);
break;
case 'H': // hexadecimal (upper)
case 'H': /* hexadecimal (upper) */
fprintf(out, "%06X", c);
break;
}
@@ -105,40 +105,40 @@ void uprintf_is(FILE *out, codepoint_t c, char mode)
{
switch (mode)
{
case 'A': // alpha-numeric
case 'A': /* alpha-numeric */
fputc(ucd_isalnum(c) ? '1' : '0', out);
break;
case 'a': // alpha
case 'a': /* alpha */
fputc(ucd_isalpha(c) ? '1' : '0', out);
break;
case 'b': // blank
case 'b': /* blank */
fputc(ucd_isblank(c) ? '1' : '0', out);
break;
case 'c': // control
case 'c': /* control */
fputc(ucd_iscntrl(c) ? '1' : '0', out);
break;
case 'd': // numeric
case 'd': /* numeric */
fputc(ucd_isdigit(c) ? '1' : '0', out);
break;
case 'g': // glyph
case 'g': /* glyph */
fputc(ucd_isgraph(c) ? '1' : '0', out);
break;
case 'l': // lower case
case 'l': /* lower case */
fputc(ucd_islower(c) ? '1' : '0', out);
break;
case 'P': // printable
case 'P': /* printable */
fputc(ucd_isprint(c) ? '1' : '0', out);
break;
case 'p': // punctuation
case 'p': /* punctuation */
fputc(ucd_ispunct(c) ? '1' : '0', out);
break;
case 's': // whitespace
case 's': /* whitespace */
fputc(ucd_isspace(c) ? '1' : '0', out);
break;
case 'u': // upper case
case 'u': /* upper case */
fputc(ucd_isupper(c) ? '1' : '0', out);
break;
case 'x': // xdigit
case 'x': /* xdigit */
fputc(ucd_isxdigit(c) ? '1' : '0', out);
break;
}
@@ -151,31 +151,31 @@ void uprintf(FILE *out, codepoint_t c, const char *format)
case '%':
switch (*++format)
{
case 'c': // category
case 'c': /* category */
fputs(ucd_get_category_string(ucd_lookup_category(c)), out);
break;
case 'C': // category group
case 'C': /* category group */
fputs(ucd_get_category_group_string(ucd_lookup_category_group(c)), out);
break;
case 'p': // codepoint
case 'p': /* codepoint */
uprintf_codepoint(out, c, *++format);
break;
case 'P': // properties
case 'P': /* properties */
fprintf(out, "%016llx", ucd_properties(c, ucd_lookup_category(c)));
break;
case 'i': // is*
case 'i': /* is* */
uprintf_is(out, c, *++format);
break;
case 'L': // lowercase
case 'L': /* lowercase */
uprintf_codepoint(out, ucd_tolower(c), *++format);
break;
case 's': // script
case 's': /* script */
fputs(ucd_get_script_string(ucd_lookup_script(c)), out);
break;
case 'T': // titlecase
case 'T': /* titlecase */
uprintf_codepoint(out, ucd_totitle(c), *++format);
break;
case 'U': // uppercase
case 'U': /* uppercase */
uprintf_codepoint(out, ucd_toupper(c), *++format);
break;
}
@@ -221,7 +221,8 @@ int main(int argc, char **argv)
{
FILE *in = NULL;
const char *format = NULL;
for (int argn = 1; argn != argc; ++argn)
int argn;
for (argn = 1; argn != argc; ++argn)
{
const char *arg = argv[argn];
if (!strcmp(arg, "--stdin") || !strcmp(arg, "-"))
@@ -245,7 +246,8 @@ int main(int argc, char **argv)
}
else
{
for (codepoint_t c = 0; c <= 0x10FFFF; ++c)
codepoint_t c;
for (c = 0; c <= 0x10FFFF; ++c)
uprintf(stdout, c, format ? format :
"%pH %s %C %c %UH %LH %TH %id %ix %ic %is %ib %ip %iP %ig %iA %ia %iu %il %P\n");
}

+ 4
- 3
src/ucd-tools/tools/case.py View File

@@ -51,14 +51,15 @@ if __name__ == '__main__':
* along with ucd-tools. If not, see <http://www.gnu.org/licenses/>.
*/

// NOTE: This file is automatically generated from the UnicodeData.txt file in
// the Unicode Character database by the ucd-tools/tools/categories.py script.
/* NOTE: This file is automatically generated from the UnicodeData.txt file in
* the Unicode Character database by the ucd-tools/tools/categories.py script.
*/

#include "ucd/ucd.h"

#include <stddef.h>

// Unicode Character Data %s
/* Unicode Character Data %s */

struct case_conversion_entry
{

+ 8
- 7
src/ucd-tools/tools/categories.py View File

@@ -110,8 +110,9 @@ if __name__ == '__main__':
* along with ucd-tools. If not, see <http://www.gnu.org/licenses/>.
*/

// NOTE: This file is automatically generated from the UnicodeData.txt file in
// the Unicode Character database by the ucd-tools/tools/categories.py script.
/* NOTE: This file is automatically generated from the UnicodeData.txt file in
* the Unicode Character database by the ucd-tools/tools/categories.py script.
*/

#include "ucd/ucd.h"

@@ -149,7 +150,7 @@ if __name__ == '__main__':
#define Zs UCD_CATEGORY_Zs
#define Ii UCD_CATEGORY_Ii

// Unicode Character Data %s
/* Unicode Character Data %s */
""" % ucd_version)

for category in special_categories:
@@ -187,7 +188,7 @@ if __name__ == '__main__':
sys.stdout.write('{\n')
for codepoint, table in sorted(category_tables[table_index].items()):
if isinstance(table, str):
sys.stdout.write('\tcategories_%s, // %s\n' % (table, codepoint))
sys.stdout.write('\tcategories_%s, /* %s */\n' % (table, codepoint))
else:
sys.stdout.write('\tcategories_%s,\n' % codepoint)
sys.stdout.write('};\n')
@@ -197,14 +198,14 @@ if __name__ == '__main__':
sys.stdout.write('{\n')
for codepoints, category, comment in category_sets:
if category:
sys.stdout.write('\tif (c <= 0x%s) return %s; // %s : %s\n' % (codepoints.last, category, codepoints, comment))
sys.stdout.write('\tif (c <= 0x%s) return %s; /* %s : %s */\n' % (codepoints.last, category, codepoints, comment))
else:
sys.stdout.write('\tif (c <= 0x%s) // %s\n' % (codepoints.last, codepoints))
sys.stdout.write('\tif (c <= 0x%s) /* %s */\n' % (codepoints.last, codepoints))
sys.stdout.write('\t{\n')
sys.stdout.write('\t\tconst uint8_t *table = categories_%s_%s[(c - 0x%s) / 256];\n' % (codepoints.first, codepoints.last, codepoints.first))
sys.stdout.write('\t\treturn (ucd_category)table[c % 256];\n')
sys.stdout.write('\t}\n')
sys.stdout.write('\treturn Ii; // Invalid Unicode Codepoint\n')
sys.stdout.write('\treturn Ii; /* Invalid Unicode Codepoint */\n')
sys.stdout.write('}\n')

sys.stdout.write("""

+ 2
- 0
src/ucd-tools/tools/printdata.py View File

@@ -166,6 +166,8 @@ def properties(data):
props += (2 ** 34) * data.get('Emoji_Presentation', 0) # emoji-data
props += (2 ** 35) * data.get('Emoji_Modifier', 0) # emoji-data
props += (2 ** 36) * data.get('Emoji_Modifier_Base', 0) # emoji-data
props += (2 ** 37) * data.get('Regional_Indicator', 0) # PropList 10.0.0
props += (2 ** 38) * data.get('Emoji_Component', 0) # emoji-data 5.0
# eSpeak NG extended properties:
props += (2 ** 52) * data.get('Inverted_Terminal_Punctuation', 0)
props += (2 ** 53) * data.get('Punctuation_In_Word', 0)

+ 11
- 7
src/ucd-tools/tools/scripts.py View File

@@ -104,8 +104,9 @@ if __name__ == '__main__':
* along with ucd-tools. If not, see <http://www.gnu.org/licenses/>.
*/

// NOTE: This file is automatically generated from the Scripts.txt file in
// the Unicode Character database by the ucd-tools/tools/scripts.py script.
/* NOTE: This file is automatically generated from the Scripts.txt file in
* the Unicode Character database by the ucd-tools/tools/scripts.py script.
*/

#include "ucd/ucd.h"

@@ -152,6 +153,7 @@ if __name__ == '__main__':
#define Geok UCD_SCRIPT_Geok
#define Geor UCD_SCRIPT_Geor
#define Glag UCD_SCRIPT_Glag
#define Gonm UCD_SCRIPT_Gonm
#define Goth UCD_SCRIPT_Goth
#define Gran UCD_SCRIPT_Gran
#define Grek UCD_SCRIPT_Grek
@@ -249,6 +251,7 @@ if __name__ == '__main__':
#define Sind UCD_SCRIPT_Sind
#define Sinh UCD_SCRIPT_Sinh
#define Sora UCD_SCRIPT_Sora
#define Soyo UCD_SCRIPT_Soyo
#define Sund UCD_SCRIPT_Sund
#define Sylo UCD_SCRIPT_Sylo
#define Syrc UCD_SCRIPT_Syrc
@@ -278,6 +281,7 @@ if __name__ == '__main__':
#define Xpeo UCD_SCRIPT_Xpeo
#define Xsux UCD_SCRIPT_Xsux
#define Yiii UCD_SCRIPT_Yiii
#define Zanb UCD_SCRIPT_Zanb
#define Zinh UCD_SCRIPT_Zinh
#define Zmth UCD_SCRIPT_Zmth
#define Zsym UCD_SCRIPT_Zsym
@@ -285,7 +289,7 @@ if __name__ == '__main__':
#define Zyyy UCD_SCRIPT_Zyyy
#define Zzzz UCD_SCRIPT_Zzzz

// Unicode Character Data %s
/* Unicode Character Data %s */
""" % ucd_version)

for script in special_scripts:
@@ -323,7 +327,7 @@ if __name__ == '__main__':
sys.stdout.write('{\n')
for codepoint, table in sorted(script_tables[table_index].items()):
if isinstance(table, str):
sys.stdout.write('\tscripts_%s, // %s\n' % (table, codepoint))
sys.stdout.write('\tscripts_%s, /* %s */\n' % (table, codepoint))
else:
sys.stdout.write('\tscripts_%s,\n' % codepoint)
sys.stdout.write('};\n')
@@ -333,12 +337,12 @@ if __name__ == '__main__':
sys.stdout.write('{\n')
for codepoints, script, comment in script_sets:
if script:
sys.stdout.write('\tif (c <= 0x%s) return %s; // %s : %s\n' % (codepoints.last, script, codepoints, comment))
sys.stdout.write('\tif (c <= 0x%s) return %s; /* %s : %s */\n' % (codepoints.last, script, codepoints, comment))
else:
sys.stdout.write('\tif (c <= 0x%s) // %s\n' % (codepoints.last, codepoints))
sys.stdout.write('\tif (c <= 0x%s) /* %s */\n' % (codepoints.last, codepoints))
sys.stdout.write('\t{\n')
sys.stdout.write('\t\tconst uint8_t *table = scripts_%s_%s[(c - 0x%s) / 256];\n' % (codepoints.first, codepoints.last, codepoints.first))
sys.stdout.write('\t\treturn (ucd_script)table[c % 256];\n')
sys.stdout.write('\t}\n')
sys.stdout.write('\treturn Zzzz; // Invalid Unicode Codepoint\n')
sys.stdout.write('\treturn Zzzz; /* Invalid Unicode Codepoint */\n')
sys.stdout.write('}\n')

Loading…
Cancel
Save