@@ -5,7 +5,7 @@ LOCAL_CFLAGS = -std=c11 | |||
# ucd-tools wide-character compatibility support: | |||
UCDTOOLS_SRC_PATH := ../../ucd-tools/src | |||
UCDTOOLS_SRC_PATH := ../../src/ucd-tools/src | |||
UCDTOOLS_SRC_FILES := \ | |||
$(subst $(LOCAL_PATH)/$(UCDTOOLS_SRC_PATH),$(UCDTOOLS_SRC_PATH),$(wildcard $(LOCAL_PATH)/$(UCDTOOLS_SRC_PATH)/*.c*)) | |||
@@ -218,6 +218,7 @@ afganistan afg'anistan | |||
alaska al'aska | |||
albanië alb'A:ne@:@- | |||
algerië alx2'e@re@:@- | |||
alhambra al'ambra | |||
amanzimtoti $4 | |||
antwerpen antv&rp@n | |||
avignon _^_FR | |||
@@ -333,6 +334,7 @@ oberammergau o@b@r'am@rgaU | |||
oklahoma @Ukl@h'@Uma | |||
outeniekwa @Ut@n'ikwa | |||
oxford _^_EN | |||
paardeneiland pA:rd@n_'eIlant | |||
palermo pal'&rmu | |||
pelindaba p&l@nd'A:ba | |||
perú p@ru | |||
@@ -342,6 +344,7 @@ portugal pOrtyx2al | |||
potchefstroom pOtSIfstr'o@m | |||
rhône _^_FR | |||
richardsbaai ritS@dsb'AI | |||
riversdal r@v@rsdal | |||
riviersonderend r@fi:rsOn@r_'Ent | |||
robertson _^_EN | |||
salvador salvadO:r | |||
@@ -585,6 +588,7 @@ james _^_EN | |||
jane _^_EN | |||
janine dZ@ni:n | |||
Jean ZA~n $capital | |||
jeff _^_EN | |||
jesebel je@s@b&l | |||
jessica _^_EN | |||
jimmy _^_EN | |||
@@ -769,9 +773,11 @@ stockenström stOk@nstro@m | |||
stuart _^_EN | |||
suzanne suz'A:n | |||
suzette suz'Et | |||
sylvia _^_EN | |||
tania tanja | |||
telemann te@l@man | |||
terblanche t@rblA:nS | |||
thelma _^_EN | |||
theo tiu | |||
theron tr'On | |||
thessalonicense tEsalo@nis'E:ns@ | |||
@@ -816,6 +822,7 @@ vivaldi viv'aldi | |||
viviers v@v@je@ | |||
wilhelm v@lh&l@-m | |||
williston _^_EN | |||
woltemade vOlt@mA:d@ | |||
khumalo kum'A:lu | |||
zuma zu:ma | |||
@@ -825,6 +832,7 @@ alibama $3 | |||
antares $2 | |||
beatles _^_EN | |||
boeing _^_EN | |||
cadillac _^_EN | |||
checkers tSEk@rs | |||
chevrolet _^_FR | |||
chrysler kraIsl@r | |||
@@ -1044,7 +1052,8 @@ fides _^_LA | |||
forma _^_LA | |||
grata _^_LA | |||
habitatio _^_LA | |||
inclusio _^_LA | |||
inclusio _^_LA | |||
(in camera) @n||kam@ra | |||
(in debiti) _^_LA | |||
(in absentia) _^_LA | |||
(in extremis) _^_LA | |||
@@ -1075,6 +1084,7 @@ vivos _^_LA | |||
// main word list | |||
aangaande $2 | |||
aanmerklik $2 | |||
aanstaande $2 | |||
(a cappella) a||kap'&la | |||
adagio ad'A:dZi;%@U | |||
@@ -1091,6 +1101,7 @@ algaande $2 | |||
alge alx2@ | |||
algehele alx2@h,e@l@ | |||
allegro al'Egru | |||
allengs alENs | |||
allergene $3 | |||
almiskie $3 | |||
alom al_'Om | |||
@@ -1116,11 +1127,13 @@ aversie av'&rsi | |||
babelaas bab@lA:s | |||
barrikade $3 | |||
beaming b@_A:m@N | |||
bedewete be@d@ve@t@ | |||
beide beId@ | |||
bekaf b&kaf | |||
belangriker b@laNr@k,Ir | |||
bene be@n@ | |||
beringde b@rINd@ | |||
beswil bEsv@l | |||
besnedene b@sne@d@n@ | |||
bestes bEst@s | |||
@@ -1130,6 +1143,7 @@ bewe be@v@ | |||
bewebeen be@v@be@n | |||
bewend be@v@nt | |||
bewering b@ve@rIN | |||
bilharzia b@lharsia | |||
biopsie bi'Opsi | |||
bomaat bo@mA:t | |||
bordegoed bO:rd@x2ut | |||
@@ -1155,6 +1169,7 @@ cedille s@dIl@ | |||
charisma kar'Isma | |||
cinsaut s@nso@ | |||
cliché kliS'eI: | |||
clientèle _^_FR | |||
clivia klIvija | |||
cognac kOn^ak | |||
confetti $2 | |||
@@ -1173,6 +1188,7 @@ dawidster dA:v@tst&r | |||
déjà _^_FR | |||
dekade dEk'A:d@ | |||
dekreling dEkre@l@N | |||
demensie d@me~nsi | |||
deurentyd dy@r@nteIt | |||
deurgaans $1 | |||
deurkruis $2 | |||
@@ -1219,12 +1235,14 @@ ewentwil e@v@ntv@l | |||
exodus Eks'o@dWs | |||
factotum $2 | |||
faktotum $2 | |||
fakture $2 | |||
fetakaas fEtakA:s | |||
figuur f@x2yr | |||
filippense f@l@pEns@ | |||
finalis $3 | |||
finaliste $3 | |||
fluktuasie $3 | |||
fort fOrt | |||
forte fOrt@ | |||
@@ -1269,6 +1287,7 @@ hekke h&k@ | |||
helaas he@l'A:s | |||
here he@r@ | |||
herero hEr'E:ru | |||
herontmoeting h&r_Ontmut@N | |||
herrysenis h&r'eIs@n@s | |||
hierso hi:rsO | |||
hippie _^_EN | |||
@@ -1294,6 +1313,7 @@ inkatha iNk'A:ta | |||
inkluis $2 | |||
innestel InnEst@l | |||
insomnia @nsOmnija | |||
inteling Inte@l@N | |||
intens @nt'Ens | |||
intensiteit $4 | |||
ironieë irun'i:@ | |||
@@ -1312,6 +1332,7 @@ kaviaar kavi'A:r | |||
kennisvaardig $1 | |||
kimono $2 | |||
klaasvakie $2 | |||
kliënteel $3 | |||
klimeid klImeIt | |||
knapsekêrel knaps@k&:r@l | |||
kolossense kOl@s'e~ns@ | |||
@@ -1325,11 +1346,13 @@ kopiereg kup'ir&x2 | |||
korswel kOrsv@l | |||
kotiljons kOt@lj'o~ns | |||
kritiek krIt'ik | |||
kruisteling krYyste@l@N | |||
kulture $2 | |||
kunsmatige kWnsm'A:t@x2@ | |||
kwansuis $2 | |||
kweekwal kwe@kval | |||
landswye lantsveI@ | |||
lasagne las'anj@ | |||
legaat l@x2A:t | |||
legate l@x2A:t@ | |||
@@ -1532,6 +1555,7 @@ sonore sun'o@r@ | |||
sonure sOn_yr@ | |||
sover so@f&r | |||
staccato $2 | |||
steekhoudendheid $2 | |||
sterwens st&rv@ns | |||
stilswye $1 | |||
strydros streIt_rOs | |||
@@ -1555,6 +1579,7 @@ tevrede t@fre@d@ | |||
thula tu:la | |||
toegee tux2e@ | |||
toereken ture@k@n | |||
toleransie tOl@r'ansi | |||
tornado $2 | |||
totale tut'A:l@ | |||
totsiens $2 | |||
@@ -1566,6 +1591,7 @@ tsoenami tsun'A:mi | |||
uitdaging YydA:x2@N | |||
uiteraard Yyt@r_'A:rt | |||
uitermate $3 | |||
uitgeslotene Yytx2@slo@t@n@ | |||
uniforme $3 | |||
vaarwel fA:rv'&l |
@@ -73,6 +73,7 @@ | |||
@@@smokkel) ary %ar%eI // default stress: diamant-/drank-/dwelm-/goud-/kokaïensmokkelary, etc. | |||
amarula %am%arul%a // fix stress and a sounds: amarula and compounds | |||
a (ment %a // shorten a sound: perkament/testament and compounds | |||
_) arendag (CAC %A:r@ntax2 // fix e sound: arendagtig/-e/-heid | |||
arends A:r@nts_ // fix e sound: arendsoog/-kloue/-nes | |||
argen (tA %arx2@n // move default stress: Argentinië/Argentyns/-e | |||
a (riA 'A: // akwarium/barium/estuarium/herbarium/seminaria | |||
@@ -310,7 +311,7 @@ | |||
attaché _%at%aSe@ // correct pronunciation: (handels-/inligtings-)attaché | |||
attrib (u %atr@b // move default stress: attribuut/bute/attributêr | |||
K) au @U // trauma/-ties/Aucamp/Paul/-a/-us | |||
auer aU@r //Sauer etc. fixed au and e sounds in compounds. | |||
auer aU@r //Sauer etc. fix au and e sounds in compounds. | |||
augustus %Ox2WstWs | |||
avokado %af%ukA:du // stress and o sounds | |||
avokade %af%ukA:d@ // variant form of avokado | |||
@@ -431,7 +432,8 @@ | |||
bo (grond bo@ // fix 1st o sound: bogronds/-e | |||
_) bo (kle bo@ // fix o sound: bokleed/boklere | |||
bokma (kier b%Okm%a // fix stress and a sound: bokmakierie/-s/-tjie | |||
bom (AA bOm_ // fix a sound, pause: bomaanval/-aanslag/-aard/-eenheid and compounds | |||
bom (AA bOm_ // fix a sound, pause: bomaanval/-aard/-eenheid and compounds | |||
bomaans (la bOm_A:ns // fix o sound: bomaanslag/-aanslae | |||
bomaans bo@mA:ns // but fix O sound: bomaans/-e | |||
bonde (C bOnd@ // verbondenheid/bondeldraer/gebondene/saambondelend | |||
boos (aardig b%o@s_ // move default stress: boosaardig/-e/-heid | |||
@@ -526,6 +528,7 @@ | |||
bakate (l b%ak@t& // fix stress and vowel sounds: bakatel/-le/-letjie | |||
baken bA:k@n // fix e sound: afbakening and compounds | |||
bakte (ri b%akte@ // fix stress and e sound: bakterie/-ë | |||
balalaika b%al%alaIk%a // fix a sounds and stress: balalaika/-s/-musiek | |||
_) bam (boes b%am // move default stress: bamboes/-e/bamboesfluit... | |||
ba (nalA b%a // fix stress and 1st a sound: banale/banaliteit | |||
band (Alier b,and // fix stress and d sound: band(e/o)lier/-e | |||
@@ -547,6 +550,7 @@ | |||
ba (sieli b%a // fix stress and 1st a sound: basielie/-kruit, etc. | |||
basotho b%asut%u // fix stress and vowel sounds: Basotho/-0ponie and similar | |||
basi (s bA:s@ // basis and compounds | |||
basilie b%asil%i // fix a sound and stress: basilie/-kruid, etc. | |||
bastille b%asti:l // Bastille and compounds like Bastilledag | |||
batal (jon b%at%al // shorten 1st a sound, moved stress: bataljon and compounds | |||
ba (tik b%a // fix stress and a sound: batik/-doek/-kuns/-werk | |||
@@ -646,6 +650,8 @@ | |||
bere (_ be@r@ //tediebere pandabere etc. | |||
_) ber (C b&r // fix e sound: Bert/Berta/Bertie/Bertus/berke/-boom | |||
beste (_ bEst@ // fix e sound: beste/allerbeste/naasbeste/-s | |||
beton (C@ b@tOn // split ng sounds: betongebou/-gietsel/-gruis | |||
beton (inC@ b@t'On // fix O sound: betoninrigting/-ingenieur | |||
be (weging b@ // draaibeweging/swaaibeweging | |||
bewende be@v@nd@ // first e pronounced long | |||
bewe (rig be@v@ // fix e sound and stress: bewerig/-e/-heid | |||
@@ -749,6 +755,7 @@ | |||
dia (C d%i%a // diabeet/diafragma/diagnose/dialek/dialoog/diamant | |||
diaken d%iA:k@n // diaken and compounds | |||
diako (nie d%i%ak%u // fix stress and o sound: diakonie/-ë | |||
diende dind@ // fix last e sound in many compounds of bediende: bediendekamer | |||
diens (willi d%ins // move default stress: dienswillig/-e/-heid | |||
dieper (@ dip@r // fix e sound: diepere/dieperliggend/-e | |||
digi (ta d%ix2%i // move default stress: digitaal/digitale | |||
@@ -983,6 +990,7 @@ | |||
@C) e (reekK @ // fix e sound: duine-/rotsereeks | |||
aai) e (C @ // fix e sound: baaierd/waaierstert/paaiement/compounds starting with baaiers- | |||
lat) eres @r@s //lateres | |||
@C) erm (K &r@-m // a catch-all for words ending in -erm(s) | |||
fp) ers (_ &rs // fix e sound: dof-/gif-/olyf-/tydskrifpers, etc. | |||
bloup) ers &:rs // fix e sound: bloupers | |||
iew) ers (ter @rs // fix e sound: (l)iewerster | |||
@@ -1074,6 +1082,7 @@ | |||
p) e (talje @ // fix stress and 1st e sound: petalje and compounds | |||
l) e (moen @ //fix lemoen and compounds | |||
Cy) e (C+ @ // fix connecting e sound in compounds like byekorf/skilderyemuseum, etc. | |||
effe (kleur Ef@ // fix stress and 2nd e sound: effekleur(ig/-e) | |||
eier eI@r // eiergeel/eierwit/leierfiguur/leiergroep | |||
@C) ei (land _,eI // insert short pause: skiereiland and many -eiland compounds | |||
@) ei (sen _'eI // (on)veeleisend/spoedeisend | |||
@@ -1589,6 +1598,7 @@ | |||
flu (we fl%y // move default stress: fluweel/fluwele and compounds | |||
_) fok (o fOk_ // fix o sound, insert short break: fokop/fokof | |||
folio fo@li_u // fix o sounds: folio and compounds | |||
fondsw fOntsv // fix v sound: fondswerwing & compounds/-waardes | |||
fondue f%Ondy // fix stress and ue sound: fondue and compounds | |||
fone (tiek f%o@n@ // fix e sound: fonetiek and compounds | |||
fone (tie f%une@ // fix stress and o sound: foneties/-e | |||
@@ -1602,6 +1612,7 @@ | |||
formi (da f%Orm%i // move default stress: formidabel/-e | |||
formu (lier f%Orm%y // move default stress: formulier/-e/-boek | |||
_) for (se_ f'Or // stress back to 1st slb; se rule: _) for (C | |||
forse (nd fOrs@ // fix e sound: forsend/-e | |||
fos (fa f%Os // move default stress: fosfaat/fosfate | |||
fo (ssiel f%O // move default stress: fossiel/-e and compounds | |||
foto fo@tu | |||
@@ -1694,6 +1705,7 @@ | |||
_) gra (na x2r@ // granaat(boom)/granate | |||
grandi (o x2r%and%i // move default stress: grandioos/-ose | |||
_) gra (vA x2r%a // fix stress and a sound: gravin/-ne/gravure | |||
grenadella gr@n@d&la // fix stress and vowels: grenadella and compounds like -heining | |||
griekwa x2rikwa // fix w sound and a sound in compounds: Griekwa/-land, etc. | |||
ouCa) gr (ootjie x2r' // move default stress: ouma/oupagrootjie/-s | |||
_) graad (e x2r%A:t_ // fix stress and d sound: graadeen(tjies)/-elfs | |||
@@ -1726,6 +1738,7 @@ | |||
gegesel x2@x2e@s@l // fix middle e sound | |||
K) gele (_ x2e@l@ // fix stress and e sound: (eier)gele | |||
_) geler x2e@l@r // stress and 1st e sound: geler/gelerig | |||
_) gell x2&l | |||
gemel (d x2@m&l // fix e sound: bo-/bowe-/laas-/meergemeld/-e | |||
gems x2Ems // fix e sound: (baster)gemsbok/-bul/-ooi, etc. | |||
gene (_ x2e@n@ // gene/diegene | |||
@@ -1741,7 +1754,7 @@ | |||
_) ge (ossP2 x2@ // but ge- prefix: geossilleer/geossifiseer, etc. | |||
ni) ge (ri x2'e@ //nigeriese etc. | |||
gese (_ x2'e@s@ //Portugese, and others | |||
gesp (etjie x2Esp // fix e sounds: gespetjie/-s | |||
gespe (_ x2Esp@ // gespe and compounds | |||
gespes (_ x2Esp@s // gespes and compounds | |||
gewens (g x2e@v@ns // (on)vergewensgesind/-e/-heid | |||
@@ -1847,6 +1860,7 @@ | |||
hart (stogte_ h%art // move default stress: hartstogtelik | |||
hart (stogte_N hart // restore default stress: hartstogte | |||
ha (we hA: // fix stress and a sound: hawearbeider/lewendehaweafdeling | |||
_) hef (a hEf_ // fix e sound, insert break: hefapparaat/-arm(s) | |||
hia (sint h%ij%a // fix stress: hiasint/-e and compounds | |||
_) hi (bis h%i // move default stress: hibiskus/-se and compounds | |||
hierna (maal h%i:rnA: // fix stress and a sound: hiernamaals/-e | |||
@@ -1984,6 +1998,7 @@ | |||
_) idi (o %id%i // idioom/idiome/idioot | |||
_) id (A %id // idille/idillies/ideëryk | |||
&l) iker (C @k@r // fix i sound: menslikerwys/redelikerwyse, etc. | |||
&l) iker (_ @k@r // fix i sound: afstootliker/(ge)redeliker/onberispeliker | |||
illumi (nA %il%um%i // fix stress and vowel sounds: illuminasie/illumineer/illuminati | |||
illu (si %ily // illusie/-s/illusief | |||
illu (strA %il%W // fix i sound: illustreer/illustrering/illustrasie | |||
@@ -2114,6 +2129,7 @@ | |||
_) in (a@P2 In // inakkuraat/inaktief/inaktiwiteit | |||
_) inbe (lC Inb%& // fix e sound in compounds like inbelprogram | |||
indone (si @nd%uni: // fix stress and vowel sounds: Indonesië/Indonesiese | |||
s) in (gestel @n // split n g: compounds with ingesteldheid/winsingestelde | |||
_) in (oe In_ // fix i sound, pause: inoefen/-oes and derivatives | |||
invest (eer @nv%Est // fix stress, v and e sounds: investeer/-der | |||
investe (r@ @nv%Este@ // fix e sound: (kapitaal)investering/investerende | |||
@@ -2324,10 +2340,11 @@ | |||
krieketw krik@tv //krieket followed by w in compounds always v | |||
krokodi (l kr%Ok@dI // (wyfie)krokodil/-le | |||
kro (niek kr%u // fix stress and o sound: kroniek and many compounds | |||
kruger kr'Y@@r | |||
ku (ba@ k%y // move default stress: kubaan/kubane | |||
ku (biek k%y // move default stress: kubiek/-e/-getal | |||
_) kuber kyb@r // fixed e sound: kuberruim(te) | |||
_) kuber kyb@r // fix e sound: kuberruim(te) | |||
kulin k%Wl%in // Move default stress and fix u sound: kulinër/-e | |||
@) kundi (g k'Wnd@ // wiskundige/onoordeelkundigheid and many similar | |||
kurwe kWrv@ // fix e sound: kurwes/skurwebas/skurwebek/skurwepadda | |||
@@ -2364,6 +2381,7 @@ | |||
kafe (te k%af@ // fix stress and vowel sounds: kafeteria and compounds | |||
kafe (ï k%af%i // fix stress and vowel sounds: kafeïene and compounds | |||
ka (jak k%a // fix stress and 1st a sound: kajak/-ke/-vaarder | |||
kakao k%akA:w // fix stress and vowel sounds: kakao and many compounds | |||
kake (C kA:k@ // kakebeen/skakelaar/skakelbord/skakelfunksie | |||
kalahari kalah'A:ri // stress: Kalahari/-sand/-woestyn | |||
_) ka (lAnC k%a // kalender and compounds/kalant/kalander and compounds | |||
@@ -2544,6 +2562,7 @@ | |||
kontrasep k%Ontr%asEp // fix stress and e sound: kontrasepsie and derivatives | |||
kontrover (s k%Ontr%uv&r // o and v sounds: kontroversie/kontroversieel | |||
kop (o kOp? // fix o sound: koponderstebo/kopomdraai/kopoperasie/gryskoponderwyser/poenskopolifant | |||
kop (agtig k%Op_ // fix o sound, insert break: hamer-/spinnekop-/penkopagtig(e(s)) | |||
kopu (lA k%Op%y // fix o sound: kopulasie/kopuleer and derivatives | |||
_) kor (dA k%Or // kordaat/kordon | |||
ko (rint k%u // fix stress and o sound: korint/-e and compounds | |||
@@ -2656,6 +2675,7 @@ | |||
loboto (mie l%ub%Ot%u // fix stress and o sounds: lobotomie | |||
lo (ja l%u // fix stress and o sound: lojale/lojaliteit | |||
lo (kalA l%u // fix stress and o sound: lokale/ontvangslokale, etc. | |||
loke (t l%ukE // fix stress and vowel sounds: many compounds with loket | |||
lom (bardA l%Om // move default stress: Lombardies/-e/Lombarde | |||
_) lore (C@ lo@r@ // fix e sound: verloregaan/verloregoederekantoor/Verlorerivier | |||
_) losge (@P5 l'Osx2@ | |||
@@ -2805,6 +2825,7 @@ | |||
medisyne m@d@seIn@ //medisyne and compounds | |||
meganies m@x2'A:nis | |||
_) meege (@P5 m'e@x2@ | |||
_) meegewe (nd me@x2e@v@ // fix e sounds and stress: meegewend(e) | |||
me (juf m@ // move default stress and shorten e sound | |||
me (laats m@ // fix stress and e sound: melaats/-e/-heid | |||
melancholie (_N m%El%aNk%o@li // fix stress and 1st e sound: melancholie | |||
@@ -2946,6 +2967,7 @@ | |||
morf (otomie m%Orf // move default stress: morfotomie | |||
_) morr (i mOr // restore default stress: morrig/morrie/-doring | |||
_) mors (@ mOrs // restore default stress: morsaf/morsdood/morsig | |||
mos (agtig mOs_ // fix o sound, insert break: (kos)mosagtig(e) | |||
mosam (biek m%o@s%am // move default stress: Mosambiek/-er/-se | |||
mosa (ïek m%o@s%a // move default stress: mosaïek and compounds | |||
mo (skee m%O // move default stress: moskee/-s and compounds | |||
@@ -3035,6 +3057,7 @@ | |||
ne (anderCa n%i // move default stress: Neander(d/t)al/-ler | |||
neger (in n%e@x2@r // move default stress: negerin/-ne | |||
ne (gosie n@ // fix stress and e sound: negosie/-ware, etc. | |||
nek (_ n&k // catch-all for words ending in -nek: koedoe-/swaannek | |||
nek (om n&k_ // fix e sound, insert pause: nekom(ge)draai | |||
nekta (rien n%Ekt%a // fix stress and a sound: nektarien/-perske, etc. | |||
neo (li n%i%u // fix stress and vowel sounds: neolities/-e/neolitikum | |||
@@ -3080,6 +3103,7 @@ | |||
nood (lotti n%o@t // move default stress: noodlottig/-e | |||
nood (saak n%o@t // move default stress: noodsaaklik/-e/-heid, vs. noodsaak | |||
nood (saak_N no@t // restore default stress: (ge)noodsaak | |||
nooien (tjie noIN // remove the e sound: nooientjie(s) and compounds | |||
noord (oos n%o@rt_ // move default stress: noordoos/-te/-telik/-e | |||
nor (ma@ n%Or // normaal/normaalweg/normale/abnormaal/-ale | |||
nostal (gie_N n%Ost%al // stress on last slb.: nostalgie | |||
@@ -3417,6 +3441,7 @@ | |||
ooi oI | |||
ooy oI | |||
oodjie oIci | |||
CC) ool (A o@l_ // insert break: skooluur/-ure, steenkooluitvoer, but not: Karoolug | |||
ootjie oIci | |||
oontjie oINki | |||
oondjie oINki | |||
@@ -3483,6 +3508,7 @@ | |||
oot (moedig %o@t // move default stress: ootmoedig/-e/-heid | |||
.group op | |||
_) opaal %o@pA:l // fix o sound, remove break: opaal and compounds | |||
opaat up'A:t // homeopaat/psigopaat and similar | |||
opatie upat'i // homeopatie/neuropatie and similar | |||
opaties up'A:tis // psigopaties/osteopaties and similar | |||
@@ -3578,6 +3604,7 @@ | |||
pol (vy p%Ol // move default stress: polvy/-e and compounds | |||
pomelo p%ume@l%u // fix stress and o sounds: pomelo(sap/-drankie...) | |||
_) pon (dok p%On // move default stress: pondok/-ke/-kie | |||
pop (agtig p%Op_ // fix o sound, insert break: popagtig(e) and compounds | |||
popu ,pOpy | |||
_) por (C %pOr //portret portaal etc. | |||
por (ie p%o@r // move default stress: porie/-ë | |||
@@ -4118,6 +4145,7 @@ | |||
sker (muts sk%&r // move default stress: (ge)skermutsel/skermutseling/-e | |||
skerpi (oen sk%&rp%i // move default stress: skerpioen/-e and compounds | |||
skilder (y sk@ld@r // move stress to y: skildery and compounds like skilderymuseum | |||
skim (agtig sk@m_ // fix i sound, insert break: skimagtig(e) | |||
skisofr (e sk%is%ufr // fix stress and o sound: skisofreen/skisofrene | |||
skle (rose skl@ // fix stress and e sound: sklerose and compounds | |||
_) skok (AP4 sk''Ok_ // fix o sound and stress: skokaankondiging/-effek/-insluiting/-onthulling... | |||
@@ -4252,6 +4280,7 @@ | |||
_) su (meri s%u // fix stress and u sound: sumeries/-e | |||
su (mmier s%W // move default stress: sumier/-e | |||
super (A s''yp@r_ // fix stress, break in compounds like superintelligent | |||
superi (A s%up%e@r%i // fix stress and vowel sounds: superieur/superioriteit | |||
superintendent s,upr@nt%EndEnt // fix stress and vowel sounds: superintendent and compounds | |||
surro (ga s%Wr%u // fix stress and o sound: surrogaat/surrogate and compounds | |||
su (saC s%u // fix stress and u sound: susan/-na/susara | |||
@@ -4561,7 +4590,7 @@ | |||
toe (riste@ t%u // move default stress: toeristebedryf/-sentrum and similar | |||
toer (n t%ur // move default stress: compounds of toernooi | |||
toere (_ tu:r@ // restore default stress | |||
toi (let t%OI // move default stress: toilet and compounds | |||
toilet t%OIlEt // move default stress: toilet and compounds: toiletartikel/-emmer/-opsigter | |||
tokke (lo t%Ok@ // move default stress: tokkelos/-sie/tokkelok and compounds | |||
tok (tokk t%Ok // move default stress: toktokkie/-s/-spelery, etc. | |||
tombola t%Ombo@l%a // move default stress: tombola and compounds | |||
@@ -4574,6 +4603,7 @@ | |||
@) toris (_ t'o@r@s // fix stress: pectoris/klitoris | |||
_) tor (nyn t%Or // move default stress: tornyn/-e and compounds | |||
_) tos (ka t%Os // move default stress: Toskaanse/Toskane | |||
tser (tjie ts@r // fix e sound: (skoen)poetsertjie/weerkaatsertjie | |||
ttel t@l // many compounds of bottel/skottelgoed | |||
tuberkulose t%yb@rk%ylo@s@ // fix stress; e sound in compounds: tuberkulose/-behandeling | |||
tug (A tWx2_ // fix u sound: (on)tugondersoek/-oortreding/-ordonnansie | |||
@@ -4875,6 +4905,7 @@ | |||
_) vanklik faNkl@k // (on)ontvanklik/-e/-er/-heid | |||
vanself (spr f%ans%&lf // move default stress: vanselfsprekend/-e/-heid | |||
vari (A v%ar%i // fix v sound and stress: variasie/-s/varieer | |||
va (sal v%a // fix stress and v and a sounds: vasal/-le | |||
vaseline v%as@lin // fix stress, v and vowel sounds: vaseline/-bottel, etc. | |||
vasste (l fast& // fix e sound: vasstel(ling/-lende) | |||
_) vat (A@ fat_ // fix a sound: vatafstand/-orgaan | |||
@@ -4943,9 +4974,11 @@ | |||
ve (l f& // maagvel, stress on 1st slb | |||
@) vel (A fe@l // aanbeveling/aanbevelingsbrief | |||
vel (djie f&l // fix -djie sound: veldjie(s) and compounds | |||
veld (C f< // fix d sound: veldreuk/-radio/-rantsoen, etc. | |||
veld (eks f<_ // fix d sound, pause: veldekskursie/-ekspedisie/-s | |||
veld (o f<_ // fix d sound, pause: veldorgideë/-opsigter/-oppervlakte/-opname, etc. | |||
veld (t f&l // eliminate double t sound: veldtog and many compounds, Langeveldt, Springveldt | |||
vele f'e@l@ | |||
ven (detta v%En // fix stress and v sound: vendatta and compounds | |||
ven (dusie f@n // fix stress and e sound: vendusie and compounds | |||
@@ -5048,7 +5081,7 @@ | |||
vol (kome f%Ol | |||
vo (llA_ fO // volle/vollê/Volla - exception to: vo (lC f%O | |||
volle (dig f%Ole@ // fix stress and e sound: (on)volledig/-e/-heid... | |||
voll (engte fOlE | |||
volle (ngte fOlE | |||
vo (ller fO // exception to: vo (lC f%O | |||
volles (_ fOl@s // fix stress and e sound: volles/passievolles, etc. | |||
vol (hou_ fOl // exception to: vo (lC f%O |
@@ -2228,7 +2228,7 @@ idevice $alt6 | |||
idly aIdlI | |||
idiocy IdI@si | |||
ifrog $alt6 | |||
ignoramus Igno@r'eIm@s | |||
ignoramus IgnO@r'eIm@s | |||
illiterate $alt2 | |||
illumine $alt2 | |||
imagery ImIdZri | |||
@@ -2716,6 +2716,7 @@ nonsense n0ns@ns | |||
nonetheless nVnD@l'Es | |||
nosedive noUzdaIv | |||
nosir noUs3: | |||
?5 nosir noUsIR | |||
not noUt // for noted, notable, etc | |||
nots n0ts | |||
(nôtre dame) noUtr@'dA:m | |||
@@ -3420,7 +3421,7 @@ sincerest sIns'i@r@st | |||
sinus saIn@s | |||
siphon $alt2 | |||
sir s,3: $only | |||
?5 sir s,VR $only | |||
?5 sir s,IR $only | |||
siren saIr@n | |||
site saIt // for sited | |||
ski ski: | |||
@@ -3455,12 +3456,13 @@ sommelier s0m'Eli@ | |||
son sVn | |||
sonar soUnA@ | |||
sonny sVnI | |||
sooth su:T $only | |||
sopapilla soUp@p'i:@ | |||
sope soUpeI | |||
sorbet sO@beI | |||
souffle su:fl'eI | |||
soundbite saUndbaIt | |||
souvenir su:v@n'i@3 | |||
sopapilla soUp@p'i:@ | |||
sorbet sO@beI | |||
soyabean sOI@bi:n | |||
specific sp@sIfIk | |||
specimen spEsI2m@n | |||
@@ -3568,7 +3570,6 @@ tallyho talI'hoU | |||
tamale ta#mA:li | |||
tampon tamp0n | |||
tangerine tandZ@r'i:n | |||
taoiseach ti:S@x | |||
tapestry tapI#stri | |||
tarantula t@rantS@l@ | |||
tardis $alt1 | |||
@@ -3913,6 +3914,7 @@ yer j3 $u+ | |||
ye ji: $u+ | |||
yea jeI | |||
yessir jEss3: | |||
?5 yessir jEssIR | |||
yoghurt j0g3t | |||
?3 yoghurt joUg3t | |||
?3 yogurt joUg3t | |||
@@ -4546,8 +4548,6 @@ Annise a#ni:s | |||
?!3 Anthony ant@ni | |||
Anton ant0n | |||
Anya anj@ | |||
Aoife i:f@ | |||
Aoiffe i:f@ | |||
Aphrodite afr@d'aIti | |||
Archibald A@tSIbO:ld | |||
Archie A@tSi | |||
@@ -4898,7 +4898,6 @@ Sabine sa#b'i:n | |||
Salman sa#lmA:n | |||
Samantha sa#manT@ | |||
(Santa claus) s'ant@||kl'O:z | |||
Saoirse si@S@ | |||
Sarah se@r@ | |||
Sarisa $alt3 | |||
Seamus SeIm@s | |||
@@ -4912,8 +4911,8 @@ Sheila Si:l@ | |||
Simon saIm@n | |||
Sinead SI2neId | |||
Sinéad SI2neId | |||
Siobhan S@vO:n | |||
Siobhán S@vO:n | |||
Siobhan SI2vO:n | |||
Siobhán SI2vO:n | |||
Sonia s0nj@ | |||
Sophia soUf'i@ | |||
Sophie soUfi |
@@ -150,12 +150,16 @@ | |||
sw) a (m_ a | |||
sw) a (nk a | |||
ao eI0 | |||
mh) ao eI // Irish, e.g. 'mhaol' /weIl/ | |||
m) ao aU | |||
p) ao aU | |||
t) ao aU | |||
ao (_ =aU | |||
aois (_ i:S | |||
aoise (_ i:S@ | |||
ao (ism aU | |||
ao (ist aU | |||
aoi (C i: // Irish, e.g. 'Aoife' /i:f@/ | |||
aoir (C e@ // Irish, e.g. 'Saoirse' /se@S@/ | |||
?3 aoir (C 3: // Irish, e.g. 'Saoirse' /s3:S@/ | |||
g) ao (l eI@ | |||
aor eI'o@ | |||
m) ao (ri aU | |||
@@ -180,7 +184,8 @@ | |||
_n) a (tional a | |||
@) a (tious 'eI | |||
ell) a (trix @ | |||
a (triC 'eI | |||
a (trix 'eI | |||
a (trice 'eI | |||
n) a (tur eI | |||
n) a (tura a | |||
&) a (ture_ @ | |||
@@ -512,7 +517,7 @@ | |||
_m) ag (ell a#dZ | |||
Cp) age (_ eIdZ | |||
pp) age (_ I2dZ | |||
_ant) ag 'ag | |||
_ant) ag (on 'ag | |||
enr) ag (e_ 'eIdZ | |||
outr) ag (e_ eIdZ | |||
der) ag (e_ eIdZ | |||
@@ -1463,7 +1468,7 @@ _it_separ) ate (_ @t | |||
may) be (_ bi: | |||
_) be (CA bI# | |||
_) bete (lg bi:t@ | |||
_) be (C% bE | |||
_) be (C%+ bE | |||
_) be (atiC b%i: | |||
_) be (b bi: | |||
_) be (cl bI# | |||
@@ -1477,7 +1482,7 @@ _it_separ) ate (_ @t | |||
_) be (kn bI# | |||
_) belarus bEl@r'u:s | |||
_) be (lC bE | |||
_) be (llig bI# | |||
_) be (llig+ bI# | |||
_) be (re bE | |||
_) be (sC bI# | |||
_) be (stia bE | |||
@@ -2150,6 +2155,8 @@ _it_separ) ate (_ @t | |||
e (Cical 'E | |||
e (CiuB i: | |||
&) e (_ | |||
aoiC) e (_ @ // Irish, e.g. 'Aoife' /i:f@/ | |||
aoiCC) e (_ @ // Irish, e.g. 'Saoirse' /se@S@/ | |||
acB) e (_ %I | |||
XC) e (_N i: | |||
vert) e (b I | |||
@@ -2378,6 +2385,7 @@ _it_separ) ate (_ @t | |||
y) ed (_S2v d# | |||
debut) ed (_S2 d# | |||
edly (_S4m I#dl%i | |||
eg) edly (_S3m I#dl%i | |||
c) ed (e_ 'i:d | |||
p) ed (e_ i:d | |||
p) edal Ed@L | |||
@@ -2567,6 +2575,8 @@ _it_separ) ate (_ @t | |||
en (core 0n | |||
&) ency (_ @ns%i | |||
ency (cli %EnsI | |||
_) en (dg@ %En | |||
_) en (dp@ %En | |||
k) en (d_ En | |||
s) en (d_ En | |||
t) en (d_ En | |||
@@ -2771,7 +2781,7 @@ _it_separ) ate (_ @t | |||
exp) eri (en i@rI2 | |||
XC) er 3: | |||
th) er (@ 3: | |||
h) er (@ %3 | |||
h) er (nan %3 | |||
X) er (A E#r | |||
_h) eretical I#rEtIk@L | |||
_qu) er 3: | |||
@@ -2890,6 +2900,7 @@ _it_separ) ate (_ @t | |||
&z) es (_S2 %I#z | |||
&C) es (_S1i z | |||
xus) es (_S2 %I#z | |||
tamus) es (_S2 %I#z // hippopotamuses | |||
es (carp I2s | |||
es (cape %Es | |||
es (capi %Es | |||
@@ -3099,7 +3110,7 @@ _it_separ) ate (_ @t | |||
&) ford (_S4 f3d | |||
&f) ford (_S4 3d | |||
for (see f%O@ | |||
for (ward f'o@ // straightforward | |||
for (ward f'O@ | |||
ft (en f | |||
&) ful (_S3i f@L | |||
@@ -4288,6 +4299,8 @@ multip) ly laI | |||
_) metall (ic m@tal | |||
metabo m@t'ab0 | |||
_) meta (llu m%Eta | |||
mh (ao w // Irish, e.g. 'mhaol' /weIl/ | |||
aoi) mh (e v // Irish, e.g. 'Caoimhe' /ki:v@/ | |||
&) mobile m@bi:l | |||
_) mocha moUk@ | |||
mono (ga m@n'0 | |||
@@ -4299,6 +4312,7 @@ multip) ly laI | |||
&) mouth (_ m@T | |||
&) mouth (_$w_alt1 maUT | |||
_) multi mVlti | |||
_) multi (pl m,VltI | |||
_) multi (@@P5 m,VltI | |||
.group mi | |||
@@ -5847,6 +5861,10 @@ multip) ly laI | |||
Co) s (Er z | |||
Co) s (En z | |||
e) s (d z | |||
aoi) s (e S // Irish, e.g. 'Laoise' | |||
aoi) seach S@x // Irish, e.g. 'Taoiseach' | |||
aoi) sigh Si // Irish, e.g. 'Taoisigh' | |||
aoir) s (e S // Irish, e.g. 'Saoirse' | |||
_) se (clu sI# | |||
secur sI#kjU@ | |||
_) se (duc sI# | |||
@@ -5878,6 +5896,7 @@ multip) ly laI | |||
&) s (ic_ z | |||
n) s (ic_ s | |||
ss (ic s | |||
mu) s (e z | |||
mu) s (ic z | |||
ea) s (ie z | |||
ea) s (il z | |||
@@ -6082,6 +6101,8 @@ multip) ly laI | |||
th (ill th | |||
gh) th (A th | |||
ee) thing DI2N | |||
soo) th D | |||
soo) th (say T | |||
the (_ D | |||
&) th (L03_ =T | |||
ou) thed (_ Dd |
@@ -1,5 +1,5 @@ | |||
// * Farsi Language fa (or Parsi or Persian) fa_list Version 3.133 | |||
// * This file writen by Shadyar Khodayari and Ehsan Esmaili who has managed collecting exceptional words. 05-10-2017 | |||
// * Farsi Language fa (or Parsi or Persian) fa_list Version 3.134 | |||
// * This file was written by Shadyar Khodayari and Ehsan Esmaili who has managed collecting exceptional words. 06-24-2017 | |||
//********* | |||
// * This program is free software; you can redistribute it and/or modify * | |||
// * it under the terms of the GNU General Public License as published by * | |||
@@ -554,6 +554,7 @@ _) paRAntezbaste: | |||
آموخت Amuxt | |||
آموزد Amuzad | |||
آموزش AmuzeS | |||
آمپر AmpeR | |||
آمپرمتر AmpeRmetR | |||
آمپلیفایر AmpelifAjeR | |||
آمپیریسم AmpiRism | |||
@@ -5055,7 +5056,6 @@ _) paRAntezbaste: | |||
درایه deRAje | |||
درایو deRAjv | |||
درایور deRAjveR | |||
درباره daR'bAReje: | |||
دربازکن daRbAzkon | |||
دربدر daRbedaR | |||
دربندکشیده daRbandkeSide | |||
@@ -6809,6 +6809,7 @@ _) paRAntezbaste: | |||
شدیدا Sadidan | |||
شدیداللحن Sadidollahn | |||
شدیم Sodim | |||
شراادی SA:_d:jA:_R | |||
شرافت SeRAfat | |||
شراپنل SeRApnel | |||
شراکت SeRAkat | |||
@@ -6816,7 +6817,6 @@ _) paRAntezbaste: | |||
شربت SaRbat | |||
شرت 'SoRt | |||
شرتکات SoRtkAt | |||
شراادی SA:_d:jA:_R | |||
شرشر SeRSeR | |||
شرطه SoRte: | |||
شرعا SaR?an |
@@ -1,5 +1,5 @@ | |||
// * Farsi Language fa (or Parsi or Persian) fa_rules Version 3.133 | |||
// * This file writen by Shadyar Khodayari 05-10-2017 | |||
// * Farsi Language fa (or Parsi or Persian) fa_rules Version 3.134 | |||
// * This file was written by Shadyar Khodayari 06-24-2017 | |||
//********* | |||
// * This program is free software; you can redistribute it and/or modify * | |||
// * it under the terms of the GNU General Public License as published by * | |||
@@ -4843,7 +4843,7 @@ L09L04) السّادات (_Sm8 ossAdAt | |||
L09L09L09L09) م (L03L09L09L09_ ma | |||
// Prefixes م | |||
_) م (L03L09L03_$noprefixP1@ ma | |||
_) م (L03L04L03_$noprefixP1@ ma | |||
_) می (L03L09+$noprefixP2@ mi | |||
_) می (آL09L09$noprefixP2@ mi | |||
_) می (وL09L09$noprefixP2@ mi |
@@ -1,4 +1,5 @@ | |||
.*.swp | |||
*~ | |||
# intermediate files: | |||
@@ -7,11 +7,14 @@ These are eSpeak NG specific modifications to the `ucd-tools` project: | |||
* `data/espeak-ng` data files for eSpeak NG extended data. | |||
* espeak-ng PropList property lookup as part of the `ucd_property` API. | |||
## 9.0.0.1 - (In Progress) | |||
## 10.0.0 - 2017-06-25 | |||
* Add `iswblank` and `iswxdigit` compatibility. | |||
* Improve ctype compatibility. | |||
* PropList property lookup. | |||
* PropList and emoji-data property lookup. | |||
* Support building with a C89 compiler. | |||
* Update to Unicode Character Data 10.0.0. | |||
* Unicode Emoji 5.0. | |||
## 9.0.0 - 2016-12-28 | |||
@@ -55,34 +55,34 @@ EXTRA_DIST += ChangeLog | |||
############################# Unicode Data #################################### | |||
EMOJI_VERSION=4.0 | |||
EMOJI_VERSION=5.0 | |||
UCD_VERSION=@UCD_VERSION@ | |||
UCD_ROOTDIR=data/ucd | |||
UCD_SRCDIR=http://www.unicode.org/Public | |||
data/emoji/emoji-data.txt: | |||
mkdir -pv data/emoji | |||
curl ${UCD_SRCDIR}/emoji/${EMOJI_VERSION}/emoji-data.txt > $@ | |||
curl ${UCD_SRCDIR}/emoji/${EMOJI_VERSION}/emoji-data.txt -o $@ | |||
data/ucd/PropList.txt: | |||
mkdir -pv data/ucd | |||
curl ${UCD_SRCDIR}/${UCD_VERSION}/ucd/PropList.txt > $@ | |||
curl ${UCD_SRCDIR}/${UCD_VERSION}/ucd/PropList.txt -o $@ | |||
data/ucd/DerivedCoreProperties.txt: | |||
mkdir -pv data/ucd | |||
curl ${UCD_SRCDIR}/${UCD_VERSION}/ucd/DerivedCoreProperties.txt > $@ | |||
curl ${UCD_SRCDIR}/${UCD_VERSION}/ucd/DerivedCoreProperties.txt -o $@ | |||
data/ucd/PropertyValueAliases.txt: | |||
mkdir -pv data/ucd | |||
curl ${UCD_SRCDIR}/${UCD_VERSION}/ucd/PropertyValueAliases.txt > $@ | |||
curl ${UCD_SRCDIR}/${UCD_VERSION}/ucd/PropertyValueAliases.txt -o $@ | |||
data/ucd/Scripts.txt: | |||
mkdir -pv data/ucd | |||
curl ${UCD_SRCDIR}/${UCD_VERSION}/ucd/Scripts.txt > $@ | |||
curl ${UCD_SRCDIR}/${UCD_VERSION}/ucd/Scripts.txt -o $@ | |||
data/ucd/UnicodeData.txt: | |||
mkdir -pv data/ucd | |||
curl ${UCD_SRCDIR}/${UCD_VERSION}/ucd/UnicodeData.txt > $@ | |||
curl ${UCD_SRCDIR}/${UCD_VERSION}/ucd/UnicodeData.txt -o $@ | |||
############################# documentation ################################### | |||
@@ -1,5 +1,5 @@ | |||
AC_PREREQ([2.65]) | |||
AC_INIT([Unicode Character Database Tools], [9.0.0], [https://github.com/rhdunn/ucd-tools/issues], [ucd-tools], [https://github.com/rhdunn/ucd-tools]) | |||
AC_INIT([Unicode Character Database Tools], [10.0.0], [https://github.com/rhdunn/ucd-tools/issues], [ucd-tools], [https://github.com/rhdunn/ucd-tools]) | |||
AM_INIT_AUTOMAKE() | |||
m4_ifdef([AM_SILENT_RULES],[AM_SILENT_RULES]) | |||
@@ -24,6 +24,7 @@ dnl library checks. | |||
dnl ================================================================ | |||
AC_CHECK_HEADERS([stddef.h]) dnl C89 | |||
AC_CHECK_FUNCS([iswblank]) dnl C99 | |||
AC_TYPE_UINT8_T | |||
AC_TYPE_UINT32_T | |||
@@ -33,10 +34,10 @@ dnl UCD data configuration. | |||
dnl ================================================================ | |||
AC_ARG_WITH([unicode-version], | |||
[AS_HELP_STRING([--with-unicode-version], [Unicode version to support @<:@default=9.0.0@:>@])], | |||
[AS_HELP_STRING([--with-unicode-version], [Unicode version to support @<:@default=10.0.0@:>@])], | |||
[AS_IF([test x"$withval" != x], | |||
[UCD_VERSION="$withval"])], | |||
[UCD_VERSION="9.0.0"]) | |||
[UCD_VERSION="10.0.0"]) | |||
AC_SUBST(UCD_VERSION) | |||
@@ -18,14 +18,15 @@ | |||
* along with ucd-tools. If not, see <http://www.gnu.org/licenses/>. | |||
*/ | |||
// NOTE: This file is automatically generated from the UnicodeData.txt file in | |||
// the Unicode Character database by the ucd-tools/tools/categories.py script. | |||
/* NOTE: This file is automatically generated from the UnicodeData.txt file in | |||
* the Unicode Character database by the ucd-tools/tools/categories.py script. | |||
*/ | |||
#include "ucd/ucd.h" | |||
#include <stddef.h> | |||
// Unicode Character Data 9.0.0 | |||
/* Unicode Character Data 9.0.0 */ | |||
struct case_conversion_entry | |||
{ |
@@ -69,16 +69,16 @@ int ucd_isblank(codepoint_t c) | |||
switch (ucd_lookup_category(c)) | |||
{ | |||
case UCD_CATEGORY_Zs: | |||
switch (c) // Exclude characters with the <noBreak> DispositionType | |||
switch (c) /* Exclude characters with the <noBreak> DispositionType */ | |||
{ | |||
case 0x00A0: // U+00A0 : NO-BREAK SPACE | |||
case 0x2007: // U+2007 : FIGURE SPACE | |||
case 0x202F: // U+202F : NARROW NO-BREAK SPACE | |||
case 0x00A0: /* U+00A0 : NO-BREAK SPACE */ | |||
case 0x2007: /* U+2007 : FIGURE SPACE */ | |||
case 0x202F: /* U+202F : NARROW NO-BREAK SPACE */ | |||
return 0; | |||
} | |||
return 1; | |||
case UCD_CATEGORY_Cc: | |||
return c == 0x09; // U+0009 : CHARACTER TABULATION | |||
return c == 0x09; /* U+0009 : CHARACTER TABULATION */ | |||
default: | |||
return 0; | |||
} | |||
@@ -91,7 +91,7 @@ int ucd_iscntrl(codepoint_t c) | |||
int ucd_isdigit(codepoint_t c) | |||
{ | |||
return (c >= 0x30 && c <= 0x39); // [0-9] | |||
return (c >= 0x30 && c <= 0x39); /* [0-9] */ | |||
} | |||
int ucd_isgraph(codepoint_t c) | |||
@@ -174,23 +174,23 @@ int ucd_isspace(codepoint_t c) | |||
case UCD_CATEGORY_Zp: | |||
return 1; | |||
case UCD_CATEGORY_Zs: | |||
switch (c) // Exclude characters with the <noBreak> DispositionType | |||
switch (c) /* Exclude characters with the <noBreak> DispositionType */ | |||
{ | |||
case 0x00A0: // U+00A0 : NO-BREAK SPACE | |||
case 0x2007: // U+2007 : FIGURE SPACE | |||
case 0x202F: // U+202F : NARROW NO-BREAK SPACE | |||
case 0x00A0: /* U+00A0 : NO-BREAK SPACE */ | |||
case 0x2007: /* U+2007 : FIGURE SPACE */ | |||
case 0x202F: /* U+202F : NARROW NO-BREAK SPACE */ | |||
return 0; | |||
} | |||
return 1; | |||
case UCD_CATEGORY_Cc: | |||
switch (c) // Include control characters marked as White_Space | |||
switch (c) /* Include control characters marked as White_Space */ | |||
{ | |||
case 0x09: // U+0009 : CHARACTER TABULATION | |||
case 0x0A: // U+000A : LINE FEED | |||
case 0x0B: // U+000B : LINE TABULATION | |||
case 0x0C: // U+000C : FORM FEED | |||
case 0x0D: // U+000D : CARRIAGE RETURN | |||
case 0x85: // U+0085 : NEXT LINE | |||
case 0x09: /* U+0009 : CHARACTER TABULATION */ | |||
case 0x0A: /* U+000A : LINE FEED */ | |||
case 0x0B: /* U+000B : LINE TABULATION */ | |||
case 0x0C: /* U+000C : FORM FEED */ | |||
case 0x0D: /* U+000D : CARRIAGE RETURN */ | |||
case 0x85: /* U+0085 : NEXT LINE */ | |||
return 1; | |||
} | |||
default: | |||
@@ -217,7 +217,7 @@ int ucd_isupper(codepoint_t c) | |||
int ucd_isxdigit(codepoint_t c) | |||
{ | |||
return (c >= 0x30 && c <= 0x39) // [0-9] | |||
|| (c >= 0x41 && c <= 0x46) // [A-F] | |||
|| (c >= 0x61 && c <= 0x66); // [a-f] | |||
return (c >= 0x30 && c <= 0x39) /* [0-9] */ | |||
|| (c >= 0x41 && c <= 0x46) /* [A-F] */ | |||
|| (c >= 0x61 && c <= 0x66); /* [a-f] */ | |||
} |
@@ -176,6 +176,7 @@ typedef enum ucd_script_ | |||
UCD_SCRIPT_Geok, /**< @brief Khutsuri Script */ | |||
UCD_SCRIPT_Geor, /**< @brief Georgian Script */ | |||
UCD_SCRIPT_Glag, /**< @brief Glagolitic Script */ | |||
UCD_SCRIPT_Gonm, /**< @brief Masaram Gondi */ | |||
UCD_SCRIPT_Goth, /**< @brief Gothic Script */ | |||
UCD_SCRIPT_Gran, /**< @brief Grantha Script */ | |||
UCD_SCRIPT_Grek, /**< @brief Greek Script */ | |||
@@ -273,6 +274,7 @@ typedef enum ucd_script_ | |||
UCD_SCRIPT_Sind, /**< @brief Sindhi Script */ | |||
UCD_SCRIPT_Sinh, /**< @brief Sinhala Script */ | |||
UCD_SCRIPT_Sora, /**< @brief Sora Sompeng Script */ | |||
UCD_SCRIPT_Soyo, /**< @brief Soyombo */ | |||
UCD_SCRIPT_Sund, /**< @brief Sundanese Script */ | |||
UCD_SCRIPT_Sylo, /**< @brief Syloti Nagri Script */ | |||
UCD_SCRIPT_Syrc, /**< @brief Syriac Script */ | |||
@@ -302,6 +304,7 @@ typedef enum ucd_script_ | |||
UCD_SCRIPT_Xpeo, /**< @brief Old Persian Script */ | |||
UCD_SCRIPT_Xsux, /**< @brief Cuneiform Script */ | |||
UCD_SCRIPT_Yiii, /**< @brief Yi Script */ | |||
UCD_SCRIPT_Zanb, /**< @brief Zanabazar Square */ | |||
UCD_SCRIPT_Zinh, /**< @brief Inherited Script */ | |||
UCD_SCRIPT_Zmth, /**< @brief Mathematical Notation */ | |||
UCD_SCRIPT_Zsym, /**< @brief Symbols */ | |||
@@ -366,6 +369,8 @@ typedef uint64_t ucd_property; | |||
#define UCD_PROPERTY_EMOJI_PRESENTATION 0x0000000400000000ull /**< @brief Emoji_Presentation */ | |||
#define UCD_PROPERTY_EMOJI_MODIFIER 0x0000000800000000ull /**< @brief Emoji_Modifier */ | |||
#define UCD_PROPERTY_EMOJI_MODIFIER_BASE 0x0000001000000000ull /**< @brief Emoji_Modifier_Base */ | |||
#define UCD_PROPERTY_REGIONAL_INDICATOR 0x0000002000000000ull /**< @brief Regional_Indicator */ | |||
#define UCD_PROPERTY_EMOJI_COMPONENT 0x0000004000000000ull /**< @brief Emoji_Component */ | |||
// eSpeak NG extended properties: | |||
#define ESPEAKNG_PROPERTY_INVERTED_TERMINAL_PUNCTUATION 0x0010000000000000ull /**< @brief Inverted_Terminal_Punctuation */ | |||
@@ -679,6 +684,7 @@ namespace ucd | |||
Geok = UCD_SCRIPT_Geok, /**< @brief Khutsuri Script */ | |||
Geor = UCD_SCRIPT_Geor, /**< @brief Georgian Script */ | |||
Glag = UCD_SCRIPT_Glag, /**< @brief Glagolitic Script */ | |||
Gonm = UCD_SCRIPT_Gonm, /**< @brief Masaram Gondi */ | |||
Goth = UCD_SCRIPT_Goth, /**< @brief Gothic Script */ | |||
Gran = UCD_SCRIPT_Gran, /**< @brief Grantha Script */ | |||
Grek = UCD_SCRIPT_Grek, /**< @brief Greek Script */ | |||
@@ -776,6 +782,7 @@ namespace ucd | |||
Sind = UCD_SCRIPT_Sind, /**< @brief Sindhi Script */ | |||
Sinh = UCD_SCRIPT_Sinh, /**< @brief Sinhala Script */ | |||
Sora = UCD_SCRIPT_Sora, /**< @brief Sora Sompeng Script */ | |||
Soyo = UCD_SCRIPT_Soyo, /**< @brief Soyombo */ | |||
Sund = UCD_SCRIPT_Sund, /**< @brief Sundanese Script */ | |||
Sylo = UCD_SCRIPT_Sylo, /**< @brief Syloti Nagri Script */ | |||
Syrc = UCD_SCRIPT_Syrc, /**< @brief Syriac Script */ | |||
@@ -805,6 +812,7 @@ namespace ucd | |||
Xpeo = UCD_SCRIPT_Xpeo, /**< @brief Old Persian Script */ | |||
Xsux = UCD_SCRIPT_Xsux, /**< @brief Cuneiform Script */ | |||
Yiii = UCD_SCRIPT_Yiii, /**< @brief Yi Script */ | |||
Zanb = UCD_SCRIPT_Zanb, /**< @brief Zanabazar Square */ | |||
Zinh = UCD_SCRIPT_Zinh, /**< @brief Inherited Script */ | |||
Zmth = UCD_SCRIPT_Zmth, /**< @brief Mathematical Notation */ | |||
Zsym = UCD_SCRIPT_Zsym, /**< @brief Symbols */ | |||
@@ -876,6 +884,8 @@ namespace ucd | |||
Emoji_Presentation = UCD_PROPERTY_EMOJI_PRESENTATION, /**< @brief Emoji_Presentation */ | |||
Emoji_Modifier = UCD_PROPERTY_EMOJI_MODIFIER, /**< @brief Emoji_Modifier */ | |||
Emoji_Modifier_Base = UCD_PROPERTY_EMOJI_MODIFIER_BASE, /**< @brief Emoji_Modifier_Base */ | |||
Regional_Indicator = UCD_PROPERTY_REGIONAL_INDICATOR, /**< @brief Regional_Indicator */ | |||
Emoji_Component = UCD_PROPERTY_EMOJI_COMPONENT, /**< @brief Emoji_Component */ | |||
}; | |||
/** @brief Return the properties of the specified codepoint. |
@@ -78,9 +78,6 @@ static ucd_property properties_Cn(codepoint_t c) | |||
case 0x2000: | |||
if (c == 0x2065) return UCD_PROPERTY_OTHER_DEFAULT_IGNORABLE_CODE_POINT; | |||
break; | |||
case 0x2300: | |||
if (c == 0x23FF) return UCD_PROPERTY_PATTERN_SYNTAX; | |||
break; | |||
case 0x2400: | |||
if (c >= 0x2427 && c <= 0x243F) return UCD_PROPERTY_PATTERN_SYNTAX; | |||
if (c >= 0x244B && c <= 0x245F) return UCD_PROPERTY_PATTERN_SYNTAX; | |||
@@ -90,7 +87,7 @@ static ucd_property properties_Cn(codepoint_t c) | |||
if (c >= 0x2B96 && c <= 0x2B97) return UCD_PROPERTY_PATTERN_SYNTAX; | |||
if (c >= 0x2BBA && c <= 0x2BBC) return UCD_PROPERTY_PATTERN_SYNTAX; | |||
if (c == 0x2BC9) return UCD_PROPERTY_PATTERN_SYNTAX; | |||
if (c >= 0x2BD2 && c <= 0x2BEB) return UCD_PROPERTY_PATTERN_SYNTAX; | |||
if (c >= 0x2BD3 && c <= 0x2BEB) return UCD_PROPERTY_PATTERN_SYNTAX; | |||
if (c >= 0x2BF0 && c <= 0x2BFF) return UCD_PROPERTY_PATTERN_SYNTAX; | |||
break; | |||
case 0x2E00: | |||
@@ -148,11 +145,11 @@ static ucd_property properties_Ll(codepoint_t c) | |||
if (c == 0x029D) return UCD_PROPERTY_SOFT_DOTTED; | |||
break; | |||
case 0x0300: | |||
if (c >= 0x03D0 && c <= 0x03D2) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu | |||
if (c >= 0x03D0 && c <= 0x03D2) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */ | |||
if (c == 0x03D5) return UCD_PROPERTY_OTHER_MATH; | |||
if (c >= 0x03F0 && c <= 0x03F1) return UCD_PROPERTY_OTHER_MATH; | |||
if (c == 0x03F3) return UCD_PROPERTY_SOFT_DOTTED; | |||
if (c >= 0x03F4 && c <= 0x03F5) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu | |||
if (c >= 0x03F4 && c <= 0x03F5) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */ | |||
break; | |||
case 0x0400: | |||
if (c == 0x0456) return UCD_PROPERTY_SOFT_DOTTED; | |||
@@ -166,12 +163,12 @@ static ucd_property properties_Ll(codepoint_t c) | |||
if (c == 0x1ECB) return UCD_PROPERTY_SOFT_DOTTED; | |||
break; | |||
case 0x2100: | |||
if (c >= 0x210A && c <= 0x2113) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu | |||
if (c >= 0x212F && c <= 0x2131) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu | |||
if (c >= 0x2133 && c <= 0x2134) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu | |||
if (c >= 0x210A && c <= 0x2113) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */ | |||
if (c >= 0x212F && c <= 0x2131) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */ | |||
if (c >= 0x2133 && c <= 0x2134) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */ | |||
if (c == 0x2139) return UCD_PROPERTY_EMOJI; | |||
if (c >= 0x213C && c <= 0x213F) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu | |||
if (c >= 0x2145 && c <= 0x2147) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu | |||
if (c >= 0x213C && c <= 0x213F) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */ | |||
if (c >= 0x2145 && c <= 0x2147) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */ | |||
if (c >= 0x2148 && c <= 0x2149) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_SOFT_DOTTED; | |||
break; | |||
case 0xFF00: | |||
@@ -179,45 +176,45 @@ static ucd_property properties_Ll(codepoint_t c) | |||
break; | |||
case 0x01D400: | |||
if (c >= 0x01D422 && c <= 0x01D423) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_SOFT_DOTTED; | |||
if (c >= 0x01D400 && c <= 0x01D454) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu | |||
if (c >= 0x01D400 && c <= 0x01D454) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */ | |||
if (c >= 0x01D456 && c <= 0x01D457) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_SOFT_DOTTED; | |||
if (c >= 0x01D48A && c <= 0x01D48B) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_SOFT_DOTTED; | |||
if (c >= 0x01D458 && c <= 0x01D49C) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu | |||
if (c >= 0x01D4AE && c <= 0x01D4B9) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu | |||
if (c >= 0x01D458 && c <= 0x01D49C) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */ | |||
if (c >= 0x01D4AE && c <= 0x01D4B9) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */ | |||
if (c == 0x01D4BB) return UCD_PROPERTY_OTHER_MATH; | |||
if (c >= 0x01D4BE && c <= 0x01D4BF) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_SOFT_DOTTED; | |||
if (c >= 0x01D4BD && c <= 0x01D4C3) return UCD_PROPERTY_OTHER_MATH; | |||
if (c >= 0x01D4F2 && c <= 0x01D4F3) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_SOFT_DOTTED; | |||
if (c >= 0x01D4C5) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu | |||
if (c >= 0x01D4C5) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */ | |||
break; | |||
case 0x01D500: | |||
if (c <= 0x01D505) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu | |||
if (c <= 0x01D505) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */ | |||
if (c >= 0x01D526 && c <= 0x01D527) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_SOFT_DOTTED; | |||
if (c >= 0x01D51E && c <= 0x01D539) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu | |||
if (c >= 0x01D51E && c <= 0x01D539) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */ | |||
if (c >= 0x01D55A && c <= 0x01D55B) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_SOFT_DOTTED; | |||
if (c >= 0x01D58E && c <= 0x01D58F) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_SOFT_DOTTED; | |||
if (c >= 0x01D5C2 && c <= 0x01D5C3) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_SOFT_DOTTED; | |||
if (c >= 0x01D5F6 && c <= 0x01D5F7) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_SOFT_DOTTED; | |||
if (c >= 0x01D552) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu | |||
if (c >= 0x01D552) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */ | |||
break; | |||
case 0x01D600: | |||
if (c >= 0x01D62A && c <= 0x01D62B) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_SOFT_DOTTED; | |||
if (c >= 0x01D65E && c <= 0x01D65F) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_SOFT_DOTTED; | |||
if (c >= 0x01D692 && c <= 0x01D693) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_SOFT_DOTTED; | |||
if (c <= 0x01D6A5) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu | |||
if (c <= 0x01D6A5) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */ | |||
if (c >= 0x01D6C2 && c <= 0x01D6DA) return UCD_PROPERTY_OTHER_MATH; | |||
if (c >= 0x01D6DC && c <= 0x01D6FA) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu | |||
if (c >= 0x01D6DC && c <= 0x01D6FA) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */ | |||
if (c >= 0x01D6FC) return UCD_PROPERTY_OTHER_MATH; | |||
break; | |||
case 0x01D700: | |||
if (c <= 0x01D714) return UCD_PROPERTY_OTHER_MATH; | |||
if (c >= 0x01D716 && c <= 0x01D734) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu | |||
if (c >= 0x01D716 && c <= 0x01D734) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */ | |||
if (c >= 0x01D736 && c <= 0x01D74E) return UCD_PROPERTY_OTHER_MATH; | |||
if (c >= 0x01D750 && c <= 0x01D76E) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu | |||
if (c >= 0x01D750 && c <= 0x01D76E) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */ | |||
if (c >= 0x01D770 && c <= 0x01D788) return UCD_PROPERTY_OTHER_MATH; | |||
if (c >= 0x01D78A && c <= 0x01D7A8) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu | |||
if (c >= 0x01D78A && c <= 0x01D7A8) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */ | |||
if (c >= 0x01D7AA && c <= 0x01D7C2) return UCD_PROPERTY_OTHER_MATH; | |||
if (c >= 0x01D7C4 && c <= 0x01D7CB) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu | |||
if (c >= 0x01D7C4 && c <= 0x01D7CB) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */ | |||
break; | |||
} | |||
return 0; | |||
@@ -332,7 +329,7 @@ static ucd_property properties_Lm(codepoint_t c) | |||
break; | |||
case 0x016F00: | |||
if (c >= 0x016F93 && c <= 0x016F9F) return UCD_PROPERTY_DIACRITIC; | |||
if (c == 0x016FE0) return UCD_PROPERTY_EXTENDER; | |||
if (c >= 0x016FE0 && c <= 0x016FE1) return UCD_PROPERTY_EXTENDER; | |||
break; | |||
} | |||
return 0; | |||
@@ -407,19 +404,21 @@ static ucd_property properties_Lo_ideographic(codepoint_t c) | |||
{ | |||
case 0x000000: | |||
if (c >= 0x3400 && c <= 0x4DB5) return UCD_PROPERTY_IDEOGRAPHIC | UCD_PROPERTY_UNIFIED_IDEOGRAPH; | |||
if (c >= 0x4E00 && c <= 0x9FD5) return UCD_PROPERTY_IDEOGRAPHIC | UCD_PROPERTY_UNIFIED_IDEOGRAPH; | |||
if (c >= 0x4E00 && c <= 0x9FEA) return UCD_PROPERTY_IDEOGRAPHIC | UCD_PROPERTY_UNIFIED_IDEOGRAPH; | |||
if (c >= 0xF900 && c <= 0xFA6D) return UCD_PROPERTY_IDEOGRAPHIC; | |||
if (c >= 0xFA70 && c <= 0xFAD9) return UCD_PROPERTY_IDEOGRAPHIC; | |||
break; | |||
case 0x010000: | |||
if (c >= 0x017000 && c <= 0x0187EC) return UCD_PROPERTY_IDEOGRAPHIC; | |||
if (c >= 0x018800 && c <= 0x018AF2) return UCD_PROPERTY_IDEOGRAPHIC; | |||
if (c >= 0x01B170 && c <= 0x01B2FB) return UCD_PROPERTY_IDEOGRAPHIC; | |||
break; | |||
case 0x020000: | |||
if (c >= 0x020000 && c <= 0x02A6D6) return UCD_PROPERTY_IDEOGRAPHIC | UCD_PROPERTY_UNIFIED_IDEOGRAPH; | |||
if (c >= 0x02A700 && c <= 0x02B734) return UCD_PROPERTY_IDEOGRAPHIC | UCD_PROPERTY_UNIFIED_IDEOGRAPH; | |||
if (c >= 0x02B740 && c <= 0x02B81D) return UCD_PROPERTY_IDEOGRAPHIC | UCD_PROPERTY_UNIFIED_IDEOGRAPH; | |||
if (c >= 0x02B820 && c <= 0x02CEA1) return UCD_PROPERTY_IDEOGRAPHIC | UCD_PROPERTY_UNIFIED_IDEOGRAPH; | |||
if (c >= 0x02CEB0 && c <= 0x02EBE0) return UCD_PROPERTY_IDEOGRAPHIC | UCD_PROPERTY_UNIFIED_IDEOGRAPH; | |||
if (c >= 0x02F800 && c <= 0x02FA1D) return UCD_PROPERTY_IDEOGRAPHIC; | |||
break; | |||
} | |||
@@ -434,8 +433,8 @@ static ucd_property properties_Lu(codepoint_t c) | |||
if (c >= 0x0041 && c <= 0x0046) return UCD_PROPERTY_HEX_DIGIT | UCD_PROPERTY_ASCII_HEX_DIGIT; | |||
break; | |||
case 0x0300: | |||
if (c >= 0x03D0 && c <= 0x03D2) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu | |||
if (c >= 0x03F4 && c <= 0x03F5) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu | |||
if (c >= 0x03D0 && c <= 0x03D2) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */ | |||
if (c >= 0x03F4 && c <= 0x03F5) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */ | |||
break; | |||
case 0xFF00: | |||
if (c >= 0xFF21 && c <= 0xFF26) return UCD_PROPERTY_HEX_DIGIT; | |||
@@ -443,49 +442,49 @@ static ucd_property properties_Lu(codepoint_t c) | |||
case 0x2100: | |||
if (c == 0x2102) return UCD_PROPERTY_OTHER_MATH; | |||
if (c == 0x2107) return UCD_PROPERTY_OTHER_MATH; | |||
if (c >= 0x210A && c <= 0x2113) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu | |||
if (c >= 0x210A && c <= 0x2113) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */ | |||
if (c == 0x2115) return UCD_PROPERTY_OTHER_MATH; | |||
if (c >= 0x2119 && c <= 0x211D) return UCD_PROPERTY_OTHER_MATH; | |||
if (c == 0x2124) return UCD_PROPERTY_OTHER_MATH; | |||
if (c == 0x2128) return UCD_PROPERTY_OTHER_MATH; | |||
if (c >= 0x212C && c <= 0x212D) return UCD_PROPERTY_OTHER_MATH; | |||
if (c >= 0x212F && c <= 0x2131) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu | |||
if (c >= 0x2133 && c <= 0x2134) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu | |||
if (c >= 0x213C && c <= 0x213F) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu | |||
if (c >= 0x2145 && c <= 0x2149) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu | |||
if (c >= 0x212F && c <= 0x2131) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */ | |||
if (c >= 0x2133 && c <= 0x2134) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */ | |||
if (c >= 0x213C && c <= 0x213F) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */ | |||
if (c >= 0x2145 && c <= 0x2149) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */ | |||
break; | |||
case 0x01D400: | |||
if (c >= 0x01D400 && c <= 0x01D454) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu | |||
if (c >= 0x01D456 && c <= 0x01D49C) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu | |||
if (c >= 0x01D400 && c <= 0x01D454) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */ | |||
if (c >= 0x01D456 && c <= 0x01D49C) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */ | |||
if (c >= 0x01D49E && c <= 0x01D49F) return UCD_PROPERTY_OTHER_MATH; | |||
if (c == 0x01D4A2) return UCD_PROPERTY_OTHER_MATH; | |||
if (c >= 0x01D4A5 && c <= 0x01D4A6) return UCD_PROPERTY_OTHER_MATH; | |||
if (c >= 0x01D4A9 && c <= 0x01D4AC) return UCD_PROPERTY_OTHER_MATH; | |||
if (c >= 0x01D4AE && c <= 0x01D4B9) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu | |||
if (c >= 0x01D4C5) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu | |||
if (c >= 0x01D4AE && c <= 0x01D4B9) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */ | |||
if (c >= 0x01D4C5) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */ | |||
break; | |||
case 0x01D500: | |||
if (c <= 0x01D505) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu | |||
if (c <= 0x01D505) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */ | |||
if (c >= 0x01D507 && c <= 0x01D50A) return UCD_PROPERTY_OTHER_MATH; | |||
if (c >= 0x01D50D && c <= 0x01D514) return UCD_PROPERTY_OTHER_MATH; | |||
if (c >= 0x01D516 && c <= 0x01D51C) return UCD_PROPERTY_OTHER_MATH; | |||
if (c >= 0x01D51E && c <= 0x01D539) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu | |||
if (c >= 0x01D51E && c <= 0x01D539) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */ | |||
if (c >= 0x01D53B && c <= 0x01D53E) return UCD_PROPERTY_OTHER_MATH; | |||
if (c >= 0x01D540 && c <= 0x01D544) return UCD_PROPERTY_OTHER_MATH; | |||
if (c == 0x01D546) return UCD_PROPERTY_OTHER_MATH; | |||
if (c >= 0x01D54A && c <= 0x01D550) return UCD_PROPERTY_OTHER_MATH; | |||
if (c >= 0x01D552) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu | |||
if (c >= 0x01D552) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */ | |||
break; | |||
case 0x01D600: | |||
if (c <= 0x01D6A5) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu | |||
if (c <= 0x01D6A5) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */ | |||
if (c >= 0x01D6A8 && c <= 0x01D6C0) return UCD_PROPERTY_OTHER_MATH; | |||
if (c >= 0x01D6DC && c <= 0x01D6FA) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu | |||
if (c >= 0x01D6DC && c <= 0x01D6FA) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */ | |||
break; | |||
case 0x01D700: | |||
if (c >= 0x01D716 && c <= 0x01D734) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu | |||
if (c >= 0x01D750 && c <= 0x01D76E) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu | |||
if (c >= 0x01D78A && c <= 0x01D7A8) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu | |||
if (c >= 0x01D7C4 && c <= 0x01D7CB) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu | |||
if (c >= 0x01D716 && c <= 0x01D734) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */ | |||
if (c >= 0x01D750 && c <= 0x01D76E) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */ | |||
if (c >= 0x01D78A && c <= 0x01D7A8) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */ | |||
if (c >= 0x01D7C4 && c <= 0x01D7CB) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */ | |||
break; | |||
} | |||
return 0; | |||
@@ -613,6 +612,7 @@ static ucd_property properties_Mc(codepoint_t c) | |||
if (c >= 0x1C34 && c <= 0x1C35) return UCD_PROPERTY_OTHER_ALPHABETIC; | |||
if (c == 0x1CE1) return UCD_PROPERTY_DIACRITIC; | |||
if (c >= 0x1CF2 && c <= 0x1CF3) return UCD_PROPERTY_OTHER_ALPHABETIC; | |||
if (c == 0x1CF7) return UCD_PROPERTY_DIACRITIC; | |||
break; | |||
case 0x3000: | |||
if (c >= 0x302E && c <= 0x302F) return UCD_PROPERTY_DIACRITIC | UCD_PROPERTY_OTHER_GRAPHEME_EXTEND; | |||
@@ -709,6 +709,12 @@ static ucd_property properties_Mc(codepoint_t c) | |||
if (c >= 0x011720 && c <= 0x011721) return UCD_PROPERTY_OTHER_ALPHABETIC; | |||
if (c == 0x011726) return UCD_PROPERTY_OTHER_ALPHABETIC; | |||
break; | |||
case 0x011A00: | |||
if (c >= 0x011A07 && c <= 0x011A08) return UCD_PROPERTY_OTHER_ALPHABETIC; | |||
if (c == 0x011A39) return UCD_PROPERTY_OTHER_ALPHABETIC; | |||
if (c >= 0x011A57 && c <= 0x011A58) return UCD_PROPERTY_OTHER_ALPHABETIC; | |||
if (c == 0x011A97) return UCD_PROPERTY_OTHER_ALPHABETIC; | |||
break; | |||
case 0x011C00: | |||
if (c == 0x011C2F) return UCD_PROPERTY_OTHER_ALPHABETIC; | |||
if (c == 0x011C3E) return UCD_PROPERTY_OTHER_ALPHABETIC; | |||
@@ -818,6 +824,8 @@ static ucd_property properties_Mn(codepoint_t c) | |||
if (c >= 0x0AC7 && c <= 0x0AC8) return UCD_PROPERTY_OTHER_ALPHABETIC; | |||
if (c == 0x0ACD) return UCD_PROPERTY_DIACRITIC; | |||
if (c >= 0x0AE2 && c <= 0x0AE3) return UCD_PROPERTY_OTHER_ALPHABETIC; | |||
if (c >= 0x0AFA && c <= 0x0AFC) return UCD_PROPERTY_OTHER_ALPHABETIC; | |||
if (c >= 0x0AFD && c <= 0x0AFF) return UCD_PROPERTY_DIACRITIC; | |||
break; | |||
case 0x0B00: | |||
if (c == 0x0B01) return UCD_PROPERTY_OTHER_ALPHABETIC; | |||
@@ -848,7 +856,8 @@ static ucd_property properties_Mn(codepoint_t c) | |||
if (c >= 0x0CE2 && c <= 0x0CE3) return UCD_PROPERTY_OTHER_ALPHABETIC; | |||
break; | |||
case 0x0D00: | |||
if (c == 0x0D01) return UCD_PROPERTY_OTHER_ALPHABETIC; | |||
if (c >= 0x0D00 && c <= 0x0D01) return UCD_PROPERTY_OTHER_ALPHABETIC; | |||
if (c >= 0x0D3B && c <= 0x0D3C) return UCD_PROPERTY_DIACRITIC; | |||
if (c >= 0x0D41 && c <= 0x0D44) return UCD_PROPERTY_OTHER_ALPHABETIC; | |||
if (c == 0x0D4D) return UCD_PROPERTY_DIACRITIC; | |||
if (c >= 0x0D62 && c <= 0x0D63) return UCD_PROPERTY_OTHER_ALPHABETIC; | |||
@@ -964,7 +973,7 @@ static ucd_property properties_Mn(codepoint_t c) | |||
case 0x1D00: | |||
if (c >= 0x1DC4 && c <= 0x1DCF) return UCD_PROPERTY_DIACRITIC; | |||
if (c >= 0x1DE7 && c <= 0x1DF4) return UCD_PROPERTY_OTHER_ALPHABETIC; | |||
if (c == 0x1DF5) return UCD_PROPERTY_DIACRITIC; | |||
if (c >= 0x1DF5 && c <= 0x1DF9) return UCD_PROPERTY_DIACRITIC; | |||
if (c >= 0x1DFD && c <= 0x1DFF) return UCD_PROPERTY_DIACRITIC; | |||
break; | |||
case 0x2000: | |||
@@ -1111,6 +1120,16 @@ static ucd_property properties_Mn(codepoint_t c) | |||
if (c >= 0x011727 && c <= 0x01172A) return UCD_PROPERTY_OTHER_ALPHABETIC; | |||
if (c == 0x01172B) return UCD_PROPERTY_DIACRITIC; | |||
break; | |||
case 0x011A00: | |||
if (c >= 0x011A01 && c <= 0x011A0A) return UCD_PROPERTY_OTHER_ALPHABETIC; | |||
if (c == 0x011A34) return UCD_PROPERTY_DIACRITIC; | |||
if (c >= 0x011A35 && c <= 0x011A3E) return UCD_PROPERTY_OTHER_ALPHABETIC; | |||
if (c == 0x011A47) return UCD_PROPERTY_DIACRITIC; | |||
if (c >= 0x011A51 && c <= 0x011A5B) return UCD_PROPERTY_OTHER_ALPHABETIC; | |||
if (c >= 0x011A8A && c <= 0x011A96) return UCD_PROPERTY_OTHER_ALPHABETIC; | |||
if (c == 0x011A98) return UCD_PROPERTY_EXTENDER; | |||
if (c == 0x011A99) return UCD_PROPERTY_DIACRITIC; | |||
break; | |||
case 0x011C00: | |||
if (c >= 0x011C30 && c <= 0x011C36) return UCD_PROPERTY_OTHER_ALPHABETIC; | |||
if (c >= 0x011C38 && c <= 0x011C3D) return UCD_PROPERTY_OTHER_ALPHABETIC; | |||
@@ -1120,6 +1139,16 @@ static ucd_property properties_Mn(codepoint_t c) | |||
if (c >= 0x011CB2 && c <= 0x011CB3) return UCD_PROPERTY_OTHER_ALPHABETIC; | |||
if (c >= 0x011CB5 && c <= 0x011CB6) return UCD_PROPERTY_OTHER_ALPHABETIC; | |||
break; | |||
case 0x011D00: | |||
if (c >= 0x011D31 && c <= 0x011D36) return UCD_PROPERTY_OTHER_ALPHABETIC; | |||
if (c == 0x011D3A) return UCD_PROPERTY_OTHER_ALPHABETIC; | |||
if (c >= 0x011D3C && c <= 0x011D3D) return UCD_PROPERTY_OTHER_ALPHABETIC; | |||
if (c >= 0x011D3F && c <= 0x011D41) return UCD_PROPERTY_OTHER_ALPHABETIC; | |||
if (c == 0x011D42) return UCD_PROPERTY_DIACRITIC; | |||
if (c == 0x011D43) return UCD_PROPERTY_OTHER_ALPHABETIC; | |||
if (c >= 0x011D44 && c <= 0x011D45) return UCD_PROPERTY_DIACRITIC; | |||
if (c == 0x011D47) return UCD_PROPERTY_OTHER_ALPHABETIC; | |||
break; | |||
case 0x016A00: | |||
if (c >= 0x016AF0 && c <= 0x016AF4) return UCD_PROPERTY_DIACRITIC; | |||
break; | |||
@@ -1165,7 +1194,7 @@ static ucd_property properties_Nd(codepoint_t c) | |||
switch (c & 0xFFFFFF00) | |||
{ | |||
case 0x0000: | |||
if (c >= 0x0030 && c <= 0x0039) return UCD_PROPERTY_HEX_DIGIT | UCD_PROPERTY_ASCII_HEX_DIGIT | UCD_PROPERTY_EMOJI; | |||
if (c >= 0x0030 && c <= 0x0039) return UCD_PROPERTY_HEX_DIGIT | UCD_PROPERTY_ASCII_HEX_DIGIT | UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_COMPONENT; | |||
break; | |||
case 0xFF00: | |||
if (c >= 0xFF10 && c <= 0xFF19) return UCD_PROPERTY_HEX_DIGIT; | |||
@@ -1279,10 +1308,10 @@ static ucd_property properties_Pe(codepoint_t c) | |||
break; | |||
case 0x2700: | |||
if (c == 0x27C6) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_PATTERN_SYNTAX; | |||
if (c >= 0x27E6 && c <= 0x27EF) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_PATTERN_SYNTAX; // Pe|Ps | |||
if (c >= 0x27E6 && c <= 0x27EF) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_PATTERN_SYNTAX; /* Pe|Ps */ | |||
return UCD_PROPERTY_PATTERN_SYNTAX; | |||
case 0x2900: | |||
return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_PATTERN_SYNTAX; // Pe|Ps | |||
return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_PATTERN_SYNTAX; /* Pe|Ps */ | |||
case 0x2E00: | |||
return UCD_PROPERTY_PATTERN_SYNTAX; | |||
case 0x3000: | |||
@@ -1337,9 +1366,9 @@ static ucd_property properties_Po(codepoint_t c) | |||
case 0x0000: | |||
if (c == 0x0021) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_SENTENCE_TERMINAL | UCD_PROPERTY_PATTERN_SYNTAX | ESPEAKNG_PROPERTY_EXCLAMATION_MARK; | |||
if (c == 0x0022) return UCD_PROPERTY_QUOTATION_MARK | UCD_PROPERTY_PATTERN_SYNTAX; | |||
if (c == 0x0023) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_PATTERN_SYNTAX; | |||
if (c == 0x0023) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_PATTERN_SYNTAX | UCD_PROPERTY_EMOJI_COMPONENT; | |||
if (c == 0x0027) return UCD_PROPERTY_QUOTATION_MARK | UCD_PROPERTY_PATTERN_SYNTAX; | |||
if (c == 0x002A) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_PATTERN_SYNTAX; | |||
if (c == 0x002A) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_PATTERN_SYNTAX | UCD_PROPERTY_EMOJI_COMPONENT; | |||
if (c == 0x002C) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_PATTERN_SYNTAX | ESPEAKNG_PROPERTY_COMMA; | |||
if (c == 0x002E) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_SENTENCE_TERMINAL | UCD_PROPERTY_PATTERN_SYNTAX | ESPEAKNG_PROPERTY_FULL_STOP; | |||
if (c == 0x003A) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_PATTERN_SYNTAX | ESPEAKNG_PROPERTY_COLON; | |||
@@ -1609,6 +1638,11 @@ static ucd_property properties_Po(codepoint_t c) | |||
case 0x11700: | |||
if (c >= 0x01173C && c <= 0x01173E) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_SENTENCE_TERMINAL; | |||
break; | |||
case 0x11A00: | |||
if (c >= 0x011A42 && c <= 0x011A43) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_SENTENCE_TERMINAL; | |||
if (c >= 0x011A9B && c <= 0x011A9C) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_SENTENCE_TERMINAL; | |||
if (c >= 0x011AA1 && c <= 0x011AA2) return UCD_PROPERTY_TERMINAL_PUNCTUATION; | |||
break; | |||
case 0x11C00: | |||
if (c >= 0x011C41 && c <= 0x011C42) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_SENTENCE_TERMINAL; | |||
if (c == 0x011C43) return UCD_PROPERTY_TERMINAL_PUNCTUATION; | |||
@@ -1664,7 +1698,7 @@ static ucd_property properties_Ps(codepoint_t c) | |||
break; | |||
case 0x2700: | |||
if (c == 0x27C5) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_PATTERN_SYNTAX; | |||
if (c >= 0x27E6 && c <= 0x27EF) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_PATTERN_SYNTAX; // Pe|Ps | |||
if (c >= 0x27E6 && c <= 0x27EF) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_PATTERN_SYNTAX; /* Pe|Ps */ | |||
return UCD_PROPERTY_PATTERN_SYNTAX; | |||
case 0x2900: | |||
return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_PATTERN_SYNTAX; | |||
@@ -1746,7 +1780,7 @@ static ucd_property properties_Sk(codepoint_t c) | |||
if (c == 0xFFE3) return UCD_PROPERTY_DIACRITIC; | |||
break; | |||
case 0x01F300: | |||
return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION | UCD_PROPERTY_EMOJI_MODIFIER; | |||
return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION | UCD_PROPERTY_EMOJI_MODIFIER | UCD_PROPERTY_EMOJI_COMPONENT; | |||
} | |||
return 0; | |||
} | |||
@@ -1839,7 +1873,7 @@ static ucd_property properties_So(codepoint_t c) | |||
if (c == 0x23F3) return UCD_PROPERTY_PATTERN_SYNTAX | UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION; | |||
if (c >= 0x23E9 && c <= 0x23F3) return UCD_PROPERTY_PATTERN_SYNTAX | UCD_PROPERTY_EMOJI; | |||
if (c >= 0x23F8 && c <= 0x23FA) return UCD_PROPERTY_PATTERN_SYNTAX | UCD_PROPERTY_EMOJI; | |||
if (c >= 0x23E3 && c <= 0x23FE) return UCD_PROPERTY_PATTERN_SYNTAX; | |||
if (c >= 0x23E3) return UCD_PROPERTY_PATTERN_SYNTAX; | |||
break; | |||
case 0x2400: | |||
if (c >= 0x2400 && c <= 0x244A) return UCD_PROPERTY_PATTERN_SYNTAX; | |||
@@ -1979,7 +2013,7 @@ static ucd_property properties_So(codepoint_t c) | |||
if (c >= 0x01F170 && c <= 0x01F189) return UCD_PROPERTY_OTHER_ALPHABETIC | UCD_PROPERTY_OTHER_UPPERCASE; | |||
if (c == 0x01F18E) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION; | |||
if (c >= 0x01F191 && c <= 0x01F19A) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION; | |||
if (c >= 0x01F1E6) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION; | |||
if (c >= 0x01F1E6) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION | UCD_PROPERTY_REGIONAL_INDICATOR | UCD_PROPERTY_EMOJI_COMPONENT; | |||
break; | |||
case 0x01F200: | |||
if (c == 0x01F201) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION; | |||
@@ -2074,23 +2108,29 @@ static ucd_property properties_So(codepoint_t c) | |||
if (c >= 0x01F6EB && c <= 0x01F6EC) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION; | |||
if (c == 0x01F6F0) return UCD_PROPERTY_EMOJI; | |||
if (c == 0x01F6F3) return UCD_PROPERTY_EMOJI; | |||
if (c >= 0x01F6F4 && c <= 0x01F6F6) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION; | |||
if (c >= 0x01F6F4 && c <= 0x01F6F8) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION; | |||
break; | |||
case 0x01F900: | |||
if (c <= 0x01F90B) return 0; | |||
if (c >= 0x01F918 && c <= 0x01F91C) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION | UCD_PROPERTY_EMOJI_MODIFIER_BASE; | |||
if (c >= 0x01F910 && c <= 0x01F91D) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION; | |||
if (c == 0x01F91E) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION | UCD_PROPERTY_EMOJI_MODIFIER_BASE; | |||
if (c >= 0x01F91E && c <= 0x01F91F) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION | UCD_PROPERTY_EMOJI_MODIFIER_BASE; | |||
if (c == 0x01F926) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION | UCD_PROPERTY_EMOJI_MODIFIER_BASE; | |||
if (c >= 0x01F920 && c <= 0x01F927) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION; | |||
if (c == 0x01F930) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION | UCD_PROPERTY_EMOJI_MODIFIER_BASE; | |||
if (c >= 0x01F920 && c <= 0x01F92F) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION; | |||
if (c >= 0x01F930 && c <= 0x01F932) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION | UCD_PROPERTY_EMOJI_MODIFIER_BASE; | |||
if (c == 0x01F93B) return 0; | |||
if (c >= 0x01F93A && c <= 0x01F93C) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION; | |||
if (c >= 0x01F933 && c <= 0x01F93E) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION | UCD_PROPERTY_EMOJI_MODIFIER_BASE; | |||
if (c == 0x01F946) return 0; | |||
if (c >= 0x01F940 && c <= 0x01F94B) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION; | |||
if (c == 0x01F94C) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION; | |||
if (c >= 0x01F950 && c <= 0x01F95E) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION; | |||
if (c >= 0x01F95F && c <= 0x01F96B) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION; | |||
if (c >= 0x01F980 && c <= 0x01F991) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION; | |||
if (c >= 0x01F992 && c <= 0x01F997) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION; | |||
if (c == 0x01F9C0) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION; | |||
if (c >= 0x01F9D1 && c <= 0x01F9DD) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION | UCD_PROPERTY_EMOJI_MODIFIER_BASE; | |||
if (c >= 0x01F9D0 && c <= 0x01F9E6) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION; | |||
return UCD_PROPERTY_EMOJI; | |||
} | |||
return 0; | |||
@@ -2132,6 +2172,6 @@ ucd_property ucd_properties(codepoint_t c, ucd_category category) | |||
case UCD_CATEGORY_Zl: return UCD_PROPERTY_WHITE_SPACE | UCD_PROPERTY_PATTERN_WHITE_SPACE; | |||
case UCD_CATEGORY_Zp: return UCD_PROPERTY_WHITE_SPACE | UCD_PROPERTY_PATTERN_WHITE_SPACE | ESPEAKNG_PROPERTY_PARAGRAPH_SEPARATOR; | |||
case UCD_CATEGORY_Zs: return properties_Zs(c); | |||
default: return 0; // Co Cs Ii Lt Me | |||
default: return 0; /* Co Cs Ii Lt Me */ | |||
}; | |||
} |
@@ -120,6 +120,7 @@ const char *ucd_get_script_string(ucd_script s) | |||
"Geok", | |||
"Geor", | |||
"Glag", | |||
"Gonm", | |||
"Goth", | |||
"Gran", | |||
"Grek", | |||
@@ -217,6 +218,7 @@ const char *ucd_get_script_string(ucd_script s) | |||
"Sind", | |||
"Sinh", | |||
"Sora", | |||
"Soyo", | |||
"Sund", | |||
"Sylo", | |||
"Syrc", | |||
@@ -246,6 +248,7 @@ const char *ucd_get_script_string(ucd_script s) | |||
"Xpeo", | |||
"Xsux", | |||
"Yiii", | |||
"Zanb", | |||
"Zinh", | |||
"Zmth", | |||
"Zsym", |
@@ -17,6 +17,7 @@ | |||
* along with ucd-tools. If not, see <http://www.gnu.org/licenses/>. | |||
*/ | |||
#include "config.h" | |||
#include "ucd/ucd.h" | |||
#include <locale.h> | |||
@@ -25,6 +26,13 @@ | |||
#include <wchar.h> | |||
#include <wctype.h> | |||
#ifndef HAVE_ISWBLANK | |||
static int iswblank(wint_t c) | |||
{ | |||
return iswspace(c) && !(c >= 0x0A && c <= 0x0D); | |||
} | |||
#endif | |||
void fput_utf8c(FILE *out, codepoint_t c) | |||
{ | |||
if (c < 0x80) | |||
@@ -86,7 +94,7 @@ void uprintf_codepoint(FILE *out, codepoint_t c, char mode) | |||
{ | |||
switch (mode) | |||
{ | |||
case 'c': // character | |||
case 'c': /* character */ | |||
switch (c) | |||
{ | |||
case '\t': fputs("\\t", out); break; | |||
@@ -95,10 +103,10 @@ void uprintf_codepoint(FILE *out, codepoint_t c, char mode) | |||
default: fput_utf8c(out, c); break; | |||
} | |||
break; | |||
case 'h': // hexadecimal (lower) | |||
case 'h': /* hexadecimal (lower) */ | |||
fprintf(out, "%06x", c); | |||
break; | |||
case 'H': // hexadecimal (upper) | |||
case 'H': /* hexadecimal (upper) */ | |||
fprintf(out, "%06X", c); | |||
break; | |||
} | |||
@@ -108,40 +116,40 @@ void uprintf_is(FILE *out, codepoint_t c, char mode) | |||
{ | |||
switch (mode) | |||
{ | |||
case 'A': // alpha-numeric | |||
case 'A': /* alpha-numeric */ | |||
fputc(iswalnum(c) ? '1' : '0', out); | |||
break; | |||
case 'a': // alpha | |||
case 'a': /* alpha */ | |||
fputc(iswalpha(c) ? '1' : '0', out); | |||
break; | |||
case 'b': // blank | |||
case 'b': /* blank */ | |||
fputc(iswblank(c) ? '1' : '0', out); | |||
break; | |||
case 'c': // control | |||
case 'c': /* control */ | |||
fputc(iswcntrl(c) ? '1' : '0', out); | |||
break; | |||
case 'd': // numeric | |||
case 'd': /* numeric */ | |||
fputc(iswdigit(c) ? '1' : '0', out); | |||
break; | |||
case 'g': // glyph | |||
case 'g': /* glyph */ | |||
fputc(iswgraph(c) ? '1' : '0', out); | |||
break; | |||
case 'l': // lower case | |||
case 'l': /* lower case */ | |||
fputc(iswlower(c) ? '1' : '0', out); | |||
break; | |||
case 'P': // printable | |||
case 'P': /* printable */ | |||
fputc(iswprint(c) ? '1' : '0', out); | |||
break; | |||
case 'p': // punctuation | |||
case 'p': /* punctuation */ | |||
fputc(iswpunct(c) ? '1' : '0', out); | |||
break; | |||
case 's': // whitespace | |||
case 's': /* whitespace */ | |||
fputc(iswspace(c) ? '1' : '0', out); | |||
break; | |||
case 'u': // upper case | |||
case 'u': /* upper case */ | |||
fputc(iswupper(c) ? '1' : '0', out); | |||
break; | |||
case 'x': // xdigit | |||
case 'x': /* xdigit */ | |||
fputc(iswxdigit(c) ? '1' : '0', out); | |||
break; | |||
} | |||
@@ -154,31 +162,31 @@ void uprintf(FILE *out, codepoint_t c, const char *format) | |||
case '%': | |||
switch (*++format) | |||
{ | |||
case 'c': // category | |||
case 'c': /* category */ | |||
fputs(ucd_get_category_string(ucd_lookup_category(c)), out); | |||
break; | |||
case 'C': // category group | |||
case 'C': /* category group */ | |||
fputs(ucd_get_category_group_string(ucd_lookup_category_group(c)), out); | |||
break; | |||
case 'p': // codepoint | |||
case 'p': /* codepoint */ | |||
uprintf_codepoint(out, c, *++format); | |||
break; | |||
case 'P': // properties | |||
case 'P': /* properties */ | |||
fprintf(out, "%016llx", ucd_properties(c, ucd_lookup_category(c))); | |||
break; | |||
case 'i': // is* | |||
case 'i': /* is* */ | |||
uprintf_is(out, c, *++format); | |||
break; | |||
case 'L': // lowercase | |||
case 'L': /* lowercase */ | |||
uprintf_codepoint(out, towlower(c), *++format); | |||
break; | |||
case 's': // script | |||
case 's': /* script */ | |||
fputs(ucd_get_script_string(ucd_lookup_script(c)), out); | |||
break; | |||
case 'T': // titlecase | |||
case 'T': /* titlecase */ | |||
uprintf_codepoint(out, ucd_totitle(c), *++format); | |||
break; | |||
case 'U': // uppercase | |||
case 'U': /* uppercase */ | |||
uprintf_codepoint(out, towupper(c), *++format); | |||
break; | |||
} | |||
@@ -224,7 +232,8 @@ int main(int argc, char **argv) | |||
{ | |||
FILE *in = NULL; | |||
const char *format = NULL; | |||
for (int argn = 1; argn != argc; ++argn) | |||
int argn; | |||
for (argn = 1; argn != argc; ++argn) | |||
{ | |||
const char *arg = argv[argn]; | |||
if (!strcmp(arg, "--stdin") || !strcmp(arg, "-")) | |||
@@ -250,7 +259,8 @@ int main(int argc, char **argv) | |||
} | |||
else | |||
{ | |||
for (codepoint_t c = 0; c <= 0x10FFFF; ++c) | |||
codepoint_t c; | |||
for (c = 0; c <= 0x10FFFF; ++c) | |||
uprintf(stdout, c, format ? format : | |||
"%pH %s %C %c %UH %LH %TH %id %ix %ic %is %ib %ip %iP %ig %iA %ia %iu %il %P\n"); | |||
} |
@@ -83,7 +83,7 @@ void uprintf_codepoint(FILE *out, codepoint_t c, char mode) | |||
{ | |||
switch (mode) | |||
{ | |||
case 'c': // character | |||
case 'c': /* character */ | |||
switch (c) | |||
{ | |||
case '\t': fputs("\\t", out); break; | |||
@@ -92,10 +92,10 @@ void uprintf_codepoint(FILE *out, codepoint_t c, char mode) | |||
default: fput_utf8c(out, c); break; | |||
} | |||
break; | |||
case 'h': // hexadecimal (lower) | |||
case 'h': /* hexadecimal (lower) */ | |||
fprintf(out, "%06x", c); | |||
break; | |||
case 'H': // hexadecimal (upper) | |||
case 'H': /* hexadecimal (upper) */ | |||
fprintf(out, "%06X", c); | |||
break; | |||
} | |||
@@ -105,40 +105,40 @@ void uprintf_is(FILE *out, codepoint_t c, char mode) | |||
{ | |||
switch (mode) | |||
{ | |||
case 'A': // alpha-numeric | |||
case 'A': /* alpha-numeric */ | |||
fputc(ucd_isalnum(c) ? '1' : '0', out); | |||
break; | |||
case 'a': // alpha | |||
case 'a': /* alpha */ | |||
fputc(ucd_isalpha(c) ? '1' : '0', out); | |||
break; | |||
case 'b': // blank | |||
case 'b': /* blank */ | |||
fputc(ucd_isblank(c) ? '1' : '0', out); | |||
break; | |||
case 'c': // control | |||
case 'c': /* control */ | |||
fputc(ucd_iscntrl(c) ? '1' : '0', out); | |||
break; | |||
case 'd': // numeric | |||
case 'd': /* numeric */ | |||
fputc(ucd_isdigit(c) ? '1' : '0', out); | |||
break; | |||
case 'g': // glyph | |||
case 'g': /* glyph */ | |||
fputc(ucd_isgraph(c) ? '1' : '0', out); | |||
break; | |||
case 'l': // lower case | |||
case 'l': /* lower case */ | |||
fputc(ucd_islower(c) ? '1' : '0', out); | |||
break; | |||
case 'P': // printable | |||
case 'P': /* printable */ | |||
fputc(ucd_isprint(c) ? '1' : '0', out); | |||
break; | |||
case 'p': // punctuation | |||
case 'p': /* punctuation */ | |||
fputc(ucd_ispunct(c) ? '1' : '0', out); | |||
break; | |||
case 's': // whitespace | |||
case 's': /* whitespace */ | |||
fputc(ucd_isspace(c) ? '1' : '0', out); | |||
break; | |||
case 'u': // upper case | |||
case 'u': /* upper case */ | |||
fputc(ucd_isupper(c) ? '1' : '0', out); | |||
break; | |||
case 'x': // xdigit | |||
case 'x': /* xdigit */ | |||
fputc(ucd_isxdigit(c) ? '1' : '0', out); | |||
break; | |||
} | |||
@@ -151,31 +151,31 @@ void uprintf(FILE *out, codepoint_t c, const char *format) | |||
case '%': | |||
switch (*++format) | |||
{ | |||
case 'c': // category | |||
case 'c': /* category */ | |||
fputs(ucd_get_category_string(ucd_lookup_category(c)), out); | |||
break; | |||
case 'C': // category group | |||
case 'C': /* category group */ | |||
fputs(ucd_get_category_group_string(ucd_lookup_category_group(c)), out); | |||
break; | |||
case 'p': // codepoint | |||
case 'p': /* codepoint */ | |||
uprintf_codepoint(out, c, *++format); | |||
break; | |||
case 'P': // properties | |||
case 'P': /* properties */ | |||
fprintf(out, "%016llx", ucd_properties(c, ucd_lookup_category(c))); | |||
break; | |||
case 'i': // is* | |||
case 'i': /* is* */ | |||
uprintf_is(out, c, *++format); | |||
break; | |||
case 'L': // lowercase | |||
case 'L': /* lowercase */ | |||
uprintf_codepoint(out, ucd_tolower(c), *++format); | |||
break; | |||
case 's': // script | |||
case 's': /* script */ | |||
fputs(ucd_get_script_string(ucd_lookup_script(c)), out); | |||
break; | |||
case 'T': // titlecase | |||
case 'T': /* titlecase */ | |||
uprintf_codepoint(out, ucd_totitle(c), *++format); | |||
break; | |||
case 'U': // uppercase | |||
case 'U': /* uppercase */ | |||
uprintf_codepoint(out, ucd_toupper(c), *++format); | |||
break; | |||
} | |||
@@ -221,7 +221,8 @@ int main(int argc, char **argv) | |||
{ | |||
FILE *in = NULL; | |||
const char *format = NULL; | |||
for (int argn = 1; argn != argc; ++argn) | |||
int argn; | |||
for (argn = 1; argn != argc; ++argn) | |||
{ | |||
const char *arg = argv[argn]; | |||
if (!strcmp(arg, "--stdin") || !strcmp(arg, "-")) | |||
@@ -245,7 +246,8 @@ int main(int argc, char **argv) | |||
} | |||
else | |||
{ | |||
for (codepoint_t c = 0; c <= 0x10FFFF; ++c) | |||
codepoint_t c; | |||
for (c = 0; c <= 0x10FFFF; ++c) | |||
uprintf(stdout, c, format ? format : | |||
"%pH %s %C %c %UH %LH %TH %id %ix %ic %is %ib %ip %iP %ig %iA %ia %iu %il %P\n"); | |||
} |
@@ -51,14 +51,15 @@ if __name__ == '__main__': | |||
* along with ucd-tools. If not, see <http://www.gnu.org/licenses/>. | |||
*/ | |||
// NOTE: This file is automatically generated from the UnicodeData.txt file in | |||
// the Unicode Character database by the ucd-tools/tools/categories.py script. | |||
/* NOTE: This file is automatically generated from the UnicodeData.txt file in | |||
* the Unicode Character database by the ucd-tools/tools/categories.py script. | |||
*/ | |||
#include "ucd/ucd.h" | |||
#include <stddef.h> | |||
// Unicode Character Data %s | |||
/* Unicode Character Data %s */ | |||
struct case_conversion_entry | |||
{ |
@@ -110,8 +110,9 @@ if __name__ == '__main__': | |||
* along with ucd-tools. If not, see <http://www.gnu.org/licenses/>. | |||
*/ | |||
// NOTE: This file is automatically generated from the UnicodeData.txt file in | |||
// the Unicode Character database by the ucd-tools/tools/categories.py script. | |||
/* NOTE: This file is automatically generated from the UnicodeData.txt file in | |||
* the Unicode Character database by the ucd-tools/tools/categories.py script. | |||
*/ | |||
#include "ucd/ucd.h" | |||
@@ -149,7 +150,7 @@ if __name__ == '__main__': | |||
#define Zs UCD_CATEGORY_Zs | |||
#define Ii UCD_CATEGORY_Ii | |||
// Unicode Character Data %s | |||
/* Unicode Character Data %s */ | |||
""" % ucd_version) | |||
for category in special_categories: | |||
@@ -187,7 +188,7 @@ if __name__ == '__main__': | |||
sys.stdout.write('{\n') | |||
for codepoint, table in sorted(category_tables[table_index].items()): | |||
if isinstance(table, str): | |||
sys.stdout.write('\tcategories_%s, // %s\n' % (table, codepoint)) | |||
sys.stdout.write('\tcategories_%s, /* %s */\n' % (table, codepoint)) | |||
else: | |||
sys.stdout.write('\tcategories_%s,\n' % codepoint) | |||
sys.stdout.write('};\n') | |||
@@ -197,14 +198,14 @@ if __name__ == '__main__': | |||
sys.stdout.write('{\n') | |||
for codepoints, category, comment in category_sets: | |||
if category: | |||
sys.stdout.write('\tif (c <= 0x%s) return %s; // %s : %s\n' % (codepoints.last, category, codepoints, comment)) | |||
sys.stdout.write('\tif (c <= 0x%s) return %s; /* %s : %s */\n' % (codepoints.last, category, codepoints, comment)) | |||
else: | |||
sys.stdout.write('\tif (c <= 0x%s) // %s\n' % (codepoints.last, codepoints)) | |||
sys.stdout.write('\tif (c <= 0x%s) /* %s */\n' % (codepoints.last, codepoints)) | |||
sys.stdout.write('\t{\n') | |||
sys.stdout.write('\t\tconst uint8_t *table = categories_%s_%s[(c - 0x%s) / 256];\n' % (codepoints.first, codepoints.last, codepoints.first)) | |||
sys.stdout.write('\t\treturn (ucd_category)table[c % 256];\n') | |||
sys.stdout.write('\t}\n') | |||
sys.stdout.write('\treturn Ii; // Invalid Unicode Codepoint\n') | |||
sys.stdout.write('\treturn Ii; /* Invalid Unicode Codepoint */\n') | |||
sys.stdout.write('}\n') | |||
sys.stdout.write(""" |
@@ -166,6 +166,8 @@ def properties(data): | |||
props += (2 ** 34) * data.get('Emoji_Presentation', 0) # emoji-data | |||
props += (2 ** 35) * data.get('Emoji_Modifier', 0) # emoji-data | |||
props += (2 ** 36) * data.get('Emoji_Modifier_Base', 0) # emoji-data | |||
props += (2 ** 37) * data.get('Regional_Indicator', 0) # PropList 10.0.0 | |||
props += (2 ** 38) * data.get('Emoji_Component', 0) # emoji-data 5.0 | |||
# eSpeak NG extended properties: | |||
props += (2 ** 52) * data.get('Inverted_Terminal_Punctuation', 0) | |||
props += (2 ** 53) * data.get('Punctuation_In_Word', 0) |
@@ -104,8 +104,9 @@ if __name__ == '__main__': | |||
* along with ucd-tools. If not, see <http://www.gnu.org/licenses/>. | |||
*/ | |||
// NOTE: This file is automatically generated from the Scripts.txt file in | |||
// the Unicode Character database by the ucd-tools/tools/scripts.py script. | |||
/* NOTE: This file is automatically generated from the Scripts.txt file in | |||
* the Unicode Character database by the ucd-tools/tools/scripts.py script. | |||
*/ | |||
#include "ucd/ucd.h" | |||
@@ -152,6 +153,7 @@ if __name__ == '__main__': | |||
#define Geok UCD_SCRIPT_Geok | |||
#define Geor UCD_SCRIPT_Geor | |||
#define Glag UCD_SCRIPT_Glag | |||
#define Gonm UCD_SCRIPT_Gonm | |||
#define Goth UCD_SCRIPT_Goth | |||
#define Gran UCD_SCRIPT_Gran | |||
#define Grek UCD_SCRIPT_Grek | |||
@@ -249,6 +251,7 @@ if __name__ == '__main__': | |||
#define Sind UCD_SCRIPT_Sind | |||
#define Sinh UCD_SCRIPT_Sinh | |||
#define Sora UCD_SCRIPT_Sora | |||
#define Soyo UCD_SCRIPT_Soyo | |||
#define Sund UCD_SCRIPT_Sund | |||
#define Sylo UCD_SCRIPT_Sylo | |||
#define Syrc UCD_SCRIPT_Syrc | |||
@@ -278,6 +281,7 @@ if __name__ == '__main__': | |||
#define Xpeo UCD_SCRIPT_Xpeo | |||
#define Xsux UCD_SCRIPT_Xsux | |||
#define Yiii UCD_SCRIPT_Yiii | |||
#define Zanb UCD_SCRIPT_Zanb | |||
#define Zinh UCD_SCRIPT_Zinh | |||
#define Zmth UCD_SCRIPT_Zmth | |||
#define Zsym UCD_SCRIPT_Zsym | |||
@@ -285,7 +289,7 @@ if __name__ == '__main__': | |||
#define Zyyy UCD_SCRIPT_Zyyy | |||
#define Zzzz UCD_SCRIPT_Zzzz | |||
// Unicode Character Data %s | |||
/* Unicode Character Data %s */ | |||
""" % ucd_version) | |||
for script in special_scripts: | |||
@@ -323,7 +327,7 @@ if __name__ == '__main__': | |||
sys.stdout.write('{\n') | |||
for codepoint, table in sorted(script_tables[table_index].items()): | |||
if isinstance(table, str): | |||
sys.stdout.write('\tscripts_%s, // %s\n' % (table, codepoint)) | |||
sys.stdout.write('\tscripts_%s, /* %s */\n' % (table, codepoint)) | |||
else: | |||
sys.stdout.write('\tscripts_%s,\n' % codepoint) | |||
sys.stdout.write('};\n') | |||
@@ -333,12 +337,12 @@ if __name__ == '__main__': | |||
sys.stdout.write('{\n') | |||
for codepoints, script, comment in script_sets: | |||
if script: | |||
sys.stdout.write('\tif (c <= 0x%s) return %s; // %s : %s\n' % (codepoints.last, script, codepoints, comment)) | |||
sys.stdout.write('\tif (c <= 0x%s) return %s; /* %s : %s */\n' % (codepoints.last, script, codepoints, comment)) | |||
else: | |||
sys.stdout.write('\tif (c <= 0x%s) // %s\n' % (codepoints.last, codepoints)) | |||
sys.stdout.write('\tif (c <= 0x%s) /* %s */\n' % (codepoints.last, codepoints)) | |||
sys.stdout.write('\t{\n') | |||
sys.stdout.write('\t\tconst uint8_t *table = scripts_%s_%s[(c - 0x%s) / 256];\n' % (codepoints.first, codepoints.last, codepoints.first)) | |||
sys.stdout.write('\t\treturn (ucd_script)table[c % 256];\n') | |||
sys.stdout.write('\t}\n') | |||
sys.stdout.write('\treturn Zzzz; // Invalid Unicode Codepoint\n') | |||
sys.stdout.write('\treturn Zzzz; /* Invalid Unicode Codepoint */\n') | |||
sys.stdout.write('}\n') |