| # ucd-tools wide-character compatibility support: | # ucd-tools wide-character compatibility support: | ||||
| UCDTOOLS_SRC_PATH := ../../ucd-tools/src | |||||
| UCDTOOLS_SRC_PATH := ../../src/ucd-tools/src | |||||
| UCDTOOLS_SRC_FILES := \ | UCDTOOLS_SRC_FILES := \ | ||||
| $(subst $(LOCAL_PATH)/$(UCDTOOLS_SRC_PATH),$(UCDTOOLS_SRC_PATH),$(wildcard $(LOCAL_PATH)/$(UCDTOOLS_SRC_PATH)/*.c*)) | $(subst $(LOCAL_PATH)/$(UCDTOOLS_SRC_PATH),$(UCDTOOLS_SRC_PATH),$(wildcard $(LOCAL_PATH)/$(UCDTOOLS_SRC_PATH)/*.c*)) | ||||
| alaska al'aska | alaska al'aska | ||||
| albanië alb'A:ne@:@- | albanië alb'A:ne@:@- | ||||
| algerië alx2'e@re@:@- | algerië alx2'e@re@:@- | ||||
| alhambra al'ambra | |||||
| amanzimtoti $4 | amanzimtoti $4 | ||||
| antwerpen antv&rp@n | antwerpen antv&rp@n | ||||
| avignon _^_FR | avignon _^_FR | ||||
| oklahoma @Ukl@h'@Uma | oklahoma @Ukl@h'@Uma | ||||
| outeniekwa @Ut@n'ikwa | outeniekwa @Ut@n'ikwa | ||||
| oxford _^_EN | oxford _^_EN | ||||
| paardeneiland pA:rd@n_'eIlant | |||||
| palermo pal'&rmu | palermo pal'&rmu | ||||
| pelindaba p&l@nd'A:ba | pelindaba p&l@nd'A:ba | ||||
| perú p@ru | perú p@ru | ||||
| potchefstroom pOtSIfstr'o@m | potchefstroom pOtSIfstr'o@m | ||||
| rhône _^_FR | rhône _^_FR | ||||
| richardsbaai ritS@dsb'AI | richardsbaai ritS@dsb'AI | ||||
| riversdal r@v@rsdal | |||||
| riviersonderend r@fi:rsOn@r_'Ent | riviersonderend r@fi:rsOn@r_'Ent | ||||
| robertson _^_EN | robertson _^_EN | ||||
| salvador salvadO:r | salvador salvadO:r | ||||
| jane _^_EN | jane _^_EN | ||||
| janine dZ@ni:n | janine dZ@ni:n | ||||
| Jean ZA~n $capital | Jean ZA~n $capital | ||||
| jeff _^_EN | |||||
| jesebel je@s@b&l | jesebel je@s@b&l | ||||
| jessica _^_EN | jessica _^_EN | ||||
| jimmy _^_EN | jimmy _^_EN | ||||
| stuart _^_EN | stuart _^_EN | ||||
| suzanne suz'A:n | suzanne suz'A:n | ||||
| suzette suz'Et | suzette suz'Et | ||||
| sylvia _^_EN | |||||
| tania tanja | tania tanja | ||||
| telemann te@l@man | telemann te@l@man | ||||
| terblanche t@rblA:nS | terblanche t@rblA:nS | ||||
| thelma _^_EN | |||||
| theo tiu | theo tiu | ||||
| theron tr'On | theron tr'On | ||||
| thessalonicense tEsalo@nis'E:ns@ | thessalonicense tEsalo@nis'E:ns@ | ||||
| viviers v@v@je@ | viviers v@v@je@ | ||||
| wilhelm v@lh&l@-m | wilhelm v@lh&l@-m | ||||
| williston _^_EN | williston _^_EN | ||||
| woltemade vOlt@mA:d@ | |||||
| khumalo kum'A:lu | khumalo kum'A:lu | ||||
| zuma zu:ma | zuma zu:ma | ||||
| antares $2 | antares $2 | ||||
| beatles _^_EN | beatles _^_EN | ||||
| boeing _^_EN | boeing _^_EN | ||||
| cadillac _^_EN | |||||
| checkers tSEk@rs | checkers tSEk@rs | ||||
| chevrolet _^_FR | chevrolet _^_FR | ||||
| chrysler kraIsl@r | chrysler kraIsl@r | ||||
| forma _^_LA | forma _^_LA | ||||
| grata _^_LA | grata _^_LA | ||||
| habitatio _^_LA | habitatio _^_LA | ||||
| inclusio _^_LA | |||||
| inclusio _^_LA | |||||
| (in camera) @n||kam@ra | |||||
| (in debiti) _^_LA | (in debiti) _^_LA | ||||
| (in absentia) _^_LA | (in absentia) _^_LA | ||||
| (in extremis) _^_LA | (in extremis) _^_LA | ||||
| // main word list | // main word list | ||||
| aangaande $2 | aangaande $2 | ||||
| aanmerklik $2 | |||||
| aanstaande $2 | aanstaande $2 | ||||
| (a cappella) a||kap'&la | (a cappella) a||kap'&la | ||||
| adagio ad'A:dZi;%@U | adagio ad'A:dZi;%@U | ||||
| alge alx2@ | alge alx2@ | ||||
| algehele alx2@h,e@l@ | algehele alx2@h,e@l@ | ||||
| allegro al'Egru | allegro al'Egru | ||||
| allengs alENs | |||||
| allergene $3 | allergene $3 | ||||
| almiskie $3 | almiskie $3 | ||||
| alom al_'Om | alom al_'Om | ||||
| babelaas bab@lA:s | babelaas bab@lA:s | ||||
| barrikade $3 | barrikade $3 | ||||
| beaming b@_A:m@N | |||||
| bedewete be@d@ve@t@ | bedewete be@d@ve@t@ | ||||
| beide beId@ | beide beId@ | ||||
| bekaf b&kaf | bekaf b&kaf | ||||
| belangriker b@laNr@k,Ir | belangriker b@laNr@k,Ir | ||||
| bene be@n@ | bene be@n@ | ||||
| beringde b@rINd@ | |||||
| beswil bEsv@l | beswil bEsv@l | ||||
| besnedene b@sne@d@n@ | besnedene b@sne@d@n@ | ||||
| bestes bEst@s | bestes bEst@s | ||||
| bewebeen be@v@be@n | bewebeen be@v@be@n | ||||
| bewend be@v@nt | bewend be@v@nt | ||||
| bewering b@ve@rIN | bewering b@ve@rIN | ||||
| bilharzia b@lharsia | |||||
| biopsie bi'Opsi | biopsie bi'Opsi | ||||
| bomaat bo@mA:t | bomaat bo@mA:t | ||||
| bordegoed bO:rd@x2ut | bordegoed bO:rd@x2ut | ||||
| charisma kar'Isma | charisma kar'Isma | ||||
| cinsaut s@nso@ | cinsaut s@nso@ | ||||
| cliché kliS'eI: | cliché kliS'eI: | ||||
| clientèle _^_FR | |||||
| clivia klIvija | clivia klIvija | ||||
| cognac kOn^ak | cognac kOn^ak | ||||
| confetti $2 | confetti $2 | ||||
| déjà _^_FR | déjà _^_FR | ||||
| dekade dEk'A:d@ | dekade dEk'A:d@ | ||||
| dekreling dEkre@l@N | dekreling dEkre@l@N | ||||
| demensie d@me~nsi | |||||
| deurentyd dy@r@nteIt | deurentyd dy@r@nteIt | ||||
| deurgaans $1 | deurgaans $1 | ||||
| deurkruis $2 | deurkruis $2 | ||||
| exodus Eks'o@dWs | exodus Eks'o@dWs | ||||
| factotum $2 | factotum $2 | ||||
| faktotum $2 | |||||
| fakture $2 | fakture $2 | ||||
| fetakaas fEtakA:s | fetakaas fEtakA:s | ||||
| figuur f@x2yr | figuur f@x2yr | ||||
| filippense f@l@pEns@ | filippense f@l@pEns@ | ||||
| finalis $3 | finalis $3 | ||||
| finaliste $3 | finaliste $3 | ||||
| fluktuasie $3 | |||||
| fort fOrt | fort fOrt | ||||
| forte fOrt@ | forte fOrt@ | ||||
| helaas he@l'A:s | helaas he@l'A:s | ||||
| here he@r@ | here he@r@ | ||||
| herero hEr'E:ru | herero hEr'E:ru | ||||
| herontmoeting h&r_Ontmut@N | |||||
| herrysenis h&r'eIs@n@s | herrysenis h&r'eIs@n@s | ||||
| hierso hi:rsO | hierso hi:rsO | ||||
| hippie _^_EN | hippie _^_EN | ||||
| inkluis $2 | inkluis $2 | ||||
| innestel InnEst@l | innestel InnEst@l | ||||
| insomnia @nsOmnija | insomnia @nsOmnija | ||||
| inteling Inte@l@N | |||||
| intens @nt'Ens | intens @nt'Ens | ||||
| intensiteit $4 | intensiteit $4 | ||||
| ironieë irun'i:@ | ironieë irun'i:@ | ||||
| kennisvaardig $1 | kennisvaardig $1 | ||||
| kimono $2 | kimono $2 | ||||
| klaasvakie $2 | klaasvakie $2 | ||||
| kliënteel $3 | |||||
| klimeid klImeIt | klimeid klImeIt | ||||
| knapsekêrel knaps@k&:r@l | knapsekêrel knaps@k&:r@l | ||||
| kolossense kOl@s'e~ns@ | kolossense kOl@s'e~ns@ | ||||
| korswel kOrsv@l | korswel kOrsv@l | ||||
| kotiljons kOt@lj'o~ns | kotiljons kOt@lj'o~ns | ||||
| kritiek krIt'ik | kritiek krIt'ik | ||||
| kruisteling krYyste@l@N | |||||
| kulture $2 | kulture $2 | ||||
| kunsmatige kWnsm'A:t@x2@ | kunsmatige kWnsm'A:t@x2@ | ||||
| kwansuis $2 | kwansuis $2 | ||||
| kweekwal kwe@kval | kweekwal kwe@kval | ||||
| landswye lantsveI@ | |||||
| lasagne las'anj@ | lasagne las'anj@ | ||||
| legaat l@x2A:t | legaat l@x2A:t | ||||
| legate l@x2A:t@ | legate l@x2A:t@ | ||||
| sonure sOn_yr@ | sonure sOn_yr@ | ||||
| sover so@f&r | sover so@f&r | ||||
| staccato $2 | staccato $2 | ||||
| steekhoudendheid $2 | |||||
| sterwens st&rv@ns | sterwens st&rv@ns | ||||
| stilswye $1 | stilswye $1 | ||||
| strydros streIt_rOs | strydros streIt_rOs | ||||
| thula tu:la | thula tu:la | ||||
| toegee tux2e@ | toegee tux2e@ | ||||
| toereken ture@k@n | toereken ture@k@n | ||||
| toleransie tOl@r'ansi | |||||
| tornado $2 | tornado $2 | ||||
| totale tut'A:l@ | totale tut'A:l@ | ||||
| totsiens $2 | totsiens $2 | ||||
| uitdaging YydA:x2@N | uitdaging YydA:x2@N | ||||
| uiteraard Yyt@r_'A:rt | uiteraard Yyt@r_'A:rt | ||||
| uitermate $3 | uitermate $3 | ||||
| uitgeslotene Yytx2@slo@t@n@ | |||||
| uniforme $3 | uniforme $3 | ||||
| vaarwel fA:rv'&l | vaarwel fA:rv'&l |
| @@@smokkel) ary %ar%eI // default stress: diamant-/drank-/dwelm-/goud-/kokaïensmokkelary, etc. | @@@smokkel) ary %ar%eI // default stress: diamant-/drank-/dwelm-/goud-/kokaïensmokkelary, etc. | ||||
| amarula %am%arul%a // fix stress and a sounds: amarula and compounds | amarula %am%arul%a // fix stress and a sounds: amarula and compounds | ||||
| a (ment %a // shorten a sound: perkament/testament and compounds | a (ment %a // shorten a sound: perkament/testament and compounds | ||||
| _) arendag (CAC %A:r@ntax2 // fix e sound: arendagtig/-e/-heid | |||||
| arends A:r@nts_ // fix e sound: arendsoog/-kloue/-nes | arends A:r@nts_ // fix e sound: arendsoog/-kloue/-nes | ||||
| argen (tA %arx2@n // move default stress: Argentinië/Argentyns/-e | argen (tA %arx2@n // move default stress: Argentinië/Argentyns/-e | ||||
| a (riA 'A: // akwarium/barium/estuarium/herbarium/seminaria | a (riA 'A: // akwarium/barium/estuarium/herbarium/seminaria | ||||
| attaché _%at%aSe@ // correct pronunciation: (handels-/inligtings-)attaché | attaché _%at%aSe@ // correct pronunciation: (handels-/inligtings-)attaché | ||||
| attrib (u %atr@b // move default stress: attribuut/bute/attributêr | attrib (u %atr@b // move default stress: attribuut/bute/attributêr | ||||
| K) au @U // trauma/-ties/Aucamp/Paul/-a/-us | K) au @U // trauma/-ties/Aucamp/Paul/-a/-us | ||||
| auer aU@r //Sauer etc. fixed au and e sounds in compounds. | |||||
| auer aU@r //Sauer etc. fix au and e sounds in compounds. | |||||
| augustus %Ox2WstWs | augustus %Ox2WstWs | ||||
| avokado %af%ukA:du // stress and o sounds | avokado %af%ukA:du // stress and o sounds | ||||
| avokade %af%ukA:d@ // variant form of avokado | avokade %af%ukA:d@ // variant form of avokado | ||||
| bo (grond bo@ // fix 1st o sound: bogronds/-e | bo (grond bo@ // fix 1st o sound: bogronds/-e | ||||
| _) bo (kle bo@ // fix o sound: bokleed/boklere | _) bo (kle bo@ // fix o sound: bokleed/boklere | ||||
| bokma (kier b%Okm%a // fix stress and a sound: bokmakierie/-s/-tjie | bokma (kier b%Okm%a // fix stress and a sound: bokmakierie/-s/-tjie | ||||
| bom (AA bOm_ // fix a sound, pause: bomaanval/-aanslag/-aard/-eenheid and compounds | |||||
| bom (AA bOm_ // fix a sound, pause: bomaanval/-aard/-eenheid and compounds | |||||
| bomaans (la bOm_A:ns // fix o sound: bomaanslag/-aanslae | |||||
| bomaans bo@mA:ns // but fix O sound: bomaans/-e | bomaans bo@mA:ns // but fix O sound: bomaans/-e | ||||
| bonde (C bOnd@ // verbondenheid/bondeldraer/gebondene/saambondelend | bonde (C bOnd@ // verbondenheid/bondeldraer/gebondene/saambondelend | ||||
| boos (aardig b%o@s_ // move default stress: boosaardig/-e/-heid | boos (aardig b%o@s_ // move default stress: boosaardig/-e/-heid | ||||
| bakate (l b%ak@t& // fix stress and vowel sounds: bakatel/-le/-letjie | bakate (l b%ak@t& // fix stress and vowel sounds: bakatel/-le/-letjie | ||||
| baken bA:k@n // fix e sound: afbakening and compounds | baken bA:k@n // fix e sound: afbakening and compounds | ||||
| bakte (ri b%akte@ // fix stress and e sound: bakterie/-ë | bakte (ri b%akte@ // fix stress and e sound: bakterie/-ë | ||||
| balalaika b%al%alaIk%a // fix a sounds and stress: balalaika/-s/-musiek | |||||
| _) bam (boes b%am // move default stress: bamboes/-e/bamboesfluit... | _) bam (boes b%am // move default stress: bamboes/-e/bamboesfluit... | ||||
| ba (nalA b%a // fix stress and 1st a sound: banale/banaliteit | ba (nalA b%a // fix stress and 1st a sound: banale/banaliteit | ||||
| band (Alier b,and // fix stress and d sound: band(e/o)lier/-e | band (Alier b,and // fix stress and d sound: band(e/o)lier/-e | ||||
| ba (sieli b%a // fix stress and 1st a sound: basielie/-kruit, etc. | ba (sieli b%a // fix stress and 1st a sound: basielie/-kruit, etc. | ||||
| basotho b%asut%u // fix stress and vowel sounds: Basotho/-0ponie and similar | basotho b%asut%u // fix stress and vowel sounds: Basotho/-0ponie and similar | ||||
| basi (s bA:s@ // basis and compounds | basi (s bA:s@ // basis and compounds | ||||
| basilie b%asil%i // fix a sound and stress: basilie/-kruid, etc. | |||||
| bastille b%asti:l // Bastille and compounds like Bastilledag | bastille b%asti:l // Bastille and compounds like Bastilledag | ||||
| batal (jon b%at%al // shorten 1st a sound, moved stress: bataljon and compounds | batal (jon b%at%al // shorten 1st a sound, moved stress: bataljon and compounds | ||||
| ba (tik b%a // fix stress and a sound: batik/-doek/-kuns/-werk | ba (tik b%a // fix stress and a sound: batik/-doek/-kuns/-werk | ||||
| bere (_ be@r@ //tediebere pandabere etc. | bere (_ be@r@ //tediebere pandabere etc. | ||||
| _) ber (C b&r // fix e sound: Bert/Berta/Bertie/Bertus/berke/-boom | _) ber (C b&r // fix e sound: Bert/Berta/Bertie/Bertus/berke/-boom | ||||
| beste (_ bEst@ // fix e sound: beste/allerbeste/naasbeste/-s | beste (_ bEst@ // fix e sound: beste/allerbeste/naasbeste/-s | ||||
| beton (C@ b@tOn // split ng sounds: betongebou/-gietsel/-gruis | |||||
| beton (inC@ b@t'On // fix O sound: betoninrigting/-ingenieur | |||||
| be (weging b@ // draaibeweging/swaaibeweging | be (weging b@ // draaibeweging/swaaibeweging | ||||
| bewende be@v@nd@ // first e pronounced long | bewende be@v@nd@ // first e pronounced long | ||||
| bewe (rig be@v@ // fix e sound and stress: bewerig/-e/-heid | bewe (rig be@v@ // fix e sound and stress: bewerig/-e/-heid | ||||
| dia (C d%i%a // diabeet/diafragma/diagnose/dialek/dialoog/diamant | dia (C d%i%a // diabeet/diafragma/diagnose/dialek/dialoog/diamant | ||||
| diaken d%iA:k@n // diaken and compounds | diaken d%iA:k@n // diaken and compounds | ||||
| diako (nie d%i%ak%u // fix stress and o sound: diakonie/-ë | diako (nie d%i%ak%u // fix stress and o sound: diakonie/-ë | ||||
| diende dind@ // fix last e sound in many compounds of bediende: bediendekamer | |||||
| diens (willi d%ins // move default stress: dienswillig/-e/-heid | diens (willi d%ins // move default stress: dienswillig/-e/-heid | ||||
| dieper (@ dip@r // fix e sound: diepere/dieperliggend/-e | dieper (@ dip@r // fix e sound: diepere/dieperliggend/-e | ||||
| digi (ta d%ix2%i // move default stress: digitaal/digitale | digi (ta d%ix2%i // move default stress: digitaal/digitale | ||||
| @C) e (reekK @ // fix e sound: duine-/rotsereeks | @C) e (reekK @ // fix e sound: duine-/rotsereeks | ||||
| aai) e (C @ // fix e sound: baaierd/waaierstert/paaiement/compounds starting with baaiers- | aai) e (C @ // fix e sound: baaierd/waaierstert/paaiement/compounds starting with baaiers- | ||||
| lat) eres @r@s //lateres | lat) eres @r@s //lateres | ||||
| @C) erm (K &r@-m // a catch-all for words ending in -erm(s) | |||||
| fp) ers (_ &rs // fix e sound: dof-/gif-/olyf-/tydskrifpers, etc. | fp) ers (_ &rs // fix e sound: dof-/gif-/olyf-/tydskrifpers, etc. | ||||
| bloup) ers &:rs // fix e sound: bloupers | bloup) ers &:rs // fix e sound: bloupers | ||||
| iew) ers (ter @rs // fix e sound: (l)iewerster | iew) ers (ter @rs // fix e sound: (l)iewerster | ||||
| p) e (talje @ // fix stress and 1st e sound: petalje and compounds | p) e (talje @ // fix stress and 1st e sound: petalje and compounds | ||||
| l) e (moen @ //fix lemoen and compounds | l) e (moen @ //fix lemoen and compounds | ||||
| Cy) e (C+ @ // fix connecting e sound in compounds like byekorf/skilderyemuseum, etc. | Cy) e (C+ @ // fix connecting e sound in compounds like byekorf/skilderyemuseum, etc. | ||||
| effe (kleur Ef@ // fix stress and 2nd e sound: effekleur(ig/-e) | |||||
| eier eI@r // eiergeel/eierwit/leierfiguur/leiergroep | eier eI@r // eiergeel/eierwit/leierfiguur/leiergroep | ||||
| @C) ei (land _,eI // insert short pause: skiereiland and many -eiland compounds | @C) ei (land _,eI // insert short pause: skiereiland and many -eiland compounds | ||||
| @) ei (sen _'eI // (on)veeleisend/spoedeisend | @) ei (sen _'eI // (on)veeleisend/spoedeisend | ||||
| flu (we fl%y // move default stress: fluweel/fluwele and compounds | flu (we fl%y // move default stress: fluweel/fluwele and compounds | ||||
| _) fok (o fOk_ // fix o sound, insert short break: fokop/fokof | _) fok (o fOk_ // fix o sound, insert short break: fokop/fokof | ||||
| folio fo@li_u // fix o sounds: folio and compounds | folio fo@li_u // fix o sounds: folio and compounds | ||||
| fondsw fOntsv // fix v sound: fondswerwing & compounds/-waardes | |||||
| fondue f%Ondy // fix stress and ue sound: fondue and compounds | fondue f%Ondy // fix stress and ue sound: fondue and compounds | ||||
| fone (tiek f%o@n@ // fix e sound: fonetiek and compounds | fone (tiek f%o@n@ // fix e sound: fonetiek and compounds | ||||
| fone (tie f%une@ // fix stress and o sound: foneties/-e | fone (tie f%une@ // fix stress and o sound: foneties/-e | ||||
| formi (da f%Orm%i // move default stress: formidabel/-e | formi (da f%Orm%i // move default stress: formidabel/-e | ||||
| formu (lier f%Orm%y // move default stress: formulier/-e/-boek | formu (lier f%Orm%y // move default stress: formulier/-e/-boek | ||||
| _) for (se_ f'Or // stress back to 1st slb; se rule: _) for (C | _) for (se_ f'Or // stress back to 1st slb; se rule: _) for (C | ||||
| forse (nd fOrs@ // fix e sound: forsend/-e | |||||
| fos (fa f%Os // move default stress: fosfaat/fosfate | fos (fa f%Os // move default stress: fosfaat/fosfate | ||||
| fo (ssiel f%O // move default stress: fossiel/-e and compounds | fo (ssiel f%O // move default stress: fossiel/-e and compounds | ||||
| foto fo@tu | foto fo@tu | ||||
| _) gra (na x2r@ // granaat(boom)/granate | _) gra (na x2r@ // granaat(boom)/granate | ||||
| grandi (o x2r%and%i // move default stress: grandioos/-ose | grandi (o x2r%and%i // move default stress: grandioos/-ose | ||||
| _) gra (vA x2r%a // fix stress and a sound: gravin/-ne/gravure | _) gra (vA x2r%a // fix stress and a sound: gravin/-ne/gravure | ||||
| grenadella gr@n@d&la // fix stress and vowels: grenadella and compounds like -heining | |||||
| griekwa x2rikwa // fix w sound and a sound in compounds: Griekwa/-land, etc. | griekwa x2rikwa // fix w sound and a sound in compounds: Griekwa/-land, etc. | ||||
| ouCa) gr (ootjie x2r' // move default stress: ouma/oupagrootjie/-s | ouCa) gr (ootjie x2r' // move default stress: ouma/oupagrootjie/-s | ||||
| _) graad (e x2r%A:t_ // fix stress and d sound: graadeen(tjies)/-elfs | _) graad (e x2r%A:t_ // fix stress and d sound: graadeen(tjies)/-elfs | ||||
| gegesel x2@x2e@s@l // fix middle e sound | gegesel x2@x2e@s@l // fix middle e sound | ||||
| K) gele (_ x2e@l@ // fix stress and e sound: (eier)gele | K) gele (_ x2e@l@ // fix stress and e sound: (eier)gele | ||||
| _) geler x2e@l@r // stress and 1st e sound: geler/gelerig | _) geler x2e@l@r // stress and 1st e sound: geler/gelerig | ||||
| _) gell x2&l | |||||
| gemel (d x2@m&l // fix e sound: bo-/bowe-/laas-/meergemeld/-e | gemel (d x2@m&l // fix e sound: bo-/bowe-/laas-/meergemeld/-e | ||||
| gems x2Ems // fix e sound: (baster)gemsbok/-bul/-ooi, etc. | gems x2Ems // fix e sound: (baster)gemsbok/-bul/-ooi, etc. | ||||
| gene (_ x2e@n@ // gene/diegene | gene (_ x2e@n@ // gene/diegene | ||||
| _) ge (ossP2 x2@ // but ge- prefix: geossilleer/geossifiseer, etc. | _) ge (ossP2 x2@ // but ge- prefix: geossilleer/geossifiseer, etc. | ||||
| ni) ge (ri x2'e@ //nigeriese etc. | ni) ge (ri x2'e@ //nigeriese etc. | ||||
| gese (_ x2'e@s@ //Portugese, and others | gese (_ x2'e@s@ //Portugese, and others | ||||
| gesp (etjie x2Esp // fix e sounds: gespetjie/-s | |||||
| gespe (_ x2Esp@ // gespe and compounds | gespe (_ x2Esp@ // gespe and compounds | ||||
| gespes (_ x2Esp@s // gespes and compounds | gespes (_ x2Esp@s // gespes and compounds | ||||
| gewens (g x2e@v@ns // (on)vergewensgesind/-e/-heid | gewens (g x2e@v@ns // (on)vergewensgesind/-e/-heid | ||||
| hart (stogte_ h%art // move default stress: hartstogtelik | hart (stogte_ h%art // move default stress: hartstogtelik | ||||
| hart (stogte_N hart // restore default stress: hartstogte | hart (stogte_N hart // restore default stress: hartstogte | ||||
| ha (we hA: // fix stress and a sound: hawearbeider/lewendehaweafdeling | ha (we hA: // fix stress and a sound: hawearbeider/lewendehaweafdeling | ||||
| _) hef (a hEf_ // fix e sound, insert break: hefapparaat/-arm(s) | |||||
| hia (sint h%ij%a // fix stress: hiasint/-e and compounds | hia (sint h%ij%a // fix stress: hiasint/-e and compounds | ||||
| _) hi (bis h%i // move default stress: hibiskus/-se and compounds | _) hi (bis h%i // move default stress: hibiskus/-se and compounds | ||||
| hierna (maal h%i:rnA: // fix stress and a sound: hiernamaals/-e | hierna (maal h%i:rnA: // fix stress and a sound: hiernamaals/-e | ||||
| _) idi (o %id%i // idioom/idiome/idioot | _) idi (o %id%i // idioom/idiome/idioot | ||||
| _) id (A %id // idille/idillies/ideëryk | _) id (A %id // idille/idillies/ideëryk | ||||
| &l) iker (C @k@r // fix i sound: menslikerwys/redelikerwyse, etc. | &l) iker (C @k@r // fix i sound: menslikerwys/redelikerwyse, etc. | ||||
| &l) iker (_ @k@r // fix i sound: afstootliker/(ge)redeliker/onberispeliker | |||||
| illumi (nA %il%um%i // fix stress and vowel sounds: illuminasie/illumineer/illuminati | illumi (nA %il%um%i // fix stress and vowel sounds: illuminasie/illumineer/illuminati | ||||
| illu (si %ily // illusie/-s/illusief | illu (si %ily // illusie/-s/illusief | ||||
| illu (strA %il%W // fix i sound: illustreer/illustrering/illustrasie | illu (strA %il%W // fix i sound: illustreer/illustrering/illustrasie | ||||
| _) in (a@P2 In // inakkuraat/inaktief/inaktiwiteit | _) in (a@P2 In // inakkuraat/inaktief/inaktiwiteit | ||||
| _) inbe (lC Inb%& // fix e sound in compounds like inbelprogram | _) inbe (lC Inb%& // fix e sound in compounds like inbelprogram | ||||
| indone (si @nd%uni: // fix stress and vowel sounds: Indonesië/Indonesiese | indone (si @nd%uni: // fix stress and vowel sounds: Indonesië/Indonesiese | ||||
| s) in (gestel @n // split n g: compounds with ingesteldheid/winsingestelde | |||||
| _) in (oe In_ // fix i sound, pause: inoefen/-oes and derivatives | _) in (oe In_ // fix i sound, pause: inoefen/-oes and derivatives | ||||
| invest (eer @nv%Est // fix stress, v and e sounds: investeer/-der | invest (eer @nv%Est // fix stress, v and e sounds: investeer/-der | ||||
| investe (r@ @nv%Este@ // fix e sound: (kapitaal)investering/investerende | investe (r@ @nv%Este@ // fix e sound: (kapitaal)investering/investerende | ||||
| krieketw krik@tv //krieket followed by w in compounds always v | krieketw krik@tv //krieket followed by w in compounds always v | ||||
| krokodi (l kr%Ok@dI // (wyfie)krokodil/-le | krokodi (l kr%Ok@dI // (wyfie)krokodil/-le | ||||
| kro (niek kr%u // fix stress and o sound: kroniek and many compounds | |||||
| kruger kr'Y@@r | kruger kr'Y@@r | ||||
| ku (ba@ k%y // move default stress: kubaan/kubane | ku (ba@ k%y // move default stress: kubaan/kubane | ||||
| ku (biek k%y // move default stress: kubiek/-e/-getal | ku (biek k%y // move default stress: kubiek/-e/-getal | ||||
| _) kuber kyb@r // fixed e sound: kuberruim(te) | |||||
| _) kuber kyb@r // fix e sound: kuberruim(te) | |||||
| kulin k%Wl%in // Move default stress and fix u sound: kulinër/-e | kulin k%Wl%in // Move default stress and fix u sound: kulinër/-e | ||||
| @) kundi (g k'Wnd@ // wiskundige/onoordeelkundigheid and many similar | @) kundi (g k'Wnd@ // wiskundige/onoordeelkundigheid and many similar | ||||
| kurwe kWrv@ // fix e sound: kurwes/skurwebas/skurwebek/skurwepadda | kurwe kWrv@ // fix e sound: kurwes/skurwebas/skurwebek/skurwepadda | ||||
| kafe (te k%af@ // fix stress and vowel sounds: kafeteria and compounds | kafe (te k%af@ // fix stress and vowel sounds: kafeteria and compounds | ||||
| kafe (ï k%af%i // fix stress and vowel sounds: kafeïene and compounds | kafe (ï k%af%i // fix stress and vowel sounds: kafeïene and compounds | ||||
| ka (jak k%a // fix stress and 1st a sound: kajak/-ke/-vaarder | ka (jak k%a // fix stress and 1st a sound: kajak/-ke/-vaarder | ||||
| kakao k%akA:w // fix stress and vowel sounds: kakao and many compounds | |||||
| kake (C kA:k@ // kakebeen/skakelaar/skakelbord/skakelfunksie | kake (C kA:k@ // kakebeen/skakelaar/skakelbord/skakelfunksie | ||||
| kalahari kalah'A:ri // stress: Kalahari/-sand/-woestyn | kalahari kalah'A:ri // stress: Kalahari/-sand/-woestyn | ||||
| _) ka (lAnC k%a // kalender and compounds/kalant/kalander and compounds | _) ka (lAnC k%a // kalender and compounds/kalant/kalander and compounds | ||||
| kontrasep k%Ontr%asEp // fix stress and e sound: kontrasepsie and derivatives | kontrasep k%Ontr%asEp // fix stress and e sound: kontrasepsie and derivatives | ||||
| kontrover (s k%Ontr%uv&r // o and v sounds: kontroversie/kontroversieel | kontrover (s k%Ontr%uv&r // o and v sounds: kontroversie/kontroversieel | ||||
| kop (o kOp? // fix o sound: koponderstebo/kopomdraai/kopoperasie/gryskoponderwyser/poenskopolifant | kop (o kOp? // fix o sound: koponderstebo/kopomdraai/kopoperasie/gryskoponderwyser/poenskopolifant | ||||
| kop (agtig k%Op_ // fix o sound, insert break: hamer-/spinnekop-/penkopagtig(e(s)) | |||||
| kopu (lA k%Op%y // fix o sound: kopulasie/kopuleer and derivatives | kopu (lA k%Op%y // fix o sound: kopulasie/kopuleer and derivatives | ||||
| _) kor (dA k%Or // kordaat/kordon | _) kor (dA k%Or // kordaat/kordon | ||||
| ko (rint k%u // fix stress and o sound: korint/-e and compounds | ko (rint k%u // fix stress and o sound: korint/-e and compounds | ||||
| loboto (mie l%ub%Ot%u // fix stress and o sounds: lobotomie | loboto (mie l%ub%Ot%u // fix stress and o sounds: lobotomie | ||||
| lo (ja l%u // fix stress and o sound: lojale/lojaliteit | lo (ja l%u // fix stress and o sound: lojale/lojaliteit | ||||
| lo (kalA l%u // fix stress and o sound: lokale/ontvangslokale, etc. | lo (kalA l%u // fix stress and o sound: lokale/ontvangslokale, etc. | ||||
| loke (t l%ukE // fix stress and vowel sounds: many compounds with loket | |||||
| lom (bardA l%Om // move default stress: Lombardies/-e/Lombarde | lom (bardA l%Om // move default stress: Lombardies/-e/Lombarde | ||||
| _) lore (C@ lo@r@ // fix e sound: verloregaan/verloregoederekantoor/Verlorerivier | _) lore (C@ lo@r@ // fix e sound: verloregaan/verloregoederekantoor/Verlorerivier | ||||
| _) losge (@P5 l'Osx2@ | _) losge (@P5 l'Osx2@ | ||||
| medisyne m@d@seIn@ //medisyne and compounds | medisyne m@d@seIn@ //medisyne and compounds | ||||
| meganies m@x2'A:nis | meganies m@x2'A:nis | ||||
| _) meege (@P5 m'e@x2@ | _) meege (@P5 m'e@x2@ | ||||
| _) meegewe (nd me@x2e@v@ // fix e sounds and stress: meegewend(e) | |||||
| me (juf m@ // move default stress and shorten e sound | me (juf m@ // move default stress and shorten e sound | ||||
| me (laats m@ // fix stress and e sound: melaats/-e/-heid | me (laats m@ // fix stress and e sound: melaats/-e/-heid | ||||
| melancholie (_N m%El%aNk%o@li // fix stress and 1st e sound: melancholie | melancholie (_N m%El%aNk%o@li // fix stress and 1st e sound: melancholie | ||||
| morf (otomie m%Orf // move default stress: morfotomie | morf (otomie m%Orf // move default stress: morfotomie | ||||
| _) morr (i mOr // restore default stress: morrig/morrie/-doring | _) morr (i mOr // restore default stress: morrig/morrie/-doring | ||||
| _) mors (@ mOrs // restore default stress: morsaf/morsdood/morsig | _) mors (@ mOrs // restore default stress: morsaf/morsdood/morsig | ||||
| mos (agtig mOs_ // fix o sound, insert break: (kos)mosagtig(e) | |||||
| mosam (biek m%o@s%am // move default stress: Mosambiek/-er/-se | mosam (biek m%o@s%am // move default stress: Mosambiek/-er/-se | ||||
| mosa (ïek m%o@s%a // move default stress: mosaïek and compounds | mosa (ïek m%o@s%a // move default stress: mosaïek and compounds | ||||
| mo (skee m%O // move default stress: moskee/-s and compounds | mo (skee m%O // move default stress: moskee/-s and compounds | ||||
| ne (anderCa n%i // move default stress: Neander(d/t)al/-ler | ne (anderCa n%i // move default stress: Neander(d/t)al/-ler | ||||
| neger (in n%e@x2@r // move default stress: negerin/-ne | neger (in n%e@x2@r // move default stress: negerin/-ne | ||||
| ne (gosie n@ // fix stress and e sound: negosie/-ware, etc. | ne (gosie n@ // fix stress and e sound: negosie/-ware, etc. | ||||
| nek (_ n&k // catch-all for words ending in -nek: koedoe-/swaannek | |||||
| nek (om n&k_ // fix e sound, insert pause: nekom(ge)draai | nek (om n&k_ // fix e sound, insert pause: nekom(ge)draai | ||||
| nekta (rien n%Ekt%a // fix stress and a sound: nektarien/-perske, etc. | nekta (rien n%Ekt%a // fix stress and a sound: nektarien/-perske, etc. | ||||
| neo (li n%i%u // fix stress and vowel sounds: neolities/-e/neolitikum | neo (li n%i%u // fix stress and vowel sounds: neolities/-e/neolitikum | ||||
| nood (lotti n%o@t // move default stress: noodlottig/-e | nood (lotti n%o@t // move default stress: noodlottig/-e | ||||
| nood (saak n%o@t // move default stress: noodsaaklik/-e/-heid, vs. noodsaak | nood (saak n%o@t // move default stress: noodsaaklik/-e/-heid, vs. noodsaak | ||||
| nood (saak_N no@t // restore default stress: (ge)noodsaak | nood (saak_N no@t // restore default stress: (ge)noodsaak | ||||
| nooien (tjie noIN // remove the e sound: nooientjie(s) and compounds | |||||
| noord (oos n%o@rt_ // move default stress: noordoos/-te/-telik/-e | noord (oos n%o@rt_ // move default stress: noordoos/-te/-telik/-e | ||||
| nor (ma@ n%Or // normaal/normaalweg/normale/abnormaal/-ale | nor (ma@ n%Or // normaal/normaalweg/normale/abnormaal/-ale | ||||
| nostal (gie_N n%Ost%al // stress on last slb.: nostalgie | nostal (gie_N n%Ost%al // stress on last slb.: nostalgie | ||||
| ooi oI | ooi oI | ||||
| ooy oI | ooy oI | ||||
| oodjie oIci | oodjie oIci | ||||
| CC) ool (A o@l_ // insert break: skooluur/-ure, steenkooluitvoer, but not: Karoolug | |||||
| ootjie oIci | ootjie oIci | ||||
| oontjie oINki | oontjie oINki | ||||
| oondjie oINki | oondjie oINki | ||||
| oot (moedig %o@t // move default stress: ootmoedig/-e/-heid | oot (moedig %o@t // move default stress: ootmoedig/-e/-heid | ||||
| .group op | .group op | ||||
| _) opaal %o@pA:l // fix o sound, remove break: opaal and compounds | |||||
| opaat up'A:t // homeopaat/psigopaat and similar | opaat up'A:t // homeopaat/psigopaat and similar | ||||
| opatie upat'i // homeopatie/neuropatie and similar | opatie upat'i // homeopatie/neuropatie and similar | ||||
| opaties up'A:tis // psigopaties/osteopaties and similar | opaties up'A:tis // psigopaties/osteopaties and similar | ||||
| pol (vy p%Ol // move default stress: polvy/-e and compounds | pol (vy p%Ol // move default stress: polvy/-e and compounds | ||||
| pomelo p%ume@l%u // fix stress and o sounds: pomelo(sap/-drankie...) | pomelo p%ume@l%u // fix stress and o sounds: pomelo(sap/-drankie...) | ||||
| _) pon (dok p%On // move default stress: pondok/-ke/-kie | _) pon (dok p%On // move default stress: pondok/-ke/-kie | ||||
| pop (agtig p%Op_ // fix o sound, insert break: popagtig(e) and compounds | |||||
| popu ,pOpy | popu ,pOpy | ||||
| _) por (C %pOr //portret portaal etc. | _) por (C %pOr //portret portaal etc. | ||||
| por (ie p%o@r // move default stress: porie/-ë | por (ie p%o@r // move default stress: porie/-ë | ||||
| sker (muts sk%&r // move default stress: (ge)skermutsel/skermutseling/-e | sker (muts sk%&r // move default stress: (ge)skermutsel/skermutseling/-e | ||||
| skerpi (oen sk%&rp%i // move default stress: skerpioen/-e and compounds | skerpi (oen sk%&rp%i // move default stress: skerpioen/-e and compounds | ||||
| skilder (y sk@ld@r // move stress to y: skildery and compounds like skilderymuseum | skilder (y sk@ld@r // move stress to y: skildery and compounds like skilderymuseum | ||||
| skim (agtig sk@m_ // fix i sound, insert break: skimagtig(e) | |||||
| skisofr (e sk%is%ufr // fix stress and o sound: skisofreen/skisofrene | skisofr (e sk%is%ufr // fix stress and o sound: skisofreen/skisofrene | ||||
| skle (rose skl@ // fix stress and e sound: sklerose and compounds | skle (rose skl@ // fix stress and e sound: sklerose and compounds | ||||
| _) skok (AP4 sk''Ok_ // fix o sound and stress: skokaankondiging/-effek/-insluiting/-onthulling... | _) skok (AP4 sk''Ok_ // fix o sound and stress: skokaankondiging/-effek/-insluiting/-onthulling... | ||||
| _) su (meri s%u // fix stress and u sound: sumeries/-e | _) su (meri s%u // fix stress and u sound: sumeries/-e | ||||
| su (mmier s%W // move default stress: sumier/-e | su (mmier s%W // move default stress: sumier/-e | ||||
| super (A s''yp@r_ // fix stress, break in compounds like superintelligent | super (A s''yp@r_ // fix stress, break in compounds like superintelligent | ||||
| superi (A s%up%e@r%i // fix stress and vowel sounds: superieur/superioriteit | |||||
| superintendent s,upr@nt%EndEnt // fix stress and vowel sounds: superintendent and compounds | superintendent s,upr@nt%EndEnt // fix stress and vowel sounds: superintendent and compounds | ||||
| surro (ga s%Wr%u // fix stress and o sound: surrogaat/surrogate and compounds | surro (ga s%Wr%u // fix stress and o sound: surrogaat/surrogate and compounds | ||||
| su (saC s%u // fix stress and u sound: susan/-na/susara | su (saC s%u // fix stress and u sound: susan/-na/susara | ||||
| toe (riste@ t%u // move default stress: toeristebedryf/-sentrum and similar | toe (riste@ t%u // move default stress: toeristebedryf/-sentrum and similar | ||||
| toer (n t%ur // move default stress: compounds of toernooi | toer (n t%ur // move default stress: compounds of toernooi | ||||
| toere (_ tu:r@ // restore default stress | toere (_ tu:r@ // restore default stress | ||||
| toi (let t%OI // move default stress: toilet and compounds | |||||
| toilet t%OIlEt // move default stress: toilet and compounds: toiletartikel/-emmer/-opsigter | |||||
| tokke (lo t%Ok@ // move default stress: tokkelos/-sie/tokkelok and compounds | tokke (lo t%Ok@ // move default stress: tokkelos/-sie/tokkelok and compounds | ||||
| tok (tokk t%Ok // move default stress: toktokkie/-s/-spelery, etc. | tok (tokk t%Ok // move default stress: toktokkie/-s/-spelery, etc. | ||||
| tombola t%Ombo@l%a // move default stress: tombola and compounds | tombola t%Ombo@l%a // move default stress: tombola and compounds | ||||
| @) toris (_ t'o@r@s // fix stress: pectoris/klitoris | @) toris (_ t'o@r@s // fix stress: pectoris/klitoris | ||||
| _) tor (nyn t%Or // move default stress: tornyn/-e and compounds | _) tor (nyn t%Or // move default stress: tornyn/-e and compounds | ||||
| _) tos (ka t%Os // move default stress: Toskaanse/Toskane | _) tos (ka t%Os // move default stress: Toskaanse/Toskane | ||||
| tser (tjie ts@r // fix e sound: (skoen)poetsertjie/weerkaatsertjie | |||||
| ttel t@l // many compounds of bottel/skottelgoed | ttel t@l // many compounds of bottel/skottelgoed | ||||
| tuberkulose t%yb@rk%ylo@s@ // fix stress; e sound in compounds: tuberkulose/-behandeling | tuberkulose t%yb@rk%ylo@s@ // fix stress; e sound in compounds: tuberkulose/-behandeling | ||||
| tug (A tWx2_ // fix u sound: (on)tugondersoek/-oortreding/-ordonnansie | tug (A tWx2_ // fix u sound: (on)tugondersoek/-oortreding/-ordonnansie | ||||
| _) vanklik faNkl@k // (on)ontvanklik/-e/-er/-heid | _) vanklik faNkl@k // (on)ontvanklik/-e/-er/-heid | ||||
| vanself (spr f%ans%&lf // move default stress: vanselfsprekend/-e/-heid | vanself (spr f%ans%&lf // move default stress: vanselfsprekend/-e/-heid | ||||
| vari (A v%ar%i // fix v sound and stress: variasie/-s/varieer | vari (A v%ar%i // fix v sound and stress: variasie/-s/varieer | ||||
| va (sal v%a // fix stress and v and a sounds: vasal/-le | |||||
| vaseline v%as@lin // fix stress, v and vowel sounds: vaseline/-bottel, etc. | vaseline v%as@lin // fix stress, v and vowel sounds: vaseline/-bottel, etc. | ||||
| vasste (l fast& // fix e sound: vasstel(ling/-lende) | vasste (l fast& // fix e sound: vasstel(ling/-lende) | ||||
| _) vat (A@ fat_ // fix a sound: vatafstand/-orgaan | _) vat (A@ fat_ // fix a sound: vatafstand/-orgaan | ||||
| ve (l f& // maagvel, stress on 1st slb | ve (l f& // maagvel, stress on 1st slb | ||||
| @) vel (A fe@l // aanbeveling/aanbevelingsbrief | @) vel (A fe@l // aanbeveling/aanbevelingsbrief | ||||
| vel (djie f&l // fix -djie sound: veldjie(s) and compounds | |||||
| veld (C f< // fix d sound: veldreuk/-radio/-rantsoen, etc. | veld (C f< // fix d sound: veldreuk/-radio/-rantsoen, etc. | ||||
| veld (eks f<_ // fix d sound, pause: veldekskursie/-ekspedisie/-s | veld (eks f<_ // fix d sound, pause: veldekskursie/-ekspedisie/-s | ||||
| veld (o f<_ // fix d sound, pause: veldorgideë/-opsigter/-oppervlakte/-opname, etc. | veld (o f<_ // fix d sound, pause: veldorgideë/-opsigter/-oppervlakte/-opname, etc. | ||||
| veld (t f&l // eliminate double t sound: veldtog and many compounds, Langeveldt, Springveldt | |||||
| vele f'e@l@ | vele f'e@l@ | ||||
| ven (detta v%En // fix stress and v sound: vendetta and compounds | ven (detta v%En // fix stress and v sound: vendetta and compounds | ||||
| ven (dusie f@n // fix stress and e sound: vendusie and compounds | ven (dusie f@n // fix stress and e sound: vendusie and compounds | ||||
| vol (kome f%Ol | vol (kome f%Ol | ||||
| vo (llA_ fO // volle/vollê/Volla - exception to: vo (lC f%O | vo (llA_ fO // volle/vollê/Volla - exception to: vo (lC f%O | ||||
| volle (dig f%Ole@ // fix stress and e sound: (on)volledig/-e/-heid... | volle (dig f%Ole@ // fix stress and e sound: (on)volledig/-e/-heid... | ||||
| voll (engte fOlE | |||||
| volle (ngte fOlE | |||||
| vo (ller fO // exception to: vo (lC f%O | vo (ller fO // exception to: vo (lC f%O | ||||
| volles (_ fOl@s // fix stress and e sound: volles/passievolles, etc. | volles (_ fOl@s // fix stress and e sound: volles/passievolles, etc. | ||||
| vol (hou_ fOl // exception to: vo (lC f%O | vol (hou_ fOl // exception to: vo (lC f%O |
| idly aIdlI | idly aIdlI | ||||
| idiocy IdI@si | idiocy IdI@si | ||||
| ifrog $alt6 | ifrog $alt6 | ||||
| ignoramus Igno@r'eIm@s | |||||
| ignoramus IgnO@r'eIm@s | |||||
| illiterate $alt2 | illiterate $alt2 | ||||
| illumine $alt2 | illumine $alt2 | ||||
| imagery ImIdZri | imagery ImIdZri | ||||
| nonetheless nVnD@l'Es | nonetheless nVnD@l'Es | ||||
| nosedive noUzdaIv | nosedive noUzdaIv | ||||
| nosir noUs3: | nosir noUs3: | ||||
| ?5 nosir noUsIR | |||||
| not noUt // for noted, notable, etc | not noUt // for noted, notable, etc | ||||
| nots n0ts | nots n0ts | ||||
| (nôtre dame) noUtr@'dA:m | (nôtre dame) noUtr@'dA:m | ||||
| sinus saIn@s | sinus saIn@s | ||||
| siphon $alt2 | siphon $alt2 | ||||
| sir s,3: $only | sir s,3: $only | ||||
| ?5 sir s,VR $only | |||||
| ?5 sir s,IR $only | |||||
| siren saIr@n | siren saIr@n | ||||
| site saIt // for sited | site saIt // for sited | ||||
| ski ski: | ski ski: | ||||
| son sVn | son sVn | ||||
| sonar soUnA@ | sonar soUnA@ | ||||
| sonny sVnI | sonny sVnI | ||||
| sooth su:T $only | |||||
| sopapilla soUp@p'i:@ | |||||
| sope soUpeI | sope soUpeI | ||||
| sorbet sO@beI | |||||
| souffle su:fl'eI | souffle su:fl'eI | ||||
| soundbite saUndbaIt | soundbite saUndbaIt | ||||
| souvenir su:v@n'i@3 | souvenir su:v@n'i@3 | ||||
| sopapilla soUp@p'i:@ | |||||
| sorbet sO@beI | |||||
| soyabean sOI@bi:n | soyabean sOI@bi:n | ||||
| specific sp@sIfIk | specific sp@sIfIk | ||||
| specimen spEsI2m@n | specimen spEsI2m@n | ||||
| tamale ta#mA:li | tamale ta#mA:li | ||||
| tampon tamp0n | tampon tamp0n | ||||
| tangerine tandZ@r'i:n | tangerine tandZ@r'i:n | ||||
| taoiseach ti:S@x | |||||
| tapestry tapI#stri | tapestry tapI#stri | ||||
| tarantula t@rantS@l@ | tarantula t@rantS@l@ | ||||
| tardis $alt1 | tardis $alt1 | ||||
| ye ji: $u+ | ye ji: $u+ | ||||
| yea jeI | yea jeI | ||||
| yessir jEss3: | yessir jEss3: | ||||
| ?5 yessir jEssIR | |||||
| yoghurt j0g3t | yoghurt j0g3t | ||||
| ?3 yoghurt joUg3t | ?3 yoghurt joUg3t | ||||
| ?3 yogurt joUg3t | ?3 yogurt joUg3t | ||||
| ?!3 Anthony ant@ni | ?!3 Anthony ant@ni | ||||
| Anton ant0n | Anton ant0n | ||||
| Anya anj@ | Anya anj@ | ||||
| Aoife i:f@ | |||||
| Aoiffe i:f@ | |||||
| Aphrodite afr@d'aIti | Aphrodite afr@d'aIti | ||||
| Archibald A@tSIbO:ld | Archibald A@tSIbO:ld | ||||
| Archie A@tSi | Archie A@tSi | ||||
| Salman sa#lmA:n | Salman sa#lmA:n | ||||
| Samantha sa#manT@ | Samantha sa#manT@ | ||||
| (Santa claus) s'ant@||kl'O:z | (Santa claus) s'ant@||kl'O:z | ||||
| Saoirse si@S@ | |||||
| Sarah se@r@ | Sarah se@r@ | ||||
| Sarisa $alt3 | Sarisa $alt3 | ||||
| Seamus SeIm@s | Seamus SeIm@s | ||||
| Simon saIm@n | Simon saIm@n | ||||
| Sinead SI2neId | Sinead SI2neId | ||||
| Sinéad SI2neId | Sinéad SI2neId | ||||
| Siobhan S@vO:n | |||||
| Siobhán S@vO:n | |||||
| Siobhan SI2vO:n | |||||
| Siobhán SI2vO:n | |||||
| Sonia s0nj@ | Sonia s0nj@ | ||||
| Sophia soUf'i@ | Sophia soUf'i@ | ||||
| Sophie soUfi | Sophie soUfi |
| sw) a (m_ a | sw) a (m_ a | ||||
| sw) a (nk a | sw) a (nk a | ||||
| ao eI0 | ao eI0 | ||||
| mh) ao eI // Irish, e.g. 'mhaol' /weIl/ | |||||
| m) ao aU | m) ao aU | ||||
| p) ao aU | p) ao aU | ||||
| t) ao aU | t) ao aU | ||||
| ao (_ =aU | ao (_ =aU | ||||
| aois (_ i:S | |||||
| aoise (_ i:S@ | |||||
| ao (ism aU | |||||
| ao (ist aU | |||||
| aoi (C i: // Irish, e.g. 'Aoife' /i:f@/ | |||||
| aoir (C e@ // Irish, e.g. 'Saoirse' /se@S@/ | |||||
| ?3 aoir (C 3: // Irish, e.g. 'Saoirse' /s3:S@/ | |||||
| g) ao (l eI@ | g) ao (l eI@ | ||||
| aor eI'o@ | aor eI'o@ | ||||
| m) ao (ri aU | m) ao (ri aU | ||||
| _n) a (tional a | _n) a (tional a | ||||
| @) a (tious 'eI | @) a (tious 'eI | ||||
| ell) a (trix @ | ell) a (trix @ | ||||
| a (triC 'eI | |||||
| a (trix 'eI | |||||
| a (trice 'eI | |||||
| n) a (tur eI | n) a (tur eI | ||||
| n) a (tura a | n) a (tura a | ||||
| &) a (ture_ @ | &) a (ture_ @ | ||||
| _m) ag (ell a#dZ | _m) ag (ell a#dZ | ||||
| Cp) age (_ eIdZ | Cp) age (_ eIdZ | ||||
| pp) age (_ I2dZ | pp) age (_ I2dZ | ||||
| _ant) ag 'ag | |||||
| _ant) ag (on 'ag | |||||
| enr) ag (e_ 'eIdZ | enr) ag (e_ 'eIdZ | ||||
| outr) ag (e_ eIdZ | outr) ag (e_ eIdZ | ||||
| der) ag (e_ eIdZ | der) ag (e_ eIdZ | ||||
| may) be (_ bi: | may) be (_ bi: | ||||
| _) be (CA bI# | _) be (CA bI# | ||||
| _) bete (lg bi:t@ | _) bete (lg bi:t@ | ||||
| _) be (C% bE | |||||
| _) be (C%+ bE | |||||
| _) be (atiC b%i: | _) be (atiC b%i: | ||||
| _) be (b bi: | _) be (b bi: | ||||
| _) be (cl bI# | _) be (cl bI# | ||||
| _) be (kn bI# | _) be (kn bI# | ||||
| _) belarus bEl@r'u:s | _) belarus bEl@r'u:s | ||||
| _) be (lC bE | _) be (lC bE | ||||
| _) be (llig bI# | |||||
| _) be (llig+ bI# | |||||
| _) be (re bE | _) be (re bE | ||||
| _) be (sC bI# | _) be (sC bI# | ||||
| _) be (stia bE | _) be (stia bE | ||||
| e (Cical 'E | e (Cical 'E | ||||
| e (CiuB i: | e (CiuB i: | ||||
| &) e (_ | &) e (_ | ||||
| aoiC) e (_ @ // Irish, e.g. 'Aoife' /i:f@/ | |||||
| aoiCC) e (_ @ // Irish, e.g. 'Saoirse' /se@S@/ | |||||
| acB) e (_ %I | acB) e (_ %I | ||||
| XC) e (_N i: | XC) e (_N i: | ||||
| vert) e (b I | vert) e (b I | ||||
| y) ed (_S2v d# | y) ed (_S2v d# | ||||
| debut) ed (_S2 d# | debut) ed (_S2 d# | ||||
| edly (_S4m I#dl%i | edly (_S4m I#dl%i | ||||
| eg) edly (_S3m I#dl%i | |||||
| c) ed (e_ 'i:d | c) ed (e_ 'i:d | ||||
| p) ed (e_ i:d | p) ed (e_ i:d | ||||
| p) edal Ed@L | p) edal Ed@L | ||||
| en (core 0n | en (core 0n | ||||
| &) ency (_ @ns%i | &) ency (_ @ns%i | ||||
| ency (cli %EnsI | ency (cli %EnsI | ||||
| _) en (dg@ %En | |||||
| _) en (dp@ %En | |||||
| k) en (d_ En | k) en (d_ En | ||||
| s) en (d_ En | s) en (d_ En | ||||
| t) en (d_ En | t) en (d_ En | ||||
| exp) eri (en i@rI2 | exp) eri (en i@rI2 | ||||
| XC) er 3: | XC) er 3: | ||||
| th) er (@ 3: | th) er (@ 3: | ||||
| h) er (@ %3 | |||||
| h) er (nan %3 | |||||
| X) er (A E#r | X) er (A E#r | ||||
| _h) eretical I#rEtIk@L | _h) eretical I#rEtIk@L | ||||
| _qu) er 3: | _qu) er 3: | ||||
| &z) es (_S2 %I#z | &z) es (_S2 %I#z | ||||
| &C) es (_S1i z | &C) es (_S1i z | ||||
| xus) es (_S2 %I#z | xus) es (_S2 %I#z | ||||
| tamus) es (_S2 %I#z // hippopotamuses | |||||
| es (carp I2s | es (carp I2s | ||||
| es (cape %Es | es (cape %Es | ||||
| es (capi %Es | es (capi %Es | ||||
| &) ford (_S4 f3d | &) ford (_S4 f3d | ||||
| &f) ford (_S4 3d | &f) ford (_S4 3d | ||||
| for (see f%O@ | for (see f%O@ | ||||
| for (ward f'o@ // straightforward | |||||
| for (ward f'O@ | |||||
| ft (en f | ft (en f | ||||
| &) ful (_S3i f@L | &) ful (_S3i f@L | ||||
| _) metall (ic m@tal | _) metall (ic m@tal | ||||
| metabo m@t'ab0 | metabo m@t'ab0 | ||||
| _) meta (llu m%Eta | _) meta (llu m%Eta | ||||
| mh (ao w // Irish, e.g. 'mhaol' /weIl/ | |||||
| aoi) mh (e v // Irish, e.g. 'Caoimhe' /ki:v@/ | |||||
| &) mobile m@bi:l | &) mobile m@bi:l | ||||
| _) mocha moUk@ | _) mocha moUk@ | ||||
| mono (ga m@n'0 | mono (ga m@n'0 | ||||
| &) mouth (_ m@T | &) mouth (_ m@T | ||||
| &) mouth (_$w_alt1 maUT | &) mouth (_$w_alt1 maUT | ||||
| _) multi mVlti | _) multi mVlti | ||||
| _) multi (pl m,VltI | |||||
| _) multi (@@P5 m,VltI | _) multi (@@P5 m,VltI | ||||
| .group mi | .group mi | ||||
| Co) s (Er z | Co) s (Er z | ||||
| Co) s (En z | Co) s (En z | ||||
| e) s (d z | e) s (d z | ||||
| aoi) s (e S // Irish, e.g. 'Laoise' | |||||
| aoi) seach S@x // Irish, e.g. 'Taoiseach' | |||||
| aoi) sigh Si // Irish, e.g. 'Taoisigh' | |||||
| aoir) s (e S // Irish, e.g. 'Saoirse' | |||||
| _) se (clu sI# | _) se (clu sI# | ||||
| secur sI#kjU@ | secur sI#kjU@ | ||||
| _) se (duc sI# | _) se (duc sI# | ||||
| &) s (ic_ z | &) s (ic_ z | ||||
| n) s (ic_ s | n) s (ic_ s | ||||
| ss (ic s | ss (ic s | ||||
| mu) s (e z | |||||
| mu) s (ic z | mu) s (ic z | ||||
| ea) s (ie z | ea) s (ie z | ||||
| ea) s (il z | ea) s (il z | ||||
| th (ill th | th (ill th | ||||
| gh) th (A th | gh) th (A th | ||||
| ee) thing DI2N | ee) thing DI2N | ||||
| soo) th D | |||||
| soo) th (say T | |||||
| the (_ D | the (_ D | ||||
| &) th (L03_ =T | &) th (L03_ =T | ||||
| ou) thed (_ Dd | ou) thed (_ Dd |
| // * Farsi Language fa (or Parsi or Persian) fa_list Version 3.133 | |||||
| // * This file written by Shadyar Khodayari and Ehsan Esmaili who has managed collecting exceptional words. 05-10-2017 | |||||
| // * Farsi Language fa (or Parsi or Persian) fa_list Version 3.134 | |||||
| // * This file written by Shadyar Khodayari and Ehsan Esmaili who has managed collecting exceptional words. 06-24-2017 | |||||
| //********* | //********* | ||||
| // * This program is free software; you can redistribute it and/or modify * | // * This program is free software; you can redistribute it and/or modify * | ||||
| // * it under the terms of the GNU General Public License as published by * | // * it under the terms of the GNU General Public License as published by * | ||||
| آموخت Amuxt | آموخت Amuxt | ||||
| آموزد Amuzad | آموزد Amuzad | ||||
| آموزش AmuzeS | آموزش AmuzeS | ||||
| آمپر AmpeR | |||||
| آمپرمتر AmpeRmetR | آمپرمتر AmpeRmetR | ||||
| آمپلیفایر AmpelifAjeR | آمپلیفایر AmpelifAjeR | ||||
| آمپیریسم AmpiRism | آمپیریسم AmpiRism | ||||
| درایه deRAje | درایه deRAje | ||||
| درایو deRAjv | درایو deRAjv | ||||
| درایور deRAjveR | درایور deRAjveR | ||||
| درباره daR'bAReje: | |||||
| دربازکن daRbAzkon | دربازکن daRbAzkon | ||||
| دربدر daRbedaR | دربدر daRbedaR | ||||
| دربندکشیده daRbandkeSide | دربندکشیده daRbandkeSide | ||||
| شدیدا Sadidan | شدیدا Sadidan | ||||
| شدیداللحن Sadidollahn | شدیداللحن Sadidollahn | ||||
| شدیم Sodim | شدیم Sodim | ||||
| شراادی SA:_d:jA:_R | |||||
| شرافت SeRAfat | شرافت SeRAfat | ||||
| شراپنل SeRApnel | شراپنل SeRApnel | ||||
| شراکت SeRAkat | شراکت SeRAkat | ||||
| شربت SaRbat | شربت SaRbat | ||||
| شرت 'SoRt | شرت 'SoRt | ||||
| شرتکات SoRtkAt | شرتکات SoRtkAt | ||||
| شراادی SA:_d:jA:_R | |||||
| شرشر SeRSeR | شرشر SeRSeR | ||||
| شرطه SoRte: | شرطه SoRte: | ||||
| شرعا SaR?an | شرعا SaR?an |
| // * Farsi Language fa (or Parsi or Persian) fa_rules Version 3.133 | |||||
| // * This file written by Shadyar Khodayari 05-10-2017 | |||||
| // * Farsi Language fa (or Parsi or Persian) fa_rules Version 3.134 | |||||
| // * This file written by Shadyar Khodayari 06-24-2017 | |||||
| //********* | //********* | ||||
| // * This program is free software; you can redistribute it and/or modify * | // * This program is free software; you can redistribute it and/or modify * | ||||
| // * it under the terms of the GNU General Public License as published by * | // * it under the terms of the GNU General Public License as published by * | ||||
| L09L09L09L09) م (L03L09L09L09_ ma | L09L09L09L09) م (L03L09L09L09_ ma | ||||
| // Prefixes م | // Prefixes م | ||||
| _) م (L03L09L03_$noprefixP1@ ma | |||||
| _) م (L03L04L03_$noprefixP1@ ma | |||||
| _) می (L03L09+$noprefixP2@ mi | _) می (L03L09+$noprefixP2@ mi | ||||
| _) می (آL09L09$noprefixP2@ mi | _) می (آL09L09$noprefixP2@ mi | ||||
| _) می (وL09L09$noprefixP2@ mi | _) می (وL09L09$noprefixP2@ mi |
| .*.swp | .*.swp | ||||
| *~ | |||||
| # intermediate files: | # intermediate files: | ||||
| * `data/espeak-ng` data files for eSpeak NG extended data. | * `data/espeak-ng` data files for eSpeak NG extended data. | ||||
| * espeak-ng PropList property lookup as part of the `ucd_property` API. | * espeak-ng PropList property lookup as part of the `ucd_property` API. | ||||
| ## 9.0.0.1 - (In Progress) | |||||
| ## 10.0.0 - 2017-06-25 | |||||
| * Add `iswblank` and `iswxdigit` compatibility. | * Add `iswblank` and `iswxdigit` compatibility. | ||||
| * Improve ctype compatibility. | * Improve ctype compatibility. | ||||
| * PropList property lookup. | |||||
| * PropList and emoji-data property lookup. | |||||
| * Support building with a C89 compiler. | |||||
| * Update to Unicode Character Data 10.0.0. | |||||
| * Unicode Emoji 5.0. | |||||
| ## 9.0.0 - 2016-12-28 | ## 9.0.0 - 2016-12-28 | ||||
| ############################# Unicode Data #################################### | ############################# Unicode Data #################################### | ||||
| EMOJI_VERSION=4.0 | |||||
| EMOJI_VERSION=5.0 | |||||
| UCD_VERSION=@UCD_VERSION@ | UCD_VERSION=@UCD_VERSION@ | ||||
| UCD_ROOTDIR=data/ucd | UCD_ROOTDIR=data/ucd | ||||
| UCD_SRCDIR=http://www.unicode.org/Public | UCD_SRCDIR=http://www.unicode.org/Public | ||||
| data/emoji/emoji-data.txt: | data/emoji/emoji-data.txt: | ||||
| mkdir -pv data/emoji | mkdir -pv data/emoji | ||||
| curl ${UCD_SRCDIR}/emoji/${EMOJI_VERSION}/emoji-data.txt > $@ | |||||
| curl ${UCD_SRCDIR}/emoji/${EMOJI_VERSION}/emoji-data.txt -o $@ | |||||
| data/ucd/PropList.txt: | data/ucd/PropList.txt: | ||||
| mkdir -pv data/ucd | mkdir -pv data/ucd | ||||
| curl ${UCD_SRCDIR}/${UCD_VERSION}/ucd/PropList.txt > $@ | |||||
| curl ${UCD_SRCDIR}/${UCD_VERSION}/ucd/PropList.txt -o $@ | |||||
| data/ucd/DerivedCoreProperties.txt: | data/ucd/DerivedCoreProperties.txt: | ||||
| mkdir -pv data/ucd | mkdir -pv data/ucd | ||||
| curl ${UCD_SRCDIR}/${UCD_VERSION}/ucd/DerivedCoreProperties.txt > $@ | |||||
| curl ${UCD_SRCDIR}/${UCD_VERSION}/ucd/DerivedCoreProperties.txt -o $@ | |||||
| data/ucd/PropertyValueAliases.txt: | data/ucd/PropertyValueAliases.txt: | ||||
| mkdir -pv data/ucd | mkdir -pv data/ucd | ||||
| curl ${UCD_SRCDIR}/${UCD_VERSION}/ucd/PropertyValueAliases.txt > $@ | |||||
| curl ${UCD_SRCDIR}/${UCD_VERSION}/ucd/PropertyValueAliases.txt -o $@ | |||||
| data/ucd/Scripts.txt: | data/ucd/Scripts.txt: | ||||
| mkdir -pv data/ucd | mkdir -pv data/ucd | ||||
| curl ${UCD_SRCDIR}/${UCD_VERSION}/ucd/Scripts.txt > $@ | |||||
| curl ${UCD_SRCDIR}/${UCD_VERSION}/ucd/Scripts.txt -o $@ | |||||
| data/ucd/UnicodeData.txt: | data/ucd/UnicodeData.txt: | ||||
| mkdir -pv data/ucd | mkdir -pv data/ucd | ||||
| curl ${UCD_SRCDIR}/${UCD_VERSION}/ucd/UnicodeData.txt > $@ | |||||
| curl ${UCD_SRCDIR}/${UCD_VERSION}/ucd/UnicodeData.txt -o $@ | |||||
| ############################# documentation ################################### | ############################# documentation ################################### | ||||
| AC_PREREQ([2.65]) | AC_PREREQ([2.65]) | ||||
| AC_INIT([Unicode Character Database Tools], [9.0.0], [https://github.com/rhdunn/ucd-tools/issues], [ucd-tools], [https://github.com/rhdunn/ucd-tools]) | |||||
| AC_INIT([Unicode Character Database Tools], [10.0.0], [https://github.com/rhdunn/ucd-tools/issues], [ucd-tools], [https://github.com/rhdunn/ucd-tools]) | |||||
| AM_INIT_AUTOMAKE() | AM_INIT_AUTOMAKE() | ||||
| m4_ifdef([AM_SILENT_RULES],[AM_SILENT_RULES]) | m4_ifdef([AM_SILENT_RULES],[AM_SILENT_RULES]) | ||||
| dnl ================================================================ | dnl ================================================================ | ||||
| AC_CHECK_HEADERS([stddef.h]) dnl C89 | AC_CHECK_HEADERS([stddef.h]) dnl C89 | ||||
| AC_CHECK_FUNCS([iswblank]) dnl C99 | |||||
| AC_TYPE_UINT8_T | AC_TYPE_UINT8_T | ||||
| AC_TYPE_UINT32_T | AC_TYPE_UINT32_T | ||||
| dnl ================================================================ | dnl ================================================================ | ||||
| AC_ARG_WITH([unicode-version], | AC_ARG_WITH([unicode-version], | ||||
| [AS_HELP_STRING([--with-unicode-version], [Unicode version to support @<:@default=9.0.0@:>@])], | |||||
| [AS_HELP_STRING([--with-unicode-version], [Unicode version to support @<:@default=10.0.0@:>@])], | |||||
| [AS_IF([test x"$withval" != x], | [AS_IF([test x"$withval" != x], | ||||
| [UCD_VERSION="$withval"])], | [UCD_VERSION="$withval"])], | ||||
| [UCD_VERSION="9.0.0"]) | |||||
| [UCD_VERSION="10.0.0"]) | |||||
| AC_SUBST(UCD_VERSION) | AC_SUBST(UCD_VERSION) | ||||
| * along with ucd-tools. If not, see <http://www.gnu.org/licenses/>. | * along with ucd-tools. If not, see <http://www.gnu.org/licenses/>. | ||||
| */ | */ | ||||
| // NOTE: This file is automatically generated from the UnicodeData.txt file in | |||||
| // the Unicode Character database by the ucd-tools/tools/categories.py script. | |||||
| /* NOTE: This file is automatically generated from the UnicodeData.txt file in | |||||
| * the Unicode Character database by the ucd-tools/tools/categories.py script. | |||||
| */ | |||||
| #include "ucd/ucd.h" | #include "ucd/ucd.h" | ||||
| #include <stddef.h> | #include <stddef.h> | ||||
| // Unicode Character Data 9.0.0 | |||||
| /* Unicode Character Data 9.0.0 */ | |||||
| struct case_conversion_entry | struct case_conversion_entry | ||||
| { | { |
| switch (ucd_lookup_category(c)) | switch (ucd_lookup_category(c)) | ||||
| { | { | ||||
| case UCD_CATEGORY_Zs: | case UCD_CATEGORY_Zs: | ||||
| switch (c) // Exclude characters with the <noBreak> DispositionType | |||||
| switch (c) /* Exclude characters with the <noBreak> DispositionType */ | |||||
| { | { | ||||
| case 0x00A0: // U+00A0 : NO-BREAK SPACE | |||||
| case 0x2007: // U+2007 : FIGURE SPACE | |||||
| case 0x202F: // U+202F : NARROW NO-BREAK SPACE | |||||
| case 0x00A0: /* U+00A0 : NO-BREAK SPACE */ | |||||
| case 0x2007: /* U+2007 : FIGURE SPACE */ | |||||
| case 0x202F: /* U+202F : NARROW NO-BREAK SPACE */ | |||||
| return 0; | return 0; | ||||
| } | } | ||||
| return 1; | return 1; | ||||
| case UCD_CATEGORY_Cc: | case UCD_CATEGORY_Cc: | ||||
| return c == 0x09; // U+0009 : CHARACTER TABULATION | |||||
| return c == 0x09; /* U+0009 : CHARACTER TABULATION */ | |||||
| default: | default: | ||||
| return 0; | return 0; | ||||
| } | } | ||||
| int ucd_isdigit(codepoint_t c) | int ucd_isdigit(codepoint_t c) | ||||
| { | { | ||||
| return (c >= 0x30 && c <= 0x39); // [0-9] | |||||
| return (c >= 0x30 && c <= 0x39); /* [0-9] */ | |||||
| } | } | ||||
| int ucd_isgraph(codepoint_t c) | int ucd_isgraph(codepoint_t c) | ||||
| case UCD_CATEGORY_Zp: | case UCD_CATEGORY_Zp: | ||||
| return 1; | return 1; | ||||
| case UCD_CATEGORY_Zs: | case UCD_CATEGORY_Zs: | ||||
| switch (c) // Exclude characters with the <noBreak> DispositionType | |||||
| switch (c) /* Exclude characters with the <noBreak> DispositionType */ | |||||
| { | { | ||||
| case 0x00A0: // U+00A0 : NO-BREAK SPACE | |||||
| case 0x2007: // U+2007 : FIGURE SPACE | |||||
| case 0x202F: // U+202F : NARROW NO-BREAK SPACE | |||||
| case 0x00A0: /* U+00A0 : NO-BREAK SPACE */ | |||||
| case 0x2007: /* U+2007 : FIGURE SPACE */ | |||||
| case 0x202F: /* U+202F : NARROW NO-BREAK SPACE */ | |||||
| return 0; | return 0; | ||||
| } | } | ||||
| return 1; | return 1; | ||||
| case UCD_CATEGORY_Cc: | case UCD_CATEGORY_Cc: | ||||
| switch (c) // Include control characters marked as White_Space | |||||
| switch (c) /* Include control characters marked as White_Space */ | |||||
| { | { | ||||
| case 0x09: // U+0009 : CHARACTER TABULATION | |||||
| case 0x0A: // U+000A : LINE FEED | |||||
| case 0x0B: // U+000B : LINE TABULATION | |||||
| case 0x0C: // U+000C : FORM FEED | |||||
| case 0x0D: // U+000D : CARRIAGE RETURN | |||||
| case 0x85: // U+0085 : NEXT LINE | |||||
| case 0x09: /* U+0009 : CHARACTER TABULATION */ | |||||
| case 0x0A: /* U+000A : LINE FEED */ | |||||
| case 0x0B: /* U+000B : LINE TABULATION */ | |||||
| case 0x0C: /* U+000C : FORM FEED */ | |||||
| case 0x0D: /* U+000D : CARRIAGE RETURN */ | |||||
| case 0x85: /* U+0085 : NEXT LINE */ | |||||
| return 1; | return 1; | ||||
| } | } | ||||
| default: | default: | ||||
| int ucd_isxdigit(codepoint_t c) | int ucd_isxdigit(codepoint_t c) | ||||
| { | { | ||||
| return (c >= 0x30 && c <= 0x39) // [0-9] | |||||
| || (c >= 0x41 && c <= 0x46) // [A-F] | |||||
| || (c >= 0x61 && c <= 0x66); // [a-f] | |||||
| return (c >= 0x30 && c <= 0x39) /* [0-9] */ | |||||
| || (c >= 0x41 && c <= 0x46) /* [A-F] */ | |||||
| || (c >= 0x61 && c <= 0x66); /* [a-f] */ | |||||
| } | } |
| UCD_SCRIPT_Geok, /**< @brief Khutsuri Script */ | UCD_SCRIPT_Geok, /**< @brief Khutsuri Script */ | ||||
| UCD_SCRIPT_Geor, /**< @brief Georgian Script */ | UCD_SCRIPT_Geor, /**< @brief Georgian Script */ | ||||
| UCD_SCRIPT_Glag, /**< @brief Glagolitic Script */ | UCD_SCRIPT_Glag, /**< @brief Glagolitic Script */ | ||||
| UCD_SCRIPT_Gonm, /**< @brief Masaram Gondi */ | |||||
| UCD_SCRIPT_Goth, /**< @brief Gothic Script */ | UCD_SCRIPT_Goth, /**< @brief Gothic Script */ | ||||
| UCD_SCRIPT_Gran, /**< @brief Grantha Script */ | UCD_SCRIPT_Gran, /**< @brief Grantha Script */ | ||||
| UCD_SCRIPT_Grek, /**< @brief Greek Script */ | UCD_SCRIPT_Grek, /**< @brief Greek Script */ | ||||
| UCD_SCRIPT_Sind, /**< @brief Sindhi Script */ | UCD_SCRIPT_Sind, /**< @brief Sindhi Script */ | ||||
| UCD_SCRIPT_Sinh, /**< @brief Sinhala Script */ | UCD_SCRIPT_Sinh, /**< @brief Sinhala Script */ | ||||
| UCD_SCRIPT_Sora, /**< @brief Sora Sompeng Script */ | UCD_SCRIPT_Sora, /**< @brief Sora Sompeng Script */ | ||||
| UCD_SCRIPT_Soyo, /**< @brief Soyombo */ | |||||
| UCD_SCRIPT_Sund, /**< @brief Sundanese Script */ | UCD_SCRIPT_Sund, /**< @brief Sundanese Script */ | ||||
| UCD_SCRIPT_Sylo, /**< @brief Syloti Nagri Script */ | UCD_SCRIPT_Sylo, /**< @brief Syloti Nagri Script */ | ||||
| UCD_SCRIPT_Syrc, /**< @brief Syriac Script */ | UCD_SCRIPT_Syrc, /**< @brief Syriac Script */ | ||||
| UCD_SCRIPT_Xpeo, /**< @brief Old Persian Script */ | UCD_SCRIPT_Xpeo, /**< @brief Old Persian Script */ | ||||
| UCD_SCRIPT_Xsux, /**< @brief Cuneiform Script */ | UCD_SCRIPT_Xsux, /**< @brief Cuneiform Script */ | ||||
| UCD_SCRIPT_Yiii, /**< @brief Yi Script */ | UCD_SCRIPT_Yiii, /**< @brief Yi Script */ | ||||
| UCD_SCRIPT_Zanb, /**< @brief Zanabazar Square */ | |||||
| UCD_SCRIPT_Zinh, /**< @brief Inherited Script */ | UCD_SCRIPT_Zinh, /**< @brief Inherited Script */ | ||||
| UCD_SCRIPT_Zmth, /**< @brief Mathematical Notation */ | UCD_SCRIPT_Zmth, /**< @brief Mathematical Notation */ | ||||
| UCD_SCRIPT_Zsym, /**< @brief Symbols */ | UCD_SCRIPT_Zsym, /**< @brief Symbols */ | ||||
| #define UCD_PROPERTY_EMOJI_PRESENTATION 0x0000000400000000ull /**< @brief Emoji_Presentation */ | #define UCD_PROPERTY_EMOJI_PRESENTATION 0x0000000400000000ull /**< @brief Emoji_Presentation */ | ||||
| #define UCD_PROPERTY_EMOJI_MODIFIER 0x0000000800000000ull /**< @brief Emoji_Modifier */ | #define UCD_PROPERTY_EMOJI_MODIFIER 0x0000000800000000ull /**< @brief Emoji_Modifier */ | ||||
| #define UCD_PROPERTY_EMOJI_MODIFIER_BASE 0x0000001000000000ull /**< @brief Emoji_Modifier_Base */ | #define UCD_PROPERTY_EMOJI_MODIFIER_BASE 0x0000001000000000ull /**< @brief Emoji_Modifier_Base */ | ||||
| #define UCD_PROPERTY_REGIONAL_INDICATOR 0x0000002000000000ull /**< @brief Regional_Indicator */ | |||||
| #define UCD_PROPERTY_EMOJI_COMPONENT 0x0000004000000000ull /**< @brief Emoji_Component */ | |||||
| // eSpeak NG extended properties: | // eSpeak NG extended properties: | ||||
| #define ESPEAKNG_PROPERTY_INVERTED_TERMINAL_PUNCTUATION 0x0010000000000000ull /**< @brief Inverted_Terminal_Punctuation */ | #define ESPEAKNG_PROPERTY_INVERTED_TERMINAL_PUNCTUATION 0x0010000000000000ull /**< @brief Inverted_Terminal_Punctuation */ | ||||
| Geok = UCD_SCRIPT_Geok, /**< @brief Khutsuri Script */ | Geok = UCD_SCRIPT_Geok, /**< @brief Khutsuri Script */ | ||||
| Geor = UCD_SCRIPT_Geor, /**< @brief Georgian Script */ | Geor = UCD_SCRIPT_Geor, /**< @brief Georgian Script */ | ||||
| Glag = UCD_SCRIPT_Glag, /**< @brief Glagolitic Script */ | Glag = UCD_SCRIPT_Glag, /**< @brief Glagolitic Script */ | ||||
| Gonm = UCD_SCRIPT_Gonm, /**< @brief Masaram Gondi */ | |||||
| Goth = UCD_SCRIPT_Goth, /**< @brief Gothic Script */ | Goth = UCD_SCRIPT_Goth, /**< @brief Gothic Script */ | ||||
| Gran = UCD_SCRIPT_Gran, /**< @brief Grantha Script */ | Gran = UCD_SCRIPT_Gran, /**< @brief Grantha Script */ | ||||
| Grek = UCD_SCRIPT_Grek, /**< @brief Greek Script */ | Grek = UCD_SCRIPT_Grek, /**< @brief Greek Script */ | ||||
| Sind = UCD_SCRIPT_Sind, /**< @brief Sindhi Script */ | Sind = UCD_SCRIPT_Sind, /**< @brief Sindhi Script */ | ||||
| Sinh = UCD_SCRIPT_Sinh, /**< @brief Sinhala Script */ | Sinh = UCD_SCRIPT_Sinh, /**< @brief Sinhala Script */ | ||||
| Sora = UCD_SCRIPT_Sora, /**< @brief Sora Sompeng Script */ | Sora = UCD_SCRIPT_Sora, /**< @brief Sora Sompeng Script */ | ||||
| Soyo = UCD_SCRIPT_Soyo, /**< @brief Soyombo */ | |||||
| Sund = UCD_SCRIPT_Sund, /**< @brief Sundanese Script */ | Sund = UCD_SCRIPT_Sund, /**< @brief Sundanese Script */ | ||||
| Sylo = UCD_SCRIPT_Sylo, /**< @brief Syloti Nagri Script */ | Sylo = UCD_SCRIPT_Sylo, /**< @brief Syloti Nagri Script */ | ||||
| Syrc = UCD_SCRIPT_Syrc, /**< @brief Syriac Script */ | Syrc = UCD_SCRIPT_Syrc, /**< @brief Syriac Script */ | ||||
| Xpeo = UCD_SCRIPT_Xpeo, /**< @brief Old Persian Script */ | Xpeo = UCD_SCRIPT_Xpeo, /**< @brief Old Persian Script */ | ||||
| Xsux = UCD_SCRIPT_Xsux, /**< @brief Cuneiform Script */ | Xsux = UCD_SCRIPT_Xsux, /**< @brief Cuneiform Script */ | ||||
| Yiii = UCD_SCRIPT_Yiii, /**< @brief Yi Script */ | Yiii = UCD_SCRIPT_Yiii, /**< @brief Yi Script */ | ||||
| Zanb = UCD_SCRIPT_Zanb, /**< @brief Zanabazar Square */ | |||||
| Zinh = UCD_SCRIPT_Zinh, /**< @brief Inherited Script */ | Zinh = UCD_SCRIPT_Zinh, /**< @brief Inherited Script */ | ||||
| Zmth = UCD_SCRIPT_Zmth, /**< @brief Mathematical Notation */ | Zmth = UCD_SCRIPT_Zmth, /**< @brief Mathematical Notation */ | ||||
| Zsym = UCD_SCRIPT_Zsym, /**< @brief Symbols */ | Zsym = UCD_SCRIPT_Zsym, /**< @brief Symbols */ | ||||
| Emoji_Presentation = UCD_PROPERTY_EMOJI_PRESENTATION, /**< @brief Emoji_Presentation */ | Emoji_Presentation = UCD_PROPERTY_EMOJI_PRESENTATION, /**< @brief Emoji_Presentation */ | ||||
| Emoji_Modifier = UCD_PROPERTY_EMOJI_MODIFIER, /**< @brief Emoji_Modifier */ | Emoji_Modifier = UCD_PROPERTY_EMOJI_MODIFIER, /**< @brief Emoji_Modifier */ | ||||
| Emoji_Modifier_Base = UCD_PROPERTY_EMOJI_MODIFIER_BASE, /**< @brief Emoji_Modifier_Base */ | Emoji_Modifier_Base = UCD_PROPERTY_EMOJI_MODIFIER_BASE, /**< @brief Emoji_Modifier_Base */ | ||||
| Regional_Indicator = UCD_PROPERTY_REGIONAL_INDICATOR, /**< @brief Regional_Indicator */ | |||||
| Emoji_Component = UCD_PROPERTY_EMOJI_COMPONENT, /**< @brief Emoji_Component */ | |||||
| }; | }; | ||||
| /** @brief Return the properties of the specified codepoint. | /** @brief Return the properties of the specified codepoint. |
| case 0x2000: | case 0x2000: | ||||
| if (c == 0x2065) return UCD_PROPERTY_OTHER_DEFAULT_IGNORABLE_CODE_POINT; | if (c == 0x2065) return UCD_PROPERTY_OTHER_DEFAULT_IGNORABLE_CODE_POINT; | ||||
| break; | break; | ||||
| case 0x2300: | |||||
| if (c == 0x23FF) return UCD_PROPERTY_PATTERN_SYNTAX; | |||||
| break; | |||||
| case 0x2400: | case 0x2400: | ||||
| if (c >= 0x2427 && c <= 0x243F) return UCD_PROPERTY_PATTERN_SYNTAX; | if (c >= 0x2427 && c <= 0x243F) return UCD_PROPERTY_PATTERN_SYNTAX; | ||||
| if (c >= 0x244B && c <= 0x245F) return UCD_PROPERTY_PATTERN_SYNTAX; | if (c >= 0x244B && c <= 0x245F) return UCD_PROPERTY_PATTERN_SYNTAX; | ||||
| if (c >= 0x2B96 && c <= 0x2B97) return UCD_PROPERTY_PATTERN_SYNTAX; | if (c >= 0x2B96 && c <= 0x2B97) return UCD_PROPERTY_PATTERN_SYNTAX; | ||||
| if (c >= 0x2BBA && c <= 0x2BBC) return UCD_PROPERTY_PATTERN_SYNTAX; | if (c >= 0x2BBA && c <= 0x2BBC) return UCD_PROPERTY_PATTERN_SYNTAX; | ||||
| if (c == 0x2BC9) return UCD_PROPERTY_PATTERN_SYNTAX; | if (c == 0x2BC9) return UCD_PROPERTY_PATTERN_SYNTAX; | ||||
| if (c >= 0x2BD2 && c <= 0x2BEB) return UCD_PROPERTY_PATTERN_SYNTAX; | |||||
| if (c >= 0x2BD3 && c <= 0x2BEB) return UCD_PROPERTY_PATTERN_SYNTAX; | |||||
| if (c >= 0x2BF0 && c <= 0x2BFF) return UCD_PROPERTY_PATTERN_SYNTAX; | if (c >= 0x2BF0 && c <= 0x2BFF) return UCD_PROPERTY_PATTERN_SYNTAX; | ||||
| break; | break; | ||||
| case 0x2E00: | case 0x2E00: | ||||
| if (c == 0x029D) return UCD_PROPERTY_SOFT_DOTTED; | if (c == 0x029D) return UCD_PROPERTY_SOFT_DOTTED; | ||||
| break; | break; | ||||
| case 0x0300: | case 0x0300: | ||||
| if (c >= 0x03D0 && c <= 0x03D2) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu | |||||
| if (c >= 0x03D0 && c <= 0x03D2) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */ | |||||
| if (c == 0x03D5) return UCD_PROPERTY_OTHER_MATH; | if (c == 0x03D5) return UCD_PROPERTY_OTHER_MATH; | ||||
| if (c >= 0x03F0 && c <= 0x03F1) return UCD_PROPERTY_OTHER_MATH; | if (c >= 0x03F0 && c <= 0x03F1) return UCD_PROPERTY_OTHER_MATH; | ||||
| if (c == 0x03F3) return UCD_PROPERTY_SOFT_DOTTED; | if (c == 0x03F3) return UCD_PROPERTY_SOFT_DOTTED; | ||||
| if (c >= 0x03F4 && c <= 0x03F5) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu | |||||
| if (c >= 0x03F4 && c <= 0x03F5) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */ | |||||
| break; | break; | ||||
| case 0x0400: | case 0x0400: | ||||
| if (c == 0x0456) return UCD_PROPERTY_SOFT_DOTTED; | if (c == 0x0456) return UCD_PROPERTY_SOFT_DOTTED; | ||||
| if (c == 0x1ECB) return UCD_PROPERTY_SOFT_DOTTED; | if (c == 0x1ECB) return UCD_PROPERTY_SOFT_DOTTED; | ||||
| break; | break; | ||||
| case 0x2100: | case 0x2100: | ||||
| if (c >= 0x210A && c <= 0x2113) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu | |||||
| if (c >= 0x212F && c <= 0x2131) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu | |||||
| if (c >= 0x2133 && c <= 0x2134) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu | |||||
| if (c >= 0x210A && c <= 0x2113) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */ | |||||
| if (c >= 0x212F && c <= 0x2131) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */ | |||||
| if (c >= 0x2133 && c <= 0x2134) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */ | |||||
| if (c == 0x2139) return UCD_PROPERTY_EMOJI; | if (c == 0x2139) return UCD_PROPERTY_EMOJI; | ||||
| if (c >= 0x213C && c <= 0x213F) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu | |||||
| if (c >= 0x2145 && c <= 0x2147) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu | |||||
| if (c >= 0x213C && c <= 0x213F) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */ | |||||
| if (c >= 0x2145 && c <= 0x2147) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */ | |||||
| if (c >= 0x2148 && c <= 0x2149) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_SOFT_DOTTED; | if (c >= 0x2148 && c <= 0x2149) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_SOFT_DOTTED; | ||||
| break; | break; | ||||
| case 0xFF00: | case 0xFF00: | ||||
| break; | break; | ||||
| case 0x01D400: | case 0x01D400: | ||||
| if (c >= 0x01D422 && c <= 0x01D423) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_SOFT_DOTTED; | if (c >= 0x01D422 && c <= 0x01D423) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_SOFT_DOTTED; | ||||
| if (c >= 0x01D400 && c <= 0x01D454) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu | |||||
| if (c >= 0x01D400 && c <= 0x01D454) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */ | |||||
| if (c >= 0x01D456 && c <= 0x01D457) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_SOFT_DOTTED; | if (c >= 0x01D456 && c <= 0x01D457) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_SOFT_DOTTED; | ||||
| if (c >= 0x01D48A && c <= 0x01D48B) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_SOFT_DOTTED; | if (c >= 0x01D48A && c <= 0x01D48B) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_SOFT_DOTTED; | ||||
| if (c >= 0x01D458 && c <= 0x01D49C) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu | |||||
| if (c >= 0x01D4AE && c <= 0x01D4B9) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu | |||||
| if (c >= 0x01D458 && c <= 0x01D49C) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */ | |||||
| if (c >= 0x01D4AE && c <= 0x01D4B9) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */ | |||||
| if (c == 0x01D4BB) return UCD_PROPERTY_OTHER_MATH; | if (c == 0x01D4BB) return UCD_PROPERTY_OTHER_MATH; | ||||
| if (c >= 0x01D4BE && c <= 0x01D4BF) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_SOFT_DOTTED; | if (c >= 0x01D4BE && c <= 0x01D4BF) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_SOFT_DOTTED; | ||||
| if (c >= 0x01D4BD && c <= 0x01D4C3) return UCD_PROPERTY_OTHER_MATH; | if (c >= 0x01D4BD && c <= 0x01D4C3) return UCD_PROPERTY_OTHER_MATH; | ||||
| if (c >= 0x01D4F2 && c <= 0x01D4F3) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_SOFT_DOTTED; | if (c >= 0x01D4F2 && c <= 0x01D4F3) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_SOFT_DOTTED; | ||||
| if (c >= 0x01D4C5) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu | |||||
| if (c >= 0x01D4C5) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */ | |||||
| break; | break; | ||||
| case 0x01D500: | case 0x01D500: | ||||
| if (c <= 0x01D505) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu | |||||
| if (c <= 0x01D505) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */ | |||||
| if (c >= 0x01D526 && c <= 0x01D527) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_SOFT_DOTTED; | if (c >= 0x01D526 && c <= 0x01D527) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_SOFT_DOTTED; | ||||
| if (c >= 0x01D51E && c <= 0x01D539) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu | |||||
| if (c >= 0x01D51E && c <= 0x01D539) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */ | |||||
| if (c >= 0x01D55A && c <= 0x01D55B) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_SOFT_DOTTED; | if (c >= 0x01D55A && c <= 0x01D55B) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_SOFT_DOTTED; | ||||
| if (c >= 0x01D58E && c <= 0x01D58F) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_SOFT_DOTTED; | if (c >= 0x01D58E && c <= 0x01D58F) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_SOFT_DOTTED; | ||||
| if (c >= 0x01D5C2 && c <= 0x01D5C3) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_SOFT_DOTTED; | if (c >= 0x01D5C2 && c <= 0x01D5C3) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_SOFT_DOTTED; | ||||
| if (c >= 0x01D5F6 && c <= 0x01D5F7) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_SOFT_DOTTED; | if (c >= 0x01D5F6 && c <= 0x01D5F7) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_SOFT_DOTTED; | ||||
| if (c >= 0x01D552) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu | |||||
| if (c >= 0x01D552) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */ | |||||
| break; | break; | ||||
| case 0x01D600: | case 0x01D600: | ||||
| if (c >= 0x01D62A && c <= 0x01D62B) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_SOFT_DOTTED; | if (c >= 0x01D62A && c <= 0x01D62B) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_SOFT_DOTTED; | ||||
| if (c >= 0x01D65E && c <= 0x01D65F) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_SOFT_DOTTED; | if (c >= 0x01D65E && c <= 0x01D65F) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_SOFT_DOTTED; | ||||
| if (c >= 0x01D692 && c <= 0x01D693) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_SOFT_DOTTED; | if (c >= 0x01D692 && c <= 0x01D693) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_SOFT_DOTTED; | ||||
| if (c <= 0x01D6A5) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu | |||||
| if (c <= 0x01D6A5) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */ | |||||
| if (c >= 0x01D6C2 && c <= 0x01D6DA) return UCD_PROPERTY_OTHER_MATH; | if (c >= 0x01D6C2 && c <= 0x01D6DA) return UCD_PROPERTY_OTHER_MATH; | ||||
| if (c >= 0x01D6DC && c <= 0x01D6FA) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu | |||||
| if (c >= 0x01D6DC && c <= 0x01D6FA) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */ | |||||
| if (c >= 0x01D6FC) return UCD_PROPERTY_OTHER_MATH; | if (c >= 0x01D6FC) return UCD_PROPERTY_OTHER_MATH; | ||||
| break; | break; | ||||
| case 0x01D700: | case 0x01D700: | ||||
| if (c <= 0x01D714) return UCD_PROPERTY_OTHER_MATH; | if (c <= 0x01D714) return UCD_PROPERTY_OTHER_MATH; | ||||
| if (c >= 0x01D716 && c <= 0x01D734) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu | |||||
| if (c >= 0x01D716 && c <= 0x01D734) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */ | |||||
| if (c >= 0x01D736 && c <= 0x01D74E) return UCD_PROPERTY_OTHER_MATH; | if (c >= 0x01D736 && c <= 0x01D74E) return UCD_PROPERTY_OTHER_MATH; | ||||
| if (c >= 0x01D750 && c <= 0x01D76E) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu | |||||
| if (c >= 0x01D750 && c <= 0x01D76E) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */ | |||||
| if (c >= 0x01D770 && c <= 0x01D788) return UCD_PROPERTY_OTHER_MATH; | if (c >= 0x01D770 && c <= 0x01D788) return UCD_PROPERTY_OTHER_MATH; | ||||
| if (c >= 0x01D78A && c <= 0x01D7A8) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu | |||||
| if (c >= 0x01D78A && c <= 0x01D7A8) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */ | |||||
| if (c >= 0x01D7AA && c <= 0x01D7C2) return UCD_PROPERTY_OTHER_MATH; | if (c >= 0x01D7AA && c <= 0x01D7C2) return UCD_PROPERTY_OTHER_MATH; | ||||
| if (c >= 0x01D7C4 && c <= 0x01D7CB) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu | |||||
| if (c >= 0x01D7C4 && c <= 0x01D7CB) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */ | |||||
| break; | break; | ||||
| } | } | ||||
| return 0; | return 0; | ||||
| break; | break; | ||||
| case 0x016F00: | case 0x016F00: | ||||
| if (c >= 0x016F93 && c <= 0x016F9F) return UCD_PROPERTY_DIACRITIC; | if (c >= 0x016F93 && c <= 0x016F9F) return UCD_PROPERTY_DIACRITIC; | ||||
| if (c == 0x016FE0) return UCD_PROPERTY_EXTENDER; | |||||
| if (c >= 0x016FE0 && c <= 0x016FE1) return UCD_PROPERTY_EXTENDER; | |||||
| break; | break; | ||||
| } | } | ||||
| return 0; | return 0; | ||||
| { | { | ||||
| case 0x000000: | case 0x000000: | ||||
| if (c >= 0x3400 && c <= 0x4DB5) return UCD_PROPERTY_IDEOGRAPHIC | UCD_PROPERTY_UNIFIED_IDEOGRAPH; | if (c >= 0x3400 && c <= 0x4DB5) return UCD_PROPERTY_IDEOGRAPHIC | UCD_PROPERTY_UNIFIED_IDEOGRAPH; | ||||
| if (c >= 0x4E00 && c <= 0x9FD5) return UCD_PROPERTY_IDEOGRAPHIC | UCD_PROPERTY_UNIFIED_IDEOGRAPH; | |||||
| if (c >= 0x4E00 && c <= 0x9FEA) return UCD_PROPERTY_IDEOGRAPHIC | UCD_PROPERTY_UNIFIED_IDEOGRAPH; | |||||
| if (c >= 0xF900 && c <= 0xFA6D) return UCD_PROPERTY_IDEOGRAPHIC; | if (c >= 0xF900 && c <= 0xFA6D) return UCD_PROPERTY_IDEOGRAPHIC; | ||||
| if (c >= 0xFA70 && c <= 0xFAD9) return UCD_PROPERTY_IDEOGRAPHIC; | if (c >= 0xFA70 && c <= 0xFAD9) return UCD_PROPERTY_IDEOGRAPHIC; | ||||
| break; | break; | ||||
| case 0x010000: | case 0x010000: | ||||
| if (c >= 0x017000 && c <= 0x0187EC) return UCD_PROPERTY_IDEOGRAPHIC; | if (c >= 0x017000 && c <= 0x0187EC) return UCD_PROPERTY_IDEOGRAPHIC; | ||||
| if (c >= 0x018800 && c <= 0x018AF2) return UCD_PROPERTY_IDEOGRAPHIC; | if (c >= 0x018800 && c <= 0x018AF2) return UCD_PROPERTY_IDEOGRAPHIC; | ||||
| if (c >= 0x01B170 && c <= 0x01B2FB) return UCD_PROPERTY_IDEOGRAPHIC; | |||||
| break; | break; | ||||
| case 0x020000: | case 0x020000: | ||||
| if (c >= 0x020000 && c <= 0x02A6D6) return UCD_PROPERTY_IDEOGRAPHIC | UCD_PROPERTY_UNIFIED_IDEOGRAPH; | if (c >= 0x020000 && c <= 0x02A6D6) return UCD_PROPERTY_IDEOGRAPHIC | UCD_PROPERTY_UNIFIED_IDEOGRAPH; | ||||
| if (c >= 0x02A700 && c <= 0x02B734) return UCD_PROPERTY_IDEOGRAPHIC | UCD_PROPERTY_UNIFIED_IDEOGRAPH; | if (c >= 0x02A700 && c <= 0x02B734) return UCD_PROPERTY_IDEOGRAPHIC | UCD_PROPERTY_UNIFIED_IDEOGRAPH; | ||||
| if (c >= 0x02B740 && c <= 0x02B81D) return UCD_PROPERTY_IDEOGRAPHIC | UCD_PROPERTY_UNIFIED_IDEOGRAPH; | if (c >= 0x02B740 && c <= 0x02B81D) return UCD_PROPERTY_IDEOGRAPHIC | UCD_PROPERTY_UNIFIED_IDEOGRAPH; | ||||
| if (c >= 0x02B820 && c <= 0x02CEA1) return UCD_PROPERTY_IDEOGRAPHIC | UCD_PROPERTY_UNIFIED_IDEOGRAPH; | if (c >= 0x02B820 && c <= 0x02CEA1) return UCD_PROPERTY_IDEOGRAPHIC | UCD_PROPERTY_UNIFIED_IDEOGRAPH; | ||||
| if (c >= 0x02CEB0 && c <= 0x02EBE0) return UCD_PROPERTY_IDEOGRAPHIC | UCD_PROPERTY_UNIFIED_IDEOGRAPH; | |||||
| if (c >= 0x02F800 && c <= 0x02FA1D) return UCD_PROPERTY_IDEOGRAPHIC; | if (c >= 0x02F800 && c <= 0x02FA1D) return UCD_PROPERTY_IDEOGRAPHIC; | ||||
| break; | break; | ||||
| } | } | ||||
| if (c >= 0x0041 && c <= 0x0046) return UCD_PROPERTY_HEX_DIGIT | UCD_PROPERTY_ASCII_HEX_DIGIT; | if (c >= 0x0041 && c <= 0x0046) return UCD_PROPERTY_HEX_DIGIT | UCD_PROPERTY_ASCII_HEX_DIGIT; | ||||
| break; | break; | ||||
| case 0x0300: | case 0x0300: | ||||
| if (c >= 0x03D0 && c <= 0x03D2) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu | |||||
| if (c >= 0x03F4 && c <= 0x03F5) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu | |||||
| if (c >= 0x03D0 && c <= 0x03D2) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */ | |||||
| if (c >= 0x03F4 && c <= 0x03F5) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */ | |||||
| break; | break; | ||||
| case 0xFF00: | case 0xFF00: | ||||
| if (c >= 0xFF21 && c <= 0xFF26) return UCD_PROPERTY_HEX_DIGIT; | if (c >= 0xFF21 && c <= 0xFF26) return UCD_PROPERTY_HEX_DIGIT; | ||||
| case 0x2100: | case 0x2100: | ||||
| if (c == 0x2102) return UCD_PROPERTY_OTHER_MATH; | if (c == 0x2102) return UCD_PROPERTY_OTHER_MATH; | ||||
| if (c == 0x2107) return UCD_PROPERTY_OTHER_MATH; | if (c == 0x2107) return UCD_PROPERTY_OTHER_MATH; | ||||
| if (c >= 0x210A && c <= 0x2113) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu | |||||
| if (c >= 0x210A && c <= 0x2113) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */ | |||||
| if (c == 0x2115) return UCD_PROPERTY_OTHER_MATH; | if (c == 0x2115) return UCD_PROPERTY_OTHER_MATH; | ||||
| if (c >= 0x2119 && c <= 0x211D) return UCD_PROPERTY_OTHER_MATH; | if (c >= 0x2119 && c <= 0x211D) return UCD_PROPERTY_OTHER_MATH; | ||||
| if (c == 0x2124) return UCD_PROPERTY_OTHER_MATH; | if (c == 0x2124) return UCD_PROPERTY_OTHER_MATH; | ||||
| if (c == 0x2128) return UCD_PROPERTY_OTHER_MATH; | if (c == 0x2128) return UCD_PROPERTY_OTHER_MATH; | ||||
| if (c >= 0x212C && c <= 0x212D) return UCD_PROPERTY_OTHER_MATH; | if (c >= 0x212C && c <= 0x212D) return UCD_PROPERTY_OTHER_MATH; | ||||
| if (c >= 0x212F && c <= 0x2131) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu | |||||
| if (c >= 0x2133 && c <= 0x2134) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu | |||||
| if (c >= 0x213C && c <= 0x213F) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu | |||||
| if (c >= 0x2145 && c <= 0x2149) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu | |||||
| if (c >= 0x212F && c <= 0x2131) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */ | |||||
| if (c >= 0x2133 && c <= 0x2134) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */ | |||||
| if (c >= 0x213C && c <= 0x213F) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */ | |||||
| if (c >= 0x2145 && c <= 0x2149) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */ | |||||
| break; | break; | ||||
| case 0x01D400: | case 0x01D400: | ||||
| if (c >= 0x01D400 && c <= 0x01D454) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu | |||||
| if (c >= 0x01D456 && c <= 0x01D49C) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu | |||||
| if (c >= 0x01D400 && c <= 0x01D454) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */ | |||||
| if (c >= 0x01D456 && c <= 0x01D49C) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */ | |||||
| if (c >= 0x01D49E && c <= 0x01D49F) return UCD_PROPERTY_OTHER_MATH; | if (c >= 0x01D49E && c <= 0x01D49F) return UCD_PROPERTY_OTHER_MATH; | ||||
| if (c == 0x01D4A2) return UCD_PROPERTY_OTHER_MATH; | if (c == 0x01D4A2) return UCD_PROPERTY_OTHER_MATH; | ||||
| if (c >= 0x01D4A5 && c <= 0x01D4A6) return UCD_PROPERTY_OTHER_MATH; | if (c >= 0x01D4A5 && c <= 0x01D4A6) return UCD_PROPERTY_OTHER_MATH; | ||||
| if (c >= 0x01D4A9 && c <= 0x01D4AC) return UCD_PROPERTY_OTHER_MATH; | if (c >= 0x01D4A9 && c <= 0x01D4AC) return UCD_PROPERTY_OTHER_MATH; | ||||
| if (c >= 0x01D4AE && c <= 0x01D4B9) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu | |||||
| if (c >= 0x01D4C5) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu | |||||
| if (c >= 0x01D4AE && c <= 0x01D4B9) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */ | |||||
| if (c >= 0x01D4C5) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */ | |||||
| break; | break; | ||||
| case 0x01D500: | case 0x01D500: | ||||
| if (c <= 0x01D505) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu | |||||
| if (c <= 0x01D505) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */ | |||||
| if (c >= 0x01D507 && c <= 0x01D50A) return UCD_PROPERTY_OTHER_MATH; | if (c >= 0x01D507 && c <= 0x01D50A) return UCD_PROPERTY_OTHER_MATH; | ||||
| if (c >= 0x01D50D && c <= 0x01D514) return UCD_PROPERTY_OTHER_MATH; | if (c >= 0x01D50D && c <= 0x01D514) return UCD_PROPERTY_OTHER_MATH; | ||||
| if (c >= 0x01D516 && c <= 0x01D51C) return UCD_PROPERTY_OTHER_MATH; | if (c >= 0x01D516 && c <= 0x01D51C) return UCD_PROPERTY_OTHER_MATH; | ||||
| if (c >= 0x01D51E && c <= 0x01D539) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu | |||||
| if (c >= 0x01D51E && c <= 0x01D539) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */ | |||||
| if (c >= 0x01D53B && c <= 0x01D53E) return UCD_PROPERTY_OTHER_MATH; | if (c >= 0x01D53B && c <= 0x01D53E) return UCD_PROPERTY_OTHER_MATH; | ||||
| if (c >= 0x01D540 && c <= 0x01D544) return UCD_PROPERTY_OTHER_MATH; | if (c >= 0x01D540 && c <= 0x01D544) return UCD_PROPERTY_OTHER_MATH; | ||||
| if (c == 0x01D546) return UCD_PROPERTY_OTHER_MATH; | if (c == 0x01D546) return UCD_PROPERTY_OTHER_MATH; | ||||
| if (c >= 0x01D54A && c <= 0x01D550) return UCD_PROPERTY_OTHER_MATH; | if (c >= 0x01D54A && c <= 0x01D550) return UCD_PROPERTY_OTHER_MATH; | ||||
| if (c >= 0x01D552) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu | |||||
| if (c >= 0x01D552) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */ | |||||
| break; | break; | ||||
| case 0x01D600: | case 0x01D600: | ||||
| if (c <= 0x01D6A5) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu | |||||
| if (c <= 0x01D6A5) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */ | |||||
| if (c >= 0x01D6A8 && c <= 0x01D6C0) return UCD_PROPERTY_OTHER_MATH; | if (c >= 0x01D6A8 && c <= 0x01D6C0) return UCD_PROPERTY_OTHER_MATH; | ||||
| if (c >= 0x01D6DC && c <= 0x01D6FA) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu | |||||
| if (c >= 0x01D6DC && c <= 0x01D6FA) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */ | |||||
| break; | break; | ||||
| case 0x01D700: | case 0x01D700: | ||||
| if (c >= 0x01D716 && c <= 0x01D734) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu | |||||
| if (c >= 0x01D750 && c <= 0x01D76E) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu | |||||
| if (c >= 0x01D78A && c <= 0x01D7A8) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu | |||||
| if (c >= 0x01D7C4 && c <= 0x01D7CB) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu | |||||
| if (c >= 0x01D716 && c <= 0x01D734) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */ | |||||
| if (c >= 0x01D750 && c <= 0x01D76E) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */ | |||||
| if (c >= 0x01D78A && c <= 0x01D7A8) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */ | |||||
| if (c >= 0x01D7C4 && c <= 0x01D7CB) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */ | |||||
| break; | break; | ||||
| } | } | ||||
| return 0; | return 0; | ||||
| if (c >= 0x1C34 && c <= 0x1C35) return UCD_PROPERTY_OTHER_ALPHABETIC; | if (c >= 0x1C34 && c <= 0x1C35) return UCD_PROPERTY_OTHER_ALPHABETIC; | ||||
| if (c == 0x1CE1) return UCD_PROPERTY_DIACRITIC; | if (c == 0x1CE1) return UCD_PROPERTY_DIACRITIC; | ||||
| if (c >= 0x1CF2 && c <= 0x1CF3) return UCD_PROPERTY_OTHER_ALPHABETIC; | if (c >= 0x1CF2 && c <= 0x1CF3) return UCD_PROPERTY_OTHER_ALPHABETIC; | ||||
| if (c == 0x1CF7) return UCD_PROPERTY_DIACRITIC; | |||||
| break; | break; | ||||
| case 0x3000: | case 0x3000: | ||||
| if (c >= 0x302E && c <= 0x302F) return UCD_PROPERTY_DIACRITIC | UCD_PROPERTY_OTHER_GRAPHEME_EXTEND; | if (c >= 0x302E && c <= 0x302F) return UCD_PROPERTY_DIACRITIC | UCD_PROPERTY_OTHER_GRAPHEME_EXTEND; | ||||
| if (c >= 0x011720 && c <= 0x011721) return UCD_PROPERTY_OTHER_ALPHABETIC; | if (c >= 0x011720 && c <= 0x011721) return UCD_PROPERTY_OTHER_ALPHABETIC; | ||||
| if (c == 0x011726) return UCD_PROPERTY_OTHER_ALPHABETIC; | if (c == 0x011726) return UCD_PROPERTY_OTHER_ALPHABETIC; | ||||
| break; | break; | ||||
| case 0x011A00: | |||||
| if (c >= 0x011A07 && c <= 0x011A08) return UCD_PROPERTY_OTHER_ALPHABETIC; | |||||
| if (c == 0x011A39) return UCD_PROPERTY_OTHER_ALPHABETIC; | |||||
| if (c >= 0x011A57 && c <= 0x011A58) return UCD_PROPERTY_OTHER_ALPHABETIC; | |||||
| if (c == 0x011A97) return UCD_PROPERTY_OTHER_ALPHABETIC; | |||||
| break; | |||||
| case 0x011C00: | case 0x011C00: | ||||
| if (c == 0x011C2F) return UCD_PROPERTY_OTHER_ALPHABETIC; | if (c == 0x011C2F) return UCD_PROPERTY_OTHER_ALPHABETIC; | ||||
| if (c == 0x011C3E) return UCD_PROPERTY_OTHER_ALPHABETIC; | if (c == 0x011C3E) return UCD_PROPERTY_OTHER_ALPHABETIC; | ||||
| if (c >= 0x0AC7 && c <= 0x0AC8) return UCD_PROPERTY_OTHER_ALPHABETIC; | if (c >= 0x0AC7 && c <= 0x0AC8) return UCD_PROPERTY_OTHER_ALPHABETIC; | ||||
| if (c == 0x0ACD) return UCD_PROPERTY_DIACRITIC; | if (c == 0x0ACD) return UCD_PROPERTY_DIACRITIC; | ||||
| if (c >= 0x0AE2 && c <= 0x0AE3) return UCD_PROPERTY_OTHER_ALPHABETIC; | if (c >= 0x0AE2 && c <= 0x0AE3) return UCD_PROPERTY_OTHER_ALPHABETIC; | ||||
| if (c >= 0x0AFA && c <= 0x0AFC) return UCD_PROPERTY_OTHER_ALPHABETIC; | |||||
| if (c >= 0x0AFD && c <= 0x0AFF) return UCD_PROPERTY_DIACRITIC; | |||||
| break; | break; | ||||
| case 0x0B00: | case 0x0B00: | ||||
| if (c == 0x0B01) return UCD_PROPERTY_OTHER_ALPHABETIC; | if (c == 0x0B01) return UCD_PROPERTY_OTHER_ALPHABETIC; | ||||
| if (c >= 0x0CE2 && c <= 0x0CE3) return UCD_PROPERTY_OTHER_ALPHABETIC; | if (c >= 0x0CE2 && c <= 0x0CE3) return UCD_PROPERTY_OTHER_ALPHABETIC; | ||||
| break; | break; | ||||
| case 0x0D00: | case 0x0D00: | ||||
| if (c == 0x0D01) return UCD_PROPERTY_OTHER_ALPHABETIC; | |||||
| if (c >= 0x0D00 && c <= 0x0D01) return UCD_PROPERTY_OTHER_ALPHABETIC; | |||||
| if (c >= 0x0D3B && c <= 0x0D3C) return UCD_PROPERTY_DIACRITIC; | |||||
| if (c >= 0x0D41 && c <= 0x0D44) return UCD_PROPERTY_OTHER_ALPHABETIC; | if (c >= 0x0D41 && c <= 0x0D44) return UCD_PROPERTY_OTHER_ALPHABETIC; | ||||
| if (c == 0x0D4D) return UCD_PROPERTY_DIACRITIC; | if (c == 0x0D4D) return UCD_PROPERTY_DIACRITIC; | ||||
| if (c >= 0x0D62 && c <= 0x0D63) return UCD_PROPERTY_OTHER_ALPHABETIC; | if (c >= 0x0D62 && c <= 0x0D63) return UCD_PROPERTY_OTHER_ALPHABETIC; | ||||
| case 0x1D00: | case 0x1D00: | ||||
| if (c >= 0x1DC4 && c <= 0x1DCF) return UCD_PROPERTY_DIACRITIC; | if (c >= 0x1DC4 && c <= 0x1DCF) return UCD_PROPERTY_DIACRITIC; | ||||
| if (c >= 0x1DE7 && c <= 0x1DF4) return UCD_PROPERTY_OTHER_ALPHABETIC; | if (c >= 0x1DE7 && c <= 0x1DF4) return UCD_PROPERTY_OTHER_ALPHABETIC; | ||||
| if (c == 0x1DF5) return UCD_PROPERTY_DIACRITIC; | |||||
| if (c >= 0x1DF5 && c <= 0x1DF9) return UCD_PROPERTY_DIACRITIC; | |||||
| if (c >= 0x1DFD && c <= 0x1DFF) return UCD_PROPERTY_DIACRITIC; | if (c >= 0x1DFD && c <= 0x1DFF) return UCD_PROPERTY_DIACRITIC; | ||||
| break; | break; | ||||
| case 0x2000: | case 0x2000: | ||||
| if (c >= 0x011727 && c <= 0x01172A) return UCD_PROPERTY_OTHER_ALPHABETIC; | if (c >= 0x011727 && c <= 0x01172A) return UCD_PROPERTY_OTHER_ALPHABETIC; | ||||
| if (c == 0x01172B) return UCD_PROPERTY_DIACRITIC; | if (c == 0x01172B) return UCD_PROPERTY_DIACRITIC; | ||||
| break; | break; | ||||
| case 0x011A00: | |||||
| if (c >= 0x011A01 && c <= 0x011A0A) return UCD_PROPERTY_OTHER_ALPHABETIC; | |||||
| if (c == 0x011A34) return UCD_PROPERTY_DIACRITIC; | |||||
| if (c >= 0x011A35 && c <= 0x011A3E) return UCD_PROPERTY_OTHER_ALPHABETIC; | |||||
| if (c == 0x011A47) return UCD_PROPERTY_DIACRITIC; | |||||
| if (c >= 0x011A51 && c <= 0x011A5B) return UCD_PROPERTY_OTHER_ALPHABETIC; | |||||
| if (c >= 0x011A8A && c <= 0x011A96) return UCD_PROPERTY_OTHER_ALPHABETIC; | |||||
| if (c == 0x011A98) return UCD_PROPERTY_EXTENDER; | |||||
| if (c == 0x011A99) return UCD_PROPERTY_DIACRITIC; | |||||
| break; | |||||
| case 0x011C00: | case 0x011C00: | ||||
| if (c >= 0x011C30 && c <= 0x011C36) return UCD_PROPERTY_OTHER_ALPHABETIC; | if (c >= 0x011C30 && c <= 0x011C36) return UCD_PROPERTY_OTHER_ALPHABETIC; | ||||
| if (c >= 0x011C38 && c <= 0x011C3D) return UCD_PROPERTY_OTHER_ALPHABETIC; | if (c >= 0x011C38 && c <= 0x011C3D) return UCD_PROPERTY_OTHER_ALPHABETIC; | ||||
| if (c >= 0x011CB2 && c <= 0x011CB3) return UCD_PROPERTY_OTHER_ALPHABETIC; | if (c >= 0x011CB2 && c <= 0x011CB3) return UCD_PROPERTY_OTHER_ALPHABETIC; | ||||
| if (c >= 0x011CB5 && c <= 0x011CB6) return UCD_PROPERTY_OTHER_ALPHABETIC; | if (c >= 0x011CB5 && c <= 0x011CB6) return UCD_PROPERTY_OTHER_ALPHABETIC; | ||||
| break; | break; | ||||
| case 0x011D00: | |||||
| if (c >= 0x011D31 && c <= 0x011D36) return UCD_PROPERTY_OTHER_ALPHABETIC; | |||||
| if (c == 0x011D3A) return UCD_PROPERTY_OTHER_ALPHABETIC; | |||||
| if (c >= 0x011D3C && c <= 0x011D3D) return UCD_PROPERTY_OTHER_ALPHABETIC; | |||||
| if (c >= 0x011D3F && c <= 0x011D41) return UCD_PROPERTY_OTHER_ALPHABETIC; | |||||
| if (c == 0x011D42) return UCD_PROPERTY_DIACRITIC; | |||||
| if (c == 0x011D43) return UCD_PROPERTY_OTHER_ALPHABETIC; | |||||
| if (c >= 0x011D44 && c <= 0x011D45) return UCD_PROPERTY_DIACRITIC; | |||||
| if (c == 0x011D47) return UCD_PROPERTY_OTHER_ALPHABETIC; | |||||
| break; | |||||
| case 0x016A00: | case 0x016A00: | ||||
| if (c >= 0x016AF0 && c <= 0x016AF4) return UCD_PROPERTY_DIACRITIC; | if (c >= 0x016AF0 && c <= 0x016AF4) return UCD_PROPERTY_DIACRITIC; | ||||
| break; | break; | ||||
| switch (c & 0xFFFFFF00) | switch (c & 0xFFFFFF00) | ||||
| { | { | ||||
| case 0x0000: | case 0x0000: | ||||
| if (c >= 0x0030 && c <= 0x0039) return UCD_PROPERTY_HEX_DIGIT | UCD_PROPERTY_ASCII_HEX_DIGIT | UCD_PROPERTY_EMOJI; | |||||
| if (c >= 0x0030 && c <= 0x0039) return UCD_PROPERTY_HEX_DIGIT | UCD_PROPERTY_ASCII_HEX_DIGIT | UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_COMPONENT; | |||||
| break; | break; | ||||
| case 0xFF00: | case 0xFF00: | ||||
| if (c >= 0xFF10 && c <= 0xFF19) return UCD_PROPERTY_HEX_DIGIT; | if (c >= 0xFF10 && c <= 0xFF19) return UCD_PROPERTY_HEX_DIGIT; | ||||
| break; | break; | ||||
| case 0x2700: | case 0x2700: | ||||
| if (c == 0x27C6) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_PATTERN_SYNTAX; | if (c == 0x27C6) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_PATTERN_SYNTAX; | ||||
| if (c >= 0x27E6 && c <= 0x27EF) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_PATTERN_SYNTAX; // Pe|Ps | |||||
| if (c >= 0x27E6 && c <= 0x27EF) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_PATTERN_SYNTAX; /* Pe|Ps */ | |||||
| return UCD_PROPERTY_PATTERN_SYNTAX; | return UCD_PROPERTY_PATTERN_SYNTAX; | ||||
| case 0x2900: | case 0x2900: | ||||
| return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_PATTERN_SYNTAX; // Pe|Ps | |||||
| return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_PATTERN_SYNTAX; /* Pe|Ps */ | |||||
| case 0x2E00: | case 0x2E00: | ||||
| return UCD_PROPERTY_PATTERN_SYNTAX; | return UCD_PROPERTY_PATTERN_SYNTAX; | ||||
| case 0x3000: | case 0x3000: | ||||
| case 0x0000: | case 0x0000: | ||||
| if (c == 0x0021) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_SENTENCE_TERMINAL | UCD_PROPERTY_PATTERN_SYNTAX | ESPEAKNG_PROPERTY_EXCLAMATION_MARK; | if (c == 0x0021) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_SENTENCE_TERMINAL | UCD_PROPERTY_PATTERN_SYNTAX | ESPEAKNG_PROPERTY_EXCLAMATION_MARK; | ||||
| if (c == 0x0022) return UCD_PROPERTY_QUOTATION_MARK | UCD_PROPERTY_PATTERN_SYNTAX; | if (c == 0x0022) return UCD_PROPERTY_QUOTATION_MARK | UCD_PROPERTY_PATTERN_SYNTAX; | ||||
| if (c == 0x0023) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_PATTERN_SYNTAX; | |||||
| if (c == 0x0023) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_PATTERN_SYNTAX | UCD_PROPERTY_EMOJI_COMPONENT; | |||||
| if (c == 0x0027) return UCD_PROPERTY_QUOTATION_MARK | UCD_PROPERTY_PATTERN_SYNTAX; | if (c == 0x0027) return UCD_PROPERTY_QUOTATION_MARK | UCD_PROPERTY_PATTERN_SYNTAX; | ||||
| if (c == 0x002A) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_PATTERN_SYNTAX; | |||||
| if (c == 0x002A) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_PATTERN_SYNTAX | UCD_PROPERTY_EMOJI_COMPONENT; | |||||
| if (c == 0x002C) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_PATTERN_SYNTAX | ESPEAKNG_PROPERTY_COMMA; | if (c == 0x002C) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_PATTERN_SYNTAX | ESPEAKNG_PROPERTY_COMMA; | ||||
| if (c == 0x002E) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_SENTENCE_TERMINAL | UCD_PROPERTY_PATTERN_SYNTAX | ESPEAKNG_PROPERTY_FULL_STOP; | if (c == 0x002E) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_SENTENCE_TERMINAL | UCD_PROPERTY_PATTERN_SYNTAX | ESPEAKNG_PROPERTY_FULL_STOP; | ||||
| if (c == 0x003A) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_PATTERN_SYNTAX | ESPEAKNG_PROPERTY_COLON; | if (c == 0x003A) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_PATTERN_SYNTAX | ESPEAKNG_PROPERTY_COLON; | ||||
| case 0x11700: | case 0x11700: | ||||
| if (c >= 0x01173C && c <= 0x01173E) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_SENTENCE_TERMINAL; | if (c >= 0x01173C && c <= 0x01173E) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_SENTENCE_TERMINAL; | ||||
| break; | break; | ||||
| case 0x11A00: | |||||
| if (c >= 0x011A42 && c <= 0x011A43) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_SENTENCE_TERMINAL; | |||||
| if (c >= 0x011A9B && c <= 0x011A9C) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_SENTENCE_TERMINAL; | |||||
| if (c >= 0x011AA1 && c <= 0x011AA2) return UCD_PROPERTY_TERMINAL_PUNCTUATION; | |||||
| break; | |||||
| case 0x11C00: | case 0x11C00: | ||||
| if (c >= 0x011C41 && c <= 0x011C42) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_SENTENCE_TERMINAL; | if (c >= 0x011C41 && c <= 0x011C42) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_SENTENCE_TERMINAL; | ||||
| if (c == 0x011C43) return UCD_PROPERTY_TERMINAL_PUNCTUATION; | if (c == 0x011C43) return UCD_PROPERTY_TERMINAL_PUNCTUATION; | ||||
| break; | break; | ||||
| case 0x2700: | case 0x2700: | ||||
| if (c == 0x27C5) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_PATTERN_SYNTAX; | if (c == 0x27C5) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_PATTERN_SYNTAX; | ||||
| if (c >= 0x27E6 && c <= 0x27EF) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_PATTERN_SYNTAX; // Pe|Ps | |||||
| if (c >= 0x27E6 && c <= 0x27EF) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_PATTERN_SYNTAX; /* Pe|Ps */ | |||||
| return UCD_PROPERTY_PATTERN_SYNTAX; | return UCD_PROPERTY_PATTERN_SYNTAX; | ||||
| case 0x2900: | case 0x2900: | ||||
| return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_PATTERN_SYNTAX; | return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_PATTERN_SYNTAX; | ||||
| if (c == 0xFFE3) return UCD_PROPERTY_DIACRITIC; | if (c == 0xFFE3) return UCD_PROPERTY_DIACRITIC; | ||||
| break; | break; | ||||
| case 0x01F300: | case 0x01F300: | ||||
| return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION | UCD_PROPERTY_EMOJI_MODIFIER; | |||||
| return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION | UCD_PROPERTY_EMOJI_MODIFIER | UCD_PROPERTY_EMOJI_COMPONENT; | |||||
| } | } | ||||
| return 0; | return 0; | ||||
| } | } | ||||
| if (c == 0x23F3) return UCD_PROPERTY_PATTERN_SYNTAX | UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION; | if (c == 0x23F3) return UCD_PROPERTY_PATTERN_SYNTAX | UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION; | ||||
| if (c >= 0x23E9 && c <= 0x23F3) return UCD_PROPERTY_PATTERN_SYNTAX | UCD_PROPERTY_EMOJI; | if (c >= 0x23E9 && c <= 0x23F3) return UCD_PROPERTY_PATTERN_SYNTAX | UCD_PROPERTY_EMOJI; | ||||
| if (c >= 0x23F8 && c <= 0x23FA) return UCD_PROPERTY_PATTERN_SYNTAX | UCD_PROPERTY_EMOJI; | if (c >= 0x23F8 && c <= 0x23FA) return UCD_PROPERTY_PATTERN_SYNTAX | UCD_PROPERTY_EMOJI; | ||||
| if (c >= 0x23E3 && c <= 0x23FE) return UCD_PROPERTY_PATTERN_SYNTAX; | |||||
| if (c >= 0x23E3) return UCD_PROPERTY_PATTERN_SYNTAX; | |||||
| break; | break; | ||||
| case 0x2400: | case 0x2400: | ||||
| if (c >= 0x2400 && c <= 0x244A) return UCD_PROPERTY_PATTERN_SYNTAX; | if (c >= 0x2400 && c <= 0x244A) return UCD_PROPERTY_PATTERN_SYNTAX; | ||||
| if (c >= 0x01F170 && c <= 0x01F189) return UCD_PROPERTY_OTHER_ALPHABETIC | UCD_PROPERTY_OTHER_UPPERCASE; | if (c >= 0x01F170 && c <= 0x01F189) return UCD_PROPERTY_OTHER_ALPHABETIC | UCD_PROPERTY_OTHER_UPPERCASE; | ||||
| if (c == 0x01F18E) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION; | if (c == 0x01F18E) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION; | ||||
| if (c >= 0x01F191 && c <= 0x01F19A) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION; | if (c >= 0x01F191 && c <= 0x01F19A) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION; | ||||
| if (c >= 0x01F1E6) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION; | |||||
| if (c >= 0x01F1E6) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION | UCD_PROPERTY_REGIONAL_INDICATOR | UCD_PROPERTY_EMOJI_COMPONENT; | |||||
| break; | break; | ||||
| case 0x01F200: | case 0x01F200: | ||||
| if (c == 0x01F201) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION; | if (c == 0x01F201) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION; | ||||
| if (c >= 0x01F6EB && c <= 0x01F6EC) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION; | if (c >= 0x01F6EB && c <= 0x01F6EC) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION; | ||||
| if (c == 0x01F6F0) return UCD_PROPERTY_EMOJI; | if (c == 0x01F6F0) return UCD_PROPERTY_EMOJI; | ||||
| if (c == 0x01F6F3) return UCD_PROPERTY_EMOJI; | if (c == 0x01F6F3) return UCD_PROPERTY_EMOJI; | ||||
| if (c >= 0x01F6F4 && c <= 0x01F6F6) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION; | |||||
| if (c >= 0x01F6F4 && c <= 0x01F6F8) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION; | |||||
| break; | break; | ||||
| case 0x01F900: | case 0x01F900: | ||||
| if (c <= 0x01F90B) return 0; | |||||
| if (c >= 0x01F918 && c <= 0x01F91C) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION | UCD_PROPERTY_EMOJI_MODIFIER_BASE; | if (c >= 0x01F918 && c <= 0x01F91C) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION | UCD_PROPERTY_EMOJI_MODIFIER_BASE; | ||||
| if (c >= 0x01F910 && c <= 0x01F91D) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION; | if (c >= 0x01F910 && c <= 0x01F91D) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION; | ||||
| if (c == 0x01F91E) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION | UCD_PROPERTY_EMOJI_MODIFIER_BASE; | |||||
| if (c >= 0x01F91E && c <= 0x01F91F) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION | UCD_PROPERTY_EMOJI_MODIFIER_BASE; | |||||
| if (c == 0x01F926) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION | UCD_PROPERTY_EMOJI_MODIFIER_BASE; | if (c == 0x01F926) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION | UCD_PROPERTY_EMOJI_MODIFIER_BASE; | ||||
| if (c >= 0x01F920 && c <= 0x01F927) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION; | |||||
| if (c == 0x01F930) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION | UCD_PROPERTY_EMOJI_MODIFIER_BASE; | |||||
| if (c >= 0x01F920 && c <= 0x01F92F) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION; | |||||
| if (c >= 0x01F930 && c <= 0x01F932) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION | UCD_PROPERTY_EMOJI_MODIFIER_BASE; | |||||
| if (c == 0x01F93B) return 0; | if (c == 0x01F93B) return 0; | ||||
| if (c >= 0x01F93A && c <= 0x01F93C) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION; | if (c >= 0x01F93A && c <= 0x01F93C) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION; | ||||
| if (c >= 0x01F933 && c <= 0x01F93E) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION | UCD_PROPERTY_EMOJI_MODIFIER_BASE; | if (c >= 0x01F933 && c <= 0x01F93E) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION | UCD_PROPERTY_EMOJI_MODIFIER_BASE; | ||||
| if (c == 0x01F946) return 0; | if (c == 0x01F946) return 0; | ||||
| if (c >= 0x01F940 && c <= 0x01F94B) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION; | if (c >= 0x01F940 && c <= 0x01F94B) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION; | ||||
| if (c == 0x01F94C) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION; | |||||
| if (c >= 0x01F950 && c <= 0x01F95E) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION; | if (c >= 0x01F950 && c <= 0x01F95E) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION; | ||||
| if (c >= 0x01F95F && c <= 0x01F96B) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION; | |||||
| if (c >= 0x01F980 && c <= 0x01F991) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION; | if (c >= 0x01F980 && c <= 0x01F991) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION; | ||||
| if (c >= 0x01F992 && c <= 0x01F997) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION; | |||||
| if (c == 0x01F9C0) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION; | if (c == 0x01F9C0) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION; | ||||
| if (c >= 0x01F9D1 && c <= 0x01F9DD) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION | UCD_PROPERTY_EMOJI_MODIFIER_BASE; | |||||
| if (c >= 0x01F9D0 && c <= 0x01F9E6) return UCD_PROPERTY_EMOJI | UCD_PROPERTY_EMOJI_PRESENTATION; | |||||
| return UCD_PROPERTY_EMOJI; | return UCD_PROPERTY_EMOJI; | ||||
| } | } | ||||
| return 0; | return 0; | ||||
| case UCD_CATEGORY_Zl: return UCD_PROPERTY_WHITE_SPACE | UCD_PROPERTY_PATTERN_WHITE_SPACE; | case UCD_CATEGORY_Zl: return UCD_PROPERTY_WHITE_SPACE | UCD_PROPERTY_PATTERN_WHITE_SPACE; | ||||
| case UCD_CATEGORY_Zp: return UCD_PROPERTY_WHITE_SPACE | UCD_PROPERTY_PATTERN_WHITE_SPACE | ESPEAKNG_PROPERTY_PARAGRAPH_SEPARATOR; | case UCD_CATEGORY_Zp: return UCD_PROPERTY_WHITE_SPACE | UCD_PROPERTY_PATTERN_WHITE_SPACE | ESPEAKNG_PROPERTY_PARAGRAPH_SEPARATOR; | ||||
| case UCD_CATEGORY_Zs: return properties_Zs(c); | case UCD_CATEGORY_Zs: return properties_Zs(c); | ||||
| default: return 0; // Co Cs Ii Lt Me | |||||
| default: return 0; /* Co Cs Ii Lt Me */ | |||||
| }; | }; | ||||
| } | } |
| "Geok", | "Geok", | ||||
| "Geor", | "Geor", | ||||
| "Glag", | "Glag", | ||||
| "Gonm", | |||||
| "Goth", | "Goth", | ||||
| "Gran", | "Gran", | ||||
| "Grek", | "Grek", | ||||
| "Sind", | "Sind", | ||||
| "Sinh", | "Sinh", | ||||
| "Sora", | "Sora", | ||||
| "Soyo", | |||||
| "Sund", | "Sund", | ||||
| "Sylo", | "Sylo", | ||||
| "Syrc", | "Syrc", | ||||
| "Xpeo", | "Xpeo", | ||||
| "Xsux", | "Xsux", | ||||
| "Yiii", | "Yiii", | ||||
| "Zanb", | |||||
| "Zinh", | "Zinh", | ||||
| "Zmth", | "Zmth", | ||||
| "Zsym", | "Zsym", |
| * along with ucd-tools. If not, see <http://www.gnu.org/licenses/>. | * along with ucd-tools. If not, see <http://www.gnu.org/licenses/>. | ||||
| */ | */ | ||||
| #include "config.h" | |||||
| #include "ucd/ucd.h" | #include "ucd/ucd.h" | ||||
| #include <locale.h> | #include <locale.h> | ||||
| #include <wchar.h> | #include <wchar.h> | ||||
| #include <wctype.h> | #include <wctype.h> | ||||
| #ifndef HAVE_ISWBLANK | |||||
| static int iswblank(wint_t c) | |||||
| { | |||||
| return iswspace(c) && !(c >= 0x0A && c <= 0x0D); | |||||
| } | |||||
| #endif | |||||
| void fput_utf8c(FILE *out, codepoint_t c) | void fput_utf8c(FILE *out, codepoint_t c) | ||||
| { | { | ||||
| if (c < 0x80) | if (c < 0x80) | ||||
| { | { | ||||
| switch (mode) | switch (mode) | ||||
| { | { | ||||
| case 'c': // character | |||||
| case 'c': /* character */ | |||||
| switch (c) | switch (c) | ||||
| { | { | ||||
| case '\t': fputs("\\t", out); break; | case '\t': fputs("\\t", out); break; | ||||
| default: fput_utf8c(out, c); break; | default: fput_utf8c(out, c); break; | ||||
| } | } | ||||
| break; | break; | ||||
| case 'h': // hexadecimal (lower) | |||||
| case 'h': /* hexadecimal (lower) */ | |||||
| fprintf(out, "%06x", c); | fprintf(out, "%06x", c); | ||||
| break; | break; | ||||
| case 'H': // hexadecimal (upper) | |||||
| case 'H': /* hexadecimal (upper) */ | |||||
| fprintf(out, "%06X", c); | fprintf(out, "%06X", c); | ||||
| break; | break; | ||||
| } | } | ||||
| { | { | ||||
| switch (mode) | switch (mode) | ||||
| { | { | ||||
| case 'A': // alpha-numeric | |||||
| case 'A': /* alpha-numeric */ | |||||
| fputc(iswalnum(c) ? '1' : '0', out); | fputc(iswalnum(c) ? '1' : '0', out); | ||||
| break; | break; | ||||
| case 'a': // alpha | |||||
| case 'a': /* alpha */ | |||||
| fputc(iswalpha(c) ? '1' : '0', out); | fputc(iswalpha(c) ? '1' : '0', out); | ||||
| break; | break; | ||||
| case 'b': // blank | |||||
| case 'b': /* blank */ | |||||
| fputc(iswblank(c) ? '1' : '0', out); | fputc(iswblank(c) ? '1' : '0', out); | ||||
| break; | break; | ||||
| case 'c': // control | |||||
| case 'c': /* control */ | |||||
| fputc(iswcntrl(c) ? '1' : '0', out); | fputc(iswcntrl(c) ? '1' : '0', out); | ||||
| break; | break; | ||||
| case 'd': // numeric | |||||
| case 'd': /* numeric */ | |||||
| fputc(iswdigit(c) ? '1' : '0', out); | fputc(iswdigit(c) ? '1' : '0', out); | ||||
| break; | break; | ||||
| case 'g': // glyph | |||||
| case 'g': /* glyph */ | |||||
| fputc(iswgraph(c) ? '1' : '0', out); | fputc(iswgraph(c) ? '1' : '0', out); | ||||
| break; | break; | ||||
| case 'l': // lower case | |||||
| case 'l': /* lower case */ | |||||
| fputc(iswlower(c) ? '1' : '0', out); | fputc(iswlower(c) ? '1' : '0', out); | ||||
| break; | break; | ||||
| case 'P': // printable | |||||
| case 'P': /* printable */ | |||||
| fputc(iswprint(c) ? '1' : '0', out); | fputc(iswprint(c) ? '1' : '0', out); | ||||
| break; | break; | ||||
| case 'p': // punctuation | |||||
| case 'p': /* punctuation */ | |||||
| fputc(iswpunct(c) ? '1' : '0', out); | fputc(iswpunct(c) ? '1' : '0', out); | ||||
| break; | break; | ||||
| case 's': // whitespace | |||||
| case 's': /* whitespace */ | |||||
| fputc(iswspace(c) ? '1' : '0', out); | fputc(iswspace(c) ? '1' : '0', out); | ||||
| break; | break; | ||||
| case 'u': // upper case | |||||
| case 'u': /* upper case */ | |||||
| fputc(iswupper(c) ? '1' : '0', out); | fputc(iswupper(c) ? '1' : '0', out); | ||||
| break; | break; | ||||
| case 'x': // xdigit | |||||
| case 'x': /* xdigit */ | |||||
| fputc(iswxdigit(c) ? '1' : '0', out); | fputc(iswxdigit(c) ? '1' : '0', out); | ||||
| break; | break; | ||||
| } | } | ||||
| case '%': | case '%': | ||||
| switch (*++format) | switch (*++format) | ||||
| { | { | ||||
| case 'c': // category | |||||
| case 'c': /* category */ | |||||
| fputs(ucd_get_category_string(ucd_lookup_category(c)), out); | fputs(ucd_get_category_string(ucd_lookup_category(c)), out); | ||||
| break; | break; | ||||
| case 'C': // category group | |||||
| case 'C': /* category group */ | |||||
| fputs(ucd_get_category_group_string(ucd_lookup_category_group(c)), out); | fputs(ucd_get_category_group_string(ucd_lookup_category_group(c)), out); | ||||
| break; | break; | ||||
| case 'p': // codepoint | |||||
| case 'p': /* codepoint */ | |||||
| uprintf_codepoint(out, c, *++format); | uprintf_codepoint(out, c, *++format); | ||||
| break; | break; | ||||
| case 'P': // properties | |||||
| case 'P': /* properties */ | |||||
| fprintf(out, "%016llx", ucd_properties(c, ucd_lookup_category(c))); | fprintf(out, "%016llx", ucd_properties(c, ucd_lookup_category(c))); | ||||
| break; | break; | ||||
| case 'i': // is* | |||||
| case 'i': /* is* */ | |||||
| uprintf_is(out, c, *++format); | uprintf_is(out, c, *++format); | ||||
| break; | break; | ||||
| case 'L': // lowercase | |||||
| case 'L': /* lowercase */ | |||||
| uprintf_codepoint(out, towlower(c), *++format); | uprintf_codepoint(out, towlower(c), *++format); | ||||
| break; | break; | ||||
| case 's': // script | |||||
| case 's': /* script */ | |||||
| fputs(ucd_get_script_string(ucd_lookup_script(c)), out); | fputs(ucd_get_script_string(ucd_lookup_script(c)), out); | ||||
| break; | break; | ||||
| case 'T': // titlecase | |||||
| case 'T': /* titlecase */ | |||||
| uprintf_codepoint(out, ucd_totitle(c), *++format); | uprintf_codepoint(out, ucd_totitle(c), *++format); | ||||
| break; | break; | ||||
| case 'U': // uppercase | |||||
| case 'U': /* uppercase */ | |||||
| uprintf_codepoint(out, towupper(c), *++format); | uprintf_codepoint(out, towupper(c), *++format); | ||||
| break; | break; | ||||
| } | } | ||||
| { | { | ||||
| FILE *in = NULL; | FILE *in = NULL; | ||||
| const char *format = NULL; | const char *format = NULL; | ||||
| for (int argn = 1; argn != argc; ++argn) | |||||
| int argn; | |||||
| for (argn = 1; argn != argc; ++argn) | |||||
| { | { | ||||
| const char *arg = argv[argn]; | const char *arg = argv[argn]; | ||||
| if (!strcmp(arg, "--stdin") || !strcmp(arg, "-")) | if (!strcmp(arg, "--stdin") || !strcmp(arg, "-")) | ||||
| } | } | ||||
| else | else | ||||
| { | { | ||||
| for (codepoint_t c = 0; c <= 0x10FFFF; ++c) | |||||
| codepoint_t c; | |||||
| for (c = 0; c <= 0x10FFFF; ++c) | |||||
| uprintf(stdout, c, format ? format : | uprintf(stdout, c, format ? format : | ||||
| "%pH %s %C %c %UH %LH %TH %id %ix %ic %is %ib %ip %iP %ig %iA %ia %iu %il %P\n"); | "%pH %s %C %c %UH %LH %TH %id %ix %ic %is %ib %ip %iP %ig %iA %ia %iu %il %P\n"); | ||||
| } | } |
| { | { | ||||
| switch (mode) | switch (mode) | ||||
| { | { | ||||
| case 'c': // character | |||||
| case 'c': /* character */ | |||||
| switch (c) | switch (c) | ||||
| { | { | ||||
| case '\t': fputs("\\t", out); break; | case '\t': fputs("\\t", out); break; | ||||
| default: fput_utf8c(out, c); break; | default: fput_utf8c(out, c); break; | ||||
| } | } | ||||
| break; | break; | ||||
| case 'h': // hexadecimal (lower) | |||||
| case 'h': /* hexadecimal (lower) */ | |||||
| fprintf(out, "%06x", c); | fprintf(out, "%06x", c); | ||||
| break; | break; | ||||
| case 'H': // hexadecimal (upper) | |||||
| case 'H': /* hexadecimal (upper) */ | |||||
| fprintf(out, "%06X", c); | fprintf(out, "%06X", c); | ||||
| break; | break; | ||||
| } | } | ||||
| { | { | ||||
| switch (mode) | switch (mode) | ||||
| { | { | ||||
| case 'A': // alpha-numeric | |||||
| case 'A': /* alpha-numeric */ | |||||
| fputc(ucd_isalnum(c) ? '1' : '0', out); | fputc(ucd_isalnum(c) ? '1' : '0', out); | ||||
| break; | break; | ||||
| case 'a': // alpha | |||||
| case 'a': /* alpha */ | |||||
| fputc(ucd_isalpha(c) ? '1' : '0', out); | fputc(ucd_isalpha(c) ? '1' : '0', out); | ||||
| break; | break; | ||||
| case 'b': // blank | |||||
| case 'b': /* blank */ | |||||
| fputc(ucd_isblank(c) ? '1' : '0', out); | fputc(ucd_isblank(c) ? '1' : '0', out); | ||||
| break; | break; | ||||
| case 'c': // control | |||||
| case 'c': /* control */ | |||||
| fputc(ucd_iscntrl(c) ? '1' : '0', out); | fputc(ucd_iscntrl(c) ? '1' : '0', out); | ||||
| break; | break; | ||||
| case 'd': // numeric | |||||
| case 'd': /* numeric */ | |||||
| fputc(ucd_isdigit(c) ? '1' : '0', out); | fputc(ucd_isdigit(c) ? '1' : '0', out); | ||||
| break; | break; | ||||
| case 'g': // glyph | |||||
| case 'g': /* glyph */ | |||||
| fputc(ucd_isgraph(c) ? '1' : '0', out); | fputc(ucd_isgraph(c) ? '1' : '0', out); | ||||
| break; | break; | ||||
| case 'l': // lower case | |||||
| case 'l': /* lower case */ | |||||
| fputc(ucd_islower(c) ? '1' : '0', out); | fputc(ucd_islower(c) ? '1' : '0', out); | ||||
| break; | break; | ||||
| case 'P': // printable | |||||
| case 'P': /* printable */ | |||||
| fputc(ucd_isprint(c) ? '1' : '0', out); | fputc(ucd_isprint(c) ? '1' : '0', out); | ||||
| break; | break; | ||||
| case 'p': // punctuation | |||||
| case 'p': /* punctuation */ | |||||
| fputc(ucd_ispunct(c) ? '1' : '0', out); | fputc(ucd_ispunct(c) ? '1' : '0', out); | ||||
| break; | break; | ||||
| case 's': // whitespace | |||||
| case 's': /* whitespace */ | |||||
| fputc(ucd_isspace(c) ? '1' : '0', out); | fputc(ucd_isspace(c) ? '1' : '0', out); | ||||
| break; | break; | ||||
| case 'u': // upper case | |||||
| case 'u': /* upper case */ | |||||
| fputc(ucd_isupper(c) ? '1' : '0', out); | fputc(ucd_isupper(c) ? '1' : '0', out); | ||||
| break; | break; | ||||
| case 'x': // xdigit | |||||
| case 'x': /* xdigit */ | |||||
| fputc(ucd_isxdigit(c) ? '1' : '0', out); | fputc(ucd_isxdigit(c) ? '1' : '0', out); | ||||
| break; | break; | ||||
| } | } | ||||
| case '%': | case '%': | ||||
| switch (*++format) | switch (*++format) | ||||
| { | { | ||||
| case 'c': // category | |||||
| case 'c': /* category */ | |||||
| fputs(ucd_get_category_string(ucd_lookup_category(c)), out); | fputs(ucd_get_category_string(ucd_lookup_category(c)), out); | ||||
| break; | break; | ||||
| case 'C': // category group | |||||
| case 'C': /* category group */ | |||||
| fputs(ucd_get_category_group_string(ucd_lookup_category_group(c)), out); | fputs(ucd_get_category_group_string(ucd_lookup_category_group(c)), out); | ||||
| break; | break; | ||||
| case 'p': // codepoint | |||||
| case 'p': /* codepoint */ | |||||
| uprintf_codepoint(out, c, *++format); | uprintf_codepoint(out, c, *++format); | ||||
| break; | break; | ||||
| case 'P': // properties | |||||
| case 'P': /* properties */ | |||||
| fprintf(out, "%016llx", ucd_properties(c, ucd_lookup_category(c))); | fprintf(out, "%016llx", ucd_properties(c, ucd_lookup_category(c))); | ||||
| break; | break; | ||||
| case 'i': // is* | |||||
| case 'i': /* is* */ | |||||
| uprintf_is(out, c, *++format); | uprintf_is(out, c, *++format); | ||||
| break; | break; | ||||
| case 'L': // lowercase | |||||
| case 'L': /* lowercase */ | |||||
| uprintf_codepoint(out, ucd_tolower(c), *++format); | uprintf_codepoint(out, ucd_tolower(c), *++format); | ||||
| break; | break; | ||||
| case 's': // script | |||||
| case 's': /* script */ | |||||
| fputs(ucd_get_script_string(ucd_lookup_script(c)), out); | fputs(ucd_get_script_string(ucd_lookup_script(c)), out); | ||||
| break; | break; | ||||
| case 'T': // titlecase | |||||
| case 'T': /* titlecase */ | |||||
| uprintf_codepoint(out, ucd_totitle(c), *++format); | uprintf_codepoint(out, ucd_totitle(c), *++format); | ||||
| break; | break; | ||||
| case 'U': // uppercase | |||||
| case 'U': /* uppercase */ | |||||
| uprintf_codepoint(out, ucd_toupper(c), *++format); | uprintf_codepoint(out, ucd_toupper(c), *++format); | ||||
| break; | break; | ||||
| } | } | ||||
| { | { | ||||
| FILE *in = NULL; | FILE *in = NULL; | ||||
| const char *format = NULL; | const char *format = NULL; | ||||
| for (int argn = 1; argn != argc; ++argn) | |||||
| int argn; | |||||
| for (argn = 1; argn != argc; ++argn) | |||||
| { | { | ||||
| const char *arg = argv[argn]; | const char *arg = argv[argn]; | ||||
| if (!strcmp(arg, "--stdin") || !strcmp(arg, "-")) | if (!strcmp(arg, "--stdin") || !strcmp(arg, "-")) | ||||
| } | } | ||||
| else | else | ||||
| { | { | ||||
| for (codepoint_t c = 0; c <= 0x10FFFF; ++c) | |||||
| codepoint_t c; | |||||
| for (c = 0; c <= 0x10FFFF; ++c) | |||||
| uprintf(stdout, c, format ? format : | uprintf(stdout, c, format ? format : | ||||
| "%pH %s %C %c %UH %LH %TH %id %ix %ic %is %ib %ip %iP %ig %iA %ia %iu %il %P\n"); | "%pH %s %C %c %UH %LH %TH %id %ix %ic %is %ib %ip %iP %ig %iA %ia %iu %il %P\n"); | ||||
| } | } |
| * along with ucd-tools. If not, see <http://www.gnu.org/licenses/>. | * along with ucd-tools. If not, see <http://www.gnu.org/licenses/>. | ||||
| */ | */ | ||||
| // NOTE: This file is automatically generated from the UnicodeData.txt file in | |||||
| // the Unicode Character database by the ucd-tools/tools/categories.py script. | |||||
| /* NOTE: This file is automatically generated from the UnicodeData.txt file in | |||||
| * the Unicode Character database by the ucd-tools/tools/categories.py script. | |||||
| */ | |||||
| #include "ucd/ucd.h" | #include "ucd/ucd.h" | ||||
| #include <stddef.h> | #include <stddef.h> | ||||
| // Unicode Character Data %s | |||||
| /* Unicode Character Data %s */ | |||||
| struct case_conversion_entry | struct case_conversion_entry | ||||
| { | { |
| * along with ucd-tools. If not, see <http://www.gnu.org/licenses/>. | * along with ucd-tools. If not, see <http://www.gnu.org/licenses/>. | ||||
| */ | */ | ||||
| // NOTE: This file is automatically generated from the UnicodeData.txt file in | |||||
| // the Unicode Character database by the ucd-tools/tools/categories.py script. | |||||
| /* NOTE: This file is automatically generated from the UnicodeData.txt file in | |||||
| * the Unicode Character database by the ucd-tools/tools/categories.py script. | |||||
| */ | |||||
| #include "ucd/ucd.h" | #include "ucd/ucd.h" | ||||
| #define Zs UCD_CATEGORY_Zs | #define Zs UCD_CATEGORY_Zs | ||||
| #define Ii UCD_CATEGORY_Ii | #define Ii UCD_CATEGORY_Ii | ||||
| // Unicode Character Data %s | |||||
| /* Unicode Character Data %s */ | |||||
| """ % ucd_version) | """ % ucd_version) | ||||
| for category in special_categories: | for category in special_categories: | ||||
| sys.stdout.write('{\n') | sys.stdout.write('{\n') | ||||
| for codepoint, table in sorted(category_tables[table_index].items()): | for codepoint, table in sorted(category_tables[table_index].items()): | ||||
| if isinstance(table, str): | if isinstance(table, str): | ||||
| sys.stdout.write('\tcategories_%s, // %s\n' % (table, codepoint)) | |||||
| sys.stdout.write('\tcategories_%s, /* %s */\n' % (table, codepoint)) | |||||
| else: | else: | ||||
| sys.stdout.write('\tcategories_%s,\n' % codepoint) | sys.stdout.write('\tcategories_%s,\n' % codepoint) | ||||
| sys.stdout.write('};\n') | sys.stdout.write('};\n') | ||||
| sys.stdout.write('{\n') | sys.stdout.write('{\n') | ||||
| for codepoints, category, comment in category_sets: | for codepoints, category, comment in category_sets: | ||||
| if category: | if category: | ||||
| sys.stdout.write('\tif (c <= 0x%s) return %s; // %s : %s\n' % (codepoints.last, category, codepoints, comment)) | |||||
| sys.stdout.write('\tif (c <= 0x%s) return %s; /* %s : %s */\n' % (codepoints.last, category, codepoints, comment)) | |||||
| else: | else: | ||||
| sys.stdout.write('\tif (c <= 0x%s) // %s\n' % (codepoints.last, codepoints)) | |||||
| sys.stdout.write('\tif (c <= 0x%s) /* %s */\n' % (codepoints.last, codepoints)) | |||||
| sys.stdout.write('\t{\n') | sys.stdout.write('\t{\n') | ||||
| sys.stdout.write('\t\tconst uint8_t *table = categories_%s_%s[(c - 0x%s) / 256];\n' % (codepoints.first, codepoints.last, codepoints.first)) | sys.stdout.write('\t\tconst uint8_t *table = categories_%s_%s[(c - 0x%s) / 256];\n' % (codepoints.first, codepoints.last, codepoints.first)) | ||||
| sys.stdout.write('\t\treturn (ucd_category)table[c % 256];\n') | sys.stdout.write('\t\treturn (ucd_category)table[c % 256];\n') | ||||
| sys.stdout.write('\t}\n') | sys.stdout.write('\t}\n') | ||||
| sys.stdout.write('\treturn Ii; // Invalid Unicode Codepoint\n') | |||||
| sys.stdout.write('\treturn Ii; /* Invalid Unicode Codepoint */\n') | |||||
| sys.stdout.write('}\n') | sys.stdout.write('}\n') | ||||
| sys.stdout.write(""" | sys.stdout.write(""" |
| props += (2 ** 34) * data.get('Emoji_Presentation', 0) # emoji-data | props += (2 ** 34) * data.get('Emoji_Presentation', 0) # emoji-data | ||||
| props += (2 ** 35) * data.get('Emoji_Modifier', 0) # emoji-data | props += (2 ** 35) * data.get('Emoji_Modifier', 0) # emoji-data | ||||
| props += (2 ** 36) * data.get('Emoji_Modifier_Base', 0) # emoji-data | props += (2 ** 36) * data.get('Emoji_Modifier_Base', 0) # emoji-data | ||||
| props += (2 ** 37) * data.get('Regional_Indicator', 0) # PropList 10.0.0 | |||||
| props += (2 ** 38) * data.get('Emoji_Component', 0) # emoji-data 5.0 | |||||
| # eSpeak NG extended properties: | # eSpeak NG extended properties: | ||||
| props += (2 ** 52) * data.get('Inverted_Terminal_Punctuation', 0) | props += (2 ** 52) * data.get('Inverted_Terminal_Punctuation', 0) | ||||
| props += (2 ** 53) * data.get('Punctuation_In_Word', 0) | props += (2 ** 53) * data.get('Punctuation_In_Word', 0) |
| * along with ucd-tools. If not, see <http://www.gnu.org/licenses/>. | * along with ucd-tools. If not, see <http://www.gnu.org/licenses/>. | ||||
| */ | */ | ||||
| // NOTE: This file is automatically generated from the Scripts.txt file in | |||||
| // the Unicode Character database by the ucd-tools/tools/scripts.py script. | |||||
| /* NOTE: This file is automatically generated from the Scripts.txt file in | |||||
| * the Unicode Character database by the ucd-tools/tools/scripts.py script. | |||||
| */ | |||||
| #include "ucd/ucd.h" | #include "ucd/ucd.h" | ||||
| #define Geok UCD_SCRIPT_Geok | #define Geok UCD_SCRIPT_Geok | ||||
| #define Geor UCD_SCRIPT_Geor | #define Geor UCD_SCRIPT_Geor | ||||
| #define Glag UCD_SCRIPT_Glag | #define Glag UCD_SCRIPT_Glag | ||||
| #define Gonm UCD_SCRIPT_Gonm | |||||
| #define Goth UCD_SCRIPT_Goth | #define Goth UCD_SCRIPT_Goth | ||||
| #define Gran UCD_SCRIPT_Gran | #define Gran UCD_SCRIPT_Gran | ||||
| #define Grek UCD_SCRIPT_Grek | #define Grek UCD_SCRIPT_Grek | ||||
| #define Sind UCD_SCRIPT_Sind | #define Sind UCD_SCRIPT_Sind | ||||
| #define Sinh UCD_SCRIPT_Sinh | #define Sinh UCD_SCRIPT_Sinh | ||||
| #define Sora UCD_SCRIPT_Sora | #define Sora UCD_SCRIPT_Sora | ||||
| #define Soyo UCD_SCRIPT_Soyo | |||||
| #define Sund UCD_SCRIPT_Sund | #define Sund UCD_SCRIPT_Sund | ||||
| #define Sylo UCD_SCRIPT_Sylo | #define Sylo UCD_SCRIPT_Sylo | ||||
| #define Syrc UCD_SCRIPT_Syrc | #define Syrc UCD_SCRIPT_Syrc | ||||
| #define Xpeo UCD_SCRIPT_Xpeo | #define Xpeo UCD_SCRIPT_Xpeo | ||||
| #define Xsux UCD_SCRIPT_Xsux | #define Xsux UCD_SCRIPT_Xsux | ||||
| #define Yiii UCD_SCRIPT_Yiii | #define Yiii UCD_SCRIPT_Yiii | ||||
| #define Zanb UCD_SCRIPT_Zanb | |||||
| #define Zinh UCD_SCRIPT_Zinh | #define Zinh UCD_SCRIPT_Zinh | ||||
| #define Zmth UCD_SCRIPT_Zmth | #define Zmth UCD_SCRIPT_Zmth | ||||
| #define Zsym UCD_SCRIPT_Zsym | #define Zsym UCD_SCRIPT_Zsym | ||||
| #define Zyyy UCD_SCRIPT_Zyyy | #define Zyyy UCD_SCRIPT_Zyyy | ||||
| #define Zzzz UCD_SCRIPT_Zzzz | #define Zzzz UCD_SCRIPT_Zzzz | ||||
| // Unicode Character Data %s | |||||
| /* Unicode Character Data %s */ | |||||
| """ % ucd_version) | """ % ucd_version) | ||||
| for script in special_scripts: | for script in special_scripts: | ||||
| sys.stdout.write('{\n') | sys.stdout.write('{\n') | ||||
| for codepoint, table in sorted(script_tables[table_index].items()): | for codepoint, table in sorted(script_tables[table_index].items()): | ||||
| if isinstance(table, str): | if isinstance(table, str): | ||||
| sys.stdout.write('\tscripts_%s, // %s\n' % (table, codepoint)) | |||||
| sys.stdout.write('\tscripts_%s, /* %s */\n' % (table, codepoint)) | |||||
| else: | else: | ||||
| sys.stdout.write('\tscripts_%s,\n' % codepoint) | sys.stdout.write('\tscripts_%s,\n' % codepoint) | ||||
| sys.stdout.write('};\n') | sys.stdout.write('};\n') | ||||
| sys.stdout.write('{\n') | sys.stdout.write('{\n') | ||||
| for codepoints, script, comment in script_sets: | for codepoints, script, comment in script_sets: | ||||
| if script: | if script: | ||||
| sys.stdout.write('\tif (c <= 0x%s) return %s; // %s : %s\n' % (codepoints.last, script, codepoints, comment)) | |||||
| sys.stdout.write('\tif (c <= 0x%s) return %s; /* %s : %s */\n' % (codepoints.last, script, codepoints, comment)) | |||||
| else: | else: | ||||
| sys.stdout.write('\tif (c <= 0x%s) // %s\n' % (codepoints.last, codepoints)) | |||||
| sys.stdout.write('\tif (c <= 0x%s) /* %s */\n' % (codepoints.last, codepoints)) | |||||
| sys.stdout.write('\t{\n') | sys.stdout.write('\t{\n') | ||||
| sys.stdout.write('\t\tconst uint8_t *table = scripts_%s_%s[(c - 0x%s) / 256];\n' % (codepoints.first, codepoints.last, codepoints.first)) | sys.stdout.write('\t\tconst uint8_t *table = scripts_%s_%s[(c - 0x%s) / 256];\n' % (codepoints.first, codepoints.last, codepoints.first)) | ||||
| sys.stdout.write('\t\treturn (ucd_script)table[c % 256];\n') | sys.stdout.write('\t\treturn (ucd_script)table[c % 256];\n') | ||||
| sys.stdout.write('\t}\n') | sys.stdout.write('\t}\n') | ||||
| sys.stdout.write('\treturn Zzzz; // Invalid Unicode Codepoint\n') | |||||
| sys.stdout.write('\treturn Zzzz; /* Invalid Unicode Codepoint */\n') | |||||
| sys.stdout.write('}\n') | sys.stdout.write('}\n') |