Browse Source

isalpha: use the Alphabetic derived core property to check that the character is in the alpha class.

master
Reece H. Dunn 8 years ago
parent
commit
cbddf092c9
2 changed files with 261 additions and 6 deletions
  1. 260
    2
      src/ctype.c
  2. 1
    4
      tools/printdata.py

+ 260
- 2
src/ctype.c View File

@@ -20,6 +20,259 @@

#include "ucd/ucd.h"

/* Test whether a code point lies in one of the hard-coded ranges below.
 *
 * NOTE(review): judging by the function name, these are presumably the
 * Other_Alphabetic code points whose General_Category is Mn, Mc or So --
 * confirm against the Unicode data files before editing the table.
 *
 * @param c  The code point to look up.
 * @return   1 if c is inside any listed range, otherwise 0.
 */
static int other_alphabetic_MnMcSo(codepoint_t c)
{
	/* Inclusive [first, last] ranges. The table MUST stay sorted by `first`
	 * and free of overlaps, because the lookup below is a binary search.
	 * Entries are grouped one 0x100-aligned block per comment.
	 */
	static const struct
	{
		codepoint_t first;
		codepoint_t last;
	} ranges[] = {
		/* 0300 */
		{ 0x0345, 0x0345 },
		/* 0500 */
		{ 0x05B0, 0x05BD }, { 0x05BF, 0x05BF }, { 0x05C1, 0x05C2 },
		{ 0x05C4, 0x05C5 }, { 0x05C7, 0x05C7 },
		/* 0600 */
		{ 0x0610, 0x061A }, { 0x064B, 0x0657 }, { 0x0659, 0x065F },
		{ 0x0670, 0x0670 }, { 0x06D6, 0x06DC }, { 0x06E1, 0x06E4 },
		{ 0x06E7, 0x06E8 }, { 0x06ED, 0x06ED },
		/* 0700 */
		{ 0x0711, 0x0711 }, { 0x0730, 0x073F }, { 0x07A6, 0x07B0 },
		/* 0800 */
		{ 0x0816, 0x0817 }, { 0x081B, 0x0823 }, { 0x0825, 0x0827 },
		{ 0x0829, 0x082C }, { 0x08D4, 0x08DF }, { 0x08E3, 0x08E9 },
		{ 0x08F0, 0x08FF },
		/* 0900 */
		{ 0x0900, 0x0903 }, { 0x093A, 0x093B }, { 0x093E, 0x094C },
		{ 0x094E, 0x094F }, { 0x0955, 0x0957 }, { 0x0962, 0x0963 },
		{ 0x0981, 0x0983 }, { 0x09BE, 0x09C4 }, { 0x09C7, 0x09C8 },
		{ 0x09CB, 0x09CC }, { 0x09D7, 0x09D7 }, { 0x09E2, 0x09E3 },
		/* 0A00 */
		{ 0x0A01, 0x0A03 }, { 0x0A3E, 0x0A42 }, { 0x0A47, 0x0A48 },
		{ 0x0A4B, 0x0A4C }, { 0x0A51, 0x0A51 }, { 0x0A70, 0x0A71 },
		{ 0x0A75, 0x0A75 }, { 0x0A81, 0x0A83 }, { 0x0ABE, 0x0AC5 },
		{ 0x0AC7, 0x0AC9 }, { 0x0ACB, 0x0ACC }, { 0x0AE2, 0x0AE3 },
		/* 0B00 */
		{ 0x0B01, 0x0B03 }, { 0x0B3E, 0x0B44 }, { 0x0B47, 0x0B48 },
		{ 0x0B4B, 0x0B4C }, { 0x0B56, 0x0B57 }, { 0x0B62, 0x0B63 },
		{ 0x0B82, 0x0B82 }, { 0x0BBE, 0x0BC2 }, { 0x0BC6, 0x0BC8 },
		{ 0x0BCA, 0x0BCC }, { 0x0BD7, 0x0BD7 },
		/* 0C00 */
		{ 0x0C00, 0x0C03 }, { 0x0C3E, 0x0C44 }, { 0x0C46, 0x0C48 },
		{ 0x0C4A, 0x0C4C }, { 0x0C55, 0x0C56 }, { 0x0C62, 0x0C63 },
		{ 0x0C81, 0x0C83 }, { 0x0CBE, 0x0CBF }, { 0x0CC0, 0x0CC4 },
		{ 0x0CC6, 0x0CC8 }, { 0x0CCA, 0x0CCC }, { 0x0CD5, 0x0CD6 },
		{ 0x0CE2, 0x0CE3 },
		/* 0D00 */
		{ 0x0D01, 0x0D03 }, { 0x0D3E, 0x0D44 }, { 0x0D46, 0x0D48 },
		{ 0x0D4A, 0x0D4C }, { 0x0D57, 0x0D57 }, { 0x0D62, 0x0D63 },
		{ 0x0D82, 0x0D83 }, { 0x0DCF, 0x0DD4 }, { 0x0DD6, 0x0DD6 },
		{ 0x0DD8, 0x0DDF }, { 0x0DF2, 0x0DF3 },
		/* 0E00 */
		{ 0x0E31, 0x0E31 }, { 0x0E34, 0x0E3A }, { 0x0E4D, 0x0E4D },
		{ 0x0EB1, 0x0EB1 }, { 0x0EB4, 0x0EB9 }, { 0x0EBB, 0x0EBD },
		{ 0x0ECD, 0x0ECD },
		/* 0F00 */
		{ 0x0F71, 0x0F7F }, { 0x0F80, 0x0F81 }, { 0x0F8D, 0x0F97 },
		{ 0x0F99, 0x0FBC },
		/* 1000 */
		{ 0x102B, 0x1036 }, { 0x1038, 0x1038 }, { 0x103B, 0x103E },
		{ 0x1056, 0x1059 }, { 0x105E, 0x1060 }, { 0x1062, 0x1062 },
		{ 0x1067, 0x1068 }, { 0x1071, 0x1074 }, { 0x1082, 0x1086 },
		{ 0x109C, 0x109D },
		/* 1300 */
		{ 0x135F, 0x135F },
		/* 1700 */
		{ 0x1712, 0x1713 }, { 0x1732, 0x1733 }, { 0x1752, 0x1753 },
		{ 0x1772, 0x1773 }, { 0x17B6, 0x17C8 },
		/* 1800 */
		{ 0x1885, 0x1886 }, { 0x18A9, 0x18A9 },
		/* 1900 */
		{ 0x1920, 0x192B }, { 0x1930, 0x1938 },
		/* 1A00 */
		{ 0x1A17, 0x1A1B }, { 0x1A55, 0x1A5E }, { 0x1A61, 0x1A74 },
		/* 1B00 */
		{ 0x1B00, 0x1B04 }, { 0x1B35, 0x1B43 }, { 0x1B80, 0x1B82 },
		{ 0x1BA1, 0x1BA9 }, { 0x1BAC, 0x1BAD }, { 0x1BE7, 0x1BF1 },
		/* 1C00 */
		{ 0x1C24, 0x1C35 }, { 0x1CF2, 0x1CF3 },
		/* 1D00 */
		{ 0x1DE7, 0x1DF4 },
		/* 2400 */
		{ 0x24B6, 0x24E9 },
		/* 2D00 */
		{ 0x2DE0, 0x2DFF },
		/* A600 */
		{ 0xA674, 0xA67B }, { 0xA69E, 0xA69F },
		/* A800 */
		{ 0xA823, 0xA827 }, { 0xA880, 0xA881 }, { 0xA8B4, 0xA8C3 },
		{ 0xA8C5, 0xA8C5 },
		/* A900 */
		{ 0xA926, 0xA92A }, { 0xA947, 0xA952 }, { 0xA980, 0xA983 },
		{ 0xA9B4, 0xA9BF },
		/* AA00 */
		{ 0xAA29, 0xAA36 }, { 0xAA43, 0xAA43 }, { 0xAA4C, 0xAA4D },
		{ 0xAAB0, 0xAAB0 }, { 0xAAB2, 0xAAB4 }, { 0xAAB7, 0xAAB8 },
		{ 0xAABE, 0xAABE }, { 0xAAEB, 0xAAEF }, { 0xAAF5, 0xAAF5 },
		/* AB00 */
		{ 0xABE3, 0xABEA },
		/* FB00 */
		{ 0xFB1E, 0xFB1E },
		/* 10300 */
		{ 0x10376, 0x1037A },
		/* 10A00 */
		{ 0x10A01, 0x10A03 }, { 0x10A05, 0x10A06 }, { 0x10A0C, 0x10A0F },
		/* 11000 */
		{ 0x11000, 0x11002 }, { 0x11038, 0x11045 }, { 0x11082, 0x11082 },
		{ 0x110B0, 0x110B8 },
		/* 11100 */
		{ 0x11100, 0x11102 }, { 0x11127, 0x11132 }, { 0x11180, 0x11182 },
		{ 0x111B3, 0x111BF },
		/* 11200 */
		{ 0x1122C, 0x11234 }, { 0x11237, 0x11237 }, { 0x1123E, 0x1123E },
		{ 0x112DF, 0x112E8 },
		/* 11300 */
		{ 0x11300, 0x11303 }, { 0x1133E, 0x11344 }, { 0x11347, 0x11348 },
		{ 0x1134B, 0x1134C }, { 0x11357, 0x11357 }, { 0x11362, 0x11363 },
		/* 11400 */
		{ 0x11435, 0x11441 }, { 0x11443, 0x11445 }, { 0x114B0, 0x114C1 },
		/* 11500 */
		{ 0x115AF, 0x115B5 }, { 0x115B8, 0x115BE }, { 0x115DC, 0x115DD },
		/* 11600 */
		{ 0x11630, 0x1163E }, { 0x11640, 0x11640 }, { 0x116AB, 0x116B5 },
		/* 11700 */
		{ 0x1171D, 0x1172A },
		/* 11C00 */
		{ 0x11C2F, 0x11C36 }, { 0x11C38, 0x11C3E }, { 0x11C92, 0x11CA7 },
		{ 0x11CA9, 0x11CB6 },
		/* 16B00 */
		{ 0x16B30, 0x16B36 },
		/* 16F00 */
		{ 0x16F51, 0x16F7E },
		/* 1BC00 */
		{ 0x1BC9E, 0x1BC9E },
		/* 1E000 */
		{ 0x1E000, 0x1E006 }, { 0x1E008, 0x1E018 }, { 0x1E01B, 0x1E021 },
		{ 0x1E023, 0x1E024 }, { 0x1E026, 0x1E02A },
		/* 1E900 */
		{ 0x1E947, 0x1E947 },
		/* 1F100 */
		{ 0x1F130, 0x1F149 }, { 0x1F150, 0x1F169 }, { 0x1F170, 0x1F189 },
	};

	/* Binary search over the half-open index window [low, high). */
	unsigned int low = 0;
	unsigned int high = (unsigned int)(sizeof(ranges) / sizeof(ranges[0]));

	while (low < high)
	{
		unsigned int mid = low + (high - low) / 2;

		if (c < ranges[mid].first)
		{
			high = mid;
		}
		else if (c > ranges[mid].last)
		{
			low = mid + 1;
		}
		else
		{
			return 1;
		}
	}

	return 0;
}

int ucd_isalnum(codepoint_t c)
{
switch (ucd_lookup_category(c))
@@ -42,12 +295,17 @@ int ucd_isalpha(codepoint_t c)
{
switch (ucd_lookup_category(c))
{
case UCD_CATEGORY_Lu:
case UCD_CATEGORY_Ll:
case UCD_CATEGORY_Lt:
case UCD_CATEGORY_Lm:
case UCD_CATEGORY_Lo:
case UCD_CATEGORY_Lt:
case UCD_CATEGORY_Lu:
case UCD_CATEGORY_Nl:
return 1;
case UCD_CATEGORY_Mn:
case UCD_CATEGORY_Mc:
case UCD_CATEGORY_So:
return other_alphabetic_MnMcSo(c);
default:
return 0;
}

+ 1
- 4
tools/printdata.py View File

@@ -95,10 +95,7 @@ def isalnum(data):
return 0

def isalpha(data):
if data.get('GeneralCategory', 'Cn')[0] in 'L':
return 1
else:
return 0
return data.get('Alphabetic', 0)

def isupper(data):
if data.get('Uppercase', 0):

Loading…
Cancel
Save