Browse Source

Add Other_Grapheme_Extend support from PropList.txt.

master
Reece H. Dunn 8 years ago
parent
commit
fdf398ef84
3 changed files with 42 additions and 21 deletions
  1. 2
    0
      src/include/ucd/ucd.h
  2. 39
    21
      src/proplist.c
  3. 1
    0
      tools/printdata.py

+ 2
- 0
src/include/ucd/ucd.h View File

@@ -346,6 +346,7 @@ typedef enum ucd_property_
UCD_PROPERTY_OTHER_LOWERCASE = 0x00004000, /**< @brief Other_Lowercase */
UCD_PROPERTY_OTHER_UPPERCASE = 0x00008000, /**< @brief Other_Uppercase */
UCD_PROPERTY_NONCHARACTER_CODE_POINT = 0x00010000, /**< @brief Noncharacter_Code_Point */
UCD_PROPERTY_OTHER_GRAPHEME_EXTEND = 0x00020000, /**< @brief Other_Grapheme_Extend */
} ucd_property;

/** @brief Return the properties of the specified codepoint.
@@ -822,6 +823,7 @@ namespace ucd
Other_Lowercase = UCD_PROPERTY_OTHER_LOWERCASE, /**< @brief Other_Lowercase */
Other_Uppercase = UCD_PROPERTY_OTHER_UPPERCASE, /**< @brief Other_Uppercase */
Noncharacter_Code_Point = UCD_PROPERTY_NONCHARACTER_CODE_POINT, /**< @brief Noncharacter_Code_Point */
Other_Grapheme_Extend = UCD_PROPERTY_OTHER_GRAPHEME_EXTEND, /**< @brief Other_Grapheme_Extend */
};

/** @brief Return the properties of the specified codepoint.

+ 39
- 21
src/proplist.c View File

@@ -38,12 +38,16 @@ static int properties_Cf(codepoint_t c)
if (c == 0x061C) return UCD_PROPERTY_BIDI_CONTROL;
break;
case 0x2000:
if (c >= 0x200C && c <= 0x200D) return UCD_PROPERTY_JOIN_CONTROL;
if (c == 0x200C) return UCD_PROPERTY_JOIN_CONTROL | UCD_PROPERTY_OTHER_GRAPHEME_EXTEND;
if (c == 0x200D) return UCD_PROPERTY_JOIN_CONTROL;
if (c >= 0x200E && c <= 0x200F) return UCD_PROPERTY_BIDI_CONTROL;
if (c >= 0x202A && c <= 0x202E) return UCD_PROPERTY_BIDI_CONTROL;
if (c >= 0x2061 && c <= 0x2064) return UCD_PROPERTY_OTHER_MATH;
if (c >= 0x2066 && c <= 0x2069) return UCD_PROPERTY_BIDI_CONTROL;
break;
case 0x0E0000:
if (c >= 0x0E0020 && c <= 0x0E007F) return UCD_PROPERTY_OTHER_GRAPHEME_EXTEND;
break;
}
return 0;
}
@@ -208,7 +212,7 @@ static int properties_Lm(codepoint_t c)
break;
case 0xFF00:
if (c == 0xFF70) return UCD_PROPERTY_DIACRITIC | UCD_PROPERTY_EXTENDER;
if (c >= 0xFF9E && c <= 0xFF9F) return UCD_PROPERTY_DIACRITIC;
if (c >= 0xFF9E && c <= 0xFF9F) return UCD_PROPERTY_DIACRITIC | UCD_PROPERTY_OTHER_GRAPHEME_EXTEND;
break;
case 0x016B00:
if (c >= 0x016B42 && c <= 0x016B43) return UCD_PROPERTY_EXTENDER;
@@ -349,10 +353,11 @@ static int properties_Mc(codepoint_t c)
if (c >= 0x0949 && c <= 0x094C) return UCD_PROPERTY_OTHER_ALPHABETIC;
if (c >= 0x094E && c <= 0x094F) return UCD_PROPERTY_OTHER_ALPHABETIC;
if (c >= 0x0982 && c <= 0x0983) return UCD_PROPERTY_OTHER_ALPHABETIC;
if (c >= 0x09BE && c <= 0x09C0) return UCD_PROPERTY_OTHER_ALPHABETIC;
if (c == 0x09BE) return UCD_PROPERTY_OTHER_ALPHABETIC | UCD_PROPERTY_OTHER_GRAPHEME_EXTEND;
if (c >= 0x09BF && c <= 0x09C0) return UCD_PROPERTY_OTHER_ALPHABETIC;
if (c >= 0x09C7 && c <= 0x09C8) return UCD_PROPERTY_OTHER_ALPHABETIC;
if (c >= 0x09CB && c <= 0x09CC) return UCD_PROPERTY_OTHER_ALPHABETIC;
if (c == 0x09D7) return UCD_PROPERTY_OTHER_ALPHABETIC;
if (c == 0x09D7) return UCD_PROPERTY_OTHER_ALPHABETIC | UCD_PROPERTY_OTHER_GRAPHEME_EXTEND;
break;
case 0x0A00:
if (c == 0x0A03) return UCD_PROPERTY_OTHER_ALPHABETIC;
@@ -364,36 +369,42 @@ static int properties_Mc(codepoint_t c)
break;
case 0x0B00:
if (c >= 0x0B02 && c <= 0x0B03) return UCD_PROPERTY_OTHER_ALPHABETIC;
if (c == 0x0B3E) return UCD_PROPERTY_OTHER_ALPHABETIC;
if (c == 0x0B3E) return UCD_PROPERTY_OTHER_ALPHABETIC | UCD_PROPERTY_OTHER_GRAPHEME_EXTEND;
if (c == 0x0B40) return UCD_PROPERTY_OTHER_ALPHABETIC;
if (c >= 0x0B47 && c <= 0x0B48) return UCD_PROPERTY_OTHER_ALPHABETIC;
if (c >= 0x0B4B && c <= 0x0B4C) return UCD_PROPERTY_OTHER_ALPHABETIC;
if (c == 0x0B57) return UCD_PROPERTY_OTHER_ALPHABETIC;
if (c >= 0x0BBE && c <= 0x0BBF) return UCD_PROPERTY_OTHER_ALPHABETIC;
if (c == 0x0B57) return UCD_PROPERTY_OTHER_ALPHABETIC | UCD_PROPERTY_OTHER_GRAPHEME_EXTEND;
if (c == 0x0BBE) return UCD_PROPERTY_OTHER_ALPHABETIC | UCD_PROPERTY_OTHER_GRAPHEME_EXTEND;
if (c == 0x0BBF) return UCD_PROPERTY_OTHER_ALPHABETIC;
if (c >= 0x0BC1 && c <= 0x0BC2) return UCD_PROPERTY_OTHER_ALPHABETIC;
if (c >= 0x0BC6 && c <= 0x0BC8) return UCD_PROPERTY_OTHER_ALPHABETIC;
if (c >= 0x0BCA && c <= 0x0BCC) return UCD_PROPERTY_OTHER_ALPHABETIC;
if (c == 0x0BD7) return UCD_PROPERTY_OTHER_ALPHABETIC;
if (c == 0x0BD7) return UCD_PROPERTY_OTHER_ALPHABETIC | UCD_PROPERTY_OTHER_GRAPHEME_EXTEND;
break;
case 0x0C00:
if (c >= 0x0C01 && c <= 0x0C03) return UCD_PROPERTY_OTHER_ALPHABETIC;
if (c >= 0x0C41 && c <= 0x0C44) return UCD_PROPERTY_OTHER_ALPHABETIC;
if (c >= 0x0C82 && c <= 0x0C83) return UCD_PROPERTY_OTHER_ALPHABETIC;
if (c == 0x0CBE) return UCD_PROPERTY_OTHER_ALPHABETIC;
if (c >= 0x0CC0 && c <= 0x0CC4) return UCD_PROPERTY_OTHER_ALPHABETIC;
if (c >= 0x0CC0 && c <= 0x0CC1) return UCD_PROPERTY_OTHER_ALPHABETIC;
if (c == 0x0CC2) return UCD_PROPERTY_OTHER_ALPHABETIC | UCD_PROPERTY_OTHER_GRAPHEME_EXTEND;
if (c >= 0x0CC3 && c <= 0x0CC4) return UCD_PROPERTY_OTHER_ALPHABETIC;
if (c >= 0x0CC7 && c <= 0x0CC8) return UCD_PROPERTY_OTHER_ALPHABETIC;
if (c >= 0x0CCA && c <= 0x0CCB) return UCD_PROPERTY_OTHER_ALPHABETIC;
if (c >= 0x0CD5 && c <= 0x0CD6) return UCD_PROPERTY_OTHER_ALPHABETIC;
if (c >= 0x0CD5 && c <= 0x0CD6) return UCD_PROPERTY_OTHER_ALPHABETIC | UCD_PROPERTY_OTHER_GRAPHEME_EXTEND;
break;
case 0x0D00:
if (c >= 0x0D02 && c <= 0x0D03) return UCD_PROPERTY_OTHER_ALPHABETIC;
if (c >= 0x0D3E && c <= 0x0D40) return UCD_PROPERTY_OTHER_ALPHABETIC;
if (c == 0x0D3E) return UCD_PROPERTY_OTHER_ALPHABETIC | UCD_PROPERTY_OTHER_GRAPHEME_EXTEND;
if (c >= 0x0D3F && c <= 0x0D40) return UCD_PROPERTY_OTHER_ALPHABETIC;
if (c >= 0x0D46 && c <= 0x0D48) return UCD_PROPERTY_OTHER_ALPHABETIC;
if (c >= 0x0D4A && c <= 0x0D4C) return UCD_PROPERTY_OTHER_ALPHABETIC;
if (c == 0x0D57) return UCD_PROPERTY_OTHER_ALPHABETIC;
if (c == 0x0D57) return UCD_PROPERTY_OTHER_ALPHABETIC | UCD_PROPERTY_OTHER_GRAPHEME_EXTEND;
if (c >= 0x0D82 && c <= 0x0D83) return UCD_PROPERTY_OTHER_ALPHABETIC;
if (c >= 0x0DCF && c <= 0x0DD1) return UCD_PROPERTY_OTHER_ALPHABETIC;
if (c >= 0x0DD8 && c <= 0x0DDF) return UCD_PROPERTY_OTHER_ALPHABETIC;
if (c == 0x0DCF) return UCD_PROPERTY_OTHER_ALPHABETIC | UCD_PROPERTY_OTHER_GRAPHEME_EXTEND;
if (c >= 0x0DD0 && c <= 0x0DD1) return UCD_PROPERTY_OTHER_ALPHABETIC;
if (c >= 0x0DD8 && c <= 0x0DDE) return UCD_PROPERTY_OTHER_ALPHABETIC;
if (c == 0x0DDF) return UCD_PROPERTY_OTHER_ALPHABETIC | UCD_PROPERTY_OTHER_GRAPHEME_EXTEND;
if (c >= 0x0DF2 && c <= 0x0DF3) return UCD_PROPERTY_OTHER_ALPHABETIC;
break;
case 0x0F00:
@@ -455,7 +466,7 @@ static int properties_Mc(codepoint_t c)
if (c >= 0x1CF2 && c <= 0x1CF3) return UCD_PROPERTY_OTHER_ALPHABETIC;
break;
case 0x3000:
if (c >= 0x302E && c <= 0x302F) return UCD_PROPERTY_DIACRITIC;
if (c >= 0x302E && c <= 0x302F) return UCD_PROPERTY_DIACRITIC | UCD_PROPERTY_OTHER_GRAPHEME_EXTEND;
break;
case 0xA800:
if (c >= 0xA823 && c <= 0xA824) return UCD_PROPERTY_OTHER_ALPHABETIC;
@@ -510,25 +521,30 @@ static int properties_Mc(codepoint_t c)
break;
case 0x011300:
if (c >= 0x011302 && c <= 0x011303) return UCD_PROPERTY_OTHER_ALPHABETIC;
if (c >= 0x01133E && c <= 0x01133F) return UCD_PROPERTY_OTHER_ALPHABETIC;
if (c == 0x01133E) return UCD_PROPERTY_OTHER_ALPHABETIC | UCD_PROPERTY_OTHER_GRAPHEME_EXTEND;
if (c == 0x01133F) return UCD_PROPERTY_OTHER_ALPHABETIC;
if (c >= 0x011341 && c <= 0x011344) return UCD_PROPERTY_OTHER_ALPHABETIC;
if (c >= 0x011347 && c <= 0x011348) return UCD_PROPERTY_OTHER_ALPHABETIC;
if (c >= 0x01134B && c <= 0x01134C) return UCD_PROPERTY_OTHER_ALPHABETIC;
if (c == 0x01134D) return UCD_PROPERTY_DIACRITIC;
if (c == 0x011357) return UCD_PROPERTY_OTHER_ALPHABETIC;
if (c == 0x011357) return UCD_PROPERTY_OTHER_ALPHABETIC | UCD_PROPERTY_OTHER_GRAPHEME_EXTEND;
if (c >= 0x011362 && c <= 0x011363) return UCD_PROPERTY_OTHER_ALPHABETIC;
break;
case 0x011400:
if (c >= 0x011435 && c <= 0x011437) return UCD_PROPERTY_OTHER_ALPHABETIC;
if (c >= 0x011440 && c <= 0x011441) return UCD_PROPERTY_OTHER_ALPHABETIC;
if (c == 0x011445) return UCD_PROPERTY_OTHER_ALPHABETIC;
if (c >= 0x0114B0 && c <= 0x0114B2) return UCD_PROPERTY_OTHER_ALPHABETIC;
if (c == 0x0114B0) return UCD_PROPERTY_OTHER_ALPHABETIC | UCD_PROPERTY_OTHER_GRAPHEME_EXTEND;
if (c >= 0x0114B1 && c <= 0x0114B2) return UCD_PROPERTY_OTHER_ALPHABETIC;
if (c == 0x0114B9) return UCD_PROPERTY_OTHER_ALPHABETIC;
if (c >= 0x0114BB && c <= 0x0114BE) return UCD_PROPERTY_OTHER_ALPHABETIC;
if (c >= 0x0114BB && c <= 0x0114BC) return UCD_PROPERTY_OTHER_ALPHABETIC;
if (c == 0x0114BD) return UCD_PROPERTY_OTHER_ALPHABETIC | UCD_PROPERTY_OTHER_GRAPHEME_EXTEND;
if (c == 0x0114BE) return UCD_PROPERTY_OTHER_ALPHABETIC;
if (c == 0x0114C1) return UCD_PROPERTY_OTHER_ALPHABETIC;
break;
case 0x011500:
if (c >= 0x0115AF && c <= 0x0115B1) return UCD_PROPERTY_OTHER_ALPHABETIC;
if (c == 0x0115AF) return UCD_PROPERTY_OTHER_ALPHABETIC | UCD_PROPERTY_OTHER_GRAPHEME_EXTEND;
if (c >= 0x0115B0 && c <= 0x0115B1) return UCD_PROPERTY_OTHER_ALPHABETIC;
if (c >= 0x0115B8 && c <= 0x0115BB) return UCD_PROPERTY_OTHER_ALPHABETIC;
if (c == 0x0115BE) return UCD_PROPERTY_OTHER_ALPHABETIC;
break;
@@ -552,7 +568,9 @@ static int properties_Mc(codepoint_t c)
if (c == 0x011CB4) return UCD_PROPERTY_OTHER_ALPHABETIC;
break;
case 0x01D100:
if (c >= 0x01D16D && c <= 0x01D172) return UCD_PROPERTY_DIACRITIC;
if (c == 0x01D165) return UCD_PROPERTY_OTHER_GRAPHEME_EXTEND;
if (c == 0x01D16D) return UCD_PROPERTY_DIACRITIC;
if (c >= 0x01D16E && c <= 0x01D172) return UCD_PROPERTY_DIACRITIC | UCD_PROPERTY_OTHER_GRAPHEME_EXTEND;
break;
case 0x016F00:
if (c >= 0x016F51 && c <= 0x016F7E) return UCD_PROPERTY_OTHER_ALPHABETIC;

+ 1
- 0
tools/printdata.py View File

@@ -138,6 +138,7 @@ def properties(data):
props += (2 ** 14) * data.get('Other_Lowercase', 0)
props += (2 ** 15) * data.get('Other_Uppercase', 0)
props += (2 ** 16) * data.get('Noncharacter_Code_Point', 0)
props += (2 ** 17) * data.get('Other_Grapheme_Extend', 0)
return props

if __name__ == '__main__':

Loading…
Cancel
Save