Browse Source

Add Ideographic support from PropList.txt.

master
Reece H. Dunn 8 years ago
parent
commit
3ff6d8ae0f
3 changed files with 47 additions and 2 deletions
  1. 2
    0
      src/include/ucd/ucd.h
  2. 44
    2
      src/proplist.c
  3. 1
    0
      tools/printdata.py

+ 2
- 0
src/include/ucd/ucd.h View File

@@ -341,6 +341,7 @@ typedef enum ucd_property_
UCD_PROPERTY_HEX_DIGIT = 0x00000200, /**< @brief Hex_Digit PropList */
UCD_PROPERTY_ASCII_HEX_DIGIT = 0x00000400, /**< @brief ASCII_Hex_Digit PropList */
UCD_PROPERTY_OTHER_ALPHABETIC = 0x00000800, /**< @brief Other_Alphabetic PropList */
UCD_PROPERTY_IDEOGRAPHIC = 0x00001000, /**< @brief Ideographic PropList */
} ucd_property;

/** @brief Return the properties of the specified codepoint.
@@ -812,6 +813,7 @@ namespace ucd
Hex_Digit = UCD_PROPERTY_HEX_DIGIT, /**< @brief Hex_Digit PropList */
ASCII_Hex_Digit = UCD_PROPERTY_ASCII_HEX_DIGIT, /**< @brief ASCII_Hex_Digit PropList */
Other_Alphabetic = UCD_PROPERTY_OTHER_ALPHABETIC, /**< @brief Other_Alphabetic PropList */
Ideographic = UCD_PROPERTY_IDEOGRAPHIC, /**< @brief Ideographic PropList */
};

/** @brief Return the properties of the specified codepoint.

+ 44
- 2
src/proplist.c View File

@@ -111,12 +111,40 @@ static int properties_Lo(codepoint_t c)
case 0x2100:
if (c >= 0x2135 && c <= 0x2138) return UCD_PROPERTY_OTHER_MATH;
break;
case 0x3000:
if (c == 0x3006) return UCD_PROPERTY_IDEOGRAPHIC;
break;
case 0x01EE00:
return UCD_PROPERTY_OTHER_MATH;
}
return 0;
}

/** @brief Test whether a codepoint falls inside one of the Ideographic ranges.
 *
 * The ranges are stored as a flat table of inclusive bounds and scanned
 * linearly; a codepoint matches when it lies within any single entry.
 *
 * @param c The codepoint to look up.
 * @return UCD_PROPERTY_IDEOGRAPHIC if @a c is inside a listed range, else 0.
 */
static int properties_Lo_ideographic(codepoint_t c)
{
	/* Inclusive [first, last] bounds, in ascending codepoint order. */
	static const struct
	{
		codepoint_t first;
		codepoint_t last;
	} ranges[] = {
		/* 0x0000xxxx */
		{ 0x3400, 0x4DB5 },
		{ 0x4E00, 0x9FD5 },
		{ 0xF900, 0xFA6D },
		{ 0xFA70, 0xFAD9 },
		/* 0x0001xxxx */
		{ 0x017000, 0x0187EC },
		{ 0x018800, 0x018AF2 },
		/* 0x0002xxxx */
		{ 0x020000, 0x02A6D6 },
		{ 0x02A700, 0x02B734 },
		{ 0x02B740, 0x02B81D },
		{ 0x02B820, 0x02CEA1 },
		{ 0x02F800, 0x02FA1D },
	};
	unsigned int idx;

	for (idx = 0; idx < sizeof(ranges) / sizeof(ranges[0]); ++idx)
	{
		if (c >= ranges[idx].first && c <= ranges[idx].last)
		{
			return UCD_PROPERTY_IDEOGRAPHIC;
		}
	}
	return 0;
}

static int properties_Lu(codepoint_t c)
{
switch (c & 0xFFFFFF00)
@@ -692,6 +720,19 @@ static int properties_Nd(codepoint_t c)
return 0;
}

/** @brief Return the properties handled here for a codepoint in the Nl category.
 *
 * Only codepoints in the 0x3000..0x30FF block are examined; within that
 * block 0x3007, 0x3021..0x3029 and 0x3038..0x303A are reported as
 * Ideographic.
 *
 * @param c The codepoint to look up.
 * @return UCD_PROPERTY_IDEOGRAPHIC for the codepoints listed above, else 0.
 */
static int properties_Nl(codepoint_t c)
{
	/* Anything outside the 0x30xx block carries no property here. */
	if ((c & 0xFFFFFF00) != 0x3000)
	{
		return 0;
	}

	if (c == 0x3007 ||
	    (c >= 0x3021 && c <= 0x3029) ||
	    (c >= 0x3038 && c <= 0x303A))
	{
		return UCD_PROPERTY_IDEOGRAPHIC;
	}
	return 0;
}

static int properties_Pc(codepoint_t c)
{
switch (c & 0xFFFFFF00)
@@ -1159,11 +1200,12 @@ ucd_property ucd_properties(codepoint_t c, ucd_category category)
case UCD_CATEGORY_Cc: return properties_Cc(c);
case UCD_CATEGORY_Cf: return properties_Cf(c);
case UCD_CATEGORY_Ll: return properties_Ll(c);
case UCD_CATEGORY_Lo: return properties_Lo(c);
case UCD_CATEGORY_Lo: return properties_Lo(c) | properties_Lo_ideographic(c);
case UCD_CATEGORY_Lu: return properties_Lu(c);
case UCD_CATEGORY_Mc: return properties_Mc(c);
case UCD_CATEGORY_Mn: return properties_Mn(c);
case UCD_CATEGORY_Nd: return properties_Nd(c);
case UCD_CATEGORY_Nl: return properties_Nl(c);
case UCD_CATEGORY_Pc: return properties_Pc(c);
case UCD_CATEGORY_Pd: return properties_Pd(c);
case UCD_CATEGORY_Pe: return properties_Pe(c);
@@ -1177,6 +1219,6 @@ ucd_property ucd_properties(codepoint_t c, ucd_category category)
case UCD_CATEGORY_Zl: return UCD_PROPERTY_WHITE_SPACE;
case UCD_CATEGORY_Zp: return UCD_PROPERTY_WHITE_SPACE;
case UCD_CATEGORY_Zs: return properties_Zs(c);
default: return 0; // Cn Co Cs Ii Lm Lt Me Nl No Sc
default: return 0; // Cn Co Cs Ii Lm Lt Me No Sc
};
}

+ 1
- 0
tools/printdata.py View File

@@ -133,6 +133,7 @@ def properties(data):
props += 512 * data.get('Hex_Digit', 0)
props += 1024 * data.get('ASCII_Hex_Digit', 0)
props += 2048 * data.get('Other_Alphabetic', 0)
props += 4096 * data.get('Ideographic', 0)
return props

if __name__ == '__main__':

Loading…
Cancel
Save