Browse Source

Add Other_Lowercase support from PropList.txt.

master
Reece H. Dunn 8 years ago
parent
commit
6a2a87e358
3 changed files with 33 additions and 9 deletions
  1. 2
    0
      src/include/ucd/ucd.h
  2. 30
    9
      src/proplist.c
  3. 1
    0
      tools/printdata.py

+ 2
- 0
src/include/ucd/ucd.h View File

@@ -344,6 +344,7 @@ typedef enum ucd_property_
UCD_PROPERTY_IDEOGRAPHIC = 0x00001000, /**< @brief Ideographic PropList */
UCD_PROPERTY_DIACRITIC = 0x00002000, /**< @brief Diacritic PropList */
UCD_PROPERTY_EXTENDER = 0x00004000, /**< @brief Extender PropList */
UCD_PROPERTY_OTHER_LOWERCASE = 0x00008000, /**< @brief Other_Lowercase PropList */
} ucd_property;

/** @brief Return the properties of the specified codepoint.
@@ -818,6 +819,7 @@ namespace ucd
Ideographic = UCD_PROPERTY_IDEOGRAPHIC, /**< @brief Ideographic PropList */
Diacritic = UCD_PROPERTY_DIACRITIC, /**< @brief Diacritic PropList */
Extender = UCD_PROPERTY_EXTENDER, /**< @brief Extender PropList */
Other_Lowercase = UCD_PROPERTY_OTHER_LOWERCASE, /**< @brief Other_Lowercase PropList */
};

/** @brief Return the properties of the specified codepoint.

+ 30
- 9
src/proplist.c View File

@@ -109,16 +109,18 @@ static int properties_Lm(codepoint_t c)
switch (c & 0xFFFFFF00)
{
case 0x0200:
if (c >= 0x02B0 && c <= 0x02C1) return UCD_PROPERTY_DIACRITIC;
if (c >= 0x02B0 && c <= 0x02B8) return UCD_PROPERTY_DIACRITIC | UCD_PROPERTY_OTHER_LOWERCASE;
if (c >= 0x02B9 && c <= 0x02BF) return UCD_PROPERTY_DIACRITIC;
if (c >= 0x02C0 && c <= 0x02C1) return UCD_PROPERTY_DIACRITIC | UCD_PROPERTY_OTHER_LOWERCASE;
if (c >= 0x02C6 && c <= 0x02CF) return UCD_PROPERTY_DIACRITIC;
if (c >= 0x02D0 && c <= 0x02D1) return UCD_PROPERTY_DIACRITIC | UCD_PROPERTY_EXTENDER;
if (c >= 0x02E0 && c <= 0x02E4) return UCD_PROPERTY_DIACRITIC;
if (c >= 0x02E0 && c <= 0x02E4) return UCD_PROPERTY_DIACRITIC | UCD_PROPERTY_OTHER_LOWERCASE;
if (c == 0x02EC) return UCD_PROPERTY_DIACRITIC;
if (c == 0x02EE) return UCD_PROPERTY_DIACRITIC;
break;
case 0x0300:
if (c == 0x0374) return UCD_PROPERTY_DIACRITIC;
if (c == 0x037A) return UCD_PROPERTY_DIACRITIC;
if (c == 0x037A) return UCD_PROPERTY_DIACRITIC | UCD_PROPERTY_OTHER_LOWERCASE;
break;
case 0x0500:
if (c == 0x0559) return UCD_PROPERTY_DIACRITIC;
@@ -149,7 +151,17 @@ static int properties_Lm(codepoint_t c)
if (c >= 0x1C78 && c <= 0x1C7D) return UCD_PROPERTY_DIACRITIC;
break;
case 0x1D00:
if (c >= 0x1D2C && c <= 0x1D6A) return UCD_PROPERTY_DIACRITIC;
if (c >= 0x1D2C && c <= 0x1D6A) return UCD_PROPERTY_OTHER_LOWERCASE | UCD_PROPERTY_DIACRITIC;
if (c == 0x1D78) return UCD_PROPERTY_OTHER_LOWERCASE;
if (c >= 0x1D9B && c <= 0x1DBF) return UCD_PROPERTY_OTHER_LOWERCASE;
break;
case 0x2000:
if (c == 0x2071) return UCD_PROPERTY_OTHER_LOWERCASE;
if (c == 0x207F) return UCD_PROPERTY_OTHER_LOWERCASE;
if (c >= 0x2090 && c <= 0x209C) return UCD_PROPERTY_OTHER_LOWERCASE;
break;
case 0x2C00:
if (c >= 0x2C7C && c <= 0x2C7D) return UCD_PROPERTY_OTHER_LOWERCASE;
break;
case 0x2E00:
if (c == 0x2E2F) return UCD_PROPERTY_DIACRITIC;
@@ -167,12 +179,13 @@ static int properties_Lm(codepoint_t c)
case 0xA600:
if (c == 0xA60C) return UCD_PROPERTY_EXTENDER;
if (c == 0xA67F) return UCD_PROPERTY_DIACRITIC;
if (c >= 0xA69C && c <= 0xA69D) return UCD_PROPERTY_DIACRITIC;
if (c >= 0xA69C && c <= 0xA69D) return UCD_PROPERTY_DIACRITIC | UCD_PROPERTY_OTHER_LOWERCASE;
break;
case 0xA700:
if (c >= 0xA717 && c <= 0xA71F) return UCD_PROPERTY_DIACRITIC;
if (c == 0xA770) return UCD_PROPERTY_OTHER_LOWERCASE;
if (c == 0xA788) return UCD_PROPERTY_DIACRITIC;
if (c >= 0xA7F8 && c <= 0xA7F9) return UCD_PROPERTY_DIACRITIC;
if (c >= 0xA7F8 && c <= 0xA7F9) return UCD_PROPERTY_DIACRITIC | UCD_PROPERTY_OTHER_LOWERCASE;
break;
case 0xA900:
if (c == 0xA9CF) return UCD_PROPERTY_EXTENDER;
@@ -184,7 +197,7 @@ static int properties_Lm(codepoint_t c)
if (c >= 0xAAF3 && c <= 0xAAF4) return UCD_PROPERTY_EXTENDER;
break;
case 0xAB00:
if (c >= 0xAB5C && c <= 0xAB5F) return UCD_PROPERTY_DIACRITIC;
if (c >= 0xAB5C && c <= 0xAB5F) return UCD_PROPERTY_DIACRITIC | UCD_PROPERTY_OTHER_LOWERCASE;
break;
case 0xFF00:
if (c == 0xFF70) return UCD_PROPERTY_DIACRITIC | UCD_PROPERTY_EXTENDER;
@@ -205,6 +218,10 @@ static int properties_Lo(codepoint_t c)
{
switch (c & 0xFFFFFF00)
{
case 0x0000:
if (c == 0x00AA) return UCD_PROPERTY_OTHER_LOWERCASE;
if (c == 0x00BA) return UCD_PROPERTY_OTHER_LOWERCASE;
break;
case 0x2100:
if (c >= 0x2135 && c <= 0x2138) return UCD_PROPERTY_OTHER_MATH;
break;
@@ -543,7 +560,7 @@ static int properties_Mn(codepoint_t c)
{
case 0x0300:
if (c >= 0x0300 && c <= 0x0344) return UCD_PROPERTY_DIACRITIC;
if (c == 0x0345) return UCD_PROPERTY_DIACRITIC | UCD_PROPERTY_OTHER_ALPHABETIC;
if (c == 0x0345) return UCD_PROPERTY_DIACRITIC | UCD_PROPERTY_OTHER_ALPHABETIC | UCD_PROPERTY_OTHER_LOWERCASE;
if (c >= 0x0346 && c <= 0x034E) return UCD_PROPERTY_DIACRITIC;
if (c >= 0x0350 && c <= 0x0357) return UCD_PROPERTY_DIACRITIC;
if (c >= 0x035D && c <= 0x0362) return UCD_PROPERTY_DIACRITIC;
@@ -981,6 +998,9 @@ static int properties_Nl(codepoint_t c)
{
switch (c & 0xFFFFFF00)
{
case 0x2100:
if (c >= 0x2170 && c <= 0x217F) return UCD_PROPERTY_OTHER_LOWERCASE;
break;
case 0x3000:
if (c == 0x3007) return UCD_PROPERTY_IDEOGRAPHIC;
if (c >= 0x3021 && c <= 0x3029) return UCD_PROPERTY_IDEOGRAPHIC;
@@ -1449,7 +1469,8 @@ static int properties_So(codepoint_t c)
if (c == 0x23E2) return UCD_PROPERTY_OTHER_MATH;
break;
case 0x2400:
if (c >= 0x24B6 && c <= 0x24E9) return UCD_PROPERTY_OTHER_ALPHABETIC;
if (c >= 0x24B6 && c <= 0x24CF) return UCD_PROPERTY_OTHER_ALPHABETIC;
if (c >= 0x24D0 && c <= 0x24E9) return UCD_PROPERTY_OTHER_ALPHABETIC | UCD_PROPERTY_OTHER_LOWERCASE;
break;
case 0x2500:
if (c >= 0x25A0 && c <= 0x25A1) return UCD_PROPERTY_OTHER_MATH;

+ 1
- 0
tools/printdata.py View File

@@ -136,6 +136,7 @@ def properties(data):
props += 4096 * data.get('Ideographic', 0)
props += 8192 * data.get('Diacritic', 0)
props += 16384 * data.get('Extender', 0)
props += 32768 * data.get('Other_Lowercase', 0)
return props

if __name__ == '__main__':

Loading…
Cancel
Save