Browse Source

Add Other_Lowercase support from PropList.txt.

master
Reece H. Dunn 8 years ago
parent
commit
6a2a87e358
3 changed files with 33 additions and 9 deletions
  1. src/include/ucd/ucd.h (+2, -0)
  2. src/proplist.c (+30, -9)
  3. tools/printdata.py (+1, -0)

src/include/ucd/ucd.h (+2, -0) View File

UCD_PROPERTY_IDEOGRAPHIC = 0x00001000, /**< @brief Ideographic PropList */ UCD_PROPERTY_IDEOGRAPHIC = 0x00001000, /**< @brief Ideographic PropList */
UCD_PROPERTY_DIACRITIC = 0x00002000, /**< @brief Diacritic PropList */ UCD_PROPERTY_DIACRITIC = 0x00002000, /**< @brief Diacritic PropList */
UCD_PROPERTY_EXTENDER = 0x00004000, /**< @brief Extender PropList */ UCD_PROPERTY_EXTENDER = 0x00004000, /**< @brief Extender PropList */
UCD_PROPERTY_OTHER_LOWERCASE = 0x00008000, /**< @brief Other_Lowercase PropList */
} ucd_property; } ucd_property;


/** @brief Return the properties of the specified codepoint. /** @brief Return the properties of the specified codepoint.
Ideographic = UCD_PROPERTY_IDEOGRAPHIC, /**< @brief Ideographic PropList */ Ideographic = UCD_PROPERTY_IDEOGRAPHIC, /**< @brief Ideographic PropList */
Diacritic = UCD_PROPERTY_DIACRITIC, /**< @brief Diacritic PropList */ Diacritic = UCD_PROPERTY_DIACRITIC, /**< @brief Diacritic PropList */
Extender = UCD_PROPERTY_EXTENDER, /**< @brief Extender PropList */ Extender = UCD_PROPERTY_EXTENDER, /**< @brief Extender PropList */
Other_Lowercase = UCD_PROPERTY_OTHER_LOWERCASE, /**< @brief Other_Lowercase PropList */
}; };


/** @brief Return the properties of the specified codepoint. /** @brief Return the properties of the specified codepoint.

src/proplist.c (+30, -9) View File

switch (c & 0xFFFFFF00) switch (c & 0xFFFFFF00)
{ {
case 0x0200: case 0x0200:
if (c >= 0x02B0 && c <= 0x02C1) return UCD_PROPERTY_DIACRITIC;
if (c >= 0x02B0 && c <= 0x02B8) return UCD_PROPERTY_DIACRITIC | UCD_PROPERTY_OTHER_LOWERCASE;
if (c >= 0x02B9 && c <= 0x02BF) return UCD_PROPERTY_DIACRITIC;
if (c >= 0x02C0 && c <= 0x02C1) return UCD_PROPERTY_DIACRITIC | UCD_PROPERTY_OTHER_LOWERCASE;
if (c >= 0x02C6 && c <= 0x02CF) return UCD_PROPERTY_DIACRITIC; if (c >= 0x02C6 && c <= 0x02CF) return UCD_PROPERTY_DIACRITIC;
if (c >= 0x02D0 && c <= 0x02D1) return UCD_PROPERTY_DIACRITIC | UCD_PROPERTY_EXTENDER; if (c >= 0x02D0 && c <= 0x02D1) return UCD_PROPERTY_DIACRITIC | UCD_PROPERTY_EXTENDER;
if (c >= 0x02E0 && c <= 0x02E4) return UCD_PROPERTY_DIACRITIC;
if (c >= 0x02E0 && c <= 0x02E4) return UCD_PROPERTY_DIACRITIC | UCD_PROPERTY_OTHER_LOWERCASE;
if (c == 0x02EC) return UCD_PROPERTY_DIACRITIC; if (c == 0x02EC) return UCD_PROPERTY_DIACRITIC;
if (c == 0x02EE) return UCD_PROPERTY_DIACRITIC; if (c == 0x02EE) return UCD_PROPERTY_DIACRITIC;
break; break;
case 0x0300: case 0x0300:
if (c == 0x0374) return UCD_PROPERTY_DIACRITIC; if (c == 0x0374) return UCD_PROPERTY_DIACRITIC;
if (c == 0x037A) return UCD_PROPERTY_DIACRITIC;
if (c == 0x037A) return UCD_PROPERTY_DIACRITIC | UCD_PROPERTY_OTHER_LOWERCASE;
break; break;
case 0x0500: case 0x0500:
if (c == 0x0559) return UCD_PROPERTY_DIACRITIC; if (c == 0x0559) return UCD_PROPERTY_DIACRITIC;
if (c >= 0x1C78 && c <= 0x1C7D) return UCD_PROPERTY_DIACRITIC; if (c >= 0x1C78 && c <= 0x1C7D) return UCD_PROPERTY_DIACRITIC;
break; break;
case 0x1D00: case 0x1D00:
if (c >= 0x1D2C && c <= 0x1D6A) return UCD_PROPERTY_DIACRITIC;
if (c >= 0x1D2C && c <= 0x1D6A) return UCD_PROPERTY_OTHER_LOWERCASE | UCD_PROPERTY_DIACRITIC;
if (c == 0x1D78) return UCD_PROPERTY_OTHER_LOWERCASE;
if (c >= 0x1D9B && c <= 0x1DBF) return UCD_PROPERTY_OTHER_LOWERCASE;
break;
case 0x2000:
if (c == 0x2071) return UCD_PROPERTY_OTHER_LOWERCASE;
if (c == 0x207F) return UCD_PROPERTY_OTHER_LOWERCASE;
if (c >= 0x2090 && c <= 0x209C) return UCD_PROPERTY_OTHER_LOWERCASE;
break;
case 0x2C00:
if (c >= 0x2C7C && c <= 0x2C7D) return UCD_PROPERTY_OTHER_LOWERCASE;
break; break;
case 0x2E00: case 0x2E00:
if (c == 0x2E2F) return UCD_PROPERTY_DIACRITIC; if (c == 0x2E2F) return UCD_PROPERTY_DIACRITIC;
case 0xA600: case 0xA600:
if (c == 0xA60C) return UCD_PROPERTY_EXTENDER; if (c == 0xA60C) return UCD_PROPERTY_EXTENDER;
if (c == 0xA67F) return UCD_PROPERTY_DIACRITIC; if (c == 0xA67F) return UCD_PROPERTY_DIACRITIC;
if (c >= 0xA69C && c <= 0xA69D) return UCD_PROPERTY_DIACRITIC;
if (c >= 0xA69C && c <= 0xA69D) return UCD_PROPERTY_DIACRITIC | UCD_PROPERTY_OTHER_LOWERCASE;
break; break;
case 0xA700: case 0xA700:
if (c >= 0xA717 && c <= 0xA71F) return UCD_PROPERTY_DIACRITIC; if (c >= 0xA717 && c <= 0xA71F) return UCD_PROPERTY_DIACRITIC;
if (c == 0xA770) return UCD_PROPERTY_OTHER_LOWERCASE;
if (c == 0xA788) return UCD_PROPERTY_DIACRITIC; if (c == 0xA788) return UCD_PROPERTY_DIACRITIC;
if (c >= 0xA7F8 && c <= 0xA7F9) return UCD_PROPERTY_DIACRITIC;
if (c >= 0xA7F8 && c <= 0xA7F9) return UCD_PROPERTY_DIACRITIC | UCD_PROPERTY_OTHER_LOWERCASE;
break; break;
case 0xA900: case 0xA900:
if (c == 0xA9CF) return UCD_PROPERTY_EXTENDER; if (c == 0xA9CF) return UCD_PROPERTY_EXTENDER;
if (c >= 0xAAF3 && c <= 0xAAF4) return UCD_PROPERTY_EXTENDER; if (c >= 0xAAF3 && c <= 0xAAF4) return UCD_PROPERTY_EXTENDER;
break; break;
case 0xAB00: case 0xAB00:
if (c >= 0xAB5C && c <= 0xAB5F) return UCD_PROPERTY_DIACRITIC;
if (c >= 0xAB5C && c <= 0xAB5F) return UCD_PROPERTY_DIACRITIC | UCD_PROPERTY_OTHER_LOWERCASE;
break; break;
case 0xFF00: case 0xFF00:
if (c == 0xFF70) return UCD_PROPERTY_DIACRITIC | UCD_PROPERTY_EXTENDER; if (c == 0xFF70) return UCD_PROPERTY_DIACRITIC | UCD_PROPERTY_EXTENDER;
{ {
switch (c & 0xFFFFFF00) switch (c & 0xFFFFFF00)
{ {
case 0x0000:
if (c == 0x00AA) return UCD_PROPERTY_OTHER_LOWERCASE;
if (c == 0x00BA) return UCD_PROPERTY_OTHER_LOWERCASE;
break;
case 0x2100: case 0x2100:
if (c >= 0x2135 && c <= 0x2138) return UCD_PROPERTY_OTHER_MATH; if (c >= 0x2135 && c <= 0x2138) return UCD_PROPERTY_OTHER_MATH;
break; break;
{ {
case 0x0300: case 0x0300:
if (c >= 0x0300 && c <= 0x0344) return UCD_PROPERTY_DIACRITIC; if (c >= 0x0300 && c <= 0x0344) return UCD_PROPERTY_DIACRITIC;
if (c == 0x0345) return UCD_PROPERTY_DIACRITIC | UCD_PROPERTY_OTHER_ALPHABETIC;
if (c == 0x0345) return UCD_PROPERTY_DIACRITIC | UCD_PROPERTY_OTHER_ALPHABETIC | UCD_PROPERTY_OTHER_LOWERCASE;
if (c >= 0x0346 && c <= 0x034E) return UCD_PROPERTY_DIACRITIC; if (c >= 0x0346 && c <= 0x034E) return UCD_PROPERTY_DIACRITIC;
if (c >= 0x0350 && c <= 0x0357) return UCD_PROPERTY_DIACRITIC; if (c >= 0x0350 && c <= 0x0357) return UCD_PROPERTY_DIACRITIC;
if (c >= 0x035D && c <= 0x0362) return UCD_PROPERTY_DIACRITIC; if (c >= 0x035D && c <= 0x0362) return UCD_PROPERTY_DIACRITIC;
{ {
switch (c & 0xFFFFFF00) switch (c & 0xFFFFFF00)
{ {
case 0x2100:
if (c >= 0x2170 && c <= 0x217F) return UCD_PROPERTY_OTHER_LOWERCASE;
break;
case 0x3000: case 0x3000:
if (c == 0x3007) return UCD_PROPERTY_IDEOGRAPHIC; if (c == 0x3007) return UCD_PROPERTY_IDEOGRAPHIC;
if (c >= 0x3021 && c <= 0x3029) return UCD_PROPERTY_IDEOGRAPHIC; if (c >= 0x3021 && c <= 0x3029) return UCD_PROPERTY_IDEOGRAPHIC;
if (c == 0x23E2) return UCD_PROPERTY_OTHER_MATH; if (c == 0x23E2) return UCD_PROPERTY_OTHER_MATH;
break; break;
case 0x2400: case 0x2400:
if (c >= 0x24B6 && c <= 0x24E9) return UCD_PROPERTY_OTHER_ALPHABETIC;
if (c >= 0x24B6 && c <= 0x24CF) return UCD_PROPERTY_OTHER_ALPHABETIC;
if (c >= 0x24D0 && c <= 0x24E9) return UCD_PROPERTY_OTHER_ALPHABETIC | UCD_PROPERTY_OTHER_LOWERCASE;
break; break;
case 0x2500: case 0x2500:
if (c >= 0x25A0 && c <= 0x25A1) return UCD_PROPERTY_OTHER_MATH; if (c >= 0x25A0 && c <= 0x25A1) return UCD_PROPERTY_OTHER_MATH;

tools/printdata.py (+1, -0) View File

props += 4096 * data.get('Ideographic', 0) props += 4096 * data.get('Ideographic', 0)
props += 8192 * data.get('Diacritic', 0) props += 8192 * data.get('Diacritic', 0)
props += 16384 * data.get('Extender', 0) props += 16384 * data.get('Extender', 0)
props += 32768 * data.get('Other_Lowercase', 0)
return props return props


if __name__ == '__main__': if __name__ == '__main__':

Loading…
Cancel
Save