| // ignore these characters | // ignore these characters | ||||
| static const unsigned short chars_ignore_default[] = { | static const unsigned short chars_ignore_default[] = { | ||||
| 0xad, 1, // soft hyphen | |||||
| 0x200c, 1, // zero width non-joiner | |||||
| 0x200d, 1, // zero width joiner | |||||
| 0, 0 | |||||
| // U+00AD SOFT HYPHEN | |||||
| // Used to mark hyphenation points in words for where to split a | |||||
| // word at the end of a line to provide readable justified text. | |||||
| 0xad, 1, | |||||
| // U+200C ZERO WIDTH NON-JOINER | |||||
| // Used to prevent combined ligatures being displayed in their | |||||
| // combined form. | |||||
| 0x200c, 1, | |||||
| // U+200D ZERO WIDTH JOINER | |||||
| // Used to indicate an alternative connected form made up of the | |||||
| // characters surrounding the ZWJ in Devanagari, Kannada, Malayalam | |||||
| // and Emoji. | |||||
| 0x200d, 1, | |||||
| // End of the ignored character list. | |||||
| 0, 0 | |||||
| }; | }; | ||||
| // alternatively, ignore characters but allow zero-width-non-joiner (lang-fa) | // alternatively, ignore characters but allow zero-width-non-joiner (lang-fa) | ||||
| static const unsigned short chars_ignore_zwnj_hyphen[] = { | static const unsigned short chars_ignore_zwnj_hyphen[] = { | ||||
| 0xad, 1, // soft hyphen | |||||
| 0x640, 1, // ignore Arabic Tatweel (lang=FA) | |||||
| 0x200c, '-', // zero width non-joiner, replace with hyphen | |||||
| 0x200d, 1, // zero width joiner | |||||
| 0, 0 | |||||
| // U+00AD SOFT HYPHEN | |||||
| // Used to mark hyphenation points in words for where to split a | |||||
| // word at the end of a line to provide readable justified text. | |||||
| 0xad, 1, | |||||
| // U+0640 TATWEEL (KASHIDA) | |||||
| // Used in Arabic scripts to stretch characters for justifying | |||||
| // the text. | |||||
| 0x640, 1, | |||||
| // U+200C ZERO WIDTH NON-JOINER | |||||
| // Used to prevent combined ligatures being displayed in their | |||||
| // combined form. | |||||
| 0x200c, '-', | |||||
| // U+200D ZERO WIDTH JOINER | |||||
| // Used to indicate an alternative connected form made up of the | |||||
| // characters surrounding the ZWJ in Devanagari, Kannada, Malayalam | |||||
| // and Emoji. | |||||
| 0x200d, 1, | |||||
| // End of the ignored character list. | |||||
| 0, 0 | |||||
| }; | }; | ||||
| const unsigned char utf8_ordinal[] = { 0xc2, 0xba, 0 }; // masculine ordinal character, UTF-8 | const unsigned char utf8_ordinal[] = { 0xc2, 0xba, 0 }; // masculine ordinal character, UTF-8 |