|
|
@@ -158,19 +158,44 @@ static void SetLetterBitsRange(Translator *tr, int group, int first, int last) |
|
|
|
|
|
|
|
// Default table of characters to ignore.
//
// The table is a flat list of (character code, value) pairs and ends with a
// 0, 0 pair. Every entry here carries the value 1.
// NOTE(review): presumably 1 means "drop the character"; confirm against the
// code that scans this table.
static const unsigned short chars_ignore_default[] = {
	// U+00AD SOFT HYPHEN
	// Used to mark hyphenation points in words for where to split a
	// word at the end of a line to provide readable justified text.
	0xad, 1,

	// U+200C ZERO WIDTH NON-JOINER
	// Used to prevent combined ligatures being displayed in their
	// combined form.
	0x200c, 1,

	// U+200D ZERO WIDTH JOINER
	// Used to indicate an alternative connected form made up of the
	// characters surrounding the ZWJ in Devanagari, Kannada, Malayalam
	// and Emoji.
	0x200d, 1,

	// End of the ignored character list. This must be the only 0, 0 pair
	// and must come last.
	0, 0
};
|
|
|
|
|
|
|
// Alternative table of characters to ignore, which keeps the
// zero-width-non-joiner visible as a hyphen (lang-fa).
//
// The table is a flat list of (character code, value) pairs and ends with a
// 0, 0 pair. Ignored characters carry the value 1; U+200C carries '-' so it
// is replaced with a hyphen instead.
// NOTE(review): presumably 1 means "drop the character"; confirm against the
// code that scans this table.
static const unsigned short chars_ignore_zwnj_hyphen[] = {
	// U+00AD SOFT HYPHEN
	// Used to mark hyphenation points in words for where to split a
	// word at the end of a line to provide readable justified text.
	0xad, 1,

	// U+0640 TATWEEL (KASHIDA)
	// Used in Arabic scripts to stretch characters for justifying
	// the text. Ignored for lang=FA.
	0x640, 1,

	// U+200C ZERO WIDTH NON-JOINER
	// Used to prevent combined ligatures being displayed in their
	// combined form. Replaced with a hyphen rather than ignored.
	0x200c, '-',

	// U+200D ZERO WIDTH JOINER
	// Used to indicate an alternative connected form made up of the
	// characters surrounding the ZWJ in Devanagari, Kannada, Malayalam
	// and Emoji.
	0x200d, 1,

	// End of the ignored character list. This must be the only 0, 0 pair
	// and must come last.
	0, 0
};
|
|
|
|
|
|
|
// The masculine ordinal character encoded as UTF-8, stored as a
// zero-terminated byte sequence.
const unsigned char utf8_ordinal[] = {
	0xc2, // first UTF-8 byte
	0xba, // second UTF-8 byte
	0     // terminator
};