@@ -58,6 +58,13 @@ int ucd_isblank(codepoint_t c) | |||
switch (ucd_lookup_category(c)) | |||
{ | |||
case UCD_CATEGORY_Zs: | |||
switch (c) // Exclude characters with the <noBreak> DispositionType | |||
{ | |||
case 0x00A0: // U+00A0 : NO-BREAK SPACE | |||
case 0x2007: // U+2007 : FIGURE SPACE | |||
case 0x202F: // U+202F : NARROW NO-BREAK SPACE | |||
return 0; | |||
} | |||
return 1; | |||
case UCD_CATEGORY_Cc: | |||
return c == 0x09; // U+0009 : CHARACTER TABULATION |
@@ -248,7 +248,7 @@ int main(int argc, char **argv) | |||
else | |||
{ | |||
for (codepoint_t c = 0; c <= 0x10FFFF; ++c) | |||
uprintf(stdout, c, format ? format : "%pH %s %C %c %UH %LH %TH %id %ix %is %iu %il\n"); | |||
uprintf(stdout, c, format ? format : "%pH %s %C %c %UH %LH %TH %id %ix %is %ib %iu %il\n"); | |||
} | |||
return 0; | |||
} |
@@ -243,7 +243,7 @@ int main(int argc, char **argv) | |||
else | |||
{ | |||
for (codepoint_t c = 0; c <= 0x10FFFF; ++c) | |||
uprintf(stdout, c, format ? format : "%pH %s %C %c %UH %LH %TH %id %ix %is %iu %il\n"); | |||
uprintf(stdout, c, format ? format : "%pH %s %C %c %UH %LH %TH %id %ix %is %ib %iu %il\n"); | |||
} | |||
return 0; | |||
} |
@@ -243,7 +243,7 @@ int main(int argc, char **argv) | |||
else | |||
{ | |||
for (ucd::codepoint_t c = 0; c <= 0x10FFFF; ++c) | |||
uprintf(stdout, c, format ? format : "%pH %s %C %c %UH %LH %TH %id %ix %is %iu %il\n"); | |||
uprintf(stdout, c, format ? format : "%pH %s %C %c %UH %LH %TH %id %ix %is %ib %iu %il\n"); | |||
} | |||
return 0; | |||
} |
@@ -56,6 +56,13 @@ def isspace(data): | |||
else: | |||
return 0 | |||
def isblank(data): # word separator
    """Return 1 if the code point described by ``data`` is a blank, else 0.

    ``data`` is a mapping of Unicode properties. A code point is a blank
    candidate when its 'GeneralCategory' is 'Zs' (a missing category is
    treated as 'Cn') or when ``data['CodePoint'].char()`` is a horizontal
    tab. Candidates whose 'DecompositionType' starts with '<noBreak>' are
    excluded.
    """
    is_candidate = (data.get('GeneralCategory', 'Cn') == 'Zs'
                    or data['CodePoint'].char() == '\t')
    if not is_candidate:
        return 0
    dt = data.get('DecompositionType', '')
    # A missing (or explicitly None) decomposition type carries no <noBreak> tag.
    if dt is None or not dt.startswith('<noBreak>'):
        return 1
    return 0
def isupper(data): | |||
if data.get('LowerCase', null) != null: | |||
return 1 | |||
@@ -85,10 +92,10 @@ if __name__ == '__main__': | |||
if title == null: title = codepoint | |||
if upper == null: upper = codepoint | |||
if lower == null: lower = codepoint | |||
print('%s %s %s %s %s %s %s %s %s %s %s %s' % ( | |||
print('%s %s %s %s %s %s %s %s %s %s %s %s %s' % ( | |||
codepoint, script, | |||
data.get('GeneralCategory', 'Cn')[0], data.get('GeneralCategory', 'Cn'), | |||
upper, lower, title, | |||
isdigit(data), isxdigit(data), | |||
isspace(data), | |||
isspace(data), isblank(data), | |||
isupper(data), islower(data))) |