Browse Source

isblank: don't include <noBreak> characters, and add tests for this API.

master
Reece H. Dunn 8 years ago
parent
commit
c9f2940373
5 changed files with 19 additions and 5 deletions
  1. 7
    0
      src/ctype.c
  2. 1
    1
      tests/printcdata.c
  3. 1
    1
      tests/printucddata.c
  4. 1
    1
      tests/printucddata_cpp.cpp
  5. 9
    2
      tools/printdata.py

+ 7
- 0
src/ctype.c View File

@@ -58,6 +58,13 @@ int ucd_isblank(codepoint_t c)
switch (ucd_lookup_category(c))
{
case UCD_CATEGORY_Zs:
switch (c) // Exclude characters with the <noBreak> DecompositionType
{
case 0x00A0: // U+00A0 : NO-BREAK SPACE
case 0x2007: // U+2007 : FIGURE SPACE
case 0x202F: // U+202F : NARROW NO-BREAK SPACE
return 0;
}
return 1;
case UCD_CATEGORY_Cc:
return c == 0x09; // U+0009 : CHARACTER TABULATION

+ 1
- 1
tests/printcdata.c View File

@@ -248,7 +248,7 @@ int main(int argc, char **argv)
else
{
for (codepoint_t c = 0; c <= 0x10FFFF; ++c)
uprintf(stdout, c, format ? format : "%pH %s %C %c %UH %LH %TH %id %ix %is %iu %il\n");
uprintf(stdout, c, format ? format : "%pH %s %C %c %UH %LH %TH %id %ix %is %ib %iu %il\n");
}
return 0;
}

+ 1
- 1
tests/printucddata.c View File

@@ -243,7 +243,7 @@ int main(int argc, char **argv)
else
{
for (codepoint_t c = 0; c <= 0x10FFFF; ++c)
uprintf(stdout, c, format ? format : "%pH %s %C %c %UH %LH %TH %id %ix %is %iu %il\n");
uprintf(stdout, c, format ? format : "%pH %s %C %c %UH %LH %TH %id %ix %is %ib %iu %il\n");
}
return 0;
}

+ 1
- 1
tests/printucddata_cpp.cpp View File

@@ -243,7 +243,7 @@ int main(int argc, char **argv)
else
{
for (ucd::codepoint_t c = 0; c <= 0x10FFFF; ++c)
uprintf(stdout, c, format ? format : "%pH %s %C %c %UH %LH %TH %id %ix %is %iu %il\n");
uprintf(stdout, c, format ? format : "%pH %s %C %c %UH %LH %TH %id %ix %is %ib %iu %il\n");
}
return 0;
}

+ 9
- 2
tools/printdata.py View File

@@ -56,6 +56,13 @@ def isspace(data):
else:
return 0

def isblank(data): # word separator
    """Return 1 if the character described by `data` is a blank, else 0.

    `data` is a mapping for one code point. A character is a candidate blank
    when its 'GeneralCategory' is 'Zs' (a missing category is treated as 'Cn')
    or when `data['CodePoint'].char()` is the tab character. A candidate is
    rejected when its 'DecompositionType' starts with '<noBreak>'; a missing
    or None decomposition type does not reject it.
    """
    # The category test comes first so that 'CodePoint' is only consulted for
    # characters that are not already known to be space separators.
    if data.get('GeneralCategory', 'Cn') != 'Zs' and data['CodePoint'].char() != '\t':
        return 0
    dt = data.get('DecompositionType', '')
    # Identity test for None: only a real string can carry the <noBreak> tag.
    if dt is not None and dt.startswith('<noBreak>'):
        return 0
    return 1

def isupper(data):
if data.get('LowerCase', null) != null:
return 1
@@ -85,10 +92,10 @@ if __name__ == '__main__':
if title == null: title = codepoint
if upper == null: upper = codepoint
if lower == null: lower = codepoint
print('%s %s %s %s %s %s %s %s %s %s %s %s' % (
print('%s %s %s %s %s %s %s %s %s %s %s %s %s' % (
codepoint, script,
data.get('GeneralCategory', 'Cn')[0], data.get('GeneralCategory', 'Cn'),
upper, lower, title,
isdigit(data), isxdigit(data),
isspace(data),
isspace(data), isblank(data),
isupper(data), islower(data)))

Loading…
Cancel
Save