Browse Source

isspace: don't include <noBreak> characters.

master
Reece H. Dunn 8 years ago
parent
commit
f109bb918f
2 changed files with 14 additions and 2 deletions
  1. 9
    1
      src/ctype.c
  2. 5
    1
      tools/printdata.py

+ 9
- 1
src/ctype.c View File

@@ -140,10 +140,18 @@ int ucd_isspace(codepoint_t c)
{
case UCD_CATEGORY_Zl:
case UCD_CATEGORY_Zp:
return 1;
case UCD_CATEGORY_Zs:
switch (c) // Exclude characters with the <noBreak> DecompositionType
{
case 0x00A0: // U+00A0 : NO-BREAK SPACE
case 0x2007: // U+2007 : FIGURE SPACE
case 0x202F: // U+202F : NARROW NO-BREAK SPACE
return 0;
}
return 1;
case UCD_CATEGORY_Cc:
switch (c) // Some control characters are also whitespace characters:
switch (c) // Include control characters marked as White_Space
{
case 0x09: // U+0009 : CHARACTER TABULATION
case 0x0A: // U+000A : LINE FEED

+ 5
- 1
tools/printdata.py View File

@@ -50,7 +50,11 @@ def isxdigit(data):
return 1 if data['CodePoint'].char() in '0123456789ABCDEFabcdef' else 0

def isspace(data):
    """Return 1 if the code point is a breaking whitespace character, else 0.

    A code point qualifies when its ``White_Space`` property is truthy and
    its ``DecompositionType`` does not start with ``<noBreak>``.

    ``data`` is a mapping of property names to values; only ``data.get`` is
    used, so missing keys are tolerated.
    """
    if not data.get('White_Space', 0):
        return 0
    dt = data.get('DecompositionType', '')
    # The key may be present but explicitly None; treat that the same as
    # having no decomposition type at all.
    if dt is None:
        return 1
    return 0 if dt.startswith('<noBreak>') else 1

def isupper(data):
if data.get('LowerCase', null) != null:

Loading…
Cancel
Save