Browse Source

isspace: don't include <noBreak> characters.

master
Reece H. Dunn 8 years ago
parent
commit
f109bb918f
2 changed files with 14 additions and 2 deletions
  1. 9
    1
      src/ctype.c
  2. 5
    1
      tools/printdata.py

+ 9
- 1
src/ctype.c View File

{ {
case UCD_CATEGORY_Zl: case UCD_CATEGORY_Zl:
case UCD_CATEGORY_Zp: case UCD_CATEGORY_Zp:
return 1;
case UCD_CATEGORY_Zs: case UCD_CATEGORY_Zs:
switch (c) // Exclude characters with the <noBreak> DecompositionType
{
case 0x00A0: // U+00A0 : NO-BREAK SPACE
case 0x2007: // U+2007 : FIGURE SPACE
case 0x202F: // U+202F : NARROW NO-BREAK SPACE
return 0;
}
return 1; return 1;
case UCD_CATEGORY_Cc: case UCD_CATEGORY_Cc:
switch (c) // Some control characters are also whitespace characters:
switch (c) // Include control characters marked as White_Space
{ {
case 0x09: // U+0009 : CHARACTER TABULATION case 0x09: // U+0009 : CHARACTER TABULATION
case 0x0A: // U+000A : LINE FEED case 0x0A: // U+000A : LINE FEED

+ 5
- 1
tools/printdata.py View File

return 1 if data['CodePoint'].char() in '0123456789ABCDEFabcdef' else 0 return 1 if data['CodePoint'].char() in '0123456789ABCDEFabcdef' else 0


def isspace(data): def isspace(data):
return data.get('White_Space', 0)
if data.get('White_Space', 0):
dt = data.get('DecompositionType', '')
return 1 if dt == None or not dt.startswith('<noBreak>') else 0
else:
return 0


def isupper(data): def isupper(data):
if data.get('LowerCase', null) != null: if data.get('LowerCase', null) != null:

Loading…
Cancel
Save