{ | { | ||||
case UCD_CATEGORY_Zl: | case UCD_CATEGORY_Zl: | ||||
case UCD_CATEGORY_Zp: | case UCD_CATEGORY_Zp: | ||||
return 1; | |||||
case UCD_CATEGORY_Zs: | case UCD_CATEGORY_Zs: | ||||
switch (c) // Exclude characters with the <noBreak> DispositionType | |||||
{ | |||||
case 0x00A0: // U+00A0 : NO-BREAK SPACE | |||||
case 0x2007: // U+2007 : FIGURE SPACE | |||||
case 0x202F: // U+202F : NARROW NO-BREAK SPACE | |||||
return 0; | |||||
} | |||||
return 1; | return 1; | ||||
case UCD_CATEGORY_Cc: | case UCD_CATEGORY_Cc: | ||||
switch (c) // Some control characters are also whitespace characters: | |||||
switch (c) // Include control characters marked as White_Space | |||||
{ | { | ||||
case 0x09: // U+0009 : CHARACTER TABULATION | case 0x09: // U+0009 : CHARACTER TABULATION | ||||
case 0x0A: // U+000A : LINE FEED | case 0x0A: // U+000A : LINE FEED |
return 1 if data['CodePoint'].char() in '0123456789ABCDEFabcdef' else 0 | return 1 if data['CodePoint'].char() in '0123456789ABCDEFabcdef' else 0 | ||||
def isspace(data): | def isspace(data): | ||||
return data.get('White_Space', 0) | |||||
if data.get('White_Space', 0): | |||||
dt = data.get('DecompositionType', '') | |||||
return 1 if dt == None or not dt.startswith('<noBreak>') else 0 | |||||
else: | |||||
return 0 | |||||
def isupper(data): | def isupper(data): | ||||
if data.get('LowerCase', null) != null: | if data.get('LowerCase', null) != null: |