| { | { | ||||
| case UCD_CATEGORY_Zl: | case UCD_CATEGORY_Zl: | ||||
| case UCD_CATEGORY_Zp: | case UCD_CATEGORY_Zp: | ||||
| return 1; | |||||
| case UCD_CATEGORY_Zs: | case UCD_CATEGORY_Zs: | ||||
| switch (c) // Exclude characters with the <noBreak> DecompositionType | |||||
| { | |||||
| case 0x00A0: // U+00A0 : NO-BREAK SPACE | |||||
| case 0x2007: // U+2007 : FIGURE SPACE | |||||
| case 0x202F: // U+202F : NARROW NO-BREAK SPACE | |||||
| return 0; | |||||
| } | |||||
| return 1; | return 1; | ||||
| case UCD_CATEGORY_Cc: | case UCD_CATEGORY_Cc: | ||||
| switch (c) // Some control characters are also whitespace characters: | |||||
| switch (c) // Include control characters marked as White_Space | |||||
| { | { | ||||
| case 0x09: // U+0009 : CHARACTER TABULATION | case 0x09: // U+0009 : CHARACTER TABULATION | ||||
| case 0x0A: // U+000A : LINE FEED | case 0x0A: // U+000A : LINE FEED |
| return 1 if data['CodePoint'].char() in '0123456789ABCDEFabcdef' else 0 | return 1 if data['CodePoint'].char() in '0123456789ABCDEFabcdef' else 0 | ||||
| def isspace(data): | def isspace(data): | ||||
| return data.get('White_Space', 0) | |||||
| if data.get('White_Space', 0): | |||||
| dt = data.get('DecompositionType', '') | |||||
| return 1 if dt == None or not dt.startswith('<noBreak>') else 0 | |||||
| else: | |||||
| return 0 | |||||
| def isupper(data): | def isupper(data): | ||||
| if data.get('LowerCase', null) != null: | if data.get('LowerCase', null) != null: |