Browse Source

Use C-style comments to support using a C89 compiler.

master
Reece H. Dunn 8 years ago
parent
commit
0b56f2e5bc
11 changed files with 1386 additions and 1379 deletions
  1. 1
    0
      CHANGELOG.md
  2. 4
    3
      src/case.c
  3. 623
    622
      src/categories.c
  4. 20
    20
      src/ctype.c
  5. 44
    44
      src/proplist.c
  6. 626
    625
      src/scripts.c
  7. 24
    24
      tests/printcdata.c
  8. 24
    24
      tests/printucddata.c
  9. 4
    3
      tools/case.py
  10. 8
    7
      tools/categories.py
  11. 8
    7
      tools/scripts.py

+ 1
- 0
CHANGELOG.md View File

* Add `iswblank` and `iswxdigit` compatibility. * Add `iswblank` and `iswxdigit` compatibility.
* Improve ctype compatibility. * Improve ctype compatibility.
* PropList and emoji-data property lookup. * PropList and emoji-data property lookup.
* Support building with a C89 compiler.


## 9.0.0 - 2016-12-28 ## 9.0.0 - 2016-12-28



+ 4
- 3
src/case.c View File

* along with ucd-tools. If not, see <http://www.gnu.org/licenses/>. * along with ucd-tools. If not, see <http://www.gnu.org/licenses/>.
*/ */


// NOTE: This file is automatically generated from the UnicodeData.txt file in
// the Unicode Character database by the ucd-tools/tools/categories.py script.
/* NOTE: This file is automatically generated from the UnicodeData.txt file in
* the Unicode Character database by the ucd-tools/tools/case.py script.
*/


#include "ucd/ucd.h" #include "ucd/ucd.h"


#include <stddef.h> #include <stddef.h>


// Unicode Character Data 9.0.0
/* Unicode Character Data 9.0.0 */


struct case_conversion_entry struct case_conversion_entry
{ {

+ 623
- 622
src/categories.c
File diff suppressed because it is too large
View File


+ 20
- 20
src/ctype.c View File

switch (ucd_lookup_category(c)) switch (ucd_lookup_category(c))
{ {
case UCD_CATEGORY_Zs: case UCD_CATEGORY_Zs:
switch (c) // Exclude characters with the <noBreak> DispositionType
switch (c) /* Exclude characters with the <noBreak> DispositionType */
{ {
case 0x00A0: // U+00A0 : NO-BREAK SPACE
case 0x2007: // U+2007 : FIGURE SPACE
case 0x202F: // U+202F : NARROW NO-BREAK SPACE
case 0x00A0: /* U+00A0 : NO-BREAK SPACE */
case 0x2007: /* U+2007 : FIGURE SPACE */
case 0x202F: /* U+202F : NARROW NO-BREAK SPACE */
return 0; return 0;
} }
return 1; return 1;
case UCD_CATEGORY_Cc: case UCD_CATEGORY_Cc:
return c == 0x09; // U+0009 : CHARACTER TABULATION
return c == 0x09; /* U+0009 : CHARACTER TABULATION */
default: default:
return 0; return 0;
} }


int ucd_isdigit(codepoint_t c) int ucd_isdigit(codepoint_t c)
{ {
return (c >= 0x30 && c <= 0x39); // [0-9]
return (c >= 0x30 && c <= 0x39); /* [0-9] */
} }


int ucd_isgraph(codepoint_t c) int ucd_isgraph(codepoint_t c)
case UCD_CATEGORY_Zp: case UCD_CATEGORY_Zp:
return 1; return 1;
case UCD_CATEGORY_Zs: case UCD_CATEGORY_Zs:
switch (c) // Exclude characters with the <noBreak> DispositionType
switch (c) /* Exclude characters with the <noBreak> DispositionType */
{ {
case 0x00A0: // U+00A0 : NO-BREAK SPACE
case 0x2007: // U+2007 : FIGURE SPACE
case 0x202F: // U+202F : NARROW NO-BREAK SPACE
case 0x00A0: /* U+00A0 : NO-BREAK SPACE */
case 0x2007: /* U+2007 : FIGURE SPACE */
case 0x202F: /* U+202F : NARROW NO-BREAK SPACE */
return 0; return 0;
} }
return 1; return 1;
case UCD_CATEGORY_Cc: case UCD_CATEGORY_Cc:
switch (c) // Include control characters marked as White_Space
switch (c) /* Include control characters marked as White_Space */
{ {
case 0x09: // U+0009 : CHARACTER TABULATION
case 0x0A: // U+000A : LINE FEED
case 0x0B: // U+000B : LINE TABULATION
case 0x0C: // U+000C : FORM FEED
case 0x0D: // U+000D : CARRIAGE RETURN
case 0x85: // U+0085 : NEXT LINE
case 0x09: /* U+0009 : CHARACTER TABULATION */
case 0x0A: /* U+000A : LINE FEED */
case 0x0B: /* U+000B : LINE TABULATION */
case 0x0C: /* U+000C : FORM FEED */
case 0x0D: /* U+000D : CARRIAGE RETURN */
case 0x85: /* U+0085 : NEXT LINE */
return 1; return 1;
} }
default: default:


int ucd_isxdigit(codepoint_t c) int ucd_isxdigit(codepoint_t c)
{ {
return (c >= 0x30 && c <= 0x39) // [0-9]
|| (c >= 0x41 && c <= 0x46) // [A-Z]
|| (c >= 0x61 && c <= 0x66); // [a-z]
return (c >= 0x30 && c <= 0x39) /* [0-9] */
|| (c >= 0x41 && c <= 0x46) /* [A-F] */
|| (c >= 0x61 && c <= 0x66); /* [a-f] */
} }

+ 44
- 44
src/proplist.c View File

if (c == 0x029D) return UCD_PROPERTY_SOFT_DOTTED; if (c == 0x029D) return UCD_PROPERTY_SOFT_DOTTED;
break; break;
case 0x0300: case 0x0300:
if (c >= 0x03D0 && c <= 0x03D2) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu
if (c >= 0x03D0 && c <= 0x03D2) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */
if (c == 0x03D5) return UCD_PROPERTY_OTHER_MATH; if (c == 0x03D5) return UCD_PROPERTY_OTHER_MATH;
if (c >= 0x03F0 && c <= 0x03F1) return UCD_PROPERTY_OTHER_MATH; if (c >= 0x03F0 && c <= 0x03F1) return UCD_PROPERTY_OTHER_MATH;
if (c == 0x03F3) return UCD_PROPERTY_SOFT_DOTTED; if (c == 0x03F3) return UCD_PROPERTY_SOFT_DOTTED;
if (c >= 0x03F4 && c <= 0x03F5) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu
if (c >= 0x03F4 && c <= 0x03F5) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */
break; break;
case 0x0400: case 0x0400:
if (c == 0x0456) return UCD_PROPERTY_SOFT_DOTTED; if (c == 0x0456) return UCD_PROPERTY_SOFT_DOTTED;
if (c == 0x1ECB) return UCD_PROPERTY_SOFT_DOTTED; if (c == 0x1ECB) return UCD_PROPERTY_SOFT_DOTTED;
break; break;
case 0x2100: case 0x2100:
if (c >= 0x210A && c <= 0x2113) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu
if (c >= 0x212F && c <= 0x2131) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu
if (c >= 0x2133 && c <= 0x2134) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu
if (c >= 0x210A && c <= 0x2113) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */
if (c >= 0x212F && c <= 0x2131) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */
if (c >= 0x2133 && c <= 0x2134) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */
if (c == 0x2139) return UCD_PROPERTY_EMOJI; if (c == 0x2139) return UCD_PROPERTY_EMOJI;
if (c >= 0x213C && c <= 0x213F) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu
if (c >= 0x2145 && c <= 0x2147) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu
if (c >= 0x213C && c <= 0x213F) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */
if (c >= 0x2145 && c <= 0x2147) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */
if (c >= 0x2148 && c <= 0x2149) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_SOFT_DOTTED; if (c >= 0x2148 && c <= 0x2149) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_SOFT_DOTTED;
break; break;
case 0xFF00: case 0xFF00:
break; break;
case 0x01D400: case 0x01D400:
if (c >= 0x01D422 && c <= 0x01D423) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_SOFT_DOTTED; if (c >= 0x01D422 && c <= 0x01D423) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_SOFT_DOTTED;
if (c >= 0x01D400 && c <= 0x01D454) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu
if (c >= 0x01D400 && c <= 0x01D454) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */
if (c >= 0x01D456 && c <= 0x01D457) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_SOFT_DOTTED; if (c >= 0x01D456 && c <= 0x01D457) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_SOFT_DOTTED;
if (c >= 0x01D48A && c <= 0x01D48B) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_SOFT_DOTTED; if (c >= 0x01D48A && c <= 0x01D48B) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_SOFT_DOTTED;
if (c >= 0x01D458 && c <= 0x01D49C) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu
if (c >= 0x01D4AE && c <= 0x01D4B9) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu
if (c >= 0x01D458 && c <= 0x01D49C) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */
if (c >= 0x01D4AE && c <= 0x01D4B9) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */
if (c == 0x01D4BB) return UCD_PROPERTY_OTHER_MATH; if (c == 0x01D4BB) return UCD_PROPERTY_OTHER_MATH;
if (c >= 0x01D4BE && c <= 0x01D4BF) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_SOFT_DOTTED; if (c >= 0x01D4BE && c <= 0x01D4BF) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_SOFT_DOTTED;
if (c >= 0x01D4BD && c <= 0x01D4C3) return UCD_PROPERTY_OTHER_MATH; if (c >= 0x01D4BD && c <= 0x01D4C3) return UCD_PROPERTY_OTHER_MATH;
if (c >= 0x01D4F2 && c <= 0x01D4F3) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_SOFT_DOTTED; if (c >= 0x01D4F2 && c <= 0x01D4F3) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_SOFT_DOTTED;
if (c >= 0x01D4C5) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu
if (c >= 0x01D4C5) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */
break; break;
case 0x01D500: case 0x01D500:
if (c <= 0x01D505) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu
if (c <= 0x01D505) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */
if (c >= 0x01D526 && c <= 0x01D527) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_SOFT_DOTTED; if (c >= 0x01D526 && c <= 0x01D527) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_SOFT_DOTTED;
if (c >= 0x01D51E && c <= 0x01D539) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu
if (c >= 0x01D51E && c <= 0x01D539) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */
if (c >= 0x01D55A && c <= 0x01D55B) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_SOFT_DOTTED; if (c >= 0x01D55A && c <= 0x01D55B) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_SOFT_DOTTED;
if (c >= 0x01D58E && c <= 0x01D58F) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_SOFT_DOTTED; if (c >= 0x01D58E && c <= 0x01D58F) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_SOFT_DOTTED;
if (c >= 0x01D5C2 && c <= 0x01D5C3) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_SOFT_DOTTED; if (c >= 0x01D5C2 && c <= 0x01D5C3) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_SOFT_DOTTED;
if (c >= 0x01D5F6 && c <= 0x01D5F7) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_SOFT_DOTTED; if (c >= 0x01D5F6 && c <= 0x01D5F7) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_SOFT_DOTTED;
if (c >= 0x01D552) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu
if (c >= 0x01D552) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */
break; break;
case 0x01D600: case 0x01D600:
if (c >= 0x01D62A && c <= 0x01D62B) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_SOFT_DOTTED; if (c >= 0x01D62A && c <= 0x01D62B) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_SOFT_DOTTED;
if (c >= 0x01D65E && c <= 0x01D65F) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_SOFT_DOTTED; if (c >= 0x01D65E && c <= 0x01D65F) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_SOFT_DOTTED;
if (c >= 0x01D692 && c <= 0x01D693) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_SOFT_DOTTED; if (c >= 0x01D692 && c <= 0x01D693) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_SOFT_DOTTED;
if (c <= 0x01D6A5) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu
if (c <= 0x01D6A5) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */
if (c >= 0x01D6C2 && c <= 0x01D6DA) return UCD_PROPERTY_OTHER_MATH; if (c >= 0x01D6C2 && c <= 0x01D6DA) return UCD_PROPERTY_OTHER_MATH;
if (c >= 0x01D6DC && c <= 0x01D6FA) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu
if (c >= 0x01D6DC && c <= 0x01D6FA) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */
if (c >= 0x01D6FC) return UCD_PROPERTY_OTHER_MATH; if (c >= 0x01D6FC) return UCD_PROPERTY_OTHER_MATH;
break; break;
case 0x01D700: case 0x01D700:
if (c <= 0x01D714) return UCD_PROPERTY_OTHER_MATH; if (c <= 0x01D714) return UCD_PROPERTY_OTHER_MATH;
if (c >= 0x01D716 && c <= 0x01D734) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu
if (c >= 0x01D716 && c <= 0x01D734) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */
if (c >= 0x01D736 && c <= 0x01D74E) return UCD_PROPERTY_OTHER_MATH; if (c >= 0x01D736 && c <= 0x01D74E) return UCD_PROPERTY_OTHER_MATH;
if (c >= 0x01D750 && c <= 0x01D76E) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu
if (c >= 0x01D750 && c <= 0x01D76E) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */
if (c >= 0x01D770 && c <= 0x01D788) return UCD_PROPERTY_OTHER_MATH; if (c >= 0x01D770 && c <= 0x01D788) return UCD_PROPERTY_OTHER_MATH;
if (c >= 0x01D78A && c <= 0x01D7A8) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu
if (c >= 0x01D78A && c <= 0x01D7A8) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */
if (c >= 0x01D7AA && c <= 0x01D7C2) return UCD_PROPERTY_OTHER_MATH; if (c >= 0x01D7AA && c <= 0x01D7C2) return UCD_PROPERTY_OTHER_MATH;
if (c >= 0x01D7C4 && c <= 0x01D7CB) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu
if (c >= 0x01D7C4 && c <= 0x01D7CB) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */
break; break;
} }
return 0; return 0;
if (c >= 0x0041 && c <= 0x0046) return UCD_PROPERTY_HEX_DIGIT | UCD_PROPERTY_ASCII_HEX_DIGIT; if (c >= 0x0041 && c <= 0x0046) return UCD_PROPERTY_HEX_DIGIT | UCD_PROPERTY_ASCII_HEX_DIGIT;
break; break;
case 0x0300: case 0x0300:
if (c >= 0x03D0 && c <= 0x03D2) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu
if (c >= 0x03F4 && c <= 0x03F5) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu
if (c >= 0x03D0 && c <= 0x03D2) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */
if (c >= 0x03F4 && c <= 0x03F5) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */
break; break;
case 0xFF00: case 0xFF00:
if (c >= 0xFF21 && c <= 0xFF26) return UCD_PROPERTY_HEX_DIGIT; if (c >= 0xFF21 && c <= 0xFF26) return UCD_PROPERTY_HEX_DIGIT;
case 0x2100: case 0x2100:
if (c == 0x2102) return UCD_PROPERTY_OTHER_MATH; if (c == 0x2102) return UCD_PROPERTY_OTHER_MATH;
if (c == 0x2107) return UCD_PROPERTY_OTHER_MATH; if (c == 0x2107) return UCD_PROPERTY_OTHER_MATH;
if (c >= 0x210A && c <= 0x2113) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu
if (c >= 0x210A && c <= 0x2113) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */
if (c == 0x2115) return UCD_PROPERTY_OTHER_MATH; if (c == 0x2115) return UCD_PROPERTY_OTHER_MATH;
if (c >= 0x2119 && c <= 0x211D) return UCD_PROPERTY_OTHER_MATH; if (c >= 0x2119 && c <= 0x211D) return UCD_PROPERTY_OTHER_MATH;
if (c == 0x2124) return UCD_PROPERTY_OTHER_MATH; if (c == 0x2124) return UCD_PROPERTY_OTHER_MATH;
if (c == 0x2128) return UCD_PROPERTY_OTHER_MATH; if (c == 0x2128) return UCD_PROPERTY_OTHER_MATH;
if (c >= 0x212C && c <= 0x212D) return UCD_PROPERTY_OTHER_MATH; if (c >= 0x212C && c <= 0x212D) return UCD_PROPERTY_OTHER_MATH;
if (c >= 0x212F && c <= 0x2131) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu
if (c >= 0x2133 && c <= 0x2134) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu
if (c >= 0x213C && c <= 0x213F) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu
if (c >= 0x2145 && c <= 0x2149) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu
if (c >= 0x212F && c <= 0x2131) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */
if (c >= 0x2133 && c <= 0x2134) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */
if (c >= 0x213C && c <= 0x213F) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */
if (c >= 0x2145 && c <= 0x2149) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */
break; break;
case 0x01D400: case 0x01D400:
if (c >= 0x01D400 && c <= 0x01D454) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu
if (c >= 0x01D456 && c <= 0x01D49C) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu
if (c >= 0x01D400 && c <= 0x01D454) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */
if (c >= 0x01D456 && c <= 0x01D49C) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */
if (c >= 0x01D49E && c <= 0x01D49F) return UCD_PROPERTY_OTHER_MATH; if (c >= 0x01D49E && c <= 0x01D49F) return UCD_PROPERTY_OTHER_MATH;
if (c == 0x01D4A2) return UCD_PROPERTY_OTHER_MATH; if (c == 0x01D4A2) return UCD_PROPERTY_OTHER_MATH;
if (c >= 0x01D4A5 && c <= 0x01D4A6) return UCD_PROPERTY_OTHER_MATH; if (c >= 0x01D4A5 && c <= 0x01D4A6) return UCD_PROPERTY_OTHER_MATH;
if (c >= 0x01D4A9 && c <= 0x01D4AC) return UCD_PROPERTY_OTHER_MATH; if (c >= 0x01D4A9 && c <= 0x01D4AC) return UCD_PROPERTY_OTHER_MATH;
if (c >= 0x01D4AE && c <= 0x01D4B9) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu
if (c >= 0x01D4C5) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu
if (c >= 0x01D4AE && c <= 0x01D4B9) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */
if (c >= 0x01D4C5) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */
break; break;
case 0x01D500: case 0x01D500:
if (c <= 0x01D505) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu
if (c <= 0x01D505) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */
if (c >= 0x01D507 && c <= 0x01D50A) return UCD_PROPERTY_OTHER_MATH; if (c >= 0x01D507 && c <= 0x01D50A) return UCD_PROPERTY_OTHER_MATH;
if (c >= 0x01D50D && c <= 0x01D514) return UCD_PROPERTY_OTHER_MATH; if (c >= 0x01D50D && c <= 0x01D514) return UCD_PROPERTY_OTHER_MATH;
if (c >= 0x01D516 && c <= 0x01D51C) return UCD_PROPERTY_OTHER_MATH; if (c >= 0x01D516 && c <= 0x01D51C) return UCD_PROPERTY_OTHER_MATH;
if (c >= 0x01D51E && c <= 0x01D539) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu
if (c >= 0x01D51E && c <= 0x01D539) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */
if (c >= 0x01D53B && c <= 0x01D53E) return UCD_PROPERTY_OTHER_MATH; if (c >= 0x01D53B && c <= 0x01D53E) return UCD_PROPERTY_OTHER_MATH;
if (c >= 0x01D540 && c <= 0x01D544) return UCD_PROPERTY_OTHER_MATH; if (c >= 0x01D540 && c <= 0x01D544) return UCD_PROPERTY_OTHER_MATH;
if (c == 0x01D546) return UCD_PROPERTY_OTHER_MATH; if (c == 0x01D546) return UCD_PROPERTY_OTHER_MATH;
if (c >= 0x01D54A && c <= 0x01D550) return UCD_PROPERTY_OTHER_MATH; if (c >= 0x01D54A && c <= 0x01D550) return UCD_PROPERTY_OTHER_MATH;
if (c >= 0x01D552) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu
if (c >= 0x01D552) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */
break; break;
case 0x01D600: case 0x01D600:
if (c <= 0x01D6A5) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu
if (c <= 0x01D6A5) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */
if (c >= 0x01D6A8 && c <= 0x01D6C0) return UCD_PROPERTY_OTHER_MATH; if (c >= 0x01D6A8 && c <= 0x01D6C0) return UCD_PROPERTY_OTHER_MATH;
if (c >= 0x01D6DC && c <= 0x01D6FA) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu
if (c >= 0x01D6DC && c <= 0x01D6FA) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */
break; break;
case 0x01D700: case 0x01D700:
if (c >= 0x01D716 && c <= 0x01D734) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu
if (c >= 0x01D750 && c <= 0x01D76E) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu
if (c >= 0x01D78A && c <= 0x01D7A8) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu
if (c >= 0x01D7C4 && c <= 0x01D7CB) return UCD_PROPERTY_OTHER_MATH; // Ll|Lu
if (c >= 0x01D716 && c <= 0x01D734) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */
if (c >= 0x01D750 && c <= 0x01D76E) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */
if (c >= 0x01D78A && c <= 0x01D7A8) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */
if (c >= 0x01D7C4 && c <= 0x01D7CB) return UCD_PROPERTY_OTHER_MATH; /* Ll|Lu */
break; break;
} }
return 0; return 0;
break; break;
case 0x2700: case 0x2700:
if (c == 0x27C6) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_PATTERN_SYNTAX; if (c == 0x27C6) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_PATTERN_SYNTAX;
if (c >= 0x27E6 && c <= 0x27EF) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_PATTERN_SYNTAX; // Pe|Ps
if (c >= 0x27E6 && c <= 0x27EF) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_PATTERN_SYNTAX; /* Pe|Ps */
return UCD_PROPERTY_PATTERN_SYNTAX; return UCD_PROPERTY_PATTERN_SYNTAX;
case 0x2900: case 0x2900:
return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_PATTERN_SYNTAX; // Pe|Ps
return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_PATTERN_SYNTAX; /* Pe|Ps */
case 0x2E00: case 0x2E00:
return UCD_PROPERTY_PATTERN_SYNTAX; return UCD_PROPERTY_PATTERN_SYNTAX;
case 0x3000: case 0x3000:
break; break;
case 0x2700: case 0x2700:
if (c == 0x27C5) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_PATTERN_SYNTAX; if (c == 0x27C5) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_PATTERN_SYNTAX;
if (c >= 0x27E6 && c <= 0x27EF) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_PATTERN_SYNTAX; // Pe|Ps
if (c >= 0x27E6 && c <= 0x27EF) return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_PATTERN_SYNTAX; /* Pe|Ps */
return UCD_PROPERTY_PATTERN_SYNTAX; return UCD_PROPERTY_PATTERN_SYNTAX;
case 0x2900: case 0x2900:
return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_PATTERN_SYNTAX; return UCD_PROPERTY_OTHER_MATH | UCD_PROPERTY_PATTERN_SYNTAX;
case UCD_CATEGORY_Zl: return UCD_PROPERTY_WHITE_SPACE | UCD_PROPERTY_PATTERN_WHITE_SPACE; case UCD_CATEGORY_Zl: return UCD_PROPERTY_WHITE_SPACE | UCD_PROPERTY_PATTERN_WHITE_SPACE;
case UCD_CATEGORY_Zp: return UCD_PROPERTY_WHITE_SPACE | UCD_PROPERTY_PATTERN_WHITE_SPACE; case UCD_CATEGORY_Zp: return UCD_PROPERTY_WHITE_SPACE | UCD_PROPERTY_PATTERN_WHITE_SPACE;
case UCD_CATEGORY_Zs: return properties_Zs(c); case UCD_CATEGORY_Zs: return properties_Zs(c);
default: return 0; // Co Cs Ii Lt Me
default: return 0; /* Co Cs Ii Lt Me */
}; };
} }

+ 626
- 625
src/scripts.c
File diff suppressed because it is too large
View File


+ 24
- 24
tests/printcdata.c View File

{ {
switch (mode) switch (mode)
{ {
case 'c': // character
case 'c': /* character */
switch (c) switch (c)
{ {
case '\t': fputs("\\t", out); break; case '\t': fputs("\\t", out); break;
default: fput_utf8c(out, c); break; default: fput_utf8c(out, c); break;
} }
break; break;
case 'h': // hexadecimal (lower)
case 'h': /* hexadecimal (lower) */
fprintf(out, "%06x", c); fprintf(out, "%06x", c);
break; break;
case 'H': // hexadecimal (upper)
case 'H': /* hexadecimal (upper) */
fprintf(out, "%06X", c); fprintf(out, "%06X", c);
break; break;
} }
{ {
switch (mode) switch (mode)
{ {
case 'A': // alpha-numeric
case 'A': /* alpha-numeric */
fputc(iswalnum(c) ? '1' : '0', out); fputc(iswalnum(c) ? '1' : '0', out);
break; break;
case 'a': // alpha
case 'a': /* alpha */
fputc(iswalpha(c) ? '1' : '0', out); fputc(iswalpha(c) ? '1' : '0', out);
break; break;
case 'b': // blank
case 'b': /* blank */
fputc(iswblank(c) ? '1' : '0', out); fputc(iswblank(c) ? '1' : '0', out);
break; break;
case 'c': // control
case 'c': /* control */
fputc(iswcntrl(c) ? '1' : '0', out); fputc(iswcntrl(c) ? '1' : '0', out);
break; break;
case 'd': // numeric
case 'd': /* numeric */
fputc(iswdigit(c) ? '1' : '0', out); fputc(iswdigit(c) ? '1' : '0', out);
break; break;
case 'g': // glyph
case 'g': /* glyph */
fputc(iswgraph(c) ? '1' : '0', out); fputc(iswgraph(c) ? '1' : '0', out);
break; break;
case 'l': // lower case
case 'l': /* lower case */
fputc(iswlower(c) ? '1' : '0', out); fputc(iswlower(c) ? '1' : '0', out);
break; break;
case 'P': // printable
case 'P': /* printable */
fputc(iswprint(c) ? '1' : '0', out); fputc(iswprint(c) ? '1' : '0', out);
break; break;
case 'p': // punctuation
case 'p': /* punctuation */
fputc(iswpunct(c) ? '1' : '0', out); fputc(iswpunct(c) ? '1' : '0', out);
break; break;
case 's': // whitespace
case 's': /* whitespace */
fputc(iswspace(c) ? '1' : '0', out); fputc(iswspace(c) ? '1' : '0', out);
break; break;
case 'u': // upper case
case 'u': /* upper case */
fputc(iswupper(c) ? '1' : '0', out); fputc(iswupper(c) ? '1' : '0', out);
break; break;
case 'x': // xdigit
case 'x': /* xdigit */
fputc(iswxdigit(c) ? '1' : '0', out); fputc(iswxdigit(c) ? '1' : '0', out);
break; break;
} }
case '%': case '%':
switch (*++format) switch (*++format)
{ {
case 'c': // category
case 'c': /* category */
fputs(ucd_get_category_string(ucd_lookup_category(c)), out); fputs(ucd_get_category_string(ucd_lookup_category(c)), out);
break; break;
case 'C': // category group
case 'C': /* category group */
fputs(ucd_get_category_group_string(ucd_lookup_category_group(c)), out); fputs(ucd_get_category_group_string(ucd_lookup_category_group(c)), out);
break; break;
case 'p': // codepoint
case 'p': /* codepoint */
uprintf_codepoint(out, c, *++format); uprintf_codepoint(out, c, *++format);
break; break;
case 'P': // properties
case 'P': /* properties */
fprintf(out, "%016llx", ucd_properties(c, ucd_lookup_category(c))); fprintf(out, "%016llx", ucd_properties(c, ucd_lookup_category(c)));
break; break;
case 'i': // is*
case 'i': /* is* */
uprintf_is(out, c, *++format); uprintf_is(out, c, *++format);
break; break;
case 'L': // lowercase
case 'L': /* lowercase */
uprintf_codepoint(out, towlower(c), *++format); uprintf_codepoint(out, towlower(c), *++format);
break; break;
case 's': // script
case 's': /* script */
fputs(ucd_get_script_string(ucd_lookup_script(c)), out); fputs(ucd_get_script_string(ucd_lookup_script(c)), out);
break; break;
case 'T': // titlecase
case 'T': /* titlecase */
uprintf_codepoint(out, ucd_totitle(c), *++format); uprintf_codepoint(out, ucd_totitle(c), *++format);
break; break;
case 'U': // uppercase
case 'U': /* uppercase */
uprintf_codepoint(out, towupper(c), *++format); uprintf_codepoint(out, towupper(c), *++format);
break; break;
} }

+ 24
- 24
tests/printucddata.c View File

{ {
switch (mode) switch (mode)
{ {
case 'c': // character
case 'c': /* character */
switch (c) switch (c)
{ {
case '\t': fputs("\\t", out); break; case '\t': fputs("\\t", out); break;
default: fput_utf8c(out, c); break; default: fput_utf8c(out, c); break;
} }
break; break;
case 'h': // hexadecimal (lower)
case 'h': /* hexadecimal (lower) */
fprintf(out, "%06x", c); fprintf(out, "%06x", c);
break; break;
case 'H': // hexadecimal (upper)
case 'H': /* hexadecimal (upper) */
fprintf(out, "%06X", c); fprintf(out, "%06X", c);
break; break;
} }
{ {
switch (mode) switch (mode)
{ {
case 'A': // alpha-numeric
case 'A': /* alpha-numeric */
fputc(ucd_isalnum(c) ? '1' : '0', out); fputc(ucd_isalnum(c) ? '1' : '0', out);
break; break;
case 'a': // alpha
case 'a': /* alpha */
fputc(ucd_isalpha(c) ? '1' : '0', out); fputc(ucd_isalpha(c) ? '1' : '0', out);
break; break;
case 'b': // blank
case 'b': /* blank */
fputc(ucd_isblank(c) ? '1' : '0', out); fputc(ucd_isblank(c) ? '1' : '0', out);
break; break;
case 'c': // control
case 'c': /* control */
fputc(ucd_iscntrl(c) ? '1' : '0', out); fputc(ucd_iscntrl(c) ? '1' : '0', out);
break; break;
case 'd': // numeric
case 'd': /* numeric */
fputc(ucd_isdigit(c) ? '1' : '0', out); fputc(ucd_isdigit(c) ? '1' : '0', out);
break; break;
case 'g': // glyph
case 'g': /* glyph */
fputc(ucd_isgraph(c) ? '1' : '0', out); fputc(ucd_isgraph(c) ? '1' : '0', out);
break; break;
case 'l': // lower case
case 'l': /* lower case */
fputc(ucd_islower(c) ? '1' : '0', out); fputc(ucd_islower(c) ? '1' : '0', out);
break; break;
case 'P': // printable
case 'P': /* printable */
fputc(ucd_isprint(c) ? '1' : '0', out); fputc(ucd_isprint(c) ? '1' : '0', out);
break; break;
case 'p': // punctuation
case 'p': /* punctuation */
fputc(ucd_ispunct(c) ? '1' : '0', out); fputc(ucd_ispunct(c) ? '1' : '0', out);
break; break;
case 's': // whitespace
case 's': /* whitespace */
fputc(ucd_isspace(c) ? '1' : '0', out); fputc(ucd_isspace(c) ? '1' : '0', out);
break; break;
case 'u': // upper case
case 'u': /* upper case */
fputc(ucd_isupper(c) ? '1' : '0', out); fputc(ucd_isupper(c) ? '1' : '0', out);
break; break;
case 'x': // xdigit
case 'x': /* xdigit */
fputc(ucd_isxdigit(c) ? '1' : '0', out); fputc(ucd_isxdigit(c) ? '1' : '0', out);
break; break;
} }
case '%': case '%':
switch (*++format) switch (*++format)
{ {
case 'c': // category
case 'c': /* category */
fputs(ucd_get_category_string(ucd_lookup_category(c)), out); fputs(ucd_get_category_string(ucd_lookup_category(c)), out);
break; break;
case 'C': // category group
case 'C': /* category group */
fputs(ucd_get_category_group_string(ucd_lookup_category_group(c)), out); fputs(ucd_get_category_group_string(ucd_lookup_category_group(c)), out);
break; break;
case 'p': // codepoint
case 'p': /* codepoint */
uprintf_codepoint(out, c, *++format); uprintf_codepoint(out, c, *++format);
break; break;
case 'P': // properties
case 'P': /* properties */
fprintf(out, "%016llx", ucd_properties(c, ucd_lookup_category(c))); fprintf(out, "%016llx", ucd_properties(c, ucd_lookup_category(c)));
break; break;
case 'i': // is*
case 'i': /* is* */
uprintf_is(out, c, *++format); uprintf_is(out, c, *++format);
break; break;
case 'L': // lowercase
case 'L': /* lowercase */
uprintf_codepoint(out, ucd_tolower(c), *++format); uprintf_codepoint(out, ucd_tolower(c), *++format);
break; break;
case 's': // script
case 's': /* script */
fputs(ucd_get_script_string(ucd_lookup_script(c)), out); fputs(ucd_get_script_string(ucd_lookup_script(c)), out);
break; break;
case 'T': // titlecase
case 'T': /* titlecase */
uprintf_codepoint(out, ucd_totitle(c), *++format); uprintf_codepoint(out, ucd_totitle(c), *++format);
break; break;
case 'U': // uppercase
case 'U': /* uppercase */
uprintf_codepoint(out, ucd_toupper(c), *++format); uprintf_codepoint(out, ucd_toupper(c), *++format);
break; break;
} }

+ 4
- 3
tools/case.py View File

* along with ucd-tools. If not, see <http://www.gnu.org/licenses/>. * along with ucd-tools. If not, see <http://www.gnu.org/licenses/>.
*/ */


// NOTE: This file is automatically generated from the UnicodeData.txt file in
// the Unicode Character database by the ucd-tools/tools/categories.py script.
/* NOTE: This file is automatically generated from the UnicodeData.txt file in
* the Unicode Character database by the ucd-tools/tools/case.py script.
*/


#include "ucd/ucd.h" #include "ucd/ucd.h"


#include <stddef.h> #include <stddef.h>


// Unicode Character Data %s
/* Unicode Character Data %s */


struct case_conversion_entry struct case_conversion_entry
{ {

+ 8
- 7
tools/categories.py View File

* along with ucd-tools. If not, see <http://www.gnu.org/licenses/>. * along with ucd-tools. If not, see <http://www.gnu.org/licenses/>.
*/ */


// NOTE: This file is automatically generated from the UnicodeData.txt file in
// the Unicode Character database by the ucd-tools/tools/categories.py script.
/* NOTE: This file is automatically generated from the UnicodeData.txt file in
* the Unicode Character database by the ucd-tools/tools/categories.py script.
*/


#include "ucd/ucd.h" #include "ucd/ucd.h"


#define Zs UCD_CATEGORY_Zs #define Zs UCD_CATEGORY_Zs
#define Ii UCD_CATEGORY_Ii #define Ii UCD_CATEGORY_Ii


// Unicode Character Data %s
/* Unicode Character Data %s */
""" % ucd_version) """ % ucd_version)


for category in special_categories: for category in special_categories:
sys.stdout.write('{\n') sys.stdout.write('{\n')
for codepoint, table in sorted(category_tables[table_index].items()): for codepoint, table in sorted(category_tables[table_index].items()):
if isinstance(table, str): if isinstance(table, str):
sys.stdout.write('\tcategories_%s, // %s\n' % (table, codepoint))
sys.stdout.write('\tcategories_%s, /* %s */\n' % (table, codepoint))
else: else:
sys.stdout.write('\tcategories_%s,\n' % codepoint) sys.stdout.write('\tcategories_%s,\n' % codepoint)
sys.stdout.write('};\n') sys.stdout.write('};\n')
sys.stdout.write('{\n') sys.stdout.write('{\n')
for codepoints, category, comment in category_sets: for codepoints, category, comment in category_sets:
if category: if category:
sys.stdout.write('\tif (c <= 0x%s) return %s; // %s : %s\n' % (codepoints.last, category, codepoints, comment))
sys.stdout.write('\tif (c <= 0x%s) return %s; /* %s : %s */\n' % (codepoints.last, category, codepoints, comment))
else: else:
sys.stdout.write('\tif (c <= 0x%s) // %s\n' % (codepoints.last, codepoints))
sys.stdout.write('\tif (c <= 0x%s) /* %s */\n' % (codepoints.last, codepoints))
sys.stdout.write('\t{\n') sys.stdout.write('\t{\n')
sys.stdout.write('\t\tconst uint8_t *table = categories_%s_%s[(c - 0x%s) / 256];\n' % (codepoints.first, codepoints.last, codepoints.first)) sys.stdout.write('\t\tconst uint8_t *table = categories_%s_%s[(c - 0x%s) / 256];\n' % (codepoints.first, codepoints.last, codepoints.first))
sys.stdout.write('\t\treturn (ucd_category)table[c % 256];\n') sys.stdout.write('\t\treturn (ucd_category)table[c % 256];\n')
sys.stdout.write('\t}\n') sys.stdout.write('\t}\n')
sys.stdout.write('\treturn Ii; // Invalid Unicode Codepoint\n')
sys.stdout.write('\treturn Ii; /* Invalid Unicode Codepoint */\n')
sys.stdout.write('}\n') sys.stdout.write('}\n')


sys.stdout.write(""" sys.stdout.write("""

+ 8
- 7
tools/scripts.py View File

* along with ucd-tools. If not, see <http://www.gnu.org/licenses/>. * along with ucd-tools. If not, see <http://www.gnu.org/licenses/>.
*/ */


// NOTE: This file is automatically generated from the Scripts.txt file in
// the Unicode Character database by the ucd-tools/tools/scripts.py script.
/* NOTE: This file is automatically generated from the Scripts.txt file in
* the Unicode Character database by the ucd-tools/tools/scripts.py script.
*/


#include "ucd/ucd.h" #include "ucd/ucd.h"


#define Zyyy UCD_SCRIPT_Zyyy #define Zyyy UCD_SCRIPT_Zyyy
#define Zzzz UCD_SCRIPT_Zzzz #define Zzzz UCD_SCRIPT_Zzzz


// Unicode Character Data %s
/* Unicode Character Data %s */
""" % ucd_version) """ % ucd_version)


for script in special_scripts: for script in special_scripts:
sys.stdout.write('{\n') sys.stdout.write('{\n')
for codepoint, table in sorted(script_tables[table_index].items()): for codepoint, table in sorted(script_tables[table_index].items()):
if isinstance(table, str): if isinstance(table, str):
sys.stdout.write('\tscripts_%s, // %s\n' % (table, codepoint))
sys.stdout.write('\tscripts_%s, /* %s */\n' % (table, codepoint))
else: else:
sys.stdout.write('\tscripts_%s,\n' % codepoint) sys.stdout.write('\tscripts_%s,\n' % codepoint)
sys.stdout.write('};\n') sys.stdout.write('};\n')
sys.stdout.write('{\n') sys.stdout.write('{\n')
for codepoints, script, comment in script_sets: for codepoints, script, comment in script_sets:
if script: if script:
sys.stdout.write('\tif (c <= 0x%s) return %s; // %s : %s\n' % (codepoints.last, script, codepoints, comment))
sys.stdout.write('\tif (c <= 0x%s) return %s; /* %s : %s */\n' % (codepoints.last, script, codepoints, comment))
else: else:
sys.stdout.write('\tif (c <= 0x%s) // %s\n' % (codepoints.last, codepoints))
sys.stdout.write('\tif (c <= 0x%s) /* %s */\n' % (codepoints.last, codepoints))
sys.stdout.write('\t{\n') sys.stdout.write('\t{\n')
sys.stdout.write('\t\tconst uint8_t *table = scripts_%s_%s[(c - 0x%s) / 256];\n' % (codepoints.first, codepoints.last, codepoints.first)) sys.stdout.write('\t\tconst uint8_t *table = scripts_%s_%s[(c - 0x%s) / 256];\n' % (codepoints.first, codepoints.last, codepoints.first))
sys.stdout.write('\t\treturn (ucd_script)table[c % 256];\n') sys.stdout.write('\t\treturn (ucd_script)table[c % 256];\n')
sys.stdout.write('\t}\n') sys.stdout.write('\t}\n')
sys.stdout.write('\treturn Zzzz; // Invalid Unicode Codepoint\n')
sys.stdout.write('\treturn Zzzz; /* Invalid Unicode Codepoint */\n')
sys.stdout.write('}\n') sys.stdout.write('}\n')

Loading…
Cancel
Save