Browse Source

Add tests for querying UCD properties; fix discovered issues.

master
Reece H. Dunn 12 years ago
parent
commit
ea09eb5c45
9 changed files with 7924 additions and 1079 deletions
  1. 7
    0
      .gitignore
  2. 20
    4
      Makefile.am
  3. 3
    3
      src/case.cpp
  4. 7769
    1069
      src/categories.cpp
  5. 75
    0
      tests/printucddata.cpp
  6. 1
    1
      tools/case.py
  7. 2
    2
      tools/categories.py
  8. 44
    0
      tools/printdata.py
  9. 3
    0
      tools/ucd.py

+ 7
- 0
.gitignore View File

@@ -13,6 +13,13 @@
# build output:

src/libucd.la
tests/printucddata

# test output:

tests/*.expected
tests/*.actual
tests/*.diff

# autotools output:


+ 20
- 4
Makefile.am View File

@@ -66,11 +66,27 @@ libucd_includedir = $(includedir)/ucd
libucd_include_HEADERS = \
src/include/ucd/ucd.h

lib_LTLIBRARIES += src/libucd.la

src_libucd_la_LDFLAGS = -version-info $(LIBUCD_VERSION)
lib_LTLIBRARIES += src/libucd.la
src_libucd_la_LDFLAGS = -version-info $(LIBUCD_VERSION)
src_libucd_la_CXXFLAGS = ${AM_CXXFLAGS}
src_libucd_la_SOURCES = \
src_libucd_la_SOURCES = \
src/case.cpp \
src/categories.cpp \
src/ctype.cpp

############################# tests ###########################################

noinst_bin_PROGRAMS += tests/printucddata
tests_printucddata_SOURCES = tests/printucddata.cpp
tests_printucddata_LDADD = src/libucd.la

# Expected output: written by the Python tool, which is given ${UCD_ROOTDIR}
# as its only argument.
tests/unicode-data.expected: tools/printdata.py tools/ucd.py
	tools/printdata.py ${UCD_ROOTDIR} > $@

# Actual output: written by the test program that is linked against libucd.
tests/unicode-data.actual: tests/printucddata
	tests/printucddata > $@

# diff exits with a non-zero status when the two files differ, which fails
# this rule. make does not delete the target of a failed recipe by default,
# so without the cleanup below the stale .diff file would be newer than its
# prerequisites and a second "make check" would wrongly succeed. On failure
# the differences are printed and the target is removed so the comparison is
# repeated on the next run.
tests/unicode-data.diff: tests/unicode-data.expected tests/unicode-data.actual
	diff -U0 tests/unicode-data.expected tests/unicode-data.actual > $@ || { cat $@; rm -f $@; exit 1; }

check: tests/unicode-data.diff

+ 3
- 3
src/case.cpp View File

@@ -2140,7 +2140,7 @@ ucd::codepoint_t ucd::toupper(codepoint_t c)
int pos = (begin + end) / 2;
const case_conversion_entry *item = (case_conversion_data + pos);
if (c == item->codepoint)
return item->uppercase;
return item->uppercase == 0 ? c : item->uppercase;
else if (c > item->codepoint)
begin = pos + 1;
else
@@ -2158,7 +2158,7 @@ ucd::codepoint_t ucd::tolower(codepoint_t c)
int pos = (begin + end) / 2;
const case_conversion_entry *item = (case_conversion_data + pos);
if (c == item->codepoint)
return item->lowercase;
return item->lowercase == 0 ? c : item->lowercase;
else if (c > item->codepoint)
begin = pos + 1;
else
@@ -2176,7 +2176,7 @@ ucd::codepoint_t ucd::totitle(codepoint_t c)
int pos = (begin + end) / 2;
const case_conversion_entry *item = (case_conversion_data + pos);
if (c == item->codepoint)
return item->titlecase;
return item->titlecase == 0 ? c : item->titlecase;
else if (c > item->codepoint)
begin = pos + 1;
else

+ 7769
- 1069
src/categories.cpp
File diff suppressed because it is too large
View File


+ 75
- 0
tests/printucddata.cpp View File

@@ -0,0 +1,75 @@
/*
* Copyright (C) 2012 Reece H. Dunn
*
* This file is part of ucd-tools.
*
* ucd-tools is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* ucd-tools is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with ucd-tools. If not, see <http://www.gnu.org/licenses/>.
*/

#include "ucd/ucd.h"

#include <stdio.h>

/* Return the two-letter text printed for a ucd::category value.
 *
 * Every enumerator listed below maps to a string spelled exactly like the
 * enumerator itself; any other value yields "--". The returned pointer
 * refers to a string literal, so the caller must not free or modify it.
 */
const char *get_category_string(ucd::category c)
{
	switch (c)
	{
	/* C* categories */
	case ucd::Cc: return "Cc";
	case ucd::Cf: return "Cf";
	case ucd::Cn: return "Cn";
	case ucd::Co: return "Co";
	case ucd::Cs: return "Cs";
	/* I* categories */
	case ucd::Ii: return "Ii";
	/* L* categories */
	case ucd::Ll: return "Ll";
	case ucd::Lm: return "Lm";
	case ucd::Lo: return "Lo";
	case ucd::Lt: return "Lt";
	case ucd::Lu: return "Lu";
	/* M* categories */
	case ucd::Mc: return "Mc";
	case ucd::Me: return "Me";
	case ucd::Mn: return "Mn";
	/* N* categories */
	case ucd::Nd: return "Nd";
	case ucd::Nl: return "Nl";
	case ucd::No: return "No";
	/* P* categories */
	case ucd::Pc: return "Pc";
	case ucd::Pd: return "Pd";
	case ucd::Pe: return "Pe";
	case ucd::Pf: return "Pf";
	case ucd::Pi: return "Pi";
	case ucd::Po: return "Po";
	case ucd::Ps: return "Ps";
	/* S* categories */
	case ucd::Sc: return "Sc";
	case ucd::Sk: return "Sk";
	case ucd::Sm: return "Sm";
	case ucd::So: return "So";
	/* Z* categories */
	case ucd::Zl: return "Zl";
	case ucd::Zp: return "Zp";
	case ucd::Zs: return "Zs";
	/* anything not listed above */
	default:
		return "--";
	}
}

int main()
{
for (ucd::codepoint_t c = 0; c <= 0x10FFFF; ++c)
{
const char *category = get_category_string(ucd::lookup_category(c));
ucd::codepoint_t upper = ucd::toupper(c);
ucd::codepoint_t lower = ucd::tolower(c);
ucd::codepoint_t title = ucd::totitle(c);
printf("%06X %s %06X %06X %06X\n", c, category, upper, lower, title);
}
return 0;
}

+ 1
- 1
tools/case.py View File

@@ -90,7 +90,7 @@ struct case_conversion_entry
sys.stdout.write('\t\tint pos = (begin + end) / 2;\n')
sys.stdout.write('\t\tconst case_conversion_entry *item = (case_conversion_data + pos);\n')
sys.stdout.write('\t\tif (c == item->codepoint)\n')
sys.stdout.write('\t\t\treturn item->%scase;\n' % case)
sys.stdout.write('\t\t\treturn item->%scase == 0 ? c : item->%scase;\n' % (case, case))
sys.stdout.write('\t\telse if (c > item->codepoint)\n')
sys.stdout.write('\t\t\tbegin = pos + 1;\n')
sys.stdout.write('\t\telse\n')

+ 2
- 2
tools/categories.py View File

@@ -26,8 +26,8 @@ ucd_version = ucd_rootdir.split('-')[-1]

unicode_chars = {}
for data in ucd.parse_ucd_data(ucd_rootdir, 'UnicodeData'):
if isinstance(data['CodePoint'], ucd.CodePoint):
unicode_chars[data['CodePoint']] = data['GeneralCategory']
for codepoint in data['CodePoint']:
unicode_chars[codepoint] = data['GeneralCategory']

# This map is a combination of the information in the UnicodeData and Blocks
# data files. It is intended to reduce the number of character tables that

+ 44
- 0
tools/printdata.py View File

@@ -0,0 +1,44 @@
#!/usr/bin/python

# Copyright (C) 2012 Reece H. Dunn
#
# This file is part of ucd-tools.
#
# ucd-tools is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# ucd-tools is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with ucd-tools. If not, see <http://www.gnu.org/licenses/>.

import os
import sys
import ucd

# Directory holding the Unicode Character Database files; taken from the
# first command-line argument.
ucd_rootdir = sys.argv[1]

# Map every code point covered by a UnicodeData record to that record.
unicode_chars = {}
for data in ucd.parse_ucd_data(ucd_rootdir, 'UnicodeData'):
    for codepoint in data['CodePoint']:
        unicode_chars[codepoint] = data

# A case field equal to this value is replaced by the code point itself
# when printing (see the loop below).
null = ucd.CodePoint('0000')
if __name__ == '__main__':
    for codepoint in ucd.CodeRange('000000..10FFFF'):
        # Only the table lookup is guarded: a code point without a record is
        # printed as Cn mapping to itself, while a record that lacks one of
        # the expected fields raises instead of being reported as Cn.
        try:
            data = unicode_chars[codepoint]
        except KeyError:
            print('%s Cn %s %s %s' % (codepoint, codepoint, codepoint, codepoint))
            continue

        title = data['TitleCase']
        upper = data['UpperCase']
        lower = data['LowerCase']
        if title == null:
            title = codepoint
        if upper == null:
            upper = codepoint
        if lower == null:
            lower = codepoint

        # A single parenthesised argument prints the same text whether
        # print is the Python 2 statement or the Python 3 function.
        print('%s %s %s %s %s' % (codepoint, data['GeneralCategory'], upper, lower, title))

+ 3
- 0
tools/ucd.py View File

@@ -33,6 +33,9 @@ class CodePoint:
def __str__(self):
    """Return self.codepoint as upper-case hexadecimal, zero-padded to six digits."""
    return '%06X' % self.codepoint

def __iter__(self):
    """Yield this object itself, exactly once.

    This lets a single code point be used directly in a ``for`` loop.
    """
    # NOTE(review): presumably this mirrors iteration over a code point
    # range so callers need not distinguish the two -- confirm against the
    # range class, which is not visible here.
    yield self

def __hash__(self):
    """Return self.codepoint as the hash value of this object."""
    # NOTE(review): the matching equality method is not visible here;
    # confirm that objects with equal codepoint values also compare equal,
    # since dictionaries keyed by these objects rely on both.
    return self.codepoint


Loading…
Cancel
Save