@@ -0,0 +1,43 @@ | |||
.*.swp | |||
# generated files: | |||
src/categories.cpp | |||
# intermediate files: | |||
.deps/ | |||
.libs/ | |||
.dirstamp | |||
*.pyc | |||
*.o | |||
*.lo | |||
# build output: | |||
src/libucd.la | |||
# autotools output: | |||
INSTALL | |||
NEWS | |||
README | |||
aclocal.m4 | |||
autom4te.cache/ | |||
config.guess | |||
config.h.in | |||
config.h | |||
config.log | |||
config.status | |||
config.sub | |||
configure | |||
depcomp | |||
install-sh | |||
libtool | |||
ltmain.sh | |||
m4/ | |||
missing | |||
Makefile | |||
Makefile.in | |||
stamp-h1 |
@@ -0,0 +1,71 @@ | |||
# Automake input for ucd-tools: builds the libucd libtool library from the
# generated src/categories.cpp and installs the public ucd/ucd.h header.

AUTOMAKE_OPTIONS = subdir-objects

localedir = $(datadir)/locale
# NOTE(review): @XDGDATADIR@ is only replaced if configure substitutes
# XDGDATADIR (AC_SUBST) -- confirm against configure.ac.
xdgdatadir = @XDGDATADIR@
mimedir = $(xdgdatadir)/mime
xmldir = $(mimedir)/packages

AM_LDFLAGS = ${LTLIBINTL}
# Anchor the include paths at the source tree so they do not depend on the
# directory make is run from ($(top_srcdir) is "." for in-tree builds).
AM_CXXFLAGS = -I$(top_srcdir)/src/include -I$(top_srcdir)/src

ACLOCAL_AMFLAGS = -I m4

# Empty primaries that the sections below append to with +=.
bin_PROGRAMS =
lib_LTLIBRARIES =
man1_MANS =
pkgdata_DATA =

noinst_bin_PROGRAMS =
noinst_LIBRARIES =
noinst_bindir =

# ChangeLog is listed here once; it is regenerated by the rule below.
EXTRA_DIST = config.rpath ChangeLog
CLEANFILES =
SUBDIRS =

############################# libtool #########################################

EXTRA_DIST += config.guess config.sub ltmain.sh

# Increment if the interface has changed and is not backward compatible
CURRENT=0

# Increment if source files have changed
# Reset to 0 if the interface has changed
REVISION=0

# Increment if the interface is backward compatible (superset)
# Reset to 0 if the interface is not backward compatible
AGE=0

LIBUCD_VERSION=$(CURRENT):$(REVISION):$(AGE)

############################# ChangeLog #######################################

# ChangeLog is phony so that it is rebuilt from the git history every time a
# distribution is made (via dist-hook).
ChangeLog:
	git log > ChangeLog

dist-hook: ChangeLog

.PHONY: ChangeLog

############################# Unicode Character Database ######################

# Generate into a temporary file and rename it on success, so that a failed
# generator run does not leave a truncated src/categories.cpp behind that make
# would consider up to date.
# UCD_ROOTDIR is passed as the generator's only argument; it is not set in this
# file, so it has to come from the environment or the make command line.
# NOTE(review): categories.py imports a `ucd` helper module; if that lives in
# tools/, consider listing it as a prerequisite as well.
src/categories.cpp: tools/categories.py
	$(top_srcdir)/tools/categories.py ${UCD_ROOTDIR} > $@.tmp \
		|| { rm -f $@.tmp; exit 1; }
	mv $@.tmp $@

############################# libucd ##########################################

libucd_includedir = $(includedir)/ucd
libucd_include_HEADERS = \
	src/include/ucd/ucd.h

lib_LTLIBRARIES += src/libucd.la

src_libucd_la_LDFLAGS = -version-info $(LIBUCD_VERSION)
src_libucd_la_CXXFLAGS = ${AM_CXXFLAGS}
src_libucd_la_SOURCES = \
	src/categories.cpp
@@ -0,0 +1,12 @@ | |||
#!/bin/sh
# Bootstrap the autotools build system for a fresh checkout.

# Macro directory passed to aclocal below; create it if it is missing.
mkdir -p m4

# Provide the README and NEWS file names as links to the Markdown documents
# (both appear in the list of generated files in .gitignore).
ln -sf README.md README
ln -sf docs/ReleaseNotes.md NEWS

# Run each bootstrap tool in order, stopping at the first failure.
for step in "aclocal -I m4" "libtoolize" "autoheader" \
            "automake --add-missing" "autoconf"
do
	# $step is intentionally unquoted so it splits into command + arguments.
	$step || exit 1
done
@@ -0,0 +1,38 @@ | |||
dnl Autoconf input for ucd-tools.
AC_PREREQ([2.65])
AC_INIT([Unicode Character Database Tools], [0.1], [https://github.com/rhdunn/ucd-tools/issues], [ucd-tools], [https://github.com/rhdunn/ucd-tools])
AM_INIT_AUTOMAKE()

dnl Enable silent rules only when the installed automake provides the macro.
dnl (An unconditional AM_SILENT_RULES([yes]) after the guard would defeat it.)
m4_ifdef([AM_SILENT_RULES], [AM_SILENT_RULES([yes])])

AC_CONFIG_SRCDIR([src])
AC_CONFIG_MACRO_DIR([m4])
AC_CONFIG_HEADERS([config.h])

dnl LT_INIT replaces the deprecated AC_PROG_LIBTOOL, so it is invoked only once.
LT_INIT

dnl ================================================================
dnl Program checks.
dnl ================================================================

AC_PROG_CXX
AC_PROG_MAKE_SET

dnl ================================================================
dnl Generate output.
dnl ================================================================

AC_CONFIG_FILES([Makefile])
AC_OUTPUT

dnl NOTE(review): XDGDATADIR is reported below (and referenced as @XDGDATADIR@
dnl in Makefile.am) but is not set or AC_SUBST'ed in this file -- confirm
dnl whether a check for it is missing.
AC_MSG_NOTICE([
Configuration for Unicode Character Database Tools complete.
Source code location: ${srcdir}
XDG data location: ${XDGDATADIR}
Compiler: ${CXX}
Compiler flags: ${CXXFLAGS}
])
@@ -0,0 +1,100 @@ | |||
/* Unicode Character Database API | |||
* | |||
* Copyright (C) 2012 Reece H. Dunn | |||
* | |||
* This file is part of ucd-tools. | |||
* | |||
* ucd-tools is free software: you can redistribute it and/or modify | |||
* it under the terms of the GNU General Public License as published by | |||
* the Free Software Foundation, either version 3 of the License, or | |||
* (at your option) any later version. | |||
* | |||
* ucd-tools is distributed in the hope that it will be useful, | |||
* but WITHOUT ANY WARRANTY; without even the implied warranty of | |||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |||
* GNU General Public License for more details. | |||
* | |||
* You should have received a copy of the GNU General Public License | |||
* along with ucd-tools. If not, see <http://www.gnu.org/licenses/>. | |||
*/ | |||
#ifndef UNICODE_CHARACTER_DATA_H | |||
#define UNICODE_CHARACTER_DATA_H | |||
#include <stdint.h> | |||
/** @brief Unicode Character Database
  *
  * Types and lookup functions for querying properties of Unicode codepoints.
  */
namespace ucd
{
	/** @brief Represents a Unicode codepoint.
	  */
	typedef uint32_t codepoint_t;

	/** @brief Unicode General Category Values
	  *
	  * The enumerators are numbered consecutively from 0 in declaration
	  * order (Cc == 0 ... Zs == 30).
	  *
	  * @see http://www.unicode.org/reports/tr44/
	  */
	enum category
	{
		// Other
		Cc, /**< @brief Control Character */
		Cf, /**< @brief Format Control Character */
		Cn, /**< @brief Unassigned */
		Co, /**< @brief Private Use */
		Cs, /**< @brief Surrogate Code Point */
		// Letter
		Ll, /**< @brief Lower Case Letter */
		Lm, /**< @brief Letter Modifier */
		Lo, /**< @brief Other Letter */
		Lt, /**< @brief Title Case Letter */
		Lu, /**< @brief Upper Case Letter */
		// Mark
		Mc, /**< @brief Spacing Mark */
		Me, /**< @brief Enclosing Mark */
		Mn, /**< @brief Non-Spacing Mark */
		// Symbol
		Sc, /**< @brief Currency Symbol */
		Sk, /**< @brief Modifier Symbol */
		Sm, /**< @brief Math Symbol */
		So, /**< @brief Other Symbol */
		// Number
		Nd, /**< @brief Decimal Digit */
		Nl, /**< @brief Letter-Like Number */
		No, /**< @brief Other Number */
		// Punctuation
		Pc, /**< @brief Connector */
		Pd, /**< @brief Dash/Hyphen */
		Pe, /**< @brief Close Punctuation Mark */
		Pf, /**< @brief Final Quotation Mark */
		Pi, /**< @brief Initial Quotation Mark */
		Po, /**< @brief Other */
		Ps, /**< @brief Open Punctuation Mark */
		// Separator
		Zc, /**< @brief Whitespace character in the Cc category */
		Zl, /**< @brief Line Separator */
		Zp, /**< @brief Paragraph Separator */
		// No comma after the last enumerator: a trailing comma is only
		// valid from C++11 onwards and is diagnosed by pedantic C++98/03.
		Zs  /**< @brief Space Separator */
	};

	/** @brief Lookup the General Category for a Unicode codepoint.
	  *
	  * @param c The Unicode codepoint to lookup.
	  * @return The General Category of the Unicode codepoint.
	  */
	category lookup_category(codepoint_t c);
}
#endif |
@@ -0,0 +1,60 @@ | |||
#!/usr/bin/python | |||
# Copyright (C) 2012 Reece H. Dunn | |||
# | |||
# This file is part of ucd-tools. | |||
# | |||
# ucd-tools is free software: you can redistribute it and/or modify | |||
# it under the terms of the GNU General Public License as published by | |||
# the Free Software Foundation, either version 3 of the License, or | |||
# (at your option) any later version. | |||
# | |||
# ucd-tools is distributed in the hope that it will be useful, | |||
# but WITHOUT ANY WARRANTY; without even the implied warranty of | |||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |||
# GNU General Public License for more details. | |||
# | |||
# You should have received a copy of the GNU General Public License | |||
# along with ucd-tools. If not, see <http://www.gnu.org/licenses/>. | |||
import os | |||
import sys | |||
import ucd | |||
# Script entry point: writes the C++ source for ucd::lookup_category to stdout.
#
# Usage: categories.py UCD_ROOTDIR
#
# Everything runs under the __main__ guard so that importing this module does
# not read sys.argv or parse any data files.
if __name__ == '__main__':
    # Fail with a readable message instead of an IndexError traceback when the
    # data directory argument is missing (e.g. UCD_ROOTDIR unset in make).
    if len(sys.argv) < 2:
        sys.stderr.write('usage: %s UCD_ROOTDIR\n' % sys.argv[0])
        sys.exit(1)

    ucd_rootdir = sys.argv[1]

    # NOTE(review): the parsed data is not used yet -- the generated
    # lookup_category below is a stub that always returns Cn.
    unicode_data = ucd.parse_ucd_data(ucd_rootdir, 'UnicodeData')

    # License header and preamble of the generated file.
    sys.stdout.write("""/* Unicode General Categories
*
* Copyright (C) 2012 Reece H. Dunn
*
* This file is part of ucd-tools.
*
* ucd-tools is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* ucd-tools is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with ucd-tools. If not, see <http://www.gnu.org/licenses/>.
*/
// NOTE: This file is automatically generated from the UnicodeData.txt file in
// the Unicode Character database by the ucd-tools/tools/categories.py script.
#include "ucd/ucd.h"
using namespace ucd;
""")

    # Definition of the lookup function declared in ucd/ucd.h.
    sys.stdout.write('\n')
    sys.stdout.write('ucd::category ucd::lookup_category(codepoint_t c)\n')
    sys.stdout.write('{\n')
    sys.stdout.write('\treturn Cn;\n')
    sys.stdout.write('}\n')