# editor swap files (vim):
.*.swp
# generated files (src/categories.cpp is written by tools/categories.py):
src/categories.cpp
# intermediate files:
.deps/
.libs/
.dirstamp
*.pyc
*.o
*.lo
# build output:
src/libucd.la
# autotools output (README and NEWS are symlinks created by autogen.sh):
INSTALL
NEWS
README
aclocal.m4
autom4te.cache/
config.guess
config.h.in
config.h
config.log
config.status
config.sub
configure
depcomp
install-sh
libtool
ltmain.sh
m4/
missing
Makefile
Makefile.in
stamp-h1
AUTOMAKE_OPTIONS = subdir-objects

localedir = $(datadir)/locale

# NOTE(review): @XDGDATADIR@ is only replaced if configure.ac calls
# AC_SUBST([XDGDATADIR]); confirm that it does before relying on these paths.
xdgdatadir = @XDGDATADIR@
mimedir = $(xdgdatadir)/mime
xmldir = $(mimedir)/packages

AM_LDFLAGS = ${LTLIBINTL}
AM_CXXFLAGS = -Isrc/include -Isrc

ACLOCAL_AMFLAGS = -I m4

# The primaries below start out empty; later sections append to them with +=.
bin_PROGRAMS =
lib_LTLIBRARIES =
man1_MANS =
pkgdata_DATA =

noinst_bin_PROGRAMS =
noinst_LIBRARIES =
noinst_bindir =

# ChangeLog is added to EXTRA_DIST once, in the ChangeLog section below.
EXTRA_DIST = config.rpath
CLEANFILES =
SUBDIRS =

############################# libtool #########################################

EXTRA_DIST += config.guess config.sub ltmain.sh

# Increment if the interface has changed and is not backward compatible
CURRENT=0

# Increment if source files have changed
# Reset to 0 if the interface has changed
REVISION=0

# Increment if the interface is backward compatible (superset)
# Reset to 0 if the interface is not backward compatible
AGE=0

LIBUCD_VERSION=$(CURRENT):$(REVISION):$(AGE)

############################# ChangeLog #######################################

# ChangeLog is regenerated from the git history every time it is requested
# (it is .PHONY), and dist-hook makes sure that happens before a dist.
ChangeLog:
	git log > ChangeLog

dist-hook: ChangeLog

.PHONY: ChangeLog

EXTRA_DIST += ChangeLog

############################# Unicode Character Database ######################

# Generate into a temporary file and rename it on success, so that a failing
# generator run does not leave a truncated (but up-to-date looking) target.
src/categories.cpp: tools/categories.py
	tools/categories.py ${UCD_ROOTDIR} > $@.tmp
	mv $@.tmp $@

############################# libucd ##########################################

libucd_includedir = $(includedir)/ucd
libucd_include_HEADERS = \
	src/include/ucd/ucd.h

lib_LTLIBRARIES += src/libucd.la

src_libucd_la_LDFLAGS = -version-info $(LIBUCD_VERSION)
src_libucd_la_CXXFLAGS = ${AM_CXXFLAGS}
src_libucd_la_SOURCES = \
	src/categories.cpp
#!/bin/sh
# Bootstrap the autotools build system: create the m4 macro directory and the
# README/NEWS symlinks, then run each autotools step in order. The script
# stops with exit status 1 as soon as any step fails.

mkdir -p m4 || exit 1

# README and NEWS point at the Markdown sources (presumably so that automake
# finds the plain-named files it looks for -- confirm against the strictness
# level in use).
ln -sf README.md README || exit 1
ln -sf docs/ReleaseNotes.md NEWS || exit 1

aclocal -I m4 || exit 1

libtoolize || exit 1
autoheader || exit 1
automake --add-missing || exit 1
autoconf || exit 1
AC_PREREQ([2.65])
AC_INIT([Unicode Character Database Tools], [0.1], [https://github.com/rhdunn/ucd-tools/issues], [ucd-tools], [https://github.com/rhdunn/ucd-tools])
AM_INIT_AUTOMAKE()

dnl Enable silent rules only when the installed automake provides the macro;
dnl an unguarded AM_SILENT_RULES call would defeat the m4_ifdef check.
m4_ifdef([AM_SILENT_RULES], [AM_SILENT_RULES([yes])])

AC_CONFIG_SRCDIR([src])
AC_CONFIG_MACRO_DIR([m4])
AC_CONFIG_HEADERS([config.h])

dnl LT_INIT supersedes the obsolete AC_PROG_LIBTOOL, so it is called once here.
LT_INIT

dnl ================================================================
dnl Program checks.
dnl ================================================================

AC_PROG_CXX
AC_PROG_MAKE_SET

dnl ================================================================
dnl Generate output.
dnl ================================================================

AC_CONFIG_FILES([Makefile])
AC_OUTPUT

dnl Only values that this script actually sets are reported.
AC_MSG_NOTICE([

    Configuration for Unicode Character Database Tools complete.

        Source code location:          ${srcdir}

        Compiler:                      ${CXX}
        Compiler flags:                ${CXXFLAGS}
])
/* Unicode Character Database API | |||||
* | |||||
* Copyright (C) 2012 Reece H. Dunn | |||||
* | |||||
* This file is part of ucd-tools. | |||||
* | |||||
* ucd-tools is free software: you can redistribute it and/or modify | |||||
* it under the terms of the GNU General Public License as published by | |||||
* the Free Software Foundation, either version 3 of the License, or | |||||
* (at your option) any later version. | |||||
* | |||||
* ucd-tools is distributed in the hope that it will be useful, | |||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of | |||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |||||
* GNU General Public License for more details. | |||||
* | |||||
* You should have received a copy of the GNU General Public License | |||||
* along with ucd-tools. If not, see <http://www.gnu.org/licenses/>. | |||||
*/ | |||||
#ifndef UNICODE_CHARACTER_DATA_H
#define UNICODE_CHARACTER_DATA_H

#include <stdint.h>

/** @brief Unicode Character Database
  */
namespace ucd
{
	/** @brief Represents a Unicode codepoint.
	  */
	typedef uint32_t codepoint_t;

	/** @brief Unicode General Category Values
	  * @see   http://www.unicode.org/reports/tr44/
	  *
	  * The enumerators are declared without explicit values, so they are
	  * numbered consecutively from 0 in declaration order.
	  */
	enum category
	{
		// Other
		Cc, /**< @brief Control Character */
		Cf, /**< @brief Format Control Character */
		Cn, /**< @brief Unassigned */
		Co, /**< @brief Private Use */
		Cs, /**< @brief Surrogate Code Point */
		// Letter
		Ll, /**< @brief Lower Case Letter */
		Lm, /**< @brief Modifier Letter */
		Lo, /**< @brief Other Letter */
		Lt, /**< @brief Title Case Letter */
		Lu, /**< @brief Upper Case Letter */
		// Mark
		Mc, /**< @brief Spacing Mark */
		Me, /**< @brief Enclosing Mark */
		Mn, /**< @brief Non-Spacing Mark */
		// Symbol
		Sc, /**< @brief Currency Symbol */
		Sk, /**< @brief Modifier Symbol */
		Sm, /**< @brief Math Symbol */
		So, /**< @brief Other Symbol */
		// Number
		Nd, /**< @brief Decimal Digit */
		Nl, /**< @brief Letter-Like Number */
		No, /**< @brief Other Number */
		// Punctuation
		Pc, /**< @brief Connector Punctuation */
		Pd, /**< @brief Dash/Hyphen */
		Pe, /**< @brief Close Punctuation Mark */
		Pf, /**< @brief Final Quotation Mark */
		Pi, /**< @brief Initial Quotation Mark */
		Po, /**< @brief Other Punctuation */
		Ps, /**< @brief Open Punctuation Mark */
		// Separator
		Zc, /**< @brief Whitespace character in the Cc category */
		Zl, /**< @brief Line Separator */
		Zp, /**< @brief Paragraph Separator */
		Zs  /**< @brief Space Separator */
	};

	/** @brief Lookup the General Category for a Unicode codepoint.
	  *
	  * @param c The Unicode codepoint to lookup.
	  * @return  The General Category of the Unicode codepoint.
	  */
	category lookup_category(codepoint_t c);
}

#endif
#!/usr/bin/python | |||||
# Copyright (C) 2012 Reece H. Dunn | |||||
# | |||||
# This file is part of ucd-tools. | |||||
# | |||||
# ucd-tools is free software: you can redistribute it and/or modify | |||||
# it under the terms of the GNU General Public License as published by | |||||
# the Free Software Foundation, either version 3 of the License, or | |||||
# (at your option) any later version. | |||||
# | |||||
# ucd-tools is distributed in the hope that it will be useful, | |||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of | |||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |||||
# GNU General Public License for more details. | |||||
# | |||||
# You should have received a copy of the GNU General Public License | |||||
# along with ucd-tools. If not, see <http://www.gnu.org/licenses/>. | |||||
import os | |||||
import sys | |||||
import ucd | |||||
def write_categories(out):
    """Write the generated C++ source for ucd::lookup_category to ``out``.

    ``out`` is any object with a ``write(str)`` method (e.g. ``sys.stdout``).
    The emitted function currently returns ``Cn`` for every codepoint.
    """
    out.write("""/* Unicode General Categories
*
* Copyright (C) 2012 Reece H. Dunn
*
* This file is part of ucd-tools.
*
* ucd-tools is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* ucd-tools is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with ucd-tools. If not, see <http://www.gnu.org/licenses/>.
*/
// NOTE: This file is automatically generated from the UnicodeData.txt file in
// the Unicode Character database by the ucd-tools/tools/categories.py script.
#include "ucd/ucd.h"
using namespace ucd;
""")
    out.write('\n')
    out.write('ucd::category ucd::lookup_category(codepoint_t c)\n')
    out.write('{\n')
    out.write('\treturn Cn;\n')
    out.write('}\n')


if __name__ == '__main__':
    # Read the command line only when run as a script, so that importing this
    # module has no side effects and a missing argument gives a usage message
    # instead of an IndexError traceback.
    if len(sys.argv) < 2:
        sys.stderr.write('usage: %s UCD_ROOTDIR\n' % sys.argv[0])
        sys.exit(1)

    ucd_rootdir = sys.argv[1]
    # The parsed data is not consumed by the generated code yet; the call is
    # kept so that the UnicodeData input is still read before anything is
    # written, as before.
    unicode_data = ucd.parse_ucd_data(ucd_rootdir, 'UnicodeData')

    write_categories(sys.stdout)