| @@ -0,0 +1,43 @@ | |||
| .*.swp | |||
| # generated files: | |||
| src/categories.cpp | |||
| # intermediate files: | |||
| .deps/ | |||
| .libs/ | |||
| .dirstamp | |||
| *.pyc | |||
| *.o | |||
| *.lo | |||
| # build output: | |||
| src/libucd.la | |||
| # autotools output: | |||
| INSTALL | |||
| NEWS | |||
| README | |||
| aclocal.m4 | |||
| autom4te.cache/ | |||
| config.guess | |||
| config.h.in | |||
| config.h | |||
| config.log | |||
| config.status | |||
| config.sub | |||
| configure | |||
| depcomp | |||
| install-sh | |||
| libtool | |||
| ltmain.sh | |||
| m4/ | |||
| missing | |||
| Makefile | |||
| Makefile.in | |||
| stamp-h1 | |||
| @@ -0,0 +1,71 @@ | |||
# Automake input for ucd-tools: builds the libucd libtool library and
# (re)generates src/categories.cpp with tools/categories.py.

AUTOMAKE_OPTIONS = subdir-objects

localedir = $(datadir)/locale
# NOTE(review): no AC_SUBST for XDGDATADIR is visible in configure.ac, so this
# substitution may be left unexpanded -- confirm where it is meant to be set.
xdgdatadir = @XDGDATADIR@
mimedir = $(xdgdatadir)/mime
xmldir = $(mimedir)/packages

AM_LDFLAGS = ${LTLIBINTL}
AM_CXXFLAGS = -Isrc/include -Isrc

ACLOCAL_AMFLAGS = -I m4

# Every list starts out empty so the sections below can append with `+=`.
bin_PROGRAMS =
lib_LTLIBRARIES =
man1_MANS =
pkgdata_DATA =

noinst_bin_PROGRAMS =
noinst_LIBRARIES =
noinst_bindir =

# ChangeLog is listed here once; the ChangeLog section below only provides
# the rule that regenerates it.
EXTRA_DIST = config.rpath ChangeLog
CLEANFILES =

SUBDIRS =

############################# libtool #########################################

EXTRA_DIST += config.guess config.sub ltmain.sh

# Increment if the interface has changed and is not backward compatible
CURRENT=0

# Increment if source files have changed
# Reset to 0 if the interface has changed
REVISION=0

# Increment if the interface is backward compatible (superset)
# Reset to 0 if the interface is not backward compatible
AGE=0

LIBUCD_VERSION=$(CURRENT):$(REVISION):$(AGE)

############################# ChangeLog #######################################

# Regenerate ChangeLog from the git history. The log is written to a
# temporary file first and only moved into place on success, and the whole
# step is skipped outside a git checkout, so an existing ChangeLog is never
# replaced by a truncated or empty one.
ChangeLog:
	if test -d "$(srcdir)/.git"; then \
		git log > $@.tmp && mv -f $@.tmp $@; \
	fi

dist-hook: ChangeLog

.PHONY: ChangeLog

############################# Unicode Character Database ######################

# UCD_ROOTDIR is passed to the generator as its only argument; it is not set
# in this file (presumably supplied on the make command line or through the
# environment -- confirm).
#
# The output goes to a temporary file and is moved into place only when the
# generator succeeds; redirecting straight into $@ would leave a truncated
# file that make considers up to date after a failed run.
src/categories.cpp: tools/categories.py
	tools/categories.py ${UCD_ROOTDIR} > $@.tmp || { rm -f $@.tmp; exit 1; }
	mv -f $@.tmp $@

############################# libucd ##########################################

libucd_includedir = $(includedir)/ucd
libucd_include_HEADERS = \
	src/include/ucd/ucd.h

lib_LTLIBRARIES += src/libucd.la

src_libucd_la_LDFLAGS = -version-info $(LIBUCD_VERSION)
src_libucd_la_CXXFLAGS = ${AM_CXXFLAGS}
src_libucd_la_SOURCES = \
	src/categories.cpp
| @@ -0,0 +1,12 @@ | |||
#!/bin/sh
# Regenerate the autotools build files by running aclocal, libtoolize,
# autoheader, automake and autoconf in that order.

# Run one bootstrap command; abort the whole script with status 1 if it fails.
run_step()
{
	"$@" || exit 1
}

# Directory passed to `aclocal -I` below.
mkdir -p m4

# Make the Markdown documents available under the names README and NEWS.
ln -sf README.md README
ln -sf docs/ReleaseNotes.md NEWS

run_step aclocal -I m4
run_step libtoolize
run_step autoheader
run_step automake --add-missing
run_step autoconf
| @@ -0,0 +1,38 @@ | |||
dnl Autoconf input for ucd-tools.
AC_PREREQ([2.65])
AC_INIT([Unicode Character Database Tools], [0.1], [https://github.com/rhdunn/ucd-tools/issues], [ucd-tools], [https://github.com/rhdunn/ucd-tools])
AM_INIT_AUTOMAKE()

dnl Enable silent rules only when this automake provides the macro. The call
dnl has to live inside the m4_ifdef guard: an additional unguarded call would
dnl defeat the check on automake versions that lack the macro.
m4_ifdef([AM_SILENT_RULES], [AM_SILENT_RULES([yes])])

AC_CONFIG_SRCDIR([src])
AC_CONFIG_MACRO_DIR([m4])
AC_CONFIG_HEADERS([config.h])

dnl LT_INIT is the current libtool entry point; the obsolete AC_PROG_LIBTOOL
dnl alias is not invoked a second time below.
LT_INIT

dnl ================================================================
dnl Program checks.
dnl ================================================================

AC_PROG_CXX
AC_PROG_MAKE_SET

dnl ================================================================
dnl Generate output.
dnl ================================================================

AC_CONFIG_FILES([Makefile])
AC_OUTPUT

dnl NOTE(review): XDGDATADIR is reported below and referenced from Makefile.am
dnl as a substitution, but nothing in this file sets or AC_SUBSTs it, so it
dnl presumably expands to an empty string here -- confirm the intended source.
AC_MSG_NOTICE([

    Configuration for Unicode Character Database Tools complete.

        Source code location: ${srcdir}
        XDG data location: ${XDGDATADIR}

        Compiler: ${CXX}
        Compiler flags: ${CXXFLAGS}
])
| @@ -0,0 +1,100 @@ | |||
| /* Unicode Character Database API | |||
| * | |||
| * Copyright (C) 2012 Reece H. Dunn | |||
| * | |||
| * This file is part of ucd-tools. | |||
| * | |||
| * ucd-tools is free software: you can redistribute it and/or modify | |||
| * it under the terms of the GNU General Public License as published by | |||
| * the Free Software Foundation, either version 3 of the License, or | |||
| * (at your option) any later version. | |||
| * | |||
| * ucd-tools is distributed in the hope that it will be useful, | |||
| * but WITHOUT ANY WARRANTY; without even the implied warranty of | |||
| * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |||
| * GNU General Public License for more details. | |||
| * | |||
| * You should have received a copy of the GNU General Public License | |||
| * along with ucd-tools. If not, see <http://www.gnu.org/licenses/>. | |||
| */ | |||
| #ifndef UNICODE_CHARACTER_DATA_H | |||
| #define UNICODE_CHARACTER_DATA_H | |||
| #include <stdint.h> | |||
/** @brief Unicode Character Database
  */
namespace ucd
{
	/** @brief Represents a Unicode codepoint.
	  */
	typedef uint32_t codepoint_t;

	/** @brief Unicode General Category Values
	  * @see   http://www.unicode.org/reports/tr44/
	  *
	  * The enumerators are grouped by major class and rely on their implicit
	  * values (Cc == 0 ... Zs == 30), so their order must not change.
	  */
	enum category
	{
		// Other
		Cc, /**< @brief Control Character */
		Cf, /**< @brief Format Control Character */
		Cn, /**< @brief Unassigned */
		Co, /**< @brief Private Use */
		Cs, /**< @brief Surrogate Code Point */
		// Letter
		Ll, /**< @brief Lower Case Letter */
		Lm, /**< @brief Modifier Letter */
		Lo, /**< @brief Other Letter */
		Lt, /**< @brief Title Case Letter */
		Lu, /**< @brief Upper Case Letter */
		// Mark
		Mc, /**< @brief Spacing Mark */
		Me, /**< @brief Enclosing Mark */
		Mn, /**< @brief Non-Spacing Mark */
		// Symbol
		Sc, /**< @brief Currency Symbol */
		Sk, /**< @brief Modifier Symbol */
		Sm, /**< @brief Math Symbol */
		So, /**< @brief Other Symbol */
		// Number
		Nd, /**< @brief Decimal Digit */
		Nl, /**< @brief Letter-Like Number */
		No, /**< @brief Other Number */
		// Punctuation
		Pc, /**< @brief Connector */
		Pd, /**< @brief Dash/Hyphen */
		Pe, /**< @brief Close Punctuation Mark */
		Pf, /**< @brief Final Quotation Mark */
		Pi, /**< @brief Initial Quotation Mark */
		Po, /**< @brief Other */
		Ps, /**< @brief Open Punctuation Mark */
		// Separator
		// NOTE(review): Zc does not look like a General Category value from
		// TR44; presumably a project-specific extension -- confirm.
		Zc, /**< @brief Whitespace character in the Cc category */
		Zl, /**< @brief Line Separator */
		Zp, /**< @brief Paragraph Separator */
		Zs  /**< @brief Space Separator */
		// No comma after the last enumerator: a trailing comma in an
		// enumerator list is only valid from C++11 onwards.
	};

	/** @brief Lookup the General Category for a Unicode codepoint.
	  *
	  * @param c The Unicode codepoint to lookup.
	  * @return  The General Category of the Unicode codepoint.
	  */
	category lookup_category(codepoint_t c);
}
| #endif | |||
| @@ -0,0 +1,60 @@ | |||
| #!/usr/bin/python | |||
| # Copyright (C) 2012 Reece H. Dunn | |||
| # | |||
| # This file is part of ucd-tools. | |||
| # | |||
| # ucd-tools is free software: you can redistribute it and/or modify | |||
| # it under the terms of the GNU General Public License as published by | |||
| # the Free Software Foundation, either version 3 of the License, or | |||
| # (at your option) any later version. | |||
| # | |||
| # ucd-tools is distributed in the hope that it will be useful, | |||
| # but WITHOUT ANY WARRANTY; without even the implied warranty of | |||
| # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |||
| # GNU General Public License for more details. | |||
| # | |||
| # You should have received a copy of the GNU General Public License | |||
| # along with ucd-tools. If not, see <http://www.gnu.org/licenses/>. | |||
| import os | |||
| import sys | |||
| import ucd | |||
def _write_source(out):
    """Write the generated C++ source for ``ucd::lookup_category`` to *out*.

    *out* is any object with a ``write`` method. The emitted function is a
    stub that returns ``Cn`` for every codepoint.
    """
    out.write("""/* Unicode General Categories
 *
 * Copyright (C) 2012 Reece H. Dunn
 *
 * This file is part of ucd-tools.
 *
 * ucd-tools is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * ucd-tools is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with ucd-tools. If not, see <http://www.gnu.org/licenses/>.
 */
// NOTE: This file is automatically generated from the UnicodeData.txt file in
// the Unicode Character database by the ucd-tools/tools/categories.py script.
#include "ucd/ucd.h"
using namespace ucd;
""")
    out.write('\n')
    out.write('ucd::category ucd::lookup_category(codepoint_t c)\n')
    out.write('{\n')
    out.write('\treturn Cn;\n')
    out.write('}\n')


def main(argv):
    """Generate the categories source on stdout and return the exit status.

    *argv* is the full argument vector (``sys.argv``): the program name
    followed by exactly one argument, the UCD root directory. Returns 0 on
    success and 2, after printing a usage line to stderr, when the argument
    count is wrong. Checking up front avoids an IndexError traceback when
    the directory argument is missing.
    """
    if len(argv) != 2:
        sys.stderr.write('usage: %s UCD_ROOTDIR\n' % (argv[0] if argv else 'categories.py'))
        return 2

    ucd_rootdir = argv[1]
    # Parsed before anything is written, as the script always did. The result
    # is not consumed yet because the emitted lookup function is still a stub.
    unicode_data = ucd.parse_ucd_data(ucd_rootdir, 'UnicodeData')

    _write_source(sys.stdout)
    return 0


if __name__ == '__main__':
    sys.exit(main(sys.argv))