| .*.swp | |||||
| # generated files: | |||||
| src/categories.cpp | |||||
| # intermediate files: | |||||
| .deps/ | |||||
| .libs/ | |||||
| .dirstamp | |||||
| *.pyc | |||||
| *.o | |||||
| *.lo | |||||
| # build output: | |||||
| src/libucd.la | |||||
| # autotools output: | |||||
| INSTALL | |||||
| NEWS | |||||
| README | |||||
| aclocal.m4 | |||||
| autom4te.cache/ | |||||
| config.guess | |||||
| config.h.in | |||||
| config.h | |||||
| config.log | |||||
| config.status | |||||
| config.sub | |||||
| configure | |||||
| depcomp | |||||
| install-sh | |||||
| libtool | |||||
| ltmain.sh | |||||
| m4/ | |||||
| missing | |||||
| Makefile | |||||
| Makefile.in | |||||
| stamp-h1 |
| # Global automake settings. The list variables that are initialised empty | |||||
| # here act as accumulators; later sections extend them with +=. | |||||
| AUTOMAKE_OPTIONS = subdir-objects | |||||
| localedir = $(datadir)/locale | |||||
| # NOTE(review): @XDGDATADIR@ is a configure substitution; confirm that | |||||
| # configure.ac defines it (e.g. via AC_SUBST), otherwise it stays unexpanded. | |||||
| xdgdatadir = @XDGDATADIR@ | |||||
| mimedir = $(xdgdatadir)/mime | |||||
| xmldir = $(mimedir)/packages | |||||
| AM_LDFLAGS = ${LTLIBINTL} | |||||
| # Header search path for the sources under src/. | |||||
| AM_CXXFLAGS = -Isrc/include -Isrc | |||||
| ACLOCAL_AMFLAGS = -I m4 | |||||
| bin_PROGRAMS = | |||||
| lib_LTLIBRARIES = | |||||
| man1_MANS = | |||||
| pkgdata_DATA = | |||||
| noinst_bin_PROGRAMS = | |||||
| noinst_LIBRARIES = | |||||
| noinst_bindir = | |||||
| EXTRA_DIST = config.rpath ChangeLog | |||||
| CLEANFILES = | |||||
| SUBDIRS = | |||||
| ############################# libtool ######################################### | |||||
| EXTRA_DIST += config.guess config.sub ltmain.sh | |||||
| # Library interface version, passed to libtool as -version-info | |||||
| # CURRENT:REVISION:AGE. Update the three numbers before each release: | |||||
| # | |||||
| # Increment if any interface has been added, removed or changed | |||||
| CURRENT=0 | |||||
| # Increment if source files have changed but no interface has | |||||
| # Reset to 0 whenever CURRENT is incremented | |||||
| REVISION=0 | |||||
| # Increment if interfaces have only been added (backward compatible superset) | |||||
| # Reset to 0 if any interface has been removed or changed incompatibly | |||||
| AGE=0 | |||||
| LIBUCD_VERSION=$(CURRENT):$(REVISION):$(AGE) | |||||
| ############################# ChangeLog ####################################### | |||||
| # Regenerate ChangeLog from the git history. The target is declared phony so | |||||
| # the recipe runs every time it is requested, and dist-hook depends on it so | |||||
| # the file is refreshed when a distribution is built. | |||||
| ChangeLog: | |||||
| git log > ChangeLog | |||||
| dist-hook: ChangeLog | |||||
| .PHONY: ChangeLog | |||||
| EXTRA_DIST += ChangeLog | |||||
| ############################# Unicode Character Database ###################### | |||||
| # Generate the category lookup source by running tools/categories.py on | |||||
| # UCD_ROOTDIR. The output goes to a temporary file that is renamed onto the | |||||
| # target only when the generator succeeds, so a failed run cannot leave a | |||||
| # truncated src/categories.cpp that make would then treat as up to date. | |||||
| # NOTE(review): UCD_ROOTDIR is not set anywhere in this file; presumably it is | |||||
| # supplied on the make command line or in the environment -- confirm. | |||||
| src/categories.cpp: tools/categories.py | |||||
| 	tools/categories.py ${UCD_ROOTDIR} > $@.tmp && mv $@.tmp $@ | |||||
| ############################# libucd ########################################## | |||||
| # Public header, installed under $(includedir)/ucd. | |||||
| libucd_includedir = $(includedir)/ucd | |||||
| libucd_include_HEADERS = \ | |||||
| src/include/ucd/ucd.h | |||||
| # Shared library built with libtool; its interface version comes from | |||||
| # LIBUCD_VERSION (CURRENT:REVISION:AGE). | |||||
| lib_LTLIBRARIES += src/libucd.la | |||||
| src_libucd_la_LDFLAGS = -version-info $(LIBUCD_VERSION) | |||||
| src_libucd_la_CXXFLAGS = ${AM_CXXFLAGS} | |||||
| # src/categories.cpp is produced by the tools/categories.py rule. | |||||
| src_libucd_la_SOURCES = \ | |||||
| src/categories.cpp |
| #!/bin/sh | |||||
| # Bootstrap the autotools build system from a fresh checkout. | |||||
| # Run a command and abort the bootstrap with status 1 if it fails. | |||||
| run() { "$@" || exit 1; } | |||||
| # Create the macro directory and the README/NEWS names as links to the | |||||
| # markdown documents. These steps are not checked for failure. | |||||
| mkdir -p m4 | |||||
| ln -sf README.md README | |||||
| ln -sf docs/ReleaseNotes.md NEWS | |||||
| # Each tool must succeed before the next one is run. | |||||
| run aclocal -I m4 | |||||
| run libtoolize | |||||
| run autoheader | |||||
| run automake --add-missing | |||||
| run autoconf |
| AC_PREREQ([2.65]) | |||||
| AC_INIT([Unicode Character Database Tools], [0.1], [https://github.com/rhdunn/ucd-tools/issues], [ucd-tools], [https://github.com/rhdunn/ucd-tools]) | |||||
| AM_INIT_AUTOMAKE() | |||||
| dnl Enable silent build rules, but only when the installed automake provides | |||||
| dnl AM_SILENT_RULES; an unguarded call would break older automake versions. | |||||
| m4_ifdef([AM_SILENT_RULES],[AM_SILENT_RULES([yes])]) | |||||
| AC_CONFIG_SRCDIR([src]) | |||||
| AC_CONFIG_MACRO_DIR([m4]) | |||||
| AC_CONFIG_HEADERS([config.h]) | |||||
| dnl LT_INIT replaces the obsolete AC_PROG_LIBTOOL, so it is called only once. | |||||
| LT_INIT | |||||
| dnl ================================================================ | |||||
| dnl Program checks. | |||||
| dnl ================================================================ | |||||
| AC_PROG_CXX | |||||
| AC_PROG_MAKE_SET | |||||
| dnl ================================================================ | |||||
| dnl Generate output. | |||||
| dnl ================================================================ | |||||
| AC_CONFIG_FILES([Makefile]) | |||||
| AC_OUTPUT | |||||
| dnl Print a summary of the configuration. | |||||
| dnl NOTE(review): XDGDATADIR is not assigned or AC_SUBST'ed anywhere in this | |||||
| dnl script, so the "XDG data location" line is presumably empty -- confirm. | |||||
| AC_MSG_NOTICE([ | |||||
| Configuration for Unicode Character Database Tools complete. | |||||
| Source code location: ${srcdir} | |||||
| XDG data location: ${XDGDATADIR} | |||||
| Compiler: ${CXX} | |||||
| Compiler flags: ${CXXFLAGS} | |||||
| ]) |
| /* Unicode Character Database API | |||||
| * | |||||
| * Copyright (C) 2012 Reece H. Dunn | |||||
| * | |||||
| * This file is part of ucd-tools. | |||||
| * | |||||
| * ucd-tools is free software: you can redistribute it and/or modify | |||||
| * it under the terms of the GNU General Public License as published by | |||||
| * the Free Software Foundation, either version 3 of the License, or | |||||
| * (at your option) any later version. | |||||
| * | |||||
| * ucd-tools is distributed in the hope that it will be useful, | |||||
| * but WITHOUT ANY WARRANTY; without even the implied warranty of | |||||
| * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |||||
| * GNU General Public License for more details. | |||||
| * | |||||
| * You should have received a copy of the GNU General Public License | |||||
| * along with ucd-tools. If not, see <http://www.gnu.org/licenses/>. | |||||
| */ | |||||
| #ifndef UNICODE_CHARACTER_DATA_H | |||||
| #define UNICODE_CHARACTER_DATA_H | |||||
| #include <stdint.h> | |||||
| /** @brief Unicode Character Database | |||||
| */ | |||||
| namespace ucd | |||||
| { | |||||
| /** @brief Represents a Unicode codepoint. | |||||
| */ | |||||
| typedef uint32_t codepoint_t; | |||||
| /** @brief Unicode General Category Values | |||||
| * | |||||
| * The enumerators take consecutive values starting at 0 in declaration | |||||
| * order, so their order must not be changed. | |||||
| * | |||||
| * @see http://www.unicode.org/reports/tr44/ | |||||
| */ | |||||
| enum category | |||||
| { | |||||
| // Other | |||||
| Cc, /**< @brief Control Character */ | |||||
| Cf, /**< @brief Format Control Character */ | |||||
| Cn, /**< @brief Unassigned */ | |||||
| Co, /**< @brief Private Use */ | |||||
| Cs, /**< @brief Surrogate Code Point */ | |||||
| // Letter | |||||
| Ll, /**< @brief Lower Case Letter */ | |||||
| Lm, /**< @brief Modifier Letter */ | |||||
| Lo, /**< @brief Other Letter */ | |||||
| Lt, /**< @brief Title Case Letter */ | |||||
| Lu, /**< @brief Upper Case Letter */ | |||||
| // Mark | |||||
| Mc, /**< @brief Spacing Mark */ | |||||
| Me, /**< @brief Enclosing Mark */ | |||||
| Mn, /**< @brief Non-Spacing Mark */ | |||||
| // Symbol | |||||
| Sc, /**< @brief Currency Symbol */ | |||||
| Sk, /**< @brief Modifier Symbol */ | |||||
| Sm, /**< @brief Math Symbol */ | |||||
| So, /**< @brief Other Symbol */ | |||||
| // Number | |||||
| Nd, /**< @brief Decimal Digit */ | |||||
| Nl, /**< @brief Letter Number */ | |||||
| No, /**< @brief Other Number */ | |||||
| // Punctuation | |||||
| Pc, /**< @brief Connector Punctuation */ | |||||
| Pd, /**< @brief Dash/Hyphen */ | |||||
| Pe, /**< @brief Close Punctuation Mark */ | |||||
| Pf, /**< @brief Final Quotation Mark */ | |||||
| Pi, /**< @brief Initial Quotation Mark */ | |||||
| Po, /**< @brief Other Punctuation Mark */ | |||||
| Ps, /**< @brief Open Punctuation Mark */ | |||||
| // Separator | |||||
| Zc, /**< @brief Whitespace character in the Cc category */ | |||||
| Zl, /**< @brief Line Separator */ | |||||
| Zp, /**< @brief Paragraph Separator */ | |||||
| Zs /**< @brief Space Separator */ | |||||
| }; | |||||
| /** @brief Lookup the General Category for a Unicode codepoint. | |||||
| * | |||||
| * @param c The Unicode codepoint to lookup. | |||||
| * @return The General Category of the Unicode codepoint. | |||||
| */ | |||||
| category lookup_category(codepoint_t c); | |||||
| } | |||||
| #endif |
| #!/usr/bin/python | |||||
| # Copyright (C) 2012 Reece H. Dunn | |||||
| # | |||||
| # This file is part of ucd-tools. | |||||
| # | |||||
| # ucd-tools is free software: you can redistribute it and/or modify | |||||
| # it under the terms of the GNU General Public License as published by | |||||
| # the Free Software Foundation, either version 3 of the License, or | |||||
| # (at your option) any later version. | |||||
| # | |||||
| # ucd-tools is distributed in the hope that it will be useful, | |||||
| # but WITHOUT ANY WARRANTY; without even the implied warranty of | |||||
| # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |||||
| # GNU General Public License for more details. | |||||
| # | |||||
| # You should have received a copy of the GNU General Public License | |||||
| # along with ucd-tools. If not, see <http://www.gnu.org/licenses/>. | |||||
| import os | |||||
| import sys | |||||
| import ucd | |||||
| ucd_rootdir = sys.argv[1] | |||||
| unicode_data = ucd.parse_ucd_data(ucd_rootdir, 'UnicodeData') | |||||
| if __name__ == '__main__': | |||||
| sys.stdout.write("""/* Unicode General Categories | |||||
| * | |||||
| * Copyright (C) 2012 Reece H. Dunn | |||||
| * | |||||
| * This file is part of ucd-tools. | |||||
| * | |||||
| * ucd-tools is free software: you can redistribute it and/or modify | |||||
| * it under the terms of the GNU General Public License as published by | |||||
| * the Free Software Foundation, either version 3 of the License, or | |||||
| * (at your option) any later version. | |||||
| * | |||||
| * ucd-tools is distributed in the hope that it will be useful, | |||||
| * but WITHOUT ANY WARRANTY; without even the implied warranty of | |||||
| * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |||||
| * GNU General Public License for more details. | |||||
| * | |||||
| * You should have received a copy of the GNU General Public License | |||||
| * along with ucd-tools. If not, see <http://www.gnu.org/licenses/>. | |||||
| */ | |||||
| // NOTE: This file is automatically generated from the UnicodeData.txt file in | |||||
| // the Unicode Character database by the ucd-tools/tools/categories.py script. | |||||
| #include "ucd/ucd.h" | |||||
| using namespace ucd; | |||||
| """) | |||||
| sys.stdout.write('\n') | |||||
| sys.stdout.write('ucd::category ucd::lookup_category(codepoint_t c)\n') | |||||
| sys.stdout.write('{\n') | |||||
| sys.stdout.write('\treturn Cn;\n') | |||||
| sys.stdout.write('}\n') |