Browse Source

Infrastructure for building libucd.a.

master
Reece H. Dunn 12 years ago
parent
commit
2813950acc
6 changed files with 324 additions and 0 deletions
  1. 43
    0
      .gitignore
  2. 71
    0
      Makefile.am
  3. 12
    0
      autogen.sh
  4. 38
    0
      configure.ac
  5. 100
    0
      src/include/ucd/ucd.h
  6. 60
    0
      tools/categories.py

+ 43
- 0
.gitignore View File

@@ -0,0 +1,43 @@
# editor swap files:

.*.swp

# generated files (src/categories.cpp is written by tools/categories.py via
# the rule in Makefile.am):

src/categories.cpp

# intermediate files (dependency tracking, libtool objects, compiled Python):

.deps/
.libs/
.dirstamp

*.pyc
*.o
*.lo

# build output:

src/libucd.la

# autotools output (README and NEWS are symlinks created by autogen.sh; m4/ is
# created by autogen.sh as the aclocal macro directory):

INSTALL
NEWS
README
aclocal.m4
autom4te.cache/
config.guess
config.h.in
config.h
config.log
config.status
config.sub
configure
depcomp
install-sh
libtool
ltmain.sh
m4/
missing
Makefile
Makefile.in
stamp-h1

+ 71
- 0
Makefile.am View File

@@ -0,0 +1,71 @@
# Automake input for ucd-tools: generates src/categories.cpp from the Unicode
# Character Database and builds it into the libucd libtool library.
AUTOMAKE_OPTIONS = subdir-objects

localedir = $(datadir)/locale
# NOTE(review): @XDGDATADIR@ is only substituted when configure.ac calls
# AC_SUBST([XDGDATADIR]) -- confirm that it does.
xdgdatadir = @XDGDATADIR@
mimedir = $(xdgdatadir)/mime
xmldir = $(mimedir)/packages

AM_LDFLAGS = ${LTLIBINTL}
# Header search paths are anchored at $(srcdir) so that out-of-tree (VPATH)
# builds can still find the headers.
AM_CXXFLAGS = -I$(srcdir)/src/include -I$(srcdir)/src

ACLOCAL_AMFLAGS = -I m4

# Empty primaries; the sections below append to them with +=.
bin_PROGRAMS =
lib_LTLIBRARIES =
man1_MANS =
pkgdata_DATA =

noinst_bin_PROGRAMS =
noinst_LIBRARIES =

noinst_bindir =

EXTRA_DIST = config.rpath ChangeLog
CLEANFILES =
SUBDIRS =

############################# libtool #########################################

EXTRA_DIST += config.guess config.sub ltmain.sh

# Increment if the interface has changed and is not backward compatible
CURRENT=0

# Increment if source files have changed
# Reset to 0 if the interface has changed
REVISION=0

# Increment if the interface is backward compatible (superset)
# Reset to 0 if the interface is not backward compatible
AGE=0

LIBUCD_VERSION=$(CURRENT):$(REVISION):$(AGE)

############################# ChangeLog #######################################

# ChangeLog is regenerated from the git history every time it is requested
# (it is declared phony) and is refreshed before a distribution is made.
# It is already listed in EXTRA_DIST above, so it is not added again here.
ChangeLog:
	git log > ChangeLog

dist-hook: ChangeLog

.PHONY: ChangeLog

############################# Unicode Character Database ######################

# The generator is a prerequisite of a distributed source file, so it has to
# be shipped as well; otherwise make cannot resolve the prerequisite when
# building from a release tarball.
EXTRA_DIST += tools/categories.py

# UCD_ROOTDIR is not defined in this file; presumably it is supplied on the
# make command line or through the environment -- TODO confirm.
#
# The output is written to a temporary file and only renamed into place on
# success, so a failed generator run never leaves a truncated categories.cpp
# that looks up to date.
src/categories.cpp: tools/categories.py
	@$(MKDIR_P) $(@D)
	$(srcdir)/tools/categories.py ${UCD_ROOTDIR} > $@.tmp || { rm -f $@.tmp; exit 1; }
	mv -f $@.tmp $@

############################# libucd ##########################################

libucd_includedir = $(includedir)/ucd
libucd_include_HEADERS = \
	src/include/ucd/ucd.h

lib_LTLIBRARIES += src/libucd.la

src_libucd_la_LDFLAGS = -version-info $(LIBUCD_VERSION)
src_libucd_la_CXXFLAGS = ${AM_CXXFLAGS}
src_libucd_la_SOURCES = \
	src/categories.cpp

+ 12
- 0
autogen.sh View File

@@ -0,0 +1,12 @@
#!/bin/sh
# Bootstrap the autotools build system from a fresh checkout.

# Run one bootstrap tool and abort the whole script with status 1 if it fails.
run_step() {
	"$@" || exit 1
}

# Preparation: the aclocal macro directory, plus README and NEWS as symlinks
# to the Markdown documents.
mkdir -p m4
ln -sf README.md README
ln -sf docs/ReleaseNotes.md NEWS

run_step aclocal -I m4

run_step libtoolize
run_step autoheader
run_step automake --add-missing
run_step autoconf

+ 38
- 0
configure.ac View File

@@ -0,0 +1,38 @@
dnl Autoconf input for ucd-tools: sets up automake and libtool, checks for a
dnl C++ compiler, and generates the Makefile and config.h.
AC_PREREQ([2.65])
AC_INIT([Unicode Character Database Tools], [0.1], [https://github.com/rhdunn/ucd-tools/issues], [ucd-tools], [https://github.com/rhdunn/ucd-tools])
AM_INIT_AUTOMAKE()

dnl Enable silent rules by default, but only when the installed automake
dnl provides the macro. The call that passes [yes] has to live inside the
dnl m4_ifdef guard; an unguarded call defeats the purpose of the guard.
m4_ifdef([AM_SILENT_RULES], [AM_SILENT_RULES([yes])])

AC_CONFIG_SRCDIR([src])
AC_CONFIG_MACRO_DIR([m4])
AC_CONFIG_HEADERS([config.h])

LT_INIT

dnl ================================================================
dnl Program checks.
dnl ================================================================

dnl AC_PROG_LIBTOOL is intentionally not called here: it is the obsolete
dnl spelling of LT_INIT, which has already been invoked above.
AC_PROG_CXX
AC_PROG_MAKE_SET

dnl ================================================================
dnl Generate output.
dnl ================================================================

AC_CONFIG_FILES([Makefile])
AC_OUTPUT

dnl NOTE(review): XDGDATADIR is not assigned or AC_SUBST'ed anywhere in this
dnl script, so the "XDG data location" line below prints an empty value unless
dnl the variable comes from the environment -- confirm the intended source.
AC_MSG_NOTICE([

Configuration for Unicode Character Data Tools complete.

Source code location: ${srcdir}
XDG data location: ${XDGDATADIR}

Compiler: ${CXX}
Compiler flags: ${CXXFLAGS}
])

+ 100
- 0
src/include/ucd/ucd.h View File

@@ -0,0 +1,100 @@
/* Unicode Character Database API
*
* Copyright (C) 2012 Reece H. Dunn
*
* This file is part of ucd-tools.
*
* ucd-tools is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* ucd-tools is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with ucd-tools. If not, see <http://www.gnu.org/licenses/>.
*/

#ifndef UNICODE_CHARACTER_DATA_H
#define UNICODE_CHARACTER_DATA_H

#include <stdint.h>

/** @brief Unicode Character Database
*
* Public API of libucd: the codepoint type, the General Category enumeration
* and the category lookup function.
*/
namespace ucd
{
/** @brief Represents a Unicode codepoint.
*
* Stored as an unsigned 32-bit integer.
*/
typedef uint32_t codepoint_t;

/** @brief Unicode General Category Values
*
* The enumerators have no explicit values, so they are numbered
* consecutively from 0 (Cc) in declaration order; reordering them changes
* their numeric values.
*
* NOTE(review): Zc does not appear among the General_Category values in
* UAX #44; it looks like a ucd-tools specific addition -- confirm.
*
* @see http://www.unicode.org/reports/tr44/
*/
enum category
{
// Other

Cc, /**< @brief Control Character */
Cf, /**< @brief Format Control Character */
Cn, /**< @brief Unassigned */
Co, /**< @brief Private Use */
Cs, /**< @brief Surrogate Code Point */

// Letter

Ll, /**< @brief Lower Case Letter */
Lm, /**< @brief Modifier Letter */
Lo, /**< @brief Other Letter */
Lt, /**< @brief Title Case Letter */
Lu, /**< @brief Upper Case Letter */

// Mark

Mc, /**< @brief Spacing Mark */
Me, /**< @brief Enclosing Mark */
Mn, /**< @brief Non-Spacing Mark */

// Symbol

Sc, /**< @brief Currency Symbol */
Sk, /**< @brief Modifier Symbol */
Sm, /**< @brief Math Symbol */
So, /**< @brief Other Symbol */

// Number

Nd, /**< @brief Decimal Digit */
Nl, /**< @brief Letter-Like Number */
No, /**< @brief Other Number */

// Punctuation

Pc, /**< @brief Connector Punctuation */
Pd, /**< @brief Dash/Hyphen Punctuation */
Pe, /**< @brief Close Punctuation Mark */
Pf, /**< @brief Final Quotation Mark */
Pi, /**< @brief Initial Quotation Mark */
Po, /**< @brief Other Punctuation */
Ps, /**< @brief Open Punctuation Mark */

// Separator

Zc, /**< @brief Whitespace character in the Cc category */
Zl, /**< @brief Line Separator */
Zp, /**< @brief Paragraph Separator */
Zs, /**< @brief Space Separator */
};

/** @brief Lookup the General Category for a Unicode codepoint.
*
* @param c The Unicode codepoint to lookup.
* @return The General Category of the Unicode codepoint.
*/
category lookup_category(codepoint_t c);
}

#endif

+ 60
- 0
tools/categories.py View File

@@ -0,0 +1,60 @@
#!/usr/bin/python

# Copyright (C) 2012 Reece H. Dunn
#
# This file is part of ucd-tools.
#
# ucd-tools is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# ucd-tools is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with ucd-tools. If not, see <http://www.gnu.org/licenses/>.

import os
import sys
import ucd

# Root directory of the Unicode Character Database, taken from the first
# command-line argument.
ucd_rootdir = sys.argv[1]
# Result of ucd.parse_ucd_data for 'UnicodeData'; it is not referenced by the
# generator code below.
unicode_data = ucd.parse_ucd_data(ucd_rootdir, 'UnicodeData')

if __name__ == '__main__':
    # Everything is written to standard output as C++ source text.
    emit = sys.stdout.write

    # Banner: licence header, "generated file" notice, include and using lines.
    emit("""/* Unicode General Categories
*
* Copyright (C) 2012 Reece H. Dunn
*
* This file is part of ucd-tools.
*
* ucd-tools is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* ucd-tools is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with ucd-tools. If not, see <http://www.gnu.org/licenses/>.
*/

// NOTE: This file is automatically generated from the UnicodeData.txt file in
// the Unicode Character database by the ucd-tools/tools/categories.py script.

#include "ucd/ucd.h"

using namespace ucd;
""")

    # Definition of ucd::lookup_category: a placeholder body that returns Cn
    # for every codepoint.
    for fragment in (
        '\n',
        'ucd::category ucd::lookup_category(codepoint_t c)\n',
        '{\n',
        '\treturn Cn;\n',
        '}\n',
    ):
        emit(fragment)

Loading…
Cancel
Save