9 years ago · 454038dbfa
--- a/src/case.cpp
+++ b/src/case.cpp
@@ -1,6 +1,6 @@
 /* Unicode Case Conversion
 *
 * Copyright (C) 2012 Reece H. Dunn
 * Copyright (C) 2012-2016 Reece H. Dunn
 *
 * This file is part of ucd-tools.
 *
@@ -2511,6 +2511,11 @@ static const case_conversion_entry case_conversion_data[] =
 	{ 0x0118DF, 0x0118BF, 0x000000, 0x0118BF },
 };

 // C API wrapper: forwards to ucd::toupper and returns the mapped codepoint
 // unchanged.
 codepoint_t ucd_toupper(codepoint_t c)
 {
 	const codepoint_t upper = ucd::toupper(c);
 	return upper;
 }

 ucd::codepoint_t ucd::toupper(codepoint_t c)
 {
 	int begin = 0;
@@ -2529,6 +2534,11 @@ ucd::codepoint_t ucd::toupper(codepoint_t c)
 	return c;
 }

 // C API wrapper: forwards to ucd::tolower and returns the mapped codepoint
 // unchanged.
 codepoint_t ucd_tolower(codepoint_t c)
 {
 	const codepoint_t lower = ucd::tolower(c);
 	return lower;
 }

 ucd::codepoint_t ucd::tolower(codepoint_t c)
 {
 	int begin = 0;
@@ -2547,6 +2557,11 @@ ucd::codepoint_t ucd::tolower(codepoint_t c)
 	return c;
 }

 // C API wrapper: forwards to ucd::totitle and returns the mapped codepoint
 // unchanged.
 codepoint_t ucd_totitle(codepoint_t c)
 {
 	const codepoint_t title = ucd::totitle(c);
 	return title;
 }

 ucd::codepoint_t ucd::totitle(codepoint_t c)
 {
 	int begin = 0;
--- a/src/categories.cpp
+++ b/src/categories.cpp
@@ -1,6 +1,6 @@
 /* Unicode General Categories
 *
 * Copyright (C) 2012 Reece H. Dunn
 * Copyright (C) 2012-2016 Reece H. Dunn
 *
 * This file is part of ucd-tools.
 *
@@ -3314,6 +3314,11 @@ static const uint8_t *categories_0E0000_0E01FF[] =
 	categories_0E0100,
 };

 // C API wrapper: looks up the General Category of a codepoint via
 // ucd::lookup_category and converts the result from the C++ enum to the
 // C enum.
 ucd_category ucd_lookup_category(codepoint_t c)
 {
 	// Pass the codepoint straight through. It must not be cast to
 	// ucd::category first: a codepoint is not a category value, and converting
 	// an out-of-range integer to an unscoped enumeration without a fixed
 	// underlying type is undefined behaviour.
 	return static_cast<ucd_category>(ucd::lookup_category(c));
 }

 ucd::category ucd::lookup_category(codepoint_t c)
 {
 	if (c <= 0x00D7FF) // 000000..00D7FF
@@ -3342,6 +3347,11 @@ ucd::category ucd::lookup_category(codepoint_t c)
 	return Ii; // Invalid Unicode Codepoint
 }

 // C API wrapper: converts the C category to the C++ enum, delegates to
 // ucd::lookup_category_group, and converts the resulting group back to the
 // C enum.
 ucd_category_group ucd_get_category_group_for_category(ucd_category c)
 {
 	const ucd::category cpp_category = static_cast<ucd::category>(c);
 	const ucd::category_group cpp_group = ucd::lookup_category_group(cpp_category);
 	return static_cast<ucd_category_group>(cpp_group);
 }

 ucd::category_group ucd::lookup_category_group(category c)
 {
 	switch (c)
@@ -3365,6 +3375,11 @@ ucd::category_group ucd::lookup_category_group(category c)
 	}
 }

 // C API wrapper: resolves the codepoint's General Category, maps that
 // category to its group, and converts the group to the C enum.
 ucd_category_group ucd_lookup_category_group(codepoint_t c)
 {
 	const ucd::category cat = ucd::lookup_category(c);
 	const ucd::category_group group = ucd::lookup_category_group(cat);
 	return static_cast<ucd_category_group>(group);
 }

 ucd::category_group ucd::lookup_category_group(codepoint_t c)
 {
 	return lookup_category_group(lookup_category(c));
--- a/src/ctype.cpp
+++ b/src/ctype.cpp
@@ -1,6 +1,6 @@
 /* ctype-style APIs.
 *
 * Copyright (C) 2012-2013 Reece H. Dunn
 * Copyright (C) 2012-2016 Reece H. Dunn
 *
 * This file is part of ucd-tools.
 *
@@ -20,6 +20,11 @@

 #include "ucd/ucd.h"

 // C API wrapper: forwards to ucd::isalnum and returns its result unchanged.
 int ucd_isalnum(codepoint_t c)
 {
 	const int is_alnum = ucd::isalnum(c);
 	return is_alnum;
 }

 int ucd::isalnum(codepoint_t c)
 {
 	switch (lookup_category(c))
@@ -32,6 +37,11 @@ int ucd::isalnum(codepoint_t c)
 	}
 }

 // C API wrapper: forwards to ucd::isalpha and returns its result unchanged.
 int ucd_isalpha(codepoint_t c)
 {
 	const int is_alpha = ucd::isalpha(c);
 	return is_alpha;
 }

 int ucd::isalpha(codepoint_t c)
 {
 	switch (lookup_category(c))
@@ -43,11 +53,21 @@ int ucd::isalpha(codepoint_t c)
 	}
 }

 // C API wrapper: forwards to ucd::iscntrl and returns its result unchanged.
 int ucd_iscntrl(codepoint_t c)
 {
 	const int is_cntrl = ucd::iscntrl(c);
 	return is_cntrl;
 }

 // Returns 1 when the General Category of the codepoint is Cc, 0 otherwise.
 int ucd::iscntrl(codepoint_t c)
 {
 	const category general_category = lookup_category(c);
 	return (general_category == Cc) ? 1 : 0;
 }

 // C API wrapper: forwards to ucd::isdigit and returns its result unchanged.
 int ucd_isdigit(codepoint_t c)
 {
 	const int is_digit = ucd::isdigit(c);
 	return is_digit;
 }

 int ucd::isdigit(codepoint_t c)
 {
 	switch (lookup_category(c))
@@ -59,6 +79,11 @@ int ucd::isdigit(codepoint_t c)
 	}
 }

 // C API wrapper: forwards to ucd::isgraph and returns its result unchanged.
 int ucd_isgraph(codepoint_t c)
 {
 	const int is_graph = ucd::isgraph(c);
 	return is_graph;
 }

 int ucd::isgraph(codepoint_t c)
 {
 	switch (lookup_category(c))
@@ -72,11 +97,21 @@ int ucd::isgraph(codepoint_t c)
 	}
 }

 // C API wrapper: forwards to ucd::islower and returns its result unchanged.
 int ucd_islower(codepoint_t c)
 {
 	const int is_lower = ucd::islower(c);
 	return is_lower;
 }

 // Returns 1 when the General Category of the codepoint is Ll, 0 otherwise.
 int ucd::islower(codepoint_t c)
 {
 	const category general_category = lookup_category(c);
 	return (general_category == Ll) ? 1 : 0;
 }

 // C API wrapper: forwards to ucd::isprint and returns its result unchanged.
 int ucd_isprint(codepoint_t c)
 {
 	const int is_print = ucd::isprint(c);
 	return is_print;
 }

 int ucd::isprint(codepoint_t c)
 {
 	switch (lookup_category(c))
@@ -89,6 +124,11 @@ int ucd::isprint(codepoint_t c)
 	}
 }

 // C API wrapper: forwards to ucd::ispunct and returns its result unchanged.
 int ucd_ispunct(codepoint_t c)
 {
 	const int is_punct = ucd::ispunct(c);
 	return is_punct;
 }

 int ucd::ispunct(codepoint_t c)
 {
 	switch (lookup_category(c))
@@ -100,6 +140,11 @@ int ucd::ispunct(codepoint_t c)
 	}
 }

 // C API wrapper: forwards to ucd::isspace and returns its result unchanged.
 int ucd_isspace(codepoint_t c)
 {
 	const int is_space = ucd::isspace(c);
 	return is_space;
 }

 int ucd::isspace(codepoint_t c)
 {
 	switch (lookup_category(c))
@@ -122,6 +167,11 @@ int ucd::isspace(codepoint_t c)
 	}
 }

 // C API wrapper: forwards to ucd::isupper and returns its result unchanged.
 int ucd_isupper(codepoint_t c)
 {
 	const int is_upper = ucd::isupper(c);
 	return is_upper;
 }

 int ucd::isupper(codepoint_t c)
 {
 	return lookup_category(c) == Lu;
--- a/src/include/ucd/ucd.h
+++ b/src/include/ucd/ucd.h
@@ -1,6 +1,6 @@
 /* Unicode Character Database API
 *
 * Copyright (C) 2012-2014 Reece H. Dunn
 * Copyright (C) 2012-2016 Reece H. Dunn
 *
 * This file is part of ucd-tools.
 *
@@ -23,27 +23,433 @@

 #include <stdint.h>

 #ifdef __cplusplus
 extern "C"
 {
 #endif

 /** @brief Represents a Unicode codepoint.
  */
 typedef uint32_t codepoint_t;

 /** @brief Unicode General Category Groups
  *
  * This is the C API enumeration. The C++ ucd::category_group enumerators
  * declared later in this header are defined in terms of these values, and
  * the C wrappers cast between the two types.
  *
  * @see   http://www.unicode.org/reports/tr44/
  */
 typedef enum ucd_category_group_
 {
 	UCD_CATEGORY_GROUP_C, /**< @brief Other */
 	UCD_CATEGORY_GROUP_I, /**< @brief Invalid */
 	UCD_CATEGORY_GROUP_L, /**< @brief Letter */
 	UCD_CATEGORY_GROUP_M, /**< @brief Mark */
 	UCD_CATEGORY_GROUP_N, /**< @brief Number */
 	UCD_CATEGORY_GROUP_P, /**< @brief Punctuation */
 	UCD_CATEGORY_GROUP_S, /**< @brief Symbol */
 	UCD_CATEGORY_GROUP_Z, /**< @brief Separator */
 } ucd_category_group;

 /** @brief Get a string representation of the category_group enumeration value.
  *
  * @param c The value to get the string representation for.
  *
  * @return The string representation, or "-" if the value is not recognized.
  */
 const char *ucd_get_category_group_string(ucd_category_group c);

 /** @brief Unicode General Category Values
  * @see   http://www.unicode.org/reports/tr44/
  */
 typedef enum ucd_category_
 {
 	UCD_CATEGORY_Cc, /**< @brief Control Character */
 	UCD_CATEGORY_Cf, /**< @brief Format Control Character */
 	UCD_CATEGORY_Cn, /**< @brief Unassigned */
 	UCD_CATEGORY_Co, /**< @brief Private Use */
 	UCD_CATEGORY_Cs, /**< @brief Surrogate Code Point */

 	UCD_CATEGORY_Ii, /**< @brief Invalid Unicode Codepoint */

 	UCD_CATEGORY_Ll, /**< @brief Lower Case Letter */
 	UCD_CATEGORY_Lm, /**< @brief Modifier Letter */
 	UCD_CATEGORY_Lo, /**< @brief Other Letter */
 	UCD_CATEGORY_Lt, /**< @brief Title Case Letter */
 	UCD_CATEGORY_Lu, /**< @brief Upper Case Letter */

 	UCD_CATEGORY_Mc, /**< @brief Spacing Mark */
 	UCD_CATEGORY_Me, /**< @brief Enclosing Mark */
 	UCD_CATEGORY_Mn, /**< @brief Non-Spacing Mark */

 	UCD_CATEGORY_Nd, /**< @brief Decimal Digit */
 	UCD_CATEGORY_Nl, /**< @brief Letter Number */
 	UCD_CATEGORY_No, /**< @brief Other Number */

 	UCD_CATEGORY_Pc, /**< @brief Connector Punctuation */
 	UCD_CATEGORY_Pd, /**< @brief Dash/Hyphen */
 	UCD_CATEGORY_Pe, /**< @brief Close Punctuation Mark */
 	UCD_CATEGORY_Pf, /**< @brief Final Quotation Mark */
 	UCD_CATEGORY_Pi, /**< @brief Initial Quotation Mark */
 	UCD_CATEGORY_Po, /**< @brief Other Punctuation */
 	UCD_CATEGORY_Ps, /**< @brief Open Punctuation Mark */

 	UCD_CATEGORY_Sc, /**< @brief Currency Symbol */
 	UCD_CATEGORY_Sk, /**< @brief Modifier Symbol */
 	UCD_CATEGORY_Sm, /**< @brief Math Symbol */
 	UCD_CATEGORY_So, /**< @brief Other Symbol */

 	UCD_CATEGORY_Zl, /**< @brief Line Separator */
 	UCD_CATEGORY_Zp, /**< @brief Paragraph Separator */
 	UCD_CATEGORY_Zs, /**< @brief Space Separator */
 } ucd_category;

 /** @brief Get a string representation of the category enumeration value.
  *
  * @param c The value to get the string representation for.
  *
  * @return The string representation, or "--" if the value is not recognized.
  */
 const char *ucd_get_category_string(ucd_category c);

 /** @brief Lookup the General Category Group for a General Category.
  *
  * @param c The General Category to lookup.
  * @return  The General Category Group of the General Category.
  */
 ucd_category_group ucd_get_category_group_for_category(ucd_category c);

 /** @brief Lookup the General Category Group for a Unicode codepoint.
  *
  * @param c The Unicode codepoint to lookup.
  * @return  The General Category Group of the Unicode codepoint.
  */
 ucd_category_group ucd_lookup_category_group(codepoint_t c);

 /** @brief Lookup the General Category for a Unicode codepoint.
  *
  * @param c The Unicode codepoint to lookup.
  * @return  The General Category of the Unicode codepoint.
  */
 ucd_category ucd_lookup_category(codepoint_t c);

 /** @brief Unicode Script
  * @see   http://www.iana.org/assignments/language-subtag-registry
  * @see   http://www.unicode.org/iso15924/iso15924-codes.html
  */
 typedef enum ucd_script_
 {
 	UCD_SCRIPT_Afak, /**< @brief Afaka Script */
 	UCD_SCRIPT_Aghb, /**< @brief Caucasian Albanian Script */
 	UCD_SCRIPT_Ahom, /**< @brief Tai Ahom Script */
 	UCD_SCRIPT_Arab, /**< @brief Arabic Script */
 	UCD_SCRIPT_Armi, /**< @brief Imperial Aramaic Script */
 	UCD_SCRIPT_Armn, /**< @brief Armenian Script */
 	UCD_SCRIPT_Avst, /**< @brief Avestan Script */
 	UCD_SCRIPT_Bali, /**< @brief Balinese Script */
 	UCD_SCRIPT_Bamu, /**< @brief Bamum Script */
 	UCD_SCRIPT_Bass, /**< @brief Bassa Vah Script */
 	UCD_SCRIPT_Batk, /**< @brief Batak Script */
 	UCD_SCRIPT_Beng, /**< @brief Bengali Script */
 	UCD_SCRIPT_Blis, /**< @brief Blissymbols Script */
 	UCD_SCRIPT_Bopo, /**< @brief Bopomofo Script */
 	UCD_SCRIPT_Brah, /**< @brief Brahmi Script */
 	UCD_SCRIPT_Brai, /**< @brief Braille Script */
 	UCD_SCRIPT_Bugi, /**< @brief Buginese Script */
 	UCD_SCRIPT_Buhd, /**< @brief Buhid Script */
 	UCD_SCRIPT_Cakm, /**< @brief Chakma Script */
 	UCD_SCRIPT_Cans, /**< @brief Unified Canadian Aboriginal Syllabics */
 	UCD_SCRIPT_Cari, /**< @brief Carian Script */
 	UCD_SCRIPT_Cham, /**< @brief Cham Script */
 	UCD_SCRIPT_Cher, /**< @brief Cherokee Script */
 	UCD_SCRIPT_Cirt, /**< @brief Cirth Script */
 	UCD_SCRIPT_Copt, /**< @brief Coptic Script */
 	UCD_SCRIPT_Cprt, /**< @brief Cypriot Script */
 	UCD_SCRIPT_Cyrl, /**< @brief Cyrillic Script */
 	UCD_SCRIPT_Cyrs, /**< @brief Cyrillic (Old Church Slavonic variant) Script */
 	UCD_SCRIPT_Deva, /**< @brief Devanagari Script */
 	UCD_SCRIPT_Dsrt, /**< @brief Deseret Script */
 	UCD_SCRIPT_Dupl, /**< @brief Duployan Shorthand Script */
 	UCD_SCRIPT_Egyd, /**< @brief Egyptian Demotic Script */
 	UCD_SCRIPT_Egyh, /**< @brief Egyptian Hieratic Script */
 	UCD_SCRIPT_Egyp, /**< @brief Egyptian Hieroglyphs */
 	UCD_SCRIPT_Elba, /**< @brief Elbasan Script */
 	UCD_SCRIPT_Ethi, /**< @brief Ethiopic Script */
 	UCD_SCRIPT_Geok, /**< @brief Khutsuri Script */
 	UCD_SCRIPT_Geor, /**< @brief Georgian Script */
 	UCD_SCRIPT_Glag, /**< @brief Glagolitic Script */
 	UCD_SCRIPT_Goth, /**< @brief Gothic Script */
 	UCD_SCRIPT_Gran, /**< @brief Grantha Script */
 	UCD_SCRIPT_Grek, /**< @brief Greek Script */
 	UCD_SCRIPT_Gujr, /**< @brief Gujarati Script */
 	UCD_SCRIPT_Guru, /**< @brief Gurmukhi Script */
 	UCD_SCRIPT_Hang, /**< @brief Hangul Script */
 	UCD_SCRIPT_Hani, /**< @brief Han (Hanzi, Kanji, Hanja) Script */
 	UCD_SCRIPT_Hano, /**< @brief Hanunoo Script */
 	UCD_SCRIPT_Hans, /**< @brief Han (Simplified) Script */
 	UCD_SCRIPT_Hant, /**< @brief Han (Traditional) Script */
 	UCD_SCRIPT_Hatr, /**< @brief Hatran Script */
 	UCD_SCRIPT_Hebr, /**< @brief Hebrew Script */
 	UCD_SCRIPT_Hira, /**< @brief Hiragana Script */
 	UCD_SCRIPT_Hluw, /**< @brief Anatolian Hieroglyphs */
 	UCD_SCRIPT_Hmng, /**< @brief Pahawh Hmong Script */
 	UCD_SCRIPT_Hrkt, /**< @brief Japanese Syllabaries */
 	UCD_SCRIPT_Hung, /**< @brief Old Hungarian Script */
 	UCD_SCRIPT_Inds, /**< @brief Indus Script */
 	UCD_SCRIPT_Ital, /**< @brief Old Italic Script */
 	UCD_SCRIPT_Java, /**< @brief Javanese Script */
 	UCD_SCRIPT_Jpan, /**< @brief Japanese (Han + Hiragana + Katakana) Scripts */
 	UCD_SCRIPT_Jurc, /**< @brief Jurchen Script */
 	UCD_SCRIPT_Kali, /**< @brief Kayah Li Script */
 	UCD_SCRIPT_Kana, /**< @brief Katakana Script */
 	UCD_SCRIPT_Khar, /**< @brief Kharoshthi Script */
 	UCD_SCRIPT_Khmr, /**< @brief Khmer Script */
 	UCD_SCRIPT_Khoj, /**< @brief Khojki Script */
 	UCD_SCRIPT_Knda, /**< @brief Kannada Script */
 	UCD_SCRIPT_Kore, /**< @brief Korean (Hangul + Han) Scripts */
 	UCD_SCRIPT_Kpel, /**< @brief Kpelle Script */
 	UCD_SCRIPT_Kthi, /**< @brief Kaithi Script */
 	UCD_SCRIPT_Lana, /**< @brief Tai Tham Script */
 	UCD_SCRIPT_Laoo, /**< @brief Lao Script */
 	UCD_SCRIPT_Latf, /**< @brief Latin Script (Fraktur Variant) */
 	UCD_SCRIPT_Latg, /**< @brief Latin Script (Gaelic Variant) */
 	UCD_SCRIPT_Latn, /**< @brief Latin Script */
 	UCD_SCRIPT_Lepc, /**< @brief Lepcha Script */
 	UCD_SCRIPT_Limb, /**< @brief Limbu Script */
 	UCD_SCRIPT_Lina, /**< @brief Linear A Script */
 	UCD_SCRIPT_Linb, /**< @brief Linear B Script */
 	UCD_SCRIPT_Lisu, /**< @brief Lisu Script */
 	UCD_SCRIPT_Loma, /**< @brief Loma Script */
 	UCD_SCRIPT_Lyci, /**< @brief Lycian Script */
 	UCD_SCRIPT_Lydi, /**< @brief Lydian Script */
 	UCD_SCRIPT_Mahj, /**< @brief Mahajani Script */
 	UCD_SCRIPT_Mand, /**< @brief Mandaic Script */
 	UCD_SCRIPT_Mani, /**< @brief Manichaean Script */
 	UCD_SCRIPT_Maya, /**< @brief Mayan Hieroglyphs */
 	UCD_SCRIPT_Mend, /**< @brief Mende Kikakui Script */
 	UCD_SCRIPT_Merc, /**< @brief Meroitic Cursive Script */
 	UCD_SCRIPT_Mero, /**< @brief Meroitic Hieroglyphs */
 	UCD_SCRIPT_Mlym, /**< @brief Malayalam Script */
 	UCD_SCRIPT_Modi, /**< @brief Modi Script */
 	UCD_SCRIPT_Mong, /**< @brief Mongolian Script */
 	UCD_SCRIPT_Moon, /**< @brief Moon Script */
 	UCD_SCRIPT_Mroo, /**< @brief Mro Script */
 	UCD_SCRIPT_Mtei, /**< @brief Meitei Mayek Script */
 	UCD_SCRIPT_Mult, /**< @brief Multani Script */
 	UCD_SCRIPT_Mymr, /**< @brief Myanmar (Burmese) Script */
 	UCD_SCRIPT_Narb, /**< @brief Old North Arabian Script */
 	UCD_SCRIPT_Nbat, /**< @brief Nabataean Script */
 	UCD_SCRIPT_Nkgb, /**< @brief Nakhi Geba Script */
 	UCD_SCRIPT_Nkoo, /**< @brief N'Ko Script */
 	UCD_SCRIPT_Nshu, /**< @brief Nushu Script */
 	UCD_SCRIPT_Ogam, /**< @brief Ogham Script */
 	UCD_SCRIPT_Olck, /**< @brief Ol Chiki Script */
 	UCD_SCRIPT_Orkh, /**< @brief Old Turkic Script */
 	UCD_SCRIPT_Orya, /**< @brief Oriya Script */
 	UCD_SCRIPT_Osma, /**< @brief Osmanya Script */
 	UCD_SCRIPT_Palm, /**< @brief Palmyrene Script */
 	UCD_SCRIPT_Pauc, /**< @brief Pau Cin Hau Script */
 	UCD_SCRIPT_Perm, /**< @brief Old Permic */
 	UCD_SCRIPT_Phag, /**< @brief Phags-Pa Script */
 	UCD_SCRIPT_Phli, /**< @brief Inscriptional Pahlavi Script */
 	UCD_SCRIPT_Phlp, /**< @brief Psalter Pahlavi Script */
 	UCD_SCRIPT_Phlv, /**< @brief Book Pahlavi Script */
 	UCD_SCRIPT_Phnx, /**< @brief Phoenician Script */
 	UCD_SCRIPT_Plrd, /**< @brief Miao Script */
 	UCD_SCRIPT_Prti, /**< @brief Inscriptional Parthian Script */
 	UCD_SCRIPT_Qaak, /**< @brief Klingon Script (Private Use) */
 	UCD_SCRIPT_Rjng, /**< @brief Rejang Script */
 	UCD_SCRIPT_Roro, /**< @brief Rongorongo Script */
 	UCD_SCRIPT_Runr, /**< @brief Runic Script */
 	UCD_SCRIPT_Samr, /**< @brief Samaritan Script */
 	UCD_SCRIPT_Sara, /**< @brief Sarati Script */
 	UCD_SCRIPT_Sarb, /**< @brief Old South Arabian Script */
 	UCD_SCRIPT_Saur, /**< @brief Saurashtra Script */
 	UCD_SCRIPT_Sgnw, /**< @brief Sign Writing */
 	UCD_SCRIPT_Shaw, /**< @brief Shavian Script */
 	UCD_SCRIPT_Shrd, /**< @brief Sharada Script */
 	UCD_SCRIPT_Sidd, /**< @brief Siddham Script */
 	UCD_SCRIPT_Sind, /**< @brief Sindhi Script */
 	UCD_SCRIPT_Sinh, /**< @brief Sinhala Script */
 	UCD_SCRIPT_Sora, /**< @brief Sora Sompeng Script */
 	UCD_SCRIPT_Sund, /**< @brief Sundanese Script */
 	UCD_SCRIPT_Sylo, /**< @brief Syloti Nagri Script */
 	UCD_SCRIPT_Syrc, /**< @brief Syriac Script */
 	UCD_SCRIPT_Syre, /**< @brief Syriac Script (Estrangelo Variant) */
 	UCD_SCRIPT_Syrj, /**< @brief Syriac Script (Western Variant) */
 	UCD_SCRIPT_Syrn, /**< @brief Syriac Script (Eastern Variant) */
 	UCD_SCRIPT_Tagb, /**< @brief Tagbanwa Script */
 	UCD_SCRIPT_Takr, /**< @brief Takri Script */
 	UCD_SCRIPT_Tale, /**< @brief Tai Le Script */
 	UCD_SCRIPT_Talu, /**< @brief New Tai Lue Script */
 	UCD_SCRIPT_Taml, /**< @brief Tamil Script */
 	UCD_SCRIPT_Tang, /**< @brief Tangut Script */
 	UCD_SCRIPT_Tavt, /**< @brief Tai Viet Script */
 	UCD_SCRIPT_Telu, /**< @brief Telugu Script */
 	UCD_SCRIPT_Teng, /**< @brief Tengwar Script */
 	UCD_SCRIPT_Tfng, /**< @brief Tifinagh Script */
 	UCD_SCRIPT_Tglg, /**< @brief Tagalog Script */
 	UCD_SCRIPT_Thaa, /**< @brief Thaana Script */
 	UCD_SCRIPT_Thai, /**< @brief Thai Script */
 	UCD_SCRIPT_Tibt, /**< @brief Tibetan Script */
 	UCD_SCRIPT_Tirh, /**< @brief Tirhuta Script */
 	UCD_SCRIPT_Ugar, /**< @brief Ugaritic Script */
 	UCD_SCRIPT_Vaii, /**< @brief Vai Script */
 	UCD_SCRIPT_Visp, /**< @brief Visible Speech Script */
 	UCD_SCRIPT_Wara, /**< @brief Warang Citi Script */
 	UCD_SCRIPT_Wole, /**< @brief Woleai Script */
 	UCD_SCRIPT_Xpeo, /**< @brief Old Persian Script */
 	UCD_SCRIPT_Xsux, /**< @brief Cuneiform Script */
 	UCD_SCRIPT_Yiii, /**< @brief Yi Script */
 	UCD_SCRIPT_Zinh, /**< @brief Inherited Script */
 	UCD_SCRIPT_Zmth, /**< @brief Mathematical Notation */
 	UCD_SCRIPT_Zsym, /**< @brief Symbols */
 	UCD_SCRIPT_Zxxx, /**< @brief Unwritten Documents */
 	UCD_SCRIPT_Zyyy, /**< @brief Undetermined Script */
 	UCD_SCRIPT_Zzzz, /**< @brief Uncoded Script */
 } ucd_script;

 /** @brief Get a string representation of the script enumeration value.
  *
  * @param s The value to get the string representation for.
  *
  * @return The string representation, or "----" if the value is not recognized.
  */
 const char *ucd_get_script_string(ucd_script s);

 /** @brief Lookup the Script for a Unicode codepoint.
  *
  * @param c The Unicode codepoint to lookup.
  * @return  The Script of the Unicode codepoint.
  */
 ucd_script ucd_lookup_script(codepoint_t c);

 /** @brief Is the codepoint an alpha-numeric character?
  *
  * @param c The Unicode codepoint to check.
  * @return  Non-zero if the codepoint is a letter or number, zero otherwise.
  */
 int ucd_isalnum(codepoint_t c);

 /** @brief Is the codepoint a letter?
  *
  * @param c The Unicode codepoint to check.
  * @return  Non-zero if the codepoint is a letter, zero otherwise.
  */
 int ucd_isalpha(codepoint_t c);

 /** @brief Is the codepoint a control character?
  *
  * @param c The Unicode codepoint to check.
  * @return  Non-zero if the codepoint is a control character, zero otherwise.
  */
 int ucd_iscntrl(codepoint_t c);

 /** @brief Is the codepoint a numeric character?
  *
  * @param c The Unicode codepoint to check.
  * @return  Non-zero if the codepoint is a number, zero otherwise.
  */
 int ucd_isdigit(codepoint_t c);

 /** @brief Does the codepoint have a displayable glyph?
  *
  * @param c The Unicode codepoint to check.
  * @return  Non-zero if the codepoint has a displayable glyph, zero otherwise.
  */
 int ucd_isgraph(codepoint_t c);

 /** @brief Is the codepoint a lower-case letter?
  *
  * @param c The Unicode codepoint to check.
  * @return  Non-zero if the codepoint is a lower-case letter, zero otherwise.
  */
 int ucd_islower(codepoint_t c);

 /** @brief Is the codepoint a printable character?
  *
  * @param c The Unicode codepoint to check.
  * @return  Non-zero if the codepoint is a printable character, zero otherwise.
  */
 int ucd_isprint(codepoint_t c);

 /** @brief Is the codepoint a punctuation character?
  *
  * @param c The Unicode codepoint to check.
  * @return  Non-zero if the codepoint is a punctuation character, zero otherwise.
  */
 int ucd_ispunct(codepoint_t c);

 /** @brief Is the codepoint a whitespace character?
  *
  * @param c The Unicode codepoint to check.
  * @return  Non-zero if the codepoint is a whitespace character, zero otherwise.
  */
 int ucd_isspace(codepoint_t c);

 /** @brief Is the codepoint an upper-case letter?
  *
  * @param c The Unicode codepoint to check.
  * @return  Non-zero if the codepoint is an upper-case letter, zero otherwise.
  */
 int ucd_isupper(codepoint_t c);

 /** @brief Convert the Unicode codepoint to upper-case.
  *
  * This function only uses the simple case mapping present in the
  * UnicodeData file. The data in SpecialCasing requires Unicode
  * codepoints to be mapped to multiple codepoints.
  *
  * @param c The Unicode codepoint to convert.
  * @return  The upper-case Unicode codepoint for this codepoint, or
  *          this codepoint if there is no upper-case codepoint.
  */
 codepoint_t ucd_toupper(codepoint_t c);

 /** @brief Convert the Unicode codepoint to lower-case.
  *
  * This function only uses the simple case mapping present in the
  * UnicodeData file. The data in SpecialCasing requires Unicode
  * codepoints to be mapped to multiple codepoints.
  *
  * @param c The Unicode codepoint to convert.
  * @return  The lower-case Unicode codepoint for this codepoint, or
  *          this codepoint if there is no lower-case codepoint.
  */
 codepoint_t ucd_tolower(codepoint_t c);

 /** @brief Convert the Unicode codepoint to title-case.
  *
  * This function only uses the simple case mapping present in the
  * UnicodeData file. The data in SpecialCasing requires Unicode
  * codepoints to be mapped to multiple codepoints.
  *
  * @param c The Unicode codepoint to convert.
  * @return  The title-case Unicode codepoint for this codepoint, or
  *          this codepoint if there is no title-case codepoint.
  */
 codepoint_t ucd_totitle(codepoint_t c);

 #ifdef __cplusplus
 }

 /** @brief Unicode Character Database
  */
 namespace ucd
 {
 	/** @brief Represents a Unicode codepoint.
 	  */
 	typedef uint32_t codepoint_t;
 	using ::codepoint_t;

 	/** @brief Unicode General Category Groups
 	  * @see   http://www.unicode.org/reports/tr44/
 	  */
 	enum category_group
 	{
 		// Each enumerator is declared exactly once and takes its value from the
 		// matching ucd_category_group enumerator, because the C API wrappers cast
 		// between ucd::category_group and ucd_category_group.
 		C = UCD_CATEGORY_GROUP_C, /**< @brief Other */
 		I = UCD_CATEGORY_GROUP_I, /**< @brief Invalid */
 		L = UCD_CATEGORY_GROUP_L, /**< @brief Letter */
 		M = UCD_CATEGORY_GROUP_M, /**< @brief Mark */
 		N = UCD_CATEGORY_GROUP_N, /**< @brief Number */
 		P = UCD_CATEGORY_GROUP_P, /**< @brief Punctuation */
 		S = UCD_CATEGORY_GROUP_S, /**< @brief Symbol */
 		Z = UCD_CATEGORY_GROUP_Z, /**< @brief Separator */
 	};

 	/** @brief Get a string representation of the category_group enumeration value.
@@ -59,44 +465,44 @@ namespace ucd
 	  */
 	enum category
 	{
 		Cc, /**< @brief Control Character */
 		Cf, /**< @brief Format Control Character */
 		Cn, /**< @brief Unassigned */
 		Co, /**< @brief Private Use */
 		Cs, /**< @brief Surrogate Code Point */

 		Ii, /**< @brief Invalid Unicode Codepoint */

 		Ll, /**< @brief Lower Case Letter */
 		Lm, /**< @brief Letter Modifier */
 		Lo, /**< @brief Other Letter */
 		Lt, /**< @brief Title Case Letter */
 		Lu, /**< @brief Upper Case Letter */

 		Mc, /**< @brief Spacing Mark */
 		Me, /**< @brief Enclosing Mark */
 		Mn, /**< @brief Non-Spacing Mark */

 		Nd, /**< @brief Decimal Digit */
 		Nl, /**< @brief Letter-Like Number */
 		No, /**< @brief Other Number */

 		Pc, /**< @brief Connector */
 		Pd, /**< @brief Dash/Hyphen */
 		Pe, /**< @brief Close Punctuation Mark */
 		Pf, /**< @brief Final Quotation Mark */
 		Pi, /**< @brief Initial Quotation Mark */
 		Po, /**< @brief Other */
 		Ps, /**< @brief Open Punctuation Mark */

 		Sc, /**< @brief Currency Symbol */
 		Sk, /**< @brief Modifier Symbol */
 		Sm, /**< @brief Math Symbol */
 		So, /**< @brief Other Symbol */

 		Zl, /**< @brief Line Separator */
 		Zp, /**< @brief Paragraph Separator */
 		Zs, /**< @brief Space Separator */
 		Cc = UCD_CATEGORY_Cc, /**< @brief Control Character */
 		Cf = UCD_CATEGORY_Cf, /**< @brief Format Control Character */
 		Cn = UCD_CATEGORY_Cn, /**< @brief Unassigned */
 		Co = UCD_CATEGORY_Co, /**< @brief Private Use */
 		Cs = UCD_CATEGORY_Cs, /**< @brief Surrogate Code Point */

 		Ii = UCD_CATEGORY_Ii, /**< @brief Invalid Unicode Codepoint */

 		Ll = UCD_CATEGORY_Ll, /**< @brief Lower Case Letter */
 		Lm = UCD_CATEGORY_Lm, /**< @brief Letter Modifier */
 		Lo = UCD_CATEGORY_Lo, /**< @brief Other Letter */
 		Lt = UCD_CATEGORY_Lt, /**< @brief Title Case Letter */
 		Lu = UCD_CATEGORY_Lu, /**< @brief Upper Case Letter */

 		Mc = UCD_CATEGORY_Mc, /**< @brief Spacing Mark */
 		Me = UCD_CATEGORY_Me, /**< @brief Enclosing Mark */
 		Mn = UCD_CATEGORY_Mn, /**< @brief Non-Spacing Mark */

 		Nd = UCD_CATEGORY_Nd, /**< @brief Decimal Digit */
 		Nl = UCD_CATEGORY_Nl, /**< @brief Letter-Like Number */
 		No = UCD_CATEGORY_No, /**< @brief Other Number */

 		Pc = UCD_CATEGORY_Pc, /**< @brief Connector */
 		Pd = UCD_CATEGORY_Pd, /**< @brief Dash/Hyphen */
 		Pe = UCD_CATEGORY_Pe, /**< @brief Close Punctuation Mark */
 		Pf = UCD_CATEGORY_Pf, /**< @brief Final Quotation Mark */
 		Pi = UCD_CATEGORY_Pi, /**< @brief Initial Quotation Mark */
 		Po = UCD_CATEGORY_Po, /**< @brief Other */
 		Ps = UCD_CATEGORY_Ps, /**< @brief Open Punctuation Mark */

 		Sc = UCD_CATEGORY_Sc, /**< @brief Currency Symbol */
 		Sk = UCD_CATEGORY_Sk, /**< @brief Modifier Symbol */
 		Sm = UCD_CATEGORY_Sm, /**< @brief Math Symbol */
 		So = UCD_CATEGORY_So, /**< @brief Other Symbol */

 		Zl = UCD_CATEGORY_Zl, /**< @brief Line Separator */
 		Zp = UCD_CATEGORY_Zp, /**< @brief Paragraph Separator */
 		Zs = UCD_CATEGORY_Zs, /**< @brief Space Separator */
 	};

 	/** @brief Get a string representation of the category enumeration value.
@@ -134,174 +540,174 @@ namespace ucd
 	  */
 	enum script
 	{
 		Afak, /**< @brief Afaka Script */
 		Aghb, /**< @brief Caucasian Albanian Script */
 		Ahom, /**< @brief Tai Ahom Script */
 		Arab, /**< @brief Arabic Script */
 		Armi, /**< @brief Imperial Aramaic Script */
 		Armn, /**< @brief Armenian Script */
 		Avst, /**< @brief Avestan Script */
 		Bali, /**< @brief Balinese Script */
 		Bamu, /**< @brief Bamum Script */
 		Bass, /**< @brief Bassa Vah Script */
 		Batk, /**< @brief Batak Script */
 		Beng, /**< @brief Bengali Script */
 		Blis, /**< @brief Blissymbols Script */
 		Bopo, /**< @brief Bopomofo Script */
 		Brah, /**< @brief Brahmi Script */
 		Brai, /**< @brief Braille Script */
 		Bugi, /**< @brief Buginese Script */
 		Buhd, /**< @brief Buhid Script */
 		Cakm, /**< @brief Chakma Script */
 		Cans, /**< @brief Unified Canadian Aboriginal Syllabics */
 		Cari, /**< @brief Carian Script */
 		Cham, /**< @brief Cham Script */
 		Cher, /**< @brief Cherokee Script */
 		Cirt, /**< @brief Cirth Script */
 		Copt, /**< @brief Coptic Script */
 		Cprt, /**< @brief Cypriot Script */
 		Cyrl, /**< @brief Cyrillic Script */
 		Cyrs, /**< @brief Cyrillic (Old Church Slavonic variant) Script */
 		Deva, /**< @brief Devanagari Script */
 		Dsrt, /**< @brief Deseret Script */
 		Dupl, /**< @brief Duployan Shorthand Script */
 		Egyd, /**< @brief Egyptian Demotic Script */
 		Egyh, /**< @brief Egyptian Hieratic Script */
 		Egyp, /**< @brief Egyptian Hieroglyphs */
 		Elba, /**< @brief Elbasan Script */
 		Ethi, /**< @brief Ethiopic Script */
 		Geok, /**< @brief Khutsuri Script */
 		Geor, /**< @brief Georgian Script */
 		Glag, /**< @brief Glagolitic Script */
 		Goth, /**< @brief Gothic Script */
 		Gran, /**< @brief Grantha Script */
 		Grek, /**< @brief Greek Script */
 		Gujr, /**< @brief Gujarati Script */
 		Guru, /**< @brief Gurmukhi Script */
 		Hang, /**< @brief Hangul Script */
 		Hani, /**< @brief Han (Hanzi, Kanji, Hanja) Script */
 		Hano, /**< @brief Hanunoo Script */
 		Hans, /**< @brief Han (Simplified) Script */
 		Hant, /**< @brief Han (Traditional) Script */
 		Hatr, /**< @brief Hatran Script */
 		Hebr, /**< @brief Hebrew Script */
 		Hira, /**< @brief Hiragana Script */
 		Hluw, /**< @brief Anatolian Hieroglyphs */
 		Hmng, /**< @brief Pahawh Hmong Script */
 		Hrkt, /**< @brief Japanese Syllabaries */
 		Hung, /**< @brief Old Hungarian Script */
 		Inds, /**< @brief Indus Script */
 		Ital, /**< @brief Old Italic Script */
 		Java, /**< @brief Javanese Script */
 		Jpan, /**< @brief Japanese (Han + Hiragana + Katakana) Scripts */
 		Jurc, /**< @brief Jurchen Script */
 		Kali, /**< @brief Kayah Li Script */
 		Kana, /**< @brief Katakana Script */
 		Khar, /**< @brief Kharoshthi Script */
 		Khmr, /**< @brief Khmer Script */
 		Khoj, /**< @brief Khojki Script */
 		Knda, /**< @brief Kannada Script */
 		Kore, /**< @brief Korean (Hangul + Han) Scripts */
 		Kpel, /**< @brief Kpelle Script */
 		Kthi, /**< @brief Kaithi Script */
 		Lana, /**< @brief Tai Tham Script */
 		Laoo, /**< @brief Lao Script */
 		Latf, /**< @brief Latin Script (Fraktur Variant) */
 		Latg, /**< @brief Latin Script (Gaelic Variant) */
 		Latn, /**< @brief Latin Script */
 		Lepc, /**< @brief Lepcha Script */
 		Limb, /**< @brief Limbu Script */
 		Lina, /**< @brief Linear A Script */
 		Linb, /**< @brief Linear B Script */
 		Lisu, /**< @brief Lisu Script */
 		Loma, /**< @brief Loma Script */
 		Lyci, /**< @brief Lycian Script */
 		Lydi, /**< @brief Lydian Script */
 		Mahj, /**< @brief Mahajani Script */
 		Mand, /**< @brief Mandaic Script */
 		Mani, /**< @brief Manichaean Script */
 		Maya, /**< @brief Mayan Hieroglyphs */
 		Mend, /**< @brief Mende Kikakui Script */
 		Merc, /**< @brief Meroitic Cursive Script */
 		Mero, /**< @brief Meroitic Hieroglyphs */
 		Mlym, /**< @brief Malayalam Script */
 		Modi, /**< @brief Modi Script */
 		Mong, /**< @brief Mongolian Script */
 		Moon, /**< @brief Moon Script */
 		Mroo, /**< @brief Mro Script */
 		Mtei, /**< @brief Meitei Mayek Script */
 		Mult, /**< @brief Multani Script */
 		Mymr, /**< @brief Myanmar (Burmese) Script */
 		Narb, /**< @brief Old North Arabian Script */
 		Nbat, /**< @brief Nabataean Script */
 		Nkgb, /**< @brief Nakhi Geba Script */
 		Nkoo, /**< @brief N'Ko Script */
 		Nshu, /**< @brief Nushu Script */
 		Ogam, /**< @brief Ogham Script */
 		Olck, /**< @brief Ol Chiki Script */
 		Orkh, /**< @brief Old Turkic Script */
 		Orya, /**< @brief Oriya Script */
 		Osma, /**< @brief Osmanya Script */
 		Palm, /**< @brief Palmyrene Script */
 		Pauc, /**< @brief Pau Cin Hau Script */
 		Perm, /**< @brief Old Permic */
 		Phag, /**< @brief Phags-Pa Script */
 		Phli, /**< @brief Inscriptional Pahlavi Script */
 		Phlp, /**< @brief Psalter Pahlavi Script */
 		Phlv, /**< @brief Book Pahlavi Script */
 		Phnx, /**< @brief Phoenician Script */
 		Plrd, /**< @brief Miao Script */
 		Prti, /**< @brief Inscriptional Parthian Script */
 		Qaak, /**< @brief Klingon Script (Private Use) */
 		Rjng, /**< @brief Rejang Script */
 		Roro, /**< @brief Rongorongo Script */
 		Runr, /**< @brief Runic Script */
 		Samr, /**< @brief Samaritan Script */
 		Sara, /**< @brief Sarati Script */
 		Sarb, /**< @brief Old South Arabian Script */
 		Saur, /**< @brief Saurashtra Script */
 		Sgnw, /**< @brief Sign Writing */
 		Shaw, /**< @brief Shavian Script */
 		Shrd, /**< @brief Sharada Script */
 		Sidd, /**< @brief Siddham Script */
 		Sind, /**< @brief Sindhi Script */
 		Sinh, /**< @brief Sinhala Script */
 		Sora, /**< @brief Sora Sompeng Script */
 		Sund, /**< @brief Sundanese Script */
 		Sylo, /**< @brief Syloti Nagri Script */
 		Syrc, /**< @brief Syriac Script */
 		Syre, /**< @brief Syriac Script (Estrangelo Variant) */
 		Syrj, /**< @brief Syriac Script (Western Variant) */
 		Syrn, /**< @brief Syriac Script (Eastern Variant) */
 		Tagb, /**< @brief Tagbanwa Script */
 		Takr, /**< @brief Takri Script */
 		Tale, /**< @brief Tai Le Script */
 		Talu, /**< @brief New Tai Lue Script */
 		Taml, /**< @brief Tamil Script */
 		Tang, /**< @brief Tangut Script */
 		Tavt, /**< @brief Tai Viet Script */
 		Telu, /**< @brief Telugu Script */
 		Teng, /**< @brief Tengwar Script */
 		Tfng, /**< @brief Tifinagh Script */
 		Tglg, /**< @brief Tagalog Script */
 		Thaa, /**< @brief Thaana Script */
 		Thai, /**< @brief Thai Script */
 		Tibt, /**< @brief Tibetan Script */
 		Tirh, /**< @brief Tirhuta Script */
 		Ugar, /**< @brief Ugaritic Script */
 		Vaii, /**< @brief Vai Script */
 		Visp, /**< @brief Visible Speech Script */
 		Wara, /**< @brief Warang Citi Script */
 		Wole, /**< @brief Woleai Script */
 		Xpeo, /**< @brief Old Persian Script */
 		Xsux, /**< @brief Cuneiform Script */
 		Yiii, /**< @brief Yi Script */
 		Zinh, /**< @brief Inherited Script */
 		Zmth, /**< @brief Mathematical Notation */
 		Zsym, /**< @brief Symbols */
 		Zxxx, /**< @brief Unwritten Documents */
 		Zyyy, /**< @brief Undetermined Script */
 		Zzzz, /**< @brief Uncoded Script */
 		Afak = UCD_SCRIPT_Afak, /**< @brief Afaka Script */
 		Aghb = UCD_SCRIPT_Aghb, /**< @brief Caucasian Albanian Script */
 		Ahom = UCD_SCRIPT_Ahom, /**< @brief Tai Ahom Script */
 		Arab = UCD_SCRIPT_Arab, /**< @brief Arabic Script */
 		Armi = UCD_SCRIPT_Armi, /**< @brief Imperial Aramaic Script */
 		Armn = UCD_SCRIPT_Armn, /**< @brief Armenian Script */
 		Avst = UCD_SCRIPT_Avst, /**< @brief Avestan Script */
 		Bali = UCD_SCRIPT_Bali, /**< @brief Balinese Script */
 		Bamu = UCD_SCRIPT_Bamu, /**< @brief Bamum Script */
 		Bass = UCD_SCRIPT_Bass, /**< @brief Bassa Vah Script */
 		Batk = UCD_SCRIPT_Batk, /**< @brief Batak Script */
 		Beng = UCD_SCRIPT_Beng, /**< @brief Bengali Script */
 		Blis = UCD_SCRIPT_Blis, /**< @brief Blissymbols Script */
 		Bopo = UCD_SCRIPT_Bopo, /**< @brief Bopomofo Script */
 		Brah = UCD_SCRIPT_Brah, /**< @brief Brahmi Script */
 		Brai = UCD_SCRIPT_Brai, /**< @brief Braille Script */
 		Bugi = UCD_SCRIPT_Bugi, /**< @brief Buginese Script */
 		Buhd = UCD_SCRIPT_Buhd, /**< @brief Buhid Script */
 		Cakm = UCD_SCRIPT_Cakm, /**< @brief Chakma Script */
 		Cans = UCD_SCRIPT_Cans, /**< @brief Unified Canadian Aboriginal Syllabics */
 		Cari = UCD_SCRIPT_Cari, /**< @brief Carian Script */
 		Cham = UCD_SCRIPT_Cham, /**< @brief Cham Script */
 		Cher = UCD_SCRIPT_Cher, /**< @brief Cherokee Script */
 		Cirt = UCD_SCRIPT_Cirt, /**< @brief Cirth Script */
 		Copt = UCD_SCRIPT_Copt, /**< @brief Coptic Script */
 		Cprt = UCD_SCRIPT_Cprt, /**< @brief Cypriot Script */
 		Cyrl = UCD_SCRIPT_Cyrl, /**< @brief Cyrillic Script */
 		Cyrs = UCD_SCRIPT_Cyrs, /**< @brief Cyrillic (Old Church Slavonic variant) Script */
 		Deva = UCD_SCRIPT_Deva, /**< @brief Devanagari Script */
 		Dsrt = UCD_SCRIPT_Dsrt, /**< @brief Deseret Script */
 		Dupl = UCD_SCRIPT_Dupl, /**< @brief Duployan Shorthand Script */
 		Egyd = UCD_SCRIPT_Egyd, /**< @brief Egyptian Demotic Script */
 		Egyh = UCD_SCRIPT_Egyh, /**< @brief Egyptian Hieratic Script */
 		Egyp = UCD_SCRIPT_Egyp, /**< @brief Egyptian Hieroglyphs */
 		Elba = UCD_SCRIPT_Elba, /**< @brief Elbasan Script */
 		Ethi = UCD_SCRIPT_Ethi, /**< @brief Ethiopic Script */
 		Geok = UCD_SCRIPT_Geok, /**< @brief Khutsuri Script */
 		Geor = UCD_SCRIPT_Geor, /**< @brief Georgian Script */
 		Glag = UCD_SCRIPT_Glag, /**< @brief Glagolitic Script */
 		Goth = UCD_SCRIPT_Goth, /**< @brief Gothic Script */
 		Gran = UCD_SCRIPT_Gran, /**< @brief Grantha Script */
 		Grek = UCD_SCRIPT_Grek, /**< @brief Greek Script */
 		Gujr = UCD_SCRIPT_Gujr, /**< @brief Gujarati Script */
 		Guru = UCD_SCRIPT_Guru, /**< @brief Gurmukhi Script */
 		Hang = UCD_SCRIPT_Hang, /**< @brief Hangul Script */
 		Hani = UCD_SCRIPT_Hani, /**< @brief Han (Hanzi, Kanji, Hanja) Script */
 		Hano = UCD_SCRIPT_Hano, /**< @brief Hanunoo Script */
 		Hans = UCD_SCRIPT_Hans, /**< @brief Han (Simplified) Script */
 		Hant = UCD_SCRIPT_Hant, /**< @brief Han (Traditional) Script */
 		Hatr = UCD_SCRIPT_Hatr, /**< @brief Hatran Script */
 		Hebr = UCD_SCRIPT_Hebr, /**< @brief Hebrew Script */
 		Hira = UCD_SCRIPT_Hira, /**< @brief Hiragana Script */
 		Hluw = UCD_SCRIPT_Hluw, /**< @brief Anatolian Hieroglyphs */
 		Hmng = UCD_SCRIPT_Hmng, /**< @brief Pahawh Hmong Script */
 		Hrkt = UCD_SCRIPT_Hrkt, /**< @brief Japanese Syllabaries */
 		Hung = UCD_SCRIPT_Hung, /**< @brief Old Hungarian Script */
 		Inds = UCD_SCRIPT_Inds, /**< @brief Indus Script */
 		Ital = UCD_SCRIPT_Ital, /**< @brief Old Italic Script */
 		Java = UCD_SCRIPT_Java, /**< @brief Javanese Script */
 		Jpan = UCD_SCRIPT_Jpan, /**< @brief Japanese (Han + Hiragana + Katakana) Scripts */
 		Jurc = UCD_SCRIPT_Jurc, /**< @brief Jurchen Script */
 		Kali = UCD_SCRIPT_Kali, /**< @brief Kayah Li Script */
 		Kana = UCD_SCRIPT_Kana, /**< @brief Katakana Script */
 		Khar = UCD_SCRIPT_Khar, /**< @brief Kharoshthi Script */
 		Khmr = UCD_SCRIPT_Khmr, /**< @brief Khmer Script */
 		Khoj = UCD_SCRIPT_Khoj, /**< @brief Khojki Script */
 		Knda = UCD_SCRIPT_Knda, /**< @brief Kannada Script */
 		Kore = UCD_SCRIPT_Kore, /**< @brief Korean (Hangul + Han) Scripts */
 		Kpel = UCD_SCRIPT_Kpel, /**< @brief Kpelle Script */
 		Kthi = UCD_SCRIPT_Kthi, /**< @brief Kaithi Script */
 		Lana = UCD_SCRIPT_Lana, /**< @brief Tai Tham Script */
 		Laoo = UCD_SCRIPT_Laoo, /**< @brief Lao Script */
 		Latf = UCD_SCRIPT_Latf, /**< @brief Latin Script (Fraktur Variant) */
 		Latg = UCD_SCRIPT_Latg, /**< @brief Latin Script (Gaelic Variant) */
 		Latn = UCD_SCRIPT_Latn, /**< @brief Latin Script */
 		Lepc = UCD_SCRIPT_Lepc, /**< @brief Lepcha Script */
 		Limb = UCD_SCRIPT_Limb, /**< @brief Limbu Script */
 		Lina = UCD_SCRIPT_Lina, /**< @brief Linear A Script */
 		Linb = UCD_SCRIPT_Linb, /**< @brief Linear B Script */
 		Lisu = UCD_SCRIPT_Lisu, /**< @brief Lisu Script */
 		Loma = UCD_SCRIPT_Loma, /**< @brief Loma Script */
 		Lyci = UCD_SCRIPT_Lyci, /**< @brief Lycian Script */
 		Lydi = UCD_SCRIPT_Lydi, /**< @brief Lydian Script */
 		Mahj = UCD_SCRIPT_Mahj, /**< @brief Mahajani Script */
 		Mand = UCD_SCRIPT_Mand, /**< @brief Mandaic Script */
 		Mani = UCD_SCRIPT_Mani, /**< @brief Manichaean Script */
 		Maya = UCD_SCRIPT_Maya, /**< @brief Mayan Hieroglyphs */
 		Mend = UCD_SCRIPT_Mend, /**< @brief Mende Kikakui Script */
 		Merc = UCD_SCRIPT_Merc, /**< @brief Meroitic Cursive Script */
 		Mero = UCD_SCRIPT_Mero, /**< @brief Meroitic Hieroglyphs */
 		Mlym = UCD_SCRIPT_Mlym, /**< @brief Malayalam Script */
 		Modi = UCD_SCRIPT_Modi, /**< @brief Modi Script */
 		Mong = UCD_SCRIPT_Mong, /**< @brief Mongolian Script */
 		Moon = UCD_SCRIPT_Moon, /**< @brief Moon Script */
 		Mroo = UCD_SCRIPT_Mroo, /**< @brief Mro Script */
 		Mtei = UCD_SCRIPT_Mtei, /**< @brief Meitei Mayek Script */
 		Mult = UCD_SCRIPT_Mult, /**< @brief Multani Script */
 		Mymr = UCD_SCRIPT_Mymr, /**< @brief Myanmar (Burmese) Script */
 		Narb = UCD_SCRIPT_Narb, /**< @brief Old North Arabian Script */
 		Nbat = UCD_SCRIPT_Nbat, /**< @brief Nabataean Script */
 		Nkgb = UCD_SCRIPT_Nkgb, /**< @brief Nakhi Geba Script */
 		Nkoo = UCD_SCRIPT_Nkoo, /**< @brief N'Ko Script */
 		Nshu = UCD_SCRIPT_Nshu, /**< @brief Nushu Script */
 		Ogam = UCD_SCRIPT_Ogam, /**< @brief Ogham Script */
 		Olck = UCD_SCRIPT_Olck, /**< @brief Ol Chiki Script */
 		Orkh = UCD_SCRIPT_Orkh, /**< @brief Old Turkic Script */
 		Orya = UCD_SCRIPT_Orya, /**< @brief Oriya Script */
 		Osma = UCD_SCRIPT_Osma, /**< @brief Osmanya Script */
 		Palm = UCD_SCRIPT_Palm, /**< @brief Palmyrene Script */
 		Pauc = UCD_SCRIPT_Pauc, /**< @brief Pau Cin Hau Script */
 		Perm = UCD_SCRIPT_Perm, /**< @brief Old Permic */
 		Phag = UCD_SCRIPT_Phag, /**< @brief Phags-Pa Script */
 		Phli = UCD_SCRIPT_Phli, /**< @brief Inscriptional Pahlavi Script */
 		Phlp = UCD_SCRIPT_Phlp, /**< @brief Psalter Pahlavi Script */
 		Phlv = UCD_SCRIPT_Phlv, /**< @brief Book Pahlavi Script */
 		Phnx = UCD_SCRIPT_Phnx, /**< @brief Phoenician Script */
 		Plrd = UCD_SCRIPT_Plrd, /**< @brief Miao Script */
 		Prti = UCD_SCRIPT_Prti, /**< @brief Inscriptional Parthian Script */
 		Qaak = UCD_SCRIPT_Qaak, /**< @brief Klingon Script (Private Use) */
 		Rjng = UCD_SCRIPT_Rjng, /**< @brief Rejang Script */
 		Roro = UCD_SCRIPT_Roro, /**< @brief Rongorongo Script */
 		Runr = UCD_SCRIPT_Runr, /**< @brief Runic Script */
 		Samr = UCD_SCRIPT_Samr, /**< @brief Samaritan Script */
 		Sara = UCD_SCRIPT_Sara, /**< @brief Sarati Script */
 		Sarb = UCD_SCRIPT_Sarb, /**< @brief Old South Arabian Script */
 		Saur = UCD_SCRIPT_Saur, /**< @brief Saurashtra Script */
 		Sgnw = UCD_SCRIPT_Sgnw, /**< @brief Sign Writing */
 		Shaw = UCD_SCRIPT_Shaw, /**< @brief Shavian Script */
 		Shrd = UCD_SCRIPT_Shrd, /**< @brief Sharada Script */
 		Sidd = UCD_SCRIPT_Sidd, /**< @brief Siddham Script */
 		Sind = UCD_SCRIPT_Sind, /**< @brief Sindhi Script */
 		Sinh = UCD_SCRIPT_Sinh, /**< @brief Sinhala Script */
 		Sora = UCD_SCRIPT_Sora, /**< @brief Sora Sompeng Script */
 		Sund = UCD_SCRIPT_Sund, /**< @brief Sundanese Script */
 		Sylo = UCD_SCRIPT_Sylo, /**< @brief Syloti Nagri Script */
 		Syrc = UCD_SCRIPT_Syrc, /**< @brief Syriac Script */
 		Syre = UCD_SCRIPT_Syre, /**< @brief Syriac Script (Estrangelo Variant) */
 		Syrj = UCD_SCRIPT_Syrj, /**< @brief Syriac Script (Western Variant) */
 		Syrn = UCD_SCRIPT_Syrn, /**< @brief Syriac Script (Eastern Variant) */
 		Tagb = UCD_SCRIPT_Tagb, /**< @brief Tagbanwa Script */
 		Takr = UCD_SCRIPT_Takr, /**< @brief Takri Script */
 		Tale = UCD_SCRIPT_Tale, /**< @brief Tai Le Script */
 		Talu = UCD_SCRIPT_Talu, /**< @brief New Tai Lue Script */
 		Taml = UCD_SCRIPT_Taml, /**< @brief Tamil Script */
 		Tang = UCD_SCRIPT_Tang, /**< @brief Tangut Script */
 		Tavt = UCD_SCRIPT_Tavt, /**< @brief Tai Viet Script */
 		Telu = UCD_SCRIPT_Telu, /**< @brief Telugu Script */
 		Teng = UCD_SCRIPT_Teng, /**< @brief Tengwar Script */
 		Tfng = UCD_SCRIPT_Tfng, /**< @brief Tifinagh Script */
 		Tglg = UCD_SCRIPT_Tglg, /**< @brief Tagalog Script */
 		Thaa = UCD_SCRIPT_Thaa, /**< @brief Thaana Script */
 		Thai = UCD_SCRIPT_Thai, /**< @brief Thai Script */
 		Tibt = UCD_SCRIPT_Tibt, /**< @brief Tibetan Script */
 		Tirh = UCD_SCRIPT_Tirh, /**< @brief Tirhuta Script */
 		Ugar = UCD_SCRIPT_Ugar, /**< @brief Ugaritic Script */
 		Vaii = UCD_SCRIPT_Vaii, /**< @brief Vai Script */
 		Visp = UCD_SCRIPT_Visp, /**< @brief Visible Speech Script */
 		Wara = UCD_SCRIPT_Wara, /**< @brief Warang Citi Script */
 		Wole = UCD_SCRIPT_Wole, /**< @brief Woleai Script */
 		Xpeo = UCD_SCRIPT_Xpeo, /**< @brief Old Persian Script */
 		Xsux = UCD_SCRIPT_Xsux, /**< @brief Cuneiform Script */
 		Yiii = UCD_SCRIPT_Yiii, /**< @brief Yi Script */
 		Zinh = UCD_SCRIPT_Zinh, /**< @brief Inherited Script */
 		Zmth = UCD_SCRIPT_Zmth, /**< @brief Mathematical Notation */
 		Zsym = UCD_SCRIPT_Zsym, /**< @brief Symbols */
 		Zxxx = UCD_SCRIPT_Zxxx, /**< @brief Unwritten Documents */
 		Zyyy = UCD_SCRIPT_Zyyy, /**< @brief Undetermined Script */
 		Zzzz = UCD_SCRIPT_Zzzz, /**< @brief Uncoded Script */
 	};

 	/** @brief Get a string representation of the script enumeration value.
@@ -425,5 +831,6 @@ namespace ucd
 	  */
 	codepoint_t totitle(codepoint_t c);
 }
 #endif

 #endif
--- a/src/scripts.cpp
+++ b/src/scripts.cpp
@@ -1,6 +1,6 @@
 /* Unicode Scripts
 *
 * Copyright (C) 2012 Reece H. Dunn
 * Copyright (C) 2012-2016 Reece H. Dunn
 *
 * This file is part of ucd-tools.
 *
@@ -3374,6 +3374,11 @@ static const uint8_t *scripts_0E0000_0E01FF[] =
 	scripts_0E0100,
 };

 /** @brief Look up the script of a codepoint via ucd::lookup_script.
   *
   * @param c The codepoint, passed through unchanged.
   * @return  The ucd::lookup_script result converted to ucd_script.
   */
 ucd_script ucd_lookup_script(codepoint_t c)
 {
 	return static_cast<ucd_script>(ucd::lookup_script(c));
 }

 ucd::script ucd::lookup_script(codepoint_t c)
 {
 	if (c <= 0x00D7FF) // 000000..00D7FF
--- a/src/tostring.cpp
+++ b/src/tostring.cpp
@@ -1,6 +1,6 @@
 /* Enumeration types to string.
 *
 * Copyright (C) 2012-2014 Reece H. Dunn
 * Copyright (C) 2012-2016 Reece H. Dunn
 *
 * This file is part of ucd-tools.
 *
@@ -20,6 +20,11 @@

 #include "ucd/ucd.h"

 /** @brief Get the string for a category group via ucd::get_category_group_string.
   *
   * @param c The category group, converted to ucd::category_group for the call.
   * @return  The pointer returned by ucd::get_category_group_string.
   */
 const char *ucd_get_category_group_string(ucd_category_group c)
 {
 	return ucd::get_category_group_string(static_cast<ucd::category_group>(c));
 }

 const char *ucd::get_category_group_string(category_group c)
 {
 	switch (c)
@@ -36,6 +41,11 @@ const char *ucd::get_category_group_string(category_group c)
 	}
 }

 /** @brief Get the string for a category via ucd::get_category_string.
   *
   * @param c The category, converted to ucd::category for the call.
   * @return  The pointer returned by ucd::get_category_string.
   */
 const char *ucd_get_category_string(ucd_category c)
 {
 	return ucd::get_category_string(static_cast<ucd::category>(c));
 }

 const char *ucd::get_category_string(category c)
 {
 	switch (c)
@@ -75,6 +85,11 @@ const char *ucd::get_category_string(category c)
 	}
 }

 /** @brief Get the string for a script via ucd::get_script_string.
   *
   * @param s The script, converted to ucd::script for the call.
   * @return  The pointer returned by ucd::get_script_string.
   */
 const char *ucd_get_script_string(ucd_script s)
 {
 	return ucd::get_script_string(static_cast<ucd::script>(s));
 }

 const char *ucd::get_script_string(script s)
 {
 	static const char *scripts[] =
--- a/tools/case.py
+++ b/tools/case.py
@@ -1,6 +1,6 @@
 #!/usr/bin/python

 # Copyright (C) 2012 Reece H. Dunn
 # Copyright (C) 2012-2016 Reece H. Dunn
 #
 # This file is part of ucd-tools.
 #
@@ -33,7 +33,7 @@ for data in ucd.parse_ucd_data(ucd_rootdir, 'UnicodeData'):
 if __name__ == '__main__':
 	sys.stdout.write("""/* Unicode Case Conversion
 *
 * Copyright (C) 2012 Reece H. Dunn
 * Copyright (C) 2012-2016 Reece H. Dunn
 *
 * This file is part of ucd-tools.
 *
@@ -80,6 +80,11 @@ struct case_conversion_entry
 	sys.stdout.write('};\n')

 	for case in ['upper', 'lower', 'title']:
 		sys.stdout.write('\n')
 		sys.stdout.write('codepoint_t ucd_to%s(codepoint_t c)\n' % case)
 		sys.stdout.write('{\n')
 		sys.stdout.write('\treturn ucd::to%s(c);\n' % case)
 		sys.stdout.write('}\n')
 		sys.stdout.write('\n')
 		sys.stdout.write('ucd::codepoint_t ucd::to%s(codepoint_t c)\n' % case)
 		sys.stdout.write('{\n')
--- a/tools/categories.py
+++ b/tools/categories.py
@@ -1,6 +1,6 @@
 #!/usr/bin/python

 # Copyright (C) 2012, 2014 Reece H. Dunn
 # Copyright (C) 2012-2016 Reece H. Dunn
 #
 # This file is part of ucd-tools.
 #
@@ -92,7 +92,7 @@ for codepoints, category, comment in category_sets:
 if __name__ == '__main__':
 	sys.stdout.write("""/* Unicode General Categories
 *
 * Copyright (C) 2012 Reece H. Dunn
 * Copyright (C) 2012-2016 Reece H. Dunn
 *
 * This file is part of ucd-tools.
 *
@@ -162,6 +162,12 @@ using namespace ucd;
 					sys.stdout.write('\tcategories_%s,\n' % codepoint)
 			sys.stdout.write('};\n')

 	sys.stdout.write("""
 ucd_category ucd_lookup_category(codepoint_t c)
 {
 	return (ucd_category)ucd::lookup_category((ucd::category)c);
 }
 """)
 	sys.stdout.write('\n')
 	sys.stdout.write('ucd::category ucd::lookup_category(codepoint_t c)\n')
 	sys.stdout.write('{\n')
@@ -178,6 +184,11 @@ using namespace ucd;
 	sys.stdout.write('}\n')

 	sys.stdout.write("""
 ucd_category_group ucd_get_category_group_for_category(ucd_category c)
 {
 	return (ucd_category_group)ucd::lookup_category_group((ucd::category)c);
 }

 ucd::category_group ucd::lookup_category_group(category c)
 {
 	switch (c)
@@ -201,6 +212,11 @@ ucd::category_group ucd::lookup_category_group(category c)
 	}
 }

 ucd_category_group ucd_lookup_category_group(codepoint_t c)
 {
 	return (ucd_category_group)ucd::lookup_category_group(ucd::lookup_category(c));
 }

 ucd::category_group ucd::lookup_category_group(codepoint_t c)
 {
 	return lookup_category_group(lookup_category(c));
--- a/tools/scripts.py
+++ b/tools/scripts.py
@@ -1,6 +1,6 @@
 #!/usr/bin/python

 # Copyright (C) 2012, 2014 Reece H. Dunn
 # Copyright (C) 2012-2016 Reece H. Dunn
 #
 # This file is part of ucd-tools.
 #
@@ -86,7 +86,7 @@ for codepoints, script, comment in script_sets:
 if __name__ == '__main__':
 	sys.stdout.write("""/* Unicode Scripts
 *
 * Copyright (C) 2012 Reece H. Dunn
 * Copyright (C) 2012-2016 Reece H. Dunn
 *
 * This file is part of ucd-tools.
 *
@@ -156,6 +156,12 @@ using namespace ucd;
 					sys.stdout.write('\tscripts_%s,\n' % codepoint)
 			sys.stdout.write('};\n')

 	# Emit the ucd_lookup_script wrapper: it forwards the codepoint to
 	# ucd::lookup_script and casts the result to ucd_script.
 	sys.stdout.write("""
 ucd_script ucd_lookup_script(codepoint_t c)
 {
 	return (ucd_script)ucd::lookup_script(c);
 }
 """)
 	sys.stdout.write('\n')
 	sys.stdout.write('ucd::script ucd::lookup_script(codepoint_t c)\n')
 	sys.stdout.write('{\n')