| @@ -108,7 +108,8 @@ src_libucd_la_SOURCES = \ | |||
| src/case.cpp \ | |||
| src/categories.cpp \ | |||
| src/ctype.cpp \ | |||
| src/scripts.cpp | |||
| src/scripts.cpp \ | |||
| src/tostring.cpp | |||
| ############################# tests ########################################### | |||
| @@ -1,6 +1,6 @@ | |||
| /* Unicode Character Database API | |||
| * | |||
| * Copyright (C) 2012 Reece H. Dunn | |||
| * Copyright (C) 2012-2013 Reece H. Dunn | |||
| * | |||
| * This file is part of ucd-tools. | |||
| * | |||
| @@ -51,6 +51,14 @@ namespace ucd | |||
| Z, /**< @brief Separator */ | |||
| }; | |||
| /** @brief Get a string representation of the category_group enumeration value. | |||
| * | |||
| * The string is the name of the enumerator itself, e.g. "L" for the L value. | |||
| * | |||
| * @param c The value to get the string representation for. | |||
| * | |||
| * @return The string representation, or "-" if the value is not recognized. | |||
| */ | |||
| const char *get_category_group_string(category_group c); | |||
| /** @brief Unicode General Category Values | |||
| * @see http://www.unicode.org/reports/tr44/ | |||
| */ | |||
| @@ -96,6 +104,14 @@ namespace ucd | |||
| Zs, /**< @brief Space Separator */ | |||
| }; | |||
| /** @brief Get a string representation of the category enumeration value. | |||
| * | |||
| * The string is the name of the enumerator itself, e.g. "Lu" for the Lu value. | |||
| * | |||
| * @param c The value to get the string representation for. | |||
| * | |||
| * @return The string representation, or "--" if the value is not recognized. | |||
| */ | |||
| const char *get_category_string(category c); | |||
| /** @brief Lookup the General Category Group for a General Category. | |||
| * | |||
| * @param c The General Category to lookup. | |||
| @@ -236,6 +252,14 @@ namespace ucd | |||
| Zzzz, /**< @brief Unknown Script */ | |||
| }; | |||
| /** @brief Get a string representation of the script enumeration value. | |||
| * | |||
| * The string is the name of the enumerator itself, e.g. "Latn" for the Latn value. | |||
| * | |||
| * @param s The value to get the string representation for. | |||
| * | |||
| * @return The string representation, or "----" if the value is not recognized. | |||
| */ | |||
| const char *get_script_string(script s); | |||
| /** @brief Lookup the Script for a Unicode codepoint. | |||
| * | |||
| * @param c The Unicode codepoint to lookup. | |||
| @@ -0,0 +1,187 @@ | |||
| /* Enumeration types to string. | |||
| * | |||
| * Copyright (C) 2012-2013 Reece H. Dunn | |||
| * | |||
| * This file is part of ucd-tools. | |||
| * | |||
| * ucd-tools is free software: you can redistribute it and/or modify | |||
| * it under the terms of the GNU General Public License as published by | |||
| * the Free Software Foundation, either version 3 of the License, or | |||
| * (at your option) any later version. | |||
| * | |||
| * ucd-tools is distributed in the hope that it will be useful, | |||
| * but WITHOUT ANY WARRANTY; without even the implied warranty of | |||
| * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |||
| * GNU General Public License for more details. | |||
| * | |||
| * You should have received a copy of the GNU General Public License | |||
| * along with ucd-tools. If not, see <http://www.gnu.org/licenses/>. | |||
| */ | |||
| #include "ucd/ucd.h" | |||
| /* Translate a category_group value into its enumerator name. | |||
| * Values without a matching case keep the "-" placeholder. | |||
| */ | |||
| const char *ucd::get_category_group_string(category_group c) | |||
| { | |||
|     const char *name = "-"; | |||
|     switch (c) | |||
|     { | |||
|     case C: name = "C"; break; | |||
|     case I: name = "I"; break; | |||
|     case L: name = "L"; break; | |||
|     case M: name = "M"; break; | |||
|     case N: name = "N"; break; | |||
|     case P: name = "P"; break; | |||
|     case S: name = "S"; break; | |||
|     case Z: name = "Z"; break; | |||
|     default: break; | |||
|     } | |||
|     return name; | |||
| } | |||
| /* Translate a category value into its two-letter enumerator name. | |||
| * Values without a matching case keep the "--" placeholder. | |||
| */ | |||
| const char *ucd::get_category_string(category c) | |||
| { | |||
|     const char *name = "--"; | |||
|     switch (c) | |||
|     { | |||
|     case Cc: name = "Cc"; break; | |||
|     case Cf: name = "Cf"; break; | |||
|     case Cn: name = "Cn"; break; | |||
|     case Co: name = "Co"; break; | |||
|     case Cs: name = "Cs"; break; | |||
|     case Ii: name = "Ii"; break; | |||
|     case Ll: name = "Ll"; break; | |||
|     case Lm: name = "Lm"; break; | |||
|     case Lo: name = "Lo"; break; | |||
|     case Lt: name = "Lt"; break; | |||
|     case Lu: name = "Lu"; break; | |||
|     case Mc: name = "Mc"; break; | |||
|     case Me: name = "Me"; break; | |||
|     case Mn: name = "Mn"; break; | |||
|     case Nd: name = "Nd"; break; | |||
|     case Nl: name = "Nl"; break; | |||
|     case No: name = "No"; break; | |||
|     case Pc: name = "Pc"; break; | |||
|     case Pd: name = "Pd"; break; | |||
|     case Pe: name = "Pe"; break; | |||
|     case Pf: name = "Pf"; break; | |||
|     case Pi: name = "Pi"; break; | |||
|     case Po: name = "Po"; break; | |||
|     case Ps: name = "Ps"; break; | |||
|     case Sc: name = "Sc"; break; | |||
|     case Sk: name = "Sk"; break; | |||
|     case Sm: name = "Sm"; break; | |||
|     case So: name = "So"; break; | |||
|     case Zl: name = "Zl"; break; | |||
|     case Zp: name = "Zp"; break; | |||
|     case Zs: name = "Zs"; break; | |||
|     default: break; | |||
|     } | |||
|     return name; | |||
| } | |||
| /* Translate a script value into its four-letter enumerator name. | |||
| * Values without a matching case keep the "----" placeholder. | |||
| */ | |||
| const char *ucd::get_script_string(script s) | |||
| { | |||
|     const char *name = "----"; | |||
|     switch (s) | |||
|     { | |||
|     case Arab: name = "Arab"; break; | |||
|     case Armi: name = "Armi"; break; | |||
|     case Armn: name = "Armn"; break; | |||
|     case Avst: name = "Avst"; break; | |||
|     case Bali: name = "Bali"; break; | |||
|     case Bamu: name = "Bamu"; break; | |||
|     case Batk: name = "Batk"; break; | |||
|     case Beng: name = "Beng"; break; | |||
|     case Bopo: name = "Bopo"; break; | |||
|     case Brah: name = "Brah"; break; | |||
|     case Brai: name = "Brai"; break; | |||
|     case Bugi: name = "Bugi"; break; | |||
|     case Buhd: name = "Buhd"; break; | |||
|     case Cans: name = "Cans"; break; | |||
|     case Cari: name = "Cari"; break; | |||
|     case Cakm: name = "Cakm"; break; | |||
|     case Cham: name = "Cham"; break; | |||
|     case Cher: name = "Cher"; break; | |||
|     case Copt: name = "Copt"; break; | |||
|     case Cprt: name = "Cprt"; break; | |||
|     case Cyrl: name = "Cyrl"; break; | |||
|     case Deva: name = "Deva"; break; | |||
|     case Dsrt: name = "Dsrt"; break; | |||
|     case Egyp: name = "Egyp"; break; | |||
|     case Ethi: name = "Ethi"; break; | |||
|     case Geor: name = "Geor"; break; | |||
|     case Glag: name = "Glag"; break; | |||
|     case Goth: name = "Goth"; break; | |||
|     case Grek: name = "Grek"; break; | |||
|     case Gujr: name = "Gujr"; break; | |||
|     case Guru: name = "Guru"; break; | |||
|     case Hang: name = "Hang"; break; | |||
|     case Hano: name = "Hano"; break; | |||
|     case Hant: name = "Hant"; break; | |||
|     case Hebr: name = "Hebr"; break; | |||
|     case Hira: name = "Hira"; break; | |||
|     case Ital: name = "Ital"; break; | |||
|     case Java: name = "Java"; break; | |||
|     case Kali: name = "Kali"; break; | |||
|     case Kana: name = "Kana"; break; | |||
|     case Khar: name = "Khar"; break; | |||
|     case Khmr: name = "Khmr"; break; | |||
|     case Knda: name = "Knda"; break; | |||
|     case Kthi: name = "Kthi"; break; | |||
|     case Lana: name = "Lana"; break; | |||
|     case Laoo: name = "Laoo"; break; | |||
|     case Latn: name = "Latn"; break; | |||
|     case Lepc: name = "Lepc"; break; | |||
|     case Limb: name = "Limb"; break; | |||
|     case Linb: name = "Linb"; break; | |||
|     case Lisu: name = "Lisu"; break; | |||
|     case Lyci: name = "Lyci"; break; | |||
|     case Lydi: name = "Lydi"; break; | |||
|     case Mand: name = "Mand"; break; | |||
|     case Merc: name = "Merc"; break; | |||
|     case Mero: name = "Mero"; break; | |||
|     case Mlym: name = "Mlym"; break; | |||
|     case Mong: name = "Mong"; break; | |||
|     case Mtei: name = "Mtei"; break; | |||
|     case Mymr: name = "Mymr"; break; | |||
|     case Nkoo: name = "Nkoo"; break; | |||
|     case Ogam: name = "Ogam"; break; | |||
|     case Olck: name = "Olck"; break; | |||
|     case Orkh: name = "Orkh"; break; | |||
|     case Orya: name = "Orya"; break; | |||
|     case Osma: name = "Osma"; break; | |||
|     case Phag: name = "Phag"; break; | |||
|     case Phli: name = "Phli"; break; | |||
|     case Phnx: name = "Phnx"; break; | |||
|     case Plrd: name = "Plrd"; break; | |||
|     case Prti: name = "Prti"; break; | |||
|     case Qaak: name = "Qaak"; break; | |||
|     case Rjng: name = "Rjng"; break; | |||
|     case Runr: name = "Runr"; break; | |||
|     case Samr: name = "Samr"; break; | |||
|     case Sarb: name = "Sarb"; break; | |||
|     case Saur: name = "Saur"; break; | |||
|     case Shaw: name = "Shaw"; break; | |||
|     case Shrd: name = "Shrd"; break; | |||
|     case Sinh: name = "Sinh"; break; | |||
|     case Sora: name = "Sora"; break; | |||
|     case Sund: name = "Sund"; break; | |||
|     case Sylo: name = "Sylo"; break; | |||
|     case Syrn: name = "Syrn"; break; | |||
|     case Tagb: name = "Tagb"; break; | |||
|     case Takr: name = "Takr"; break; | |||
|     case Tale: name = "Tale"; break; | |||
|     case Talu: name = "Talu"; break; | |||
|     case Taml: name = "Taml"; break; | |||
|     case Tavt: name = "Tavt"; break; | |||
|     case Telu: name = "Telu"; break; | |||
|     case Tfng: name = "Tfng"; break; | |||
|     case Tglg: name = "Tglg"; break; | |||
|     case Thaa: name = "Thaa"; break; | |||
|     case Thai: name = "Thai"; break; | |||
|     case Tibt: name = "Tibt"; break; | |||
|     case Ugar: name = "Ugar"; break; | |||
|     case Vaii: name = "Vaii"; break; | |||
|     case Xpeo: name = "Xpeo"; break; | |||
|     case Xsux: name = "Xsux"; break; | |||
|     case Yiii: name = "Yiii"; break; | |||
|     case Zyyy: name = "Zyyy"; break; | |||
|     case Zzzz: name = "Zzzz"; break; | |||
|     default: break; | |||
|     } | |||
|     return name; | |||
| } | |||
| @@ -1,5 +1,5 @@ | |||
| /* | |||
| * Copyright (C) 2012 Reece H. Dunn | |||
| * Copyright (C) 2012-2013 Reece H. Dunn | |||
| * | |||
| * This file is part of ucd-tools. | |||
| * | |||
| @@ -21,182 +21,13 @@ | |||
| #include <stdio.h> | |||
| /* Name of a ucd::category_group enumerator; "-" when no case matches. */ | |||
| const char *get_category_group_string(ucd::category_group c) | |||
| { | |||
|     switch (c) | |||
|     { | |||
|     case ucd::C: return "C"; | |||
|     case ucd::I: return "I"; | |||
|     case ucd::L: return "L"; | |||
|     case ucd::M: return "M"; | |||
|     case ucd::N: return "N"; | |||
|     case ucd::P: return "P"; | |||
|     case ucd::S: return "S"; | |||
|     case ucd::Z: return "Z"; | |||
|     default: break; | |||
|     } | |||
|     return "-"; | |||
| } | |||
| /* Name of a ucd::category enumerator; "--" when no case matches. */ | |||
| const char *get_category_string(ucd::category c) | |||
| { | |||
|     switch (c) | |||
|     { | |||
|     case ucd::Cc: return "Cc"; | |||
|     case ucd::Cf: return "Cf"; | |||
|     case ucd::Cn: return "Cn"; | |||
|     case ucd::Co: return "Co"; | |||
|     case ucd::Cs: return "Cs"; | |||
|     case ucd::Ii: return "Ii"; | |||
|     case ucd::Ll: return "Ll"; | |||
|     case ucd::Lm: return "Lm"; | |||
|     case ucd::Lo: return "Lo"; | |||
|     case ucd::Lt: return "Lt"; | |||
|     case ucd::Lu: return "Lu"; | |||
|     case ucd::Mc: return "Mc"; | |||
|     case ucd::Me: return "Me"; | |||
|     case ucd::Mn: return "Mn"; | |||
|     case ucd::Nd: return "Nd"; | |||
|     case ucd::Nl: return "Nl"; | |||
|     case ucd::No: return "No"; | |||
|     case ucd::Pc: return "Pc"; | |||
|     case ucd::Pd: return "Pd"; | |||
|     case ucd::Pe: return "Pe"; | |||
|     case ucd::Pf: return "Pf"; | |||
|     case ucd::Pi: return "Pi"; | |||
|     case ucd::Po: return "Po"; | |||
|     case ucd::Ps: return "Ps"; | |||
|     case ucd::Sc: return "Sc"; | |||
|     case ucd::Sk: return "Sk"; | |||
|     case ucd::Sm: return "Sm"; | |||
|     case ucd::So: return "So"; | |||
|     case ucd::Zl: return "Zl"; | |||
|     case ucd::Zp: return "Zp"; | |||
|     case ucd::Zs: return "Zs"; | |||
|     default: break; | |||
|     } | |||
|     return "--"; | |||
| } | |||
| /* Name of a ucd::script enumerator; "----" when no case matches. */ | |||
| const char *get_script_string(ucd::script s) | |||
| { | |||
|     switch (s) | |||
|     { | |||
|     case ucd::Arab: return "Arab"; | |||
|     case ucd::Armi: return "Armi"; | |||
|     case ucd::Armn: return "Armn"; | |||
|     case ucd::Avst: return "Avst"; | |||
|     case ucd::Bali: return "Bali"; | |||
|     case ucd::Bamu: return "Bamu"; | |||
|     case ucd::Batk: return "Batk"; | |||
|     case ucd::Beng: return "Beng"; | |||
|     case ucd::Bopo: return "Bopo"; | |||
|     case ucd::Brah: return "Brah"; | |||
|     case ucd::Brai: return "Brai"; | |||
|     case ucd::Bugi: return "Bugi"; | |||
|     case ucd::Buhd: return "Buhd"; | |||
|     case ucd::Cans: return "Cans"; | |||
|     case ucd::Cari: return "Cari"; | |||
|     case ucd::Cakm: return "Cakm"; | |||
|     case ucd::Cham: return "Cham"; | |||
|     case ucd::Cher: return "Cher"; | |||
|     case ucd::Copt: return "Copt"; | |||
|     case ucd::Cprt: return "Cprt"; | |||
|     case ucd::Cyrl: return "Cyrl"; | |||
|     case ucd::Deva: return "Deva"; | |||
|     case ucd::Dsrt: return "Dsrt"; | |||
|     case ucd::Egyp: return "Egyp"; | |||
|     case ucd::Ethi: return "Ethi"; | |||
|     case ucd::Geor: return "Geor"; | |||
|     case ucd::Glag: return "Glag"; | |||
|     case ucd::Goth: return "Goth"; | |||
|     case ucd::Grek: return "Grek"; | |||
|     case ucd::Gujr: return "Gujr"; | |||
|     case ucd::Guru: return "Guru"; | |||
|     case ucd::Hang: return "Hang"; | |||
|     case ucd::Hano: return "Hano"; | |||
|     case ucd::Hant: return "Hant"; | |||
|     case ucd::Hebr: return "Hebr"; | |||
|     case ucd::Hira: return "Hira"; | |||
|     case ucd::Ital: return "Ital"; | |||
|     case ucd::Java: return "Java"; | |||
|     case ucd::Kali: return "Kali"; | |||
|     case ucd::Kana: return "Kana"; | |||
|     case ucd::Khar: return "Khar"; | |||
|     case ucd::Khmr: return "Khmr"; | |||
|     case ucd::Knda: return "Knda"; | |||
|     case ucd::Kthi: return "Kthi"; | |||
|     case ucd::Lana: return "Lana"; | |||
|     case ucd::Laoo: return "Laoo"; | |||
|     case ucd::Latn: return "Latn"; | |||
|     case ucd::Lepc: return "Lepc"; | |||
|     case ucd::Limb: return "Limb"; | |||
|     case ucd::Linb: return "Linb"; | |||
|     case ucd::Lisu: return "Lisu"; | |||
|     case ucd::Lyci: return "Lyci"; | |||
|     case ucd::Lydi: return "Lydi"; | |||
|     case ucd::Mand: return "Mand"; | |||
|     case ucd::Merc: return "Merc"; | |||
|     case ucd::Mero: return "Mero"; | |||
|     case ucd::Mlym: return "Mlym"; | |||
|     case ucd::Mong: return "Mong"; | |||
|     case ucd::Mtei: return "Mtei"; | |||
|     case ucd::Mymr: return "Mymr"; | |||
|     case ucd::Nkoo: return "Nkoo"; | |||
|     case ucd::Ogam: return "Ogam"; | |||
|     case ucd::Olck: return "Olck"; | |||
|     case ucd::Orkh: return "Orkh"; | |||
|     case ucd::Orya: return "Orya"; | |||
|     case ucd::Osma: return "Osma"; | |||
|     case ucd::Phag: return "Phag"; | |||
|     case ucd::Phli: return "Phli"; | |||
|     case ucd::Phnx: return "Phnx"; | |||
|     case ucd::Plrd: return "Plrd"; | |||
|     case ucd::Prti: return "Prti"; | |||
|     case ucd::Qaak: return "Qaak"; | |||
|     case ucd::Rjng: return "Rjng"; | |||
|     case ucd::Runr: return "Runr"; | |||
|     case ucd::Samr: return "Samr"; | |||
|     case ucd::Sarb: return "Sarb"; | |||
|     case ucd::Saur: return "Saur"; | |||
|     case ucd::Shaw: return "Shaw"; | |||
|     case ucd::Shrd: return "Shrd"; | |||
|     case ucd::Sinh: return "Sinh"; | |||
|     case ucd::Sora: return "Sora"; | |||
|     case ucd::Sund: return "Sund"; | |||
|     case ucd::Sylo: return "Sylo"; | |||
|     case ucd::Syrn: return "Syrn"; | |||
|     case ucd::Tagb: return "Tagb"; | |||
|     case ucd::Takr: return "Takr"; | |||
|     case ucd::Tale: return "Tale"; | |||
|     case ucd::Talu: return "Talu"; | |||
|     case ucd::Taml: return "Taml"; | |||
|     case ucd::Tavt: return "Tavt"; | |||
|     case ucd::Telu: return "Telu"; | |||
|     case ucd::Tfng: return "Tfng"; | |||
|     case ucd::Tglg: return "Tglg"; | |||
|     case ucd::Thaa: return "Thaa"; | |||
|     case ucd::Thai: return "Thai"; | |||
|     case ucd::Tibt: return "Tibt"; | |||
|     case ucd::Ugar: return "Ugar"; | |||
|     case ucd::Vaii: return "Vaii"; | |||
|     case ucd::Xpeo: return "Xpeo"; | |||
|     case ucd::Xsux: return "Xsux"; | |||
|     case ucd::Yiii: return "Yiii"; | |||
|     case ucd::Zyyy: return "Zyyy"; | |||
|     case ucd::Zzzz: return "Zzzz"; | |||
|     default: break; | |||
|     } | |||
|     return "----"; | |||
| } | |||
| int main() | |||
| { | |||
| for (ucd::codepoint_t c = 0; c <= 0x10FFFF; ++c) | |||
| { | |||
| const char *script = get_script_string(ucd::lookup_script(c)); | |||
| const char *category = get_category_string(ucd::lookup_category(c)); | |||
| const char *category_group = get_category_group_string(ucd::lookup_category_group(c)); | |||
| const char *script = ucd::get_script_string(ucd::lookup_script(c)); | |||
| const char *category = ucd::get_category_string(ucd::lookup_category(c)); | |||
| const char *category_group = ucd::get_category_group_string(ucd::lookup_category_group(c)); | |||
| ucd::codepoint_t upper = ucd::toupper(c); | |||
| ucd::codepoint_t lower = ucd::tolower(c); | |||
| ucd::codepoint_t title = ucd::totitle(c); | |||