Browse Source

Convert categories.cpp from C++ to C.

master
Reece H. Dunn 9 years ago
parent
commit
707998940d
4 changed files with 99 additions and 61 deletions
  1. 2
    2
      Makefile.am
  2. 43
    28
      src/categories.c
  3. 12
    3
      src/include/ucd/ucd.h
  4. 42
    28
      tools/categories.py

+ 2
- 2
Makefile.am View File



ucd-update: tools/case.py tools/categories.py tools/scripts.py ucd-update: tools/case.py tools/categories.py tools/scripts.py
tools/case.py ${UCD_ROOTDIR} ${UCD_VERSION} ${UCD_FLAGS} > src/case.c tools/case.py ${UCD_ROOTDIR} ${UCD_VERSION} ${UCD_FLAGS} > src/case.c
tools/categories.py ${UCD_ROOTDIR} ${UCD_VERSION} ${UCD_FLAGS} > src/categories.cpp
tools/categories.py ${UCD_ROOTDIR} ${UCD_VERSION} ${UCD_FLAGS} > src/categories.c
tools/scripts.py ${UCD_ROOTDIR} ${UCD_VERSION} ${UCD_FLAGS} > src/scripts.cpp tools/scripts.py ${UCD_ROOTDIR} ${UCD_VERSION} ${UCD_FLAGS} > src/scripts.cpp


libucd_includedir = $(includedir)/ucd libucd_includedir = $(includedir)/ucd
src_libucd_la_LDFLAGS = -version-info $(LIBUCD_VERSION) src_libucd_la_LDFLAGS = -version-info $(LIBUCD_VERSION)
src_libucd_la_SOURCES = \ src_libucd_la_SOURCES = \
src/case.c \ src/case.c \
src/categories.cpp \
src/categories.c \
src/ctype.c \ src/ctype.c \
src/scripts.cpp \ src/scripts.cpp \
src/tostring.c src/tostring.c

src/categories.cpp → src/categories.c View File



#include <stddef.h> #include <stddef.h>


using namespace ucd;
#define Cc UCD_CATEGORY_Cc
#define Cf UCD_CATEGORY_Cf
#define Cn UCD_CATEGORY_Cn
#define Co UCD_CATEGORY_Co
#define Cs UCD_CATEGORY_Cs
#define Ll UCD_CATEGORY_Ll
#define Lm UCD_CATEGORY_Lm
#define Lo UCD_CATEGORY_Lo
#define Lt UCD_CATEGORY_Lt
#define Lu UCD_CATEGORY_Lu
#define Mc UCD_CATEGORY_Mc
#define Me UCD_CATEGORY_Me
#define Mn UCD_CATEGORY_Mn
#define Nd UCD_CATEGORY_Nd
#define Nl UCD_CATEGORY_Nl
#define No UCD_CATEGORY_No
#define Pc UCD_CATEGORY_Pc
#define Pd UCD_CATEGORY_Pd
#define Pe UCD_CATEGORY_Pe
#define Pf UCD_CATEGORY_Pf
#define Pi UCD_CATEGORY_Pi
#define Po UCD_CATEGORY_Po
#define Ps UCD_CATEGORY_Ps
#define Sc UCD_CATEGORY_Sc
#define Sk UCD_CATEGORY_Sk
#define Sm UCD_CATEGORY_Sm
#define So UCD_CATEGORY_So
#define Zl UCD_CATEGORY_Zl
#define Zp UCD_CATEGORY_Zp
#define Zs UCD_CATEGORY_Zs
#define Ii UCD_CATEGORY_Ii


// Unicode Character Data 8.0.0 // Unicode Character Data 8.0.0


}; };


ucd_category ucd_lookup_category(codepoint_t c) ucd_category ucd_lookup_category(codepoint_t c)
{
return (ucd_category)ucd::lookup_category((ucd::category)c);
}

ucd::category ucd::lookup_category(codepoint_t c)
{ {
if (c <= 0x00D7FF) // 000000..00D7FF if (c <= 0x00D7FF) // 000000..00D7FF
{ {
const uint8_t *table = categories_000000_00D7FF[(c - 0x000000) / 256]; const uint8_t *table = categories_000000_00D7FF[(c - 0x000000) / 256];
return (ucd::category)table[c % 256];
return (ucd_category)table[c % 256];
} }
if (c <= 0x00DFFF) return Cs; // 00D800..00DFFF : Surrogates if (c <= 0x00DFFF) return Cs; // 00D800..00DFFF : Surrogates
if (c <= 0x00F7FF) return Co; // 00E000..00F7FF : Private Use Area if (c <= 0x00F7FF) return Co; // 00E000..00F7FF : Private Use Area
if (c <= 0x02FAFF) // 00F800..02FAFF if (c <= 0x02FAFF) // 00F800..02FAFF
{ {
const uint8_t *table = categories_00F800_02FAFF[(c - 0x00F800) / 256]; const uint8_t *table = categories_00F800_02FAFF[(c - 0x00F800) / 256];
return (ucd::category)table[c % 256];
return (ucd_category)table[c % 256];
} }
if (c <= 0x0DFFFF) return Cn; // 02FB00..0DFFFF : Unassigned if (c <= 0x0DFFFF) return Cn; // 02FB00..0DFFFF : Unassigned
if (c <= 0x0E01FF) // 0E0000..0E01FF if (c <= 0x0E01FF) // 0E0000..0E01FF
{ {
const uint8_t *table = categories_0E0000_0E01FF[(c - 0x0E0000) / 256]; const uint8_t *table = categories_0E0000_0E01FF[(c - 0x0E0000) / 256];
return (ucd::category)table[c % 256];
return (ucd_category)table[c % 256];
} }
if (c <= 0x0EFFFF) return Cn; // 0E0200..0EFFFF : Unassigned if (c <= 0x0EFFFF) return Cn; // 0E0200..0EFFFF : Unassigned
if (c <= 0x0FFFFD) return Co; // 0F0000..0FFFFD : Plane 15 Private Use if (c <= 0x0FFFFD) return Co; // 0F0000..0FFFFD : Plane 15 Private Use
} }


ucd_category_group ucd_get_category_group_for_category(ucd_category c) ucd_category_group ucd_get_category_group_for_category(ucd_category c)
{
return (ucd_category_group)ucd::lookup_category_group((ucd::category)c);
}

ucd::category_group ucd::lookup_category_group(category c)
{ {
switch (c) switch (c)
{ {
case Cc: case Cf: case Cn: case Co: case Cs: case Cc: case Cf: case Cn: case Co: case Cs:
return C;
return UCD_CATEGORY_GROUP_C;
case Ll: case Lm: case Lo: case Lt: case Lu: case Ll: case Lm: case Lo: case Lt: case Lu:
return L;
return UCD_CATEGORY_GROUP_L;
case Mc: case Me: case Mn: case Mc: case Me: case Mn:
return M;
return UCD_CATEGORY_GROUP_M;
case Nd: case Nl: case No: case Nd: case Nl: case No:
return N;
return UCD_CATEGORY_GROUP_N;
case Pc: case Pd: case Pe: case Pf: case Pi: case Po: case Ps: case Pc: case Pd: case Pe: case Pf: case Pi: case Po: case Ps:
return P;
return UCD_CATEGORY_GROUP_P;
case Sc: case Sk: case Sm: case So: case Sc: case Sk: case Sm: case So:
return S;
return UCD_CATEGORY_GROUP_S;
case Zl: case Zp: case Zs: case Zl: case Zp: case Zs:
return Z;
return UCD_CATEGORY_GROUP_Z;
case Ii: case Ii:
return I;
return UCD_CATEGORY_GROUP_I;
} }
} }


ucd_category_group ucd_lookup_category_group(codepoint_t c) ucd_category_group ucd_lookup_category_group(codepoint_t c)
{ {
return (ucd_category_group)ucd::lookup_category_group(ucd::lookup_category(c));
}

ucd::category_group ucd::lookup_category_group(codepoint_t c)
{
return lookup_category_group(lookup_category(c));
return (ucd_category_group)ucd_get_category_group_for_category(ucd_lookup_category(c));
} }

+ 12
- 3
src/include/ucd/ucd.h View File

* @param c The General Category to lookup. * @param c The General Category to lookup.
* @return The General Category Group of the General Category. * @return The General Category Group of the General Category.
*/ */
category_group lookup_category_group(category c);
inline category_group lookup_category_group(category c)
{
return (category_group)ucd_get_category_group_for_category((ucd_category)c);
}


/** @brief Lookup the General Category Group for a Unicode codepoint. /** @brief Lookup the General Category Group for a Unicode codepoint.
* *
* @param c The Unicode codepoint to lookup. * @param c The Unicode codepoint to lookup.
* @return The General Category Group of the Unicode codepoint. * @return The General Category Group of the Unicode codepoint.
*/ */
category_group lookup_category_group(codepoint_t c);
inline category_group lookup_category_group(codepoint_t c)
{
return (category_group)ucd_lookup_category_group(c);
}


/** @brief Lookup the General Category for a Unicode codepoint. /** @brief Lookup the General Category for a Unicode codepoint.
* *
* @param c The Unicode codepoint to lookup. * @param c The Unicode codepoint to lookup.
* @return The General Category of the Unicode codepoint. * @return The General Category of the Unicode codepoint.
*/ */
category lookup_category(codepoint_t c);
inline category lookup_category(codepoint_t c)
{
return (category)ucd_lookup_category(c);
}


/** @brief Unicode Script /** @brief Unicode Script
* @see http://www.iana.org/assignments/language-subtag-registry * @see http://www.iana.org/assignments/language-subtag-registry

+ 42
- 28
tools/categories.py View File



#include <stddef.h> #include <stddef.h>


using namespace ucd;
#define Cc UCD_CATEGORY_Cc
#define Cf UCD_CATEGORY_Cf
#define Cn UCD_CATEGORY_Cn
#define Co UCD_CATEGORY_Co
#define Cs UCD_CATEGORY_Cs
#define Ll UCD_CATEGORY_Ll
#define Lm UCD_CATEGORY_Lm
#define Lo UCD_CATEGORY_Lo
#define Lt UCD_CATEGORY_Lt
#define Lu UCD_CATEGORY_Lu
#define Mc UCD_CATEGORY_Mc
#define Me UCD_CATEGORY_Me
#define Mn UCD_CATEGORY_Mn
#define Nd UCD_CATEGORY_Nd
#define Nl UCD_CATEGORY_Nl
#define No UCD_CATEGORY_No
#define Pc UCD_CATEGORY_Pc
#define Pd UCD_CATEGORY_Pd
#define Pe UCD_CATEGORY_Pe
#define Pf UCD_CATEGORY_Pf
#define Pi UCD_CATEGORY_Pi
#define Po UCD_CATEGORY_Po
#define Ps UCD_CATEGORY_Ps
#define Sc UCD_CATEGORY_Sc
#define Sk UCD_CATEGORY_Sk
#define Sm UCD_CATEGORY_Sm
#define So UCD_CATEGORY_So
#define Zl UCD_CATEGORY_Zl
#define Zp UCD_CATEGORY_Zp
#define Zs UCD_CATEGORY_Zs
#define Ii UCD_CATEGORY_Ii


// Unicode Character Data %s // Unicode Character Data %s
""" % ucd_version) """ % ucd_version)
sys.stdout.write('\tcategories_%s,\n' % codepoint) sys.stdout.write('\tcategories_%s,\n' % codepoint)
sys.stdout.write('};\n') sys.stdout.write('};\n')


sys.stdout.write("""
ucd_category ucd_lookup_category(codepoint_t c)
{
return (ucd_category)ucd::lookup_category((ucd::category)c);
}
""")
sys.stdout.write('\n') sys.stdout.write('\n')
sys.stdout.write('ucd::category ucd::lookup_category(codepoint_t c)\n')
sys.stdout.write('ucd_category ucd_lookup_category(codepoint_t c)\n')
sys.stdout.write('{\n') sys.stdout.write('{\n')
for codepoints, category, comment in category_sets: for codepoints, category, comment in category_sets:
if category: if category:
sys.stdout.write('\tif (c <= 0x%s) // %s\n' % (codepoints.last, codepoints)) sys.stdout.write('\tif (c <= 0x%s) // %s\n' % (codepoints.last, codepoints))
sys.stdout.write('\t{\n') sys.stdout.write('\t{\n')
sys.stdout.write('\t\tconst uint8_t *table = categories_%s_%s[(c - 0x%s) / 256];\n' % (codepoints.first, codepoints.last, codepoints.first)) sys.stdout.write('\t\tconst uint8_t *table = categories_%s_%s[(c - 0x%s) / 256];\n' % (codepoints.first, codepoints.last, codepoints.first))
sys.stdout.write('\t\treturn (ucd::category)table[c % 256];\n')
sys.stdout.write('\t\treturn (ucd_category)table[c % 256];\n')
sys.stdout.write('\t}\n') sys.stdout.write('\t}\n')
sys.stdout.write('\treturn Ii; // Invalid Unicode Codepoint\n') sys.stdout.write('\treturn Ii; // Invalid Unicode Codepoint\n')
sys.stdout.write('}\n') sys.stdout.write('}\n')


sys.stdout.write(""" sys.stdout.write("""
ucd_category_group ucd_get_category_group_for_category(ucd_category c) ucd_category_group ucd_get_category_group_for_category(ucd_category c)
{
return (ucd_category_group)ucd::lookup_category_group((ucd::category)c);
}

ucd::category_group ucd::lookup_category_group(category c)
{ {
switch (c) switch (c)
{ {
case Cc: case Cf: case Cn: case Co: case Cs: case Cc: case Cf: case Cn: case Co: case Cs:
return C;
return UCD_CATEGORY_GROUP_C;
case Ll: case Lm: case Lo: case Lt: case Lu: case Ll: case Lm: case Lo: case Lt: case Lu:
return L;
return UCD_CATEGORY_GROUP_L;
case Mc: case Me: case Mn: case Mc: case Me: case Mn:
return M;
return UCD_CATEGORY_GROUP_M;
case Nd: case Nl: case No: case Nd: case Nl: case No:
return N;
return UCD_CATEGORY_GROUP_N;
case Pc: case Pd: case Pe: case Pf: case Pi: case Po: case Ps: case Pc: case Pd: case Pe: case Pf: case Pi: case Po: case Ps:
return P;
return UCD_CATEGORY_GROUP_P;
case Sc: case Sk: case Sm: case So: case Sc: case Sk: case Sm: case So:
return S;
return UCD_CATEGORY_GROUP_S;
case Zl: case Zp: case Zs: case Zl: case Zp: case Zs:
return Z;
return UCD_CATEGORY_GROUP_Z;
case Ii: case Ii:
return I;
return UCD_CATEGORY_GROUP_I;
} }
} }


ucd_category_group ucd_lookup_category_group(codepoint_t c) ucd_category_group ucd_lookup_category_group(codepoint_t c)
{ {
return (ucd_category_group)ucd::lookup_category_group(ucd::lookup_category(c));
}

ucd::category_group ucd::lookup_category_group(codepoint_t c)
{
return lookup_category_group(lookup_category(c));
return (ucd_category_group)ucd_get_category_group_for_category(ucd_lookup_category(c));
} }
""") """)

Loading…
Cancel
Save