Browse Source

F8D0-F8FF: Klingon

master
Reece H. Dunn 12 years ago
parent
commit
7e411b34e9
10 changed files with 124 additions and 13 deletions
  1. 6
    1
      Makefile.am
  2. 25
    4
      src/categories.cpp
  3. 1
    0
      src/include/ucd/ucd.h
  4. 25
    4
      src/scripts.cpp
  5. 41
    0
      supplemental/Klingon.txt
  6. 1
    0
      tests/printucddata.cpp
  7. 5
    2
      tools/categories.py
  8. 7
    0
      tools/printdata.py
  9. 5
    2
      tools/scripts.py
  10. 8
    0
      tools/ucd.py

+ 6
- 1
Makefile.am View File

@@ -76,14 +76,17 @@ data/ucd/UnicodeData.txt:
############################# libucd ##########################################

src/case.cpp: tools/case.py tools/ucd.py \
supplemental/Klingon.txt \
data/ucd/UnicodeData.txt
tools/case.py ${UCD_ROOTDIR} ${UCD_VERSION} > $@

src/categories.cpp: tools/categories.py tools/ucd.py \
supplemental/Klingon.txt \
data/ucd/UnicodeData.txt
tools/categories.py ${UCD_ROOTDIR} ${UCD_VERSION} > $@

src/scripts.cpp: tools/scripts.py tools/ucd.py \
supplemental/Klingon.txt \
data/language-subtag-registry \
data/ucd/Scripts.txt
tools/scripts.py ${UCD_ROOTDIR} ${UCD_VERSION} > $@
@@ -108,8 +111,10 @@ tests_printucddata_SOURCES = tests/printucddata.cpp
tests_printucddata_LDADD = src/libucd.la

tests/unicode-data.expected: tools/printdata.py tools/ucd.py \
supplemental/Klingon.txt \
data/ucd/UnicodeData.txt \
data/ucd/PropList.txt
data/ucd/PropList.txt \
data/ucd/Scripts.txt
tools/printdata.py ${UCD_ROOTDIR} > $@

tests/unicode-data.actual: tests/printucddata

+ 25
- 4
src/categories.cpp View File

@@ -1249,6 +1249,26 @@ static const uint8_t categories_00D700[256] =
/* F0 */ Lo, Lo, Lo, Lo, Lo, Lo, Lo, Lo, Lo, Lo, Lo, Lo, Cn, Cn, Cn, Cn,
};

// General category of every code point in 00F800..00F8FF, indexed by the low
// byte of the code point (lookup_category reads table[c % 256]).
//
// Every entry is Co (the value lookup_category returns for the rest of the
// Private Use Area) except the assignments listed in supplemental/Klingon.txt:
//   F8D0..F8E9 : Lo (Klingon letters)
//   F8F0..F8F9 : Nd (Klingon digits)
//   F8FD..F8FE : Po (Klingon comma / full stop)
//   F8FF       : So (Klingon mummification glyph)
static const uint8_t categories_00F800[256] =
{
/* 00 */ Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co,
/* 10 */ Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co,
/* 20 */ Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co,
/* 30 */ Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co,
/* 40 */ Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co,
/* 50 */ Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co,
/* 60 */ Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co,
/* 70 */ Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co,
/* 80 */ Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co,
/* 90 */ Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co,
/* A0 */ Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co,
/* B0 */ Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co,
/* C0 */ Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co,
/* D0 */ Lo, Lo, Lo, Lo, Lo, Lo, Lo, Lo, Lo, Lo, Lo, Lo, Lo, Lo, Lo, Lo,
/* E0 */ Lo, Lo, Lo, Lo, Lo, Lo, Lo, Lo, Lo, Lo, Co, Co, Co, Co, Co, Co,
/* F0 */ Nd, Nd, Nd, Nd, Nd, Nd, Nd, Nd, Nd, Nd, Co, Co, Co, Po, Po, So,
};

static const uint8_t categories_00FA00[256] =
{
/* 00 */ Lo, Lo, Lo, Lo, Lo, Lo, Lo, Lo, Lo, Lo, Lo, Lo, Lo, Lo, Lo, Lo,
@@ -2429,8 +2449,9 @@ static const uint8_t *categories_000000_00D7FF[] =
categories_00D700,
};

static const uint8_t *categories_00F900_02FAFF[] =
static const uint8_t *categories_00F800_02FAFF[] =
{
categories_00F800,
categories_Lo, // 00F900
categories_00FA00,
categories_00FB00,
@@ -2961,10 +2982,10 @@ ucd::category ucd::lookup_category(codepoint_t c)
return (ucd::category)table[c % 256];
}
if (c <= 0x00DFFF) return Cs; // 00D800..00DFFF : Surrogates
if (c <= 0x00F8FF) return Co; // 00E000..00F8FF : Private Use Area
if (c <= 0x02FAFF) // 00F900..02FAFF
if (c <= 0x00F7FF) return Co; // 00E000..00F7FF : Private Use Area
if (c <= 0x02FAFF) // 00F800..02FAFF
{
const uint8_t *table = categories_00F900_02FAFF[(c - 0x00F900) / 256];
const uint8_t *table = categories_00F800_02FAFF[(c - 0x00F800) / 256];
return (ucd::category)table[c % 256];
}
if (c <= 0x0DFFFF) return Cn; // 02FB00..0DFFFF : Unassigned

+ 1
- 0
src/include/ucd/ucd.h View File

@@ -202,6 +202,7 @@ namespace ucd
Phnx, /**< @brief Phoenician Script */
Plrd, /**< @brief Miao Script */
Prti, /**< @brief Inscriptional Parthian Script */
Qaak, /**< @brief Klingon Script */
Rjng, /**< @brief Rejang Script */
Runr, /**< @brief Runic Script */
Samr, /**< @brief Samaritan Script */

+ 25
- 4
src/scripts.cpp View File

@@ -1309,6 +1309,26 @@ static const uint8_t scripts_00D700[256] =
/* F0 */ Hang, Hang, Hang, Hang, Hang, Hang, Hang, Hang, Hang, Hang, Hang, Hang, Zzzz, Zzzz, Zzzz, Zzzz,
};

// Script of every code point in 00F800..00F8FF, indexed by the low byte of
// the code point (lookup_script reads table[c % 256]).
//
// Every entry is Zzzz (the value lookup_script returns for the rest of the
// Private Use Area) except the assignments listed in supplemental/Klingon.txt:
//   F8D0..F8E9 : Qaak (Klingon letters)
//   F8F0..F8F9 : Qaak (Klingon digits)
//   F8FD..F8FF : Zyyy (Klingon comma, full stop and mummification glyph)
static const uint8_t scripts_00F800[256] =
{
/* 00 */ Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz,
/* 10 */ Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz,
/* 20 */ Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz,
/* 30 */ Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz,
/* 40 */ Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz,
/* 50 */ Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz,
/* 60 */ Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz,
/* 70 */ Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz,
/* 80 */ Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz,
/* 90 */ Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz,
/* A0 */ Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz,
/* B0 */ Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz,
/* C0 */ Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz,
/* D0 */ Qaak, Qaak, Qaak, Qaak, Qaak, Qaak, Qaak, Qaak, Qaak, Qaak, Qaak, Qaak, Qaak, Qaak, Qaak, Qaak,
/* E0 */ Qaak, Qaak, Qaak, Qaak, Qaak, Qaak, Qaak, Qaak, Qaak, Qaak, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz,
/* F0 */ Qaak, Qaak, Qaak, Qaak, Qaak, Qaak, Qaak, Qaak, Qaak, Qaak, Zzzz, Zzzz, Zzzz, Zyyy, Zyyy, Zyyy,
};

static const uint8_t scripts_00FA00[256] =
{
/* 00 */ Hant, Hant, Hant, Hant, Hant, Hant, Hant, Hant, Hant, Hant, Hant, Hant, Hant, Hant, Hant, Hant,
@@ -2489,8 +2509,9 @@ static const uint8_t *scripts_000000_00D7FF[] =
scripts_00D700,
};

static const uint8_t *scripts_00F900_02FAFF[] =
static const uint8_t *scripts_00F800_02FAFF[] =
{
scripts_00F800,
scripts_Hant, // 00F900
scripts_00FA00,
scripts_00FB00,
@@ -3020,10 +3041,10 @@ ucd::script ucd::lookup_script(codepoint_t c)
const uint8_t *table = scripts_000000_00D7FF[(c - 0x000000) / 256];
return (ucd::script)table[c % 256];
}
if (c <= 0x00F8FF) return Zzzz; // 00D800..00F8FF : Surrogates / Private Use Area
if (c <= 0x02FAFF) // 00F900..02FAFF
if (c <= 0x00F7FF) return Zzzz; // 00D800..00F7FF : Surrogates / Private Use Area
if (c <= 0x02FAFF) // 00F800..02FAFF
{
const uint8_t *table = scripts_00F900_02FAFF[(c - 0x00F900) / 256];
const uint8_t *table = scripts_00F800_02FAFF[(c - 0x00F800) / 256];
return (ucd::script)table[c % 256];
}
if (c <= 0x0DFFFF) return Zzzz; // 02FB00..0DFFFF : Unassigned

+ 41
- 0
supplemental/Klingon.txt View File

@@ -0,0 +1,41 @@
# Code Point ; Script ; General Category ; Name ; Okrand Transliteration

F8D0;Qaak;Lo;KLINGON LETTER A;a
F8D1;Qaak;Lo;KLINGON LETTER B;b
F8D2;Qaak;Lo;KLINGON LETTER CH;ch
F8D3;Qaak;Lo;KLINGON LETTER D;D
F8D4;Qaak;Lo;KLINGON LETTER E;e
F8D5;Qaak;Lo;KLINGON LETTER GH;gh
F8D6;Qaak;Lo;KLINGON LETTER H;H
F8D7;Qaak;Lo;KLINGON LETTER I;I
F8D8;Qaak;Lo;KLINGON LETTER J;j
F8D9;Qaak;Lo;KLINGON LETTER L;l
F8DA;Qaak;Lo;KLINGON LETTER M;m
F8DB;Qaak;Lo;KLINGON LETTER N;n
F8DC;Qaak;Lo;KLINGON LETTER NG;ng
F8DD;Qaak;Lo;KLINGON LETTER O;o
F8DE;Qaak;Lo;KLINGON LETTER P;p
F8DF;Qaak;Lo;KLINGON LETTER Q;q
F8E0;Qaak;Lo;KLINGON LETTER QH;Q
F8E1;Qaak;Lo;KLINGON LETTER R;r
F8E2;Qaak;Lo;KLINGON LETTER S;S
F8E3;Qaak;Lo;KLINGON LETTER T;t
F8E4;Qaak;Lo;KLINGON LETTER TLH;tlh
F8E5;Qaak;Lo;KLINGON LETTER U;u
F8E6;Qaak;Lo;KLINGON LETTER V;v
F8E7;Qaak;Lo;KLINGON LETTER W;w
F8E8;Qaak;Lo;KLINGON LETTER Y;y
F8E9;Qaak;Lo;KLINGON LETTER GLOTTAL STOP;'
F8F0;Qaak;Nd;KLINGON DIGIT ZERO;0
F8F1;Qaak;Nd;KLINGON DIGIT ONE;1
F8F2;Qaak;Nd;KLINGON DIGIT TWO;2
F8F3;Qaak;Nd;KLINGON DIGIT THREE;3
F8F4;Qaak;Nd;KLINGON DIGIT FOUR;4
F8F5;Qaak;Nd;KLINGON DIGIT FIVE;5
F8F6;Qaak;Nd;KLINGON DIGIT SIX;6
F8F7;Qaak;Nd;KLINGON DIGIT SEVEN;7
F8F8;Qaak;Nd;KLINGON DIGIT EIGHT;8
F8F9;Qaak;Nd;KLINGON DIGIT NINE;9
F8FD;Zyyy;Po;KLINGON COMMA;,
F8FE;Zyyy;Po;KLINGON FULL STOP;.
F8FF;Zyyy;So;KLINGON MUMMIFICATION GLYPH;

+ 1
- 0
tests/printucddata.cpp View File

@@ -154,6 +154,7 @@ const char *get_script_string(ucd::script s)
case Phnx: return "Phnx";
case Plrd: return "Plrd";
case Prti: return "Prti";
case Qaak: return "Qaak";
case Rjng: return "Rjng";
case Runr: return "Runr";
case Samr: return "Samr";

+ 5
- 2
tools/categories.py View File

@@ -28,6 +28,9 @@ unicode_chars = {}
for data in ucd.parse_ucd_data(ucd_rootdir, 'UnicodeData'):
for codepoint in data['CodePoint']:
unicode_chars[codepoint] = data['GeneralCategory']
for data in ucd.parse_ucd_data('supplemental', 'Klingon'):
for codepoint in data['CodePoint']:
unicode_chars[codepoint] = data['GeneralCategory']

# This map is a combination of the information in the UnicodeData and Blocks
# data files. It is intended to reduce the number of character tables that
@@ -35,8 +38,8 @@ for data in ucd.parse_ucd_data(ucd_rootdir, 'UnicodeData'):
category_sets = [
(ucd.CodeRange('000000..00D7FF'), None, 'Multiple Blocks'),
(ucd.CodeRange('00D800..00DFFF'), 'Cs', 'Surrogates'),
(ucd.CodeRange('00E000..00F8FF'), 'Co', 'Private Use Area'),
(ucd.CodeRange('00F900..02FAFF'), None, 'Multiple Blocks'),
(ucd.CodeRange('00E000..00F7FF'), 'Co', 'Private Use Area'),
(ucd.CodeRange('00F800..02FAFF'), None, 'Multiple Blocks'),
(ucd.CodeRange('02FB00..0DFFFF'), 'Cn', 'Unassigned'),
(ucd.CodeRange('0E0000..0E01FF'), None, 'Multiple Blocks'),
(ucd.CodeRange('0E0200..0EFFFF'), 'Cn', 'Unassigned'),

+ 7
- 0
tools/printdata.py View File

@@ -35,6 +35,13 @@ for data in ucd.parse_ucd_data(ucd_rootdir, 'PropList'):
for data in ucd.parse_ucd_data(ucd_rootdir, 'Scripts'):
for codepoint in data['Range']:
unicode_chars[codepoint]['Script'] = data['Script']
for data in ucd.parse_ucd_data('supplemental', 'Klingon'):
for codepoint in data['CodePoint']:
unicode_chars[codepoint] = data
unicode_chars[codepoint]['Properties'] = []
unicode_chars[codepoint]['UpperCase'] = ucd.CodePoint('0000')
unicode_chars[codepoint]['LowerCase'] = ucd.CodePoint('0000')
unicode_chars[codepoint]['TitleCase'] = ucd.CodePoint('0000')

null = ucd.CodePoint('0000')
if __name__ == '__main__':

+ 5
- 2
tools/scripts.py View File

@@ -28,14 +28,17 @@ unicode_chars = {}
for data in ucd.parse_ucd_data(ucd_rootdir, 'Scripts'):
for codepoint in data['Range']:
unicode_chars[codepoint] = data['Script']
for data in ucd.parse_ucd_data('supplemental', 'Klingon'):
for codepoint in data['CodePoint']:
unicode_chars[codepoint] = data['Script']

# This map is a combination of the information in the UnicodeData and Blocks
# data files. It is intended to reduce the number of character tables that
# need to be generated.
script_sets = [
(ucd.CodeRange('000000..00D7FF'), None, 'Multiple Blocks'),
(ucd.CodeRange('00D800..00F8FF'), 'Zzzz', 'Surrogates / Private Use Area'),
(ucd.CodeRange('00F900..02FAFF'), None, 'Multiple Blocks'),
(ucd.CodeRange('00D800..00F7FF'), 'Zzzz', 'Surrogates / Private Use Area'),
(ucd.CodeRange('00F800..02FAFF'), None, 'Multiple Blocks'),
(ucd.CodeRange('02FB00..0DFFFF'), 'Zzzz', 'Unassigned'),
(ucd.CodeRange('0E0000..0E01FF'), None, 'Multiple Blocks'),
(ucd.CodeRange('0E0200..10FFFF'), 'Zzzz', 'Unassigned'),

+ 8
- 0
tools/ucd.py View File

@@ -147,6 +147,14 @@ data_items = {
('LowerCase', codepoint),
('TitleCase', codepoint),
],
# Supplemental Data:
'Klingon': [
('CodePoint', codepoint),
('Script', str),
('GeneralCategory', string),
('Name', string),
('Transliteration', string),
],
}

def parse_ucd_data(ucd_rootdir, dataset):

Loading…
Cancel
Save