Browse Source

F8D0-F8FF: Klingon

master
Reece H. Dunn 12 years ago
parent
commit
7e411b34e9
10 changed files with 124 additions and 13 deletions
  1. 6
    1
      Makefile.am
  2. 25
    4
      src/categories.cpp
  3. 1
    0
      src/include/ucd/ucd.h
  4. 25
    4
      src/scripts.cpp
  5. 41
    0
      supplemental/Klingon.txt
  6. 1
    0
      tests/printucddata.cpp
  7. 5
    2
      tools/categories.py
  8. 7
    0
      tools/printdata.py
  9. 5
    2
      tools/scripts.py
  10. 8
    0
      tools/ucd.py

+ 6
- 1
Makefile.am View File

############################# libucd ########################################## ############################# libucd ##########################################


src/case.cpp: tools/case.py tools/ucd.py \ src/case.cpp: tools/case.py tools/ucd.py \
supplemental/Klingon.txt \
data/ucd/UnicodeData.txt data/ucd/UnicodeData.txt
tools/case.py ${UCD_ROOTDIR} ${UCD_VERSION} > $@ tools/case.py ${UCD_ROOTDIR} ${UCD_VERSION} > $@


src/categories.cpp: tools/categories.py tools/ucd.py \ src/categories.cpp: tools/categories.py tools/ucd.py \
supplemental/Klingon.txt \
data/ucd/UnicodeData.txt data/ucd/UnicodeData.txt
tools/categories.py ${UCD_ROOTDIR} ${UCD_VERSION} > $@ tools/categories.py ${UCD_ROOTDIR} ${UCD_VERSION} > $@


src/scripts.cpp: tools/scripts.py tools/ucd.py \ src/scripts.cpp: tools/scripts.py tools/ucd.py \
supplemental/Klingon.txt \
data/language-subtag-registry \ data/language-subtag-registry \
data/ucd/Scripts.txt data/ucd/Scripts.txt
tools/scripts.py ${UCD_ROOTDIR} ${UCD_VERSION} > $@ tools/scripts.py ${UCD_ROOTDIR} ${UCD_VERSION} > $@
tests_printucddata_LDADD = src/libucd.la tests_printucddata_LDADD = src/libucd.la


tests/unicode-data.expected: tools/printdata.py tools/ucd.py \ tests/unicode-data.expected: tools/printdata.py tools/ucd.py \
supplemental/Klingon.txt \
data/ucd/UnicodeData.txt \ data/ucd/UnicodeData.txt \
data/ucd/PropList.txt
data/ucd/PropList.txt \
data/ucd/Scripts.txt
tools/printdata.py ${UCD_ROOTDIR} > $@ tools/printdata.py ${UCD_ROOTDIR} > $@


tests/unicode-data.actual: tests/printucddata tests/unicode-data.actual: tests/printucddata

+ 25
- 4
src/categories.cpp View File

/* F0 */ Lo, Lo, Lo, Lo, Lo, Lo, Lo, Lo, Lo, Lo, Lo, Lo, Cn, Cn, Cn, Cn, /* F0 */ Lo, Lo, Lo, Lo, Lo, Lo, Lo, Lo, Lo, Lo, Lo, Lo, Cn, Cn, Cn, Cn,
}; };


// General Category values for the code points U+F800..U+F8FF. The lookup code
// selects this table with (c - 0x00F800) / 256 and indexes it with c % 256.
// Every entry is Co (Private Use Area) except the code points assigned by
// supplemental/Klingon.txt: F8D0..F8E9 letters (Lo), F8F0..F8F9 digits (Nd),
// F8FD..F8FE punctuation (Po) and F8FF (So).
// NOTE(review): Makefile.am builds src/categories.cpp from tools/categories.py,
// so changes presumably belong in the generator or the data file -- confirm.
static const uint8_t categories_00F800[256] =
{
/* 00 */ Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co,
/* 10 */ Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co,
/* 20 */ Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co,
/* 30 */ Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co,
/* 40 */ Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co,
/* 50 */ Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co,
/* 60 */ Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co,
/* 70 */ Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co,
/* 80 */ Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co,
/* 90 */ Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co,
/* A0 */ Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co,
/* B0 */ Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co,
/* C0 */ Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co, Co,
/* D0 */ Lo, Lo, Lo, Lo, Lo, Lo, Lo, Lo, Lo, Lo, Lo, Lo, Lo, Lo, Lo, Lo,
/* E0 */ Lo, Lo, Lo, Lo, Lo, Lo, Lo, Lo, Lo, Lo, Co, Co, Co, Co, Co, Co,
/* F0 */ Nd, Nd, Nd, Nd, Nd, Nd, Nd, Nd, Nd, Nd, Co, Co, Co, Po, Po, So,
};

static const uint8_t categories_00FA00[256] = static const uint8_t categories_00FA00[256] =
{ {
/* 00 */ Lo, Lo, Lo, Lo, Lo, Lo, Lo, Lo, Lo, Lo, Lo, Lo, Lo, Lo, Lo, Lo, /* 00 */ Lo, Lo, Lo, Lo, Lo, Lo, Lo, Lo, Lo, Lo, Lo, Lo, Lo, Lo, Lo, Lo,
categories_00D700, categories_00D700,
}; };


static const uint8_t *categories_00F900_02FAFF[] =
static const uint8_t *categories_00F800_02FAFF[] =
{ {
categories_00F800,
categories_Lo, // 00F900 categories_Lo, // 00F900
categories_00FA00, categories_00FA00,
categories_00FB00, categories_00FB00,
return (ucd::category)table[c % 256]; return (ucd::category)table[c % 256];
} }
if (c <= 0x00DFFF) return Cs; // 00D800..00DFFF : Surrogates if (c <= 0x00DFFF) return Cs; // 00D800..00DFFF : Surrogates
if (c <= 0x00F8FF) return Co; // 00E000..00F8FF : Private Use Area
if (c <= 0x02FAFF) // 00F900..02FAFF
if (c <= 0x00F7FF) return Co; // 00E000..00F7FF : Private Use Area
if (c <= 0x02FAFF) // 00F800..02FAFF
{ {
const uint8_t *table = categories_00F900_02FAFF[(c - 0x00F900) / 256];
const uint8_t *table = categories_00F800_02FAFF[(c - 0x00F800) / 256];
return (ucd::category)table[c % 256]; return (ucd::category)table[c % 256];
} }
if (c <= 0x0DFFFF) return Cn; // 02FB00..0DFFFF : Unassigned if (c <= 0x0DFFFF) return Cn; // 02FB00..0DFFFF : Unassigned

+ 1
- 0
src/include/ucd/ucd.h View File

Phnx, /**< @brief Phoenician Script */ Phnx, /**< @brief Phoenician Script */
Plrd, /**< @brief Miao Script */ Plrd, /**< @brief Miao Script */
Prti, /**< @brief Inscriptional Parthian Script */ Prti, /**< @brief Inscriptional Parthian Script */
Qaak, /**< @brief Klingon Script */
Rjng, /**< @brief Rejang Script */ Rjng, /**< @brief Rejang Script */
Runr, /**< @brief Runic Script */ Runr, /**< @brief Runic Script */
Samr, /**< @brief Samaritan Script */ Samr, /**< @brief Samaritan Script */

+ 25
- 4
src/scripts.cpp View File

/* F0 */ Hang, Hang, Hang, Hang, Hang, Hang, Hang, Hang, Hang, Hang, Hang, Hang, Zzzz, Zzzz, Zzzz, Zzzz, /* F0 */ Hang, Hang, Hang, Hang, Hang, Hang, Hang, Hang, Hang, Hang, Hang, Hang, Zzzz, Zzzz, Zzzz, Zzzz,
}; };


// Script codes for the code points U+F800..U+F8FF. The lookup code selects
// this table with (c - 0x00F800) / 256 and indexes it with c % 256.
// Every entry is Zzzz except the code points assigned by
// supplemental/Klingon.txt: F8D0..F8E9 letters and F8F0..F8F9 digits are Qaak
// (Klingon), and F8FD..F8FF (comma, full stop, mummification glyph) are Zyyy.
// NOTE(review): Makefile.am builds src/scripts.cpp from tools/scripts.py, so
// changes presumably belong in the generator or the data file -- confirm.
static const uint8_t scripts_00F800[256] =
{
/* 00 */ Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz,
/* 10 */ Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz,
/* 20 */ Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz,
/* 30 */ Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz,
/* 40 */ Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz,
/* 50 */ Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz,
/* 60 */ Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz,
/* 70 */ Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz,
/* 80 */ Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz,
/* 90 */ Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz,
/* A0 */ Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz,
/* B0 */ Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz,
/* C0 */ Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz,
/* D0 */ Qaak, Qaak, Qaak, Qaak, Qaak, Qaak, Qaak, Qaak, Qaak, Qaak, Qaak, Qaak, Qaak, Qaak, Qaak, Qaak,
/* E0 */ Qaak, Qaak, Qaak, Qaak, Qaak, Qaak, Qaak, Qaak, Qaak, Qaak, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz, Zzzz,
/* F0 */ Qaak, Qaak, Qaak, Qaak, Qaak, Qaak, Qaak, Qaak, Qaak, Qaak, Zzzz, Zzzz, Zzzz, Zyyy, Zyyy, Zyyy,
};

static const uint8_t scripts_00FA00[256] = static const uint8_t scripts_00FA00[256] =
{ {
/* 00 */ Hant, Hant, Hant, Hant, Hant, Hant, Hant, Hant, Hant, Hant, Hant, Hant, Hant, Hant, Hant, Hant, /* 00 */ Hant, Hant, Hant, Hant, Hant, Hant, Hant, Hant, Hant, Hant, Hant, Hant, Hant, Hant, Hant, Hant,
scripts_00D700, scripts_00D700,
}; };


static const uint8_t *scripts_00F900_02FAFF[] =
static const uint8_t *scripts_00F800_02FAFF[] =
{ {
scripts_00F800,
scripts_Hant, // 00F900 scripts_Hant, // 00F900
scripts_00FA00, scripts_00FA00,
scripts_00FB00, scripts_00FB00,
const uint8_t *table = scripts_000000_00D7FF[(c - 0x000000) / 256]; const uint8_t *table = scripts_000000_00D7FF[(c - 0x000000) / 256];
return (ucd::script)table[c % 256]; return (ucd::script)table[c % 256];
} }
if (c <= 0x00F8FF) return Zzzz; // 00D800..00F8FF : Surrogates / Private Use Area
if (c <= 0x02FAFF) // 00F900..02FAFF
if (c <= 0x00F7FF) return Zzzz; // 00D800..00F7FF : Surrogates / Private Use Area
if (c <= 0x02FAFF) // 00F800..02FAFF
{ {
const uint8_t *table = scripts_00F900_02FAFF[(c - 0x00F900) / 256];
const uint8_t *table = scripts_00F800_02FAFF[(c - 0x00F800) / 256];
return (ucd::script)table[c % 256]; return (ucd::script)table[c % 256];
} }
if (c <= 0x0DFFFF) return Zzzz; // 02FB00..0DFFFF : Unassigned if (c <= 0x0DFFFF) return Zzzz; // 02FB00..0DFFFF : Unassigned

+ 41
- 0
supplemental/Klingon.txt View File

# Code Point ; Script ; General Category ; Name ; Okrand Transliteration

F8D0;Qaak;Lo;KLINGON LETTER A;a
F8D1;Qaak;Lo;KLINGON LETTER B;b
F8D2;Qaak;Lo;KLINGON LETTER CH;ch
F8D3;Qaak;Lo;KLINGON LETTER D;D
F8D4;Qaak;Lo;KLINGON LETTER E;e
F8D5;Qaak;Lo;KLINGON LETTER GH;gh
F8D6;Qaak;Lo;KLINGON LETTER H;H
F8D7;Qaak;Lo;KLINGON LETTER I;I
F8D8;Qaak;Lo;KLINGON LETTER J;j
F8D9;Qaak;Lo;KLINGON LETTER L;l
F8DA;Qaak;Lo;KLINGON LETTER M;m
F8DB;Qaak;Lo;KLINGON LETTER N;n
F8DC;Qaak;Lo;KLINGON LETTER NG;ng
F8DD;Qaak;Lo;KLINGON LETTER O;o
F8DE;Qaak;Lo;KLINGON LETTER P;p
F8DF;Qaak;Lo;KLINGON LETTER Q;q
F8E0;Qaak;Lo;KLINGON LETTER QH;Q
F8E1;Qaak;Lo;KLINGON LETTER R;r
F8E2;Qaak;Lo;KLINGON LETTER S;S
F8E3;Qaak;Lo;KLINGON LETTER T;t
F8E4;Qaak;Lo;KLINGON LETTER TLH;tlh
F8E5;Qaak;Lo;KLINGON LETTER U;u
F8E6;Qaak;Lo;KLINGON LETTER V;v
F8E7;Qaak;Lo;KLINGON LETTER W;w
F8E8;Qaak;Lo;KLINGON LETTER Y;y
F8E9;Qaak;Lo;KLINGON LETTER GLOTTAL STOP;'
F8F0;Qaak;Nd;KLINGON DIGIT ZERO;0
F8F1;Qaak;Nd;KLINGON DIGIT ONE;1
F8F2;Qaak;Nd;KLINGON DIGIT TWO;2
F8F3;Qaak;Nd;KLINGON DIGIT THREE;3
F8F4;Qaak;Nd;KLINGON DIGIT FOUR;4
F8F5;Qaak;Nd;KLINGON DIGIT FIVE;5
F8F6;Qaak;Nd;KLINGON DIGIT SIX;6
F8F7;Qaak;Nd;KLINGON DIGIT SEVEN;7
F8F8;Qaak;Nd;KLINGON DIGIT EIGHT;8
F8F9;Qaak;Nd;KLINGON DIGIT NINE;9
F8FD;Zyyy;Po;KLINGON COMMA;,
F8FE;Zyyy;Po;KLINGON FULL STOP;.
F8FF;Zyyy;So;KLINGON MUMMIFICATION GLYPH;

+ 1
- 0
tests/printucddata.cpp View File

case Phnx: return "Phnx"; case Phnx: return "Phnx";
case Plrd: return "Plrd"; case Plrd: return "Plrd";
case Prti: return "Prti"; case Prti: return "Prti";
case Qaak: return "Qaak";
case Rjng: return "Rjng"; case Rjng: return "Rjng";
case Runr: return "Runr"; case Runr: return "Runr";
case Samr: return "Samr"; case Samr: return "Samr";

+ 5
- 2
tools/categories.py View File

for data in ucd.parse_ucd_data(ucd_rootdir, 'UnicodeData'): for data in ucd.parse_ucd_data(ucd_rootdir, 'UnicodeData'):
for codepoint in data['CodePoint']: for codepoint in data['CodePoint']:
unicode_chars[codepoint] = data['GeneralCategory'] unicode_chars[codepoint] = data['GeneralCategory']
for data in ucd.parse_ucd_data('supplemental', 'Klingon'):
for codepoint in data['CodePoint']:
unicode_chars[codepoint] = data['GeneralCategory']


# This map is a combination of the information in the UnicodeData and Blocks # This map is a combination of the information in the UnicodeData and Blocks
# data files. It is intended to reduce the number of character tables that # data files. It is intended to reduce the number of character tables that
category_sets = [ category_sets = [
(ucd.CodeRange('000000..00D7FF'), None, 'Multiple Blocks'), (ucd.CodeRange('000000..00D7FF'), None, 'Multiple Blocks'),
(ucd.CodeRange('00D800..00DFFF'), 'Cs', 'Surrogates'), (ucd.CodeRange('00D800..00DFFF'), 'Cs', 'Surrogates'),
(ucd.CodeRange('00E000..00F8FF'), 'Co', 'Private Use Area'),
(ucd.CodeRange('00F900..02FAFF'), None, 'Multiple Blocks'),
(ucd.CodeRange('00E000..00F7FF'), 'Co', 'Private Use Area'),
(ucd.CodeRange('00F800..02FAFF'), None, 'Multiple Blocks'),
(ucd.CodeRange('02FB00..0DFFFF'), 'Cn', 'Unassigned'), (ucd.CodeRange('02FB00..0DFFFF'), 'Cn', 'Unassigned'),
(ucd.CodeRange('0E0000..0E01FF'), None, 'Multiple Blocks'), (ucd.CodeRange('0E0000..0E01FF'), None, 'Multiple Blocks'),
(ucd.CodeRange('0E0200..0EFFFF'), 'Cn', 'Unassigned'), (ucd.CodeRange('0E0200..0EFFFF'), 'Cn', 'Unassigned'),

+ 7
- 0
tools/printdata.py View File

for data in ucd.parse_ucd_data(ucd_rootdir, 'Scripts'): for data in ucd.parse_ucd_data(ucd_rootdir, 'Scripts'):
for codepoint in data['Range']: for codepoint in data['Range']:
unicode_chars[codepoint]['Script'] = data['Script'] unicode_chars[codepoint]['Script'] = data['Script']
for data in ucd.parse_ucd_data('supplemental', 'Klingon'):
for codepoint in data['CodePoint']:
unicode_chars[codepoint] = data
unicode_chars[codepoint]['Properties'] = []
unicode_chars[codepoint]['UpperCase'] = ucd.CodePoint('0000')
unicode_chars[codepoint]['LowerCase'] = ucd.CodePoint('0000')
unicode_chars[codepoint]['TitleCase'] = ucd.CodePoint('0000')


null = ucd.CodePoint('0000') null = ucd.CodePoint('0000')
if __name__ == '__main__': if __name__ == '__main__':

+ 5
- 2
tools/scripts.py View File

for data in ucd.parse_ucd_data(ucd_rootdir, 'Scripts'): for data in ucd.parse_ucd_data(ucd_rootdir, 'Scripts'):
for codepoint in data['Range']: for codepoint in data['Range']:
unicode_chars[codepoint] = data['Script'] unicode_chars[codepoint] = data['Script']
for data in ucd.parse_ucd_data('supplemental', 'Klingon'):
for codepoint in data['CodePoint']:
unicode_chars[codepoint] = data['Script']


# This map is a combination of the information in the UnicodeData and Blocks # This map is a combination of the information in the UnicodeData and Blocks
# data files. It is intended to reduce the number of character tables that # data files. It is intended to reduce the number of character tables that
# need to be generated. # need to be generated.
script_sets = [ script_sets = [
(ucd.CodeRange('000000..00D7FF'), None, 'Multiple Blocks'), (ucd.CodeRange('000000..00D7FF'), None, 'Multiple Blocks'),
(ucd.CodeRange('00D800..00F8FF'), 'Zzzz', 'Surrogates / Private Use Area'),
(ucd.CodeRange('00F900..02FAFF'), None, 'Multiple Blocks'),
(ucd.CodeRange('00D800..00F7FF'), 'Zzzz', 'Surrogates / Private Use Area'),
(ucd.CodeRange('00F800..02FAFF'), None, 'Multiple Blocks'),
(ucd.CodeRange('02FB00..0DFFFF'), 'Zzzz', 'Unassigned'), (ucd.CodeRange('02FB00..0DFFFF'), 'Zzzz', 'Unassigned'),
(ucd.CodeRange('0E0000..0E01FF'), None, 'Multiple Blocks'), (ucd.CodeRange('0E0000..0E01FF'), None, 'Multiple Blocks'),
(ucd.CodeRange('0E0200..10FFFF'), 'Zzzz', 'Unassigned'), (ucd.CodeRange('0E0200..10FFFF'), 'Zzzz', 'Unassigned'),

+ 8
- 0
tools/ucd.py View File

('LowerCase', codepoint), ('LowerCase', codepoint),
('TitleCase', codepoint), ('TitleCase', codepoint),
], ],
# Supplemental Data:
'Klingon': [
('CodePoint', codepoint),
('Script', str),
('GeneralCategory', string),
('Name', string),
('Transliteration', string),
],
} }


def parse_ucd_data(ucd_rootdir, dataset): def parse_ucd_data(ucd_rootdir, dataset):

Loading…
Cancel
Save