Browse Source

ucd-tools: Punctuation_In_Word eSpeakNG extended property support; use in clause_type_from_codepoint.

master
Reece H. Dunn 8 years ago
parent
commit
bc13173ac4

+ 4
- 8
src/libespeak-ng/tokenizer.c View File

static const unsigned short punct_chars[] = { static const unsigned short punct_chars[] = {
0x00a1, // inverted exclamation 0x00a1, // inverted exclamation
0x00bf, // inverted question 0x00bf, // inverted question

0x055c, // Armenian exclamation
0x055e, // Armenian question
0x055b, // Armenian emphasis mark 0x055b, // Armenian emphasis mark

0 0
}; };


static const unsigned int punct_attributes[] = { static const unsigned int punct_attributes[] = {
CLAUSE_SEMICOLON | CLAUSE_OPTIONAL_SPACE_AFTER, // inverted exclamation CLAUSE_SEMICOLON | CLAUSE_OPTIONAL_SPACE_AFTER, // inverted exclamation
CLAUSE_SEMICOLON | CLAUSE_OPTIONAL_SPACE_AFTER, // inverted question CLAUSE_SEMICOLON | CLAUSE_OPTIONAL_SPACE_AFTER, // inverted question

CLAUSE_EXCLAMATION | CLAUSE_PUNCTUATION_IN_WORD, // Armenian exclamation
CLAUSE_QUESTION | CLAUSE_PUNCTUATION_IN_WORD, // Armenian question
CLAUSE_PERIOD | CLAUSE_PUNCTUATION_IN_WORD, // Armenian emphasis mark CLAUSE_PERIOD | CLAUSE_PUNCTUATION_IN_WORD, // Armenian emphasis mark

0 0
}; };


return CLAUSE_QUESTION; return CLAUSE_QUESTION;
case ESPEAKNG_PROPERTY_QUESTION_MARK | ESPEAKNG_PROPERTY_OPTIONAL_SPACE_AFTER: case ESPEAKNG_PROPERTY_QUESTION_MARK | ESPEAKNG_PROPERTY_OPTIONAL_SPACE_AFTER:
return CLAUSE_QUESTION | CLAUSE_OPTIONAL_SPACE_AFTER; return CLAUSE_QUESTION | CLAUSE_OPTIONAL_SPACE_AFTER;
case ESPEAKNG_PROPERTY_QUESTION_MARK | ESPEAKNG_PROPERTY_PUNCTUATION_IN_WORD:
return CLAUSE_QUESTION | CLAUSE_PUNCTUATION_IN_WORD;
case ESPEAKNG_PROPERTY_EXCLAMATION_MARK: case ESPEAKNG_PROPERTY_EXCLAMATION_MARK:
return CLAUSE_EXCLAMATION; return CLAUSE_EXCLAMATION;
case ESPEAKNG_PROPERTY_EXCLAMATION_MARK | ESPEAKNG_PROPERTY_OPTIONAL_SPACE_AFTER: case ESPEAKNG_PROPERTY_EXCLAMATION_MARK | ESPEAKNG_PROPERTY_OPTIONAL_SPACE_AFTER:
return CLAUSE_EXCLAMATION | CLAUSE_OPTIONAL_SPACE_AFTER; return CLAUSE_EXCLAMATION | CLAUSE_OPTIONAL_SPACE_AFTER;
case ESPEAKNG_PROPERTY_EXCLAMATION_MARK | ESPEAKNG_PROPERTY_PUNCTUATION_IN_WORD:
return CLAUSE_EXCLAMATION | CLAUSE_PUNCTUATION_IN_WORD;
case ESPEAKNG_PROPERTY_COMMA: case ESPEAKNG_PROPERTY_COMMA:
return CLAUSE_COMMA; return CLAUSE_COMMA;
case ESPEAKNG_PROPERTY_COMMA | ESPEAKNG_PROPERTY_OPTIONAL_SPACE_AFTER: case ESPEAKNG_PROPERTY_COMMA | ESPEAKNG_PROPERTY_OPTIONAL_SPACE_AFTER:

+ 8
- 1
src/ucd-tools/data/espeak-ng/PropList.txt View File

003F ; Question_Mark # Po QUESTION MARK 003F ; Question_Mark # Po QUESTION MARK
00BF ; Question_Mark # Po INVERTED QUESTION MARK 00BF ; Question_Mark # Po INVERTED QUESTION MARK
037E ; Question_Mark # Po GREEK QUESTION MARK 037E ; Question_Mark # Po GREEK QUESTION MARK
055F ; Question_Mark # Po ARMENIAN QUESTION MARK
055E ; Question_Mark # Po ARMENIAN QUESTION MARK
061F ; Question_Mark # Po ARABIC QUESTION MARK 061F ; Question_Mark # Po ARABIC QUESTION MARK
0709 ; Question_Mark # Po SYRIAC SUBLINEAR COLON SKEWED RIGHT 0709 ; Question_Mark # Po SYRIAC SUBLINEAR COLON SKEWED RIGHT
1367 ; Question_Mark # Po ETHIOPIC QUESTION MARK 1367 ; Question_Mark # Po ETHIOPIC QUESTION MARK
FF1F ; Optional_Space_After # Po FULLWIDTH QUESTION MARK FF1F ; Optional_Space_After # Po FULLWIDTH QUESTION MARK


# Total code points: 14 # Total code points: 14

# ================================================

055B..055C ; Punctuation_In_Word # Po [2] ARMENIAN EMPHASIS MARK..ARMENIAN EXCLAMATION MARK
055E ; Punctuation_In_Word # Po ARMENIAN QUESTION MARK

# Total code points: 3

+ 1
- 0
src/ucd-tools/src/include/ucd/ucd.h View File

#define UCD_PROPERTY_PREPENDED_CONCATENATION_MARK 0x0000000100000000ull /**< @brief Prepended_Concatenation_Mark */ #define UCD_PROPERTY_PREPENDED_CONCATENATION_MARK 0x0000000100000000ull /**< @brief Prepended_Concatenation_Mark */


// eSpeak NG extended properties: // eSpeak NG extended properties:
#define ESPEAKNG_PROPERTY_PUNCTUATION_IN_WORD 0x0020000000000000ull /**< @brief Punctuation_In_Word */
#define ESPEAKNG_PROPERTY_OPTIONAL_SPACE_AFTER 0x0040000000000000ull /**< @brief Optional_Space_After */ #define ESPEAKNG_PROPERTY_OPTIONAL_SPACE_AFTER 0x0040000000000000ull /**< @brief Optional_Space_After */
#define ESPEAKNG_PROPERTY_EXTENDED_DASH 0x0080000000000000ull /**< @brief Extended_Dash */ #define ESPEAKNG_PROPERTY_EXTENDED_DASH 0x0080000000000000ull /**< @brief Extended_Dash */
#define ESPEAKNG_PROPERTY_PARAGRAPH_SEPARATOR 0x0100000000000000ull /**< @brief Paragraph_Separator */ #define ESPEAKNG_PROPERTY_PARAGRAPH_SEPARATOR 0x0100000000000000ull /**< @brief Paragraph_Separator */

+ 2
- 2
src/ucd-tools/src/proplist.c View File

if (c == 0x0387) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_OTHER_ID_CONTINUE | ESPEAKNG_PROPERTY_SEMI_COLON; if (c == 0x0387) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_OTHER_ID_CONTINUE | ESPEAKNG_PROPERTY_SEMI_COLON;
break; break;
case 0x0500: case 0x0500:
if (c >= 0x055B && c <= 0x055C) return ESPEAKNG_PROPERTY_EXCLAMATION_MARK;
if (c >= 0x055B && c <= 0x055C) return ESPEAKNG_PROPERTY_EXCLAMATION_MARK | ESPEAKNG_PROPERTY_PUNCTUATION_IN_WORD;
if (c == 0x055D) return ESPEAKNG_PROPERTY_COMMA; if (c == 0x055D) return ESPEAKNG_PROPERTY_COMMA;
if (c == 0x055F) return ESPEAKNG_PROPERTY_QUESTION_MARK;
if (c == 0x055E) return ESPEAKNG_PROPERTY_QUESTION_MARK | ESPEAKNG_PROPERTY_PUNCTUATION_IN_WORD;
if (c == 0x0589) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_SENTENCE_TERMINAL | ESPEAKNG_PROPERTY_FULL_STOP | ESPEAKNG_PROPERTY_OPTIONAL_SPACE_AFTER; if (c == 0x0589) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_SENTENCE_TERMINAL | ESPEAKNG_PROPERTY_FULL_STOP | ESPEAKNG_PROPERTY_OPTIONAL_SPACE_AFTER;
if (c == 0x05C3) return UCD_PROPERTY_TERMINAL_PUNCTUATION; if (c == 0x05C3) return UCD_PROPERTY_TERMINAL_PUNCTUATION;
break; break;

+ 1
- 0
src/ucd-tools/tools/printdata.py View File

props += (2 ** 31) * data.get('Pattern_Syntax', 0) props += (2 ** 31) * data.get('Pattern_Syntax', 0)
props += (2 ** 32) * data.get('Prepended_Concatenation_Mark', 0) props += (2 ** 32) * data.get('Prepended_Concatenation_Mark', 0)
# eSpeak NG extended properties: # eSpeak NG extended properties:
props += (2 ** 53) * data.get('Punctuation_In_Word', 0)
props += (2 ** 54) * data.get('Optional_Space_After', 0) props += (2 ** 54) * data.get('Optional_Space_After', 0)
props += (2 ** 55) * data.get('Extended_Dash', 0) props += (2 ** 55) * data.get('Extended_Dash', 0)
props += (2 ** 56) * data.get('Paragraph_Separator', 0) props += (2 ** 56) * data.get('Paragraph_Separator', 0)

Loading…
Cancel
Save