/* Tokenizer APIs. | /* Tokenizer APIs. | ||||
* | * | ||||
* Copyright (C) 2005 to 2015 by Jonathan Duddington | |||||
* email: [email protected] | |||||
* Copyright (C) 2017 Reece H. Dunn | * Copyright (C) 2017 Reece H. Dunn | ||||
* | * | ||||
* This program is free software; you can redistribute it and/or modify | * This program is free software; you can redistribute it and/or modify | ||||
#include "synthesize.h" | #include "synthesize.h" | ||||
#include "translate.h" | #include "translate.h" | ||||
// punctuations symbols that can end a clause | |||||
static const unsigned short punct_chars[] = { | |||||
0x00a1, // inverted exclamation | |||||
0x00bf, // inverted question | |||||
0 | |||||
}; | |||||
// indexed by entry num. in punct_chars | |||||
static const unsigned int punct_attributes[] = { | |||||
CLAUSE_SEMICOLON | CLAUSE_OPTIONAL_SPACE_AFTER, // inverted exclamation | |||||
CLAUSE_SEMICOLON | CLAUSE_OPTIONAL_SPACE_AFTER, // inverted question | |||||
0 | |||||
}; | |||||
#define ESPEAKNG_CLAUSE_TYPE_PROPERTY_MASK 0xFFF0000000000000ull | #define ESPEAKNG_CLAUSE_TYPE_PROPERTY_MASK 0xFFF0000000000000ull | ||||
int clause_type_from_codepoint(uint32_t c) | int clause_type_from_codepoint(uint32_t c) | ||||
ucd_category cat = ucd_lookup_category(c); | ucd_category cat = ucd_lookup_category(c); | ||||
ucd_property props = ucd_properties(c, cat); | ucd_property props = ucd_properties(c, cat); | ||||
for (int ix = 0; punct_chars[ix] != 0; ++ix) { | |||||
if (punct_chars[ix] == c) | |||||
return punct_attributes[ix]; | |||||
} | |||||
switch (props & ESPEAKNG_CLAUSE_TYPE_PROPERTY_MASK) | switch (props & ESPEAKNG_CLAUSE_TYPE_PROPERTY_MASK) | ||||
{ | { | ||||
case ESPEAKNG_PROPERTY_FULL_STOP: | case ESPEAKNG_PROPERTY_FULL_STOP: | ||||
case ESPEAKNG_PROPERTY_EXTENDED_DASH: | case ESPEAKNG_PROPERTY_EXTENDED_DASH: | ||||
return CLAUSE_SEMICOLON; | return CLAUSE_SEMICOLON; | ||||
case ESPEAKNG_PROPERTY_SEMI_COLON | ESPEAKNG_PROPERTY_OPTIONAL_SPACE_AFTER: | case ESPEAKNG_PROPERTY_SEMI_COLON | ESPEAKNG_PROPERTY_OPTIONAL_SPACE_AFTER: | ||||
case ESPEAKNG_PROPERTY_QUESTION_MARK | ESPEAKNG_PROPERTY_OPTIONAL_SPACE_AFTER | ESPEAKNG_PROPERTY_INVERTED_TERMINAL_PUNCTUATION: | |||||
case ESPEAKNG_PROPERTY_EXCLAMATION_MARK | ESPEAKNG_PROPERTY_OPTIONAL_SPACE_AFTER | ESPEAKNG_PROPERTY_INVERTED_TERMINAL_PUNCTUATION: | |||||
return CLAUSE_SEMICOLON | CLAUSE_OPTIONAL_SPACE_AFTER; | return CLAUSE_SEMICOLON | CLAUSE_OPTIONAL_SPACE_AFTER; | ||||
case ESPEAKNG_PROPERTY_ELLIPSIS: | case ESPEAKNG_PROPERTY_ELLIPSIS: | ||||
return CLAUSE_SEMICOLON | CLAUSE_SPEAK_PUNCTUATION_NAME | CLAUSE_OPTIONAL_SPACE_AFTER; | return CLAUSE_SEMICOLON | CLAUSE_SPEAK_PUNCTUATION_NAME | CLAUSE_OPTIONAL_SPACE_AFTER; |
055E ; Punctuation_In_Word # Po ARMENIAN QUESTION MARK | 055E ; Punctuation_In_Word # Po ARMENIAN QUESTION MARK | ||||
# Total code points: 3 | # Total code points: 3 | ||||
# ================================================ | |||||
00A1 ; Inverted_Terminal_Punctuation # Po INVERTED EXCLAMATION MARK | |||||
00BF ; Inverted_Terminal_Punctuation # Po INVERTED QUESTION MARK | |||||
# Total code points: 2 |
#define UCD_PROPERTY_PREPENDED_CONCATENATION_MARK 0x0000000100000000ull /**< @brief Prepended_Concatenation_Mark */ | #define UCD_PROPERTY_PREPENDED_CONCATENATION_MARK 0x0000000100000000ull /**< @brief Prepended_Concatenation_Mark */ | ||||
// eSpeak NG extended properties: | // eSpeak NG extended properties: | ||||
#define ESPEAKNG_PROPERTY_INVERTED_TERMINAL_PUNCTUATION 0x0010000000000000ull /**< @brief Inverted_Terminal_Punctuation */ | |||||
#define ESPEAKNG_PROPERTY_PUNCTUATION_IN_WORD 0x0020000000000000ull /**< @brief Punctuation_In_Word */ | #define ESPEAKNG_PROPERTY_PUNCTUATION_IN_WORD 0x0020000000000000ull /**< @brief Punctuation_In_Word */ | ||||
#define ESPEAKNG_PROPERTY_OPTIONAL_SPACE_AFTER 0x0040000000000000ull /**< @brief Optional_Space_After */ | #define ESPEAKNG_PROPERTY_OPTIONAL_SPACE_AFTER 0x0040000000000000ull /**< @brief Optional_Space_After */ | ||||
#define ESPEAKNG_PROPERTY_EXTENDED_DASH 0x0080000000000000ull /**< @brief Extended_Dash */ | #define ESPEAKNG_PROPERTY_EXTENDED_DASH 0x0080000000000000ull /**< @brief Extended_Dash */ |
if (c == 0x003A) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_PATTERN_SYNTAX | ESPEAKNG_PROPERTY_COLON; | if (c == 0x003A) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_PATTERN_SYNTAX | ESPEAKNG_PROPERTY_COLON; | ||||
if (c == 0x003B) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_PATTERN_SYNTAX | ESPEAKNG_PROPERTY_SEMI_COLON; | if (c == 0x003B) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_PATTERN_SYNTAX | ESPEAKNG_PROPERTY_SEMI_COLON; | ||||
if (c == 0x003F) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_SENTENCE_TERMINAL | UCD_PROPERTY_PATTERN_SYNTAX | ESPEAKNG_PROPERTY_QUESTION_MARK; | if (c == 0x003F) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_SENTENCE_TERMINAL | UCD_PROPERTY_PATTERN_SYNTAX | ESPEAKNG_PROPERTY_QUESTION_MARK; | ||||
if (c == 0x00A1) return UCD_PROPERTY_PATTERN_SYNTAX | ESPEAKNG_PROPERTY_EXCLAMATION_MARK | ESPEAKNG_PROPERTY_OPTIONAL_SPACE_AFTER; | |||||
if (c == 0x00A1) return UCD_PROPERTY_PATTERN_SYNTAX | ESPEAKNG_PROPERTY_EXCLAMATION_MARK | ESPEAKNG_PROPERTY_OPTIONAL_SPACE_AFTER | ESPEAKNG_PROPERTY_INVERTED_TERMINAL_PUNCTUATION; | |||||
if (c == 0x00B7) return UCD_PROPERTY_DIACRITIC | UCD_PROPERTY_EXTENDER | UCD_PROPERTY_OTHER_ID_CONTINUE; | if (c == 0x00B7) return UCD_PROPERTY_DIACRITIC | UCD_PROPERTY_EXTENDER | UCD_PROPERTY_OTHER_ID_CONTINUE; | ||||
if (c == 0x00BF) return UCD_PROPERTY_PATTERN_SYNTAX | ESPEAKNG_PROPERTY_QUESTION_MARK | ESPEAKNG_PROPERTY_OPTIONAL_SPACE_AFTER; | |||||
if (c == 0x00BF) return UCD_PROPERTY_PATTERN_SYNTAX | ESPEAKNG_PROPERTY_QUESTION_MARK | ESPEAKNG_PROPERTY_OPTIONAL_SPACE_AFTER | ESPEAKNG_PROPERTY_INVERTED_TERMINAL_PUNCTUATION; | |||||
return UCD_PROPERTY_PATTERN_SYNTAX; | return UCD_PROPERTY_PATTERN_SYNTAX; | ||||
case 0x0300: | case 0x0300: | ||||
if (c == 0x037E) return UCD_PROPERTY_TERMINAL_PUNCTUATION | ESPEAKNG_PROPERTY_QUESTION_MARK; | if (c == 0x037E) return UCD_PROPERTY_TERMINAL_PUNCTUATION | ESPEAKNG_PROPERTY_QUESTION_MARK; |
props += (2 ** 31) * data.get('Pattern_Syntax', 0) | props += (2 ** 31) * data.get('Pattern_Syntax', 0) | ||||
props += (2 ** 32) * data.get('Prepended_Concatenation_Mark', 0) | props += (2 ** 32) * data.get('Prepended_Concatenation_Mark', 0) | ||||
# eSpeak NG extended properties: | # eSpeak NG extended properties: | ||||
props += (2 ** 52) * data.get('Inverted_Terminal_Punctuation', 0) | |||||
props += (2 ** 53) * data.get('Punctuation_In_Word', 0) | props += (2 ** 53) * data.get('Punctuation_In_Word', 0) | ||||
props += (2 ** 54) * data.get('Optional_Space_After', 0) | props += (2 ** 54) * data.get('Optional_Space_After', 0) | ||||
props += (2 ** 55) * data.get('Extended_Dash', 0) | props += (2 ** 55) * data.get('Extended_Dash', 0) |