Browse Source

ucd-tools: Inverted_Terminal_Punctuation eSpeakNG extended property support; use in clause_type_from_codepoint.

master
Reece H. Dunn 8 years ago
parent
commit
61d668c0cb

+ 2
- 21
src/libespeak-ng/tokenizer.c View File

/* Tokenizer APIs. /* Tokenizer APIs.
* *
* Copyright (C) 2005 to 2015 by Jonathan Duddington
* email: [email protected]
* Copyright (C) 2017 Reece H. Dunn * Copyright (C) 2017 Reece H. Dunn
* *
* This program is free software; you can redistribute it and/or modify * This program is free software; you can redistribute it and/or modify
#include "synthesize.h" #include "synthesize.h"
#include "translate.h" #include "translate.h"


// punctuations symbols that can end a clause
static const unsigned short punct_chars[] = {
0x00a1, // inverted exclamation
0x00bf, // inverted question
0
};

// indexed by entry num. in punct_chars
static const unsigned int punct_attributes[] = {
CLAUSE_SEMICOLON | CLAUSE_OPTIONAL_SPACE_AFTER, // inverted exclamation
CLAUSE_SEMICOLON | CLAUSE_OPTIONAL_SPACE_AFTER, // inverted question
0
};

#define ESPEAKNG_CLAUSE_TYPE_PROPERTY_MASK 0xFFF0000000000000ull #define ESPEAKNG_CLAUSE_TYPE_PROPERTY_MASK 0xFFF0000000000000ull


int clause_type_from_codepoint(uint32_t c) int clause_type_from_codepoint(uint32_t c)
ucd_category cat = ucd_lookup_category(c); ucd_category cat = ucd_lookup_category(c);
ucd_property props = ucd_properties(c, cat); ucd_property props = ucd_properties(c, cat);


for (int ix = 0; punct_chars[ix] != 0; ++ix) {
if (punct_chars[ix] == c)
return punct_attributes[ix];
}

switch (props & ESPEAKNG_CLAUSE_TYPE_PROPERTY_MASK) switch (props & ESPEAKNG_CLAUSE_TYPE_PROPERTY_MASK)
{ {
case ESPEAKNG_PROPERTY_FULL_STOP: case ESPEAKNG_PROPERTY_FULL_STOP:
case ESPEAKNG_PROPERTY_EXTENDED_DASH: case ESPEAKNG_PROPERTY_EXTENDED_DASH:
return CLAUSE_SEMICOLON; return CLAUSE_SEMICOLON;
case ESPEAKNG_PROPERTY_SEMI_COLON | ESPEAKNG_PROPERTY_OPTIONAL_SPACE_AFTER: case ESPEAKNG_PROPERTY_SEMI_COLON | ESPEAKNG_PROPERTY_OPTIONAL_SPACE_AFTER:
case ESPEAKNG_PROPERTY_QUESTION_MARK | ESPEAKNG_PROPERTY_OPTIONAL_SPACE_AFTER | ESPEAKNG_PROPERTY_INVERTED_TERMINAL_PUNCTUATION:
case ESPEAKNG_PROPERTY_EXCLAMATION_MARK | ESPEAKNG_PROPERTY_OPTIONAL_SPACE_AFTER | ESPEAKNG_PROPERTY_INVERTED_TERMINAL_PUNCTUATION:
return CLAUSE_SEMICOLON | CLAUSE_OPTIONAL_SPACE_AFTER; return CLAUSE_SEMICOLON | CLAUSE_OPTIONAL_SPACE_AFTER;
case ESPEAKNG_PROPERTY_ELLIPSIS: case ESPEAKNG_PROPERTY_ELLIPSIS:
return CLAUSE_SEMICOLON | CLAUSE_SPEAK_PUNCTUATION_NAME | CLAUSE_OPTIONAL_SPACE_AFTER; return CLAUSE_SEMICOLON | CLAUSE_SPEAK_PUNCTUATION_NAME | CLAUSE_OPTIONAL_SPACE_AFTER;

+ 7
- 0
src/ucd-tools/data/espeak-ng/PropList.txt View File

055E ; Punctuation_In_Word # Po ARMENIAN QUESTION MARK 055E ; Punctuation_In_Word # Po ARMENIAN QUESTION MARK


# Total code points: 3 # Total code points: 3

# ================================================

00A1 ; Inverted_Terminal_Punctuation # Po INVERTED EXCLAMATION MARK
00BF ; Inverted_Terminal_Punctuation # Po INVERTED QUESTION MARK

# Total code points: 2

+ 1
- 0
src/ucd-tools/src/include/ucd/ucd.h View File

#define UCD_PROPERTY_PREPENDED_CONCATENATION_MARK 0x0000000100000000ull /**< @brief Prepended_Concatenation_Mark */ #define UCD_PROPERTY_PREPENDED_CONCATENATION_MARK 0x0000000100000000ull /**< @brief Prepended_Concatenation_Mark */


// eSpeak NG extended properties: // eSpeak NG extended properties:
#define ESPEAKNG_PROPERTY_INVERTED_TERMINAL_PUNCTUATION 0x0010000000000000ull /**< @brief Inverted_Terminal_Punctuation */
#define ESPEAKNG_PROPERTY_PUNCTUATION_IN_WORD 0x0020000000000000ull /**< @brief Punctuation_In_Word */ #define ESPEAKNG_PROPERTY_PUNCTUATION_IN_WORD 0x0020000000000000ull /**< @brief Punctuation_In_Word */
#define ESPEAKNG_PROPERTY_OPTIONAL_SPACE_AFTER 0x0040000000000000ull /**< @brief Optional_Space_After */ #define ESPEAKNG_PROPERTY_OPTIONAL_SPACE_AFTER 0x0040000000000000ull /**< @brief Optional_Space_After */
#define ESPEAKNG_PROPERTY_EXTENDED_DASH 0x0080000000000000ull /**< @brief Extended_Dash */ #define ESPEAKNG_PROPERTY_EXTENDED_DASH 0x0080000000000000ull /**< @brief Extended_Dash */

+ 2
- 2
src/ucd-tools/src/proplist.c View File

if (c == 0x003A) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_PATTERN_SYNTAX | ESPEAKNG_PROPERTY_COLON; if (c == 0x003A) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_PATTERN_SYNTAX | ESPEAKNG_PROPERTY_COLON;
if (c == 0x003B) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_PATTERN_SYNTAX | ESPEAKNG_PROPERTY_SEMI_COLON; if (c == 0x003B) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_PATTERN_SYNTAX | ESPEAKNG_PROPERTY_SEMI_COLON;
if (c == 0x003F) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_SENTENCE_TERMINAL | UCD_PROPERTY_PATTERN_SYNTAX | ESPEAKNG_PROPERTY_QUESTION_MARK; if (c == 0x003F) return UCD_PROPERTY_TERMINAL_PUNCTUATION | UCD_PROPERTY_SENTENCE_TERMINAL | UCD_PROPERTY_PATTERN_SYNTAX | ESPEAKNG_PROPERTY_QUESTION_MARK;
if (c == 0x00A1) return UCD_PROPERTY_PATTERN_SYNTAX | ESPEAKNG_PROPERTY_EXCLAMATION_MARK | ESPEAKNG_PROPERTY_OPTIONAL_SPACE_AFTER;
if (c == 0x00A1) return UCD_PROPERTY_PATTERN_SYNTAX | ESPEAKNG_PROPERTY_EXCLAMATION_MARK | ESPEAKNG_PROPERTY_OPTIONAL_SPACE_AFTER | ESPEAKNG_PROPERTY_INVERTED_TERMINAL_PUNCTUATION;
if (c == 0x00B7) return UCD_PROPERTY_DIACRITIC | UCD_PROPERTY_EXTENDER | UCD_PROPERTY_OTHER_ID_CONTINUE; if (c == 0x00B7) return UCD_PROPERTY_DIACRITIC | UCD_PROPERTY_EXTENDER | UCD_PROPERTY_OTHER_ID_CONTINUE;
if (c == 0x00BF) return UCD_PROPERTY_PATTERN_SYNTAX | ESPEAKNG_PROPERTY_QUESTION_MARK | ESPEAKNG_PROPERTY_OPTIONAL_SPACE_AFTER;
if (c == 0x00BF) return UCD_PROPERTY_PATTERN_SYNTAX | ESPEAKNG_PROPERTY_QUESTION_MARK | ESPEAKNG_PROPERTY_OPTIONAL_SPACE_AFTER | ESPEAKNG_PROPERTY_INVERTED_TERMINAL_PUNCTUATION;
return UCD_PROPERTY_PATTERN_SYNTAX; return UCD_PROPERTY_PATTERN_SYNTAX;
case 0x0300: case 0x0300:
if (c == 0x037E) return UCD_PROPERTY_TERMINAL_PUNCTUATION | ESPEAKNG_PROPERTY_QUESTION_MARK; if (c == 0x037E) return UCD_PROPERTY_TERMINAL_PUNCTUATION | ESPEAKNG_PROPERTY_QUESTION_MARK;

+ 1
- 0
src/ucd-tools/tools/printdata.py View File

props += (2 ** 31) * data.get('Pattern_Syntax', 0) props += (2 ** 31) * data.get('Pattern_Syntax', 0)
props += (2 ** 32) * data.get('Prepended_Concatenation_Mark', 0) props += (2 ** 32) * data.get('Prepended_Concatenation_Mark', 0)
# eSpeak NG extended properties: # eSpeak NG extended properties:
props += (2 ** 52) * data.get('Inverted_Terminal_Punctuation', 0)
props += (2 ** 53) * data.get('Punctuation_In_Word', 0) props += (2 ** 53) * data.get('Punctuation_In_Word', 0)
props += (2 ** 54) * data.get('Optional_Space_After', 0) props += (2 ** 54) * data.get('Optional_Space_After', 0)
props += (2 ** 55) * data.get('Extended_Dash', 0) props += (2 ** 55) * data.get('Extended_Dash', 0)

Loading…
Cancel
Save