/* Tokenizer APIs.
 *
 * Copyright (C) 2017 Reece H. Dunn
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, see: <http://www.gnu.org/licenses/>.
 */

#include "config.h"

/* NOTE(review): the operands of the seven angle-bracket includes were
 * missing from the copy of this file that was reviewed. The names below are
 * a reconstruction -- confirm them against the project's own copy. This
 * translation unit itself needs <stdint.h> (uint32_t) and the header that
 * declares ucd_category, ucd_property, ucd_lookup_category(), ucd_properties()
 * and the ESPEAKNG_PROPERTY_* constants (presumably <ucd/ucd.h>).
 */
#include <errno.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include <wchar.h>

#include <espeak-ng/espeak_ng.h>
#include <ucd/ucd.h>

#include "encoding.h"
#include "speech.h"
#include "phoneme.h"
#include "synthesize.h"
#include "translate.h"

/* Selects the property bits (the top 12 bits of the 64-bit property value)
 * that clause_type_from_codepoint() switches on.
 */
#define ESPEAKNG_CLAUSE_TYPE_PROPERTY_MASK 0xFFF0000000000000ull

/* Map a codepoint to a clause type.
 *
 * The codepoint's category and properties are looked up, the properties are
 * masked with ESPEAKNG_CLAUSE_TYPE_PROPERTY_MASK, and the remaining bits are
 * matched against the known property combinations.
 *
 * @param c The codepoint to classify.
 *
 * @return The CLAUSE_* value (possibly combined with CLAUSE_* modifier flags)
 *         for the matched combination, or CLAUSE_NONE when the masked
 *         properties match none of the combinations listed here.
 */
int clause_type_from_codepoint(uint32_t c)
{
	ucd_category cat = ucd_lookup_category(c);
	ucd_property props = ucd_properties(c, cat);

	switch (props & ESPEAKNG_CLAUSE_TYPE_PROPERTY_MASK)
	{
	case ESPEAKNG_PROPERTY_FULL_STOP:
		return CLAUSE_PERIOD;
	case ESPEAKNG_PROPERTY_FULL_STOP | ESPEAKNG_PROPERTY_OPTIONAL_SPACE_AFTER:
		return CLAUSE_PERIOD | CLAUSE_OPTIONAL_SPACE_AFTER;

	case ESPEAKNG_PROPERTY_QUESTION_MARK:
		return CLAUSE_QUESTION;
	case ESPEAKNG_PROPERTY_QUESTION_MARK | ESPEAKNG_PROPERTY_OPTIONAL_SPACE_AFTER:
		return CLAUSE_QUESTION | CLAUSE_OPTIONAL_SPACE_AFTER;
	case ESPEAKNG_PROPERTY_QUESTION_MARK | ESPEAKNG_PROPERTY_PUNCTUATION_IN_WORD:
		return CLAUSE_QUESTION | CLAUSE_PUNCTUATION_IN_WORD;

	case ESPEAKNG_PROPERTY_EXCLAMATION_MARK:
		return CLAUSE_EXCLAMATION;
	case ESPEAKNG_PROPERTY_EXCLAMATION_MARK | ESPEAKNG_PROPERTY_OPTIONAL_SPACE_AFTER:
		return CLAUSE_EXCLAMATION | CLAUSE_OPTIONAL_SPACE_AFTER;
	case ESPEAKNG_PROPERTY_EXCLAMATION_MARK | ESPEAKNG_PROPERTY_PUNCTUATION_IN_WORD:
		return CLAUSE_EXCLAMATION | CLAUSE_PUNCTUATION_IN_WORD;

	case ESPEAKNG_PROPERTY_COMMA:
		return CLAUSE_COMMA;
	case ESPEAKNG_PROPERTY_COMMA | ESPEAKNG_PROPERTY_OPTIONAL_SPACE_AFTER:
		return CLAUSE_COMMA | CLAUSE_OPTIONAL_SPACE_AFTER;

	case ESPEAKNG_PROPERTY_COLON:
		return CLAUSE_COLON;
	case ESPEAKNG_PROPERTY_COLON | ESPEAKNG_PROPERTY_OPTIONAL_SPACE_AFTER:
		return CLAUSE_COLON | CLAUSE_OPTIONAL_SPACE_AFTER;

	/* An extended dash is reported with the same clause type as a semi-colon. */
	case ESPEAKNG_PROPERTY_SEMI_COLON:
	case ESPEAKNG_PROPERTY_EXTENDED_DASH:
		return CLAUSE_SEMICOLON;

	/* Inverted question/exclamation marks are reported as a semi-colon
	 * clause with an optional following space, not as a question or
	 * exclamation.
	 */
	case ESPEAKNG_PROPERTY_SEMI_COLON | ESPEAKNG_PROPERTY_OPTIONAL_SPACE_AFTER:
	case ESPEAKNG_PROPERTY_QUESTION_MARK | ESPEAKNG_PROPERTY_OPTIONAL_SPACE_AFTER | ESPEAKNG_PROPERTY_INVERTED_TERMINAL_PUNCTUATION:
	case ESPEAKNG_PROPERTY_EXCLAMATION_MARK | ESPEAKNG_PROPERTY_OPTIONAL_SPACE_AFTER | ESPEAKNG_PROPERTY_INVERTED_TERMINAL_PUNCTUATION:
		return CLAUSE_SEMICOLON | CLAUSE_OPTIONAL_SPACE_AFTER;

	case ESPEAKNG_PROPERTY_ELLIPSIS:
		return CLAUSE_SEMICOLON | CLAUSE_SPEAK_PUNCTUATION_NAME | CLAUSE_OPTIONAL_SPACE_AFTER;

	case ESPEAKNG_PROPERTY_PARAGRAPH_SEPARATOR:
		return CLAUSE_PARAGRAPH;

	default:
		/* Any other property combination does not terminate a clause. */
		break;
	}

	return CLAUSE_NONE;
}