eSpeak NG is an open source speech synthesizer that supports more than a hundred languages and accents.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

tokenizer.c 3.1KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283
  1. /* Tokenizer APIs.
  2. *
  3. * Copyright (C) 2017 Reece H. Dunn
  4. *
  5. * This program is free software; you can redistribute it and/or modify
  6. * it under the terms of the GNU General Public License as published by
  7. * the Free Software Foundation; either version 3 of the License, or
  8. * (at your option) any later version.
  9. *
  10. * This program is distributed in the hope that it will be useful,
  11. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  12. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  13. * GNU General Public License for more details.
  14. *
  15. * You should have received a copy of the GNU General Public License
  16. * along with this program; if not, see: <http://www.gnu.org/licenses/>.
  17. */
  18. #include "config.h"
  19. #include <errno.h>
  20. #include <stdint.h>
  21. #include <stdio.h>
  22. #include <stdlib.h>
  23. #include <string.h>
  24. #include <espeak-ng/espeak_ng.h>
  25. #include <ucd/ucd.h>
  26. #include "encoding.h"
  27. #include "speech.h"
  28. #include "phoneme.h"
  29. #include "synthesize.h"
  30. #include "translate.h"
  31. #define ESPEAKNG_CLAUSE_TYPE_PROPERTY_MASK 0xFFF0000000000000ull
  32. int clause_type_from_codepoint(uint32_t c)
  33. {
  34. ucd_category cat = ucd_lookup_category(c);
  35. ucd_property props = ucd_properties(c, cat);
  36. switch (props & ESPEAKNG_CLAUSE_TYPE_PROPERTY_MASK)
  37. {
  38. case ESPEAKNG_PROPERTY_FULL_STOP:
  39. return CLAUSE_PERIOD;
  40. case ESPEAKNG_PROPERTY_FULL_STOP | ESPEAKNG_PROPERTY_OPTIONAL_SPACE_AFTER:
  41. return CLAUSE_PERIOD | CLAUSE_OPTIONAL_SPACE_AFTER;
  42. case ESPEAKNG_PROPERTY_QUESTION_MARK:
  43. return CLAUSE_QUESTION;
  44. case ESPEAKNG_PROPERTY_QUESTION_MARK | ESPEAKNG_PROPERTY_OPTIONAL_SPACE_AFTER:
  45. return CLAUSE_QUESTION | CLAUSE_OPTIONAL_SPACE_AFTER;
  46. case ESPEAKNG_PROPERTY_QUESTION_MARK | ESPEAKNG_PROPERTY_PUNCTUATION_IN_WORD:
  47. return CLAUSE_QUESTION | CLAUSE_PUNCTUATION_IN_WORD;
  48. case ESPEAKNG_PROPERTY_EXCLAMATION_MARK:
  49. return CLAUSE_EXCLAMATION;
  50. case ESPEAKNG_PROPERTY_EXCLAMATION_MARK | ESPEAKNG_PROPERTY_OPTIONAL_SPACE_AFTER:
  51. return CLAUSE_EXCLAMATION | CLAUSE_OPTIONAL_SPACE_AFTER;
  52. case ESPEAKNG_PROPERTY_EXCLAMATION_MARK | ESPEAKNG_PROPERTY_PUNCTUATION_IN_WORD:
  53. return CLAUSE_EXCLAMATION | CLAUSE_PUNCTUATION_IN_WORD;
  54. case ESPEAKNG_PROPERTY_COMMA:
  55. return CLAUSE_COMMA;
  56. case ESPEAKNG_PROPERTY_COMMA | ESPEAKNG_PROPERTY_OPTIONAL_SPACE_AFTER:
  57. return CLAUSE_COMMA | CLAUSE_OPTIONAL_SPACE_AFTER;
  58. case ESPEAKNG_PROPERTY_COLON:
  59. return CLAUSE_COLON;
  60. case ESPEAKNG_PROPERTY_COLON | ESPEAKNG_PROPERTY_OPTIONAL_SPACE_AFTER:
  61. return CLAUSE_COLON | CLAUSE_OPTIONAL_SPACE_AFTER;
  62. case ESPEAKNG_PROPERTY_SEMI_COLON:
  63. case ESPEAKNG_PROPERTY_EXTENDED_DASH:
  64. return CLAUSE_SEMICOLON;
  65. case ESPEAKNG_PROPERTY_SEMI_COLON | ESPEAKNG_PROPERTY_OPTIONAL_SPACE_AFTER:
  66. case ESPEAKNG_PROPERTY_QUESTION_MARK | ESPEAKNG_PROPERTY_OPTIONAL_SPACE_AFTER | ESPEAKNG_PROPERTY_INVERTED_TERMINAL_PUNCTUATION:
  67. case ESPEAKNG_PROPERTY_EXCLAMATION_MARK | ESPEAKNG_PROPERTY_OPTIONAL_SPACE_AFTER | ESPEAKNG_PROPERTY_INVERTED_TERMINAL_PUNCTUATION:
  68. return CLAUSE_SEMICOLON | CLAUSE_OPTIONAL_SPACE_AFTER;
  69. case ESPEAKNG_PROPERTY_ELLIPSIS:
  70. return CLAUSE_SEMICOLON | CLAUSE_SPEAK_PUNCTUATION_NAME | CLAUSE_OPTIONAL_SPACE_AFTER;
  71. case ESPEAKNG_PROPERTY_PARAGRAPH_SEPARATOR:
  72. return CLAUSE_PARAGRAPH;
  73. }
  74. return CLAUSE_NONE;
  75. }