Browse Source

Armenian emphasis mark (U+055B) is used for interjections, so treat it as an exclamation mark.

master
Reece H. Dunn 8 years ago
parent
commit
5c6bc0e556
2 changed files with 1 addition and 3 deletions
  1. 0
    2
      src/libespeak-ng/tokenizer.c
  2. 1
    1
      tests/tokenizer.c

+ 0
- 2
src/libespeak-ng/tokenizer.c View File

static const unsigned short punct_chars[] = { static const unsigned short punct_chars[] = {
0x00a1, // inverted exclamation 0x00a1, // inverted exclamation
0x00bf, // inverted question 0x00bf, // inverted question
0x055b, // Armenian emphasis mark
0 0
}; };


static const unsigned int punct_attributes[] = { static const unsigned int punct_attributes[] = {
CLAUSE_SEMICOLON | CLAUSE_OPTIONAL_SPACE_AFTER, // inverted exclamation CLAUSE_SEMICOLON | CLAUSE_OPTIONAL_SPACE_AFTER, // inverted exclamation
CLAUSE_SEMICOLON | CLAUSE_OPTIONAL_SPACE_AFTER, // inverted question CLAUSE_SEMICOLON | CLAUSE_OPTIONAL_SPACE_AFTER, // inverted question
CLAUSE_PERIOD | CLAUSE_PUNCTUATION_IN_WORD, // Armenian emphasis mark
0 0
}; };



+ 1
- 1
tests/tokenizer.c View File

{ {
printf("testing Armenian (Armn) script classification\n"); printf("testing Armenian (Armn) script classification\n");


assert(clause_type_from_codepoint(0x055B) == (CLAUSE_PERIOD | CLAUSE_PUNCTUATION_IN_WORD));
assert(clause_type_from_codepoint(0x055B) == (CLAUSE_EXCLAMATION | CLAUSE_PUNCTUATION_IN_WORD));
assert(clause_type_from_codepoint(0x055C) == (CLAUSE_EXCLAMATION | CLAUSE_PUNCTUATION_IN_WORD)); assert(clause_type_from_codepoint(0x055C) == (CLAUSE_EXCLAMATION | CLAUSE_PUNCTUATION_IN_WORD));
assert(clause_type_from_codepoint(0x055D) == CLAUSE_COMMA); assert(clause_type_from_codepoint(0x055D) == CLAUSE_COMMA);
assert(clause_type_from_codepoint(0x055E) == (CLAUSE_QUESTION | CLAUSE_PUNCTUATION_IN_WORD)); assert(clause_type_from_codepoint(0x055E) == (CLAUSE_QUESTION | CLAUSE_PUNCTUATION_IN_WORD));

Loading…
Cancel
Save