@@ -112,9 +112,6 @@ docs: docs/index.html \ | |||
src/speak-ng.1.html \ | |||
README.html | |||
check: tests/encoding.test | |||
tests/encoding.test | |||
##### build targets: | |||
espeak_includedir = $(includedir)/espeak | |||
@@ -163,6 +160,7 @@ src_libespeak_ng_la_SOURCES = \ | |||
src/libespeak-ng/synthdata.c \ | |||
src/libespeak-ng/synthesize.c \ | |||
src/libespeak-ng/synth_mbrola.c \ | |||
src/libespeak-ng/tokenizer.c \ | |||
src/libespeak-ng/translate.c \ | |||
src/libespeak-ng/tr_languages.c \ | |||
src/libespeak-ng/voices.c \ | |||
@@ -217,6 +215,19 @@ tests_encoding_test_SOURCES = \ | |||
src/libespeak-ng/mnemonics.c \ | |||
tests/encoding.c | |||
noinst_bin_PROGRAMS += tests/tokenizer.test | |||
tests_tokenizer_test_CFLAGS = \ | |||
-Isrc/libespeak-ng -Isrc/include -Isrc/include/compat -I src/ucd-tools/src/include \ | |||
-D _POSIX_C_SOURCE=200112L ${AM_CFLAGS} | |||
tests_tokenizer_test_SOURCES = \ | |||
src/libespeak-ng/tokenizer.c \ | |||
tests/tokenizer.c | |||
check: tests/encoding.test tests/tokenizer.test | |||
tests/encoding.test | |||
tests/tokenizer.test | |||
##### phoneme data: | |||
espeak-ng-data/phondata: phsource/phonemes.stamp |
@@ -33,6 +33,7 @@ ESPEAK_SOURCES := \ | |||
src/libespeak-ng/synthdata.c \ | |||
src/libespeak-ng/synthesize.c \ | |||
src/libespeak-ng/synth_mbrola.c \ | |||
src/libespeak-ng/tokenizer.c \ | |||
src/libespeak-ng/translate.c \ | |||
src/libespeak-ng/tr_languages.c \ | |||
src/libespeak-ng/voices.c \ |
@@ -63,117 +63,6 @@ static int sayas_mode; | |||
static int sayas_start; | |||
static int ssml_ignore_l_angle = 0; | |||
// punctuations symbols that can end a clause | |||
static const unsigned short punct_chars[] = { | |||
',', '.', '?', '!', ':', ';', | |||
0x00a1, // inverted exclamation | |||
0x00bf, // inverted question | |||
0x2013, // en-dash | |||
0x2014, // em-dash | |||
0x2026, // elipsis | |||
0x037e, // Greek question mark (looks like semicolon) | |||
0x0387, // Greek semicolon, ano teleia | |||
0x0964, // Devanagari Danda (fullstop) | |||
0x0589, // Armenian period | |||
0x055d, // Armenian comma | |||
0x055c, // Armenian exclamation | |||
0x055e, // Armenian question | |||
0x055b, // Armenian emphasis mark | |||
0x060c, // Arabic , | |||
0x061b, // Arabic ; | |||
0x061f, // Arabic ? | |||
0x06d4, // Arabic . | |||
0x0df4, // Singhalese Kunddaliya | |||
0x0f0d, // Tibet Shad | |||
0x0f0e, | |||
0x1362, // Ethiopic period | |||
0x1363, | |||
0x1364, | |||
0x1365, | |||
0x1366, | |||
0x1367, | |||
0x1368, | |||
0x10fb, // Georgian paragraph | |||
0x3001, // ideograph comma | |||
0x3002, // ideograph period | |||
0xff01, // fullwidth exclamation | |||
0xff0c, // fullwidth comma | |||
0xff0e, // fullwidth period | |||
0xff1a, // fullwidth colon | |||
0xff1b, // fullwidth semicolon | |||
0xff1f, // fullwidth question mark | |||
0 | |||
}; | |||
// indexed by (entry num. in punct_chars) + 1 | |||
// bits 0-7 pause x 10mS, bits 12-14 intonation type, bit 15 don't need following space or bracket | |||
static const unsigned int punct_attributes[] = { | |||
0, | |||
CLAUSE_COMMA, | |||
CLAUSE_PERIOD, | |||
CLAUSE_QUESTION, | |||
CLAUSE_EXCLAMATION, | |||
CLAUSE_COLON, | |||
CLAUSE_SEMICOLON, | |||
CLAUSE_SEMICOLON | CLAUSE_OPTIONAL_SPACE_AFTER, // inverted exclamation | |||
CLAUSE_SEMICOLON | CLAUSE_OPTIONAL_SPACE_AFTER, // inverted question | |||
CLAUSE_SEMICOLON, // en-dash | |||
CLAUSE_SEMICOLON, // em-dash | |||
CLAUSE_SEMICOLON | CLAUSE_SPEAK_PUNCTUATION_NAME | CLAUSE_OPTIONAL_SPACE_AFTER, // elipsis | |||
CLAUSE_QUESTION, // Greek question mark | |||
CLAUSE_SEMICOLON, // Greek semicolon | |||
CLAUSE_PERIOD | CLAUSE_OPTIONAL_SPACE_AFTER, // Devanagari Danda (fullstop) | |||
CLAUSE_PERIOD | CLAUSE_OPTIONAL_SPACE_AFTER, // Armenian period | |||
CLAUSE_COMMA, // Armenian comma | |||
CLAUSE_EXCLAMATION | CLAUSE_PUNCTUATION_IN_WORD, // Armenian exclamation | |||
CLAUSE_QUESTION | CLAUSE_PUNCTUATION_IN_WORD, // Armenian question | |||
CLAUSE_PERIOD | CLAUSE_PUNCTUATION_IN_WORD, // Armenian emphasis mark | |||
CLAUSE_COMMA, // Arabic , | |||
CLAUSE_SEMICOLON, // Arabic ; | |||
CLAUSE_QUESTION, // Arabic question mark | |||
CLAUSE_PERIOD, // Arabic full stop | |||
CLAUSE_PERIOD | CLAUSE_OPTIONAL_SPACE_AFTER, // Singhalese period | |||
CLAUSE_PERIOD | CLAUSE_OPTIONAL_SPACE_AFTER, // Tibet period | |||
CLAUSE_PARAGRAPH, | |||
CLAUSE_PERIOD, // Ethiopic period | |||
CLAUSE_COMMA, // Ethiopic comma | |||
CLAUSE_SEMICOLON, // Ethiopic semicolon | |||
CLAUSE_COLON, // Ethiopic colon | |||
CLAUSE_COLON, // Ethiopic preface colon | |||
CLAUSE_QUESTION, // Ethiopic question mark | |||
CLAUSE_PARAGRAPH, // Ethiopic paragraph | |||
CLAUSE_PARAGRAPH, // Georgian paragraph | |||
CLAUSE_COMMA | CLAUSE_OPTIONAL_SPACE_AFTER, // ideograph comma | |||
CLAUSE_PERIOD | CLAUSE_OPTIONAL_SPACE_AFTER, // ideograph period | |||
CLAUSE_EXCLAMATION | CLAUSE_OPTIONAL_SPACE_AFTER, // fullwidth | |||
CLAUSE_COMMA | CLAUSE_OPTIONAL_SPACE_AFTER, | |||
CLAUSE_PERIOD | CLAUSE_OPTIONAL_SPACE_AFTER, | |||
CLAUSE_COLON | CLAUSE_OPTIONAL_SPACE_AFTER, | |||
CLAUSE_SEMICOLON | CLAUSE_OPTIONAL_SPACE_AFTER, | |||
CLAUSE_QUESTION | CLAUSE_OPTIONAL_SPACE_AFTER, | |||
CLAUSE_SEMICOLON, // spare | |||
0 | |||
}; | |||
// stack for language and voice properties | |||
// frame 0 is for the defaults, before any ssml tags. | |||
typedef struct { | |||
@@ -604,7 +493,7 @@ static int AnnouncePunctuation(Translator *tr, int c1, int *c2_ptr, char *output | |||
if (c1 == '-') | |||
return CLAUSE_NONE; // no pause | |||
attributes = punct_attributes[lookupwchar(punct_chars, c1)]; | |||
attributes = clause_type_from_codepoint(c1); | |||
short_pause = CLAUSE_SHORTFALL; | |||
if ((attributes & CLAUSE_INTONATION_TYPE) == 0x1000) | |||
@@ -1624,7 +1513,6 @@ int ReadClause(Translator *tr, char *buf, short *charix, int *charix_top, int n_ | |||
int phoneme_mode = 0; | |||
int n_xml_buf; | |||
int terminator; | |||
int punct; | |||
int found; | |||
int any_alnum = 0; | |||
int self_closing; | |||
@@ -1797,13 +1685,12 @@ int ReadClause(Translator *tr, char *buf, short *charix, int *charix_top, int n_ | |||
if ((c2 == '\n') && (option_linelength == -1)) { | |||
// single-line mode, return immediately on NL | |||
if ((punct = lookupwchar(punct_chars, c1)) == 0) { | |||
if ((terminator = clause_type_from_codepoint(c1)) == CLAUSE_NONE) { | |||
charix[ix] = count_characters - clause_start_char; | |||
*charix_top = ix; | |||
ix += utf8_out(c1, &buf[ix]); | |||
terminator = CLAUSE_PERIOD; // line doesn't end in punctuation, assume period | |||
} else | |||
terminator = punct_attributes[punct]; | |||
} | |||
buf[ix] = ' '; | |||
buf[ix+1] = 0; | |||
return terminator; | |||
@@ -1990,9 +1877,7 @@ int ReadClause(Translator *tr, char *buf, short *charix, int *charix_top, int n_ | |||
} | |||
punct_data = 0; | |||
if ((punct = lookupwchar(punct_chars, c1)) != 0) { | |||
punct_data = punct_attributes[punct]; | |||
if ((punct_data = clause_type_from_codepoint(c1)) != CLAUSE_NONE) { | |||
if (punct_data & CLAUSE_PUNCTUATION_IN_WORD) { | |||
// Armenian punctuation inside a word | |||
stressed_word = 1; |
@@ -0,0 +1,151 @@ | |||
/* Tokenizer APIs. | |||
* | |||
* Copyright (C) 2005 to 2015 by Jonathan Duddington | |||
* email: [email protected] | |||
* Copyright (C) 2017 Reece H. Dunn | |||
* | |||
* This program is free software; you can redistribute it and/or modify | |||
* it under the terms of the GNU General Public License as published by | |||
* the Free Software Foundation; either version 3 of the License, or | |||
* (at your option) any later version. | |||
* | |||
* This program is distributed in the hope that it will be useful, | |||
* but WITHOUT ANY WARRANTY; without even the implied warranty of | |||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |||
* GNU General Public License for more details. | |||
* | |||
* You should have received a copy of the GNU General Public License | |||
* along with this program; if not, see: <http://www.gnu.org/licenses/>. | |||
*/ | |||
#include "config.h" | |||
#include <errno.h> | |||
#include <stdint.h> | |||
#include <stdio.h> | |||
#include <stdlib.h> | |||
#include <string.h> | |||
#include <espeak-ng/espeak_ng.h> | |||
#include "encoding.h" | |||
#include "speech.h" | |||
#include "phoneme.h" | |||
#include "synthesize.h" | |||
#include "translate.h" | |||
// punctuations symbols that can end a clause | |||
static const unsigned short punct_chars[] = { | |||
',', '.', '?', '!', ':', ';', | |||
0x00a1, // inverted exclamation | |||
0x00bf, // inverted question | |||
0x2013, // en-dash | |||
0x2014, // em-dash | |||
0x2026, // elipsis | |||
0x037e, // Greek question mark (looks like semicolon) | |||
0x0387, // Greek semicolon, ano teleia | |||
0x0964, // Devanagari Danda (fullstop) | |||
0x0589, // Armenian period | |||
0x055d, // Armenian comma | |||
0x055c, // Armenian exclamation | |||
0x055e, // Armenian question | |||
0x055b, // Armenian emphasis mark | |||
0x060c, // Arabic , | |||
0x061b, // Arabic ; | |||
0x061f, // Arabic ? | |||
0x06d4, // Arabic . | |||
0x0df4, // Singhalese Kunddaliya | |||
0x0f0d, // Tibet Shad | |||
0x0f0e, | |||
0x1362, // Ethiopic period | |||
0x1363, | |||
0x1364, | |||
0x1365, | |||
0x1366, | |||
0x1367, | |||
0x1368, | |||
0x10fb, // Georgian paragraph | |||
0x3001, // ideograph comma | |||
0x3002, // ideograph period | |||
0xff01, // fullwidth exclamation | |||
0xff0c, // fullwidth comma | |||
0xff0e, // fullwidth period | |||
0xff1a, // fullwidth colon | |||
0xff1b, // fullwidth semicolon | |||
0xff1f, // fullwidth question mark | |||
0 | |||
}; | |||
// indexed by entry num. in punct_chars | |||
static const unsigned int punct_attributes[] = { | |||
CLAUSE_COMMA, | |||
CLAUSE_PERIOD, | |||
CLAUSE_QUESTION, | |||
CLAUSE_EXCLAMATION, | |||
CLAUSE_COLON, | |||
CLAUSE_SEMICOLON, | |||
CLAUSE_SEMICOLON | CLAUSE_OPTIONAL_SPACE_AFTER, // inverted exclamation | |||
CLAUSE_SEMICOLON | CLAUSE_OPTIONAL_SPACE_AFTER, // inverted question | |||
CLAUSE_SEMICOLON, // en-dash | |||
CLAUSE_SEMICOLON, // em-dash | |||
CLAUSE_SEMICOLON | CLAUSE_SPEAK_PUNCTUATION_NAME | CLAUSE_OPTIONAL_SPACE_AFTER, // elipsis | |||
CLAUSE_QUESTION, // Greek question mark | |||
CLAUSE_SEMICOLON, // Greek semicolon | |||
CLAUSE_PERIOD | CLAUSE_OPTIONAL_SPACE_AFTER, // Devanagari Danda (fullstop) | |||
CLAUSE_PERIOD | CLAUSE_OPTIONAL_SPACE_AFTER, // Armenian period | |||
CLAUSE_COMMA, // Armenian comma | |||
CLAUSE_EXCLAMATION | CLAUSE_PUNCTUATION_IN_WORD, // Armenian exclamation | |||
CLAUSE_QUESTION | CLAUSE_PUNCTUATION_IN_WORD, // Armenian question | |||
CLAUSE_PERIOD | CLAUSE_PUNCTUATION_IN_WORD, // Armenian emphasis mark | |||
CLAUSE_COMMA, // Arabic , | |||
CLAUSE_SEMICOLON, // Arabic ; | |||
CLAUSE_QUESTION, // Arabic question mark | |||
CLAUSE_PERIOD, // Arabic full stop | |||
CLAUSE_PERIOD | CLAUSE_OPTIONAL_SPACE_AFTER, // Singhalese period | |||
CLAUSE_PERIOD | CLAUSE_OPTIONAL_SPACE_AFTER, // Tibet period | |||
CLAUSE_PARAGRAPH, | |||
CLAUSE_PERIOD, // Ethiopic period | |||
CLAUSE_COMMA, // Ethiopic comma | |||
CLAUSE_SEMICOLON, // Ethiopic semicolon | |||
CLAUSE_COLON, // Ethiopic colon | |||
CLAUSE_COLON, // Ethiopic preface colon | |||
CLAUSE_QUESTION, // Ethiopic question mark | |||
CLAUSE_PARAGRAPH, // Ethiopic paragraph | |||
CLAUSE_PARAGRAPH, // Georgian paragraph | |||
CLAUSE_COMMA | CLAUSE_OPTIONAL_SPACE_AFTER, // ideograph comma | |||
CLAUSE_PERIOD | CLAUSE_OPTIONAL_SPACE_AFTER, // ideograph period | |||
CLAUSE_EXCLAMATION | CLAUSE_OPTIONAL_SPACE_AFTER, // fullwidth | |||
CLAUSE_COMMA | CLAUSE_OPTIONAL_SPACE_AFTER, | |||
CLAUSE_PERIOD | CLAUSE_OPTIONAL_SPACE_AFTER, | |||
CLAUSE_COLON | CLAUSE_OPTIONAL_SPACE_AFTER, | |||
CLAUSE_SEMICOLON | CLAUSE_OPTIONAL_SPACE_AFTER, | |||
CLAUSE_QUESTION | CLAUSE_OPTIONAL_SPACE_AFTER, | |||
0 | |||
}; | |||
int clause_type_from_codepoint(uint32_t c) | |||
{ | |||
for (int ix = 0; punct_chars[ix] != 0; ++ix) { | |||
if (punct_chars[ix] == c) | |||
return punct_attributes[ix]; | |||
} | |||
return CLAUSE_NONE; | |||
} |
@@ -235,6 +235,8 @@ extern "C" | |||
#define CLAUSE_COLON (30 | CLAUSE_INTONATION_FULL_STOP | CLAUSE_TYPE_CLAUSE) | |||
#define CLAUSE_SEMICOLON (30 | CLAUSE_INTONATION_COMMA | CLAUSE_TYPE_CLAUSE) | |||
int clause_type_from_codepoint(uint32_t c); | |||
//@} | |||
#define SAYAS_CHARS 0x12 |
@@ -186,6 +186,7 @@ | |||
<ClCompile Include="..\libespeak-ng\synthdata.c" /> | |||
<ClCompile Include="..\libespeak-ng\synthesize.c" /> | |||
<ClCompile Include="..\libespeak-ng\synth_mbrola.c" /> | |||
<ClCompile Include="..\libespeak-ng\tokenizer.c" /> | |||
<ClCompile Include="..\libespeak-ng\translate.c" /> | |||
<ClCompile Include="..\libespeak-ng\tr_languages.c" /> | |||
<ClCompile Include="..\libespeak-ng\voices.c" /> |
@@ -0,0 +1,174 @@ | |||
/*
 * Copyright (C) 2017 Reece H. Dunn
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, see:
 *             <http://www.gnu.org/licenses/>.
 */
#include "config.h" | |||
#include <assert.h> | |||
#include <stdint.h> | |||
#include <stdlib.h> | |||
#include <stdio.h> | |||
#include <espeak-ng/espeak_ng.h> | |||
#include "encoding.h" | |||
#include "speech.h" | |||
#include "phoneme.h" | |||
#include "synthesize.h" | |||
#include "translate.h" | |||
void | |||
test_latin_common() | |||
{ | |||
printf("testing Latin/Common (Latn/Zyyy) script classification\n"); | |||
assert(clause_type_from_codepoint('a') == CLAUSE_NONE); | |||
assert(clause_type_from_codepoint('.') == CLAUSE_PERIOD); | |||
assert(clause_type_from_codepoint('?') == CLAUSE_QUESTION); | |||
assert(clause_type_from_codepoint('!') == CLAUSE_EXCLAMATION); | |||
assert(clause_type_from_codepoint(',') == CLAUSE_COMMA); | |||
assert(clause_type_from_codepoint(':') == CLAUSE_COLON); | |||
assert(clause_type_from_codepoint(';') == CLAUSE_SEMICOLON); | |||
assert(clause_type_from_codepoint(0x00A1) == (CLAUSE_SEMICOLON | CLAUSE_OPTIONAL_SPACE_AFTER)); | |||
assert(clause_type_from_codepoint(0x00Bf) == (CLAUSE_SEMICOLON | CLAUSE_OPTIONAL_SPACE_AFTER)); | |||
assert(clause_type_from_codepoint(0x2013) == CLAUSE_SEMICOLON); | |||
assert(clause_type_from_codepoint(0x2014) == CLAUSE_SEMICOLON); | |||
assert(clause_type_from_codepoint(0x2026) == (CLAUSE_SEMICOLON | CLAUSE_SPEAK_PUNCTUATION_NAME | CLAUSE_OPTIONAL_SPACE_AFTER)); | |||
} | |||
void | |||
test_greek() | |||
{ | |||
printf("testing Greek (Grek) script classification\n"); | |||
assert(clause_type_from_codepoint(0x037E) == CLAUSE_QUESTION); | |||
assert(clause_type_from_codepoint(0x0387) == CLAUSE_SEMICOLON); | |||
} | |||
void | |||
test_armenian() | |||
{ | |||
printf("testing Armenian (Armn) script classification\n"); | |||
assert(clause_type_from_codepoint(0x055B) == (CLAUSE_PERIOD | CLAUSE_PUNCTUATION_IN_WORD)); | |||
assert(clause_type_from_codepoint(0x055C) == (CLAUSE_EXCLAMATION | CLAUSE_PUNCTUATION_IN_WORD)); | |||
assert(clause_type_from_codepoint(0x055D) == CLAUSE_COMMA); | |||
assert(clause_type_from_codepoint(0x055E) == (CLAUSE_QUESTION | CLAUSE_PUNCTUATION_IN_WORD)); | |||
assert(clause_type_from_codepoint(0x0589) == (CLAUSE_PERIOD | CLAUSE_OPTIONAL_SPACE_AFTER)); | |||
} | |||
void | |||
test_arabic() | |||
{ | |||
printf("testing Arabic (Arab) script classification\n"); | |||
assert(clause_type_from_codepoint(0x060C) == CLAUSE_COMMA); | |||
assert(clause_type_from_codepoint(0x061B) == CLAUSE_SEMICOLON); | |||
assert(clause_type_from_codepoint(0x061F) == CLAUSE_QUESTION); | |||
assert(clause_type_from_codepoint(0x06D4) == CLAUSE_PERIOD); | |||
} | |||
void | |||
test_devanagari() | |||
{ | |||
printf("testing Devanagari (Deva) script classification\n"); | |||
assert(clause_type_from_codepoint(0x0964) == (CLAUSE_PERIOD | CLAUSE_OPTIONAL_SPACE_AFTER)); | |||
} | |||
void | |||
test_tibetan() | |||
{ | |||
printf("testing Tibetan (Tibt) script classification\n"); | |||
assert(clause_type_from_codepoint(0x0F0D) == (CLAUSE_PERIOD | CLAUSE_OPTIONAL_SPACE_AFTER)); | |||
assert(clause_type_from_codepoint(0x0F0E) == CLAUSE_PARAGRAPH); | |||
} | |||
void | |||
test_sinhala() | |||
{ | |||
printf("testing Sinhala (Sinh) script classification\n"); | |||
assert(clause_type_from_codepoint(0x0DF4) == (CLAUSE_PERIOD | CLAUSE_OPTIONAL_SPACE_AFTER)); | |||
} | |||
void | |||
test_georgian() | |||
{ | |||
printf("testing Georgian (Geor) script classification\n"); | |||
assert(clause_type_from_codepoint(0x10FB) == CLAUSE_PARAGRAPH); | |||
} | |||
void | |||
test_ethiopic() | |||
{ | |||
printf("testing Ethiopic (Ethi) script classification\n"); | |||
assert(clause_type_from_codepoint(0x1362) == CLAUSE_PERIOD); | |||
assert(clause_type_from_codepoint(0x1363) == CLAUSE_COMMA); | |||
assert(clause_type_from_codepoint(0x1364) == CLAUSE_SEMICOLON); | |||
assert(clause_type_from_codepoint(0x1365) == CLAUSE_COLON); | |||
assert(clause_type_from_codepoint(0x1366) == CLAUSE_COLON); | |||
assert(clause_type_from_codepoint(0x1367) == CLAUSE_QUESTION); | |||
assert(clause_type_from_codepoint(0x1368) == CLAUSE_PARAGRAPH); | |||
} | |||
void | |||
test_ideographic() | |||
{ | |||
printf("testing Ideographic (Hani) script classification\n"); | |||
assert(clause_type_from_codepoint(0x3001) == (CLAUSE_COMMA | CLAUSE_OPTIONAL_SPACE_AFTER)); | |||
assert(clause_type_from_codepoint(0x3002) == (CLAUSE_PERIOD | CLAUSE_OPTIONAL_SPACE_AFTER)); | |||
} | |||
void | |||
test_fullwidth() | |||
{ | |||
printf("testing Full Width/Common (Zyyy) script classification\n"); | |||
assert(clause_type_from_codepoint(0xFF01) == (CLAUSE_EXCLAMATION | CLAUSE_OPTIONAL_SPACE_AFTER)); | |||
assert(clause_type_from_codepoint(0xFF0C) == (CLAUSE_COMMA | CLAUSE_OPTIONAL_SPACE_AFTER)); | |||
assert(clause_type_from_codepoint(0xFF0E) == (CLAUSE_PERIOD | CLAUSE_OPTIONAL_SPACE_AFTER)); | |||
assert(clause_type_from_codepoint(0xFF1A) == (CLAUSE_COLON | CLAUSE_OPTIONAL_SPACE_AFTER)); | |||
assert(clause_type_from_codepoint(0xFF1B) == (CLAUSE_SEMICOLON | CLAUSE_OPTIONAL_SPACE_AFTER)); | |||
assert(clause_type_from_codepoint(0xFF1F) == (CLAUSE_QUESTION | CLAUSE_OPTIONAL_SPACE_AFTER)); | |||
} | |||
// Test driver: runs every script-specific classification test in turn.
//
// Each test reports failure through assert(), so (unless built with NDEBUG)
// a failing check aborts the process before "done" is printed.
//
// Returns EXIT_SUCCESS once all tests have run.
int
main(int argc, char **argv)
{
	// No command-line options are used by this test program.
	(void)argc;
	(void)argv;

	test_latin_common();
	test_greek();
	test_armenian();
	test_arabic();
	test_devanagari();
	test_tibetan();
	test_sinhala();
	test_georgian();
	test_ethiopic();
	test_ideographic();
	test_fullwidth();

	printf("done\n");

	return EXIT_SUCCESS;
}