@@ -457,6 +457,29 @@ test_Latn_word_tokens()
 	destroy_tokenizer(tokenizer);
 }
 
+void
+test_Latn_punctuation_tokens()
+{
+	printf("testing Latin (Latn) script punctuation tokens\n");
+
+	espeak_ng_TOKENIZER *tokenizer = create_tokenizer();
+	espeak_ng_TEXT_DECODER *decoder = create_text_decoder();
+
+	assert(text_decoder_decode_string(decoder, ".", -1, ESPEAKNG_ENCODING_US_ASCII) == ENS_OK);
+	assert(tokenizer_reset(tokenizer, decoder, ESPEAKNG_TOKENIZER_OPTION_TEXT) == 1);
+
+	assert(tokenizer_read_next_token(tokenizer) == ESPEAKNG_TOKEN_FULL_STOP);
+	assert(tokenizer_get_token_text(tokenizer) != NULL);
+	assert(strcmp(tokenizer_get_token_text(tokenizer), ".") == 0);
+
+	assert(tokenizer_read_next_token(tokenizer) == ESPEAKNG_TOKEN_END_OF_BUFFER);
+	assert(tokenizer_get_token_text(tokenizer) != NULL);
+	assert(*tokenizer_get_token_text(tokenizer) == '\0');
+
+	destroy_text_decoder(decoder);
+	destroy_tokenizer(tokenizer);
+}
+
 void
 run_tests()
 {
@@ -482,6 +505,7 @@ run_tests()
 	test_whitespace_tokens();
 
 	test_Latn_word_tokens();
+	test_Latn_punctuation_tokens();
 
 	printf("done\n");
 }
@@ -537,6 +561,9 @@ print_tokens(espeak_ng_TEXT_DECODER *decoder)
 	case ESPEAKNG_TOKEN_WORD_CAPITALIZED:
 		printf("word (capitalized) : %s\n", tokenizer_get_token_text(tokenizer));
 		break;
+	case ESPEAKNG_TOKEN_FULL_STOP:
+		printf("full stop          : %s\n", tokenizer_get_token_text(tokenizer));
+		break;
 	}
 }
 
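For context, the loop below is a minimal sketch (not part of the patch) of how the new ESPEAKNG_TOKEN_FULL_STOP token might be consumed outside the tests. It reuses only the names visible in this diff; the exact return type of tokenizer_read_next_token and the required headers do not appear in these hunks, so the sketch avoids naming the enum type and omits includes beyond the C standard library.

/* Sketch only: walk an already-decoded buffer and report each full stop
 * token, using the same create/reset/read/destroy API exercised by the
 * tests above. */
#include <assert.h>
#include <stdio.h>

static void
report_full_stops(espeak_ng_TEXT_DECODER *decoder)
{
	espeak_ng_TOKENIZER *tokenizer = create_tokenizer();
	assert(tokenizer_reset(tokenizer, decoder, ESPEAKNG_TOKENIZER_OPTION_TEXT) == 1);

	for (;;) {
		/* The token-type enum's name is not shown in the diff, so the
		 * result is held as an int; the constants come from the hunks. */
		int type = tokenizer_read_next_token(tokenizer);
		if (type == ESPEAKNG_TOKEN_END_OF_BUFFER)
			break;
		if (type == ESPEAKNG_TOKEN_FULL_STOP)
			printf("full stop : %s\n", tokenizer_get_token_text(tokenizer));
	}

	destroy_tokenizer(tokenizer);
}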