Browse Source

tokenizer.c: Tokenise U+000B [VERTICAL TAB (VT)] as whitespace, not as newlines.

master
Reece H. Dunn 8 years ago
parent
commit
f3ea6f68f3
1 changed file with 10 additions and 1 deletion
  1. 10
    1
      tests/tokenizer.c

+ 10
- 1
tests/tokenizer.c View File

espeak_ng_TOKENIZER *tokenizer = create_tokenizer(); espeak_ng_TOKENIZER *tokenizer = create_tokenizer();
espeak_ng_TEXT_DECODER *decoder = create_text_decoder(); espeak_ng_TEXT_DECODER *decoder = create_text_decoder();


assert(text_decoder_decode_string(decoder, "\t\t\n \xE3\x80\x80 \n\xC2\xA0\xC2\xA0\n\xE2\x80\xA9", -1, ESPEAKNG_ENCODING_UTF_8) == ENS_OK);
assert(text_decoder_decode_string(decoder, "\t\t\n\x0B\x0B\n \xE3\x80\x80 \n\xC2\xA0\xC2\xA0\n\xE2\x80\xA9", -1, ESPEAKNG_ENCODING_UTF_8) == ENS_OK);
assert(tokenizer_reset(tokenizer, decoder) == 1); assert(tokenizer_reset(tokenizer, decoder) == 1);


// General Category: Cc, Property: White_Space // General Category: Cc, Property: White_Space
assert(tokenizer_get_token_text(tokenizer) != NULL); assert(tokenizer_get_token_text(tokenizer) != NULL);
assert(strcmp(tokenizer_get_token_text(tokenizer), "\n") == 0); assert(strcmp(tokenizer_get_token_text(tokenizer), "\n") == 0);


// General Category: Cc, Property: White_Space, VERTICAL TAB (VT) -- Not treated as newline tokens.
assert(tokenizer_read_next_token(tokenizer) == ESPEAKNG_TOKEN_WHITESPACE);
assert(tokenizer_get_token_text(tokenizer) != NULL);
assert(strcmp(tokenizer_get_token_text(tokenizer), "\x0B\x0B") == 0);

assert(tokenizer_read_next_token(tokenizer) == ESPEAKNG_TOKEN_NEWLINE);
assert(tokenizer_get_token_text(tokenizer) != NULL);
assert(strcmp(tokenizer_get_token_text(tokenizer), "\n") == 0);

// General Category: Zs, Property: White_Space // General Category: Zs, Property: White_Space
assert(tokenizer_read_next_token(tokenizer) == ESPEAKNG_TOKEN_WHITESPACE); assert(tokenizer_read_next_token(tokenizer) == ESPEAKNG_TOKEN_WHITESPACE);
assert(tokenizer_get_token_text(tokenizer) != NULL); assert(tokenizer_get_token_text(tokenizer) != NULL);

Loading…
Cancel
Save