Browse Source

tokenizer.c: Add an options parameter to the tokenizer_reset API.

master
Reece H. Dunn 8 years ago
parent
commit
d093513b65
3 changed files with 16 additions and 9 deletions
  1. 2
    1
      src/libespeak-ng/tokenizer.c
  2. 7
    1
      src/libespeak-ng/tokenizer.h
  3. 7
    7
      tests/tokenizer.c

+ 2
- 1
src/libespeak-ng/tokenizer.c View File



int int
tokenizer_reset(espeak_ng_TOKENIZER *tokenizer, tokenizer_reset(espeak_ng_TOKENIZER *tokenizer,
espeak_ng_TEXT_DECODER *decoder)
espeak_ng_TEXT_DECODER *decoder,
espeak_ng_TOKENIZER_OPTIONS options)
{ {
if (!tokenizer) return 0; if (!tokenizer) return 0;



+ 7
- 1
src/libespeak-ng/tokenizer.h View File

void void
destroy_tokenizer(espeak_ng_TOKENIZER *tokenizer); destroy_tokenizer(espeak_ng_TOKENIZER *tokenizer);


/**
 * Option values accepted by the `options` parameter of tokenizer_reset().
 */
typedef enum {
	/* The only option defined in this enum; its value is 0.
	 * NOTE(review): the name suggests plain-text input handling -- confirm
	 * against the tokenizer implementation. */
	ESPEAKNG_TOKENIZER_OPTION_TEXT = 0
} espeak_ng_TOKENIZER_OPTIONS;

int int
tokenizer_reset(espeak_ng_TOKENIZER *tokenizer, tokenizer_reset(espeak_ng_TOKENIZER *tokenizer,
espeak_ng_TEXT_DECODER *decoder);
espeak_ng_TEXT_DECODER *decoder,
espeak_ng_TOKENIZER_OPTIONS options);


typedef enum typedef enum
{ {

+ 7
- 7
tests/tokenizer.c View File

assert(tokenizer_get_token_text(tokenizer) != NULL); assert(tokenizer_get_token_text(tokenizer) != NULL);
assert(*tokenizer_get_token_text(tokenizer) == '\0'); assert(*tokenizer_get_token_text(tokenizer) == '\0');


assert(tokenizer_reset(tokenizer, NULL) == 1);
assert(tokenizer_reset(tokenizer, NULL, ESPEAKNG_TOKENIZER_OPTION_TEXT) == 1);


assert(tokenizer_read_next_token(tokenizer) == ESPEAKNG_TOKEN_END_OF_BUFFER); assert(tokenizer_read_next_token(tokenizer) == ESPEAKNG_TOKEN_END_OF_BUFFER);
assert(tokenizer_get_token_text(tokenizer) != NULL); assert(tokenizer_get_token_text(tokenizer) != NULL);
espeak_ng_TEXT_DECODER *decoder = create_text_decoder(); espeak_ng_TEXT_DECODER *decoder = create_text_decoder();


assert(text_decoder_decode_string(decoder, "\n\n", -1, ESPEAKNG_ENCODING_US_ASCII) == ENS_OK); assert(text_decoder_decode_string(decoder, "\n\n", -1, ESPEAKNG_ENCODING_US_ASCII) == ENS_OK);
assert(tokenizer_reset(tokenizer, decoder) == 1);
assert(tokenizer_reset(tokenizer, decoder, ESPEAKNG_TOKENIZER_OPTION_TEXT) == 1);


assert(tokenizer_read_next_token(tokenizer) == ESPEAKNG_TOKEN_NEWLINE); assert(tokenizer_read_next_token(tokenizer) == ESPEAKNG_TOKEN_NEWLINE);
assert(tokenizer_get_token_text(tokenizer) != NULL); assert(tokenizer_get_token_text(tokenizer) != NULL);
espeak_ng_TEXT_DECODER *decoder = create_text_decoder(); espeak_ng_TEXT_DECODER *decoder = create_text_decoder();


assert(text_decoder_decode_string(decoder, "\r\r", -1, ESPEAKNG_ENCODING_US_ASCII) == ENS_OK); assert(text_decoder_decode_string(decoder, "\r\r", -1, ESPEAKNG_ENCODING_US_ASCII) == ENS_OK);
assert(tokenizer_reset(tokenizer, decoder) == 1);
assert(tokenizer_reset(tokenizer, decoder, ESPEAKNG_TOKENIZER_OPTION_TEXT) == 1);


assert(tokenizer_read_next_token(tokenizer) == ESPEAKNG_TOKEN_NEWLINE); assert(tokenizer_read_next_token(tokenizer) == ESPEAKNG_TOKEN_NEWLINE);
assert(tokenizer_get_token_text(tokenizer) != NULL); assert(tokenizer_get_token_text(tokenizer) != NULL);
espeak_ng_TEXT_DECODER *decoder = create_text_decoder(); espeak_ng_TEXT_DECODER *decoder = create_text_decoder();


assert(text_decoder_decode_string(decoder, "\r\n\r\n", -1, ESPEAKNG_ENCODING_US_ASCII) == ENS_OK); assert(text_decoder_decode_string(decoder, "\r\n\r\n", -1, ESPEAKNG_ENCODING_US_ASCII) == ENS_OK);
assert(tokenizer_reset(tokenizer, decoder) == 1);
assert(tokenizer_reset(tokenizer, decoder, ESPEAKNG_TOKENIZER_OPTION_TEXT) == 1);


assert(tokenizer_read_next_token(tokenizer) == ESPEAKNG_TOKEN_NEWLINE); assert(tokenizer_read_next_token(tokenizer) == ESPEAKNG_TOKEN_NEWLINE);
assert(tokenizer_get_token_text(tokenizer) != NULL); assert(tokenizer_get_token_text(tokenizer) != NULL);
espeak_ng_TEXT_DECODER *decoder = create_text_decoder(); espeak_ng_TEXT_DECODER *decoder = create_text_decoder();


assert(text_decoder_decode_string(decoder, "\x0C\x0C\xC2\x85\xC2\x85\xE2\x80\xA8\xE2\x80\xA8", -1, ESPEAKNG_ENCODING_UTF_8) == ENS_OK); assert(text_decoder_decode_string(decoder, "\x0C\x0C\xC2\x85\xC2\x85\xE2\x80\xA8\xE2\x80\xA8", -1, ESPEAKNG_ENCODING_UTF_8) == ENS_OK);
assert(tokenizer_reset(tokenizer, decoder) == 1);
assert(tokenizer_reset(tokenizer, decoder, ESPEAKNG_TOKENIZER_OPTION_TEXT) == 1);


// U+000C : FORM FEED (FF) -- Used as a page (not paragraph) break. // U+000C : FORM FEED (FF) -- Used as a page (not paragraph) break.
assert(tokenizer_read_next_token(tokenizer) == ESPEAKNG_TOKEN_NEWLINE); assert(tokenizer_read_next_token(tokenizer) == ESPEAKNG_TOKEN_NEWLINE);
espeak_ng_TEXT_DECODER *decoder = create_text_decoder(); espeak_ng_TEXT_DECODER *decoder = create_text_decoder();


assert(text_decoder_decode_string(decoder, "\xE2\x80\xA9\xE2\x80\xA9", -1, ESPEAKNG_ENCODING_UTF_8) == ENS_OK); assert(text_decoder_decode_string(decoder, "\xE2\x80\xA9\xE2\x80\xA9", -1, ESPEAKNG_ENCODING_UTF_8) == ENS_OK);
assert(tokenizer_reset(tokenizer, decoder) == 1);
assert(tokenizer_reset(tokenizer, decoder, ESPEAKNG_TOKENIZER_OPTION_TEXT) == 1);


// General Category: Zp -- PARAGRAPH SEPARATOR // General Category: Zp -- PARAGRAPH SEPARATOR
assert(tokenizer_read_next_token(tokenizer) == ESPEAKNG_TOKEN_PARAGRAPH); assert(tokenizer_read_next_token(tokenizer) == ESPEAKNG_TOKEN_PARAGRAPH);
espeak_ng_TEXT_DECODER *decoder = create_text_decoder(); espeak_ng_TEXT_DECODER *decoder = create_text_decoder();


assert(text_decoder_decode_string(decoder, "\t\t\n\x0B\x0B\n \xE3\x80\x80 \n\xC2\xA0\xC2\xA0", -1, ESPEAKNG_ENCODING_UTF_8) == ENS_OK); assert(text_decoder_decode_string(decoder, "\t\t\n\x0B\x0B\n \xE3\x80\x80 \n\xC2\xA0\xC2\xA0", -1, ESPEAKNG_ENCODING_UTF_8) == ENS_OK);
assert(tokenizer_reset(tokenizer, decoder) == 1);
assert(tokenizer_reset(tokenizer, decoder, ESPEAKNG_TOKENIZER_OPTION_TEXT) == 1);


// General Category: Cc, Property: White_Space // General Category: Cc, Property: White_Space
assert(tokenizer_read_next_token(tokenizer) == ESPEAKNG_TOKEN_WHITESPACE); assert(tokenizer_read_next_token(tokenizer) == ESPEAKNG_TOKEN_WHITESPACE);

Loading…
Cancel
Save