Browse Source

tokenizer.c: Create a codepoint_type helper function to classify codepoints for the tokenizer.

master
Reece H. Dunn 8 years ago
parent
commit
539edac795
1 changed file with 22 additions and 4 deletions
  1. 22
    4
      src/libespeak-ng/tokenizer.c

+ 22
- 4
src/libespeak-ng/tokenizer.c View File

return CLAUSE_NONE; return CLAUSE_NONE;
} }


/* Codepoint classes that the tokenizer switches on. */
typedef enum {
	ESPEAKNG_CTYPE_OTHER = 0,           /* any codepoint without its own class */
	ESPEAKNG_CTYPE_CARRIAGE_RETURN = 1, /* '\r' */
	ESPEAKNG_CTYPE_NEWLINE = 2,         /* '\n' */
	ESPEAKNG_CTYPE_END_OF_STRING = 3,   /* '\0' */
} espeakng_CTYPE;

/*
 * Classify a single codepoint for the tokenizer.
 *
 * Returns the dedicated ESPEAKNG_CTYPE_* value for '\r', '\n' and '\0';
 * every other codepoint is reported as ESPEAKNG_CTYPE_OTHER.
 */
static espeakng_CTYPE codepoint_type(uint32_t c)
{
	if (c == '\r') {
		return ESPEAKNG_CTYPE_CARRIAGE_RETURN;
	}
	if (c == '\n') {
		return ESPEAKNG_CTYPE_NEWLINE;
	}
	if (c == '\0') {
		return ESPEAKNG_CTYPE_END_OF_STRING;
	}
	return ESPEAKNG_CTYPE_OTHER;
}

struct espeak_ng_TOKENIZER_ struct espeak_ng_TOKENIZER_
{ {
espeak_ng_TEXT_DECODER *decoder; espeak_ng_TEXT_DECODER *decoder;
uint32_t c; uint32_t c;
char *current = tokenizer->token; char *current = tokenizer->token;


switch (c = text_decoder_getc(tokenizer->decoder))
switch (codepoint_type(c = text_decoder_getc(tokenizer->decoder)))
{ {
case '\r':
case ESPEAKNG_CTYPE_CARRIAGE_RETURN: // '\r'
if (text_decoder_peekc(tokenizer->decoder) == '\n') { if (text_decoder_peekc(tokenizer->decoder) == '\n') {
current += utf8_out(c, current); current += utf8_out(c, current);
c = text_decoder_getc(tokenizer->decoder); c = text_decoder_getc(tokenizer->decoder);
} }
// fallthrough // fallthrough
case '\n':
case ESPEAKNG_CTYPE_NEWLINE: // '\n'
current += utf8_out(c, current); current += utf8_out(c, current);
*current = '\0'; *current = '\0';
return ESPEAKNG_TOKEN_NEWLINE; return ESPEAKNG_TOKEN_NEWLINE;
case '\0':
case ESPEAKNG_CTYPE_END_OF_STRING: // '\0'
tokenizer->read = tokenizer_state_end_of_buffer; tokenizer->read = tokenizer_state_end_of_buffer;
return tokenizer_state_end_of_buffer(tokenizer); return tokenizer_state_end_of_buffer(tokenizer);
default: default:

Loading…
Cancel
Save