@@ -28,6 +28,7 @@
#include <ucd/ucd.h>
#include "encoding.h"
#include "tokenizer.h"
#include "speech.h"
#include "phoneme.h"
#include "synthesize.h"

@@ -81,3 +82,46 @@ int clause_type_from_codepoint(uint32_t c)
    return CLAUSE_NONE;
}
struct espeak_ng_TOKENIZER_
{
    espeak_ng_TEXT_DECODER *decoder;
};

espeak_ng_TOKENIZER *
create_tokenizer(void)
{
    espeak_ng_TOKENIZER *tokenizer = malloc(sizeof(espeak_ng_TOKENIZER));
    if (!tokenizer) return NULL;

    tokenizer->decoder = NULL;
    return tokenizer;
}

void
destroy_tokenizer(espeak_ng_TOKENIZER *tokenizer)
{
    if (tokenizer) free(tokenizer);
}
/* Bind the tokenizer to a text decoder. Returns 1 on success, 0 if either
   argument is NULL. */
int
tokenizer_reset(espeak_ng_TOKENIZER *tokenizer,
                espeak_ng_TEXT_DECODER *decoder)
{
    if (!tokenizer || !decoder) return 0;

    tokenizer->decoder = decoder;
    return 1;
}
espeak_ng_TOKEN_TYPE
tokenizer_read_next_token(espeak_ng_TOKENIZER *tokenizer)
{
    // Stub: token reading is not implemented yet, so always report end of buffer.
    return ESPEAKNG_TOKEN_END_OF_BUFFER;
}

const char *
tokenizer_get_token_text(espeak_ng_TOKENIZER *tokenizer)
{
    // Stub: no token has been read, so the token text is always empty.
    return "";
}
@@ -0,0 +1,53 @@
/*
 * Copyright (C) 2017 Reece H. Dunn
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, see: <http://www.gnu.org/licenses/>.
 */

#ifndef ESPEAK_NG_TOKENIZER_H
#define ESPEAK_NG_TOKENIZER_H

#ifdef __cplusplus
extern "C"
{
#endif

typedef struct espeak_ng_TOKENIZER_ espeak_ng_TOKENIZER;

espeak_ng_TOKENIZER *
create_tokenizer(void);

void
destroy_tokenizer(espeak_ng_TOKENIZER *tokenizer);

/* espeak_ng_TEXT_DECODER is declared in encoding.h, which the files using this
   header include first. */
int
tokenizer_reset(espeak_ng_TOKENIZER *tokenizer,
                espeak_ng_TEXT_DECODER *decoder);

typedef enum
{
    ESPEAKNG_TOKEN_END_OF_BUFFER,
    ESPEAKNG_TOKEN_UNKNOWN,
} espeak_ng_TOKEN_TYPE;

espeak_ng_TOKEN_TYPE
tokenizer_read_next_token(espeak_ng_TOKENIZER *tokenizer);

const char *
tokenizer_get_token_text(espeak_ng_TOKENIZER *tokenizer);

#ifdef __cplusplus
}
#endif

#endif
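
The intended flow of this API is: create a tokenizer, bind it to a text decoder with tokenizer_reset(), then pull tokens until ESPEAKNG_TOKEN_END_OF_BUFFER. A minimal caller might look like the sketch below. The function name print_all_tokens is hypothetical, and the decoder helpers (create_text_decoder, text_decoder_decode_string, destroy_text_decoder) and the ESPEAKNG_ENCODING_UTF_8 constant are assumed to come from encoding.h and espeak_ng.h; they are not part of this change and may differ from the actual declarations.

#include <stdio.h>

#include <espeak-ng/espeak_ng.h>

#include "encoding.h"
#include "tokenizer.h"

/* Sketch only: prints every token produced for the given text. */
void print_all_tokens(const char *text)
{
    espeak_ng_TOKENIZER *tokenizer = create_tokenizer();
    espeak_ng_TEXT_DECODER *decoder = create_text_decoder();   /* assumed helper */

    /* Assumed decoder API: decode a NUL-terminated string as UTF-8. */
    text_decoder_decode_string(decoder, text, -1, ESPEAKNG_ENCODING_UTF_8);

    if (tokenizer_reset(tokenizer, decoder)) {
        espeak_ng_TOKEN_TYPE type;
        while ((type = tokenizer_read_next_token(tokenizer)) != ESPEAKNG_TOKEN_END_OF_BUFFER)
            printf("token %d: %s\n", (int)type, tokenizer_get_token_text(tokenizer));
    }

    destroy_text_decoder(decoder);                              /* assumed helper */
    destroy_tokenizer(tokenizer);
}

With the stubbed tokenizer_read_next_token() the loop above exits immediately; it only becomes useful once real token types are produced.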
@@ -26,6 +26,7 @@
#include <espeak-ng/espeak_ng.h>
#include "encoding.h"
#include "tokenizer.h"
#include "speech.h"
#include "phoneme.h"
#include "synthesize.h"

@@ -153,6 +154,30 @@ test_fullwidth()
    assert(clause_type_from_codepoint(0xFF1F) == (CLAUSE_QUESTION | CLAUSE_OPTIONAL_SPACE_AFTER));
}
void
test_unbound_tokenizer()
{
    printf("testing unbound tokenizer\n");

    espeak_ng_TOKENIZER *tokenizer = create_tokenizer();
    assert(tokenizer != NULL);

    // A newly created tokenizer has no decoder bound, so it reports an empty token
    // and end of buffer.
    assert(tokenizer_get_token_text(tokenizer) != NULL);
    assert(*tokenizer_get_token_text(tokenizer) == '\0');

    assert(tokenizer_read_next_token(tokenizer) == ESPEAKNG_TOKEN_END_OF_BUFFER);
    assert(tokenizer_get_token_text(tokenizer) != NULL);
    assert(*tokenizer_get_token_text(tokenizer) == '\0');

    // Resetting with a NULL decoder is rejected and leaves the tokenizer unbound.
    assert(tokenizer_reset(tokenizer, NULL) == 0);

    assert(tokenizer_read_next_token(tokenizer) == ESPEAKNG_TOKEN_END_OF_BUFFER);
    assert(tokenizer_get_token_text(tokenizer) != NULL);
    assert(*tokenizer_get_token_text(tokenizer) == '\0');

    destroy_tokenizer(tokenizer);
}

int
main(int argc, char **argv)
{
@@ -168,6 +193,8 @@ main(int argc, char **argv)
    test_ideographic();
    test_fullwidth();
    test_unbound_tokenizer();

    printf("done\n");

    return EXIT_SUCCESS;
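
Once tokenizer_read_next_token() does more than report end of buffer, a companion test could bind a real decoder before reading. The sketch below mirrors test_unbound_tokenizer(); test_bound_tokenizer is a hypothetical name, and the decoder helpers (create_text_decoder, text_decoder_decode_string, destroy_text_decoder) plus ESPEAKNG_ENCODING_UTF_8 are assumptions about the encoding API rather than part of this change.

void
test_bound_tokenizer()  /* hypothetical follow-up test */
{
    printf("testing bound tokenizer\n");

    espeak_ng_TOKENIZER *tokenizer = create_tokenizer();
    assert(tokenizer != NULL);

    espeak_ng_TEXT_DECODER *decoder = create_text_decoder();   /* assumed helper */
    assert(decoder != NULL);

    /* Assumed decoder API: decode a NUL-terminated string as UTF-8. */
    text_decoder_decode_string(decoder, "one two", -1, ESPEAKNG_ENCODING_UTF_8);

    /* Binding a valid decoder succeeds. */
    assert(tokenizer_reset(tokenizer, decoder) == 1);

    /* With the current stub the bound tokenizer still reports end of buffer;
       this is where word and whitespace tokens would be asserted once token
       reading is implemented. */
    assert(tokenizer_read_next_token(tokenizer) == ESPEAKNG_TOKEN_END_OF_BUFFER);

    destroy_text_decoder(decoder);                              /* assumed helper */
    destroy_tokenizer(tokenizer);
}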