@@ -19,10 +19,12 @@
 #include "config.h"

 #include <assert.h>
+#include <errno.h>
 #include <stdint.h>
 #include <stdlib.h>
 #include <string.h>
 #include <stdio.h>
+#include <sys/stat.h>

 #include <espeak-ng/espeak_ng.h>
@@ -33,6 +35,21 @@
 #include "synthesize.h"
 #include "translate.h"
+
+// TODO: Find a better place for this than speech.c, so it can be implemented
+// in one place without having to include all of speech.c.
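+// Returns the length of the file in bytes, or a negative errno-style value
+// (-errno on stat failure, -EISDIR for directories).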
+int GetFileLength(const char *filename)
+{
+    struct stat statbuf;
+
+    if (stat(filename, &statbuf) != 0)
+        return -errno;
+
+    if (S_ISDIR(statbuf.st_mode))
+        return -EISDIR;
+
+    return statbuf.st_size;
+}

 void
 test_latin_common()
 {
@@ -385,8 +402,8 @@ test_whitespace_tokens()
     destroy_tokenizer(tokenizer);
 }

-int
-main(int argc, char **argv)
+void
+run_tests()
 {
     test_latin_common();
     test_greek();
@@ -410,6 +427,99 @@ main(int argc, char **argv)
     test_whitespace_tokens();

     printf("done\n");
 }
+
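+// Print a token's text with carriage return and line feed characters shown
+// as the escape sequences \r and \n, so newline tokens are visible in the output.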
+void
+escape_newline(const char *s)
+{
+    for ( ; *s; ++s) switch (*s)
+    {
+    case '\r': printf("\\r"); break;
+    case '\n': printf("\\n"); break;
+    default:   putc(*s, stdout); break;
+    }
+}
+
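+// Read every token from the decoder and print one line per token, giving its
+// type and its text, until the end of the buffer is reached.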
+void
+print_tokens(espeak_ng_TEXT_DECODER *decoder)
+{
+    espeak_ng_TOKENIZER *tokenizer = create_tokenizer();
+    if (!tokenizer_reset(tokenizer, decoder, ESPEAKNG_TOKENIZER_OPTION_TEXT)) {
+        destroy_tokenizer(tokenizer);
+        return;
+    }
+
+    while (1) switch (tokenizer_read_next_token(tokenizer))
+    {
+    case ESPEAKNG_TOKEN_END_OF_BUFFER:
+        destroy_tokenizer(tokenizer);
+        return;
+    case ESPEAKNG_TOKEN_UNKNOWN:
+        printf("unknown : %s\n", tokenizer_get_token_text(tokenizer));
+        break;
+    case ESPEAKNG_TOKEN_NEWLINE:
+        printf("newline : ");
+        escape_newline(tokenizer_get_token_text(tokenizer));
+        putc('\n', stdout);
+        break;
+    case ESPEAKNG_TOKEN_PARAGRAPH:
+        printf("paragraph : %s\n", tokenizer_get_token_text(tokenizer));
+        break;
+    case ESPEAKNG_TOKEN_WHITESPACE:
+        printf("whitespace : %s\n", tokenizer_get_token_text(tokenizer));
+        break;
+    }
+}
+
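+// Read FILENAME into memory, decode it using the named character encoding,
+// and print the tokens produced by the tokenizer.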
+void
+print_tokens_from_file(const char *filename, const char *encoding_name)
+{
+    espeak_ng_ENCODING encoding = espeak_ng_EncodingFromName(encoding_name);
+    if (encoding == ESPEAKNG_ENCODING_UNKNOWN) {
+        printf("Unknown encoding \"%s\".\n", encoding_name);
+        return;
+    }
+
+    int length = GetFileLength(filename);
+    FILE *f = (length > 0) ? fopen(filename, "rb") : NULL;
+    if (!f) {
+        printf("Cannot open file: %s\n", filename);
+        return;
+    }
+
+    char *buffer = malloc(length);
+    if (!buffer) {
+        fclose(f);
+        printf("Out of memory!\n");
+        return;
+    }
+    if (fread(buffer, 1, length, f) != (size_t)length)
+        printf("Warning: short read from file: %s\n", filename);
+    fclose(f);
+
+    espeak_ng_TEXT_DECODER *decoder = create_text_decoder();
+    if (text_decoder_decode_string(decoder, buffer, length, encoding) == ENS_OK)
+        print_tokens(decoder);
+    destroy_text_decoder(decoder);
+    free(buffer);
+}
+
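+// Describe the two supported command-line modes: run the test suite, or
+// tokenize a file in a given encoding.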
+void
+usage(const char *program)
+{
+    printf("%s -- Run the tokenizer tests.\n", program);
+    printf("%s ENCODING FILENAME -- Print the tokens for FILENAME.\n", program);
+}
+
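+// Example invocations (the program name and path depend on how the tests are built):
+//     ./tokenizer                    -- run the tokenizer test suite
+//     ./tokenizer UTF-8 input.txt    -- print the tokens read from input.txt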
+int
+main(int argc, char **argv)
+{
+    switch (argc)
+    {
+    case 1:  run_tests(); break;
+    case 3:  print_tokens_from_file(argv[2], argv[1]); break;
+    default: usage(argv[0]); return EXIT_FAILURE;
+    }
+    return EXIT_SUCCESS;
+}