/*
 * Copyright (C) 2005 to 2013 by Jonathan Duddington
 * email: jonsd@users.sourceforge.net
 * Copyright (C) 2013-2016 Reece H. Dunn
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, see: <http://www.gnu.org/licenses/>.
 */

#include "config.h"

#include "errno.h"
#include "stdio.h"
#include "ctype.h"
#include "string.h"
#include "stdlib.h"
#include "wchar.h"
#include "locale.h"
#include <assert.h>
#include <time.h>

#include "speech.h"

#include <sys/stat.h>
#ifdef PLATFORM_WINDOWS
#include <fcntl.h>
#include <io.h>
#include <windows.h>
#include <winreg.h>
#else /* PLATFORM_POSIX */
#include <unistd.h>
#endif

#include "espeak_ng.h"
#include "speak_lib.h"

#include "phoneme.h"
#include "synthesize.h"
#include "voice.h"
#include "translate.h"

#include "fifo.h"
#include "event.h"
#include "wave.h"

#ifndef S_ISDIR
#define S_ISDIR(mode) (((mode) & S_IFMT) == S_IFDIR)
#endif

unsigned char *outbuf = NULL;

espeak_EVENT *event_list = NULL;
int event_list_ix = 0;
int n_event_list;
long count_samples;
void *my_audio = NULL;

static const char *option_device = NULL;
static unsigned int my_unique_identifier = 0;
static void *my_user_data = NULL;
static espeak_ng_OUTPUT_MODE my_mode = ENOUTPUT_MODE_SYNCHRONOUS;
static int out_samplerate = 0;
static int voice_samplerate = 22050;
static espeak_ERROR err = EE_OK;

t_espeak_callback *synth_callback = NULL;
int (*uri_callback)(int, const char *, const char *) = NULL;
int (*phoneme_callback)(const char *) = NULL;

char path_home[N_PATH_HOME]; // this is the espeak-data directory

extern int saved_parameters[N_SPEECH_PARAM]; // Parameters saved on synthesis start

void WVoiceChanged(voice_t *wvoice)
{
	// Voice change in wavegen
	voice_samplerate = wvoice->samplerate;
}

#ifdef USE_ASYNC

static int dispatch_audio(short *outbuf, int length, espeak_EVENT *event)
{
	int a_wave_can_be_played = fifo_is_command_enabled();

	switch (my_mode)
	{
	case ENOUTPUT_MODE_SPEAK_AUDIO:
	{
		int event_type = 0;
		if (event)
			event_type = event->type;

		if (event_type == espeakEVENT_SAMPLERATE) {
			voice_samplerate = event->id.number;

			if (out_samplerate != voice_samplerate) {
				if (out_samplerate != 0) {
					// sound was previously open with a different sample rate
					wave_close(my_audio);
					sleep(1);
				}
				out_samplerate = voice_samplerate;
				my_audio = wave_open(voice_samplerate, option_device);
				if (!my_audio) {
					err = EE_INTERNAL_ERROR;
					return -1;
				}
				wave_set_callback_is_output_enabled(fifo_is_command_enabled);
				event_init();
			}
		}

		if (outbuf && length && a_wave_can_be_played) {
			wave_write(my_audio, (char *)outbuf, 2*length);
		}

		while (a_wave_can_be_played) {
			// TBD: some events are filtered here, but some insight might be given
			// TBD: in synthesise.cpp to avoid creating WORDs with size=0.
			// TBD: For example the sentence "or ALT)." returns three words
			// "or", "ALT" and "".
			// TBD: the last one has its size=0.
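			// Skip zero-length WORD events; otherwise hand the event to the
			// event thread (event_declare), and if its queue is full wait
			// 10 ms and retry while playback is still enabled.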
			if (event && (event->type == espeakEVENT_WORD) && (event->length == 0))
				break;

			espeak_ERROR a_error = event_declare(event);
			if (a_error != EE_BUFFER_FULL)
				break;

			usleep(10000);

			a_wave_can_be_played = fifo_is_command_enabled();
		}
	}
		break;
	case 0:
		if (synth_callback)
			synth_callback(outbuf, length, event);
		break;
	}

	return a_wave_can_be_played == 0; // 1 = stop synthesis, -1 = error
}

static int create_events(short *outbuf, int length, espeak_EVENT *event_list, uint32_t the_write_pos)
{
	int finished;
	int i = 0;

	// The audio data are written to the output device.
	// The list of events in event_list (index: event_list_ix) is read:
	// each event is declared to the "event" object, which stores them internally.
	// The event object is responsible for calling the external callback
	// as soon as the relevant audio sample is played.

	do { // for each event
		espeak_EVENT *event;
		if (event_list_ix == 0)
			event = NULL;
		else {
			event = event_list + i;
			event->sample += the_write_pos;
		}
		finished = dispatch_audio((short *)outbuf, length, event);
		length = 0; // the wave data are played once.
		i++;
	} while ((i < event_list_ix) && !finished);

	return finished;
}

int sync_espeak_terminated_msg(uint32_t unique_identifier, void *user_data)
{
	int finished = 0;

	memset(event_list, 0, 2*sizeof(espeak_EVENT));

	event_list[0].type = espeakEVENT_MSG_TERMINATED;
	event_list[0].unique_identifier = unique_identifier;
	event_list[0].user_data = user_data;
	event_list[1].type = espeakEVENT_LIST_TERMINATED;
	event_list[1].unique_identifier = unique_identifier;
	event_list[1].user_data = user_data;

	if (my_mode == ENOUTPUT_MODE_SPEAK_AUDIO) {
		while (1) {
			espeak_ERROR a_error = event_declare(event_list);
			if (a_error != EE_BUFFER_FULL)
				break;

			usleep(10000);
		}
	} else {
		if (synth_callback)
			finished = synth_callback(NULL, 0, event_list);
	}
	return finished;
}

#endif

#pragma GCC visibility push(default)

ESPEAK_NG_API espeak_ng_STATUS espeak_ng_InitializeOutput(espeak_ng_OUTPUT_MODE output_mode, int buffer_length, const char *device)
{
	option_device = device;
	my_mode = output_mode;
	my_audio = NULL;
	option_waveout = 1; // inhibit portaudio callback from wavegen.cpp
	out_samplerate = 0;

	if (output_mode == (ENOUTPUT_MODE_SYNCHRONOUS | ENOUTPUT_MODE_SPEAK_AUDIO)) {
		option_waveout = 0;
		WavegenInitSound();
	}

	// buffer_length is in ms; allocate 2 bytes per sample
	if ((buffer_length == 0) || (output_mode & ENOUTPUT_MODE_SPEAK_AUDIO))
		buffer_length = 200;

	outbuf_size = (buffer_length * samplerate)/500;
	outbuf = (unsigned char *)realloc(outbuf, outbuf_size);
	if ((out_start = outbuf) == NULL)
		return ENOMEM;

	// allocate space for event list. Allow 200 events per second.
	// Add a constant to allow for a very small buffer_length.
	n_event_list = (buffer_length*200)/1000 + 20;
	if ((event_list = (espeak_EVENT *)realloc(event_list, sizeof(espeak_EVENT) * n_event_list)) == NULL)
		return ENOMEM;

	return ENS_OK;
}

int GetFileLength(const char *filename)
{
	struct stat statbuf;

	if (stat(filename, &statbuf) != 0)
		return 0;

	if (S_ISDIR(statbuf.st_mode))
		return -2; // a directory

	return statbuf.st_size;
}

#pragma GCC visibility pop

char *Alloc(int size)
{
	char *p;
	if ((p = (char *)malloc(size)) == NULL)
		fprintf(stderr, "Can't allocate memory\n"); // I was told that size+1 fixes a crash on 64-bit systems
	return p;
}

void Free(void *ptr)
{
	if (ptr != NULL)
		free(ptr);
}

#pragma GCC visibility push(default)

ESPEAK_NG_API void espeak_ng_InitializePath(const char *path)
{
	if (path != NULL) {
		sprintf(path_home, "%s/espeak-data", path);
		return;
	}

#ifdef PLATFORM_WINDOWS
	HKEY RegKey;
	unsigned long size;
	unsigned long var_type;
	char *env;
	unsigned char buf[sizeof(path_home)-13];

	if ((env = getenv("ESPEAK_DATA_PATH")) != NULL) {
		sprintf(path_home, "%s/espeak-data", env);
		if (GetFileLength(path_home) == -2)
			return; // an espeak-data directory exists
	}

	buf[0] = 0;
	RegOpenKeyExA(HKEY_LOCAL_MACHINE, "Software\\Microsoft\\Speech\\Voices\\Tokens\\eSpeak", 0, KEY_READ, &RegKey);
	size = sizeof(buf);
	var_type = REG_SZ;
	RegQueryValueExA(RegKey, "path", 0, &var_type, buf, &size);

	sprintf(path_home, "%s\\espeak-data", buf);
#elif defined(PLATFORM_DOS)
	strcpy(path_home, PATH_ESPEAK_DATA);
#else
	char *env;

	// check for environment variable
	if ((env = getenv("ESPEAK_DATA_PATH")) != NULL) {
		snprintf(path_home, sizeof(path_home), "%s/espeak-data", env);
		if (GetFileLength(path_home) == -2)
			return; // an espeak-data directory exists
	}

	snprintf(path_home, sizeof(path_home), "%s/espeak-data", getenv("HOME"));
	if (access(path_home, R_OK) != 0)
		strcpy(path_home, PATH_ESPEAK_DATA);
#endif
}

ESPEAK_NG_API espeak_ng_STATUS espeak_ng_Initialize(void)
{
	int param;
	int srate = 22050; // default sample rate 22050 Hz

	// It seems that the wctype functions don't work until the locale has been set
	// to something other than the default "C". Then, not only Latin1 but also the
	// other characters give the correct results with iswalpha() etc.
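	// Prefer a generic UTF-8 locale, then a common named UTF-8 locale, and
	// finally fall back to the environment's default locale.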
	if (setlocale(LC_CTYPE, "C.UTF-8") == NULL) {
		if (setlocale(LC_CTYPE, "UTF-8") == NULL) {
			if (setlocale(LC_CTYPE, "en_US.UTF-8") == NULL)
				setlocale(LC_CTYPE, "");
		}
	}

	espeak_ng_STATUS result = LoadPhData(&srate);
	if (result != ENS_OK)
		return result;

	WavegenInit(srate, 0);
	LoadConfig();

	memset(&current_voice_selected, 0, sizeof(current_voice_selected));
	SetVoiceStack(NULL, "");
	SynthesizeInit();
	InitNamedata();

	VoiceReset(0);

	for (param = 0; param < N_SPEECH_PARAM; param++)
		param_stack[0].parameter[param] = param_defaults[param];

	SetParameter(espeakRATE, 175, 0);
	SetParameter(espeakVOLUME, 100, 0);
	SetParameter(espeakCAPITALS, option_capitals, 0);
	SetParameter(espeakPUNCTUATION, option_punctuation, 0);
	SetParameter(espeakWORDGAP, 0, 0);

#ifdef USE_ASYNC
	fifo_init();
#endif

	option_phonemes = 0;
	option_phoneme_events = 0;

	return ENS_OK;
}

ESPEAK_NG_API int espeak_ng_GetSampleRate(void)
{
	return samplerate;
}

#pragma GCC visibility pop

static espeak_ERROR Synthesize(unsigned int unique_identifier, const void *text, int flags)
{
	// Fill the buffer with output sound
	int length;
	int finished = 0;
	int count_buffers = 0;
#ifdef USE_ASYNC
	uint32_t a_write_pos = 0;
#endif

	if ((outbuf == NULL) || (event_list == NULL))
		return EE_INTERNAL_ERROR; // espeak_Initialize() has not been called

	option_multibyte = flags & 7;
	option_ssml = flags & espeakSSML;
	option_phoneme_input = flags & espeakPHONEMES;
	option_endpause = flags & espeakENDPAUSE;

	count_samples = 0;

#ifdef USE_ASYNC
	if (my_mode == ENOUTPUT_MODE_SPEAK_AUDIO)
		a_write_pos = wave_get_write_position(my_audio);
#endif

	if (translator == NULL)
		espeak_SetVoiceByName("default");

	SpeakNextClause(NULL, text, 0);

	if (my_mode == (ENOUTPUT_MODE_SYNCHRONOUS | ENOUTPUT_MODE_SPEAK_AUDIO)) {
		for (;;) {
#ifdef PLATFORM_WINDOWS
			Sleep(300); // 0.3s
#else
#ifdef USE_NANOSLEEP
			struct timespec period;
			struct timespec remaining;
			period.tv_sec = 0;
			period.tv_nsec = 300000000; // 0.3 sec
			nanosleep(&period, &remaining);
#else
			sleep(1);
#endif
#endif
			if (SynthOnTimer() != 0)
				break;
		}
		return EE_OK;
	}

	for (;;) {
		out_ptr = outbuf;
		out_end = &outbuf[outbuf_size];
		event_list_ix = 0;
		WavegenFill();

		length = (out_ptr - outbuf)/2;
		count_samples += length;
		event_list[event_list_ix].type = espeakEVENT_LIST_TERMINATED; // indicates end of event list
		event_list[event_list_ix].unique_identifier = unique_identifier;
		event_list[event_list_ix].user_data = my_user_data;

		count_buffers++;
		if (my_mode == ENOUTPUT_MODE_SPEAK_AUDIO) {
#ifdef USE_ASYNC
			finished = create_events((short *)outbuf, length, event_list, a_write_pos);
			if (finished < 0)
				return EE_INTERNAL_ERROR;
			length = 0; // the wave data are played once.
#endif
		} else
			finished = synth_callback((short *)outbuf, length, event_list);
		if (finished) {
			SpeakNextClause(NULL, 0, 2); // stop
			break;
		}

		if (Generate(phoneme_list, &n_phoneme_list, 1) == 0) {
			if (WcmdqUsed() == 0) {
				// don't process the next clause until the previous clause has finished generating speech.
				// This ensures that