/*
* Copyright (C) 2005 to 2013 by Jonathan Duddington
* email: jonsd@users.sourceforge.net
* Copyright (C) 2013-2016 Reece H. Dunn
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, see: <http://www.gnu.org/licenses/>.
*/
#include "config.h"
#include "errno.h"
#include "stdio.h"
#include "ctype.h"
#include "string.h"
#include "stdlib.h"
#include "wchar.h"
#include "locale.h"
#include <assert.h>
#include <time.h>
#include "speech.h"
#include <sys/stat.h>
#ifdef PLATFORM_WINDOWS
#include <fcntl.h>
#include <io.h>
#include <windows.h>
#include <winreg.h>
#else /* PLATFORM_POSIX */
#include <unistd.h>
#endif
#include "espeak_ng.h"
#include "speak_lib.h"
#include "phoneme.h"
#include "synthesize.h"
#include "voice.h"
#include "translate.h"
#include "fifo.h"
#include "event.h"
#include "wave.h"
#ifndef S_ISDIR
#define S_ISDIR(mode) (((mode) & S_IFMT) == S_IFDIR)
#endif
unsigned char *outbuf = NULL;
espeak_EVENT *event_list = NULL;
int event_list_ix = 0;
int n_event_list;
long count_samples;
void *my_audio = NULL;
static const char *option_device = NULL;
static unsigned int my_unique_identifier = 0;
static void *my_user_data = NULL;
static espeak_ng_OUTPUT_MODE my_mode = ENOUTPUT_MODE_SYNCHRONOUS;
static int out_samplerate = 0;
static int voice_samplerate = 22050;
static espeak_ERROR err = EE_OK;
t_espeak_callback *synth_callback = NULL;
int (*uri_callback)(int, const char *, const char *) = NULL;
int (*phoneme_callback)(const char *) = NULL;
char path_home[N_PATH_HOME]; // this is the espeak-data directory
extern int saved_parameters[N_SPEECH_PARAM]; // Parameters saved on synthesis start
void WVoiceChanged(voice_t *wvoice)
{
// Voice change in wavegen
voice_samplerate = wvoice->samplerate;
}
#ifdef USE_ASYNC
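// Deliver one buffer of 16-bit samples either to the audio device
// (ENOUTPUT_MODE_SPEAK_AUDIO) or to the user's synth callback, and declare the
// accompanying event to the event queue.
// Returns 0 to continue, 1 to stop synthesis, -1 on error.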
static int dispatch_audio(short *outbuf, int length, espeak_EVENT *event)
{
int a_wave_can_be_played = fifo_is_command_enabled();
switch (my_mode)
{
case ENOUTPUT_MODE_SPEAK_AUDIO:
{
int event_type = 0;
if (event)
event_type = event->type;
if (event_type == espeakEVENT_SAMPLERATE) {
voice_samplerate = event->id.number;
if (out_samplerate != voice_samplerate) {
if (out_samplerate != 0) {
// sound was previously open with a different sample rate
wave_close(my_audio);
sleep(1);
}
out_samplerate = voice_samplerate;
my_audio = wave_open(voice_samplerate, option_device);
if (!my_audio) {
err = EE_INTERNAL_ERROR;
return -1;
}
wave_set_callback_is_output_enabled(fifo_is_command_enabled);
event_init();
}
}
if (outbuf && length && a_wave_can_be_played) {
wave_write(my_audio, (char *)outbuf, 2*length);
}
while (a_wave_can_be_played) {
// TBD: some events are filtered here, but it might be better to avoid
// TBD: creating WORD events with size=0 in synthesise.cpp.
// TBD: For example, the sentence "or ALT)." returns three words:
// "or", "ALT" and "".
// TBD: the last one has size=0.
if (event && (event->type == espeakEVENT_WORD) && (event->length == 0))
break;
espeak_ERROR a_error = event_declare(event);
if (a_error != EE_BUFFER_FULL)
break;
usleep(10000);
a_wave_can_be_played = fifo_is_command_enabled();
}
}
break;
case 0:
if (synth_callback)
synth_callback(outbuf, length, event);
break;
}
return a_wave_can_be_played == 0; // 1 = stop synthesis, -1 = error
}
static int create_events(short *outbuf, int length, espeak_EVENT *event_list, uint32_t the_write_pos)
{
int finished;
int i = 0;
// The audio data are written to the output device.
// The list of events in event_list (index: event_list_ix) is read:
// Each event is declared to the "event" object which stores them internally.
// The event object is responsible for calling the external callback
// as soon as the relevant audio sample is played.
do { // for each event
espeak_EVENT *event;
if (event_list_ix == 0)
event = NULL;
else {
event = event_list + i;
event->sample += the_write_pos;
}
finished = dispatch_audio((short *)outbuf, length, event);
length = 0; // the wave data are played once.
i++;
} while ((i < event_list_ix) && !finished);
return finished;
}
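// Declare the espeakEVENT_MSG_TERMINATED / espeakEVENT_LIST_TERMINATED pair for
// a finished message: to the async event queue in audio-playback mode, otherwise
// directly to the user's synth callback.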
int sync_espeak_terminated_msg(uint32_t unique_identifier, void *user_data)
{
int finished = 0;
memset(event_list, 0, 2*sizeof(espeak_EVENT));
event_list[0].type = espeakEVENT_MSG_TERMINATED;
event_list[0].unique_identifier = unique_identifier;
event_list[0].user_data = user_data;
event_list[1].type = espeakEVENT_LIST_TERMINATED;
event_list[1].unique_identifier = unique_identifier;
event_list[1].user_data = user_data;
if (my_mode == ENOUTPUT_MODE_SPEAK_AUDIO) {
while (1) {
espeak_ERROR a_error = event_declare(event_list);
if (a_error != EE_BUFFER_FULL)
break;
usleep(10000);
}
} else {
if (synth_callback)
finished = synth_callback(NULL, 0, event_list);
}
return finished;
}
#endif
#pragma GCC visibility push(default)
ESPEAK_NG_API espeak_ng_STATUS espeak_ng_InitializeOutput(espeak_ng_OUTPUT_MODE output_mode, int buffer_length, const char *device)
{
option_device = device;
my_mode = output_mode;
my_audio = NULL;
option_waveout = 1; // inhibit portaudio callback from wavegen.cpp
out_samplerate = 0;
if (output_mode == (ENOUTPUT_MODE_SYNCHRONOUS | ENOUTPUT_MODE_SPEAK_AUDIO)) {
option_waveout = 0;
WavegenInitSound();
}
// buffer_length is in ms; allocate 2 bytes per sample
if ((buffer_length == 0) || (output_mode & ENOUTPUT_MODE_SPEAK_AUDIO))
buffer_length = 200;
outbuf_size = (buffer_length * samplerate)/500;
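// e.g. the default 200 ms at 22050 Hz gives (200*22050)/500 = 8820 bytes (4410 16-bit samples)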
outbuf = (unsigned char *)realloc(outbuf, outbuf_size);
if ((out_start = outbuf) == NULL)
return ENOMEM;
// allocate space for event list. Allow 200 events per second.
// Add a constant to allow for a very small buffer_length
n_event_list = (buffer_length*200)/1000 + 20;
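// e.g. a 200 ms buffer gives (200*200)/1000 + 20 = 60 event slots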
if ((event_list = (espeak_EVENT *)realloc(event_list, sizeof(espeak_EVENT) * n_event_list)) == NULL)
return ENOMEM;
return ENS_OK;
}
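// Return the length of a file in bytes, -2 if the path is a directory,
// or 0 if it cannot be accessed.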
int GetFileLength(const char *filename)
{
struct stat statbuf;
if (stat(filename, &statbuf) != 0)
return 0;
if (S_ISDIR(statbuf.st_mode))
return -2; // a directory
return statbuf.st_size;
}
#pragma GCC visibility pop
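// malloc() wrapper which reports allocation failures to stderr; returns NULL on failure.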
char *Alloc(int size)
{
char *p;
if ((p = (char *)malloc(size)) == NULL)
fprintf(stderr, "Can't allocate memory\n"); // I was told that size+1 fixes a crash on 64-bit systems
return p;
}
void Free(void *ptr)
{
if (ptr != NULL)
free(ptr);
}
#pragma GCC visibility push(default)
ESPEAK_NG_API void espeak_ng_InitializePath(const char *path)
{
if (path != NULL) {
sprintf(path_home, "%s/espeak-data", path);
return;
}
#ifdef PLATFORM_WINDOWS
HKEY RegKey;
unsigned long size;
unsigned long var_type;
char *env;
unsigned char buf[sizeof(path_home)-13];
if ((env = getenv("ESPEAK_DATA_PATH")) != NULL) {
sprintf(path_home, "%s/espeak-data", env);
if (GetFileLength(path_home) == -2)
return; // an espeak-data directory exists
}
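// No ESPEAK_DATA_PATH override: fall back to the installation path stored
// in the registry key queried below.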
buf[0] = 0;
RegOpenKeyExA(HKEY_LOCAL_MACHINE, "Software\\Microsoft\\Speech\\Voices\\Tokens\\eSpeak", 0, KEY_READ, &RegKey);
size = sizeof(buf);
var_type = REG_SZ;
RegQueryValueExA(RegKey, "path", 0, &var_type, buf, &size);
sprintf(path_home, "%s\\espeak-data", buf);
#elif defined(PLATFORM_DOS)
strcpy(path_home, PATH_ESPEAK_DATA);
#else
char *env;
// check for environment variable
if ((env = getenv("ESPEAK_DATA_PATH")) != NULL) {
snprintf(path_home, sizeof(path_home), "%s/espeak-data", env);
if (GetFileLength(path_home) == -2)
return; // an espeak-data directory exists
}
snprintf(path_home, sizeof(path_home), "%s/espeak-data", getenv("HOME"));
if (access(path_home, R_OK) != 0)
strcpy(path_home, PATH_ESPEAK_DATA);
#endif
}
ESPEAK_NG_API espeak_ng_STATUS espeak_ng_Initialize(void)
{
int param;
int srate = 22050; // default sample rate 22050 Hz
// It seems that the wctype functions don't work until the locale has been set
// to something other than the default "C". Then, not only Latin1 but also the
// other characters give the correct results with iswalpha() etc.
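// For example, iswalpha(0x00e9) ('é') typically returns 0 in the "C" locale
// but non-zero once a UTF-8 locale has been selected.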
if (setlocale(LC_CTYPE, "C.UTF-8") == NULL) {
if (setlocale(LC_CTYPE, "UTF-8") == NULL) {
if (setlocale(LC_CTYPE, "en_US.UTF-8") == NULL)
setlocale(LC_CTYPE, "");
}
}
espeak_ng_STATUS result = LoadPhData(&srate);
if (result != ENS_OK)
return result;
WavegenInit(srate, 0);
LoadConfig();
memset(&current_voice_selected, 0, sizeof(current_voice_selected));
SetVoiceStack(NULL, "");
SynthesizeInit();
InitNamedata();
VoiceReset(0);
for (param = 0; param < N_SPEECH_PARAM; param++)
param_stack[0].parameter[param] = param_defaults[param];
SetParameter(espeakRATE, 175, 0);
SetParameter(espeakVOLUME, 100, 0);
SetParameter(espeakCAPITALS, option_capitals, 0);
SetParameter(espeakPUNCTUATION, option_punctuation, 0);
SetParameter(espeakWORDGAP, 0, 0);
#ifdef USE_ASYNC
fifo_init();
#endif
option_phonemes = 0;
option_phoneme_events = 0;
return ENS_OK;
}
ESPEAK_NG_API int espeak_ng_GetSampleRate(void)
{
return samplerate;
}
#pragma GCC visibility pop
static espeak_ERROR Synthesize(unsigned int unique_identifier, const void *text, int flags)
{
// Fill the buffer with output sound
int length;
int finished = 0;
int count_buffers = 0;
#ifdef USE_ASYNC
uint32_t a_write_pos = 0;
#endif
if ((outbuf == NULL) || (event_list == NULL))
return EE_INTERNAL_ERROR; // espeak_Initialize() has not been called
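// The low 3 bits of flags select the character encoding (espeakCHARS_*);
// espeakSSML, espeakPHONEMES and espeakENDPAUSE are separate option bits.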
option_multibyte = flags & 7;
option_ssml = flags & espeakSSML;
option_phoneme_input = flags & espeakPHONEMES;
option_endpause = flags & espeakENDPAUSE;
count_samples = 0;
#ifdef USE_ASYNC
if (my_mode == ENOUTPUT_MODE_SPEAK_AUDIO)
a_write_pos = wave_get_write_position(my_audio);
#endif
if (translator == NULL)
espeak_SetVoiceByName("default");
SpeakNextClause(NULL, text, 0);
if (my_mode == (ENOUTPUT_MODE_SYNCHRONOUS | ENOUTPUT_MODE_SPEAK_AUDIO)) {
for (;;) {
#ifdef PLATFORM_WINDOWS
Sleep(300); // 0.3s
#else
#ifdef USE_NANOSLEEP
struct timespec period;
struct timespec remaining;
period.tv_sec = 0;
period.tv_nsec = 300000000; // 0.3 sec
nanosleep(&period, &remaining);
#else
sleep(1);
#endif
#endif
if (SynthOnTimer() != 0)
break;
}
return EE_OK;
}
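// Not using the internal playback thread: repeatedly fill outbuf from the wave
// generator and hand each buffer, with its events, to create_events() or to the
// user's synth callback until synthesis is finished or cancelled.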
for (;;) {
out_ptr = outbuf;
out_end = &outbuf[outbuf_size];
event_list_ix = 0;
WavegenFill();
length = (out_ptr - outbuf)/2;
count_samples += length;
event_list[event_list_ix].type = espeakEVENT_LIST_TERMINATED; // indicates end of event list
event_list[event_list_ix].unique_identifier = unique_identifier;
event_list[event_list_ix].user_data = my_user_data;
count_buffers++;
if (my_mode == ENOUTPUT_MODE_SPEAK_AUDIO) {
#ifdef USE_ASYNC
finished = create_events((short *)outbuf, length, event_list, a_write_pos);
if (finished < 0)
return EE_INTERNAL_ERROR;
length = 0; // the wave data are played once.
#endif
} else
finished = synth_callback((short *)outbuf, length, event_list);
if (finished) {
SpeakNextClause(NULL, 0, 2); // stop
break;
}
if (Generate(phoneme_list, &n_phoneme_list, 1) == 0) {
if (WcmdqUsed() == 0) {
// don't process the next clause until the previous clause has finished generating speech.
// This ensures that