Soundicons are used for external audio with the SSML <audio> tag and for replacing punctuation names with sound files in LoadConfig(). Currently there's a bug with soundicon slots: if both LoadConfig() and <audio> are used, the punctuation reserves all slots and no sound from <audio> is played.

4 years ago · 6e197d967e
--- a/Makefile.am
+++ b/Makefile.am
@@ -159,6 +159,7 @@ src_libespeak_ng_la_SOURCES = \
 	src/libespeak-ng/phoneme.c \
 	src/libespeak-ng/phonemelist.c \
 	src/libespeak-ng/setlengths.c \
 	src/libespeak-ng/soundicon.c \
 	src/libespeak-ng/spect.c \
 	src/libespeak-ng/speech.c \
 	src/libespeak-ng/ssml.c \
--- a/android/jni/Android.mk
+++ b/android/jni/Android.mk
@@ -28,6 +28,7 @@ ESPEAK_SOURCES := \
  src/libespeak-ng/phonemelist.c \
  src/libespeak-ng/readclause.c \
  src/libespeak-ng/setlengths.c \
  src/libespeak-ng/soundicon.c \
  src/libespeak-ng/spect.c \
  src/libespeak-ng/speech.c \
  src/libespeak-ng/ssml.c \
--- a/src/libespeak-ng/readclause.c
+++ b/src/libespeak-ng/readclause.c
@@ -41,10 +41,10 @@
 #include "dictionary.h"           // for LookupDictList, DecodePhonemes, Set...
 #include "error.h"                // for create_file_error_context
 #include "phoneme.h"              // for phonSWITCH
 #include "soundicon.h"               // for LookupSoundIcon
 #include "speech.h"               // for GetFileLength, LookupMnem, PATHSEP
 #include "ssml.h"                 // for SSML_STACK, ProcessSsmlTag, N_PARAM...
 #include "synthdata.h"            // for SelectPhonemeTable
 #include "synthesize.h"           // for SOUND_ICON, soundicon_tab, samplerate
 #include "translate.h"            // for Translator, utf8_out, CLAUSE_OPTION...
 #include "voice.h"                // for voice, voice_t, current_voice_selected

@@ -296,143 +296,6 @@ int Read4Bytes(FILE *f)
 	return acc;
 }

 /*
  * Load a WAV file into soundicon_tab[index].
  *
  * fname:   file to load; NULL means use the name already stored in
  *          soundicon_tab[index].filename. A name that does not start with '/'
  *          is looked up in the "soundicons" directory below path_home.
  * index:   entry of soundicon_tab that receives the data.
  * context: passed through to create_file_error_context() on file errors.
  *
  * If the header fields read at byte offset 20 do not describe mono 16 bit
  * audio at `samplerate`, the file is first converted into a temporary file by
  * running the external "sox" command.
  *
  * Returns ENS_OK on success; EINVAL when no file name is available or the file
  * is too short to hold the header, ENAMETOOLONG when the resolved path does not
  * fit the path buffer, ENOMEM when the buffer cannot be allocated, or the
  * result of create_file_error_context() for file errors.
  */
 static espeak_ng_STATUS LoadSoundFile(const char *fname, int index, espeak_ng_ERROR_CONTEXT *context)
 {
 	enum { WAV_DATA_SIZE_OFFSET = 40 }; // byte offset of the data size field

 	FILE *f;
 	char *p;
 	int length;
 	int data_bytes;
 	int data_start;
 	int n;
 	espeak_ng_STATUS status = ENS_OK;
 	char fname_temp[100];
 	char fname2[sizeof(path_home)+13+40];

 	// An empty name means "no temporary file to remove"; without this the
 	// cleanup below would pass an uninitialised buffer to remove().
 	fname_temp[0] = 0;

 	if (fname == NULL) {
 		// filename is already in the table
 		fname = soundicon_tab[index].filename;
 	}

 	if (fname == NULL)
 		return EINVAL;

 	if (fname[0] != '/') {
 		// a relative path, look in espeak-ng-data/soundicons
 		n = snprintf(fname2, sizeof(fname2), "%s%csoundicons%c%s", path_home, PATHSEP, PATHSEP, fname);
 		if (n < 0 || (size_t)n >= sizeof(fname2))
 			return ENAMETOOLONG; // refuse instead of overflowing fname2
 		fname = fname2;
 	}

 	f = fopen(fname, "rb");
 	if (f != NULL) {
 		int ix;
 		int fd_temp;
 		int header[3];
 		char command[sizeof(fname2)+sizeof(fname2)+40];

 		if (fseek(f, 20, SEEK_SET) == -1) {
 			int error = errno;
 			fclose(f);
 			return create_file_error_context(context, error, fname);
 		}

 		for (ix = 0; ix < 3; ix++)
 			header[ix] = Read4Bytes(f);

 		// if the sound file is not mono, 16 bit signed, at the correct sample rate, then convert it
 		if ((header[0] != 0x10001) || (header[1] != samplerate) || (header[2] != samplerate*2)) {
 			fclose(f);
 			f = NULL;

 #ifdef HAVE_MKSTEMP
 			strcpy(fname_temp, "/tmp/espeakXXXXXX");
 			if ((fd_temp = mkstemp(fname_temp)) >= 0)
 				close(fd_temp);
 #else
 			strcpy(fname_temp, tmpnam(NULL));
 #endif

 			// NOTE(review): fname is pasted into a shell command line. If it can
 			// come from untrusted input, quote characters in it could inject
 			// shell commands - confirm with the callers.
 			n = snprintf(command, sizeof(command), "sox \"%s\" -r %d -c1 -b 16 -t wav %s\n", fname, samplerate, fname_temp);
 			// Only run the command when it was not truncated.
 			if (n >= 0 && (size_t)n < sizeof(command) && system(command) == 0)
 				fname = fname_temp;
 		}
 	}

 	if (f == NULL) {
 		f = fopen(fname, "rb");
 		if (f == NULL) {
 			status = create_file_error_context(context, errno, fname);
 			goto remove_temp;
 		}
 	}

 	length = GetFileLength(fname);
 	if (length < 0) { // length == -errno
 		status = create_file_error_context(context, -length, fname);
 		goto close_file;
 	}
 	data_start = WAV_DATA_SIZE_OFFSET + (int)sizeof(data_bytes);
 	if (length < data_start) {
 		// too short to contain the size field that is read below
 		status = EINVAL;
 		goto close_file;
 	}
 	if (fseek(f, 0, SEEK_SET) == -1) {
 		status = create_file_error_context(context, errno, fname);
 		goto close_file;
 	}
 	if ((p = (char *)realloc(soundicon_tab[index].data, length)) == NULL) {
 		status = ENOMEM; // the previous buffer is still valid and stays in the table
 		goto close_file;
 	}
 	// realloc() may have moved the buffer, so the old pointer must not stay in the table
 	soundicon_tab[index].data = p;

 	if (fread(p, 1, length, f) != (size_t)length) {
 		status = create_file_error_context(context, errno, fname);
 		free(p);
 		soundicon_tab[index].data = NULL;
 		soundicon_tab[index].length = 0;
 		goto close_file;
 	}

 	// Copy the size field out instead of dereferencing a cast pointer, which
 	// could be misaligned.
 	memcpy(&data_bytes, &p[WAV_DATA_SIZE_OFFSET], sizeof(data_bytes));
 	// Never report more bytes than actually follow the size field in the buffer.
 	if (data_bytes < 0 || data_bytes > length - data_start)
 		data_bytes = length - data_start;
 	soundicon_tab[index].length = data_bytes / 2; // length in samples

 close_file:
 	fclose(f);
 remove_temp:
 	if (fname_temp[0] != 0)
 		remove(fname_temp);
 	return status;
 }

 /*
  * Find the sound icon table entry for a punctuation character.
  *
  * Only entries from N_SOUNDICON_SLOTS up to n_soundicon_tab are searched. If
  * the first matching entry has no samples yet (length 0), its file is loaded
  * through LoadSoundFile().
  *
  * Returns the table index, or -1 if there is no entry for `c` or its sound
  * file could not be loaded.
  */
 static int LookupSoundicon(int c)
 {
 	int entry = N_SOUNDICON_SLOTS;

 	// advance to the first entry registered for this character
 	while (entry < n_soundicon_tab && soundicon_tab[entry].name != c)
 		entry++;

 	if (entry >= n_soundicon_tab)
 		return -1;

 	if (soundicon_tab[entry].length != 0)
 		return entry; // samples are already in memory

 	if (LoadSoundFile(NULL, entry, NULL) == ENS_OK)
 		return entry;

 	return -1; // sound file is not available
 }

 /*
  * Load a sound file into one of the reserved slots in the sound icon table
  * (if it's not already loaded).
  *
  * The first N_SOUNDICON_SLOTS entries are used in rotation, so loading a new
  * file replaces the contents of the next slot in turn.
  *
  * Returns the table index holding the file, or -1 if `fname` is NULL, the file
  * could not be loaded, or memory for the file name could not be allocated.
  */
 int LoadSoundFile2(const char *fname)
 {
 	int ix;
 	char *name_copy;
 	static int slot = -1;

 	if (fname == NULL)
 		return -1;

 	for (ix = 0; ix < n_soundicon_tab; ix++) {
 		if (((soundicon_tab[ix].filename != NULL) && strcmp(fname, soundicon_tab[ix].filename) == 0))
 			return ix; // already loaded
 	}

 	// load the file into the next slot
 	slot++;
 	if (slot >= N_SOUNDICON_SLOTS)
 		slot = 0;

 	if (LoadSoundFile(fname, slot, NULL) != ENS_OK)
 		return -1;

 	// Resize the name buffer of the slot that was just filled. After the loop
 	// above ix equals n_soundicon_tab, so indexing with ix would touch an
 	// unrelated entry or run past the end of the table.
 	name_copy = (char *)realloc(soundicon_tab[slot].filename, strlen(fname)+1);
 	if (name_copy == NULL) {
 		// The slot now holds the new sound; blank the old name so that a
 		// later lookup of the old file does not return this slot.
 		if (soundicon_tab[slot].filename != NULL)
 			soundicon_tab[slot].filename[0] = 0;
 		return -1;
 	}
 	strcpy(name_copy, fname);
 	soundicon_tab[slot].filename = name_copy;
 	return slot;
 }

 static int AnnouncePunctuation(Translator *tr, int c1, int *c2_ptr, char *output, int *bufix, int end_clause)
 {
 	// announce punctuation names
--- a/src/libespeak-ng/readclause.h
+++ b/src/libespeak-ng/readclause.h
@@ -42,7 +42,6 @@ int towlower2(unsigned int c, Translator *translator); // Supports Turkish I
 int Eof(void);
 const char *WordToString2(unsigned int word);
 int Read4Bytes(FILE *f);
 int LoadSoundFile2(const char *fname);
 int AddNameData(const char *name,
                int wide);
 int ReadClause(Translator *tr,
--- a/src/libespeak-ng/soundicon.c
+++ b/src/libespeak-ng/soundicon.c
@@ -0,0 +1,183 @@
 /*
 * Copyright (C) 2005 to 2015 by Jonathan Duddington
 * email: [email protected]
 * Copyright (C) 2015-2017 Reece H. Dunn
 * Copyright (C) 2021 Juho Hiltunen
 * 
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, see: <http://www.gnu.org/licenses/>.
 */

 #include "config.h"

 #include <ctype.h>
 #include <errno.h>
 #include <locale.h>
 #include <math.h>
 #include <stdint.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
 #include <unistd.h>

 #include <espeak-ng/espeak_ng.h>
 #include <espeak-ng/speak_lib.h>
 #include <espeak-ng/encoding.h>
 #include <ucd/ucd.h>

 #include "soundicon.h" 
 #include "error.h"                // for create_file_error_context
 #include "readclause.h"               // for Read4Bytes
 #include "speech.h"                   // for path_home, GetFileLength, PATHSEP
 #include "synthesize.h"                   // for samplerate

 // Upper bound used when scanning soundicon_tab; starts at N_SOUNDICON_SLOTS,
 // the number of leading entries reserved for dynamically loaded audio files.
 int n_soundicon_tab = N_SOUNDICON_SLOTS;
 // The sound icon table itself (declared extern in soundicon.h).
 SOUND_ICON soundicon_tab[N_SOUNDICON_TAB];


 /*
  * Load a WAV file into soundicon_tab[index].
  *
  * fname:   file to load; NULL means use the name already stored in
  *          soundicon_tab[index].filename. A name that does not start with '/'
  *          is looked up in the "soundicons" directory below path_home.
  * index:   entry of soundicon_tab that receives the data.
  * context: passed through to create_file_error_context() on file errors.
  *
  * If the header fields read at byte offset 20 do not describe mono 16 bit
  * audio at `samplerate`, the file is first converted into a temporary file by
  * running the external "sox" command.
  *
  * Returns ENS_OK on success; EINVAL when no file name is available or the file
  * is too short to hold the header, ENAMETOOLONG when the resolved path does not
  * fit the path buffer, ENOMEM when the buffer cannot be allocated, or the
  * result of create_file_error_context() for file errors.
  */
 static espeak_ng_STATUS LoadSoundFile(const char *fname, int index, espeak_ng_ERROR_CONTEXT *context)
 {
 	enum { WAV_DATA_SIZE_OFFSET = 40 }; // byte offset of the data size field

 	FILE *f;
 	char *p;
 	int length;
 	int data_bytes;
 	int data_start;
 	int n;
 	espeak_ng_STATUS status = ENS_OK;
 	char fname_temp[100];
 	char fname2[sizeof(path_home)+13+40];

 	// An empty name means "no temporary file to remove"; without this the
 	// cleanup below would pass an uninitialised buffer to remove().
 	fname_temp[0] = 0;

 	if (fname == NULL) {
 		// filename is already in the table
 		fname = soundicon_tab[index].filename;
 	}

 	if (fname == NULL)
 		return EINVAL;

 	if (fname[0] != '/') {
 		// a relative path, look in espeak-ng-data/soundicons
 		n = snprintf(fname2, sizeof(fname2), "%s%csoundicons%c%s", path_home, PATHSEP, PATHSEP, fname);
 		if (n < 0 || (size_t)n >= sizeof(fname2))
 			return ENAMETOOLONG; // refuse instead of overflowing fname2
 		fname = fname2;
 	}

 	f = fopen(fname, "rb");
 	if (f != NULL) {
 		int ix;
 		int fd_temp;
 		int header[3];
 		char command[sizeof(fname2)+sizeof(fname2)+40];

 		if (fseek(f, 20, SEEK_SET) == -1) {
 			int error = errno;
 			fclose(f);
 			return create_file_error_context(context, error, fname);
 		}

 		for (ix = 0; ix < 3; ix++)
 			header[ix] = Read4Bytes(f);

 		// if the sound file is not mono, 16 bit signed, at the correct sample rate, then convert it
 		if ((header[0] != 0x10001) || (header[1] != samplerate) || (header[2] != samplerate*2)) {
 			fclose(f);
 			f = NULL;

 #ifdef HAVE_MKSTEMP
 			strcpy(fname_temp, "/tmp/espeakXXXXXX");
 			if ((fd_temp = mkstemp(fname_temp)) >= 0)
 				close(fd_temp);
 #else
 			strcpy(fname_temp, tmpnam(NULL));
 #endif

 			// NOTE(review): fname is pasted into a shell command line. If it can
 			// come from untrusted input, quote characters in it could inject
 			// shell commands - confirm with the callers.
 			n = snprintf(command, sizeof(command), "sox \"%s\" -r %d -c1 -b 16 -t wav %s\n", fname, samplerate, fname_temp);
 			// Only run the command when it was not truncated.
 			if (n >= 0 && (size_t)n < sizeof(command) && system(command) == 0)
 				fname = fname_temp;
 		}
 	}

 	if (f == NULL) {
 		f = fopen(fname, "rb");
 		if (f == NULL) {
 			status = create_file_error_context(context, errno, fname);
 			goto remove_temp;
 		}
 	}

 	length = GetFileLength(fname);
 	if (length < 0) { // length == -errno
 		status = create_file_error_context(context, -length, fname);
 		goto close_file;
 	}
 	data_start = WAV_DATA_SIZE_OFFSET + (int)sizeof(data_bytes);
 	if (length < data_start) {
 		// too short to contain the size field that is read below
 		status = EINVAL;
 		goto close_file;
 	}
 	if (fseek(f, 0, SEEK_SET) == -1) {
 		status = create_file_error_context(context, errno, fname);
 		goto close_file;
 	}
 	if ((p = (char *)realloc(soundicon_tab[index].data, length)) == NULL) {
 		status = ENOMEM; // the previous buffer is still valid and stays in the table
 		goto close_file;
 	}
 	// realloc() may have moved the buffer, so the old pointer must not stay in the table
 	soundicon_tab[index].data = p;

 	if (fread(p, 1, length, f) != (size_t)length) {
 		status = create_file_error_context(context, errno, fname);
 		free(p);
 		soundicon_tab[index].data = NULL;
 		soundicon_tab[index].length = 0;
 		goto close_file;
 	}

 	// Copy the size field out instead of dereferencing a cast pointer, which
 	// could be misaligned.
 	memcpy(&data_bytes, &p[WAV_DATA_SIZE_OFFSET], sizeof(data_bytes));
 	// Never report more bytes than actually follow the size field in the buffer.
 	if (data_bytes < 0 || data_bytes > length - data_start)
 		data_bytes = length - data_start;
 	soundicon_tab[index].length = data_bytes / 2; // length in samples

 close_file:
 	fclose(f);
 remove_temp:
 	if (fname_temp[0] != 0)
 		remove(fname_temp);
 	return status;
 }

 /*
  * Find the sound icon table entry for a punctuation character.
  *
  * Only entries from N_SOUNDICON_SLOTS up to n_soundicon_tab are searched. If
  * the first matching entry has no samples yet (length 0), its file is loaded
  * through LoadSoundFile().
  *
  * Returns the table index, or -1 if there is no entry for `c` or its sound
  * file could not be loaded.
  */
 int LookupSoundicon(int c)
 {
 	int entry = N_SOUNDICON_SLOTS;

 	// advance to the first entry registered for this character
 	while (entry < n_soundicon_tab && soundicon_tab[entry].name != c)
 		entry++;

 	if (entry >= n_soundicon_tab)
 		return -1;

 	if (soundicon_tab[entry].length != 0)
 		return entry; // samples are already in memory

 	if (LoadSoundFile(NULL, entry, NULL) == ENS_OK)
 		return entry;

 	return -1; // sound file is not available
 }

 /*
  * Load a sound file into one of the reserved slots in the sound icon table
  * (if it's not already loaded).
  *
  * The first N_SOUNDICON_SLOTS entries are used in rotation, so loading a new
  * file replaces the contents of the next slot in turn.
  *
  * Returns the table index holding the file, or -1 if `fname` is NULL, the file
  * could not be loaded, or memory for the file name could not be allocated.
  */
 int LoadSoundFile2(const char *fname)
 {
 	int ix;
 	char *name_copy;
 	static int slot = -1;

 	if (fname == NULL)
 		return -1;

 	for (ix = 0; ix < n_soundicon_tab; ix++) {
 		if (((soundicon_tab[ix].filename != NULL) && strcmp(fname, soundicon_tab[ix].filename) == 0))
 			return ix; // already loaded
 	}

 	// load the file into the next slot
 	slot++;
 	if (slot >= N_SOUNDICON_SLOTS)
 		slot = 0;

 	if (LoadSoundFile(fname, slot, NULL) != ENS_OK)
 		return -1;

 	// Resize the name buffer of the slot that was just filled. After the loop
 	// above ix equals n_soundicon_tab, so indexing with ix would touch an
 	// unrelated entry or run past the end of the table.
 	name_copy = (char *)realloc(soundicon_tab[slot].filename, strlen(fname)+1);
 	if (name_copy == NULL) {
 		// The slot now holds the new sound; blank the old name so that a
 		// later lookup of the old file does not return this slot.
 		if (soundicon_tab[slot].filename != NULL)
 			soundicon_tab[slot].filename[0] = 0;
 		return -1;
 	}
 	strcpy(name_copy, fname);
 	soundicon_tab[slot].filename = name_copy;
 	return slot;
 }
--- a/src/libespeak-ng/soundicon.h
+++ b/src/libespeak-ng/soundicon.h
@@ -0,0 +1,50 @@
 /*
 * Copyright (C) 2005 to 2014 by Jonathan Duddington
 * email: [email protected]
 * Copyright (C) 2015-2017 Reece H. Dunn
 * Copyright (C) 2021 Juho Hiltunen
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, see: <http://www.gnu.org/licenses/>.
 */

 #ifndef ESPEAK_NG_SOUNDICON_H
 #define ESPEAK_NG_SOUNDICON_H

 #ifdef __cplusplus
 extern "C"
 {
 #endif

 // Returns the soundicon_tab index registered for punctuation character c,
 // loading its sound file on first use; -1 if there is none or loading fails.
 int LookupSoundicon(int c);
 // Loads fname into one of the N_SOUNDICON_SLOTS reserved entries unless a table
 // entry with that file name already exists; returns the index, or -1 on failure.
 int LoadSoundFile2(const char *fname);

 // One entry of the sound icon table.
 typedef struct {
        int name;       // character the icon stands for (compared by LookupSoundicon)
        int length;     // length in samples; 0 makes LookupSoundicon load the file
        char *data;     // heap buffer holding the complete contents of the sound file
        char *filename; // file name; taken relative to the soundicons directory unless it starts with '/'
 } SOUND_ICON;

 #define N_SOUNDICON_TAB  80   // total entries in soundicon_tab
 #define N_SOUNDICON_SLOTS 4    // number of slots reserved for dynamic loading of audio files

 // Upper bound used when scanning soundicon_tab (initialised to N_SOUNDICON_SLOTS).
 extern int n_soundicon_tab;
 // The sound icon table; the first N_SOUNDICON_SLOTS entries are the reserved slots.
 extern SOUND_ICON soundicon_tab[N_SOUNDICON_TAB];


 #ifdef __cplusplus
 }
 #endif

 #endif
--- a/src/libespeak-ng/ssml.c
+++ b/src/libespeak-ng/ssml.c
@@ -43,6 +43,7 @@
 #include "dictionary.h"           // for strncpy0
 #include "mnemonics.h"               // for LookupMnemName, MNEM_TAB, 
 #include "readclause.h"           // for PARAM_STACK, param_stack, AddNameData
 #include "soundicon.h"               // for LoadSoundFile2
 #include "synthesize.h"           // for SPEED_FACTORS, speed
 #include "translate.h"            // for CTRL_EMBEDDED, IsDigit09, utf8_out
 #include "voice.h"                // for SelectVoice, SelectVoiceByName
--- a/src/libespeak-ng/synthdata.c
+++ b/src/libespeak-ng/synthdata.c
@@ -36,6 +36,7 @@
 #include "phoneme.h"                  // for PHONEME_TAB, PHONEME_TAB_LIST
 #include "speech.h"                   // for path_home, GetFileLength, PATHSEP
 #include "mbrola.h"                   // for mbrola_name
 #include "soundicon.h"               // for N_SOUNDICON_SLOTS, soundicon_tab
 #include "synthesize.h"               // for PHONEME_LIST, frameref_t, PHONE...
 #include "translate.h"                // for Translator, LANGUAGE_OPTIONS
 #include "voice.h"                    // for ReadTonePoints, tone_points, voice
--- a/src/libespeak-ng/synthesize.c
+++ b/src/libespeak-ng/synthesize.c
@@ -38,6 +38,7 @@
 #include "mbrola.h"               // for MbrolaGenerate, mbrola_name
 #include "phoneme.h"              // for PHONEME_TAB, phVOWEL, phLIQUID, phN...
 #include "setlengths.h"           // for CalcLengths
 #include "soundicon.h"               // for soundicon_tab, n_soundicon
 #include "synthdata.h"            // for InterpretPhoneme, GetEnvelope, Inte...
 #include "translate.h"            // for translator, LANGUAGE_OPTIONS, Trans...
 #include "voice.h"                // for voice_t, voice, LoadVoiceVariant
@@ -66,9 +67,6 @@ static int syllable_centre;

 static voice_t *new_voice = NULL;

 // Upper bound used when scanning soundicon_tab; starts at N_SOUNDICON_SLOTS,
 // the number of leading entries reserved for dynamically loaded audio files.
 int n_soundicon_tab = N_SOUNDICON_SLOTS;
 // The sound icon table itself.
 SOUND_ICON soundicon_tab[N_SOUNDICON_TAB];

 #define RMS_GLOTTAL1 35   // vowel before glottal stop
 #define RMS_START 28  // 28
 #define VOWEL_FRONT_LENGTH  50
--- a/src/libespeak-ng/synthesize.h
+++ b/src/libespeak-ng/synthesize.h
@@ -334,13 +334,6 @@ typedef struct {

 #define i_StressLevel  0x800

 // One entry of the sound icon table.
 typedef struct {
 	int name;       // character the icon stands for (compared by LookupSoundicon)
 	int length;     // length in samples; 0 makes LookupSoundicon load the file
 	char *data;     // heap buffer holding the complete contents of the sound file
 	char *filename; // file name; taken relative to the soundicons directory unless it starts with '/'
 } SOUND_ICON;

 typedef struct {
 	int pause_factor;
 	int clause_pause_factor;
@@ -479,11 +472,6 @@ extern espeak_EVENT *event_list;
 extern t_espeak_callback *synth_callback;
 extern const int version_phdata;

 #define N_SOUNDICON_TAB  80   // total entries in soundicon_tab
 #define N_SOUNDICON_SLOTS 4    // number of slots reserved for dynamic loading of audio files
 // Upper bound used when scanning soundicon_tab (initialised to N_SOUNDICON_SLOTS).
 extern int n_soundicon_tab;
 // The sound icon table; the first N_SOUNDICON_SLOTS entries are the reserved slots.
 extern SOUND_ICON soundicon_tab[N_SOUNDICON_TAB];

 void DoEmbedded(int *embix, int sourceix);
 void DoMarker(int type, int char_posn, int length, int value);
 void DoPhonemeMarker(int type, int char_posn, int length, char *name);