# -Wno-endif-labels : Needed to prevent warnings in ieee80.c. | # -Wno-endif-labels : Needed to prevent warnings in ieee80.c. | ||||
AM_CFLAGS = \ | AM_CFLAGS = \ | ||||
-Isrc/include -Isrc/include/compat -I$(srcdir)/src/ucd-tools/src/include \ | |||||
-Isrc/include -Isrc/include/compat -I$(srcdir)/src/speechPlayer/include -I$(srcdir)/src/ucd-tools/src/include \ | |||||
-D_BSD_SOURCE -D_DEFAULT_SOURCE -D_POSIX_C_SOURCE=200112L \ | -D_BSD_SOURCE -D_DEFAULT_SOURCE -D_POSIX_C_SOURCE=200112L \ | ||||
-Wno-endif-labels | -Wno-endif-labels | ||||
src/libespeak-ng/wavegen.c | src/libespeak-ng/wavegen.c | ||||
noinst_HEADERS = \ | noinst_HEADERS = \ | ||||
src/speechPlayer/include/speechPlayer.h | |||||
src/ucd-tools/src/include/ucd/ucd.h | src/ucd-tools/src/include/ucd/ucd.h | ||||
if OPT_KLATT | if OPT_KLATT | ||||
src_libespeak_ng_la_SOURCES += src/libespeak-ng/klatt.c | src_libespeak_ng_la_SOURCES += src/libespeak-ng/klatt.c | ||||
endif | endif | ||||
if OPT_SPEECHPLAYER | |||||
src_libespeak_ng_la_CFLAGS += -DINCLUDE_SPEECHPLAYER | |||||
src_libespeak_ng_la_SOURCES += src/libespeak-ng/sPlayer.c | |||||
src_libespeak_ng_la_SOURCES += src/speechPlayer/src/frame.cpp | |||||
src_libespeak_ng_la_SOURCES += src/speechPlayer/src/speechPlayer.cpp | |||||
src_libespeak_ng_la_SOURCES += src/speechPlayer/src/speechWaveGenerator.cpp | |||||
src_speak_ng_SOURCES = src/speak-ng.cpp | |||||
else | |||||
src_speak_ng_SOURCES = src/speak-ng.c | |||||
endif | |||||
if OPT_MBROLA | if OPT_MBROLA | ||||
src_libespeak_ng_la_CFLAGS += -DINCLUDE_MBROLA | src_libespeak_ng_la_CFLAGS += -DINCLUDE_MBROLA | ||||
src_libespeak_ng_la_SOURCES += src/libespeak-ng/mbrowrap.c | src_libespeak_ng_la_SOURCES += src/libespeak-ng/mbrowrap.c | ||||
src_speak_ng_LDADD = src/libespeak-ng.la | src_speak_ng_LDADD = src/libespeak-ng.la | ||||
src_speak_ng_LDFLAGS = -static -lm ${PCAUDIOLIB_LIBS} | src_speak_ng_LDFLAGS = -static -lm ${PCAUDIOLIB_LIBS} | ||||
src_speak_ng_CFLAGS = -Isrc/libespeak-ng ${AM_CFLAGS} | src_speak_ng_CFLAGS = -Isrc/libespeak-ng ${AM_CFLAGS} | ||||
src_speak_ng_SOURCES = src/speak-ng.c | |||||
bin_PROGRAMS += src/espeak-ng | bin_PROGRAMS += src/espeak-ng | ||||
tests_ssml_fuzzer_test_SOURCES = tests/ssml-fuzzer.c | tests_ssml_fuzzer_test_SOURCES = tests/ssml-fuzzer.c | ||||
tests_ssml_fuzzer_test_LDADD = src/libespeak-ng.la | tests_ssml_fuzzer_test_LDADD = src/libespeak-ng.la | ||||
if OPT_SPEECHPLAYER | |||||
tests_api_test_SOURCES += tests/dummy.cpp | |||||
tests_encoding_test_SOURCES += tests/dummy.cpp | |||||
tests_readclause_test_SOURCES += tests/dummy.cpp | |||||
tests_ssml_fuzzer_test_SOURCES += tests/dummy.cpp | |||||
endif | |||||
if HAVE_LIBFUZZER | if HAVE_LIBFUZZER | ||||
tests_ssml_fuzzer_test_CFLAGS += -fsanitize=fuzzer | tests_ssml_fuzzer_test_CFLAGS += -fsanitize=fuzzer | ||||
tests_ssml_fuzzer_test_LDFLAGS = -fsanitize=fuzzer | tests_ssml_fuzzer_test_LDFLAGS = -fsanitize=fuzzer |
[AS_HELP_STRING([--with-klatt], [enable the Klatt formant synthesizer @<:@default=yes@:>@])], | [AS_HELP_STRING([--with-klatt], [enable the Klatt formant synthesizer @<:@default=yes@:>@])], | ||||
[]) | []) | ||||
AC_ARG_WITH([speechplayer], | |||||
[AS_HELP_STRING([--with-speechplayer], [enable the speechPlayer Klatt implementation @<:@default=yes@:>@])], | |||||
[]) | |||||
AC_ARG_WITH([mbrola], | AC_ARG_WITH([mbrola], | ||||
[AS_HELP_STRING([--with-mbrola], [enable the MBROLA speech synthesizer @<:@default=yes@:>@])], | [AS_HELP_STRING([--with-mbrola], [enable the MBROLA speech synthesizer @<:@default=yes@:>@])], | ||||
[]) | []) | ||||
dnl ================================================================ | dnl ================================================================ | ||||
AC_PROG_CC | AC_PROG_CC | ||||
AC_PROG_CXX | |||||
AC_PROG_MAKE_SET | AC_PROG_MAKE_SET | ||||
AC_PROG_LIBTOOL | AC_PROG_LIBTOOL | ||||
AC_PROG_LN_S | AC_PROG_LN_S | ||||
have_klatt=yes | have_klatt=yes | ||||
fi | fi | ||||
if test "$with_speechplayer" = "no" ; then | |||||
have_speechplayer=no | |||||
else | |||||
have_speechplayer=yes | |||||
fi | |||||
if test "$with_mbrola" = "no" ; then | if test "$with_mbrola" = "no" ; then | ||||
have_mbrola=no | have_mbrola=no | ||||
else | else | ||||
fi | fi | ||||
AM_CONDITIONAL(OPT_KLATT, [test x"$have_klatt" = xyes]) | AM_CONDITIONAL(OPT_KLATT, [test x"$have_klatt" = xyes]) | ||||
AM_CONDITIONAL(OPT_SPEECHPLAYER, [test x"$have_speechplayer" = xyes]) | |||||
AM_CONDITIONAL(OPT_MBROLA, [test x"$have_mbrola" = xyes]) | AM_CONDITIONAL(OPT_MBROLA, [test x"$have_mbrola" = xyes]) | ||||
AM_CONDITIONAL(OPT_ASYNC, [test x"$have_async" = xyes]) | AM_CONDITIONAL(OPT_ASYNC, [test x"$have_async" = xyes]) | ||||
ndk-build (Android): ${NDKBUILD_CHECK} | ndk-build (Android): ${NDKBUILD_CHECK} | ||||
Klatt: ${have_klatt} | Klatt: ${have_klatt} | ||||
speechPlayer: ${have_speechplayer} | |||||
MBROLA: ${have_mbrola} | MBROLA: ${have_mbrola} | ||||
Async: ${have_async} | Async: ${have_async} | ||||
1. a functional autotools system (`make`, `autoconf`, `automake`, `libtool` | 1. a functional autotools system (`make`, `autoconf`, `automake`, `libtool` | ||||
and `pkg-config`); | and `pkg-config`); | ||||
2. a functional C compiler that supports C99 (e.g. gcc or clang). Note: if building with speechPlayer, a C++ compiler is also required.
Optionally, you need: | Optionally, you need: | ||||
1. the [pcaudiolib](https://github.com/espeak-ng/pcaudiolib) development library | 1. the [pcaudiolib](https://github.com/espeak-ng/pcaudiolib) development library | ||||
to enable audio output; | to enable audio output; | ||||
2. the speechPlayer development library to | |||||
enable the speechPlayer Klatt implementation; | |||||
3. the [sonic](https://github.com/espeak-ng/sonic) development library to | 3. the [sonic](https://github.com/espeak-ng/sonic) development library to | ||||
enable sonic audio speed up support; | enable sonic audio speed up support; | ||||
4. the `ronn` man-page markdown processor to build the man pages. | 4. the `ronn` man-page markdown processor to build the man pages. | ||||
| Option | Description | Default | | | Option | Description | Default | | ||||
|-----------------|----------------------------------------------|---------| | |-----------------|----------------------------------------------|---------| | ||||
| `--with-klatt` | Enable Klatt formant synthesis. | yes | | | `--with-klatt` | Enable Klatt formant synthesis. | yes | | ||||
| `--with-speechplayer` | Enable the speechPlayer Klatt implementation. | yes | | |||||
| `--with-mbrola` | Enable MBROLA voice support. | yes | | | `--with-mbrola` | Enable MBROLA voice support. | yes | | ||||
| `--with-sonic` | Use the sonic library to support higher WPM. | yes | | | `--with-sonic` | Use the sonic library to support higher WPM. | yes | | ||||
| `--with-async` | Enable asynchronous commands. | yes | | | `--with-async` | Enable asynchronous commands. | yes | |
language variant | |||||
name Edward2 | |||||
klatt 6 | |||||
voicing 100 | |||||
consonants 70 80 | |||||
formant 1 92 100 130 | |||||
formant 2 103 100 80 | |||||
formant 3 103 100 70 | |||||
formant 4 114 100 60 |
language variant | |||||
name klatt6 | |||||
klatt 6 | |||||
#include "klatt.h" | #include "klatt.h" | ||||
#include "synthesize.h" // for frame_t, WGEN_DATA, STEPSIZE, N_KLATTP, echo... | #include "synthesize.h" // for frame_t, WGEN_DATA, STEPSIZE, N_KLATTP, echo... | ||||
#include "voice.h" // for voice_t, N_PEAKS | #include "voice.h" // for voice_t, N_PEAKS | ||||
#ifdef INCLUDE_SPEECHPLAYER | |||||
#include "sPlayer.h" | |||||
#endif | |||||
extern unsigned char *out_ptr; | extern unsigned char *out_ptr; | ||||
extern unsigned char *out_end; | extern unsigned char *out_end; | ||||
{ | { | ||||
int r_ix; | int r_ix; | ||||
#ifdef INCLUDE_SPEECHPLAYER | |||||
KlattResetSP(); | |||||
#endif | |||||
if (control == 2) { | if (control == 2) { | ||||
// Full reset | // Full reset | ||||
kt_globals.FLPhz = (950 * kt_globals.samrate) / 10000; | kt_globals.FLPhz = (950 * kt_globals.samrate) / 10000; | ||||
int Wavegen_Klatt(int length, int resume, frame_t *fr1, frame_t *fr2, WGEN_DATA *wdata, voice_t *wvoice) | int Wavegen_Klatt(int length, int resume, frame_t *fr1, frame_t *fr2, WGEN_DATA *wdata, voice_t *wvoice) | ||||
{ | { | ||||
#ifdef INCLUDE_SPEECHPLAYER | |||||
if(wvoice->klattv[0] == 6) | |||||
return Wavegen_KlattSP(wdata, wvoice, length, resume, fr1, fr2); | |||||
#endif | |||||
if (resume == 0) | if (resume == 0) | ||||
SetSynth_Klatt(length, fr1, fr2, wvoice, 1); | SetSynth_Klatt(length, fr1, fr2, wvoice, 1); | ||||
int ix; | int ix; | ||||
#ifdef INCLUDE_SPEECHPLAYER | |||||
KlattInitSP(); | |||||
#endif | |||||
sample_count = 0; | sample_count = 0; | ||||
kt_globals.synthesis_model = CASCADE_PARALLEL; | kt_globals.synthesis_model = CASCADE_PARALLEL; |
#include <espeak-ng/espeak_ng.h> | |||||
#include <espeak-ng/speak_lib.h> | |||||
#include "sPlayer.h" | |||||
extern unsigned char *out_ptr; | |||||
extern unsigned char *out_end; | |||||
static speechPlayer_handle_t speechPlayerHandle=NULL; | |||||
static const unsigned int minFadeLength=110; | |||||
// Returns the larger of the two integers (b when they are equal).
static int MAX(int a, int b) { return (a > b) ? a : b; }
// Returns the smaller of the two integers (b when they are equal).
static int MIN(int a, int b) { return (a < b) ? a : b; }
// Reports whether wdata has wave-file data queued for mixing, i.e. whether
// its n_mix_wavefile count is non-zero.
static bool needsMixWaveFile(WGEN_DATA *wdata) {
	const bool hasQueuedWave = (bool)wdata->n_mix_wavefile;
	return hasQueuedWave;
}
// Mixes the currently queued eSpeak consonant wave file into the samples
// already present in the given sample buffer.
// This is used for voiced consonants (e.g. z, v) where the voiced part is
// generated by speechPlayer but the consonant comes from a wave file in eSpeak.
// wdata->mix_wavefile_ix is advanced past every byte that was consumed, so a
// later call continues where this one stopped.
// @param wdata wave generator state describing the queued wave file.
// @param maxNumSamples the maximum number of samples that can be mixed into the sample buffer.
// @param sampleBuf the buffer of existing samples; it is modified in place.
static void mixWaveFile(WGEN_DATA *wdata, unsigned int maxNumSamples, sample* sampleBuf) {
	unsigned int i=0;
	for(;wdata->mix_wavefile_ix<wdata->n_mix_wavefile;++wdata->mix_wavefile_ix) {
		if(i>=maxNumSamples) break;
		int val;
		if(wdata->mix_wave_scale==0) {
			// A scale of 0 selects two-byte samples: the first byte is taken as-is,
			// the following byte is read as signed and weighted by 256.
			val=wdata->mix_wavefile[wdata->mix_wavefile_ix+wdata->mix_wavefile_offset];
			++(wdata->mix_wavefile_ix);
			signed char c=wdata->mix_wavefile[wdata->mix_wavefile_ix+wdata->mix_wavefile_offset];
			val+=(c*256);
		} else {
			// Otherwise each sample is a single signed byte multiplied by the scale.
			val=(signed char)wdata->mix_wavefile[wdata->mix_wavefile_ix+wdata->mix_wavefile_offset]*wdata->mix_wave_scale;
		}
		val*=(wdata->amplitude_v/1024.0);
		val=(val*wdata->mix_wave_amp)/40;
		// Saturate rather than letting the sum wrap around when it is stored
		// back into the 16-bit sample, which would be heard as a loud click.
		int mixed=sampleBuf[i].value+val;
		if(mixed>32767) {
			mixed=32767;
		} else if(mixed<-32768) {
			mixed=-32768;
		}
		sampleBuf[i].value=mixed;
		// Once the read position reaches mix_wavefile_max, step the offset back
		// by three quarters of that maximum so reading continues inside the data.
		if((wdata->mix_wavefile_ix+wdata->mix_wavefile_offset)>=wdata->mix_wavefile_max) {
			wdata->mix_wavefile_offset-=(wdata->mix_wavefile_max*3)/4;
		}
		++i;
	}
}
static bool isKlattFrameFollowing() { | |||||
// eSpeak implements its command queue with a circular buffer. | |||||
// Thus to walk it, we start from the head, walking to the tail, which may wrap around to the beginning of the buffer as it is circular. | |||||
for(int i=(wcmdq_head+1)%N_WCMDQ;i!=wcmdq_tail;i=(i+1)%N_WCMDQ) { | |||||
int cmd=wcmdq[i][0]; | |||||
if(cmd==WCMD_PAUSE||cmd==WCMD_WAVE) { | |||||
break; | |||||
} | |||||
if(cmd==WCMD_KLATT) { | |||||
return true; | |||||
} | |||||
} | |||||
return false; | |||||
} | |||||
// Centre frequency of cascade formant n: the eSpeak frame frequency scaled by
// the voice's relative ratio (256 represents 100%) plus the voice's offset.
static double cascadeFormantFreq(frame_t *eFrame, voice_t *wvoice, int n) {
	return (eFrame->ffreq[n]*wvoice->freq[n]/256.0)+wvoice->freqadd[n];
}

// Bandwidth of cascade formant n: twice the eSpeak frame bandwidth scaled by
// the voice's relative width (256 represents 100%).
static double cascadeFormantBandwidth(frame_t *eFrame, voice_t *wvoice, int n) {
	return eFrame->bw[n]*2*(wvoice->width[n]/256.0);
}

// Translates one eSpeak frame, together with the current wave generator and
// voice settings, into the speechPlayer frame pointed to by spFrame.
// Only the fields assigned below are written; all others keep whatever
// value the caller put there.
static void fillSpeechPlayerFrame(WGEN_DATA *wdata, voice_t *wvoice, frame_t * eFrame, speechPlayer_frame_t* spFrame) {
	// Gains: the pre-formant gain is fixed, the output gain follows the
	// wave generator amplitude.
	spFrame->preFormantGain=1;
	spFrame->outputGain=3*(wdata->amplitude/100.0);

	// Voice source. eSpeak keeps pitch in 4096ths of a Hz (pitch<<12) and
	// voicing amplitude with 64 meaning 100%; speechPlayer wants Hz and 1.0.
	// The frame starts and ends on the same pitch here.
	spFrame->voicePitch=(wdata->pitch)/4096.0;
	spFrame->endVoicePitch=spFrame->voicePitch;
	spFrame->voiceAmplitude=(wvoice->voicing)/64.0;

	// Cascade formant frequencies 1-6.
	spFrame->cf1=cascadeFormantFreq(eFrame,wvoice,1);
	spFrame->cf2=cascadeFormantFreq(eFrame,wvoice,2);
	spFrame->cf3=cascadeFormantFreq(eFrame,wvoice,3);
	spFrame->cf4=cascadeFormantFreq(eFrame,wvoice,4);
	spFrame->cf5=cascadeFormantFreq(eFrame,wvoice,5);
	spFrame->cf6=cascadeFormantFreq(eFrame,wvoice,6);

	// Cascade formant bandwidths: 1-4 come from the frame, 5 and 6 are fixed.
	spFrame->cb1=cascadeFormantBandwidth(eFrame,wvoice,1);
	spFrame->cb2=cascadeFormantBandwidth(eFrame,wvoice,2);
	spFrame->cb3=cascadeFormantBandwidth(eFrame,wvoice,3);
	spFrame->cb4=cascadeFormantBandwidth(eFrame,wvoice,4);
	spFrame->cb5=1000;
	spFrame->cb6=1000;

	// Nasal pole and zero: the pole is only switched on, and the zero moved
	// away from its default of 250, when the frame carries a KLATT_FNZ value.
	const int hasNasalZero=(eFrame->klattp[KLATT_FNZ]>0);
	spFrame->cfNP=200;
	spFrame->cbNP=100;
	spFrame->cbN0=100;
	spFrame->cfN0=hasNasalZero?eFrame->klattp[KLATT_FNZ]*2:250;
	spFrame->caNP=hasNasalZero?1:0;
}
void KlattInitSP() { | |||||
speechPlayerHandle=speechPlayer_initialize(22050); | |||||
} | |||||
void KlattResetSP() { | |||||
speechPlayer_terminate(speechPlayerHandle); | |||||
speechPlayerHandle=speechPlayer_initialize(22050); | |||||
} | |||||
// Generates speechPlayer output for one eSpeak Klatt command into the shared
// output buffer (out_ptr .. out_end).
// @param wdata wave generator state; its pitch_ix and pitch are advanced when a new command starts.
// @param wvoice voice settings applied to both frames.
// @param length length of the command in samples.
// @param resume zero when starting a new command (frames are queued first);
//        non-zero to just continue synthesizing what is already queued.
// @param fr1 frame at the start of the command.
// @param fr2 frame at the end of the command.
// @return 1 when the output buffer has been filled, otherwise 0.
int Wavegen_KlattSP(WGEN_DATA *wdata, voice_t *wvoice, int length, int resume, frame_t *fr1, frame_t *fr2){
	if(!resume) {
		const int fadeLength=(int)minFadeLength;
		speechPlayer_frame_t spFrame1={0};
		fillSpeechPlayerFrame(wdata, wvoice, fr1,&spFrame1);
		speechPlayer_frame_t spFrame2={0};
		fillSpeechPlayerFrame(wdata, wvoice, fr2,&spFrame2);
		// Advance the pitch envelope over the whole command and let the second
		// frame glide towards the resulting pitch.
		wdata->pitch_ix+=(wdata->pitch_inc*(length/STEPSIZE));
		wdata->pitch=((wdata->pitch_env[MIN(wdata->pitch_ix>>8,127)]*wdata->pitch_range)>>8)+wdata->pitch_base;
		// Floating-point division, matching fillSpeechPlayerFrame(); an integer
		// division here would silently drop the fractional part of the pitch.
		spFrame2.endVoicePitch=wdata->pitch/4096.0;
		// The voiced part is made quieter when a consonant wave file will be
		// mixed on top of it.
		bool willMixWaveFile=needsMixWaveFile(wdata);
		if(willMixWaveFile) {
			spFrame1.outputGain/=5;
			spFrame2.outputGain/=5;
		}
		// Split the command into: fade-in to frame 1, the main transition to
		// frame 2, and (only if no Klatt frame follows) a fade-out.
		int mainLength=length;
		speechPlayer_queueFrame(speechPlayerHandle,&spFrame1,minFadeLength,minFadeLength,-1,false);
		mainLength-=fadeLength;
		bool fadeOut=!isKlattFrameFollowing();
		if(fadeOut) {
			mainLength-=fadeLength;
		}
		if(mainLength>=1) {
			speechPlayer_queueFrame(speechPlayerHandle,&spFrame2,mainLength,mainLength,-1,false);
		}
		if(fadeOut) {
			// Fade out in two halves: first close the pre-formant gain while
			// holding the end pitch, then bring the output gain down to zero.
			spFrame2.voicePitch=spFrame2.endVoicePitch;
			spFrame2.preFormantGain=0;
			speechPlayer_queueFrame(speechPlayerHandle,&spFrame2,minFadeLength/2,minFadeLength/2,-1,false);
			spFrame2.outputGain=0;
			speechPlayer_queueFrame(speechPlayerHandle,&spFrame2,minFadeLength/2,minFadeLength/2,-1,false);
		}
	}
	// Synthesize as many samples as fit in the remaining output space, then mix
	// any queued consonant wave file over exactly the samples just produced.
	unsigned int maxLength=(out_end-out_ptr)/sizeof(sample);
	unsigned int outLength=speechPlayer_synthesize(speechPlayerHandle,maxLength,(sample*)out_ptr);
	mixWaveFile(wdata, outLength,(sample*)out_ptr);
	out_ptr=out_ptr+(sizeof(sample)*outLength);
	if(out_ptr>=out_end) return 1;
	return 0;
}
#ifndef ESPEAK_NG_SPLAYER_H | |||||
#define ESPEAK_NG_SPLAYER_H | |||||
#include "synthesize.h" | |||||
#include "voice.h" | |||||
#include <speechPlayer.h> | |||||
#ifdef __cplusplus | |||||
extern "C" { | |||||
#endif | |||||
void KlattInitSP(); | |||||
void KlattResetSP(); | |||||
int Wavegen_KlattSP(WGEN_DATA *wdata, voice_t *wvoice, int length, int resume, frame_t *fr1, frame_t *fr2); | |||||
#ifdef __cplusplus | |||||
} | |||||
#endif | |||||
#endif |
#include "speak-ng.c" |
/* | |||||
This file is a part of the NV Speech Player project. | |||||
URL: https://bitbucket.org/nvaccess/speechplayer | |||||
Copyright 2014 NV Access Limited. | |||||
This program is free software: you can redistribute it and/or modify | |||||
it under the terms of the GNU General Public License version 2.0, as published by | |||||
the Free Software Foundation. | |||||
This program is distributed in the hope that it will be useful, | |||||
but WITHOUT ANY WARRANTY; without even the implied warranty of | |||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. | |||||
This license can be found at: | |||||
http://www.gnu.org/licenses/old-licenses/gpl-2.0.html | |||||
*/ | |||||
#ifndef SPEECHPLAYER_H | |||||
#define SPEECHPLAYER_H | |||||
#include <stdbool.h> | |||||
#ifdef __cplusplus | |||||
extern "C" { | |||||
#endif | |||||
// Type of every parameter in a speechPlayer frame.
typedef double speechPlayer_frameParam_t;

// One frame of synthesis parameters. Every member has the type
// speechPlayer_frameParam_t, so a frame can also be treated as a flat array
// of parameters.
typedef struct {
	// voicing and cascade
	speechPlayer_frameParam_t voicePitch; // fundamental frequency of voice (phonation) in Hz
	speechPlayer_frameParam_t vibratoPitchOffset; // pitch is offset up or down in fraction of a semitone
	speechPlayer_frameParam_t vibratoSpeed; // speed of vibrato in Hz
	speechPlayer_frameParam_t voiceTurbulenceAmplitude; // amplitude of voice breathiness from 0 to 1
	speechPlayer_frameParam_t glottalOpenQuotient; // fraction between 0 and 1 of a voice cycle that the glottis is open (allows voice turbulence, alters f1...)
	speechPlayer_frameParam_t voiceAmplitude; // amplitude of voice (phonation) source between 0 and 1
	speechPlayer_frameParam_t aspirationAmplitude; // amplitude of aspiration (voiceless h, whisper) source between 0 and 1
	speechPlayer_frameParam_t cf1, cf2, cf3, cf4, cf5, cf6, cfN0, cfNP; // frequencies of standard cascade formants, nasal (anti) 0 and nasal pole in Hz
	speechPlayer_frameParam_t cb1, cb2, cb3, cb4, cb5, cb6, cbN0, cbNP; // bandwidths of standard cascade formants, nasal (anti) 0 and nasal pole in Hz
	speechPlayer_frameParam_t caNP; // amplitude from 0 to 1 of cascade nasal pole formant
	// fricatives and parallel
	speechPlayer_frameParam_t fricationAmplitude; // amplitude of frication noise from 0 to 1
	speechPlayer_frameParam_t pf1, pf2, pf3, pf4, pf5, pf6; // parallel formants in Hz
	speechPlayer_frameParam_t pb1, pb2, pb3, pb4, pb5, pb6; // parallel formant bandwidths in Hz
	speechPlayer_frameParam_t pa1, pa2, pa3, pa4, pa5, pa6; // amplitude of parallel formants between 0 and 1
	speechPlayer_frameParam_t parallelBypass; // amount of signal which should bypass parallel resonators from 0 to 1
	speechPlayer_frameParam_t preFormantGain; // amplitude from 0 to 1 of all vocal tract sound (voicing, frication) before entering formant resonators. Useful for stopping/starting speech
	speechPlayer_frameParam_t outputGain; // amplitude from 0 to 1 of final output (master volume)
	speechPlayer_frameParam_t endVoicePitch; // pitch of voice at the end of the frame length
} speechPlayer_frame_t;
// Numeric type of a single audio sample value.
typedef short sampleVal;

// One audio sample as passed to and from speechPlayer_synthesize().
typedef struct {
	sampleVal value;
} sample;
typedef void* speechPlayer_handle_t; | |||||
speechPlayer_handle_t speechPlayer_initialize(int sampleRate); | |||||
void speechPlayer_queueFrame(speechPlayer_handle_t playerHandle, speechPlayer_frame_t* framePtr, unsigned int minFrameDuration, unsigned int fadeDuration, int userIndex, bool purgeQueue); | |||||
int speechPlayer_synthesize(speechPlayer_handle_t playerHandle, unsigned int sampleCount, sample* sampleBuf); | |||||
int speechPlayer_getLastIndex(speechPlayer_handle_t playerHandle); | |||||
void speechPlayer_terminate(speechPlayer_handle_t playerHandle); | |||||
#ifdef __cplusplus | |||||
} | |||||
#endif | |||||
#endif |
/* | |||||
This file is a part of the NV Speech Player project. | |||||
URL: https://bitbucket.org/nvaccess/speechplayer | |||||
Copyright 2014 NV Access Limited. | |||||
This program is free software: you can redistribute it and/or modify | |||||
it under the terms of the GNU General Public License version 2.0, as published by | |||||
the Free Software Foundation. | |||||
This program is distributed in the hope that it will be useful, | |||||
but WITHOUT ANY WARRANTY; without even the implied warranty of | |||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. | |||||
This license can be found at: | |||||
http://www.gnu.org/licenses/old-licenses/gpl-2.0.html | |||||
*/ | |||||
#include <cstring> | |||||
#include <queue> | |||||
#include "utils.h" | |||||
#include "frame.h" | |||||
using namespace std; | |||||
struct frameRequest_t { | |||||
unsigned int minNumSamples; | |||||
unsigned int numFadeSamples; | |||||
bool NULLFrame; | |||||
speechPlayer_frame_t frame; | |||||
double voicePitchInc; | |||||
int userIndex; | |||||
}; | |||||
class FrameManagerImpl: public FrameManager { | |||||
private: | |||||
queue<frameRequest_t*> frameRequestQueue; | |||||
frameRequest_t* oldFrameRequest; | |||||
frameRequest_t* newFrameRequest; | |||||
speechPlayer_frame_t curFrame; | |||||
bool curFrameIsNULL; | |||||
unsigned int sampleCounter; | |||||
int lastUserIndex; | |||||
void updateCurrentFrame() { | |||||
sampleCounter++; | |||||
if(newFrameRequest) { | |||||
if(sampleCounter>(newFrameRequest->numFadeSamples)) { | |||||
delete oldFrameRequest; | |||||
oldFrameRequest=newFrameRequest; | |||||
newFrameRequest=NULL; | |||||
} else { | |||||
double curFadeRatio=(double)sampleCounter/(newFrameRequest->numFadeSamples); | |||||
for(int i=0;i<speechPlayer_frame_numParams;++i) { | |||||
((speechPlayer_frameParam_t*)&curFrame)[i]=calculateValueAtFadePosition(((speechPlayer_frameParam_t*)&(oldFrameRequest->frame))[i],((speechPlayer_frameParam_t*)&(newFrameRequest->frame))[i],curFadeRatio); | |||||
} | |||||
} | |||||
} else if(sampleCounter>(oldFrameRequest->minNumSamples)) { | |||||
if(!frameRequestQueue.empty()) { | |||||
curFrameIsNULL=false; | |||||
newFrameRequest=frameRequestQueue.front(); | |||||
frameRequestQueue.pop(); | |||||
if(newFrameRequest->NULLFrame) { | |||||
memcpy(&(newFrameRequest->frame),&(oldFrameRequest->frame),sizeof(speechPlayer_frame_t)); | |||||
newFrameRequest->frame.preFormantGain=0; | |||||
newFrameRequest->frame.voicePitch=curFrame.voicePitch; | |||||
newFrameRequest->voicePitchInc=0; | |||||
} else if(oldFrameRequest->NULLFrame) { | |||||
memcpy(&(oldFrameRequest->frame),&(newFrameRequest->frame),sizeof(speechPlayer_frame_t)); | |||||
oldFrameRequest->frame.preFormantGain=0; | |||||
} | |||||
if(newFrameRequest) { | |||||
if(newFrameRequest->userIndex!=-1) lastUserIndex=newFrameRequest->userIndex; | |||||
sampleCounter=0; | |||||
newFrameRequest->frame.voicePitch+=(newFrameRequest->voicePitchInc*newFrameRequest->numFadeSamples); | |||||
} | |||||
} else { | |||||
curFrameIsNULL=true; | |||||
} | |||||
} else { | |||||
curFrame.voicePitch+=oldFrameRequest->voicePitchInc; | |||||
oldFrameRequest->frame.voicePitch=curFrame.voicePitch; | |||||
} | |||||
} | |||||
public: | |||||
FrameManagerImpl(): curFrame(), curFrameIsNULL(true), sampleCounter(0), newFrameRequest(NULL), lastUserIndex(-1) { | |||||
oldFrameRequest=new frameRequest_t(); | |||||
oldFrameRequest->NULLFrame=true; | |||||
} | |||||
void queueFrame(speechPlayer_frame_t* frame, unsigned int minNumSamples, unsigned int numFadeSamples, int userIndex, bool purgeQueue) { | |||||
frameRequest_t* frameRequest=new frameRequest_t; | |||||
frameRequest->minNumSamples=minNumSamples; //max(minNumSamples,1); | |||||
frameRequest->numFadeSamples=numFadeSamples; //max(numFadeSamples,1); | |||||
if(frame) { | |||||
frameRequest->NULLFrame=false; | |||||
memcpy(&(frameRequest->frame),frame,sizeof(speechPlayer_frame_t)); | |||||
frameRequest->voicePitchInc=(frame->endVoicePitch-frame->voicePitch)/frameRequest->minNumSamples; | |||||
} else { | |||||
frameRequest->NULLFrame=true; | |||||
} | |||||
frameRequest->userIndex=userIndex; | |||||
if(purgeQueue) { | |||||
for(;!frameRequestQueue.empty();frameRequestQueue.pop()) delete frameRequestQueue.front(); | |||||
sampleCounter=oldFrameRequest->minNumSamples; | |||||
if(newFrameRequest) { | |||||
oldFrameRequest->NULLFrame=newFrameRequest->NULLFrame; | |||||
memcpy(&(oldFrameRequest->frame),&curFrame,sizeof(speechPlayer_frame_t)); | |||||
delete newFrameRequest; | |||||
newFrameRequest=NULL; | |||||
} | |||||
} | |||||
frameRequestQueue.push(frameRequest); | |||||
} | |||||
const int getLastIndex() { | |||||
return lastUserIndex; | |||||
} | |||||
const speechPlayer_frame_t* const getCurrentFrame() { | |||||
updateCurrentFrame(); | |||||
return curFrameIsNULL?NULL:&curFrame; | |||||
} | |||||
~FrameManagerImpl() { | |||||
if(oldFrameRequest) delete oldFrameRequest; | |||||
if(newFrameRequest) delete newFrameRequest; | |||||
} | |||||
}; | |||||
// Factory function: returns a newly allocated FrameManagerImpl through the
// FrameManager interface.
FrameManager* FrameManager::create() {
	FrameManager* manager=new FrameManagerImpl();
	return manager;
}
/* | |||||
This file is a part of the NV Speech Player project. | |||||
URL: https://bitbucket.org/nvaccess/speechplayer | |||||
Copyright 2014 NV Access Limited. | |||||
This program is free software: you can redistribute it and/or modify | |||||
it under the terms of the GNU General Public License version 2.0, as published by | |||||
the Free Software Foundation. | |||||
This program is distributed in the hope that it will be useful, | |||||
but WITHOUT ANY WARRANTY; without even the implied warranty of | |||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. | |||||
This license can be found at: | |||||
http://www.gnu.org/licenses/old-licenses/gpl-2.0.html | |||||
*/ | |||||
#ifndef SPEECHPLAYER_FRAME_H | |||||
#define SPEECHPLAYER_FRAME_H | |||||
#include "utils.h" | |||||
// Type of every parameter in a speechPlayer frame.
typedef double speechPlayer_frameParam_t;

// One frame of synthesis parameters. Every member has the type
// speechPlayer_frameParam_t, so a frame can also be treated as a flat array
// of parameters.
typedef struct {
	// voicing and cascade
	speechPlayer_frameParam_t voicePitch; // fundamental frequency of voice (phonation) in Hz
	speechPlayer_frameParam_t vibratoPitchOffset; // pitch is offset up or down in fraction of a semitone
	speechPlayer_frameParam_t vibratoSpeed; // speed of vibrato in Hz
	speechPlayer_frameParam_t voiceTurbulenceAmplitude; // amplitude of voice breathiness from 0 to 1
	speechPlayer_frameParam_t glottalOpenQuotient; // fraction between 0 and 1 of a voice cycle that the glottis is open (allows voice turbulence, alters f1...)
	speechPlayer_frameParam_t voiceAmplitude; // amplitude of voice (phonation) source between 0 and 1
	speechPlayer_frameParam_t aspirationAmplitude; // amplitude of aspiration (voiceless h, whisper) source between 0 and 1
	speechPlayer_frameParam_t cf1, cf2, cf3, cf4, cf5, cf6, cfN0, cfNP; // frequencies of standard cascade formants, nasal (anti) 0 and nasal pole in Hz
	speechPlayer_frameParam_t cb1, cb2, cb3, cb4, cb5, cb6, cbN0, cbNP; // bandwidths of standard cascade formants, nasal (anti) 0 and nasal pole in Hz
	speechPlayer_frameParam_t caNP; // amplitude from 0 to 1 of cascade nasal pole formant
	// fricatives and parallel
	speechPlayer_frameParam_t fricationAmplitude; // amplitude of frication noise from 0 to 1
	speechPlayer_frameParam_t pf1, pf2, pf3, pf4, pf5, pf6; // parallel formants in Hz
	speechPlayer_frameParam_t pb1, pb2, pb3, pb4, pb5, pb6; // parallel formant bandwidths in Hz
	speechPlayer_frameParam_t pa1, pa2, pa3, pa4, pa5, pa6; // amplitude of parallel formants between 0 and 1
	speechPlayer_frameParam_t parallelBypass; // amount of signal which should bypass parallel resonators from 0 to 1
	speechPlayer_frameParam_t preFormantGain; // amplitude from 0 to 1 of all vocal tract sound (voicing, frication) before entering formant resonators. Useful for stopping/starting speech
	speechPlayer_frameParam_t outputGain; // amplitude from 0 to 1 of final output (master volume)
	speechPlayer_frameParam_t endVoicePitch; // pitch of voice at the end of the frame length
} speechPlayer_frame_t;
const int speechPlayer_frame_numParams=sizeof(speechPlayer_frame_t)/sizeof(speechPlayer_frameParam_t); | |||||
class FrameManager { | |||||
public: | |||||
static FrameManager* create(); //factory function | |||||
virtual void queueFrame(speechPlayer_frame_t* frame, unsigned int minNumSamples, unsigned int numFadeSamples, int userIndex, bool purgeQueue)=0; | |||||
virtual const speechPlayer_frame_t* const getCurrentFrame()=0; | |||||
virtual const int getLastIndex()=0; | |||||
}; | |||||
#endif |
/* | |||||
This file is a part of the NV Speech Player project. | |||||
URL: https://bitbucket.org/nvaccess/speechplayer | |||||
Copyright 2014 NV Access Limited. | |||||
This program is free software: you can redistribute it and/or modify | |||||
it under the terms of the GNU General Public License version 2.0, as published by | |||||
the Free Software Foundation. | |||||
This program is distributed in the hope that it will be useful, | |||||
but WITHOUT ANY WARRANTY; without even the implied warranty of | |||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. | |||||
This license can be found at: | |||||
http://www.gnu.org/licenses/old-licenses/gpl-2.0.html | |||||
*/ | |||||
#ifndef SPEECHPLAYER_SAMPLE_H | |||||
#define SPEECHPLAYER_SAMPLE_H | |||||
// Numeric type of a single audio sample value.
typedef short sampleVal;

// One audio sample; a struct wrapper around a single sampleVal.
typedef struct {
	sampleVal value;
} sample;
#endif |
/* | |||||
This file is a part of the NV Speech Player project. | |||||
URL: https://bitbucket.org/nvaccess/speechplayer | |||||
Copyright 2014 NV Access Limited. | |||||
This program is free software: you can redistribute it and/or modify | |||||
it under the terms of the GNU General Public License version 2.0, as published by | |||||
the Free Software Foundation. | |||||
This program is distributed in the hope that it will be useful, | |||||
but WITHOUT ANY WARRANTY; without even the implied warranty of | |||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. | |||||
This license can be found at: | |||||
http://www.gnu.org/licenses/old-licenses/gpl-2.0.html | |||||
*/ | |||||
#include "frame.h" | |||||
#include "speechWaveGenerator.h" | |||||
#include "speechPlayer.h" | |||||
typedef struct { | |||||
int sampleRate; | |||||
FrameManager* frameManager; | |||||
SpeechWaveGenerator* waveGenerator; | |||||
} speechPlayer_handleInfo_t; | |||||
// Creates a player for the given sample rate: a frame manager, a wave
// generator for that rate, and the link between the two.
// Returns an opaque handle to pass to the other speechPlayer_* functions.
speechPlayer_handle_t speechPlayer_initialize(int sampleRate) {
	speechPlayer_handleInfo_t* info=new speechPlayer_handleInfo_t;
	info->sampleRate=sampleRate;

	FrameManager* frames=FrameManager::create();
	info->frameManager=frames;

	SpeechWaveGenerator* generator=SpeechWaveGenerator::create(sampleRate);
	info->waveGenerator=generator;
	// The generator pulls its parameters from the frame manager.
	generator->setFrameManager(frames);

	return static_cast<speechPlayer_handle_t>(info);
}
// Queues a frame on the player's frame manager.
// A fade duration of zero is raised to one sample before queuing; all other
// arguments are passed through unchanged.
void speechPlayer_queueFrame(speechPlayer_handle_t playerHandle, speechPlayer_frame_t* framePtr, unsigned int minFrameDuration, unsigned int fadeDuration, int userIndex, bool purgeQueue) {
	const unsigned int effectiveFade=(fadeDuration<1)?1:fadeDuration;
	speechPlayer_handleInfo_t* info=static_cast<speechPlayer_handleInfo_t*>(playerHandle);
	info->frameManager->queueFrame(framePtr,minFrameDuration,effectiveFade,userIndex,purgeQueue);
}
// Asks the player's wave generator for up to sampleCount samples, written to
// sampleBuf, and returns whatever the generator's generate() call returns.
int speechPlayer_synthesize(speechPlayer_handle_t playerHandle, unsigned int sampleCount, sample* sampleBuf) {
	speechPlayer_handleInfo_t* info=static_cast<speechPlayer_handleInfo_t*>(playerHandle);
	SpeechWaveGenerator* generator=info->waveGenerator;
	return generator->generate(sampleCount,sampleBuf);
}
// Returns the last user index reported by the player's frame manager.
int speechPlayer_getLastIndex(speechPlayer_handle_t playerHandle) {
	return static_cast<speechPlayer_handleInfo_t*>(playerHandle)->frameManager->getLastIndex();
}
// Destroys a player created by speechPlayer_initialize: deletes the wave
// generator, then the frame manager, then the handle state itself.
// A NULL handle is accepted and ignored, so callers can terminate
// unconditionally on cleanup paths. The handle must not be used afterwards.
void speechPlayer_terminate(speechPlayer_handle_t playerHandle) {
	speechPlayer_handleInfo_t* playerHandleInfo=(speechPlayer_handleInfo_t*)playerHandle;
	if(!playerHandleInfo) return;
	// The generator holds a pointer to the frame manager, so it goes first.
	delete playerHandleInfo->waveGenerator;
	delete playerHandleInfo->frameManager;
	delete playerHandleInfo;
}
/* | |||||
This file is a part of the NV Speech Player project. | |||||
URL: https://bitbucket.org/nvaccess/speechplayer | |||||
Copyright 2014 NV Access Limited. | |||||
This program is free software: you can redistribute it and/or modify | |||||
it under the terms of the GNU General Public License version 2.0, as published by | |||||
the Free Software Foundation. | |||||
This program is distributed in the hope that it will be useful, | |||||
but WITHOUT ANY WARRANTY; without even the implied warranty of | |||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. | |||||
This license can be found at: | |||||
http://www.gnu.org/licenses/old-licenses/gpl-2.0.html | |||||
*/ | |||||
#ifndef SPEECHPLAYER_H
#define SPEECHPLAYER_H

/* The prototypes below use bool; C callers need <stdbool.h> for it. */
#ifndef __cplusplus
#include <stdbool.h>
#endif

/* Project headers are included outside the extern "C" block so that their
   own declarations are not given C linkage by accident. */
#include "frame.h"
#include "sample.h"

#ifdef __cplusplus
extern "C" {
#endif

/* Opaque handle identifying one player instance. */
typedef void* speechPlayer_handle_t;

/* Creates a player for the given sample rate and returns its handle.
   Release it with speechPlayer_terminate. */
speechPlayer_handle_t speechPlayer_initialize(int sampleRate);

/* Queues a frame on the player. A fadeDuration of 0 is treated as 1.
   NOTE(review): the units of minFrameDuration and fadeDuration are defined
   by the frame manager - confirm against frame.h. */
void speechPlayer_queueFrame(speechPlayer_handle_t playerHandle, speechPlayer_frame_t* framePtr, unsigned int minFrameDuration, unsigned int fadeDuration, int userIndex, bool purgeQueue);

/* Generates up to sampleCount samples into sampleBuf and returns the count
   reported by the wave generator. */
int speechPlayer_synthesize(speechPlayer_handle_t playerHandle, unsigned int sampleCount, sample* sampleBuf);

/* Returns the index reported by the player's frame manager. */
int speechPlayer_getLastIndex(speechPlayer_handle_t playerHandle);

/* Destroys the player; the handle must not be used afterwards. */
void speechPlayer_terminate(speechPlayer_handle_t playerHandle);

#ifdef __cplusplus
}
#endif

#endif
/* | |||||
This file is a part of the NV Speech Player project. | |||||
URL: https://bitbucket.org/nvaccess/speechplayer | |||||
Copyright 2014 NV Access Limited. | |||||
This program is free software: you can redistribute it and/or modify | |||||
it under the terms of the GNU General Public License version 2.0, as published by | |||||
the Free Software Foundation. | |||||
This program is distributed in the hope that it will be useful, | |||||
but WITHOUT ANY WARRANTY; without even the implied warranty of | |||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. | |||||
This license can be found at: | |||||
http://www.gnu.org/licenses/old-licenses/gpl-2.0.html | |||||
*/ | |||||
/* | |||||
Based on klsyn-88, found at http://linguistics.berkeley.edu/phonlab/resources/ | |||||
*/ | |||||
#define _USE_MATH_DEFINES | |||||
#include <cassert> | |||||
#include <cmath> | |||||
#include <cstdlib> | |||||
#include "utils.h" | |||||
#include "speechWaveGenerator.h" | |||||
using namespace std; | |||||
// Two times pi: converts a cycle position in [0,1) to radians.
const double PITWO = 2 * M_PI;
// Noise source: each value is a fresh rand() sample scaled to [0,1] plus
// 0.75 times the previously returned value.
class NoiseGenerator {
public:
	NoiseGenerator() {
		lastValue = 0.0;
	}

	// Produces the next noise value and remembers it for the following call.
	double getNext() {
		const double fresh = (double)rand() / RAND_MAX;
		lastValue = fresh + 0.75 * lastValue;
		return lastValue;
	}

private:
	// Value returned by the previous getNext() call (0 before the first call).
	double lastValue;
};
// Phase accumulator: tracks the position within one cycle of an oscillator
// whose frequency may change from sample to sample.
class FrequencyGenerator {
public:
	FrequencyGenerator(int sr): sampleRate(sr), lastCyclePos(0) {}

	// Advances the phase by frequency/sampleRate, keeps only the fractional
	// part (via fmod with 1), and returns the new cycle position.
	double getNext(double frequency) {
		const double advanced = (frequency / sampleRate) + lastCyclePos;
		lastCyclePos = fmod(advanced, 1.0);
		return lastCyclePos;
	}

private:
	int sampleRate;
	// Cycle position returned by the previous call (0 initially).
	double lastCyclePos;
};
class VoiceGenerator { | |||||
private: | |||||
FrequencyGenerator pitchGen; | |||||
FrequencyGenerator vibratoGen; | |||||
NoiseGenerator aspirationGen; | |||||
public: | |||||
bool glottisOpen; | |||||
VoiceGenerator(int sr): pitchGen(sr), vibratoGen(sr), aspirationGen(), glottisOpen(false) {}; | |||||
double getNext(const speechPlayer_frame_t* frame) { | |||||
double vibrato=(sin(vibratoGen.getNext(frame->vibratoSpeed)*PITWO)*0.06*frame->vibratoPitchOffset)+1; | |||||
double voice=pitchGen.getNext(frame->voicePitch*vibrato); | |||||
double aspiration=aspirationGen.getNext()*0.2; | |||||
double turbulence=aspiration*frame->voiceTurbulenceAmplitude; | |||||
glottisOpen=voice>=frame->glottalOpenQuotient; | |||||
if(!glottisOpen) { | |||||
turbulence*=0.01; | |||||
} | |||||
voice=(voice*2)-1; | |||||
voice+=turbulence; | |||||
voice*=frame->voiceAmplitude; | |||||
aspiration*=frame->aspirationAmplitude; | |||||
return aspiration+voice; | |||||
} | |||||
}; | |||||
class Resonator { | |||||
private: | |||||
//raw parameters | |||||
int sampleRate; | |||||
double frequency; | |||||
double bandwidth; | |||||
bool anti; | |||||
//calculated parameters | |||||
bool setOnce; | |||||
double a, b, c; | |||||
//Memory | |||||
double p1, p2; | |||||
public: | |||||
Resonator(int sampleRate, bool anti=false) { | |||||
this->sampleRate=sampleRate; | |||||
this->anti=anti; | |||||
this->setOnce=false; | |||||
this->p1=0; | |||||
this->p2=0; | |||||
} | |||||
void setParams(double frequency, double bandwidth) { | |||||
if(!setOnce||(frequency!=this->frequency)||(bandwidth!=this->bandwidth)) { | |||||
this->frequency=frequency; | |||||
this->bandwidth=bandwidth; | |||||
double r=exp(-M_PI/sampleRate*bandwidth); | |||||
c=-(r*r); | |||||
b=r*cos(PITWO/sampleRate*-frequency)*2.0; | |||||
a=1.0-b-c; | |||||
if(anti&&frequency!=0) { | |||||
a=1.0/a; | |||||
c*=-a; | |||||
b*=-a; | |||||
} | |||||
} | |||||
this->setOnce=true; | |||||
} | |||||
double resonate(double in, double frequency, double bandwidth) { | |||||
setParams(frequency,bandwidth); | |||||
double out=a*in+b*p1+c*p2; | |||||
p2=p1; | |||||
p1=anti?in:out; | |||||
return out; | |||||
} | |||||
}; | |||||
class CascadeFormantGenerator { | |||||
private: | |||||
int sampleRate; | |||||
Resonator r1, r2, r3, r4, r5, r6, rN0, rNP; | |||||
public: | |||||
CascadeFormantGenerator(int sr): sampleRate(sr), r1(sr), r2(sr), r3(sr), r4(sr), r5(sr), r6(sr), rN0(sr,true), rNP(sr) {}; | |||||
double getNext(const speechPlayer_frame_t* frame, bool glottisOpen, double input) { | |||||
input/=2.0; | |||||
double n0Output=rN0.resonate(input,frame->cfN0,frame->cbN0); | |||||
double output=calculateValueAtFadePosition(input,rNP.resonate(n0Output,frame->cfNP,frame->cbNP),frame->caNP); | |||||
output=r6.resonate(output,frame->cf6,frame->cb6); | |||||
output=r5.resonate(output,frame->cf5,frame->cb5); | |||||
output=r4.resonate(output,frame->cf4,frame->cb4); | |||||
output=r3.resonate(output,frame->cf3,frame->cb3); | |||||
output=r2.resonate(output,frame->cf2,frame->cb2); | |||||
output=r1.resonate(output,frame->cf1,frame->cb1); | |||||
return output; | |||||
} | |||||
}; | |||||
class ParallelFormantGenerator { | |||||
private: | |||||
int sampleRate; | |||||
Resonator r1, r2, r3, r4, r5, r6; | |||||
public: | |||||
ParallelFormantGenerator(int sr): sampleRate(sr), r1(sr), r2(sr), r3(sr), r4(sr), r5(sr), r6(sr) {}; | |||||
double getNext(const speechPlayer_frame_t* frame, double input) { | |||||
input/=2.0; | |||||
double output=0; | |||||
output+=(r1.resonate(input,frame->pf1,frame->pb1)-input)*frame->pa1; | |||||
output+=(r2.resonate(input,frame->pf2,frame->pb2)-input)*frame->pa2; | |||||
output+=(r3.resonate(input,frame->pf3,frame->pb3)-input)*frame->pa3; | |||||
output+=(r4.resonate(input,frame->pf4,frame->pb4)-input)*frame->pa4; | |||||
output+=(r5.resonate(input,frame->pf5,frame->pb5)-input)*frame->pa5; | |||||
output+=(r6.resonate(input,frame->pf6,frame->pb6)-input)*frame->pa6; | |||||
return calculateValueAtFadePosition(output,input,frame->parallelBypass); | |||||
} | |||||
}; | |||||
class SpeechWaveGeneratorImpl: public SpeechWaveGenerator { | |||||
private: | |||||
int sampleRate; | |||||
VoiceGenerator voiceGenerator; | |||||
NoiseGenerator fricGenerator; | |||||
CascadeFormantGenerator cascade; | |||||
ParallelFormantGenerator parallel; | |||||
FrameManager* frameManager; | |||||
public: | |||||
SpeechWaveGeneratorImpl(int sr): sampleRate(sr), voiceGenerator(sr), fricGenerator(), cascade(sr), parallel(sr), frameManager(NULL) { | |||||
} | |||||
unsigned int generate(const unsigned int sampleCount, sample* sampleBuf) { | |||||
if(!frameManager) return 0; | |||||
double val=0; | |||||
for(unsigned int i=0;i<sampleCount;++i) { | |||||
const speechPlayer_frame_t* frame=frameManager->getCurrentFrame(); | |||||
if(frame) { | |||||
double voice=voiceGenerator.getNext(frame); | |||||
double cascadeOut=cascade.getNext(frame,voiceGenerator.glottisOpen,voice*frame->preFormantGain); | |||||
double fric=fricGenerator.getNext()*0.3*frame->fricationAmplitude; | |||||
double parallelOut=parallel.getNext(frame,fric*frame->preFormantGain); | |||||
double out=(cascadeOut+parallelOut)*frame->outputGain; | |||||
sampleBuf[i].value=(int)MAX(MIN(out*4000,32000),-32000); | |||||
} else { | |||||
return i; | |||||
} | |||||
} | |||||
return sampleCount; | |||||
} | |||||
void setFrameManager(FrameManager* frameManager) { | |||||
this->frameManager=frameManager; | |||||
} | |||||
}; | |||||
// Factory: returns a newly allocated SpeechWaveGeneratorImpl for the given
// sample rate. The caller owns the returned object.
SpeechWaveGenerator* SpeechWaveGenerator::create(int sampleRate) {
	SpeechWaveGenerator* generator = new SpeechWaveGeneratorImpl(sampleRate);
	return generator;
}
/* | |||||
This file is a part of the NV Speech Player project. | |||||
URL: https://bitbucket.org/nvaccess/speechplayer | |||||
Copyright 2014 NV Access Limited. | |||||
This program is free software: you can redistribute it and/or modify | |||||
it under the terms of the GNU General Public License version 2.0, as published by | |||||
the Free Software Foundation. | |||||
This program is distributed in the hope that it will be useful, | |||||
but WITHOUT ANY WARRANTY; without even the implied warranty of | |||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. | |||||
This license can be found at: | |||||
http://www.gnu.org/licenses/old-licenses/gpl-2.0.html | |||||
*/ | |||||
#ifndef SPEECHPLAYERSPEECHWAVEGENERATOR_H | |||||
#define SPEECHPLAYERSPEECHWAVEGENERATOR_H | |||||
#include "frame.h" | |||||
#include "waveGenerator.h" | |||||
class SpeechWaveGenerator: public WaveGenerator { | |||||
public: | |||||
static SpeechWaveGenerator* create(int sampleRate); | |||||
virtual void setFrameManager(FrameManager* frameManager)=0; | |||||
}; | |||||
#endif |
/* | |||||
This file is a part of the NV Speech Player project. | |||||
URL: https://bitbucket.org/nvaccess/speechplayer | |||||
Copyright 2014 NV Access Limited. | |||||
This program is free software: you can redistribute it and/or modify | |||||
it under the terms of the GNU General Public License version 2.0, as published by | |||||
the Free Software Foundation. | |||||
This program is distributed in the hope that it will be useful, | |||||
but WITHOUT ANY WARRANTY; without even the implied warranty of | |||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. | |||||
This license can be found at: | |||||
http://www.gnu.org/licenses/old-licenses/gpl-2.0.html | |||||
*/ | |||||
#ifndef SPEECHPLAYER_UTILS_H | |||||
#define SPEECHPLAYER_UTILS_H | |||||
#ifndef M_PI | |||||
#define M_PI 3.14159265358979323846 | |||||
#endif | |||||
/* Returns the larger of two ints (b when they are equal). */
static inline int MAX(int a, int b) {
	if (a > b) {
		return a;
	}
	return b;
}
/* Returns the smaller of two ints (b when they are equal). */
static inline int MIN(int a, int b) {
	if (a < b) {
		return a;
	}
	return b;
}
/* Returns 1 when x is NaN and 0 otherwise, using the fact that NaN is the
   only value that compares unequal to itself. */
static inline int ISNAN (double x) {
	return (x != x) ? 1 : 0;
}
/* Linearly interpolates from oldVal to newVal: a curFadeRatio of 0 yields
   oldVal and a ratio of 1 yields newVal. When newVal is NaN, oldVal is
   returned unchanged.
   Declared static inline, like the other helpers in this header, so that
   each translation unit gets its own definition; a plain inline function in
   a header provides no external definition when compiled as C. */
static inline double calculateValueAtFadePosition(double oldVal, double newVal, double curFadeRatio) {
	if(ISNAN(newVal)) return oldVal;
	return oldVal+((newVal-oldVal)*curFadeRatio);
}
#endif |
/* | |||||
This file is a part of the NV Speech Player project. | |||||
URL: https://bitbucket.org/nvaccess/speechplayer | |||||
Copyright 2014 NV Access Limited. | |||||
This program is free software: you can redistribute it and/or modify | |||||
it under the terms of the GNU General Public License version 2.0, as published by | |||||
the Free Software Foundation. | |||||
This program is distributed in the hope that it will be useful, | |||||
but WITHOUT ANY WARRANTY; without even the implied warranty of | |||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. | |||||
This license can be found at: | |||||
http://www.gnu.org/licenses/old-licenses/gpl-2.0.html | |||||
*/ | |||||
#ifndef SPEECHPLAYERWAVEGENERATOR_H | |||||
#define SPEECHPLAYERWAVEGENERATOR_H | |||||
#include <list> | |||||
#include "sample.h" | |||||
#include "speechPlayer.h" | |||||
class WaveGenerator { | |||||
public: | |||||
virtual unsigned int generate(const unsigned int bufSize, sample* buffer)=0; | |||||
}; | |||||
#endif |
</PrecompiledHeader> | </PrecompiledHeader> | ||||
<WarningLevel>TurnOffAllWarnings</WarningLevel> | <WarningLevel>TurnOffAllWarnings</WarningLevel> | ||||
<Optimization>Disabled</Optimization> | <Optimization>Disabled</Optimization> | ||||
<AdditionalIncludeDirectories>$(ProjectDir);$(ProjectDir)/../include;$(ProjectDir)/../pcaudiolib/src/include;$(ProjectDir)/../include/compat;$(ProjectDir)/../ucd-tools/src/include</AdditionalIncludeDirectories> | |||||
<PreprocessorDefinitions>LIBESPEAK_NG_EXPORT;INCLUDE_KLATT;INCLUDE_MBROLA;HAVE_PCAUDIOLIB_AUDIO_H;%(PreprocessorDefinitions)</PreprocessorDefinitions> | |||||
<AdditionalIncludeDirectories>$(ProjectDir);$(ProjectDir)/../include;$(ProjectDir)/../pcaudiolib/src/include;$(ProjectDir)/../speechPlayer/include;$(ProjectDir)/../include/compat;$(ProjectDir)/../ucd-tools/src/include</AdditionalIncludeDirectories> | |||||
<PreprocessorDefinitions>LIBESPEAK_NG_EXPORT;INCLUDE_KLATT;INCLUDE_SPEECHPLAYER;INCLUDE_MBROLA;HAVE_PCAUDIOLIB_AUDIO_H;%(PreprocessorDefinitions)</PreprocessorDefinitions> | |||||
<ProgramDataBaseFileName>$(IntDir)libespeak-ng.pdb</ProgramDataBaseFileName> | <ProgramDataBaseFileName>$(IntDir)libespeak-ng.pdb</ProgramDataBaseFileName> | ||||
</ClCompile> | </ClCompile> | ||||
<Link> | <Link> | ||||
</PrecompiledHeader> | </PrecompiledHeader> | ||||
<WarningLevel>TurnOffAllWarnings</WarningLevel> | <WarningLevel>TurnOffAllWarnings</WarningLevel> | ||||
<Optimization>Disabled</Optimization> | <Optimization>Disabled</Optimization> | ||||
<PreprocessorDefinitions>LIBESPEAK_NG_EXPORT;INCLUDE_KLATT;INCLUDE_MBROLA;HAVE_PCAUDIOLIB_AUDIO_H;%(PreprocessorDefinitions)</PreprocessorDefinitions> | |||||
<PreprocessorDefinitions>LIBESPEAK_NG_EXPORT;INCLUDE_KLATT;INCLUDE_SPEECHPLAYER;INCLUDE_MBROLA;HAVE_PCAUDIOLIB_AUDIO_H;%(PreprocessorDefinitions)</PreprocessorDefinitions> | |||||
<ProgramDataBaseFileName>$(IntDir)libespeak-ng.pdb</ProgramDataBaseFileName> | <ProgramDataBaseFileName>$(IntDir)libespeak-ng.pdb</ProgramDataBaseFileName> | ||||
<AdditionalIncludeDirectories>$(ProjectDir);$(ProjectDir)/../include;$(ProjectDir)/../pcaudiolib/src/include;$(ProjectDir)/../include/compat;$(ProjectDir)/../ucd-tools/src/include</AdditionalIncludeDirectories> | |||||
<AdditionalIncludeDirectories>$(ProjectDir);$(ProjectDir)/../include;$(ProjectDir)/../pcaudiolib/src/include;$(ProjectDir)/../speechPlayer/include;$(ProjectDir)/../include/compat;$(ProjectDir)/../ucd-tools/src/include</AdditionalIncludeDirectories> | |||||
</ClCompile> | </ClCompile> | ||||
<Link> | <Link> | ||||
<SubSystem>Windows</SubSystem> | <SubSystem>Windows</SubSystem> | ||||
<Optimization>MaxSpeed</Optimization> | <Optimization>MaxSpeed</Optimization> | ||||
<FunctionLevelLinking>true</FunctionLevelLinking> | <FunctionLevelLinking>true</FunctionLevelLinking> | ||||
<IntrinsicFunctions>true</IntrinsicFunctions> | <IntrinsicFunctions>true</IntrinsicFunctions> | ||||
<AdditionalIncludeDirectories>$(ProjectDir);$(ProjectDir)/../include;$(ProjectDir)/../pcaudiolib/src/include;$(ProjectDir)/../include/compat;$(ProjectDir)/../ucd-tools/src/include</AdditionalIncludeDirectories> | |||||
<PreprocessorDefinitions>LIBESPEAK_NG_EXPORT;INCLUDE_KLATT;INCLUDE_MBROLA;HAVE_PCAUDIOLIB_AUDIO_H;%(PreprocessorDefinitions)</PreprocessorDefinitions> | |||||
<AdditionalIncludeDirectories>$(ProjectDir);$(ProjectDir)/../include;$(ProjectDir)/../pcaudiolib/src/include;$(ProjectDir)/../speechPlayer/include;$(ProjectDir)/../include/compat;$(ProjectDir)/../ucd-tools/src/include</AdditionalIncludeDirectories> | |||||
<PreprocessorDefinitions>LIBESPEAK_NG_EXPORT;INCLUDE_KLATT;INCLUDE_SPEECHPLAYER;INCLUDE_MBROLA;HAVE_PCAUDIOLIB_AUDIO_H;%(PreprocessorDefinitions)</PreprocessorDefinitions> | |||||
<ProgramDataBaseFileName>$(IntDir)libespeak-ng.pdb</ProgramDataBaseFileName> | <ProgramDataBaseFileName>$(IntDir)libespeak-ng.pdb</ProgramDataBaseFileName> | ||||
</ClCompile> | </ClCompile> | ||||
<Link> | <Link> | ||||
<Optimization>MaxSpeed</Optimization> | <Optimization>MaxSpeed</Optimization> | ||||
<FunctionLevelLinking>true</FunctionLevelLinking> | <FunctionLevelLinking>true</FunctionLevelLinking> | ||||
<IntrinsicFunctions>true</IntrinsicFunctions> | <IntrinsicFunctions>true</IntrinsicFunctions> | ||||
<PreprocessorDefinitions>LIBESPEAK_NG_EXPORT;INCLUDE_KLATT;INCLUDE_MBROLA;HAVE_PCAUDIOLIB_AUDIO_H;%(PreprocessorDefinitions)</PreprocessorDefinitions> | |||||
<PreprocessorDefinitions>LIBESPEAK_NG_EXPORT;INCLUDE_KLATT;INCLUDE_SPEECHPLAYER;INCLUDE_MBROLA;HAVE_PCAUDIOLIB_AUDIO_H;%(PreprocessorDefinitions)</PreprocessorDefinitions> | |||||
<ProgramDataBaseFileName>$(IntDir)libespeak-ng.pdb</ProgramDataBaseFileName> | <ProgramDataBaseFileName>$(IntDir)libespeak-ng.pdb</ProgramDataBaseFileName> | ||||
<AdditionalIncludeDirectories>$(ProjectDir);$(ProjectDir)/../include;$(ProjectDir)/../pcaudiolib/src/include;$(ProjectDir)/../include/compat;$(ProjectDir)/../ucd-tools/src/include</AdditionalIncludeDirectories> | |||||
<AdditionalIncludeDirectories>$(ProjectDir);$(ProjectDir)/../include;$(ProjectDir)/../pcaudiolib/src/include;$(ProjectDir)/../speechPlayer/include;$(ProjectDir)/../include/compat;$(ProjectDir)/../ucd-tools/src/include</AdditionalIncludeDirectories> | |||||
</ClCompile> | </ClCompile> | ||||
<Link> | <Link> | ||||
<SubSystem>Windows</SubSystem> | <SubSystem>Windows</SubSystem> | ||||
<ClCompile Include="..\libespeak-ng\setlengths.c" /> | <ClCompile Include="..\libespeak-ng\setlengths.c" /> | ||||
<ClCompile Include="..\libespeak-ng\spect.c" /> | <ClCompile Include="..\libespeak-ng\spect.c" /> | ||||
<ClCompile Include="..\libespeak-ng\speech.c" /> | <ClCompile Include="..\libespeak-ng\speech.c" /> | ||||
<ClCompile Include="..\libespeak-ng\sPlayer.c" /> | |||||
<ClCompile Include="..\libespeak-ng\ssml.c" /> | <ClCompile Include="..\libespeak-ng\ssml.c" /> | ||||
<ClCompile Include="..\libespeak-ng\synthdata.c" /> | <ClCompile Include="..\libespeak-ng\synthdata.c" /> | ||||
<ClCompile Include="..\libespeak-ng\synthesize.c" /> | <ClCompile Include="..\libespeak-ng\synthesize.c" /> | ||||
<ClCompile Include="..\pcaudiolib\src\audio.c" /> | <ClCompile Include="..\pcaudiolib\src\audio.c" /> | ||||
<ClCompile Include="..\pcaudiolib\src\windows.c" /> | <ClCompile Include="..\pcaudiolib\src\windows.c" /> | ||||
<ClCompile Include="..\pcaudiolib\src\xaudio2.cpp" /> | <ClCompile Include="..\pcaudiolib\src\xaudio2.cpp" /> | ||||
<ClCompile Include="..\speechPlayer\src\frame.cpp" /> | |||||
<ClCompile Include="..\speechPlayer\src\speechPlayer.cpp" /> | |||||
<ClCompile Include="..\speechPlayer\src\speechWaveGenerator.cpp" /> | |||||
<ClCompile Include="com\comentrypoints.c" /> | <ClCompile Include="com\comentrypoints.c" /> | ||||
<ClCompile Include="com\ttsengine.cpp" /> | <ClCompile Include="com\ttsengine.cpp" /> | ||||
</ItemGroup> | </ItemGroup> | ||||
<ClInclude Include="..\libespeak-ng\sintab.h" /> | <ClInclude Include="..\libespeak-ng\sintab.h" /> | ||||
<ClInclude Include="..\libespeak-ng\spect.h" /> | <ClInclude Include="..\libespeak-ng\spect.h" /> | ||||
<ClInclude Include="..\libespeak-ng\speech.h" /> | <ClInclude Include="..\libespeak-ng\speech.h" /> | ||||
<ClInclude Include="..\libespeak-ng\sPlayer.h" /> | |||||
<ClInclude Include="..\libespeak-ng\synthesize.h" /> | <ClInclude Include="..\libespeak-ng\synthesize.h" /> | ||||
<ClInclude Include="..\libespeak-ng\translate.h" /> | <ClInclude Include="..\libespeak-ng\translate.h" /> | ||||
<ClInclude Include="..\libespeak-ng\voice.h" /> | <ClInclude Include="..\libespeak-ng\voice.h" /> |
/* Placeholder function: takes no action when called. */
void dummy()
{
	return;
}