| # -Wno-endif-labels : Needed to prevent warnings in ieee80.c. | # -Wno-endif-labels : Needed to prevent warnings in ieee80.c. | ||||
| AM_CFLAGS = \ | AM_CFLAGS = \ | ||||
| -Isrc/include -Isrc/include/compat -I$(srcdir)/src/ucd-tools/src/include \ | |||||
| -Isrc/include -Isrc/include/compat -I$(srcdir)/src/speechPlayer/include -I$(srcdir)/src/ucd-tools/src/include \ | |||||
| -D_BSD_SOURCE -D_DEFAULT_SOURCE -D_POSIX_C_SOURCE=200112L \ | -D_BSD_SOURCE -D_DEFAULT_SOURCE -D_POSIX_C_SOURCE=200112L \ | ||||
| -Wno-endif-labels | -Wno-endif-labels | ||||
| src/libespeak-ng/wavegen.c | src/libespeak-ng/wavegen.c | ||||
| noinst_HEADERS = \ | noinst_HEADERS = \ | ||||
| src/speechPlayer/include/speechPlayer.h | |||||
| src/ucd-tools/src/include/ucd/ucd.h | src/ucd-tools/src/include/ucd/ucd.h | ||||
| if OPT_KLATT | if OPT_KLATT | ||||
| src_libespeak_ng_la_SOURCES += src/libespeak-ng/klatt.c | src_libespeak_ng_la_SOURCES += src/libespeak-ng/klatt.c | ||||
| endif | endif | ||||
| if OPT_SPEECHPLAYER | |||||
| src_libespeak_ng_la_CFLAGS += -DINCLUDE_SPEECHPLAYER | |||||
| src_libespeak_ng_la_SOURCES += src/libespeak-ng/sPlayer.c | |||||
| src_libespeak_ng_la_SOURCES += src/speechPlayer/src/frame.cpp | |||||
| src_libespeak_ng_la_SOURCES += src/speechPlayer/src/speechPlayer.cpp | |||||
| src_libespeak_ng_la_SOURCES += src/speechPlayer/src/speechWaveGenerator.cpp | |||||
| src_speak_ng_SOURCES = src/speak-ng.cpp | |||||
| else | |||||
| src_speak_ng_SOURCES = src/speak-ng.c | |||||
| endif | |||||
| if OPT_MBROLA | if OPT_MBROLA | ||||
| src_libespeak_ng_la_CFLAGS += -DINCLUDE_MBROLA | src_libespeak_ng_la_CFLAGS += -DINCLUDE_MBROLA | ||||
| src_libespeak_ng_la_SOURCES += src/libespeak-ng/mbrowrap.c | src_libespeak_ng_la_SOURCES += src/libespeak-ng/mbrowrap.c | ||||
| src_speak_ng_LDADD = src/libespeak-ng.la | src_speak_ng_LDADD = src/libespeak-ng.la | ||||
| src_speak_ng_LDFLAGS = -static -lm ${PCAUDIOLIB_LIBS} | src_speak_ng_LDFLAGS = -static -lm ${PCAUDIOLIB_LIBS} | ||||
| src_speak_ng_CFLAGS = -Isrc/libespeak-ng ${AM_CFLAGS} | src_speak_ng_CFLAGS = -Isrc/libespeak-ng ${AM_CFLAGS} | ||||
| src_speak_ng_SOURCES = src/speak-ng.c | |||||
| bin_PROGRAMS += src/espeak-ng | bin_PROGRAMS += src/espeak-ng | ||||
| tests_ssml_fuzzer_test_SOURCES = tests/ssml-fuzzer.c | tests_ssml_fuzzer_test_SOURCES = tests/ssml-fuzzer.c | ||||
| tests_ssml_fuzzer_test_LDADD = src/libespeak-ng.la | tests_ssml_fuzzer_test_LDADD = src/libespeak-ng.la | ||||
| if OPT_SPEECHPLAYER | |||||
| tests_api_test_SOURCES += tests/dummy.cpp | |||||
| tests_encoding_test_SOURCES += tests/dummy.cpp | |||||
| tests_readclause_test_SOURCES += tests/dummy.cpp | |||||
| tests_ssml_fuzzer_test_SOURCES += tests/dummy.cpp | |||||
| endif | |||||
| if HAVE_LIBFUZZER | if HAVE_LIBFUZZER | ||||
| tests_ssml_fuzzer_test_CFLAGS += -fsanitize=fuzzer | tests_ssml_fuzzer_test_CFLAGS += -fsanitize=fuzzer | ||||
| tests_ssml_fuzzer_test_LDFLAGS = -fsanitize=fuzzer | tests_ssml_fuzzer_test_LDFLAGS = -fsanitize=fuzzer |
| [AS_HELP_STRING([--with-klatt], [enable the Klatt formant synthesizer @<:@default=yes@:>@])], | [AS_HELP_STRING([--with-klatt], [enable the Klatt formant synthesizer @<:@default=yes@:>@])], | ||||
| []) | []) | ||||
| AC_ARG_WITH([speechplayer], | |||||
| [AS_HELP_STRING([--with-speechplayer], [enable the speechPlayer Klatt implementation @<:@default=yes@:>@])], | |||||
| []) | |||||
| AC_ARG_WITH([mbrola], | AC_ARG_WITH([mbrola], | ||||
| [AS_HELP_STRING([--with-mbrola], [enable the MBROLA speech synthesizer @<:@default=yes@:>@])], | [AS_HELP_STRING([--with-mbrola], [enable the MBROLA speech synthesizer @<:@default=yes@:>@])], | ||||
| []) | []) | ||||
| dnl ================================================================ | dnl ================================================================ | ||||
| AC_PROG_CC | AC_PROG_CC | ||||
| AC_PROG_CXX | |||||
| AC_PROG_MAKE_SET | AC_PROG_MAKE_SET | ||||
| AC_PROG_LIBTOOL | AC_PROG_LIBTOOL | ||||
| AC_PROG_LN_S | AC_PROG_LN_S | ||||
| have_klatt=yes | have_klatt=yes | ||||
| fi | fi | ||||
| if test "$with_speechplayer" = "no" ; then | |||||
| have_speechplayer=no | |||||
| else | |||||
| have_speechplayer=yes | |||||
| fi | |||||
| if test "$with_mbrola" = "no" ; then | if test "$with_mbrola" = "no" ; then | ||||
| have_mbrola=no | have_mbrola=no | ||||
| else | else | ||||
| fi | fi | ||||
| AM_CONDITIONAL(OPT_KLATT, [test x"$have_klatt" = xyes]) | AM_CONDITIONAL(OPT_KLATT, [test x"$have_klatt" = xyes]) | ||||
| AM_CONDITIONAL(OPT_SPEECHPLAYER, [test x"$have_speechplayer" = xyes]) | |||||
| AM_CONDITIONAL(OPT_MBROLA, [test x"$have_mbrola" = xyes]) | AM_CONDITIONAL(OPT_MBROLA, [test x"$have_mbrola" = xyes]) | ||||
| AM_CONDITIONAL(OPT_ASYNC, [test x"$have_async" = xyes]) | AM_CONDITIONAL(OPT_ASYNC, [test x"$have_async" = xyes]) | ||||
| ndk-build (Android): ${NDKBUILD_CHECK} | ndk-build (Android): ${NDKBUILD_CHECK} | ||||
| Klatt: ${have_klatt} | Klatt: ${have_klatt} | ||||
| speechPlayer: ${have_speechplayer} | |||||
| MBROLA: ${have_mbrola} | MBROLA: ${have_mbrola} | ||||
| Async: ${have_async} | Async: ${have_async} | ||||
| 1. a functional autotools system (`make`, `autoconf`, `automake`, `libtool` | 1. a functional autotools system (`make`, `autoconf`, `automake`, `libtool` | ||||
| and `pkg-config`); | and `pkg-config`); | ||||
| 2. a functional c compiler that supports C99 (e.g. gcc or clang). | |||||
| 2. a functional C compiler that supports C99 (e.g. gcc or clang). Note: a C++ compiler is also required when building with speechPlayer support. | |||||
| Optionally, you need: | Optionally, you need: | ||||
| 1. the [pcaudiolib](https://github.com/espeak-ng/pcaudiolib) development library | 1. the [pcaudiolib](https://github.com/espeak-ng/pcaudiolib) development library | ||||
| to enable audio output; | to enable audio output; | ||||
| 2. the speechPlayer development library to | |||||
| enable the speechPlayer Klatt implementation; | |||||
| 3. the [sonic](https://github.com/espeak-ng/sonic) development library to | 3. the [sonic](https://github.com/espeak-ng/sonic) development library to | ||||
| enable sonic audio speed up support; | enable sonic audio speed up support; | ||||
| 4. the `ronn` man-page markdown processor to build the man pages. | 4. the `ronn` man-page markdown processor to build the man pages. | ||||
| | Option | Description | Default | | | Option | Description | Default | | ||||
| |-----------------|----------------------------------------------|---------| | |-----------------|----------------------------------------------|---------| | ||||
| | `--with-klatt` | Enable Klatt formant synthesis. | yes | | | `--with-klatt` | Enable Klatt formant synthesis. | yes | | ||||
| | `--with-speechplayer` | Enable the speechPlayer Klatt implementation. | yes | | |||||
| | `--with-mbrola` | Enable MBROLA voice support. | yes | | | `--with-mbrola` | Enable MBROLA voice support. | yes | | ||||
| | `--with-sonic` | Use the sonic library to support higher WPM. | yes | | | `--with-sonic` | Use the sonic library to support higher WPM. | yes | | ||||
| | `--with-async` | Enable asynchronous commands. | yes | | | `--with-async` | Enable asynchronous commands. | yes | |
| language variant | |||||
| name Edward2 | |||||
| klatt 6 | |||||
| voicing 100 | |||||
| consonants 70 80 | |||||
| formant 1 92 100 130 | |||||
| formant 2 103 100 80 | |||||
| formant 3 103 100 70 | |||||
| formant 4 114 100 60 |
| language variant | |||||
| name klatt6 | |||||
| klatt 6 | |||||
| #include "klatt.h" | #include "klatt.h" | ||||
| #include "synthesize.h" // for frame_t, WGEN_DATA, STEPSIZE, N_KLATTP, echo... | #include "synthesize.h" // for frame_t, WGEN_DATA, STEPSIZE, N_KLATTP, echo... | ||||
| #include "voice.h" // for voice_t, N_PEAKS | #include "voice.h" // for voice_t, N_PEAKS | ||||
| #ifdef INCLUDE_SPEECHPLAYER | |||||
| #include "sPlayer.h" | |||||
| #endif | |||||
| extern unsigned char *out_ptr; | extern unsigned char *out_ptr; | ||||
| extern unsigned char *out_end; | extern unsigned char *out_end; | ||||
| { | { | ||||
| int r_ix; | int r_ix; | ||||
| #ifdef INCLUDE_SPEECHPLAYER | |||||
| KlattResetSP(); | |||||
| #endif | |||||
| if (control == 2) { | if (control == 2) { | ||||
| // Full reset | // Full reset | ||||
| kt_globals.FLPhz = (950 * kt_globals.samrate) / 10000; | kt_globals.FLPhz = (950 * kt_globals.samrate) / 10000; | ||||
| int Wavegen_Klatt(int length, int resume, frame_t *fr1, frame_t *fr2, WGEN_DATA *wdata, voice_t *wvoice) | int Wavegen_Klatt(int length, int resume, frame_t *fr1, frame_t *fr2, WGEN_DATA *wdata, voice_t *wvoice) | ||||
| { | { | ||||
| #ifdef INCLUDE_SPEECHPLAYER | |||||
| if(wvoice->klattv[0] == 6) | |||||
| return Wavegen_KlattSP(wdata, wvoice, length, resume, fr1, fr2); | |||||
| #endif | |||||
| if (resume == 0) | if (resume == 0) | ||||
| SetSynth_Klatt(length, fr1, fr2, wvoice, 1); | SetSynth_Klatt(length, fr1, fr2, wvoice, 1); | ||||
| int ix; | int ix; | ||||
| #ifdef INCLUDE_SPEECHPLAYER | |||||
| KlattInitSP(); | |||||
| #endif | |||||
| sample_count = 0; | sample_count = 0; | ||||
| kt_globals.synthesis_model = CASCADE_PARALLEL; | kt_globals.synthesis_model = CASCADE_PARALLEL; |
| #include <espeak-ng/espeak_ng.h> | |||||
| #include <espeak-ng/speak_lib.h> | |||||
| #include "sPlayer.h" | |||||
| extern unsigned char *out_ptr; | |||||
| extern unsigned char *out_end; | |||||
// Handle of the speechPlayer instance used by this file. It is created by KlattInitSP() and
// destroyed and re-created by KlattResetSP().
| static speechPlayer_handle_t speechPlayerHandle=NULL; | |||||
// Length, in samples, passed to speechPlayer_queueFrame() as both the minimum duration and the
// fade time of the lead-in frame; each of the two fade-out frames uses half of it.
| static const unsigned int minFadeLength=110; | |||||
// Returns the larger of the two values (b when they compare equal).
static int MAX(int a, int b)
{
	if (a > b) {
		return a;
	}
	return b;
}
// Returns the smaller of the two values (b when they compare equal).
static int MIN(int a, int b)
{
	if (a < b) {
		return a;
	}
	return b;
}
| static bool needsMixWaveFile(WGEN_DATA *wdata) { | |||||
| return (bool)wdata->n_mix_wavefile; | |||||
| } | |||||
| // mixes the currently queued espeak consonant wave file into the existing content in the given sample buffer. | |||||
| // This would be used for voiced consonants where the voiced part is generated by speechPlayer, but the consonant comes from a wave file in eSpeak. | |||||
| // e.g. z, v. | |||||
| // @param maxNumSamples the maximum number of samples that can be mixed into the sample buffer. | |||||
| // @param sampleBuf the buffer of existing samples. | |||||
| static void mixWaveFile(WGEN_DATA *wdata, unsigned int maxNumSamples, sample* sampleBuf) { | |||||
| unsigned int i=0; | |||||
| for(;wdata->mix_wavefile_ix<wdata->n_mix_wavefile;++wdata->mix_wavefile_ix) { | |||||
| if(i>=maxNumSamples) break; | |||||
| int val; | |||||
| if(wdata->mix_wave_scale==0) { | |||||
| val=wdata->mix_wavefile[wdata->mix_wavefile_ix+wdata->mix_wavefile_offset]; | |||||
| ++(wdata->mix_wavefile_ix); | |||||
| signed char c=wdata->mix_wavefile[wdata->mix_wavefile_ix+wdata->mix_wavefile_offset]; | |||||
| val+=(c*256); | |||||
| } else { | |||||
| val=(signed char)wdata->mix_wavefile[wdata->mix_wavefile_ix+wdata->mix_wavefile_offset]*wdata->mix_wave_scale; | |||||
| } | |||||
| val*=(wdata->amplitude_v/1024.0); | |||||
| val=(val*wdata->mix_wave_amp)/40; | |||||
| sampleBuf[i].value+=val; | |||||
| if((wdata->mix_wavefile_ix+wdata->mix_wavefile_offset)>=wdata->mix_wavefile_max) { | |||||
| wdata->mix_wavefile_offset-=(wdata->mix_wavefile_max*3)/4; | |||||
| } | |||||
| ++i; | |||||
| } | |||||
| } | |||||
| static bool isKlattFrameFollowing() { | |||||
| // eSpeak implements its command queue with a circular buffer. | |||||
| // Thus to walk it, we start from the head, walking to the tail, which may wrap around to the beginning of the buffer as it is circular. | |||||
| for(int i=(wcmdq_head+1)%N_WCMDQ;i!=wcmdq_tail;i=(i+1)%N_WCMDQ) { | |||||
| int cmd=wcmdq[i][0]; | |||||
| if(cmd==WCMD_PAUSE||cmd==WCMD_WAVE) { | |||||
| break; | |||||
| } | |||||
| if(cmd==WCMD_KLATT) { | |||||
| return true; | |||||
| } | |||||
| } | |||||
| return false; | |||||
| } | |||||
| static void fillSpeechPlayerFrame(WGEN_DATA *wdata, voice_t *wvoice, frame_t * eFrame, speechPlayer_frame_t* spFrame) { | |||||
| // eSpeak stores pitch in 4096ths of a hz. Specifically comments in voice.h mentions pitch<<12. | |||||
| // SpeechPlayer deals with floating point values of hz. | |||||
| spFrame->voicePitch=(wdata->pitch)/4096.0; | |||||
| // eSpeak stores voicing amplitude with 64 representing 100% according to comments in voice.h. | |||||
| // speechPlayer uses floating point value of 1 as 100%. | |||||
| spFrame->voiceAmplitude=(wvoice->voicing)/64.0; | |||||
| // All of eSpeak's relative formant frequency ratio values are stored with 256 representing 100% according to comments in voice.h. | |||||
| spFrame->cf1=(eFrame->ffreq[1]*wvoice->freq[1]/256.0)+wvoice->freqadd[1]; | |||||
| spFrame->cf2=(eFrame->ffreq[2]*wvoice->freq[2]/256.0)+wvoice->freqadd[2]; | |||||
| spFrame->cf3=(eFrame->ffreq[3]*wvoice->freq[3]/256.0)+wvoice->freqadd[3]; | |||||
| spFrame->cf4=(eFrame->ffreq[4]*wvoice->freq[4]/256.0)+wvoice->freqadd[4]; | |||||
| spFrame->cf5=(eFrame->ffreq[5]*wvoice->freq[5]/256.0)+wvoice->freqadd[5]; | |||||
| spFrame->cf6=(eFrame->ffreq[6]*wvoice->freq[6]/256.0)+wvoice->freqadd[6]; | |||||
| spFrame->cfNP=200; | |||||
| spFrame->cfN0=250; | |||||
| if(eFrame->klattp[KLATT_FNZ]>0) { | |||||
| spFrame->caNP=1; | |||||
| spFrame->cfN0=eFrame->klattp[KLATT_FNZ]*2; | |||||
| } else { | |||||
| spFrame->caNP=0; | |||||
| } | |||||
| spFrame->cb1=eFrame->bw[1]*2*(wvoice->width[1]/256.0); | |||||
| spFrame->cb2=eFrame->bw[2]*2*(wvoice->width[2]/256.0); | |||||
| spFrame->cb3=eFrame->bw[3]*2*(wvoice->width[3]/256.0); | |||||
| spFrame->cb4=eFrame->bw[4]*2*(wvoice->width[4]/256.0); | |||||
| spFrame->cb5=1000; | |||||
| spFrame->cb6=1000; | |||||
| spFrame->cbNP=100; | |||||
| spFrame->cbN0=100; | |||||
| spFrame->preFormantGain=1; | |||||
| spFrame->outputGain=3*(wdata->amplitude/100.0); | |||||
| spFrame->endVoicePitch=spFrame->voicePitch; | |||||
| } | |||||
| void KlattInitSP() { | |||||
| speechPlayerHandle=speechPlayer_initialize(22050); | |||||
| } | |||||
| void KlattResetSP() { | |||||
| speechPlayer_terminate(speechPlayerHandle); | |||||
| speechPlayerHandle=speechPlayer_initialize(22050); | |||||
| } | |||||
| int Wavegen_KlattSP(WGEN_DATA *wdata, voice_t *wvoice, int length, int resume, frame_t *fr1, frame_t *fr2){ | |||||
| if(!resume) { | |||||
| speechPlayer_frame_t spFrame1={0}; | |||||
| fillSpeechPlayerFrame(wdata, wvoice, fr1,&spFrame1); | |||||
| speechPlayer_frame_t spFrame2={0}; | |||||
| fillSpeechPlayerFrame(wdata, wvoice, fr2,&spFrame2); | |||||
| wdata->pitch_ix+=(wdata->pitch_inc*(length/STEPSIZE)); | |||||
| wdata->pitch=((wdata->pitch_env[MIN(wdata->pitch_ix>>8,127)]*wdata->pitch_range)>>8)+wdata->pitch_base; | |||||
| spFrame2.endVoicePitch=wdata->pitch/4096; | |||||
| bool willMixWaveFile=needsMixWaveFile(wdata); | |||||
| if(willMixWaveFile) { | |||||
| spFrame1.outputGain/=5; | |||||
| spFrame2.outputGain/=5; | |||||
| } | |||||
| int mainLength=length; | |||||
| speechPlayer_queueFrame(speechPlayerHandle,&spFrame1,minFadeLength,minFadeLength,-1,false); | |||||
| mainLength-=minFadeLength; | |||||
| bool fadeOut=!isKlattFrameFollowing(); | |||||
| if(fadeOut) { | |||||
| mainLength-=minFadeLength; | |||||
| } | |||||
| if(mainLength>=1) { | |||||
| speechPlayer_queueFrame(speechPlayerHandle,&spFrame2,mainLength,mainLength,-1,false); | |||||
| } | |||||
| if(fadeOut) { | |||||
| spFrame2.voicePitch=spFrame2.endVoicePitch; | |||||
| spFrame2.preFormantGain=0; | |||||
| speechPlayer_queueFrame(speechPlayerHandle,&spFrame2,minFadeLength/2,minFadeLength/2,-1,false); | |||||
| spFrame2.outputGain=0; | |||||
| speechPlayer_queueFrame(speechPlayerHandle,&spFrame2,minFadeLength/2,minFadeLength/2,-1,false); | |||||
| } | |||||
| } | |||||
| unsigned int maxLength=(out_end-out_ptr)/sizeof(sample); | |||||
| unsigned int outLength=speechPlayer_synthesize(speechPlayerHandle,maxLength,(sample*)out_ptr); | |||||
| mixWaveFile(wdata, outLength,(sample*)out_ptr); | |||||
| out_ptr=out_ptr+(sizeof(sample)*outLength); | |||||
| if(out_ptr>=out_end) return 1; | |||||
| return 0; | |||||
| } |
| #ifndef ESPEAK_NG_SPLAYER_H | |||||
| #define ESPEAK_NG_SPLAYER_H | |||||
| #include "synthesize.h" | |||||
| #include "voice.h" | |||||
| #include <speechPlayer.h> | |||||
| #ifdef __cplusplus | |||||
| extern "C" { | |||||
| #endif | |||||
| void KlattInitSP(); | |||||
| void KlattResetSP(); | |||||
| int Wavegen_KlattSP(WGEN_DATA *wdata, voice_t *wvoice, int length, int resume, frame_t *fr1, frame_t *fr2); | |||||
| #ifdef __cplusplus | |||||
| } | |||||
| #endif | |||||
| #endif |
| #include "speak-ng.c" |
| /* | |||||
| This file is a part of the NV Speech Player project. | |||||
| URL: https://bitbucket.org/nvaccess/speechplayer | |||||
| Copyright 2014 NV Access Limited. | |||||
| This program is free software: you can redistribute it and/or modify | |||||
| it under the terms of the GNU General Public License version 2.0, as published by | |||||
| the Free Software Foundation. | |||||
| This program is distributed in the hope that it will be useful, | |||||
| but WITHOUT ANY WARRANTY; without even the implied warranty of | |||||
| MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. | |||||
| This license can be found at: | |||||
| http://www.gnu.org/licenses/old-licenses/gpl-2.0.html | |||||
| */ | |||||
| #ifndef SPEECHPLAYER_H | |||||
| #define SPEECHPLAYER_H | |||||
| #include <stdbool.h> | |||||
| #ifdef __cplusplus | |||||
| extern "C" { | |||||
| #endif | |||||
| typedef double speechPlayer_frameParam_t; | |||||
// Parameters describing one synthesis frame. Every field has type speechPlayer_frameParam_t.
| typedef struct { | |||||
| // voicing and cascade | |||||
| speechPlayer_frameParam_t voicePitch; // fundamental frequency of voice (phonation) in hz | |||||
| speechPlayer_frameParam_t vibratoPitchOffset; // pitch is offset up or down in fraction of a semitone | |||||
| speechPlayer_frameParam_t vibratoSpeed; // Speed of vibrato in hz | |||||
| speechPlayer_frameParam_t voiceTurbulenceAmplitude; // amplitude of voice breathiness from 0 to 1 | |||||
| speechPlayer_frameParam_t glottalOpenQuotient; // fraction between 0 and 1 of a voice cycle that the glottis is open (allows voice turbulence, alters f1...) | |||||
| speechPlayer_frameParam_t voiceAmplitude; // amplitude of voice (phonation) source between 0 and 1. | |||||
| speechPlayer_frameParam_t aspirationAmplitude; // amplitude of aspiration (voiceless h, whisper) source between 0 and 1. | |||||
| speechPlayer_frameParam_t cf1, cf2, cf3, cf4, cf5, cf6, cfN0, cfNP; // frequencies of standard cascade formants, nasal (anti) 0 and nasal pole in hz | |||||
| speechPlayer_frameParam_t cb1, cb2, cb3, cb4, cb5, cb6, cbN0, cbNP; // bandwidths of standard cascade formants, nasal (anti) 0 and nasal pole in hz | |||||
| speechPlayer_frameParam_t caNP; // amplitude from 0 to 1 of cascade nasal pole formant | |||||
| // fricatives and parallel | |||||
| speechPlayer_frameParam_t fricationAmplitude; // amplitude of frication noise from 0 to 1. | |||||
| speechPlayer_frameParam_t pf1, pf2, pf3, pf4, pf5, pf6; // parallel formants in hz | |||||
| speechPlayer_frameParam_t pb1, pb2, pb3, pb4, pb5, pb6; // parallel formant bandwidths in hz | |||||
| speechPlayer_frameParam_t pa1, pa2, pa3, pa4, pa5, pa6; // amplitude of parallel formants between 0 and 1 | |||||
| speechPlayer_frameParam_t parallelBypass; // amount of signal which should bypass parallel resonators from 0 to 1 | |||||
| speechPlayer_frameParam_t preFormantGain; // amplitude from 0 to 1 of all vocal tract sound (voicing, frication) before entering formant resonators. Useful for stopping/starting speech | |||||
| speechPlayer_frameParam_t outputGain; // amplitude from 0 to 1 of final output (master volume) | |||||
| speechPlayer_frameParam_t endVoicePitch; // pitch of voice at the end of the frame length | |||||
| } speechPlayer_frame_t; | |||||
// Value type of a single audio sample.
| typedef short sampleVal; | |||||
// One audio sample; speechPlayer_synthesize() writes an array of these.
| typedef struct { | |||||
| sampleVal value; | |||||
| } sample; | |||||
| typedef void* speechPlayer_handle_t; | |||||
| speechPlayer_handle_t speechPlayer_initialize(int sampleRate); | |||||
| void speechPlayer_queueFrame(speechPlayer_handle_t playerHandle, speechPlayer_frame_t* framePtr, unsigned int minFrameDuration, unsigned int fadeDuration, int userIndex, bool purgeQueue); | |||||
| int speechPlayer_synthesize(speechPlayer_handle_t playerHandle, unsigned int sampleCount, sample* sampleBuf); | |||||
| int speechPlayer_getLastIndex(speechPlayer_handle_t playerHandle); | |||||
| void speechPlayer_terminate(speechPlayer_handle_t playerHandle); | |||||
| #ifdef __cplusplus | |||||
| } | |||||
| #endif | |||||
| #endif |
| /* | |||||
| This file is a part of the NV Speech Player project. | |||||
| URL: https://bitbucket.org/nvaccess/speechplayer | |||||
| Copyright 2014 NV Access Limited. | |||||
| This program is free software: you can redistribute it and/or modify | |||||
| it under the terms of the GNU General Public License version 2.0, as published by | |||||
| the Free Software Foundation. | |||||
| This program is distributed in the hope that it will be useful, | |||||
| but WITHOUT ANY WARRANTY; without even the implied warranty of | |||||
| MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. | |||||
| This license can be found at: | |||||
| http://www.gnu.org/licenses/old-licenses/gpl-2.0.html | |||||
| */ | |||||
| #include <cstring> | |||||
| #include <queue> | |||||
| #include "utils.h" | |||||
| #include "frame.h" | |||||
| using namespace std; | |||||
// One queued frame together with its timing information, as stored by FrameManagerImpl.
| struct frameRequest_t { | |||||
// Number of samples that must elapse on this frame before the next queued frame is taken up.
| unsigned int minNumSamples; | |||||
// Number of samples over which the previous frame is faded into this one.
| unsigned int numFadeSamples; | |||||
// True when the request was queued without frame data (a null frame pointer).
| bool NULLFrame; | |||||
// The frame parameters; for a NULLFrame request they are filled in when the request is dequeued.
| speechPlayer_frame_t frame; | |||||
// Amount added to the voice pitch for every sample while this frame is current.
| double voicePitchInc; | |||||
// Caller-supplied index reported by getLastIndex(); -1 leaves the last reported index unchanged.
| int userIndex; | |||||
| }; | |||||
| class FrameManagerImpl: public FrameManager { | |||||
| private: | |||||
| queue<frameRequest_t*> frameRequestQueue; | |||||
| frameRequest_t* oldFrameRequest; | |||||
| frameRequest_t* newFrameRequest; | |||||
| speechPlayer_frame_t curFrame; | |||||
| bool curFrameIsNULL; | |||||
| unsigned int sampleCounter; | |||||
| int lastUserIndex; | |||||
| void updateCurrentFrame() { | |||||
| sampleCounter++; | |||||
| if(newFrameRequest) { | |||||
| if(sampleCounter>(newFrameRequest->numFadeSamples)) { | |||||
| delete oldFrameRequest; | |||||
| oldFrameRequest=newFrameRequest; | |||||
| newFrameRequest=NULL; | |||||
| } else { | |||||
| double curFadeRatio=(double)sampleCounter/(newFrameRequest->numFadeSamples); | |||||
| for(int i=0;i<speechPlayer_frame_numParams;++i) { | |||||
| ((speechPlayer_frameParam_t*)&curFrame)[i]=calculateValueAtFadePosition(((speechPlayer_frameParam_t*)&(oldFrameRequest->frame))[i],((speechPlayer_frameParam_t*)&(newFrameRequest->frame))[i],curFadeRatio); | |||||
| } | |||||
| } | |||||
| } else if(sampleCounter>(oldFrameRequest->minNumSamples)) { | |||||
| if(!frameRequestQueue.empty()) { | |||||
| curFrameIsNULL=false; | |||||
| newFrameRequest=frameRequestQueue.front(); | |||||
| frameRequestQueue.pop(); | |||||
| if(newFrameRequest->NULLFrame) { | |||||
| memcpy(&(newFrameRequest->frame),&(oldFrameRequest->frame),sizeof(speechPlayer_frame_t)); | |||||
| newFrameRequest->frame.preFormantGain=0; | |||||
| newFrameRequest->frame.voicePitch=curFrame.voicePitch; | |||||
| newFrameRequest->voicePitchInc=0; | |||||
| } else if(oldFrameRequest->NULLFrame) { | |||||
| memcpy(&(oldFrameRequest->frame),&(newFrameRequest->frame),sizeof(speechPlayer_frame_t)); | |||||
| oldFrameRequest->frame.preFormantGain=0; | |||||
| } | |||||
| if(newFrameRequest) { | |||||
| if(newFrameRequest->userIndex!=-1) lastUserIndex=newFrameRequest->userIndex; | |||||
| sampleCounter=0; | |||||
| newFrameRequest->frame.voicePitch+=(newFrameRequest->voicePitchInc*newFrameRequest->numFadeSamples); | |||||
| } | |||||
| } else { | |||||
| curFrameIsNULL=true; | |||||
| } | |||||
| } else { | |||||
| curFrame.voicePitch+=oldFrameRequest->voicePitchInc; | |||||
| oldFrameRequest->frame.voicePitch=curFrame.voicePitch; | |||||
| } | |||||
| } | |||||
| public: | |||||
| FrameManagerImpl(): curFrame(), curFrameIsNULL(true), sampleCounter(0), newFrameRequest(NULL), lastUserIndex(-1) { | |||||
| oldFrameRequest=new frameRequest_t(); | |||||
| oldFrameRequest->NULLFrame=true; | |||||
| } | |||||
| void queueFrame(speechPlayer_frame_t* frame, unsigned int minNumSamples, unsigned int numFadeSamples, int userIndex, bool purgeQueue) { | |||||
| frameRequest_t* frameRequest=new frameRequest_t; | |||||
| frameRequest->minNumSamples=minNumSamples; //max(minNumSamples,1); | |||||
| frameRequest->numFadeSamples=numFadeSamples; //max(numFadeSamples,1); | |||||
| if(frame) { | |||||
| frameRequest->NULLFrame=false; | |||||
| memcpy(&(frameRequest->frame),frame,sizeof(speechPlayer_frame_t)); | |||||
| frameRequest->voicePitchInc=(frame->endVoicePitch-frame->voicePitch)/frameRequest->minNumSamples; | |||||
| } else { | |||||
| frameRequest->NULLFrame=true; | |||||
| } | |||||
| frameRequest->userIndex=userIndex; | |||||
| if(purgeQueue) { | |||||
| for(;!frameRequestQueue.empty();frameRequestQueue.pop()) delete frameRequestQueue.front(); | |||||
| sampleCounter=oldFrameRequest->minNumSamples; | |||||
| if(newFrameRequest) { | |||||
| oldFrameRequest->NULLFrame=newFrameRequest->NULLFrame; | |||||
| memcpy(&(oldFrameRequest->frame),&curFrame,sizeof(speechPlayer_frame_t)); | |||||
| delete newFrameRequest; | |||||
| newFrameRequest=NULL; | |||||
| } | |||||
| } | |||||
| frameRequestQueue.push(frameRequest); | |||||
| } | |||||
| const int getLastIndex() { | |||||
| return lastUserIndex; | |||||
| } | |||||
| const speechPlayer_frame_t* const getCurrentFrame() { | |||||
| updateCurrentFrame(); | |||||
| return curFrameIsNULL?NULL:&curFrame; | |||||
| } | |||||
| ~FrameManagerImpl() { | |||||
| if(oldFrameRequest) delete oldFrameRequest; | |||||
| if(newFrameRequest) delete newFrameRequest; | |||||
| } | |||||
| }; | |||||
| FrameManager* FrameManager::create() { return new FrameManagerImpl(); } |
| /* | |||||
| This file is a part of the NV Speech Player project. | |||||
| URL: https://bitbucket.org/nvaccess/speechplayer | |||||
| Copyright 2014 NV Access Limited. | |||||
| This program is free software: you can redistribute it and/or modify | |||||
| it under the terms of the GNU General Public License version 2.0, as published by | |||||
| the Free Software Foundation. | |||||
| This program is distributed in the hope that it will be useful, | |||||
| but WITHOUT ANY WARRANTY; without even the implied warranty of | |||||
| MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. | |||||
| This license can be found at: | |||||
| http://www.gnu.org/licenses/old-licenses/gpl-2.0.html | |||||
| */ | |||||
| #ifndef SPEECHPLAYER_FRAME_H | |||||
| #define SPEECHPLAYER_FRAME_H | |||||
| #include "utils.h" | |||||
| typedef double speechPlayer_frameParam_t; | |||||
// Parameters describing one synthesis frame. Every field has type speechPlayer_frameParam_t.
| typedef struct { | |||||
| // voicing and cascade | |||||
| speechPlayer_frameParam_t voicePitch; // fundamental frequency of voice (phonation) in hz | |||||
| speechPlayer_frameParam_t vibratoPitchOffset; // pitch is offset up or down in fraction of a semitone | |||||
| speechPlayer_frameParam_t vibratoSpeed; // Speed of vibrato in hz | |||||
| speechPlayer_frameParam_t voiceTurbulenceAmplitude; // amplitude of voice breathiness from 0 to 1 | |||||
| speechPlayer_frameParam_t glottalOpenQuotient; // fraction between 0 and 1 of a voice cycle that the glottis is open (allows voice turbulence, alters f1...) | |||||
| speechPlayer_frameParam_t voiceAmplitude; // amplitude of voice (phonation) source between 0 and 1. | |||||
| speechPlayer_frameParam_t aspirationAmplitude; // amplitude of aspiration (voiceless h, whisper) source between 0 and 1. | |||||
| speechPlayer_frameParam_t cf1, cf2, cf3, cf4, cf5, cf6, cfN0, cfNP; // frequencies of standard cascade formants, nasal (anti) 0 and nasal pole in hz | |||||
| speechPlayer_frameParam_t cb1, cb2, cb3, cb4, cb5, cb6, cbN0, cbNP; // bandwidths of standard cascade formants, nasal (anti) 0 and nasal pole in hz | |||||
| speechPlayer_frameParam_t caNP; // amplitude from 0 to 1 of cascade nasal pole formant | |||||
| // fricatives and parallel | |||||
| speechPlayer_frameParam_t fricationAmplitude; // amplitude of frication noise from 0 to 1. | |||||
| speechPlayer_frameParam_t pf1, pf2, pf3, pf4, pf5, pf6; // parallel formants in hz | |||||
| speechPlayer_frameParam_t pb1, pb2, pb3, pb4, pb5, pb6; // parallel formant bandwidths in hz | |||||
| speechPlayer_frameParam_t pa1, pa2, pa3, pa4, pa5, pa6; // amplitude of parallel formants between 0 and 1 | |||||
| speechPlayer_frameParam_t parallelBypass; // amount of signal which should bypass parallel resonators from 0 to 1 | |||||
| speechPlayer_frameParam_t preFormantGain; // amplitude from 0 to 1 of all vocal tract sound (voicing, frication) before entering formant resonators. Useful for stopping/starting speech | |||||
| speechPlayer_frameParam_t outputGain; // amplitude from 0 to 1 of final output (master volume) | |||||
| speechPlayer_frameParam_t endVoicePitch; // pitch of voice at the end of the frame length | |||||
| } speechPlayer_frame_t; | |||||
// Number of parameters in a frame, computed as the size of the struct divided by the size of one
// parameter. This is only correct while the struct holds nothing but speechPlayer_frameParam_t fields.
| const int speechPlayer_frame_numParams=sizeof(speechPlayer_frame_t)/sizeof(speechPlayer_frameParam_t); | |||||
| class FrameManager { | |||||
| public: | |||||
| static FrameManager* create(); //factory function | |||||
| virtual void queueFrame(speechPlayer_frame_t* frame, unsigned int minNumSamples, unsigned int numFadeSamples, int userIndex, bool purgeQueue)=0; | |||||
| virtual const speechPlayer_frame_t* const getCurrentFrame()=0; | |||||
| virtual const int getLastIndex()=0; | |||||
| }; | |||||
| #endif |
| /* | |||||
| This file is a part of the NV Speech Player project. | |||||
| URL: https://bitbucket.org/nvaccess/speechplayer | |||||
| Copyright 2014 NV Access Limited. | |||||
| This program is free software: you can redistribute it and/or modify | |||||
| it under the terms of the GNU General Public License version 2.0, as published by | |||||
| the Free Software Foundation. | |||||
| This program is distributed in the hope that it will be useful, | |||||
| but WITHOUT ANY WARRANTY; without even the implied warranty of | |||||
| MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. | |||||
| This license can be found at: | |||||
| http://www.gnu.org/licenses/old-licenses/gpl-2.0.html | |||||
| */ | |||||
| #ifndef SPEECHPLAYER_SAMPLE_H | |||||
| #define SPEECHPLAYER_SAMPLE_H | |||||
// Value type of a single audio sample.
| typedef short sampleVal; | |||||
// One audio sample.
| typedef struct { | |||||
| sampleVal value; | |||||
| } sample; | |||||
| #endif |
| /* | |||||
| This file is a part of the NV Speech Player project. | |||||
| URL: https://bitbucket.org/nvaccess/speechplayer | |||||
| Copyright 2014 NV Access Limited. | |||||
| This program is free software: you can redistribute it and/or modify | |||||
| it under the terms of the GNU General Public License version 2.0, as published by | |||||
| the Free Software Foundation. | |||||
| This program is distributed in the hope that it will be useful, | |||||
| but WITHOUT ANY WARRANTY; without even the implied warranty of | |||||
| MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. | |||||
| This license can be found at: | |||||
| http://www.gnu.org/licenses/old-licenses/gpl-2.0.html | |||||
| */ | |||||
| #include "frame.h" | |||||
| #include "speechWaveGenerator.h" | |||||
| #include "speechPlayer.h" | |||||
| typedef struct { | |||||
| int sampleRate; | |||||
| FrameManager* frameManager; | |||||
| SpeechWaveGenerator* waveGenerator; | |||||
| } speechPlayer_handleInfo_t; | |||||
| speechPlayer_handle_t speechPlayer_initialize(int sampleRate) { | |||||
| speechPlayer_handleInfo_t* playerHandleInfo=new speechPlayer_handleInfo_t; | |||||
| playerHandleInfo->sampleRate=sampleRate; | |||||
| playerHandleInfo->frameManager=FrameManager::create(); | |||||
| playerHandleInfo->waveGenerator=SpeechWaveGenerator::create(sampleRate); | |||||
| playerHandleInfo->waveGenerator->setFrameManager(playerHandleInfo->frameManager); | |||||
| return (speechPlayer_handle_t)playerHandleInfo; | |||||
| } | |||||
| void speechPlayer_queueFrame(speechPlayer_handle_t playerHandle, speechPlayer_frame_t* framePtr, unsigned int minFrameDuration, unsigned int fadeDuration, int userIndex, bool purgeQueue) { | |||||
| speechPlayer_handleInfo_t* playerHandleInfo=(speechPlayer_handleInfo_t*)playerHandle; | |||||
| if (fadeDuration < 1) fadeDuration = 1; | |||||
| playerHandleInfo->frameManager->queueFrame(framePtr,minFrameDuration,fadeDuration,userIndex,purgeQueue); | |||||
| } | |||||
| int speechPlayer_synthesize(speechPlayer_handle_t playerHandle, unsigned int sampleCount, sample* sampleBuf) { | |||||
| return ((speechPlayer_handleInfo_t*)playerHandle)->waveGenerator->generate(sampleCount,sampleBuf); | |||||
| } | |||||
| int speechPlayer_getLastIndex(speechPlayer_handle_t playerHandle) { | |||||
| speechPlayer_handleInfo_t* playerHandleInfo=(speechPlayer_handleInfo_t*)playerHandle; | |||||
| return playerHandleInfo->frameManager->getLastIndex(); | |||||
| } | |||||
| void speechPlayer_terminate(speechPlayer_handle_t playerHandle) { | |||||
| speechPlayer_handleInfo_t* playerHandleInfo=(speechPlayer_handleInfo_t*)playerHandle; | |||||
| delete playerHandleInfo->waveGenerator; | |||||
| delete playerHandleInfo->frameManager; | |||||
| delete playerHandleInfo; | |||||
| } | |||||
| /* | |||||
| This file is a part of the NV Speech Player project. | |||||
| URL: https://bitbucket.org/nvaccess/speechplayer | |||||
| Copyright 2014 NV Access Limited. | |||||
| This program is free software: you can redistribute it and/or modify | |||||
| it under the terms of the GNU General Public License version 2.0, as published by | |||||
| the Free Software Foundation. | |||||
| This program is distributed in the hope that it will be useful, | |||||
| but WITHOUT ANY WARRANTY; without even the implied warranty of | |||||
| MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. | |||||
| This license can be found at: | |||||
| http://www.gnu.org/licenses/old-licenses/gpl-2.0.html | |||||
| */ | |||||
| #ifndef SPEECHPLAYER_H | |||||
| #define SPEECHPLAYER_H | |||||
| #ifdef __cplusplus | |||||
| extern "C" { | |||||
| #endif | |||||
| #include "frame.h" | |||||
| #include "sample.h" | |||||
| typedef void* speechPlayer_handle_t; | |||||
| speechPlayer_handle_t speechPlayer_initialize(int sampleRate); | |||||
| void speechPlayer_queueFrame(speechPlayer_handle_t playerHandle, speechPlayer_frame_t* framePtr, unsigned int minFrameDuration, unsigned int fadeDuration, int userIndex, bool purgeQueue); | |||||
| int speechPlayer_synthesize(speechPlayer_handle_t playerHandle, unsigned int sampleCount, sample* sampleBuf); | |||||
| int speechPlayer_getLastIndex(speechPlayer_handle_t playerHandle); | |||||
| void speechPlayer_terminate(speechPlayer_handle_t playerHandle); | |||||
| #ifdef __cplusplus | |||||
| } | |||||
| #endif | |||||
| #endif |
| /* | |||||
| This file is a part of the NV Speech Player project. | |||||
| URL: https://bitbucket.org/nvaccess/speechplayer | |||||
| Copyright 2014 NV Access Limited. | |||||
| This program is free software: you can redistribute it and/or modify | |||||
| it under the terms of the GNU General Public License version 2.0, as published by | |||||
| the Free Software Foundation. | |||||
| This program is distributed in the hope that it will be useful, | |||||
| but WITHOUT ANY WARRANTY; without even the implied warranty of | |||||
| MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. | |||||
| This license can be found at: | |||||
| http://www.gnu.org/licenses/old-licenses/gpl-2.0.html | |||||
| */ | |||||
| /* | |||||
| Based on klsyn-88, found at http://linguistics.berkeley.edu/phonlab/resources/ | |||||
| */ | |||||
| #define _USE_MATH_DEFINES | |||||
| #include <cassert> | |||||
| #include <cmath> | |||||
| #include <cstdlib> | |||||
| #include "utils.h" | |||||
| #include "speechWaveGenerator.h" | |||||
| using namespace std; | |||||
// Two times pi: scales a cycle position or frequency ratio into radians.
const double PITWO=2*M_PI;
/*
Noise source with memory: every output is a fresh rand() value scaled
to [0,1] plus 0.75 times the previous output.
*/
class NoiseGenerator {
	public:
	NoiseGenerator(): lastValue(0.0) {}

	// Produces the next noise value and remembers it for the following call.
	double getNext() {
		const double fresh=(double)rand()/RAND_MAX;
		lastValue=fresh+0.75*lastValue;
		return lastValue;
	}

	private:
	// Previous output, fed back into the next one.
	double lastValue;
};
/*
Phase accumulator: tracks the position within one cycle for an
oscillator whose frequency may change on every sample.
*/
class FrequencyGenerator {
	public:
	FrequencyGenerator(int sr): sampleRate(sr), lastCyclePos(0) {}

	// Advances the phase by frequency/sampleRate of a cycle and returns the
	// new position, wrapped with fmod(...,1).
	double getNext(double frequency) {
		const double advanced=(frequency/sampleRate)+lastCyclePos;
		lastCyclePos=fmod(advanced,1);
		return lastCyclePos;
	}

	private:
	int sampleRate;
	// Cycle position returned by the previous call.
	double lastCyclePos;
};
| class VoiceGenerator { | |||||
| private: | |||||
| FrequencyGenerator pitchGen; | |||||
| FrequencyGenerator vibratoGen; | |||||
| NoiseGenerator aspirationGen; | |||||
| public: | |||||
| bool glottisOpen; | |||||
| VoiceGenerator(int sr): pitchGen(sr), vibratoGen(sr), aspirationGen(), glottisOpen(false) {}; | |||||
| double getNext(const speechPlayer_frame_t* frame) { | |||||
| double vibrato=(sin(vibratoGen.getNext(frame->vibratoSpeed)*PITWO)*0.06*frame->vibratoPitchOffset)+1; | |||||
| double voice=pitchGen.getNext(frame->voicePitch*vibrato); | |||||
| double aspiration=aspirationGen.getNext()*0.2; | |||||
| double turbulence=aspiration*frame->voiceTurbulenceAmplitude; | |||||
| glottisOpen=voice>=frame->glottalOpenQuotient; | |||||
| if(!glottisOpen) { | |||||
| turbulence*=0.01; | |||||
| } | |||||
| voice=(voice*2)-1; | |||||
| voice+=turbulence; | |||||
| voice*=frame->voiceAmplitude; | |||||
| aspiration*=frame->aspirationAmplitude; | |||||
| return aspiration+voice; | |||||
| } | |||||
| }; | |||||
| class Resonator { | |||||
| private: | |||||
| //raw parameters | |||||
| int sampleRate; | |||||
| double frequency; | |||||
| double bandwidth; | |||||
| bool anti; | |||||
| //calculated parameters | |||||
| bool setOnce; | |||||
| double a, b, c; | |||||
| //Memory | |||||
| double p1, p2; | |||||
| public: | |||||
| Resonator(int sampleRate, bool anti=false) { | |||||
| this->sampleRate=sampleRate; | |||||
| this->anti=anti; | |||||
| this->setOnce=false; | |||||
| this->p1=0; | |||||
| this->p2=0; | |||||
| } | |||||
| void setParams(double frequency, double bandwidth) { | |||||
| if(!setOnce||(frequency!=this->frequency)||(bandwidth!=this->bandwidth)) { | |||||
| this->frequency=frequency; | |||||
| this->bandwidth=bandwidth; | |||||
| double r=exp(-M_PI/sampleRate*bandwidth); | |||||
| c=-(r*r); | |||||
| b=r*cos(PITWO/sampleRate*-frequency)*2.0; | |||||
| a=1.0-b-c; | |||||
| if(anti&&frequency!=0) { | |||||
| a=1.0/a; | |||||
| c*=-a; | |||||
| b*=-a; | |||||
| } | |||||
| } | |||||
| this->setOnce=true; | |||||
| } | |||||
| double resonate(double in, double frequency, double bandwidth) { | |||||
| setParams(frequency,bandwidth); | |||||
| double out=a*in+b*p1+c*p2; | |||||
| p2=p1; | |||||
| p1=anti?in:out; | |||||
| return out; | |||||
| } | |||||
| }; | |||||
| class CascadeFormantGenerator { | |||||
| private: | |||||
| int sampleRate; | |||||
| Resonator r1, r2, r3, r4, r5, r6, rN0, rNP; | |||||
| public: | |||||
| CascadeFormantGenerator(int sr): sampleRate(sr), r1(sr), r2(sr), r3(sr), r4(sr), r5(sr), r6(sr), rN0(sr,true), rNP(sr) {}; | |||||
| double getNext(const speechPlayer_frame_t* frame, bool glottisOpen, double input) { | |||||
| input/=2.0; | |||||
| double n0Output=rN0.resonate(input,frame->cfN0,frame->cbN0); | |||||
| double output=calculateValueAtFadePosition(input,rNP.resonate(n0Output,frame->cfNP,frame->cbNP),frame->caNP); | |||||
| output=r6.resonate(output,frame->cf6,frame->cb6); | |||||
| output=r5.resonate(output,frame->cf5,frame->cb5); | |||||
| output=r4.resonate(output,frame->cf4,frame->cb4); | |||||
| output=r3.resonate(output,frame->cf3,frame->cb3); | |||||
| output=r2.resonate(output,frame->cf2,frame->cb2); | |||||
| output=r1.resonate(output,frame->cf1,frame->cb1); | |||||
| return output; | |||||
| } | |||||
| }; | |||||
| class ParallelFormantGenerator { | |||||
| private: | |||||
| int sampleRate; | |||||
| Resonator r1, r2, r3, r4, r5, r6; | |||||
| public: | |||||
| ParallelFormantGenerator(int sr): sampleRate(sr), r1(sr), r2(sr), r3(sr), r4(sr), r5(sr), r6(sr) {}; | |||||
| double getNext(const speechPlayer_frame_t* frame, double input) { | |||||
| input/=2.0; | |||||
| double output=0; | |||||
| output+=(r1.resonate(input,frame->pf1,frame->pb1)-input)*frame->pa1; | |||||
| output+=(r2.resonate(input,frame->pf2,frame->pb2)-input)*frame->pa2; | |||||
| output+=(r3.resonate(input,frame->pf3,frame->pb3)-input)*frame->pa3; | |||||
| output+=(r4.resonate(input,frame->pf4,frame->pb4)-input)*frame->pa4; | |||||
| output+=(r5.resonate(input,frame->pf5,frame->pb5)-input)*frame->pa5; | |||||
| output+=(r6.resonate(input,frame->pf6,frame->pb6)-input)*frame->pa6; | |||||
| return calculateValueAtFadePosition(output,input,frame->parallelBypass); | |||||
| } | |||||
| }; | |||||
| class SpeechWaveGeneratorImpl: public SpeechWaveGenerator { | |||||
| private: | |||||
| int sampleRate; | |||||
| VoiceGenerator voiceGenerator; | |||||
| NoiseGenerator fricGenerator; | |||||
| CascadeFormantGenerator cascade; | |||||
| ParallelFormantGenerator parallel; | |||||
| FrameManager* frameManager; | |||||
| public: | |||||
| SpeechWaveGeneratorImpl(int sr): sampleRate(sr), voiceGenerator(sr), fricGenerator(), cascade(sr), parallel(sr), frameManager(NULL) { | |||||
| } | |||||
| unsigned int generate(const unsigned int sampleCount, sample* sampleBuf) { | |||||
| if(!frameManager) return 0; | |||||
| double val=0; | |||||
| for(unsigned int i=0;i<sampleCount;++i) { | |||||
| const speechPlayer_frame_t* frame=frameManager->getCurrentFrame(); | |||||
| if(frame) { | |||||
| double voice=voiceGenerator.getNext(frame); | |||||
| double cascadeOut=cascade.getNext(frame,voiceGenerator.glottisOpen,voice*frame->preFormantGain); | |||||
| double fric=fricGenerator.getNext()*0.3*frame->fricationAmplitude; | |||||
| double parallelOut=parallel.getNext(frame,fric*frame->preFormantGain); | |||||
| double out=(cascadeOut+parallelOut)*frame->outputGain; | |||||
| sampleBuf[i].value=(int)MAX(MIN(out*4000,32000),-32000); | |||||
| } else { | |||||
| return i; | |||||
| } | |||||
| } | |||||
| return sampleCount; | |||||
| } | |||||
| void setFrameManager(FrameManager* frameManager) { | |||||
| this->frameManager=frameManager; | |||||
| } | |||||
| }; | |||||
| SpeechWaveGenerator* SpeechWaveGenerator::create(int sampleRate) {return new SpeechWaveGeneratorImpl(sampleRate); } |
| /* | |||||
| This file is a part of the NV Speech Player project. | |||||
| URL: https://bitbucket.org/nvaccess/speechplayer | |||||
| Copyright 2014 NV Access Limited. | |||||
| This program is free software: you can redistribute it and/or modify | |||||
| it under the terms of the GNU General Public License version 2.0, as published by | |||||
| the Free Software Foundation. | |||||
| This program is distributed in the hope that it will be useful, | |||||
| but WITHOUT ANY WARRANTY; without even the implied warranty of | |||||
| MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. | |||||
| This license can be found at: | |||||
| http://www.gnu.org/licenses/old-licenses/gpl-2.0.html | |||||
| */ | |||||
| #ifndef SPEECHPLAYERSPEECHWAVEGENERATOR_H | |||||
| #define SPEECHPLAYERSPEECHWAVEGENERATOR_H | |||||
| #include "frame.h" | |||||
| #include "waveGenerator.h" | |||||
| class SpeechWaveGenerator: public WaveGenerator { | |||||
| public: | |||||
| static SpeechWaveGenerator* create(int sampleRate); | |||||
| virtual void setFrameManager(FrameManager* frameManager)=0; | |||||
| }; | |||||
| #endif |
| /* | |||||
| This file is a part of the NV Speech Player project. | |||||
| URL: https://bitbucket.org/nvaccess/speechplayer | |||||
| Copyright 2014 NV Access Limited. | |||||
| This program is free software: you can redistribute it and/or modify | |||||
| it under the terms of the GNU General Public License version 2.0, as published by | |||||
| the Free Software Foundation. | |||||
| This program is distributed in the hope that it will be useful, | |||||
| but WITHOUT ANY WARRANTY; without even the implied warranty of | |||||
| MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. | |||||
| This license can be found at: | |||||
| http://www.gnu.org/licenses/old-licenses/gpl-2.0.html | |||||
| */ | |||||
| #ifndef SPEECHPLAYER_UTILS_H | |||||
| #define SPEECHPLAYER_UTILS_H | |||||
| #ifndef M_PI | |||||
| #define M_PI 3.14159265358979323846 | |||||
| #endif | |||||
/* Returns the larger of two ints (b when they are equal). */
static inline int MAX(int a, int b) {
	if (a > b) {
		return a;
	}
	return b;
}
/* Returns the smaller of two ints (b when they are equal). */
static inline int MIN(int a, int b) {
	if (a < b) {
		return a;
	}
	return b;
}
/* Returns 1 when x is NaN and 0 otherwise; NaN is the only value not equal to itself. */
static inline int ISNAN (double x) {
	return (x != x) ? 1 : 0;
}
| inline double calculateValueAtFadePosition(double oldVal, double newVal, double curFadeRatio) { | |||||
| if(ISNAN(newVal)) return oldVal; | |||||
| return oldVal+((newVal-oldVal)*curFadeRatio); | |||||
| } | |||||
| #endif |
| /* | |||||
| This file is a part of the NV Speech Player project. | |||||
| URL: https://bitbucket.org/nvaccess/speechplayer | |||||
| Copyright 2014 NV Access Limited. | |||||
| This program is free software: you can redistribute it and/or modify | |||||
| it under the terms of the GNU General Public License version 2.0, as published by | |||||
| the Free Software Foundation. | |||||
| This program is distributed in the hope that it will be useful, | |||||
| but WITHOUT ANY WARRANTY; without even the implied warranty of | |||||
| MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. | |||||
| This license can be found at: | |||||
| http://www.gnu.org/licenses/old-licenses/gpl-2.0.html | |||||
| */ | |||||
| #ifndef SPEECHPLAYERWAVEGENERATOR_H | |||||
| #define SPEECHPLAYERWAVEGENERATOR_H | |||||
| #include <list> | |||||
| #include "sample.h" | |||||
| #include "speechPlayer.h" | |||||
| class WaveGenerator { | |||||
| public: | |||||
| virtual unsigned int generate(const unsigned int bufSize, sample* buffer)=0; | |||||
| }; | |||||
| #endif |
| </PrecompiledHeader> | </PrecompiledHeader> | ||||
| <WarningLevel>TurnOffAllWarnings</WarningLevel> | <WarningLevel>TurnOffAllWarnings</WarningLevel> | ||||
| <Optimization>Disabled</Optimization> | <Optimization>Disabled</Optimization> | ||||
| <AdditionalIncludeDirectories>$(ProjectDir);$(ProjectDir)/../include;$(ProjectDir)/../pcaudiolib/src/include;$(ProjectDir)/../include/compat;$(ProjectDir)/../ucd-tools/src/include</AdditionalIncludeDirectories> | |||||
| <PreprocessorDefinitions>LIBESPEAK_NG_EXPORT;INCLUDE_KLATT;INCLUDE_MBROLA;HAVE_PCAUDIOLIB_AUDIO_H;%(PreprocessorDefinitions)</PreprocessorDefinitions> | |||||
| <AdditionalIncludeDirectories>$(ProjectDir);$(ProjectDir)/../include;$(ProjectDir)/../pcaudiolib/src/include;$(ProjectDir)/../speechPlayer/include;$(ProjectDir)/../include/compat;$(ProjectDir)/../ucd-tools/src/include</AdditionalIncludeDirectories> | |||||
| <PreprocessorDefinitions>LIBESPEAK_NG_EXPORT;INCLUDE_KLATT;INCLUDE_SPEECHPLAYER;INCLUDE_MBROLA;HAVE_PCAUDIOLIB_AUDIO_H;%(PreprocessorDefinitions)</PreprocessorDefinitions> | |||||
| <ProgramDataBaseFileName>$(IntDir)libespeak-ng.pdb</ProgramDataBaseFileName> | <ProgramDataBaseFileName>$(IntDir)libespeak-ng.pdb</ProgramDataBaseFileName> | ||||
| </ClCompile> | </ClCompile> | ||||
| <Link> | <Link> | ||||
| </PrecompiledHeader> | </PrecompiledHeader> | ||||
| <WarningLevel>TurnOffAllWarnings</WarningLevel> | <WarningLevel>TurnOffAllWarnings</WarningLevel> | ||||
| <Optimization>Disabled</Optimization> | <Optimization>Disabled</Optimization> | ||||
| <PreprocessorDefinitions>LIBESPEAK_NG_EXPORT;INCLUDE_KLATT;INCLUDE_MBROLA;HAVE_PCAUDIOLIB_AUDIO_H;%(PreprocessorDefinitions)</PreprocessorDefinitions> | |||||
| <PreprocessorDefinitions>LIBESPEAK_NG_EXPORT;INCLUDE_KLATT;INCLUDE_SPEECHPLAYER;INCLUDE_MBROLA;HAVE_PCAUDIOLIB_AUDIO_H;%(PreprocessorDefinitions)</PreprocessorDefinitions> | |||||
| <ProgramDataBaseFileName>$(IntDir)libespeak-ng.pdb</ProgramDataBaseFileName> | <ProgramDataBaseFileName>$(IntDir)libespeak-ng.pdb</ProgramDataBaseFileName> | ||||
| <AdditionalIncludeDirectories>$(ProjectDir);$(ProjectDir)/../include;$(ProjectDir)/../pcaudiolib/src/include;$(ProjectDir)/../include/compat;$(ProjectDir)/../ucd-tools/src/include</AdditionalIncludeDirectories> | |||||
| <AdditionalIncludeDirectories>$(ProjectDir);$(ProjectDir)/../include;$(ProjectDir)/../pcaudiolib/src/include;$(ProjectDir)/../speechPlayer/include;$(ProjectDir)/../include/compat;$(ProjectDir)/../ucd-tools/src/include</AdditionalIncludeDirectories> | |||||
| </ClCompile> | </ClCompile> | ||||
| <Link> | <Link> | ||||
| <SubSystem>Windows</SubSystem> | <SubSystem>Windows</SubSystem> | ||||
| <Optimization>MaxSpeed</Optimization> | <Optimization>MaxSpeed</Optimization> | ||||
| <FunctionLevelLinking>true</FunctionLevelLinking> | <FunctionLevelLinking>true</FunctionLevelLinking> | ||||
| <IntrinsicFunctions>true</IntrinsicFunctions> | <IntrinsicFunctions>true</IntrinsicFunctions> | ||||
| <AdditionalIncludeDirectories>$(ProjectDir);$(ProjectDir)/../include;$(ProjectDir)/../pcaudiolib/src/include;$(ProjectDir)/../include/compat;$(ProjectDir)/../ucd-tools/src/include</AdditionalIncludeDirectories> | |||||
| <PreprocessorDefinitions>LIBESPEAK_NG_EXPORT;INCLUDE_KLATT;INCLUDE_MBROLA;HAVE_PCAUDIOLIB_AUDIO_H;%(PreprocessorDefinitions)</PreprocessorDefinitions> | |||||
| <AdditionalIncludeDirectories>$(ProjectDir);$(ProjectDir)/../include;$(ProjectDir)/../pcaudiolib/src/include;$(ProjectDir)/../speechPlayer/include;$(ProjectDir)/../include/compat;$(ProjectDir)/../ucd-tools/src/include</AdditionalIncludeDirectories> | |||||
| <PreprocessorDefinitions>LIBESPEAK_NG_EXPORT;INCLUDE_KLATT;INCLUDE_SPEECHPLAYER;INCLUDE_MBROLA;HAVE_PCAUDIOLIB_AUDIO_H;%(PreprocessorDefinitions)</PreprocessorDefinitions> | |||||
| <ProgramDataBaseFileName>$(IntDir)libespeak-ng.pdb</ProgramDataBaseFileName> | <ProgramDataBaseFileName>$(IntDir)libespeak-ng.pdb</ProgramDataBaseFileName> | ||||
| </ClCompile> | </ClCompile> | ||||
| <Link> | <Link> | ||||
| <Optimization>MaxSpeed</Optimization> | <Optimization>MaxSpeed</Optimization> | ||||
| <FunctionLevelLinking>true</FunctionLevelLinking> | <FunctionLevelLinking>true</FunctionLevelLinking> | ||||
| <IntrinsicFunctions>true</IntrinsicFunctions> | <IntrinsicFunctions>true</IntrinsicFunctions> | ||||
| <PreprocessorDefinitions>LIBESPEAK_NG_EXPORT;INCLUDE_KLATT;INCLUDE_MBROLA;HAVE_PCAUDIOLIB_AUDIO_H;%(PreprocessorDefinitions)</PreprocessorDefinitions> | |||||
| <PreprocessorDefinitions>LIBESPEAK_NG_EXPORT;INCLUDE_KLATT;INCLUDE_SPEECHPLAYER;INCLUDE_MBROLA;HAVE_PCAUDIOLIB_AUDIO_H;%(PreprocessorDefinitions)</PreprocessorDefinitions> | |||||
| <ProgramDataBaseFileName>$(IntDir)libespeak-ng.pdb</ProgramDataBaseFileName> | <ProgramDataBaseFileName>$(IntDir)libespeak-ng.pdb</ProgramDataBaseFileName> | ||||
| <AdditionalIncludeDirectories>$(ProjectDir);$(ProjectDir)/../include;$(ProjectDir)/../pcaudiolib/src/include;$(ProjectDir)/../include/compat;$(ProjectDir)/../ucd-tools/src/include</AdditionalIncludeDirectories> | |||||
| <AdditionalIncludeDirectories>$(ProjectDir);$(ProjectDir)/../include;$(ProjectDir)/../pcaudiolib/src/include;$(ProjectDir)/../speechPlayer/include;$(ProjectDir)/../include/compat;$(ProjectDir)/../ucd-tools/src/include</AdditionalIncludeDirectories> | |||||
| </ClCompile> | </ClCompile> | ||||
| <Link> | <Link> | ||||
| <SubSystem>Windows</SubSystem> | <SubSystem>Windows</SubSystem> | ||||
| <ClCompile Include="..\libespeak-ng\setlengths.c" /> | <ClCompile Include="..\libespeak-ng\setlengths.c" /> | ||||
| <ClCompile Include="..\libespeak-ng\spect.c" /> | <ClCompile Include="..\libespeak-ng\spect.c" /> | ||||
| <ClCompile Include="..\libespeak-ng\speech.c" /> | <ClCompile Include="..\libespeak-ng\speech.c" /> | ||||
| <ClCompile Include="..\libespeak-ng\sPlayer.c" /> | |||||
| <ClCompile Include="..\libespeak-ng\ssml.c" /> | <ClCompile Include="..\libespeak-ng\ssml.c" /> | ||||
| <ClCompile Include="..\libespeak-ng\synthdata.c" /> | <ClCompile Include="..\libespeak-ng\synthdata.c" /> | ||||
| <ClCompile Include="..\libespeak-ng\synthesize.c" /> | <ClCompile Include="..\libespeak-ng\synthesize.c" /> | ||||
| <ClCompile Include="..\pcaudiolib\src\audio.c" /> | <ClCompile Include="..\pcaudiolib\src\audio.c" /> | ||||
| <ClCompile Include="..\pcaudiolib\src\windows.c" /> | <ClCompile Include="..\pcaudiolib\src\windows.c" /> | ||||
| <ClCompile Include="..\pcaudiolib\src\xaudio2.cpp" /> | <ClCompile Include="..\pcaudiolib\src\xaudio2.cpp" /> | ||||
| <ClCompile Include="..\speechPlayer\src\frame.cpp" /> | |||||
| <ClCompile Include="..\speechPlayer\src\speechPlayer.cpp" /> | |||||
| <ClCompile Include="..\speechPlayer\src\speechWaveGenerator.cpp" /> | |||||
| <ClCompile Include="com\comentrypoints.c" /> | <ClCompile Include="com\comentrypoints.c" /> | ||||
| <ClCompile Include="com\ttsengine.cpp" /> | <ClCompile Include="com\ttsengine.cpp" /> | ||||
| </ItemGroup> | </ItemGroup> | ||||
| <ClInclude Include="..\libespeak-ng\sintab.h" /> | <ClInclude Include="..\libespeak-ng\sintab.h" /> | ||||
| <ClInclude Include="..\libespeak-ng\spect.h" /> | <ClInclude Include="..\libespeak-ng\spect.h" /> | ||||
| <ClInclude Include="..\libespeak-ng\speech.h" /> | <ClInclude Include="..\libespeak-ng\speech.h" /> | ||||
| <ClInclude Include="..\libespeak-ng\sPlayer.h" /> | |||||
| <ClInclude Include="..\libespeak-ng\synthesize.h" /> | <ClInclude Include="..\libespeak-ng\synthesize.h" /> | ||||
| <ClInclude Include="..\libespeak-ng\translate.h" /> | <ClInclude Include="..\libespeak-ng\translate.h" /> | ||||
| <ClInclude Include="..\libespeak-ng\voice.h" /> | <ClInclude Include="..\libespeak-ng\voice.h" /> |
/* Intentionally empty placeholder function; does nothing. */
void dummy(void)
{
}