| @@ -9,6 +9,8 @@ The espeak-ng project is a fork of the espeak project. | |||
| ### 1.51 (In Development) | |||
| * add support for speechPlayer | |||
| updated languages: | |||
| * ba (Bashkir) -- boracasli98, Valdis Vitolins | |||
| * chr (Cherokee) -- Michael Conrad | |||
| @@ -17,7 +17,7 @@ MKDIR=mkdir -p | |||
| # -Wno-endif-labels : Needed to prevent warnings in ieee80.c. | |||
| AM_CFLAGS = \ | |||
| -Isrc/include -Isrc/include/compat -I$(srcdir)/src/ucd-tools/src/include \ | |||
| -Isrc/include -Isrc/include/compat -I$(srcdir)/src/speechPlayer/include -I$(srcdir)/src/ucd-tools/src/include \ | |||
| -D_BSD_SOURCE -D_DEFAULT_SOURCE -D_POSIX_C_SOURCE=200112L \ | |||
| -Wno-endif-labels | |||
| @@ -171,6 +171,7 @@ src_libespeak_ng_la_SOURCES = \ | |||
| src/libespeak-ng/wavegen.c | |||
| noinst_HEADERS = \ | |||
| src/speechPlayer/include/speechPlayer.h | |||
| src/ucd-tools/src/include/ucd/ucd.h | |||
| if OPT_KLATT | |||
| @@ -178,6 +179,17 @@ src_libespeak_ng_la_CFLAGS += -DINCLUDE_KLATT | |||
| src_libespeak_ng_la_SOURCES += src/libespeak-ng/klatt.c | |||
| endif | |||
| if OPT_SPEECHPLAYER | |||
| src_libespeak_ng_la_CFLAGS += -DINCLUDE_SPEECHPLAYER | |||
| src_libespeak_ng_la_SOURCES += src/libespeak-ng/sPlayer.c | |||
| src_libespeak_ng_la_SOURCES += src/speechPlayer/src/frame.cpp | |||
| src_libespeak_ng_la_SOURCES += src/speechPlayer/src/speechPlayer.cpp | |||
| src_libespeak_ng_la_SOURCES += src/speechPlayer/src/speechWaveGenerator.cpp | |||
| src_speak_ng_SOURCES = src/speak-ng.cpp | |||
| else | |||
| src_speak_ng_SOURCES = src/speak-ng.c | |||
| endif | |||
| if OPT_MBROLA | |||
| src_libespeak_ng_la_CFLAGS += -DINCLUDE_MBROLA | |||
| src_libespeak_ng_la_SOURCES += src/libespeak-ng/mbrowrap.c | |||
| @@ -200,7 +212,6 @@ endif | |||
| src_speak_ng_LDADD = src/libespeak-ng.la | |||
| src_speak_ng_LDFLAGS = -static -lm ${PCAUDIOLIB_LIBS} | |||
| src_speak_ng_CFLAGS = -Isrc/libespeak-ng ${AM_CFLAGS} | |||
| src_speak_ng_SOURCES = src/speak-ng.c | |||
| bin_PROGRAMS += src/espeak-ng | |||
| @@ -276,6 +287,13 @@ tests_ssml_fuzzer_test_CFLAGS = ${AM_CFLAGS} | |||
| tests_ssml_fuzzer_test_SOURCES = tests/ssml-fuzzer.c | |||
| tests_ssml_fuzzer_test_LDADD = src/libespeak-ng.la | |||
| if OPT_SPEECHPLAYER | |||
| tests_api_test_SOURCES += tests/dummy.cpp | |||
| tests_encoding_test_SOURCES += tests/dummy.cpp | |||
| tests_readclause_test_SOURCES += tests/dummy.cpp | |||
| tests_ssml_fuzzer_test_SOURCES += tests/dummy.cpp | |||
| endif | |||
| if HAVE_LIBFUZZER | |||
| tests_ssml_fuzzer_test_CFLAGS += -fsanitize=fuzzer | |||
| tests_ssml_fuzzer_test_LDFLAGS = -fsanitize=fuzzer | |||
| @@ -32,6 +32,10 @@ AC_ARG_WITH([klatt], | |||
| [AS_HELP_STRING([--with-klatt], [enable the Klatt formant synthesizer @<:@default=yes@:>@])], | |||
| []) | |||
| AC_ARG_WITH([speechplayer], | |||
| [AS_HELP_STRING([--with-speechplayer], [enable the speechPlayer Klatt implementation @<:@default=yes@:>@])], | |||
| []) | |||
| AC_ARG_WITH([mbrola], | |||
| [AS_HELP_STRING([--with-mbrola], [enable the MBROLA speech synthesizer @<:@default=yes@:>@])], | |||
| []) | |||
| @@ -65,6 +69,7 @@ dnl Program checks. | |||
| dnl ================================================================ | |||
| AC_PROG_CC | |||
| AC_PROG_CXX | |||
| AC_PROG_MAKE_SET | |||
| AC_PROG_LIBTOOL | |||
| AC_PROG_LN_S | |||
| @@ -221,6 +226,12 @@ else | |||
| have_klatt=yes | |||
| fi | |||
| if test "$with_speechplayer" = "no" ; then | |||
| have_speechplayer=no | |||
| else | |||
| have_speechplayer=yes | |||
| fi | |||
| if test "$with_mbrola" = "no" ; then | |||
| have_mbrola=no | |||
| else | |||
| @@ -246,6 +257,7 @@ else | |||
| fi | |||
| AM_CONDITIONAL(OPT_KLATT, [test x"$have_klatt" = xyes]) | |||
| AM_CONDITIONAL(OPT_SPEECHPLAYER, [test x"$have_speechplayer" = xyes]) | |||
| AM_CONDITIONAL(OPT_MBROLA, [test x"$have_mbrola" = xyes]) | |||
| AM_CONDITIONAL(OPT_ASYNC, [test x"$have_async" = xyes]) | |||
| @@ -341,6 +353,7 @@ AC_MSG_NOTICE([ | |||
| ndk-build (Android): ${NDKBUILD_CHECK} | |||
| Klatt: ${have_klatt} | |||
| speechPlayer: ${have_speechplayer} | |||
| MBROLA: ${have_mbrola} | |||
| Async: ${have_async} | |||
| @@ -66,12 +66,14 @@ In order to build eSpeak NG, you need: | |||
| 1. a functional autotools system (`make`, `autoconf`, `automake`, `libtool` | |||
| and `pkg-config`); | |||
| 2. a functional c compiler that supports C99 (e.g. gcc or clang). | |||
| 2. a functional C compiler that supports C99 (e.g. gcc or clang). Note: when building with speechPlayer support (enabled by default), a C++ compiler is also required. | |||
| Optionally, you need: | |||
| 1. the [pcaudiolib](https://github.com/espeak-ng/pcaudiolib) development library | |||
| to enable audio output; | |||
| 2. the speechPlayer development library to | |||
| enable the speechPlayer Klatt implementation; | |||
| 3. the [sonic](https://github.com/espeak-ng/sonic) development library to | |||
| enable sonic audio speed up support; | |||
| 4. the `ronn` man-page markdown processor to build the man pages. | |||
| @@ -186,6 +188,7 @@ The following `configure` options control which eSpeak NG features are enabled: | |||
| | Option | Description | Default | | |||
| |-----------------|----------------------------------------------|---------| | |||
| | `--with-klatt` | Enable Klatt formant synthesis. | yes | | |||
| | `--with-speechplayer` | Enable the speechPlayer Klatt implementation. | yes | | |||
| | `--with-mbrola` | Enable MBROLA voice support. | yes | | |||
| | `--with-sonic` | Use the sonic library to support higher WPM. | yes | | |||
| | `--with-async` | Enable asynchronous commands. | yes | | |||
| @@ -0,0 +1,10 @@ | |||
| language variant | |||
| name Edward2 | |||
| klatt 6 | |||
| voicing 100 | |||
| consonants 70 80 | |||
| formant 1 92 100 130 | |||
| formant 2 103 100 80 | |||
| formant 3 103 100 70 | |||
| formant 4 114 100 60 | |||
| @@ -0,0 +1,4 @@ | |||
| language variant | |||
| name klatt6 | |||
| klatt 6 | |||
| @@ -37,6 +37,9 @@ | |||
| #include "klatt.h" | |||
| #include "synthesize.h" // for frame_t, WGEN_DATA, STEPSIZE, N_KLATTP, echo... | |||
| #include "voice.h" // for voice_t, N_PEAKS | |||
| #ifdef INCLUDE_SPEECHPLAYER | |||
| #include "sPlayer.h" | |||
| #endif | |||
| extern unsigned char *out_ptr; | |||
| extern unsigned char *out_end; | |||
| @@ -433,6 +436,10 @@ void KlattReset(int control) | |||
| { | |||
| int r_ix; | |||
| #ifdef INCLUDE_SPEECHPLAYER | |||
| KlattResetSP(); | |||
| #endif | |||
| if (control == 2) { | |||
| // Full reset | |||
| kt_globals.FLPhz = (950 * kt_globals.samrate) / 10000; | |||
| @@ -848,6 +855,11 @@ static double klattp_inc[N_KLATTP]; | |||
| int Wavegen_Klatt(int length, int resume, frame_t *fr1, frame_t *fr2, WGEN_DATA *wdata, voice_t *wvoice) | |||
| { | |||
| #ifdef INCLUDE_SPEECHPLAYER | |||
| if(wvoice->klattv[0] == 6) | |||
| return Wavegen_KlattSP(wdata, wvoice, length, resume, fr1, fr2); | |||
| #endif | |||
| if (resume == 0) | |||
| SetSynth_Klatt(length, fr1, fr2, wvoice, 1); | |||
| @@ -1064,6 +1076,10 @@ void KlattInit() | |||
| int ix; | |||
| #ifdef INCLUDE_SPEECHPLAYER | |||
| KlattInitSP(); | |||
| #endif | |||
| sample_count = 0; | |||
| kt_globals.synthesis_model = CASCADE_PARALLEL; | |||
| @@ -0,0 +1,143 @@ | |||
| #include <espeak-ng/espeak_ng.h> | |||
| #include <espeak-ng/speak_lib.h> | |||
| #include "sPlayer.h" | |||
| extern unsigned char *out_ptr; | |||
| extern unsigned char *out_end; | |||
| static speechPlayer_handle_t speechPlayerHandle=NULL; | |||
| static const unsigned int minFadeLength=110; | |||
// Returns the larger of the two integers; when they are equal, b is returned.
static int MAX(int a, int b)
{
	if (a > b)
		return a;
	return b;
}
// Returns the smaller of the two integers; when they are equal, b is returned.
static int MIN(int a, int b)
{
	if (a < b)
		return a;
	return b;
}
| static bool needsMixWaveFile(WGEN_DATA *wdata) { | |||
| return (bool)wdata->n_mix_wavefile; | |||
| } | |||
| // mixes the currently queued espeak consonant wave file into the existing content in the given sample buffer. | |||
| // This would be used for voiced consonants where the voiced part is generated by speechPlayer, but the consonant comes from a wave file in eSpeak. | |||
| // e.g. z, v. | |||
| // @param maxNumSamples the maximum number of samples that can be mixed into the sample buffer. | |||
| // @param sampleBuf the buffer of existing samples. | |||
| static void mixWaveFile(WGEN_DATA *wdata, unsigned int maxNumSamples, sample* sampleBuf) { | |||
| unsigned int i=0; | |||
| for(;wdata->mix_wavefile_ix<wdata->n_mix_wavefile;++wdata->mix_wavefile_ix) { | |||
| if(i>=maxNumSamples) break; | |||
| int val; | |||
| if(wdata->mix_wave_scale==0) { | |||
| val=wdata->mix_wavefile[wdata->mix_wavefile_ix+wdata->mix_wavefile_offset]; | |||
| ++(wdata->mix_wavefile_ix); | |||
| signed char c=wdata->mix_wavefile[wdata->mix_wavefile_ix+wdata->mix_wavefile_offset]; | |||
| val+=(c*256); | |||
| } else { | |||
| val=(signed char)wdata->mix_wavefile[wdata->mix_wavefile_ix+wdata->mix_wavefile_offset]*wdata->mix_wave_scale; | |||
| } | |||
| val*=(wdata->amplitude_v/1024.0); | |||
| val=(val*wdata->mix_wave_amp)/40; | |||
| sampleBuf[i].value+=val; | |||
| if((wdata->mix_wavefile_ix+wdata->mix_wavefile_offset)>=wdata->mix_wavefile_max) { | |||
| wdata->mix_wavefile_offset-=(wdata->mix_wavefile_max*3)/4; | |||
| } | |||
| ++i; | |||
| } | |||
| } | |||
| static bool isKlattFrameFollowing() { | |||
| // eSpeak implements its command queue with a circular buffer. | |||
| // Thus to walk it, we start from the head, walking to the tail, which may wrap around to the beginning of the buffer as it is circular. | |||
| for(int i=(wcmdq_head+1)%N_WCMDQ;i!=wcmdq_tail;i=(i+1)%N_WCMDQ) { | |||
| int cmd=wcmdq[i][0]; | |||
| if(cmd==WCMD_PAUSE||cmd==WCMD_WAVE) { | |||
| break; | |||
| } | |||
| if(cmd==WCMD_KLATT) { | |||
| return true; | |||
| } | |||
| } | |||
| return false; | |||
| } | |||
| static void fillSpeechPlayerFrame(WGEN_DATA *wdata, voice_t *wvoice, frame_t * eFrame, speechPlayer_frame_t* spFrame) { | |||
| // eSpeak stores pitch in 4096ths of a hz. Specifically comments in voice.h mentions pitch<<12. | |||
| // SpeechPlayer deals with floating point values of hz. | |||
| spFrame->voicePitch=(wdata->pitch)/4096.0; | |||
| // eSpeak stores voicing amplitude with 64 representing 100% according to comments in voice.h. | |||
| // speechPlayer uses floating point value of 1 as 100%. | |||
| spFrame->voiceAmplitude=(wvoice->voicing)/64.0; | |||
| // All of eSpeak's relative formant frequency ratio values are stored with 256 representing 100% according to comments in voice.h. | |||
| spFrame->cf1=(eFrame->ffreq[1]*wvoice->freq[1]/256.0)+wvoice->freqadd[1]; | |||
| spFrame->cf2=(eFrame->ffreq[2]*wvoice->freq[2]/256.0)+wvoice->freqadd[2]; | |||
| spFrame->cf3=(eFrame->ffreq[3]*wvoice->freq[3]/256.0)+wvoice->freqadd[3]; | |||
| spFrame->cf4=(eFrame->ffreq[4]*wvoice->freq[4]/256.0)+wvoice->freqadd[4]; | |||
| spFrame->cf5=(eFrame->ffreq[5]*wvoice->freq[5]/256.0)+wvoice->freqadd[5]; | |||
| spFrame->cf6=(eFrame->ffreq[6]*wvoice->freq[6]/256.0)+wvoice->freqadd[6]; | |||
| spFrame->cfNP=200; | |||
| spFrame->cfN0=250; | |||
| if(eFrame->klattp[KLATT_FNZ]>0) { | |||
| spFrame->caNP=1; | |||
| spFrame->cfN0=eFrame->klattp[KLATT_FNZ]*2; | |||
| } else { | |||
| spFrame->caNP=0; | |||
| } | |||
| spFrame->cb1=eFrame->bw[1]*2*(wvoice->width[1]/256.0); | |||
| spFrame->cb2=eFrame->bw[2]*2*(wvoice->width[2]/256.0); | |||
| spFrame->cb3=eFrame->bw[3]*2*(wvoice->width[3]/256.0); | |||
| spFrame->cb4=eFrame->bw[4]*2*(wvoice->width[4]/256.0); | |||
| spFrame->cb5=1000; | |||
| spFrame->cb6=1000; | |||
| spFrame->cbNP=100; | |||
| spFrame->cbN0=100; | |||
| spFrame->preFormantGain=1; | |||
| spFrame->outputGain=3*(wdata->amplitude/100.0); | |||
| spFrame->endVoicePitch=spFrame->voicePitch; | |||
| } | |||
| void KlattInitSP() { | |||
| speechPlayerHandle=speechPlayer_initialize(22050); | |||
| } | |||
| void KlattResetSP() { | |||
| speechPlayer_terminate(speechPlayerHandle); | |||
| speechPlayerHandle=speechPlayer_initialize(22050); | |||
| } | |||
| int Wavegen_KlattSP(WGEN_DATA *wdata, voice_t *wvoice, int length, int resume, frame_t *fr1, frame_t *fr2){ | |||
| if(!resume) { | |||
| speechPlayer_frame_t spFrame1={0}; | |||
| fillSpeechPlayerFrame(wdata, wvoice, fr1,&spFrame1); | |||
| speechPlayer_frame_t spFrame2={0}; | |||
| fillSpeechPlayerFrame(wdata, wvoice, fr2,&spFrame2); | |||
| wdata->pitch_ix+=(wdata->pitch_inc*(length/STEPSIZE)); | |||
| wdata->pitch=((wdata->pitch_env[MIN(wdata->pitch_ix>>8,127)]*wdata->pitch_range)>>8)+wdata->pitch_base; | |||
| spFrame2.endVoicePitch=wdata->pitch/4096; | |||
| bool willMixWaveFile=needsMixWaveFile(wdata); | |||
| if(willMixWaveFile) { | |||
| spFrame1.outputGain/=5; | |||
| spFrame2.outputGain/=5; | |||
| } | |||
| int mainLength=length; | |||
| speechPlayer_queueFrame(speechPlayerHandle,&spFrame1,minFadeLength,minFadeLength,-1,false); | |||
| mainLength-=minFadeLength; | |||
| bool fadeOut=!isKlattFrameFollowing(); | |||
| if(fadeOut) { | |||
| mainLength-=minFadeLength; | |||
| } | |||
| if(mainLength>=1) { | |||
| speechPlayer_queueFrame(speechPlayerHandle,&spFrame2,mainLength,mainLength,-1,false); | |||
| } | |||
| if(fadeOut) { | |||
| spFrame2.voicePitch=spFrame2.endVoicePitch; | |||
| spFrame2.preFormantGain=0; | |||
| speechPlayer_queueFrame(speechPlayerHandle,&spFrame2,minFadeLength/2,minFadeLength/2,-1,false); | |||
| spFrame2.outputGain=0; | |||
| speechPlayer_queueFrame(speechPlayerHandle,&spFrame2,minFadeLength/2,minFadeLength/2,-1,false); | |||
| } | |||
| } | |||
| unsigned int maxLength=(out_end-out_ptr)/sizeof(sample); | |||
| unsigned int outLength=speechPlayer_synthesize(speechPlayerHandle,maxLength,(sample*)out_ptr); | |||
| mixWaveFile(wdata, outLength,(sample*)out_ptr); | |||
| out_ptr=out_ptr+(sizeof(sample)*outLength); | |||
| if(out_ptr>=out_end) return 1; | |||
| return 0; | |||
| } | |||
| @@ -0,0 +1,20 @@ | |||
| #ifndef ESPEAK_NG_SPLAYER_H | |||
| #define ESPEAK_NG_SPLAYER_H | |||
| #include "synthesize.h" | |||
| #include "voice.h" | |||
| #include <speechPlayer.h> | |||
| #ifdef __cplusplus | |||
| extern "C" { | |||
| #endif | |||
| void KlattInitSP(); | |||
| void KlattResetSP(); | |||
| int Wavegen_KlattSP(WGEN_DATA *wdata, voice_t *wvoice, int length, int resume, frame_t *fr1, frame_t *fr2); | |||
| #ifdef __cplusplus | |||
| } | |||
| #endif | |||
| #endif | |||
| @@ -0,0 +1 @@ | |||
| #include "speak-ng.c" | |||
| @@ -0,0 +1,67 @@ | |||
| /* | |||
| This file is a part of the NV Speech Player project. | |||
| URL: https://bitbucket.org/nvaccess/speechplayer | |||
| Copyright 2014 NV Access Limited. | |||
| This program is free software: you can redistribute it and/or modify | |||
| it under the terms of the GNU General Public License version 2.0, as published by | |||
| the Free Software Foundation. | |||
| This program is distributed in the hope that it will be useful, | |||
| but WITHOUT ANY WARRANTY; without even the implied warranty of | |||
| MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. | |||
| This license can be found at: | |||
| http://www.gnu.org/licenses/old-licenses/gpl-2.0.html | |||
| */ | |||
| #ifndef SPEECHPLAYER_H | |||
| #define SPEECHPLAYER_H | |||
| #include <stdbool.h> | |||
| #ifdef __cplusplus | |||
| extern "C" { | |||
| #endif | |||
| typedef double speechPlayer_frameParam_t; | |||
| typedef struct { | |||
| // voicing and cascade | |||
| speechPlayer_frameParam_t voicePitch; // fundamental frequency of voice (phonation) in hz | |||
| speechPlayer_frameParam_t vibratoPitchOffset; // pitch is offset up or down in fraction of a semitone | |||
| speechPlayer_frameParam_t vibratoSpeed; // Speed of vibrato in hz | |||
| speechPlayer_frameParam_t voiceTurbulenceAmplitude; // amplitude of voice breathiness from 0 to 1 | |||
| speechPlayer_frameParam_t glottalOpenQuotient; // fraction between 0 and 1 of a voice cycle that the glottis is open (allows voice turbulence, alters f1...) | |||
| speechPlayer_frameParam_t voiceAmplitude; // amplitude of voice (phonation) source between 0 and 1. | |||
| speechPlayer_frameParam_t aspirationAmplitude; // amplitude of aspiration (voiceless h, whisper) source between 0 and 1. | |||
| speechPlayer_frameParam_t cf1, cf2, cf3, cf4, cf5, cf6, cfN0, cfNP; // frequencies of standard cascade formants, nasal (anti) 0 and nasal pole in hz | |||
| speechPlayer_frameParam_t cb1, cb2, cb3, cb4, cb5, cb6, cbN0, cbNP; // bandwidths of standard cascade formants, nasal (anti) 0 and nasal pole in hz | |||
| speechPlayer_frameParam_t caNP; // amplitude from 0 to 1 of cascade nasal pole formant | |||
| // fricatives and parallel | |||
| speechPlayer_frameParam_t fricationAmplitude; // amplitude of frication noise from 0 to 1. | |||
| speechPlayer_frameParam_t pf1, pf2, pf3, pf4, pf5, pf6; // parallel formants in hz | |||
| speechPlayer_frameParam_t pb1, pb2, pb3, pb4, pb5, pb6; // parallel formant bandwidths in hz | |||
| speechPlayer_frameParam_t pa1, pa2, pa3, pa4, pa5, pa6; // amplitude of parallel formants between 0 and 1 | |||
| speechPlayer_frameParam_t parallelBypass; // amount of signal which should bypass parallel resonators from 0 to 1 | |||
| speechPlayer_frameParam_t preFormantGain; // amplitude from 0 to 1 of all vocal tract sound (voicing, frication) before entering formant resonators. Useful for stopping/starting speech | |||
| speechPlayer_frameParam_t outputGain; // amplitude from 0 to 1 of final output (master volume) | |||
| speechPlayer_frameParam_t endVoicePitch; // pitch of voice at the end of the frame length | |||
| } speechPlayer_frame_t; | |||
| typedef short sampleVal; | |||
// One audio sample as exchanged with speechPlayer_synthesize; value is a sampleVal (a short).
| typedef struct { | |||
| sampleVal value; | |||
| } sample; | |||
| typedef void* speechPlayer_handle_t; | |||
| speechPlayer_handle_t speechPlayer_initialize(int sampleRate); | |||
| void speechPlayer_queueFrame(speechPlayer_handle_t playerHandle, speechPlayer_frame_t* framePtr, unsigned int minFrameDuration, unsigned int fadeDuration, int userIndex, bool purgeQueue); | |||
| int speechPlayer_synthesize(speechPlayer_handle_t playerHandle, unsigned int sampleCount, sample* sampleBuf); | |||
| int speechPlayer_getLastIndex(speechPlayer_handle_t playerHandle); | |||
| void speechPlayer_terminate(speechPlayer_handle_t playerHandle); | |||
| #ifdef __cplusplus | |||
| } | |||
| #endif | |||
| #endif | |||
| @@ -0,0 +1,131 @@ | |||
| /* | |||
| This file is a part of the NV Speech Player project. | |||
| URL: https://bitbucket.org/nvaccess/speechplayer | |||
| Copyright 2014 NV Access Limited. | |||
| This program is free software: you can redistribute it and/or modify | |||
| it under the terms of the GNU General Public License version 2.0, as published by | |||
| the Free Software Foundation. | |||
| This program is distributed in the hope that it will be useful, | |||
| but WITHOUT ANY WARRANTY; without even the implied warranty of | |||
| MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. | |||
| This license can be found at: | |||
| http://www.gnu.org/licenses/old-licenses/gpl-2.0.html | |||
| */ | |||
| #include <cstring> | |||
| #include <queue> | |||
| #include "utils.h" | |||
| #include "frame.h" | |||
| using namespace std; | |||
// One queued request to move the synthesizer to a new frame.
// minNumSamples: number of samples the frame stays current before the next queued request may start.
// numFadeSamples: number of samples over which the previous frame is interpolated into this one.
// NULLFrame: true when the request was queued without a frame.
// frame: copy of the requested frame parameters (only filled in at queue time when a frame was supplied).
// voicePitchInc: per-sample pitch increment, (endVoicePitch - voicePitch) / minNumSamples.
// userIndex: caller-supplied index; values other than -1 are reported back through getLastIndex().
| struct frameRequest_t { | |||
| unsigned int minNumSamples; | |||
| unsigned int numFadeSamples; | |||
| bool NULLFrame; | |||
| speechPlayer_frame_t frame; | |||
| double voicePitchInc; | |||
| int userIndex; | |||
| }; | |||
| class FrameManagerImpl: public FrameManager { | |||
| private: | |||
| queue<frameRequest_t*> frameRequestQueue; | |||
| frameRequest_t* oldFrameRequest; | |||
| frameRequest_t* newFrameRequest; | |||
| speechPlayer_frame_t curFrame; | |||
| bool curFrameIsNULL; | |||
| unsigned int sampleCounter; | |||
| int lastUserIndex; | |||
| void updateCurrentFrame() { | |||
| sampleCounter++; | |||
| if(newFrameRequest) { | |||
| if(sampleCounter>(newFrameRequest->numFadeSamples)) { | |||
| delete oldFrameRequest; | |||
| oldFrameRequest=newFrameRequest; | |||
| newFrameRequest=NULL; | |||
| } else { | |||
| double curFadeRatio=(double)sampleCounter/(newFrameRequest->numFadeSamples); | |||
| for(int i=0;i<speechPlayer_frame_numParams;++i) { | |||
| ((speechPlayer_frameParam_t*)&curFrame)[i]=calculateValueAtFadePosition(((speechPlayer_frameParam_t*)&(oldFrameRequest->frame))[i],((speechPlayer_frameParam_t*)&(newFrameRequest->frame))[i],curFadeRatio); | |||
| } | |||
| } | |||
| } else if(sampleCounter>(oldFrameRequest->minNumSamples)) { | |||
| if(!frameRequestQueue.empty()) { | |||
| curFrameIsNULL=false; | |||
| newFrameRequest=frameRequestQueue.front(); | |||
| frameRequestQueue.pop(); | |||
| if(newFrameRequest->NULLFrame) { | |||
| memcpy(&(newFrameRequest->frame),&(oldFrameRequest->frame),sizeof(speechPlayer_frame_t)); | |||
| newFrameRequest->frame.preFormantGain=0; | |||
| newFrameRequest->frame.voicePitch=curFrame.voicePitch; | |||
| newFrameRequest->voicePitchInc=0; | |||
| } else if(oldFrameRequest->NULLFrame) { | |||
| memcpy(&(oldFrameRequest->frame),&(newFrameRequest->frame),sizeof(speechPlayer_frame_t)); | |||
| oldFrameRequest->frame.preFormantGain=0; | |||
| } | |||
| if(newFrameRequest) { | |||
| if(newFrameRequest->userIndex!=-1) lastUserIndex=newFrameRequest->userIndex; | |||
| sampleCounter=0; | |||
| newFrameRequest->frame.voicePitch+=(newFrameRequest->voicePitchInc*newFrameRequest->numFadeSamples); | |||
| } | |||
| } else { | |||
| curFrameIsNULL=true; | |||
| } | |||
| } else { | |||
| curFrame.voicePitch+=oldFrameRequest->voicePitchInc; | |||
| oldFrameRequest->frame.voicePitch=curFrame.voicePitch; | |||
| } | |||
| } | |||
| public: | |||
| FrameManagerImpl(): curFrame(), curFrameIsNULL(true), sampleCounter(0), newFrameRequest(NULL), lastUserIndex(-1) { | |||
| oldFrameRequest=new frameRequest_t(); | |||
| oldFrameRequest->NULLFrame=true; | |||
| } | |||
| void queueFrame(speechPlayer_frame_t* frame, unsigned int minNumSamples, unsigned int numFadeSamples, int userIndex, bool purgeQueue) { | |||
| frameRequest_t* frameRequest=new frameRequest_t; | |||
| frameRequest->minNumSamples=minNumSamples; //max(minNumSamples,1); | |||
| frameRequest->numFadeSamples=numFadeSamples; //max(numFadeSamples,1); | |||
| if(frame) { | |||
| frameRequest->NULLFrame=false; | |||
| memcpy(&(frameRequest->frame),frame,sizeof(speechPlayer_frame_t)); | |||
| frameRequest->voicePitchInc=(frame->endVoicePitch-frame->voicePitch)/frameRequest->minNumSamples; | |||
| } else { | |||
| frameRequest->NULLFrame=true; | |||
| } | |||
| frameRequest->userIndex=userIndex; | |||
| if(purgeQueue) { | |||
| for(;!frameRequestQueue.empty();frameRequestQueue.pop()) delete frameRequestQueue.front(); | |||
| sampleCounter=oldFrameRequest->minNumSamples; | |||
| if(newFrameRequest) { | |||
| oldFrameRequest->NULLFrame=newFrameRequest->NULLFrame; | |||
| memcpy(&(oldFrameRequest->frame),&curFrame,sizeof(speechPlayer_frame_t)); | |||
| delete newFrameRequest; | |||
| newFrameRequest=NULL; | |||
| } | |||
| } | |||
| frameRequestQueue.push(frameRequest); | |||
| } | |||
| const int getLastIndex() { | |||
| return lastUserIndex; | |||
| } | |||
| const speechPlayer_frame_t* const getCurrentFrame() { | |||
| updateCurrentFrame(); | |||
| return curFrameIsNULL?NULL:&curFrame; | |||
| } | |||
| ~FrameManagerImpl() { | |||
| if(oldFrameRequest) delete oldFrameRequest; | |||
| if(newFrameRequest) delete newFrameRequest; | |||
| } | |||
| }; | |||
| FrameManager* FrameManager::create() { return new FrameManagerImpl(); } | |||
| @@ -0,0 +1,55 @@ | |||
| /* | |||
| This file is a part of the NV Speech Player project. | |||
| URL: https://bitbucket.org/nvaccess/speechplayer | |||
| Copyright 2014 NV Access Limited. | |||
| This program is free software: you can redistribute it and/or modify | |||
| it under the terms of the GNU General Public License version 2.0, as published by | |||
| the Free Software Foundation. | |||
| This program is distributed in the hope that it will be useful, | |||
| but WITHOUT ANY WARRANTY; without even the implied warranty of | |||
| MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. | |||
| This license can be found at: | |||
| http://www.gnu.org/licenses/old-licenses/gpl-2.0.html | |||
| */ | |||
| #ifndef SPEECHPLAYER_FRAME_H | |||
| #define SPEECHPLAYER_FRAME_H | |||
| #include "utils.h" | |||
| typedef double speechPlayer_frameParam_t; | |||
| typedef struct { | |||
| // voicing and cascade | |||
| speechPlayer_frameParam_t voicePitch; // fundamental frequency of voice (phonation) in hz | |||
| speechPlayer_frameParam_t vibratoPitchOffset; // pitch is offset up or down in fraction of a semitone | |||
| speechPlayer_frameParam_t vibratoSpeed; // Speed of vibrato in hz | |||
| speechPlayer_frameParam_t voiceTurbulenceAmplitude; // amplitude of voice breathiness from 0 to 1 | |||
| speechPlayer_frameParam_t glottalOpenQuotient; // fraction between 0 and 1 of a voice cycle that the glottis is open (allows voice turbulence, alters f1...) | |||
| speechPlayer_frameParam_t voiceAmplitude; // amplitude of voice (phonation) source between 0 and 1. | |||
| speechPlayer_frameParam_t aspirationAmplitude; // amplitude of aspiration (voiceless h, whisper) source between 0 and 1. | |||
| speechPlayer_frameParam_t cf1, cf2, cf3, cf4, cf5, cf6, cfN0, cfNP; // frequencies of standard cascade formants, nasal (anti) 0 and nasal pole in hz | |||
| speechPlayer_frameParam_t cb1, cb2, cb3, cb4, cb5, cb6, cbN0, cbNP; // bandwidths of standard cascade formants, nasal (anti) 0 and nasal pole in hz | |||
| speechPlayer_frameParam_t caNP; // amplitude from 0 to 1 of cascade nasal pole formant | |||
| // fricatives and parallel | |||
| speechPlayer_frameParam_t fricationAmplitude; // amplitude of frication noise from 0 to 1. | |||
| speechPlayer_frameParam_t pf1, pf2, pf3, pf4, pf5, pf6; // parallel formants in hz | |||
| speechPlayer_frameParam_t pb1, pb2, pb3, pb4, pb5, pb6; // parallel formant bandwidths in hz | |||
| speechPlayer_frameParam_t pa1, pa2, pa3, pa4, pa5, pa6; // amplitude of parallel formants between 0 and 1 | |||
| speechPlayer_frameParam_t parallelBypass; // amount of signal which should bypass parallel resonators from 0 to 1 | |||
| speechPlayer_frameParam_t preFormantGain; // amplitude from 0 to 1 of all vocal tract sound (voicing, frication) before entering formant resonators. Useful for stopping/starting speech | |||
| speechPlayer_frameParam_t outputGain; // amplitude from 0 to 1 of final output (master volume) | |||
| speechPlayer_frameParam_t endVoicePitch; // pitch of voice at the end of the frame length | |||
| } speechPlayer_frame_t; | |||
| const int speechPlayer_frame_numParams=sizeof(speechPlayer_frame_t)/sizeof(speechPlayer_frameParam_t); | |||
| class FrameManager { | |||
| public: | |||
| static FrameManager* create(); //factory function | |||
| virtual void queueFrame(speechPlayer_frame_t* frame, unsigned int minNumSamples, unsigned int numFadeSamples, int userIndex, bool purgeQueue)=0; | |||
| virtual const speechPlayer_frame_t* const getCurrentFrame()=0; | |||
| virtual const int getLastIndex()=0; | |||
| }; | |||
| #endif | |||
| @@ -0,0 +1,24 @@ | |||
| /* | |||
| This file is a part of the NV Speech Player project. | |||
| URL: https://bitbucket.org/nvaccess/speechplayer | |||
| Copyright 2014 NV Access Limited. | |||
| This program is free software: you can redistribute it and/or modify | |||
| it under the terms of the GNU General Public License version 2.0, as published by | |||
| the Free Software Foundation. | |||
| This program is distributed in the hope that it will be useful, | |||
| but WITHOUT ANY WARRANTY; without even the implied warranty of | |||
| MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. | |||
| This license can be found at: | |||
| http://www.gnu.org/licenses/old-licenses/gpl-2.0.html | |||
| */ | |||
| #ifndef SPEECHPLAYER_SAMPLE_H | |||
| #define SPEECHPLAYER_SAMPLE_H | |||
| typedef short sampleVal; | |||
// One audio sample; value is a sampleVal (a short).
| typedef struct { | |||
| sampleVal value; | |||
| } sample; | |||
| #endif | |||
| @@ -0,0 +1,55 @@ | |||
| /* | |||
| This file is a part of the NV Speech Player project. | |||
| URL: https://bitbucket.org/nvaccess/speechplayer | |||
| Copyright 2014 NV Access Limited. | |||
| This program is free software: you can redistribute it and/or modify | |||
| it under the terms of the GNU General Public License version 2.0, as published by | |||
| the Free Software Foundation. | |||
| This program is distributed in the hope that it will be useful, | |||
| but WITHOUT ANY WARRANTY; without even the implied warranty of | |||
| MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. | |||
| This license can be found at: | |||
| http://www.gnu.org/licenses/old-licenses/gpl-2.0.html | |||
| */ | |||
| #include "frame.h" | |||
| #include "speechWaveGenerator.h" | |||
| #include "speechPlayer.h" | |||
// State behind an opaque speechPlayer_handle_t: the sample rate passed to
// speechPlayer_initialize plus the frame manager and wave generator created there.
// Both objects are deleted by speechPlayer_terminate.
| typedef struct { | |||
| int sampleRate; | |||
| FrameManager* frameManager; | |||
| SpeechWaveGenerator* waveGenerator; | |||
| } speechPlayer_handleInfo_t; | |||
| speechPlayer_handle_t speechPlayer_initialize(int sampleRate) { | |||
| speechPlayer_handleInfo_t* playerHandleInfo=new speechPlayer_handleInfo_t; | |||
| playerHandleInfo->sampleRate=sampleRate; | |||
| playerHandleInfo->frameManager=FrameManager::create(); | |||
| playerHandleInfo->waveGenerator=SpeechWaveGenerator::create(sampleRate); | |||
| playerHandleInfo->waveGenerator->setFrameManager(playerHandleInfo->frameManager); | |||
| return (speechPlayer_handle_t)playerHandleInfo; | |||
| } | |||
| void speechPlayer_queueFrame(speechPlayer_handle_t playerHandle, speechPlayer_frame_t* framePtr, unsigned int minFrameDuration, unsigned int fadeDuration, int userIndex, bool purgeQueue) { | |||
| speechPlayer_handleInfo_t* playerHandleInfo=(speechPlayer_handleInfo_t*)playerHandle; | |||
| if (fadeDuration < 1) fadeDuration = 1; | |||
| playerHandleInfo->frameManager->queueFrame(framePtr,minFrameDuration,fadeDuration,userIndex,purgeQueue); | |||
| } | |||
| int speechPlayer_synthesize(speechPlayer_handle_t playerHandle, unsigned int sampleCount, sample* sampleBuf) { | |||
| return ((speechPlayer_handleInfo_t*)playerHandle)->waveGenerator->generate(sampleCount,sampleBuf); | |||
| } | |||
| int speechPlayer_getLastIndex(speechPlayer_handle_t playerHandle) { | |||
| speechPlayer_handleInfo_t* playerHandleInfo=(speechPlayer_handleInfo_t*)playerHandle; | |||
| return playerHandleInfo->frameManager->getLastIndex(); | |||
| } | |||
| void speechPlayer_terminate(speechPlayer_handle_t playerHandle) { | |||
| speechPlayer_handleInfo_t* playerHandleInfo=(speechPlayer_handleInfo_t*)playerHandle; | |||
| delete playerHandleInfo->waveGenerator; | |||
| delete playerHandleInfo->frameManager; | |||
| delete playerHandleInfo; | |||
| } | |||
| @@ -0,0 +1,37 @@ | |||
| /* | |||
| This file is a part of the NV Speech Player project. | |||
| URL: https://bitbucket.org/nvaccess/speechplayer | |||
| Copyright 2014 NV Access Limited. | |||
| This program is free software: you can redistribute it and/or modify | |||
| it under the terms of the GNU General Public License version 2.0, as published by | |||
| the Free Software Foundation. | |||
| This program is distributed in the hope that it will be useful, | |||
| but WITHOUT ANY WARRANTY; without even the implied warranty of | |||
| MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. | |||
| This license can be found at: | |||
| http://www.gnu.org/licenses/old-licenses/gpl-2.0.html | |||
| */ | |||
| #ifndef SPEECHPLAYER_H | |||
| #define SPEECHPLAYER_H | |||
| #ifdef __cplusplus | |||
| extern "C" { | |||
| #endif | |||
| #include "frame.h" | |||
| #include "sample.h" | |||
| typedef void* speechPlayer_handle_t; | |||
| speechPlayer_handle_t speechPlayer_initialize(int sampleRate); | |||
| void speechPlayer_queueFrame(speechPlayer_handle_t playerHandle, speechPlayer_frame_t* framePtr, unsigned int minFrameDuration, unsigned int fadeDuration, int userIndex, bool purgeQueue); | |||
| int speechPlayer_synthesize(speechPlayer_handle_t playerHandle, unsigned int sampleCount, sample* sampleBuf); | |||
| int speechPlayer_getLastIndex(speechPlayer_handle_t playerHandle); | |||
| void speechPlayer_terminate(speechPlayer_handle_t playerHandle); | |||
| #ifdef __cplusplus | |||
| } | |||
| #endif | |||
| #endif | |||
| @@ -0,0 +1,221 @@ | |||
| /* | |||
| This file is a part of the NV Speech Player project. | |||
| URL: https://bitbucket.org/nvaccess/speechplayer | |||
| Copyright 2014 NV Access Limited. | |||
| This program is free software: you can redistribute it and/or modify | |||
| it under the terms of the GNU General Public License version 2.0, as published by | |||
| the Free Software Foundation. | |||
| This program is distributed in the hope that it will be useful, | |||
| but WITHOUT ANY WARRANTY; without even the implied warranty of | |||
| MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. | |||
| This license can be found at: | |||
| http://www.gnu.org/licenses/old-licenses/gpl-2.0.html | |||
| */ | |||
| /* | |||
| Based on klsyn-88, found at http://linguistics.berkeley.edu/phonlab/resources/ | |||
| */ | |||
| #define _USE_MATH_DEFINES | |||
| #include <cassert> | |||
| #include <cmath> | |||
| #include <cstdlib> | |||
| #include "utils.h" | |||
| #include "speechWaveGenerator.h" | |||
| using namespace std; | |||
| const double PITWO=M_PI*2; | |||
/*
 * Noise source: each sample is a fresh rand() draw scaled to [0,1] plus
 * 0.75 times the previously returned sample.
 */
class NoiseGenerator {
	private:
	double lastValue; // previously returned sample, fed back into the next one

	public:
	NoiseGenerator(): lastValue(0.0) {}

	// Produces the next noise sample and remembers it for the feedback term.
	double getNext() {
		const double fresh=(double)rand()/RAND_MAX;
		lastValue=fresh+0.75*lastValue;
		return lastValue;
	}
};
/*
 * Phase accumulator: every call advances the cycle position by
 * frequency/sampleRate and wraps it with fmod(...,1).
 */
class FrequencyGenerator {
	private:
	int sampleRate;
	double lastCyclePos; // cycle position returned by the previous call

	public:
	FrequencyGenerator(int sr): sampleRate(sr), lastCyclePos(0) {}

	// Advances by one sample at the given frequency and returns the new cycle position.
	double getNext(double frequency) {
		const double step=frequency/sampleRate;
		lastCyclePos=fmod(step+lastCyclePos,1.0);
		return lastCyclePos;
	}
};
| class VoiceGenerator { | |||
| private: | |||
| FrequencyGenerator pitchGen; | |||
| FrequencyGenerator vibratoGen; | |||
| NoiseGenerator aspirationGen; | |||
| public: | |||
| bool glottisOpen; | |||
| VoiceGenerator(int sr): pitchGen(sr), vibratoGen(sr), aspirationGen(), glottisOpen(false) {}; | |||
| double getNext(const speechPlayer_frame_t* frame) { | |||
| double vibrato=(sin(vibratoGen.getNext(frame->vibratoSpeed)*PITWO)*0.06*frame->vibratoPitchOffset)+1; | |||
| double voice=pitchGen.getNext(frame->voicePitch*vibrato); | |||
| double aspiration=aspirationGen.getNext()*0.2; | |||
| double turbulence=aspiration*frame->voiceTurbulenceAmplitude; | |||
| glottisOpen=voice>=frame->glottalOpenQuotient; | |||
| if(!glottisOpen) { | |||
| turbulence*=0.01; | |||
| } | |||
| voice=(voice*2)-1; | |||
| voice+=turbulence; | |||
| voice*=frame->voiceAmplitude; | |||
| aspiration*=frame->aspirationAmplitude; | |||
| return aspiration+voice; | |||
| } | |||
| }; | |||
| class Resonator { | |||
| private: | |||
| //raw parameters | |||
| int sampleRate; | |||
| double frequency; | |||
| double bandwidth; | |||
| bool anti; | |||
| //calculated parameters | |||
| bool setOnce; | |||
| double a, b, c; | |||
| //Memory | |||
| double p1, p2; | |||
| public: | |||
| Resonator(int sampleRate, bool anti=false) { | |||
| this->sampleRate=sampleRate; | |||
| this->anti=anti; | |||
| this->setOnce=false; | |||
| this->p1=0; | |||
| this->p2=0; | |||
| } | |||
| void setParams(double frequency, double bandwidth) { | |||
| if(!setOnce||(frequency!=this->frequency)||(bandwidth!=this->bandwidth)) { | |||
| this->frequency=frequency; | |||
| this->bandwidth=bandwidth; | |||
| double r=exp(-M_PI/sampleRate*bandwidth); | |||
| c=-(r*r); | |||
| b=r*cos(PITWO/sampleRate*-frequency)*2.0; | |||
| a=1.0-b-c; | |||
| if(anti&&frequency!=0) { | |||
| a=1.0/a; | |||
| c*=-a; | |||
| b*=-a; | |||
| } | |||
| } | |||
| this->setOnce=true; | |||
| } | |||
| double resonate(double in, double frequency, double bandwidth) { | |||
| setParams(frequency,bandwidth); | |||
| double out=a*in+b*p1+c*p2; | |||
| p2=p1; | |||
| p1=anti?in:out; | |||
| return out; | |||
| } | |||
| }; | |||
| class CascadeFormantGenerator { | |||
| private: | |||
| int sampleRate; | |||
| Resonator r1, r2, r3, r4, r5, r6, rN0, rNP; | |||
| public: | |||
| CascadeFormantGenerator(int sr): sampleRate(sr), r1(sr), r2(sr), r3(sr), r4(sr), r5(sr), r6(sr), rN0(sr,true), rNP(sr) {}; | |||
| double getNext(const speechPlayer_frame_t* frame, bool glottisOpen, double input) { | |||
| input/=2.0; | |||
| double n0Output=rN0.resonate(input,frame->cfN0,frame->cbN0); | |||
| double output=calculateValueAtFadePosition(input,rNP.resonate(n0Output,frame->cfNP,frame->cbNP),frame->caNP); | |||
| output=r6.resonate(output,frame->cf6,frame->cb6); | |||
| output=r5.resonate(output,frame->cf5,frame->cb5); | |||
| output=r4.resonate(output,frame->cf4,frame->cb4); | |||
| output=r3.resonate(output,frame->cf3,frame->cb3); | |||
| output=r2.resonate(output,frame->cf2,frame->cb2); | |||
| output=r1.resonate(output,frame->cf1,frame->cb1); | |||
| return output; | |||
| } | |||
| }; | |||
| class ParallelFormantGenerator { | |||
| private: | |||
| int sampleRate; | |||
| Resonator r1, r2, r3, r4, r5, r6; | |||
| public: | |||
| ParallelFormantGenerator(int sr): sampleRate(sr), r1(sr), r2(sr), r3(sr), r4(sr), r5(sr), r6(sr) {}; | |||
| double getNext(const speechPlayer_frame_t* frame, double input) { | |||
| input/=2.0; | |||
| double output=0; | |||
| output+=(r1.resonate(input,frame->pf1,frame->pb1)-input)*frame->pa1; | |||
| output+=(r2.resonate(input,frame->pf2,frame->pb2)-input)*frame->pa2; | |||
| output+=(r3.resonate(input,frame->pf3,frame->pb3)-input)*frame->pa3; | |||
| output+=(r4.resonate(input,frame->pf4,frame->pb4)-input)*frame->pa4; | |||
| output+=(r5.resonate(input,frame->pf5,frame->pb5)-input)*frame->pa5; | |||
| output+=(r6.resonate(input,frame->pf6,frame->pb6)-input)*frame->pa6; | |||
| return calculateValueAtFadePosition(output,input,frame->parallelBypass); | |||
| } | |||
| }; | |||
| class SpeechWaveGeneratorImpl: public SpeechWaveGenerator { | |||
| private: | |||
| int sampleRate; | |||
| VoiceGenerator voiceGenerator; | |||
| NoiseGenerator fricGenerator; | |||
| CascadeFormantGenerator cascade; | |||
| ParallelFormantGenerator parallel; | |||
| FrameManager* frameManager; | |||
| public: | |||
| SpeechWaveGeneratorImpl(int sr): sampleRate(sr), voiceGenerator(sr), fricGenerator(), cascade(sr), parallel(sr), frameManager(NULL) { | |||
| } | |||
| unsigned int generate(const unsigned int sampleCount, sample* sampleBuf) { | |||
| if(!frameManager) return 0; | |||
| double val=0; | |||
| for(unsigned int i=0;i<sampleCount;++i) { | |||
| const speechPlayer_frame_t* frame=frameManager->getCurrentFrame(); | |||
| if(frame) { | |||
| double voice=voiceGenerator.getNext(frame); | |||
| double cascadeOut=cascade.getNext(frame,voiceGenerator.glottisOpen,voice*frame->preFormantGain); | |||
| double fric=fricGenerator.getNext()*0.3*frame->fricationAmplitude; | |||
| double parallelOut=parallel.getNext(frame,fric*frame->preFormantGain); | |||
| double out=(cascadeOut+parallelOut)*frame->outputGain; | |||
| sampleBuf[i].value=(int)MAX(MIN(out*4000,32000),-32000); | |||
| } else { | |||
| return i; | |||
| } | |||
| } | |||
| return sampleCount; | |||
| } | |||
| void setFrameManager(FrameManager* frameManager) { | |||
| this->frameManager=frameManager; | |||
| } | |||
| }; | |||
| SpeechWaveGenerator* SpeechWaveGenerator::create(int sampleRate) {return new SpeechWaveGeneratorImpl(sampleRate); } | |||
| @@ -0,0 +1,27 @@ | |||
| /* | |||
| This file is a part of the NV Speech Player project. | |||
| URL: https://bitbucket.org/nvaccess/speechplayer | |||
| Copyright 2014 NV Access Limited. | |||
| This program is free software: you can redistribute it and/or modify | |||
| it under the terms of the GNU General Public License version 2.0, as published by | |||
| the Free Software Foundation. | |||
| This program is distributed in the hope that it will be useful, | |||
| but WITHOUT ANY WARRANTY; without even the implied warranty of | |||
| MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. | |||
| This license can be found at: | |||
| http://www.gnu.org/licenses/old-licenses/gpl-2.0.html | |||
| */ | |||
| #ifndef SPEECHPLAYERSPEECHWAVEGENERATOR_H | |||
| #define SPEECHPLAYERSPEECHWAVEGENERATOR_H | |||
| #include "frame.h" | |||
| #include "waveGenerator.h" | |||
| class SpeechWaveGenerator: public WaveGenerator { | |||
| public: | |||
| static SpeechWaveGenerator* create(int sampleRate); | |||
| virtual void setFrameManager(FrameManager* frameManager)=0; | |||
| }; | |||
| #endif | |||
| @@ -0,0 +1,37 @@ | |||
| /* | |||
| This file is a part of the NV Speech Player project. | |||
| URL: https://bitbucket.org/nvaccess/speechplayer | |||
| Copyright 2014 NV Access Limited. | |||
| This program is free software: you can redistribute it and/or modify | |||
| it under the terms of the GNU General Public License version 2.0, as published by | |||
| the Free Software Foundation. | |||
| This program is distributed in the hope that it will be useful, | |||
| but WITHOUT ANY WARRANTY; without even the implied warranty of | |||
| MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. | |||
| This license can be found at: | |||
| http://www.gnu.org/licenses/old-licenses/gpl-2.0.html | |||
| */ | |||
| #ifndef SPEECHPLAYER_UTILS_H | |||
| #define SPEECHPLAYER_UTILS_H | |||
| #ifndef M_PI | |||
| #define M_PI 3.14159265358979323846 | |||
| #endif | |||
/* Returns the larger of two ints; b is returned when they are equal. */
static inline int MAX(int a, int b) {
	if (a > b)
		return a;
	return b;
}
/* Returns the smaller of two ints; b is returned when they are equal. */
static inline int MIN(int a, int b) {
	if (a < b)
		return a;
	return b;
}
/* Returns 1 when x is NaN (the only value that compares unequal to itself), otherwise 0. */
static inline int ISNAN (double x) {
	return (x != x) ? 1 : 0;
}
| inline double calculateValueAtFadePosition(double oldVal, double newVal, double curFadeRatio) { | |||
| if(ISNAN(newVal)) return oldVal; | |||
| return oldVal+((newVal-oldVal)*curFadeRatio); | |||
| } | |||
| #endif | |||
| @@ -0,0 +1,27 @@ | |||
| /* | |||
| This file is a part of the NV Speech Player project. | |||
| URL: https://bitbucket.org/nvaccess/speechplayer | |||
| Copyright 2014 NV Access Limited. | |||
| This program is free software: you can redistribute it and/or modify | |||
| it under the terms of the GNU General Public License version 2.0, as published by | |||
| the Free Software Foundation. | |||
| This program is distributed in the hope that it will be useful, | |||
| but WITHOUT ANY WARRANTY; without even the implied warranty of | |||
| MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. | |||
| This license can be found at: | |||
| http://www.gnu.org/licenses/old-licenses/gpl-2.0.html | |||
| */ | |||
| #ifndef SPEECHPLAYERWAVEGENERATOR_H | |||
| #define SPEECHPLAYERWAVEGENERATOR_H | |||
| #include <list> | |||
| #include "sample.h" | |||
| #include "speechPlayer.h" | |||
| class WaveGenerator { | |||
| public: | |||
| virtual unsigned int generate(const unsigned int bufSize, sample* buffer)=0; | |||
| }; | |||
| #endif | |||
| @@ -87,8 +87,8 @@ | |||
| </PrecompiledHeader> | |||
| <WarningLevel>TurnOffAllWarnings</WarningLevel> | |||
| <Optimization>Disabled</Optimization> | |||
| <AdditionalIncludeDirectories>$(ProjectDir);$(ProjectDir)/../include;$(ProjectDir)/../pcaudiolib/src/include;$(ProjectDir)/../include/compat;$(ProjectDir)/../ucd-tools/src/include</AdditionalIncludeDirectories> | |||
| <PreprocessorDefinitions>LIBESPEAK_NG_EXPORT;INCLUDE_KLATT;INCLUDE_MBROLA;HAVE_PCAUDIOLIB_AUDIO_H;%(PreprocessorDefinitions)</PreprocessorDefinitions> | |||
| <AdditionalIncludeDirectories>$(ProjectDir);$(ProjectDir)/../include;$(ProjectDir)/../pcaudiolib/src/include;$(ProjectDir)/../speechPlayer/include;$(ProjectDir)/../include/compat;$(ProjectDir)/../ucd-tools/src/include</AdditionalIncludeDirectories> | |||
| <PreprocessorDefinitions>LIBESPEAK_NG_EXPORT;INCLUDE_KLATT;INCLUDE_SPEECHPLAYER;INCLUDE_MBROLA;HAVE_PCAUDIOLIB_AUDIO_H;%(PreprocessorDefinitions)</PreprocessorDefinitions> | |||
| <ProgramDataBaseFileName>$(IntDir)libespeak-ng.pdb</ProgramDataBaseFileName> | |||
| </ClCompile> | |||
| <Link> | |||
| @@ -104,9 +104,9 @@ | |||
| </PrecompiledHeader> | |||
| <WarningLevel>TurnOffAllWarnings</WarningLevel> | |||
| <Optimization>Disabled</Optimization> | |||
| <PreprocessorDefinitions>LIBESPEAK_NG_EXPORT;INCLUDE_KLATT;INCLUDE_MBROLA;HAVE_PCAUDIOLIB_AUDIO_H;%(PreprocessorDefinitions)</PreprocessorDefinitions> | |||
| <PreprocessorDefinitions>LIBESPEAK_NG_EXPORT;INCLUDE_KLATT;INCLUDE_SPEECHPLAYER;INCLUDE_MBROLA;HAVE_PCAUDIOLIB_AUDIO_H;%(PreprocessorDefinitions)</PreprocessorDefinitions> | |||
| <ProgramDataBaseFileName>$(IntDir)libespeak-ng.pdb</ProgramDataBaseFileName> | |||
| <AdditionalIncludeDirectories>$(ProjectDir);$(ProjectDir)/../include;$(ProjectDir)/../pcaudiolib/src/include;$(ProjectDir)/../include/compat;$(ProjectDir)/../ucd-tools/src/include</AdditionalIncludeDirectories> | |||
| <AdditionalIncludeDirectories>$(ProjectDir);$(ProjectDir)/../include;$(ProjectDir)/../pcaudiolib/src/include;$(ProjectDir)/../speechPlayer/include;$(ProjectDir)/../include/compat;$(ProjectDir)/../ucd-tools/src/include</AdditionalIncludeDirectories> | |||
| </ClCompile> | |||
| <Link> | |||
| <SubSystem>Windows</SubSystem> | |||
| @@ -123,8 +123,8 @@ | |||
| <Optimization>MaxSpeed</Optimization> | |||
| <FunctionLevelLinking>true</FunctionLevelLinking> | |||
| <IntrinsicFunctions>true</IntrinsicFunctions> | |||
| <AdditionalIncludeDirectories>$(ProjectDir);$(ProjectDir)/../include;$(ProjectDir)/../pcaudiolib/src/include;$(ProjectDir)/../include/compat;$(ProjectDir)/../ucd-tools/src/include</AdditionalIncludeDirectories> | |||
| <PreprocessorDefinitions>LIBESPEAK_NG_EXPORT;INCLUDE_KLATT;INCLUDE_MBROLA;HAVE_PCAUDIOLIB_AUDIO_H;%(PreprocessorDefinitions)</PreprocessorDefinitions> | |||
| <AdditionalIncludeDirectories>$(ProjectDir);$(ProjectDir)/../include;$(ProjectDir)/../pcaudiolib/src/include;$(ProjectDir)/../speechPlayer/include;$(ProjectDir)/../include/compat;$(ProjectDir)/../ucd-tools/src/include</AdditionalIncludeDirectories> | |||
| <PreprocessorDefinitions>LIBESPEAK_NG_EXPORT;INCLUDE_KLATT;INCLUDE_SPEECHPLAYER;INCLUDE_MBROLA;HAVE_PCAUDIOLIB_AUDIO_H;%(PreprocessorDefinitions)</PreprocessorDefinitions> | |||
| <ProgramDataBaseFileName>$(IntDir)libespeak-ng.pdb</ProgramDataBaseFileName> | |||
| </ClCompile> | |||
| <Link> | |||
| @@ -144,9 +144,9 @@ | |||
| <Optimization>MaxSpeed</Optimization> | |||
| <FunctionLevelLinking>true</FunctionLevelLinking> | |||
| <IntrinsicFunctions>true</IntrinsicFunctions> | |||
| <PreprocessorDefinitions>LIBESPEAK_NG_EXPORT;INCLUDE_KLATT;INCLUDE_MBROLA;HAVE_PCAUDIOLIB_AUDIO_H;%(PreprocessorDefinitions)</PreprocessorDefinitions> | |||
| <PreprocessorDefinitions>LIBESPEAK_NG_EXPORT;INCLUDE_KLATT;INCLUDE_SPEECHPLAYER;INCLUDE_MBROLA;HAVE_PCAUDIOLIB_AUDIO_H;%(PreprocessorDefinitions)</PreprocessorDefinitions> | |||
| <ProgramDataBaseFileName>$(IntDir)libespeak-ng.pdb</ProgramDataBaseFileName> | |||
| <AdditionalIncludeDirectories>$(ProjectDir);$(ProjectDir)/../include;$(ProjectDir)/../pcaudiolib/src/include;$(ProjectDir)/../include/compat;$(ProjectDir)/../ucd-tools/src/include</AdditionalIncludeDirectories> | |||
| <AdditionalIncludeDirectories>$(ProjectDir);$(ProjectDir)/../include;$(ProjectDir)/../pcaudiolib/src/include;$(ProjectDir)/../speechPlayer/include;$(ProjectDir)/../include/compat;$(ProjectDir)/../ucd-tools/src/include</AdditionalIncludeDirectories> | |||
| </ClCompile> | |||
| <Link> | |||
| <SubSystem>Windows</SubSystem> | |||
| @@ -183,6 +183,7 @@ | |||
| <ClCompile Include="..\libespeak-ng\setlengths.c" /> | |||
| <ClCompile Include="..\libespeak-ng\spect.c" /> | |||
| <ClCompile Include="..\libespeak-ng\speech.c" /> | |||
| <ClCompile Include="..\libespeak-ng\sPlayer.c" /> | |||
| <ClCompile Include="..\libespeak-ng\ssml.c" /> | |||
| <ClCompile Include="..\libespeak-ng\synthdata.c" /> | |||
| <ClCompile Include="..\libespeak-ng\synthesize.c" /> | |||
| @@ -194,6 +195,9 @@ | |||
| <ClCompile Include="..\pcaudiolib\src\audio.c" /> | |||
| <ClCompile Include="..\pcaudiolib\src\windows.c" /> | |||
| <ClCompile Include="..\pcaudiolib\src\xaudio2.cpp" /> | |||
| <ClCompile Include="..\speechPlayer\src\frame.cpp" /> | |||
| <ClCompile Include="..\speechPlayer\src\speechPlayer.cpp" /> | |||
| <ClCompile Include="..\speechPlayer\src\speechWaveGenerator.cpp" /> | |||
| <ClCompile Include="com\comentrypoints.c" /> | |||
| <ClCompile Include="com\ttsengine.cpp" /> | |||
| </ItemGroup> | |||
| @@ -207,6 +211,7 @@ | |||
| <ClInclude Include="..\libespeak-ng\sintab.h" /> | |||
| <ClInclude Include="..\libespeak-ng\spect.h" /> | |||
| <ClInclude Include="..\libespeak-ng\speech.h" /> | |||
| <ClInclude Include="..\libespeak-ng\sPlayer.h" /> | |||
| <ClInclude Include="..\libespeak-ng\synthesize.h" /> | |||
| <ClInclude Include="..\libespeak-ng\translate.h" /> | |||
| <ClInclude Include="..\libespeak-ng\voice.h" /> | |||
| @@ -0,0 +1,3 @@ | |||
/* Intentionally empty function; it takes no arguments and does nothing. */
void dummy() {
}