#include "klatt.h" | #include "klatt.h" | ||||
extern unsigned char *out_ptr; | extern unsigned char *out_ptr; | ||||
extern unsigned char *out_start; | |||||
extern unsigned char *out_end; | extern unsigned char *out_end; | ||||
extern WGEN_DATA wdata; | |||||
static int nsamples; | static int nsamples; | ||||
static int sample_count; | static int sample_count; | ||||
static double DBtoLIN(long); | static double DBtoLIN(long); | ||||
static void frame_init(klatt_frame_ptr); | static void frame_init(klatt_frame_ptr); | ||||
static void setabc(long, long, resonator_ptr); | static void setabc(long, long, resonator_ptr); | ||||
static void SetSynth_Klatt(int length, frame_t *fr1, frame_t *fr2, voice_t *v, int control); | |||||
static void setzeroabc(long, long, resonator_ptr); | static void setzeroabc(long, long, resonator_ptr); | ||||
static klatt_frame_t kt_frame; | static klatt_frame_t kt_frame; | ||||
Converts synthesis parameters to a waveform. | Converts synthesis parameters to a waveform. | ||||
*/ | */ | ||||
static int parwave(klatt_frame_ptr frame) | |||||
static int parwave(klatt_frame_ptr frame, WGEN_DATA *wdata) | |||||
{ | { | ||||
double temp; | double temp; | ||||
int value; | int value; | ||||
out = outbypas - out; | out = outbypas - out; | ||||
out = resonator(&(kt_globals.rsn[Rout]), out); | out = resonator(&(kt_globals.rsn[Rout]), out); | ||||
temp = (int)(out * wdata.amplitude * kt_globals.amp_gain0); // Convert back to integer | |||||
temp = (int)(out * wdata->amplitude * kt_globals.amp_gain0); // Convert back to integer | |||||
// mix with a recorded WAV if required for this phoneme | // mix with a recorded WAV if required for this phoneme | ||||
signed char c; | signed char c; | ||||
int sample; | int sample; | ||||
if (wdata.mix_wavefile_ix < wdata.n_mix_wavefile) { | |||||
if (wdata.mix_wave_scale == 0) { | |||||
if (wdata->mix_wavefile_ix < wdata->n_mix_wavefile) { | |||||
if (wdata->mix_wave_scale == 0) { | |||||
// a 16 bit sample | // a 16 bit sample | ||||
c = wdata.mix_wavefile[wdata.mix_wavefile_ix+1]; | |||||
sample = wdata.mix_wavefile[wdata.mix_wavefile_ix] + (c * 256); | |||||
wdata.mix_wavefile_ix += 2; | |||||
c = wdata->mix_wavefile[wdata->mix_wavefile_ix+1]; | |||||
sample = wdata->mix_wavefile[wdata->mix_wavefile_ix] + (c * 256); | |||||
wdata->mix_wavefile_ix += 2; | |||||
} else { | } else { | ||||
// a 8 bit sample, scaled | // a 8 bit sample, scaled | ||||
sample = (signed char)wdata.mix_wavefile[wdata.mix_wavefile_ix++] * wdata.mix_wave_scale; | |||||
sample = (signed char)wdata->mix_wavefile[wdata->mix_wavefile_ix++] * wdata->mix_wave_scale; | |||||
} | } | ||||
int z2 = sample * wdata.amplitude_v / 1024; | |||||
z2 = (z2 * wdata.mix_wave_amp)/40; | |||||
int z2 = sample * wdata->amplitude_v / 1024; | |||||
z2 = (z2 * wdata->mix_wave_amp)/40; | |||||
temp += z2; | temp += z2; | ||||
} | } | ||||
return (double)(amptable[dB]) * 0.001; | return (double)(amptable[dB]) * 0.001; | ||||
} | } | ||||
extern voice_t *wvoice; | |||||
static klatt_peaks_t peaks[N_PEAKS]; | static klatt_peaks_t peaks[N_PEAKS]; | ||||
static int end_wave; | static int end_wave; | ||||
static int klattp[N_KLATTP]; | static int klattp[N_KLATTP]; | ||||
static double klattp1[N_KLATTP]; | static double klattp1[N_KLATTP]; | ||||
static double klattp_inc[N_KLATTP]; | static double klattp_inc[N_KLATTP]; | ||||
static int Wavegen_Klatt(int resume) | |||||
int Wavegen_Klatt(int length, int resume, frame_t *fr1, frame_t *fr2, WGEN_DATA *wdata, voice_t *wvoice) | |||||
{ | { | ||||
if (resume == 0) | |||||
SetSynth_Klatt(length, fr1, fr2, wvoice, 1); | |||||
int pk; | int pk; | ||||
int x; | int x; | ||||
int ix; | int ix; | ||||
sample_count = 0; | sample_count = 0; | ||||
while (sample_count < nsamples) { | while (sample_count < nsamples) { | ||||
kt_frame.F0hz10 = (wdata.pitch * 10) / 4096; | |||||
kt_frame.F0hz10 = (wdata->pitch * 10) / 4096; | |||||
// formants F6,F7,F8 are fixed values for cascade resonators, set in KlattInit() | // formants F6,F7,F8 are fixed values for cascade resonators, set in KlattInit() | ||||
// but F6 is used for parallel resonator | // but F6 is used for parallel resonator | ||||
} | } | ||||
// advance the pitch | // advance the pitch | ||||
wdata.pitch_ix += wdata.pitch_inc; | |||||
if ((ix = wdata.pitch_ix>>8) > 127) ix = 127; | |||||
x = wdata.pitch_env[ix] * wdata.pitch_range; | |||||
wdata.pitch = (x>>8) + wdata.pitch_base; | |||||
wdata->pitch_ix += wdata->pitch_inc; | |||||
if ((ix = wdata->pitch_ix>>8) > 127) ix = 127; | |||||
x = wdata->pitch_env[ix] * wdata->pitch_range; | |||||
wdata->pitch = (x>>8) + wdata->pitch_base; | |||||
kt_globals.nspfr = (nsamples - sample_count); | kt_globals.nspfr = (nsamples - sample_count); | ||||
if (kt_globals.nspfr > STEPSIZE) | if (kt_globals.nspfr > STEPSIZE) | ||||
frame_init(&kt_frame); // get parameters for next frame of speech | frame_init(&kt_frame); // get parameters for next frame of speech | ||||
if (parwave(&kt_frame) == 1) | |||||
if (parwave(&kt_frame, wdata) == 1) | |||||
return 1; // output buffer is full | return 1; // output buffer is full | ||||
} | } | ||||
end_wave = 0; | end_wave = 0; | ||||
sample_count -= fade; | sample_count -= fade; | ||||
kt_globals.nspfr = fade; | kt_globals.nspfr = fade; | ||||
if (parwave(&kt_frame) == 1) | |||||
if (parwave(&kt_frame, wdata) == 1) | |||||
return 1; // output buffer is full | return 1; // output buffer is full | ||||
} | } | ||||
return 0; | return 0; | ||||
} | } | ||||
static void SetSynth_Klatt(int length, frame_t *fr1, frame_t *fr2, voice_t *v, int control) | |||||
static void SetSynth_Klatt(int length, frame_t *fr1, frame_t *fr2, voice_t *wvoice, int control) | |||||
{ | { | ||||
int ix; | int ix; | ||||
double next; | double next; | ||||
nsamples = length; | nsamples = length; | ||||
for (ix = 1; ix < 6; ix++) { | for (ix = 1; ix < 6; ix++) { | ||||
peaks[ix].freq1 = (fr1->ffreq[ix] * v->freq[ix] / 256.0) + v->freqadd[ix]; | |||||
peaks[ix].freq1 = (fr1->ffreq[ix] * wvoice->freq[ix] / 256.0) + wvoice->freqadd[ix]; | |||||
peaks[ix].freq = (int)peaks[ix].freq1; | peaks[ix].freq = (int)peaks[ix].freq1; | ||||
next = (fr2->ffreq[ix] * v->freq[ix] / 256.0) + v->freqadd[ix]; | |||||
next = (fr2->ffreq[ix] * wvoice->freq[ix] / 256.0) + wvoice->freqadd[ix]; | |||||
peaks[ix].freq_inc = ((next - peaks[ix].freq1) * STEPSIZE) / length; | peaks[ix].freq_inc = ((next - peaks[ix].freq1) * STEPSIZE) / length; | ||||
if (ix < 4) { | if (ix < 4) { | ||||
} | } | ||||
} | } | ||||
int Wavegen_Klatt2(int length, int resume, frame_t *fr1, frame_t *fr2) | |||||
{ | |||||
if (resume == 0) | |||||
SetSynth_Klatt(length, fr1, fr2, wvoice, 1); | |||||
return Wavegen_Klatt(resume); | |||||
} | |||||
void KlattInit() | void KlattInit() | ||||
{ | { | ||||
void KlattInit(void); | void KlattInit(void); | ||||
void KlattReset(int control); | void KlattReset(int control); | ||||
int Wavegen_Klatt2(int length, int resume, frame_t *fr1, frame_t *fr2); | |||||
int Wavegen_Klatt(int length, int resume, frame_t *fr1, frame_t *fr2, WGEN_DATA *wdata, voice_t *wvoice); | |||||
#ifdef __cplusplus | #ifdef __cplusplus | ||||
} | } |
// Several phoneme tables may be loaded into memory. phoneme_tab points to | // Several phoneme tables may be loaded into memory. phoneme_tab points to | ||||
// one for the current voice | // one for the current voice | ||||
extern int n_phoneme_tab; | extern int n_phoneme_tab; | ||||
extern int current_phoneme_table; | |||||
extern PHONEME_TAB *phoneme_tab[N_PHONEME_TAB]; | extern PHONEME_TAB *phoneme_tab[N_PHONEME_TAB]; | ||||
extern unsigned char phoneme_tab_flags[N_PHONEME_TAB]; // bit 0: not inherited | |||||
typedef struct { | typedef struct { | ||||
char name[N_PHONEME_TAB_NAME]; | char name[N_PHONEME_TAB_NAME]; |
unsigned char *outbuf = NULL; | unsigned char *outbuf = NULL; | ||||
int outbuf_size = 0; | int outbuf_size = 0; | ||||
unsigned char *out_start; | |||||
espeak_EVENT *event_list = NULL; | espeak_EVENT *event_list = NULL; | ||||
int event_list_ix = 0; | int event_list_ix = 0; |
// copy the current phoneme table into here | // copy the current phoneme table into here | ||||
int n_phoneme_tab; | int n_phoneme_tab; | ||||
int current_phoneme_table; | |||||
PHONEME_TAB *phoneme_tab[N_PHONEME_TAB]; | PHONEME_TAB *phoneme_tab[N_PHONEME_TAB]; | ||||
unsigned char phoneme_tab_flags[N_PHONEME_TAB]; // bit 0: not inherited | |||||
unsigned short *phoneme_index = NULL; | unsigned short *phoneme_index = NULL; | ||||
char *phondata_ptr = NULL; | char *phondata_ptr = NULL; | ||||
int wavefile_amp; | int wavefile_amp; | ||||
int seq_len_adjust; | int seq_len_adjust; | ||||
int vowel_transition[4]; | |||||
static espeak_ng_STATUS ReadPhFile(void **ptr, const char *fname, int *size, espeak_ng_ERROR_CONTEXT *context) | static espeak_ng_STATUS ReadPhFile(void **ptr, const char *fname, int *size, espeak_ng_ERROR_CONTEXT *context) | ||||
{ | { | ||||
return (unsigned char *)&phondata_ptr[index]; | return (unsigned char *)&phondata_ptr[index]; | ||||
} | } | ||||
static void SetUpPhonemeTable(int number, bool recursing) | |||||
static void SetUpPhonemeTable(int number) | |||||
{ | { | ||||
int ix; | int ix; | ||||
int includes; | int includes; | ||||
int ph_code; | int ph_code; | ||||
PHONEME_TAB *phtab; | PHONEME_TAB *phtab; | ||||
if (recursing == false) | |||||
memset(phoneme_tab_flags, 0, sizeof(phoneme_tab_flags)); | |||||
if ((includes = phoneme_tab_list[number].includes) > 0) { | if ((includes = phoneme_tab_list[number].includes) > 0) { | ||||
// recursively include base phoneme tables | // recursively include base phoneme tables | ||||
SetUpPhonemeTable(includes-1, true); | |||||
SetUpPhonemeTable(includes - 1); | |||||
} | } | ||||
// now add the phonemes from this table | // now add the phonemes from this table | ||||
phoneme_tab[ph_code] = &phtab[ix]; | phoneme_tab[ph_code] = &phtab[ix]; | ||||
if (ph_code > n_phoneme_tab) | if (ph_code > n_phoneme_tab) | ||||
n_phoneme_tab = ph_code; | n_phoneme_tab = ph_code; | ||||
if (recursing == 0) | |||||
phoneme_tab_flags[ph_code] |= 1; // not inherited | |||||
} | } | ||||
} | } | ||||
void SelectPhonemeTable(int number) | |||||
int SelectPhonemeTable(int number) | |||||
{ | { | ||||
n_phoneme_tab = 0; | n_phoneme_tab = 0; | ||||
SetUpPhonemeTable(number, false); // recursively for included phoneme tables | |||||
SetUpPhonemeTable(number); // recursively for included phoneme tables | |||||
n_phoneme_tab++; | n_phoneme_tab++; | ||||
current_phoneme_table = number; | |||||
return number; | |||||
} | } | ||||
int LookupPhonemeTable(const char *name) | int LookupPhonemeTable(const char *name) |
int NumInstnWords(unsigned short *prog); | int NumInstnWords(unsigned short *prog); | ||||
int PhonemeCode(unsigned int mnem); | int PhonemeCode(unsigned int mnem); | ||||
void SelectPhonemeTable(int number); | |||||
int SelectPhonemeTable(int number); | |||||
int SelectPhonemeTableName(const char *name); | int SelectPhonemeTableName(const char *name); | ||||
#ifdef __cplusplus | #ifdef __cplusplus |
#include "synthesize.h" | #include "synthesize.h" | ||||
#include "translate.h" | #include "translate.h" | ||||
extern FILE *f_log; | |||||
static void SmoothSpect(void); | static void SmoothSpect(void); | ||||
// list of phonemes in a clause | // list of phonemes in a clause | ||||
syllable_end = wcmdq_tail; | syllable_end = wcmdq_tail; | ||||
SmoothSpect(); | SmoothSpect(); | ||||
syllable_centre = -1; | syllable_centre = -1; | ||||
memset(vowel_transition, 0, sizeof(vowel_transition)); | |||||
} | } | ||||
} | } | ||||
syllable_end = wcmdq_tail; | syllable_end = wcmdq_tail; | ||||
syllable_centre = -1; | syllable_centre = -1; | ||||
last_pitch_cmd = -1; | last_pitch_cmd = -1; | ||||
memset(vowel_transition, 0, sizeof(vowel_transition)); | |||||
memset(&worddata, 0, sizeof(worddata)); | memset(&worddata, 0, sizeof(worddata)); | ||||
DoPause(0, 0); // isolate from the previous clause | DoPause(0, 0); // isolate from the previous clause | ||||
} | } | ||||
return 0; // finished the phoneme list | return 0; // finished the phoneme list | ||||
} | } | ||||
static int current_phoneme_table; | |||||
int SpeakNextClause(int control) | int SpeakNextClause(int control) | ||||
{ | { | ||||
// Speak text from memory (text_in) | // Speak text from memory (text_in) | ||||
} | } | ||||
if (current_phoneme_table != voice->phoneme_tab_ix) | if (current_phoneme_table != voice->phoneme_tab_ix) | ||||
SelectPhonemeTable(voice->phoneme_tab_ix); | |||||
current_phoneme_table = SelectPhonemeTable(voice->phoneme_tab_ix); | |||||
// read the next clause from the input text file, translate it, and generate | // read the next clause from the input text file, translate it, and generate | ||||
// entries in the wavegen command queue | // entries in the wavegen command queue |
extern int wavefile_ix; | extern int wavefile_ix; | ||||
extern int wavefile_amp; | extern int wavefile_amp; | ||||
extern int vowel_transition[4]; | |||||
#define N_ECHO_BUF 5500 // max of 250mS at 22050 Hz | #define N_ECHO_BUF 5500 // max of 250mS at 22050 Hz | ||||
extern int echo_head; | extern int echo_head; | ||||
extern SPEED_FACTORS speed; | extern SPEED_FACTORS speed; | ||||
extern unsigned char *out_ptr; | extern unsigned char *out_ptr; | ||||
extern unsigned char *out_start; | |||||
extern unsigned char *out_end; | extern unsigned char *out_end; | ||||
extern espeak_EVENT *event_list; | extern espeak_EVENT *event_list; | ||||
extern t_espeak_callback *synth_callback; | extern t_espeak_callback *synth_callback; |
#define N_WAV_BUF 10 | #define N_WAV_BUF 10 | ||||
static void SetSynth(int length, int modn, frame_t *fr1, frame_t *fr2, voice_t *v); | |||||
voice_t *wvoice = NULL; | voice_t *wvoice = NULL; | ||||
FILE *f_log = NULL; | |||||
static int option_harmonic1 = 10; | static int option_harmonic1 = 10; | ||||
static int flutter_amp = 64; | static int flutter_amp = 64; | ||||
static double two_pi_t; | static double two_pi_t; | ||||
unsigned char *out_ptr; | unsigned char *out_ptr; | ||||
unsigned char *out_start; | |||||
unsigned char *out_end; | unsigned char *out_end; | ||||
espeak_ng_OUTPUT_HOOKS* output_hooks = NULL; | espeak_ng_OUTPUT_HOOKS* output_hooks = NULL; | ||||
return value; | return value; | ||||
} | } | ||||
static int Wavegen() | |||||
static int Wavegen(int length, int modulation, bool resume, frame_t *fr1, frame_t *fr2, voice_t *wvoice) | |||||
{ | { | ||||
if (resume == false) | |||||
SetSynth(length, modulation, fr1, fr2, wvoice); | |||||
if (wvoice == NULL) | if (wvoice == NULL) | ||||
return 0; | return 0; | ||||
} | } | ||||
} | } | ||||
static int Wavegen2(int length, int modulation, bool resume, frame_t *fr1, frame_t *fr2) | |||||
{ | |||||
if (resume == false) | |||||
SetSynth(length, modulation, fr1, fr2, wvoice); | |||||
return Wavegen(); | |||||
} | |||||
void Write4Bytes(FILE *f, int value) | void Write4Bytes(FILE *f, int value) | ||||
{ | { | ||||
// Write 4 bytes to a file, least significant first | // Write 4 bytes to a file, least significant first | ||||
wdata.n_mix_wavefile = 0; // ... and drop through to WCMD_SPECT case | wdata.n_mix_wavefile = 0; // ... and drop through to WCMD_SPECT case | ||||
case WCMD_SPECT: | case WCMD_SPECT: | ||||
echo_complete = echo_length; | echo_complete = echo_length; | ||||
result = Wavegen2(length & 0xffff, q[1] >> 16, resume, (frame_t *)q[2], (frame_t *)q[3]); | |||||
result = Wavegen(length & 0xffff, q[1] >> 16, resume, (frame_t *)q[2], (frame_t *)q[3], wvoice); | |||||
break; | break; | ||||
#ifdef INCLUDE_KLATT | #ifdef INCLUDE_KLATT | ||||
case WCMD_KLATT2: // as WCMD_SPECT but stop any concurrent wave file | case WCMD_KLATT2: // as WCMD_SPECT but stop any concurrent wave file | ||||
wdata.n_mix_wavefile = 0; // ... and drop through to WCMD_SPECT case | wdata.n_mix_wavefile = 0; // ... and drop through to WCMD_SPECT case | ||||
case WCMD_KLATT: | case WCMD_KLATT: | ||||
echo_complete = echo_length; | echo_complete = echo_length; | ||||
result = Wavegen_Klatt2(length & 0xffff, resume, (frame_t *)q[2], (frame_t *)q[3]); | |||||
result = Wavegen_Klatt(length & 0xffff, resume, (frame_t *)q[2], (frame_t *)q[3], &wdata, wvoice); | |||||
break; | break; | ||||
#endif | #endif | ||||
case WCMD_MARKER: | case WCMD_MARKER: |