| #include "klatt.h" | #include "klatt.h" | ||||
| extern unsigned char *out_ptr; | extern unsigned char *out_ptr; | ||||
| extern unsigned char *out_start; | |||||
| extern unsigned char *out_end; | extern unsigned char *out_end; | ||||
| extern WGEN_DATA wdata; | |||||
| static int nsamples; | static int nsamples; | ||||
| static int sample_count; | static int sample_count; | ||||
| static double DBtoLIN(long); | static double DBtoLIN(long); | ||||
| static void frame_init(klatt_frame_ptr); | static void frame_init(klatt_frame_ptr); | ||||
| static void setabc(long, long, resonator_ptr); | static void setabc(long, long, resonator_ptr); | ||||
| static void SetSynth_Klatt(int length, frame_t *fr1, frame_t *fr2, voice_t *v, int control); | |||||
| static void setzeroabc(long, long, resonator_ptr); | static void setzeroabc(long, long, resonator_ptr); | ||||
| static klatt_frame_t kt_frame; | static klatt_frame_t kt_frame; | ||||
| Converts synthesis parameters to a waveform. | Converts synthesis parameters to a waveform. | ||||
| */ | */ | ||||
| static int parwave(klatt_frame_ptr frame) | |||||
| static int parwave(klatt_frame_ptr frame, WGEN_DATA *wdata) | |||||
| { | { | ||||
| double temp; | double temp; | ||||
| int value; | int value; | ||||
| out = outbypas - out; | out = outbypas - out; | ||||
| out = resonator(&(kt_globals.rsn[Rout]), out); | out = resonator(&(kt_globals.rsn[Rout]), out); | ||||
| temp = (int)(out * wdata.amplitude * kt_globals.amp_gain0); // Convert back to integer | |||||
| temp = (int)(out * wdata->amplitude * kt_globals.amp_gain0); // Convert back to integer | |||||
| // mix with a recorded WAV if required for this phoneme | // mix with a recorded WAV if required for this phoneme | ||||
| signed char c; | signed char c; | ||||
| int sample; | int sample; | ||||
| if (wdata.mix_wavefile_ix < wdata.n_mix_wavefile) { | |||||
| if (wdata.mix_wave_scale == 0) { | |||||
| if (wdata->mix_wavefile_ix < wdata->n_mix_wavefile) { | |||||
| if (wdata->mix_wave_scale == 0) { | |||||
| // a 16 bit sample | // a 16 bit sample | ||||
| c = wdata.mix_wavefile[wdata.mix_wavefile_ix+1]; | |||||
| sample = wdata.mix_wavefile[wdata.mix_wavefile_ix] + (c * 256); | |||||
| wdata.mix_wavefile_ix += 2; | |||||
| c = wdata->mix_wavefile[wdata->mix_wavefile_ix+1]; | |||||
| sample = wdata->mix_wavefile[wdata->mix_wavefile_ix] + (c * 256); | |||||
| wdata->mix_wavefile_ix += 2; | |||||
| } else { | } else { | ||||
| // a 8 bit sample, scaled | // a 8 bit sample, scaled | ||||
| sample = (signed char)wdata.mix_wavefile[wdata.mix_wavefile_ix++] * wdata.mix_wave_scale; | |||||
| sample = (signed char)wdata->mix_wavefile[wdata->mix_wavefile_ix++] * wdata->mix_wave_scale; | |||||
| } | } | ||||
| int z2 = sample * wdata.amplitude_v / 1024; | |||||
| z2 = (z2 * wdata.mix_wave_amp)/40; | |||||
| int z2 = sample * wdata->amplitude_v / 1024; | |||||
| z2 = (z2 * wdata->mix_wave_amp)/40; | |||||
| temp += z2; | temp += z2; | ||||
| } | } | ||||
| return (double)(amptable[dB]) * 0.001; | return (double)(amptable[dB]) * 0.001; | ||||
| } | } | ||||
| extern voice_t *wvoice; | |||||
| static klatt_peaks_t peaks[N_PEAKS]; | static klatt_peaks_t peaks[N_PEAKS]; | ||||
| static int end_wave; | static int end_wave; | ||||
| static int klattp[N_KLATTP]; | static int klattp[N_KLATTP]; | ||||
| static double klattp1[N_KLATTP]; | static double klattp1[N_KLATTP]; | ||||
| static double klattp_inc[N_KLATTP]; | static double klattp_inc[N_KLATTP]; | ||||
| static int Wavegen_Klatt(int resume) | |||||
| int Wavegen_Klatt(int length, int resume, frame_t *fr1, frame_t *fr2, WGEN_DATA *wdata, voice_t *wvoice) | |||||
| { | { | ||||
| if (resume == 0) | |||||
| SetSynth_Klatt(length, fr1, fr2, wvoice, 1); | |||||
| int pk; | int pk; | ||||
| int x; | int x; | ||||
| int ix; | int ix; | ||||
| sample_count = 0; | sample_count = 0; | ||||
| while (sample_count < nsamples) { | while (sample_count < nsamples) { | ||||
| kt_frame.F0hz10 = (wdata.pitch * 10) / 4096; | |||||
| kt_frame.F0hz10 = (wdata->pitch * 10) / 4096; | |||||
| // formants F6,F7,F8 are fixed values for cascade resonators, set in KlattInit() | // formants F6,F7,F8 are fixed values for cascade resonators, set in KlattInit() | ||||
| // but F6 is used for parallel resonator | // but F6 is used for parallel resonator | ||||
| } | } | ||||
| // advance the pitch | // advance the pitch | ||||
| wdata.pitch_ix += wdata.pitch_inc; | |||||
| if ((ix = wdata.pitch_ix>>8) > 127) ix = 127; | |||||
| x = wdata.pitch_env[ix] * wdata.pitch_range; | |||||
| wdata.pitch = (x>>8) + wdata.pitch_base; | |||||
| wdata->pitch_ix += wdata->pitch_inc; | |||||
| if ((ix = wdata->pitch_ix>>8) > 127) ix = 127; | |||||
| x = wdata->pitch_env[ix] * wdata->pitch_range; | |||||
| wdata->pitch = (x>>8) + wdata->pitch_base; | |||||
| kt_globals.nspfr = (nsamples - sample_count); | kt_globals.nspfr = (nsamples - sample_count); | ||||
| if (kt_globals.nspfr > STEPSIZE) | if (kt_globals.nspfr > STEPSIZE) | ||||
| frame_init(&kt_frame); // get parameters for next frame of speech | frame_init(&kt_frame); // get parameters for next frame of speech | ||||
| if (parwave(&kt_frame) == 1) | |||||
| if (parwave(&kt_frame, wdata) == 1) | |||||
| return 1; // output buffer is full | return 1; // output buffer is full | ||||
| } | } | ||||
| end_wave = 0; | end_wave = 0; | ||||
| sample_count -= fade; | sample_count -= fade; | ||||
| kt_globals.nspfr = fade; | kt_globals.nspfr = fade; | ||||
| if (parwave(&kt_frame) == 1) | |||||
| if (parwave(&kt_frame, wdata) == 1) | |||||
| return 1; // output buffer is full | return 1; // output buffer is full | ||||
| } | } | ||||
| return 0; | return 0; | ||||
| } | } | ||||
| static void SetSynth_Klatt(int length, frame_t *fr1, frame_t *fr2, voice_t *v, int control) | |||||
| static void SetSynth_Klatt(int length, frame_t *fr1, frame_t *fr2, voice_t *wvoice, int control) | |||||
| { | { | ||||
| int ix; | int ix; | ||||
| double next; | double next; | ||||
| nsamples = length; | nsamples = length; | ||||
| for (ix = 1; ix < 6; ix++) { | for (ix = 1; ix < 6; ix++) { | ||||
| peaks[ix].freq1 = (fr1->ffreq[ix] * v->freq[ix] / 256.0) + v->freqadd[ix]; | |||||
| peaks[ix].freq1 = (fr1->ffreq[ix] * wvoice->freq[ix] / 256.0) + wvoice->freqadd[ix]; | |||||
| peaks[ix].freq = (int)peaks[ix].freq1; | peaks[ix].freq = (int)peaks[ix].freq1; | ||||
| next = (fr2->ffreq[ix] * v->freq[ix] / 256.0) + v->freqadd[ix]; | |||||
| next = (fr2->ffreq[ix] * wvoice->freq[ix] / 256.0) + wvoice->freqadd[ix]; | |||||
| peaks[ix].freq_inc = ((next - peaks[ix].freq1) * STEPSIZE) / length; | peaks[ix].freq_inc = ((next - peaks[ix].freq1) * STEPSIZE) / length; | ||||
| if (ix < 4) { | if (ix < 4) { | ||||
| } | } | ||||
| } | } | ||||
| int Wavegen_Klatt2(int length, int resume, frame_t *fr1, frame_t *fr2) | |||||
| { | |||||
| if (resume == 0) | |||||
| SetSynth_Klatt(length, fr1, fr2, wvoice, 1); | |||||
| return Wavegen_Klatt(resume); | |||||
| } | |||||
| void KlattInit() | void KlattInit() | ||||
| { | { | ||||
| void KlattInit(void); | void KlattInit(void); | ||||
| void KlattReset(int control); | void KlattReset(int control); | ||||
| int Wavegen_Klatt2(int length, int resume, frame_t *fr1, frame_t *fr2); | |||||
| int Wavegen_Klatt(int length, int resume, frame_t *fr1, frame_t *fr2, WGEN_DATA *wdata, voice_t *wvoice); | |||||
| #ifdef __cplusplus | #ifdef __cplusplus | ||||
| } | } |
| // Several phoneme tables may be loaded into memory. phoneme_tab points to | // Several phoneme tables may be loaded into memory. phoneme_tab points to | ||||
| // one for the current voice | // one for the current voice | ||||
| extern int n_phoneme_tab; | extern int n_phoneme_tab; | ||||
| extern int current_phoneme_table; | |||||
| extern PHONEME_TAB *phoneme_tab[N_PHONEME_TAB]; | extern PHONEME_TAB *phoneme_tab[N_PHONEME_TAB]; | ||||
| extern unsigned char phoneme_tab_flags[N_PHONEME_TAB]; // bit 0: not inherited | |||||
| typedef struct { | typedef struct { | ||||
| char name[N_PHONEME_TAB_NAME]; | char name[N_PHONEME_TAB_NAME]; |
| unsigned char *outbuf = NULL; | unsigned char *outbuf = NULL; | ||||
| int outbuf_size = 0; | int outbuf_size = 0; | ||||
| unsigned char *out_start; | |||||
| espeak_EVENT *event_list = NULL; | espeak_EVENT *event_list = NULL; | ||||
| int event_list_ix = 0; | int event_list_ix = 0; |
| // copy the current phoneme table into here | // copy the current phoneme table into here | ||||
| int n_phoneme_tab; | int n_phoneme_tab; | ||||
| int current_phoneme_table; | |||||
| PHONEME_TAB *phoneme_tab[N_PHONEME_TAB]; | PHONEME_TAB *phoneme_tab[N_PHONEME_TAB]; | ||||
| unsigned char phoneme_tab_flags[N_PHONEME_TAB]; // bit 0: not inherited | |||||
| unsigned short *phoneme_index = NULL; | unsigned short *phoneme_index = NULL; | ||||
| char *phondata_ptr = NULL; | char *phondata_ptr = NULL; | ||||
| int wavefile_amp; | int wavefile_amp; | ||||
| int seq_len_adjust; | int seq_len_adjust; | ||||
| int vowel_transition[4]; | |||||
| static espeak_ng_STATUS ReadPhFile(void **ptr, const char *fname, int *size, espeak_ng_ERROR_CONTEXT *context) | static espeak_ng_STATUS ReadPhFile(void **ptr, const char *fname, int *size, espeak_ng_ERROR_CONTEXT *context) | ||||
| { | { | ||||
| return (unsigned char *)&phondata_ptr[index]; | return (unsigned char *)&phondata_ptr[index]; | ||||
| } | } | ||||
| static void SetUpPhonemeTable(int number, bool recursing) | |||||
| static void SetUpPhonemeTable(int number) | |||||
| { | { | ||||
| int ix; | int ix; | ||||
| int includes; | int includes; | ||||
| int ph_code; | int ph_code; | ||||
| PHONEME_TAB *phtab; | PHONEME_TAB *phtab; | ||||
| if (recursing == false) | |||||
| memset(phoneme_tab_flags, 0, sizeof(phoneme_tab_flags)); | |||||
| if ((includes = phoneme_tab_list[number].includes) > 0) { | if ((includes = phoneme_tab_list[number].includes) > 0) { | ||||
| // recursively include base phoneme tables | // recursively include base phoneme tables | ||||
| SetUpPhonemeTable(includes-1, true); | |||||
| SetUpPhonemeTable(includes - 1); | |||||
| } | } | ||||
| // now add the phonemes from this table | // now add the phonemes from this table | ||||
| phoneme_tab[ph_code] = &phtab[ix]; | phoneme_tab[ph_code] = &phtab[ix]; | ||||
| if (ph_code > n_phoneme_tab) | if (ph_code > n_phoneme_tab) | ||||
| n_phoneme_tab = ph_code; | n_phoneme_tab = ph_code; | ||||
| if (recursing == 0) | |||||
| phoneme_tab_flags[ph_code] |= 1; // not inherited | |||||
| } | } | ||||
| } | } | ||||
| void SelectPhonemeTable(int number) | |||||
| int SelectPhonemeTable(int number) | |||||
| { | { | ||||
| n_phoneme_tab = 0; | n_phoneme_tab = 0; | ||||
| SetUpPhonemeTable(number, false); // recursively for included phoneme tables | |||||
| SetUpPhonemeTable(number); // recursively for included phoneme tables | |||||
| n_phoneme_tab++; | n_phoneme_tab++; | ||||
| current_phoneme_table = number; | |||||
| return number; | |||||
| } | } | ||||
| int LookupPhonemeTable(const char *name) | int LookupPhonemeTable(const char *name) |
| int NumInstnWords(unsigned short *prog); | int NumInstnWords(unsigned short *prog); | ||||
| int PhonemeCode(unsigned int mnem); | int PhonemeCode(unsigned int mnem); | ||||
| void SelectPhonemeTable(int number); | |||||
| int SelectPhonemeTable(int number); | |||||
| int SelectPhonemeTableName(const char *name); | int SelectPhonemeTableName(const char *name); | ||||
| #ifdef __cplusplus | #ifdef __cplusplus |
| #include "synthesize.h" | #include "synthesize.h" | ||||
| #include "translate.h" | #include "translate.h" | ||||
| extern FILE *f_log; | |||||
| static void SmoothSpect(void); | static void SmoothSpect(void); | ||||
| // list of phonemes in a clause | // list of phonemes in a clause | ||||
| syllable_end = wcmdq_tail; | syllable_end = wcmdq_tail; | ||||
| SmoothSpect(); | SmoothSpect(); | ||||
| syllable_centre = -1; | syllable_centre = -1; | ||||
| memset(vowel_transition, 0, sizeof(vowel_transition)); | |||||
| } | } | ||||
| } | } | ||||
| syllable_end = wcmdq_tail; | syllable_end = wcmdq_tail; | ||||
| syllable_centre = -1; | syllable_centre = -1; | ||||
| last_pitch_cmd = -1; | last_pitch_cmd = -1; | ||||
| memset(vowel_transition, 0, sizeof(vowel_transition)); | |||||
| memset(&worddata, 0, sizeof(worddata)); | memset(&worddata, 0, sizeof(worddata)); | ||||
| DoPause(0, 0); // isolate from the previous clause | DoPause(0, 0); // isolate from the previous clause | ||||
| } | } | ||||
| return 0; // finished the phoneme list | return 0; // finished the phoneme list | ||||
| } | } | ||||
| static int current_phoneme_table; | |||||
| int SpeakNextClause(int control) | int SpeakNextClause(int control) | ||||
| { | { | ||||
| // Speak text from memory (text_in) | // Speak text from memory (text_in) | ||||
| } | } | ||||
| if (current_phoneme_table != voice->phoneme_tab_ix) | if (current_phoneme_table != voice->phoneme_tab_ix) | ||||
| SelectPhonemeTable(voice->phoneme_tab_ix); | |||||
| current_phoneme_table = SelectPhonemeTable(voice->phoneme_tab_ix); | |||||
| // read the next clause from the input text file, translate it, and generate | // read the next clause from the input text file, translate it, and generate | ||||
| // entries in the wavegen command queue | // entries in the wavegen command queue |
| extern int wavefile_ix; | extern int wavefile_ix; | ||||
| extern int wavefile_amp; | extern int wavefile_amp; | ||||
| extern int vowel_transition[4]; | |||||
| #define N_ECHO_BUF 5500 // max of 250mS at 22050 Hz | #define N_ECHO_BUF 5500 // max of 250mS at 22050 Hz | ||||
| extern int echo_head; | extern int echo_head; | ||||
| extern SPEED_FACTORS speed; | extern SPEED_FACTORS speed; | ||||
| extern unsigned char *out_ptr; | extern unsigned char *out_ptr; | ||||
| extern unsigned char *out_start; | |||||
| extern unsigned char *out_end; | extern unsigned char *out_end; | ||||
| extern espeak_EVENT *event_list; | extern espeak_EVENT *event_list; | ||||
| extern t_espeak_callback *synth_callback; | extern t_espeak_callback *synth_callback; |
| #define N_WAV_BUF 10 | #define N_WAV_BUF 10 | ||||
| static void SetSynth(int length, int modn, frame_t *fr1, frame_t *fr2, voice_t *v); | |||||
| voice_t *wvoice = NULL; | voice_t *wvoice = NULL; | ||||
| FILE *f_log = NULL; | |||||
| static int option_harmonic1 = 10; | static int option_harmonic1 = 10; | ||||
| static int flutter_amp = 64; | static int flutter_amp = 64; | ||||
| static double two_pi_t; | static double two_pi_t; | ||||
| unsigned char *out_ptr; | unsigned char *out_ptr; | ||||
| unsigned char *out_start; | |||||
| unsigned char *out_end; | unsigned char *out_end; | ||||
| espeak_ng_OUTPUT_HOOKS* output_hooks = NULL; | espeak_ng_OUTPUT_HOOKS* output_hooks = NULL; | ||||
| return value; | return value; | ||||
| } | } | ||||
| static int Wavegen() | |||||
| static int Wavegen(int length, int modulation, bool resume, frame_t *fr1, frame_t *fr2, voice_t *wvoice) | |||||
| { | { | ||||
| if (resume == false) | |||||
| SetSynth(length, modulation, fr1, fr2, wvoice); | |||||
| if (wvoice == NULL) | if (wvoice == NULL) | ||||
| return 0; | return 0; | ||||
| } | } | ||||
| } | } | ||||
| static int Wavegen2(int length, int modulation, bool resume, frame_t *fr1, frame_t *fr2) | |||||
| { | |||||
| if (resume == false) | |||||
| SetSynth(length, modulation, fr1, fr2, wvoice); | |||||
| return Wavegen(); | |||||
| } | |||||
| void Write4Bytes(FILE *f, int value) | void Write4Bytes(FILE *f, int value) | ||||
| { | { | ||||
| // Write 4 bytes to a file, least significant first | // Write 4 bytes to a file, least significant first | ||||
| wdata.n_mix_wavefile = 0; // ... and drop through to WCMD_SPECT case | wdata.n_mix_wavefile = 0; // ... and drop through to WCMD_SPECT case | ||||
| case WCMD_SPECT: | case WCMD_SPECT: | ||||
| echo_complete = echo_length; | echo_complete = echo_length; | ||||
| result = Wavegen2(length & 0xffff, q[1] >> 16, resume, (frame_t *)q[2], (frame_t *)q[3]); | |||||
| result = Wavegen(length & 0xffff, q[1] >> 16, resume, (frame_t *)q[2], (frame_t *)q[3], wvoice); | |||||
| break; | break; | ||||
| #ifdef INCLUDE_KLATT | #ifdef INCLUDE_KLATT | ||||
| case WCMD_KLATT2: // as WCMD_SPECT but stop any concurrent wave file | case WCMD_KLATT2: // as WCMD_SPECT but stop any concurrent wave file | ||||
| wdata.n_mix_wavefile = 0; // ... and drop through to WCMD_SPECT case | wdata.n_mix_wavefile = 0; // ... and drop through to WCMD_SPECT case | ||||
| case WCMD_KLATT: | case WCMD_KLATT: | ||||
| echo_complete = echo_length; | echo_complete = echo_length; | ||||
| result = Wavegen_Klatt2(length & 0xffff, resume, (frame_t *)q[2], (frame_t *)q[3]); | |||||
| result = Wavegen_Klatt(length & 0xffff, resume, (frame_t *)q[2], (frame_t *)q[3], &wdata, wvoice); | |||||
| break; | break; | ||||
| #endif | #endif | ||||
| case WCMD_MARKER: | case WCMD_MARKER: |