Browse Source

Fixes for Klatt synthesizer


git-svn-id: https://espeak.svn.sourceforge.net/svnroot/espeak/trunk@234 d46cf337-b52f-0410-862d-fd96e6ae7743
master
jonsd 15 years ago
parent
commit
dd1623232f
10 changed files with 144 additions and 114 deletions
  1. 2
    2
      src/compiledata.cpp
  2. 2
    2
      src/formantdlg.cpp
  3. 66
    28
      src/klatt.cpp
  4. 14
    14
      src/klatt.h
  5. 10
    11
      src/spect.cpp
  6. 1
    2
      src/spect.h
  7. 45
    53
      src/spectseq.cpp
  8. 1
    1
      src/synthdata.cpp
  9. 2
    1
      src/synthesize.cpp
  10. 1
    0
      src/wavegen.cpp

+ 2
- 2
src/compiledata.cpp View File

@@ -890,7 +890,7 @@ int Compile::LoadWavefile(FILE *f, const char *fname)
c2 = c3 << 24;
c2 = c2 >> 16; // sign extend

sample = (c1 & 0xff) + c2;;
sample = (c1 & 0xff) + c2;

if(sample > max)
max = sample;
@@ -2210,7 +2210,7 @@ wxString CompileAllDictionaries()
if(f_phused != NULL)
fclose(f_phused);

LoadVoice(save_voice_name,1);
LoadVoiceVariant(save_voice_name,0);

if(errors == 0)
return(wxString::Format(_T(" Compiled %d dictionaries"),dict_count));

+ 2
- 2
src/formantdlg.cpp View File

@@ -277,9 +277,9 @@ void FormantDlg::GetValues(SpectSeq *spectseq, int frame)
}
}

for(ix=1; ix < 6; ix++)
for(ix=1; ix < 7; ix++)
{
if(ix < 3)
if(ix < 4)
{
num = 0;
value = t_klt_bw[ix]->GetValue();

+ 66
- 28
src/klatt.cpp View File

@@ -245,12 +245,13 @@ static int parwave(klatt_frame_ptr frame)

flutter(frame); /* add f0 flutter */

#ifdef deleted
#ifdef LOG_FRAMES
if(option_log_frames)
{
FILE *f;
f=fopen("klatt_log","a");
fprintf(f,"%4dhz %2dAV %4d %3d, %4d %3d, %4d %3d, %4d %3d, %4d, %3d, %4d %3d TLT=%2d\n",frame->F0hz10,frame->AVdb,
frame->F1hz,frame->B1hz,frame->F2hz,frame->B2hz,frame->F3hz,frame->B3hz,frame->F4hz,frame->B4hz,frame->F5hz,frame->B5hz,frame->F6hz,frame->B6hz,frame->TLTdb);
f=fopen("log-klatt","a");
fprintf(f,"%4dhz %2dAV %4d %3d, %4d %3d, %4d %3d, %4d %3d, %4d, %3d, FNZ=%3d TLT=%2d\n",frame->F0hz10,frame->AVdb,
frame->Fhz[1],frame->Bhz[1],frame->Fhz[2],frame->Bhz[2],frame->Fhz[3],frame->Bhz[3],frame->Fhz[4],frame->Bhz[4],frame->Fhz[5],frame->Bhz[5],frame->Fhz[0],frame->TLTdb);
fclose(f);
}
#endif
@@ -468,7 +469,7 @@ static int parwave(klatt_frame_ptr frame)
temp = 32767.0;
}
*out_ptr++ = int(temp); // **JSD
*out_ptr++ = int(temp);
*out_ptr++ = int(temp) >> 8;
sample_count++;
if(out_ptr >= out_end)
@@ -492,15 +493,28 @@ to zero.
static void reset_resonators()
{
int r_ix;
#ifdef LOG_FRAMES
if(option_log_frames)
{
FILE *f_log;
f_log=fopen("log-klatt","a");
if(f_log != NULL)
{
fprintf(f_log,"Reset\n");
fclose(f_log);
}
}
#endif

for(r_ix=0; r_ix < N_RSN; r_ix++)
for(r_ix=0; r_ix <= R6p; r_ix++)
{
kt_globals.rsn[r_ix].p1 = 0;
kt_globals.rsn[r_ix].p2 = 0;
}
}

static void parwave_init()

void KlattReset()
{
kt_globals.FLPhz = (950 * kt_globals.samrate) / 10000;
kt_globals.BLPhz = (630 * kt_globals.samrate) / 10000;
@@ -556,7 +570,7 @@ static void frame_init(klatt_frame_ptr frame)
kt_globals.amp_gain0 = DBtoLIN(Gain0_tmp) / kt_globals.scale_wav;
/* Set coefficients of variable cascade resonators */
for(ix=0; ix<=8; ix++)
for(ix=1; ix<=9; ix++)
{
// formants 1 to 8, plus nasal pole
setabc(frame->Fhz[ix],frame->Bhz[ix],&(kt_globals.rsn[ix]));
@@ -1122,12 +1136,29 @@ void SetSynth_Klatt(int length, int modn, frame_t *fr1, frame_t *fr2, voice_t *v
}
}

#ifdef LOG_FRAMES
if(option_log_frames)
{
//FILE *f;
//f=fopen("klatt_log","a");
//fprintf(f,"len %4d (%3d %4d %4d) (%3d %4d %4d)\n",length,fr1->ffreq[1],fr1->ffreq[2],fr1->ffreq[3],fr2->ffreq[1],fr2->ffreq[2],fr2->ffreq[3]);
//fclose(f);
// Log the segment length and the parameters of its start frame (fr1)
// and end frame (fr2) to the espeakedit log.
FILE *f_log;
f_log=fopen("log-espeakedit","a");
if(f_log != NULL)
{
// Each half prints: klattp[KLATT_FNZ]*2, ffreq[1]..ffreq[5], then klattp[KLATT_AV] in brackets.
// The second half must read every value from fr2 (ffreq[4] and ffreq[5]
// were previously taken from fr1 by mistake).
fprintf(f_log,"K %3dmS %3d %3d %4d %4d %4d %4d (%2d) to %3d %3d %4d %4d %4d %4d (%2d)\n",length*1000/samplerate,
fr1->klattp[KLATT_FNZ]*2,fr1->ffreq[1],fr1->ffreq[2],fr1->ffreq[3],fr1->ffreq[4],fr1->ffreq[5], fr1->klattp[KLATT_AV],
fr2->klattp[KLATT_FNZ]*2,fr2->ffreq[1],fr2->ffreq[2],fr2->ffreq[3],fr2->ffreq[4],fr2->ffreq[5], fr2->klattp[KLATT_AV] );
fclose(f_log);
}
f_log=fopen("log-klatt","a");
if(f_log != NULL)
{
fprintf(f_log,"K %3dmS %3d %3d %4d %4d (%2d) to %3d %3d %4d %4d (%2d)\n",length*1000/samplerate,
fr1->klattp[KLATT_FNZ]*2,fr1->ffreq[1],fr1->ffreq[2],fr1->ffreq[3], fr1->klattp[KLATT_AV],
fr2->klattp[KLATT_FNZ]*2,fr2->ffreq[1],fr2->ffreq[2],fr2->ffreq[3], fr2->klattp[KLATT_AV] );
fclose(f_log);
}
}
#endif

if(control & 1)
{
@@ -1135,24 +1166,25 @@ void SetSynth_Klatt(int length, int modn, frame_t *fr1, frame_t *fr2, voice_t *v
{
// A break, not following on from another synthesized sound.
// Reset the synthesizer
//reset_resonators(&kt_globals);
parwave_init();
KlattReset();
}
else
{
if((prev_fr.ffreq[1] != fr1->ffreq[1]) || (prev_fr.ffreq[2] != fr1->ffreq[2]))
for(ix=1; ix<6; ix++)
{

// fade out to avoid a click, but only up to the end of output buffer
ix = (out_end - out_ptr)/2;
if(ix > 64)
ix = 64;
kt_globals.fadeout = ix;
kt_globals.nspfr = ix;
parwave(&kt_frame);

//reset_resonators(&kt_globals);
parwave_init();
if(prev_fr.ffreq[ix] != fr1->ffreq[ix])
{
// Discontinuity in formants.
// fade out to avoid a click, but only up to the end of output buffer
ix = (out_end - out_ptr)/2;
if(ix > 64)
ix = 64;
kt_globals.fadeout = ix;
kt_globals.nspfr = ix;
parwave(&kt_frame);
reset_resonators();
break;
}
}
}
wdata.prev_was_synth = 1;
@@ -1203,8 +1235,14 @@ void SetSynth_Klatt(int length, int modn, frame_t *fr1, frame_t *fr2, voice_t *v

// nasal zero frequency
peaks[0].freq1 = fr1->klattp[KLATT_FNZ] * 2;
if(peaks[0].freq1 == 0)
peaks[0].freq1 = kt_frame.Fhz[F_NP]; // if no nasal zero, set it to same freq as nasal pole

peaks[0].freq = int(peaks[0].freq1);
next = fr2->klattp[KLATT_FNZ] * 2;
if(next == 0)
next = kt_frame.Fhz[F_NP];

peaks[0].freq_inc = ((next - peaks[0].freq1) * STEPSIZE) / length;

peaks[0].bw1 = 89;
@@ -1275,7 +1313,7 @@ void KlattInit()
kt_globals.outsl = 0;
kt_globals.f0_flutter = 20;

parwave_init();
KlattReset();

// set default values for frame parameters
for(ix=0; ix<=9; ix++)
@@ -1297,7 +1335,7 @@ void KlattInit()
kt_frame.Kskew = 0;
kt_frame.AB = 0;
kt_frame.AVpdb = 0;
kt_frame.Gain0 = 60; // 62
kt_frame.Gain0 = 62; // 60
} // end of KlattInit

#endif // INCLUDE_KLATT

+ 14
- 14
src/klatt.h View File

@@ -68,7 +68,7 @@ typedef struct
int scale_wav; // depends on the voicing source

#define N_RSN 20
#define Rnpc 0
#define Rnz 0 // nasal zero, anti-resonator
#define R1c 1
#define R2c 2
#define R3c 3
@@ -77,7 +77,7 @@ typedef struct
#define R6c 6
#define R7c 7
#define R8c 8
#define Rnz 9
#define Rnpc 9 // nasal pole

#define Rparallel 10
#define Rnpp 10
@@ -111,25 +111,25 @@ typedef struct

typedef struct
{
long F0hz10; /* Voicing fund freq in Hz */
long AVdb; /* Amp of voicing in dB, 0 to 70 */
int F0hz10; /* Voicing fund freq in Hz */
int AVdb; /* Amp of voicing in dB, 0 to 70 */
int Fhz[10]; // formant Hz, F_NZ to F6 to F_NP
int Bhz[10];
int Ap[10]; /* Amp of parallel formants in dB, 0 to 80 */
int Bphz[10]; /* Parallel formants bw in Hz, 40 to 1000 */

long ASP; /* Amp of aspiration in dB, 0 to 70 */
long Kopen; /* # of samples in open period, 10 to 65 */
long Aturb; /* Breathiness in voicing, 0 to 80 */
long TLTdb; /* Voicing spectral tilt in dB, 0 to 24 */
long AF; /* Amp of frication in dB, 0 to 80 */
long Kskew; /* Skewness of alternate periods, 0 to 40 in sample#/2 */
int ASP; /* Amp of aspiration in dB, 0 to 70 */
int Kopen; /* # of samples in open period, 10 to 65 */
int Aturb; /* Breathiness in voicing, 0 to 80 */
int TLTdb; /* Voicing spectral tilt in dB, 0 to 24 */
int AF; /* Amp of frication in dB, 0 to 80 */
int Kskew; /* Skewness of alternate periods, 0 to 40 in sample#/2 */

long AB; /* Amp of bypass fric. in dB, 0 to 80 */
long AVpdb; /* Amp of voicing, par in dB, 0 to 70 */
long Gain0; /* Overall gain, 60 dB is unity, 0 to 60 */
int AB; /* Amp of bypass fric. in dB, 0 to 80 */
int AVpdb; /* Amp of voicing, par in dB, 0 to 70 */
int Gain0; /* Overall gain, 60 dB is unity, 0 to 60 */

long AVdb_tmp; //copy of AVdb, which is changed within parwave()
int AVdb_tmp; //copy of AVdb, which is changed within parwave()
int Fhz_next[10]; // Fhz for the next chunk, so we can do interpolation of resonator (a,b,c) parameters
int Bhz_next[10];
} klatt_frame_t, *klatt_frame_ptr;

+ 10
- 11
src/spect.cpp View File

@@ -288,7 +288,6 @@ int SpectFrame::Load(wxInputStream& stream, int file_format_type)
int x;
unsigned short *spect_data;

file_format = file_format_type;
wxDataInputStream s(stream);

time = s.ReadDouble();
@@ -299,10 +298,10 @@ int SpectFrame::Load(wxInputStream& stream, int file_format_type)
markers = s.Read16();
amp_adjust = s.Read16();

if(file_format == 2)
if(file_format_type == 2)
{
s.Read16(); // spare
s.Read16(); // spare
ix = s.Read16(); // spare
ix = s.Read16(); // spare
}

for(ix=0; ix<N_PEAKS; ix++)
@@ -315,7 +314,7 @@ int SpectFrame::Load(wxInputStream& stream, int file_format_type)
peaks[ix].pkwidth = s.Read16();
peaks[ix].pkright = s.Read16();

if(file_format == 2)
if(file_format_type == 2)
{
peaks[ix].klt_bw = s.Read16();
peaks[ix].klt_ap = s.Read16();
@@ -323,7 +322,7 @@ int SpectFrame::Load(wxInputStream& stream, int file_format_type)
}
}

if(file_format > 0)
if(file_format_type > 0)
{
for(ix=0; ix<N_KLATTP2; ix++)
{
@@ -351,8 +350,8 @@ int SpectFrame::Load(wxInputStream& stream, int file_format_type)
} // End of SpectFrame::Load


int SpectFrame::Save(wxOutputStream& stream)
{//=========================================
int SpectFrame::Save(wxOutputStream& stream, int file_format_type)
{//===============================================================
int ix;

wxDataOutputStream s(stream);
@@ -365,7 +364,7 @@ int SpectFrame::Save(wxOutputStream& stream)
s.Write16(markers);
s.Write16(amp_adjust);

if(file_format == 2)
if(file_format_type == 2)
{
s.Write16(0); // spare
s.Write16(0); // spare
@@ -381,7 +380,7 @@ int SpectFrame::Save(wxOutputStream& stream)
s.Write16(peaks[ix].pkwidth);
s.Write16(peaks[ix].pkright);

if(file_format == 2)
if(file_format_type == 2)
{
s.Write16(peaks[ix].klt_bw);
s.Write16(peaks[ix].klt_ap);
@@ -389,7 +388,7 @@ int SpectFrame::Save(wxOutputStream& stream)
}
}

if(file_format > 0)
if(file_format_type > 0)
{
for(ix=0; ix<N_KLATTP2; ix++)
{

+ 1
- 2
src/spect.h View File

@@ -135,7 +135,7 @@ public:
int Import(wxInputStream &stream);
int ImportSPC2(wxInputStream &stream, float &time_acc);
int Load(wxInputStream &stream, int file_format_type);
int Save(wxOutputStream &stream);
int Save(wxOutputStream &stream, int file_format_type);
void Draw(wxDC &dc, int offy, int frame_width, double scalex, double scaley);

void ZeroPeaks();
@@ -161,7 +161,6 @@ public:
int nx;
int markers;
int max_y;
int file_format; // 0=eSpeak, 1=Klatt, 2=eSpeak+Klatt
USHORT *spect; // sqrt of harmonic amplitudes, 1-nx at 'pitch'

short klatt_param[N_KLATTP2];

+ 45
- 53
src/spectseq.cpp View File

@@ -41,6 +41,7 @@ extern int Wavegen_Klatt(int resume);
extern void SetSynth(int length, int modn, frame_t *fr1, frame_t *fr2, voice_t *v);
extern int Wavegen();
extern void CloseWaveFile2();
extern void KlattReset();
extern FILE *f_wave;

static int frame_width;
@@ -114,11 +115,9 @@ void MakeWaveFile(int synthesis_method)
out_ptr = out_start = wav_outbuf;
out_end = &wav_outbuf[sizeof(wav_outbuf)];

#ifdef KLATT_TEST
if(synthesis_method == 1)
result = Wavegen_Klatt(resume);
else
#endif
result = Wavegen();

if(f_wave != NULL)
@@ -559,9 +558,12 @@ int SpectSeq::Save(wxOutputStream &stream, int selection)
wxDataOutputStream s(stream);

file_format = 2; // include Klatt data in new saves

s.Write32(FILEID1_SPECTSEQ);
if(file_format == 2)
s.Write32(FILEID2_SPECTSQ2);
else
if(file_format == 1)
s.Write32(FILEID2_SPECTSEK);
else
@@ -576,7 +578,7 @@ int SpectSeq::Save(wxOutputStream &stream, int selection)
{
if((selection==0) || frames[ix]->selected)
{
if(frames[ix]->Save(stream) != 0) return(1);
if(frames[ix]->Save(stream, file_format) != 0) return(1);
}
}
return(0);
@@ -806,19 +808,12 @@ void SpectSeq::CopyDown(int frame, int direction)
{//==============================================
// Copy peaks down from next earlier/later keyframe
int f1;
int pk;

for(f1=frame+direction; f1>=0 && f1<numframes; f1 += direction)
{
if(frames[f1]->keyframe)
{
for(pk=0; pk<N_PEAKS; pk++)
{
frames[frame]->peaks[pk].pkfreq = frames[f1]->peaks[pk].pkfreq;
frames[frame]->peaks[pk].pkheight = frames[f1]->peaks[pk].pkheight;
frames[frame]->peaks[pk].pkwidth = frames[f1]->peaks[pk].pkwidth;
frames[frame]->peaks[pk].pkright = frames[f1]->peaks[pk].pkright;
}
memcpy(frames[frame]->peaks, frames[f1]->peaks, sizeof(frames[frame]->peaks));
memcpy(frames[frame]->klatt_param, frames[f1]->klatt_param, sizeof(frames[frame]->klatt_param));
break;
}
@@ -928,10 +923,19 @@ void PeaksToFrame(SpectFrame *sp1, peak_t *pks, frame_t *fr)
int ix;
int x;

fr->frflags = FRFLAG_KLATT;

for(ix=0; ix < 8; ix++)
{
if(ix < 7)
{
fr->ffreq[ix] = pks[ix].pkfreq;
fr->klatt_ap[ix] = pks[ix].klt_ap;
fr->klatt_bp[ix] = pks[ix].klt_bp/2;
}

if(ix < 4)
fr->bw[ix] = pks[ix].klt_bw/2;

fr->fheight[ix] = pks[ix].pkheight >> 6;
if(ix < 6)
@@ -953,6 +957,11 @@ void PeaksToFrame(SpectFrame *sp1, peak_t *pks, frame_t *fr)
{
fr->klattp[ix] = sp1->klatt_param[ix];
}
fr->klattp[KLATT_FNZ] = sp1->klatt_param[KLATT_FNZ]/2;
if(fr->fheight[1] == 0)
{
fr->klattp[KLATT_AV] -= 10; // fade in/out
}
}

static void SetSynth_mS(int length_mS, SpectFrame *sp1, SpectFrame *sp2, peak_t *pks1, peak_t *pks2, int control)
@@ -962,9 +971,11 @@ static void SetSynth_mS(int length_mS, SpectFrame *sp1, SpectFrame *sp2, peak_t
PeaksToFrame(sp1,pks1,&fr1);
PeaksToFrame(sp2,pks2,&fr2);

#ifdef KLATT_TEST
// SetSynth_Klatt((length_mS * samplerate) / 1000, 0, &fr1, &fr2, voice, control); // convert mS to samples
#endif
if(voice->klattv[0])
{
SetSynth_Klatt((length_mS * samplerate) / 1000, 0, &fr1, &fr2, voice, control); // convert mS to samples
}
else
{
SetSynth((length_mS * samplerate) / 1000, 0, &fr1, &fr2, voice); // convert mS to samples
}
@@ -992,9 +1003,12 @@ void SpectSeq::MakeWave(int start, int end, PitchEnvelope &pitch)
peak_t peaks2[N_PEAKS];
int synthesizer_type = 0;

#ifdef KLATT_TEST
KlattInit();
#endif
if(voice->klattv[0])
{
synthesizer_type = 1;
KlattReset();
}

SpeakNextClause(NULL,NULL,2); // stop speaking file

if(numframes==0) return;
@@ -1036,12 +1050,6 @@ KlattInit();
lfactor = 1;
}

// if((start==end) || (total_length == 0))
// {
// sp1->MakeWaveF(1, pitch, amplitude, duration);
// return;
// }

len_samples = int(((total_length * lfactor + 50) * samplerate) / 1000);
SetPitch(len_samples,pitch.env,9,44);

@@ -1066,14 +1074,8 @@ KlattInit();

if(first)
{
if(synthesizer_type == 1)
{
memcpy(peaks0,peaks1,sizeof(peaks0));
}
else
{
PeaksZero(peaks1,peaks0); // fade in
}
PeaksZero(peaks1,peaks0); // fade in

SetSynth_mS(20,sp1,sp1,peaks0,peaks1,0);
MakeWaveFile(synthesizer_type);
first=0;
@@ -1086,14 +1088,8 @@ KlattInit();
}
}

if(synthesizer_type == 1)
{
memcpy(peaks0,peaks2,sizeof(peaks0));
}
else
{
PeaksZero(peaks2,peaks0); // fade out
}
PeaksZero(peaks2,peaks0); // fade out

SetSynth_mS(30,sp2,sp2,peaks2,peaks0,2);
MakeWaveFile(synthesizer_type);

@@ -1126,9 +1122,12 @@ void SpectFrame::MakeWaveF(int control, PitchEnvelope &pitche, int amplitude, in
char *fname_speech;
int synthesizer_type = 0;

#ifdef KLATT_TEST
KlattInit();
#endif
if(voice->klattv[0])
{
synthesizer_type = 1;
KlattReset();
}

SpeakNextClause(NULL,NULL,2); // stop speaking file

length = duration;
@@ -1149,19 +1148,12 @@ KlattInit();
{
memcpy(peaks1,peaks,sizeof(peaks1));

if(synthesizer_type == 0)
{
for(ix=0; ix<N_PEAKS; ix++)
{
y = peaks1[ix].pkheight * amp_adjust * amplitude;
peaks1[ix].pkheight = y/10000;
}
PeaksZero(peaks1,peaks0);
}
else
for(ix=0; ix<N_PEAKS; ix++)
{
memcpy(peaks0,peaks1,sizeof(peaks0));
y = peaks1[ix].pkheight * amp_adjust * amplitude;
peaks1[ix].pkheight = y/10000;
}
PeaksZero(peaks1,peaks0);
SetSynth_mS(20,this,this,peaks0,peaks1,0);
MakeWaveFile(synthesizer_type);

+ 1
- 1
src/synthdata.cpp View File

@@ -35,7 +35,7 @@
#include "translate.h"
#include "wave.h"

const char *version_string = "1.42.04 09.Dec.09";
const char *version_string = "1.42.09 13.Dec.09";
const int version_phdata = 0x014200;

int option_device_number = -1;

+ 2
- 1
src/synthesize.cpp View File

@@ -553,7 +553,8 @@ static short vcolouring[N_VCOLOUR][5] = {

if(voice->klattv[0])
{
fr->klattp[KLATT_AV] = 53; // reduce the amplitude of the start of a vowel
// fr->klattp[KLATT_AV] = 53; // reduce the amplitude of the start of a vowel
// Set the start AV relative to frame seq[1] (6 below its AV) rather than to a fixed value
fr->klattp[KLATT_AV] = seq[1].frame->klattp[KLATT_AV] - 6;
}
if(f2 != 0)
{

+ 1
- 0
src/wavegen.cpp View File

@@ -754,6 +754,7 @@ void WavegenInit(int rate, int wavemult_fact)

#ifdef LOG_FRAMES
remove("log-espeakedit");
remove("log-klatt");
#endif
} // end of WavegenInit


Loading…
Cancel
Save