Browse Source

Merge branch 'development'

Conflicts:
	src/klatt.cpp
master
Reece H. Dunn 12 years ago
parent
commit
170ce4c560

+ 2
- 0
dictsource/en_list View File

@@ -2289,6 +2289,7 @@ pasting peIstIN
pastry peIstri
pate pat2eI
?3 patent pat@nt
pathe paTeI
pathetic $alt3
pathos peIT0s
patio patIoU
@@ -2996,6 +2997,7 @@ vaseline vas@li:n
veg vEdZ
vegetable vEdZI2t@b@L
vehicle vi@k@L
vega vi:g@
veggy vEdZI
vengeance vEndZ@ns
ventriloquist $2

+ 82
- 39
dictsource/hy_list View File

@@ -5,45 +5,88 @@
// ?1 West Armenian

// names of letters
_ա aIb
բ ben
գ gim
դ da
ե jetS
զ za
_է e
_ը '@t#
թ t#o
ժ Ze
ի ini
լ lyn
խ Xe
ծ tsa
կ ken
հ ho
ձ dza
ղ r"at
ճ ce
մ men
յ hi
ն nu
շ Sa
ո vo
չ tSa
պ pe
ջ dZe
ռ R2a
ս se
վ vev
տ tyn
ր re
ց ts#o
ւ hyn
փ p#yr
ք k#e
և jev
_օ o
ֆ fe

_ա a:
բ b'@
գ g'@
դ d'@
ե j'e
զ z'@
_է 'e:
_ը '@:
թ t#'@
ժ Z'@
ի 'i:
լ l'@
խ X'@
ծ ts'@
կ k'@
հ h'@
ձ dz'@
ղ r"'@
ճ tS'@
մ m'@
յ j'@
ն n'@
շ S'@
ո v'o
չ tS#'@
պ p'@
ջ dZ'@
ռ R2'@
ս s'@
վ v'@
տ t'@
ր r'@
ց ts#'@
ւ 'u:
փ p#'@
ք k#'@
և j'ev
_օ 'o:
ֆ f'@

// Note: Western Armenian consonants are transformed in hy-west voice file
?1 _ա aIb
?1 բ ben
?1 գ gim
?1 դ da
?1 ե jetS
?1 զ za
?1 _է e
?1 _ը '@t#
?1 թ t#o
?1 ժ Ze
?1 ի ini
?1 լ lyn
?1 խ Xe
?1 ծ tsa
?1 կ ken
?1 հ ho
?1 ձ dza
?1 ղ r"at
?1 ճ tSe
?1 մ men
?1 յ hi
?1 ն nu
?1 շ Sa
?1 ո vo
?1 չ tS#a
?1 պ pe
?1 ջ dZe
?1 ռ R2a
?1 ս se
?1 վ vev
?1 տ tyn
?1 ր re
?1 ց ts#o
?1 ւ hyn
?1 փ p#yr
?1 ք k#e
?1 և jev
?1 _օ o
?1 ֆ fe


_cap m,ets_|
_?? g,iR

+ 4
- 4
dictsource/hy_rules View File

@@ -89,8 +89,8 @@
_) ղ (C r"@

.group ճ
ճ c
_) ճ (C c@
ճ tS
_) ճ (C tS@

.group մ
մ m
@@ -125,8 +125,8 @@
?1 ոյ (_ o

.group չ
չ tS
_) չ (C tS@
չ tS#
_) չ (C tS#@

.group պ
պ p

+ 1
- 3
dictsource/pt_list View File

@@ -662,7 +662,6 @@ aeroportos $alt
alcateia $alt
alcova $alt2
alegro $alt2 $noun
alferes $alt
almoço $alt $verb
ameba $alt
amores $alt2
@@ -892,7 +891,6 @@ falangeta $alt2
farofa $alt
farolete $alt2
febre $alt
fera $alt
feitora $alt2
fezes $alt
filete $alt2
@@ -1020,7 +1018,6 @@ ordens $alt
osso $alt2
ossos $alt
ostra $alt2
ovelha $alt2
ovos $alt
pachorra $alt2
paexa paeSa
@@ -1041,6 +1038,7 @@ pelo $alt2
pelo $alt $verb
pereba $alt
peroba $alt
peses $alt
peseta $alt2
peso $alt $verb
piloto $alt2 $noun

+ 7
- 8
dictsource/pt_rules View File

@@ -186,7 +186,7 @@
?2 el (K EU
e (lh+ e
e (ll+ E
v) e (lhA_ E

e (bL02_ E

@@ -215,7 +215,7 @@
e (lL04_ E

i) e (r_ E
@s) e (r_ E
qu) e (r_ E

ib) e (rL04_ E
@@ -225,10 +225,11 @@
iv) e (rL04_ E
pod) e (rA_ e

@s) e (rL04_ E
@s) e (L05_ E

sp) e (rL04_ E
f) e (rL02_ E

f) e (rL04_ E

e (quL02_ E
e (rbL04_ E
@@ -237,7 +238,6 @@
e (rsL04_ E

e (sL02_ E

e (ssL04_ E
e (ss_ E // stress and english words
?2 C) ee (_ i
@@ -346,7 +346,6 @@
?1 v) e (l_ ,E // eg: possível, amovível, disponível, etc...
?1 l) e (ta 'E // eg: bicicleta, atleta.

sf) e (ra_ E // esfera, biosfera
ef) e (tuL02_ e
t) e (tra E

@@ -363,6 +362,7 @@
óCC) e (CA_ e
úC) e (CA_ e
úC) el (_ eU
ection (_ 'EkS&~N

//sort
_d) e (L05_ E
@@ -371,7 +371,6 @@
_vi) e (L05_ E
end) e (L05_ e // entenderam, etc.
fiz) e (L05_ E
pus) e (L05_ E // com- im- de-
tiv) e (L05_ E // es-
viv) e (L05_ e
_com) e (L05_ e
@@ -381,7 +380,6 @@
_exag) e (L05_ E
_houv) e (L05_ E
_mulh) e (L05_ E
_quis) e (L05_ E
_soub) e (L05_ E
_talh) e (L05_ E
_escond) e (L05_ e
@@ -427,6 +425,7 @@ _corro) e (L07_ e
_interromp) e (L07_ e
_troux) e (L07_ E
_ens) e (bL03_ E
_v) e (lhL03_ E
_amanc) e (bL03_ E
qu) e (brL04_ E
_cel) e (brL04_ E

+ 4
- 0
espeak-data/voices/!v/klatt4 View File

@@ -0,0 +1,4 @@
language variant
name klatt4
klatt 4

+ 2
- 2
espeak-data/voices/asia/hy-west View File

@@ -9,13 +9,13 @@ dictrules 1
replace 00 b p#
replace 00 d t#
replace 00 dz ts#
replace 00 dZ tS
replace 00 dZ tS#
replace 00 g k#

replace 00 p b
replace 00 t d
replace 00 ts dz
replace 00 c dZ
replace 00 tS dZ
replace 00 k g

replace 00 R2 R // ??

+ 13
- 2
phsource/ph_armenian View File

@@ -123,10 +123,21 @@ phoneme ts
endphoneme


phoneme c
import_phoneme consonants/c2
phoneme tS
vls pla afr sibilant
voicingswitch dZ
lengthmod 2
Vowelin f1=0 f2=2300 200 400 f3=-100 80

IF nextPh(isPause2) THEN
WAV(ustop/tsh_unasp, 60)
ENDIF
WAV(ustop/tsh_unasp)
endphoneme

phoneme tS#
import_phoneme base/tS
endphoneme

phoneme k
import_phoneme consonants/k-

+ 5
- 4
phsource/ph_hindi View File

@@ -396,13 +396,13 @@ phoneme t.#
lengthmod 2
ipa ʈU+02B0
voicingswitch d.#
Vowelin f1=0 f2=1800 -300 300 f3=-400 80
Vowelin f1=0 f2=1800 -300 300 f3=-200 80
Vowelout f1=0 f2=1800 -300 250 f3=-400 80 rms=20 colr=2
IF nextPhW(t.#) THEN
ChangePhoneme(t.)
ENDIF
WAV(ustop/th_rfx)
WAV(ustop/th_rfx2, 80)
endphoneme

phoneme d.
@@ -430,14 +430,15 @@ phoneme d.#
lengthmod 5
ipa ɖU+02B0
voicingswitch t.#
Vowelin f1=1 f2=1800 -300 300 f3=-150 80
Vowelin f1=1 f2=1800 -300 300 f3=-400 80
Vowelout f1=1 f2=1800 -300 300 f3=-400 80 colr=2

IF PreVoicing THEN
FMT(d/xdr)
ENDIF

FMT(d/dr2, 50) addWav(ustop/th_rfx2, 80)
FMT(d/dr) addWav(ustop/t)
// FMT(d/dr) addWav(ustop/th_rfx)
endphoneme



+ 1
- 1
phsource/ph_turkish View File

@@ -201,7 +201,7 @@ endphoneme


phoneme R
liquid trill rhotic
liquid trill
lengthmod 6
IF prevPhW(isVowel) AND nextPhW(isVowel) THEN
ChangePhoneme(*)

+ 4
- 4
phsource/phonemes View File

@@ -344,7 +344,7 @@ endphoneme


phoneme R
liquid rhotic
liquid
trill
lengthmod 6
ipa r
@@ -357,7 +357,7 @@ phoneme R
endphoneme

phoneme R2 // this is [R] from Slovak/Czech
liquid rhotic
liquid
trill
lengthmod 6
ipa r
@@ -367,7 +367,7 @@ phoneme R2 // this is [R] from Slovak/Czech
endphoneme

phoneme R3 // Afrikaans
liquid rhotic
liquid
trill
lengthmod 6
length 60
@@ -394,7 +394,7 @@ endphoneme


phoneme r" // uvular trill
vcd uvl frc rhotic
vcd uvl frc
ipa ʀ
lengthmod 6
IF nextPh(isNotVowel) THEN

+ 241
- 181
src/klatt.cpp View File

@@ -22,7 +22,7 @@
* <http://www.gnu.org/licenses/>. *
***************************************************************************/

// See URL: ftp://svr-ftp.eng.cam.ac.uk/pub/comp.speech/synthesis/klatt.3.04.tar.gz
// See URL: ftp://svr-ftp.eng.cam.ac.uk/pub/comp.speech/synthesis/klatt.3.04.tar.gz

#include "StdAfx.h"

@@ -58,19 +58,69 @@ static int sample_count;
/* function prototypes for functions private to this file */

static void flutter(klatt_frame_ptr);
static double sampled_source (void);
static double sampled_source (int);
static double impulsive_source (void);
static double natural_source (void);
static void pitch_synch_par_reset (klatt_frame_ptr);
static void pitch_synch_par_reset (klatt_frame_ptr);
static double gen_noise (double);
static double DBtoLIN (long);
static void frame_init (klatt_frame_ptr);
static void frame_init (klatt_frame_ptr);
static void setabc (long,long,resonator_ptr);
static void setzeroabc (long,long,resonator_ptr);

static klatt_frame_t kt_frame;
static klatt_global_t kt_globals;

#define NUMBER_OF_SAMPLES 100

static int scale_wav_tab[] = {45,38,45,45,55}; // scale output from different voicing sources

// For testing, this can be overwritten in KlattInit()
static short natural_samples2[256]= {
2583, 2516, 2450, 2384, 2319, 2254, 2191, 2127,
2067, 2005, 1946, 1890, 1832, 1779, 1726, 1675,
1626, 1579, 1533, 1491, 1449, 1409, 1372, 1336,
1302, 1271, 1239, 1211, 1184, 1158, 1134, 1111,
1089, 1069, 1049, 1031, 1013, 996, 980, 965,
950, 936, 921, 909, 895, 881, 869, 855,
843, 830, 818, 804, 792, 779, 766, 754,
740, 728, 715, 702, 689, 676, 663, 651,
637, 626, 612, 601, 588, 576, 564, 552,
540, 530, 517, 507, 496, 485, 475, 464,
454, 443, 434, 424, 414, 404, 394, 385,
375, 366, 355, 347, 336, 328, 317, 308,
299, 288, 280, 269, 260, 250, 240, 231,
220, 212, 200, 192, 181, 172, 161, 152,
142, 133, 123, 113, 105, 94, 86, 76,
67, 57, 49, 39, 30, 22, 11, 4,
-5, -14, -23, -32, -41, -50, -60, -69,
-78, -87, -96, -107, -115, -126, -134, -144,
-154, -164, -174, -183, -193, -203, -213, -222,
-233, -242, -252, -262, -271, -281, -291, -301,
-310, -320, -330, -339, -349, -357, -368, -377,
-387, -397, -406, -417, -426, -436, -446, -456,
-467, -477, -487, -499, -509, -521, -532, -543,
-555, -567, -579, -591, -603, -616, -628, -641,
-653, -666, -679, -692, -705, -717, -732, -743,
-758, -769, -783, -795, -808, -820, -834, -845,
-860, -872, -885, -898, -911, -926, -939, -955,
-968, -986, -999, -1018, -1034, -1054, -1072, -1094,
-1115, -1138, -1162, -1188, -1215, -1244, -1274, -1307,
-1340, -1377, -1415, -1453, -1496, -1538, -1584, -1631,
-1680, -1732, -1783, -1839, -1894, -1952, -2010, -2072,
-2133, -2196, -2260, -2325, -2390, -2456, -2522, -2589,
};
static short natural_samples[100]=
{
-310,-400,530,356,224,89,23,-10,-58,-16,461,599,536,701,770,
605,497,461,560,404,110,224,131,104,-97,155,278,-154,-1165,
-598,737,125,-592,41,11,-247,-10,65,92,80,-304,71,167,-1,122,
233,161,-43,278,479,485,407,266,650,134,80,236,68,260,269,179,
53,140,275,293,296,104,257,152,311,182,263,245,125,314,140,44,
203,230,-235,-286,23,107,92,-91,38,464,443,176,98,-784,-2449,
-1891,-1045,-1600,-1462,-1384,-1261,-949,-730
};

/*
function RESONATOR

@@ -81,7 +131,7 @@ is stored in the globals structure.
static double resonator(resonator_ptr r, double input)
{
double x;
x = (double) ((double)r->a * (double)input + (double)r->b * (double)r->p1 + (double)r->c * (double)r->p2);
r->p2 = (double)r->p1;
r->p1 = (double)x;
@@ -92,11 +142,11 @@ static double resonator(resonator_ptr r, double input)
static double resonator2(resonator_ptr r, double input)
{
double x;
x = (double) ((double)r->a * (double)input + (double)r->b * (double)r->p1 + (double)r->c * (double)r->p2);
r->p2 = (double)r->p1;
r->p1 = (double)x;
r->a += r->a_inc;
r->b += r->b_inc;
r->c += r->c_inc;
@@ -105,13 +155,13 @@ static double resonator2(resonator_ptr r, double input)



/*
/*
function ANTIRESONATOR

This is a generic anti-resonator function. The code is the same as resonator
except that a,b,c need to be set with setzeroabc() and we save inputs in
This is a generic anti-resonator function. The code is the same as resonator
except that a,b,c need to be set with setzeroabc() and we save inputs in
p1/p2 rather than outputs. There is currently only one of these - "rnz"
Output = (rnz.a * input) + (rnz.b * oldin1) + (rnz.c * oldin2)
Output = (rnz.a * input) + (rnz.b * oldin1) + (rnz.c * oldin2)
*/

#ifdef deleted
@@ -129,7 +179,7 @@ static double antiresonator2(resonator_ptr r, double input)
register double x = (double)r->a * (double)input + (double)r->b * (double)r->p1 + (double)r->c * (double)r->p2;
r->p2 = (double)r->p1;
r->p1 = (double)input;
r->a += r->a_inc;
r->b += r->b_inc;
r->c += r->c_inc;
@@ -143,7 +193,7 @@ function FLUTTER

This function adds F0 flutter, as specified in:

"Analysis, synthesis and perception of voice quality variations among
"Analysis, synthesis and perception of voice quality variations among
female and male talkers" D.H. Klatt and L.C. Klatt JASA 87(2) February 1990.

Flutter is added by applying a quasi-random element constructed from three
@@ -155,7 +205,7 @@ static void flutter(klatt_frame_ptr frame)
static int time_count;
double delta_f0;
double fla,flb,flc,fld,fle;
fla = (double) kt_globals.f0_flutter / 50;
flb = (double) kt_globals.original_f0 / 100;
// flc = sin(2*PI*12.7*time_count);
@@ -178,7 +228,7 @@ Allows the use of a glottal excitation waveform sampled from a real
voice.
*/

static double sampled_source()
static double sampled_source(int source_num)
{
int itemp;
double ftemp;
@@ -187,23 +237,35 @@ static double sampled_source()
int current_value;
int next_value;
double temp_diff;
short *samples;

if(source_num == 0)
{
samples = natural_samples;
kt_globals.num_samples = 100;
}
else
{
samples = natural_samples2;
kt_globals.num_samples = 256;
}

if(kt_globals.T0!=0)
{
ftemp = (double) kt_globals.nper;
ftemp = ftemp / kt_globals.T0;
ftemp = ftemp * kt_globals.num_samples;
itemp = (int) ftemp;
temp_diff = ftemp - (double) itemp;
current_value = kt_globals.natural_samples[itemp];
next_value = kt_globals.natural_samples[itemp+1];
current_value = samples[itemp];
next_value = samples[itemp+1];
diff_value = (double) next_value - (double) current_value;
diff_value = diff_value * temp_diff;
result = kt_globals.natural_samples[itemp] + diff_value;
result = samples[itemp] + diff_value;
result = result * kt_globals.sample_factor;
}
else
@@ -216,14 +278,14 @@ static double sampled_source()



/*
/*
function PARWAVE

Converts synthesis parameters to a waveform.
*/


static int parwave(klatt_frame_ptr frame)
static int parwave(klatt_frame_ptr frame)
{
double temp;
int value;
@@ -257,7 +319,7 @@ if(option_log_frames)

/* MAIN LOOP, for each output sample of current frame: */

for (kt_globals.ns=0; kt_globals.ns<kt_globals.nspfr; kt_globals.ns++)
for (kt_globals.ns=0; kt_globals.ns<kt_globals.nspfr; kt_globals.ns++)
{
/* Get low-passed random number for aspiration and frication noise */
noise = gen_noise(noise);
@@ -266,22 +328,22 @@ if(option_log_frames)
Amplitude modulate noise (reduce noise amplitude during
second half of glottal period) if voicing simultaneously present.
*/
if (kt_globals.nper > kt_globals.nmod)
if (kt_globals.nper > kt_globals.nmod)
{
noise *= (double) 0.5;
}
/* Compute frication noise */
frics = kt_globals.amp_frica * noise;
/*
Compute voicing waveform. Run glottal source simulation at 4
times normal sample rate to minimize quantization noise in
Compute voicing waveform. Run glottal source simulation at 4
times normal sample rate to minimize quantization noise in
period of female voice.
*/
for (n4=0; n4<4; n4++)
for (n4=0; n4<4; n4++)
{
switch(kt_globals.glsource)
{
@@ -289,64 +351,67 @@ if(option_log_frames)
voice = impulsive_source();
break;
case NATURAL:
voice = natural_source();
voice = natural_source();
break;
case SAMPLED:
voice = sampled_source();
voice = sampled_source(0);
break;
case SAMPLED2:
voice = sampled_source(1);
break;
}
/* Reset period when counter 'nper' reaches T0 */
if (kt_globals.nper >= kt_globals.T0)
if (kt_globals.nper >= kt_globals.T0)
{
kt_globals.nper = 0;
pitch_synch_par_reset(frame);
}
/*
Low-pass filter voicing waveform before downsampling from 4*samrate
to samrate samples/sec. Resonator f=.09*samrate, bw=.06*samrate
to samrate samples/sec. Resonator f=.09*samrate, bw=.06*samrate
*/
voice = resonator(&(kt_globals.rsn[RLP]),voice);
/* Increment counter that keeps track of 4*samrate samples per sec */
kt_globals.nper++;
}
/*
Tilt spectrum of voicing source down by soft low-pass filtering, amount
of tilt determined by TLTdb
*/
voice = (voice * kt_globals.onemd) + (vlast * kt_globals.decay);
vlast = voice;
/*
Add breathiness during glottal open phase. Amount of breathiness
determined by parameter Aturb Use nrand rather than noise because
noise is low-passed.
/*
Add breathiness during glottal open phase. Amount of breathiness
determined by parameter Aturb Use nrand rather than noise because
noise is low-passed.
*/
if (kt_globals.nper < kt_globals.nopen)
if (kt_globals.nper < kt_globals.nopen)
{
voice += kt_globals.amp_breth * kt_globals.nrand;
}
/* Set amplitude of voicing */
glotout = kt_globals.amp_voice * voice;
par_glotout = kt_globals.par_amp_voice * voice;
/* Compute aspiration amplitude and add to voicing source */
aspiration = kt_globals.amp_aspir * noise;
glotout += aspiration;
par_glotout += aspiration;
/*
/*
Cascade vocal tract, excited by laryngeal sources.
Nasal antiresonator, then formants FNP, F5, F4, F3, F2, F1
Nasal antiresonator, then formants FNP, F5, F4, F3, F2, F1
*/

out=0;
@@ -363,20 +428,20 @@ if(option_log_frames)
casc_next_in = resonator2(&(kt_globals.rsn[R2c]),casc_next_in);
out = resonator2(&(kt_globals.rsn[R1c]),casc_next_in);
}
/* Excite parallel F1 and FNP by voicing waveform */
sourc = par_glotout; /* Source is voicing plus aspiration */

/*
Standard parallel vocal tract Formants F6,F5,F4,F3,F2,
outputs added with alternating sign. Sound source for other
parallel resonators is frication plus first difference of
voicing waveform.
Standard parallel vocal tract Formants F6,F5,F4,F3,F2,
outputs added with alternating sign. Sound source for other
parallel resonators is frication plus first difference of
voicing waveform.
*/
out += resonator(&(kt_globals.rsn[R1p]),sourc);
out += resonator(&(kt_globals.rsn[Rnpp]),sourc);
sourc = frics + par_glotout - glotlast;
glotlast = par_glotout;

@@ -384,14 +449,14 @@ if(option_log_frames)
{
out = resonator(&(kt_globals.rsn[ix]),sourc) - out;
}
outbypas = kt_globals.amp_bypas * sourc;
out = outbypas - out;

#ifdef deleted
// for testing
if (kt_globals.outsl != 0)
if (kt_globals.outsl != 0)
{
switch(kt_globals.outsl)
{
@@ -401,7 +466,7 @@ if(option_log_frames)
case 2:
out = aspiration;
break;
case 3:
case 3:
out = frics;
break;
case 4:
@@ -429,7 +494,7 @@ if(option_log_frames)
int z2;
signed char c;
int sample;
z2 = 0;
if(wdata.mix_wavefile_ix < wdata.n_mix_wavefile)
{
@@ -466,7 +531,7 @@ if(option_log_frames)
{
value = -32768;
}
if (value > 32767)
{
value = 32767;
@@ -530,13 +595,13 @@ void KlattReset(int control)
}


/*
/*
function FRAME_INIT

Use parameters from the input frame to set up resonator coefficients.
*/

static void frame_init(klatt_frame_ptr frame)
static void frame_init(klatt_frame_ptr frame)
{
double amp_par[7];
static double amp_par_factor[7] = {0.6, 0.4, 0.15, 0.06, 0.04, 0.022, 0.03};
@@ -544,13 +609,13 @@ static void frame_init(klatt_frame_ptr frame)
int ix;

kt_globals.original_f0 = frame->F0hz10 / 10;
frame->AVdb_tmp = frame->AVdb - 7;
if (frame->AVdb_tmp < 0)
{
frame->AVdb_tmp = 0;
}
kt_globals.amp_aspir = DBtoLIN(frame->ASP) * 0.05;
kt_globals.amp_frica = DBtoLIN(frame->AF) * 0.25;
kt_globals.par_amp_voice = DBtoLIN(frame->AVpdb);
@@ -563,12 +628,12 @@ static void frame_init(klatt_frame_ptr frame)
}

Gain0_tmp = frame->Gain0 - 3;
if (Gain0_tmp <= 0)
if (Gain0_tmp <= 0)
{
Gain0_tmp = 57;
}
kt_globals.amp_gain0 = DBtoLIN(Gain0_tmp) / kt_globals.scale_wav;
/* Set coefficients of variable cascade resonators */
for(ix=1; ix<=9; ix++)
{
@@ -578,7 +643,7 @@ static void frame_init(klatt_frame_ptr frame)
if(ix <= 5)
{
setabc(frame->Fhz_next[ix],frame->Bhz_next[ix],&(kt_globals.rsn_next[ix]));
kt_globals.rsn[ix].a_inc = (kt_globals.rsn_next[ix].a - kt_globals.rsn[ix].a) / 64.0;
kt_globals.rsn[ix].b_inc = (kt_globals.rsn_next[ix].b - kt_globals.rsn[ix].b) / 64.0;
kt_globals.rsn[ix].c_inc = (kt_globals.rsn_next[ix].c - kt_globals.rsn[ix].c) / 64.0;
@@ -592,19 +657,19 @@ static void frame_init(klatt_frame_ptr frame)
kt_globals.rsn[F_NZ].b_inc = (kt_globals.rsn_next[F_NZ].b - kt_globals.rsn[F_NZ].b) / 64.0;
kt_globals.rsn[F_NZ].c_inc = (kt_globals.rsn_next[F_NZ].c - kt_globals.rsn[F_NZ].c) / 64.0;

/* Set coefficients of parallel resonators, and amplitude of outputs */
for(ix=0; ix<=6; ix++)
{
setabc(frame->Fhz[ix],frame->Bphz[ix],&(kt_globals.rsn[Rparallel+ix]));
kt_globals.rsn[Rparallel+ix].a *= amp_par[ix];
}
/* output low-pass filter */
setabc((long)0.0,(long)(kt_globals.samrate/2),&(kt_globals.rsn[Rout]));
}


@@ -612,27 +677,27 @@ static void frame_init(klatt_frame_ptr frame)
/*
function IMPULSIVE_SOURCE

Generate a low pass filtered train of impulses as an approximation of
a natural excitation waveform. Low-pass filter the differentiated impulse
with a critically-damped second-order filter, time constant proportional
Generate a low pass filtered train of impulses as an approximation of
a natural excitation waveform. Low-pass filter the differentiated impulse
with a critically-damped second-order filter, time constant proportional
to Kopen.
*/


static double impulsive_source()
static double impulsive_source()
{
static double doublet[] = {0.0,13000000.0,-13000000.0};
static double vwave;
if (kt_globals.nper < 3)
if (kt_globals.nper < 3)
{
vwave = doublet[kt_globals.nper];
}
else
else
{
vwave = 0.0;
}
return(resonator(&(kt_globals.rsn[RGL]),vwave));
}

@@ -645,20 +710,20 @@ Vwave is the differentiated glottal flow waveform, there is a weak
spectral zero around 800 Hz, magic constants a,b reset pitch synchronously.
*/

static double natural_source()
static double natural_source()
{
double lgtemp;
static double vwave;
if (kt_globals.nper < kt_globals.nopen)
if (kt_globals.nper < kt_globals.nopen)
{
kt_globals.pulse_shape_a -= kt_globals.pulse_shape_b;
vwave += kt_globals.pulse_shape_a;
lgtemp=vwave * 0.028;
return(lgtemp);
}
else
else
{
vwave = 0.0;
return(0.0);
@@ -692,125 +757,125 @@ Assume voicing waveform V(t) has form: k1 t**2 - k2 t**3
potion of the voicing cycle "nopen".

Let integral of dV/dt have no net dc flow --> a = (b * nopen) / 3
Let maximum of dUg(n)/dn be constant --> b = gain / (nopen * nopen)
meaning as nopen gets bigger, V has bigger peak proportional to n

Thus, to generate the table below for 40 <= nopen <= 263:
B0[nopen - 40] = 1920000 / (nopen * nopen)
*/

static void pitch_synch_par_reset(klatt_frame_ptr frame)
static void pitch_synch_par_reset(klatt_frame_ptr frame)
{
long temp;
double temp1;
static long skew;
static short B0[224] =
static short B0[224] =
{
1200,1142,1088,1038, 991, 948, 907, 869, 833, 799, 768, 738, 710, 683, 658,
634, 612, 590, 570, 551, 533, 515, 499, 483, 468, 454, 440, 427, 415, 403,
391, 380, 370, 360, 350, 341, 332, 323, 315, 307, 300, 292, 285, 278, 272,
265, 259, 253, 247, 242, 237, 231, 226, 221, 217, 212, 208, 204, 199, 195,
192, 188, 184, 180, 177, 174, 170, 167, 164, 161, 158, 155, 153, 150, 147,
145, 142, 140, 137, 135, 133, 131, 128, 126, 124, 122, 120, 119, 117, 115,
145, 142, 140, 137, 135, 133, 131, 128, 126, 124, 122, 120, 119, 117, 115,
113,111, 110, 108, 106, 105, 103, 102, 100, 99, 97, 96, 95, 93, 92, 91, 90,
88, 87, 86, 85, 84, 83, 82, 80, 79, 78, 77, 76, 75, 75, 74, 73, 72, 71,
70, 69, 68, 68, 67, 66, 65, 64, 64, 63, 62, 61, 61, 60, 59, 59, 58, 57,
70, 69, 68, 68, 67, 66, 65, 64, 64, 63, 62, 61, 61, 60, 59, 59, 58, 57,
57, 56, 56, 55, 55, 54, 54, 53, 53, 52, 52, 51, 51, 50, 50, 49, 49, 48, 48,
47, 47, 46, 46, 45, 45, 44, 44, 43, 43, 42, 42, 41, 41, 41, 41, 40, 40,
39, 39, 38, 38, 38, 38, 37, 37, 36, 36, 36, 36, 35, 35, 35, 35, 34, 34,33,
33, 33, 33, 32, 32, 32, 32, 31, 31, 31, 31, 30, 30, 30, 30, 29, 29, 29, 29,
28, 28, 28, 28, 27, 27
};
if (frame->F0hz10 > 0)
if (frame->F0hz10 > 0)
{
/* T0 is 4* the number of samples in one pitch period */
kt_globals.T0 = (40 * kt_globals.samrate) / frame->F0hz10;
kt_globals.amp_voice = DBtoLIN(frame->AVdb_tmp);
/* Duration of period before amplitude modulation */
kt_globals.nmod = kt_globals.T0;
if (frame->AVdb_tmp > 0)
if (frame->AVdb_tmp > 0)
{
kt_globals.nmod >>= 1;
}
/* Breathiness of voicing waveform */
kt_globals.amp_breth = DBtoLIN(frame->Aturb) * 0.1;
/* Set open phase of glottal period where 40 <= open phase <= 263 */
kt_globals.nopen = 4 * frame->Kopen;
if ((kt_globals.glsource == IMPULSIVE) && (kt_globals.nopen > 263))
{
kt_globals.nopen = 263;
}
if (kt_globals.nopen >= (kt_globals.T0-1))
if (kt_globals.nopen >= (kt_globals.T0-1))
{
// printf("Warning: glottal open period cannot exceed T0, truncated\n");
kt_globals.nopen = kt_globals.T0 - 2;
}
if (kt_globals.nopen < 40)
if (kt_globals.nopen < 40)
{
/* F0 max = 1000 Hz */
// printf("Warning: minimum glottal open period is 10 samples.\n");
// printf("truncated, nopen = %d\n",kt_globals.nopen);
kt_globals.nopen = 40;
}
/* Reset a & b, which determine shape of "natural" glottal waveform */
kt_globals.pulse_shape_b = B0[kt_globals.nopen-40];
kt_globals.pulse_shape_a = (kt_globals.pulse_shape_b * kt_globals.nopen) * 0.333;
/* Reset width of "impulsive" glottal pulse */
temp = kt_globals.samrate / kt_globals.nopen;
setabc((long)0,temp,&(kt_globals.rsn[RGL]));
/* Make gain at F1 about constant */
temp1 = kt_globals.nopen *.00833;
kt_globals.rsn[RGL].a *= temp1 * temp1;
/*
Truncate skewness so as not to exceed duration of closed phase
of glottal period.
*/
temp = kt_globals.T0 - kt_globals.nopen;
if (frame->Kskew > temp)
if (frame->Kskew > temp)
{
// printf("Kskew duration=%d > glottal closed period=%d, truncate\n", frame->Kskew, kt_globals.T0 - kt_globals.nopen);
frame->Kskew = temp;
}
if (skew >= 0)
if (skew >= 0)
{
skew = frame->Kskew;
}
else
else
{
skew = - frame->Kskew;
}
/* Add skewness to closed portion of voicing period */
kt_globals.T0 = kt_globals.T0 + skew;
skew = - skew;
}
else
else
{
kt_globals.T0 = 4; /* Default for f0 undefined */
kt_globals.amp_voice = 0.0;
@@ -819,20 +884,20 @@ static void pitch_synch_par_reset(klatt_frame_ptr frame)
kt_globals.pulse_shape_a = 0.0;
kt_globals.pulse_shape_b = 0.0;
}
/* Reset these pars pitch synchronously or at update rate if f0=0 */
if ((kt_globals.T0 != 4) || (kt_globals.ns == 0))
if ((kt_globals.T0 != 4) || (kt_globals.ns == 0))
{
/* Set one-pole low-pass filter that tilts glottal source */
kt_globals.decay = (0.033 * frame->TLTdb);
if (kt_globals.decay > 0.0)
if (kt_globals.decay > 0.0)
{
kt_globals.onemd = 1.0 - kt_globals.decay;
}
else
else
{
kt_globals.onemd = 1.0;
}
@@ -844,7 +909,7 @@ static void pitch_synch_par_reset(klatt_frame_ptr frame)
/*
function SETABC

Convert formant freqencies and bandwidth into resonator difference
Convert formant freqencies and bandwidth into resonator difference
equation constants.
*/

@@ -853,18 +918,18 @@ static void setabc(long int f, long int bw, resonator_ptr rp)
{
double r;
double arg;
/* Let r = exp(-pi bw t) */
arg = kt_globals.minus_pi_t * bw;
r = exp(arg);
/* Let c = -r**2 */
rp->c = -(r * r);
/* Let b = r * 2*cos(2 pi f t) */
arg = kt_globals.two_pi_t * f;
rp->b = r * cos(arg) * 2.0;
/* Let a = 1.0 - b - c */
rp->a = 1.0 - rp->b - rp->c;
}
@@ -873,7 +938,7 @@ static void setabc(long int f, long int bw, resonator_ptr rp)
/*
function SETZEROABC

Convert formant freqencies and bandwidth into anti-resonator difference
Convert formant freqencies and bandwidth into anti-resonator difference
equation constants.
*/

@@ -881,7 +946,7 @@ static void setzeroabc(long int f, long int bw, resonator_ptr rp)
{
double r;
double arg;
f = -f;

//NOTE, changes made 30.09.2011 for Reece Dunn <[email protected]>
@@ -891,17 +956,18 @@ static void setzeroabc(long int f, long int bw, resonator_ptr rp)
/* Let r = exp(-pi bw t) */
arg = kt_globals.minus_pi_t * bw;
r = exp(arg);
/* Let c = -r**2 */
rp->c = -(r * r);
/* Let b = r * 2*cos(2 pi f t) */
arg = kt_globals.two_pi_t * f;
rp->b = r * cos(arg) * 2.;
/* Let a = 1.0 - b - c */
rp->a = 1.0 - rp->b - rp->c;

/* Now convert to antiresonator coefficients (a'=1/a, b'=b/a, c'=c/a) */
/* If f == 0 then rp->a gets set to 0 which makes a'=1/a set a', b' and c' to
* INF, causing an audible sound spike when triggered (e.g. apiration with the
* nasal register set to f=0, bw=0).
@@ -916,26 +982,26 @@ static void setzeroabc(long int f, long int bw, resonator_ptr rp)
}


/*
/*
function GEN_NOISE

Random number generator (return a number between -8191 and +8191)
Noise spectrum is tilted down by soft low-pass filter having a pole near
the origin in the z-plane, i.e. output = input + (0.75 * lastoutput)
Random number generator (return a number between -8191 and +8191)
Noise spectrum is tilted down by soft low-pass filter having a pole near
the origin in the z-plane, i.e. output = input + (0.75 * lastoutput)
*/


static double gen_noise(double noise)
static double gen_noise(double noise)
{
long temp;
static double nlast;
temp = (long) getrandom(-8191,8191);
kt_globals.nrand = (long) temp;
noise = kt_globals.nrand + (0.75 * nlast);
nlast = noise;
return(noise);
}

@@ -952,16 +1018,16 @@ Conversion table, db to linear, 87 dB --> 32767
81 dB --> 16384 (6 dB down = 0.5)
...
0 dB --> 0
The just noticeable difference for a change in intensity of a vowel
is approximately 1 dB. Thus all amplitudes are quantized to 1 dB
steps.
*/


static double DBtoLIN(long dB)
static double DBtoLIN(long dB)
{
static short amptable[88] =
static short amptable[88] =
{
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 7,
8, 9, 10, 11, 13, 14, 16, 18, 20, 22, 25, 28, 32,
@@ -972,12 +1038,12 @@ static double DBtoLIN(long dB)
4096, 4547, 5104, 5751, 6488, 7291, 8192, 9093, 10207,
11502, 12976, 14582, 16384, 18350, 20644, 23429,
26214, 29491, 32767 };
if ((dB < 0) || (dB > 87))
{
return(0);
}
return((double)(amptable[dB]) * 0.001);
}

@@ -992,7 +1058,6 @@ static int klattp[N_KLATTP];
static double klattp1[N_KLATTP];
static double klattp_inc[N_KLATTP];

static int scale_wav_tab[] = {45,38,45,45}; // scale output from different voicing sources



@@ -1126,7 +1191,7 @@ void SetSynth_Klatt(int length, int modn, frame_t *fr1, frame_t *fr2, voice_t *v

if(wvoice != NULL)
{
if((wvoice->klattv[0] > 0) && (wvoice->klattv[0] <=3 ))
if((wvoice->klattv[0] > 0) && (wvoice->klattv[0] <=4 ))
{
kt_globals.glsource = wvoice->klattv[0];
kt_globals.scale_wav = scale_wav_tab[kt_globals.glsource];
@@ -1146,7 +1211,7 @@ void SetSynth_Klatt(int length, int modn, frame_t *fr1, frame_t *fr2, voice_t *v
{
if(qix >= N_WCMDQ) qix = 0;
if(qix == wcmdq_tail) break;
cmd = wcmdq[qix][0];
if(cmd==WCMD_KLATT)
{
@@ -1187,7 +1252,7 @@ if(option_log_frames)
fprintf(f_log,"K %3dmS %3d %3d %4d %4d (%2d) to %3d %3d %4d %4d (%2d)\n",length*1000/samplerate,
fr1->klattp[KLATT_FNZ]*2,fr1->ffreq[1],fr1->ffreq[2],fr1->ffreq[3], fr1->klattp[KLATT_AV],
fr2->klattp[KLATT_FNZ]*2,fr2->ffreq[1],fr2->ffreq[2],fr2->ffreq[3], fr2->klattp[KLATT_AV] );
fclose(f_log);
}
}
@@ -1292,18 +1357,7 @@ int Wavegen_Klatt2(int length, int modulation, int resume, frame_t *fr1, frame_t

void KlattInit()
{
#define NUMBER_OF_SAMPLES 100

static short natural_samples[NUMBER_OF_SAMPLES]=
{
-310,-400,530,356,224,89,23,-10,-58,-16,461,599,536,701,770,
605,497,461,560,404,110,224,131,104,-97,155,278,-154,-1165,
-598,737,125,-592,41,11,-247,-10,65,92,80,-304,71,167,-1,122,
233,161,-43,278,479,485,407,266,650,134,80,236,68,260,269,179,
53,140,275,293,296,104,257,152,311,182,263,245,125,314,140,44,
203,230,-235,-286,23,107,92,-91,38,464,443,176,98,-784,-2449,
-1891,-1045,-1600,-1462,-1384,-1261,-949,-730
};
static short formant_hz[10] = {280,688,1064,2806,3260,3700,6500,7000,8000,280};
static short bandwidth[10] = {89,160,70,160,200,200,500,500,500,89};
static short parallel_amp[10] = { 0,59,59,59,59,59,59,0,0,0};
@@ -1311,6 +1365,12 @@ void KlattInit()

int ix;

for(ix=0; ix<256; ix++)
{
// TEST: Overwrite natural_samples2
// sawtooth wave
// natural_samples2[ix] = (128-ix) * 20;
}
sample_count=0;

kt_globals.synthesis_model = CASCADE_PARALLEL;

+ 3
- 2
src/klatt.h View File

@@ -1,11 +1,12 @@


#define CASCADE_PARALLEL 1 /* Type of synthesis model */
#define ALL_PARALLEL 2
#define ALL_PARALLEL 2

#define IMPULSIVE 1 /* Type of voicing source */
#define NATURAL 2
#define SAMPLED 3
#define SAMPLED2 4

#define PI 3.1415927

@@ -96,7 +97,7 @@ typedef struct
resonator_t rsn_next[N_RSN];

} klatt_global_t, *klatt_global_ptr;
/* Structure for Klatt Parameters */

#define F_NZ 0 // nasal zero formant

+ 1
- 1
src/synthdata.cpp View File

@@ -35,7 +35,7 @@
#include "translate.h"
#include "wave.h"

const char *version_string = "1.47.10a 03.May.13";
const char *version_string = "1.47.10b 03.May.13";
const int version_phdata = 0x014709;

int option_device_number = -1;

Loading…
Cancel
Save