mahta.fetrat
/
HomoFast-eSpeak-Persian


			
							123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618
							/***************************************************************************
 *   Copyright (C) 2005 to 2007 by Jonathan Duddington                     *
 *   email: [email protected]                                    *
 *                                                                         *
 *   This program is free software; you can redistribute it and/or modify  *
 *   it under the terms of the GNU General Public License as published by  *
 *   the Free Software Foundation; either version 3 of the License, or     *
 *   (at your option) any later version.                                   *
 *                                                                         *
 *   This program is distributed in the hope that it will be useful,       *
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of        *
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the         *
 *   GNU General Public License for more details.                          *
 *                                                                         *
 *   You should have received a copy of the GNU General Public License     *
 *   along with this program; if not, see:                                 *
 *               <http://www.gnu.org/licenses/>.                           *
 ***************************************************************************/

#include "StdAfx.h"

#include <stdlib.h>
#include <stdio.h>
#include <wctype.h>

#include "speak_lib.h"
#include "speech.h"
#include "phoneme.h"
#include "synthesize.h"
#include "voice.h"
#include "translate.h"

extern int GetAmplitude(void);


// Converts a words-per-minute setting to the internal speed factor.
// The table is indexed by (wpm - 80) and covers 80..369 wpm (290 entries);
// SetSpeed() clamps wpm into that range before indexing.  The number in the
// comment at the end of each row is the wpm of the first entry in that row.
// The table is read-only, so it is declared const.
static const unsigned char speed_lookup[290] = {
	 250, 246, 243, 239, 236,	//  80
	 233, 229, 226, 223, 220,	//  85
	 217, 214, 211, 208, 205,	//  90
	 202, 197, 194, 192, 190,	//  95
	 187, 185, 183, 180, 178,	// 100
	 176, 174, 172, 170, 168,	// 105
	 166, 164, 161, 159, 158,	// 110
	 156, 154, 152, 150, 148,	// 115
	 146, 145, 143, 141, 137,	// 120
	 136, 135, 133, 132, 131,	// 125
	 129, 128, 127, 126, 125,	// 130
	 124, 122, 121, 120, 119,	// 135
	 117, 116, 115, 114, 113,	// 140
	 112, 111, 110, 108, 107,	// 145
	 106, 105, 104, 103, 102,	// 150
	 101, 100,  99,  98,  97,	// 155
	  96,  95,  93,  92,  92,	// 160
	  91,  90,  89,  89,  88,	// 165
	  87,  87,  86,  85,  85,	// 170
	  84,  83,  83,  82,  81,	// 175
	  80,  80,  79,  78,  78,	// 180
	  77,  76,  76,  75,  73,	// 185
	  72,  72,  71,  71,  70,	// 190
	  70,  69,  69,  68,  67,	// 195
	  67,  66,  66,  65,  65,	// 200
	  64,  64,  63,  63,  62,	// 205
	  62,  61,  60,  60,  59,	// 210
	  59,  58,  58,  57,  57,	// 215
	  56,  56,  55,  55,  55,	// 220
	  54,  54,  53,  53,  52,	// 225
	  52,  51,  51,  50,  50,	// 230
	  49,  49,  49,  48,  48,	// 235
	  47,  47,  46,  46,  46,	// 240
	  45,  45,  44,  44,  43,	// 245
	  43,  43,  42,  42,  41,	// 250
	  41,  41,  40,  40,  39,	// 255
	  39,  39,  38,  38,  38,	// 260
	  37,  37,  37,  36,  36,	// 265
	  35,  35,  35,  34,  34,	// 270
	  34,  33,  33,  33,  32,	// 275
	  32,  32,  32,  31,  31,	// 280
	  31,  30,  30,  30,  29,	// 285
	  29,  29,  29,  28,  28,	// 290
	  28,  28,  27,  27,  27,	// 295
	  26,  26,  26,  26,  25,	// 300
	  25,  25,  22,  22,  22,	// 305
	  22,  22,  22,  22,  22,	// 310
	  21,  21,  21,  21,  21,	// 315
	  21,  20,  20,  20,  20,	// 320
	  20,  15,  15,  15,  15,	// 325
	  15,  15,  15,  15,  16,	// 330
	  16,  16,  16,  15,  15,	// 335
	  15,  15,  15,  15,  15,	// 340
	  15,  17,  17,  16,  16,	// 345
	  15,  15,  14,  14,  13,	// 350
	  13,  12,  12,  11,  11,	// 355
	  10,  10,   9,   8,   8,	// 360
	   7,   6,   5,   5,   4,	// 365
};

// speed_factor2 values for speeds 370 to 390 wpm, indexed by (wpm - 370).
// SetSpeed() clamps the requested wpm to 390 before indexing, so all 21
// entries are reachable and none beyond.  Read-only, hence const.
static const unsigned char faster[] = {
114,112,110,109,107,105,104,102,100,98, // 370-379
96,94,92,90,88,85,83,80,78,75,72 }; //380-390

// Speed factors applied to vowel length modifiers in CalcLengths(), selected
// by how many syllables follow the vowel within its word.  The initial
// values below are replaced whenever SetSpeed() is called with bit 0 set.
static int speed1 = 130;   // no further syllables in the word (also used for post-vocalic liquids)
static int speed2 = 121;   // exactly one further syllable
static int speed3 = 118;   // two or more further syllables


void SetSpeed(int control)
{//=======================
	int x;
	int s1;
	int wpm;
	int wpm2;

	wpm2 = wpm = embedded_value[EMBED_S];
	if(wpm > 369) wpm = 369;
	if(wpm < 80) wpm = 80;

	x = speed_lookup[wpm-80];

	if(control & 1)
	{
		// set speed factors for different syllable positions within a word
		// these are used in CalcLengths()
		speed1 = (x * voice->speedf1)/256;
		speed2 = (x * voice->speedf2)/256;
		speed3 = (x * voice->speedf3)/256;
	}

	if(control & 2)
	{
		// these are used in synthesis file
		s1 = (x * voice->speedf1)/256;
		speed_factor1 = (256 * s1)/115;      // full speed adjustment, used for pause length
if(speed_factor1 < 15)
	speed_factor1 = 15;
		if(wpm >= 170)
//			speed_factor2 = 100 + (166*s1)/128;  // reduced speed adjustment, used for playing recorded sounds
			speed_factor2 = 110 + (150*s1)/128;  // reduced speed adjustment, used for playing recorded sounds
		else
			speed_factor2 = 128 + (128*s1)/130;  // = 215 at 170 wpm

		if(wpm2 > 369)
		{
			if(wpm2 > 390)
				wpm2 = 390;
			speed_factor2 = faster[wpm2 - 370];
		}
	}

	speed_min_sample_len = 450;
}  //  end of SetSpeed


#ifdef deleted
void SetAmplitude(int amp)
{//=======================
// Disabled code: only compiled when the macro `deleted` is defined.
// Maps an amplitude setting in the range 0..20 to option_amplitude through
// a lookup table; values outside that range are ignored.
	static unsigned char amplitude_factor[] = {0,5,6,7,9,11,14,17,21,26, 32, 38,44,50,56,63,70,77,84,91,100 };

	if((amp < 0) || (amp > 20))
		return;

	option_amplitude = (amplitude_factor[amp] * 480)/256;
}
#endif


void SetParameter(int parameter, int value, int relative)
{//======================================================
// Store a speech parameter in param_stack[0] and apply it to the matching
// global setting.
// parameter: reset-all, amp, pitch, speed, linelength, expression, capitals, number grouping
// value:     the new setting, or a percentage change when 'relative' is set
// relative:  0=absolute  1=relative (only applied to parameters numbered below 5)
//
// NOTE(review): 'parameter' is used as an array index without a range
// check - confirm that all callers pass a valid parameter number.

	int setting = value;

	if(relative && (parameter < 5))
	{
		// adjust the default by 'value' percent
		const int base = param_defaults[parameter];
		setting = base + (base * value)/100;
	}

	// the stored value is the one before any clamping applied below
	param_stack[0].parameter[parameter] = setting;

	switch(parameter)
	{
	case espeakRATE:
		embedded_value[EMBED_S] = setting;
		embedded_value[EMBED_S2] = setting;
		SetSpeed(3);   // refresh both sets of speed factors
		break;

	case espeakVOLUME:
		embedded_value[EMBED_A] = setting;
		GetAmplitude();
		break;

	case espeakPITCH:
		// keep within 0..99
		if(setting > 99)
			setting = 99;
		else
		if(setting < 0)
			setting = 0;
		embedded_value[EMBED_P] = setting;
		break;

	case espeakRANGE:
		// upper limit only
		if(setting > 99)
			setting = 99;
		embedded_value[EMBED_R] = setting;
		break;

	case espeakLINELENGTH:
		option_linelength = setting;
		break;

	case espeakWORDGAP:
		option_wordgap = setting;
		break;

	case espeakINTONATION:
		{
			// a non-zero low byte selects the intonation group;
			// the whole value is kept as the tone flags
			const int group = setting & 0xff;
			if(group != 0)
				translator->langopts.intonation_group = group;
			option_tone_flags = setting;
		}
		break;

	default:
		break;
	}
}  // end of SetParameter


static void DoEmbedded2(int &embix)
{//================================
	// Embedded commands occur in the text at this point.
	// Consume entries of embedded_list[] starting at 'embix' (advanced past
	// each entry read) up to and including the first one with bit 0x80 set.
	// Only speed commands are acted upon here; all others are skipped.

	for(;;)
	{
		const unsigned int command = embedded_list[embix++];

		if((command & 0x1f) == EMBED_S)
		{
			// speed: SetEmbedded() adjusts embedded_value[EMBED_S],
			// then the factors used by CalcLengths() are refreshed
			SetEmbedded(command & 0x7f, command >> 8);
			SetSpeed(1);
		}

		if((command & 0x80) != 0)
			break;   // last command of this group
	}
}


void Translator::CalcLengths()
{//===========================
// Walk phoneme_list[1 .. n_phoneme_list-1] and, depending on each phoneme's
// type and its neighbours, fill in its prepause, length, amp, pitch1/pitch2,
// env and SFLAG_SEQCONTINUE fields.  A vowel may also rewrite the pitch,
// env, length and amp of the phoneme before it and the synthflags of the
// phoneme after it.
//
// State carried from one phoneme to the next:
//   last_pitch    pitch at the end of the most recent vowel (0 after a pause or stop)
//   pre_sonorant  set by a liquid/nasal that is directly followed by a vowel
//   pre_voiced    set by a voiced fricative/stop that precedes a vowel (or liquid)
//   more_syllables  number of syllables remaining in the word after the last vowel seen
//
// NOTE(review): the code reads phoneme_list[ix+1], [ix+2] and [ix+3], so it
// presumably relies on valid entries existing beyond n_phoneme_list - confirm.
	int ix;
	int ix2;
	PHONEME_LIST *prev;
	PHONEME_LIST *next;
	PHONEME_LIST *next2;
	PHONEME_LIST *next3;
	PHONEME_LIST *p;
	PHONEME_LIST *p2;

	int  stress;
	int  type;
	// NOTE(review): static, so the value survives between calls; the phSTOP
	// case can read it before any vowel in the current call has updated it.
	static int  more_syllables=0;
	int  pre_sonorant=0;
	int  pre_voiced=0;
	int  last_pitch = 0;
	int  pitch_start;
	int  length_mod;
	int  len;
	int  env2;
	int  end_of_clause;
	int  embedded_ix = 0;
	int  min_drop;
	int emphasized;
	unsigned char *pitch_env=NULL;

	for(ix=1; ix<n_phoneme_list; ix++)
	{
		prev = &phoneme_list[ix-1];
		p = &phoneme_list[ix];
		stress = p->tone & 0x7;      // low three bits: stress level
		emphasized = p->tone & 0x8;  // bit 3: emphasis flag

		next = &phoneme_list[ix+1];

		if(p->synthflags & SFLAG_EMBEDDED)
		{
			// process embedded commands here, so that a speed change
			// takes effect from this phoneme onwards
			DoEmbedded2(embedded_ix);
		}

		type = p->type;
		if(p->synthflags & SFLAG_SYLLABLE)
			type = phVOWEL;   // syllabic phoneme: handle it in the vowel case

		switch(type)
		{
		case phPAUSE:
			last_pitch = 0;
			break;
			
		case phSTOP:
			// choose the pause before the stop: shortest after a fricative,
			// longest when no syllables remain in the word and stress >= 4
			last_pitch = 0;
			if(prev->type == phFRICATIVE)
				p->prepause = 20;
			else
			if((more_syllables > 0) || (stress < 4))
				p->prepause = 40;
			else
				p->prepause = 60;

			// word_gap option bit 0x10 overrides the value at the start of a word
			if((langopts.word_gap & 0x10) && (p->newword))
				p->prepause = 60;

			if(p->synthflags & SFLAG_LENGTHEN)
				p->prepause += langopts.long_stop;
			break;

		case phVFRICATIVE:
			if(next->type==phVOWEL)
			{
				pre_voiced = 1;
			}   // drop through
		case phFRICATIVE:
			// the later assignments to prepause below override the earlier ones
			if(p->newword)
				p->prepause = 15;

			if(next->type==phPAUSE && prev->type==phNASAL && !(p->ph->phflags&phFORTIS))
				p->prepause = 25;

			if(prev->ph->phflags & phBRKAFTER)
				p->prepause = 30;

			// a sibilant followed by a stop in the same word is shortened
			if((p->ph->phflags & phSIBILANT) && next->type==phSTOP && !next->newword)
			{
				if(prev->type == phVOWEL)
					p->length = 200;      // ?? should do this if it's from a prefix
				else
					p->length = 150;
			}
			else
				p->length = 256;

			if((langopts.word_gap & 0x10) && (p->newword))
				p->prepause = 30;

			break;

		case phVSTOP:
			if(prev->type==phVFRICATIVE || prev->type==phFRICATIVE || (prev->ph->phflags & phSIBILANT) || (prev->type == phLIQUID))
				p->prepause = 30;

			if(next->type==phVOWEL || next->type==phLIQUID)
			{
				// pre_voiced is not set for a liquid that starts a new word
				if((next->type==phVOWEL) || !next->newword)
					pre_voiced = 1;

				p->prepause = 40;

				if((prev->type == phPAUSE) || (prev->type == phVOWEL)) // || (prev->ph->mnemonic == ('/'*256+'r')))
					p->prepause = 0;
				else
				if(p->newword==0)
				{
					// within a word the pause depends on the preceding consonant
					if(prev->type==phLIQUID)
						p->prepause = 20;
					if(prev->type==phNASAL)
						p->prepause = 12;

					if(prev->type==phSTOP && !(prev->ph->phflags & phFORTIS))
						p->prepause = 0;
				}
			}
			// with word_gap bit 0x10, enforce a minimum pause of 20 at a word start
			if((langopts.word_gap & 0x10) && (p->newword) && (p->prepause < 20))
				p->prepause = 20;

			break;

		case phLIQUID:
		case phNASAL:
			p->amp = stress_amps[1];  // unless changed later
			p->length = 256;  //  TEMPORARY
			min_drop = 0;     // not read again within this case
			
			if(p->newword)
			{
				if(prev->type==phLIQUID)
					p->prepause = 25;
				if(prev->type==phVOWEL)
					p->prepause = 12;
			}

			if(next->type==phVOWEL)
			{
				// the following vowel will set this phoneme's pitch, length and amp
				pre_sonorant = 1;
			}
			else
			if((prev->type==phVOWEL) || (prev->type == phLIQUID))
			{
				// post-vocalic: continue with a falling pitch from the end of the vowel
				p->length = prev->length;
				p->pitch2 = last_pitch;
				if(p->pitch2 < 7)
					p->pitch2 = 7;
				p->pitch1 = p->pitch2 - 8;
				p->env = PITCHfall;
				pre_voiced = 0;
				
				if(p->type == phLIQUID)
				{
					p->length = speed1;
//p->pitch1 = p->pitch2 - 20;   // post vocalic [r/]
				}

				// lengthen before a voiced stop (x1.6) or a voiced fricative (x1.2)
				if(next->type == phVSTOP)
				{
					p->length = (p->length * 160)/100;
				}
				if(next->type == phVFRICATIVE)
				{
					p->length = (p->length * 120)/100;
				}
			}
			else
			{
				// neither before a vowel nor after a vowel/liquid:
				// take the pitch from the next vowel in the list, if there is one
				p->pitch2 = last_pitch;
				for(ix2=ix; ix2<n_phoneme_list; ix2++)
				{
					if(phoneme_list[ix2].type == phVOWEL)
					{
						p->pitch2 = phoneme_list[ix2].pitch2;
						break;
					}
				}
				p->pitch1 = p->pitch2-8;
				p->env = PITCHfall;
				pre_voiced = 0;
			}
			break;

		case phVOWEL:
			// also reached for non-vowels which have SFLAG_SYLLABLE set
			min_drop = 0;
			next2 = &phoneme_list[ix+2];
			next3 = &phoneme_list[ix+3];

			// NOTE(review): stress was masked with 0x7 above, so this test cannot be true
			if(stress > 7) stress = 7;

			// amplitude from the stress level, one less when following a sonorant
			if(pre_sonorant)
				p->amp = stress_amps[stress]-1;
			else
				p->amp = stress_amps[stress];

			if(emphasized)
				p->amp = 25;

			if(ix >= (n_phoneme_list-3))
			{
				// last phoneme of a clause, limit its amplitude
				if(p->amp > langopts.param[LOPT_MAXAMP_EOC])
					p->amp = langopts.param[LOPT_MAXAMP_EOC];
			}

			// is the last syllable of a word ?
			// count the syllabic vowels between here and the next word start
			// NOTE(review): the loop only stops at a phoneme with newword set,
			// so the list presumably always ends with such an entry - confirm.
			more_syllables=0;
			end_of_clause = 0;
			for(p2 = p+1; p2->newword== 0; p2++)
			{
				if((p2->type == phVOWEL) && !(p2->ph->phflags & phNONSYLLABIC))
					more_syllables++;
			}
			// bit 1 of newword on the terminating phoneme marks the end of the clause
			if((p2->newword & 2) && (more_syllables==0))
			{
				end_of_clause = 2;
			}

			// calc length modifier
			// looked up from the length_mod values of the next two phonemes;
			// the final syllable of a word uses a separate table (length_mods0)
			if(more_syllables==0)
			{
				len = langopts.length_mods0[next2->ph->length_mod *10+ next->ph->length_mod];

				if((next->newword) && (langopts.word_gap & 0x20))
				{
					// consider as a pause + first phoneme of the next word
					length_mod = (len + langopts.length_mods0[next->ph->length_mod *10+ 1])/2;
				}
				else
					length_mod = len;
			}
			else
			{
				length_mod = langopts.length_mods[next2->ph->length_mod *10+ next->ph->length_mod];

				// shorten before nasal + stop + fortis phoneme
				if((next->type == phNASAL) && (next2->type == phSTOP || next2->type == phVSTOP) && (next3->ph->phflags & phFORTIS))
					length_mod -= 15;
			}

			// apply the speed factor for this syllable position (set by SetSpeed)
			if(more_syllables==0)
				length_mod *= speed1;
			else
			if(more_syllables==1)
				length_mod *= speed2;
			else
				length_mod *= speed3;

			length_mod = length_mod / 128;
//			if(length_mod < 9)
//				length_mod = 9;     // restrict how much lengths can be reduced
			if(length_mod < 8)
				length_mod = 8;     // restrict how much lengths can be reduced

			if(stress >= 7)
			{
				// tonic syllable, include a constant component so it doesn't decrease directly with speed
				length_mod += 20;
			}
			else
			if(emphasized)
			{
				length_mod += 20;
			}
			
			// scale by the length for this stress level; a zero entry means use level 6
			if((len = stress_lengths[stress]) == 0)
				len = stress_lengths[6];

			length_mod = (length_mod * len)/128;

			if(end_of_clause == 2)
			{
				// this is the last syllable in the clause, lengthen it - more for short vowels
				length_mod = length_mod * (256 + (280 - p->ph->std_length)/3)/256;
			}

// p->type differs from phVOWEL only when SFLAG_SYLLABLE forced this case
if(p->type != phVOWEL)
{
	length_mod = 256;   // syllabic consonant
	min_drop = 8;
}
			p->length = length_mod;

			// pre-vocalic part
			// set last-pitch
			env2 = p->env;
			if(env2 > 1) env2++;   // version for use with preceding semi-vowel

			// the pitch envelope comes from the tone phoneme when one is attached
			if(p->tone_ph != 0)
			{
				pitch_env = LookupEnvelope(phoneme_tab[p->tone_ph]->spect);
			}
			else
			{
				pitch_env = envelope_data[env2];
			}

			// pitch at the start of the vowel, from the first envelope point
			pitch_start = p->pitch1 + ((p->pitch2-p->pitch1)*pitch_env[0])/256;

			if(pre_sonorant || pre_voiced)
			{
				// set pitch for pre-vocalic part
				// the preceding phoneme runs from last_pitch up/down to pitch_start,
				// with a rise limited to 8
				if(pitch_start - last_pitch > 8)   // was 9
					last_pitch = pitch_start - 8;
				prev->pitch1 = last_pitch;
				prev->pitch2 = pitch_start;
				if(last_pitch < pitch_start)
				{
					prev->env = PITCHrise;
					p->env = env2;
				}
				else
				{
					prev->env = PITCHfall;
				}

				prev->length = length_mod;

				// same amplitude as the vowel, limited to 18 unless it is a liquid
				prev->amp = p->amp;
				if((prev->type != phLIQUID) && (prev->amp > 18))
					prev->amp = 18;
			}

			// vowel & post-vocalic part
			// SFLAG_SEQCONTINUE is set on a following nasal or liquid which is
			// not itself followed by a vowel
			next->synthflags &= ~SFLAG_SEQCONTINUE;
			if(next->type == phNASAL && next2->type != phVOWEL)
				next->synthflags |= SFLAG_SEQCONTINUE;
				
			if(next->type == phLIQUID)
			{
				next->synthflags |= SFLAG_SEQCONTINUE;
					
				if(next2->type == phVOWEL)
				{
					next->synthflags &= ~SFLAG_SEQCONTINUE;
				}

				if(next2->type != phVOWEL)
				{
					// the liquid with mnemonic "r/" is excluded
					if(next->ph->mnemonic == ('/'*256+'r'))
					{
						next->synthflags &= ~SFLAG_SEQCONTINUE;
//						min_drop = 15;
					}
				}
			}

			// enforce a minimum pitch drop across the phoneme (only non-zero
			// for a syllabic consonant, see above)
			if((min_drop > 0) && ((p->pitch2 - p->pitch1) < min_drop))
			{
				p->pitch1 = p->pitch2 - min_drop;
				if(p->pitch1 < 0)
					p->pitch1 = 0;
			}

			// remember the pitch at the end of this vowel (last envelope point)
			last_pitch = p->pitch1 + ((p->pitch2-p->pitch1)*envelope_data[p->env][127])/256;
			pre_sonorant = 0;
			pre_voiced = 0;
			break;
		}
	}
}  //  end of CalcLengths