/***************************************************************************
 *   Copyright (C) 2005 to 2007 by Jonathan Duddington                     *
 *   email: jonsd@users.sourceforge.net                                    *
 *                                                                         *
 *   This program is free software; you can redistribute it and/or modify  *
 *   it under the terms of the GNU General Public License as published by  *
 *   the Free Software Foundation; either version 3 of the License, or     *
 *   (at your option) any later version.                                   *
 *                                                                         *
 *   This program is distributed in the hope that it will be useful,       *
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of        *
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the         *
 *   GNU General Public License for more details.                          *
 *                                                                         *
 *   You should have received a copy of the GNU General Public License     *
 *   along with this program; if not, write see:                           *
 *               <http://www.gnu.org/licenses/>.                           *
 ***************************************************************************/

#include "StdAfx.h"

// NOTE(review): the header names of the six directives below were lost from
// this copy of the file (the <...> part is missing).  The code in this file
// uses FILE/sprintf/fopen/fwrite (<stdio.h>), realloc (<stdlib.h>) and
// strcat/strlen (<string.h>); restore the full list from the upstream file.
#include
#include
#include
#include
#include
#include

#include "speak_lib.h"
#include "speech.h"
#include "phoneme.h"
#include "synthesize.h"
#include "translate.h"
#include "voice.h"

extern int Read4Bytes(FILE *f);
extern void SetPitch2(voice_t *voice, int pitch1, int pitch2, int *pitch_base, int *pitch_range);


#ifdef USE_MBROLA_LIB

extern unsigned char *outbuf;

#ifndef PLATFORM_WINDOWS

#include "mbrolib.h"
void * mb_handle;

#else

// NOTE(review): header name lost from this copy as well.  WINAPI, HINSTANCE,
// LoadLibrary, GetProcAddress and FreeLibrary are used below, so this is
// presumably <windows.h> - confirm against the upstream file.
#include

// Function pointer types for the entry points exported by mbrola.dll
typedef void (WINAPI *PROCVV)(void);
typedef void (WINAPI *PROCVI)(int);
typedef void (WINAPI *PROCVF)(float);
typedef int (WINAPI *PROCIV)();
typedef int (WINAPI *PROCIC) (char *);
typedef int (WINAPI *PROCISI)(short *,int);
typedef char* (WINAPI *PROCVCI)(char *,int);

// Entry points of mbrola.dll, filled in by load_MBR()
PROCIC init_MBR;
PROCIC write_MBR;
PROCIV flush_MBR;
PROCISI read_MBR;
PROCVV close_MBR;
PROCVV reset_MBR;
PROCIV lastError_MBR;
PROCVCI lastErrorStr_MBR;
PROCVI setNoError_MBR;
PROCVI setFreq_MBR;
PROCVF setVolumeRatio_MBR;

HINSTANCE hinstDllMBR = NULL;


BOOL load_MBR()
{//============
// Load mbrola.dll (only once) and look up its entry points.
// Returns TRUE if the library is loaded and the entry points which this file
// calls were all found.  Otherwise returns FALSE and leaves the library unloaded.
	if(hinstDllMBR != NULL)
		return TRUE;   // already loaded

	if (!(hinstDllMBR=LoadLibrary("mbrola.dll")))
		return FALSE;
	init_MBR =(PROCIC) GetProcAddress(hinstDllMBR,"init_MBR");
	write_MBR =(PROCIC) GetProcAddress(hinstDllMBR,"write_MBR");
	flush_MBR =(PROCIV) GetProcAddress(hinstDllMBR,"flush_MBR");
	read_MBR =(PROCISI) GetProcAddress(hinstDllMBR,"read_MBR");
	close_MBR =(PROCVV) GetProcAddress(hinstDllMBR,"close_MBR");
	reset_MBR =(PROCVV) GetProcAddress(hinstDllMBR,"reset_MBR");
	lastError_MBR =(PROCIV) GetProcAddress(hinstDllMBR,"lastError_MBR");
	lastErrorStr_MBR =(PROCVCI) GetProcAddress(hinstDllMBR,"lastErrorStr_MBR");
	setNoError_MBR =(PROCVI) GetProcAddress(hinstDllMBR,"setNoError_MBR");
	setFreq_MBR =(PROCVI) GetProcAddress(hinstDllMBR,"setFreq_MBR");   // was declared but never looked up
	setVolumeRatio_MBR =(PROCVF) GetProcAddress(hinstDllMBR,"setVolumeRatio_MBR");

	// GetProcAddress() gives NULL for an export which is missing.  Don't report
	// success if one of the functions called from this file can't be used.
	// Release the DLL so that a later call tries again, rather than returning
	// TRUE from the "already loaded" test above.
	if((init_MBR == NULL) || (write_MBR == NULL) || (flush_MBR == NULL) ||
		(read_MBR == NULL) || (setNoError_MBR == NULL))
	{
		FreeLibrary (hinstDllMBR);
		hinstDllMBR=NULL;
		return FALSE;
	}
	return TRUE;
}


void unload_MBR()
{//==============
// Release mbrola.dll if it is loaded.  Does nothing otherwise.
	if (hinstDllMBR)
	{
		FreeLibrary (hinstDllMBR);
		hinstDllMBR=NULL;
	}
}

#endif   // windows
#endif   // USE_MBROLA_LIB


MBROLA_TAB *mbrola_tab = NULL;   // phoneme translation table, (re)allocated by LoadMbrolaTable()
int mbrola_control = 0;          // first 4 bytes of the phoneme translation file


espeak_ERROR LoadMbrolaTable(const char *mbrola_voice, const char *phtrans, int srate)
{//===================================================================================
// Load a phoneme name translation table from espeak-data/mbrola
//
// mbrola_voice:  name of the mbrola voice, or NULL to switch back to eSpeak's
//                own synthesis (native sample rate, voice type 0).
// phtrans:       name of the translation file inside <path_home>/mbrola_ph
// srate:         sample rate, passed to mbrolib_init() in the non-Windows build
//
// Returns EE_OK, EE_NOT_FOUND (voice or translation file can't be opened) or
// EE_INTERNAL_ERROR (mbrola library or memory allocation failure).

	int size;
	int ix;
	int *pw;
	FILE *f_in;
	char path[sizeof(path_home)+15];

	mbrola_name[0] = 0;
	mbrola_delay = 0;

	if(mbrola_voice == NULL)
	{
		samplerate = samplerate_native;
		SetParameter(espeakVOICETYPE,0,0);
		return(EE_OK);
	}

	sprintf(path,"%s/mbrola/%s",path_home,mbrola_voice);
#ifdef USE_MBROLA_LIB
#ifdef PLATFORM_WINDOWS
	if(load_MBR() == FALSE)     // load mbrola.dll
		return(EE_INTERNAL_ERROR);

	if(init_MBR(path) != 0)      // initialise the required mbrola voice
		return(EE_NOT_FOUND);

	setNoError_MBR(1);     // don't stop on phoneme errors
#else
	mb_handle = mbrolib_init(srate);
	mbrolib_parameter m_parameters;

	if(mb_handle == NULL)
		return(EE_INTERNAL_ERROR);

	MBROLIB_ERROR a_status = mbrolib_set_voice(mb_handle, mbrola_voice);
	if(a_status != MBROLIB_OK)
		return(EE_NOT_FOUND);
#endif   // not windows
#endif   // USE_MBROLA_LIB

	// read eSpeak's mbrola phoneme translation data, eg. en1_phtrans
	sprintf(path,"%s/mbrola_ph/%s",path_home,phtrans);
	size = GetFileLength(path);
	if((f_in = fopen(path,"r")) == NULL)
		return(EE_NOT_FOUND);

	if((mbrola_tab = (MBROLA_TAB *)realloc(mbrola_tab,size)) == NULL)
	{
		fclose(f_in);
		return(EE_INTERNAL_ERROR);
	}

	mbrola_control = Read4Bytes(f_in);
	pw = (int *)mbrola_tab;

// NOTE(review): this copy of the file is damaged from here.  The text between
// the start of the loop header "for(ix=4; ix" and "mnemonic;" is missing, i.e.
// the rest of LoadMbrolaTable() and the signature and first lines of the
// function which follows it (the one called as GetMbrName() from
// MbrolaTranslate()).  The remaining tokens are kept exactly as found.
// Restore the missing text from the upstream file; until then this does not
// compile.  While restoring, also review the visible part above: the
// translation file is binary but is opened with "r", realloc() overwrites
// mbrola_tab directly, and the sprintf() calls into path[] are unbounded.
	for(ix=4; ixmnemonic;
	MBROLA_TAB *pr;
	PHONEME_TAB *other_ph;
	int found = 0;

	// control
	// bit 0  skip the next phoneme
	// bit 1  match this and Previous phoneme
	// bit 2  only at the start of a word
	// bit 3  don't match two phonemes across a word boundary

	// the table is terminated by an entry whose name is 0
	pr = mbrola_tab;
	while(pr->name != 0)
	{
		if(mnem == pr->name)
		{
			if(pr->next_phoneme == 0)
				found = 1;    // no condition on the adjacent phoneme
			else
			if((pr->next_phoneme == ':') && (plist->synthflags & SFLAG_LENGTHEN))
			{
				found = 1;
			}
			else
			{
				// choose which adjacent phoneme the entry is matched against
				if(pr->control & 2)
					other_ph = ph_prev;
				else
				if((pr->control & 8) && ((plist+1)->newword))
					other_ph = phoneme_tab[phPAUSE];  // don't match the next phoneme over a word boundary
				else
					other_ph = ph_next;

				// next_phoneme is a mnemonic, or 2 = any vowel, or '_' = any pause
				if((pr->next_phoneme == other_ph->mnemonic) ||
					((pr->next_phoneme == 2) && (other_ph->type == phVOWEL)) ||
					((pr->next_phoneme == '_') && (other_ph->type == phPAUSE)))
				{
					found = 1;
				}
			}

			if((pr->control & 4) && (plist->newword == 0))  // only at start of word
				found = 0;

			if(found)
			{
				*name2 = pr->mbr_name2;
				*split = pr->percent;
				*control = pr->control;
				return(pr->mbr_name);
			}
		}

		pr++;
	}

	// not in the table, use the eSpeak mnemonic unchanged
	*name2=0;
	*split=0;
	*control=0;
	return(mnem);
}


static char *WritePitch(int env, int pitch1, int pitch2, int split, int final)
{//===========================================================================
// Build the pitch-point part of a mbrola .pho line, as "position pitch" pairs.
//
// env:            index into envelope_data[] (a 128 point pitch envelope)
// pitch1,pitch2:  passed to SetPitch2() to get the pitch base and range
// split:          0 = the envelope covers a single mbrola phoneme
//                 >0 = this is the first of two phonemes, which takes 'split' percent
//                 <0 = this is the second of two phonemes, the first took -split percent
// final=1: only give the final pitch value.
//
// Returns a pointer to a static buffer, which is overwritten by the next call.
	int x;
	int ix;
	int pitch_base;
	int pitch_range;
	int p1,p2,p_end;
	unsigned char *pitch_env;
	int max = -1;
	int min = 999;
	int y_max=0;
	int y_min=0;
	int env100 = 80;  // apply the pitch change only over this proportion of the mbrola phoneme(s)
	int y2;
	int y[4];
	int env_split;
	char buf[50];
	static char output[50];

	output[0] = 0;
	pitch_env = envelope_data[env];

	SetPitch2(voice, pitch1, pitch2, &pitch_base, &pitch_range);

	// position of the split within the envelope (0 to 128), always positive
	env_split = (split * 128)/100;
	if(env_split < 0)
		env_split = 0-env_split;

	// find max and min in the pitch envelope
	for(x=0; x<128; x++)
	{
		if(pitch_env[x] > max)
		{
			max = pitch_env[x];
			y_max = x;
		}
		if(pitch_env[x] < min)
		{
			min = pitch_env[x];
			y_min = x;
		}
	}

	// set an additional pitch point half way through the phoneme.
	// but look for a maximum or a minimum and use that instead
	// (if both are inside the envelope, the minimum is the one which is used)
	y[2] = 64;
	if((y_max > 0) && (y_max < 127))
	{
		y[2] = y_max;
	}
	if((y_min > 0) && (y_min < 127))
	{
		y[2] = y_min;
	}
	y[1] = y[2] / 2;
	y[3] = y[2] + (127 - y[2])/2;

	// set initial pitch
	p1 = ((pitch_env[0]*pitch_range)>>8) + pitch_base;   // Hz << 12
	p_end = ((pitch_env[127]*pitch_range)>>8) + pitch_base;

	if(split >= 0)
	{
		// the start point is not written for the second of two phonemes
		sprintf(buf," 0 %d",p1/4096);
		strcat(output,buf);
	}

	// don't use intermediate pitch points for linear rise and fall
	if(env > 1)
	{
		for(ix=1; ix<4; ix++)
		{
			p2 = ((pitch_env[y[ix]]*pitch_range)>>8) + pitch_base;

			// convert the envelope position to a position within this mbrola phoneme
			if(split > 0)
			{
				y2 = (y[ix] * env100)/env_split;
			}
			else
			if(split < 0)
			{
				y2 = ((y[ix]-env_split) * env100)/env_split;
			}
			else
			{
				y2 = (y[ix] * env100)/128;
			}

			// only write points which fall inside this phoneme
			if((y2 > 0) && (y2 <= env100))
			{
				sprintf(buf," %d %d",y2,p2/4096);
				strcat(output,buf);
			}
		}
	}

	p_end = p_end/4096;
	if(split <= 0)
	{
		// the end point is not written for the first of two phonemes
		sprintf(buf," %d %d",env100,p_end);
		strcat(output,buf);
	}
	if(env100 < 100)
	{
		sprintf(buf," %d %d",100,p_end);
		strcat(output,buf);
	}
	strcat(output,"\n");

	if(final)
		sprintf(output,"\t100 %d\n",p_end);   // replaces everything written above
	return(output);
}  // end of WritePitch


#ifdef USE_MBROLA_LIB

static void MbrolaMarker(int type, int char_posn, int length, int value)
{//=====================================================================
// Pass an event to MarkerEvent(), with outbuf as the output position.
// char_posn (low 24 bits) and length (from bit 24 up) are packed into one value.
	MarkerEvent(type,(char_posn & 0xffffff) | (length << 24),value,outbuf);
}


static void MbrolaEmbedded(int &embix, int sourceix)
{//=================================================
// There were embedded commands in the text at this point
// Reads entries from embedded_list[] starting at embix, up to and including
// the entry which has bit 7 set.  embix is left pointing after that entry.

	unsigned int word;  // bit 7=last command for this word, bits 5,6 sign, bits 0-4 command
	unsigned int value;
	int command;
	int sign=0;

	do {
		word = embedded_list[embix++];
		value = word >> 8;
		command = word & 0x1f;

		// bits 5,6:  0x60 = subtract the value, 0x40 = add the value.
		// sign is not cleared here, so it carries over to the following entries
		if((word & 0x60) == 0x60)
			sign = -1;
		else
		if((word & 0x60) == 0x40)
			sign = 1;

		if(command < N_EMBEDDED_VALUES)
		{
			if(sign == 0)
				embedded_value[command] = value;     // absolute value
			else
				embedded_value[command] += (value * sign);   // relative change
		}

		switch(command & 0x1f)
		{
		case EMBED_M:   // named marker
			MbrolaMarker(espeakEVENT_MARK, (sourceix & 0x7ff) + clause_start_char, 0, value);
			break;
		}
	} while ((word & 0x80) == 0);
}


#ifdef PLATFORM_WINDOWS

int MbrolaSynth(char *p_mbrola)
{//============================
// p_mbrola is a string of mbrola pho lines - Windows
// If p_mbrola is NULL then mbrola is flushed instead.
// The speech which mbrola produces is read into outbuf and passed to
// synth_callback, together with the current event list.
// Returns non-zero if there is no callback, or if the callback asked to stop.
	int len;
	int finished;
	int result=0;

	if(synth_callback == NULL)
		return(1);

	if(p_mbrola == NULL)
		flush_MBR();
	else
		result = write_MBR(p_mbrola);

	finished = 0;
	while(!finished && ((len = read_MBR((short *)outbuf, outbuf_size/2)) > 0))
	{
		// len is a number of 16 bit samples
		out_ptr = outbuf + len*2;

		if(event_list)
		{
			event_list[event_list_ix].type = espeakEVENT_LIST_TERMINATED; // indicates end of event list
			event_list[event_list_ix].user_data = 0;
		}
		count_samples += len;
		finished = synth_callback((short *)outbuf, len, event_list);
		event_list_ix=0;
	}

	if(finished)
	{
		// cancelled by user, discard any unused mbrola speech
		flush_MBR();
		while((len = read_MBR((short *)outbuf, outbuf_size/2)) > 0);
	}
	return(finished);
}  // end of SynthMbrola

#else

int MbrolaSynth(char *p_mbrola)
{//============================
// p_mbrola is a string of mbrola pho lines - Linux
// This is wrong
// It must be called from WavegenFill()
//
// If p_mbrola is NULL then mbrola is flushed instead.
// The speech which mbrola produces is read into outbuf and passed to
// synth_callback, together with the current event list.
// Returns non-zero if there is no callback, or if the callback asked to stop.
	int len;
	int finished;
	int result=0;

	if(synth_callback == NULL)
		return(1);

	if(p_mbrola == NULL)
		mbrolib_flush(mb_handle);
	else
		result = mbrolib_write(mb_handle,p_mbrola,strlen(p_mbrola));

	finished = 0;
	// Always read to the start of outbuf, as in the Windows version.
	// Previously the samples were read to out_ptr, which moves forward after each
	// read, but the callback was given outbuf.  So after the first read the
	// callback got old data, and the space left for reading shrank to nothing.
	while(!finished && (mbrolib_read(mb_handle, (short *)outbuf, outbuf_size/2, &len) == MBROLIB_OK))
	{
		if(len == 0)
			break;

		// len is a number of 16 bit samples
		out_ptr = outbuf + len*2;

		if(event_list)
		{
			event_list[event_list_ix].type = espeakEVENT_LIST_TERMINATED; // indicates end of event list
			event_list[event_list_ix].user_data = 0;
		}
		count_samples += len;
		finished = synth_callback((short *)outbuf, len, event_list);
		event_list_ix=0;
	}

	if(finished)
	{
		// cancelled by user, discard any unused mbrola speech
		mbrolib_flush(mb_handle);
		while(mbrolib_read(mb_handle, (short *)outbuf, outbuf_size/2, &len) == MBROLIB_OK)
		{
			if(len == 0)
				break;
		}
	}
	return(finished);
}  // end of SynthMbrola
#endif  // not windows
#endif  // USE_MBROLA_LIB


void MbrolaTranslate(PHONEME_LIST *plist, int n_phonemes, FILE *f_mbrola)
{//======================================================================
// Generate a mbrola pho file
//
// plist:       the phoneme list.  Entries 1 to n_phonemes-1 are translated, and
//              the entries on either side (phix-1 and phix+1) are also read.
// n_phonemes:  number of entries to process
// f_mbrola:    if not NULL, the .pho lines are written to this file.  If NULL,
//              and USE_MBROLA_LIB is defined, they are passed to MbrolaSynth().
	unsigned int name;
	int phix;
	int len;
	int len1;
	PHONEME_TAB *ph;
	PHONEME_TAB *ph_next;
	PHONEME_TAB *ph_prev;
	PHONEME_LIST *p;
	PHONEME_LIST *next;
	PHONEME_LIST *prev;
	int pause = 0;
	int released;
	int name2;
	int control;
	int done;
	int len_percent;
	const char *final_pitch;
	char buf[80];
	char mbr_buf[120];    // the .pho line(s) for one eSpeak phoneme

#ifdef USE_MBROLA_LIB
	int embedded_ix=0;
	int word_count=0;

	event_list_ix = 0;
	out_ptr = outbuf;
#ifdef PLATFORM_WINDOWS
	setNoError_MBR(1);     // don't stop on phoneme errors
#endif
#else
//	fprintf(f_mbrola,";; v=%.2f\n",(float)(mbrola_control & 0xff)/16.0);   //  ;; v=  has no effect on mbrola
#endif

	for(phix=1; phix < n_phonemes; phix++)
	{
		mbr_buf[0] = 0;

		p = &plist[phix];
		next = &plist[phix+1];
		prev = &plist[phix-1];
		ph = p->ph;
		ph_prev = plist[phix-1].ph;
		ph_next = plist[phix+1].ph;

#ifdef USE_MBROLA_LIB
		// events: embedded commands, start of sentence, start of word
		if(p->synthflags & SFLAG_EMBEDDED)
		{
			MbrolaEmbedded(embedded_ix, p->sourceix);
		}
		if(p->newword & 4)
			MbrolaMarker(espeakEVENT_SENTENCE, (p->sourceix & 0x7ff) + clause_start_char, 0, count_sentences);

		if(p->newword & 1)
			MbrolaMarker(espeakEVENT_WORD, (p->sourceix & 0x7ff) + clause_start_char, p->sourceix >> 11, clause_start_word + word_count++);
#endif

		// translate to one or two mbrola phoneme names
		name = GetMbrName(p,ph,ph_prev,ph_next,&name2,&len_percent,&control);
		if(control & 1)
			phix++;     // control bit 0, skip the next phoneme

		if(name == 0)
			continue;   // ignore this phoneme

		if((ph->type == phPAUSE) && (name == ph->mnemonic))
		{
			// a pause phoneme, which has not been changed by the translation
			name = '_';
			len = (p->length * speed_factor1)/256;
//			if(len == 0) continue;
			if(len == 0)
				len = 1;
		}
		else
			len = (80 * speed_factor2)/256;    // default, may be replaced in the switch below

#ifdef USE_MBROLA_LIB
		MbrolaMarker(espeakEVENT_PHONEME, (p->sourceix & 0x7ff) + clause_start_char, 0, ph->mnemonic);
#endif

		sprintf(buf,"%s\t",WordToString(name));
		strcat(mbr_buf,buf);

		if(name2 == '_')
		{
			// add a pause after this phoneme
			pause = PauseLength(len_percent,0);
			name2 = 0;
		}

		done = 0;           // set if the case has written the whole line itself
		final_pitch = "";

		switch(ph->type)
		{
		case phVOWEL:
			len = ph->std_length;
			if(p->synthflags & SFLAG_LENGTHEN)
				len += phoneme_tab[phonLENGTHEN]->std_length;  // phoneme was followed by an extra : symbol

			if(ph_next->type == phPAUSE)
				len += 50;        // lengthen vowels before a pause
			len = (len * p->length)/256;

			if(name2 == 0)
			{
				sprintf(buf,"%d\t%s", len, WritePitch(p->env,p->pitch1,p->pitch2,0,0));
				strcat(mbr_buf,buf);
			}
			else
			{
				// two mbrola phonemes, divide the length and the pitch envelope between them
				len1 = (len * len_percent)/100;
				sprintf(buf,"%d\t%s", len1, WritePitch(p->env,p->pitch1,p->pitch2,len_percent,0));
				strcat(mbr_buf,buf);

				sprintf(buf,"%s\t%d\t%s", WordToString(name2), len-len1, WritePitch(p->env,p->pitch1,p->pitch2,-len_percent,0));
				strcat(mbr_buf,buf);
			}
			done = 1;
			break;

		case phSTOP:
			released = 0;
			if(next->type==phVOWEL) released = 1;
			if(next->type==phLIQUID && !next->newword) released = 1;

			if(released)
				len = DoSample(p->ph,next->ph,2,0,-1);
			else
				len = DoSample(p->ph,phoneme_tab[phonPAUSE],2,0,-1);
			len = (len * 1000)/samplerate;  // convert to mS
			len += PauseLength(p->prepause,1);
			break;

		case phVSTOP:
			len = (80 * speed_factor2)/256;
			break;

		case phFRICATIVE:
			len = 0;
			if(p->synthflags & SFLAG_LENGTHEN)
				len = DoSample(ph,ph_next,2,p->length,-1);  // play it twice for [s:] etc.
			len += DoSample(ph,ph_next,2,p->length,-1);

			len = (len * 1000)/samplerate;  // convert to mS
			break;

		case phNASAL:
			if(next->type != phVOWEL)
			{
				len = DoSpect(p->ph,prev->ph,phoneme_tab[phonPAUSE],2,p,-1);
				len = (len * 1000)/samplerate;
				if(next->type == phPAUSE)
					len += 50;
				final_pitch = WritePitch(p->env,p->pitch1,p->pitch2,0,1);
			}
			break;

		case phLIQUID:
			if(next->type == phPAUSE)
			{
				len += 50;
				final_pitch = WritePitch(p->env,p->pitch1,p->pitch2,0,1);
			}
			break;
		}

		if(!done)
		{
			if(name2 != 0)
			{
				// two mbrola phonemes, the first gets len_percent of the length
				len1 = (len * len_percent)/100;
				sprintf(buf,"%d\n%s\t",len1,WordToString(name2));
				strcat(mbr_buf,buf);
				len -= len1;
			}
			sprintf(buf,"%d%s\n",len,final_pitch);
			strcat(mbr_buf,buf);
		}

		if(pause)
		{
			sprintf(buf,"_ \t%d\n",PauseLength(pause,0));
			strcat(mbr_buf,buf);
			pause = 0;
		}

		if(f_mbrola)
		{
			fwrite(mbr_buf,1,strlen(mbr_buf),f_mbrola);  // write .pho to a file
		}
		else
		{
#ifdef USE_MBROLA_LIB
			if(MbrolaSynth(mbr_buf) != 0)
				return;     // no callback, or cancelled by the callback
#endif
		}
	}

#ifdef USE_MBROLA_LIB
	MbrolaSynth(NULL);    // flush mbrola, and output the remaining speech
#endif
}  // end of MbrolaTranslate


#ifdef TEST_MBROLA

// NOTE(review): the code in this section can't compile as it stands, and is
// only built if TEST_MBROLA is defined.  It is kept exactly as found.
//  - mbrola_phlist is a PHONEME_LIST but is assigned a PHONEME_LIST pointer
//  - mbrola_n_ph is an int but is assigned an int pointer
//  - "resume(0)" calls an int parameter, presumably "return(0)" was meant - confirm
//  - neither function returns a value

PHONEME_LIST mbrola_phlist;
int mbrola_n_ph;
int mbrola_phix;

int MbrolaFill(int fill_zeros)
{//===========================
}

int MbrolaGenerate(PHONEME_LIST *phoneme_list, int *n_ph, int resume)
{//==================================================================
	if(resume == 0)
	{
		mbrola_phlist = phoneme_list;
		mbrola_n_ph = n_ph;
		mbrola_phix = 0;
	}

	resume(0);  // finished phoneme list
}

#endif