/***************************************************************************
* Copyright (C) 2006 to 2007 by Jonathan Duddington *
* email: jonsd@users.sourceforge.net *
* *
* This program is free software; you can redistribute it and/or modify *
* it under the terms of the GNU General Public License as published by *
* the Free Software Foundation; either version 3 of the License, or *
* (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU General Public License for more details. *
* *
* You should have received a copy of the GNU General Public License *
* along with this program; if not, write see: *
* . *
***************************************************************************/
#include
#include
#include
#include
#include
#include
#include "speak_lib.h"
// This version of the command-line speak program uses the
// libespeak.so.1 library
static const char *help_text =
"\nspeak [options] [\"\"]\n\n"
"-f Text file to speak\n"
"--stdin Read text input from stdin instead of a file\n\n"
"If neither -f nor --stdin, are spoken, or if none then text is\n"
"spoken from stdin, each line separately.\n\n"
"-a \n"
"\t Amplitude, 0 to 200, default is 100\n"
"-g \n"
"\t Word gap. Pause between words, units of 10mS at the default speed\n"
"-l \n"
"\t Line length. If not zero (which is the default), consider\n"
"\t lines less than this length as end-of-clause\n"
"-p \n"
"\t Pitch adjustment, 0 to 99, default is 50\n"
"-s \n"
"\t Speed in words per minute, 80 to 370, default is 170\n"
"-v \n"
"\t Use voice file of this name from espeak-data/voices\n"
"-w \n"
"\t Write output to this WAV file, rather than speaking it directly\n"
"-b\t Input text is 8-bit encoding\n"
"-m\t Interpret SSML markup, and ignore other < > tags\n"
"-q\t Quiet, don't produce any speech (may be useful with -x)\n"
"-x\t Write phoneme mnemonics to stdout\n"
"-X\t Write phonemes mnemonics and translation trace to stdout\n"
"-z\t No final sentence pause at the end of the text\n"
"--stdout Write speech output to stdout\n"
"--compile=\n"
"\t Compile the pronunciation rules and dictionary in the current\n"
"\t directory. = is optional and specifies which language\n"
"--punct=\"\"\n"
"\t Speak the names of punctuation characters during speaking. If\n"
"\t = is omitted, all punctuation is spoken.\n"
"--split=\"\"\n"
"\t Starts a new WAV file every . Used with -w\n"
"--voices=\n"
"\t List the available voices for the specified language.\n"
"\t If is omitted, then list all voices.\n"
"-k \n"
"\t Indicate capital letters with: 1=sound, 2=the word \"capitals\",\n"
"\t higher values = a pitch increase (try -k20).\n";
int samplerate;
int quiet = 0;
unsigned int samples_total = 0;
unsigned int samples_split = 0;
unsigned int wavefile_count = 0;
FILE *f_wavfile = NULL;
char filetype[5];
char wavefile[200];
int GetFileLength(const char *filename)
{//====================================
struct stat statbuf;
if(stat(filename,&statbuf) != 0)
return(0);
if((statbuf.st_mode & S_IFMT) == S_IFDIR)
return(-2); // a directory
return(statbuf.st_size);
} // end of GetFileLength
void strncpy0(char *dest, const char *source, int size)
{//====================================================
if(source!=NULL)
{
strncpy(dest,source,size);
dest[size-1] = 0;
}
}
void DisplayVoices(FILE *f_out, char *language)
{//============================================
int ix;
const char *p;
int len;
int count;
int scores = 0;
const espeak_VOICE *v;
const char *lang_name;
char age_buf[12];
const espeak_VOICE **voices;
espeak_VOICE voice_select;
static char genders[4] = {' ','M','F',' '};
if((language != NULL) && (language[0] != 0))
{
// display only voices for the specified language, in order of priority
voice_select.languages = language;
voice_select.age = 0;
voice_select.gender = 0;
voice_select.name = NULL;
voices = espeak_ListVoices(&voice_select);
scores = 1;
}
else
{
voices = espeak_ListVoices(NULL);
}
fprintf(f_out,"Pty Language Age/Gender VoiceName File Other Langs\n");
for(ix=0; (v = voices[ix]) != NULL; ix++)
{
count = 0;
p = v->languages;
while(*p != 0)
{
len = strlen(p+1);
lang_name = p+1;
if(v->age == 0)
strcpy(age_buf," ");
else
sprintf(age_buf,"%3d",v->age);
if(count==0)
{
fprintf(f_out,"%2d %-12s%s%c %-17s %-11s ",
p[0],lang_name,age_buf,genders[v->gender],v->name,v->identifier);
}
else
{
fprintf(f_out,"(%s %d)",lang_name,p[0]);
}
count++;
p += len+2;
}
// if(scores)
// fprintf(f_out,"%3d ",v->score);
fputc('\n',f_out);
}
} // end of DisplayVoices
static void Write4Bytes(FILE *f, int value)
{//=================================
// Write 4 bytes to a file, least significant first
int ix;
for(ix=0; ix<4; ix++)
{
fputc(value & 0xff,f);
value = value >> 8;
}
}
int OpenWavFile(char *path, int rate)
//===================================
{
static unsigned char wave_hdr[44] = {
'R','I','F','F',0x24,0xf0,0xff,0x7f,'W','A','V','E','f','m','t',' ',
0x10,0,0,0,1,0,1,0, 9,0x3d,0,0,0x12,0x7a,0,0,
2,0,0x10,0,'d','a','t','a', 0x00,0xf0,0xff,0x7f};
if(path == NULL)
return(2);
if(path[0] == 0)
return(0);
if(strcmp(path,"stdout")==0)
f_wavfile = stdout;
else
f_wavfile = fopen(path,"wb");
if(f_wavfile != NULL)
{
fwrite(wave_hdr,1,24,f_wavfile);
Write4Bytes(f_wavfile,rate);
Write4Bytes(f_wavfile,rate * 2);
fwrite(&wave_hdr[32],1,12,f_wavfile);
return(0);
}
return(1);
} // end of OpenWavFile
static void CloseWavFile()
//========================
{
unsigned int pos;
if((f_wavfile==NULL) || (f_wavfile == stdout))
return;
fflush(f_wavfile);
pos = ftell(f_wavfile);
fseek(f_wavfile,4,SEEK_SET);
Write4Bytes(f_wavfile,pos - 8);
fseek(f_wavfile,40,SEEK_SET);
Write4Bytes(f_wavfile,pos - 44);
fclose(f_wavfile);
f_wavfile = NULL;
} // end of CloseWavFile
static int SynthCallback(short *wav, int numsamples, espeak_EVENT *events)
{//========================================================================
char fname[210];
if(quiet) return(0); // -q quiet mode
if(wav == NULL)
{
CloseWavFile();
return(0);
}
if(samples_split > 0)
{
// start a new WAV file when this limit is reached, at the next sentence boundary
while(events->type != 0)
{
if((events->type == espeakEVENT_SENTENCE) && (samples_total > samples_split))
{
CloseWavFile();
samples_total = 0;
}
events++;
}
}
if(f_wavfile == NULL)
{
sprintf(fname,"%s_%.2d%s",wavefile,++wavefile_count,filetype);
if(OpenWavFile(fname, samplerate) != 0)
return(1);
}
if(numsamples > 0)
{
samples_total += numsamples;
fwrite(wav,numsamples*2,1,f_wavfile);
}
return(0);
}
int main (int argc, char **argv)
//==============================
{
static struct option long_options[] =
{
/* These options set a flag. */
// {"verbose", no_argument, &verbose_flag, 1},
// {"brief", no_argument, &verbose_flag, 0},
/* These options don't set a flag.
We distinguish them by their indices. */
{"help", no_argument, 0, 'h'},
{"stdin", no_argument, 0, 0x100},
{"compile-debug", optional_argument, 0, 0x101},
{"compile", optional_argument, 0, 0x102},
{"punct", optional_argument, 0, 0x103},
{"voices", optional_argument, 0, 0x104},
{"stdout", no_argument, 0, 0x105},
{"split", optional_argument, 0, 0x106},
{0, 0, 0, 0}
};
static const char* err_load = "Failed to read ";
FILE *f_text=NULL;
char *p_text=NULL;
int option_index = 0;
int c;
int ix;
int flag_stdin = 0;
int flag_compile = 0;
int filesize = 0;
int synth_flags = espeakCHARS_AUTO | espeakPHONEMES | espeakENDPAUSE;
int volume = -1;
int speed = -1;
int pitch = -1;
int wordgap = -1;
int option_capitals = -1;
int option_punctuation = -1;
int option_phonemes = -1;
int option_linelength = 0;
int option_waveout = 0;
char filename[120];
char voicename[40];
char voice_mbrola[20];
char dictname[40];
#define N_PUNCTLIST 100
wchar_t option_punctlist[N_PUNCTLIST];
voicename[0] = 0;
voice_mbrola[0] = 0;
dictname[0] = 0;
wavefile[0] = 0;
filename[0] = 0;
option_punctlist[0] = 0;
while(true)
{
c = getopt_long (argc, argv, "a:bf:g:hk:l:mp:qs:v:w:xXz",
long_options, &option_index);
/* Detect the end of the options. */
if (c == -1)
break;
switch (c)
{
case 'b':
synth_flags |= espeakCHARS_8BIT;
break;
case 'h':
printf("\n");
printf("eSpeak text-to-speech: %s\n%s",espeak_Info(NULL),help_text);
exit(0);
break;
case 'k':
option_capitals = atoi(optarg);
break;
case 'x':
option_phonemes = 1;
break;
case 'X':
option_phonemes = 2;
break;
case 'm':
synth_flags |= espeakSSML;
break;
case 'p':
pitch = atoi(optarg);
break;
case 'q':
quiet = 1;
break;
case 'f':
strncpy0(filename,optarg,sizeof(filename));
break;
case 'l':
option_linelength = atoi(optarg);
break;
case 'a':
volume = atoi(optarg);
break;
case 's':
speed = atoi(optarg);
break;
case 'g':
wordgap = atoi(optarg);
break;
case 'v':
strncpy0(voicename,optarg,sizeof(voicename));
break;
case 'w':
option_waveout = 1;
strncpy0(wavefile,optarg,sizeof(filename));
break;
case 'z': // remove pause from the end of a sentence
synth_flags &= ~espeakENDPAUSE;
break;
case 0x100: // --stdin
flag_stdin = 1;
break;
case 0x105: // --stdout
option_waveout = 1;
strcpy(wavefile,"stdout");
break;
case 0x101: // --compile-debug
case 0x102: // --compile
strncpy0(voicename,optarg,sizeof(voicename));
flag_compile = c;
quiet = 1;
break;
case 0x103: // --punct
option_punctuation = 1;
if(optarg != NULL)
{
ix = 0;
while((ix < N_PUNCTLIST) && ((option_punctlist[ix] = optarg[ix]) != 0)) ix++;
option_punctlist[N_PUNCTLIST-1] = 0;
option_punctuation = 2;
}
break;
case 0x104: // --voices
espeak_Initialize(AUDIO_OUTPUT_SYNCHRONOUS,0,NULL,0);
DisplayVoices(stdout,optarg);
exit(0);
case 0x106: // -- split
if(optarg == NULL)
samples_split = 30; // default 30 minutes
else
samples_split = atoi(optarg);
break;
default:
exit(0);
}
}
if(option_waveout || quiet)
{
// writing to a file (or no output), we can use synchronous mode
samplerate = espeak_Initialize(AUDIO_OUTPUT_SYNCHRONOUS,0,NULL,0);
samples_split = (samplerate * samples_split) * 60;
espeak_SetSynthCallback(SynthCallback);
if(samples_split)
{
char *extn;
extn = strrchr(wavefile,'.');
if((extn != NULL) && ((wavefile + strlen(wavefile) - extn) <= 4))
{
strcpy(filetype,extn);
*extn = 0;
}
}
else
if(option_waveout)
{
if(OpenWavFile(wavefile,samplerate) != 0)
exit(4);
}
}
else
{
// play the sound output
samplerate = espeak_Initialize(AUDIO_OUTPUT_PLAYBACK,0,NULL,0);
}
if(voicename[0] == 0)
strcpy(voicename,"default");
if(espeak_SetVoiceByName(voicename) != EE_OK)
{
fprintf(stderr,"%svoice '%s'\n",err_load,voicename);
exit(2);
}
if(flag_compile)
{
// This must be done after the voice is set
espeak_CompileDictionary("", stderr, flag_compile & 0x1);
exit(0);
}
// set any non-default values of parameters. This must be done after espeak_Initialize()
if(speed > 0)
espeak_SetParameter(espeakRATE,speed,0);
if(volume >= 0)
espeak_SetParameter(espeakVOLUME,volume,0);
if(pitch >= 0)
espeak_SetParameter(espeakPITCH,pitch,0);
if(option_capitals >= 0)
espeak_SetParameter(espeakCAPITALS,option_capitals,0);
if(option_punctuation >= 0)
espeak_SetParameter(espeakPUNCTUATION,option_punctuation,0);
if(wordgap >= 0)
espeak_SetParameter(espeakWORDGAP,wordgap,0);
if(option_linelength > 0)
espeak_SetParameter(espeakLINELENGTH,option_linelength,0);
if(option_punctuation == 2)
espeak_SetPunctuationList(option_punctlist);
if(option_phonemes >= 0)
espeak_SetPhonemeTrace(option_phonemes,stderr);
if(filename[0]==0)
{
if((optind < argc) && (flag_stdin == 0))
{
// there's a non-option parameter, and no -f or --stdin
// use it as text
p_text = argv[optind];
}
else
{
f_text = stdin;
if(flag_stdin == 0)
{
flag_stdin = 2;
}
}
}
else
{
filesize = GetFileLength(filename);
f_text = fopen(filename,"r");
}
if((f_text == NULL) && (p_text == NULL))
{
fprintf(stderr,"%sfile '%s'\n",err_load,filename);
exit(1);
}
if(p_text != NULL)
{
int size;
size = strlen(p_text);
espeak_Synth(p_text,size+1,0,POS_CHARACTER,0,synth_flags,NULL,NULL);
}
else
if(flag_stdin)
{
int max = 1000;
p_text = (char *)malloc(max);
if(flag_stdin == 2)
{
// line by line input on stdin
while(fgets(p_text,max,stdin) != NULL)
{
p_text[max-1] = 0;
espeak_Synth(p_text,max,0,POS_CHARACTER,0,synth_flags,NULL,NULL);
}
}
else
{
// bulk input on stdin
ix = 0;
while(!feof(stdin))
{
p_text[ix++] = fgetc(stdin);
if(ix >= (max-1))
{
max += 1000;
p_text = (char *)realloc(p_text,max);
}
}
if(ix > 0)
{
p_text[ix-1] = 0;
espeak_Synth(p_text,ix+1,0,POS_CHARACTER,0,synth_flags,NULL,NULL);
}
}
}
else
if(f_text != NULL)
{
if((p_text = (char *)malloc(filesize+1)) == NULL)
{
fprintf(stderr,"Failed to allocate memory %d bytes",filesize);
exit(3);
}
fread(p_text,1,filesize,f_text);
p_text[filesize]=0;
espeak_Synth(p_text,filesize+1,0,POS_CHARACTER,0,synth_flags,NULL,NULL);
fclose(f_text);
}
espeak_Synchronize();
return(0);
}