/***************************************************************************
* Copyright (C) 2006 to 2013 by Jonathan Duddington *
* email: jonsd@users.sourceforge.net *
* *
* This program is free software; you can redistribute it and/or modify *
* it under the terms of the GNU General Public License as published by *
* the Free Software Foundation; either version 3 of the License, or *
* (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU General Public License for more details. *
* *
* You should have received a copy of the GNU General Public License *
* along with this program; if not, see: *
* <http://www.gnu.org/licenses/>. *
***************************************************************************/
#include
#include
#include
#include
#ifndef NEED_GETOPT
#include
#endif
#include
#include
#include "speak_lib.h"
// This version of the command-line speak program uses the
// libespeak.so.1 library
// Usage text printed by the -h / --help option.
// Each option that takes a value names that value in angle brackets, and the
// descriptive sentences refer to the same placeholder names.
static const char *help_text =
"\nespeak [options] [\"<words>\"]\n\n"
"-f <text file> Text file to speak\n"
"--stdin Read text input from stdin instead of a file\n\n"
"If neither -f nor --stdin, then <words> are spoken, or if none then text\n"
"is spoken from stdin, each line separately.\n\n"
"-a <integer>\n"
"\t Amplitude, 0 to 200, default is 100\n"
"-g <integer>\n"
"\t Word gap. Pause between words, units of 10mS at the default speed\n"
"-k <integer>\n"
"\t Indicate capital letters with: 1=sound, 2=the word \"capitals\",\n"
"\t higher values indicate a pitch increase (try -k20).\n"
"-l <integer>\n"
"\t Line length. If not zero (which is the default), consider\n"
"\t lines less than this length as end-of-clause\n"
"-p <integer>\n"
"\t Pitch adjustment, 0 to 99, default is 50\n"
"-s <integer>\n"
"\t Speed in words per minute, 80 to 450, default is 175\n"
"-v <voice name>\n"
"\t Use voice file of this name from espeak-data/voices\n"
"-w <wave file name>\n"
"\t Write speech to this WAV file, rather than speaking it directly\n"
"-b\t Input text encoding, 1=UTF8, 2=8 bit, 4=16 bit \n"
"-m\t Interpret SSML markup, and ignore other < > tags\n"
"-q\t Quiet, don't produce any speech (may be useful with -x)\n"
"-x\t Write phoneme mnemonics to stdout\n"
"-X\t Write phonemes mnemonics and translation trace to stdout\n"
"-z\t No final sentence pause at the end of the text\n"
"--compile=<voice name>\n"
"\t Compile pronunciation rules and dictionary from the current\n"
"\t directory. <voice name> specifies the language\n"
"--ipa Write phonemes to stdout using International Phonetic Alphabet\n"
"\t --ipa=1 Use ties, --ipa=2 Use ZWJ, --ipa=3 Separate with _\n"
"--path=\"<path>\"\n"
"\t Specifies the directory containing the espeak-data directory\n"
"--pho Write mbrola phoneme data (.pho) to stdout or to the file in --phonout\n"
"--phonout=\"<filename>\"\n"
"\t Write phoneme output from -x -X --ipa and --pho to this file\n"
"--punct=\"<characters>\"\n"
"\t Speak the names of punctuation characters during speaking. If\n"
"\t =<characters> is omitted, all punctuation is spoken.\n"
"--split=\"<minutes>\"\n"
"\t Starts a new WAV file every <minutes>. Used with -w\n"
"--stdout Write speech output to stdout\n"
"--version Shows version number and date, and location of espeak-data\n"
"--voices=<language>\n"
"\t List the available voices for the specified language.\n"
"\t If <language> is omitted, then list all voices.\n";
// State shared between main() and the synthesis callback.

// Samples per second of the synthesized audio. Set from the return value of
// espeak_Initialize() and updated by espeakEVENT_SAMPLERATE events; written
// into the WAV header by OpenWavFile().
int samplerate;
// Non-zero when no audio is to be produced (set by -q and by --compile).
int quiet = 0;
// Number of samples written since the current WAV file was started.
unsigned int samples_total = 0;
// Split threshold in samples (samples_split_seconds * samplerate); 0 = no splitting.
unsigned int samples_split = 0;
// Split threshold in seconds, set by --split; 0 = no splitting.
unsigned int samples_split_seconds = 0;
// Number of WAV files completed so far when splitting; used to number the next file.
unsigned int wavefile_count = 0;
// The WAV output stream currently open, or NULL when none is open.
FILE *f_wavfile = NULL;
// Extension (including the '.') removed from 'wavefile' when splitting is enabled.
char filetype[5];
// Output file name given with -w, or "stdout" for --stdout.
char wavefile[200];
// Return the size in bytes of the file named 'filename'.
//
// Returns 0 if stat() fails (for example the file does not exist), and -2 if
// the path names a directory.
// The size is narrowed from off_t to int, so a file larger than INT_MAX is
// not reported correctly.
int GetFileLength(const char *filename)
{//====================================
	struct stat statbuf;

	if(stat(filename,&statbuf) != 0)
		return(0);

	// Prefer the POSIX S_ISDIR() test: the raw S_IFMT/S_IFDIR masks are an XSI
	// extension which some C libraries hide in strict ISO C compilation modes.
	// Fall back to the masks on platforms which don't provide S_ISDIR.
#ifdef S_ISDIR
	if(S_ISDIR(statbuf.st_mode))
		return(-2); // a directory
#else
	if((statbuf.st_mode & S_IFMT) == S_IFDIR)
		return(-2); // a directory
#endif

	return((int)statbuf.st_size);
} // end of GetFileLength
// Copy the string 'source' into 'dest', a buffer of 'size' bytes, truncating
// if necessary and always leaving 'dest' NUL-terminated.
//
// Nothing is written if 'source' or 'dest' is NULL, or if 'size' is not
// positive (a non-positive size would otherwise index before the start of
// 'dest' and be converted to a huge byte count for strncpy).
void strncpy0(char *dest, const char *source, int size)
{//====================================================
	if((dest == NULL) || (source == NULL) || (size <= 0))
		return;

	strncpy(dest,source,size);
	dest[size-1] = 0; // strncpy doesn't terminate when 'source' fills the buffer
}
// Print a table of voices to 'f_out', one line per voice.
//
// If 'language' is a non-empty string, the list is requested from
// espeak_ListVoices() for that language only; otherwise all voices are listed.
//
// A voice's 'languages' field is walked as a sequence of entries, each being
// one priority byte followed by a NUL-terminated language name, with a zero
// byte ending the sequence. The first entry is shown in the main columns and
// any further entries are appended as "(name priority)".
void DisplayVoices(FILE *f_out, char *language)
{//============================================
	int ix;
	const char *p;
	int len;
	int count;
	int c;
	size_t j;
	char gender_char;
	const espeak_VOICE *v;
	const char *lang_name;
	char age_buf[12];
	char buf[80];
	const espeak_VOICE **voices;
	espeak_VOICE voice_select;
	static const char genders[4] = {'-','M','F','-'};

	if((language != NULL) && (language[0] != 0))
	{
		// display only voices for the specified language, in order of priority.
		// Clear the whole selector first so that no field is passed uninitialised.
		memset(&voice_select,0,sizeof(voice_select));
		voice_select.languages = language;
		voices = espeak_ListVoices(&voice_select);
	}
	else
	{
		voices = espeak_ListVoices(NULL);
	}

	fprintf(f_out,"Pty Language Age/Gender VoiceName File Other Languages\n");

	for(ix=0; (v = voices[ix]) != NULL; ix++)
	{
		// the age and gender columns are the same for every language entry of a voice
		if(v->age == 0)
			strcpy(age_buf," ");
		else
			sprintf(age_buf,"%3d",v->age);

		// an out-of-range gender code is shown as unknown rather than indexing past the table
		if((unsigned int)v->gender < sizeof(genders))
			gender_char = genders[v->gender];
		else
			gender_char = '-';

		count = 0;
		p = v->languages;
		while(*p != 0)
		{
			len = strlen(p+1);
			lang_name = p+1;

			if(count==0)
			{
				// copy the name, replacing spaces, and leave room for the terminator
				for(j=0; j < sizeof(buf)-1; j++)
				{
					if((c = v->name[j]) == ' ')
						c = '_';
					if((buf[j] = c) == 0)
						break;
				}
				buf[j] = 0; // terminates a name which was truncated to fit

				fprintf(f_out,"%2d %-12s%s%c %-20s %-13s ",
					p[0],lang_name,age_buf,gender_char,buf,v->identifier);
			}
			else
			{
				fprintf(f_out,"(%s %d)",lang_name,p[0]);
			}
			count++;
			p += len+2; // skip the priority byte, the name and its terminator
		}
		fputc('\n',f_out);
	}
} // end of DisplayVoices
// Output 'value' to 'f' as four bytes in little-endian order
// (least significant byte first).
static void Write4Bytes(FILE *f, int value)
{//=================================
	int shift;

	for(shift=0; shift<32; shift+=8)
		fputc((value >> shift) & 0xff,f);
}
// Open the WAV output and write its 44 byte header.
//
// path: file to create. Leading white-space is skipped; the name "stdout"
//       selects the process's standard output instead of a file.
// rate: sample rate. It is written into the header followed by rate*2 as the
//       bytes-per-second field (the template describes 2-byte, single-channel
//       samples).
//
// The opened stream is stored in the global f_wavfile. The two length fields
// of the header are written with placeholder values from the template.
//
// Returns 0 on success, 1 if the output can't be opened (including an empty
// path), 2 if 'path' is NULL.
int OpenWavFile(char *path, int rate)
//===================================
{
	static const unsigned char wave_hdr[44] = {
		'R','I','F','F',0x24,0xf0,0xff,0x7f,'W','A','V','E','f','m','t',' ',
		0x10,0,0,0,1,0,1,0, 9,0x3d,0,0,0x12,0x7a,0,0,
		2,0,0x10,0,'d','a','t','a', 0x00,0xf0,0xff,0x7f};

	if(path == NULL)
		return(2);

	// isspace() is only defined for unsigned char values (and EOF), so a plain
	// char which may be negative must be converted first
	while(isspace((unsigned char)*path)) path++;

	f_wavfile = NULL;
	if(path[0] != 0)
	{
		if(strcmp(path,"stdout")==0)
			f_wavfile = stdout;
		else
			f_wavfile = fopen(path,"wb");
	}

	if(f_wavfile == NULL)
	{
		fprintf(stderr,"Can't write to: '%s'\n",path);
		return(1);
	}

	// header up to the sample rate, then the two rate-dependent fields,
	// then the remainder of the template
	fwrite(wave_hdr,1,24,f_wavfile);
	Write4Bytes(f_wavfile,rate);
	Write4Bytes(f_wavfile,rate * 2);
	fwrite(&wave_hdr[32],1,12,f_wavfile);
	return(0);
} // end of OpenWavFile
// Finish and close the current WAV file, if one is open.
//
// The two length fields in the header (at offsets 4 and 40) are rewritten from
// the final file position, then the file is closed and f_wavfile is reset to
// NULL. Nothing is done when no file is open or when output is to stdout,
// which keeps the placeholder lengths written by OpenWavFile().
static void CloseWavFile(void)
//========================
{
	long pos;

	if((f_wavfile==NULL) || (f_wavfile == stdout))
		return;

	fflush(f_wavfile);
	pos = ftell(f_wavfile);

	// Only patch the header if the position is known and the stream can seek.
	// Otherwise (e.g. output to a pipe) the size bytes would be appended to
	// the audio data instead of replacing the header fields.
	if(pos >= 44)
	{
		if(fseek(f_wavfile,4,SEEK_SET) == 0)
			Write4Bytes(f_wavfile,(int)(unsigned int)(pos - 8));

		if(fseek(f_wavfile,40,SEEK_SET) == 0)
			Write4Bytes(f_wavfile,(int)(unsigned int)(pos - 44));
	}

	fclose(f_wavfile);
	f_wavfile = NULL;
} // end of CloseWavFile
// Callback registered with espeak_SetSynthCallback(): receives synthesized
// audio and writes it to the WAV output.
//
// wav:        16-bit samples, or NULL, in which case the current WAV file is closed.
// numsamples: number of samples in 'wav'.
// events:     list of events, terminated by an entry whose type is 0.
//
// A SAMPLERATE event updates 'samplerate' and the split threshold. At a
// SENTENCE event, if splitting is enabled and the threshold has been passed,
// the current file is closed so that the next audio starts a new numbered file
// "<wavefile>_NN<filetype>".
//
// Returns 0 normally, or 1 if the output file can't be opened.
// In quiet mode nothing is written and 0 is returned.
static int SynthCallback(short *wav, int numsamples, espeak_EVENT *events)
{//========================================================================
	// large enough for the base name, '_', any int in decimal, and the extension
	char fname[sizeof(wavefile) + sizeof(filetype) + 16];

	if(quiet) return(0); // -q quiet mode

	if(wav == NULL)
	{
		CloseWavFile();
		return(0);
	}

	while(events->type != 0)
	{
		if(events->type == espeakEVENT_SAMPLERATE)
		{
			samplerate = events->id.number;
			samples_split = samples_split_seconds * samplerate;
		}
		else
		if(events->type == espeakEVENT_SENTENCE)
		{
			// start a new WAV file when the limit is reached, at this sentence boundary
			if((samples_split > 0) && (samples_total > samples_split))
			{
				CloseWavFile();
				samples_total = 0;
				wavefile_count++;
			}
		}
		events++;
	}

	if(f_wavfile == NULL)
	{
		if(samples_split > 0)
		{
			// bounded formatting; the count is converted to match the %d conversion
			snprintf(fname,sizeof(fname),"%s_%.2d%s",wavefile,(int)(wavefile_count+1),filetype);
			if(OpenWavFile(fname, samplerate) != 0)
				return(1);
		}
		else
		{
			if(OpenWavFile(wavefile, samplerate) != 0)
				return(1);
		}
	}

	if(numsamples > 0)
	{
		samples_total += numsamples;
		fwrite(wav,numsamples*2,1,f_wavfile); // 2 bytes per sample
	}
	return(0);
}
// Initialise the library (without exiting on failure) and print the version
// string and data location reported by espeak_Info() to stdout.
static void PrintVersion()
{//=======================
	const char *data_location;
	const char *version_string;

	espeak_Initialize(AUDIO_OUTPUT_SYNCHRONOUS, 0, NULL, espeakINITIALIZE_DONT_EXIT);
	version_string = espeak_Info(&data_location);

	fprintf(stdout, "eSpeak text-to-speech: %s Data at: %s\n", version_string, data_location);
}
#ifdef NEED_GETOPT
// Stand-in declarations used when getopt_long() is not available from the
// system. In that configuration main() parses argv itself using these.
struct option {
char *name; // long option name, without the leading "--"
int has_arg; // no_argument, required_argument or optional_argument
int *flag; // not read by the parser in main()
int val; // value stored in 'c' by main() when this option is matched
};
int optind; // index in argv of the next argument to examine
// NOTE(review): this variable has the same name as the optional_argument macro
// defined below and does not appear to be referenced — confirm whether it is needed.
static int optional_argument;
static const char *arg_opts = "abfgklpsvw"; // which options have arguments
static char *opt_string=""; // unparsed remainder of the option argument being processed
// values for the has_arg field of struct option
#define no_argument 0
#define required_argument 1
#define optional_argument 2
#endif
// Entry point of the command-line speak program.
//
// Parses the options (with getopt_long(), or with a built-in parser when
// NEED_GETOPT is defined), initialises the library for playback or for
// synchronous output through SynthCallback(), selects the voice, applies any
// parameters given on the command line, and then speaks text taken from, in
// order of preference: the file given with -f, the first non-option argument,
// or stdin (as one block with --stdin, otherwise line by line).
//
// Exit status: 0 on success (also after --help, --version, --voices,
// --compile and for an unrecognised option), 1 if the input file can't be
// read or an option value is missing, 2 if the voice can't be loaded,
// 3 if memory can't be allocated, 4 if espeak_Synchronize() fails.
int main (int argc, char **argv)
//==============================
{
	static struct option long_options[] =
	{
		/* These options don't set a flag.
			We distinguish them by their indices. */
		{"help", no_argument, 0, 'h'},
		{"stdin", no_argument, 0, 0x100},
		{"compile-debug", optional_argument, 0, 0x101},
		{"compile", optional_argument, 0, 0x102},
		{"punct", optional_argument, 0, 0x103},
		{"voices", optional_argument, 0, 0x104},
		{"stdout", no_argument, 0, 0x105},
		{"split", optional_argument, 0, 0x106},
		{"path", required_argument, 0, 0x107},
		{"phonout", required_argument, 0, 0x108},
		{"pho", no_argument, 0, 0x109},
		{"ipa", optional_argument, 0, 0x10a},
		{"version", no_argument, 0, 0x10b},
		{0, 0, 0, 0}
	};

	static const char* err_load = "Failed to read ";

	FILE *f_text=NULL;
	char *p_text=NULL;
	FILE *f_phonemes_out = stdout;
	char *data_path = NULL; // use default path for espeak-data

	int option_index = 0;
	int c;
	int ix;
	char *optarg2;
	int value;
	int flag_stdin = 0;
	int flag_compile = 0;
	int filesize = 0;
	int synth_flags = espeakCHARS_AUTO | espeakPHONEMES | espeakENDPAUSE;

	// -1 means "not given on the command line": leave the library default
	int volume = -1;
	int speed = -1;
	int pitch = -1;
	int wordgap = -1;
	int option_capitals = -1;
	int option_punctuation = -1;
	int option_phonemes = 0;
	int option_mbrola_phonemes = 0;
	int option_linelength = 0;
	int option_waveout = 0;

	espeak_VOICE voice_select;
	char filename[200];
	char voicename[40];
#define N_PUNCTLIST 100
	wchar_t option_punctlist[N_PUNCTLIST];

	voicename[0] = 0;
	wavefile[0] = 0;
	filename[0] = 0;
	option_punctlist[0] = 0;

#ifdef NEED_GETOPT
	optind = 1;
	opt_string = "";
	while(optind < argc)
	{
		int len;
		char *p;

		if((c = *opt_string) == 0)
		{
			// start on the next argument
			opt_string = argv[optind];
			if(opt_string[0] != '-')
				break;

			optind++;
			opt_string++;
			c = *opt_string;
		}
		opt_string++;
		p = optarg2 = opt_string;

		if(c == '-')
		{
			if(p[0] == 0)
				break; // -- means don't interpret further - as commands

			opt_string="";
			for(ix=0; ;ix++)
			{
				if(long_options[ix].name == 0)
					break;
				len = strlen(long_options[ix].name);
				// strncmp stops at the end of 'p'; memcmp could read beyond a shorter argument
				if(strncmp(long_options[ix].name,p,len)==0)
				{
					c = long_options[ix].val;
					optarg2 = NULL;

					if((long_options[ix].has_arg != 0) && (p[len]=='='))
					{
						optarg2 = &p[len+1];
					}
					break;
				}
			}
		}
		else
		if(strchr(arg_opts,c) != NULL)
		{
			opt_string="";
			if(optarg2[0]==0)
			{
				// the option's value is in the next argument
				if(optind >= argc)
				{
					// nothing follows: don't read past the end of argv
					fprintf(stderr,"Option -%c requires an argument\n",c);
					exit(1);
				}
				optarg2 = argv[optind++];
			}
		}
#else
	while(true)
	{
		c = getopt_long (argc, argv, "a:b:f:g:hk:l:mp:qs:v:w:xXz",
			long_options, &option_index);

		/* Detect the end of the options. */
		if (c == -1)
			break;
		optarg2 = optarg;
#endif

		switch (c)
		{
		case 'b':
			// input character encoding, 8bit, 16bit, UTF8
			// (a negative value would set every bit of synth_flags, so reject it)
			if((sscanf(optarg2,"%d",&value) == 1) && (value >= 0) && (value <= 4))
				synth_flags |= value;
			else
				synth_flags |= espeakCHARS_8BIT;
			break;

		case 'h':
			printf("\n");
			PrintVersion();
			printf("%s", help_text);
			exit(0);
			break;

		case 'k':
			option_capitals = atoi(optarg2);
			break;

		case 'x':
			option_phonemes = 1;
			break;

		case 'X':
			option_phonemes = 2;
			break;

		case 'm':
			synth_flags |= espeakSSML;
			break;

		case 'p':
			pitch = atoi(optarg2);
			break;

		case 'q':
			quiet = 1;
			break;

		case 'f':
			strncpy0(filename,optarg2,sizeof(filename));
			break;

		case 'l':
			option_linelength = atoi(optarg2);
			break;

		case 'a':
			volume = atoi(optarg2);
			break;

		case 's':
			speed = atoi(optarg2);
			break;

		case 'g':
			wordgap = atoi(optarg2);
			break;

		case 'v':
			strncpy0(voicename,optarg2,sizeof(voicename));
			break;

		case 'w':
			option_waveout = 1;
			// bound the copy by the destination buffer
			strncpy0(wavefile,optarg2,sizeof(wavefile));
			break;

		case 'z': // remove pause from the end of a sentence
			synth_flags &= ~espeakENDPAUSE;
			break;

		case 0x100: // --stdin
			flag_stdin = 1;
			break;

		case 0x105: // --stdout
			option_waveout = 1;
			strcpy(wavefile,"stdout");
			break;

		case 0x101: // --compile-debug
		case 0x102: // --compile
			strncpy0(voicename,optarg2,sizeof(voicename));
			flag_compile = c;
			quiet = 1;
			break;

		case 0x103: // --punct
			option_punctuation = 1;
			if(optarg2 != NULL)
			{
				ix = 0;
				while((ix < N_PUNCTLIST) && ((option_punctlist[ix] = optarg2[ix]) != 0)) ix++;
				option_punctlist[N_PUNCTLIST-1] = 0;
				option_punctuation = 2;
			}
			break;

		case 0x104: // --voices
			espeak_Initialize(AUDIO_OUTPUT_SYNCHRONOUS,0,data_path,0);
			DisplayVoices(stdout,optarg2);
			exit(0);

		case 0x106: // -- split
			if(optarg2 == NULL)
				samples_split_seconds = 30 * 60; // default 30 minutes
			else
				samples_split_seconds = atoi(optarg2) * 60;
			break;

		case 0x107: // --path
			data_path = optarg2;
			break;

		case 0x108: // --phonout
			if((optarg2 == NULL) || ((f_phonemes_out = fopen(optarg2,"w")) == NULL))
			{
				fprintf(stderr,"Can't write to: %s\n",(optarg2 != NULL) ? optarg2 : "");
				// keep a valid stream: a NULL here would later be used for
				// phoneme output and passed to fclose()
				f_phonemes_out = stdout;
			}
			break;

		case 0x109: // --pho
			option_mbrola_phonemes = 16;
			break;

		case 0x10a: // --ipa
			option_phonemes = 3;
			if(optarg2 != NULL)
			{
				value = -1;
				sscanf(optarg2,"%d",&value);
				if((value<0) || (value>3))
				{
					fprintf(stderr,"Bad value for -ipa=\n");
					value = 0;
				}
				option_phonemes += value;
			}
			break;

		case 0x10b: // -version
			PrintVersion();
			exit(0);

		default:
			exit(0);
		}
	}

	if(option_waveout || quiet)
	{
		// writing to a file (or no output), we can use synchronous mode
		samplerate = espeak_Initialize(AUDIO_OUTPUT_SYNCHRONOUS,0,data_path,0);
		samples_split = samplerate * samples_split_seconds;

		espeak_SetSynthCallback(SynthCallback);
		if(samples_split)
		{
			// move a short extension into 'filetype' so that a file number
			// can be inserted between the base name and the extension
			char *extn;
			extn = strrchr(wavefile,'.');
			if((extn != NULL) && ((wavefile + strlen(wavefile) - extn) <= 4))
			{
				strcpy(filetype,extn);
				*extn = 0;
			}
		}
	}
	else
	{
		// play the sound output
		samplerate = espeak_Initialize(AUDIO_OUTPUT_PLAYBACK,0,data_path,0);
	}

	if(voicename[0] == 0)
		strcpy(voicename,"default");

	if(espeak_SetVoiceByName(voicename) != EE_OK)
	{
		// not a voice name: try it as a language instead
		memset(&voice_select,0,sizeof(voice_select));
		voice_select.languages = voicename;
		if(espeak_SetVoiceByProperties(&voice_select) != EE_OK)
		{
			fprintf(stderr,"%svoice '%s'\n",err_load,voicename);
			exit(2);
		}
	}

	if(flag_compile)
	{
		// This must be done after the voice is set
		espeak_CompileDictionary("", stderr, flag_compile & 0x1);
		exit(0);
	}

	// set any non-default values of parameters. This must be done after espeak_Initialize()
	if(speed > 0)
		espeak_SetParameter(espeakRATE,speed,0);
	if(volume >= 0)
		espeak_SetParameter(espeakVOLUME,volume,0);
	if(pitch >= 0)
		espeak_SetParameter(espeakPITCH,pitch,0);
	if(option_capitals >= 0)
		espeak_SetParameter(espeakCAPITALS,option_capitals,0);
	if(option_punctuation >= 0)
		espeak_SetParameter(espeakPUNCTUATION,option_punctuation,0);
	if(wordgap >= 0)
		espeak_SetParameter(espeakWORDGAP,wordgap,0);
	if(option_linelength > 0)
		espeak_SetParameter(espeakLINELENGTH,option_linelength,0);
	if(option_punctuation == 2)
		espeak_SetPunctuationList(option_punctlist);

	espeak_SetPhonemeTrace(option_phonemes | option_mbrola_phonemes,f_phonemes_out);

	if(filename[0]==0)
	{
		if((optind < argc) && (flag_stdin == 0))
		{
			// there's a non-option parameter, and no -f or --stdin
			// use it as text
			p_text = argv[optind];
		}
		else
		{
			f_text = stdin;
			if(flag_stdin == 0)
			{
				flag_stdin = 2;
			}
		}
	}
	else
	{
		filesize = GetFileLength(filename);
		// a negative length means the name is a directory, which can't be
		// read as text; leave f_text NULL so that it is reported below
		if(filesize >= 0)
			f_text = fopen(filename,"r");
	}

	if((f_text == NULL) && (p_text == NULL))
	{
		fprintf(stderr,"%sfile '%s'\n",err_load,filename);
		exit(1);
	}

	if(p_text != NULL)
	{
		int size;
		size = strlen(p_text);
		espeak_Synth(p_text,size+1,0,POS_CHARACTER,0,synth_flags,NULL,NULL);
	}
	else
	if(flag_stdin)
	{
		int max = 1000;
		if((p_text = (char *)malloc(max)) == NULL)
		{
			fprintf(stderr,"Failed to allocate memory %d bytes",max);
			exit(3);
		}

		if(flag_stdin == 2)
		{
			// line by line input on stdin
			while(fgets(p_text,max,stdin) != NULL)
			{
				p_text[max-1] = 0;
				espeak_Synth(p_text,max,0,POS_CHARACTER,0,synth_flags,NULL,NULL);
			}
		}
		else
		{
			// bulk input on stdin
			int ch;
			ix = 0;
			// testing the value returned by fgetc() ends the loop on a read
			// error as well as at end of file
			while((ch = fgetc(stdin)) != EOF)
			{
				p_text[ix++] = (char)ch;
				if(ix >= (max-1))
				{
					char *p_new;
					max += 1000;
					if((p_new = (char *)realloc(p_text,max)) == NULL)
					{
						fprintf(stderr,"Failed to allocate memory %d bytes",max);
						exit(3);
					}
					p_text = p_new;
				}
			}
			p_text[ix] = 0;
			espeak_Synth(p_text,ix+1,0,POS_CHARACTER,0,synth_flags,NULL,NULL);
		}
	}
	else
	if(f_text != NULL)
	{
		size_t n_read;

		if((p_text = (char *)malloc(filesize+1)) == NULL)
		{
			fprintf(stderr,"Failed to allocate memory %d bytes",filesize);
			exit(3);
		}

		// fewer bytes than 'filesize' may be returned (for example with line-end
		// translation in text mode), so terminate at the amount actually read
		n_read = fread(p_text,1,filesize,f_text);
		p_text[n_read]=0;
		espeak_Synth(p_text,n_read+1,0,POS_CHARACTER,0,synth_flags,NULL,NULL);
		fclose(f_text);
	}

	if(espeak_Synchronize() != EE_OK)
	{
		fprintf(stderr, "espeak_Synchronize() failed, maybe error when opening output device\n");
		exit(4);
	}

	if(f_phonemes_out != stdout)
		fclose(f_phonemes_out); // needed for WinCE
	return(0);
}