Added attributes in voice file: voicing, breath, breathw.
Added "en-wisper" voice.
espeakedit: fixed crash when playing sounds using a voice file with a high pitch.
lang=it: use phoneme [u], not [U].
Added directory docs to svn.

git-svn-id: https://espeak.svn.sourceforge.net/svnroot/espeak/trunk@45 d46cf337-b52f-0410-862d-fd96e6ae7743
@@ -164,7 +164,7 @@ t tS ts v z Z z2 | |||
Dictionary it_dict | |||
@- a a/ aI aU e E i | |||
o O oI U u | |||
o O oI u | |||
* : b d dZ f g j | |||
k l l^ m n N n^ p | |||
@@ -285,8 +285,8 @@ Dictionary pt_dict | |||
& &/ &U~ &~ @ @- a A | |||
aI aU e E eI EI eU EU | |||
e~ i i/ iU o O oI OI | |||
o~ U u uI u~ y | |||
i i/ iU i~ o O oI OI | |||
o~ u U uI u~ y | |||
* : ; b C d dZ f | |||
g h j k l l^ m n |
@@ -7,7 +7,7 @@ | |||
_cap k'apital | |||
_?? s'imbolo | |||
_! p'Untoesklamat'ivo | |||
_! p'untoesklamat'ivo | |||
_" viRgolet:e | |||
_# kantSellet:o | |||
_$ dolla:Ro | |||
@@ -15,37 +15,37 @@ _% peRtS'ento | |||
_& _!e: | |||
_' ap'OstRofo | |||
_( ap'eRtapaR'entezi | |||
_) kj,UzapaR'entezi | |||
_) kj,uzapaR'entezi | |||
_* asteRisko | |||
_+ pjU | |||
_+ pju | |||
_, v'iRgola | |||
_- t@-*at:ino | |||
_. pUnto | |||
_. punto | |||
_/ slaS | |||
_: d,Uep'Unte | |||
_; p'Untoev'iRgola | |||
_: d,uep'unte | |||
_; p'untoev'iRgola | |||
_< ap'eRta'angolo | |||
_= Ugwale | |||
_> kj,Uza'angolo | |||
_? p'UntointeRogat'ivo | |||
_= ugwale | |||
_> kj,uza'angolo | |||
_? p'untointeRogat'ivo | |||
_@ ki'otS:ola | |||
_[ ap'eRtakw'ad@-*a | |||
_\ bakslaS | |||
_] kj,Uzakw'ad@-*a | |||
_^ kap:UtS:o | |||
__ 'UndeRskoRe | |||
_] kj,uzakw'ad@-*a | |||
_^ kap:utS:o | |||
__ 'undeRskoRe | |||
_` bakp@-*aIm | |||
_{ ap'eRtag@-*'af:a | |||
_| b'aRa | |||
_} kj,Uzag@-*'af:a | |||
_} kj,uzag@-*'af:a | |||
_~ tilde | |||
// numbers | |||
_0 dz'eRo | |||
_1 'Uno | |||
_2 d'Ue | |||
_1 'uno | |||
_2 d'ue | |||
_3 tR'e | |||
_4 kw'at:Ro | |||
_5 tS'inkwe |
@@ -266,22 +266,22 @@ | |||
tt t: | |||
.group u | |||
_) u(_ U: | |||
u U: | |||
u (CK U | |||
_) u(_ u: | |||
u u: | |||
u (CK u | |||
u (A w2 | |||
Kr) u (A U | |||
Kl) u (A U | |||
Kr) u (A u | |||
Kl) u (A u | |||
@) u (dine_ 'u | |||
@) u (colo_ 'u | |||
.group v | |||
_) v(_ vU: | |||
_) v(_ vu: | |||
v v | |||
.group w | |||
_) w(_ d'op:jav'U | |||
_) w(_ d'op:jav'u | |||
w w2 | |||
.group x | |||
@@ -315,8 +315,8 @@ | |||
ó (CK 'o | |||
ò 'O: | |||
ò (CK 'O | |||
ù 'U: | |||
ù (CK 'U | |||
ù 'u: | |||
ù (CK 'u | |||
% pertS'ento | |||
$ dolla:ro | |||
@@ -325,14 +325,14 @@ | |||
& _!e | |||
@ ki'otS:ola | |||
© k'opiraIt | |||
+ pjU | |||
. pUnto | |||
+ pju | |||
. punto | |||
, (D v'irgola | |||
: d,Uep'Unte | |||
; p'Untoev'irgola | |||
! _p'Untoesklamat'ivo_ | |||
: d,uep'unte | |||
; p'untoev'irgola | |||
! _p'untoesklamat'ivo_ | |||
!) ! | |||
? _p'UntointeRogat'ivo_ | |||
? _p'untointeRogat'ivo_ | |||
?) ? | |||
\\ bakslaS | |||
/ slaS |
@@ -0,0 +1,132 @@ | |||
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd"> | |||
<HTML> | |||
<HEAD> | |||
<META HTTP-EQUIV="CONTENT-TYPE" CONTENT="text/html; charset=utf-8"> | |||
<TITLE></TITLE> | |||
</HEAD> | |||
<BODY LANG="en-GB" DIR="LTR"> | |||
<A href="docindex.html">Back</A> | |||
<HR> | |||
<H2>6. ADDING OR IMPROVING A LANGUAGE</H2> | |||
<HR> | |||
Most of the work doesn't need any programming knowledge. Just an understanding of the language, an | |||
awareness of its features, patience and attention to detail. Wikipedia is a good source of basic phonetic information, eg | |||
<a href="http://en.wikipedia.org/wiki/Vowel">http://en.wikipedia.org/wiki/Vowel</a> | |||
<P> | |||
In many cases it should be fairly easy to add a rough implementation of a new language, hopefully | |||
enough to be intelligible.<br> | |||
After that it's a gradual process of improvement to: | |||
<ul> | |||
<li>Make the spelling-to-phoneme translation rules more accurate, including the position of stressed | |||
syllables within words. Some languages are easier than others. I expect most are easier than English. | |||
<p><li>Improve the sounds of the phonemes. This may consist of making small adjustments to vowel and diphthong quality, | |||
or adjusting the strength of consonants. Bigger changes may be recording new or replacement consonant | |||
sounds, or even writing program code to implement new types of sounds. | |||
<p><li>Mark some common words in the dictionary which should be unstressed (words such as "the", "is"), which should be preceded
by a short pause (such as "and", "but"), or which have other attributes, in order to make the speech flow better.
<p><li>Improve the rhythm of the speech by adjusting the relative lengths of vowels in different contexts, eg. stressed/unstressed syllable, | |||
or depending on the following phonemes. This is important for making the speech sound good for the language. | |||
<p><li>Identify or implement new functions in the program to improve the speech, or to deal with | |||
characteristics of the language which are not currently implemented. For example, a different intonation module. | |||
</ul> | |||
If you are interested in working on a language, please contact me to set up the initial data and to | |||
discuss the features of the language. | |||
<HR> | |||
<H3>6.1 Language Code</H3> | |||
<P>Generally, the language's international ISO 639-1 code is used to | |||
identify the language. It is used in the filenames which
contain the language's data. In the examples below the code "<B>en</B>"
(English) is used as an example. Replace this with the code of your | |||
language.<p> | |||
It is possible to have different variants of a language, for example where the sounds of some phonemes are changed,
or where some of the pronunciation rules differ.
<HR> | |||
<H3>6.2 Phoneme File</H3> | |||
<P>You must first decide on the set of phonemes to be used for the | |||
language. These should be listed and defined in a phonemes file such as | |||
<B>ph_english</B>. A reference to this file is then included at the end of | |||
the <B>phonemes</B> file (the master phoneme file), eg:</P>
<PRE> phonemetable en base | |||
include ph_english</PRE><P> | |||
This example defines a phoneme table "<B>en</B>" which inherits | |||
the contents of phoneme table "<B>base</B>". Its contents are | |||
found in the file <B>ph_english</B>.</P> | |||
<P>The <B>base</B> phoneme table contains definitions of a basic set of | |||
consonants, and also some "control" phonemes such as stress marks and | |||
pauses. The phoneme table for a language will generally inherit this, | |||
or alternatively it may inherit the phoneme table of another language | |||
which in turn inherits the <B>base</B> phoneme table.</P> | |||
<P>The phonemes file for the language defines those additional | |||
phonemes which are not inherited (generally the vowels and diphthongs, plus any additional | |||
consonants), or phonemes whose definitions differ from the | |||
inherited version (eg. the redefinition of a consonant).</P> | |||
<P>Details of the contents of phonemes files are given in | |||
<A href="phontab.html">phontab.html</A>.</P> | |||
The <B>Compile phoneme data</B> function of the <B>espeakedit</B> | |||
program compiles the phonemes files to produce the files | |||
<B>espeak-data/phontab</B>, <B>phonindex</B>, and <B>phondata.</B><P> | |||
For information on how to analyse recorded sounds of the language and to | |||
prepare the corresponding phoneme data, see (not yet written).<p> | |||
For an initial draft a language will often be able to use vowels and | |||
consonants which have already been set up for another language. | |||
<HR> | |||
<H3>6.3 Dictionary Files</H3> | |||
<P STYLE="font-weight: medium">Once the language's phonemes have been | |||
defined, then pronunciation dictionary data can be produced in order | |||
to translate the language's source text into phonemes. This consists | |||
of two source files: <B>en_rules</B> (the spelling to phoneme rules) and | |||
<B>en_list</B> (an exceptions list, and attributes of certain words). The corresponding compiled data | |||
file is <B>espeak-data/en_dict</B> which is produced from <B>en_rules</B> | |||
and <B>en_list</B> sources by the command: <B>speak --compile=en</B>.</P> | |||
<P STYLE="font-weight: medium">Details of the contents of the | |||
dictionary files are given in <A href="dictionary.html">dictionary.html</A>.</P> | |||
<P STYLE="font-weight: medium">The <B>en_list</B> file contains not | |||
only pronunciation exceptions, but also gives attributes to specific | |||
words. The most notable of these are:</P>
<P STYLE="font-weight: medium"><B>$u </B>Some common words should be | |||
marked as "unstressed" in order to make the speech flow better. | |||
These words generally include articles (eg: a, the, this, that), | |||
auxiliary verbs (eg: is, have, will, can, may), pronouns and
possessive adjectives (eg: he, his), some common prepositions (eg:
of, to, in), some common conjunctions (eg. and, or, if), and some
common adverbs and adjectives (eg. any, already).</P>
<P><B>$pause </B>Some words should be marked to have a short pause | |||
before them, in order to produce natural pauses in long sentences.
These include conjunctions (eg. and, or, but, however) and perhaps | |||
some prepositions.</P> | |||
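<P STYLE="font-weight: medium">For example, entries in <B>en_list</B> giving these attributes could look like the following sketch (flags may be given with or without a phoneme string; these lines are only illustrative, not the actual contents of <B>en_list</B> &mdash; see <A href="dictionary.html">dictionary.html</A> for the exact format):</P>
<PRE>   the    $u
   is     $u
   and    $u $pause
   but    $u $pause
</PRE>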
<HR> | |||
<H3>6.4 Voice File</H3> | |||
<P STYLE="font-weight: medium">Each language should have one or more | |||
voice files in <B>espeak-data/voices</B>. The filename of the default voice | |||
for a language should be the same as the language code.</P> | |||
<P STYLE="font-weight: medium">Details of the contants of voice files | |||
are given in <A href="voices.html">voices.html</A>.</P> | |||
<P STYLE="font-weight: medium">The simplest voice file would contain | |||
just a single line to give the language code, eg:</P> | |||
<PRE STYLE="margin-bottom: 0.5cm"> language en</PRE><P STYLE="font-weight: medium"> | |||
This language code specifies the phoneme table (i.e. <b>phonemetable en</b>) and the
dictionary (i.e. <B>espeak-data/en_dict</B>) to be used. If needed, these can be | |||
overridden by <B>phonemes</B> and <B>dictionary</B> attributes in the | |||
voices file.</P> | |||
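<P STYLE="font-weight: medium">A slightly fuller voice file might look like the sketch below. This is only an illustration: <B>name</B> is the name listed by the <B>--voices</B> option, and the <B>phonemes</B> and <B>dictionary</B> lines are only needed when they differ from the language code. See <A href="voices.html">voices.html</A> for the full list of attributes.</P>
<PRE> name english
 language en
 phonemes en
 dictionary en
</PRE>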
<HR> | |||
<H3>6.5 Program Code</H3> | |||
<P STYLE="font-weight: medium">The behaviour of the speak program is | |||
controlled by various options (eg. whether words are stressed on the first, | |||
last, or penultimate syllable). The function <B>SetTranslator()</B> at the | |||
start of the <B>tr_languages.cpp</B> file recognizes the language | |||
code and sets the appropriate set of options.</P> | |||
<P STYLE="font-weight: medium">For a new language, you would add its | |||
language code and the required options in <B>SetTranslator()</B>. However, this | |||
may not be necessary during testing because most of the options can also be | |||
set from the voice file in | |||
<B>espeak-data/voices</B>.</P> | |||
<P STYLE="font-weight: medium">If necessary, you can define a new | |||
translator class for a language, and select this in the | |||
SetTranslator() function. This inherits the standard functions | |||
from the base translator class, but allows you to replace these where | |||
needed by new functions which are written specially for this | |||
language.</P> | |||
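<P STYLE="font-weight: medium">As a rough sketch only (the actual structure, macros, option names, and values are defined in <B>tr_languages.cpp</B> and may differ from this), an entry for a hypothetical language code "xx" might look something like:</P>
<PRE> case L('x','x'):                  // hypothetical language code "xx"
    tr = new Translator();
    tr->langopts.stress_rule = 2;  // eg. stress the penultimate syllable
    break;
</PRE>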
<hr> | |||
</BODY> | |||
</HTML> |
@@ -0,0 +1,69 @@ | |||
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd"> | |||
<html> | |||
<head> | |||
<title></title> | |||
<meta name="GENERATOR" content="Quanta Plus"> | |||
<meta http-equiv="Content-Type" content="text/html; charset=utf-8"> | |||
</head> | |||
<body> | |||
<A href="docindex.html">Back</A> | |||
<hr> | |||
<h2>ANALYSIS</h2> | |||
<hr> | |||
(Further notes are needed) | |||
<p> | |||
Recordings of spoken words and phrases can be analysed to try and make eSpeak match a language more closely. | |||
Unlike most other (larger and better quality) synthesizers, eSpeak's data is not produced directly from recorded sounds. To use an analogy, it's like a drawing or sketch compared with a photograph. Or vector graphics compared with a bitmap image. It's smaller, less accurate, with less subtlety, but it can sometimes show some aspects of the picture more clearly than a more accurate image. | |||
<h4>Recording Sounds</h4> | |||
Recordings should be made while speaking slowly, clearly, and firmly and loudly (but not shouting). Speak about half a metre from the microphone. Try to avoid background noise and hum interference from electrical power cables. | |||
<h4>Praat</h4> | |||
I use a modified version of the praat program (<a href="http://www.praat.org">www.praat.org</a>) to view and analyse both sound recordings and output from eSpeak. The modification adds a new function (<code>Spectrum->To_eSpeak</code>) which analyses a voiced sound and produces a file which can be loaded into espeakedit. Details of the modification are in the <code>"praat-mod"</code> directory in the espeakedit package.
The analysis contains a sequence of frames, one per cycle at the speech's fundamental frequency. Each frame is a short time spectrum, together with praat's estimation of the f1 to f5 formant frequencies at the time of that cycle. | |||
I also use Praat's <code>New->Record_mono_sound</code> function to make sound recordings. | |||
<h3>Vowels and Diphthongs</h3> | |||
<h4>Analysing a Recording</h4> | |||
Make a recording, with a male voice, and trim it in Praat to keep just the required vowel sound. Then use the new <code>Spectrum->To_eSpeak</code> modification (this was named <code>To_Spectrogram2</code> in earlier versions) to analyse the sound. It produces a file named <code>"spectrum.dat"</code>. | |||
Load the <code>"spectrum.dat"</code> file into espeakedit. Espeakedit has two Open functions, <code>File->Open</code> and <code>File->Open2</code>. They are the same, except that they remember different paths. I generally use <code>File->Open2</code> for reading the <code>"spectrum.dat"</code> file. | |||
The data is displayed in espeakedit as a sequence of spectrum frames (see <a href="editor.html">editor.html</a>). | |||
<h4>Tone Quality</h4> | |||
It can be difficult to match the tonal quality of a new vowel to be compatible with existing vowel files. This is determined by the relative heights and widths of the formant peaks. These vary depending on how the recording was made, the microphone, and the strength and tone of the voice. Also the positions of the higher peaks (F3 upwards) can vary depending on the characteristics of the speaker's voice. Formant peaks correspond to resonances within the mouth and throat, and they depend on its size and shape. With a female voice, all the formants (F1 upwards) are generally shifted to higher frequencies. | |||
For these reasons, it's best to use a male voice, and to use its analysed spectra only as guidance. Rather than construct formant-peaks entirely to match the analysed data, instead copy keyframes from a similar existing vowel. Then make small adjustments to match the position of the F1, F2, F3 formant peaks and hopefully produce the required vowel sound. | |||
<h4>Using an Existing Vowel File</h4> | |||
Choose a similar vowel file from <code>phsource/vowel</code> and open it into espeakedit. It may be useful to use <code>phsource/vowel/vowelchart</code> as a map to show how vowel files compare with each other. You can select a keyframe from the vowel file and use CTRL-C and CTRL-V to copy the green formant peaks onto a frame of the new spectrum sequence. Then adjust the peaks to match the new frame. Press F1 to hear the sound of the formant peaks in the selected frame. | |||
The F0 peak is provided in order to set the correct balance of low frequencies below the F1 peak. If the sound is too muffled, or conversely, too "thin", try adjusting the amplitude or position of the F0 peak.
<h4>Length and Amplitude</h4> | |||
Use an existing vowel file as a guide for how to set the amplitude and length of the keyframes. At the right of each keyframe, its length is shown in ms and under that is its relative (RMS) amplitude.
The second keyframe should be marked with a red marker (use CTRL-M to toggle this). This divides the vowel into the front-part (with one frame), and the rest. | |||
Use F2 to play the sound of the new vowel sequence. It will also produce a WAV file (the default name is speech.wav) which you can read into praat to see whether it has a sensible shape. | |||
<h4>Using the New Vowel</h4> | |||
Make a new directory (eg. vwl_xx) in phsource for your new vowels. Save the spectrum sequence with a name which you have chosen for it. | |||
You can then edit the phoneme file for your language (eg. phsource/ph_xxx), and change a phoneme to refer to your new vowel file. Then do <code>Data->Compile_Phoneme_Data</code> from espeakedit's menubar to re-compile the phoneme data. | |||
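As a rough illustration (the exact keywords and attributes of phoneme definitions are described in <a href="phontab.html">phontab.html</a>, and all names here are hypothetical), the definition which refers to the new keyframe file might look something like:
<pre> phoneme e2                        // hypothetical new vowel
   vowel starttype (e) endtype (e)
   length 200
   FMT(vwl_xx/e_new)               // the keyframe file saved above
 endphoneme
</pre>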
</body> | |||
</html> |
@@ -0,0 +1,169 @@ | |||
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd"> | |||
<html> | |||
<head> | |||
<title>eSpeak Speech Synthesizer</title> | |||
<meta name="GENERATOR" content="Quanta Plus"> | |||
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1"> | |||
</head> | |||
<body> | |||
<A href="index.html">Back</A> | |||
<hr> | |||
<h2>2.1 INSTALLATION</h2> | |||
<hr> | |||
(This section only applies to Linux and other Posix systems).<br> | |||
There are two versions of the command line program. They both have the same command parameters (see below). | |||
<ol> | |||
<li><strong>espeak</strong> uses the speech engine in the <strong>libespeak</strong> shared library. The libespeak library must first be installed.
<p> | |||
<li><strong>speak</strong> is a stand-alone version which includes its own copy of the speech engine. | |||
</ol> | |||
Place the <strong>espeak</strong> or <strong>speak</strong> executable file in the command path, eg in <strong>/usr/local/bin</strong> | |||
<p> | |||
Place the "<strong>espeak-data</strong>" directory in /usr/share as <strong>/usr/share/espeak-data</strong>.<br> | |||
Alternatively if it is placed in the user's home directory (i.e. <strong>/home/<user>/espeak-data</strong>) | |||
then that will be used instead. | |||
<p> | |||
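For example, assuming the files have been unpacked into the current directory, this could be done with commands such as:
<pre> sudo cp espeak /usr/local/bin/
 sudo cp -r espeak-data /usr/share/
</pre>
<p>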
<h4>Dependencies</h4> | |||
<strong>espeak</strong> uses the PortAudio sound library (version 18), so you will need to have the <strong>libportaudio0</strong> library package installed. It may be already, since it's used by other software, such as OpenOffice.org and the Audacity sound editor.<p> | |||
Some Linux distributions (eg. SuSe 10) have version 19 of PortAudio which has a slightly different API. The speak program can be compiled to use version 19 of PortAudio by copying the file portaudio19.h to portaudio.h before compiling.<p>
The speak program may be compiled without using PortAudio, by removing the line<pre> #define USE_PORTAUDIO | |||
</pre>in the file speech.h. | |||
<p> <hr> | |||
<h2>2.2 COMMAND OPTIONS</h2> | |||
<hr> | |||
<h3>2.2.1 Examples</h3> | |||
To use at the command line, type:<br> | |||
<strong>espeak "This is a test"</strong><br> | |||
or<br> | |||
<strong>espeak -f <text file></strong> | |||
<p> | |||
Or just type<br> | |||
<strong>espeak</strong><br> | |||
followed by text on subsequent lines. Each line is spoken when | |||
RETURN is pressed.<br>Use <strong>espeak -x</strong> to see the corresponding phoneme codes. | |||
<p> <hr> | |||
<h3>2.2.2 Use with KDE Text-to-Speech (KTTS)</h3> | |||
To add to KDE-Text-to-Speech Manager (KTTSMgr), use it as a "Command" talker | |||
with "command for speaking texts" set to:<br> | |||
<strong>cat %f | espeak --stdin -w %w</strong> | |||
<p> | |||
Note: | |||
<ul> | |||
<li>When used by the KTTS system, I noticed a slight background hiss with the speech, which is not present when I use <strong>espeak</strong> directly from the command line. This was because the KDE sound default was set to "8 bits" rather than 16 bits.
<li>KTTSMgr breaks the text into sentences to pass to the speech engine, but it mistakenly assumes sentence breaks when dots follow abbreviations and therefore pauses after the dots in "eg. Mr. John B. Smith etc." Speaking a text file directly with <strong>espeak</strong> gives better results in this respect. | |||
<li>Speaking text from a web page using KTTS often causes headings and image captions to be run together with the following text as a single sentence. Speaking the HTML directly with the <strong>-m</strong> option set (i.e. using <strong>espeak -m -f text.html</strong>), may help if this is a problem. | |||
</ul> | |||
<p> <hr> | |||
<h3>2.2.3 The Command Line Options</h3> | |||
<dl> | |||
<dt> | |||
<strong>espeak [options] ["words"]</strong><br> | |||
<dd>Text input can be taken either from a file, from a string in the command, or from stdin. | |||
<p> | |||
<dt> | |||
<strong>-f <text file></strong><br> | |||
<dd>Speaks a text file. | |||
<p> | |||
<dt> | |||
<strong> --stdin</strong><br> | |||
<dd>Takes the text input from stdin. | |||
<p> | |||
<dt> | |||
If neither -f nor --stdin is given, then the text input is taken from "words" (a text string within double quotes). <br>If that is not present then text is taken from stdin, but each line is treated as a separate sentence. | |||
<p> | |||
<dt> | |||
<strong>-a <integer></strong><br> | |||
<dd>Sets amplitude (volume) in a range of 0 to 200. The default is 100. | |||
<p> | |||
<dt> | |||
<strong>-p <integer></strong><br> | |||
<dd>Adjusts the pitch in a range of 0 to 99. The default is 50. | |||
<p> | |||
<dt> | |||
<strong>-s <integer></strong><br> | |||
<dd>Sets the speed in words-per-minute (approximate values for the default voice, others may | |||
differ slightly). The default value is 170. I generally use a faster speed | |||
of 190. Range 80 to 370. | |||
<p> | |||
<dt> | |||
<strong>-b</strong><br> | |||
<dd>Indicates that the input text is in the 8-bit character set which corresponds to the language (eg. Latin-2 for Polish). Without this option, eSpeak assumes text is UTF-8, but will automatically switch to the 8-bit character set if it finds an illegal UTF-8 sequence. That may give wrong results if some 8-bit character sequences look like valid UTF-8 multibyte characters.
<p> | |||
<dt> | |||
<strong>-l <integer></strong><br> | |||
<dd>Line-break length, default value 0. If set, then lines which are shorter | |||
than this are treated as separate clauses and spoken separately with a | |||
break between them. This can be useful for some text files, but bad for | |||
others. | |||
<p> | |||
<dt> | |||
<strong>-m</strong><br> | |||
<dd>Indicates that the text contains SSML (Speech Synthesis Markup Language) tags or other XML tags. Those SSML tags which are supported are interpreted. Other tags, including HTML, are ignored, except that some HTML tags such as <hr> <h2> and <li> ensure a break in the speech. | |||
<p> | |||
<dt> | |||
<strong>-v <voice filename>[+<variant>]</strong><br> | |||
<dd>Sets a Voice for the speech, usually to select a language. eg: | |||
<pre> espeak -vaf</pre> | |||
to use the Afrikaans voice. A modifier after the voice name can be used to vary the tone of the voice, eg:
<pre> espeak -vaf+3</pre> | |||
The variants are <code> +1 +2 +3 +4 +5 </code> for male voices and <code> +11 +12 +13 +14 </code> which simulate female voices by using higher pitches. | |||
<p> | |||
<voice filename> is a file within the <code>espeak-data/voices</code> directory.<br> | |||
Voice files can specify a language, different pitches, tonal qualities, and prosody for the voice. | |||
See the <a href="voices.html">voices.html</a> file.<p> | |||
Voice names which start with <b>mb-</b> are for use with Mbrola diphone voices, see <a href="mbrola.html">mbrola.html</a><p> | |||
Some languages may need additional dictionary data, see <a href="languages.html">languages.html</a> | |||
<p> | |||
<dt> | |||
<strong>-w <wave file></strong><br> | |||
<dd>Writes the speech output to a file in WAV format, rather than speaking it. | |||
<p> | |||
<dt> | |||
<strong>-x</strong><br> | |||
<dd>The phoneme mnemonics, into which the input text is translated, are | |||
shown on stdout. | |||
<p> | |||
<dt> | |||
<strong>-X</strong><br> | |||
<dd>As -x, but in addition, details are shown of the pronunciation rule and dictionary list lookup. This can be useful to see why a certain pronunciation is being produced. Each matching pronunciation rule is listed, together with its score, the highest scoring rule being used in the translation. "Found:" indicates the word was found in the dictionary lookup list, and "Flags:" means the word was found with only properties and not a pronunciation. You can see when a word has been retranslated after removing a prefix or suffix. | |||
<p> | |||
<dt><strong>-q</strong><br><dd> | |||
Quiet. No sound is generated. This may be useful with the -x option. | |||
<p> | |||
<dt> | |||
<strong>-z</strong><br> | |||
<dd>This option removes the end-of-sentence pause which normally occurs at the end of the text.
<p> | |||
<dt> | |||
<strong>--stdout</strong><br> | |||
<dd>Writes the speech output to stdout rather than speaking it. | |||
<p> | |||
<dt><strong>--compile[=<voice name>]</strong><br> | |||
<dd> | |||
Compile the pronunciation rule and dictionary lookup data from their source files in the current directory. The Voice determines which language's files are compiled. For example, if it's an English voice, then <em>en_rules</em>, <em>en_list</em>, and <em>en_extra</em> (if present), are compiled to replace <em>en_dict</em> in the <em>espeak-data</em> directory. If no Voice is specified then the default Voice is used.
<p> | |||
<dt><strong>--punct[="<characters>"]</strong><br> | |||
<dd> | |||
Speaks the names of punctuation characters when they are encountered in the text. If <characters> are given, then only those listed punctuation characters are spoken, eg. <code> --punct=".,;?"</code> | |||
<p> | |||
<dt> | |||
<strong>--voices[=<language code>]</strong><br> | |||
<dd>Lists the available voices.<br> | |||
If =<language code> is present then only those voices which are suitable for that language are listed.<br> | |||
</dl> | |||
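As an example of combining several of these options (the file names here are arbitrary):
<pre> espeak -v en -s 150 -f article.txt -w article.wav
 espeak -q -x "This is a test"
</pre>
The first command reads a text file with the English voice at 150 words-per-minute and writes the speech to a WAV file instead of speaking it. The second shows the phoneme translation of the text without generating any sound.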
<p> <hr> | |||
<h3>2.2.4 The Input Text</h3> | |||
<dl> | |||
<dt><b>HTML Input</b> | |||
<dd> | |||
If the -m option is used to indicate marked-up text, then HTML can be spoken directly. | |||
<p> | |||
<dt><b>Phoneme Input</b> | |||
<dd> | |||
As well as plain text, phoneme mnemonics can be used in the text input to <strong>espeak</strong>. They are enclosed within double square brackets. Spaces are used to separate words and all stressed syllables must be marked explicitly.<br> | |||
eg: <code> [[D,Is Iz sVm f@n'EtIk t'Ekst 'InpUt]] </code> | |||
</dl> | |||
</body> | |||
</html>
@@ -0,0 +1,566 @@ | |||
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd"> | |||
<html> | |||
<head> | |||
<title>eSpeak: Pronunciation Dictionaries</title> | |||
<meta name="GENERATOR" content="Quanta Plus"> | |||
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1"> | |||
</head> | |||
<body> | |||
<A href="docindex.html">Back</A> | |||
<hr> | |||
<h2>4. TEXT TO PHONEME TRANSLATION</h2> | |||
<hr> | |||
<h3>4.1 Translation Files</h3> | |||
There is a separate set of pronunciation files for each language, their names starting with the language name. | |||
<p> | |||
There are two separate methods for translating words into phonemes: | |||
<ul> | |||
<li>Pronunciation Rules. These are an attempt to define the pronunciation rules for the language. The source file is:<br> | |||
<strong><em><language>_rules</em></strong> (eg. en_rules)<br> | |||
<p> | |||
<li> | |||
Lookup Dictionary. A list of individual words and their pronunciations and/or various other properties. The source files are:<br> | |||
<strong><em><language>_list</em></strong> (eg. en_list) and optionally <strong><em><language>_extra</em></strong> <br> | |||
</ul> | |||
These two files are compiled into the file | |||
<strong><em><language>_dict</em></strong> in the espeak-data directory (eg. espeak-data/en_dict) | |||
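For example, to compile the English dictionary data, run the following from the directory which contains the source files (eg. <strong>dictsource</strong>); see the <strong>--compile</strong> option in <a href="commands.html">commands.html</a>:
<pre> espeak --compile=en
</pre>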
<p> <hr> | |||
<h3>4.2 Phoneme names</h3> | |||
Each of the language's phonemes is represented by a mnemonic of 1, 2, 3, or 4 characters. Together with a number of utility codes (eg. stress marks and pauses), these are defined in the phoneme data file (see *spec not yet available*). | |||
<p> | |||
The utility 'phonemes' are: | |||
<ul><table> | |||
<tbody align="left"> | |||
<tr> | |||
<td><strong>' </strong></td> | |||
<td>primary stress</td> | |||
</tr> | |||
<tr> | |||
<td><strong>, </strong></td> | |||
<td>secondary stress</td> | |||
</tr> | |||
<tr> | |||
<td><strong>% </strong></td> | |||
<td>unstressed syllable</td> | |||
</tr> | |||
<tr> | |||
<td><strong>= </strong></td> | |||
<td>put the primary stress on the preceding syllable</td> | |||
</tr> | |||
<tr> | |||
<td><strong>_:</strong></td> | |||
<td>short pause</td> | |||
</tr> | |||
<tr> | |||
<td><strong>_</strong></td> | |||
<td>a shorter pause</td> | |||
</tr> | |||
<tr> | |||
<td><strong>|| </strong></td> | |||
<td>indicates a word boundary within a phoneme string</td> | |||
</tr> | |||
<tr> | |||
<td><strong>| </strong></td> | |||
<td>can be used to separate two adjacent characters, to prevent them from being considered as a multi-character phoneme mnemonic</td> | |||
</tr> | |||
</tbody> | |||
</table> | |||
</ul> | |||
It is not necessary to specify the stress of every syllable. Stress markers are only needed in order to change the effect of the language's default stress rule. | |||
<p> | |||
The phonemes which are used to represent a language's sounds are based on the Kirshenbaum ascii character representation of the International Phonetic Alphabet <a href="http://www.kirshenbaum.net/IPA/ascii-ipa.pdf">www.kirshenbaum.net/IPA/ascii-ipa.pdf</a> | |||
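For example, using the English phoneme mnemonics, the word "morning" could be written as the phoneme string:
<pre>    m'O:nIN        // primary stress on the first syllable
</pre>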
<p> <hr> | |||
<h3>4.3 Pronunciation Rules</h3> | |||
The rules in the <strong><em><language>_rules</em></strong> file specify the phonemes which are used to pronounce each letter, or sequence of letters. Some rules only apply when the letter or letters are preceded by, or followed by, other specified letters. | |||
<p> | |||
To find the pronunciation of a word, the rules are searched and any which match the letters at the current position in the word are given a score depending on how many letters are matched. The pronunciation from the best matching rule is chosen. The pointer into the source word is then advanced past those letters which have been matched and the process is repeated until all the letters of the word have been processed.
<p> | |||
<h4>4.3.1 Rule Groups</h4> | |||
The rules are organized in groups, each starting with a ".group" line: | |||
<ul><dl> | |||
<dt><strong>.group <character></strong><br><dd> | |||
A group for each letter or character. | |||
<p> | |||
<dt><strong>.group <2 characters></strong><br><dd> | |||
Optional groups for some common 2 letter combinations. This is only needed, for efficiency, in cases where there are many rules for a particular letter. They would not be needed for a language which has regular spelling rules. | |||
<p> | |||
<dt><strong>.group</strong><br><dd> | |||
A group for other characters which don't have their own group. | |||
<p> | |||
<dt><strong>.L<nn></strong><br><dd> | |||
Defines a group of letter sequences, any of which can match with <strong>Lnn</strong> in a <strong>post</strong> rule (see below). <strong>nn</strong> is a 2 digit decimal number in the range 01 to 20. eg:<br> | |||
<code>.L01 b bl br pl pr</code> | |||
</dl> | |||
</ul>When matching a word, firstly the 2-letter group for the two letters at the current position in the word (if such a group exists) is searched, and then the single-letter group. The highest scoring rule in either of those two groups is used. | |||
<h4>4.3.2 Rules</h4> | |||
Each rule is on a separate line, and has the syntax:
<ul> | |||
[<pre>)] <match> [(<post>] <phoneme string> | |||
</ul> | |||
eg. | |||
<ul><pre>.group o | |||
o 0 // "o" is pronounced as [0] | |||
oo u: // but "oo" is pronounced as [u:] | |||
b) oo (k U | |||
</pre> | |||
</ul> "oo" is pronounced as [u:], but when also preceded by "b" and followed by "k", it is pronounced [U]. | |||
<p> | |||
In the case of a single-letter group, the first character of <match> must be the group letter. In the case of a 2-letter group, the first two characters of <match> must be the group letters. The second and third rules above may be in either .group o or .group oo
<p> | |||
Alphabetic characters in the <pre>, <match>, and <post> parts must be lower case, and matching is case-insensitive. Some upper case letters are used in <pre> and <post> with special meanings. | |||
<p> | |||
<h4>4.3.3 Special characters in <phoneme string>:</h4> | |||
<ul><table> | |||
<tbody> | |||
<tr> | |||
<td><strong>_^_<language code> </strong></td> | |||
<td>Translate using a different language.</td> | |||
</tr> | |||
</tbody> | |||
</table> | |||
If this rule is selected when translating a word, then the translation is aborted and the word is re-translated using the specified different language. <language code> may be upper or lower case. This can be used to recognise certain letter combinations as being foreign words and to use the foreign pronunciation for them. eg: | |||
<pre> | |||
th (_ _^_EN | |||
</pre> | |||
indicates that a word which ends in "th" is translated using the English translation rules and spoken with English phonemes. | |||
</ul> | |||
<h4>4.3.4 Special Characters in both <pre> and <post>:</h4> | |||
<ul><table> | |||
<tbody> | |||
<tr> | |||
<td><strong>_</strong></td> | |||
<td>Beginning or end of a word (or a hyphen).</td> | |||
</tr> | |||
<tr> | |||
<td><strong>-</strong></td> | |||
<td>Hyphen.</td> | |||
</tr> | |||
<tr> | |||
<td><strong>A</strong></td> | |||
<td>Any vowel (the set of vowel characters may be defined for a particular language).</td> | |||
</tr> | |||
<tr> | |||
<td><strong>C</strong></td> | |||
<td>Any consonant.</td> | |||
</tr> | |||
<tr> | |||
<td><strong>B H F G Y </strong></td> | |||
<td>These may indicate other sets of characters (defined for a particular language).</td> | |||
</tr> | |||
<tr> | |||
<td><strong>D</strong></td> | |||
<td>Any digit.</td> | |||
</tr> | |||
<tr> | |||
<td><strong>K</strong></td> | |||
<td>Not a vowel (i.e. a consonant or word boundary or non-alphabetic character).</td> | |||
</tr> | |||
<tr> | |||
<td><strong>X</strong></td> | |||
<td>There is no vowel until the word boundary.</td> | |||
</tr> | |||
<tr> | |||
<td><strong>Z</strong></td> | |||
<td>A non-alphabetic character.</td> | |||
</tr> | |||
<tr> | |||
<td><strong>%</strong></td> | |||
<td>Doubled (placed before a character in <pre> and after it in <post>).</td>
</tr> | |||
<tr> | |||
<td><strong>/</strong></td> | |||
<td>The following character is treated literally.</td> | |||
</tr> | |||
</tbody> | |||
</table> | |||
</ul> | |||
The sets of letters indicated by A, B, C, E, F, G may be defined differently for each language.
<p> | |||
Examples of rules: | |||
<pre> _) a // "a" at the start of a word | |||
a (CC // "a" followed by two consonants | |||
a (C% // "a" followed by a double consonant (the same letter twice) | |||
a (/% // "a" followed by a percent sign | |||
 %C) a // "a" preceded by a double consonant
</pre> | |||
<h4>4.3.5 Special characters only in <pre>:</h4> | |||
<ul><table> | |||
<tbody> | |||
<tr> | |||
<td><strong>@ </strong></td> | |||
<td>Any syllable.</td> | |||
</tr> | |||
<tr> | |||
<td><strong>&</strong></td> | |||
<td>A syllable which may be stressed (i.e. is not defined as unstressed).</td> | |||
</tr> | |||
<tr> | |||
<td><strong>V</strong></td> | |||
<td>Matches only if a previous word has indicated that a verb form is expected.</td> | |||
</tr> | |||
</tbody> | |||
</table> | |||
</ul> | |||
eg. | |||
<pre> @@) bi // "bi" preceded by at least two syllables | |||
@@a) bi // "bi" preceded by at least 2 syllables and following 'a' | |||
</pre> | |||
Note that matching characters in the <pre> part do not affect the syllable counting.
<p> | |||
<h4>4.3.6 Special characters only in <post>:</h4> | |||
<ul><table> | |||
<tbody> | |||
<tr> | |||
<td><strong>@</strong></td> | |||
<td>A vowel follows somewhere in the word.</td> | |||
</tr> | |||
<tr> | |||
<td><strong>+</strong></td> | |||
<td>Force an increase in the score in this rule (may be repeated for more effect).</td> | |||
</tr> | |||
<tr> | |||
<td><strong>S<number> </strong></td> | |||
<td>This number of matching characters are a standard suffix, remove them and retranslate the word.</td> | |||
</tr> | |||
<tr> | |||
<td><strong>P<number></strong></td> | |||
<td>This number of matching characters are a standard prefix, remove them and retranslate the word.</td> | |||
</tr> | |||
<tr> | |||
<td><strong>Lnn</strong></td> | |||
<td><strong>nn</strong> is a 2-digit decimal number in the range 01 to 20<br> | |||
Matches with any of the letter sequences which have been defined for letter group <strong>nn</strong></td> | |||
</tr> | |||
<tr> | |||
<td><strong>N</strong></td> | |||
<td>Only use this rule if the word is not a retranslation after removing a suffix.</td> | |||
</tr> | |||
<tr> | |||
<td><strong>T</strong></td> | |||
<td>Only use this rule if the word is found in the *_list file with the <b>$alt1</b> attribute.</td>
</tr> | |||
<tr> | |||
<td><strong>#</strong></td> | |||
<td>(English specific) change the next "e" into a special character "E"</td> | |||
</tr> | |||
</tbody> | |||
</table> | |||
</ul> | |||
eg. | |||
<pre> @) ly (_$2 lI // "ly", at end of a word with at least one other | |||
// syllable, is a suffix pronounced [lI]. Remove | |||
// it and retranslate the word. | |||
_) un (@P2 ¬Vn // "un" at the start of a word is an unstressed | |||
// prefix pronounced [Vn] | |||
_) un (i ju: // ... except in words starting "uni" | |||
_) un (inP2 ,Vn // ... but it is for words starting "unin" | |||
</pre> | |||
S and P must be at the end of the <post> string. | |||
<p> | |||
S<number> may be followed by additional letters (eg. S2ei ). Some of these are probably specific to English, but similar functions could be used for other languages.
<ul><table> | |||
<tbody> | |||
<tr> | |||
<td><strong>q</strong></td> | |||
<td>query the _list file to find stress position or other attributes for the stem, but don't re-translate the word with the suffix removed.</td> | |||
</tr> | |||
<tr> | |||
<td><strong>t</strong></td> | |||
<td>determine the stress pattern of the word <strong>before</strong> adding the suffix</td> | |||
</tr> | |||
<tr> | |||
<td><strong>d </strong></td> | |||
<td>the previous letter may have been doubled when the suffix was added.</td> | |||
</tr> | |||
<tr> | |||
<td><strong>e</strong></td> | |||
<td>"e" may have been removed.</td> | |||
</tr> | |||
<tr> | |||
<td><strong>i</strong></td> | |||
<td>"y" may have been changed to "i."</td> | |||
</tr> | |||
<tr> | |||
<td><strong>v</strong></td> | |||
<td>the suffix means the verb form of pronunciation should be used.</td> | |||
</tr> | |||
<tr> | |||
<td><strong>f</strong></td> | |||
<td>the suffix means the next word is likely to be a verb.</td> | |||
</tr> | |||
</tbody> | |||
</table> | |||
</ul> | |||
<p> | |||
P<number> may be followed by additional letters (eg. P3v ).
<ul><table> | |||
<tbody> | |||
<tr> | |||
<td><strong>t </strong></td> | |||
<td>determine the stress pattern of the word <strong>before</strong> adding the prefix</td> | |||
</tr> | |||
<tr> | |||
<td><strong>v</strong></td> | |||
<td>the prefix means the verb form of pronunciation should be used.</td>
</tr> | |||
</tbody> | |||
</table> | |||
</ul> | |||
<p> <hr> | |||
<h3>4.4 Pronunciation Dictionary List</h3> | |||
The <strong><em><language>_list</em></strong> file contains a list of words whose pronunciations are given explicitly, rather than determined by the Pronunciation Rules. | |||
The <strong><em><language>_extra</em></strong> file, if present, is also used and its contents are taken as coming after those in <strong><em><language>_list</em></strong>.
<p> | |||
Also the list can be used to specify the stress pattern, or other properties, of a word. | |||
<p> | |||
If the Pronunciation Rules are applied to a word and indicate a standard prefix or suffix, then the word is again looked up in the Pronunciation Dictionary List after the prefix or suffix has been removed.
<p> | |||
Lines in the dictionary list have the form: | |||
<ul> | |||
<word> [<phoneme string>] [<flags>] | |||
</ul>eg. | |||
<pre> book bUk | |||
</pre> | |||
Rather than a full pronunciation, just the stress may be given, to change where it would be otherwise placed by the Pronunciation Rules: | |||
<pre> berlin $2 // stress on second syllable | |||
absolutely $3 // stress on third syllable | |||
for $u // an unstressed word | |||
</pre> | |||
<h4>4.4.1 Multiple Words</h4> | |||
A pronunciation may also be specified for a group of words, when these appear together. Up to four words may be given, enclosed in brackets. This may be used to change the pronunciation or stress pattern when these words occur together,
<pre> (de jure) deI||dZ'U@rI2 // note || used as a word break in the phoneme string</pre> | |||
or to run them together, pronounced as a single word | |||
<pre> (of a) @v@ | |||
</pre> | |||
or to give them a flag when they occur together | |||
<pre> (such as) sVtS||a2z $pause // precede with a pause | |||
</pre> | |||
<h4>4.4.2 Special characters in <phoneme string>:</h4> | |||
<ul><table> | |||
<tbody> | |||
<tr> | |||
<td><strong>_^_<language code> </strong></td> | |||
<td>Translate using a different language. See explanation in 4.3.3 above.</td> | |||
</tr> | |||
</tbody> | |||
</table> | |||
</ul> | |||
<h4>4.4.3 Flags</h4> | |||
A word (or group of words) may be given one or more flags, either instead of, or as well as, the phonetic translation. | |||
<ul><table> | |||
<tbody> | |||
<tr> | |||
<td>$u</td> | |||
<td>The word is unstressed. In the case of a multi-syllable word, a slight stress is applied according to the default stress rules.</td> | |||
</tr> | |||
<tr> | |||
<td>$u1</td> | |||
<td>The word is unstressed, with a slight stress on its 1st syllable.</td> | |||
</tr> | |||
<tr> | |||
<td>$u2</td> | |||
<td>The word is unstressed, with a slight stress on its 2nd syllable.</td> | |||
</tr> | |||
<tr> | |||
<td>$u3</td> | |||
<td>The word is unstressed, with a slight stress on its 3rd syllable.</td> | |||
</tr> | |||
<tr> | |||
<td> </td> | |||
<td> </td> | |||
</tr> | |||
<tr> | |||
<td>$u+ $u1+ $u2+ $u3+</td> | |||
<td>As above, but the word has full stress if it's at the end of a clause.</td> | |||
</tr> | |||
<tr> | |||
<td> </td> | |||
<td> </td> | |||
</tr> | |||
<tr> | |||
<td>$1</td>
<td>Primary stress on the 1st syllable.</td> | |||
</tr> | |||
<tr> | |||
<td>$2</td> | |||
<td>Primary stress on the 2nd syllable.</td> | |||
</tr> | |||
<tr> | |||
<td>$3</td> | |||
<td>Primary stress on the 3rd syllable.</td> | |||
</tr> | |||
<tr> | |||
<td>$4</td> | |||
<td>Primary stress on the 4th syllable.</td> | |||
</tr> | |||
<tr> | |||
<td>$5</td> | |||
<td>Primary stress on the 5th syllable.</td> | |||
</tr> | |||
<tr> | |||
<td>$6</td> | |||
<td>Primary stress on the 6th syllable.</td> | |||
</tr> | |||
<tr> | |||
<td>$7</td> | |||
<td>Primary stress on the 7th syllable.</td> | |||
</tr> | |||
<tr> | |||
<td> </td> | |||
<td> </td> | |||
</tr> | |||
<tr> | |||
<td>$pause</td> | |||
<td>Ensure a short pause before this word (eg. for conjunctions such as "and", some prepositions, etc).</td> | |||
</tr> | |||
<tr> | |||
<td>$brk</td> | |||
<td>Ensure a very short pause before this word, shorter than $pause (eg. for some prepositions, etc).</td> | |||
</tr> | |||
<tr> | |||
<td>$only</td> | |||
<td>The rule does not apply if a prefix or suffix has already been removed.</td> | |||
</tr> | |||
<tr> | |||
<td>$onlys</td> | |||
<td>As $only, except that a standard plural ending is allowed.</td> | |||
</tr> | |||
<tr> | |||
<td>$stem</td> | |||
<td>The rule only applies if a suffix has already been removed.</td> | |||
</tr> | |||
<tr> | |||
<td>$strend</td> | |||
<td>Word is fully stressed if it's at the end of a clause.</td> | |||
</tr> | |||
<tr> | |||
<td>$strend2</td> | |||
<td>As $strend, but the word is also stressed if followed only by unstressed word(s).</td> | |||
</tr> | |||
<tr> | |||
<td>$unstressend </td> | |||
<td>Word is unstressed if it's at the end of a clause.</td> | |||
</tr> | |||
<tr> | |||
<td>$atend</td> | |||
<td>Use this pronunciation if it's at the end of a clause.</td> | |||
</tr> | |||
<tr> | |||
<td>$double</td> | |||
<td>Cause a doubling of the initial consonant of the following word (used for Italian).</td> | |||
</tr> | |||
<tr> | |||
<td>$capital</td> | |||
<td>Use this pronunciation if word has initial capital letter (eg. polish v Polish).</td> | |||
</tr> | |||
<tr> | |||
<td>$dot</td> | |||
<td>Ignore a . after this word even when followed by a capital letter (eg. Mr. Dr. ).</td> | |||
</tr> | |||
<tr> | |||
<td>$abbrev</td> | |||
<td>This has two meanings.<br> 1. If there is no phoneme string: Speak the word as individual letters, even if it contains a vowel (eg. "abc" should be spoken as "a" "b" "c").<br>2. If there is a phoneme string: Speak this word as the specified phoneme string, not as individual letters, even if it's all capital letters (eg, Roman numerals III, IV ).</td> | |||
</tr> | |||
<tr> | |||
<td> </td> | |||
<td> </td> | |||
</tr> | |||
<tr> | |||
<td>$alt $alt2</td> | |||
<td>These are language specific. Their use should be described in the language's **_list file</td> | |||
</tr> | |||
<tr>
<td> </td> | |||
<td> </td> | |||
</tr> | |||
<tr> | |||
<td>$verb</td> | |||
<td>Use this pronunciation if it's a verb.</td> | |||
</tr>
<tr>
<td>$past</td>
<td>Use this pronunciation if it's past tense.</td> | |||
</tr> | |||
<tr> | |||
<td>$verbf</td> | |||
<td>The following word is probably a verb.</td>
</tr> | |||
<tr> | |||
<td>$verbsf</td> | |||
<td>The following word is probably a verb if it has an "s" suffix.</td>
</tr> | |||
<tr> | |||
<td>$nounf</td> | |||
<td>The following word is probably not a verb.</td> | |||
</tr> | |||
<tr> | |||
<td>$pastf</td> | |||
<td>The following word is probably past tense.</td> | |||
</tr> | |||
<tr> | |||
<td>$verbextend</td> | |||
<td>Extend the influence of $verbf and $verbsf.</td> | |||
</tr> | |||
</tbody> | |||
</table></ul> | |||
The last group are probably English specific, but something similar may be useful in other languages. They are a crude attempt to improve the accuracy of pairs like ob'ject (verb) v 'object (noun) and read (present) v read (past). | |||
<p> | |||
The dictionary list is searched from bottom to top. The first match that satisfies any conditions is used (i.e. the one lowest down the list). So if we have: | |||
<pre> | |||
to t@ // unstressed version | |||
to tu: $atend // stressed version | |||
</pre> | |||
then if "to" is at the end of the clause, we get [tu:], if not then we get [t@]. | |||
<p> <hr> | |||
<h3>4.5 Numbers and Character Names</h3> | |||
<h4>4.5.1 Letter names</h4> | |||
The names of individual letters can be given either in the <b>_rules</b> or <b>_list</b> file. Sometimes an individual letter is also used as a word in the language and its pronunciation as a word differs from its letter name. If so, it should be listed in the <b>_list</b> file, preceded by an underscore, to give the letter name (as distinct from its pronunciation as a word). eg. in English: | |||
<pre> _a eI</pre> | |||
<h4>4.5.2 Numbers</h4> | |||
The operation of the TranslateNumber() function is controlled by the language's <code>langopts.numbers</code> option. This constructs spoken numbers from fragments according to various options which can be set for each language. The number fragments are given in the <b>_list</b> file.
<p> | |||
<ul> | |||
<table><tbody align="left"> | |||
<tr> | |||
<td> | |||
_0 to _9 | |||
<td>The numbers 0 to 9 | |||
</tr> | |||
<tr> | |||
<td>_2X _3X<td>Twenty, thirty, etc., used to make numbers 10 to 99 | |||
</tr> | |||
<tr> | |||
<td>_nn<td>Any two digit numbers with a special pronunciation (eg. _15 "fifteen"). | |||
</tr> | |||
<tr><TD>_0C<td>The word for "hundred"</td> | |||
<tr><TD>_1C _2C<td>Special pronunciation for one hundred, two hundred, etc., if needed.</tr> | |||
<tr><TD>_1C0<td>Special pronunciation (if needed) for 100 exactly</td> | |||
<tr><TD>_0M1<td>The word for "thousand"</tr> | |||
<tr><TD>_0M2<td>The word for "million"</tr> | |||
<tr><TD>_0M3<td>The word for 1000000000</tr> | |||
<tr><TD>_1M1 _2T1<td>Special pronunciation for one thousand, two thousand, etc., if needed</td>
<tr><TD>_0and<td>Word for "and" when speaking numbers (eg. "two hundred and twenty").</tr> | |||
<tr><TD>_dpt<td>Word spoken for the decimal point/comma</tr>
<tr><TD>_dpt2<td>Word spoken (if any) at the end of all the digits after a decimal point.</tr> | |||
</tbody></table> | |||
</ul> | |||
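For example, a few of the corresponding entries in an English <b>_list</b> file might look like this (the phoneme strings here are only illustrative, not the actual contents of <b>en_list</b>):
<pre>    _2      t'u:
    _15     fIft'i:n
    _2X     tw'EntI
    _0C     h'Vndr@d
    _0M1    T'aUz@nd
</pre>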
</body> | |||
</html> |
@@ -0,0 +1,65 @@ | |||
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd"> | |||
<html> | |||
<head> | |||
<title>eSpeak Speech Synthesizer</title> | |||
<meta name="GENERATOR" content="Quanta Plus"> | |||
<meta http-equiv="Content-Type" content="text/html; charset=utf-8"> | |||
</head> | |||
<body> | |||
<table border="1" cellpadding="10" background="images/sand-light.jpg" width="100%"> | |||
<tbody> | |||
<tr> | |||
<td width="15%"> | |||
<a href="http://sourceforge.net"><img src="http://sflogo.sourceforge.net/sflogo.php?group_id=159649&type=2" width="125" height="37" border="0" alt="SourceForge.net Logo" /></a> | |||
</td> | |||
<td> | |||
<div align="center"><h1>eSpeak - Documents</h1></div> | |||
</td> | |||
</tr> | |||
<tr> | |||
<td valign="top"> | |||
<font size="+1"><strong> | |||
<A href="index.html">Home</A> | |||
<p> | |||
<A href="commands.html">Usage</A> | |||
<p> | |||
<A href="languages.html">Languages</A> | |||
</strong></font> | |||
</td> | |||
<td> | |||
<h3><A href="voices.html">Voice Files</A></h3> | |||
Voice files specify a language and other characteristics of a voice. | |||
<h3><A href="mbrola.html">Mbrola Voices</A></h3> | |||
eSpeak can be used as a front-end for Mbrola diphone voices. | |||
<h3><A href="dictionary.html">Pronunciation Dictionary</A></h3> | |||
<ul> | |||
<li>How to add pronunciation corrections. | |||
<li>How to build up pronunciation rules for a new language. | |||
</ul><p> | |||
<h3><A href="add_language.html">Adding a Language</A></h3> | |||
How to add or improve a language. | |||
<h3><A href="phonemes.html">Phonemes</A></h3> | |||
The list of phoneme mnemonics, for use in the Pronunciation Dictionary. | |||
<h3><A href="phontab.html">Phoneme Tables</A></h3> | |||
The tables of the phonemes used by each language, with their properties and sound production. | |||
<h3><A href="speak_lib.h">eSpeak Libary API</A></h3> | |||
API definition and header file for a shared library version of eSpeak. | |||
<h3><A href="ssml.html">Markup tags</A></h3> | |||
SSML (Speech Synthesis Markup Language) and HTML tags recognized by eSpeak. | |||
<h3><A href="editor.html">The espeakedit program</A></h3> | |||
GUI software to edit vowel files and to compile the phoneme data for use by eSpeak.<br> | |||
<ul> | |||
<li><a href="editor_if.html">espeakedit program GUI details</a> | |||
<li><a href="analyse.html">Analysing sound recordings</a> | |||
<li><a href="makephonemes.html">Adjusting phoneme data</a> (to be written) | |||
</ul> | |||
</td> | |||
</tr> | |||
</tbody> | |||
</table> | |||
</body> | |||
</html> |
@@ -0,0 +1,37 @@ | |||
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd"> | |||
<html> | |||
<head> | |||
<title>espeak: Downloads</title> | |||
<meta http-equiv="Content-Type" content="text/html; charset=utf-8"> | |||
</head> | |||
<body> | |||
<A href="docindex.html">Back</A> | |||
<hr> | |||
<h2>ESPEAK DOWNLOADS</h2> | |||
<hr> | |||
This page gives direct links to eSpeak downloads on one of the Sourceforge mirrors.<br>To get other versions of eSpeak, or use other mirrors, use the <a href="http://sourceforge.net/project/showfiles.php?group_id=159649">Sourceforge download page</a> | |||
<p> | |||
Source code and dictionary data.<br> | |||
<a href="http://kent.dl.sourceforge.net/sourceforge/espeak/espeak-1.23-source.zip">espeak-1.23-source.zip</a> (0.9 MBytes) | |||
<p> | |||
Compiled for Linux i386 (Debian/Ubuntu based distros)<br> | |||
<a href="http://kent.dl.sourceforge.net/sourceforge/espeak/espeak-1.23-linux.zip">espeak-1.23-linux.zip</a> (0.7 MBytes) | |||
<p> | |||
Compiled for Windows. SAPI5 and command-line versions.<br> | |||
<a href="http://kent.dl.sourceforge.net/sourceforge/espeak/espeak-1.23-win.zip">espeak-1.23-win.zip</a> (0.7 MBytes) | |||
<p> | |||
Compiled for RISC OS<br> | |||
<a href="http://kent.dl.sourceforge.net/sourceforge/espeak/espeak-3.23-riscos.zip">espeak-3.23-riscos.zip</a> (0.7 MBytes) | |||
<hr><p> | |||
<b>espeakedit program and the phoneme data.</b> | |||
<p> | |||
Source code and compiled for Linux.<br> | |||
<a href="http://kent.dl.sourceforge.net/sourceforge/espeak/espeakedit-1.23.zip">espeakedit-1.23.zip</a> (6 MBytes) | |||
<p> | |||
Compiled for Windows.<br> | |||
<a href="http://kent.dl.sourceforge.net/sourceforge/espeak/espeakedit-1.23-win.zip">espeakedit-1.23-win.zip</a> (4 MBytes) | |||
</body> | |||
</html> |
@@ -0,0 +1,75 @@ | |||
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd"> | |||
<html> | |||
<head> | |||
<title>espeakedit</title> | |||
<meta name="GENERATOR" content="Quanta Plus"> | |||
<meta http-equiv="Content-Type" content="text/html; charset=utf-8"> | |||
</head> | |||
<body> | |||
<A href="docindex.html">Back</A> | |||
<hr> | |||
<h2>ESPEAKEDIT PROGRAM</h2> | |||
<hr> | |||
The <strong>espeakedit</strong> program is used to prepare phoneme data for the eSpeak speech synthesizer.<p> | |||
It has two main functions: | |||
<ul> | |||
<li>Prepare keyframe files for individual vowels and voiced consonants. These each contain a sequence of keyframes which define how formant peaks (peaks in the frequency spectrum) vary during the sound.<p> | |||
<li>Process the master <strong>phonemes</strong> file which, by including the phoneme files for the various languages, defines all their phonemes and references the keyframe files and the sound sample files which they use. <strong>espeakedit</strong> processes these and compiles them into the <strong>phondata</strong>, <strong>phonindex</strong>, and <strong>phontab</strong> files in the <strong>espeak-data</strong> directory which are used by the eSpeak speech synthesizer. | |||
</ul> | |||
<hr> | |||
<h3>Installation</h3> | |||
<strong>espeakedit</strong> needs the following packages:<br> | |||
(The package names mentioned here are those from the Ubuntu "Dapper" Linux distribution). | |||
<ul> | |||
<li><strong>sox</strong> (a universal sound sample translator) | |||
<li><strong>libwxgtk2.6-0</strong> (wxWidgets Cross-platform C++ GUI toolkit) | |||
<li><strong>portaudio0</strong> (Portaudio V18, portable audio I/O) | |||
</ul> | |||
In addition, a modified version of <strong>praat</strong> (<a href="http://www.praat.org">www.praat.org</a>) is used to view and analyse WAV sound files.
This needs the package <strong>libmotif3</strong> to run and <strong>libmotif-dev</strong> to compile. | |||
<hr> | |||
<h3>Quick Guide</h3> | |||
This will quickly illustrate the main features. Details of the interface and key commands are given in <a href="editor_if.html">editor_if.html</a><p> | |||
For more detailed information on analysing sound recordings and preparing phoneme definitions and keyframe data see <a href="analyse.html">analyse.html</a> (to be written). | |||
<h4>Compiling Phoneme Data</h4> | |||
<ol> | |||
<li>Run the <strong>espeakedit</strong> program.<p> | |||
<li>Select <b>Data->Compile phoneme data</b> from the menu bar. Dialog boxes will ask you to locate the directory (<b>phsource</b>) which contains the master phonemes file, and the directory (<b>dictsource</b>) which contains the dictionary files (en_rules, en_list, etc). Once specified, espeakedit will remember their locations, although they can be changed later from <b>Options->Paths</b>.<p>
<li>A message in the status line at the bottom of the espeakedit window will indicate whether there are any errors in the phoneme data, and how many languages' dictionary files have been compiled. The compiled data is placed into the <b>espeak-data</b> directory, ready for use by the speak program. If errors are found in the phoneme data, they are listed in a file <b>error_log</b> in the <b>phsource</b> directory.</li>
<p> | |||
NOTE: espeakedit can be used from the command line to compile the phoneme data, with the command: <b> espeakedit --compile</b> | |||
<li>Select <b>Tools->Make vowels chart->From compiled phoneme data</b>. This will look for the vowels in the compiled phoneme data of each language and produce a vowel chart (.png file) in <b>phsource/vowelcharts</b>. These charts plot the vowels' F1 (formant 1) frequency against their F2 frequency, which corresponds approximately to their open/close and front/back positions. The colour in the circle for each vowel indicates its F3 frequency, red indicates a low F3, through yellow and green to blue and violet for a high F3. In the case of a diphthong, a line is drawn from the circle to the position of the end of the vowel. | |||
</ol> | |||
<h4>Keyframe Sequences</h4> | |||
<ol> | |||
<li>Select <b>File->Open</b> from the menu bar and select a vowel file, <b>phsource/vowel/a</b>. This will open a tab in the espeakedit window which contains a sequence of 4 keyframes. Each keyframe shows a black graph, which is the outline of an original analysed spectrum from a sound recording, and also a green line, which shows the formant peaks which have been added (using the black graph as a guide) and which produce the sound.<p> | |||
<li>Click in the "a" tab window and then press the <b>F2</b> key. This will produce and play the sound of the keyframe sequence. The first time you do this, you'll get a save dialog asking where you want the WAV file to be saved. Once you give a location all future sounds will be stored in that same location, although it can be changed from <b>Options->Paths</b>.<p> | |||
<li>Click on the second of the four frames, the one with the red square. Press <b>F1</b>. That plays the sound of just that frame.<p> | |||
<li>Press the <b>1</b> (number one) key. That selects formant F1 and a red triangle appears under the F1 formant peak to indicate that it's selected. Also an = sign appears next to formant 1 in the formants list in the left panel of the window.<p> | |||
<li>Press the left-arrow key a couple of times to move the F1 peak to the left.  The red triangle and its associated green formant peak move to a lower frequency.  Its numeric value in the formants list in the left panel decreases.<p>
<li>Press the <b>F1</b> key again.  The frame will give a slightly different vowel sound.  As you move the F1 peak slightly up and down and then press <b>F1</b> again, the sound changes.  Similarly, if you press the <b>2</b> key to select the F2 formant, moving that will also change the sound.  If you move the F1 peak down to about 700 Hz (and reduce its height a bit with the down-arrow key) and move F2 up to 1400 Hz, then you'll hear an "er" schwa [@] sound instead of the original [a].<p>
<li>Select <b>File->Open</b> and choose <b>phsource/vowel/aI</b>. This opens a new tab labelled "aI" which contains more frames. This is the [aI] diphthong and if you click in the tab window and press <b>F2</b> you'll hear the English word "eye". If you click on each frame in turn and press <b>F1</b> then you can hear each of the keyframes in turn. They sound different, starting with an [A] sound (as in "palm"), going through something like [@] in "her" and ending with something like [I] in "kit" (or perhaps a French é). Together they make the diphthong [aI]. | |||
</ol> | |||
<h4>Text and Prosody Windows</h4> | |||
<ol> | |||
<li>Click on the <b>Text</b> tab in the left panel. Two text windows appear in the panel with buttons <b>Translate</b> and <b>Speak</b> below them.<p> | |||
<li>Type some text into the top window and click the <b>Translate</b> button. The phonetic translation will appear in the lower window.<p> | |||
<li>Click the <b>Speak</b> button. The text will be spoken and a <b>Prosody</b> tab will open in the main window.<p> | |||
<li>Click on a vowel phoneme which is displayed in the Prosody tab. A red line appears under it to indicate that it has been selected.<p> | |||
<li>Use the <b>up-arrow</b> or <b>down-arrow</b> key to move the vowel's blue pitch contour up or down. Then click the <b>Speak</b> button again to hear the effect of the altered pitch. If the adjacent phoneme also has a pitch contour then you may hear a discontinuity in the sound if it no longer matches with the one which you have moved.<p> | |||
<li>Hold down the <b>Ctrl</b> key while using the <b>up-arrow</b> or <b>down-arrow</b> keys. The gradient of the pitch contour will change.<p> | |||
<li>Click with the right mouse button over a phoneme. A menu allows you to select a different pitch envelope shape. Details of the currently selected phoneme appear in the Status line at the bottom of the window. The <b>Stress</b> number gives the stress level of the phoneme (see voices.html for a list).<p> | |||
<li>Click the <b>Translate</b> button. This re-translates the text and restores the original pitches.<p> | |||
<li>Click on a vowel phoneme in the Prosody window and use the <b><</b> and <b>></b> keys to shorten or lengthen it.<p> | |||
</ol> | |||
The Prosody window can be used to experiment with different phoneme lengths and different intonation.<p> | |||
<hr> | |||
</body> | |||
</html> | |||
@@ -0,0 +1,143 @@ | |||
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd"> | |||
<html> | |||
<head> | |||
<title>Editor - Spectrum</title> | |||
<meta name="GENERATOR" content="Quanta Plus"> | |||
<meta http-equiv="Content-Type" content="text/html; charset=utf-8"> | |||
</head> | |||
<body> | |||
<A href="docindex.html">Back</A> | |||
<hr> | |||
<h2>USER INTERFACE - FORMANT EDITOR</h2> | |||
<hr> | |||
<h3>Frame Sequence Display</h3> | |||
The eSpeak editor can display a number of frame sequences in tabbed windows.  Each frame can contain a short-time frequency spectrum, covering the period of one cycle at the sound's pitch.  Frames can also show:
<ul> | |||
<LI>Blue vertical lines showing the estimated position of the f1 to f5 formants (if the sequence was produced by praat analysis). These should correspond with the peaks in the spectrum, but may not do so exactly<p> | |||
<li>Numbers at the right side of the frame showing the position from the start of the sequence in mS, and the pitch of the sound.<p> | |||
<li>Up to 9 formant peaks (numbered 0 to 8) added by the user, usually to match the peaks in the spectrum, in order to produce the required sound.  These are shown in green, can be moved by keyboard presses as described below, and may merge if they are close together.  If a frame has formant peaks then it is a Keyframe and is shown with a pale yellow background.<p>
<li>If formant peaks are present, a relative amplitude (r.m.s.) value is shown at the right side of the frame. | |||
</ul> | |||
<h3>Text Tab</h3> | |||
Enter text in the top left text window. Click the <b>Translate</b> button to see the phonetic transcription in the text window below. Then click the <b>Speak</b> button to speak the text and show the results in the <b>Prosody</b> tab, if that is open. | |||
<p> | |||
If changes are made in the <b>Prosody</b> tab, then clicking <b>Speak</b> will speak the modified prosody while <b>Translate</b> will revert to the default prosody settings for the text. | |||
<p> | |||
To enter phonetic symbols (Kirshenbaum encoding) in the top left text window, enclose them within  [[ ]].
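For example:<br>
<code>[[D,Is Iz sVm f@n'EtIk t'Ekst 'InpUt]]</code>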
<h3>Spect Tab</h3> | |||
The "Spect" tab in the left panel of the eSpeak editor shows information about the currently selected frame and sequence. | |||
<ul> | |||
<li>The <strong>Formants</strong> section displays the Frequency, Height, and Width of each formant peak (peaks 0 to 8). Peaks 6, 7, 8 don't have a variable width.<p> | |||
<li><strong>% amp - Frame</strong> can be used to adjust the amplitude of the frame.  If you change this value then the rms amplitude value at the right side of the frame will change.  The formant peaks don't change, just the overall amplitude of the frame.<p>
<li><strong>mS</strong> shows the time in mS until the next keyframe (or end of sequence if there is none).  The spin control initially shows the same value, but this can be changed in order to increase or decrease the effective length of a keyframe.<p>
<li><strong>% amp - Sequence</strong> adjusts the amplitude of the whole sequence.  Changing this value changes the rms amplitudes of all the keyframes in the sequence.<p>
<li><strong>% mS - Sequence</strong> shows the total length of the sequence.<p>
<li><strong>Graph</strong><br> | |||
Yellow vertical lines show the position of keyframes within the sequence.<br> | |||
Black bars on these show the frequencies of formant peaks which have been set at these keyframes.<br> | |||
Thick red lines, if present, show the formants, as detected in the original analysis.<br> | |||
Thin black line, if present, shows the pitch profile measured in the original analysis. | |||
</ul> | |||
<h3>Key Commands</h3> | |||
<ul> | |||
<li><strong>Selection</strong>.<p> | |||
The selected frame(s) are shown with a red border. The selected formant peak is also indicated by an equals ("=") sign next to its number in the "Spect" panel to the right of the window.<p> | |||
The selected formant peak is shown with a red triangle under the peak.<p> | |||
Keyframes are shown with a pale yellow background. A keyframe is any frame with any formant peaks which are not zero height. If all formant peaks become zero height, the frame is no longer a keyframe. If you increase a peak's height the frame becomes a keyframe. | |||
<dl> | |||
<dt><strong>Numbers 0 to 8</strong> | |||
<dd>Select formant peak number 0 to 8. | |||
<dt><strong>Page Up/Down</strong> | |||
<dd>Move to next/previous frame | |||
</dl> | |||
<li><strong>Formant movement</strong>. With the following keys, holding down <b>Shift</b> causes slower movement. | |||
<dl> | |||
<dt>Left
<dd>Moves the selected formant peak to a lower frequency.
<dt>Right
<dd>Moves the selected formant peak to a higher frequency.
<dt>Up | |||
<dd>Increases height of the selected formant peak. | |||
<dt>Down | |||
<dd>Decreases height of the selected formant peak. | |||
<dt><strong><</strong> | |||
<dd>Narrows the selected formant peak. | |||
<dt><strong>></strong> | |||
<dd>Widens the selected formant peak. | |||
<dt><strong>CTRL <</strong> | |||
<dd>Narrows the selected formant peak. | |||
<dt><strong>CTRL ></strong> | |||
<dd>Widens the selected formant peak. | |||
<dt><b>/</b> | |||
<dd>Makes the selected formant peak symmetrical. | |||
</dl> | |||
<li><strong>Frame Cut and Paste</strong> | |||
<dl> | |||
<dt><b>CTRL A</b> | |||
<dd>Select all frames in the sequence. | |||
<dt><b>CTRL C</b> | |||
<dd>Copy selected frames to (internal) clipboard. | |||
<dt><b>CTRL V</b> | |||
<dd>Paste frames from the clipboard to overwrite the contents of the selected frame and the frames which follow it. Only the formant peaks information is pasted. | |||
<dt><b>CTRL SHIFT V</b> | |||
<dd>Paste frames from the clipboard to insert them above the selected frame.
<dt><b>CTRL X</b> | |||
<dd>Delete the selected frames. | |||
</dl> | |||
<li><strong>Frame editing</strong> | |||
<dl> | |||
<dt><b>CTRL D</b> | |||
<dd>Copy the formant peaks down to the selected frame from the next keyframe above. | |||
<dt><b>CTRL SHIFT D</b> | |||
<dd>Copy the formant peaks up to the selected frame from the next key-frame below. | |||
<dt><b>CTRL Z</b> | |||
<dd>Set all formant peaks in the selected frame to zero height. It is no longer a key-frame. | |||
<dt><b>CTRL I</b> | |||
<dd>Set the formant peaks in the selected frame as an interpolation between the next keyframes above and below it. A dialog box allows you to enter a percentage. 50% gives values half-way between the two adjacent key-frames, 0% gives values equal to the one above, and 100% equal to the one below. | |||
</dl> | |||
<li><strong>Display and Sound</strong> | |||
<dl> | |||
<dt><b>CTRL Q</b> | |||
<dd>Shows interpolated formant peaks on non-keyframes. These frames don't become keyframes until any of the peaks are edited to increase their height. | |||
<dt><b>CTRL SHIFT Q</b> | |||
<dd>Removes the interpolated formant peaks display. | |||
<dt><b>CTRL G</b> | |||
<dd>Toggle grid on and off. | |||
<dt><b>F1</b> | |||
<dd>Play sound made from the one selected keyframe. | |||
<dt><b>F2</b> | |||
<dd>Play sound made from all the keyframes in the sequence. | |||
</ul> | |||
<p> | |||
<hr> | |||
<h2>USER INTERFACE - PROSODY EDITOR</h2> | |||
<hr> | |||
<ul><LI> | |||
<dl> | |||
<dt><b>Left</b> | |||
<dd>Move to previous phoneme. | |||
<dt><b>Right</b> | |||
<dd>Move to next phoneme. | |||
<dt><b>Up</b> | |||
<dd>Increase pitch. | |||
<dt><b>Down</b> | |||
<dd>Decrease pitch. | |||
<dt><b>Ctrl Up</b> | |||
<dd>Increase pitch range. | |||
<dt><b>Ctrl Down</b> | |||
<dd>Decrease pitch range. | |||
<dt><b>></b> | |||
<dd>Increase length. | |||
<dt><b><</b> | |||
<dd>Decrease length. | |||
</dd> | |||
</dl> | |||
</LI> | |||
</ul> | |||
</body> | |||
</html> |
@@ -0,0 +1,78 @@ | |||
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd"> | |||
<html> | |||
<head> | |||
<title>eSpeak: Speech Synthesizer</title> | |||
</head> | |||
<body> | |||
<table border="1" cellpadding="10" background="images/sand-light.jpg"> | |||
<tbody> | |||
<tr> | |||
<td width="15%" valign="top"> | |||
<a href="http://sourceforge.net"><img src="http://sflogo.sourceforge.net/sflogo.php?group_id=159649&type=2" width="125" height="37" border="0" alt="SourceForge.net Logo" /></a> | |||
</td> | |||
<td> | |||
<div align="center"><IMG src="images/lips.png" width="193" height="172" border="0"> | |||
<h1>eSpeak text to speech</h1></div> | |||
<div align="center"> | |||
(email) jonsd at users dot sourceforge.net<br> | |||
<a href="download.html"><strong>Download</strong></a> | |||
<a href="http://sourceforge.net/forum/?group_id=159649"><strong>Forum</strong></a> | |||
</div> | |||
</td> | |||
</tr> | |||
<tr> | |||
<td valign="top"> | |||
<font size="+1"><strong> | |||
<A href="commands.html">Usage</a> | |||
<p> | |||
<A href="languages.html">Languages</A> | |||
<p> | |||
<A href="docindex.html">Documents</A> | |||
<p> | |||
<A href="samples.html">Samples</A> | |||
</strong></font> | |||
</td> | |||
<td> | |||
eSpeak is a compact open source software speech synthesizer for English and other languages. | |||
<a href="http://espeak.sourceforge.net/"><strong>http://espeak.sourceforge.net</strong></a> | |||
<p> | |||
eSpeak produces good quality English speech. It uses a different synthesis method from other open source TTS engines, and sounds quite different. It's perhaps not as natural or "smooth", but I find the articulation clearer and easier to listen to for long periods. | |||
<p> | |||
It can run as a command line program to speak text from a file or from stdin. A shared library version is also available. | |||
<ul> | |||
<li>Includes different Voices, whose characteristics can be altered. | |||
<li>Can produce speech output as a WAV file. | |||
<li>SSML (Speech Synthesis Markup Language) is supported (not complete), and also HTML. | |||
<li>Compact size. The program and its data, including several languages, totals about 700 kbytes. | |||
<li>Can translate text to phoneme codes, so it could be adapted as a front end for another speech synthesis engine. | |||
<li>Potential for other languages. Several are included in varying stages of progress. Help from native speakers for these or other languages is welcomed. | |||
<li>Development tools available for producing and tuning phoneme data. | |||
<li>Written in C++. | |||
</ul> | |||
<p> | |||
It works well as a "Talker" with the KDE text to speech system (KTTS), as an alternative to Festival for example. As such, it can speak text which has been selected into the clipboard, or directly from the Konquerer browser or the Kate editor. A Gnome Speech driver is now available. | |||
<p> | |||
I regularly use it to listen to blogs and news sites. I prefer the sound through a domestic stereo system rather than my small computer speakers. | |||
<hr> | |||
<strong>Windows Version</strong>. There is now a Windows SAPI5 version of eSpeak. It can be used with screen readers such as NVDA, JAWS, Supernova, and Window-Eyes.<br> | |||
This is available as a Windows installer package from the eSpeak <a href="download.html"><strong>Download</strong></a> page.<p> | |||
A Windows version of the espeakedit program is also available. | |||
<hr> | |||
<strong>Languages</strong>. The eSpeak speech synthesizer supports several languages, however in most cases these are initial drafts and need more work to improve them. Assistance from native speakers is welcome for these, or other new languages. Please contact me if you want to help.<p> | |||
eSpeak does text to speech synthesis for the following languages, some better than others. Afrikaans, Croatian, Czech, Dutch, English, Esperanto, Finnish, French, German, Greek, Hindi, Hungarian, Italian, Norwegian, Polish, Portuguese, Romanian, Russian, Slovak, Spanish, Swahili, Swedish, Vietnamese, Welsh. See <a href="languages.html">Languages</a>. | |||
<hr> | |||
<strong>espeakedit</strong> is a GUI program used to prepare and compile phoneme data. It is now available for download. Documentation is currently sparse, but if you want to use it to add or improve language support, let me know. | |||
<hr> | |||
<strong>speak</strong> was originally written for Acorn/RISC_OS computers starting in 1995.  This version is an update and re-write, including a relaxation of the original memory and processing power constraints, and with provision for additional languages for anyone interested in adding them.
<p> | |||
The project name <strong>speak</strong> had already been taken by another project on SourceForge (for a Windows TTS front-end) so I added a letter 'e' to the front to make <strong>eSpeak</strong>. For now, the program executable remains <strong>speak</strong> and is referred to as such in the documentation. | |||
</td> | |||
</tr> | |||
</tbody> | |||
</table> | |||
</body> | |||
</html> |
@@ -0,0 +1,220 @@ | |||
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd"> | |||
<html> | |||
<head> | |||
<title>eSpeak Speech Synthesizer</title> | |||
<meta name="GENERATOR" content="Quanta Plus"> | |||
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1"> | |||
</head> | |||
<body> | |||
<A href="index.html">Back</A> | |||
<hr> | |||
<h2>3. LANGUAGES</h2> | |||
<hr> | |||
<h4>Help Needed</h4> | |||
Many of these are just experimental attempts at these languages, produced after a quick reading of the corresponding article on wikipedia.org. They will need work or advice from native speakers to improve them. Please contact me if you want to advise or assist with these or other languages.<p> | |||
The sound of some phonemes may be poorly implemented, particularly [r] since I'm English and therefore unable to make a "proper" [r] sound.<p> | |||
A major factor is the rhythm or cadence.  An Italian speaker told me the Italian voice improved from "difficult to understand" to "good" by changing the relative length of stressed syllables.  Identifying unstressed function words in the xx_list file is also important to make the speech flow well.  See <a href="add_language.html">Adding or Improving a Language</a>.
<h4>Character sets</h4> | |||
Languages recognise text either as UTF8 or alternatively in an 8-bit character set which is appropriate for that language. For example, for Polish this is Latin2, for Russian it is KOI8-R. This choice can be overridden by a line in the voices file to specify an ISO 8859 character set, eg. for Russian the line:<br> | |||
<pre> charset 5</pre> | |||
will mean that ISO 8859-5 is used as the 8-bit character set rather than KOI8-R. | |||
<p> | |||
In the case of a language which uses a non-Latin character set (eg. Greek or Russian) if the text contains a word with Latin characters then that particular word will be pronounced using English pronunciation rules and English phonemes. Speaking entirely English text using a Greek or Russian voice will sound OK, but each word is spoken separately so it won't flow properly. | |||
<p> | |||
Sample texts in various languages can be found at <a href="http://meta.wikimedia.org/wiki/List_of_Wikipedias"> http://<language>.wikipedia.org</a> and <a href="http://www.gutenberg.org">www.gutenberg.org</a> | |||
<h3>3.1 Voice Files</h3> | |||
A number of Voice files are provided in the <code>espeak-data/voices</code> directory. | |||
You can select one of these with the <strong>-v <voice filename></strong> parameter to the | |||
speak command, eg: | |||
<pre> espeak -vaf</pre> | |||
to speak using the Afrikaans voice. | |||
<p> | |||
For details of the voice files see <a href="voices.html">Voices</a>. | |||
<h4>Default Voice</h4> | |||
<ul> | |||
<dl> | |||
<dt> | |||
<strong>default</strong><br> | |||
<dd> This voice is used if none is specified in the speak command. Copy your preferred voice to "default" so you can use the speak command without the need to specify a voice.</dd> | |||
</dl> | |||
</ul> | |||
<h3>3.2 English Voices</h3> | |||
<ul><dl> | |||
<dt> | |||
<strong>en</strong><br> | |||
<dd> is the standard default English voice.</dd> | |||
<p> | |||
<dt> | |||
<strong>en-sc</strong><br> | |||
<dd> Scottish English. | |||
<p> | |||
<dt> | |||
<strong>en-r</strong><br> | |||
<dd> Some slight vowel changes, and a "rhotic" accent, where "r" is pronounced even when not followed by a vowel. This may sound less "British" to an American. | |||
<p> | |||
<dt> | |||
<strong>en-n<br> | |||
en-rp<br> | |||
en-wm</strong><br> | |||
<dd> are different English voices. These can be considered caricatures of | |||
various British accents: Northern, Received Pronunciation, West Midlands | |||
respectively.</dd> | |||
<p> | |||
</dl></ul> | |||
<h3>3.3 Voice Variants</h3> | |||
To make alternative voices for a language, you can create additional voice files in espeak-data/voices which contain commands to change various voice and pronunciation attributes.  See <a href="voices.html">voices.html</a>.
<p> | |||
Alternatively there are some preset voice variants which can be applied to any of the language voices, by appending <code>+</code> and a number. Their effects are defined by files in <code>espeak-data/voices/!v</code>. | |||
<p> | |||
The variant numbers are <code> +1 +2 +3 +4 +5 </code> for male voices and <code> +11 +12 +13 +14 </code> for female voices. For example: | |||
<pre> espeak -ven+3</pre> | |||
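or, to speak English with one of the female variants:
<pre>   espeak -ven+12</pre>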
<h3>3.4 Other Languages</h3> | |||
The eSpeak speech synthesizer does text to speech for the following additional languages.
<ul> | |||
<dl> | |||
<p> | |||
<dt> | |||
<strong>af Afrikaans</strong><br> | |||
<dd>This has been worked on by a native speaker and it should be OK.</dd> | |||
<p> | |||
<dt> | |||
<strong>cs Czech</strong><br> | |||
<dd>Usable. | |||
</dd> | |||
<p> | |||
<dt> | |||
<strong>de German</strong><br> | |||
<dd>This has improved from earlier versions.  Remaining problems are stress placement (which, like English, is irregular), prosody, and compound words, where correct detection of the sub-word boundaries would probably be needed for accurate pronunciation.
</dd> | |||
<p> | |||
<dt> | |||
<strong>el Greek</strong><br> | |||
<dd>Stress position is marked in text and spelling is fairly regular, so it shouldn't be too bad. It uses a different alphabet and switches to English pronunciation for words which contain Latin characters a-z.</dd> | |||
<p> | |||
<dt> | |||
<strong>eo Esperanto</strong><br> | |||
<dd>Esperanto has simple and regular pronunciation rules, so it should be OK, although I'm not
certain how it's supposed to sound, other than what I've read in an introduction.
Text can be either UTF-8, the Latin3 alphabet, or Latin1 text which uses the
convention of two-letter combinations (cx, gx, etc.).</dd>
<p> | |||
<dt> | |||
<strong>es Spanish</strong><br> | |||
<dd>Not much feedback yet, but spelling is regular and stress is explicitly marked when it deviates from the normal rules, so it might be reasonably intelligible. This would be a good opportunity for a native Spanish speaker to assist. Perhaps we could have different variants for Castilian Spanish, Mexican Spanish, etc., in a similar way to how I've done different English accents.</dd> | |||
<p> | |||
<dt> | |||
<strong>fi Finnish</strong><br> | |||
<dd>This has had assistance from native speakers and should be usable. | |||
</dd> | |||
<p> | |||
<dt> | |||
<strong>fr French</strong><br> | |||
<dd>Needs improvement, both for spelling-to-phoneme rules and the sounds and prosody. | |||
</dd> | |||
<p> | |||
<dt> | |||
<strong>hr Croatian</strong><br> | |||
<dd>Usable, but I'm unsure whether wrongly stressed syllables are a problem.
</dd> | |||
<p> | |||
<dt> | |||
<strong>hu Hungarian</strong><br> | |||
<dd>This has had assistance from a native speaker and it should be OK. | |||
</dd> | |||
<p> | |||
<dt> | |||
<strong>it Italian</strong><br> | |||
<dd>This has had some feedback from a native speaker but more work is needed. Spelling is fairly regular, but stress marks and vowel accents are often omitted from text, so for some words the dictionary/exceptions list will need to determine the stress position or whether to use open/close [e] or [E] and [o] or [O].</dd> | |||
<p> | |||
<dt> | |||
<strong>pt Portuguese (Brazil)</strong><br> | |||
<dd>Brazilian Portuguese. This has had assistance from a native speaker and it should be OK. Like Italian there is further work to do about the ambiguity in the spelling between open/close "e" and "o" vowels.<p> | |||
</dd> | |||
<p> | |||
<dt> | |||
<strong>pt-pt Portuguese (European)</strong><br> | |||
<dd> | |||
</dd> | |||
<p> | |||
<dt> | |||
<strong>ro Romanian</strong><br> | |||
<dd>Probably OK. More work is needed to improve the position of stress within words. | |||
</dd> | |||
<p> | |||
<dt> | |||
<strong>sk Slovak</strong><br> | |||
<dd>A little initial feedback. | |||
</dd> | |||
<p> | |||
<dt> | |||
<strong>sv Swedish</strong><br> | |||
<dd>This has now had some work done on the pronunciation rules, so it should be usable.
</dd> | |||
<p> | |||
<dt> | |||
<strong>sw Swahili</strong><br>
<dd>Not much feedback yet, but the spelling and stress rules are fairly regular, so it's probably usable. | |||
</dd> | |||
<p> | |||
</dl></ul> | |||
<h3>3.5 Provisional Languages</h3> | |||
These languages are only initial naive implementations which have had little or no feedback and improvement from native speakers. | |||
<ul> | |||
<dl> | |||
<p> | |||
<dt> | |||
<strong>cy Welsh</strong><br> | |||
<dd>An initial guess, awaiting feedback. | |||
</dd> | |||
<p> | |||
<dt> | |||
<strong>hi Hindi</strong><br> | |||
<dd>This is interesting because it uses the Devanagari characters. I'm not sure about Hindi stress rules, and I expect the sound of aspirated/unaspirated consonant pairs needs improvement. | |||
</dd> | |||
<p> | |||
<dt> | |||
<strong>nl Dutch</strong><br> | |||
<dd>Probably needs improvement of the spelling-to-phoneme rules. | |||
</dd> | |||
<p> | |||
<dt> | |||
<strong>is Icelandic</strong><br> | |||
<dd>An initial guess, awaiting feedback. | |||
</dd> | |||
<p> | |||
<dt> | |||
<strong>no Norwegian</strong><br> | |||
<dd>An initial guess, awaiting feedback. | |||
</dd> | |||
<p> | |||
<dt> | |||
<strong>pl Polish</strong><br> | |||
<dd>Some initial feedback, but I'm told it's difficult to understand, so more work is needed. | |||
</dd> | |||
<p> | |||
<dt> | |||
<strong>ru Russian</strong><br> | |||
<dd>So far it's just an initial attempt with basic pronunciation rules. Work is needed especially on the consonants. Russian has two versions of most consonants, "hard" and "soft" (palatalised) and in most cases eSpeak doesn't yet make a proper distinction.<br> | |||
Russian stress position is unpredictable so a large lookup dictionary is needed of those words where eSpeak doesn't guess correctly. To avoid increasing the size of the basic eSpeak package, this is available separately at: <a href="http://espeak.sourceforge.net/data/">http://espeak.sourceforge.net/data/</a> | |||
</dd> | |||
<p> | |||
<dt> | |||
<strong>vi Vietnamese</strong><br> | |||
<dd>This is interesting because it's a tone language. I don't know how it should sound, so it's just a guess and I need feedback. | |||
</dd> | |||
<p> | |||
<dt> | |||
<strong>zhy Cantonese Chinese</strong><br> | |||
<dd>Just a naive simple one-to-one translation from single Simplified Chinese characters to phonetic equivalents in Cantonese. No attempt so far at disambiguation, grouping characters into words, or adjusting tones according to their surrounding syllables. This voice needs Chinese character to phonetic translation data, which is available as a separate download at: <a href="http://espeak.sourceforge.net/data/">http://espeak.sourceforge.net/data/</a>.<br>The voice can also read Jyutping romanised text. | |||
</dd> | |||
</dl></ul>
<h3>3.6 Mbrola Voices</h3>
Some additional voices, whose names start with <b>mb-</b> (for example <b>mb-en1</b>) use eSpeak as a front-end to Mbrola diphone voices.  eSpeak does the spelling-to-phoneme translation and intonation.
See <a href="mbrola.html">mbrola.html</a>. | |||
<p> | |||
</body> | |||
</html> |
@@ -0,0 +1,93 @@ | |||
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd"> | |||
<html> | |||
<head> | |||
<title>espeakedit: Mbrola Voices</title> | |||
<meta http-equiv="Content-Type" content="text/html; charset=utf-8"> | |||
</head> | |||
<body> | |||
<A href="docindex.html">Back</A> | |||
<hr> | |||
<h2>MBROLA VOICES</h2> | |||
<hr> | |||
The Mbrola project is a collection of diphone voices for speech synthesis. They do not include any text-to-phoneme translation, so this must be done by another program. The Mbrola voices are cost-free but are not open source. They are available from the Mbrola website at:<br> | |||
<a href="http://www.tcts.fpms.ac.be/synthesis/mbrola/mbrcopybin.html">http://www.tcts.fpms.ac.be/synthesis/mbrola/mbrcopybin.html</a> | |||
<p> | |||
eSpeak can be used as a front-end to Mbrola. It provides the spelling-to-phoneme translation and intonation, which Mbrola then uses to generate speech sound. | |||
<h3>Voice Names</h3> | |||
To use a Mbrola voice, eSpeak needs information to translate from its own phonemes to the equivalent Mbrola phonemes. This has been set up for only some voices so far. | |||
<p> | |||
The eSpeak voices which use Mbrola are named as:<br> | |||
<b>mb-</b>xxx | |||
<p> | |||
where xxx is the name of a Mbrola voice (eg. <b>mb-en1</b> for the Mbrola "<b>en1</b>" English voice). These voice files are in eSpeak's directory <code>espeak-data/voices/mbrola</code>. | |||
<p> | |||
The installation instructions below use the Mbrola voice "en1" as an example. You can use other mbrola voices for which there is an equivalent eSpeak voice in <code>espeak-data/voices/mbrola</code>. | |||
<p> | |||
There are some additional eSpeak Mbrola voices which speak English text using a Mbrola voice for a different language. These contain the name of the Mbrola voice with a suffix <b>-en</b>. For example, the voice <b>mb-de4-en</b> will speak English text with a German accent by using the Mbrola <b>de4</b> voice. | |||
<h3>Windows Installation</h3> | |||
The SAPI5 version of eSpeak uses the mbrola.dll. | |||
<ol> | |||
<li>Install eSpeak. Include the voice <b>mb-en1</b> in the | |||
list of voices during the eSpeak installation. | |||
<p> | |||
<li>Install the PC/Windows version of Mbrola (MbrolaTools35.exe) from: | |||
<a href="http://www.tcts.fpms.ac.be/synthesis/mbrola/bin/pcwin/MbrolaTools35.exe"> http://www.tcts.fpms.ac.be/synthesis/mbrola/bin/pcwin/MbrolaTools35.exe</a>. | |||
<p> | |||
<li>Get the <b>en1</b> voice from: | |||
<a href="http://www.tcts.fpms.ac.be/synthesis/mbrola/mbrcopybin.html"> http://www.tcts.fpms.ac.be/synthesis/mbrola/mbrcopybin.html</a> | |||
unpack the archive, and copy the "<b>en1</b>" data file (not the whole "en1" | |||
directory) into | |||
<code>C:/Program Files/eSpeak/espeak-data/mbrola</code>. | |||
<p> | |||
<li>Use the voice <b>espeak-MB-EN1</b> from the list of SAPI5 voices. | |||
</ol> | |||
<h3>Linux Installation</h3> | |||
I don't think there's a Linux shared library version of Mbrola (equivalent to mbrola.dll), so eSpeak has to pipe phoneme data to the command-line Mbrola. | |||
<ol> | |||
<li>To install the Linux Mbrola binary, download: | |||
<a href="http://www.tcts.fpms.ac.be/synthesis/mbrola/bin/pclinux/mbr301h.zip"> http://www.tcts.fpms.ac.be/synthesis/mbrola/bin/pclinux/mbr301h.zip</a>. | |||
Unpack the archive, and copy and rename the file: <code>mbrola-linux-i386</code> to | |||
<code>mbrola</code> somewhere in your executable path (eg. <code>/usr/bin/mbrola</code> ). | |||
<p> | |||
<li>Get the en1 voice from: | |||
<a href="http://www.tcts.fpms.ac.be/synthesis/mbrola/mbrcopybin.html"> http://www.tcts.fpms.ac.be/synthesis/mbrola/mbrcopybin.html</a>. | |||
Unpack the archive, and copy the "<b>en1</b>" data file (not the whole "en1" | |||
directory) somewhere convenient (eg. <code>/usr/share/mbrola/en1</code> ). | |||
<p> | |||
<li>If you use the eSpeak voice "<b>mb-en1</b>" then eSpeak will generate | |||
Mbrola phoneme data on its stdout. You can pipe this into Mbrola. | |||
<p> | |||
<code>espeak -v mb-en1 -f textfile | mbrola -e /usr/share/mbrola/en1 - | |||
test.wav</code> | |||
<p> | |||
will put the Mbrola speech output into a WAV file. Or you can pipe the output from Mbrola through aplay: | |||
<p> | |||
<code>espeak -v mb-en1 -f textfile | mbrola -e /usr/share/mbrola/en1 - - | aplay -r16000 -fS16</code> | |||
<p> | |||
The -e option prevents Mbrola from stopping if it finds a combination | |||
of phonemes which it doesn't recognise. | |||
</ol> | |||
<h3>Mbrola Voice Files</h3> | |||
eSpeak's voice files for Mbrola voices are in directory <code>espeak-data/voices/mbrola</code>. They contain a line:<br> | |||
<code>mbrola <voice> <translation></code> | |||
<br> | |||
eg.<br> | |||
<code>mbrola en1 en1_phtrans</code> | |||
<ul> | |||
<li><b><voice></b> is the name of the Mbrola voice. | |||
<p> | |||
<li><b><translation></b> is a translation file to convert between eSpeak phonemes and the equivalent Mbrola phonemes. These are kept in: | |||
<code>espeak-data/mbrola_ph</code> | |||
</ul> | |||
They are binary files which are compiled, using espeakedit, from source files in <code>phsource/mbrola</code>. Details to be defined. | |||
</body> | |||
</html> |
@@ -0,0 +1,168 @@ | |||
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd"> | |||
<html> | |||
<head> | |||
<title>eSpeak: Phonemes</title>
<meta name="GENERATOR" content="Quanta Plus"> | |||
<meta http-equiv="Content-Type" content="text/html; charset=utf-8"> | |||
</head> | |||
<body> | |||
<A href="docindex.html">Back</A> | |||
<hr> | |||
<h2>PHONEMES</h2> | |||
<hr> | |||
In general a different set of phonemes can be defined for each language. | |||
<p> | |||
In most cases different languages inherit the same basic set of consonants. They can add to these or modify them as needed. | |||
<p> | |||
The phoneme mnemonics are based on the scheme by Kirshenbaum which represents International Phonetic Alphabet symbols using ascii characters. See: <a href="http://www.kirshenbaum.net/IPA/ascii-ipa.pdf">www.kirshenbaum.net/IPA/ascii-ipa.pdf</a>. | |||
<p> | |||
Phoneme mnemonics can be used directly in the text input to <strong>espeak</strong>. They are enclosed within double square brackets. Spaces are used to separate words, and all stressed syllables must be marked explicitly. eg:<br> | |||
<code>[[D,Is Iz sVm f@n'EtIk t'Ekst 'InpUt]]</code> | |||
<h3>English Consonants</h3> | |||
<table> | |||
<tbody valign=top> | |||
<tr> | |||
<td width=25><code>[p]</code><td width=80> | |||
<td width=25><code>[b]</code><td width=80> | |||
<tr> | |||
<td><code>[t]</code><td> | |||
<td><code>[d]</code><td> | |||
<tr> | |||
<td><code>[tS]</code><td><b>ch</b>urch | |||
<td><code>[dZ]</code><td><b>j</b>udge | |||
<tr> | |||
<td><code>[k]</code><td> | |||
<td><code>[g]</code><td> | |||
<tr><td><p> | |||
<tr> | |||
<td><code>[f]</code><td> | |||
<td><code>[v]</code><td> | |||
<tr> | |||
<td><code>[T]</code><td><b>th</b>in | |||
<td><code>[D]</code><td><b>th</b>is | |||
<tr> | |||
<td><code>[s]</code><td> | |||
<td><code>[z]</code><td> | |||
<tr> | |||
<td><code>[S]</code><td><b>sh</b>op | |||
<td><code>[Z]</code><td>plea<b>s</b>ure | |||
<tr> | |||
<td><code>[h]</code><td> | |||
<tr><td><p> | |||
<tr> | |||
<td><code>[m]</code><td> | |||
<td><code>[n]</code><td> | |||
<tr> | |||
<td><code>[N]</code><td>si<b>ng</b> | |||
<tr> | |||
<td><code>[l]</code><td> | |||
<td><code>[r]</code><td><b>r</b>ed (Omitted if not immediately followed by a vowel). | |||
<tr> | |||
<td><code>[j]</code><td><b>y</b>es | |||
<td><code>[w]</code><td> | |||
<tr><td><p> | |||
<tr><td colspan=3><strong>Some Additional Consonants</strong></td> | |||
<p> | |||
<tr> | |||
<td><code>[C]</code><td>German i<b>ch</b> | |||
<td><code>[x]</code><td>German Bu<b>ch</b>
<tr> | |||
<td><code>[l^]</code><td>Italian <b>gl</b>i | |||
<td><code>[n^]</code><td>Spanish <b>ñ</b> | |||
</tbody> | |||
</table> | |||
<h3>English Vowels</h3> | |||
These are the phonemes which are used by the English spelling-to-phoneme translations (en_rules and en_list). In some varieties of English different phonemes may have the same sound, but they are kept separate because they may differ in another variety. | |||
<p> | |||
In rhotic accents, such as General American, the phonemes <code>[3:], [A@], [e@], [i@], [O@], [U@] </code> include the "r" sound. | |||
<p> | |||
<table> | |||
<tbody valign=top> | |||
<tr><td width=25><code>[@]</code> | |||
<td width=60>alph<b>a</b><td width=80>schwa | |||
<tr><td><code>[3]</code> | |||
<td>bett<b>er</b><td>rhotic schwa. In British English this is the same as <code>[@]</code>, but it includes 'r' colouring in American and other rhotic accents. In these cases a separate <code>[r]</code> should not be included unless it is followed immediately by another vowel. | |||
<tr><td><code>[3:]</code><td>n<b>ur</b>se | |||
<tr><td><code>[@L]</code><td>simp<b>le</b> | |||
<tr><td><code>[@2]</code><td>the<td>Used only for "the". | |||
<tr><td><code>[@5]</code><td>to<td>Used only for "to". | |||
<tr><td><p> | |||
<tr><td><code>[a]</code><td>tr<b>a</b>p | |||
<tr><td><code>[aa]</code><td>b<b>a</b>th<td>This is <code>[a]</code> in some accents, <code>[A:]</code> in others. | |||
<tr><td><code>[a2]</code><td><b>a</b>bout<td>This may be <code>[@]</code> or may be a more open schwa. | |||
<tr><td><code>[A:]</code><td>p<b>al</b>m | |||
<tr><td><code>[A@]</code><td>st<b>ar</b>t | |||
<tr><td><p> | |||
<tr><td><code>[E]</code><td>dr<b>e</b>ss | |||
<tr><td><code>[e@]</code><td>squ<b>are</b> | |||
<tr><td><p> | |||
<tr><td><code>[I]</code><td>k<b>i</b>t | |||
<tr><td><code>[I2]</code><td><b>i</b>ntend<td>As <code>[I]</code>, but also indicates an unstressed syllable. | |||
<tr><td><code>[i]</code><td>happ<b>y</b><td>An unstressed "i" sound at the end of a word. | |||
<tr><td><code>[i:]</code><td>fl<b>ee</b>ce | |||
<tr><td><code>[i@]</code><td>n<b>ear</b> | |||
<tr><td><p> | |||
<tr><td><code>[0]</code><td>l<b>o</b>t | |||
<tr><td><p> | |||
<tr><td><code>[V]</code><td>str<b>u</b>t | |||
<tr><td><p> | |||
<tr><td><code>[u:]</code><td>g<b>oo</b>se | |||
<tr><td><code>[U]</code><td>f<b>oo</b>t | |||
<tr><td><code>[U@]</code><td>c<b>ure</b> | |||
<tr><td><p> | |||
<tr><td><code>[O:]</code><td>th<b>ou</b>ght | |||
<tr><td><code>[O@]</code><td>f<b>or</b>ce | |||
<tr><td><p> | |||
<tr><td><code>[aI]</code><td>pr<b>i</b>ce | |||
<tr><td><code>[eI]</code><td>f<b>a</b>ce | |||
<tr><td><code>[OI]</code><td>ch<b>oi</b>ce | |||
<tr><td><code>[aU]</code><td>m<b>ou</b>th | |||
<tr><td><code>[oU]</code><td>g<b>oa</b>t | |||
<tr><td><code>[aI@]</code> | |||
<tr><td><code>[aU@]</code> | |||
</tbody> | |||
</table> | |||
<h3>Some Additional Vowels</h3> | |||
Other languages will have their own vowel definitions, eg: | |||
<table> | |||
<tbody valign=top> | |||
<tr><td width=30><code>[e]</code><td>German <b>eh</b>, French <b>é</b> | |||
<tr><td><code>[o]</code><td>German <b>oo</b>, French <b>o</b> | |||
<tr><td><code>[y]</code><td>German <b>ü</b>, French <b>u</b> | |||
<tr><td><code>[Y]</code><td>German <b>ö</b>, French <b>oe</b> | |||
</tbody> | |||
</table> | |||
<hr> | |||
</body> | |||
</html> |
@@ -0,0 +1,211 @@ | |||
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd"> | |||
<html> | |||
<head> | |||
<title>eSpeak: Phoneme tables</title> | |||
<meta name="GENERATOR" content="Quanta Plus"> | |||
<meta http-equiv="Content-Type" content="text/html; charset=utf-8"> | |||
</head> | |||
<body> | |||
<A href="docindex.html">Back</A> | |||
<hr> | |||
<h2>PHONEME TABLES</h2> | |||
<hr> | |||
A phoneme table defines all the phonemes which are used by a language, together with their properties and the data for their production as sounds. | |||
<p> | |||
Generally each language has its own phoneme table, although additional phoneme tables can be used for different voices within the language. These alternatives are referenced from Voices files. | |||
<p> | |||
A phoneme table does not need to define all the phonemes used by a language. Instead it can reference a previously defined phoneme table, whose phonemes it inherits. These can then be used as they are, or overridden by new definitions, or new phonemes added. For example, a phoneme table may redefine (or add) some of the vowels that it uses, but inherit most of its consonants from a standard set. | |||
<p> | |||
<blockquote>Note: This specification is not yet complete and does not include the definitions of the formant sequence specifications. | |||
<br> | |||
The source files for the phoneme data are in the "phsource" directory in the espeakedit download package.
</blockquote> | |||
<p> <hr> | |||
<h3>Phoneme files</h3> | |||
The phoneme tables are defined in a master phoneme file, named <strong>phonemes</strong>. This starts with the <strong>base</strong> phoneme table followed by other phoneme tables for languages and voices which inherit phonemes from the <strong>base</strong> table or from each other. | |||
<p> | |||
In addition to phoneme definitions, the phoneme file can contain the following: | |||
<dl> | |||
<dt><strong>include</strong> <filename> | |||
<dd>Includes the text of the specified file at this point. This allows different phoneme tables to be kept in different text files, for convenience. <filename> is a relative path. The included file can itself contain <strong>include</strong> statements. | |||
<p> | |||
<dt><strong>phonemetable</strong> <name> <parent> | |||
<dd>Starts a new phoneme table, and ends the previous table.<br> | |||
<name> Is the name of this phoneme table. This name is used in Voices files.<br> | |||
<parent> Is the name of a previously defined phoneme table whose phoneme definitions are inherited by this one. The name <strong>base</strong> indicates the first (base) phoneme table. | |||
<p> | |||
<dt><strong>phonemenumber</strong> <integer> | |||
<dd>This statement is used at the start of the master <strong>phonemes</strong> file to define some specific code numbers for various phonemes which are used directly within the <strong>speak</strong> program. | |||
</dl> | |||
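For illustration, a master phonemes file using these statements might be laid out as sketched below.  The table and file names here are only examples, not the actual contents of <strong>phsource/phonemes</strong>.
<pre>
   phonemetable  en  base
   include  ph_english

   phonemetable  de  base
   include  ph_german
</pre>
Each language table inherits the phonemes of <strong>base</strong> and keeps its own definitions in a separate included file.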
<p> <hr> | |||
<h3>Phoneme definitions</h3> | |||
A phoneme table contains a list of phoneme definitions. Each starts with the keyword <strong>phoneme</strong> and the phoneme name (this is the name used in the pronunciation rules), and ends with the keyword <strong>endphoneme</strong>. For example: | |||
<pre> phoneme aI | |||
vowel | |||
length 230 | |||
formants vowels/ai | |||
starttype (a) endtype (I) | |||
endphoneme | |||
phoneme s | |||
vls alv frc sibilant | |||
vowelin f1=0 f2=1700 -300 300 f3=-100 100 | |||
vowelout f1=0 f2=1700 -300 250 f3=-100 100 rms=20 | |||
lengthmod 3 | |||
wave unvoc/s | |||
before _ unvoc/s_ | |||
before p unvoc/s! | |||
before t unvoc/s! | |||
before k unvoc/s! | |||
switchvoicing z | |||
endphoneme | |||
</pre> | |||
<p> | |||
Within the phoneme definition the following lines may occur: ( (V) indicates only for vowels, (C) only for consonants) | |||
<p> | |||
<ul> | |||
<dl><dt>Type. One of these must be present. | |||
<dd><table> | |||
<tr><TD width="100"><b>vowel</b></TD></tr> | |||
<tr><TD><b>liquid</b></TD><td>semi-vowels, such as: <code> r, l, j, w</code></td></tr> | |||
<tr><TD><b>nasal</b></TD><td>nasal eg: <code> m, n, N</code></td></tr> | |||
<tr><TD><b>stop</b></TD><td>stop eg: <code> p, b, t, d, k, g</code></td></tr> | |||
<tr><TD><b>frc</b></TD><td>fricative eg: <code> f, v, T, D, s, z, S, Z, C, x</code></td></tr> | |||
<tr><TD><b>afr</b></TD><td>affricate eg: <code> tS, dZ</code></td></tr> | |||
<tr><TD><b>pause</b></TD><td></td></tr> | |||
<tr><TD><b>stress</b></TD><td>stress symbols, eg: ' , = %</td></tr> | |||
<tr><TD><b>virtual</b></TD><td>Used to represent a class of phonemes. See section ("Phoneme Pairs", below)</td></tr> | |||
</table> | |||
</dl> | |||
<dl><dt>Properties: | |||
<dd><table> | |||
<tr><TD width="100"><b>vls</b></TD><td>(C) voiceless eg. <code> p, t, k, f, s</code></TD></tr> | |||
<tr><TD><b>vcd</b></TD><td>(C) voiced eg. <code> b, d, g, v, z</code></td></tr> | |||
<tr><TD><b>sibilant</b></TD><td>(C) eg: <code> s, z, S, Z, tS, dZ</code></td></tr> | |||
<tr><TD><b>palatal</b></TD><td>(C) A palatal or palatalized consonant.</td></tr> | |||
<tr><TD><b>unstressed</b></TD><td>(V) This vowel is always unstressed, unless explicitly marked otherwise.</td></tr> | |||
<tr><TD><b>nolink</b></TD><td>Prevent any linking from the previous phoneme.</td></tr> | |||
<tr><TD><b>trill</b></TD><td>(C) Apply trill to the voicing.</td></tr> | |||
</table> | |||
</dl> | |||
<dl><dt>Place of Articulation (C): | |||
<dd><table> | |||
<tr><TD><b>blb </b></TD><td width="100">bi-labial</TD> | |||
<TD><b>lbd </b></TD><td width="110">labio-dental</TD>
<TD><b>dnt </b></TD><td>dental</TD></tr> | |||
<tr><TD><b>alv</b></TD><td>alveolar</td> | |||
<TD><b>rfx</b></TD><td>retroflex</TD> | |||
<TD><b>pla</b></TD><td>palato-alveolar</TD></tr> | |||
<tr><TD><b>pal</b></TD><td>palatal</td> | |||
<TD><b>vel</b></TD><td>velar</TD> | |||
<TD><b>lbv</b></TD><td>labio-velar</TD></tr> | |||
<tr><TD><b>uvl</b></TD><td>uvular</td> | |||
<TD><b>phr</b></TD><td>pharyngeal</TD> | |||
<TD><b>glt</b></TD><td>glottal</TD></tr> | |||
</table> | |||
</dl> | |||
<dl> | |||
<dt><strong>length</strong> | |||
<dd>(V) The relative length of the phoneme, typically about 140 for a short vowel and from 200 to 250 for a long vowel or diphthong.  Currently used only for vowels.
<p> | |||
<dt><strong>formants</strong> <sound spec> | |||
<dd><sound spec> is a relative path to a file which defines how to generate the sound (a vowel or voiced consonant) from a sequence of formant values (see "Sound Specifications" below).
<p> | |||
<dt><strong>wave</strong> <wavefile> | |||
<dd>(C) This is an alternative to <strong>formants</strong>. <wavefile> is a relative path to a WAV file (22 kHz, 16 bits) which will be played to produce the sound. This method is used for unvoiced consonants. <wavefile> does not include a .WAV filename extension, although the file to which it refers may or may not have one. | |||
<p> | |||
<dt><strong>before</strong> <phoneme> <sound spec> | |||
<dd>This specifies an alternative realization when the phoneme is followed by another specified phoneme.  <strong>before</strong> may be followed by several <phoneme> <sound spec> pairs.
<p> | |||
<dt><strong>after</strong> <phoneme> <sound spec> | |||
<dd>This specifies an alternative realization when the phoneme follows another specified phoneme. Vowels are considered as two parts, start and end, so both a <strong>before</strong> and an <strong>after</strong> condition may apply to the same vowel. | |||
<p> | |||
<dt><strong>starttype</strong> <phoneme> | |||
<dd>Allocates this phoneme to a category for the purposes of choosing the variant of a phoneme that precedes it. See section "Phoneme Pairs" below. | |||
<p> | |||
<dt><strong>endtype</strong> <phoneme> | |||
<dd>Allocates this phoneme to a category for the purposes of choosing the variant of a phoneme that follows it. See section "Phoneme Pairs" below. | |||
<p> | |||
<dt><strong>reduceto</strong> <phoneme> <level> | |||
<dd>(V) Change to the specified phoneme (such as schwa, @) if this syllable has a stress level less than that specified by <level> | |||
<p> | |||
<dt><strong>linkout</strong> <phoneme> | |||
<dd>If the following phoneme is a vowel then this additional phoneme will be inserted before it. | |||
<p> | |||
<dt><strong>beforevowel</strong> <phoneme> | |||
<dd>The phoneme changes to this one if the next phoneme is a vowel. | |||
<p> | |||
<dt><strong>beforevowelpause</strong> <phoneme> | |||
<dd>Change to this if the next phoneme is a vowel or pause. | |||
<p> | |||
<dt><strong>beforenotvowel</strong> <phoneme> | |||
<dd>Change to this if the next phoneme is <strong>not</strong> a vowel. | |||
<p> | |||
<dt><strong>lengthmod</strong> <integer> | |||
<dd>(C) Determines how this consonant affects the length of the previous vowel. This value is used as index into the <code>length_mods</code> table in the <code>CalcLengths()</code> function in the speak program. | |||
<p> | |||
<dt><strong>vowelin</strong> <vowel transition data> | |||
<dd>(C) Specifies the effects of this consonant on the formants of a following vowel. See "vowel transitions", below. | |||
<p> | |||
<dt><strong>vowelout</strong> <vowel transition data> | |||
<dd>(C) Specifies the effects of this consonant on the formants of a preceding vowel. See "vowel transitions", below. | |||
<p> | |||
</dl> | |||
</ul> | |||
<p> <hr> | |||
<h3>Phoneme Pairs</h3> | |||
The pronunciation of a phoneme can depend on the phonemes before and after it. Some of this modification is done automatically - the program automatically adjusts the beginning and end of a vowel to match its adjacent sounds. You can also specify variant pronunciations in the phoneme table. | |||
<p> | |||
The <strong>before</strong> and <strong>after</strong> statements can specify different sound variants to be used when the phoneme is before or after another specified phoneme.  The adjacent phoneme that's specified in a <strong>before</strong> or <strong>after</strong> statement may match not just that one phoneme, but other phonemes too.  For example:<pre>   before ;  unvoc/s;</pre>means that the sound <code>unvoc/s;</code> is used (rather than <code>unvoc/s</code>) if the following phoneme is <code>[;]</code>.  But this rule also applies if the next phoneme is another type of pause, <code>[_]</code> or <code>[;;]</code>.  This is because these two include a line<pre>   starttype ;</pre>in their phoneme specifications.  This means that they look like a <code>[;]</code> to a preceding phoneme.
<p> | |||
When looking for a matching <strong>before</strong> or <strong>after</strong> rule, if an exact match is not found, then a match is looked for by replacing either or both of the two phonemes by their <strong>starttype</strong> and <strong>endtype</strong> groups as appropriate. | |||
<p> | |||
<strong>virtual</strong> phonemes can be defined for use in <strong>starttype</strong> and <strong>endtype</strong> statements.  For example, a virtual phoneme <code>[ (i) ]</code> is used to represent vowels which start or end with an <code>[i]</code> type sound.  So <code>[i:]</code> and <code>[I]</code> have <code> starttype (i) </code> and those, plus diphthongs such as <code>[aI] [eI] [OI]</code> have <code> endtype (i) </code>.  By convention, names of virtual phonemes include a pair of round brackets.
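As a sketch only (the names, length and formants file below are made up, not taken from phsource), a virtual phoneme and a vowel allocated to its groups might be written:
<pre>
   phoneme (i)
      virtual
   endphoneme

   phoneme I
      vowel
      length 140
      formants vowels/ii
      starttype (i)  endtype (i)
   endphoneme
</pre>
A <strong>before</strong> or <strong>after</strong> rule can then treat <code>[I]</code> and the other phonemes allocated to <code>(i)</code> as a single group, as described above.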
<p> <hr> | |||
<h3>Sound Specifications</h3> | |||
There are three ways to produce sounds: | |||
<ul> | |||
<li>Playing a WAV file. This is used for unvoiced consonants such as <code> [p] [t] [s]</code>. | |||
<li>Generating a wave from a sequence of formant parameters. This is used for vowels and also for sonorants such as <code> [l] [j] [n]</code>. | |||
<li>A mixture of these. A stored WAV file is mixed with a wave generated from formant parameters. This is used for voiced stops and fricatives such as <code> [b] [g] [v] [z]</code>. | |||
</ul> | |||
A <em><sound spec></em> in the phoneme table can refer to a WAV file, a formant sequence, or a mixture of both. It can also include a numeric value to adjust the length of the sound. | |||
<p> <hr> | |||
<h3>Vowel Transitions</h3> | |||
These specify how a consonant affects an adjacent vowel. A consonant may cause a transition in the vowel's formants as the mouth changes shape between the consonant and the vowel. The following attributes may be specified. Note that the maximum rate of change of formant frequencies is limited by the speak program.<p> | |||
<ul><dl> | |||
<dt><strong>len=<integer></strong> | |||
<dd>Nominal length of the transition in mS. If omitted a default value is used. | |||
<dt><strong>rms=<integer></strong> | |||
<dd>Adjusts the amplitude of the vowel at the end of the transition. If omitted a default value is used. | |||
<dt><strong>f1=<integer></strong> | |||
<dd> | |||
0: f1 formant frequency unchanged.<br> | |||
1: f1 formant frequency decreases.<br> | |||
2: f1 formant frequency decreases more. | |||
<dt><strong>f2=<freq> <min> <max></strong> | |||
<dd> | |||
<freq>: The frequency towards which the f2 formant moves (Hz).<br> | |||
<min>: Signed integer (Hz). The minimum f2 frequency change.<br> | |||
<max>: Signed integer (Hz). The maximum f2 frequency change. | |||
<dt><strong>f3=<change> <amplitude></strong> | |||
<dd> | |||
<change>:  Signed integer (Hz).  Frequency change of f3, f4, and f5 formants.<br>
<amplitude>: Amplitude of the f3, f4, and f5 formants at the end of the transition. 100 = no change. | |||
<dt><strong>brk</strong> | |||
<dd>Break. Do not merge the synthesized wave of the consonant into the vowel. This will produce a discontinuity in the formants. | |||
<dt><strong>rate</strong> | |||
<dd>Allow a greater maximum rate of change of formant frequencies. | |||
<dt><strong>glstop</strong> | |||
<dd>Indicates a glottal stop. | |||
</dl></ul> | |||
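As a worked example, the <code>[s]</code> phoneme definition shown earlier includes the line:
<pre>   vowelin  f1=0  f2=1700 -300 300  f3=-100 100</pre>
Read with the descriptions above, this leaves f1 of a following vowel unchanged, moves its f2 formant towards 1700 Hz by between -300 Hz and +300 Hz, and shifts f3 (together with f4 and f5) down by 100 Hz while leaving their amplitude unchanged (100 = no change).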
</body> | |||
</html> |
@@ -0,0 +1,571 @@ | |||
#ifndef SPEAK_LIB_H | |||
#define SPEAK_LIB_H | |||
/*************************************************************************** | |||
* Copyright (C) 2006 by Jonathan Duddington * | |||
* [email protected] * | |||
* * | |||
* This program is free software; you can redistribute it and/or modify * | |||
* it under the terms of the GNU General Public License as published by * | |||
* the Free Software Foundation; either version 2 of the License, or * | |||
* (at your option) any later version. * | |||
* * | |||
* This program is distributed in the hope that it will be useful, * | |||
* but WITHOUT ANY WARRANTY; without even the implied warranty of * | |||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * | |||
* GNU General Public License for more details. * | |||
* * | |||
* You should have received a copy of the GNU General Public License * | |||
* along with this program; if not, write to the * | |||
* Free Software Foundation, Inc., * | |||
* 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. * | |||
***************************************************************************/ | |||
/*************************************************************/ | |||
/* This is the header file for the library version of espeak */ | |||
/* */ | |||
/*************************************************************/ | |||
#include <stdio.h> | |||
/********************/ | |||
/* Initialization */ | |||
/********************/ | |||
typedef enum { | |||
espeakEVENT_LIST_TERMINATED = 0, // Retrieval mode: terminates the event list. | |||
espeakEVENT_WORD = 1, // Start of word | |||
espeakEVENT_SENTENCE, // Start of sentence | |||
espeakEVENT_MARK, // Mark | |||
espeakEVENT_PLAY, // Audio element | |||
espeakEVENT_END, // End of sentence | |||
espeakEVENT_MSG_TERMINATED // End of message | |||
} espeak_EVENT_TYPE; | |||
typedef struct { | |||
espeak_EVENT_TYPE type; | |||
unsigned int unique_identifier; // message identifier (or 0 for key or character) | |||
int text_position; // the number of characters from the start of the text | |||
int length; // word length, in characters (for espeakEVENT_WORD) | |||
int audio_position; // the time in mS within the generated speech output data | |||
int sample; // sample id (internal use) | |||
void* user_data; // pointer supplied by the calling program | |||
union { | |||
int number; // used for WORD and SENTENCE events | |||
const char *name; // used for MARK and PLAY events. UTF8 string | |||
} id; | |||
} espeak_EVENT; | |||
/* | |||
When a message is supplied to espeak_Synth, the request is buffered and espeak_Synth returns immediately. When the message is actually processed, the callback function will be called repeatedly.
In RETRIEVAL mode, the callback function supplies the calling program with the audio data and an event list terminated by 0 (LIST_TERMINATED).
In PLAYBACK mode, the callback function is called as soon as an event happens.
For example, suppose that the following message is supplied to espeak_Synth:
"hello, hello." | |||
* Once processed in RETRIEVAL mode, it could lead to 3 calls of the callback function:
** Block 1: | |||
<audio data> + | |||
List of events: SENTENCE + WORD + LIST_TERMINATED | |||
** Block 2: | |||
<audio data> + | |||
List of events: WORD + END + LIST_TERMINATED | |||
** Block 3: | |||
no audio data | |||
List of events: MSG_TERMINATED + LIST_TERMINATED | |||
* Once processed in PLAYBACK mode, it could lead to 5 calls of the callback function: | |||
** SENTENCE | |||
** WORD (called when the sounds are actually played)
** WORD
** END (called when the end of the sentence is actually played)
** MSG_TERMINATED | |||
The MSG_TERMINATED event is the last event. It informs the calling program that it can clear the user data related to the message.
If the synthesis must be stopped, the callback function is called for each pending message with the MSG_TERMINATED event.
A MARK event indicates a <mark> element in the text. | |||
A PLAY event indicates an <audio> element in the text, for which the calling program should play the named sound file. | |||
*/ | |||
typedef enum { | |||
POS_CHARACTER = 1, | |||
POS_WORD, | |||
POS_SENTENCE | |||
} espeak_POSITION_TYPE; | |||
typedef enum { | |||
/* PLAYBACK mode: plays the audio data, supplies events to the calling program*/ | |||
AUDIO_OUTPUT_PLAYBACK, | |||
/* RETRIEVAL mode: supplies audio data and events to the calling program */ | |||
AUDIO_OUTPUT_RETRIEVAL, | |||
/* SYNCHRONOUS mode: as RETRIEVAL but doesn't return until synthesis is completed */ | |||
AUDIO_OUTPUT_SYNCHRONOUS, | |||
/* Synchronous playback */ | |||
AUDIO_OUTPUT_SYNCH_PLAYBACK | |||
} espeak_AUDIO_OUTPUT; | |||
typedef enum { | |||
EE_OK=0, | |||
EE_INTERNAL_ERROR=-1, | |||
EE_BUFFER_FULL=1, | |||
EE_NOT_FOUND=2 | |||
} espeak_ERROR; | |||
#ifdef __cplusplus | |||
extern "C" | |||
#endif | |||
int espeak_Initialize(espeak_AUDIO_OUTPUT output, int buflength, const char *path); | |||
/* Must be called before any synthesis functions are called. | |||
output: the audio data can either be played by eSpeak or passed back by the SynthCallback function. | |||
buflength: The length in mS of sound buffers passed to the SynthCallback function. | |||
path: The directory which contains the espeak-data directory, or NULL for the default location. | |||
Returns: sample rate in Hz, or -1 (EE_INTERNAL_ERROR). | |||
*/ | |||
typedef int (t_espeak_callback)(short*, int, espeak_EVENT*); | |||
#ifdef __cplusplus | |||
extern "C" | |||
#endif | |||
void espeak_SetSynthCallback(t_espeak_callback* SynthCallback); | |||
/* Must be called before any synthesis functions are called. | |||
This specifies a function in the calling program which is called when a buffer of | |||
speech sound data has been produced. | |||
The callback function is of the form: | |||
int SynthCallback(short *wav, int numsamples, espeak_EVENT *events); | |||
wav: is the speech sound data which has been produced. | |||
NULL indicates that the synthesis has been completed. | |||
numsamples: is the number of entries in wav. This number may vary, may be less than | |||
the value implied by the buflength parameter given in espeak_Initialize, and may | |||
sometimes be zero (which does NOT indicate end of synthesis). | |||
events: an array of espeak_EVENT items which indicate word and sentence events, and | |||
also the occurrence of <mark> and <audio> elements within the text.
Callback returns: 0=continue synthesis, 1=abort synthesis. | |||
*/ | |||
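/* A minimal sketch of such a callback (illustrative only; the output file
   "f_out" is an assumption of this example, not part of the API):

     static FILE *f_out;   // opened elsewhere by the calling program

     static int SynthCallback(short *wav, int numsamples, espeak_EVENT *events)
     {
        if(wav == NULL)
           return(0);        // synthesis has completed

        if(numsamples > 0)
           fwrite(wav, sizeof(short), numsamples, f_out);  // store the 16-bit samples

        return(0);           // 0=continue synthesis, 1=abort
     }
*/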
#ifdef __cplusplus | |||
extern "C" | |||
#endif | |||
void espeak_SetUriCallback(int (*UriCallback)(int, const char*, const char*)); | |||
/* This function must be called before synthesis functions are used, in order to deal with | |||
<audio> tags. It specifies a callback function which is called when an <audio> element is | |||
encountered and allows the calling program to indicate whether the sound file which | |||
is specified in the <audio> element is available and is to be played. | |||
The callback function is of the form: | |||
int UriCallback(int type, const char *uri, const char *base); | |||
type: type of callback event. Currently only 1= <audio> element | |||
uri: the "src" attribute from the <audio> element | |||
base: the "xml:base" attribute (if any) from the <speak> element | |||
Return: 1=don't play the sound, but speak the text alternative. | |||
0=place a PLAY event in the event list at the point where the <audio> element | |||
occurs. The calling program can then play the sound at that point. | |||
*/ | |||
/********************/ | |||
/* Synthesis */ | |||
/********************/ | |||
#define espeakCHARS_AUTO 0 | |||
#define espeakCHARS_UTF8 1 | |||
#define espeakCHARS_8BIT 2 | |||
#define espeakCHARS_WCHAR 3 | |||
#define espeakSSML 0x10 | |||
#define espeakPHONEMES 0x100 | |||
#define espeakENDPAUSE 0x1000 | |||
#define espeakKEEP_NAMEDATA 0x2000 | |||
#ifdef __cplusplus | |||
extern "C" | |||
#endif | |||
espeak_ERROR espeak_Synth(const void *text, | |||
size_t size, | |||
unsigned int position, | |||
espeak_POSITION_TYPE position_type, | |||
unsigned int end_position, | |||
unsigned int flags, | |||
unsigned int* unique_identifier, | |||
void* user_data); | |||
/* Synthesize speech for the specified text. The speech sound data is passed to the calling | |||
program in buffers by means of the callback function specified by espeak_SetSynthCallback(). The command is asynchronous: it is internally buffered and returns as soon as possible. If espeak_Initialize was previously called with AUDIO_OUTPUT_PLAYBACK as argument, the sound data are played by eSpeak. | |||
text: The text to be spoken, terminated by a zero character. It may be either 8-bit characters, | |||
wide characters (wchar_t), or UTF8 encoding. Which of these is determined by the "flags" | |||
parameter. | |||
size: Equal to (or greater than) the size of the text data, in bytes. This is used in order
to allocate internal storage space for the text. This value is not used for | |||
AUDIO_OUTPUT_SYNCHRONOUS mode. | |||
position: The position in the text where speaking starts. Zero indicates speak from the | |||
start of the text. | |||
position_type: Determines whether "position" is a number of characters, words, or sentences. | |||
Values: POS_CHARACTER, POS_WORD, POS_SENTENCE (see espeak_POSITION_TYPE).
end_position: If set, this gives a character position at which speaking will stop. A value | |||
of zero indicates no end position. | |||
flags: These may be OR'd together: | |||
Type of character codes, one of: | |||
espeakCHARS_UTF8 UTF8 encoding | |||
espeakCHARS_8BIT The 8 bit ISO-8859 character set for the particular language. | |||
espeakCHARS_AUTO 8 bit or UTF8 (this is the default) | |||
espeakCHARS_WCHAR Wide characters (wchar_t) | |||
espeakSSML Elements within < > are treated as SSML elements, or if not recognised are ignored. | |||
espeakPHONEMES Text within [[ ]] is treated as phoneme codes (in espeak's Kirshenbaum encoding).
espeakENDPAUSE If set then a sentence pause is added at the end of the text. If not set then | |||
this pause is suppressed. | |||
unique_identifier: message identifier; helpful for identifying later | |||
data supplied to the callback. | |||
user_data: pointer which will be passed to the callback function. | |||
Return: EE_OK: operation achieved | |||
EE_BUFFER_FULL: the command can not be buffered; | |||
you may try after a while to call the function again. | |||
EE_INTERNAL_ERROR. | |||
*/ | |||
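/* A minimal usage sketch (illustrative only; error checking is omitted and the
   500mS buffer length is just an example value):

     #include <string.h>    // for strlen

     const char *text = "Hello world.";
     unsigned int id = 0;

     espeak_Initialize(AUDIO_OUTPUT_PLAYBACK, 500, NULL);  // default espeak-data location
     espeak_SetSynthCallback(SynthCallback);               // e.g. the callback sketched above
     espeak_Synth(text, strlen(text)+1, 0, POS_CHARACTER, 0,
                  espeakCHARS_AUTO, &id, NULL);
     espeak_Synchronize();                                 // wait until speaking has finished
*/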
#ifdef __cplusplus | |||
extern "C" | |||
#endif | |||
espeak_ERROR espeak_Synth_Mark(const void *text, | |||
size_t size, | |||
const char *index_mark, | |||
unsigned int end_position, | |||
unsigned int flags, | |||
unsigned int* unique_identifier, | |||
void* user_data); | |||
/* Synthesize speech for the specified text. Similar to espeak_Synth() but the start position is | |||
specified by the name of a <mark> element in the text. | |||
index_mark: The "name" attribute of a <mark> element within the text which specifies the
point at which synthesis starts. UTF8 string. | |||
For the other parameters, see espeak_Synth() | |||
Return: EE_OK: operation achieved | |||
EE_BUFFER_FULL: the command can not be buffered; | |||
you may try after a while to call the function again. | |||
EE_INTERNAL_ERROR. | |||
*/ | |||
#ifdef __cplusplus | |||
extern "C" | |||
#endif | |||
espeak_ERROR espeak_Key(const char *key_name); | |||
/* Speak the name of a keyboard key. | |||
Currently this just speaks the "key_name" as given | |||
Return: EE_OK: operation achieved | |||
EE_BUFFER_FULL: the command can not be buffered; | |||
you may try after a while to call the function again. | |||
EE_INTERNAL_ERROR. | |||
*/ | |||
#ifdef __cplusplus | |||
extern "C" | |||
#endif | |||
espeak_ERROR espeak_Char(wchar_t character); | |||
/* Speak the name of the given character | |||
Return: EE_OK: operation achieved | |||
EE_BUFFER_FULL: the command can not be buffered; | |||
you may try after a while to call the function again. | |||
EE_INTERNAL_ERROR. | |||
*/ | |||
/* Note, there is no function to play a sound icon. This would be done by the calling program */ | |||
/***********************/ | |||
/* Speech Parameters */ | |||
/***********************/ | |||
typedef enum { | |||
espeakSILENCE=0, /* internal use */ | |||
espeakRATE, | |||
espeakVOLUME, | |||
espeakPITCH, | |||
espeakRANGE, | |||
espeakPUNCTUATION, | |||
espeakCAPITALS, | |||
espeakEMPHASIS, /* internal use */ | |||
espeakLINELENGTH, /* internal use */ | |||
espeakVOICETYPE, // internal, 1=mbrola | |||
N_SPEECH_PARAM /* last enum */ | |||
} espeak_PARAMETER; | |||
typedef enum { | |||
espeakPUNCT_NONE=0, | |||
espeakPUNCT_ALL=1, | |||
espeakPUNCT_SOME=2 | |||
} espeak_PUNCT_TYPE; | |||
#ifdef __cplusplus | |||
extern "C" | |||
#endif | |||
espeak_ERROR espeak_SetParameter(espeak_PARAMETER parameter, int value, int relative); | |||
/* Sets the value of the specified parameter. | |||
relative=0 Sets the absolute value of the parameter. | |||
relative=1 Sets a relative value of the parameter. | |||
parameter: | |||
espeakRATE: speaking speed in words per minute.
espeakVOLUME: volume in range 0-100, 0=silence
espeakPITCH: base pitch, range 0-100. 50=normal
espeakRANGE: pitch range, range 0-100. 0=monotone, 50=normal
espeakPUNCTUATION: which punctuation characters to announce: | |||
value in espeak_PUNCT_TYPE (none, all, some), | |||
see espeak_GetParameter() to specify which characters are announced. | |||
espeakCAPITALS: announce capital letters by: | |||
0=none, | |||
1=sound icon, | |||
2=spelling, | |||
3 or higher, by raising pitch. This value gives the amount in Hz by which the pitch
of a word is raised to indicate it has a capital letter.
Return: EE_OK: operation achieved | |||
EE_BUFFER_FULL: the command can not be buffered; | |||
you may try after a while to call the function again. | |||
EE_INTERNAL_ERROR. | |||
*/ | |||
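/* Example sketch (illustrative values only):

     espeak_SetParameter(espeakRATE, 170, 0);     // absolute value, words per minute
     espeak_SetParameter(espeakPITCH, 60, 0);     // base pitch on the 0-100 scale
     espeak_SetParameter(espeakPUNCTUATION, espeakPUNCT_SOME, 0);
*/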
#ifdef __cplusplus | |||
extern "C" | |||
#endif | |||
int espeak_GetParameter(espeak_PARAMETER parameter, int current); | |||
/* current=0 Returns the default value of the specified parameter. | |||
current=1 Returns the current value of the specified parameter, as set by SetParameter() | |||
*/ | |||
#ifdef __cplusplus | |||
extern "C" | |||
#endif | |||
espeak_ERROR espeak_SetPunctuationList(const wchar_t *punctlist); | |||
/* Specifies a list of punctuation characters whose names are to be spoken when the
value of the Punctuation parameter is set to "some". | |||
punctlist: A list of character codes, terminated by a zero character. | |||
Return: EE_OK: operation achieved | |||
EE_BUFFER_FULL: the command can not be buffered; | |||
you may try after a while to call the function again. | |||
EE_INTERNAL_ERROR. | |||
*/ | |||
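/* Example sketch (illustrative only; a wide string literal provides the
   terminating zero character):

     static const wchar_t punctlist[] = L".,;:?";
     espeak_SetPunctuationList(punctlist);    // used when espeakPUNCTUATION is set to "some"
*/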
#ifdef __cplusplus | |||
extern "C" | |||
#endif | |||
void espeak_SetPhonemeTrace(int value, FILE *stream); | |||
/* Controls the output of phoneme symbols for the text | |||
value=0 No phoneme output (default) | |||
value=1 Output the translated phoneme symbols for the text | |||
value=2 as (1), but also output a trace of how the translation was done (matching rules and list entries) | |||
stream output stream for the phoneme symbols (and trace). If stream=NULL then it uses stdout. | |||
*/ | |||
#ifdef __cplusplus | |||
extern "C" | |||
#endif | |||
void espeak_CompileDictionary(const char *path, FILE *log); | |||
/* Compile pronunciation dictionary for a language which corresponds to the currently | |||
selected voice. The required voice should be selected before calling this function. | |||
path: The directory which contains the language's '_rules' and '_list' files. | |||
'path' should end with a path separator character ('/'). | |||
log: Stream for error reports and statistics information. If log=NULL then stderr will be used. | |||
*/ | |||
/***********************/ | |||
/* Voice Selection */ | |||
/***********************/ | |||
// voice table | |||
typedef struct { | |||
char *name; // a given name for this voice. UTF8 string. | |||
char *languages; // list of pairs of (byte) priority + (string) language (and dialect qualifier) | |||
char *identifier; // the filename for this voice within espeak-data/voices | |||
unsigned char gender; // 0=none 1=male, 2=female, | |||
unsigned char age; // 0=not specified, or age in years | |||
unsigned char variant; // only used when passed as a parameter to espeak_SetVoiceByProperties | |||
unsigned char xx1; // for internal use | |||
int score; // for internal use | |||
void *spare; // for internal use | |||
} espeak_VOICE; | |||
/* Note: The espeak_VOICE structure is used for two purposes: | |||
1. To return the details of the available voices. | |||
2. As a parameter to espeak_SetVoiceByProperties() in order to specify selection criteria. | |||
In (1), the "languages" field consists of a list of (UTF8) language names for which this voice | |||
may be used. Each language name in the list is terminated by a zero byte and is also preceded by
a single byte which gives a "priority" number. The list of languages is terminated by an | |||
additional zero byte. | |||
A language name consists of a language code, optionally followed by one or more qualifier (dialect) | |||
names separated by hyphens (eg. "en-uk"). A voice might, for example, have languages "en-uk" and | |||
"en". Even without "en" listed, voice would still be selected for the "en" language (because | |||
"en-uk" is related) but at a lower priority. | |||
The priority byte indicates how the voice is preferred for the language. A low number indicates a | |||
more preferred voice, a higher number indicates a less preferred voice. | |||
In (2), the "languages" field consists simply of a single (UTF8) language name, with no preceding | |||
priority byte. | |||
*/ | |||
#ifdef __cplusplus | |||
extern "C" | |||
#endif | |||
const espeak_VOICE **espeak_ListVoices(espeak_VOICE *voice_spec); | |||
/* Reads the voice files from espeak-data/voices and creates an array of espeak_VOICE pointers. | |||
The list is terminated by a NULL pointer | |||
If voice_spec is NULL then all voices are listed. | |||
If voice_spec is given, then only the voices which are compatible with the voice_spec
are listed, and they are listed in preference order. | |||
*/ | |||
#ifdef __cplusplus | |||
extern "C" | |||
#endif | |||
espeak_ERROR espeak_SetVoiceByName(const char *name); | |||
/* Searches for a voice with a matching "name" field. Language is not considered. | |||
"name" is a UTF8 string. | |||
Return: EE_OK: operation achieved | |||
EE_BUFFER_FULL: the command can not be buffered; | |||
you may try after a while to call the function again. | |||
EE_INTERNAL_ERROR. | |||
*/ | |||
#ifdef __cplusplus | |||
extern "C" | |||
#endif | |||
espeak_ERROR espeak_SetVoiceByProperties(espeak_VOICE *voice_spec); | |||
/* An espeak_VOICE structure is used to pass criteria to select a voice. Any of the following | |||
fields may be set: | |||
name NULL, or a voice name | |||
languages NULL, or a single language string (with optional dialect), eg. "en-uk", or "en" | |||
gender 0=not specified, 1=male, 2=female | |||
age 0=not specified, or an age in years | |||
variant After a list of candidates is produced, scored and sorted, "variant" is used to index | |||
that list and choose a voice. | |||
variant=0 takes the top voice (i.e. best match). variant=1 takes the next voice, etc | |||
*/ | |||
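/* Example sketch (illustrative only; memset() is declared in string.h):

     espeak_VOICE spec;
     memset(&spec, 0, sizeof(spec));      // leave unused criteria as "not specified"
     spec.languages = (char *)"en";       // any English voice
     spec.gender = 2;                     // prefer a female voice
     espeak_SetVoiceByProperties(&spec);
*/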
#ifdef __cplusplus | |||
extern "C" | |||
#endif | |||
espeak_VOICE *espeak_GetCurrentVoice(void); | |||
/* Returns the espeak_VOICE data for the currently selected voice. | |||
This is not affected by temporary voice changes caused by SSML elements such as <voice> and <s> | |||
*/ | |||
#ifdef __cplusplus | |||
extern "C" | |||
#endif | |||
espeak_ERROR espeak_Cancel(void); | |||
/* Immediately stops synthesis and audio output of the current text. When this
function returns, the audio output is fully stopped and the synthesizer is ready to | |||
synthesize a new message. | |||
Return: EE_OK: operation achieved | |||
EE_INTERNAL_ERROR. | |||
*/ | |||
#ifdef __cplusplus | |||
extern "C" | |||
#endif | |||
int espeak_IsPlaying(void); | |||
/* Returns 1 if audio is currently being played, 0 otherwise.
*/ | |||
#ifdef __cplusplus | |||
extern "C" | |||
#endif | |||
espeak_ERROR espeak_Synchronize(void); | |||
/* This function returns when all data have been spoken. | |||
Return: EE_OK: operation achieved | |||
EE_INTERNAL_ERROR. | |||
*/ | |||
#ifdef __cplusplus | |||
extern "C" | |||
#endif | |||
espeak_ERROR espeak_Terminate(void); | |||
/* The last function to be called.
Return: EE_OK: operation achieved | |||
EE_INTERNAL_ERROR. | |||
*/ | |||
#ifdef __cplusplus | |||
extern "C" | |||
#endif | |||
const char *espeak_Info(void* ptr); | |||
/* Returns the version number string. | |||
The parameter is for future use, and should be set to NULL | |||
*/ | |||
#endif |
@@ -0,0 +1,89 @@ | |||
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd"> | |||
<html> | |||
<head> | |||
<title>eSpeak</title> | |||
<meta name="GENERATOR" content="Quanta Plus"> | |||
<meta http-equiv="Content-Type" content="text/html; charset=utf-8"> | |||
</head> | |||
<body> | |||
<hr> | |||
<h2>TEXT MARKUP</h2> | |||
<hr> | |||
<h3>SSML: Speech Synthesis Markup Language</h3> | |||
The following markup tags and attributes are recognised:<p> | |||
<dl></dl> | |||
<p><b><speak></b> | |||
<ul> | |||
<li>xml:base (the value is just passed back as a parameter with the UriCallback() function) | |||
<li>xml:lang | |||
</ul> | |||
<p><b><voice></b> | |||
<ul> | |||
<li>xml:lang | |||
<li>name | |||
<li>age | |||
<li>variant | |||
<li>gender | |||
</ul> | |||
<p><b><prosody></b> | |||
<ul> | |||
<li>rate | |||
<li>volume | |||
<li>pitch | |||
<li>range | |||
</ul> | |||
<p><b><say-as></b> | |||
<ul> | |||
<li>interpret-as="characters" | |||
<li>interpret-as="characters" format="glyphs" | |||
<li>interpret-as="tts:key" | |||
<li>interpret-as="tts:char" | |||
<li>interpret-as="tts:digits" | |||
</ul> | |||
<p><b><mark></b> name | |||
<p><b><s></b> | |||
<ul> | |||
<li>xml:lang | |||
</ul> | |||
<p><b><p></b> | |||
<ul> | |||
<li>xml:lang | |||
</ul> | |||
<p><b><sub></b> alias | |||
<p><b><tts:style></b> | |||
<ul> | |||
<li>field="punctuation" mode=none,all,some | |||
<li>field="capital_letters" mode=no,spelling,icon,pitch | |||
</ul> | |||
<p><b><audio></b> src | |||
<p><b><emphasis></b> | |||
<ul> | |||
<li>level | |||
</ul> | |||
<p><b><break></b> | |||
<ul> | |||
<li>strength | |||
<li>time | |||
</ul> | |||
</dl> | |||
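<p>As an illustration, the following fragment uses only tags from the list above (an example sketch; the attribute values follow the SSML specification and are not taken from the eSpeak sources):
<pre>
&lt;speak xml:lang="en"&gt;
  &lt;s&gt;This is the &lt;emphasis level="strong"&gt;first&lt;/emphasis&gt; sentence.&lt;/s&gt;
  &lt;break time="500ms"/&gt;
  &lt;prosody rate="slow" pitch="low"&gt;This sentence is spoken more slowly.&lt;/prosody&gt;
  &lt;mark name="here"/&gt;
  &lt;say-as interpret-as="characters"&gt;SSML&lt;/say-as&gt;
&lt;/speak&gt;
</pre>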
<hr> | |||
<h3>HTML</h3> | |||
eSpeak can speak HTML text directly, or text containing both SSML and HTML markup.<br> | |||
Any unrecognised tags are ignored.<p> | |||
The following tags cause a sentence break.<br>
<b><br> | |||
<li> | |||
<img> | |||
<td> | |||
</b><p> | |||
The following tags cause a paragraph break.<br>
<b><h1> | |||
<h2> | |||
<h3> | |||
<h4> | |||
<hr> | |||
</b><p> | |||
</body> | |||
</html> |
@@ -0,0 +1,243 @@ | |||
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd"> | |||
<html> | |||
<head> | |||
<title>eSpeak: Voice Files</title> | |||
<meta name="GENERATOR" content="Quanta Plus"> | |||
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1"> | |||
</head> | |||
<body> | |||
<A href="index.html">Back</A> | |||
<hr> | |||
<h2>5. VOICES</h2> | |||
<hr> | |||
<h3>5.1 Voice Files</h3> | |||
A Voice file specifies a language (and possibly a language variant or dialect) together with various attributes that affect the characteristics of the voice quality and how the language is spoken.<p> | |||
Voice files are placed in the <code>espeak-data/voices</code> directory, or within subdirectories in there.<p> | |||
The available voice files can be listed by:<pre> | |||
espeak --voices | |||
or | |||
espeak --voices=<language></pre> | |||
<hr> | |||
<h3>5.2 Contents of Voice Files</h3> | |||
The <strong>language</strong> attribute is mandatory. All the other attributes are optional. | |||
<p> | |||
<h4>Identification Attributes</h4> | |||
<ul> | |||
<dl> | |||
<dt> | |||
<strong>name <name></strong><br> | |||
<dd>A name given to this voice. | |||
<p> | |||
<dt> | |||
<strong>language <language code> [<priority>]</strong><br> | |||
<dd>This attribute should appear before the other attributes which are listed below.<p> | |||
It selects the default behaviour and characteristics for the language, and sets default values for
"phonemes", "dictionary" and other attributes. The <language code> should be a two-letter ISO 639-1 language code. One or more language variant codes may be appended, separated by hyphens. (eg. en-uk-north).<p> | |||
The optional <priority> value gives the preference of this voice compared with others for the specified language. A low value indicates a more preferred voice. The default value is 5.<p> | |||
More than one <strong>language</strong> line may be present. A voice may be selected for other related languages (variants which have the same initial 2 letter language code as the specified language), but it will be less preferred for these. Different language variants may be specified by additional <strong>language</strong> lines in order to indicate that this is a preferred voice for them also. Eg.<pre> | |||
language en-uk-north | |||
language en</pre> | |||
indicates that this voice is for the "en-uk-north" dialect, but it is also a main choice when a general "en" language is specified. Without the second <strong>language</strong> line, it would be disfavoured for "en" because it is a more specialised voice.
<p> | |||
<dt> | |||
<strong>gender <gender> [<age>]</strong><br> | |||
<dd><gender> may be male, female, or unknown.<br>
<age> is optional and gives an age in years. | |||
</dl> | |||
</ul> | |||
<h4>Voice Attributes</h4> | |||
<ul> | |||
<dl> | |||
<dt> | |||
<strong>pitch <base> <range></strong><br> | |||
<dd> Two integer values.
The first gives a base pitch to the voice (value in Hz).
The second controls the range of pitches used by the voice. Setting
it equal to the base pitch will give a monotone. The default values are 82 118.
(A short example voice file combining several of these attributes is shown after this list.)
<p> | |||
<dt> | |||
<strong>formant <number> <frequency> <strength> <width></strong><br> | |||
<dd> Systematically adjusts the frequency, strength, and width of the | |||
resonance peaks of the voice. Values are percentages of the | |||
default values. Changing these affects the tone/quality of the voice. | |||
<ul> | |||
<li>Formants 1,2,3 are the standard three formants which define vowels.</li> | |||
<li>Formant 0 is used to give a low frequency component to the sounds, of | |||
frequency lower than F1.</li> | |||
<li>Formants 4,5 are higher than F3. They affect the quality of the voice.</li> | |||
<li>Formants 6,7,8 are weak, high frequency, additions to vowels to give | |||
a clearer sound.</li> | |||
</ul> | |||
<p> | |||
<dt> | |||
<strong>echo <delay> <amplitude></strong><br> | |||
<dd> Parameter 1 gives the delay in mS (0 to 250mS).<br> | |||
Parameter 2 gives the echo amplitude (0 to 100).<br> | |||
Adding some echo can give a clearer or more interesting sound, | |||
especially when listening through a domestic stereo sound system, | |||
rather than small computer speakers. | |||
<dt> | |||
<strong>tone</strong><br> | |||
<dd> Controls the tone of the sound.<br> | |||
<strong>tone</strong> is followed by up to 4 pairs of <frequency> <amplitude> which define a frequency response graph. Frequency is | |||
in Hz and amplitude is in the range 0 to 255. The default is:<p> | |||
<code> tone 600 170 1200 135 2000 110</code><p> | |||
This means that from frequency 0Hz to 600Hz the amplitude is 170. From | |||
600Hz to 1200Hz the amplitude decreases from 170 to 135, then decreases to 110 at 2000Hz | |||
and remains at 110 at higher frequencies. This adjustment applies only to voiced sounds such as | |||
vowels and sonorant consonants (such as [n] and [l]). Unvoiced sounds such | |||
as [s] are unaffected.<p> | |||
This <strong>tone</strong> statement can also appear in <code>espeak-data/config</code>, in which case it applies to all voices which | |||
don't have their own <strong>tone</strong> statement. | |||
<p> | |||
<dt> | |||
<strong>flutter <value></strong><br> | |||
<dd> Default value: 2.<br> | |||
Adds pitch fluctuations to give a wavering or older-sounding voice. | |||
A large value (eg. 20) makes the voice sound "croaky". | |||
<p> | |||
<dt> | |||
<strong>roughness <value></strong><br> | |||
<dd> Default value: 2. Range 0 - 7<br> | |||
Reduces the amplitude of alternate waveform cycles in order to make the voice sound creaky. | |||
<p> | |||
<dt> | |||
<strong>voicing <value></strong><br> | |||
<dd> Default value: 100.<br> | |||
Adjusts the strength of formant-synthesized sounds (vowels and sonorant consonants). | |||
<p> | |||
<dt> | |||
<strong>breath <up to 8 integer values></strong><br> | |||
<dd> Default values: 0.<br> | |||
Adds noise which corresponds to the formant frequency peaks. The values give the strength | |||
of noise for each formant peak (formants 1 to 8). | |||
<p> | |||
Use together with a low or zero value of the <strong>voicing</strong> attribute to make a "whisper".
For example:<br> | |||
<code>breath 75 75 60 40 15 10<br> | |||
breathw 150 150 200 200 400 400<br> | |||
voicing 18<br> | |||
flutter 20<br> | |||
formant 0 100 0 100 // remove formant 0 | |||
</code> | |||
<p> | |||
<dt> | |||
<strong>breathw <up to 8 integer values></strong><br> | |||
<dd> | |||
These values give bandwidths of the noise peaks of the <strong>breath</strong> attribute. If <strong>breathw</strong> values are not given, then suitable default values will be used. | |||
<p> | |||
</dl> | |||
</ul> | |||
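<p>Putting several of these attributes together, a simple voice file might look like this (an illustrative sketch only; the values shown are examples, not recommended settings):
<pre>
name deep-test
language en
gender male

pitch 70 110
formant 0 100 90 110
echo 130 15
flutter 6
</pre>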
<h4>Language Attributes</h4> | |||
<ul> | |||
<dl> | |||
<p> | |||
<dt> | |||
<strong>phonemes <name></strong><br> | |||
<dd>Specifies which set of phonemes to use from those contained in the | |||
phontab, phonindex, and phondata data files. | |||
This is a <strong>phonemetable</strong> name as given in the "phoneme" source file. | |||
<p> | |||
This parameter is usually not needed as it is set by default to the first two letters of the "language" parameter. | |||
However, different voices of the same language can use different phoneme sets, to give different accents. | |||
</dd> | |||
<dt> | |||
<strong>dictionary <name></strong><br> | |||
<dd> Specifies which pair of dictionary files to use. eg. "english"
indicates that <em>espeak-data/en_dict</em> should
be used to translate from words to phonemes. This parameter is usually
not needed as it is set by default to the first two letters of the "language" parameter.</dd>
<p> | |||
<dt> | |||
<strong>dictrules <list of rule numbers></strong><br> | |||
<dd> | |||
Gives a list of conditional dictionary rules which are applied for this voice. Rule numbers are in the range 0 to 31 and are specific to a language. They can apply to rules in the language's <b>_rules</b> dictionary file and also its <b>_list</b> exceptions list.
See <a href="dictionary.html">dictionary.html</a>. | |||
</dd> | |||
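<p>For example (an illustrative sketch; the rule numbers shown are arbitrary and their meaning depends on the language):
<pre>
dictrules 1 4
</pre>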
<p> | |||
<dt> | |||
<strong>replace <flags> <phoneme> <replacement phoneme></strong><br> | |||
<dd> Replace a phoneme by another whenever it occurs.<p> | |||
<replacement phoneme> may be NULL.<p> | |||
Flags: bit 0: replacement only occurs on the final phoneme of a word.<br> | |||
Flags: bit 1: replacement doesn't occur in stressed syllables.<br> | |||
eg. | |||
<pre> | |||
replace 0 h NULL // drops h's | |||
replace 0 V U // replaces vowel in 'strut' by that in 'foot' | |||
// as occurs in northern British English | |||
replace 3 N n // change 'fishing' to 'fishin' etc. | |||
// (only the last phoneme of a word, only in unstressed syllables) | |||
</pre> | |||
The phoneme mnemonics can be defined for each language, but some are listed in <A href="phonemes.html">phonemes.html</A> | |||
</dd> | |||
<p> | |||
<dt> | |||
<strong>stressLength <8 integer values></strong><br> | |||
<dd> Eight integer parameters. These control the relative lengths of the vowels in | |||
stressed and unstressed syllables. | |||
<ul> | |||
<li> 0 unstressed | |||
</li><li> 1 diminished. Its use depends on the language. In English it's used for unstressed syllables within multisyllabic words. In Spanish it's used for unstressed final syllables. | |||
</li><li> 2 secondary stress | |||
</li><li> 3 words marked as "unstressed" in the dictionary | |||
</li><li> 4 not currently used | |||
</li><li> 5 not currently used | |||
</li><li> 6 stressed syllable (the main syllable in stressed words) | |||
</li><li> 7 tonic syllable (by default, the last stressed syllable in the clause) | |||
</li></ul> | |||
</dd> | |||
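<p>For example (illustrative values only):
<pre>
stressLength 180 180 200 200 200 200 230 240
</pre>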
<p> | |||
<dt> | |||
<strong>stressAdd <8 integer values></strong><br> | |||
<dd> Eight integer parameters. These are added to the voice's corresponding stressLength values. They are used in the voice variant files in <code>espeak-data/voices/!v</code> to give some variety. Negative values may be used.</dd> | |||
<p> | |||
<dt> | |||
<strong>stressAmp <8 integer values></strong><br> | |||
<dd> Eight integer parameters. These control the relative amplitudes of the vowels in | |||
stressed and unstressed syllables (see stressLength above). | |||
The general default values are: 16, 16, 20, 20, 20, 24, 24, 22, although these defaults may be different for particular languages.</dd> | |||
<p> | |||
<dt> | |||
<strong>intonation <param1> <param2></strong><br> | |||
<dd> (for further development)<br> | |||
</dd> | |||
<p> | |||
<dt> | |||
<strong>charset <param1></strong><br> | |||
<dd> | |||
The ISO 8859 character set number. (not all are implemented). | |||
</dd> | |||
<p> | |||
Additional attributes are available to set various internal options which control how language is processed. These would normally be set in the program code rather than in a voice file. | |||
<p> | |||
<dt> | |||
<strong>stressrule <param1> <param2> <param3> <param4></strong><br> | |||
<dd> | |||
Controls how different stress levels are applied to the syllables of a word. | |||
</dd> | |||
</ul> | |||
<hr> | |||
<h3>5.3 Voice Files Provided</h3> | |||
A number of Voice files are provided in the <code>espeak-data/voices</code> directory. | |||
You can select one of these with the <strong>-v <voice filename></strong> parameter to the | |||
speak command. | |||
<p> | |||
<dl> | |||
<dt> | |||
<strong>default</strong><br> | |||
<dd> This voice is used if none is specified in the speak command. Copy your preferred voice to "default" so you can use the speak command without the need to specify a voice.</dd> | |||
</dl> | |||
For a list of voices provided for English and other languages see <a href="languages.html">Languages</a>. | |||
</body> | |||
</html> |
@@ -1,6 +1,7 @@ | |||
34 phoneme tables | |||
35 phoneme tables | |||
new total | |||
base 96 96 | |||
base2 24 114 | |||
en 53 144 | |||
en_n 29 144 | |||
en_us 37 144 | |||
@@ -22,12 +23,12 @@ | |||
cs 5 124 | |||
hr 25 133 | |||
ru 36 124 | |||
it 25 115 | |||
es 6 115 | |||
pt 28 132 | |||
pt_pt 20 132 | |||
ro 36 139 | |||
el 8 115 | |||
it 12 114 | |||
es 6 114 | |||
pt 28 131 | |||
pt_pt 20 131 | |||
ro 36 138 | |||
el 8 114 | |||
sv 25 118 | |||
no 28 122 | |||
is 32 121 | |||
@@ -45,16 +46,16 @@ | |||
2 b/bo base hi | |||
2 b/bu base hi | |||
2 b/xb base hi | |||
15 d/d base fi fr hi hu pl hr ru it ro el sw | |||
16 d/d_ base fi fr hi hu pl hr ru it ro el sw | |||
15 d/d base base2 fi fr hi hu pl hr ru ro el sw | |||
16 d/d_ base base2 fi fr hi hu pl hr ru ro el sw | |||
1 d/d_dnt hi | |||
17 d/dr base fi fr hi hu pl hr ru it ro el sw | |||
17 d/dr base base2 fi fr hi hu pl hr ru ro el sw | |||
1 d/tap ro | |||
2 d/tap1 base | |||
2 d/tap2 base | |||
2 d/tap3 base | |||
1 d/tap_i ro | |||
13 d/xd base fi fr hi hu pl hr ru it ro el sw | |||
13 d/xd base base2 fi fr hi hu pl hr ru ro el sw | |||
1 d/xd_dnt hi | |||
2 d/xd_pzd pl ru | |||
1 d/x_tap base | |||
@@ -240,25 +241,25 @@ | |||
1 ufric/x_hr hr | |||
1 ustop/c base | |||
5 ustop/k base en fr hi sw | |||
9 ustop/k_ base en fi fr hi hu it el sw | |||
12 ustop/ki base en af fi fr hi hu it is sw | |||
10 ustop/kl base en fi fr hi hu it el sw | |||
11 ustop/kr base en fi fr hi hu it el sw | |||
9 ustop/k_unasp base fi hi hu it el | |||
9 ustop/k_ base base2 en fi fr hi hu el sw | |||
12 ustop/ki base base2 en af fi fr hi hu is sw | |||
10 ustop/kl base base2 en fi fr hi hu el sw | |||
11 ustop/kr base base2 en fi fr hi hu el sw | |||
9 ustop/k_unasp base base2 fi hi hu el | |||
2 ustop/p base hi | |||
2 ustop/p_ base hi | |||
1 ustop/percus10 base | |||
8 ustop/pl base fi hi hu hr it ro | |||
8 ustop/pr base fi hi hu it ro | |||
7 ustop/p_unasp base fi hi hu hr it ro | |||
6 ustop/p_unasp_ fi hi hu hr it ro | |||
8 ustop/pl base base2 fi hi hu hr ro | |||
8 ustop/pr base base2 fi hi hu ro | |||
7 ustop/p_unasp base base2 fi hi hu hr ro | |||
6 ustop/p_unasp_ base2 fi hi hu hr ro | |||
3 ustop/t base en hi | |||
6 ustop/t_ base en hi sk el | |||
27 ustop/t_dnt base en fi fr hi hu pl hr ru it ro vi ++ | |||
27 ustop/t_dnt base base2 en fi fr hi hu pl hr ru ro vi ++ | |||
2 ustop/t_dnt2 hi vi | |||
4 ustop/t_pzd pl ru | |||
7 ustop/tr base en hi ru | |||
8 ustop/ts de eo hu pl ru it ro zh_yue | |||
8 ustop/ts base2 de eo hu pl ru ro zh_yue | |||
2 ustop/tsh base zh_yue | |||
2 ustop/tsh_ base zh_yue | |||
3 ustop/t_short hi sk el | |||
@@ -276,7 +277,7 @@ | |||
1 vdiph2/ii@ en | |||
1 vdiph2/ii@_2 en_wm | |||
1 vdiph2/ii@_3 vi | |||
3 vdiph2/iu it vi zh_yue | |||
3 vdiph2/iu base2 vi zh_yue | |||
1 vdiph2/iu_2 fi | |||
1 vdiph2/iu_3 af | |||
2 vdiph2/iu_4 cy | |||
@@ -301,7 +302,7 @@ | |||
1 vdiph/aau_4 vi | |||
1 vdiph/ae fr_ca | |||
1 vdiph/ae_2 en_n | |||
5 vdiph/ai eo hr it pt vi | |||
6 vdiph/ai base2 eo hr it pt vi | |||
3 vdiph/ai_2 en_us cy | |||
1 vdiph/ai_3 no | |||
1 vdiph/ai_4 af | |||
@@ -313,7 +314,7 @@ | |||
1 vdiph/au# en_sc | |||
2 vdiph/au_2 en_us zh_yue | |||
1 vdiph/au_3 en_rp | |||
5 vdiph/au_4 cy eo sk it is | |||
6 vdiph/au_4 base2 cy eo sk it is | |||
1 vdiph/ee-e hi | |||
3 vdiph/eei en pt vi | |||
2 vdiph/eei_2 eo fi | |||
@@ -323,9 +324,9 @@ | |||
1 vdiph/eeu_2 pt_pt | |||
2 vdiph/eeu_3 en_n en_wm | |||
1 vdiph/eey fi | |||
6 vdiph/ei nl it pt is vi zh_yue | |||
6 vdiph/ei base2 nl pt is vi zh_yue | |||
1 vdiph/ei_2 hr | |||
8 vdiph/eu en cy eo nl sk it pt vi | |||
8 vdiph/eu base2 en cy eo nl sk pt vi | |||
1 vdiph/eu_2 fi | |||
2 vdiph/&i fi hi | |||
3 vdiph/@i_2 af cy | |||
@@ -335,7 +336,7 @@ | |||
1 vdiph/@i_4 vi | |||
2 vdiph/ii hr ro | |||
1 vdiph/i#i ro | |||
4 vdiph/oi en_wm de it vi | |||
5 vdiph/oi base2 en_wm de it vi | |||
1 vdiph/oi_2 af | |||
10 vdiph/ooi en en_n en_us en_rp cy eo fi no zh_yue | |||
1 vdiph/ooi_2 af | |||
@@ -347,7 +348,7 @@ | |||
1 vdiph/@u_2 en_rp | |||
1 vdiph/@u_3 ro | |||
3 vdiph/@u_en en vi | |||
6 vdiph/ui en eo fi it vi zh_yue | |||
6 vdiph/ui base2 en eo fi vi zh_yue | |||
1 vdiph/u-i vi | |||
1 vdiph/ui_2 af | |||
2 vdiph/ui_3 cy | |||
@@ -408,7 +409,7 @@ | |||
14 vowel/@ base en en_us en_rp cy hi hr | |||
1 vowel/@- base | |||
4 vowel/& en_rp fi hi sv | |||
4 vowel/0 en hi it pt | |||
4 vowel/0 base2 en hi pt | |||
4 vowel/0_2 en_n en_wm pt_pt sw | |||
5 vowel/0_3 en_us en_sc en_rp hu | |||
1 vowel/@_2 fr | |||
@@ -420,8 +421,8 @@ | |||
2 vowel/8_2 en_us sv | |||
1 vowel/8_3 zh_yue | |||
11 vowel/a en_n cy de hu nl pl sk hr | |||
4 vowel/a# en_sc it pt | |||
6 vowel/a_2 eo it pt pt_pt ro vi | |||
5 vowel/a# base2 en_sc it pt | |||
7 vowel/a_2 base2 eo it pt pt_pt ro vi | |||
4 vowel/a#_2 hr sv is sw | |||
6 vowel/a_3 en_sc cs is | |||
12 vowel/a#_3 en en_n en_us en_wm de hi ru pt_pt vi zh_yue | |||
@@ -437,27 +438,27 @@ | |||
1 vowel/aa_7 nl | |||
4 vowel/a_en en fr | |||
1 vowel/@_bck hi | |||
13 vowel/e en en_n af cy eo fr hu hr it pt pt_pt vi | |||
14 vowel/e base2 en en_n af cy eo fr hu hr it pt pt_pt ++ | |||
2 vowel/e# en_sc | |||
6 vowel/e_2 en_sc de hi sv no | |||
1 vowel/e_3 hu | |||
1 vowel/e_5 en_sc | |||
3 vowel/ee fr pl sv | |||
2 vowel/e_e en_sc is | |||
7 vowel/ee_1 en en_n pl it sv no zh_yue | |||
7 vowel/ee_1 base2 en en_n pl sv no zh_yue | |||
3 vowel/ee_2 en cy nl | |||
1 vowel/ee#_2 sv | |||
3 vowel/ee_3 af pt vi | |||
3 vowel/ee_6 en_n sk sv | |||
12 vowel/e_mid en_rp en_wm fr_ca hi hu sk cs hr es pt_pt no is | |||
13 vowel/e_mid en_rp en_wm fr_ca hi hu sk cs hr it es pt_pt no ++ | |||
10 vowel/e_mid2 af de fi nl sk ro el sw | |||
1 vowel/@_fnt hr | |||
1 vowel/@_hgh no | |||
18 vowel/i en_n en_rp en_wm cy eo fr hu pl it pt pt_pt ro ++ | |||
19 vowel/i base2 en_n en_rp en_wm cy eo fr hu pl it pt pt_pt ++ | |||
1 vowel/i# cy | |||
3 vowel/i_2 de nl sv | |||
2 vowel/i_3 af sk | |||
4 vowel/i_4 fi hu it is | |||
3 vowel/i_4 fi hu is | |||
5 vowel/i_5 en_sc | |||
3 vowel/i#_5 pt_pt ro | |||
1 vowel/i_6 hr | |||
@@ -469,7 +470,7 @@ | |||
2 vowel/ii_4 en_rp | |||
6 vowel/ii_en en en_n | |||
5 vowel/@_low hi ro no | |||
9 vowel/o en en_wm de fr hi it pt_pt sv | |||
10 vowel/o base2 en en_wm de fr hi it pt_pt sv | |||
4 vowel/o_2 cy hi hu no | |||
2 vowel/o-_2 en_n en_wm | |||
2 vowel/o_3 en_sc | |||
@@ -505,9 +506,9 @@ | |||
1 vowel/u_5 sw | |||
3 vowel/u_6 en_rp pt_pt | |||
1 vowel/u_7 vi | |||
15 vowel/u_bck cy fi fr_ca hi hu nl pl sk hr it pt sv ++ | |||
16 vowel/u_bck base2 cy fi fr_ca hi hu nl pl sk hr it pt ++ | |||
2 vowel/uu en en_wm | |||
2 vowel/uu_2 de it | |||
2 vowel/uu_2 base2 de | |||
1 vowel/uu_3 af | |||
2 vowel/uu_4 fi sv | |||
7 vowel/uu_bck fr_ca hi hu pt no zh_yue | |||
@@ -601,7 +602,7 @@ | |||
1 w/_w base | |||
1 w/w_ base | |||
1 w/w@ base | |||
8 w/w2 pl sk it pt_pt | |||
8 w/w2 base2 pl sk pt_pt | |||
1 w/wa base | |||
1 w/we base | |||
3 w/wi base vi zh_yue | |||
@@ -612,8 +613,8 @@ | |||
20 x/b base hi ro is | |||
1 x/b_ base | |||
10 x/d base en_us hi hr el is | |||
14 x/d_ base fi fr hi pl hr ru it ro el is sw | |||
23 x/d_dnt base fi fr hi hu pl ru it ro sw | |||
14 x/d_ base base2 fi fr hi pl hr ru ro el is sw | |||
23 x/d_dnt base base2 fi fr hi hu pl ru ro sw | |||
6 x/d_pzd pl ru | |||
4 x/dzh base hi | |||
5 x/dzh_ base hi ru |
@@ -0,0 +1,209 @@ | |||
//==================================================== | |||
// Italian | |||
//==================================================== | |||
phoneme : // Lengthen previous vowel by "length" | |||
virtual | |||
length 70 | |||
endphoneme | |||
phoneme a | |||
vowel starttype (a) endtype (a) | |||
length 180 | |||
formants vowel/a_2 | |||
reduceto a/ 4 | |||
endphoneme | |||
phoneme a/ | |||
vowel starttype (@) endtype (@) | |||
length 180 | |||
formants vowel/a# | |||
endphoneme | |||
phoneme e | |||
vowel starttype (e) endtype (e) | |||
length 170 | |||
formants vowel/e | |||
endphoneme | |||
phoneme E | |||
vowel starttype (e) endtype (e) | |||
length 170 | |||
formants vowel/ee_1 | |||
reduceto e 4 // [E] only in stressed syllables | |||
endphoneme | |||
phoneme i | |||
vowel starttype (i) endtype (i) | |||
length 150 | |||
formants vowel/i | |||
linkout ; | |||
endphoneme | |||
phoneme o | |||
vowel starttype (o) endtype (o) | |||
length 170 | |||
formants vowel/o | |||
endphoneme | |||
phoneme O | |||
vowel starttype (o) endtype (o) | |||
length 170 | |||
formants vowel/0 | |||
reduceto o 4 // [O] only in stressed syllables | |||
endphoneme | |||
phoneme u | |||
vowel starttype (u) endtype (u) | |||
length 160 | |||
formants vowel/u_bck | |||
endphoneme | |||
phoneme U | |||
vowel starttype (u) endtype (u) | |||
length 160 | |||
formants vowel/uu_2 | |||
endphoneme | |||
phoneme aU | |||
vowel starttype (a) endtype (u) | |||
length 270 | |||
formants vdiph/au_4 | |||
endphoneme | |||
phoneme eU | |||
vowel starttype (e) endtype (u) | |||
length 260 | |||
formants vdiph/eu | |||
endphoneme | |||
phoneme iU | |||
vowel starttype (i) endtype (u) | |||
length 240 | |||
formants vdiph2/iu | |||
endphoneme | |||
phoneme aI | |||
vowel starttype (a) endtype (i) | |||
length 250 | |||
formants vdiph/ai | |||
endphoneme | |||
phoneme eI | |||
vowel starttype (e) endtype (i) | |||
length 250 | |||
formants vdiph/ei | |||
endphoneme | |||
phoneme oI | |||
vowel starttype (o) endtype (i) | |||
length 240 | |||
formants vdiph/oi | |||
endphoneme | |||
phoneme uI | |||
vowel starttype (u) endtype (i) | |||
length 240 | |||
formants vdiph/ui | |||
endphoneme | |||
// CONSONANTS | |||
//=========== | |||
phoneme w2 | |||
starttype w endtype w | |||
liquid | |||
length 100 | |||
beforenotvowel w/ | |||
lengthmod 7 | |||
formants w/w2 | |||
after _ w/w2 | |||
endphoneme | |||
phoneme p | |||
vls blb stop | |||
vowelin f1=0 f2=1000 -50 -100 f3=-200 80 | |||
vowelout f1=0 f2=1000 -500 -350 f3=-300 80 rms=30 | |||
lengthmod 2 | |||
wave ustop/p_unasp | |||
before _ ustop/p_unasp_%80 | |||
before r ustop/pr | |||
before l ustop/pl | |||
switchvoicing b | |||
endphoneme | |||
phoneme ts | |||
vls alv afr sibilant | |||
vowelin f1=0 f2=1700 -300 300 f3=-100 80 | |||
vowelout f1=0 f2=1700 -300 250 f3=-100 80 rms=20 | |||
lengthmod 2 | |||
wave ustop/ts | |||
endphoneme | |||
phoneme t // dental variant of /t/ | |||
vls dnt stop | |||
vowelin f1=0 f2=1600 -300 300 f3=-100 80 | |||
vowelout f1=0 f2=1600 -300 250 f3=-100 80 rms=20 | |||
lengthmod 2 | |||
wave ustop/t_dnt%50 | |||
before _ ustop/t_dnt%35 | |||
switchvoicing d | |||
endphoneme | |||
phoneme d // dental variant of /d/ | |||
vcd dnt stop | |||
vowelin f1=2 f2=1500 -300 300 f3=-150 80 | |||
vowelout f1=2 f2=1500 -300 300 f3=-150 80 | |||
formants d/d+x/d_dnt%110 | |||
before _ d/d_+x/d_ | |||
before r d/dr+x/d_dnt%110 | |||
after _ NULL | |||
after @ d/xd | |||
lengthmod 5 | |||
switchvoicing t | |||
endphoneme | |||
phoneme k | |||
vls vel stop | |||
vowelin f1=0 f2=2300 200 400 f3=-100 80 | |||
vowelout f1=0 f2=2300 300 400 f3=-100 80 rms=20 | |||
lengthmod 2 | |||
wave ustop/k_unasp%70 // weaker | |||
before _ ustop/k_ | |||
before r ustop/kr | |||
before l ustop/kl | |||
before (i) ustop/ki | |||
switchvoicing g | |||
endphoneme | |||
@@ -3,13 +3,6 @@ | |||
// Italian | |||
//==================================================== | |||
phoneme : // Lengthen previous vowel by "length" | |||
virtual | |||
length 70 | |||
endphoneme | |||
phoneme a | |||
vowel starttype (a) endtype (a) | |||
@@ -36,7 +29,7 @@ endphoneme | |||
phoneme E | |||
vowel starttype (e) endtype (e) | |||
length 170 | |||
formants vowel/ee_1 | |||
formants vowel/e_mid | |||
reduceto e 4 // [E] only in stressed syllables | |||
endphoneme | |||
@@ -48,13 +41,6 @@ phoneme i | |||
linkout ; | |||
endphoneme | |||
phoneme i2 | |||
vowel starttype (i) endtype (i) | |||
length 150 | |||
formants vowel/i_4 | |||
linkout ; | |||
endphoneme | |||
phoneme o | |||
vowel starttype (o) endtype (o) | |||
@@ -66,18 +52,12 @@ endphoneme | |||
phoneme O | |||
vowel starttype (o) endtype (o) | |||
length 170 | |||
formants vowel/0 | |||
// formants vowel/oo_4 | |||
// formants vowel/0 | |||
reduceto o 4 // [O] only in stressed syllables | |||
endphoneme | |||
phoneme U | |||
vowel starttype (u) endtype (u) | |||
length 160 | |||
formants vowel/uu_2 | |||
endphoneme | |||
phoneme u | |||
vowel starttype (u) endtype (u) | |||
length 160 | |||
@@ -92,19 +72,6 @@ phoneme aU | |||
endphoneme | |||
phoneme eU | |||
vowel starttype (e) endtype (u) | |||
length 260 | |||
formants vdiph/eu | |||
endphoneme | |||
phoneme iU | |||
vowel starttype (i) endtype (u) | |||
length 240 | |||
formants vdiph2/iu | |||
endphoneme | |||
phoneme aI | |||
vowel starttype (a) endtype (i) | |||
@@ -113,13 +80,6 @@ phoneme aI | |||
endphoneme | |||
phoneme eI | |||
vowel starttype (e) endtype (i) | |||
length 250 | |||
formants vdiph/ei | |||
endphoneme | |||
phoneme oI | |||
vowel starttype (o) endtype (i) | |||
length 230 | |||
@@ -127,90 +87,7 @@ phoneme oI | |||
endphoneme | |||
phoneme uI | |||
vowel starttype (u) endtype (i) | |||
length 240 | |||
formants vdiph/ui | |||
endphoneme | |||
// CONSONANTS | |||
//=========== | |||
phoneme w2 | |||
starttype w endtype w | |||
liquid | |||
length 100 | |||
beforenotvowel w/ | |||
lengthmod 7 | |||
formants w/w2 | |||
after _ w/w2 | |||
endphoneme | |||
phoneme p | |||
vls blb stop | |||
vowelin f1=0 f2=1000 -50 -100 f3=-200 80 | |||
vowelout f1=0 f2=1000 -500 -350 f3=-300 80 rms=30 | |||
lengthmod 2 | |||
wave ustop/p_unasp | |||
before _ ustop/p_unasp_%80 | |||
before r ustop/pr | |||
before l ustop/pl | |||
switchvoicing b | |||
endphoneme | |||
phoneme ts | |||
vls alv afr sibilant | |||
vowelin f1=0 f2=1700 -300 300 f3=-100 80 | |||
vowelout f1=0 f2=1700 -300 250 f3=-100 80 rms=20 | |||
lengthmod 2 | |||
wave ustop/ts | |||
endphoneme | |||
phoneme t // dental variant of /t/ | |||
vls dnt stop | |||
vowelin f1=0 f2=1600 -300 300 f3=-100 80 | |||
vowelout f1=0 f2=1600 -300 250 f3=-100 80 rms=20 | |||
lengthmod 2 | |||
wave ustop/t_dnt%50 | |||
before _ ustop/t_dnt%35 | |||
switchvoicing d | |||
endphoneme | |||
phoneme d // dental variant of /d/ | |||
vcd dnt stop | |||
vowelin f1=2 f2=1500 -300 300 f3=-150 80 | |||
vowelout f1=2 f2=1500 -300 300 f3=-150 80 | |||
formants d/d+x/d_dnt%110 | |||
before _ d/d_+x/d_ | |||
before r d/dr+x/d_dnt%110 | |||
after _ NULL | |||
after @ d/xd | |||
lengthmod 5 | |||
switchvoicing t | |||
endphoneme | |||
phoneme k | |||
vls vel stop | |||
vowelin f1=0 f2=2300 200 400 f3=-100 80 | |||
vowelout f1=0 f2=2300 300 400 f3=-100 80 rms=20 | |||
lengthmod 2 | |||
wave ustop/k_unasp%70 // weaker | |||
before _ ustop/k_ | |||
before r ustop/kr | |||
before l ustop/kl | |||
before (i) ustop/ki | |||
switchvoicing g | |||
endphoneme | |||
@@ -1125,6 +1125,8 @@ endphoneme | |||
// ADDITIONAL PHONEME TABLES | |||
//******************************************************************* | |||
phonemetable base2 base | |||
include ph_base2 | |||
phonemetable en base | |||
include ph_english | |||
@@ -1190,22 +1192,22 @@ phonemetable ru base | |||
include ph_russian | |||
phonemetable it base | |||
phonemetable it base2 | |||
include ph_italian | |||
phonemetable es it | |||
phonemetable es base2 | |||
include ph_spanish | |||
phonemetable pt it | |||
phonemetable pt base2 | |||
include ph_pt_brazil | |||
phonemetable pt_pt pt | |||
include ph_portugual | |||
phonemetable ro it | |||
phonemetable ro base2 | |||
include ph_romanian | |||
phonemetable el it | |||
phonemetable el base2 | |||
include ph_greek | |||
phonemetable sv base |
@@ -42,10 +42,11 @@ | |||
FILE *f_wavtest = NULL; | |||
FILE *f_events = NULL; | |||
int OpenWaveFile3(const char *path, int rate) | |||
/******************************************/ | |||
FILE *OpenWaveFile3(const char *path) | |||
/***********************************/ | |||
{ | |||
int *p; | |||
FILE *f; | |||
static unsigned char wave_hdr[44] = { | |||
'R','I','F','F',0,0,0,0,'W','A','V','E','f','m','t',' ', | |||
@@ -54,56 +55,54 @@ int OpenWaveFile3(const char *path, int rate) | |||
if(path == NULL) | |||
return(2); | |||
return(NULL); | |||
// set the sample rate in the header | |||
p = (int *)(&wave_hdr[24]); | |||
p[0] = rate; | |||
p[1] = rate * 2; | |||
p[0] = samplerate; | |||
p[1] = samplerate * 2; | |||
f_wavtest = fopen(path,"wb"); | |||
f = fopen(path,"wb"); | |||
if(f_wavtest != NULL) | |||
if(f != NULL) | |||
{ | |||
fwrite(wave_hdr,1,sizeof(wave_hdr),f_wavtest); | |||
return(0); | |||
fwrite(wave_hdr,1,sizeof(wave_hdr),f); | |||
} | |||
return(1); | |||
return(f); | |||
} // end of OpenWaveFile | |||
void CloseWaveFile3(int rate) | |||
/******************/ | |||
void CloseWaveFile3(FILE *f) | |||
/*************************/ | |||
{ | |||
unsigned int pos; | |||
static int value; | |||
fflush(f_wavtest); | |||
pos = ftell(f_wavtest); | |||
fflush(f); | |||
pos = ftell(f); | |||
value = pos - 8; | |||
fseek(f_wavtest,4,SEEK_SET); | |||
fwrite(&value,4,1,f_wavtest); | |||
fseek(f,4,SEEK_SET); | |||
fwrite(&value,4,1,f); | |||
value = rate; | |||
fseek(f_wavtest,24,SEEK_SET); | |||
fwrite(&value,4,1,f_wavtest); | |||
value = samplerate; | |||
fseek(f,24,SEEK_SET); | |||
fwrite(&value,4,1,f); | |||
value = rate*2; | |||
fseek(f_wavtest,28,SEEK_SET); | |||
fwrite(&value,4,1,f_wavtest); | |||
value = samplerate*2; | |||
fseek(f,28,SEEK_SET); | |||
fwrite(&value,4,1,f); | |||
value = pos - 44; | |||
fseek(f_wavtest,40,SEEK_SET); | |||
fwrite(&value,4,1,f_wavtest); | |||
fseek(f,40,SEEK_SET); | |||
fwrite(&value,4,1,f); | |||
fclose(f_wavtest); | |||
f_wavtest = NULL; | |||
fclose(f); | |||
} // end of CloseWaveFile2 | |||
} // end of CloseWaveFile3 | |||
int TestUriCallback(int type, const char *uri, const char *base) | |||
@@ -126,7 +125,8 @@ if(f_wavtest == NULL) return(0); | |||
if(wav == NULL) | |||
{ | |||
fprintf(f_events,"Finished\n"); | |||
CloseWaveFile3(samplerate); | |||
CloseWaveFile3(f_wavtest); | |||
f_wavtest = NULL; | |||
fclose(f_events); | |||
return(0); | |||
} | |||
@@ -1183,8 +1183,7 @@ void TestTest(int control) | |||
//CharsetToUnicode("ISO-8859-4"); | |||
//CharsetToUnicode("ISCII"); | |||
//return; | |||
return; | |||
if(control==2) | |||
{ | |||
@@ -1206,7 +1205,7 @@ if(control==2) | |||
textbuf[ix] = 0; | |||
fclose(f); | |||
OpenWaveFile3("/home/jsd1/speechdata/text/test.wav",samplerate); | |||
f_wavtest = OpenWaveFile3("/home/jsd1/speechdata/text/test.wav"); | |||
f_events = fopen("/home/jsd1/speechdata/text/events","w"); | |||
fprintf(f_events,"Audio Text Length Type Id\n"); | |||
@@ -962,7 +962,8 @@ void SpectSeq::MakeWave(int start, int end, PitchEnvelope &pitch) | |||
// } | |||
len_samples = int(((total_length * lfactor + 50) * samplerate) / 1000); | |||
SetPitch(len_samples,pitch.env,pitch.pitch1-pbase,pitch.pitch2-pbase); | |||
// SetPitch(len_samples,pitch.env,pitch.pitch1-pbase,pitch.pitch2-pbase); | |||
SetPitch(len_samples,pitch.env,9,44); | |||
fname_speech = WavFileName(); | |||
OpenWaveFile2(fname_speech); | |||
@@ -1038,7 +1039,8 @@ void SpectFrame::MakeWave(int control, PitchEnvelope &pitche, int amplitude, int | |||
len_samples = (length * samplerate) / 1000; | |||
pbase = voice->pitch_base >> 12; | |||
SetPitch(len_samples + 50,pitche.env,pitche.pitch1-pbase,pitche.pitch2-pbase); | |||
// SetPitch(len_samples + 50,pitche.env,pitche.pitch1-pbase,pitche.pitch2-pbase); | |||
SetPitch(len_samples + 50,pitche.env,9,44); | |||
fname_speech = WavFileName(); | |||
if(OpenWaveFile2(fname_speech) != 0) |
@@ -35,7 +35,7 @@ | |||
#include "translate.h" | |||
#include "wave.h" | |||
const char *version_string = "1.26.03 12.Jun.07"; | |||
const char *version_string = "1.26.04 13.Jun.07"; | |||
const int version_phdata = 0x012601; | |||
int option_device_number = -1; |
@@ -81,6 +81,15 @@ typedef struct { | |||
DOUBLEX right_inc; | |||
} wavegen_peaks_t; | |||
typedef struct { | |||
double a; | |||
double b; | |||
double c; | |||
double x1; | |||
double x2; | |||
} RESONATOR; | |||
typedef struct { | |||
short length; | |||
unsigned char n_frames; | |||
@@ -240,3 +249,5 @@ int DoSample(PHONEME_TAB *ph1, PHONEME_TAB *ph2, int which, int length_mod, int | |||
int DoSpect(PHONEME_TAB *this_ph, PHONEME_TAB *prev_ph, PHONEME_TAB *next_ph, | |||
int which, PHONEME_LIST *plist, int modulation); | |||
int PauseLength(int pause); | |||
void InitBreath(void); |
@@ -1187,8 +1187,8 @@ int Translator::TranslateWord2(char *word, WORD_TAB *wtab, int pre_pause, int ne | |||
if(sylimit & 0x100) | |||
{ | |||
// only if the second word has $alt attribute | |||
flags2 = translator->TranslateWord(p2+1, 0, wtab+1); | |||
strcpy(ph_buf,word_phonemes); | |||
flags2 = translator->TranslateWord(p2+1, 0, wtab+1); | |||
if((flags2 & FLAG_ALT_TRANS) == 0) | |||
{ | |||
ok = 0; |
@@ -36,6 +36,7 @@ typedef struct { | |||
int echo_amp; | |||
int n_harmonic_peaks; // highest formant which is formed from adding harmonics | |||
int peak_shape; // alternative shape for formant peaks (0=standard 1=squarer) | |||
int voicing; // 100% = 64, level of formant-synthesized sound | |||
// parameters used by Wavegen | |||
int freq[N_PEAKS]; // 100% = 256 | |||
@@ -47,6 +48,9 @@ typedef struct { | |||
int height2[N_PEAKS]; // 100% = 256 | |||
int width2[N_PEAKS]; // 100% = 256 | |||
int breath[N_PEAKS]; // amount of breath for each formant. breath[0] indicates whether any are set. | |||
int breathw[N_PEAKS]; // width of each breath formant | |||
// This table provides the opportunity for tone control. | |||
// Adjustment of harmonic amplitudes, steps of 8Hz | |||
// value of 128 means no change |
@@ -84,23 +84,26 @@ char voice_name[40]; | |||
#define V_ROUGHNESS 11 | |||
#define V_CLARITY 12 | |||
#define V_TONE 13 | |||
#define V_VOICING 14 | |||
#define V_BREATH 15 | |||
#define V_BREATHW 16 | |||
// these override defaults set by the translator | |||
#define V_WORDGAP 15 | |||
#define V_INTONATION 16 | |||
#define V_STRESSLENGTH 17 | |||
#define V_STRESSAMP 18 | |||
#define V_STRESSADD 19 | |||
#define V_DICTRULES 20 | |||
#define V_STRESSRULE 21 | |||
#define V_CHARSET 22 | |||
#define V_NUMBERS 23 | |||
#define V_OPTION 24 | |||
#define V_MBROLA 25 | |||
#define V_WORDGAP 17 | |||
#define V_INTONATION 18 | |||
#define V_STRESSLENGTH 19 | |||
#define V_STRESSAMP 20 | |||
#define V_STRESSADD 21 | |||
#define V_DICTRULES 22 | |||
#define V_STRESSRULE 23 | |||
#define V_CHARSET 24 | |||
#define V_NUMBERS 25 | |||
#define V_OPTION 26 | |||
#define V_MBROLA 27 | |||
// these need a phoneme table to have been specified | |||
#define V_REPLACE 26 | |||
#define V_REPLACE 28 | |||
@@ -133,6 +136,9 @@ static keywtab_t keyword_tab[] = { | |||
{"roughness", V_ROUGHNESS}, | |||
{"clarity", V_CLARITY}, | |||
{"tone", V_TONE}, | |||
{"voicing", V_VOICING}, | |||
{"breath", V_BREATH}, | |||
{"breathw", V_BREATHW}, | |||
{"numbers", V_NUMBERS}, | |||
{"option", V_OPTION}, | |||
{"mbrola", V_MBROLA}, | |||
@@ -352,7 +358,9 @@ static espeak_VOICE *ReadVoiceFile(FILE *f_in, const char *fname, const char*lea | |||
void VoiceReset(int tone_only) | |||
{//=========================== | |||
// Set voice to the default values | |||
int pk; | |||
static int breath_widths[N_PEAKS] = {0,200,200,400,400,400,600,600,600}; | |||
// default is: pitch 82,118 | |||
voice->pitch_base = 0x49000; // default, 73 << 12; | |||
@@ -363,17 +371,21 @@ void VoiceReset(int tone_only) | |||
voice->flutter = 64; | |||
voice->n_harmonic_peaks = 5; | |||
voice->peak_shape = 1; | |||
voice->voicing = 64; | |||
#ifdef PLATFORM_RISCOS | |||
voice->roughness = 1; | |||
#else | |||
voice->roughness = 2; | |||
#endif | |||
InitBreath(); | |||
for(pk=0; pk<N_PEAKS; pk++) | |||
{ | |||
voice->freq[pk] = 256; | |||
voice->height[pk] = 256; | |||
voice->width[pk] = 256; | |||
voice->breath[pk] = 0; | |||
voice->breathw[pk] = breath_widths[pk];  // default breath formant widths
// adjust formant smoothing depending on sample rate | |||
formant_rate[pk] = (formant_rate_22050[pk] * 22050)/samplerate; | |||
@@ -769,6 +781,19 @@ voice_t *LoadVoice(char *vname, int control)
		}
		break;

	case V_VOICING:
		if(sscanf(p,"%d",&value)==1)
			voice->voicing = (value * 64)/100;
		break;

	case V_BREATH:
		voice->breath[0] = Read8Numbers(p,&voice->breath[1]);
		break;

	case V_BREATHW:
		voice->breathw[0] = Read8Numbers(p,&voice->breathw[1]);
		break;

	case V_MBROLA:
		{
		char name[40];
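In LoadVoice() the three new voice-file attributes are read straight from the text line: voicing takes a single percentage (converted to the internal 64 == 100% units), while breath and breathw each take up to eight numbers, one per formant peak above peak 0, read by Read8Numbers into elements 1..8. A rough, self-contained sketch of that style of parsing; read_numbers below is a hypothetical stand-in, and the exact behaviour and return value of the real Read8Numbers are assumptions, not shown in this patch:

	#include <stdio.h>

	#define N_PEAKS 9

	// Hypothetical stand-in for Read8Numbers: reads up to 8 integers from a
	// text line into out[0..7] and returns how many were found (assumption:
	// the real return value only matters as a non-zero "any set" flag).
	static int read_numbers(const char *p, int *out)
	{
		int n = 0;
		int value;
		int consumed;

		while(n < 8 && sscanf(p, "%d%n", &value, &consumed) == 1)
		{
			out[n++] = value;
			p += consumed;
		}
		return n;
	}

	int main(void)
	{
		int breath[N_PEAKS] = {0};
		const char *line = "0 3 4 4 4 4";   // e.g. the value part of a "breath" voice-file line

		breath[0] = read_numbers(line, &breath[1]);
		printf("numbers read: %d, breath[2]=%d\n", breath[0], breath[2]);   // 6, 3
		return 0;
	}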
@@ -49,7 +49,7 @@
#include "sintab.h"
#define PI 3.1415927
#define PI2 6.283185307
#define STEPSIZE 64    // 2.9 ms at 22 kHz sample rate
#define N_WAV_BUF 10
@@ -82,6 +82,9 @@ static int echo_tail;
static int echo_amp = 0;
static short echo_buf[N_ECHO_BUF];

static int voicing;
RESONATOR rbreath[N_PEAKS];

static int harm_sqrt_n = 0;
@@ -124,6 +127,9 @@ static int cycle_samples;  // number of samples in a cycle at current pitch
static int cbytes;
static int hf_factor;

static double minus_pi_t;
static double two_pi_t;

unsigned char *out_ptr;
unsigned char *out_start;
@@ -770,6 +776,7 @@ static void WavegenSetEcho(void)
	int delay;
	int amp;

	voicing = wvoice->voicing;
	delay = wvoice->echo_delay;
	amp = wvoice->echo_amp;
@@ -824,6 +831,8 @@ int PeaksToHarmspect(wavegen_peaks_t *peaks, int pitch, int *htab, int control)
	if(wvoice == NULL)
		return(1);

	hmax = (peaks[wvoice->n_harmonic_peaks].freq + peaks[wvoice->n_harmonic_peaks].right)/pitch;
	if(hmax >= MAX_HARMONIC)
		hmax = MAX_HARMONIC-1;

	// restrict highest harmonic to half the samplerate
	hmax_samplerate = (((samplerate * 19)/40) << 16)/pitch;   // only 95% of Nyquist freq
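The added clamp in PeaksToHarmspect() first caps the harmonic count at MAX_HARMONIC, then keeps the highest synthesized harmonic safely below aliasing: (samplerate * 19) / 40 is 47.5% of the sample rate, i.e. 95% of the Nyquist frequency, and the << 16 shift followed by the division by pitch turns that frequency ceiling into a limit on the harmonic index for the current pitch. A quick standalone check of the arithmetic at the default 22050 Hz rate (illustration only, not eSpeak code):

	#include <stdio.h>

	int main(void)
	{
		int samplerate = 22050;
		int ceiling_hz = (samplerate * 19) / 40;   // 10473 Hz, ~95% of the 11025 Hz Nyquist limit

		printf("harmonic ceiling: %d Hz (Nyquist is %d Hz)\n", ceiling_hz, samplerate / 2);
		return 0;
	}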
@@ -961,6 +970,110 @@ static void AdvanceParameters()
static double resonator(RESONATOR *r, double input)
{//================================================
	double x;

	x = r->a * input + r->b * r->x1 + r->c * r->x2;
	r->x2 = r->x1;
	r->x1 = x;

	return x;
}


static void setresonator(RESONATOR *rp, int freq, int bwidth, int init)
{//====================================================================
// freq    Frequency of resonator in Hz
// bwidth  Bandwidth of resonator in Hz
// init    Initialize internal data

	double x;
	double arg;

	if(init)
	{
		rp->x1 = 0;
		rp->x2 = 0;
	}

	// x = exp(-pi * bwidth * t)
	arg = minus_pi_t * bwidth;
	x = exp(arg);

	// c = -(x*x)
	rp->c = -(x * x);

	// b = x * 2*cos(2 pi * freq * t)
	arg = two_pi_t * freq;
	rp->b = x * cos(arg) * 2.0;

	// a = 1.0 - b - c
	rp->a = 1.0 - rp->b - rp->c;
}  // end of setresonator


void InitBreath(void)
{//==================
	int ix;

	minus_pi_t = -PI / samplerate;
	two_pi_t = -2.0 * minus_pi_t;

	for(ix=0; ix<N_PEAKS; ix++)
	{
		setresonator(&rbreath[ix],2000,200,1);
	}
}  // end of InitBreath
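setresonator() sets up a standard two-pole digital resonator of the Klatt type: resonator() computes y[n] = a*x[n] + b*y[n-1] + c*y[n-2], with c = -exp(-2*pi*B*T), b = 2*exp(-pi*B*T)*cos(2*pi*F*T) and a = 1 - b - c, which makes the gain at 0 Hz equal to 1. A small standalone check of the coefficients for the default breath resonator that InitBreath() installs (2000 Hz centre, 200 Hz bandwidth, 22050 Hz sample rate); this is an illustration, not part of the patch:

	#include <math.h>
	#include <stdio.h>

	#define PI 3.1415927

	int main(void)
	{
		// Same coefficient formulas as setresonator(), for the default breath resonator.
		double samplerate = 22050.0;
		double freq = 2000.0;
		double bwidth = 200.0;

		double minus_pi_t = -PI / samplerate;
		double two_pi_t = -2.0 * minus_pi_t;

		double x = exp(minus_pi_t * bwidth);
		double c = -(x * x);
		double b = x * cos(two_pi_t * freq) * 2.0;
		double a = 1.0 - b - c;

		printf("a=%f b=%f c=%f\n", a, b, c);   // roughly a=0.31  b=1.64  c=-0.94
		return 0;
	}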
void SetBreath()
{//=============
	int pk;

	if(wvoice->breath[0] == 0)
		return;

	for(pk=1; pk<N_PEAKS; pk++)
	{
		if(wvoice->breath[pk] != 0)
		{
			// breath[0] indicates that some breath formants are needed
			// set the freq from the current synthesis formant and the width from the voice data
			setresonator(&rbreath[pk], peaks[pk].freq >> 16, wvoice->breathw[pk],0);
		}
	}
}  // end of SetBreath
#define getrandom(min,max) ((rand()%(int)(((max)+1)-(min)))+(min))

int ApplyBreath(void)
{//==================
	int noise;
	int ix;
	int amp;
	int value = 0;

	noise = getrandom(-4095,4095);

	for(ix=1; ix < N_PEAKS; ix++)
	{
		if((amp = wvoice->breath[ix]) > 0)
		{
			amp *= (peaks[ix].height >> 13);
			value += int(resonator(&rbreath[ix],noise) * amp);
		}
	}
	return (value);
}
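ApplyBreath() takes a single random noise sample and runs it through each enabled breath resonator, scaling by the voice's breath[] value and by the current height of the matching formant peak (peaks[ix].height >> 13), so the breath noise tracks the spectral envelope of the voiced sound. The getrandom macro keeps the noise within -4095..4095: rand() % 8191 gives 0..8190, and adding the minimum shifts that into the signed range. A quick standalone check of that range (illustration only):

	#include <stdio.h>
	#include <stdlib.h>

	#define getrandom(min,max) ((rand()%(int)(((max)+1)-(min)))+(min))

	int main(void)
	{
		// Sanity check: getrandom(-4095,4095) stays within +/-4095.
		int lo = 0, hi = 0;
		for(int i = 0; i < 100000; i++)
		{
			int n = getrandom(-4095, 4095);
			if(n < lo) lo = n;
			if(n > hi) hi = n;
		}
		printf("min=%d max=%d\n", lo, hi);   // expect values close to -4095 and 4095
		return 0;
	}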
static int Wavegen()
{//=================
	unsigned short waveph;
@@ -1015,6 +1128,7 @@ static int Wavegen()
			hswitch ^= 1;
			maxh2 = PeaksToHarmspect(peaks,pitch<<4,hspect[hswitch],1);

			SetBreath();
		}
		else
		if((samplecount & 0x07) == 0)
@@ -1150,6 +1264,17 @@ static int Wavegen()
			h++;
		}
#endif

		if(voicing != 64)
		{
			total = (total >> 6) * voicing;
		}

		if(wvoice->breath[0])
		{
			total += ApplyBreath();
		}

		// mix with sampled wave if required
		z2 = 0;
		if(mix_wavefile_ix < n_mix_wavefile)
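The two added blocks run just before the synthesized harmonics are mixed with any sampled wave: when voicing is anything other than 64 (100%), the summed harmonic total is rescaled by voicing/64 using a shift, and if the voice has any breath set, the filtered noise from ApplyBreath() is added on top. A tiny illustration of the (total >> 6) * voicing scaling, assuming a voice file containing "voicing 50" (stored internally as 32):

	#include <stdio.h>

	int main(void)
	{
		// The "100% == 64" scaling applied to the summed harmonics in Wavegen():
		// total = (total >> 6) * voicing  is roughly total * voicing / 64.
		int total = 100000;
		int voicing = 32;   // e.g. "voicing 50" in a voice file

		int scaled = (total >> 6) * voicing;
		printf("%d -> %d (about half)\n", total, scaled);   // 100000 -> 49984
		return 0;
	}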