The Android 4.x+ code has changed a lot since the initial import of the eyes-free codebase. A lot of bug fixes and improvements have been made to it. The eyes-free codebase had two different code paths: * one for ICS (4.0) or later, using the Java-based TTS APIs provided by the Android platform; * one for pre-ICS using an internal C++-based TTS API. Thus, any bug fixes or improvements would have to be done to both code bases if Android 2.x/3.x support is required. This is not maintainable. If pre-ICS support is to be re-added in the future, the plan will be to: * forward the C++-based APIs to the Java-based APIs via a compatibility layer; * use a compatibility layer (Android Support Library?) for using the ICS settings API on pre-ICS.master
<category android:name="android.intent.category.LAUNCHER" /> | <category android:name="android.intent.category.LAUNCHER" /> | ||||
</intent-filter> | </intent-filter> | ||||
</activity> | </activity> | ||||
<!-- Legacy code for pre-ICS compatibility. --> | |||||
<activity | |||||
android:name=".eSpeak" | |||||
android:label="@string/app_name" | |||||
android:theme="@android:style/Theme.Translucent.NoTitleBar" > | |||||
<intent-filter> | |||||
<action android:name="android.intent.action.START_TTS_ENGINE" /> | |||||
<category android:name="android.intent.category.tts_engine.ESPEAK" /> | |||||
<category android:name="android.intent.category.tts_lang.afr" /> | |||||
<category android:name="android.intent.category.tts_lang.bos" /> | |||||
<category android:name="android.intent.category.tts_lang.zho" /> | |||||
<category android:name="android.intent.category.tts_lang.cmn" /> | |||||
<category android:name="android.intent.category.tts_lang.yue" /> | |||||
<category android:name="android.intent.category.tts_lang.hrv" /> | |||||
<category android:name="android.intent.category.tts_lang.ces" /> | |||||
<category android:name="android.intent.category.tts_lang.nld" /> | |||||
<category android:name="android.intent.category.tts_lang.eng" /> | |||||
<category android:name="android.intent.category.tts_lang.eng.USA" /> | |||||
<category android:name="android.intent.category.tts_lang.eng.GBR" /> | |||||
<category android:name="android.intent.category.tts_lang.epo" /> | |||||
<category android:name="android.intent.category.tts_lang.fin" /> | |||||
<category android:name="android.intent.category.tts_lang.fra" /> | |||||
<category android:name="android.intent.category.tts_lang.deu" /> | |||||
<category android:name="android.intent.category.tts_lang.ell" /> | |||||
<category android:name="android.intent.category.tts_lang.hin" /> | |||||
<category android:name="android.intent.category.tts_lang.hun" /> | |||||
<category android:name="android.intent.category.tts_lang.isl" /> | |||||
<category android:name="android.intent.category.tts_lang.ind" /> | |||||
<category android:name="android.intent.category.tts_lang.ita" /> | |||||
<category android:name="android.intent.category.tts_lang.kur" /> | |||||
<category android:name="android.intent.category.tts_lang.lat" /> | |||||
<category android:name="android.intent.category.tts_lang.mkd" /> | |||||
<category android:name="android.intent.category.tts_lang.nor" /> | |||||
<category android:name="android.intent.category.tts_lang.pol" /> | |||||
<category android:name="android.intent.category.tts_lang.por" /> | |||||
<category android:name="android.intent.category.tts_lang.ron" /> | |||||
<category android:name="android.intent.category.tts_lang.rus" /> | |||||
<category android:name="android.intent.category.tts_lang.srp" /> | |||||
<category android:name="android.intent.category.tts_lang.slk" /> | |||||
<category android:name="android.intent.category.tts_lang.spa" /> | |||||
<category android:name="android.intent.category.tts_lang.spa.MEX" /> | |||||
<category android:name="android.intent.category.tts_lang.swa" /> | |||||
<category android:name="android.intent.category.tts_lang.swe" /> | |||||
<category android:name="android.intent.category.tts_lang.tam" /> | |||||
<category android:name="android.intent.category.tts_lang.tur" /> | |||||
<category android:name="android.intent.category.tts_lang.vie" /> | |||||
<category android:name="android.intent.category.tts_lang.cym" /> | |||||
</intent-filter> | |||||
</activity> | |||||
<provider | |||||
android:name="com.reecedunn.espeak.providers.SettingsProvider" | |||||
android:authorities="com.reecedunn.espeak.providers.SettingsProvider" /> | |||||
</application> | </application> | ||||
</manifest> | </manifest> |
return null; | return null; | ||||
} | } | ||||
/** | |||||
* This tests that the location of the espeak TTS shared object matches | |||||
* the location that Android 2.2 looks for it in. | |||||
*/ | |||||
public void testSharedObjectLocation() | |||||
{ | |||||
Intent intent = new Intent("android.intent.action.START_TTS_ENGINE"); | |||||
intent.setPackage("com.reecedunn.espeak"); | |||||
PackageManager pm = getContext().getPackageManager(); | |||||
List<ResolveInfo> resolveInfos = pm.queryIntentActivities(intent, 0); | |||||
assertThat(resolveInfos, is(notNullValue())); | |||||
assertThat(resolveInfos.isEmpty(), is(false)); | |||||
ResolveInfo[] enginesArray = resolveInfos.toArray(new ResolveInfo[0]); | |||||
ActivityInfo aInfo = enginesArray[0].activityInfo; | |||||
String soFilename = aInfo.name.replace(aInfo.packageName + ".", "") + ".so"; | |||||
soFilename = soFilename.toLowerCase(); | |||||
assertThat(soFilename, is("espeak.so")); | |||||
soFilename = "/data/data/" + aInfo.packageName + "/lib/libtts" + soFilename; | |||||
assertThat(soFilename, is("/data/data/com.reecedunn.espeak/lib/libttsespeak.so")); | |||||
File f = new File(soFilename); | |||||
assertThat(f.exists(), is(true)); | |||||
} | |||||
public void testConstruction() | public void testConstruction() | ||||
{ | { | ||||
final SpeechSynthesis synth = new SpeechSynthesis(getContext(), mCallback); | final SpeechSynthesis synth = new SpeechSynthesis(getContext(), mCallback); |
/* | |||||
* Copyright (C) 2009 Google Inc. | |||||
* Copyright (C) 2012 Reece H. Dunn | |||||
* | |||||
* Licensed under the Apache License, Version 2.0 (the "License"); | |||||
* you may not use this file except in compliance with the License. | |||||
* You may obtain a copy of the License at | |||||
* | |||||
* http://www.apache.org/licenses/LICENSE-2.0 | |||||
* | |||||
* Unless required by applicable law or agreed to in writing, software | |||||
* distributed under the License is distributed on an "AS IS" BASIS, | |||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
* See the License for the specific language governing permissions and | |||||
* limitations under the License. | |||||
*/ | |||||
/* | |||||
* This file contains the TtsEngine interface used by Android to implement | |||||
* Text-to-Speech services. | |||||
* | |||||
* Android Version: 2.2 (Froyo) | |||||
* API Version: 8 | |||||
*/ | |||||
#ifndef TTS_ENGINE_H_ | |||||
#define TTS_ENGINE_H_ | |||||
namespace android { | |||||
#define ANDROID_TTS_ENGINE_PROPERTY_CONFIG "engineConfig" | |||||
#define ANDROID_TTS_ENGINE_PROPERTY_PITCH "pitch" | |||||
#define ANDROID_TTS_ENGINE_PROPERTY_RATE "rate" | |||||
#define ANDROID_TTS_ENGINE_PROPERTY_VOLUME "volume" | |||||
enum tts_synth_status { | |||||
TTS_SYNTH_DONE = 0, | |||||
TTS_SYNTH_PENDING = 1 | |||||
}; | |||||
enum tts_callback_status { | |||||
TTS_CALLBACK_HALT = 0, | |||||
TTS_CALLBACK_CONTINUE = 1 | |||||
}; | |||||
// NOTE: This is duplicated in compat/jni/tts.h. Please | |||||
// make changes there as well. | |||||
enum tts_audio_format { | |||||
TTS_AUDIO_FORMAT_INVALID = -1, | |||||
TTS_AUDIO_FORMAT_DEFAULT = 0, | |||||
TTS_AUDIO_FORMAT_PCM_16_BIT = 1, | |||||
TTS_AUDIO_FORMAT_PCM_8_BIT = 2, | |||||
}; | |||||
// The callback is used by the implementation of this interface to notify its | |||||
// client, the Android TTS service, that the last requested synthesis has been | |||||
// completed. // TODO reword | |||||
// The callback for synthesis completed takes: | |||||
// @param [inout] void *& - The userdata pointer set in the original | |||||
// synth call | |||||
// @param [in] uint32_t - Track sampling rate in Hz | |||||
// @param [in] tts_audio_format - The audio format | |||||
// @param [in] int - The number of channels | |||||
// @param [inout] int8_t *& - A buffer of audio data only valid during the | |||||
// execution of the callback | |||||
// @param [inout] size_t & - The size of the buffer | |||||
// @param [in] tts_synth_status - indicate whether the synthesis is done, or | |||||
// if more data is to be synthesized. | |||||
// @return TTS_CALLBACK_HALT to indicate the synthesis must stop, | |||||
// TTS_CALLBACK_CONTINUE to indicate the synthesis must continue if | |||||
// there is more data to produce. | |||||
typedef tts_callback_status (synthDoneCB_t)(void *&, uint32_t, | |||||
tts_audio_format, int, int8_t *&, size_t&, tts_synth_status); | |||||
class TtsEngine; | |||||
extern "C" TtsEngine* getTtsEngine(); | |||||
enum tts_result { | |||||
TTS_SUCCESS = 0, | |||||
TTS_FAILURE = -1, | |||||
TTS_FEATURE_UNSUPPORTED = -2, | |||||
TTS_VALUE_INVALID = -3, | |||||
TTS_PROPERTY_UNSUPPORTED = -4, | |||||
TTS_PROPERTY_SIZE_TOO_SMALL = -5, | |||||
TTS_MISSING_RESOURCES = -6 | |||||
}; | |||||
enum tts_support_result { | |||||
TTS_LANG_COUNTRY_VAR_AVAILABLE = 2, | |||||
TTS_LANG_COUNTRY_AVAILABLE = 1, | |||||
TTS_LANG_AVAILABLE = 0, | |||||
TTS_LANG_MISSING_DATA = -1, | |||||
TTS_LANG_NOT_SUPPORTED = -2 | |||||
}; | |||||
// Interface through which the Android TTS service drives a native TTS engine. | |||||
// getTtsEngine(), declared above, returns a pointer to an instance of this class. | |||||
class TtsEngine | |||||
{ | |||||
public: | |||||
virtual ~TtsEngine() {} | |||||
// Initialize the TTS engine and returns whether initialization succeeded. | |||||
// @param synthDoneCBPtr synthesis callback function pointer | |||||
// @param engineConfig engine-specific configuration string | |||||
// @return TTS_SUCCESS, or TTS_FAILURE | |||||
virtual tts_result init(synthDoneCB_t synthDoneCBPtr, const char *engineConfig); | |||||
// Shut down the TTS engine and releases all associated resources. | |||||
// @return TTS_SUCCESS, or TTS_FAILURE | |||||
virtual tts_result shutdown(); | |||||
// Interrupt synthesis and flushes any synthesized data that hasn't been | |||||
// output yet. This will block until callbacks underway are completed. | |||||
// @return TTS_SUCCESS, or TTS_FAILURE | |||||
virtual tts_result stop(); | |||||
// Returns the level of support for the language, country and variant. | |||||
// @param lang pointer to the code for the language | |||||
// @param country pointer to the code for the country | |||||
// @param variant pointer to the variant code | |||||
// @return TTS_LANG_COUNTRY_VAR_AVAILABLE if the language, country and variant are supported, | |||||
// and the corresponding resources are correctly installed | |||||
// TTS_LANG_COUNTRY_AVAILABLE if the language and country are supported and the | |||||
// corresponding resources are correctly installed, but there is no match for | |||||
// the specified variant | |||||
// TTS_LANG_AVAILABLE if the language is supported and the | |||||
// corresponding resources are correctly installed, but there is no match for | |||||
// the specified country and variant | |||||
// TTS_LANG_MISSING_DATA if the required resources to provide any level of support | |||||
// for the language are not correctly installed | |||||
// TTS_LANG_NOT_SUPPORTED if the language is not supported by the TTS engine. | |||||
virtual tts_support_result isLanguageAvailable(const char *lang, const char *country, | |||||
const char *variant); | |||||
// Load the resources associated with the specified language. The loaded | |||||
// language will only be used once a call to setLanguage() with the same | |||||
// language value is issued. Language and country values are coded according to the ISO three | |||||
// letter codes for languages and countries, as can be retrieved from a java.util.Locale | |||||
// instance. The variant value is encoded as the variant string retrieved from a | |||||
// java.util.Locale instance built with that variant data. | |||||
// @param lang pointer to the ISO three letter code for the language | |||||
// @param country pointer to the ISO three letter code for the country | |||||
// @param variant pointer to the variant code | |||||
// @return TTS_SUCCESS, or TTS_FAILURE | |||||
virtual tts_result loadLanguage(const char *lang, const char *country, const char *variant); | |||||
// Load the resources associated with the specified language, country and Locale variant. | |||||
// The loaded language will only be used once a call to setLanguageFromLocale() with the same | |||||
// language value is issued. Language and country values are coded according to the ISO three | |||||
// letter codes for languages and countries, as can be retrieved from a java.util.Locale | |||||
// instance. The variant value is encoded as the variant string retrieved from a | |||||
// java.util.Locale instance built with that variant data. | |||||
// NOTE(review): this description looks copied from loadLanguage(); no | |||||
// setLanguageFromLocale() is declared in this class. Presumably this call selects the | |||||
// language later reported by getLanguage() - confirm against the TTS service. | |||||
// @param lang pointer to the ISO three letter code for the language | |||||
// @param country pointer to the ISO three letter code for the country | |||||
// @param variant pointer to the variant code | |||||
// @return TTS_SUCCESS, or TTS_FAILURE | |||||
virtual tts_result setLanguage(const char *lang, const char *country, const char *variant); | |||||
// Retrieve the currently set language, country and variant, or empty strings if none of | |||||
// parameters have been set. Language and country are represented by their 3-letter ISO code | |||||
// @param[out] language pointer to the retrieved 3-letter code language value | |||||
// @param[out] country pointer to the retrieved 3-letter code country value | |||||
// @param[out] variant pointer to the retrieved variant value | |||||
// @return TTS_SUCCESS, or TTS_FAILURE | |||||
virtual tts_result getLanguage(char *language, char *country, char *variant); | |||||
// Notifies the engine what audio parameters should be used for the synthesis. | |||||
// This is meant to be used as a hint, the engine implementation will set the output values | |||||
// to those of the synthesis format, based on a given hint. | |||||
// @param[inout] encoding in: the desired audio sample format | |||||
// out: the format used by the TTS engine | |||||
// @param[inout] rate in: the desired audio sample rate | |||||
// out: the sample rate used by the TTS engine | |||||
// @param[inout] channels in: the desired number of audio channels | |||||
// out: the number of channels used by the TTS engine | |||||
// @return TTS_SUCCESS, or TTS_FAILURE | |||||
virtual tts_result setAudioFormat(tts_audio_format& encoding, uint32_t& rate, | |||||
int& channels); | |||||
// Set a property for the TTS engine | |||||
// "size" is the maximum size of "value" for properties "property" | |||||
// @param property pointer to the property name | |||||
// @param value pointer to the property value | |||||
// @param size maximum size required to store this type of property | |||||
// @return TTS_PROPERTY_UNSUPPORTED, or TTS_SUCCESS, or TTS_FAILURE, | |||||
// or TTS_VALUE_INVALID | |||||
virtual tts_result setProperty(const char *property, const char *value, | |||||
const size_t size); | |||||
// Retrieve a property from the TTS engine | |||||
// @param property pointer to the property name | |||||
// @param[out] value pointer to the retrieved property value | |||||
// @param[inout] iosize in: stores the size available to store the | |||||
// property value. | |||||
// out: stores the size required to hold the property | |||||
// value if getProperty() returned | |||||
// TTS_PROPERTY_SIZE_TOO_SMALL, unchanged otherwise | |||||
// @return TTS_PROPERTY_UNSUPPORTED, or TTS_SUCCESS, | |||||
// or TTS_PROPERTY_SIZE_TOO_SMALL | |||||
virtual tts_result getProperty(const char *property, char *value, | |||||
size_t *iosize); | |||||
// Synthesize the text. | |||||
// As the synthesis is performed, the engine invokes the callback to notify | |||||
// the TTS framework that it has filled the given buffer, and indicates how | |||||
// many bytes it wrote. The callback is called repeatedly until the engine | |||||
// has generated all the audio data corresponding to the text. | |||||
// Note about the format of the input: the text parameter may use the | |||||
// following elements | |||||
// and their respective attributes as defined in the SSML 1.0 specification: | |||||
// * lang | |||||
// * say-as: | |||||
// o interpret-as | |||||
// * phoneme | |||||
// * voice: | |||||
// o gender, | |||||
// o age, | |||||
// o variant, | |||||
// o name | |||||
// * emphasis | |||||
// * break: | |||||
// o strength, | |||||
// o time | |||||
// * prosody: | |||||
// o pitch, | |||||
// o contour, | |||||
// o range, | |||||
// o rate, | |||||
// o duration, | |||||
// o volume | |||||
// * mark | |||||
// Differences between this text format and SSML are: | |||||
// * full SSML documents are not supported | |||||
// * namespaces are not supported | |||||
// Text is coded in UTF-8. | |||||
// @param text the UTF-8 text to synthesize | |||||
// @param buffer the location where the synthesized data must be written | |||||
// @param bufferSize the number of bytes that can be written in buffer | |||||
// @param userdata pointer to be returned when the callback is invoked | |||||
// @return TTS_SUCCESS or TTS_FAILURE | |||||
virtual tts_result synthesizeText(const char *text, int8_t *buffer, | |||||
size_t bufferSize, void *userdata); | |||||
}; | |||||
} // namespace android | |||||
#endif /* TTS_ENGINE_H_ */ |
#include <jni.h> | #include <jni.h> | ||||
#include <speak_lib.h> | #include <speak_lib.h> | ||||
#include <TtsEngine.h> | |||||
#include <Log.h> | #include <Log.h> | ||||
/** @name Java to Wide String Helpers | /** @name Java to Wide String Helpers |
/* | |||||
* Copyright (C) 2008 Google Inc. | |||||
* Copyright (C) 2012 Reece H. Dunn | |||||
* | |||||
* Licensed under the Apache License, Version 2.0 (the "License"); | |||||
* you may not use this file except in compliance with the License. | |||||
* You may obtain a copy of the License at | |||||
* | |||||
* http://www.apache.org/licenses/LICENSE-2.0 | |||||
* | |||||
* Unless required by applicable law or agreed to in writing, software | |||||
* distributed under the License is distributed on an "AS IS" BASIS, | |||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
* See the License for the specific language governing permissions and | |||||
* limitations under the License. | |||||
*/ | |||||
/* | |||||
* This file contains the TtsEngine implementation for the eSpeak | |||||
* Text-to-Speech engine. | |||||
* | |||||
* Android Version: 2.2 (Froyo) | |||||
* API Version: 8 | |||||
*/ | |||||
#include <stdio.h> | |||||
#include <unistd.h> | |||||
#include <stdlib.h> | |||||
#define LOG_TAG "eSpeak Engine" | |||||
#define DEBUG true | |||||
#include <speak_lib.h> | |||||
#include <TtsEngine.h> | |||||
#include <Log.h> | |||||
/* | |||||
* This is the Manager layer. It sits on top of the native eSpeak engine | |||||
* and provides the interface to the defined Google TTS engine API. | |||||
* The Google engine API is the boundary to allow a TTS engine to be swapped. | |||||
* The Manager layer also provide the SSML tag interpretation. | |||||
* The supported SSML tags are mapped to corresponding tags natively supported by eSpeak. | |||||
* Native eSpeak functions always begin with espeak_XXX. | |||||
* | |||||
* Only a subset of SSML 1.0 tags are supported. | |||||
* Some SSML tags involve significant complexity. | |||||
* If the language is changed through an SSML tag, there is a latency for the load. | |||||
*/ | |||||
using namespace android; | |||||
const char *ESPEAK_DIRECTORY = "espeak-data"; | |||||
const char *eSpeakBaseResources[] = {"intonations", "phondata", "phonindex", "phontab", | |||||
"en_dict", "voices/en/en-us" }; | |||||
const int NUM_BASE_RESOURCES = 6; | |||||
// Format is {espeak voice, iso3 code, name} | |||||
const char *eSpeakSupportedVoices[][3] = { | |||||
{"en-us", "eng", "English"}, | |||||
{"en-us", "eng-USA", "English (US)"}, | |||||
{"en", "eng-GBR", "English (UK)"}, | |||||
{"en-sc", "eng-GBR-sc", "English (Scottish)"}, | |||||
{"en-n", "eng-GBR-n", "English (Northern UK)"}, | |||||
{"en-rp", "eng-GBR-rp", "English (Received Pronunciation)"}, | |||||
{"en-wm", "eng-GBR-wm", "English (West Midlands)"}, | |||||
{"af", "afr", "Afrikaans"}, | |||||
{"bs", "bos", "Bosnian"}, | |||||
{"ca", "cat", "Catalan"}, | |||||
{"cs", "ces", "Czech"}, | |||||
{"da", "dan", "Danish"}, | |||||
{"de", "deu", "German"}, | |||||
{"el", "ell", "Greek"}, | |||||
{"eo", "epo", "Esperanto"}, | |||||
{"es", "spa", "Spanish"}, | |||||
{"es-la", "spa-MEX", "Spanish (Latin America)"}, | |||||
{"fi", "fin", "Finnish"}, | |||||
{"fr", "fra", "French"}, | |||||
{"hr", "hrv", "Croatian"}, | |||||
{"hu", "hun", "Hungarian"}, | |||||
{"it", "ita", "Italian"}, | |||||
{"kn", "kan", "Kannada"}, | |||||
{"ku", "kur", "Kurdish"}, | |||||
{"lv", "lav", "Latvian"}, | |||||
{"nl", "nld", "Dutch"}, | |||||
{"pl", "pol", "Polish"}, | |||||
{"pt", "por", "Portuguese (Brazil)"}, | |||||
{"pt", "por-BRA", "Portuguese (Brazil)"}, | |||||
{"pt-pt", "por-PRT", "Portuguese"}, | |||||
{"ro", "ron", "Romanian"}, | |||||
{"sk", "slk", "Slovak"}, | |||||
{"sr", "srp", "Serbian"}, | |||||
{"sv", "swe", "Swedish"}, | |||||
{"sw", "swa", "Swahili"}, | |||||
{"ta", "tam", "Tamil"}, | |||||
{"tr", "tur", "Turkish"}, | |||||
{"zh", "zho", "Chinese (Mandarin)"}, | |||||
{"cy", "cym", "Welsh"}, | |||||
{"hi", "hin", "Hindi"}, | |||||
{"hy", "hye", "Armenian"}, | |||||
{"id", "ind", "Indonesian"}, | |||||
{"is", "isl", "Icelandic"}, | |||||
{"ka", "kat", "Georgian"}, | |||||
{"la", "lat", "Latin"}, | |||||
{"mk", "mkd", "Macedonian"}, | |||||
{"no", "nor", "Norwegian"}, | |||||
{"ru", "rus", "Russian"}, | |||||
{"sq", "sqi", "Albanian"}, | |||||
{"vi", "vie", "Vietnamese"}, | |||||
{"zh-yue", "zho-HKG", "Chinese (Cantonese)"}, | |||||
{"grc", "grc", "Ancient Greek"}, | |||||
{"jbo", "jbo", "Lojban"}, | |||||
{"nci", "nci", "Nahuatl (Classical)"}, | |||||
{"pap", "pap", "Papiamento" } | |||||
}; | |||||
const int NUM_SUPPORTED_VOICES = 55; | |||||
// Callback to the TTS API | |||||
synthDoneCB_t *ttsSynthDoneCBPointer; | |||||
char *eSpeakDataPath = NULL; | |||||
char currentLanguage[33]; | |||||
char currentLang[10]; | |||||
char currentCountry[10]; | |||||
char currentVariant[10]; | |||||
int sampleRate = 0; | |||||
bool hasInitialized = false; | |||||
/* Functions internal to the eSpeak engine wrapper */ | |||||
static void setSpeechRate(int speechRate) { | |||||
espeak_ERROR err = espeak_SetParameter(espeakRATE, speechRate, 0); | |||||
} | |||||
/* Functions exposed to the TTS API */ | |||||
/* Callback from espeak. Should call back to the TTS API */ | |||||
static int eSpeakCallback(short *wav, int numsamples, espeak_EVENT *events) { | |||||
LOGI("Callback with %d samples", numsamples); | |||||
int8_t * castedWav = (int8_t *) wav; | |||||
size_t bufferSize = 0; | |||||
if (numsamples < 1) { | |||||
int8_t silenceData[] = { 0, 0 }; | |||||
size_t silenceBufferSize = sizeof(silenceData)/sizeof(silenceData[0]); | |||||
int8_t *silence = silenceData; // Passing in an empty buffer can cause a crash. | |||||
ttsSynthDoneCBPointer(events->user_data, 22050, TTS_AUDIO_FORMAT_PCM_16_BIT, 1, silence, | |||||
silenceBufferSize, TTS_SYNTH_DONE); | |||||
return 1; | |||||
} | |||||
bufferSize = numsamples * sizeof(short); | |||||
ttsSynthDoneCBPointer(events->user_data, 22050, TTS_AUDIO_FORMAT_PCM_16_BIT, 1, castedWav, | |||||
bufferSize, TTS_SYNTH_PENDING); | |||||
return 0; // continue synthesis (1 is to abort) | |||||
} | |||||
static bool fileExists(char *fileName) { | |||||
if (DEBUG) LOGV("%s", __FUNCTION__); | |||||
FILE *file = fopen(fileName, "r"); | |||||
if (file == NULL) { | |||||
return false; | |||||
} else { | |||||
fclose(file); | |||||
return true; | |||||
} | |||||
} | |||||
static bool hasBaseResources() { | |||||
if (DEBUG) LOGV("%s", __FUNCTION__); | |||||
char filename[255]; | |||||
for (int i = 0; i < NUM_BASE_RESOURCES; i++) { | |||||
sprintf(filename, "%s/%s/%s", eSpeakDataPath, ESPEAK_DIRECTORY, eSpeakBaseResources[i]); | |||||
if (!fileExists(filename)) { | |||||
LOGE("Missing resource: %s", filename); | |||||
return false; | |||||
} | |||||
} | |||||
return true; | |||||
} | |||||
/* Google Engine API function implementations */ | |||||
/* Initializes the eSpeak engine unless that has already been done.
 * Verifies the base resources, initializes eSpeak in synchronous output mode
 * from eSpeakDataPath, registers eSpeakCallback and selects the default
 * "en-us" voice.
 * return TTS_SUCCESS when the engine is (or already was) initialized,
 *        TTS_FAILURE when resources are missing or eSpeak fails to initialize
 */
tts_result attemptInit() {
    if (DEBUG) LOGV("%s", __FUNCTION__);

    // Nothing to do once the engine is up.
    if (hasInitialized) {
        return TTS_SUCCESS;
    }

    const bool resourcesPresent = hasBaseResources();
    if (!resourcesPresent) {
        return TTS_FAILURE;
    }

    // TODO Make sure that the speech data is loaded in
    // the directory /sdcard/espeak-data before calling this.
    sampleRate = espeak_Initialize(AUDIO_OUTPUT_SYNCHRONOUS, 4096, eSpeakDataPath, 0);
    if (sampleRate <= 0) {
        LOGE("eSpeak initialization failed!");
        return TTS_FAILURE;
    }

    espeak_SetSynthCallback(eSpeakCallback);

    // Default to US English, starting from a zeroed voice description.
    espeak_VOICE defaultVoice;
    memset(&defaultVoice, 0, sizeof(defaultVoice));
    defaultVoice.languages = "en-us";
    defaultVoice.variant = 0;
    espeak_SetVoiceByProperties(&defaultVoice);
    strcpy(currentLanguage, "en-us");

    hasInitialized = true;
    return TTS_SUCCESS;
}
/** init | |||||
* Allocates eSpeak memory block and initializes the eSpeak system. | |||||
* synthDoneCBPtr - Pointer to callback function which will receive generated samples | |||||
* config - the engine configuration parameters, not used here | |||||
* return tts_result | |||||
*/ | |||||
tts_result TtsEngine::init(synthDoneCB_t synthDoneCBPtr, const char *engineConfig) { | |||||
if (DEBUG) LOGV("%s", __FUNCTION__); | |||||
ttsSynthDoneCBPointer = synthDoneCBPtr; | |||||
hasInitialized = false; | |||||
if ((engineConfig != NULL) && (strlen(engineConfig) > 0)) { | |||||
eSpeakDataPath = (char *) malloc(strlen(engineConfig)); | |||||
strcpy(eSpeakDataPath, engineConfig); | |||||
} else { | |||||
eSpeakDataPath = NULL; | |||||
LOGE("Data path not specified!"); | |||||
return TTS_FAILURE; | |||||
} | |||||
return attemptInit(); | |||||
} | |||||
/** shutdown | |||||
* Unloads all eSpeak resources; terminates eSpeak system and frees eSpeak memory block. | |||||
* return tts_result | |||||
*/ | |||||
tts_result TtsEngine::shutdown(void) { | |||||
if (DEBUG) LOGV("%s", __FUNCTION__); | |||||
if (eSpeakDataPath != NULL) { | |||||
free(eSpeakDataPath); | |||||
} | |||||
espeak_Terminate(); | |||||
return TTS_SUCCESS; | |||||
} | |||||
/* Logs the requested language (when DEBUG is set) and always reports
 * TTS_FAILURE; no resources are loaded by this implementation.
 */
tts_result TtsEngine::loadLanguage(const char *lang, const char *country, const char *variant) {
    if (DEBUG) {
        LOGV("loadLanguage(\"%s\", \"%s\", \"%s\")", lang, country, variant);
    }

    return TTS_FAILURE;
}
/* Determines how well the requested language, country and variant are covered
 * by the eSpeakSupportedVoices table.
 *
 * lang    - 3-letter (matched against the ISO3 column) or 2-letter (matched
 *           against the eSpeak voice column) language code; required
 * country - optional 3- or 2-letter country code; NULL or "" for none
 * variant - optional variant code; NULL or "" for none
 * pindex  - optional out parameter: receives the table index of the best match,
 *           or -1 when the language is not supported
 * return the level of support found
 */
tts_support_result isLanguageSupported(const char *lang, const char *country, const char *variant,
        int *pindex) {
    if (DEBUG) LOGV("isLanguageSupported(\"%s\", \"%s\", \"%s\")", lang, country, variant);

    // Give the caller a defined "no match" index on every failure path.
    if (pindex != NULL) {
        *pindex = -1;
    }

    if ((lang == NULL) || (strlen(lang) == 0)) {
        LOGE("TtsEngine::isLanguageAvailable called with no language");
        return TTS_LANG_NOT_SUPPORTED;
    }

    int langIndex = -1;
    int countryIndex = -1;
    int variantIndex = -1;

    // Find language
    if (strlen(lang) == 3) {
        for (int i = 0; i < NUM_SUPPORTED_VOICES; i++) {
            if (strncmp(lang, eSpeakSupportedVoices[i][1], 3) == 0) {
                LOGI("Found ISO3 language at index %d", i);
                langIndex = i;
                break;
            }
        }
    } else if (strlen(lang) == 2) {
        for (int i = 0; i < NUM_SUPPORTED_VOICES; i++) {
            if (strncmp(lang, eSpeakSupportedVoices[i][0], 2) == 0) {
                LOGI("Found ISO2 language at index %d", i);
                langIndex = i;
                break;
            }
        }
    }

    if (langIndex < 0) {
        LOGV("TtsEngine::isLanguageAvailable called with unsupported language");
        return TTS_LANG_NOT_SUPPORTED;
    }

    if ((country == NULL) || (strlen(country) == 0)) {
        // TODO: Check whether resources are available for this language.
        if (pindex != NULL) {
            *pindex = langIndex;
        }
        LOGI("No country specified, language is available");
        return TTS_LANG_AVAILABLE;
    }

    // Find country. lang is 2 or 3 characters here, but country is caller
    // supplied and of unchecked length, so the key is built with a bounded
    // snprintf(). Only 2- and 3-letter countries are searched for, and those
    // always fit the buffer.
    char lang_country[10];
    snprintf(lang_country, sizeof(lang_country), "%s-%s", lang, country);

    if (strlen(country) == 3) {
        for (int i = langIndex; i < NUM_SUPPORTED_VOICES; i++) {
            if (strcmp(lang_country, eSpeakSupportedVoices[i][1]) == 0) {
                LOGI("Found ISO3 country at index %d", i);
                countryIndex = i;
                break;
            }
        }
    } else if (strlen(country) == 2) {
        for (int i = langIndex; i < NUM_SUPPORTED_VOICES; i++) {
            if (strcmp(lang_country, eSpeakSupportedVoices[i][0]) == 0) {
                LOGI("Found ISO2 country at index %d", i);
                countryIndex = i;
                break;
            }
        }
    }

    if (countryIndex < 0) {
        if (pindex != NULL) {
            *pindex = langIndex;
        }
        LOGI("No country found, language is available");
        return TTS_LANG_AVAILABLE;
    }

    if ((variant == NULL) || (strlen(variant) == 0)) {
        if (pindex != NULL) {
            *pindex = countryIndex;
        }
        LOGI("No variant specified, language and country are available");
        return TTS_LANG_COUNTRY_AVAILABLE;
    }

    // Find variant. The variant length is unchecked too; a key that does not
    // fit the buffer is treated as "no variant found" instead of overflowing.
    char lang_country_variant[15];
    const int keyLength = snprintf(lang_country_variant, sizeof(lang_country_variant),
                                   "%s-%s-%s", lang, country, variant);
    const bool keyFits = (keyLength >= 0) && ((size_t) keyLength < sizeof(lang_country_variant));

    if (keyFits) {
        for (int i = countryIndex; i < NUM_SUPPORTED_VOICES; i++) {
            if (strcmp(lang_country_variant, eSpeakSupportedVoices[i][1]) == 0) {
                LOGI("Found variant at index %d", i);
                variantIndex = i;
                break;
            }
        }
    }

    if (variantIndex < 0) {
        if (pindex != NULL) {
            *pindex = countryIndex;
        }
        LOGI("No variant found, language and country are available");
        return TTS_LANG_COUNTRY_AVAILABLE;
    }

    if (pindex != NULL) {
        *pindex = variantIndex;
    }

    LOGI("Language, country, and variant are available");
    return TTS_LANG_COUNTRY_VAR_AVAILABLE;
}
/** setLanguage
 *  Selects the eSpeak voice that best matches the requested language, country
 *  and variant, and records the request for getLanguage().
 *  lang - language code; must be supported by isLanguageSupported()
 *  country - country code; may be NULL or empty
 *  variant - variant code; may be NULL or empty
 *  return TTS_SUCCESS, or TTS_FAILURE when the engine cannot be initialized, the
 *         language is unsupported, or eSpeak rejects the voice
 */
tts_result TtsEngine::setLanguage(const char *lang, const char *country, const char *variant) {
    if (DEBUG) LOGV("setLanguage(\"%s\", \"%s\", \"%s\")", lang, country, variant);

    // Make sure the engine is initialized! Setting a voice on an engine that
    // failed to start cannot work, so give up early.
    if (attemptInit() != TTS_SUCCESS) {
        LOGE("setLanguage called but the engine could not be initialized");
        return TTS_FAILURE;
    }

    int index = -1;
    isLanguageSupported(lang, country, variant, &index);

    if (index < 0) {
        LOGE("setLanguage called with unsupported language");
        return TTS_FAILURE;
    }

    // country and variant are optional (NULL is accepted by isLanguageSupported)
    // and of unchecked length, so store them with bounded, NULL-safe copies.
    snprintf(currentLanguage, sizeof(currentLanguage), "%s", lang);
    snprintf(currentLang, sizeof(currentLang), "%s", lang);
    snprintf(currentCountry, sizeof(currentCountry), "%s", (country != NULL) ? country : "");
    snprintf(currentVariant, sizeof(currentVariant), "%s", (variant != NULL) ? variant : "");

    espeak_VOICE voice;
    memset(&voice, 0, sizeof(espeak_VOICE)); // Zero out the voice first
    voice.variant = 0;
    // Point straight at the table entry; no fixed-size scratch copy is needed.
    voice.languages = eSpeakSupportedVoices[index][0];

    const espeak_ERROR err = espeak_SetVoiceByProperties(&voice);
    if (err != EE_OK) {
        LOGE("Error code %d when setting voice properties!", err);
        return TTS_FAILURE;
    }

    return TTS_SUCCESS;
}
/* Reports the level of support for the given language, country and variant.
 * Delegates to isLanguageSupported() without requesting the matching table index.
 */
tts_support_result TtsEngine::isLanguageAvailable(const char *lang, const char *country,
        const char *variant) {
    if (DEBUG) {
        LOGV("%s", __FUNCTION__);
    }

    const tts_support_result support = isLanguageSupported(lang, country, variant, NULL);
    return support;
}
/* Copies the stored language, country and variant strings (currentLang,
 * currentCountry, currentVariant) into the caller-provided buffers.
 * Each destination must be large enough for the corresponding string.
 * return TTS_SUCCESS
 */
tts_result TtsEngine::getLanguage(char *language, char *country, char *variant) {
    if (DEBUG) {
        LOGV("%s", __FUNCTION__);
    }

    strcpy(language, currentLang);
    strcpy(country, currentCountry);
    strcpy(variant, currentVariant);

    return TTS_SUCCESS;
}
/** setAudioFormat | |||||
* sets the audio format to use for synthesis, returns what is actually used. | |||||
* @encoding - reference to encoding format | |||||
* @rate - reference to sample rate | |||||
* @channels - reference to number of channels | |||||
* return tts_result | |||||
* */ | |||||
tts_result TtsEngine::setAudioFormat(tts_audio_format& encoding, uint32_t& rate, int& channels) { | |||||
LOGE("setAudioFormat(%d, %d, %d) is unsupported", encoding, rate, channels); | |||||
encoding = TTS_AUDIO_FORMAT_PCM_16_BIT; | |||||
rate = sampleRate; | |||||
channels = 1; | |||||
return TTS_SUCCESS; | |||||
} | |||||
/** setProperty
 *  Sets one engine property from its string representation.
 *  Recognised properties: "language" (accepted but not applied), "rate",
 *  "pitch" and "volume". Names are matched by prefix using strncmp.
 *  @property - name of the property to set
 *  @value - NUL-terminated string holding the new value
 *  @size - size of the value; only used for debug logging
 *  return TTS_PROPERTY_UNSUPPORTED if property is NULL or not recognised,
 *         TTS_VALUE_INVALID if value is NULL,
 *         TTS_SUCCESS if eSpeak accepted the value, TTS_FAILURE otherwise
 */
tts_result TtsEngine::setProperty(const char *property, const char *value, const size_t size) {
    /* Sanity check. This runs before any logging so that a NULL pointer is
       never handed to a "%s" conversion. */
    if (property == NULL) {
        LOGE("setProperty called with property NULL");
        return TTS_PROPERTY_UNSUPPORTED;
    }
    if (value == NULL) {
        LOGE("setProperty called with value NULL");
        return TTS_VALUE_INVALID;
    }

    // size is a size_t; convert it explicitly so it matches the %d conversion.
    if (DEBUG) LOGV("setProperty(\"%s\", \"%s\", %d)", property, value, static_cast<int>(size));

    espeak_ERROR result;
    if (strncmp(property, "language", 8) == 0) {
        // TODO: Set this property
        result = EE_OK;
    } else if (strncmp(property, "rate", 4) == 0) {
        // The incoming value is treated as a percentage and scaled by the value
        // returned from espeak_GetParameter(espeakRATE, 0).
        // NOTE(review): per the eSpeak API, 0 requests the default rate - confirm.
        int rate = atoi(value) * espeak_GetParameter(espeakRATE, 0) / 100;
        if (DEBUG) LOGV("setProperty rate : rate=%s, wpm=%d", value, rate);
        result = espeak_SetParameter(espeakRATE, rate, 0);
    } else if (strncmp(property, "pitch", 5) == 0) {
        // The values of pitch from android range from 50 - 200, with 100 being normal.
        // The values espeak supports are from 0 - 100, with 50 being normal.
        // Therefore, halve the value to get the value that espeak supports:
        int pitch = atoi(value) / 2;
        if (DEBUG) LOGV("setProperty pitch : pitch=%d", pitch);
        result = espeak_SetParameter(espeakPITCH, pitch, 0);
    } else if (strncmp(property, "volume", 6) == 0) {
        int volume = atoi(value);
        result = espeak_SetParameter(espeakVOLUME, volume, 0);
    } else {
        return TTS_PROPERTY_UNSUPPORTED;
    }

    return (result == EE_OK) ? TTS_SUCCESS : TTS_FAILURE;
}
// Copies the NUL-terminated string src into value when *iosize is large enough
// to hold it including the terminator. Otherwise stores the required size in
// *iosize and returns TTS_PROPERTY_SIZE_TOO_SMALL without touching value.
static tts_result copyPropertyValue(const char *src, char *value, size_t *iosize) {
    const size_t needed = strlen(src) + 1;
    if (*iosize < needed) {
        *iosize = needed;
        return TTS_PROPERTY_SIZE_TOO_SMALL;
    }
    memcpy(value, src, needed);
    return TTS_SUCCESS;
}

/** getProperty
 *  Gets the current value of an engine property as a string.
 *  Recognised properties: "language", "rate", "pitch" and "volume". Names are
 *  matched by prefix using strncmp.
 *  @property - name of the property to read
 *  @value - buffer that receives the NUL-terminated value
 *  @iosize - in: capacity of value in bytes; out: the required capacity when
 *            TTS_PROPERTY_SIZE_TOO_SMALL is returned, otherwise unchanged
 *  return TTS_PROPERTY_UNSUPPORTED if property is NULL or not recognised,
 *         TTS_VALUE_INVALID if value or iosize is NULL,
 *         TTS_PROPERTY_SIZE_TOO_SMALL if the buffer cannot hold the value,
 *         TTS_SUCCESS otherwise
 */
tts_result TtsEngine::getProperty(const char *property, char *value, size_t *iosize) {
    /* Sanity check. This runs before any logging so that a NULL pointer is
       never handed to a "%s" conversion. */
    if (property == NULL) {
        LOGE("getProperty called with property NULL");
        return TTS_PROPERTY_UNSUPPORTED;
    }
    if (value == NULL) {
        LOGE("getProperty called with value NULL");
        return TTS_VALUE_INVALID;
    }
    if (iosize == NULL) {
        LOGE("getProperty called with iosize NULL");
        return TTS_VALUE_INVALID;
    }

    if (DEBUG) LOGV("getProperty(\"%s\", ...)", property);

    // Large enough for any int printed with "%d", including sign and terminator.
    char number[16];

    if (strncmp(property, "language", 8) == 0) {
        // An unset language is reported as the empty string; it goes through
        // the same size check as every other value.
        const char *language = currentLanguage;
        if (language == NULL) {
            language = "";
        }
        return copyPropertyValue(language, value, iosize);
    } else if (strncmp(property, "rate", 4) == 0) {
        // Ratio of the two espeak_GetParameter results, scaled by 100.
        // NOTE(review): per the eSpeak API, 1 requests the current value and
        // 0 the default - confirm.
        int rate = espeak_GetParameter(espeakRATE, 1) * 100 / espeak_GetParameter(espeakRATE, 0);
        snprintf(number, sizeof(number), "%d", rate);
        return copyPropertyValue(number, value, iosize);
    } else if (strncmp(property, "pitch", 5) == 0) {
        // Doubled to undo the halving applied when the pitch is set.
        snprintf(number, sizeof(number), "%d", (espeak_GetParameter(espeakPITCH, 1) * 2));
        return copyPropertyValue(number, value, iosize);
    } else if (strncmp(property, "volume", 6) == 0) {
        snprintf(number, sizeof(number), "%d", espeak_GetParameter(espeakVOLUME, 1));
        return copyPropertyValue(number, value, iosize);
    }

    LOGE("Unsupported property");
    return TTS_PROPERTY_UNSUPPORTED;
}
/** synthesizeText | |||||
* Synthesizes a text string. | |||||
* The text string could be annotated with SSML tags. | |||||
* @text - text to synthesize | |||||
* @buffer - buffer which will receive generated samples | |||||
* @bufferSize - size of buffer | |||||
* @userdata - pointer to user data which will be passed back to callback function | |||||
* return tts_result | |||||
*/ | |||||
tts_result TtsEngine::synthesizeText(const char *text, int8_t *buffer, size_t bufferSize, | |||||
void *userdata) { | |||||
if (DEBUG) LOGV("%s", __FUNCTION__); | |||||
espeak_SetSynthCallback(eSpeakCallback); | |||||
unsigned int unique_identifier; | |||||
espeak_Synth(text, strlen(text), 0, // position | |||||
POS_CHARACTER, 0, // end position (0 means no end position) | |||||
espeakCHARS_UTF8, // text is UTF-8 encoded | |||||
&unique_identifier, userdata); | |||||
espeak_Synchronize(); | |||||
LOGI("Synthesis done"); | |||||
return TTS_SUCCESS; | |||||
} | |||||
/** stop | |||||
* Aborts the running synthesis. | |||||
* return tts_result | |||||
*/ | |||||
tts_result TtsEngine::stop() { | |||||
if (DEBUG) LOGV("%s", __FUNCTION__); | |||||
espeak_Cancel(); | |||||
return TTS_SUCCESS; | |||||
} | |||||
#ifdef __cplusplus | |||||
extern "C" { | |||||
#endif | |||||
TtsEngine* getTtsEngine() { | |||||
if (DEBUG) LOGV("%s", __FUNCTION__); | |||||
return new TtsEngine(); | |||||
} | |||||
#ifdef __cplusplus | |||||
} | |||||
#endif |
/* | |||||
* Copyright (C) 2011 The Android Open Source Project | |||||
* | |||||
* Licensed under the Apache License, Version 2.0 (the "License"); | |||||
* you may not use this file except in compliance with the License. | |||||
* You may obtain a copy of the License at | |||||
* | |||||
* http://www.apache.org/licenses/LICENSE-2.0 | |||||
* | |||||
* Unless required by applicable law or agreed to in writing, software | |||||
* distributed under the License is distributed on an "AS IS" BASIS, | |||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
* See the License for the specific language governing permissions and | |||||
* limitations under the License. | |||||
*/ | |||||
package com.reecedunn.espeak; | |||||
import android.app.Activity; | |||||
import android.os.Bundle; | |||||
/* | |||||
* The Java portion of this TTS plugin engine app does nothing. | |||||
* This activity is only here so that the native code can be | |||||
* wrapped up inside an apk file. | |||||
* | |||||
* The file path structure convention is that the native library | |||||
* implementing TTS must be a file placed here: | |||||
* /data/data/<PACKAGE_NAME>/lib/libtts<ACTIVITY_NAME_LOWERCASED>.so | |||||
* Example: | |||||
* /data/data/com.reecedunn.espeak/lib/libttsespeak.so | |||||
*/ | |||||
public class eSpeak extends Activity { | |||||
@Override | |||||
public void onCreate(Bundle savedInstanceState) { | |||||
super.onCreate(savedInstanceState); | |||||
// The Java portion of this does nothing. | |||||
// This activity is only here so that everything | |||||
// can be wrapped up inside an apk file. | |||||
finish(); | |||||
} | |||||
} |
/* | |||||
* Copyright (C) 2011 Google Inc. | |||||
* | |||||
* Licensed under the Apache License, Version 2.0 (the "License"); | |||||
* you may not use this file except in compliance with the License. | |||||
* You may obtain a copy of the License at | |||||
* | |||||
* http://www.apache.org/licenses/LICENSE-2.0 | |||||
* | |||||
* Unless required by applicable law or agreed to in writing, software | |||||
* distributed under the License is distributed on an "AS IS" BASIS, | |||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||||
* See the License for the specific language governing permissions and | |||||
* limitations under the License. | |||||
*/ | |||||
package com.reecedunn.espeak.providers; | |||||
import com.reecedunn.espeak.CheckVoiceData; | |||||
import android.content.ContentProvider; | |||||
import android.content.ContentValues; | |||||
import android.database.Cursor; | |||||
import android.database.MatrixCursor; | |||||
import android.net.Uri; | |||||
import java.io.File; | |||||
/** | |||||
* Provides the "engineConfig" parameter for the legacy (pre-ICS) TTS API. | |||||
* | |||||
* @author [email protected] (Alan Viverette) | |||||
*/ | |||||
public class SettingsProvider extends ContentProvider { | |||||
private class SettingsCursor extends MatrixCursor { | |||||
private String settings; | |||||
public SettingsCursor(String[] columnNames) { | |||||
super(columnNames); | |||||
} | |||||
public void putSettings(String settings) { | |||||
this.settings = settings; | |||||
} | |||||
@Override | |||||
public int getCount() { | |||||
return 1; | |||||
} | |||||
@Override | |||||
public String getString(int column) { | |||||
return settings; | |||||
} | |||||
} | |||||
@Override | |||||
public int delete(Uri uri, String selection, String[] selectionArgs) { | |||||
return 0; | |||||
} | |||||
@Override | |||||
public String getType(Uri uri) { | |||||
return null; | |||||
} | |||||
@Override | |||||
public Uri insert(Uri uri, ContentValues values) { | |||||
return null; | |||||
} | |||||
@Override | |||||
public boolean onCreate() { | |||||
return true; | |||||
} | |||||
@Override | |||||
public Cursor query(Uri uri, String[] projection, String selection, String[] selectionArgs, | |||||
String sortOrder) { | |||||
final File dataPath = CheckVoiceData.getDataPath(getContext()); | |||||
final String[] dummyColumns = { | |||||
"", "" | |||||
}; | |||||
final SettingsCursor cursor = new SettingsCursor(dummyColumns); | |||||
cursor.putSettings(dataPath.getParent()); | |||||
return cursor; | |||||
} | |||||
@Override | |||||
public int update(Uri uri, ContentValues values, String selection, String[] selectionArgs) { | |||||
return 0; | |||||
} | |||||
} |