@@ -0,0 +1,26 @@ | |||
/* | |||
* Copyright (C) 2009 Google Inc. | |||
* | |||
* Licensed under the Apache License, Version 2.0 (the "License"); | |||
* you may not use this file except in compliance with the License. | |||
* You may obtain a copy of the License at | |||
* | |||
* http://www.apache.org/licenses/LICENSE-2.0 | |||
* | |||
* Unless required by applicable law or agreed to in writing, software | |||
* distributed under the License is distributed on an "AS IS" BASIS, | |||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
* See the License for the specific language governing permissions and | |||
* limitations under the License. | |||
*/ | |||
#ifndef LOG_H_
#define LOG_H_
#include <android/log.h>
// Thin wrappers around the Android logcat API.
// Each translation unit using these macros must #define LOG_TAG (the tag
// string shown in logcat) before the macros are expanded; arguments are
// printf-style.
#define LOGV(...) __android_log_print(ANDROID_LOG_VERBOSE, LOG_TAG, __VA_ARGS__)
#define LOGI(...) __android_log_print(ANDROID_LOG_INFO, LOG_TAG, __VA_ARGS__)
#define LOGE(...) __android_log_print(ANDROID_LOG_ERROR, LOG_TAG, __VA_ARGS__)
#endif /* LOG_H_ */
@@ -0,0 +1,245 @@ | |||
/* | |||
* Copyright (C) 2009 Google Inc. | |||
* | |||
* Licensed under the Apache License, Version 2.0 (the "License"); | |||
* you may not use this file except in compliance with the License. | |||
* You may obtain a copy of the License at | |||
* | |||
* http://www.apache.org/licenses/LICENSE-2.0 | |||
* | |||
* Unless required by applicable law or agreed to in writing, software | |||
* distributed under the License is distributed on an "AS IS" BASIS, | |||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
* See the License for the specific language governing permissions and | |||
* limitations under the License. | |||
*/ | |||
// This header defines the interface used by the Android platform | |||
// to access Text-To-Speech functionality in shared libraries that implement | |||
// speech synthesis and the management of resources associated with the | |||
// synthesis. | |||
// An example of the implementation of this interface can be found in | |||
// FIXME: add path+name to implementation of default TTS engine | |||
// Libraries implementing this interface are used in: | |||
// frameworks/base/tts/jni/android_tts_SpeechSynthesis.cpp | |||
#ifndef TTS_ENGINE_H_ | |||
#define TTS_ENGINE_H_ | |||
namespace android { | |||
#define ANDROID_TTS_ENGINE_PROPERTY_CONFIG "engineConfig" | |||
#define ANDROID_TTS_ENGINE_PROPERTY_PITCH "pitch" | |||
#define ANDROID_TTS_ENGINE_PROPERTY_RATE "rate" | |||
#define ANDROID_TTS_ENGINE_PROPERTY_VOLUME "volume" | |||
// Reported by the engine through the synthesis callback: whether the buffer
// just delivered completes the utterance or more data will follow.
enum tts_synth_status {
    TTS_SYNTH_DONE = 0,
    TTS_SYNTH_PENDING = 1
};
// Returned by the client from the synthesis callback: whether the engine
// should keep producing audio or stop.
enum tts_callback_status {
    TTS_CALLBACK_HALT = 0,
    TTS_CALLBACK_CONTINUE = 1
};
// Audio sample formats the engine may produce.
// NOTE: This is duplicated in compat/jni/tts.h. Please
// make changes there as well.
enum tts_audio_format {
    TTS_AUDIO_FORMAT_INVALID = -1,
    TTS_AUDIO_FORMAT_DEFAULT = 0,
    TTS_AUDIO_FORMAT_PCM_16_BIT = 1,
    TTS_AUDIO_FORMAT_PCM_8_BIT = 2,
};
// The callback is used by the implementation of this interface to deliver
// synthesized audio to its client, the Android TTS service, and to signal
// completion of the last requested synthesis.
// The callback takes:
//    @param [inout] void *&       - The userdata pointer set in the original
//                                   synth call
//    @param [in]    uint32_t      - Track sampling rate in Hz
//    @param [in]    tts_audio_format - The audio format
//    @param [in]    int           - The number of channels
//    @param [inout] int8_t *&     - A buffer of audio data only valid during the
//                                   execution of the callback
//    @param [inout] size_t &      - The size of the buffer
//    @param [in] tts_synth_status - indicate whether the synthesis is done, or
//                                   if more data is to be synthesized.
//    @return TTS_CALLBACK_HALT to indicate the synthesis must stop,
//            TTS_CALLBACK_CONTINUE to indicate the synthesis must continue if
//            there is more data to produce.
// NOTE(review): this header uses uint32_t/int8_t/size_t but includes neither
// <stdint.h> nor <stddef.h>; it appears to rely on an including file or the
// toolchain to provide them — confirm before reusing standalone.
typedef tts_callback_status (synthDoneCB_t)(void *&, uint32_t,
        tts_audio_format, int, int8_t *&, size_t&, tts_synth_status);
class TtsEngine;
// Factory entry point looked up by the TTS service when it dlopen()s the
// engine shared library; returns a new engine instance.
extern "C" TtsEngine* getTtsEngine();
// General result codes for engine operations.
enum tts_result {
    TTS_SUCCESS = 0,
    TTS_FAILURE = -1,
    TTS_FEATURE_UNSUPPORTED = -2,
    TTS_VALUE_INVALID = -3,
    TTS_PROPERTY_UNSUPPORTED = -4,
    TTS_PROPERTY_SIZE_TOO_SMALL = -5,
    TTS_MISSING_RESOURCES = -6
};
// Granularity of locale support reported by isLanguageAvailable(): ordered
// from best match (language+country+variant) down to unsupported.
enum tts_support_result {
    TTS_LANG_COUNTRY_VAR_AVAILABLE = 2,
    TTS_LANG_COUNTRY_AVAILABLE = 1,
    TTS_LANG_AVAILABLE = 0,
    TTS_LANG_MISSING_DATA = -1,
    TTS_LANG_NOT_SUPPORTED = -2
};
// Abstract interface every TTS engine shared library implements.  The Android
// TTS service drives synthesis exclusively through this class; audio flows
// back through the synthDoneCB_t callback registered in init().
class TtsEngine
{
public:
    virtual ~TtsEngine() {}
    // Initialize the TTS engine and returns whether initialization succeeded.
    // @param synthDoneCBPtr synthesis callback function pointer
    // @param engineConfig engine-specific configuration string (may be NULL)
    // @return TTS_SUCCESS, or TTS_FAILURE
    virtual tts_result init(synthDoneCB_t synthDoneCBPtr, const char *engineConfig);
    // Shut down the TTS engine and releases all associated resources.
    // @return TTS_SUCCESS, or TTS_FAILURE
    virtual tts_result shutdown();
    // Interrupt synthesis and flushes any synthesized data that hasn't been
    // output yet. This will block until callbacks underway are completed.
    // @return TTS_SUCCESS, or TTS_FAILURE
    virtual tts_result stop();
    // Returns the level of support for the language, country and variant.
    // @return TTS_LANG_COUNTRY_VAR_AVAILABLE if the language, country and variant are supported,
    //            and the corresponding resources are correctly installed
    //         TTS_LANG_COUNTRY_AVAILABLE if the language and country are supported and the
    //             corresponding resources are correctly installed, but there is no match for
    //             the specified variant
    //         TTS_LANG_AVAILABLE if the language is supported and the
    //             corresponding resources are correctly installed, but there is no match for
    //             the specified country and variant
    //         TTS_LANG_MISSING_DATA if the required resources to provide any level of support
    //             for the language are not correctly installed
    //         TTS_LANG_NOT_SUPPORTED if the language is not supported by the TTS engine.
    virtual tts_support_result isLanguageAvailable(const char *lang, const char *country,
            const char *variant);
    // Load the resources associated with the specified language. The loaded
    // language will only be used once a call to setLanguage() with the same
    // language value is issued. Language and country values are coded according to the ISO three
    // letter codes for languages and countries, as can be retrieved from a java.util.Locale
    // instance. The variant value is encoded as the variant string retrieved from a
    // java.util.Locale instance built with that variant data.
    // @param lang pointer to the ISO three letter code for the language
    // @param country pointer to the ISO three letter code for the country
    // @param variant pointer to the variant code
    // @return TTS_SUCCESS, or TTS_FAILURE
    virtual tts_result loadLanguage(const char *lang, const char *country, const char *variant);
    // Select the specified language, country and Locale variant for subsequent
    // synthesis, loading the associated resources if necessary.
    // Language and country values are coded according to the ISO three
    // letter codes for languages and countries, as can be retrieved from a java.util.Locale
    // instance. The variant value is encoded as the variant string retrieved from a
    // java.util.Locale instance built with that variant data.
    // @param lang pointer to the ISO three letter code for the language
    // @param country pointer to the ISO three letter code for the country
    // @param variant pointer to the variant code
    // @return TTS_SUCCESS, or TTS_FAILURE
    virtual tts_result setLanguage(const char *lang, const char *country, const char *variant);
    // Retrieve the currently set language, country and variant, or empty strings if none of
    // parameters have been set. Language and country are represented by their 3-letter ISO code
    // @param[out] language pointer to the retrieved 3-letter code language value
    // @param[out] country pointer to the retrieved 3-letter code country value
    // @param[out] variant pointer to the retrieved variant value
    // @return TTS_SUCCESS, or TTS_FAILURE
    virtual tts_result getLanguage(char *language, char *country, char *variant);
    // Notifies the engine what audio parameters should be used for the synthesis.
    // This is meant to be used as a hint, the engine implementation will set the output values
    // to those of the synthesis format, based on a given hint.
    // @param[inout] encoding in: the desired audio sample format
    //                         out: the format used by the TTS engine
    // @param[inout] rate in: the desired audio sample rate
    //                         out: the sample rate used by the TTS engine
    // @param[inout] channels in: the desired number of audio channels
    //                         out: the number of channels used by the TTS engine
    // @return TTS_SUCCESS, or TTS_FAILURE
    virtual tts_result setAudioFormat(tts_audio_format& encoding, uint32_t& rate,
            int& channels);
    // Set a property for the the TTS engine
    // "size" is the maximum size of "value" for properties "property"
    // @param property pointer to the property name
    // @param value pointer to the property value
    // @param size maximum size required to store this type of property
    // @return TTS_PROPERTY_UNSUPPORTED, or TTS_SUCCESS, or TTS_FAILURE,
    //          or TTS_VALUE_INVALID
    virtual tts_result setProperty(const char *property, const char *value,
            const size_t size);
    // Retrieve a property from the TTS engine
    // @param property pointer to the property name
    // @param[out]   value pointer to the retrieved language value
    // @param[inout] iosize in: stores the size available to store the
    //                          property value.
    //                      out: stores the size required to hold the language
    //                           value if getLanguage() returned
    //                           TTS_PROPERTY_SIZE_TOO_SMALL, unchanged otherwise
    // @return TTS_PROPERTY_UNSUPPORTED, or TTS_SUCCESS,
    //         or TTS_PROPERTY_SIZE_TOO_SMALL
    virtual tts_result getProperty(const char *property, char *value,
            size_t *iosize);
    // Synthesize the text.
    // As the synthesis is performed, the engine invokes the callback to notify
    // the TTS framework that it has filled the given buffer, and indicates how
    // many bytes it wrote. The callback is called repeatedly until the engine
    // has generated all the audio data corresponding to the text.
    // Note about the format of the input: the text parameter may use the
    // following elements
    // and their respective attributes as defined in the SSML 1.0 specification:
    //    * lang
    //    * say-as:
    //          o interpret-as
    //    * phoneme
    //    * voice:
    //          o gender,
    //          o age,
    //          o variant,
    //          o name
    //    * emphasis
    //    * break:
    //          o strength,
    //          o time
    //    * prosody:
    //          o pitch,
    //          o contour,
    //          o range,
    //          o rate,
    //          o duration,
    //          o volume
    //    * mark
    // Differences between this text format and SSML are:
    //    * full SSML documents are not supported
    //    * namespaces are not supported
    // Text is coded in UTF-8.
    // @param text the UTF-8 text to synthesize
    // @param userdata pointer to be returned when the call is invoked
    // @param buffer the location where the synthesized data must be written
    // @param bufferSize the number of bytes that can be written in buffer
    // @return TTS_SUCCESS or TTS_FAILURE
    virtual tts_result synthesizeText(const char *text, int8_t *buffer,
            size_t bufferSize, void *userdata);
};
} // namespace android | |||
#endif /* TTS_ENGINE_H_ */ |
@@ -0,0 +1,593 @@ | |||
/* | |||
* Copyright (C) 2008 Google Inc. | |||
* | |||
* Licensed under the Apache License, Version 2.0 (the "License"); | |||
* you may not use this file except in compliance with the License. | |||
* You may obtain a copy of the License at | |||
* | |||
* http://www.apache.org/licenses/LICENSE-2.0 | |||
* | |||
* Unless required by applicable law or agreed to in writing, software | |||
* distributed under the License is distributed on an "AS IS" BASIS, | |||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
* See the License for the specific language governing permissions and | |||
* limitations under the License. | |||
*/ | |||
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#define LOG_TAG "eSpeak Engine"
#include <speak_lib.h>
#include <TtsEngine.h>
#include <Log.h>
/* | |||
* This is the Manager layer. It sits on top of the native eSpeak engine | |||
* and provides the interface to the defined Google TTS engine API. | |||
* The Google engine API is the boundary to allow a TTS engine to be swapped. | |||
* The Manager layer also provide the SSML tag interpretation. | |||
* The supported SSML tags are mapped to corresponding tags natively supported by eSpeak. | |||
* Native eSpeak functions always begin with espeak_XXX. | |||
* | |||
* Only a subset of SSML 1.0 tags are supported. | |||
* Some SSML tags involve significant complexity. | |||
* If the language is changed through an SSML tag, there is a latency for the load. | |||
*/ | |||
using namespace android;
// Name of the eSpeak resource directory expected under eSpeakDataPath.
const char *ESPEAK_DIRECTORY = "espeak-data";
// Files that must exist before espeak_Initialize() can succeed.
const char *eSpeakBaseResources[] = {"intonations", "phondata", "phonindex", "phontab",
        "en_dict", "voices/en/en-us" };
const int NUM_BASE_RESOURCES = 6;
// Voice lookup table.  Format is {espeak voice, iso3 code, name}.
// Rows for the same language are contiguous, which the country/variant
// searches in isLanguageSupported() rely on (they scan forward from the
// first language match).
const char *eSpeakSupportedVoices[][3] = {
        {"en-us",       "eng",          "English"},
        {"en-us",       "eng-USA",      "English (US)"},
        {"en",          "eng-GBR",      "English (UK)"},
        {"en-sc",       "eng-GBR-sc",   "English (Scottish)"},
        {"en-n",        "eng-GBR-n",    "English (Northern UK)"},
        {"en-rp",       "eng-GBR-rp",   "English (Received Pronunciation)"},
        {"en-wm",       "eng-GBR-wm",   "English (West Midlands)"},
        {"af",          "afr",          "Afrikaans"},
        {"bs",          "bos",          "Bosnian"},
        {"ca",          "cat",          "Catalan"},
        {"cs",          "ces",          "Czech"},
        {"da",          "dan",          "Danish"},
        {"de",          "deu",          "German"},
        {"el",          "ell",          "Greek"},
        {"eo",          "epo",          "Esperanto"},
        {"es",          "spa",          "Spanish"},
        {"es-la",       "spa-MEX",      "Spanish (Latin America)"},
        {"fi",          "fin",          "Finnish"},
        {"fr",          "fra",          "French"},
        {"hr",          "hrv",          "Croatian"},
        {"hu",          "hun",          "Hungarian"},
        {"it",          "ita",          "Italian"},
        {"kn",          "kan",          "Kannada"},
        {"ku",          "kur",          "Kurdish"},
        {"lv",          "lav",          "Latvian"},
        {"nl",          "nld",          "Dutch"},
        {"pl",          "pol",          "Polish"},
        {"pt",          "por",          "Portuguese (Brazil)"},
        {"pt",          "por-BRA",      "Portuguese (Brazil)"},
        {"pt-pt",       "por-PRT",      "Portuguese"},
        {"ro",          "ron",          "Romanian"},
        {"sk",          "slk",          "Slovak"},
        {"sr",          "srp",          "Serbian"},
        {"sv",          "swe",          "Swedish"},
        {"sw",          "swa",          "Swahili"},
        {"ta",          "tam",          "Tamil"},
        {"tr",          "tur",          "Turkish"},
        {"zh",          "zho",          "Chinese (Mandarin)"},
        {"cy",          "cym",          "Welsh"},
        {"hi",          "hin",          "Hindi"},
        {"hy",          "hye",          "Armenian"},
        {"id",          "ind",          "Indonesian"},
        {"is",          "isl",          "Icelandic"},
        {"ka",          "kat",          "Georgian"},
        {"la",          "lat",          "Latin"},
        {"mk",          "mkd",          "Macedonian"},
        {"no",          "nor",          "Norwegian"},
        {"ru",          "rus",          "Russian"},
        {"sq",          "sqi",          "Albanian"},
        {"vi",          "vie",          "Vietnamese"},
        {"zh-yue",      "zho-HKG",      "Chinese (Cantonese)"},
        {"grc",         "grc",          "Ancient Greek"},
        {"jbo",         "jbo",          "Lojban"},
        {"nci",         "nci",          "Nahuatl (Classical)"},
        {"pap",         "pap",          "Papiamento" }
};
// Must match the number of rows in eSpeakSupportedVoices above.
const int NUM_SUPPORTED_VOICES = 55;
/* Integer constants */
// eSpeak's notion of 100% speaking rate, in words per minute.
const int DEFAULT_SPEECH_RATE = 150;
// Callback to the TTS API, registered by TtsEngine::init().
synthDoneCB_t *ttsSynthDoneCBPointer;
// Language most recently selected (read back by getProperty("language")).
char *currentLanguage = (char *) "en-us";
char *currentRate = (char *) "150";
// Root directory passed to init(); heap-allocated, freed in shutdown().
char *eSpeakDataPath = NULL;
// Locale components recorded by setLanguage() for getLanguage().
char currentLang[10];
char currentCountry[10];
char currentVariant[10];
// True once espeak_Initialize() has succeeded (see attemptInit()).
bool hasInitialized = false;
/* Functions internal to the eSpeak engine wrapper */ | |||
static void setSpeechRate(int speechRate) { | |||
espeak_ERROR err = espeak_SetParameter(espeakRATE, speechRate, 0); | |||
} | |||
/* Functions exposed to the TTS API */ | |||
/* Callback from espeak. Should call back to the TTS API */ | |||
static int eSpeakCallback(short *wav, int numsamples, espeak_EVENT *events) { | |||
LOGI("Callback with %d samples", numsamples); | |||
int8_t * castedWav = (int8_t *) wav; | |||
size_t bufferSize = 0; | |||
if (numsamples < 1) { | |||
size_t silenceBufferSize = 2; | |||
int8_t *silence = new int8_t[silenceBufferSize]; // TODO: This will be a small memory leak, but do it this way for now because passing in an empty buffer can cause a crash. | |||
silence[0] = 0; | |||
silence[1] = 0; | |||
ttsSynthDoneCBPointer(events->user_data, 22050, TTS_AUDIO_FORMAT_PCM_16_BIT, 1, silence, | |||
silenceBufferSize, TTS_SYNTH_DONE); | |||
return 1; | |||
} | |||
bufferSize = numsamples * sizeof(short); | |||
ttsSynthDoneCBPointer(events->user_data, 22050, TTS_AUDIO_FORMAT_PCM_16_BIT, 1, castedWav, | |||
bufferSize, TTS_SYNTH_PENDING); | |||
return 0; // continue synthesis (1 is to abort) | |||
} | |||
// Returns true if fileName names a file this process can open for reading.
// fopen() is used purely as an existence/readability probe; no data is read.
// The parameter is const-correct now (the function never modifies it), which
// remains compatible with callers passing writable buffers.
static bool fileExists(const char *fileName) {
    FILE *file = fopen(fileName, "r");
    if (file == NULL) {
        return false;
    }
    fclose(file);
    return true;
}
static bool hasBaseResources() { | |||
char filename[255]; | |||
for (int i = 0; i < NUM_BASE_RESOURCES; i++) { | |||
sprintf(filename, "%s/%s/%s", eSpeakDataPath, ESPEAK_DIRECTORY, eSpeakBaseResources[i]); | |||
if (!fileExists(filename)) { | |||
LOGE("Missing resource: %s", filename); | |||
return false; | |||
} | |||
} | |||
return true; | |||
} | |||
/* Google Engine API function implementations */ | |||
// Brings up the native eSpeak engine if it is not running yet.
// Idempotent: returns immediately with TTS_SUCCESS once initialization has
// succeeded.  Requires eSpeakDataPath to be set (see TtsEngine::init) and the
// base resources to be present on disk.
// @return TTS_SUCCESS, or TTS_FAILURE
tts_result attemptInit() {
    if (hasInitialized) {
        return TTS_SUCCESS;
    }
    if (!hasBaseResources()) {
        return TTS_FAILURE;
    }
    // TODO Make sure that the speech data is loaded in
    // the directory /sdcard/espeak-data before calling this.
    // Synchronous mode: espeak_Synth() blocks and delivers audio through the
    // registered callback on the calling thread.
    int sampleRate = espeak_Initialize(AUDIO_OUTPUT_SYNCHRONOUS, 4096, eSpeakDataPath, 0);
    if (sampleRate <= 0) {
        LOGE("eSpeak initialization failed!");
        return TTS_FAILURE;
    }
    espeak_SetSynthCallback(eSpeakCallback);
    espeak_SetParameter(espeakRATE, DEFAULT_SPEECH_RATE, 0);
    espeak_VOICE voice;
    memset(&voice, 0, sizeof(espeak_VOICE)); // Zero out the voice first
    const char *langNativeString = "en-us";   //Default to US English
    voice.languages = langNativeString;
    voice.variant = 0;
    espeak_SetVoiceByProperties(&voice);
    hasInitialized = true;
    return TTS_SUCCESS;
}
/** init | |||
* Allocates eSpeak memory block and initializes the eSpeak system. | |||
* synthDoneCBPtr - Pointer to callback function which will receive generated samples | |||
* config - the engine configuration parameters, not used here | |||
* return tts_result | |||
*/ | |||
tts_result TtsEngine::init(synthDoneCB_t synthDoneCBPtr, const char *engineConfig) { | |||
ttsSynthDoneCBPointer = synthDoneCBPtr; | |||
hasInitialized = false; | |||
if ((engineConfig != NULL) && (strlen(engineConfig) > 0)) { | |||
eSpeakDataPath = (char *) malloc(strlen(engineConfig)); | |||
strcpy(eSpeakDataPath, engineConfig); | |||
} else { | |||
eSpeakDataPath = NULL; | |||
LOGE("Data path not specified!"); | |||
return TTS_FAILURE; | |||
} | |||
return attemptInit(); | |||
} | |||
/** shutdown | |||
* Unloads all eSpeak resources; terminates eSpeak system and frees eSpeak memory block. | |||
* return tts_result | |||
*/ | |||
tts_result TtsEngine::shutdown(void) { | |||
if (eSpeakDataPath != NULL) { | |||
free(eSpeakDataPath); | |||
} | |||
espeak_Terminate(); | |||
return TTS_SUCCESS; | |||
} | |||
// Stub: pre-loading is not implemented by this engine; the call is logged and
// always reports failure.  Voice selection happens in setLanguage() instead.
tts_result TtsEngine::loadLanguage(const char *lang, const char *country, const char *variant) {
    LOGV("loadLanguage(\"%s\", \"%s\", \"%s\")", lang, country, variant);
    return TTS_FAILURE;
}
// Looks up (lang, country, variant) in eSpeakSupportedVoices and reports the
// closest level of support.  The table keeps rows for the same language
// contiguous, so the country and variant searches scan forward from the first
// language match.
// @param lang    ISO2 or ISO3 language code; NULL/empty => not supported
// @param country ISO2 or ISO3 country code; may be NULL or empty
// @param variant locale variant string; may be NULL or empty
// @param pindex  when non-NULL, receives the index of the best-matching voice
//                row (-1 when no language match was found)
// @return the tts_support_result matching the best row found
tts_support_result isLanguageSupported(const char *lang, const char *country, const char *variant,
        int *pindex) {
    LOGV("isLanguageSupported(\"%s\", \"%s\", \"%s\")", lang, country, variant);
    if ((lang == NULL) || (strlen(lang) == 0)) {
        LOGE("TtsEngine::isLanguageAvailable called with no language");
        return TTS_LANG_NOT_SUPPORTED;
    }
    if (pindex != NULL) {
        *pindex = -1;
    }
    int langIndex = -1;
    int countryIndex = -1;
    int variantIndex = -1;
    if (strlen(lang) == 3) {
        // Column 1 holds the ISO3 code (optionally followed by "-COUNTRY").
        for (int i = 0; i < NUM_SUPPORTED_VOICES; i++) {
            if (strncmp(lang, eSpeakSupportedVoices[i][1], 3) == 0) {
                LOGI("Found ISO3 language at index %d", i);
                langIndex = i;
                break;
            }
        }
    } else if (strlen(lang) == 2) {
        // Column 0 holds the eSpeak (ISO2-based) voice name.
        for (int i = 0; i < NUM_SUPPORTED_VOICES; i++) {
            if (strncmp(lang, eSpeakSupportedVoices[i][0], 2) == 0) {
                LOGI("Found ISO2 language at index %d", i);
                langIndex = i;
                break;
            }
        }
    }
    if (langIndex < 0) {
        LOGV("TtsEngine::isLanguageAvailable called with unsupported language");
        return TTS_LANG_NOT_SUPPORTED;
    }
    if ((country == NULL) || (strlen(country) == 0)) {
        // TODO: Check whether resources are available for this language.
        if (pindex != NULL) {
            *pindex = langIndex;
        }
        LOGI("No country specified, language is available");
        return TTS_LANG_AVAILABLE;
    }
    // snprintf: "country" (and "variant" below) come from the caller with
    // arbitrary length; the old sprintf could overflow these fixed buffers.
    char lang_country[10];
    snprintf(lang_country, sizeof(lang_country), "%s-%s", lang, country);
    // Find country
    if (strlen(country) == 3) {
        for (int i = langIndex; i < NUM_SUPPORTED_VOICES; i++) {
            if ((strcmp(lang_country, eSpeakSupportedVoices[i][1]) == 0)) {
                LOGI("Found ISO3 country at index %d", i);
                countryIndex = i;
                break;
            }
        }
    } else if (strlen(country) == 2) {
        for (int i = langIndex; i < NUM_SUPPORTED_VOICES; i++) {
            if ((strcmp(lang_country, eSpeakSupportedVoices[i][0]) == 0)) {
                LOGI("Found ISO2 country at index %d", i);
                countryIndex = i;
                break;
            }
        }
    }
    if (countryIndex < 0) {
        if (pindex != NULL) {
            *pindex = langIndex;
        }
        LOGI("No country found, language is available");
        return TTS_LANG_AVAILABLE;
    }
    if ((variant == NULL) || (strlen(variant) == 0)) {
        if (pindex != NULL) {
            *pindex = countryIndex;
        }
        LOGI("No variant specified, language and country are available");
        return TTS_LANG_COUNTRY_AVAILABLE;
    }
    char lang_country_variant[15];
    snprintf(lang_country_variant, sizeof(lang_country_variant), "%s-%s-%s", lang, country,
            variant);
    // Find variant
    for (int i = countryIndex; i < NUM_SUPPORTED_VOICES; i++) {
        if ((strcmp(lang_country_variant, eSpeakSupportedVoices[i][1]) == 0)) {
            LOGI("Found variant at index %d", i);
            variantIndex = i;
            break;
        }
    }
    if (variantIndex < 0) {
        if (pindex != NULL) {
            *pindex = countryIndex;
        }
        LOGI("No variant found, language and country are available");
        return TTS_LANG_COUNTRY_AVAILABLE;
    }
    if (pindex != NULL) {
        *pindex = variantIndex;
    }
    LOGI("Language, country, and variant are available");
    return TTS_LANG_COUNTRY_VAR_AVAILABLE;
}
// Selects the eSpeak voice matching (lang, country, variant) and records the
// locale for later getLanguage()/getProperty("language") calls.
// @return TTS_SUCCESS, or TTS_FAILURE when the locale is unsupported or the
//         voice could not be applied
tts_result TtsEngine::setLanguage(const char *lang, const char *country, const char *variant) {
    LOGV("setLanguage(\"%s\", \"%s\", \"%s\")", lang, country, variant);
    // Make sure the engine is initialized!
    attemptInit();
    int index = -1;
    isLanguageSupported(lang, country, variant, &index);
    if (index < 0) {
        LOGE("setLanguage called with unsupported language");
        return TTS_FAILURE;
    }
    // Record the requested locale.  The destination globals are 10 bytes, so
    // copy with an explicit bound; country/variant may legitimately be NULL
    // (isLanguageSupported accepts that), which the old strcpy would crash on.
    snprintf(currentLang, sizeof(currentLang), "%s", lang);
    snprintf(currentCountry, sizeof(currentCountry), "%s", (country == NULL) ? "" : country);
    snprintf(currentVariant, sizeof(currentVariant), "%s", (variant == NULL) ? "" : variant);
    // Longest eSpeak voice name in the table is "zh-yue" (6 chars), so 7 fits.
    char espeakLangStr[7];
    snprintf(espeakLangStr, sizeof(espeakLangStr), "%s", eSpeakSupportedVoices[index][0]);
    espeak_VOICE voice;
    memset(&voice, 0, sizeof(espeak_VOICE)); // Zero out the voice first
    voice.variant = 0;
    voice.languages = espeakLangStr;
    espeak_ERROR err = espeak_SetVoiceByProperties(&voice);
    // +1 for the NUL terminator: the original allocation was one byte short,
    // making the strcpy below a heap overflow.
    // TODO(review): the previous heap value of currentLanguage leaks here; it
    // initially points at a string literal, so it cannot simply be delete[]d.
    currentLanguage = new char[strlen(lang) + 1];
    strcpy(currentLanguage, lang);
    if (err != EE_OK) {
        LOGE("Error code %d when setting voice properties!", err);
        return TTS_FAILURE;
    }
    return TTS_SUCCESS;
}
// Thin wrapper over isLanguageSupported() that discards the matched table
// index (callers here only need the support level).
tts_support_result TtsEngine::isLanguageAvailable(const char *lang, const char *country,
        const char *variant) {
    return isLanguageSupported(lang, country, variant, NULL);
}
// Copies the locale most recently recorded by setLanguage() into the caller's
// buffers.  NOTE(review): the backing globals are 10 bytes each and the copies
// are unbounded strcpy calls — callers must supply buffers of at least that
// size; confirm against the framework caller.
tts_result TtsEngine::getLanguage(char *language, char *country, char *variant) {
    strcpy(language, currentLang);
    strcpy(country, currentCountry);
    strcpy(variant, currentVariant);
    return TTS_SUCCESS;
}
/** setAudioFormat | |||
* sets the audio format to use for synthesis, returns what is actually used. | |||
* @encoding - reference to encoding format | |||
* @rate - reference to sample rate | |||
* @channels - reference to number of channels | |||
* return tts_result | |||
* */ | |||
tts_result TtsEngine::setAudioFormat(tts_audio_format& encoding, uint32_t& rate, int& channels) { | |||
LOGE("setAudioFormat(%d, %d, %d) is unsupported", encoding, rate, channels); | |||
// TODO: Fix this! | |||
return TTS_SUCCESS; | |||
} | |||
// Sets the property with the specified value.
// Supported properties: language (TODO: currently accepted but a no-op),
// rate (percentage of DEFAULT_SPEECH_RATE), pitch and volume (parsed with
// atoi and handed straight to eSpeak).
// @param property NUL-terminated property name
// @param value    NUL-terminated property value
// @param size     maximum size of "value" (unused; value is NUL-terminated)
// @return TTS_SUCCESS, TTS_FAILURE, TTS_PROPERTY_UNSUPPORTED or TTS_VALUE_INVALID
tts_result TtsEngine::setProperty(const char *property, const char *value, const size_t size) {
    // %zu: size is a size_t; the old %d specifier was mismatched.
    LOGV("setProperty(\"%s\", \"%s\", %zu)", property, value, size);
    /* Sanity check */
    if (property == NULL) {
        LOGE("setProperty called with property NULL");
        return TTS_PROPERTY_UNSUPPORTED;
    }
    if (value == NULL) {
        LOGE("setProperty called with value NULL");
        return TTS_VALUE_INVALID;
    }
    espeak_ERROR result;
    if (strncmp(property, "language", 8) == 0) {
        // TODO: Set this property
        result = EE_OK;
    } else if (strncmp(property, "rate", 4) == 0) {
        // The framework expresses rate as a percentage of the default speed.
        int rate = atoi(value) * DEFAULT_SPEECH_RATE / 100;
        result = espeak_SetParameter(espeakRATE, rate, 0);
    } else if (strncmp(property, "pitch", 5) == 0) {
        int pitch = atoi(value);
        result = espeak_SetParameter(espeakPITCH, pitch, 0);
    } else if (strncmp(property, "volume", 6) == 0) {
        int volume = atoi(value);
        result = espeak_SetParameter(espeakVOLUME, volume, 0);
    } else {
        return TTS_PROPERTY_UNSUPPORTED;
    }
    return (result == EE_OK) ? TTS_SUCCESS : TTS_FAILURE;
}
// Retrieves a property value previously set via setProperty() (or a default).
// Supported properties: language, rate, pitch, volume.
// @param property  NUL-terminated property name
// @param[out] value     receives the value as a string
// @param[inout] iosize  in: capacity of value; out: required size when
//                       TTS_PROPERTY_SIZE_TOO_SMALL is returned
// @return TTS_SUCCESS, TTS_VALUE_INVALID, TTS_PROPERTY_UNSUPPORTED or
//         TTS_PROPERTY_SIZE_TOO_SMALL
tts_result TtsEngine::getProperty(const char *property, char *value, size_t *iosize) {
    LOGV("getProperty(\"%s\", ...)", property);
    /* sanity check */
    if (property == NULL) {
        LOGE("getProperty called with property NULL");
        return TTS_PROPERTY_UNSUPPORTED;
    }
    if (value == NULL) {
        LOGE("getProperty called with value NULL");
        return TTS_VALUE_INVALID;
    }
    if (strncmp(property, "language", 8) == 0) {
        if (currentLanguage == NULL) {
            strcpy(value, "");
        } else {
            if (*iosize < strlen(currentLanguage)+1) {
                *iosize = strlen(currentLanguage) + 1;
                return TTS_PROPERTY_SIZE_TOO_SMALL;
            }
            strcpy(value, currentLanguage);
        }
        return TTS_SUCCESS;
    } else if (strncmp(property, "rate", 4) == 0) {
        int rate = espeak_GetParameter(espeakRATE, 1) * 100 / DEFAULT_SPEECH_RATE;
        // 12 bytes hold any int; the old char[4] overflowed for 4+ digit
        // values (e.g. rates above 999%).
        char tmprate[12];
        snprintf(tmprate, sizeof(tmprate), "%d", rate);
        if (*iosize < strlen(tmprate)+1) {
            *iosize = strlen(tmprate) + 1;
            return TTS_PROPERTY_SIZE_TOO_SMALL;
        }
        strcpy(value, tmprate);
        return TTS_SUCCESS;
    } else if (strncmp(property, "pitch", 5) == 0) {
        char tmppitch[12];
        snprintf(tmppitch, sizeof(tmppitch), "%d", espeak_GetParameter(espeakPITCH, 1));
        if (*iosize < strlen(tmppitch)+1) {
            *iosize = strlen(tmppitch) + 1;
            return TTS_PROPERTY_SIZE_TOO_SMALL;
        }
        strcpy(value, tmppitch);
        return TTS_SUCCESS;
    } else if (strncmp(property, "volume", 6) == 0) {
        char tmpvolume[12];
        snprintf(tmpvolume, sizeof(tmpvolume), "%d", espeak_GetParameter(espeakVOLUME, 1));
        if (*iosize < strlen(tmpvolume)+1) {
            *iosize = strlen(tmpvolume) + 1;
            return TTS_PROPERTY_SIZE_TOO_SMALL;
        }
        strcpy(value, tmpvolume);
        return TTS_SUCCESS;
    }
    LOGE("Unsupported property");
    return TTS_PROPERTY_UNSUPPORTED;
}
/** synthesizeText
 *  Synthesizes a text string, blocking until synthesis completes.
 *  The text string may be annotated with (a subset of) SSML tags; eSpeak is
 *  invoked with espeakCHARS_UTF8 | espeakSSML so tags are interpreted.
 *  Audio is delivered through eSpeakCallback / the registered framework
 *  callback, NOT through the buffer parameters.
 *  @text - UTF-8 text to synthesize
 *  @buffer - buffer which will receive generated samples (unused here;
 *            samples flow through the callback instead)
 *  @bufferSize - size of buffer (unused, see above)
 *  @userdata - pointer passed back to the callback function
 *  return tts_result
 */
tts_result TtsEngine::synthesizeText(const char *text, int8_t *buffer, size_t bufferSize,
        void *userdata) {
    LOGI("Synthesize: %s", text);
    espeak_SetSynthCallback(eSpeakCallback);
    unsigned int unique_identifier;
    espeak_Synth(text, strlen(text), 0,  // position
            POS_CHARACTER, 0,  // end position (0 means no end position)
            espeakCHARS_UTF8 | espeakSSML,  // use or ignore xml tags
            &unique_identifier, userdata);
    // Synchronous engine: wait for all callbacks to finish before returning.
    espeak_Synchronize();
    LOGI("Synthesis done");
    return TTS_SUCCESS;
}
/** stop
 *  Aborts the running synthesis by cancelling all of eSpeak's queued work.
 *  return tts_result (always TTS_SUCCESS)
 */
tts_result TtsEngine::stop() {
    espeak_Cancel();
    return TTS_SUCCESS;
}
#ifdef __cplusplus
extern "C" {
#endif
// Unmangled factory symbol the TTS service resolves with dlsym() after
// loading this shared library.  Ownership of the returned engine passes to
// the caller.
TtsEngine* getTtsEngine() {
    return new TtsEngine();
}
#ifdef __cplusplus
}
#endif
@@ -0,0 +1,341 @@ | |||
/* | |||
* Copyright (C) 2011 Google Inc. | |||
* | |||
* Licensed under the Apache License, Version 2.0 (the "License"); | |||
* you may not use this file except in compliance with the License. | |||
* You may obtain a copy of the License at | |||
* | |||
* http://www.apache.org/licenses/LICENSE-2.0 | |||
* | |||
* Unless required by applicable law or agreed to in writing, software | |||
* distributed under the License is distributed on an "AS IS" BASIS, | |||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
* See the License for the specific language governing permissions and | |||
* limitations under the License. | |||
*/ | |||
#include <stdio.h> | |||
#include <unistd.h> | |||
#include <stdlib.h> | |||
#include <jni.h> | |||
#include <speak_lib.h> | |||
#include <TtsEngine.h> | |||
#include <Log.h> | |||
#define LOG_TAG "eSpeakService"
#define DEBUG false
// Channel-count values mirrored from android.media.AudioFormat.
enum audio_channel_count {
    CHANNEL_COUNT_MONO = 1,
    CHANNEL_COUNT_STEREO = 2
};
// Encoding constants mirrored from android.media.AudioFormat.ENCODING_*.
enum audio_encoding {
    ENCODING_INVALID = 0x00,
    ENCODING_DEFAULT = 0x01,
    ENCODING_PCM_16BIT = 0x02,
    ENCODING_PCM_8BIT = 0x03
};
// Return values for the eSpeak synthesis callback (see SynthCallback).
enum synthesis_result {
    SYNTH_CONTINUE = 0,
    SYNTH_ABORT = 1
};
const int DEFAULT_SAMPLE_RATE = 22050;
const int DEFAULT_CHANNEL_COUNT = CHANNEL_COUNT_MONO;
const int DEFAULT_AUDIO_FORMAT = ENCODING_PCM_16BIT;
// Buffer size hint in milliseconds.
const int DEFAULT_BUFFER_SIZE = 1000;
struct native_data_t { | |||
JNIEnv *env; | |||
jobject object; | |||
int sampleRate; | |||
int channelCount; | |||
int audioFormat; | |||
int bufferSizeInMillis; | |||
native_data_t() { | |||
env = NULL; | |||
object = NULL; | |||
sampleRate = DEFAULT_SAMPLE_RATE; | |||
channelCount = DEFAULT_CHANNEL_COUNT; | |||
audioFormat = DEFAULT_AUDIO_FORMAT; | |||
bufferSizeInMillis = DEFAULT_BUFFER_SIZE; | |||
} | |||
}; | |||
// JNI IDs cached once in nativeClassInit() and reused on every call.
jmethodID METHOD_nativeSynthCallback;
jfieldID FIELD_mNativeData;

// Recovers the native_data_t pointer stored in the Java object's mNativeData
// int field. NOTE(review): storing a pointer in a jint assumes 32-bit
// pointers — not safe on 64-bit ABIs; confirm target ABIs before reuse.
static inline native_data_t *getNativeData(JNIEnv *env, jobject object) {
  return (native_data_t *) (env->GetIntField(object, FIELD_mNativeData));
}
/* Callback from espeak. Should call back to the TTS API */ | |||
static int SynthCallback(short *audioData, int numSamples, | |||
espeak_EVENT *events) { | |||
native_data_t *nat = (native_data_t *) events->user_data; | |||
JNIEnv *env = nat->env; | |||
jobject object = nat->object; | |||
if (numSamples < 1) { | |||
env->CallVoidMethod(object, METHOD_nativeSynthCallback, NULL); | |||
return SYNTH_ABORT; | |||
} else { | |||
jbyteArray arrayAudioData = env->NewByteArray(numSamples * 2); | |||
env->SetByteArrayRegion(arrayAudioData, 0, (numSamples * 2), (jbyte *) audioData); | |||
env->CallVoidMethod(object, METHOD_nativeSynthCallback, arrayAudioData); | |||
return SYNTH_CONTINUE; | |||
} | |||
} | |||
#ifdef __cplusplus | |||
extern "C" { | |||
#endif /* __cplusplus */ | |||
/* Invoked by the VM when the library is loaded; reports the JNI version
 * this library requires, or -1 if the environment cannot be obtained. */
JNIEXPORT jint
JNICALL JNI_OnLoad(JavaVM *vm, void *reserved) {
  JNIEnv *env = NULL;
  const jint status = vm->GetEnv((void **) &env, JNI_VERSION_1_6);
  if (status == JNI_OK) {
    return JNI_VERSION_1_6;
  }
  LOGE("Failed to get the environment using GetEnv()");
  return -1;
}
/* Caches the JNI method and field IDs used on the synthesis path.
 * Returns JNI_FALSE (with a Java exception pending) if a lookup fails,
 * instead of leaving a NULL ID to crash on later. */
JNIEXPORT jboolean
JNICALL Java_com_googlecode_eyesfree_espeak_SpeechSynthesis_nativeClassInit(
    JNIEnv* env, jclass clazz) {
  if (DEBUG) LOGV("%s", __FUNCTION__);
  METHOD_nativeSynthCallback = env->GetMethodID(clazz, "nativeSynthCallback", "([B)V");
  FIELD_mNativeData = env->GetFieldID(clazz, "mNativeData", "I");
  // A failed lookup leaves NoSuchMethodError/NoSuchFieldError pending.
  if (METHOD_nativeSynthCallback == NULL || FIELD_mNativeData == NULL) {
    return JNI_FALSE;
  }
  return JNI_TRUE;
}
/* Allocates the per-instance native state, stores its pointer in the Java
 * object's mNativeData field, and initializes eSpeak with the given
 * voice-data path. Returns JNI_TRUE on success; on failure all state
 * created here is rolled back so nothing leaks or dangles.
 * Pairs with nativeDestroy(). */
JNIEXPORT jboolean
JNICALL Java_com_googlecode_eyesfree_espeak_SpeechSynthesis_nativeCreate(
    JNIEnv *env, jobject object, jstring path) {
  if (DEBUG) LOGV("%s", __FUNCTION__);
  // Plain new throws on OOM, so nat is never NULL past this point.
  native_data_t *nat = new native_data_t;
  env->SetIntField(object, FIELD_mNativeData, (jint) nat);

  const char *c_path = env->GetStringUTFChars(path, NULL);
  if (c_path == NULL) {
    // Conversion failed (exception pending); undo the field + allocation.
    env->SetIntField(object, FIELD_mNativeData, 0);
    delete nat;
    return JNI_FALSE;
  }
  nat->object = env->NewWeakGlobalRef(object);
  if (DEBUG) LOGV("Initializing with path %s", c_path);
  // AUDIO_OUTPUT_SYNCHRONOUS: eSpeak delivers audio through SynthCallback
  // rather than playing it itself; the return value is the sample rate, or
  // a non-positive error code when the data path is invalid.
  nat->sampleRate = espeak_Initialize(AUDIO_OUTPUT_SYNCHRONOUS, nat->bufferSizeInMillis, c_path, 0);
  env->ReleaseStringUTFChars(path, c_path);

  if (nat->sampleRate > 0) {
    return JNI_TRUE;
  }
  // Initialization failed: release everything so the native state does not
  // leak and mNativeData does not hold a dangling pointer.
  env->DeleteWeakGlobalRef(nat->object);
  env->SetIntField(object, FIELD_mNativeData, 0);
  delete nat;
  return JNI_FALSE;
}
/* Releases the native state created by nativeCreate(): the weak global ref
 * and the native_data_t itself, then clears mNativeData so a double call
 * is harmless. */
JNIEXPORT jboolean
JNICALL Java_com_googlecode_eyesfree_espeak_SpeechSynthesis_nativeDestroy(
    JNIEnv *env, jobject object) {
  if (DEBUG) LOGV("%s", __FUNCTION__);
  native_data_t *nat = getNativeData(env, object);
  if (nat != NULL) {
    env->DeleteWeakGlobalRef(nat->object);
    // nat was allocated with new in nativeCreate(); releasing it with
    // free() is undefined behavior and skips the destructor — use delete.
    delete nat;
    env->SetIntField(object, FIELD_mNativeData, 0);
  }
  return JNI_TRUE;
}
/* Returns the sample rate (Hz) recorded from espeak_Initialize(). */
JNIEXPORT jint
JNICALL Java_com_googlecode_eyesfree_espeak_SpeechSynthesis_nativeGetSampleRate(
    JNIEnv *env, jobject object) {
  if (DEBUG) LOGV("%s", __FUNCTION__);
  return (jint) getNativeData(env, object)->sampleRate;
}
/* Returns the output channel count (see audio_channel_count). */
JNIEXPORT jint
JNICALL Java_com_googlecode_eyesfree_espeak_SpeechSynthesis_nativeGetChannelCount(
    JNIEnv *env, jobject object) {
  if (DEBUG) LOGV("%s", __FUNCTION__);
  return (jint) getNativeData(env, object)->channelCount;
}
/* Returns the audio encoding (see audio_encoding). */
JNIEXPORT jint
JNICALL Java_com_googlecode_eyesfree_espeak_SpeechSynthesis_nativeGetAudioFormat(
    JNIEnv *env, jobject object) {
  if (DEBUG) LOGV("%s", __FUNCTION__);
  return (jint) getNativeData(env, object)->audioFormat;
}
/* Returns the eSpeak buffer length, in milliseconds. */
JNIEXPORT jint
JNICALL Java_com_googlecode_eyesfree_espeak_SpeechSynthesis_nativeGetBufferSizeInMillis(
    JNIEnv *env, jobject object) {
  if (DEBUG) LOGV("%s", __FUNCTION__);
  return (jint) getNativeData(env, object)->bufferSizeInMillis;
}
/* Returns a flat String[] describing the available voices; each voice
 * contributes four consecutive entries: language, identifier, gender, age.
 * Returns NULL (with an exception pending) if the array cannot be built. */
JNIEXPORT jobjectArray
JNICALL Java_com_googlecode_eyesfree_espeak_SpeechSynthesis_nativeGetAvailableVoices(
    JNIEnv *env, jobject object) {
  if (DEBUG) LOGV("%s", __FUNCTION__);
  const espeak_VOICE **voices = espeak_ListVoices(NULL);

  // First, count the number of voices returned.
  int count;
  for (count = 0; voices[count] != NULL; count++);

  // Next, create a Java String array with four slots per voice.
  jobjectArray voicesArray = (jobjectArray) env->NewObjectArray(
      count * 4, env->FindClass("java/lang/String"), NULL);
  if (voicesArray == NULL) {
    return NULL;  // allocation failed; OutOfMemoryError pending
  }

  char gender_buf[12];
  char age_buf[12];

  // Finally, populate the array.
  const espeak_VOICE *v;
  for (int i = 0, voicesIndex = 0; (v = voices[i]) != NULL; i++) {
    // languages holds a leading priority byte before the language name;
    // skip it. NOTE(review): only the first listed language is surfaced.
    const char *lang_name = v->languages + 1;
    const char *identifier = v->identifier;
    snprintf(gender_buf, sizeof(gender_buf), "%d", v->gender);
    snprintf(age_buf, sizeof(age_buf), "%d", v->age);

    const char *fields[] = { lang_name, identifier, gender_buf, age_buf };
    for (int f = 0; f < 4; f++) {
      jstring str = env->NewStringUTF(fields[f]);
      env->SetObjectArrayElement(voicesArray, voicesIndex++, str);
      // Drop the local ref immediately: with four refs per voice, a long
      // voice list can otherwise overflow the local reference table.
      env->DeleteLocalRef(str);
    }
  }
  return voicesArray;
}
/* Asks eSpeak to select a voice matching the given name, language list,
 * gender, age, and variant. Returns JNI_TRUE iff eSpeak reports EE_OK. */
JNIEXPORT jboolean
JNICALL Java_com_googlecode_eyesfree_espeak_SpeechSynthesis_nativeSetVoiceByProperties(
    JNIEnv *env, jobject object, jstring name, jstring languages, jint gender, jint age,
    jint variant) {
  if (DEBUG) LOGV("%s", __FUNCTION__);
  const char *c_name = env->GetStringUTFChars(name, NULL);
  const char *c_languages = env->GetStringUTFChars(languages, NULL);
  if (c_name == NULL || c_languages == NULL) {
    // A conversion failed (exception pending); release whatever succeeded.
    if (c_name != NULL) env->ReleaseStringUTFChars(name, c_name);
    if (c_languages != NULL) env->ReleaseStringUTFChars(languages, c_languages);
    return JNI_FALSE;
  }

  espeak_VOICE voice_select;
  memset(&voice_select, 0, sizeof(espeak_VOICE));
  voice_select.name = c_name;
  voice_select.languages = c_languages;
  voice_select.age = (int) age;
  voice_select.gender = (int) gender;
  voice_select.variant = (int) variant;

  const espeak_ERROR result = espeak_SetVoiceByProperties(&voice_select);

  env->ReleaseStringUTFChars(name, c_name);
  env->ReleaseStringUTFChars(languages, c_languages);
  return (result == EE_OK) ? JNI_TRUE : JNI_FALSE;
}
/* Selects a voice by language code and variant.
 * Returns JNI_TRUE iff eSpeak reports EE_OK. */
JNIEXPORT jboolean
JNICALL Java_com_googlecode_eyesfree_espeak_SpeechSynthesis_nativeSetLanguage(
    JNIEnv *env, jobject object, jstring language, jint variant) {
  if (DEBUG) LOGV("%s", __FUNCTION__);
  const char *c_language = env->GetStringUTFChars(language, NULL);
  if (c_language == NULL) {
    return JNI_FALSE;  // conversion failed; exception pending
  }
  // Copy the string so the JNI chars can be released before calling eSpeak.
  // len + 1: the original calloc(len) left no room for the NUL terminator,
  // so strcpy wrote one byte past the end of the buffer (heap overflow).
  const size_t len = strlen(c_language);
  char *lang_copy = (char *) calloc(len + 1, sizeof(char));
  if (lang_copy == NULL) {
    env->ReleaseStringUTFChars(language, c_language);
    return JNI_FALSE;
  }
  strcpy(lang_copy, c_language);
  env->ReleaseStringUTFChars(language, c_language);

  espeak_VOICE voice;
  memset(&voice, 0, sizeof(espeak_VOICE));  // Zero out the voice first
  voice.languages = lang_copy;
  voice.variant = (int) variant;
  const espeak_ERROR result = espeak_SetVoiceByProperties(&voice);
  // NOTE(review): lang_copy is intentionally not freed, matching the
  // original code — confirm eSpeak does not retain the pointer before
  // adding free(lang_copy).
  return (result == EE_OK) ? JNI_TRUE : JNI_FALSE;
}
/* Sets eSpeak's speech-rate parameter; JNI_TRUE iff eSpeak accepted it. */
JNIEXPORT jboolean
JNICALL Java_com_googlecode_eyesfree_espeak_SpeechSynthesis_nativeSetRate(
    JNIEnv *env, jobject object, jint rate) {
  if (DEBUG) LOGV("%s", __FUNCTION__);
  return (espeak_SetParameter(espeakRATE, (int) rate, 0) == EE_OK)
      ? JNI_TRUE
      : JNI_FALSE;
}
/* Sets eSpeak's pitch parameter; JNI_TRUE iff eSpeak accepted it. */
JNIEXPORT jboolean
JNICALL Java_com_googlecode_eyesfree_espeak_SpeechSynthesis_nativeSetPitch(
    JNIEnv *env, jobject object, jint pitch) {
  if (DEBUG) LOGV("%s", __FUNCTION__);
  return (espeak_SetParameter(espeakPITCH, (int) pitch, 0) == EE_OK)
      ? JNI_TRUE
      : JNI_FALSE;
}
/* Synthesizes the given UTF-8/SSML text, streaming audio back to Java via
 * SynthCallback. Blocks (espeak_Synchronize) until synthesis completes or
 * is cancelled. Returns JNI_TRUE iff espeak_Synth reported EE_OK. */
JNIEXPORT jboolean
JNICALL Java_com_googlecode_eyesfree_espeak_SpeechSynthesis_nativeSynthesize(
    JNIEnv *env, jobject object, jstring text) {
  if (DEBUG) LOGV("%s", __FUNCTION__);
  native_data_t *nat = getNativeData(env, object);
  const char *c_text = env->GetStringUTFChars(text, NULL);
  if (c_text == NULL) {
    return JNI_FALSE;  // conversion failed; exception pending
  }
  unsigned int unique_identifier;

  // SynthCallback runs on this thread during espeak_Synth/Synchronize, so
  // publishing the current JNIEnv here is safe; clear it afterwards so nat
  // never holds a stale env pointer past this call.
  nat->env = env;
  espeak_SetSynthCallback(SynthCallback);
  const espeak_ERROR result =
      espeak_Synth(c_text, strlen(c_text), 0, // position
                   POS_CHARACTER, 0, // end position (0 means no end position)
                   espeakCHARS_UTF8 | espeakSSML, // use or ignore xml tags
                   &unique_identifier, nat);
  espeak_Synchronize();
  nat->env = NULL;
  env->ReleaseStringUTFChars(text, c_text);
  return (result == EE_OK) ? JNI_TRUE : JNI_FALSE;
}
/* Cancels any synthesis currently in progress via espeak_Cancel().
 * Always reports JNI_TRUE; cancellation is best-effort. */
JNIEXPORT jboolean
JNICALL Java_com_googlecode_eyesfree_espeak_SpeechSynthesis_nativeStop(
    JNIEnv *env, jobject object) {
  if (DEBUG) LOGV("%s", __FUNCTION__);
  espeak_Cancel();
  return JNI_TRUE;
}
#ifdef __cplusplus | |||
} | |||
#endif /* __cplusplus */ |