Browse Source

Prepare Arabic letters using PrepareLetters() function

The PrepareLetters() function can also be used to set up letter groups A, B, C, etc. from a string;
in the future, these strings could be loaded from a configuration file
master
Valdis Vitolins 6 years ago
parent
commit
1cf7549392
1 changed files with 64 additions and 0 deletions
  1. 64
    0
      src/libespeak-ng/tr_languages.c

+ 64
- 0
src/libespeak-ng/tr_languages.c View File

@@ -113,6 +113,8 @@ ALPHABET *AlphabetFromChar(int c)

static void Translator_Russian(Translator *tr);

static void PrepareLetters(char *, char *, int, int);

static void ResetLetterBits(Translator *tr, int groups)
{
// Clear all the specified groups
@@ -391,6 +393,38 @@ static const unsigned char ru_consonants[] = { // б в г д ж з й к л м
0x11, 0x12, 0x13, 0x14, 0x16, 0x17, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1f, 0x20, 0x21, 0x22, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2c, 0x73, 0x7b, 0x83, 0x9b, 0
};

static void SetArabicLetters(Translator *tr)
{
const char ar_vowel_letters[] = {"َ ُ ِ"};
const char ar_consonant_letters[] = {"ب پ ت ة ث ج ح خ د ذ ر ز س ش ص ض ط ظ ع غ ف ق ك ل م ن ئ ؤ ء أ آ إ ه"};
const char ar_consonant_vowel_letters[] = {"ا و ي"};
const char ar_thick_letters[] = {"ص ض ط ظ ق"};
const char ar_shadda_letter[] = {" ّ "};
const char ar_hamza_letter[] = {" ّ "};
const char ar_sukun_letter[] = {" ّ "};
static char ar_vowel_codes[4];
static char ar_consonant_codes[34];
static char ar_consonant_vowel_codes[4];
static char ar_thick_codes[6];
static char ar_shadda_code[2];
static char ar_hamza_code[2];
static char ar_sukun_code[2];
PrepareLetters(&ar_vowel_letters, &ar_vowel_codes, 4, OFFSET_ARABIC);
PrepareLetters(&ar_consonant_letters, &ar_consonant_codes, 34, OFFSET_ARABIC);
PrepareLetters(&ar_consonant_vowel_letters, &ar_consonant_vowel_codes, 4, OFFSET_ARABIC);
PrepareLetters(&ar_thick_letters, &ar_thick_codes, 6, OFFSET_ARABIC);
PrepareLetters(&ar_shadda_letter, &ar_shadda_code, 2, OFFSET_ARABIC);
PrepareLetters(&ar_hamza_letter, &ar_hamza_code, 2, OFFSET_ARABIC);
PrepareLetters(&ar_sukun_letter, &ar_sukun_code, 2, OFFSET_ARABIC);
SetLetterBits(tr, LETTERGP_A, (char *) ar_vowel_codes);
SetLetterBits(tr, LETTERGP_B, (char *) ar_consonant_vowel_codes);
SetLetterBits(tr, LETTERGP_C, (char *) ar_consonant_codes);
SetLetterBits(tr, LETTERGP_F, (char *) ar_thick_codes);
SetLetterBits(tr, LETTERGP_G, (char *) ar_shadda_code);
SetLetterBits(tr, LETTERGP_H, (char *) ar_hamza_code);
SetLetterBits(tr, LETTERGP_Y, (char *) ar_sukun_code);
}

static void SetCyrillicLetters(Translator *tr)
{
// character codes offset by 0x420
@@ -510,6 +544,7 @@ Translator *SelectTranslator(const char *name)
tr->langopts.numbers = NUM_SWAP_TENS | NUM_AND_UNITS | NUM_HUNDRED_AND | NUM_OMIT_1_HUNDRED | NUM_AND_HUNDRED | NUM_THOUSAND_AND | NUM_OMIT_1_THOUSAND;
tr->langopts.param[LOPT_UNPRONOUNCABLE] = 1; // disable check for unpronouncable words
tr->encoding = ESPEAKNG_ENCODING_ISO_8859_6;
SetArabicLetters(tr);
break;
case L('b', 'g'): // Bulgarian
{
@@ -1585,3 +1620,32 @@ static void Translator_Russian(Translator *tr)
tr->langopts.numbers = NUM_DECIMAL_COMMA | NUM_OMIT_1_HUNDRED;
tr->langopts.numbers2 = 0x2 + NUM2_THOUSANDS_VAR1; // variant numbers before thousands
}

static void PrepareLetters(char *letters, char *codes, int size, int shift)
{
/* Prepare array of shifted letter codes for letter groups from passed string.
* letters: pointer to string of UTF-8 encoded letters (can be space delimited).
* codes: pointer to array of letter codes.
* size: size of reserved cells in codes array, (last cell in codes should be leaved for null value).
* shift: value of downshift, to fit UTF-16 letters into ANSII (char) range.
*/
unsigned char *p = letters;
int bytes = 0;
int code = -1;
int count = 0;
while (code != 0) {
bytes = utf8_in(&code, p);
if (code > 0x20) {
*codes = code - shift;
codes++;
count++;
}
p += bytes;
}
codes++;
*codes = 0;
if (size != count + 1)
fprintf(stderr,
"PrepareLetters() error: different sizes of letter arrays reserved: %d, used:%d.\n",
size, count + 1);
}

Loading…
Cancel
Save