Browse Source

Merge branch 'test'

master
Valdis Vitolins 8 years ago
parent
commit
fd4b397ad7
2 changed files with 39 additions and 36 deletions
  1. 13
    4
      dictsource/lv_rules
  2. 26
    32
      src/libespeak-ng/dictionary.c

+ 13
- 4
dictsource/lv_rules View File



.replace .replace
ó ȯ // replace o-acute with o-dot, as it is more logical for "short o" ó ȯ // replace o-acute with o-dot, as it is more logical for "short o"
ḩ h
// +---------------------------+ // +---------------------------+
// | Suffixes of nouns | // | Suffixes of nouns |
// +---------------------------+ // +---------------------------+


.group c .group c
c ts c ts
L55) c (L55 ts>
c (C ts_! c (C ts_!


.group č .group č
č tS č tS
L55) č (L55 tS>


.group d .group d
d d d d
ei (+ ei ei (+ ei


// narrow e ---------------------------------------------------------------------- // // narrow e ---------------------------------------------------------------------- //
a) e (ro e // internationalsms
a) e (ro e // internationalisms
e (o e e (o e
// international consonants // international consonants
e (JL41+ e e (JL41+ e


.group f .group f
f f f f
L55) f (L55 f>


.group g .group g
g g g g


.group h .group h
h h h h
L55) h (L55 h>


.group i .group i
i i i i


.group k .group k
k k k k
L55) k (L55 k>
k (ļ k} // tiny pause between k and ļ k (ļ k} // tiny pause between k and ļ


.group ķ .group ķ
ķ c ķ c
L55) ķ (L55 c>
ķ (L55 c} // tiny pause for sharper sound before short vowels ķ (L55 c} // tiny pause for sharper sound before short vowels


.group l .group l


.group p .group p
p p p p
// pus..number stress on next syllable
_) pus (vien p%us
L55) p (L55 p>
_) pus (vien p%us // pus..number stress on next syllable
_) pus (div p%us _) pus (div p%us
_) pus (otr p%us _) pus (otr p%us
_) pus (trij p%us _) pus (trij p%us
sh s_!h_! // probably no need to make distinct in other places sh s_!h_! // probably no need to make distinct in other places
_) s (L61 s_ // make s distinct at start of the word before unvoiced consonants _) s (L61 s_ // make s distinct at start of the word before unvoiced consonants
s s s s
L55) s (L55 s>


.group š .group š
š S š S
L55) š (L55 S>


.group t .group t
t t t t
L55) t (L55 t>


.group u .group u
ui ui ui ui

+ 26
- 32
src/libespeak-ng/dictionary.c View File

static int IsLetterGroup(Translator *tr, char *word, int group, int pre) static int IsLetterGroup(Translator *tr, char *word, int group, int pre)
{ {
/* Match the word against a list of utf-8 strings. /* Match the word against a list of utf-8 strings.
* returns length of matching letter group or -1
* *
* How this works: * How this works:
* *
} else } else
w = word; w = word;


// If no character is allowed in group
// at the start (for pre-rule) or end (post-rule)
// of the checked letter in the word, return true.
if (*p == '~' && *w == ' ') // word end checked because of comment below
return 1;
/* TODO: Investigate why the word-end mark _ doesn't work properly
* for post rules somewhere in the MatchRule() function, e.g.:
*
* .L01 ~ b c
* .group a
* _L01) a i // this works
* a (L01_ u // this doesn't work
*/
// If '~' (no character) is allowed in group, return 0.
if (*p == '~')
return 0;


// Check current group
while ((*p == *w) && (*w != 0)) { while ((*p == *w) && (*w != 0)) {
w++; w++;
p++; p++;
while (*p++ != 0) while (*p++ != 0)
; ;
} }
return 0;
// Not found
return -1;
} }


static int IsLetter(Translator *tr, int letter, int group) static int IsLetter(Translator *tr, int letter, int group)
break; break;
case RULE_LETTERGP2: // match against a list of utf-8 strings case RULE_LETTERGP2: // match against a list of utf-8 strings
letter_group = LetterGroupNo(rule++); letter_group = LetterGroupNo(rule++);
if ((n_bytes = IsLetterGroup(tr, post_ptr-1, letter_group, 0)) > 0) {
if ((n_bytes = IsLetterGroup(tr, post_ptr-1, letter_group, 0)) >= 0) {
add_points = (20-distance_right); add_points = (20-distance_right);
post_ptr += (n_bytes-1);
if (n_bytes > 0) // move pointer, if non-zero length group was found
post_ptr += (n_bytes-1);
} else } else
failed = 1; failed = 1;
break; break;
case RULE_SKIPCHARS: case RULE_SKIPCHARS:
{ {
// '(Jxy' means 'skip characters until xy' // '(Jxy' means 'skip characters until xy'
char *p = post_ptr + letter_xbytes;
char *p2 = p; // pointer to the previous character in the word
int rule_w; // first wide character of skip rule
char *p = post_ptr - 1; // to allow empty jump (without letter between), go one back
char *p2 = p; // pointer to the previous character in the word
int rule_w; // first wide character of skip rule
utf8_in(&rule_w, rule); utf8_in(&rule_w, rule);
int g_bytes = 0; // bytes of successfully found character group
while ((letter_w != rule_w) && (letter_w != RULE_SPACE) && (letter_w != 0) && (g_bytes == 0)) {
int g_bytes = -1; // bytes of successfully found character group
while ((letter_w != rule_w) && (letter_w != RULE_SPACE) && (letter_w != 0) && (g_bytes == -1)) {
if (rule_w == RULE_LETTERGP2)
g_bytes = IsLetterGroup(tr, p, LetterGroupNo(rule + 1), 0);
p2 = p; p2 = p;
p += utf8_in(&letter_w, p); p += utf8_in(&letter_w, p);
if (rule_w == RULE_LETTERGP2)
g_bytes = IsLetterGroup(tr, p2, LetterGroupNo(rule + 1), 0);

} }
if ((letter_w == rule_w) || (g_bytes > 0))
if ((letter_w == rule_w) || (g_bytes >= 0))
post_ptr = p2; post_ptr = p2;
} }
break; break;
break; break;
case RULE_LETTERGP2: // match against a list of utf-8 strings case RULE_LETTERGP2: // match against a list of utf-8 strings
letter_group = LetterGroupNo(rule++); letter_group = LetterGroupNo(rule++);
if ((n_bytes = IsLetterGroup(tr, pre_ptr, letter_group, 1)) > 0) {
if ((n_bytes = IsLetterGroup(tr, pre_ptr, letter_group, 1)) >= 0) {
add_points = (20-distance_right); add_points = (20-distance_right);
pre_ptr -= (n_bytes-1);
if (n_bytes > 0) // move pointer, if non-zero length group was found
pre_ptr -= (n_bytes-1);
} else } else
failed = 1; failed = 1;
break; break;


case RULE_SKIPCHARS: { case RULE_SKIPCHARS: {
// 'xyJ)' means 'skip characters backwards until xy' // 'xyJ)' means 'skip characters backwards until xy'
char *p = pre_ptr; // pointer to current character in word
char *p2 = p; // pointer to previous character in word
int g_bytes = 0; // bytes of successfully found character group
char *p = pre_ptr + 1; // to allow empty jump (without letter between), go one forward
char *p2 = p; // pointer to previous character in word
int g_bytes = -1; // bytes of successfully found character group


while ((*p != *rule) && (*p != RULE_SPACE) && (*p != 0) && (g_bytes == 0)) {
while ((*p != *rule) && (*p != RULE_SPACE) && (*p != 0) && (g_bytes == -1)) {
p2 = p; p2 = p;
p--; p--;
if (*rule == RULE_LETTERGP2) if (*rule == RULE_LETTERGP2)
// 'xy' part is checked as usual in following cycles of PRE rule characters // 'xy' part is checked as usual in following cycles of PRE rule characters
if (*p == *rule) if (*p == *rule)
pre_ptr = p2; pre_ptr = p2;
if (g_bytes > 0)
if (g_bytes >= 0)
pre_ptr = p2 + 1; pre_ptr = p2 + 1;


} }

Loading…
Cancel
Save