Browse Source

smj: Support for new language Lule Saami

Co-authored-by: Sebastien Le Maguer <[email protected]>
master
katrihiovain 4 years ago
parent
commit
93d3c67df6

+ 1
- 0
CHANGELOG.md View File

@@ -58,6 +58,7 @@ new languages:
* piqd (Klingon) -- Valdis Vitolins
* qu (Quechua) -- Valdis Vitolins
* qdb (Lang Belta) -- Da Def, Valdis Vitolins
* smj (Lule Saami) -- Katri Hiovain, Sebastien Le Maguer
* th (Thai) -- Valdis Vitolins
* tk (Turkmen) -- boracasli98, Valdis Vitolins
* ug (Uyghur) -- boracasli98, Valdis Vitolins

+ 5
- 0
Makefile.am View File

@@ -383,6 +383,7 @@ phsource/phonemes.stamp: \
phsource/ph_latvian \
phsource/ph_lithuanian \
phsource/ph_lojban \
phsource/ph_lule_saami \
phsource/ph_macedonian \
phsource/ph_malayalam \
phsource/ph_maltese \
@@ -578,6 +579,7 @@ dictionaries: \
espeak-ng-data/si_dict \
espeak-ng-data/sk_dict \
espeak-ng-data/sl_dict \
espeak-ng-data/smj_dict \
espeak-ng-data/sq_dict \
espeak-ng-data/sr_dict \
espeak-ng-data/sv_dict \
@@ -870,6 +872,9 @@ espeak-ng-data/sk_dict: dictsource/sk_list dictsource/sk_rules dictsource/sk_ext
sl: espeak-ng-data/sl_dict
espeak-ng-data/sl_dict: dictsource/sl_list dictsource/sl_rules dictsource/sl_extra dictsource/sl_emoji

smj: espeak-ng-data/smj_dict
espeak-ng-data/smj_dict: dictsource/smj_list dictsource/smj_rules

sq: espeak-ng-data/sq_dict
espeak-ng-data/sq_dict: dictsource/sq_list dictsource/sq_rules dictsource/sq_extra dictsource/sq_emoji


+ 8637
- 0
dictsource/smj_list
File diff suppressed because it is too large
View File


+ 282
- 0
dictsource/smj_rules View File

@@ -0,0 +1,282 @@
// SMJ translation rules
// This file is UTF-8 encoded

// _) a // "a" at the start of a word
// a (CC // "a" followed by two consonants
// a (C% // "a" followed by a double consonant (the same letter twice)
// a (/% // "a" followed by a percent sign
// %C) a // "a" preceded by a double consonant

// @@) bi // "bi" preceded by at least two syllables
// @@a) bi // "bi" preceded by at least 2 syllables and following 'a'

// START SMJ HERE

// groups

//.L01 i j // used in palatalization rules

.L01 b d g k p t // Plosives

.L02 r l j v // Sonorants

//.L02 b d f g h j l m n r s v

//.L03 l m n r

//.L04 a b c d e f g h i j k l m n o p q r s t u v õ ä ö ü

//.L05 a e i l m n o r u õ ä ö ü


// Epenthetic vowels in Q3 consonant clusters
// r l j v followed by homorganic consonants (geminates)
// barggo, dárbbo, tjoarvve, girjje, álggo, galbba, gålmmå, dálvve, nijbbe, bievdde

// Separate rules for all consonant clusters and vowel nuclei?
// a (L02%C // possible to generalize?
// A) rgg r@^g // Any vowel + schwa - more general?

//Orthographically homonymous Q3: f, l, m, n, nj, ń, ñ, ŋ, r, s, sj, v
// After numeral 1 -- Q3; after any numeral >1 -- Q2?
// akta @) ff ff:
// Word order? -- Impossible to have a syntactic disambiguator in Espeak to recognize homonymous sg.nom & sg.gen forms...
// OR if adjusted orthography would be used, e.g.: f'f/fff ff:
// Vowel compensation/readjustment: V1 and V2 longest in Q1, gradually decreasing in length, but differences very small/not significant.




.group a
// a A
a (@ A // back vowel
@) a a:
artihkal ar.t'ihkAl
%CC) a A // short after Q3 consonant/cluster

.group á
%CC) á a // short after Q3 consonant/cluster
á (@ A: //
@) á a: // in 2nd syll
C) á A: // after Q1 (single) consonant
.group ä
ä (@ &
ä &: //
// ä i& // in Sweden
ä e& // in Sweden
.group æ // same as ä
æ (@ &
æ &: //
// æ i& // in Sweden
æ e& // in Sweden
%CC) æ & // short after Q3 consonant/cluster
.group å
å O
@) å O: // in 2nd syll
%CC) å O // short after Q3 consonant/cluster
.group b
b p
A) b (A b // voiced in an intervocalic position
bb b: // voiced within a geminate
bussa p'u-s:A // Scandinavian loanword
.group c
c ts
.group d
d t
dal dAl
A) d (A d // voiced in an intervocalic position
dd d:
dj c // palatal plosive
A) dj (A J\ // voiced palatal plosive between vowels
A) ddj (A J\: // voiced palatal plosive between vowels
dts dz // [d͡z]
dtj dZ // [d͡ʒ]
.group e
e (@ E: // open-mid
e (_ iE // open 2nd syll
@) e iE // in 2nd syll
e E // other environments
.group f
f f
ff f:
akta @) ff (A ff: // (Q3/sg.nom when with numeral 1)
.group g
g k
A) g (A g // voiced in an intervocalic position
gg g:
.group h
h h
hk hk
hkk k: // [h] not audible in Q3 cons. cluster
hp hp
hpp p: // [h] not audible in Q3 cons. cluster
ht ht
htt t: // [h] not audible in Q3 cons. cluster
hts hts
htts h:ts
htj htS
httj ht:S
.group i
i i: // can be long as well?
ie (@ iE
%CC) i i // short after Q3 consonant
.group j
j j
ja ja
i) jbb jIb
journalissta Sur.nal'isstA

.group k
k k
k (á kh // Aspirated in Scandinavian loanwords, like this?
káffa kh'A:f:A
kásus kh'A:su-s
kássa kh'A:S:A
kk k:
.group l
l l // Can be a copula itself
ll l:
akta @) ll (A ll: // (Q3/sg.nom when with numeral 1)
á) lgg lAg // Q3
a) lbb lab // Q3
u) lpp lub // Q3
á) lmm lAm // Q3
á) lvv lAv // Q3
.group m
m m
mm m:
akta @) mm (A mm: // (Q3/sg.nom when with numeral 1)
.group n
n n
nj n_j
nnj n_j:
akta @) nnj (A nn_j: // (Q3/sg.nom when with numeral 1)
.group ñ
ñ N
ññ N:
akta @) ññ (A NN: // (Q3/sg.nom when with numeral 1)
.group ń
ń N
ńń N:
akta @) ńń (A NN: // (Q3/sg.nom when with numeral 1)
.group ŋ
ŋ N
ŋŋ N:
akta @) ŋŋ (A NN: // (Q3/sg.nom when with numeral 1)
.group o
o oO
@) o uo
oa ua //
oa (@ oA //
.group ö
%CC) ö Y
ö Y: //
.group ø
%CC) ø Y
ø Y: //
.group p
p p
p (á ph // Aspirated in Scandinavian loanwords
pp p:
parlamænnta par.lAm'&ntA

.group r
r r
rd rt
rl rl
// rl l. // Retroflex. in Scandinavian loanwords
rn rn
// rn n. // Retroflex. in Scandinavian loanwords
rt rt
// rt t. // Retroflex. in Scandinavian loanwords

a) rgg rag // Q3
e) rgg reg // Q3
æ) rgg r&g // Q3
a) rvv rav // Q3
á) rbb rAb // Q3
i) rjj rij // Q3
u) rŋŋ ruN // Q3
rr r:
akta @) rr (A rr: // (Q3/sg.nom when with numeral 1)

.group s
s s
ss s:
akta @) ss (A ss: // (Q3/sg.nom when with numeral 1)
sj S
ssj S: // Q2 or Q3
akta @) ssj (A SS: // (Q3/sg.nom when with numeral 1)
.group t
t t
t (á th // Aspirated in Scandinavian loanwords
t (_ ht // at the end of a word
tt t:
tj tS // voiceless alveo-palatal fricative in Sweden
ts ts // voiceless alveo-palatal fricative in Sweden e.g. <subtsas>
tts ts:

.group u
u u
uo (@ uo
// u u: //
// u U- //
// u u- // in newish Scand. loanwords like <komuvnna>

// List some of most common scand. loanwords?

.group v
// Default mapping and the doubled letter (lengthened consonant)
v v
vv v:
// The plosive letter group .L01 must be a FOLLOWING context "(L01" to match
// "before a plosive"; written as "L01) v" it would test the preceding letter.
v (L01 f // before a plosive
v (_ P // in the end of a word, e.g. 1sg suffix

// Q3 clusters: the preceding vowel is repeated between v and d
e) vdd ved // Q3
i) vdd vid // Q3
å) vdd vOd // Q3
akta @) vv (A vv: // (Q3/sg.nom when with numeral 1)

.group w
w v

.group x
x ks
xx k:s
.group y
y y

.group z
z ts
zz t:s // pizza
K) zh Z


+ 4
- 0
espeak-ng-data/lang/urj/smj View File

@@ -0,0 +1,4 @@
name Lule Saami
language smj

status testing

+ 427
- 0
phsource/ph_lule_saami View File

@@ -0,0 +1,427 @@

//====================================================
// Lule Saami
//====================================================

phoneme : // Lengthen previous vowel by "length"
virtual
length 160
endphoneme

phoneme @ // close schwa
vwl starttype #@ endtype #@
unstressed
length 120
FMT(vowel/@_4)
endphoneme

phoneme i
vwl starttype #i endtype #i
length 140
IfNextVowelAppend(;)
ChangeIfUnstressed(I)
FMT(vowel/i_4)
endphoneme

phoneme I
vwl starttype #i endtype #i
length 140
IfNextVowelAppend(;)
FMT(vowel/ii_3)
endphoneme

phoneme e
vwl starttype #e endtype #e
length 150
FMT(vowel/e_mid2)
endphoneme

phoneme E
vwl starttype #e endtype #e
length 160
FMT(vowel/ee_4)
endphoneme

phoneme &
vwl starttype #a endtype #a
length 150
FMT(vowel/&)
endphoneme

phoneme a
vwl starttype #a endtype #a
length 170
// ChangeIfUnstressed(a2)
FMT(vowel/aa_9)
endphoneme

phoneme a2
vwl starttype #a endtype #a
length 170
FMT(vowel/aa#)
endphoneme

phoneme A
vwl starttype #a endtype #a
length 170
FMT(vowel/aa_8)
endphoneme

phoneme o
vwl starttype #o endtype #o
length 155
FMT(vowel/oo_1)
endphoneme

phoneme O
vwl starttype #o endtype #o
length 150
FMT(vowel/o_mid)
endphoneme

phoneme Y
vwl starttype #@ endtype #@
length 155
FMT(vowel/y#)
endphoneme


phoneme u
vwl starttype #u endtype #u
length 140
FMT(vowel/u_bck)
endphoneme

phoneme u1 // TEST
vwl starttype #u endtype #u
length 140
FMT(vowel/u_2)
endphoneme

phoneme U // TEST
vwl starttype #u endtype #u
length 140
FMT(vowel/uu_4)
endphoneme

phoneme u-
vwl starttype #u endtype #u
ipa ʉ
length 200
FMT(vowel/u#_2)
endphoneme

phoneme y
vwl starttype #u endtype #u
length 150
FMT(vowel/y)
endphoneme

phoneme W
vwl starttype #@ endtype #@
length 120
FMT(vowel/oe_2)
endphoneme

// Liquid /w/. Two main cases: a vowel follows (outer IF) or not (outer ELSE).
phoneme w
liquid
lengthmod 7
starttype #u
IF nextPh(isVowel) THEN
// Choose the onset data for the following vowel from the w/w* set
NextVowelStarts
VowelStart(w/w@)
VowelStart(w/wa)
VowelStart(w/we)
VowelStart(w/wi)
VowelStart(w/wo)
VowelStart(w/wu)
EndSwitch

VowelEnding(w/xw, -30)

// The sound of w itself depends on the previous phoneme:
// after a nasal -> w/w, after h -> nothing emitted here, otherwise -> w/_w
IF prevPhW(isNasal) THEN
FMT(w/w)
ELSEIF prevPhW(h) THEN
// none, [hw]
ELSE
FMT(w/_w)
ENDIF
ELSE
// no vowel follows
Vowelout len=50
// After a phoneme of type #i use w/iw_; w/w_ is the statement that follows the IF
IF prevPh(#i) THEN
FMT(w/iw_)
ENDIF
FMT(w/w_)
ENDIF
endphoneme


// Diphthongs
//===========

phoneme ai
vwl starttype #a endtype #i
length 310
lng
FMT(vdiph/aai_3)
endphoneme

phoneme &i
vwl starttype #a endtype #i
length 290
lng
FMT(vdiph/&i)
endphoneme

phoneme oi
vwl starttype #o endtype #i
length 280
lng
FMT(vdiph/ooi)
endphoneme

phoneme ei
vwl starttype #e endtype #i
length 280
lng
FMT(vdiph/eei_2)
endphoneme

phoneme Yi
vwl starttype #@ endtype #i
length 270
lng
FMT(vdiph/y#i)
endphoneme

phoneme ui
vwl starttype #u endtype #i
length 270
lng
FMT(vdiph/ui_4)
endphoneme

phoneme yi
vwl starttype #u endtype #i
length 270
lng
FMT(vdiph/yi)
endphoneme


phoneme au
vwl starttype #a endtype #u
length 300
lng
FMT(vdiph/aau)
endphoneme

phoneme ou
vwl starttype #o endtype #u
length 280
lng
FMT(vdiph/ou)
endphoneme

phoneme eu
vwl starttype #e endtype #u
length 280
lng
FMT(vdiph/eu_2)
endphoneme

phoneme iu
vwl starttype #i endtype #u
length 280
lng
FMT(vdiph2/iu_2)
endphoneme


phoneme &y
vwl starttype #a endtype #u
length 280
lng
FMT(vdiph/&y)
endphoneme

phoneme ey
vwl starttype #e endtype #u
length 280
lng
FMT(vdiph/eey)
endphoneme

phoneme Yy
vwl starttype #@ endtype #u
length 270
lng
FMT(vdiph/y#y)
endphoneme

phoneme iy
vwl starttype #i endtype #u
length 260
lng
FMT(vdiph2/iy)
endphoneme


phoneme uo
vwl starttype #u endtype #o
length 280
lng
FMT(vdiph2/uo)
endphoneme

phoneme ie
vwl starttype #i endtype #e
length 280
lng
FMT(vdiph2/ie)
endphoneme

phoneme yY
vwl starttype #u endtype #@
length 280
lng
FMT(vdiph2/y-y#)
endphoneme




// CONSONANTS
//===========



phoneme r
import_phoneme base1/R
endphoneme

phoneme r. // retroflex
liquid rfx
lengthmod 3
Vowelout f1=3 f2=1400 -400 300 f3=-400 80 rms=35 len=15 colr=2
Vowelin f1=2 f2=1400 -400 300 f3=-400 80 len=20
IF nextPh(isNotVowel) THEN
FMT(r3/@tap_rfx_)
ENDIF
FMT(r3/@tap_rfx)
endphoneme


phoneme R2 // this is [R2], but shorter
liquid trl
lengthmod 6
ipa r
Vowelin f1=0 f2=1700 -300 300 f3=-300 80
Vowelout f1=2 f2=1700 -300 300 f3=-300 80 brk
FMT(r3/r_trill_short) addWav(r3/r_trill.wav, 50)
endphoneme

phoneme P
vls blb stp
lengthmod 2
voicingswitch b
Vowelin f1=0 f2=1000 -50 -100 f3=-200 80 gpaus
Vowelout f1=0 f2=1000 -500 -350 f3=-300 80 rms=30
WAV(ustop/p_unasp)
endphoneme



phoneme v // approximant, not fricative
// NOTE(review): the comment above says "approximant", but the feature line
// below declares frc -- confirm which description is intended.
vcd lbd frc
lengthmod 6
voicingswitch f
Vowelin f1=0 f2=1000 -300 -200 f3=-300 100
Vowelout f1=0 f2=1000 -500 -300 f3=-300 60 len=50
// Before a pause (isPause2) the voc/v_ data is used; voc/v is the statement after the IF
IF nextPh(isPause2) THEN
FMT(voc/v_)
ENDIF
FMT(voc/v)
endphoneme


phoneme t // dental variant of /t/
import_phoneme base1/t[
voicingswitch d
endphoneme



// /d/, declared vcd dnt stp; its voicingswitch partner is t.
phoneme d
vcd dnt stp
voicingswitch t
lengthmod 5
Vowelin f1=2 f2=1700 -100 200 f3=0 80
Vowelout f1=2 f2=1700 -100 200 f3=0 80 rms=20

// Pre-voicing segment, emitted only when the PreVoicing condition holds
IF PreVoicing THEN
FMT(d/xd)
ENDIF

// Context variants: before a pause (isPause2), or before r
IF nextPh(isPause2) THEN
FMT(d/d_) addWav(x/d_)
ELIF nextPh(r) THEN
FMT(d/dr) addWav(x/d_dnt, 60)
ENDIF

// Statement reached when neither context above applies
FMT(d/d) addWav(x/d_dnt, 60)
endphoneme


// /n/, declared vcd alv nas. Replaced by N when the next phoneme satisfies
// isVelar; otherwise the base1/n definition is called.
phoneme n
vcd alv nas
lengthmod 4
ipa n
// NOTE(review): the condition is isVelar (any velar), not only velar stops as the comment says
IF nextPhW(isVelar) THEN // /n/ is velarized to /N/ when before velar stops
ChangePhoneme(N)
ENDIF
CALL base1/n
endphoneme

phoneme p
import_phoneme base2/p
voicingswitch b
endphoneme



phoneme k
import_phoneme base2/k
voicingswitch g
endphoneme


phoneme s2 // second part of long [s]
import_phoneme consonants/s2
endphoneme


// /h/, declared vls glt apr. A recorded sample is chosen by the type of the
// next phoneme (#@, #a, #e, #i, #o, #u); h/h_ follows when no branch applies.
phoneme h
vls glt apr
lengthmod 3

IF nextPh(#@) THEN
WAV(h/h@)
ELIF nextPh(#a) THEN
WAV(h/ha)
ELIF nextPh(#e) THEN
WAV(h/he)
ELIF nextPh(#i) THEN
WAV(h/hi)
// The #o and #u branches share the sample h/hu_fi and differ only in the second argument (70 vs 80)
ELIF nextPh(#o) THEN
WAV(h/hu_fi, 70)
ELIF nextPh(#u) THEN
WAV(h/hu_fi, 80)
ENDIF
WAV(h/h_, 70) // no vowel following
endphoneme



+ 3
- 2
phsource/phonemes View File

@@ -602,7 +602,6 @@ endphoneme




phoneme w
liquid
lengthmod 7
@@ -639,7 +638,6 @@ endphoneme




phoneme j // **y**es
liquid pal starttype #i
lengthmod 7
@@ -1710,6 +1708,9 @@ include ph_farsi
phonemetable fi base1
include ph_finnish

phonemetable smj base1
include ph_lule_saami

phonemetable fr base1
include ph_french


+ 30
- 0
phsource/vowelcharts/smj View File

@@ -0,0 +1,30 @@
a 0 712 1160 2780 696 1160 2780
e 0 504 1900 2540 504 1900 2540
i 0 312 2300 2820 304 2300 2820
o 0 512 860 2648 512 860 2648
u 0 320 740 2400 320 740 2400
I 0 360 2120 2680 360 2120 2660
& 0 736 1540 2500 752 1540 2500
a2 0 657 1100 2703 657 1120 2703
Y 0 448 1520 2420 463 1520 2400
u1 0 344 800 2360 344 800 2360
U 0 368 800 2660 368 800 2660
y 0 280 1720 2340 280 1720 2340
ai 0 688 1064 2806 312 2300 2820
&i 0 744 1540 2500 328 2280 2720
oi 0 545 780 2720 310 2002 2500
ei 0 547 1855 2536 320 2300 2740
Yi 0 448 1520 2420 355 2222 2720
ui 0 320 740 2400 272 2247 3100
yi 0 280 1720 2320 296 2280 2780
au 0 688 1060 2580 368 900 2540
ou 0 516 860 2648 312 860 2360
eu 0 460 1929 2512 327 947 2367
iu 0 296 2180 2820 320 920 2360
&y 0 744 1520 2500 296 1720 2340
ey 0 547 1855 2536 280 1720 2340
Yy 0 448 1520 2420 280 1720 2340
iy 0 280 2280 2820 280 1720 2340
uo 0 344 896 2375 494 864 2468
ie 0 280 2280 2820 728 1562 2520
yY 0 280 1720 2340 448 1520 2420

+ 22
- 1
src/libespeak-ng/tr_languages.c View File

@@ -811,7 +811,8 @@ Translator *SelectTranslator(const char *name)
tr->langopts.param[LOPT_IT_DOUBLING] = 1;
tr->langopts.long_stop = 130;

// NOTE(review): langopts.numbers is assigned twice in a row below, so only the
// second value takes effect and the first is a dead store. If this text comes
// from a rendered diff, the first line is presumably the removed one; otherwise
// confirm which flag set is intended and delete the other assignment.
tr->langopts.numbers = NUM_DECIMAL_COMMA + NUM_ALLOW_SPACE;
tr->langopts.numbers = NUM_DECIMAL_COMMA | NUM_ALLOW_SPACE | NUM_SWAP_TENS | NUM_OMIT_1_HUNDRED | NUM_DFRACTION_2 | NUM_ORDINAL_DOT;
// tr->langopts.numbers = NUM_DECIMAL_COMMA | NUM_ALLOW_SPACE | NUM_ORDINAL_DOT;
SetLetterVowel(tr, 'y');
tr->langopts.spelling_stress = 1;
tr->langopts.intonation_group = 3; // less intonation, don't raise pitch at comma
@@ -1400,6 +1401,26 @@ Translator *SelectTranslator(const char *name)
tr->langopts.numbers2 = NUM2_THOUSANDS_VAR4;
tr->langopts.thousands_sep = ' '; // don't allow dot as thousands separator
break;
case L3('s', 'm', 'j'): // Lule Saami
{
	// Eight-entry stress tables handed to SetupTranslator() below.
	static const short smj_stress_lengths[8] = { 150, 180, 200, 200, 0, 0, 210, 250 };
	static const unsigned char smj_stress_amps[8] = { 18, 16, 22, 22, 20, 22, 22, 22 };

	SetupTranslator(tr, smj_stress_lengths, smj_stress_amps);

	// Stress options
	tr->langopts.stress_rule = STRESSPOSN_1L;
	tr->langopts.stress_flags =
		S_FINAL_DIM_ONLY |
		S_FINAL_NO_2 |
		S_2_TO_HEAVY; // move secondary stress from light to a following heavy syllable
	tr->langopts.param[LOPT_IT_DOUBLING] = 1;
	tr->langopts.long_stop = 130;

	// Number-handling flags
	tr->langopts.numbers =
		NUM_DECIMAL_COMMA |
		NUM_ALLOW_SPACE |
		NUM_SWAP_TENS |
		NUM_OMIT_1_HUNDRED |
		NUM_DFRACTION_2 |
		NUM_ORDINAL_DOT;

	// 'y' is registered as a vowel letter
	SetLetterVowel(tr, 'y');
	tr->langopts.spelling_stress = 1;
	tr->langopts.intonation_group = 3; // less intonation, don't raise pitch at comma
}
	break;
case L('s', 'q'): // Albanian
{
static const short stress_lengths_sq[8] = { 150, 150, 180, 180, 0, 0, 300, 300 };

Loading…
Cancel
Save