-x command-line option. Fixed possible buffer overflow when writing phoneme output. Slavic languages, changed behaviour of [v] during voiced/unvoiced propagation. When looking for a word boundary when breaking long clauses, allow for greater word size (Southern Indian languages have big words and 3-byte UTF-8 character codes). $combine. Don't combine with the next word if it is translated using a different language. git-svn-id: https://espeak.svn.sourceforge.net/svnroot/espeak/trunk@262 d46cf337-b52f-0410-862d-fd96e6ae7743 master
| m) a (lt_ A:l | m) a (lt_ A:l | ||||
| ker) am (ik 'A:m | ker) am (ik 'A:m | ||||
| @) an (_ 'A:n | @) an (_ 'A:n | ||||
| @) ance (_ 'A~s | |||||
| s) ance (_ 'A~s | |||||
| s) ancen (_ 'A~s@n | |||||
| fr) ance (_ 'A~s@ | fr) ance (_ 'A~s@ | ||||
| ein) an (der 'an | ein) an (der 'an | ||||
| @) ant (_ 'ant | @) ant (_ 'ant | ||||
| _) natur (C@P5 nA:t'u:r | _) natur (C@P5 nA:t'u:r | ||||
| _) na (tür n%A: | _) na (tür n%A: | ||||
| nce (_ _^_EN | nce (_ _^_EN | ||||
| nces (_ _^_EN | |||||
| _) ne (b ne: | _) ne (b ne: | ||||
| nebeneina (nd n,e:b@n_|aIn'a | nebeneina (nd n,e:b@n_|aIn'a | ||||
| _) neben (@P5 n'e:b@n | _) neben (@P5 n'e:b@n | ||||
| schwer Sve:r | schwer Sve:r | ||||
| _) sechs (P5 z'Eks | _) sechs (P5 z'Eks | ||||
| seku (nden ze:k'U | |||||
| _) selbst (@@P6 z'Elpst | _) selbst (@@P6 z'Elpst | ||||
| service (_S7 z%Ervi:s | service (_S7 z%Ervi:s | ||||
| shop _^_EN | shop _^_EN | ||||
| _) w (lad v | _) w (lad v | ||||
| _) w (rA v | _) w (rA v | ||||
| @) wert (_ vErt | |||||
| @) wert (_ ve:rt | |||||
| _) wh _^_EN | _) wh _^_EN | ||||
| &) ware (_ _^_EN | &) ware (_ _^_EN | ||||
| _) wasch (@P5 v'aS | _) wasch (@P5 v'aS |
| z Z z. | z Z z. | ||||
| Dictionary bg_dict 2011-04-20 | |||||
| Dictionary bg_dict 2011-04-25 | |||||
| @ a e i o u | @ a e i o u | ||||
| prede (ces pri:dI | prede (ces pri:dI | ||||
| _) pre (diP3 pr%i | _) pre (diP3 pr%i | ||||
| _) pre (dic pr%I | _) pre (dic pr%I | ||||
| _) pre (emp pr%I | |||||
| _) pre (em pr%i: | |||||
| _) pre (fec pri: | _) pre (fec pri: | ||||
| _) pre (feren prE | _) pre (feren prE | ||||
| pre (fix pri: | pre (fix pri: |
| _) க் (ரௌண்ட g | _) க் (ரௌண்ட g | ||||
| _) க்வ (A kw | _) க்வ (A kw | ||||
| _) க் (வாலியர g | _) க் (வாலியர g | ||||
| _) கி (ராம்ப k | |||||
| _) க (ஞ்சா gV | |||||
| //endsort | //endsort | ||||
| //sort | //sort | ||||
| என்சை) க்லோப (ீடியா klo:p | என்சை) க்லோப (ீடியா klo:p | ||||
| க் (ஸ k | க் (ஸ k | ||||
| _மெ) க்ஸிக (ோ ksik | _மெ) க்ஸிக (ோ ksik | ||||
| _ராஜ்) க (ிரண k | |||||
| இராம) க (ாவிய k | |||||
| _மெரு) க (ூட்ட g | |||||
| //endsort | //endsort | ||||
| _) த (ோஹா d | _) த (ோஹா d | ||||
| _) தௌலதாப (ாத daUlVd.a:b | _) தௌலதாப (ாத daUlVd.a:b | ||||
| _) த (்ராட்சை d | _) த (்ராட்சை d | ||||
| _) த (வக்கால dV | |||||
| _) த (ருமங் dV | |||||
| _) த (ருமப் dV | |||||
| //endsort | //endsort | ||||
| //sort | //sort | ||||
| த) த (்ரூப t | த) த (்ரூப t | ||||
| பா) த (்ரூம t | பா) த (்ரூம t | ||||
| _மை) த் (ரேய t | _மை) த் (ரேய t | ||||
| L03) த்த (ின dd | |||||
| L03) த்த (ிசை dd | |||||
| L03) த்த (ூச dd | |||||
| L03) த்த (ேவன dd | |||||
| L03) த்த (ேவர dd | |||||
| L03) த்த (ேவத dd | |||||
| //endsort | //endsort | ||||
| _) ப் (ளவுஸ b | _) ப் (ளவுஸ b | ||||
| _) ப் (ளோரிடா f | _) ப் (ளோரிடா f | ||||
| _) ப் (ளௌஸ b | _) ப் (ளௌஸ b | ||||
| _) பு (ரோகித p | |||||
| _) ப (ீன்ஸ b | |||||
| _) ப (ிஜிலி b | |||||
| _) ப (ேமானி b | |||||
| //endsort | //endsort | ||||
| //sort | //sort | ||||
| ஏரோ) ப் (ளேன p | ஏரோ) ப் (ளேன p | ||||
| _) ப் (ளோரன்ஸ f | _) ப் (ளோரன்ஸ f | ||||
| ப் (ஸ p | ப் (ஸ p | ||||
| _அ) ப (ின்_ b | |||||
| _குமார) ப (ாளைய p | |||||
| //endsort | //endsort | ||||
| // If ப+ appearing in the middle of a word is followed by ட+ | // If ப+ appearing in the middle of a word is followed by ட+ |
| [l/] fr | [l/] fr | ||||
| l/l_@ [l/3] base | l/l_@ [l/3] base | ||||
| [l/] fr | [l/] fr | ||||
| l/l@ [l#] base | |||||
| [”¦] base | |||||
| l/l@ [hös] base | |||||
| [l#] base | |||||
| [l] fr | [l] fr | ||||
| [l/2] fr | [l/2] fr | ||||
| [K] nso | [K] nso | ||||
| l/L2_uL [l/2] base | l/L2_uL [l/2] base | ||||
| l/l_3 [l/] de | l/l_3 [l/] de | ||||
| l/l_4 [ll] sq | l/l_4 [ll] sq | ||||
| l/la [l#] base | |||||
| [”¦] base | |||||
| l/la [hös] base | |||||
| [l#] base | |||||
| [l] fr | [l] fr | ||||
| [l/2] fr | [l/2] fr | ||||
| [K] nso | [K] nso | ||||
| [K] tn | [K] tn | ||||
| l/l_a [l/3] base | l/l_a [l/3] base | ||||
| [l/] fr | [l/] fr | ||||
| l/le [l#] base | |||||
| [”¦] base | |||||
| l/le [hös] base | |||||
| [l#] base | |||||
| [l] fr | [l] fr | ||||
| [l/2] fr | [l/2] fr | ||||
| [K] nso | [K] nso | ||||
| [&:] af | [&:] af | ||||
| l/l_front [L] sq | l/l_front [L] sq | ||||
| l/l_front_ [l/4] sq | l/l_front_ [l/4] sq | ||||
| l/li [l#] base | |||||
| [”¦] base | |||||
| l/li [hös] base | |||||
| [l#] base | |||||
| [l] fr | [l] fr | ||||
| [l/2] fr | [l/2] fr | ||||
| [l] zh | [l] zh | ||||
| ll/_ll [L] bg | ll/_ll [L] bg | ||||
| l/l_long [l] base | l/l_long [l] base | ||||
| [l] fr | [l] fr | ||||
| l/lo [l#] base | |||||
| [”¦] base | |||||
| l/lo [hös] base | |||||
| [l#] base | |||||
| [l/2] fr | [l/2] fr | ||||
| [K] nso | [K] nso | ||||
| [K] tn | [K] tn | ||||
| l^/l_rfx [l.] base | l^/l_rfx [l.] base | ||||
| [l] ru | [l] ru | ||||
| [l^] ru | [l^] ru | ||||
| l/lu [l#] base | |||||
| [”¦] base | |||||
| l/lu [hös] base | |||||
| [l#] base | |||||
| [l] fr | [l] fr | ||||
| [l/2] fr | [l/2] fr | ||||
| [K] nso | [K] nso |
| /*************************************************************************** | /*************************************************************************** | ||||
| * Copyright (C) 2005 to 2010 by Jonathan Duddington * | |||||
| * Copyright (C) 2005 to 2011 by Jonathan Duddington * | |||||
| * email: [email protected] * | * email: [email protected] * | ||||
| * * | * * | ||||
| * This program is free software; you can redistribute it and/or modify * | * This program is free software; you can redistribute it and/or modify * | ||||
| }; | }; | ||||
| static void WritePhMnemonic(char *phon_out, int *ix, PHONEME_TAB *ph, PHONEME_LIST *plist) | |||||
| static void WritePhMnemonic(char **buf, PHONEME_TAB *ph, PHONEME_LIST *plist) | |||||
| {//======================================================================================= | {//======================================================================================= | ||||
| int c; | int c; | ||||
| int mnem; | int mnem; | ||||
| int len; | int len; | ||||
| int first; | int first; | ||||
| int ix = 0; | |||||
| char *phon_out; | |||||
| unsigned int ipa_control=0; // first byte of ipa string may control the phoneme name interpretation. 0x20 = ignore this phoneme | unsigned int ipa_control=0; // first byte of ipa string may control the phoneme name interpretation. 0x20 = ignore this phoneme | ||||
| PHONEME_DATA phdata; | PHONEME_DATA phdata; | ||||
| phon_out = *buf; | |||||
| if(option_phonemes == 3) | if(option_phonemes == 3) | ||||
| { | { | ||||
| // has an ipa name been defined for this phoneme ? | // has an ipa name been defined for this phoneme ? | ||||
| { | { | ||||
| if((ipa_control = phdata.ipa_string[0]) > 0x20) | if((ipa_control = phdata.ipa_string[0]) > 0x20) | ||||
| { | { | ||||
| strcpy(&phon_out[*ix], phdata.ipa_string); | |||||
| *ix += len; | |||||
| strcpy(&phon_out[ix], phdata.ipa_string); | |||||
| ix += len; | |||||
| } | } | ||||
| if(ipa_control >= 0x20) | if(ipa_control >= 0x20) | ||||
| { | |||||
| *buf += ix; | |||||
| return; // 0x20 = ignore phoneme | return; // 0x20 = ignore phoneme | ||||
| } | |||||
| } | } | ||||
| } | } | ||||
| if((c >= 0x20) && (c < 128)) | if((c >= 0x20) && (c < 128)) | ||||
| c = ipa1[c-0x20]; | c = ipa1[c-0x20]; | ||||
| *ix += utf8_out(c, &phon_out[*ix]); | |||||
| ix += utf8_out(c, &phon_out[ix]); | |||||
| } | } | ||||
| else | else | ||||
| { | { | ||||
| phon_out[(*ix)++]= c; | |||||
| phon_out[ix++]= c; | |||||
| } | } | ||||
| first = 0; | first = 0; | ||||
| } | } | ||||
| *buf += ix; | |||||
| } // end of WritePhMnemonic | } // end of WritePhMnemonic | ||||
| */ | */ | ||||
| int ix; | int ix; | ||||
| int len; | |||||
| int max_len; | |||||
| int phon_out_ix=0; | int phon_out_ix=0; | ||||
| int stress; | int stress; | ||||
| unsigned int c; | unsigned int c; | ||||
| char *p; | char *p; | ||||
| char *buf; | |||||
| char phon_buf[30]; | |||||
| PHONEME_LIST *plist; | PHONEME_LIST *plist; | ||||
| static const char *stress_chars = "==,,''"; | static const char *stress_chars = "==,,''"; | ||||
| if(phon_out != NULL) | if(phon_out != NULL) | ||||
| { | { | ||||
| for(ix=1; ix<(n_phoneme_list-2) && (phon_out_ix < (n_phon_out - 6)); ix++) | |||||
| for(ix=1; ix<(n_phoneme_list-2); ix++) | |||||
| { | { | ||||
| buf = phon_buf; | |||||
| plist = &phoneme_list[ix]; | plist = &phoneme_list[ix]; | ||||
| if(plist->newword) | if(plist->newword) | ||||
| phon_out[phon_out_ix++] = ' '; | |||||
| *buf++ = ' '; | |||||
| if(plist->synthflags & SFLAG_SYLLABLE) | if(plist->synthflags & SFLAG_SYLLABLE) | ||||
| { | { | ||||
| if(c != 0) | if(c != 0) | ||||
| { | { | ||||
| phon_out_ix += utf8_out(c, &phon_out[phon_out_ix]); | |||||
| buf += utf8_out(c, buf); | |||||
| } | } | ||||
| } | } | ||||
| } | } | ||||
| // the tone_ph field contains a phoneme table number | // the tone_ph field contains a phoneme table number | ||||
| p = phoneme_tab_list[plist->tone_ph].name; | p = phoneme_tab_list[plist->tone_ph].name; | ||||
| sprintf(&phon_out[phon_out_ix], "(%s)", p); | |||||
| phon_out_ix += (strlen(p) + 2); | |||||
| sprintf(buf, "(%s)", p); | |||||
| buf += (strlen(p) + 2); | |||||
| } | } | ||||
| else | else | ||||
| { | { | ||||
| WritePhMnemonic(phon_out, &phon_out_ix, plist->ph, plist); | |||||
| WritePhMnemonic(&buf, plist->ph, plist); | |||||
| if(plist->synthflags & SFLAG_LENGTHEN) | if(plist->synthflags & SFLAG_LENGTHEN) | ||||
| { | { | ||||
| WritePhMnemonic(phon_out, &phon_out_ix, phoneme_tab[phonLENGTHEN], NULL); | |||||
| WritePhMnemonic(&buf, phoneme_tab[phonLENGTHEN], NULL); | |||||
| } | } | ||||
| if((plist->synthflags & SFLAG_SYLLABLE) && (plist->type != phVOWEL)) | if((plist->synthflags & SFLAG_SYLLABLE) && (plist->type != phVOWEL)) | ||||
| { | { | ||||
| // syllabic consonant | // syllabic consonant | ||||
| WritePhMnemonic(phon_out, &phon_out_ix, phoneme_tab[phonSYLLABIC], NULL); | |||||
| WritePhMnemonic(&buf, phoneme_tab[phonSYLLABIC], NULL); | |||||
| } | } | ||||
| if(plist->tone_ph > 0) | if(plist->tone_ph > 0) | ||||
| { | { | ||||
| WritePhMnemonic(phon_out, &phon_out_ix, phoneme_tab[plist->tone_ph], NULL); | |||||
| WritePhMnemonic(&buf, phoneme_tab[plist->tone_ph], NULL); | |||||
| } | } | ||||
| } | } | ||||
| } | |||||
| if(phon_out_ix >= n_phon_out) | |||||
| phon_out_ix = n_phon_out - 1; | |||||
| len = buf - phon_buf; | |||||
| max_len = (n_phon_out - phon_out_ix - 5); // allow for " ..." and zero byte terminator | |||||
| if(len > max_len) | |||||
| { | |||||
| strcpy(&phon_buf[max_len], " ..."); | |||||
| len = max_len + 4; | |||||
| } | |||||
| phon_buf[len] = 0; | |||||
| strcpy(&phon_out[phon_out_ix], phon_buf); | |||||
| phon_out_ix += len; | |||||
| if(len > max_len) | |||||
| { | |||||
| break; | |||||
| } | |||||
| } | |||||
| phon_out[phon_out_ix] = 0; | phon_out[phon_out_ix] = 0; | ||||
| } | } | ||||
| } // end of GetTranslatedPhonemeString | } // end of GetTranslatedPhonemeString |
| /*************************************************************************** | /*************************************************************************** | ||||
| * Copyright (C) 2005 to 2007 by Jonathan Duddington * | |||||
| * Copyright (C) 2005 to 2011 by Jonathan Duddington * | |||||
| * email: [email protected] * | * email: [email protected] * | ||||
| * * | * * | ||||
| * This program is free software; you can redistribute it and/or modify * | * This program is free software; you can redistribute it and/or modify * | ||||
| // set consonant clusters to all voiced or all unvoiced | // set consonant clusters to all voiced or all unvoiced | ||||
| // Regressive | // Regressive | ||||
| int type; | int type; | ||||
| int word_end_devoice = 0; | |||||
| int stop_propagation = 0; | |||||
| voicing = 0; | voicing = 0; | ||||
| for(j=n_ph_list2-1; j>=0; j--) | for(j=n_ph_list2-1; j>=0; j--) | ||||
| if(regression & 0x2) | if(regression & 0x2) | ||||
| { | { | ||||
| // LANG=Russian, [v] and [v;] don't cause regression, or [R^] | |||||
| // [v] and [v;] don't cause regression, or [R^] | |||||
| if((ph->mnemonic == 'v') || (ph->mnemonic == ((';'<<8)+'v')) || ((ph->mnemonic & 0xff)== 'R')) | if((ph->mnemonic == 'v') || (ph->mnemonic == ((';'<<8)+'v')) || ((ph->mnemonic & 0xff)== 'R')) | ||||
| { | { | ||||
| if(word_end_devoice == 1) | |||||
| voicing = 0; | |||||
| else | |||||
| type = phLIQUID; | |||||
| stop_propagation = 1; | |||||
| } | } | ||||
| } | } | ||||
| voicing = 0; | voicing = 0; | ||||
| } | } | ||||
| } | } | ||||
| if(stop_propagation) | |||||
| { | |||||
| voicing = 0; | |||||
| stop_propagation = 0; | |||||
| } | |||||
| word_end_devoice = 0; | |||||
| if(plist2[j].sourceix) | if(plist2[j].sourceix) | ||||
| { | { | ||||
| if(regression & 0x04) | if(regression & 0x04) | ||||
| if(voicing == 0) | if(voicing == 0) | ||||
| { | { | ||||
| voicing = 1; | voicing = 1; | ||||
| word_end_devoice = 1; | |||||
| } | } | ||||
| } | } | ||||
| } | } |
| } | } | ||||
| *charix_top = ix; | *charix_top = ix; | ||||
| if(((ix > (n_buf-20)) && !IsAlpha(c1) && !iswdigit(c1)) || (ix >= (n_buf-2))) | |||||
| if(((ix > (n_buf-75)) && !IsAlpha(c1) && !iswdigit(c1)) || (ix >= (n_buf-4))) | |||||
| { | { | ||||
| // clause too long, getting near end of buffer, so break here | // clause too long, getting near end of buffer, so break here | ||||
| // try to break at a word boundary (unless we actually reach the end of buffer). | // try to break at a word boundary (unless we actually reach the end of buffer). | ||||
| // (n_buf-4) is to allow for 3 bytes of multibyte character plus terminator. | |||||
| buf[ix] = ' '; | buf[ix] = ' '; | ||||
| buf[ix+1] = 0; | buf[ix+1] = 0; | ||||
| UngetC(c2); | UngetC(c2); |
| #include "translate.h" | #include "translate.h" | ||||
| #include "wave.h" | #include "wave.h" | ||||
| const char *version_string = "1.45.04 25.Apr.11"; | |||||
| const char *version_string = "1.45.21 27.Apr.11"; | |||||
| const int version_phdata = 0x014500; | const int version_phdata = 0x014500; | ||||
| int option_device_number = -1; | int option_device_number = -1; |
| unsigned int embedded_list[N_EMBEDDED_LIST]; | unsigned int embedded_list[N_EMBEDDED_LIST]; | ||||
| // the source text of a single clause (UTF8 bytes) | // the source text of a single clause (UTF8 bytes) | ||||
| #define N_TR_SOURCE 700 | |||||
| #define N_TR_SOURCE 800 | |||||
| static char source[N_TR_SOURCE+40]; // extra space for embedded command & voice change info at end | static char source[N_TR_SOURCE+40]; // extra space for embedded command & voice change info at end | ||||
| int n_replace_phonemes; | int n_replace_phonemes; | ||||
| strcpy(ph_buf,word_phonemes); | strcpy(ph_buf,word_phonemes); | ||||
| flags2[0] = TranslateWord(translator, p2+1, 0, wtab+1); | flags2[0] = TranslateWord(translator, p2+1, 0, wtab+1); | ||||
| if(flags2[0] & FLAG_WAS_UNPRONOUNCABLE) | |||||
| if((flags2[0] & FLAG_WAS_UNPRONOUNCABLE) || (word_phonemes[0] == phonSWITCH)) | |||||
| ok = 0; | ok = 0; | ||||
| if(sylimit & 0x100) | if(sylimit & 0x100) |
| #define LOPT_PREFIXES 3 | #define LOPT_PREFIXES 3 | ||||
| // non-zero, change voiced/unvoiced to match last consonant in a cluster | // non-zero, change voiced/unvoiced to match last consonant in a cluster | ||||
| // bit 1=LANG=ru, don't propagate over [v] | |||||
| // bit 1=LANG=cz,bg don't propagate over [v] | |||||
| // bit 2=don't propagate across word boundaries | // bit 2=don't propagate across word boundaries | ||||
| // bit 3=LANG=pl, propagate over liquids and nasals | // bit 3=LANG=pl, propagate over liquids and nasals | ||||
| // bit 4=devoice word-final consonants | // bit 4=devoice word-final consonants | ||||
| int transpose_min; | int transpose_min; | ||||
| char dictionary_name[40]; | char dictionary_name[40]; | ||||
| char phon_out[300]; | |||||
| char phon_out[400]; | |||||
| char phonemes_repeat[20]; | char phonemes_repeat[20]; | ||||
| int phonemes_repeat_count; | int phonemes_repeat_count; | ||||
| int phoneme_tab_ix; | int phoneme_tab_ix; |