Fix: crash with very long words. git-svn-id: https://espeak.svn.sourceforge.net/svnroot/espeak/trunk@168 d46cf337-b52f-0410-862d-fd96e6ae7743 (branch: master)
| // 2006-11-18 Gilles Casse <[email protected]> | // 2006-11-18 Gilles Casse <[email protected]> | ||||
| // | // | ||||
| // Updated 2008-03-05 Michel Such <[email protected]> | |||||
| // Updated 2008-03-12 Michel Such <[email protected]> | |||||
| // | // | ||||
| // * Numbers, a few abbreviations and exceptions. | // * Numbers, a few abbreviations and exceptions. | ||||
| // | // | ||||
| //n En // in fr_rules | //n En // in fr_rules | ||||
| ñ $accent | ñ $accent | ||||
| o o | o o | ||||
| ô $accent | |||||
| _ô $accent | |||||
| ö $accent | ö $accent | ||||
| p pe | p pe | ||||
| q ky | q ky | ||||
| r ER | r ER | ||||
| s Es | s Es | ||||
| t te | |||||
| u y | u y | ||||
| ù $accent | ù $accent | ||||
| û $accent | û $accent | ||||
| v ve | v ve | ||||
| w dubl@v'e | w dubl@v'e | ||||
| x iks | x iks | ||||
| z zEd | |||||
| _y i:gR'Ek | _y i:gR'Ek | ||||
| z zEd | |||||
| comme $u | comme $u | ||||
| dont dO~t2 $u | dont dO~t2 $u | ||||
| lorsque $u | lorsque $u | ||||
| parce $u | |||||
| puisque $u | puisque $u | ||||
| quand $u | quand $u | ||||
| que $u | que $u | ||||
| par $u | par $u | ||||
| pour $u | pour $u | ||||
| en $u | en $u | ||||
| (en effet) A~nEf'E $brk | |||||
| (en suspens) A~||sysp'A~ | (en suspens) A~||sysp'A~ | ||||
| vers $u | vers $u | ||||
| // Letters which can be words | // Letters which can be words | ||||
| //=========================== | //=========================== | ||||
| à $atend $accent | à $atend $accent | ||||
| y $atend | |||||
| y igr'Ek $atend | |||||
| ô $atend $accent | |||||
| // pronunciation exceptions | // pronunciation exceptions | ||||
| aspic aspik | |||||
| consent kO~s'A~t2 | consent kO~s'A~t2 | ||||
| scient si | scient si | ||||
| coing kwE~ | coing kwE~ | ||||
| convent kO~vA~ | convent kO~vA~ | ||||
| couvent k'uvt2 $verb | couvent k'uvt2 $verb | ||||
| (couvent couvent) kuvA~||k'uvt2 | (couvent couvent) kuvA~||k'uvt2 | ||||
| dessus d@sy | |||||
| dessous d@su | dessous d@su | ||||
| divers divErz2 | divers divErz2 | ||||
| hareng _!arA~ | hareng _!arA~ | ||||
| cocus coky | cocus coky | ||||
| estomac Estoma | estomac Estoma | ||||
| fils fis | fils fis | ||||
| gars ga | |||||
| iris iris | iris iris | ||||
| juin ZyE~ | juin ZyE~ | ||||
| laser lazEr | laser lazEr | ||||
| m'y mi $verbf | m'y mi $verbf | ||||
| t'y ti $verbf | t'y ti $verbf | ||||
| s'y si $verbf | s'y si $verbf | ||||
| n'y ni $verbf | |||||
| l'y li $verbf | |||||
| qu'y ki $verbf | qu'y ki $verbf | ||||
| (sud est) sydEst | (sud est) sydEst | ||||
| (nord est) nOREst | (nord est) nOREst | ||||
| // 2006-11-18 Gilles Casse <[email protected]> | // 2006-11-18 Gilles Casse <[email protected]> | ||||
| // | // | ||||
| // Updated: 2008-03-10 Michel Such <[email protected]> | |||||
| // Updated: 2008-03-12 Michel Such <[email protected]> | |||||
| // | // | ||||
| // * The rules are based on Cicero TTS. | // * The rules are based on Cicero TTS. | ||||
| // Y | // Y | ||||
| _cl) ef (_ e // clef | _cl) ef (_ e // clef | ||||
| ein (C E~ // peindre | ein (C E~ // peindre | ||||
| ein (_ E~ | ein (_ E~ | ||||
| eint (_ E~t2 | |||||
| e (il E // vieille | e (il E // vieille | ||||
| ei E // neige | ei E // neige | ||||
| ell El // selle | ell El // selle | ||||
| ertin) en (t_ A~ // pertinent, impertinent | ertin) en (t_ A~ // pertinent, impertinent | ||||
| ontin) en (t_ A~ // continent, incontinent | ontin) en (t_ A~ // continent, incontinent | ||||
| erm) en (t_ A~ // ferment, serment | |||||
| _sArm) en (t_ A~ // sarment, serment | |||||
| erm) en (t_ A~ // ferment | |||||
| Vferm) en (t_ // ferment, referment (verbe) | Vferm) en (t_ // ferment, referment (verbe) | ||||
| XArp) en (t_ A~ // arpent, serpent | XArp) en (t_ A~ // arpent, serpent | ||||
| CArr) en (t_ A~ // concurrent, torrent | CArr) en (t_ A~ // concurrent, torrent | ||||
| Cflu) en (t_ A~ // affluent, confluent | Cflu) en (t_ A~ // affluent, confluent | ||||
| VCflu) en (t_ // refluent, influent (verbe) | VCflu) en (t_ // refluent, influent (verbe) | ||||
| al) en (t_ A~ // talent équivalent | |||||
| _tal) en (t_ A~ // talent | |||||
| ival) en (t_ A~ // équivalent | |||||
| Vival) en (t_ // équivalent (verbe) | Vival) en (t_ // équivalent (verbe) | ||||
| xcell) en (t_ A~ // excellent | xcell) en (t_ A~ // excellent | ||||
| Vxcell) en (t_ // excellent | |||||
| nn) en (t_ t2 // prennent, viennent, sonnent | nn) en (t_ t2 // prennent, viennent, sonnent | ||||
| iCam) en (t_ A~ | iCam) en (t_ A~ | ||||
| qui_couv) en (t_ | qui_couv) en (t_ | ||||
| _couv) en (t_ A~ // le couvent | _couv) en (t_ A~ // le couvent | ||||
| en (t_ // étaient mangent | |||||
| en (t_ t2 // étaient mangent | |||||
| Ci) en (_ E~ // viendra | Ci) en (_ E~ // viendra | ||||
| Ci) en (C E~ // viendra | Ci) en (C E~ // viendra | ||||
| er (_ e // parler léger | er (_ e // parler léger | ||||
| er (s_ e | er (s_ e | ||||
| i) ert (_t2 e // requiert | |||||
| i) e (rt_ E // requiert | |||||
| d) e (ssous_ @ // dessous | |||||
| d) e (ssus_ @ // dessus | d) e (ssus_ @ // dessus | ||||
| _d) es (bA e | _d) es (bA e | ||||
| _r) e (ss @ // ressembler | _r) e (ss @ // ressembler | ||||
| _r) e (ssu e // ressusciter | _r) e (ssu e // ressusciter | ||||
| _r) e (stra @ | _r) e (stra @ | ||||
| _r) e (stre @ | |||||
| _r) e (stro @ | _r) e (stro @ | ||||
| _r) e (stru @ // restructurer | _r) e (stru @ // restructurer | ||||
| -) est (_ Est // -est | -) est (_ Est // -est | ||||
| dg) et (_ Et // Bridget, gadget | dg) et (_ Et // Bridget, gadget | ||||
| ') eu (_ y // eu | ') eu (_ y // eu | ||||
| eu (r_ @ // peur | |||||
| eu (rs_ @ // tracteurs | |||||
| eu (rX @ // peur | |||||
| // eu (rs_ @ // tracteurs | |||||
| eu (b @ // meuble | |||||
| eu (f @ // neuf | |||||
| eu (C @ | |||||
| eu (i @ | eu (i @ | ||||
| eu (l @ // fieul | |||||
| eu (n @ // jeune | |||||
| eu (p @ | |||||
| eu (r @ | |||||
| eu (v @ | |||||
| eu (s Y // meute tueuse jeu gueuse | |||||
| eu (x_ Y | |||||
| eu (t_ Y | |||||
| eu Y // meute tueuse jeu gueuze | eu Y // meute tueuse jeu gueuze | ||||
| _) eû y // eût | _) eû y // eût | ||||
| eû Y // jeûne | eû Y // jeûne | ||||
| tr) e (CrA @ // entreprise | tr) e (CrA @ // entreprise | ||||
| e (CC E // infect pelle mettre | e (CC E // infect pelle mettre | ||||
| e (CC E // infect pelle mettre | |||||
| _) e (ff e // effet, effectuer | |||||
| _s) e (cr @ // secret | _s) e (cr @ // secret | ||||
| e (C_ E | e (C_ E | ||||
| es (_ z2 | es (_ z2 | ||||
| ie (ment i // remerciement balbutiement | ie (ment i // remerciement balbutiement | ||||
| C) ien (t_ i | C) ien (t_ i | ||||
| dév) ien (t_ i | dév) ien (t_ i | ||||
| t) ien (t_ jE~ // tient | |||||
| v) ien (t_ jE~ // vient | |||||
| t) ient (_ jE~t2 // tient | |||||
| v) ient (_ jE~t2 // vient | |||||
| at) ien (t_ jA~ // patient | at) ien (t_ jA~ // patient | ||||
| cip) ien (C jA~ // récipient | cip) ien (C jA~ // récipient | ||||
| cl) ien (C iA~ // client | cl) ien (C iA~ // client | ||||
| in (C E~ // vingt | in (C E~ // vingt | ||||
| in (_ W~ // vin | in (_ W~ // vin | ||||
| ique (_ ik | ique (_ ik | ||||
| C) is (_ i | |||||
| // group i: English section | // group i: English section | ||||
| iev _^_en | iev _^_en | ||||
| .group t | .group t | ||||
| t t // bataille | t t // bataille | ||||
| _) t (_ te | |||||
| -) t (- t | |||||
| V) t (_ t2 // silent at end of verb unless next word starts with a vowel | V) t (_ t2 // silent at end of verb unless next word starts with a vowel | ||||
| t (_ // silent at end of word | t (_ // silent at end of word | ||||
| t (-S1 t | |||||
| _) th (onX t // thon | _) th (onX t // thon | ||||
| _) th (ymX t // thym | _) th (ymX t // thym | ||||
| ts (_S1 z2 // silent at end of word unless next word starts with a vowel | ts (_S1 z2 // silent at end of word unless next word starts with a vowel |
| #include "translate.h" | #include "translate.h" | ||||
| #include "wave.h" | #include "wave.h" | ||||
| const char *version_string = "1.36.01 11.Mar.08"; | |||||
| const char *version_string = "1.36.02 13.Mar.08"; | |||||
| const int version_phdata = 0x013400; | const int version_phdata = 0x013400; | ||||
| int option_device_number = -1; | int option_device_number = -1; |
| // other characters which break a word, but don't produce a pause | // other characters which break a word, but don't produce a pause | ||||
| static const unsigned short breaks[] = {'_', 0}; | static const unsigned short breaks[] = {'_', 0}; | ||||
| // treat these characters as spaces, in addition to iswspace() | |||||
| static const wchar_t chars_space[] = {0x2500,0}; // box drawing horiz | |||||
| // Translate character codes 0xA0 to 0xFF into their unicode values | // Translate character codes 0xA0 to 0xFF into their unicode values | ||||
| return(0); | return(0); | ||||
| } | } | ||||
| int IsSpace(unsigned int c) | |||||
| {//======================== | |||||
| if(wcschr(chars_space,c)) | |||||
| return(1); | |||||
| return(iswspace(c)); | |||||
| } | |||||
| Translator::Translator() | Translator::Translator() | ||||
| {//===================== | {//===================== | ||||
| const char *new_language; | const char *new_language; | ||||
| unsigned char bad_phoneme[4]; | unsigned char bad_phoneme[4]; | ||||
| int word_copy_len; | int word_copy_len; | ||||
| char word_copy[N_WORD_BYTES]; | |||||
| char word_copy[N_WORD_BYTES+1]; | |||||
| len = wtab->length; | len = wtab->length; | ||||
| if(len > 31) len = 31; | if(len > 31) len = 31; | ||||
| { | { | ||||
| int c2; | int c2; | ||||
| ix = 0; | ix = 0; | ||||
| while(((c2 = word_copy[ix] = word[ix]) != ' ') && (c2 != 0)) ix++; | |||||
| while(((c2 = word_copy[ix] = word[ix]) != ' ') && (c2 != 0) && (ix < N_WORD_BYTES)) ix++; | |||||
| word_copy_len = ix; | word_copy_len = ix; | ||||
| flags = translator->TranslateWord(word, next_pause, wtab); | flags = translator->TranslateWord(word, next_pause, wtab); | ||||
| next_in = char_inserted; | next_in = char_inserted; | ||||
| // allow certain punctuation within a word (usually only apostrophe) | // allow certain punctuation within a word (usually only apostrophe) | ||||
| if(!IsAlpha(c) && !iswspace(c) && (wcschr(punct_within_word,c) == 0)) | |||||
| if(!IsAlpha(c) && !IsSpace(c) && (wcschr(punct_within_word,c) == 0)) | |||||
| { | { | ||||
| if(IsAlpha(prev_out)) | if(IsAlpha(prev_out)) | ||||
| { | { | ||||
| } | } | ||||
| else | else | ||||
| { | { | ||||
| if(iswspace(prev_out)) | |||||
| if(IsSpace(prev_out)) | |||||
| single_quoted = 1; | single_quoted = 1; | ||||
| else | else | ||||
| single_quoted = 0; | single_quoted = 0; | ||||
| } | } | ||||
| } | } | ||||
| if(iswspace(c)) | |||||
| if(IsSpace(c)) | |||||
| { | { | ||||
| if(space_inserted) | if(space_inserted) | ||||
| { | { |