eSpeak NG is an open source speech synthesizer that supports more than a hundred languages and accents.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

translateword.c 36KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201
  1. /*
  2. * Copyright (C) 2005 to 2014 by Jonathan Duddington
  3. * email: [email protected]
  4. * Copyright (C) 2015-2017 Reece H. Dunn
  5. *
  6. * This program is free software; you can redistribute it and/or modify
  7. * it under the terms of the GNU General Public License as published by
  8. * the Free Software Foundation; either version 3 of the License, or
  9. * (at your option) any later version.
  10. *
  11. * This program is distributed in the hope that it will be useful,
  12. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  14. * GNU General Public License for more details.
  15. *
  16. * You should have received a copy of the GNU General Public License
  17. * along with this program; if not, see: <http://www.gnu.org/licenses/>.
  18. */
  19. #include "config.h"
  20. #include <ctype.h>
  21. #include <stdbool.h>
  22. #include <stdint.h>
  23. #include <stdio.h>
  24. #include <stdlib.h>
  25. #include <string.h>
  26. #include <wchar.h>
  27. #include <wctype.h>
  28. #include <espeak-ng/espeak_ng.h>
  29. #include <espeak-ng/speak_lib.h>
  30. #include <espeak-ng/encoding.h>
  31. #include "translate.h"
  32. #include "translateword.h"
  33. #include "common.h" // for strncpy0
  34. #include "dictionary.h" // for TranslateRules, LookupDictList
  35. #include "numbers.h" // for SetSpellingStress, ...
  36. #include "phoneme.h" // for phonSWITCH, PHONEME_TAB, phonPAUSE_...
  37. #include "readclause.h" // for towlower2
  38. #include "synthdata.h" // for SelectPhonemeTable, LookupPhonemeTable
  39. #include "ucd/ucd.h" // for ucd_toupper
  40. #include "voice.h" // for voice, voice_t
  41. static void addPluralSuffixes(int flags, Translator *tr, char last_char, char *word_phonemes);
  42. static void ApplySpecialAttribute2(Translator *tr, char *phonemes, int dict_flags);
  43. static void ChangeWordStress(Translator *tr, char *word, int new_stress);
  44. static int CheckDottedAbbrev(char *word1);
  45. static int NonAsciiNumber(int letter);
  46. static char *SpeakIndividualLetters(Translator *tr, char *word, char *phonemes, int spell_word, const ALPHABET *current_alphabet, char word_phonemes[]);
  47. static int TranslateLetter(Translator *tr, char *word, char *phonemes, int control, const ALPHABET *current_alphabet);
  48. static int Unpronouncable(Translator *tr, char *word, int posn);
  49. static int Unpronouncable2(Translator *tr, char *word);
  50. int TranslateWord3(Translator *tr, char *word_start, WORD_TAB *wtab, char *word_out, bool *any_stressed_words, ALPHABET *current_alphabet, char word_phonemes[], size_t size_word_phonemes)
  51. {
  52. // word1 is terminated by space (0x20) character
  53. char *word1;
  54. int word_length;
  55. int ix;
  56. char *p;
  57. int pfix;
  58. int n_chars;
  59. unsigned int dictionary_flags[2];
  60. unsigned int dictionary_flags2[2];
  61. int end_type = 0;
  62. int end_type1 = 0;
  63. int prefix_type = 0;
  64. int prefix_stress;
  65. char *wordx;
  66. char phonemes[N_WORD_PHONEMES];
  67. char phonemes2[N_WORD_PHONEMES];
  68. char prefix_phonemes[N_WORD_PHONEMES];
  69. char unpron_phonemes[N_WORD_PHONEMES];
  70. char end_phonemes[N_WORD_PHONEMES];
  71. char end_phonemes2[N_WORD_PHONEMES];
  72. char word_copy[N_WORD_BYTES];
  73. char word_copy2[N_WORD_BYTES];
  74. int word_copy_length;
  75. char prefix_chars[0x3f + 2];
  76. bool found = false;
  77. int end_flags;
  78. int c_temp; // save a character byte while we temporarily replace it with space
  79. int first_char;
  80. int last_char = 0;
  81. int prefix_flags = 0;
  82. bool more_suffixes;
  83. bool confirm_prefix;
  84. int spell_word;
  85. int emphasize_allcaps = 0;
  86. int wflags;
  87. int was_unpronouncable = 0;
  88. int loopcount;
  89. int add_suffix_phonemes = 0;
  90. WORD_TAB wtab_null[8];
  91. if (wtab == NULL) {
  92. memset(wtab_null, 0, sizeof(wtab_null));
  93. wtab = wtab_null;
  94. }
  95. wflags = wtab->flags;
  96. dictionary_flags[0] = 0;
  97. dictionary_flags[1] = 0;
  98. dictionary_flags2[0] = 0;
  99. dictionary_flags2[1] = 0;
  100. dictionary_skipwords = 0;
  101. phonemes[0] = 0;
  102. unpron_phonemes[0] = 0;
  103. prefix_phonemes[0] = 0;
  104. end_phonemes[0] = 0;
  105. if (tr->data_dictlist == NULL) {
  106. // dictionary is not loaded
  107. word_phonemes[0] = 0;
  108. return 0;
  109. }
  110. // count the length of the word
  111. word1 = word_start;
  112. if (*word1 == ' ') word1++; // possibly a dot was replaced by space: $dot
  113. wordx = word1;
  114. utf8_in(&first_char, wordx);
  115. word_length = 0;
  116. while ((*wordx != 0) && (*wordx != ' ')) {
  117. wordx += utf8_in(&last_char, wordx);
  118. word_length++;
  119. }
  120. word_copy_length = wordx - word_start;
  121. if (word_copy_length >= N_WORD_BYTES)
  122. word_copy_length = N_WORD_BYTES-1;
  123. memcpy(word_copy2, word_start, word_copy_length);
  124. spell_word = 0;
  125. if ((word_length == 1) && (wflags & FLAG_TRANSLATOR2)) {
  126. // retranslating a 1-character word using a different language, say its name
  127. utf8_in(&c_temp, wordx+1); // the next character
  128. if (!IsAlpha(c_temp) || (AlphabetFromChar(last_char) != AlphabetFromChar(c_temp)))
  129. spell_word = 1;
  130. }
  131. if (option_sayas == SAYAS_KEY) {
  132. if (word_length == 1)
  133. spell_word = 4;
  134. else {
  135. // is there a translation for this keyname ?
  136. word1--;
  137. *word1 = '_'; // prefix keyname with '_'
  138. found = LookupDictList(tr, &word1, phonemes, dictionary_flags, 0, wtab);
  139. }
  140. }
  141. // try an initial lookup in the dictionary list, we may find a pronunciation specified, or
  142. // we may just find some flags
  143. if (option_sayas & 0x10) {
  144. // SAYAS_CHAR, SAYAS_GYLPH, or SAYAS_SINGLE_CHAR
  145. spell_word = option_sayas & 0xf; // 2,3,4
  146. } else {
  147. if (!found)
  148. found = LookupDictList(tr, &word1, phonemes, dictionary_flags, FLAG_ALLOW_TEXTMODE, wtab); // the original word
  149. if ((dictionary_flags[0] & (FLAG_ALLOW_DOT | FLAG_NEEDS_DOT)) && (wordx[1] == '.'))
  150. wordx[1] = ' '; // remove a Dot after this word
  151. if (dictionary_flags[0] & FLAG_TEXTMODE) {
  152. if (word_out != NULL)
  153. strcpy(word_out, word1);
  154. return dictionary_flags[0];
  155. } else if ((found == false) && (dictionary_flags[0] & FLAG_SKIPWORDS) && !(dictionary_flags[0] & FLAG_ABBREV)) {
  156. // grouped words, but no translation. Join the words with hyphens.
  157. wordx = word1;
  158. ix = 0;
  159. while (ix < dictionary_skipwords) {
  160. if (*wordx == ' ') {
  161. *wordx = '-';
  162. ix++;
  163. }
  164. wordx++;
  165. }
  166. }
  167. if ((word_length == 1) && (dictionary_skipwords == 0)) {
  168. // is this a series of single letters separated by dots?
  169. if (CheckDottedAbbrev(word1)) {
  170. dictionary_flags[0] = 0;
  171. dictionary_flags[1] = 0;
  172. spell_word = 1;
  173. if (dictionary_skipwords)
  174. dictionary_flags[0] = FLAG_SKIPWORDS;
  175. }
  176. }
  177. if (phonemes[0] == phonSWITCH) {
  178. // change to another language in order to translate this word
  179. strcpy(word_phonemes, phonemes);
  180. return 0;
  181. }
  182. if (!found && (dictionary_flags[0] & FLAG_ABBREV)) {
  183. // the word has $abbrev flag, but no pronunciation specified. Speak as individual letters
  184. spell_word = 1;
  185. }
  186. if (!found && iswdigit(first_char)) {
  187. Lookup(tr, "_0lang", word_phonemes);
  188. if (word_phonemes[0] == phonSWITCH)
  189. return 0;
  190. if ((tr->langopts.numbers2 & NUM2_ENGLISH_NUMERALS) && !(wtab->flags & FLAG_CHAR_REPLACED)) {
  191. // for this language, speak English numerals (0-9) with the English voice
  192. sprintf(word_phonemes, "%c", phonSWITCH);
  193. return 0;
  194. }
  195. found = TranslateNumber(tr, word1, phonemes, dictionary_flags, wtab, 0);
  196. }
  197. if (!found && ((wflags & FLAG_UPPERS) != FLAG_FIRST_UPPER)) {
  198. // either all upper or all lower case
  199. if ((tr->langopts.numbers & NUM_ROMAN) || ((tr->langopts.numbers & NUM_ROMAN_CAPITALS) && (wflags & FLAG_ALL_UPPER))) {
  200. if ((wflags & FLAG_LAST_WORD) || !(wtab[1].flags & FLAG_NOSPACE)) {
  201. // don't use Roman number if this word is not separated from the next word (eg. "XLTest")
  202. if ((found = TranslateRoman(tr, word1, phonemes, wtab)) != 0)
  203. dictionary_flags[0] |= FLAG_ABBREV; // prevent emphasis if capitals
  204. }
  205. }
  206. }
  207. if ((wflags & FLAG_ALL_UPPER) && (word_length > 1) && iswalpha(first_char)) {
  208. if ((option_tone_flags & OPTION_EMPHASIZE_ALLCAPS) && !(dictionary_flags[0] & FLAG_ABBREV)) {
  209. // emphasize words which are in capitals
  210. emphasize_allcaps = FLAG_EMPHASIZED;
  211. } else if (!found && !(dictionary_flags[0] & FLAG_SKIPWORDS) && (word_length < 4) && (tr->clause_lower_count > 3)
  212. && (tr->clause_upper_count <= tr->clause_lower_count)) {
  213. // An upper case word in a lower case clause. This could be an abbreviation.
  214. spell_word = 1;
  215. }
  216. }
  217. }
  218. if (spell_word > 0) {
  219. // Speak as individual letters
  220. phonemes[0] = 0;
  221. if (SpeakIndividualLetters(tr, word1, phonemes, spell_word, current_alphabet, word_phonemes) == NULL) {
  222. if (word_length > 1)
  223. return FLAG_SPELLWORD; // a mixture of languages, retranslate as individual letters, separated by spaces
  224. return 0;
  225. }
  226. strcpy(word_phonemes, phonemes);
  227. if (wflags & FLAG_TRANSLATOR2)
  228. return 0;
  229. addPluralSuffixes(wflags, tr, last_char, word_phonemes);
  230. return dictionary_flags[0] & FLAG_SKIPWORDS; // for "b.c.d"
  231. } else if (found == false) {
  232. // word's pronunciation is not given in the dictionary list, although
  233. // dictionary_flags may have ben set there
  234. int posn;
  235. bool non_initial = false;
  236. int length;
  237. posn = 0;
  238. length = 999;
  239. wordx = word1;
  240. while (((length < 3) && (length > 0)) || (word_length > 1 && Unpronouncable(tr, wordx, posn))) {
  241. // This word looks "unpronouncable", so speak letters individually until we
  242. // find a remainder that we can pronounce.
  243. was_unpronouncable = FLAG_WAS_UNPRONOUNCABLE;
  244. emphasize_allcaps = 0;
  245. if (wordx[0] == '\'')
  246. break;
  247. if (posn > 0)
  248. non_initial = true;
  249. wordx += TranslateLetter(tr, wordx, unpron_phonemes, non_initial, current_alphabet);
  250. posn++;
  251. if (unpron_phonemes[0] == phonSWITCH) {
  252. // change to another language in order to translate this word
  253. strcpy(word_phonemes, unpron_phonemes);
  254. if (strcmp(&unpron_phonemes[1], ESPEAKNG_DEFAULT_VOICE) == 0)
  255. return FLAG_SPELLWORD; // _^_en must have been set in TranslateLetter(), not *_rules which uses only _^_
  256. return 0;
  257. }
  258. length = 0;
  259. while (wordx[length] != ' ') length++;
  260. }
  261. SetSpellingStress(tr, unpron_phonemes, 0, posn);
  262. // anything left ?
  263. if (*wordx != ' ') {
  264. if ((unpron_phonemes[0] != 0) && (wordx[0] != '\'')) {
  265. // letters which have been spoken individually from affecting the pronunciation of the pronuncable part
  266. wordx[-1] = ' ';
  267. }
  268. // Translate the stem
  269. end_type = TranslateRules(tr, wordx, phonemes, N_WORD_PHONEMES, end_phonemes, wflags, dictionary_flags);
  270. if (phonemes[0] == phonSWITCH) {
  271. // change to another language in order to translate this word
  272. strcpy(word_phonemes, phonemes);
  273. return 0;
  274. }
  275. if ((phonemes[0] == 0) && (end_phonemes[0] == 0)) {
  276. int wc;
  277. // characters not recognised, speak them individually
  278. // ?? should we say super/sub-script numbers and letters here?
  279. utf8_in(&wc, wordx);
  280. if ((word_length == 1) && (IsAlpha(wc) || IsSuperscript(wc))) {
  281. if ((wordx = SpeakIndividualLetters(tr, wordx, phonemes, spell_word, current_alphabet, word_phonemes)) == NULL)
  282. return 0;
  283. strcpy(word_phonemes, phonemes);
  284. return 0;
  285. }
  286. }
  287. c_temp = wordx[-1];
  288. found = false;
  289. confirm_prefix = true;
  290. for (loopcount = 0; (loopcount < 50) && (end_type & SUFX_P); loopcount++) {
  291. // Found a standard prefix, remove it and retranslate
  292. // loopcount guards against an endless loop
  293. if (confirm_prefix && !(end_type & SUFX_B)) {
  294. int end2;
  295. char end_phonemes2[N_WORD_PHONEMES];
  296. // remove any standard suffix and confirm that the prefix is still recognised
  297. phonemes2[0] = 0;
  298. end2 = TranslateRules(tr, wordx, phonemes2, N_WORD_PHONEMES, end_phonemes2, wflags|FLAG_NO_PREFIX|FLAG_NO_TRACE, dictionary_flags);
  299. if (end2) {
  300. RemoveEnding(tr, wordx, end2, word_copy);
  301. end_type = TranslateRules(tr, wordx, phonemes, N_WORD_PHONEMES, end_phonemes, wflags|FLAG_NO_TRACE, dictionary_flags);
  302. memcpy(wordx, word_copy, strlen(word_copy));
  303. if ((end_type & SUFX_P) == 0) {
  304. // after removing the suffix, the prefix is no longer recognised.
  305. // Keep the suffix, but don't use the prefix
  306. end_type = end2;
  307. strcpy(phonemes, phonemes2);
  308. strcpy(end_phonemes, end_phonemes2);
  309. if (option_phonemes & espeakPHONEMES_TRACE) {
  310. DecodePhonemes(end_phonemes, end_phonemes2);
  311. fprintf(f_trans, " suffix [%s]\n\n", end_phonemes2);
  312. }
  313. }
  314. confirm_prefix = false;
  315. continue;
  316. }
  317. }
  318. prefix_type = end_type;
  319. if (prefix_type & SUFX_V)
  320. tr->expect_verb = 1; // use the verb form of the word
  321. wordx[-1] = c_temp;
  322. if ((prefix_type & SUFX_B) == 0) {
  323. for (ix = (prefix_type & 0xf); ix > 0; ix--) { // num. of characters to remove
  324. wordx++;
  325. while ((*wordx & 0xc0) == 0x80) wordx++; // for multibyte characters
  326. }
  327. } else {
  328. pfix = 1;
  329. prefix_chars[0] = 0;
  330. n_chars = prefix_type & 0x3f;
  331. for (ix = 0; ix < n_chars; ix++) { // num. of bytes to remove
  332. prefix_chars[pfix++] = *wordx++;
  333. if ((prefix_type & SUFX_B) && (ix == (n_chars-1)))
  334. prefix_chars[pfix-1] = 0; // discard the last character of the prefix, this is the separator character
  335. }
  336. prefix_chars[pfix] = 0;
  337. }
  338. c_temp = wordx[-1];
  339. wordx[-1] = ' ';
  340. confirm_prefix = true;
  341. wflags |= FLAG_PREFIX_REMOVED;
  342. if (prefix_type & SUFX_B) {
  343. // SUFX_B is used for Turkish, tr_rules contains " ' (Pb"
  344. // examine the prefix part
  345. char *wordpf;
  346. char prefix_phonemes2[12];
  347. strncpy0(prefix_phonemes2, end_phonemes, sizeof(prefix_phonemes2));
  348. wordpf = &prefix_chars[1];
  349. strcpy(prefix_phonemes, phonemes);
  350. // look for stress marker or $abbrev
  351. found = LookupDictList(tr, &wordpf, phonemes, dictionary_flags, 0, wtab);
  352. if (found)
  353. strcpy(prefix_phonemes, phonemes);
  354. if (dictionary_flags[0] & FLAG_ABBREV) {
  355. prefix_phonemes[0] = 0;
  356. SpeakIndividualLetters(tr, wordpf, prefix_phonemes, 1, current_alphabet, word_phonemes);
  357. }
  358. } else
  359. strcat(prefix_phonemes, end_phonemes);
  360. end_phonemes[0] = 0;
  361. end_type = 0;
  362. found = LookupDictList(tr, &wordx, phonemes, dictionary_flags2, SUFX_P, wtab); // without prefix
  363. if (dictionary_flags[0] == 0) {
  364. dictionary_flags[0] = dictionary_flags2[0];
  365. dictionary_flags[1] = dictionary_flags2[1];
  366. } else
  367. prefix_flags = 1;
  368. if (found == false) {
  369. end_type = TranslateRules(tr, wordx, phonemes, N_WORD_PHONEMES, end_phonemes, wflags & (FLAG_HYPHEN_AFTER | FLAG_PREFIX_REMOVED), dictionary_flags);
  370. if (phonemes[0] == phonSWITCH) {
  371. // change to another language in order to translate this word
  372. wordx[-1] = c_temp;
  373. strcpy(word_phonemes, phonemes);
  374. return 0;
  375. }
  376. }
  377. }
  378. if ((end_type != 0) && !(end_type & SUFX_P)) {
  379. end_type1 = end_type;
  380. strcpy(phonemes2, phonemes);
  381. // The word has a standard ending, re-translate without this ending
  382. end_flags = RemoveEnding(tr, wordx, end_type, word_copy);
  383. more_suffixes = true;
  384. while (more_suffixes) {
  385. more_suffixes = false;
  386. phonemes[0] = 0;
  387. if (prefix_phonemes[0] != 0) {
  388. // lookup the stem without the prefix removed
  389. wordx[-1] = c_temp;
  390. found = LookupDictList(tr, &word1, phonemes, dictionary_flags2, end_flags, wtab); // include prefix, but not suffix
  391. wordx[-1] = ' ';
  392. if (phonemes[0] == phonSWITCH) {
  393. // change to another language in order to translate this word
  394. memcpy(wordx, word_copy, strlen(word_copy));
  395. strcpy(word_phonemes, phonemes);
  396. return 0;
  397. }
  398. if (dictionary_flags[0] == 0) {
  399. dictionary_flags[0] = dictionary_flags2[0];
  400. dictionary_flags[1] = dictionary_flags2[1];
  401. }
  402. if (found)
  403. prefix_phonemes[0] = 0; // matched whole word, don't need prefix now
  404. if ((found == false) && (dictionary_flags2[0] != 0))
  405. prefix_flags = 1;
  406. }
  407. if (found == false) {
  408. found = LookupDictList(tr, &wordx, phonemes, dictionary_flags2, end_flags, wtab); // without prefix and suffix
  409. if (phonemes[0] == phonSWITCH) {
  410. // change to another language in order to translate this word
  411. memcpy(wordx, word_copy, strlen(word_copy));
  412. strcpy(word_phonemes, phonemes);
  413. return 0;
  414. }
  415. if (dictionary_flags[0] == 0) {
  416. dictionary_flags[0] = dictionary_flags2[0];
  417. dictionary_flags[1] = dictionary_flags2[1];
  418. }
  419. }
  420. if (found == false) {
  421. if (end_type & SUFX_Q) {
  422. // don't retranslate, use the original lookup result
  423. strcpy(phonemes, phonemes2);
  424. } else {
  425. if (end_flags & FLAG_SUFX)
  426. wflags |= FLAG_SUFFIX_REMOVED;
  427. if (end_type & SUFX_A)
  428. wflags |= FLAG_SUFFIX_VOWEL;
  429. if (end_type & SUFX_M) {
  430. // allow more suffixes before this suffix
  431. strcpy(end_phonemes2, end_phonemes);
  432. end_type = TranslateRules(tr, wordx, phonemes, N_WORD_PHONEMES, end_phonemes, wflags, dictionary_flags);
  433. strcat(end_phonemes, end_phonemes2); // add the phonemes for the previous suffixes after this one
  434. if ((end_type != 0) && !(end_type & SUFX_P)) {
  435. // there is another suffix
  436. end_flags = RemoveEnding(tr, wordx, end_type, NULL);
  437. more_suffixes = true;
  438. }
  439. } else {
  440. // don't remove any previous suffix
  441. TranslateRules(tr, wordx, phonemes, N_WORD_PHONEMES, NULL, wflags, dictionary_flags);
  442. end_type = 0;
  443. }
  444. if (phonemes[0] == phonSWITCH) {
  445. // change to another language in order to translate this word
  446. strcpy(word_phonemes, phonemes);
  447. memcpy(wordx, word_copy, strlen(word_copy));
  448. wordx[-1] = c_temp;
  449. return 0;
  450. }
  451. }
  452. }
  453. }
  454. if ((end_type1 & SUFX_T) == 0) {
  455. // the default is to add the suffix and then determine the word's stress pattern
  456. AppendPhonemes(tr, phonemes, N_WORD_PHONEMES, end_phonemes);
  457. end_phonemes[0] = 0;
  458. }
  459. memcpy(wordx, word_copy, strlen(word_copy));
  460. }
  461. wordx[-1] = c_temp;
  462. }
  463. }
  464. addPluralSuffixes(wflags, tr, last_char, word_phonemes);
  465. wflags |= emphasize_allcaps;
  466. // determine stress pattern for this word
  467. add_suffix_phonemes = 0;
  468. if (end_phonemes[0] != 0)
  469. add_suffix_phonemes = 2;
  470. prefix_stress = 0;
  471. for (p = prefix_phonemes; *p != 0; p++) {
  472. if ((*p == phonSTRESS_P) || (*p == phonSTRESS_P2))
  473. prefix_stress = *p;
  474. }
  475. if (prefix_flags || (prefix_stress != 0)) {
  476. if ((tr->langopts.param[LOPT_PREFIXES]) || (prefix_type & SUFX_T)) {
  477. char *p;
  478. // German, keep a secondary stress on the stem
  479. SetWordStress(tr, phonemes, dictionary_flags, 3, 0);
  480. // reduce all but the first primary stress
  481. ix = 0;
  482. for (p = prefix_phonemes; *p != 0; p++) {
  483. if (*p == phonSTRESS_P) {
  484. if (ix == 0)
  485. ix = 1;
  486. else
  487. *p = phonSTRESS_3;
  488. }
  489. }
  490. snprintf(word_phonemes, size_word_phonemes, "%s%s%s", unpron_phonemes, prefix_phonemes, phonemes);
  491. word_phonemes[N_WORD_PHONEMES-1] = 0;
  492. SetWordStress(tr, word_phonemes, dictionary_flags, -1, 0);
  493. } else {
  494. // stress position affects the whole word, including prefix
  495. snprintf(word_phonemes, size_word_phonemes, "%s%s%s", unpron_phonemes, prefix_phonemes, phonemes);
  496. word_phonemes[N_WORD_PHONEMES-1] = 0;
  497. SetWordStress(tr, word_phonemes, dictionary_flags, -1, 0);
  498. }
  499. } else {
  500. SetWordStress(tr, phonemes, dictionary_flags, -1, add_suffix_phonemes);
  501. snprintf(word_phonemes, size_word_phonemes, "%s%s%s", unpron_phonemes, prefix_phonemes, phonemes);
  502. word_phonemes[N_WORD_PHONEMES-1] = 0;
  503. }
  504. if (end_phonemes[0] != 0) {
  505. // a suffix had the SUFX_T option set, add the suffix after the stress pattern has been determined
  506. ix = strlen(word_phonemes);
  507. end_phonemes[N_WORD_PHONEMES-1-ix] = 0; // ensure no buffer overflow
  508. strcpy(&word_phonemes[ix], end_phonemes);
  509. }
  510. if (wflags & FLAG_LAST_WORD) {
  511. // don't use $brk pause before the last word of a sentence
  512. // (but allow it for emphasis, see below
  513. dictionary_flags[0] &= ~FLAG_PAUSE1;
  514. }
  515. if ((wflags & FLAG_HYPHEN) && (tr->langopts.stress_flags & S_HYPEN_UNSTRESS))
  516. ChangeWordStress(tr, word_phonemes, 3);
  517. else if (wflags & FLAG_EMPHASIZED2) {
  518. // A word is indicated in the source text as stressed
  519. // Give it stress level 6 (for the intonation module)
  520. ChangeWordStress(tr, word_phonemes, 6);
  521. if (wflags & FLAG_EMPHASIZED)
  522. dictionary_flags[0] |= FLAG_PAUSE1; // precede by short pause
  523. } else if (wtab[dictionary_skipwords].flags & FLAG_LAST_WORD) {
  524. // the word has attribute to stress or unstress when at end of clause
  525. if (dictionary_flags[0] & (FLAG_STRESS_END | FLAG_STRESS_END2))
  526. ChangeWordStress(tr, word_phonemes, 4);
  527. else if ((dictionary_flags[0] & FLAG_UNSTRESS_END) && (any_stressed_words))
  528. ChangeWordStress(tr, word_phonemes, 3);
  529. }
  530. // dictionary flags for this word give a clue about which alternative pronunciations of
  531. // following words to use.
  532. if (end_type1 & SUFX_F) {
  533. // expect a verb form, with or without -s suffix
  534. tr->expect_verb = 2;
  535. tr->expect_verb_s = 2;
  536. }
  537. if (dictionary_flags[1] & FLAG_PASTF) {
  538. // expect perfect tense in next two words
  539. tr->expect_past = 3;
  540. tr->expect_verb = 0;
  541. tr->expect_noun = 0;
  542. } else if (dictionary_flags[1] & FLAG_VERBF) {
  543. // expect a verb in the next word
  544. tr->expect_verb = 2;
  545. tr->expect_verb_s = 0; // verb won't have -s suffix
  546. tr->expect_noun = 0;
  547. } else if (dictionary_flags[1] & FLAG_VERBSF) {
  548. // expect a verb, must have a -s suffix
  549. tr->expect_verb = 0;
  550. tr->expect_verb_s = 2;
  551. tr->expect_past = 0;
  552. tr->expect_noun = 0;
  553. } else if (dictionary_flags[1] & FLAG_NOUNF) {
  554. // not expecting a verb next
  555. tr->expect_noun = 2;
  556. tr->expect_verb = 0;
  557. tr->expect_verb_s = 0;
  558. tr->expect_past = 0;
  559. }
  560. if ((wordx[0] != 0) && (!(dictionary_flags[1] & FLAG_VERB_EXT))) {
  561. if (tr->expect_verb > 0)
  562. tr->expect_verb--;
  563. if (tr->expect_verb_s > 0)
  564. tr->expect_verb_s--;
  565. if (tr->expect_noun > 0)
  566. tr->expect_noun--;
  567. if (tr->expect_past > 0)
  568. tr->expect_past--;
  569. }
  570. if ((word_length == 1) && (tr->translator_name == L('e', 'n')) && iswalpha(first_char) && (first_char != 'i')) {
  571. // English Specific !!!!
  572. // any single letter before a dot is an abbreviation, except 'I'
  573. dictionary_flags[0] |= FLAG_ALLOW_DOT;
  574. }
  575. if ((tr->langopts.param[LOPT_ALT] & 2) && ((dictionary_flags[0] & (FLAG_ALT_TRANS | FLAG_ALT2_TRANS)) != 0))
  576. ApplySpecialAttribute2(tr, word_phonemes, dictionary_flags[0]);
  577. dictionary_flags[0] |= was_unpronouncable;
  578. memcpy(word_start, word_copy2, word_copy_length);
  579. return dictionary_flags[0];
  580. }
  581. void ApplySpecialAttribute2(Translator *tr, char *phonemes, int dict_flags)
  582. {
  583. // apply after the translation is complete
  584. int len;
  585. len = strlen(phonemes);
  586. if (tr->langopts.param[LOPT_ALT] & 2) {
  587. for (int ix = 0; ix < (len-1); ix++) {
  588. if (phonemes[ix] == phonSTRESS_P) {
  589. char *p;
  590. p = &phonemes[ix+1];
  591. if ((dict_flags & FLAG_ALT2_TRANS) != 0) {
  592. if (*p == PhonemeCode('E'))
  593. *p = PhonemeCode('e');
  594. if (*p == PhonemeCode('O'))
  595. *p = PhonemeCode('o');
  596. } else {
  597. if (*p == PhonemeCode('e'))
  598. *p = PhonemeCode('E');
  599. if (*p == PhonemeCode('o'))
  600. *p = PhonemeCode('O');
  601. }
  602. break;
  603. }
  604. }
  605. }
  606. }
  607. static void ChangeWordStress(Translator *tr, char *word, int new_stress)
  608. {
  609. int ix;
  610. unsigned char *p;
  611. int max_stress;
  612. int vowel_count; // num of vowels + 1
  613. int stressed_syllable = 0; // position of stressed syllable
  614. unsigned char phonetic[N_WORD_PHONEMES];
  615. signed char vowel_stress[N_WORD_PHONEMES/2];
  616. strcpy((char *)phonetic, word);
  617. max_stress = GetVowelStress(tr, phonetic, vowel_stress, &vowel_count, &stressed_syllable, 0);
  618. if (new_stress >= STRESS_IS_PRIMARY) {
  619. // promote to primary stress
  620. for (ix = 1; ix < vowel_count; ix++) {
  621. if (vowel_stress[ix] >= max_stress) {
  622. vowel_stress[ix] = new_stress;
  623. break;
  624. }
  625. }
  626. } else {
  627. // remove primary stress
  628. for (ix = 1; ix < vowel_count; ix++) {
  629. if (vowel_stress[ix] > new_stress) // >= allows for diminished stress (=1)
  630. vowel_stress[ix] = new_stress;
  631. }
  632. }
  633. // write out phonemes
  634. ix = 1;
  635. p = phonetic;
  636. while (*p != 0) {
  637. if ((phoneme_tab[*p]->type == phVOWEL) && !(phoneme_tab[*p]->phflags & phNONSYLLABIC)) {
  638. if ((vowel_stress[ix] == STRESS_IS_DIMINISHED) || (vowel_stress[ix] > STRESS_IS_UNSTRESSED))
  639. *word++ = stress_phonemes[(unsigned char)vowel_stress[ix]];
  640. ix++;
  641. }
  642. *word++ = *p++;
  643. }
  644. *word = 0;
  645. }
  646. static char *SpeakIndividualLetters(Translator *tr, char *word, char *phonemes, int spell_word, const ALPHABET *current_alphabet, char word_phonemes[])
  647. {
  648. int posn = 0;
  649. int capitals = 0;
  650. bool non_initial = false;
  651. if (spell_word > 2)
  652. capitals = 2; // speak 'capital'
  653. if (spell_word > 1)
  654. capitals |= 4; // speak character code for unknown letters
  655. while ((*word != ' ') && (*word != 0)) {
  656. word += TranslateLetter(tr, word, phonemes, capitals | non_initial, current_alphabet);
  657. posn++;
  658. non_initial = true;
  659. if (phonemes[0] == phonSWITCH) {
  660. // change to another language in order to translate this word
  661. strcpy(word_phonemes, phonemes);
  662. return NULL;
  663. }
  664. }
  665. SetSpellingStress(tr, phonemes, spell_word, posn);
  666. return word;
  667. }
  // Six phoneme mnemonic strings; presumably the spoken names of the hex digits a-f
  // (TODO confirm against the code which indexes this table, not visible here).
  668. static const char *const hex_letters[] = {"'e:j", "b'i:", "s'i:", "d'i:", "'i:", "'ef"};
  // Dictionary lookup keys, indexed in TranslateLetter() by (IsSuperscript() result >> 14).
  // A NULL entry means that no modifier word is looked up.
  669. static const char *const modifiers[] = { NULL, "_sub", "_sup", NULL };
  670. // unicode ranges for non-ascii digits 0-9 (these must be in ascending order)
  // Each entry looks like the code point of a script's digit zero; the final 0
  // presumably terminates the list (confirm in NonAsciiNumber()).
  671. static const int number_ranges[] = {
  672. 0x660, 0x6f0, // arabic
  673. 0x966, 0x9e6, 0xa66, 0xae6, 0xb66, 0xbe6, 0xc66, 0xce6, 0xd66, // indic
  674. 0xe50, 0xed0, 0xf20, 0x1040, 0x1090,
  675. 0
  676. };
  677. static int TranslateLetter(Translator *tr, char *word, char *phonemes, int control, const ALPHABET *current_alphabet)
  678. {
  679. // get pronunciation for an isolated letter
  680. // return number of bytes used by the letter
  681. // control bit 0: a non-initial letter in a word
  682. // bit 1: say 'capital'
  683. // bit 2: say character code for unknown letters
  684. int n_bytes;
  685. int letter;
  686. int len;
  687. const ALPHABET *alphabet;
  688. int al_offset;
  689. int al_flags;
  690. int number;
  691. int phontab_1;
  692. char capital[30];
  693. char ph_buf[80];
  694. char ph_buf2[80];
  695. char ph_alphabet[80];
  696. char hexbuf[12];
  697. static const char pause_string[] = { phonPAUSE, 0 };
  698. ph_buf[0] = 0;
  699. ph_alphabet[0] = 0;
  700. capital[0] = 0;
  701. phontab_1 = translator->phoneme_tab_ix;
  702. n_bytes = utf8_in(&letter, word);
  703. if ((letter & 0xfff00) == 0x0e000)
  704. letter &= 0xff; // uncode private usage area
  705. if (control & 2) {
  706. // include CAPITAL information
  707. if (iswupper(letter))
  708. Lookup(tr, "_cap", capital);
  709. }
  710. letter = towlower2(letter, tr);
  711. LookupLetter(tr, letter, word[n_bytes], ph_buf, control & 1);
  712. if (ph_buf[0] == 0) {
  713. // is this a subscript or superscript letter ?
  714. int c;
  715. if ((c = IsSuperscript(letter)) != 0) {
  716. letter = c & 0x3fff;
  717. const char *modifier;
  718. if ((control & 4 ) && ((modifier = modifiers[c >> 14]) != NULL)) {
  719. // don't say "superscript" during normal text reading
  720. Lookup(tr, modifier, capital);
  721. if (capital[0] == 0) {
  722. capital[2] = SetTranslator3(ESPEAKNG_DEFAULT_VOICE); // overwrites previous contents of translator3
  723. Lookup(translator3, modifier, &capital[3]);
  724. if (capital[3] != 0) {
  725. capital[0] = phonPAUSE;
  726. capital[1] = phonSWITCH;
  727. len = strlen(&capital[3]);
  728. capital[len+3] = phonSWITCH;
  729. capital[len+4] = phontab_1;
  730. capital[len+5] = 0;
  731. }
  732. }
  733. }
  734. }
  735. LookupLetter(tr, letter, word[n_bytes], ph_buf, control & 1);
  736. }
  737. if (ph_buf[0] == phonSWITCH) {
  738. strcpy(phonemes, ph_buf);
  739. return 0;
  740. }
  741. if ((ph_buf[0] == 0) && ((number = NonAsciiNumber(letter)) > 0)) {
  742. // convert a non-ascii number to 0-9
  743. LookupLetter(tr, number, 0, ph_buf, control & 1);
  744. }
  745. al_offset = 0;
  746. al_flags = 0;
  747. if ((alphabet = AlphabetFromChar(letter)) != NULL) {
  748. al_offset = alphabet->offset;
  749. al_flags = alphabet->flags;
  750. }
  751. if (alphabet != current_alphabet) {
  752. // speak the name of the alphabet
  753. current_alphabet = alphabet;
  754. if ((alphabet != NULL) && !(al_flags & AL_DONT_NAME) && (al_offset != translator->letter_bits_offset)) {
  755. if ((al_flags & AL_DONT_NAME) || (al_offset == translator->langopts.alt_alphabet) || (al_offset == translator->langopts.our_alphabet)) {
  756. // don't say the alphabet name
  757. } else {
  758. ph_buf2[0] = 0;
  759. if (Lookup(translator, alphabet->name, ph_alphabet) == 0) { // the original language for the current voice
  760. // Can't find the local name for this alphabet, use the English name
  761. ph_alphabet[2] = SetTranslator3(ESPEAKNG_DEFAULT_VOICE); // overwrites previous contents of translator3
  762. Lookup(translator3, alphabet->name, ph_buf2);
  763. } else if (translator != tr) {
  764. phontab_1 = tr->phoneme_tab_ix;
  765. strcpy(ph_buf2, ph_alphabet);
  766. ph_alphabet[2] = translator->phoneme_tab_ix;
  767. }
  768. if (ph_buf2[0] != 0) {
  769. // we used a different language for the alphabet name (now in ph_buf2)
  770. ph_alphabet[0] = phonPAUSE;
  771. ph_alphabet[1] = phonSWITCH;
  772. strcpy(&ph_alphabet[3], ph_buf2);
  773. len = strlen(ph_buf2) + 3;
  774. ph_alphabet[len] = phonSWITCH;
  775. ph_alphabet[len+1] = phontab_1;
  776. ph_alphabet[len+2] = 0;
  777. }
  778. }
  779. }
  780. }
  781. // caution: SetWordStress() etc don't expect phonSWITCH + phoneme table number
  782. if (ph_buf[0] == 0) {
  783. int language;
  784. if ((al_offset != 0) && (al_offset == translator->langopts.alt_alphabet))
  785. language = translator->langopts.alt_alphabet_lang;
  786. else if ((alphabet != NULL) && (alphabet->language != 0) && !(al_flags & AL_NOT_LETTERS))
  787. language = alphabet->language;
  788. else
  789. language = L('e', 'n');
  790. if ((language != tr->translator_name) || (language == L('k', 'o'))) {
  791. char *p3;
  792. //int initial, code;
  793. char hangul_buf[12];
  794. // speak in the language for this alphabet (or English)
  795. char word_buf[5];
  796. ph_buf[2] = SetTranslator3(WordToString2(word_buf, language));
  797. if (translator3 != NULL) {
  798. int code;
  799. if (((code = letter - 0xac00) >= 0) && (letter <= 0xd7af)) {
  800. // Special case for Korean letters.
  801. // break a syllable hangul into 2 or 3 individual jamo
  802. hangul_buf[0] = ' ';
  803. p3 = &hangul_buf[1];
  804. int initial;
  805. if ((initial = (code/28)/21) != 11) {
  806. p3 += utf8_out(initial + 0x1100, p3);
  807. }
  808. utf8_out(((code/28) % 21) + 0x1161, p3); // medial
  809. utf8_out((code % 28) + 0x11a7, &p3[3]); // final
  810. p3[6] = ' ';
  811. p3[7] = 0;
  812. ph_buf[3] = 0;
  813. TranslateRules(translator3, &hangul_buf[1], &ph_buf[3], sizeof(ph_buf)-3, NULL, 0, NULL);
  814. SetWordStress(translator3, &ph_buf[3], NULL, -1, 0);
  815. } else
  816. LookupLetter(translator3, letter, word[n_bytes], &ph_buf[3], control & 1);
  817. if (ph_buf[3] == phonSWITCH) {
  818. // another level of language change
  819. ph_buf[2] = SetTranslator3(&ph_buf[4]);
  820. LookupLetter(translator3, letter, word[n_bytes], &ph_buf[3], control & 1);
  821. }
  822. SelectPhonemeTable(voice->phoneme_tab_ix); // revert to original phoneme table
  823. if (ph_buf[3] != 0) {
  824. ph_buf[0] = phonPAUSE;
  825. ph_buf[1] = phonSWITCH;
  826. len = strlen(&ph_buf[3]) + 3;
  827. ph_buf[len] = phonSWITCH; // switch back
  828. ph_buf[len+1] = tr->phoneme_tab_ix;
  829. ph_buf[len+2] = 0;
  830. }
  831. }
  832. }
  833. }
  834. if (ph_buf[0] == 0) {
  835. // character name not found
  836. int speak_letter_number = 1;
  837. if (!(al_flags & AL_NO_SYMBOL)) {
  838. if (iswalpha(letter))
  839. Lookup(translator, "_?A", ph_buf);
  840. if ((ph_buf[0] == 0) && !iswspace(letter))
  841. Lookup(translator, "_??", ph_buf);
  842. if (ph_buf[0] == 0)
  843. EncodePhonemes("l'et@", ph_buf, NULL);
  844. }
  845. if (!(control & 4) && (al_flags & AL_NOT_CODE)) {
  846. // don't speak the character code number, unless we want full details of this character
  847. speak_letter_number = 0;
  848. }
  849. if (speak_letter_number) {
  850. char *p2;
  851. if (al_offset == 0x2800) {
  852. // braille dots symbol, list the numbered dots
  853. p2 = hexbuf;
  854. for (int ix = 0; ix < 8; ix++) {
  855. if (letter & (1 << ix))
  856. *p2++ = '1'+ix;
  857. }
  858. *p2 = 0;
  859. } else {
  860. // speak the hexadecimal number of the character code
  861. sprintf(hexbuf, "%x", letter);
  862. }
  863. char *pbuf;
  864. pbuf = ph_buf;
  865. for (p2 = hexbuf; *p2 != 0; p2++) {
  866. pbuf += strlen(pbuf);
  867. *pbuf++ = phonPAUSE_VSHORT;
  868. LookupLetter(translator, *p2, 0, pbuf, 1);
  869. if (((pbuf[0] == 0) || (pbuf[0] == phonSWITCH)) && (*p2 >= 'a')) {
  870. // This language has no translation for 'a' to 'f', speak English names using base phonemes
  871. EncodePhonemes(hex_letters[*p2 - 'a'], pbuf, NULL);
  872. }
  873. }
  874. strcat(pbuf, pause_string);
  875. }
  876. }
  877. len = strlen(phonemes);
  878. if (tr->langopts.accents & 2) // 'capital' before or after the word ?
  879. sprintf(ph_buf2, "%c%s%s%s", 0xff, ph_alphabet, ph_buf, capital);
  880. else
  881. sprintf(ph_buf2, "%c%s%s%s", 0xff, ph_alphabet, capital, ph_buf); // the 0xff marker will be removed or replaced in SetSpellingStress()
  882. if ((len + strlen(ph_buf2)) < N_WORD_PHONEMES)
  883. strcpy(&phonemes[len], ph_buf2);
  884. return n_bytes;
  885. }
  886. // append plural suffixes depending on preceding letter
  887. static void addPluralSuffixes(int flags, Translator *tr, char last_char, char *word_phonemes)
  888. {
  889. char word_zz[4] = { ' ', 'z', 'z', 0 };
  890. char word_iz[4] = { ' ', 'i', 'z', 0 };
  891. char word_ss[4] = { ' ', 's', 's', 0 };
  892. if (flags & FLAG_HAS_PLURAL) {
  893. // s or 's suffix, append [s], [z] or [Iz] depending on previous letter
  894. if (last_char == 'f')
  895. TranslateRules(tr, &word_ss[1], word_phonemes, N_WORD_PHONEMES,
  896. NULL, 0, NULL);
  897. else if ((last_char == 0) || (strchr_w("hsx", last_char) == NULL))
  898. TranslateRules(tr, &word_zz[1], word_phonemes, N_WORD_PHONEMES,
  899. NULL, 0, NULL);
  900. else
  901. TranslateRules(tr, &word_iz[1], word_phonemes, N_WORD_PHONEMES,
  902. NULL, 0, NULL);
  903. }
  904. }
  905. static int CheckDottedAbbrev(char *word1)
  906. {
  907. int wc;
  908. int count = 0;
  909. int ix;
  910. char *word;
  911. char *wbuf;
  912. char word_buf[80];
  913. word = word1;
  914. wbuf = word_buf;
  915. for (;;) {
  916. int ok = 0;
  917. int nbytes = utf8_in(&wc, word);
  918. if ((word[nbytes] == ' ') && IsAlpha(wc)) {
  919. if (word[nbytes+1] == '.') {
  920. if (word[nbytes+2] == ' ')
  921. ok = 1;
  922. else if (word[nbytes+2] == '\'' && word[nbytes+3] == 's') {
  923. nbytes += 2; // delete the final dot (eg. u.s.a.'s)
  924. ok = 2;
  925. }
  926. } else if ((count > 0))
  927. ok = 2;
  928. }
  929. if (ok == 0)
  930. break;
  931. for (ix = 0; ix < nbytes; ix++)
  932. *wbuf++ = word[ix];
  933. count++;
  934. if (ok == 2) {
  935. word += nbytes;
  936. break;
  937. }
  938. word += (nbytes + 3);
  939. }
  940. if (count > 1) {
  941. ix = wbuf - word_buf;
  942. memcpy(word1, word_buf, ix);
  943. while (&word1[ix] < word)
  944. word1[ix++] = ' ';
  945. dictionary_skipwords = (count - 1)*2;
  946. }
  947. return count;
  948. }
  949. static int NonAsciiNumber(int letter)
  950. {
  951. // Change non-ascii digit into ascii digit '0' to '9', (or -1 if not)
  952. const int *p;
  953. int base;
  954. for (p = number_ranges; (base = *p) != 0; p++) {
  955. if (letter < base)
  956. break; // not found
  957. if (letter < (base+10))
  958. return letter-base+'0';
  959. }
  960. return -1;
  961. }
  962. static int Unpronouncable(Translator *tr, char *word, int posn)
  963. {
  964. /* Determines whether a word in 'unpronouncable', i.e. whether it should
  965. be spoken as individual letters.
  966. This function may be language specific. This is a generic version.
  967. */
  968. int c;
  969. int c1 = 0;
  970. int vowel_posn = 9;
  971. int index;
  972. int count;
  973. const ALPHABET *alphabet;
  974. utf8_in(&c, word);
  975. if ((tr->letter_bits_offset > 0) && (c < 0x241)) {
  976. // Latin characters for a language with a non-latin alphabet
  977. return 0; // so we can re-translate the word as English
  978. }
  979. if (((alphabet = AlphabetFromChar(c)) != NULL) && (alphabet->offset != tr->letter_bits_offset)) {
  980. // Character is not in our alphabet
  981. return 0;
  982. }
  983. if (tr->langopts.param[LOPT_UNPRONOUNCABLE] == 1)
  984. return 0;
  985. if (((c = *word) == ' ') || (c == 0) || (c == '\''))
  986. return 0;
  987. index = 0;
  988. count = 0;
  989. for (;;) {
  990. index += utf8_in(&c, &word[index]);
  991. if ((c == 0) || (c == ' '))
  992. break;
  993. if ((c == '\'') && ((count > 1) || (posn > 0)))
  994. break; // "tv'" but not "l'"
  995. if (count == 0)
  996. c1 = c;
  997. if ((c == '\'') && (tr->langopts.param[LOPT_UNPRONOUNCABLE] == 3)) {
  998. // don't count apostrophe
  999. } else
  1000. count++;
  1001. if (IsVowel(tr, c)) {
  1002. vowel_posn = count; // position of the first vowel
  1003. break;
  1004. }
  1005. if ((c != '\'') && !iswalpha(c))
  1006. return 0;
  1007. }
  1008. if ((vowel_posn > 2) && (tr->langopts.param[LOPT_UNPRONOUNCABLE] == 2)) {
  1009. // Lookup unpronounable rules in *_rules
  1010. return Unpronouncable2(tr, word);
  1011. }
  1012. if (c1 == tr->langopts.param[LOPT_UNPRONOUNCABLE])
  1013. vowel_posn--; // disregard this as the initial letter when counting
  1014. if (vowel_posn > (tr->langopts.max_initial_consonants+1))
  1015. return 1; // no vowel, or no vowel in first few letters
  1016. return 0;
  1017. }
  1018. static int Unpronouncable2(Translator *tr, char *word)
  1019. {
  1020. int c;
  1021. int end_flags;
  1022. char ph_buf[N_WORD_PHONEMES];
  1023. ph_buf[0] = 0;
  1024. c = word[-1];
  1025. word[-1] = ' '; // ensure there is a space before the "word"
  1026. end_flags = TranslateRules(tr, word, ph_buf, sizeof(ph_buf), NULL, FLAG_UNPRON_TEST, NULL);
  1027. word[-1] = c;
  1028. if ((end_flags == 0) || (end_flags & SUFX_UNPRON))
  1029. return 1;
  1030. return 0;
  1031. }