eSpeak NG is an open source speech synthesizer that supports more than a hundred languages and accents.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

encoding.c 23KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579
  1. /*
  2. * Copyright (C) 2017 Reece H. Dunn
  3. *
  4. * This program is free software; you can redistribute it and/or modify
  5. * it under the terms of the GNU General Public License as published by
  6. * the Free Software Foundation; either version 3 of the License, or
  7. * (at your option) any later version.
  8. *
  9. * This program is distributed in the hope that it will be useful,
  10. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. * GNU General Public License for more details.
  13. *
  14. * You should have received a copy of the GNU General Public License
  15. * along with this program; if not, write see:
  16. * <http://www.gnu.org/licenses/>.
  17. */
  18. #include "config.h"
  19. #include <assert.h>
  20. #include <stdint.h>
  21. #include <stdlib.h>
  22. #include <stdio.h>
  23. #include <espeak-ng/espeak_ng.h>
  24. #include "encoding.h"
  25. void
  26. test_unbound_text_decoder()
  27. {
  28. printf("testing unbound text decoder\n");
  29. espeak_ng_TEXT_DECODER *decoder = create_text_decoder();
  30. assert(decoder != NULL);
  31. assert(text_decoder_eof(decoder) == 1);
  32. destroy_text_decoder(decoder);
  33. }
  34. void
  35. test_unknown_encoding()
  36. {
  37. printf("testing unknown encodings\n");
  38. assert(espeak_ng_EncodingFromName(NULL) == ESPEAKNG_ENCODING_UNKNOWN);
  39. assert(espeak_ng_EncodingFromName("") == ESPEAKNG_ENCODING_UNKNOWN);
  40. assert(espeak_ng_EncodingFromName("abcxyz") == ESPEAKNG_ENCODING_UNKNOWN);
  41. assert(espeak_ng_EncodingFromName("US") == ESPEAKNG_ENCODING_UNKNOWN); // wrong case
  42. espeak_ng_TEXT_DECODER *decoder = create_text_decoder();
  43. assert(text_decoder_decode_string(decoder, "aG\x92\xA0\xDE", 5, ESPEAKNG_ENCODING_UNKNOWN) == ENS_UNKNOWN_TEXT_ENCODING);
  44. assert(text_decoder_eof(decoder) == 1);
  45. destroy_text_decoder(decoder);
  46. }
  47. void
  48. test_us_ascii_encoding()
  49. {
  50. printf("testing US-ASCII encoding\n");
  51. assert(espeak_ng_EncodingFromName("US-ASCII") == ESPEAKNG_ENCODING_US_ASCII);
  52. assert(espeak_ng_EncodingFromName("iso-ir-6") == ESPEAKNG_ENCODING_US_ASCII);
  53. assert(espeak_ng_EncodingFromName("ANSI_X3.4-1968") == ESPEAKNG_ENCODING_US_ASCII);
  54. assert(espeak_ng_EncodingFromName("ANSI_X3.4-1986") == ESPEAKNG_ENCODING_US_ASCII);
  55. assert(espeak_ng_EncodingFromName("ISO_646.irv:1991") == ESPEAKNG_ENCODING_US_ASCII);
  56. assert(espeak_ng_EncodingFromName("ISO646-US") == ESPEAKNG_ENCODING_US_ASCII);
  57. assert(espeak_ng_EncodingFromName("us") == ESPEAKNG_ENCODING_US_ASCII);
  58. assert(espeak_ng_EncodingFromName("IBM367") == ESPEAKNG_ENCODING_US_ASCII);
  59. assert(espeak_ng_EncodingFromName("cp367") == ESPEAKNG_ENCODING_US_ASCII);
  60. assert(espeak_ng_EncodingFromName("csASCII") == ESPEAKNG_ENCODING_US_ASCII);
  61. espeak_ng_TEXT_DECODER *decoder = create_text_decoder();
  62. assert(text_decoder_decode_string(decoder, "aG\x92\xA0\xDE", 5, ESPEAKNG_ENCODING_US_ASCII) == ENS_OK);
  63. assert(text_decoder_eof(decoder) == 0);
  64. assert(text_decoder_getc(decoder) == 'a');
  65. assert(text_decoder_eof(decoder) == 0);
  66. assert(text_decoder_getc(decoder) == 'G');
  67. assert(text_decoder_eof(decoder) == 0);
  68. assert(text_decoder_getc(decoder) == 0xFFFD);
  69. assert(text_decoder_eof(decoder) == 0);
  70. assert(text_decoder_getc(decoder) == 0xFFFD);
  71. assert(text_decoder_eof(decoder) == 0);
  72. assert(text_decoder_getc(decoder) == 0xFFFD);
  73. assert(text_decoder_eof(decoder) == 1);
  74. destroy_text_decoder(decoder);
  75. }
  76. void
  77. test_iso_8859_1_encoding()
  78. {
  79. printf("testing ISO-8859-1 encoding\n");
  80. assert(espeak_ng_EncodingFromName("ISO-8859-1") == ESPEAKNG_ENCODING_ISO_8859_1);
  81. assert(espeak_ng_EncodingFromName("ISO_8859-1") == ESPEAKNG_ENCODING_ISO_8859_1);
  82. assert(espeak_ng_EncodingFromName("ISO_8859-1:1987") == ESPEAKNG_ENCODING_ISO_8859_1);
  83. assert(espeak_ng_EncodingFromName("iso-ir-100") == ESPEAKNG_ENCODING_ISO_8859_1);
  84. assert(espeak_ng_EncodingFromName("latin1") == ESPEAKNG_ENCODING_ISO_8859_1);
  85. assert(espeak_ng_EncodingFromName("l1") == ESPEAKNG_ENCODING_ISO_8859_1);
  86. assert(espeak_ng_EncodingFromName("IBM819") == ESPEAKNG_ENCODING_ISO_8859_1);
  87. assert(espeak_ng_EncodingFromName("cp819") == ESPEAKNG_ENCODING_ISO_8859_1);
  88. assert(espeak_ng_EncodingFromName("csISOLatin1") == ESPEAKNG_ENCODING_ISO_8859_1);
  89. espeak_ng_TEXT_DECODER *decoder = create_text_decoder();
  90. assert(text_decoder_decode_string(decoder, "aG\x92\xA0\xDE", 5, ESPEAKNG_ENCODING_ISO_8859_1) == ENS_OK);
  91. assert(text_decoder_eof(decoder) == 0);
  92. assert(text_decoder_getc(decoder) == 'a');
  93. assert(text_decoder_eof(decoder) == 0);
  94. assert(text_decoder_getc(decoder) == 'G');
  95. assert(text_decoder_eof(decoder) == 0);
  96. assert(text_decoder_getc(decoder) == 0x92);
  97. assert(text_decoder_eof(decoder) == 0);
  98. assert(text_decoder_getc(decoder) == 0xA0);
  99. assert(text_decoder_eof(decoder) == 0);
  100. assert(text_decoder_getc(decoder) == 0xDE);
  101. assert(text_decoder_eof(decoder) == 1);
  102. destroy_text_decoder(decoder);
  103. }
  104. void
  105. test_iso_8859_2_encoding()
  106. {
  107. printf("testing ISO-8859-2 encoding\n");
  108. assert(espeak_ng_EncodingFromName("ISO-8859-2") == ESPEAKNG_ENCODING_ISO_8859_2);
  109. assert(espeak_ng_EncodingFromName("ISO_8859-2") == ESPEAKNG_ENCODING_ISO_8859_2);
  110. assert(espeak_ng_EncodingFromName("ISO_8859-2:1987") == ESPEAKNG_ENCODING_ISO_8859_2);
  111. assert(espeak_ng_EncodingFromName("iso-ir-101") == ESPEAKNG_ENCODING_ISO_8859_2);
  112. assert(espeak_ng_EncodingFromName("latin2") == ESPEAKNG_ENCODING_ISO_8859_2);
  113. assert(espeak_ng_EncodingFromName("l2") == ESPEAKNG_ENCODING_ISO_8859_2);
  114. assert(espeak_ng_EncodingFromName("csISOLatin2") == ESPEAKNG_ENCODING_ISO_8859_2);
  115. espeak_ng_TEXT_DECODER *decoder = create_text_decoder();
  116. assert(text_decoder_decode_string(decoder, "aG\x92\xA0\xDE", 5, ESPEAKNG_ENCODING_ISO_8859_2) == ENS_OK);
  117. assert(text_decoder_eof(decoder) == 0);
  118. assert(text_decoder_getc(decoder) == 'a');
  119. assert(text_decoder_eof(decoder) == 0);
  120. assert(text_decoder_getc(decoder) == 'G');
  121. assert(text_decoder_eof(decoder) == 0);
  122. assert(text_decoder_getc(decoder) == 0x92);
  123. assert(text_decoder_eof(decoder) == 0);
  124. assert(text_decoder_getc(decoder) == 0xA0);
  125. assert(text_decoder_eof(decoder) == 0);
  126. assert(text_decoder_getc(decoder) == 0x0162);
  127. assert(text_decoder_eof(decoder) == 1);
  128. destroy_text_decoder(decoder);
  129. }
  130. void
  131. test_iso_8859_3_encoding()
  132. {
  133. printf("testing ISO-8859-3 encoding\n");
  134. assert(espeak_ng_EncodingFromName("ISO-8859-3") == ESPEAKNG_ENCODING_ISO_8859_3);
  135. assert(espeak_ng_EncodingFromName("ISO_8859-3") == ESPEAKNG_ENCODING_ISO_8859_3);
  136. assert(espeak_ng_EncodingFromName("ISO_8859-3:1988") == ESPEAKNG_ENCODING_ISO_8859_3);
  137. assert(espeak_ng_EncodingFromName("iso-ir-109") == ESPEAKNG_ENCODING_ISO_8859_3);
  138. assert(espeak_ng_EncodingFromName("latin3") == ESPEAKNG_ENCODING_ISO_8859_3);
  139. assert(espeak_ng_EncodingFromName("l3") == ESPEAKNG_ENCODING_ISO_8859_3);
  140. assert(espeak_ng_EncodingFromName("csISOLatin3") == ESPEAKNG_ENCODING_ISO_8859_3);
  141. espeak_ng_TEXT_DECODER *decoder = create_text_decoder();
  142. assert(text_decoder_decode_string(decoder, "aG\x92\xA0\xDE", 5, ESPEAKNG_ENCODING_ISO_8859_3) == ENS_OK);
  143. assert(text_decoder_eof(decoder) == 0);
  144. assert(text_decoder_getc(decoder) == 'a');
  145. assert(text_decoder_eof(decoder) == 0);
  146. assert(text_decoder_getc(decoder) == 'G');
  147. assert(text_decoder_eof(decoder) == 0);
  148. assert(text_decoder_getc(decoder) == 0x92);
  149. assert(text_decoder_eof(decoder) == 0);
  150. assert(text_decoder_getc(decoder) == 0xA0);
  151. assert(text_decoder_eof(decoder) == 0);
  152. assert(text_decoder_getc(decoder) == 0x015C);
  153. assert(text_decoder_eof(decoder) == 1);
  154. destroy_text_decoder(decoder);
  155. }
  156. void
  157. test_iso_8859_4_encoding()
  158. {
  159. printf("testing ISO-8859-4 encoding\n");
  160. assert(espeak_ng_EncodingFromName("ISO-8859-4") == ESPEAKNG_ENCODING_ISO_8859_4);
  161. assert(espeak_ng_EncodingFromName("ISO_8859-4") == ESPEAKNG_ENCODING_ISO_8859_4);
  162. assert(espeak_ng_EncodingFromName("ISO_8859-4:1988") == ESPEAKNG_ENCODING_ISO_8859_4);
  163. assert(espeak_ng_EncodingFromName("iso-ir-110") == ESPEAKNG_ENCODING_ISO_8859_4);
  164. assert(espeak_ng_EncodingFromName("latin4") == ESPEAKNG_ENCODING_ISO_8859_4);
  165. assert(espeak_ng_EncodingFromName("l4") == ESPEAKNG_ENCODING_ISO_8859_4);
  166. assert(espeak_ng_EncodingFromName("csISOLatin4") == ESPEAKNG_ENCODING_ISO_8859_4);
  167. espeak_ng_TEXT_DECODER *decoder = create_text_decoder();
  168. assert(text_decoder_decode_string(decoder, "aG\x92\xA0\xDE", 5, ESPEAKNG_ENCODING_ISO_8859_4) == ENS_OK);
  169. assert(text_decoder_eof(decoder) == 0);
  170. assert(text_decoder_getc(decoder) == 'a');
  171. assert(text_decoder_eof(decoder) == 0);
  172. assert(text_decoder_getc(decoder) == 'G');
  173. assert(text_decoder_eof(decoder) == 0);
  174. assert(text_decoder_getc(decoder) == 0x92);
  175. assert(text_decoder_eof(decoder) == 0);
  176. assert(text_decoder_getc(decoder) == 0xA0);
  177. assert(text_decoder_eof(decoder) == 0);
  178. assert(text_decoder_getc(decoder) == 0x016A);
  179. assert(text_decoder_eof(decoder) == 1);
  180. destroy_text_decoder(decoder);
  181. }
  182. void
  183. test_iso_8859_5_encoding()
  184. {
  185. printf("testing ISO-8859-5 encoding\n");
  186. assert(espeak_ng_EncodingFromName("ISO-8859-5") == ESPEAKNG_ENCODING_ISO_8859_5);
  187. assert(espeak_ng_EncodingFromName("ISO_8859-5") == ESPEAKNG_ENCODING_ISO_8859_5);
  188. assert(espeak_ng_EncodingFromName("ISO_8859-5:1988") == ESPEAKNG_ENCODING_ISO_8859_5);
  189. assert(espeak_ng_EncodingFromName("iso-ir-144") == ESPEAKNG_ENCODING_ISO_8859_5);
  190. assert(espeak_ng_EncodingFromName("cyrillic") == ESPEAKNG_ENCODING_ISO_8859_5);
  191. assert(espeak_ng_EncodingFromName("csISOLatinCyrillic") == ESPEAKNG_ENCODING_ISO_8859_5);
  192. espeak_ng_TEXT_DECODER *decoder = create_text_decoder();
  193. assert(text_decoder_decode_string(decoder, "aG\x92\xA0\xDE", 5, ESPEAKNG_ENCODING_ISO_8859_5) == ENS_OK);
  194. assert(text_decoder_eof(decoder) == 0);
  195. assert(text_decoder_getc(decoder) == 'a');
  196. assert(text_decoder_eof(decoder) == 0);
  197. assert(text_decoder_getc(decoder) == 'G');
  198. assert(text_decoder_eof(decoder) == 0);
  199. assert(text_decoder_getc(decoder) == 0x92);
  200. assert(text_decoder_eof(decoder) == 0);
  201. assert(text_decoder_getc(decoder) == 0xA0);
  202. assert(text_decoder_eof(decoder) == 0);
  203. assert(text_decoder_getc(decoder) == 0x043E);
  204. assert(text_decoder_eof(decoder) == 1);
  205. destroy_text_decoder(decoder);
  206. }
  207. void
  208. test_iso_8859_6_encoding()
  209. {
  210. printf("testing ISO-8859-6 encoding\n");
  211. assert(espeak_ng_EncodingFromName("ISO-8859-6") == ESPEAKNG_ENCODING_ISO_8859_6);
  212. assert(espeak_ng_EncodingFromName("ISO_8859-6") == ESPEAKNG_ENCODING_ISO_8859_6);
  213. assert(espeak_ng_EncodingFromName("ISO_8859-6:1987") == ESPEAKNG_ENCODING_ISO_8859_6);
  214. assert(espeak_ng_EncodingFromName("iso-ir-127") == ESPEAKNG_ENCODING_ISO_8859_6);
  215. assert(espeak_ng_EncodingFromName("ECMA-114") == ESPEAKNG_ENCODING_ISO_8859_6);
  216. assert(espeak_ng_EncodingFromName("ASMO-708") == ESPEAKNG_ENCODING_ISO_8859_6);
  217. assert(espeak_ng_EncodingFromName("arabic") == ESPEAKNG_ENCODING_ISO_8859_6);
  218. assert(espeak_ng_EncodingFromName("csISOLatinArabic") == ESPEAKNG_ENCODING_ISO_8859_6);
  219. espeak_ng_TEXT_DECODER *decoder = create_text_decoder();
  220. assert(text_decoder_decode_string(decoder, "aG\x92\xA0\xDA", 5, ESPEAKNG_ENCODING_ISO_8859_6) == ENS_OK);
  221. assert(text_decoder_eof(decoder) == 0);
  222. assert(text_decoder_getc(decoder) == 'a');
  223. assert(text_decoder_eof(decoder) == 0);
  224. assert(text_decoder_getc(decoder) == 'G');
  225. assert(text_decoder_eof(decoder) == 0);
  226. assert(text_decoder_getc(decoder) == 0x92);
  227. assert(text_decoder_eof(decoder) == 0);
  228. assert(text_decoder_getc(decoder) == 0xA0);
  229. assert(text_decoder_eof(decoder) == 0);
  230. assert(text_decoder_getc(decoder) == 0x063A);
  231. assert(text_decoder_eof(decoder) == 1);
  232. destroy_text_decoder(decoder);
  233. }
  234. void
  235. test_iso_8859_7_encoding()
  236. {
  237. printf("testing ISO-8859-7 encoding\n");
  238. assert(espeak_ng_EncodingFromName("ISO-8859-7") == ESPEAKNG_ENCODING_ISO_8859_7);
  239. assert(espeak_ng_EncodingFromName("ISO_8859-7") == ESPEAKNG_ENCODING_ISO_8859_7);
  240. assert(espeak_ng_EncodingFromName("ISO_8859-7:1987") == ESPEAKNG_ENCODING_ISO_8859_7);
  241. assert(espeak_ng_EncodingFromName("iso-ir-126") == ESPEAKNG_ENCODING_ISO_8859_7);
  242. assert(espeak_ng_EncodingFromName("ECMA-118") == ESPEAKNG_ENCODING_ISO_8859_7);
  243. assert(espeak_ng_EncodingFromName("ELOT_928") == ESPEAKNG_ENCODING_ISO_8859_7);
  244. assert(espeak_ng_EncodingFromName("greek") == ESPEAKNG_ENCODING_ISO_8859_7);
  245. assert(espeak_ng_EncodingFromName("greek8") == ESPEAKNG_ENCODING_ISO_8859_7);
  246. assert(espeak_ng_EncodingFromName("csISOLatinGreek") == ESPEAKNG_ENCODING_ISO_8859_7);
  247. espeak_ng_TEXT_DECODER *decoder = create_text_decoder();
  248. assert(text_decoder_decode_string(decoder, "aG\x92\xA0\xDE", 5, ESPEAKNG_ENCODING_ISO_8859_7) == ENS_OK);
  249. assert(text_decoder_eof(decoder) == 0);
  250. assert(text_decoder_getc(decoder) == 'a');
  251. assert(text_decoder_eof(decoder) == 0);
  252. assert(text_decoder_getc(decoder) == 'G');
  253. assert(text_decoder_eof(decoder) == 0);
  254. assert(text_decoder_getc(decoder) == 0x92);
  255. assert(text_decoder_eof(decoder) == 0);
  256. assert(text_decoder_getc(decoder) == 0xA0);
  257. assert(text_decoder_eof(decoder) == 0);
  258. assert(text_decoder_getc(decoder) == 0x03AE);
  259. assert(text_decoder_eof(decoder) == 1);
  260. destroy_text_decoder(decoder);
  261. }
  262. void
  263. test_iso_8859_8_encoding()
  264. {
  265. printf("testing ISO-8859-8 encoding\n");
  266. assert(espeak_ng_EncodingFromName("ISO-8859-8") == ESPEAKNG_ENCODING_ISO_8859_8);
  267. assert(espeak_ng_EncodingFromName("ISO_8859-8") == ESPEAKNG_ENCODING_ISO_8859_8);
  268. assert(espeak_ng_EncodingFromName("ISO_8859-8:1988") == ESPEAKNG_ENCODING_ISO_8859_8);
  269. assert(espeak_ng_EncodingFromName("iso-ir-138") == ESPEAKNG_ENCODING_ISO_8859_8);
  270. assert(espeak_ng_EncodingFromName("hebrew") == ESPEAKNG_ENCODING_ISO_8859_8);
  271. assert(espeak_ng_EncodingFromName("csISOLatinHebrew") == ESPEAKNG_ENCODING_ISO_8859_8);
  272. espeak_ng_TEXT_DECODER *decoder = create_text_decoder();
  273. assert(text_decoder_decode_string(decoder, "aG\x92\xA0\xEE", 5, ESPEAKNG_ENCODING_ISO_8859_8) == ENS_OK);
  274. assert(text_decoder_eof(decoder) == 0);
  275. assert(text_decoder_getc(decoder) == 'a');
  276. assert(text_decoder_eof(decoder) == 0);
  277. assert(text_decoder_getc(decoder) == 'G');
  278. assert(text_decoder_eof(decoder) == 0);
  279. assert(text_decoder_getc(decoder) == 0x92);
  280. assert(text_decoder_eof(decoder) == 0);
  281. assert(text_decoder_getc(decoder) == 0xA0);
  282. assert(text_decoder_eof(decoder) == 0);
  283. assert(text_decoder_getc(decoder) == 0x05de);
  284. assert(text_decoder_eof(decoder) == 1);
  285. destroy_text_decoder(decoder);
  286. }
  287. void
  288. test_iso_8859_9_encoding()
  289. {
  290. printf("testing ISO-8859-9 encoding\n");
  291. assert(espeak_ng_EncodingFromName("ISO-8859-9") == ESPEAKNG_ENCODING_ISO_8859_9);
  292. assert(espeak_ng_EncodingFromName("ISO_8859-9") == ESPEAKNG_ENCODING_ISO_8859_9);
  293. assert(espeak_ng_EncodingFromName("ISO_8859-9:1989") == ESPEAKNG_ENCODING_ISO_8859_9);
  294. assert(espeak_ng_EncodingFromName("iso-ir-148") == ESPEAKNG_ENCODING_ISO_8859_9);
  295. assert(espeak_ng_EncodingFromName("latin5") == ESPEAKNG_ENCODING_ISO_8859_9);
  296. assert(espeak_ng_EncodingFromName("l5") == ESPEAKNG_ENCODING_ISO_8859_9);
  297. assert(espeak_ng_EncodingFromName("csISOLatin5") == ESPEAKNG_ENCODING_ISO_8859_9);
  298. espeak_ng_TEXT_DECODER *decoder = create_text_decoder();
  299. assert(text_decoder_decode_string(decoder, "aG\x92\xA0\xDE", 5, ESPEAKNG_ENCODING_ISO_8859_9) == ENS_OK);
  300. assert(text_decoder_eof(decoder) == 0);
  301. assert(text_decoder_getc(decoder) == 'a');
  302. assert(text_decoder_eof(decoder) == 0);
  303. assert(text_decoder_getc(decoder) == 'G');
  304. assert(text_decoder_eof(decoder) == 0);
  305. assert(text_decoder_getc(decoder) == 0x92);
  306. assert(text_decoder_eof(decoder) == 0);
  307. assert(text_decoder_getc(decoder) == 0xA0);
  308. assert(text_decoder_eof(decoder) == 0);
  309. assert(text_decoder_getc(decoder) == 0x015e);
  310. assert(text_decoder_eof(decoder) == 1);
  311. destroy_text_decoder(decoder);
  312. }
  313. void
  314. test_iso_8859_10_encoding()
  315. {
  316. printf("testing ISO-8859-10 encoding\n");
  317. assert(espeak_ng_EncodingFromName("ISO-8859-10") == ESPEAKNG_ENCODING_ISO_8859_10);
  318. assert(espeak_ng_EncodingFromName("ISO_8859-10") == ESPEAKNG_ENCODING_ISO_8859_10);
  319. assert(espeak_ng_EncodingFromName("ISO_8859-10:1992") == ESPEAKNG_ENCODING_ISO_8859_10);
  320. assert(espeak_ng_EncodingFromName("iso-ir-157") == ESPEAKNG_ENCODING_ISO_8859_10);
  321. assert(espeak_ng_EncodingFromName("latin6") == ESPEAKNG_ENCODING_ISO_8859_10);
  322. assert(espeak_ng_EncodingFromName("l6") == ESPEAKNG_ENCODING_ISO_8859_10);
  323. assert(espeak_ng_EncodingFromName("csISOLatin6") == ESPEAKNG_ENCODING_ISO_8859_10);
  324. espeak_ng_TEXT_DECODER *decoder = create_text_decoder();
  325. assert(text_decoder_decode_string(decoder, "aG\x92\xA0\xDE", 5, ESPEAKNG_ENCODING_ISO_8859_10) == ENS_OK);
  326. assert(text_decoder_eof(decoder) == 0);
  327. assert(text_decoder_getc(decoder) == 'a');
  328. assert(text_decoder_eof(decoder) == 0);
  329. assert(text_decoder_getc(decoder) == 'G');
  330. assert(text_decoder_eof(decoder) == 0);
  331. assert(text_decoder_getc(decoder) == 0x92);
  332. assert(text_decoder_eof(decoder) == 0);
  333. assert(text_decoder_getc(decoder) == 0xA0);
  334. assert(text_decoder_eof(decoder) == 0);
  335. assert(text_decoder_getc(decoder) == 0x00de);
  336. assert(text_decoder_eof(decoder) == 1);
  337. destroy_text_decoder(decoder);
  338. }
  339. void
  340. test_iso_8859_11_encoding()
  341. {
  342. printf("testing ISO-8859-11 encoding\n");
  343. assert(espeak_ng_EncodingFromName("ISO-8859-11") == ESPEAKNG_ENCODING_ISO_8859_11);
  344. assert(espeak_ng_EncodingFromName("TIS-620") == ESPEAKNG_ENCODING_ISO_8859_11);
  345. assert(espeak_ng_EncodingFromName("csTIS620") == ESPEAKNG_ENCODING_ISO_8859_11);
  346. espeak_ng_TEXT_DECODER *decoder = create_text_decoder();
  347. assert(text_decoder_decode_string(decoder, "aG\x92\xA0\xEE", 5, ESPEAKNG_ENCODING_ISO_8859_11) == ENS_OK);
  348. assert(text_decoder_eof(decoder) == 0);
  349. assert(text_decoder_getc(decoder) == 'a');
  350. assert(text_decoder_eof(decoder) == 0);
  351. assert(text_decoder_getc(decoder) == 'G');
  352. assert(text_decoder_eof(decoder) == 0);
  353. assert(text_decoder_getc(decoder) == 0x92);
  354. assert(text_decoder_eof(decoder) == 0);
  355. assert(text_decoder_getc(decoder) == 0xA0);
  356. assert(text_decoder_eof(decoder) == 0);
  357. assert(text_decoder_getc(decoder) == 0x0e4e);
  358. assert(text_decoder_eof(decoder) == 1);
  359. destroy_text_decoder(decoder);
  360. }
  361. void
  362. test_iso_8859_13_encoding()
  363. {
  364. printf("testing ISO-8859-13 encoding\n");
  365. assert(espeak_ng_EncodingFromName("ISO-8859-13") == ESPEAKNG_ENCODING_ISO_8859_13);
  366. assert(espeak_ng_EncodingFromName("csISO885913") == ESPEAKNG_ENCODING_ISO_8859_13);
  367. espeak_ng_TEXT_DECODER *decoder = create_text_decoder();
  368. assert(text_decoder_decode_string(decoder, "aG\x92\xA0\xEE", 5, ESPEAKNG_ENCODING_ISO_8859_13) == ENS_OK);
  369. assert(text_decoder_eof(decoder) == 0);
  370. assert(text_decoder_getc(decoder) == 'a');
  371. assert(text_decoder_eof(decoder) == 0);
  372. assert(text_decoder_getc(decoder) == 'G');
  373. assert(text_decoder_eof(decoder) == 0);
  374. assert(text_decoder_getc(decoder) == 0x92);
  375. assert(text_decoder_eof(decoder) == 0);
  376. assert(text_decoder_getc(decoder) == 0xA0);
  377. assert(text_decoder_eof(decoder) == 0);
  378. assert(text_decoder_getc(decoder) == 0x012b);
  379. assert(text_decoder_eof(decoder) == 1);
  380. destroy_text_decoder(decoder);
  381. }
  382. void
  383. test_iso_8859_14_encoding()
  384. {
  385. printf("testing ISO-8859-14 encoding\n");
  386. assert(espeak_ng_EncodingFromName("ISO-8859-14") == ESPEAKNG_ENCODING_ISO_8859_14);
  387. assert(espeak_ng_EncodingFromName("ISO_8859-14") == ESPEAKNG_ENCODING_ISO_8859_14);
  388. assert(espeak_ng_EncodingFromName("ISO_8859-14:1998") == ESPEAKNG_ENCODING_ISO_8859_14);
  389. assert(espeak_ng_EncodingFromName("iso-ir-199") == ESPEAKNG_ENCODING_ISO_8859_14);
  390. assert(espeak_ng_EncodingFromName("iso-celtic") == ESPEAKNG_ENCODING_ISO_8859_14);
  391. assert(espeak_ng_EncodingFromName("latin8") == ESPEAKNG_ENCODING_ISO_8859_14);
  392. assert(espeak_ng_EncodingFromName("l8") == ESPEAKNG_ENCODING_ISO_8859_14);
  393. assert(espeak_ng_EncodingFromName("csISO885914") == ESPEAKNG_ENCODING_ISO_8859_14);
  394. espeak_ng_TEXT_DECODER *decoder = create_text_decoder();
  395. assert(text_decoder_decode_string(decoder, "aG\x92\xA0\xDE", 5, ESPEAKNG_ENCODING_ISO_8859_14) == ENS_OK);
  396. assert(text_decoder_eof(decoder) == 0);
  397. assert(text_decoder_getc(decoder) == 'a');
  398. assert(text_decoder_eof(decoder) == 0);
  399. assert(text_decoder_getc(decoder) == 'G');
  400. assert(text_decoder_eof(decoder) == 0);
  401. assert(text_decoder_getc(decoder) == 0x92);
  402. assert(text_decoder_eof(decoder) == 0);
  403. assert(text_decoder_getc(decoder) == 0xA0);
  404. assert(text_decoder_eof(decoder) == 0);
  405. assert(text_decoder_getc(decoder) == 0x0176);
  406. assert(text_decoder_eof(decoder) == 1);
  407. destroy_text_decoder(decoder);
  408. }
  409. void
  410. test_iso_8859_15_encoding()
  411. {
  412. printf("testing ISO-8859-15 encoding\n");
  413. assert(espeak_ng_EncodingFromName("ISO-8859-15") == ESPEAKNG_ENCODING_ISO_8859_15);
  414. assert(espeak_ng_EncodingFromName("ISO_8859-15") == ESPEAKNG_ENCODING_ISO_8859_15);
  415. assert(espeak_ng_EncodingFromName("Latin-9") == ESPEAKNG_ENCODING_ISO_8859_15);
  416. assert(espeak_ng_EncodingFromName("csISO885915") == ESPEAKNG_ENCODING_ISO_8859_15);
  417. espeak_ng_TEXT_DECODER *decoder = create_text_decoder();
  418. assert(text_decoder_decode_string(decoder, "aG\x92\xA0\xBE", 5, ESPEAKNG_ENCODING_ISO_8859_15) == ENS_OK);
  419. assert(text_decoder_eof(decoder) == 0);
  420. assert(text_decoder_getc(decoder) == 'a');
  421. assert(text_decoder_eof(decoder) == 0);
  422. assert(text_decoder_getc(decoder) == 'G');
  423. assert(text_decoder_eof(decoder) == 0);
  424. assert(text_decoder_getc(decoder) == 0x92);
  425. assert(text_decoder_eof(decoder) == 0);
  426. assert(text_decoder_getc(decoder) == 0xA0);
  427. assert(text_decoder_eof(decoder) == 0);
  428. assert(text_decoder_getc(decoder) == 0x0178);
  429. assert(text_decoder_eof(decoder) == 1);
  430. destroy_text_decoder(decoder);
  431. }
  432. void
  433. test_iso_8859_16_encoding()
  434. {
  435. printf("testing ISO-8859-16 encoding\n");
  436. assert(espeak_ng_EncodingFromName("ISO-8859-16") == ESPEAKNG_ENCODING_ISO_8859_16);
  437. assert(espeak_ng_EncodingFromName("ISO_8859-16") == ESPEAKNG_ENCODING_ISO_8859_16);
  438. assert(espeak_ng_EncodingFromName("ISO_8859-16:2001") == ESPEAKNG_ENCODING_ISO_8859_16);
  439. assert(espeak_ng_EncodingFromName("iso-ir-226") == ESPEAKNG_ENCODING_ISO_8859_16);
  440. assert(espeak_ng_EncodingFromName("latin10") == ESPEAKNG_ENCODING_ISO_8859_16);
  441. assert(espeak_ng_EncodingFromName("l10") == ESPEAKNG_ENCODING_ISO_8859_16);
  442. assert(espeak_ng_EncodingFromName("csISO885916") == ESPEAKNG_ENCODING_ISO_8859_16);
  443. espeak_ng_TEXT_DECODER *decoder = create_text_decoder();
  444. assert(text_decoder_decode_string(decoder, "aG\x92\xA0\xDE", 5, ESPEAKNG_ENCODING_ISO_8859_16) == ENS_OK);
  445. assert(text_decoder_eof(decoder) == 0);
  446. assert(text_decoder_getc(decoder) == 'a');
  447. assert(text_decoder_eof(decoder) == 0);
  448. assert(text_decoder_getc(decoder) == 'G');
  449. assert(text_decoder_eof(decoder) == 0);
  450. assert(text_decoder_getc(decoder) == 0x92);
  451. assert(text_decoder_eof(decoder) == 0);
  452. assert(text_decoder_getc(decoder) == 0xA0);
  453. assert(text_decoder_eof(decoder) == 0);
  454. assert(text_decoder_getc(decoder) == 0x021a);
  455. assert(text_decoder_eof(decoder) == 1);
  456. destroy_text_decoder(decoder);
  457. }
  /*
   * Test driver: runs each encoding test in turn, prints "done" and
   * returns EXIT_SUCCESS. The tests report failure through assert(), so
   * reaching the final printf means no assertion fired.
   *
   * argc/argv: accepted for the standard main signature; not consulted.
   */
  int
  main(int argc, char **argv)
  {
      /* Mark the command line as deliberately unused (-Wunused-parameter). */
      (void)argc;
      (void)argv;

      test_unbound_text_decoder();
      test_unknown_encoding();
      test_us_ascii_encoding();

      test_iso_8859_1_encoding();
      test_iso_8859_2_encoding();
      test_iso_8859_3_encoding();
      test_iso_8859_4_encoding();
      test_iso_8859_5_encoding();
      test_iso_8859_6_encoding();
      test_iso_8859_7_encoding();
      test_iso_8859_8_encoding();
      test_iso_8859_9_encoding();
      test_iso_8859_10_encoding();
      test_iso_8859_11_encoding();
      // ISO-8859-12 is not a valid encoding.
      test_iso_8859_13_encoding();
      test_iso_8859_14_encoding();
      test_iso_8859_15_encoding();
      test_iso_8859_16_encoding();

      printf("done\n");
      return EXIT_SUCCESS;
  }