eSpeak NG is an open source speech synthesizer that supports more than a hundred languages and accents.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

encoding.c 24KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606
  1. /*
  2. * Copyright (C) 2017 Reece H. Dunn
  3. *
  4. * This program is free software; you can redistribute it and/or modify
  5. * it under the terms of the GNU General Public License as published by
  6. * the Free Software Foundation; either version 3 of the License, or
  7. * (at your option) any later version.
  8. *
  9. * This program is distributed in the hope that it will be useful,
  10. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. * GNU General Public License for more details.
  13. *
  14. * You should have received a copy of the GNU General Public License
  15. * along with this program; if not, see:
  16. * <http://www.gnu.org/licenses/>.
  17. */
  18. #include "config.h"
  19. #include <assert.h>
  20. #include <stdint.h>
  21. #include <stdlib.h>
  22. #include <stdio.h>
  23. #include <espeak-ng/espeak_ng.h>
  24. #include "encoding.h"
  25. void
  26. test_unbound_text_decoder()
  27. {
  28. printf("testing unbound text decoder\n");
  29. espeak_ng_TEXT_DECODER *decoder = create_text_decoder();
  30. assert(decoder != NULL);
  31. assert(text_decoder_eof(decoder) == 1);
  32. destroy_text_decoder(decoder);
  33. }
  34. void
  35. test_unknown_encoding()
  36. {
  37. printf("testing unknown encodings\n");
  38. assert(espeak_ng_EncodingFromName(NULL) == ESPEAKNG_ENCODING_UNKNOWN);
  39. assert(espeak_ng_EncodingFromName("") == ESPEAKNG_ENCODING_UNKNOWN);
  40. assert(espeak_ng_EncodingFromName("abcxyz") == ESPEAKNG_ENCODING_UNKNOWN);
  41. assert(espeak_ng_EncodingFromName("US") == ESPEAKNG_ENCODING_UNKNOWN); // wrong case
  42. espeak_ng_TEXT_DECODER *decoder = create_text_decoder();
  43. assert(text_decoder_decode_string(decoder, "aG\x92\xA0\xDE", 5, ESPEAKNG_ENCODING_UNKNOWN) == ENS_UNKNOWN_TEXT_ENCODING);
  44. assert(text_decoder_eof(decoder) == 1);
  45. destroy_text_decoder(decoder);
  46. }
  47. void
  48. test_us_ascii_encoding()
  49. {
  50. printf("testing US-ASCII encoding\n");
  51. assert(espeak_ng_EncodingFromName("US-ASCII") == ESPEAKNG_ENCODING_US_ASCII);
  52. assert(espeak_ng_EncodingFromName("iso-ir-6") == ESPEAKNG_ENCODING_US_ASCII);
  53. assert(espeak_ng_EncodingFromName("ANSI_X3.4-1968") == ESPEAKNG_ENCODING_US_ASCII);
  54. assert(espeak_ng_EncodingFromName("ANSI_X3.4-1986") == ESPEAKNG_ENCODING_US_ASCII);
  55. assert(espeak_ng_EncodingFromName("ISO_646.irv:1991") == ESPEAKNG_ENCODING_US_ASCII);
  56. assert(espeak_ng_EncodingFromName("ISO646-US") == ESPEAKNG_ENCODING_US_ASCII);
  57. assert(espeak_ng_EncodingFromName("us") == ESPEAKNG_ENCODING_US_ASCII);
  58. assert(espeak_ng_EncodingFromName("IBM367") == ESPEAKNG_ENCODING_US_ASCII);
  59. assert(espeak_ng_EncodingFromName("cp367") == ESPEAKNG_ENCODING_US_ASCII);
  60. assert(espeak_ng_EncodingFromName("csASCII") == ESPEAKNG_ENCODING_US_ASCII);
  61. espeak_ng_TEXT_DECODER *decoder = create_text_decoder();
  62. assert(text_decoder_decode_string(decoder, "aG\x92\xA0\xDE", 5, ESPEAKNG_ENCODING_US_ASCII) == ENS_OK);
  63. assert(text_decoder_eof(decoder) == 0);
  64. assert(text_decoder_getc(decoder) == 'a');
  65. assert(text_decoder_eof(decoder) == 0);
  66. assert(text_decoder_getc(decoder) == 'G');
  67. assert(text_decoder_eof(decoder) == 0);
  68. assert(text_decoder_getc(decoder) == 0xFFFD);
  69. assert(text_decoder_eof(decoder) == 0);
  70. assert(text_decoder_getc(decoder) == 0xFFFD);
  71. assert(text_decoder_eof(decoder) == 0);
  72. assert(text_decoder_getc(decoder) == 0xFFFD);
  73. assert(text_decoder_eof(decoder) == 1);
  74. destroy_text_decoder(decoder);
  75. }
  76. void
  77. test_koi8_r_encoding()
  78. {
  79. printf("testing KOI8-R encoding\n");
  80. assert(espeak_ng_EncodingFromName("KOI8-R") == ESPEAKNG_ENCODING_KOI8_R);
  81. assert(espeak_ng_EncodingFromName("csKOI8R") == ESPEAKNG_ENCODING_KOI8_R);
  82. espeak_ng_TEXT_DECODER *decoder = create_text_decoder();
  83. assert(text_decoder_decode_string(decoder, "aG\x92\xA0\xDE", 5, ESPEAKNG_ENCODING_KOI8_R) == ENS_OK);
  84. assert(text_decoder_eof(decoder) == 0);
  85. assert(text_decoder_getc(decoder) == 'a');
  86. assert(text_decoder_eof(decoder) == 0);
  87. assert(text_decoder_getc(decoder) == 'G');
  88. assert(text_decoder_eof(decoder) == 0);
  89. assert(text_decoder_getc(decoder) == 0x92);
  90. assert(text_decoder_eof(decoder) == 0);
  91. assert(text_decoder_getc(decoder) == 0xA0);
  92. assert(text_decoder_eof(decoder) == 0);
  93. assert(text_decoder_getc(decoder) == 0x021a);
  94. assert(text_decoder_eof(decoder) == 1);
  95. destroy_text_decoder(decoder);
  96. }
  97. void
  98. test_iso_8859_1_encoding()
  99. {
  100. printf("testing ISO-8859-1 encoding\n");
  101. assert(espeak_ng_EncodingFromName("ISO-8859-1") == ESPEAKNG_ENCODING_ISO_8859_1);
  102. assert(espeak_ng_EncodingFromName("ISO_8859-1") == ESPEAKNG_ENCODING_ISO_8859_1);
  103. assert(espeak_ng_EncodingFromName("ISO_8859-1:1987") == ESPEAKNG_ENCODING_ISO_8859_1);
  104. assert(espeak_ng_EncodingFromName("iso-ir-100") == ESPEAKNG_ENCODING_ISO_8859_1);
  105. assert(espeak_ng_EncodingFromName("latin1") == ESPEAKNG_ENCODING_ISO_8859_1);
  106. assert(espeak_ng_EncodingFromName("l1") == ESPEAKNG_ENCODING_ISO_8859_1);
  107. assert(espeak_ng_EncodingFromName("IBM819") == ESPEAKNG_ENCODING_ISO_8859_1);
  108. assert(espeak_ng_EncodingFromName("cp819") == ESPEAKNG_ENCODING_ISO_8859_1);
  109. assert(espeak_ng_EncodingFromName("csISOLatin1") == ESPEAKNG_ENCODING_ISO_8859_1);
  110. espeak_ng_TEXT_DECODER *decoder = create_text_decoder();
  111. assert(text_decoder_decode_string(decoder, "aG\x92\xA0\xDE", 5, ESPEAKNG_ENCODING_ISO_8859_1) == ENS_OK);
  112. assert(text_decoder_eof(decoder) == 0);
  113. assert(text_decoder_getc(decoder) == 'a');
  114. assert(text_decoder_eof(decoder) == 0);
  115. assert(text_decoder_getc(decoder) == 'G');
  116. assert(text_decoder_eof(decoder) == 0);
  117. assert(text_decoder_getc(decoder) == 0x92);
  118. assert(text_decoder_eof(decoder) == 0);
  119. assert(text_decoder_getc(decoder) == 0xA0);
  120. assert(text_decoder_eof(decoder) == 0);
  121. assert(text_decoder_getc(decoder) == 0xDE);
  122. assert(text_decoder_eof(decoder) == 1);
  123. destroy_text_decoder(decoder);
  124. }
  125. void
  126. test_iso_8859_2_encoding()
  127. {
  128. printf("testing ISO-8859-2 encoding\n");
  129. assert(espeak_ng_EncodingFromName("ISO-8859-2") == ESPEAKNG_ENCODING_ISO_8859_2);
  130. assert(espeak_ng_EncodingFromName("ISO_8859-2") == ESPEAKNG_ENCODING_ISO_8859_2);
  131. assert(espeak_ng_EncodingFromName("ISO_8859-2:1987") == ESPEAKNG_ENCODING_ISO_8859_2);
  132. assert(espeak_ng_EncodingFromName("iso-ir-101") == ESPEAKNG_ENCODING_ISO_8859_2);
  133. assert(espeak_ng_EncodingFromName("latin2") == ESPEAKNG_ENCODING_ISO_8859_2);
  134. assert(espeak_ng_EncodingFromName("l2") == ESPEAKNG_ENCODING_ISO_8859_2);
  135. assert(espeak_ng_EncodingFromName("csISOLatin2") == ESPEAKNG_ENCODING_ISO_8859_2);
  136. espeak_ng_TEXT_DECODER *decoder = create_text_decoder();
  137. assert(text_decoder_decode_string(decoder, "aG\x92\xA0\xDE", 5, ESPEAKNG_ENCODING_ISO_8859_2) == ENS_OK);
  138. assert(text_decoder_eof(decoder) == 0);
  139. assert(text_decoder_getc(decoder) == 'a');
  140. assert(text_decoder_eof(decoder) == 0);
  141. assert(text_decoder_getc(decoder) == 'G');
  142. assert(text_decoder_eof(decoder) == 0);
  143. assert(text_decoder_getc(decoder) == 0x92);
  144. assert(text_decoder_eof(decoder) == 0);
  145. assert(text_decoder_getc(decoder) == 0xA0);
  146. assert(text_decoder_eof(decoder) == 0);
  147. assert(text_decoder_getc(decoder) == 0x0162);
  148. assert(text_decoder_eof(decoder) == 1);
  149. destroy_text_decoder(decoder);
  150. }
  151. void
  152. test_iso_8859_3_encoding()
  153. {
  154. printf("testing ISO-8859-3 encoding\n");
  155. assert(espeak_ng_EncodingFromName("ISO-8859-3") == ESPEAKNG_ENCODING_ISO_8859_3);
  156. assert(espeak_ng_EncodingFromName("ISO_8859-3") == ESPEAKNG_ENCODING_ISO_8859_3);
  157. assert(espeak_ng_EncodingFromName("ISO_8859-3:1988") == ESPEAKNG_ENCODING_ISO_8859_3);
  158. assert(espeak_ng_EncodingFromName("iso-ir-109") == ESPEAKNG_ENCODING_ISO_8859_3);
  159. assert(espeak_ng_EncodingFromName("latin3") == ESPEAKNG_ENCODING_ISO_8859_3);
  160. assert(espeak_ng_EncodingFromName("l3") == ESPEAKNG_ENCODING_ISO_8859_3);
  161. assert(espeak_ng_EncodingFromName("csISOLatin3") == ESPEAKNG_ENCODING_ISO_8859_3);
  162. espeak_ng_TEXT_DECODER *decoder = create_text_decoder();
  163. assert(text_decoder_decode_string(decoder, "aG\x92\xA0\xDE", 5, ESPEAKNG_ENCODING_ISO_8859_3) == ENS_OK);
  164. assert(text_decoder_eof(decoder) == 0);
  165. assert(text_decoder_getc(decoder) == 'a');
  166. assert(text_decoder_eof(decoder) == 0);
  167. assert(text_decoder_getc(decoder) == 'G');
  168. assert(text_decoder_eof(decoder) == 0);
  169. assert(text_decoder_getc(decoder) == 0x92);
  170. assert(text_decoder_eof(decoder) == 0);
  171. assert(text_decoder_getc(decoder) == 0xA0);
  172. assert(text_decoder_eof(decoder) == 0);
  173. assert(text_decoder_getc(decoder) == 0x015C);
  174. assert(text_decoder_eof(decoder) == 1);
  175. destroy_text_decoder(decoder);
  176. }
  177. void
  178. test_iso_8859_4_encoding()
  179. {
  180. printf("testing ISO-8859-4 encoding\n");
  181. assert(espeak_ng_EncodingFromName("ISO-8859-4") == ESPEAKNG_ENCODING_ISO_8859_4);
  182. assert(espeak_ng_EncodingFromName("ISO_8859-4") == ESPEAKNG_ENCODING_ISO_8859_4);
  183. assert(espeak_ng_EncodingFromName("ISO_8859-4:1988") == ESPEAKNG_ENCODING_ISO_8859_4);
  184. assert(espeak_ng_EncodingFromName("iso-ir-110") == ESPEAKNG_ENCODING_ISO_8859_4);
  185. assert(espeak_ng_EncodingFromName("latin4") == ESPEAKNG_ENCODING_ISO_8859_4);
  186. assert(espeak_ng_EncodingFromName("l4") == ESPEAKNG_ENCODING_ISO_8859_4);
  187. assert(espeak_ng_EncodingFromName("csISOLatin4") == ESPEAKNG_ENCODING_ISO_8859_4);
  188. espeak_ng_TEXT_DECODER *decoder = create_text_decoder();
  189. assert(text_decoder_decode_string(decoder, "aG\x92\xA0\xDE", 5, ESPEAKNG_ENCODING_ISO_8859_4) == ENS_OK);
  190. assert(text_decoder_eof(decoder) == 0);
  191. assert(text_decoder_getc(decoder) == 'a');
  192. assert(text_decoder_eof(decoder) == 0);
  193. assert(text_decoder_getc(decoder) == 'G');
  194. assert(text_decoder_eof(decoder) == 0);
  195. assert(text_decoder_getc(decoder) == 0x92);
  196. assert(text_decoder_eof(decoder) == 0);
  197. assert(text_decoder_getc(decoder) == 0xA0);
  198. assert(text_decoder_eof(decoder) == 0);
  199. assert(text_decoder_getc(decoder) == 0x016A);
  200. assert(text_decoder_eof(decoder) == 1);
  201. destroy_text_decoder(decoder);
  202. }
  203. void
  204. test_iso_8859_5_encoding()
  205. {
  206. printf("testing ISO-8859-5 encoding\n");
  207. assert(espeak_ng_EncodingFromName("ISO-8859-5") == ESPEAKNG_ENCODING_ISO_8859_5);
  208. assert(espeak_ng_EncodingFromName("ISO_8859-5") == ESPEAKNG_ENCODING_ISO_8859_5);
  209. assert(espeak_ng_EncodingFromName("ISO_8859-5:1988") == ESPEAKNG_ENCODING_ISO_8859_5);
  210. assert(espeak_ng_EncodingFromName("iso-ir-144") == ESPEAKNG_ENCODING_ISO_8859_5);
  211. assert(espeak_ng_EncodingFromName("cyrillic") == ESPEAKNG_ENCODING_ISO_8859_5);
  212. assert(espeak_ng_EncodingFromName("csISOLatinCyrillic") == ESPEAKNG_ENCODING_ISO_8859_5);
  213. espeak_ng_TEXT_DECODER *decoder = create_text_decoder();
  214. assert(text_decoder_decode_string(decoder, "aG\x92\xA0\xDE", 5, ESPEAKNG_ENCODING_ISO_8859_5) == ENS_OK);
  215. assert(text_decoder_eof(decoder) == 0);
  216. assert(text_decoder_getc(decoder) == 'a');
  217. assert(text_decoder_eof(decoder) == 0);
  218. assert(text_decoder_getc(decoder) == 'G');
  219. assert(text_decoder_eof(decoder) == 0);
  220. assert(text_decoder_getc(decoder) == 0x92);
  221. assert(text_decoder_eof(decoder) == 0);
  222. assert(text_decoder_getc(decoder) == 0xA0);
  223. assert(text_decoder_eof(decoder) == 0);
  224. assert(text_decoder_getc(decoder) == 0x043E);
  225. assert(text_decoder_eof(decoder) == 1);
  226. destroy_text_decoder(decoder);
  227. }
  228. void
  229. test_iso_8859_6_encoding()
  230. {
  231. printf("testing ISO-8859-6 encoding\n");
  232. assert(espeak_ng_EncodingFromName("ISO-8859-6") == ESPEAKNG_ENCODING_ISO_8859_6);
  233. assert(espeak_ng_EncodingFromName("ISO_8859-6") == ESPEAKNG_ENCODING_ISO_8859_6);
  234. assert(espeak_ng_EncodingFromName("ISO_8859-6:1987") == ESPEAKNG_ENCODING_ISO_8859_6);
  235. assert(espeak_ng_EncodingFromName("iso-ir-127") == ESPEAKNG_ENCODING_ISO_8859_6);
  236. assert(espeak_ng_EncodingFromName("ECMA-114") == ESPEAKNG_ENCODING_ISO_8859_6);
  237. assert(espeak_ng_EncodingFromName("ASMO-708") == ESPEAKNG_ENCODING_ISO_8859_6);
  238. assert(espeak_ng_EncodingFromName("arabic") == ESPEAKNG_ENCODING_ISO_8859_6);
  239. assert(espeak_ng_EncodingFromName("csISOLatinArabic") == ESPEAKNG_ENCODING_ISO_8859_6);
  240. espeak_ng_TEXT_DECODER *decoder = create_text_decoder();
  241. assert(text_decoder_decode_string(decoder, "aG\x92\xA0\xDA", 5, ESPEAKNG_ENCODING_ISO_8859_6) == ENS_OK);
  242. assert(text_decoder_eof(decoder) == 0);
  243. assert(text_decoder_getc(decoder) == 'a');
  244. assert(text_decoder_eof(decoder) == 0);
  245. assert(text_decoder_getc(decoder) == 'G');
  246. assert(text_decoder_eof(decoder) == 0);
  247. assert(text_decoder_getc(decoder) == 0x92);
  248. assert(text_decoder_eof(decoder) == 0);
  249. assert(text_decoder_getc(decoder) == 0xA0);
  250. assert(text_decoder_eof(decoder) == 0);
  251. assert(text_decoder_getc(decoder) == 0x063A);
  252. assert(text_decoder_eof(decoder) == 1);
  253. destroy_text_decoder(decoder);
  254. }
  255. void
  256. test_iso_8859_7_encoding()
  257. {
  258. printf("testing ISO-8859-7 encoding\n");
  259. assert(espeak_ng_EncodingFromName("ISO-8859-7") == ESPEAKNG_ENCODING_ISO_8859_7);
  260. assert(espeak_ng_EncodingFromName("ISO_8859-7") == ESPEAKNG_ENCODING_ISO_8859_7);
  261. assert(espeak_ng_EncodingFromName("ISO_8859-7:1987") == ESPEAKNG_ENCODING_ISO_8859_7);
  262. assert(espeak_ng_EncodingFromName("iso-ir-126") == ESPEAKNG_ENCODING_ISO_8859_7);
  263. assert(espeak_ng_EncodingFromName("ECMA-118") == ESPEAKNG_ENCODING_ISO_8859_7);
  264. assert(espeak_ng_EncodingFromName("ELOT_928") == ESPEAKNG_ENCODING_ISO_8859_7);
  265. assert(espeak_ng_EncodingFromName("greek") == ESPEAKNG_ENCODING_ISO_8859_7);
  266. assert(espeak_ng_EncodingFromName("greek8") == ESPEAKNG_ENCODING_ISO_8859_7);
  267. assert(espeak_ng_EncodingFromName("csISOLatinGreek") == ESPEAKNG_ENCODING_ISO_8859_7);
  268. espeak_ng_TEXT_DECODER *decoder = create_text_decoder();
  269. assert(text_decoder_decode_string(decoder, "aG\x92\xA0\xDE", 5, ESPEAKNG_ENCODING_ISO_8859_7) == ENS_OK);
  270. assert(text_decoder_eof(decoder) == 0);
  271. assert(text_decoder_getc(decoder) == 'a');
  272. assert(text_decoder_eof(decoder) == 0);
  273. assert(text_decoder_getc(decoder) == 'G');
  274. assert(text_decoder_eof(decoder) == 0);
  275. assert(text_decoder_getc(decoder) == 0x92);
  276. assert(text_decoder_eof(decoder) == 0);
  277. assert(text_decoder_getc(decoder) == 0xA0);
  278. assert(text_decoder_eof(decoder) == 0);
  279. assert(text_decoder_getc(decoder) == 0x03AE);
  280. assert(text_decoder_eof(decoder) == 1);
  281. destroy_text_decoder(decoder);
  282. }
  283. void
  284. test_iso_8859_8_encoding()
  285. {
  286. printf("testing ISO-8859-8 encoding\n");
  287. assert(espeak_ng_EncodingFromName("ISO-8859-8") == ESPEAKNG_ENCODING_ISO_8859_8);
  288. assert(espeak_ng_EncodingFromName("ISO_8859-8") == ESPEAKNG_ENCODING_ISO_8859_8);
  289. assert(espeak_ng_EncodingFromName("ISO_8859-8:1988") == ESPEAKNG_ENCODING_ISO_8859_8);
  290. assert(espeak_ng_EncodingFromName("iso-ir-138") == ESPEAKNG_ENCODING_ISO_8859_8);
  291. assert(espeak_ng_EncodingFromName("hebrew") == ESPEAKNG_ENCODING_ISO_8859_8);
  292. assert(espeak_ng_EncodingFromName("csISOLatinHebrew") == ESPEAKNG_ENCODING_ISO_8859_8);
  293. espeak_ng_TEXT_DECODER *decoder = create_text_decoder();
  294. assert(text_decoder_decode_string(decoder, "aG\x92\xA0\xEE", 5, ESPEAKNG_ENCODING_ISO_8859_8) == ENS_OK);
  295. assert(text_decoder_eof(decoder) == 0);
  296. assert(text_decoder_getc(decoder) == 'a');
  297. assert(text_decoder_eof(decoder) == 0);
  298. assert(text_decoder_getc(decoder) == 'G');
  299. assert(text_decoder_eof(decoder) == 0);
  300. assert(text_decoder_getc(decoder) == 0x92);
  301. assert(text_decoder_eof(decoder) == 0);
  302. assert(text_decoder_getc(decoder) == 0xA0);
  303. assert(text_decoder_eof(decoder) == 0);
  304. assert(text_decoder_getc(decoder) == 0x05de);
  305. assert(text_decoder_eof(decoder) == 1);
  306. destroy_text_decoder(decoder);
  307. }
  308. void
  309. test_iso_8859_9_encoding()
  310. {
  311. printf("testing ISO-8859-9 encoding\n");
  312. assert(espeak_ng_EncodingFromName("ISO-8859-9") == ESPEAKNG_ENCODING_ISO_8859_9);
  313. assert(espeak_ng_EncodingFromName("ISO_8859-9") == ESPEAKNG_ENCODING_ISO_8859_9);
  314. assert(espeak_ng_EncodingFromName("ISO_8859-9:1989") == ESPEAKNG_ENCODING_ISO_8859_9);
  315. assert(espeak_ng_EncodingFromName("iso-ir-148") == ESPEAKNG_ENCODING_ISO_8859_9);
  316. assert(espeak_ng_EncodingFromName("latin5") == ESPEAKNG_ENCODING_ISO_8859_9);
  317. assert(espeak_ng_EncodingFromName("l5") == ESPEAKNG_ENCODING_ISO_8859_9);
  318. assert(espeak_ng_EncodingFromName("csISOLatin5") == ESPEAKNG_ENCODING_ISO_8859_9);
  319. espeak_ng_TEXT_DECODER *decoder = create_text_decoder();
  320. assert(text_decoder_decode_string(decoder, "aG\x92\xA0\xDE", 5, ESPEAKNG_ENCODING_ISO_8859_9) == ENS_OK);
  321. assert(text_decoder_eof(decoder) == 0);
  322. assert(text_decoder_getc(decoder) == 'a');
  323. assert(text_decoder_eof(decoder) == 0);
  324. assert(text_decoder_getc(decoder) == 'G');
  325. assert(text_decoder_eof(decoder) == 0);
  326. assert(text_decoder_getc(decoder) == 0x92);
  327. assert(text_decoder_eof(decoder) == 0);
  328. assert(text_decoder_getc(decoder) == 0xA0);
  329. assert(text_decoder_eof(decoder) == 0);
  330. assert(text_decoder_getc(decoder) == 0x015e);
  331. assert(text_decoder_eof(decoder) == 1);
  332. destroy_text_decoder(decoder);
  333. }
  334. void
  335. test_iso_8859_10_encoding()
  336. {
  337. printf("testing ISO-8859-10 encoding\n");
  338. assert(espeak_ng_EncodingFromName("ISO-8859-10") == ESPEAKNG_ENCODING_ISO_8859_10);
  339. assert(espeak_ng_EncodingFromName("ISO_8859-10") == ESPEAKNG_ENCODING_ISO_8859_10);
  340. assert(espeak_ng_EncodingFromName("ISO_8859-10:1992") == ESPEAKNG_ENCODING_ISO_8859_10);
  341. assert(espeak_ng_EncodingFromName("iso-ir-157") == ESPEAKNG_ENCODING_ISO_8859_10);
  342. assert(espeak_ng_EncodingFromName("latin6") == ESPEAKNG_ENCODING_ISO_8859_10);
  343. assert(espeak_ng_EncodingFromName("l6") == ESPEAKNG_ENCODING_ISO_8859_10);
  344. assert(espeak_ng_EncodingFromName("csISOLatin6") == ESPEAKNG_ENCODING_ISO_8859_10);
  345. espeak_ng_TEXT_DECODER *decoder = create_text_decoder();
  346. assert(text_decoder_decode_string(decoder, "aG\x92\xA0\xDE", 5, ESPEAKNG_ENCODING_ISO_8859_10) == ENS_OK);
  347. assert(text_decoder_eof(decoder) == 0);
  348. assert(text_decoder_getc(decoder) == 'a');
  349. assert(text_decoder_eof(decoder) == 0);
  350. assert(text_decoder_getc(decoder) == 'G');
  351. assert(text_decoder_eof(decoder) == 0);
  352. assert(text_decoder_getc(decoder) == 0x92);
  353. assert(text_decoder_eof(decoder) == 0);
  354. assert(text_decoder_getc(decoder) == 0xA0);
  355. assert(text_decoder_eof(decoder) == 0);
  356. assert(text_decoder_getc(decoder) == 0x00de);
  357. assert(text_decoder_eof(decoder) == 1);
  358. destroy_text_decoder(decoder);
  359. }
  360. void
  361. test_iso_8859_11_encoding()
  362. {
  363. printf("testing ISO-8859-11 encoding\n");
  364. assert(espeak_ng_EncodingFromName("ISO-8859-11") == ESPEAKNG_ENCODING_ISO_8859_11);
  365. assert(espeak_ng_EncodingFromName("TIS-620") == ESPEAKNG_ENCODING_ISO_8859_11);
  366. assert(espeak_ng_EncodingFromName("csTIS620") == ESPEAKNG_ENCODING_ISO_8859_11);
  367. espeak_ng_TEXT_DECODER *decoder = create_text_decoder();
  368. assert(text_decoder_decode_string(decoder, "aG\x92\xA0\xEE", 5, ESPEAKNG_ENCODING_ISO_8859_11) == ENS_OK);
  369. assert(text_decoder_eof(decoder) == 0);
  370. assert(text_decoder_getc(decoder) == 'a');
  371. assert(text_decoder_eof(decoder) == 0);
  372. assert(text_decoder_getc(decoder) == 'G');
  373. assert(text_decoder_eof(decoder) == 0);
  374. assert(text_decoder_getc(decoder) == 0x92);
  375. assert(text_decoder_eof(decoder) == 0);
  376. assert(text_decoder_getc(decoder) == 0xA0);
  377. assert(text_decoder_eof(decoder) == 0);
  378. assert(text_decoder_getc(decoder) == 0x0e4e);
  379. assert(text_decoder_eof(decoder) == 1);
  380. destroy_text_decoder(decoder);
  381. }
  382. void
  383. test_iso_8859_13_encoding()
  384. {
  385. printf("testing ISO-8859-13 encoding\n");
  386. assert(espeak_ng_EncodingFromName("ISO-8859-13") == ESPEAKNG_ENCODING_ISO_8859_13);
  387. assert(espeak_ng_EncodingFromName("csISO885913") == ESPEAKNG_ENCODING_ISO_8859_13);
  388. espeak_ng_TEXT_DECODER *decoder = create_text_decoder();
  389. assert(text_decoder_decode_string(decoder, "aG\x92\xA0\xEE", 5, ESPEAKNG_ENCODING_ISO_8859_13) == ENS_OK);
  390. assert(text_decoder_eof(decoder) == 0);
  391. assert(text_decoder_getc(decoder) == 'a');
  392. assert(text_decoder_eof(decoder) == 0);
  393. assert(text_decoder_getc(decoder) == 'G');
  394. assert(text_decoder_eof(decoder) == 0);
  395. assert(text_decoder_getc(decoder) == 0x92);
  396. assert(text_decoder_eof(decoder) == 0);
  397. assert(text_decoder_getc(decoder) == 0xA0);
  398. assert(text_decoder_eof(decoder) == 0);
  399. assert(text_decoder_getc(decoder) == 0x012b);
  400. assert(text_decoder_eof(decoder) == 1);
  401. destroy_text_decoder(decoder);
  402. }
  403. void
  404. test_iso_8859_14_encoding()
  405. {
  406. printf("testing ISO-8859-14 encoding\n");
  407. assert(espeak_ng_EncodingFromName("ISO-8859-14") == ESPEAKNG_ENCODING_ISO_8859_14);
  408. assert(espeak_ng_EncodingFromName("ISO_8859-14") == ESPEAKNG_ENCODING_ISO_8859_14);
  409. assert(espeak_ng_EncodingFromName("ISO_8859-14:1998") == ESPEAKNG_ENCODING_ISO_8859_14);
  410. assert(espeak_ng_EncodingFromName("iso-ir-199") == ESPEAKNG_ENCODING_ISO_8859_14);
  411. assert(espeak_ng_EncodingFromName("iso-celtic") == ESPEAKNG_ENCODING_ISO_8859_14);
  412. assert(espeak_ng_EncodingFromName("latin8") == ESPEAKNG_ENCODING_ISO_8859_14);
  413. assert(espeak_ng_EncodingFromName("l8") == ESPEAKNG_ENCODING_ISO_8859_14);
  414. assert(espeak_ng_EncodingFromName("csISO885914") == ESPEAKNG_ENCODING_ISO_8859_14);
  415. espeak_ng_TEXT_DECODER *decoder = create_text_decoder();
  416. assert(text_decoder_decode_string(decoder, "aG\x92\xA0\xDE", 5, ESPEAKNG_ENCODING_ISO_8859_14) == ENS_OK);
  417. assert(text_decoder_eof(decoder) == 0);
  418. assert(text_decoder_getc(decoder) == 'a');
  419. assert(text_decoder_eof(decoder) == 0);
  420. assert(text_decoder_getc(decoder) == 'G');
  421. assert(text_decoder_eof(decoder) == 0);
  422. assert(text_decoder_getc(decoder) == 0x92);
  423. assert(text_decoder_eof(decoder) == 0);
  424. assert(text_decoder_getc(decoder) == 0xA0);
  425. assert(text_decoder_eof(decoder) == 0);
  426. assert(text_decoder_getc(decoder) == 0x0176);
  427. assert(text_decoder_eof(decoder) == 1);
  428. destroy_text_decoder(decoder);
  429. }
  430. void
  431. test_iso_8859_15_encoding()
  432. {
  433. printf("testing ISO-8859-15 encoding\n");
  434. assert(espeak_ng_EncodingFromName("ISO-8859-15") == ESPEAKNG_ENCODING_ISO_8859_15);
  435. assert(espeak_ng_EncodingFromName("ISO_8859-15") == ESPEAKNG_ENCODING_ISO_8859_15);
  436. assert(espeak_ng_EncodingFromName("Latin-9") == ESPEAKNG_ENCODING_ISO_8859_15);
  437. assert(espeak_ng_EncodingFromName("csISO885915") == ESPEAKNG_ENCODING_ISO_8859_15);
  438. espeak_ng_TEXT_DECODER *decoder = create_text_decoder();
  439. assert(text_decoder_decode_string(decoder, "aG\x92\xA0\xBE", 5, ESPEAKNG_ENCODING_ISO_8859_15) == ENS_OK);
  440. assert(text_decoder_eof(decoder) == 0);
  441. assert(text_decoder_getc(decoder) == 'a');
  442. assert(text_decoder_eof(decoder) == 0);
  443. assert(text_decoder_getc(decoder) == 'G');
  444. assert(text_decoder_eof(decoder) == 0);
  445. assert(text_decoder_getc(decoder) == 0x92);
  446. assert(text_decoder_eof(decoder) == 0);
  447. assert(text_decoder_getc(decoder) == 0xA0);
  448. assert(text_decoder_eof(decoder) == 0);
  449. assert(text_decoder_getc(decoder) == 0x0178);
  450. assert(text_decoder_eof(decoder) == 1);
  451. destroy_text_decoder(decoder);
  452. }
  453. void
  454. test_iso_8859_16_encoding()
  455. {
  456. printf("testing ISO-8859-16 encoding\n");
  457. assert(espeak_ng_EncodingFromName("ISO-8859-16") == ESPEAKNG_ENCODING_ISO_8859_16);
  458. assert(espeak_ng_EncodingFromName("ISO_8859-16") == ESPEAKNG_ENCODING_ISO_8859_16);
  459. assert(espeak_ng_EncodingFromName("ISO_8859-16:2001") == ESPEAKNG_ENCODING_ISO_8859_16);
  460. assert(espeak_ng_EncodingFromName("iso-ir-226") == ESPEAKNG_ENCODING_ISO_8859_16);
  461. assert(espeak_ng_EncodingFromName("latin10") == ESPEAKNG_ENCODING_ISO_8859_16);
  462. assert(espeak_ng_EncodingFromName("l10") == ESPEAKNG_ENCODING_ISO_8859_16);
  463. assert(espeak_ng_EncodingFromName("csISO885916") == ESPEAKNG_ENCODING_ISO_8859_16);
  464. espeak_ng_TEXT_DECODER *decoder = create_text_decoder();
  465. assert(text_decoder_decode_string(decoder, "aG\x92\xA0\xDE", 5, ESPEAKNG_ENCODING_ISO_8859_16) == ENS_OK);
  466. assert(text_decoder_eof(decoder) == 0);
  467. assert(text_decoder_getc(decoder) == 'a');
  468. assert(text_decoder_eof(decoder) == 0);
  469. assert(text_decoder_getc(decoder) == 'G');
  470. assert(text_decoder_eof(decoder) == 0);
  471. assert(text_decoder_getc(decoder) == 0x92);
  472. assert(text_decoder_eof(decoder) == 0);
  473. assert(text_decoder_getc(decoder) == 0xA0);
  474. assert(text_decoder_eof(decoder) == 0);
  475. assert(text_decoder_getc(decoder) == 0x021a);
  476. assert(text_decoder_eof(decoder) == 1);
  477. destroy_text_decoder(decoder);
  478. }
  479. int
  480. main(int argc, char **argv)
  481. {
  482. test_unbound_text_decoder();
  483. test_unknown_encoding();
  484. test_us_ascii_encoding();
  485. test_koi8_r_encoding();
  486. test_iso_8859_1_encoding();
  487. test_iso_8859_2_encoding();
  488. test_iso_8859_3_encoding();
  489. test_iso_8859_4_encoding();
  490. test_iso_8859_5_encoding();
  491. test_iso_8859_6_encoding();
  492. test_iso_8859_7_encoding();
  493. test_iso_8859_8_encoding();
  494. test_iso_8859_9_encoding();
  495. test_iso_8859_10_encoding();
  496. test_iso_8859_11_encoding();
  497. // ISO-8859-12 is not a valid encoding.
  498. test_iso_8859_13_encoding();
  499. test_iso_8859_14_encoding();
  500. test_iso_8859_15_encoding();
  501. test_iso_8859_16_encoding();
  502. printf("done\n");
  503. return EXIT_SUCCESS;
  504. }