eSpeak NG is an open source speech synthesizer that supports more than a hundred languages and accents.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

ctype.c 13KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495
  1. /* ctype-style APIs.
  2. *
  3. * Copyright (C) 2012-2017 Reece H. Dunn
  4. *
  5. * This file is part of ucd-tools.
  6. *
  7. * ucd-tools is free software: you can redistribute it and/or modify
  8. * it under the terms of the GNU General Public License as published by
  9. * the Free Software Foundation, either version 3 of the License, or
  10. * (at your option) any later version.
  11. *
  12. * ucd-tools is distributed in the hope that it will be useful,
  13. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  15. * GNU General Public License for more details.
  16. *
  17. * You should have received a copy of the GNU General Public License
  18. * along with ucd-tools. If not, see <http://www.gnu.org/licenses/>.
  19. */
  20. #include "ucd/ucd.h"
  21. static int other_alphabetic_MnMcSo(codepoint_t c)
  22. {
  23. switch (c & 0xFFFFFF00)
  24. {
  25. case 0x0300:
  26. return c == 0x0345;
  27. case 0x0500:
  28. return (c >= 0x05B0 && c <= 0x05BD)
  29. || c == 0x05BF
  30. || (c >= 0x05C1 && c <= 0x05C2)
  31. || (c >= 0x05C4 && c <= 0x05C5)
  32. || c == 0x05C7;
  33. case 0x0600:
  34. return (c >= 0x0610 && c <= 0x061A)
  35. || (c >= 0x064B && c <= 0x0657)
  36. || (c >= 0x0659 && c <= 0x065F)
  37. || c == 0x0670
  38. || (c >= 0x06D6 && c <= 0x06DC)
  39. || (c >= 0x06E1 && c <= 0x06E4)
  40. || (c >= 0x06E7 && c <= 0x06E8)
  41. || c == 0x06ED;
  42. case 0x0700:
  43. return c == 0x0711
  44. || (c >= 0x0730 && c <= 0x073F)
  45. || (c >= 0x07A6 && c <= 0x07B0);
  46. case 0x0800:
  47. return (c >= 0x0816 && c <= 0x0817)
  48. || (c >= 0x081B && c <= 0x0823)
  49. || (c >= 0x0825 && c <= 0x0827)
  50. || (c >= 0x0829 && c <= 0x082C)
  51. || (c >= 0x08D4 && c <= 0x08DF)
  52. || (c >= 0x08E3 && c <= 0x08E9)
  53. || c >= 0x08F0;
  54. case 0x0900:
  55. return (c >= 0x0900 && c <= 0x0903)
  56. || (c >= 0x093A && c <= 0x093B)
  57. || (c >= 0x093E && c <= 0x094C)
  58. || (c >= 0x094E && c <= 0x094F)
  59. || (c >= 0x0955 && c <= 0x0957)
  60. || (c >= 0x0962 && c <= 0x0963)
  61. || (c >= 0x0981 && c <= 0x0983)
  62. || (c >= 0x0981 && c <= 0x0983)
  63. || (c >= 0x09BE && c <= 0x09C4)
  64. || (c >= 0x09C7 && c <= 0x09C8)
  65. || (c >= 0x09CB && c <= 0x09CC)
  66. || c == 0x09D7
  67. || (c >= 0x09E2 && c <= 0x09E3);
  68. case 0x0A00:
  69. return (c >= 0x0A01 && c <= 0x0A03)
  70. || (c >= 0x0A3E && c <= 0x0A42)
  71. || (c >= 0x0A47 && c <= 0x0A48)
  72. || (c >= 0x0A4B && c <= 0x0A4C)
  73. || c == 0x0A51
  74. || (c >= 0x0A70 && c <= 0x0A71)
  75. || c == 0x0A75
  76. || (c >= 0x0A81 && c <= 0x0A83)
  77. || (c >= 0x0ABE && c <= 0x0AC5)
  78. || (c >= 0x0AC7 && c <= 0x0AC9)
  79. || (c >= 0x0ACB && c <= 0x0ACC)
  80. || (c >= 0x0AE2 && c <= 0x0AE3);
  81. case 0x0B00:
  82. return (c >= 0x0B01 && c <= 0x0B03)
  83. || (c >= 0x0B3E && c <= 0x0B44)
  84. || (c >= 0x0B47 && c <= 0x0B48)
  85. || (c >= 0x0B4B && c <= 0x0B4C)
  86. || (c >= 0x0B56 && c <= 0x0B57)
  87. || (c >= 0x0B62 && c <= 0x0B63)
  88. || c == 0x0B82
  89. || (c >= 0x0BBE && c <= 0x0BC2)
  90. || (c >= 0x0BC6 && c <= 0x0BC8)
  91. || (c >= 0x0BCA && c <= 0x0BCC)
  92. || c == 0x0BD7;
  93. case 0x0C00:
  94. return (c >= 0x0C00 && c <= 0x0C03)
  95. || (c >= 0x0C3E && c <= 0x0C44)
  96. || (c >= 0x0C46 && c <= 0x0C48)
  97. || (c >= 0x0C4A && c <= 0x0C4C)
  98. || (c >= 0x0C55 && c <= 0x0C56)
  99. || (c >= 0x0C62 && c <= 0x0C63)
  100. || (c >= 0x0C81 && c <= 0x0C83)
  101. || (c >= 0x0CBE && c <= 0x0CBF)
  102. || (c >= 0x0CC0 && c <= 0x0CC4)
  103. || (c >= 0x0CC6 && c <= 0x0CC8)
  104. || (c >= 0x0CCA && c <= 0x0CCC)
  105. || (c >= 0x0CD5 && c <= 0x0CD6)
  106. || (c >= 0x0CE2 && c <= 0x0CE3);
  107. case 0x0D00:
  108. return (c >= 0x0D01 && c <= 0x0D03)
  109. || (c >= 0x0D3E && c <= 0x0D44)
  110. || (c >= 0x0D46 && c <= 0x0D48)
  111. || (c >= 0x0D4A && c <= 0x0D4C)
  112. || c == 0x0D57
  113. || (c >= 0x0D62 && c <= 0x0D63)
  114. || (c >= 0x0D82 && c <= 0x0D83)
  115. || (c >= 0x0DCF && c <= 0x0DD4)
  116. || c == 0x0DD6
  117. || (c >= 0x0DD8 && c <= 0x0DDF)
  118. || (c >= 0x0DF2 && c <= 0x0DF3);
  119. case 0x0E00:
  120. return c == 0x0E31
  121. || (c >= 0x0E34 && c <= 0x0E3A)
  122. || c == 0x0E4D
  123. || c == 0x0EB1
  124. || (c >= 0x0EB4 && c <= 0x0EB9)
  125. || (c >= 0x0EBB && c <= 0x0EBD)
  126. || c == 0x0ECD;
  127. case 0x0F00:
  128. return (c >= 0x0F71 && c <= 0x0F7F)
  129. || (c >= 0x0F80 && c <= 0x0F81)
  130. || (c >= 0x0F8D && c <= 0x0F97)
  131. || (c >= 0x0F99 && c <= 0x0FBC);
  132. case 0x1000:
  133. return (c >= 0x102B && c <= 0x1036)
  134. || c == 0x1038
  135. || (c >= 0x103B && c <= 0x103E)
  136. || (c >= 0x1056 && c <= 0x1059)
  137. || (c >= 0x105E && c <= 0x1060)
  138. || c == 0x1062
  139. || (c >= 0x1067 && c <= 0x1068)
  140. || (c >= 0x1071 && c <= 0x1074)
  141. || (c >= 0x1082 && c <= 0x1086)
  142. || (c >= 0x109C && c <= 0x109D);
  143. case 0x1300:
  144. return c == 0x135F;
  145. case 0x1700:
  146. return (c >= 0x1712 && c <= 0x1713)
  147. || (c >= 0x1732 && c <= 0x1733)
  148. || (c >= 0x1752 && c <= 0x1753)
  149. || (c >= 0x1772 && c <= 0x1773)
  150. || (c >= 0x17B6 && c <= 0x17C8);
  151. case 0x1800:
  152. return (c >= 0x1885 && c <= 0x1886)
  153. || c == 0x18A9;
  154. case 0x1900:
  155. return (c >= 0x1920 && c <= 0x192B)
  156. || (c >= 0x1930 && c <= 0x1938);
  157. case 0x1A00:
  158. return (c >= 0x1A17 && c <= 0x1A1B)
  159. || (c >= 0x1A55 && c <= 0x1A5E)
  160. || (c >= 0x1A61 && c <= 0x1A74);
  161. case 0x1B00:
  162. return (c >= 0x1B00 && c <= 0x1B04)
  163. || (c >= 0x1B35 && c <= 0x1B43)
  164. || (c >= 0x1B80 && c <= 0x1B82)
  165. || (c >= 0x1BA1 && c <= 0x1BA9)
  166. || (c >= 0x1BAC && c <= 0x1BAD)
  167. || (c >= 0x1BE7 && c <= 0x1BF1);
  168. case 0x1C00:
  169. return (c >= 0x1C24 && c <= 0x1C35)
  170. || (c >= 0x1CF2 && c <= 0x1CF3);
  171. case 0x1D00:
  172. return (c >= 0x1DE7 && c <= 0x1DF4);
  173. case 0x2400:
  174. return (c >= 0x24B6 && c <= 0x24E9);
  175. case 0x2D00:
  176. return (c >= 0x2DE0 && c <= 0x2DFF);
  177. case 0xA600:
  178. return (c >= 0xA674 && c <= 0xA67B)
  179. || (c >= 0xA69E && c <= 0xA69F);
  180. case 0xA800:
  181. return (c >= 0xA823 && c <= 0xA827)
  182. || (c >= 0xA880 && c <= 0xA881)
  183. || (c >= 0xA8B4 && c <= 0xA8C3)
  184. || c == 0xA8C5;
  185. case 0xA900:
  186. return (c >= 0xA926 && c <= 0xA92A)
  187. || (c >= 0xA947 && c <= 0xA952)
  188. || (c >= 0xA980 && c <= 0xA983)
  189. || (c >= 0xA9B4 && c <= 0xA9BF);
  190. case 0xAA00:
  191. return (c >= 0xAA29 && c <= 0xAA36)
  192. || c == 0xAA43
  193. || (c >= 0xAA4C && c <= 0xAA4D)
  194. || c == 0xAAB0
  195. || (c >= 0xAAB2 && c <= 0xAAB4)
  196. || (c >= 0xAAB7 && c <= 0xAAB8)
  197. || c == 0xAABE
  198. || (c >= 0xAAEB && c <= 0xAAEF)
  199. || c == 0xAAF5;
  200. case 0xAB00:
  201. return (c >= 0xABE3 && c <= 0xABEA);
  202. case 0xFB00:
  203. return c == 0xFB1E;
  204. case 0x10300:
  205. return (c >= 0x10376 && c <= 0x1037A);
  206. case 0x10A00:
  207. return (c >= 0x10A01 && c <= 0x10A03)
  208. || (c >= 0x10A05 && c <= 0x10A06)
  209. || (c >= 0x10A0C && c <= 0x10A0F);
  210. case 0x11000:
  211. return (c >= 0x11000 && c <= 0x11002)
  212. || (c >= 0x11038 && c <= 0x11045)
  213. || c == 0x11082
  214. || (c >= 0x110B0 && c <= 0x110B8);
  215. case 0x11100:
  216. return (c >= 0x11100 && c <= 0x11102)
  217. || (c >= 0x11127 && c <= 0x11132)
  218. || (c >= 0x11180 && c <= 0x11182)
  219. || (c >= 0x111B3 && c <= 0x111BF);
  220. case 0x11200:
  221. return (c >= 0x1122C && c <= 0x11234)
  222. || c == 0x11237
  223. || c == 0x1123E
  224. || (c >= 0x112DF && c <= 0x112E8);
  225. case 0x11300:
  226. return (c >= 0x11300 && c <= 0x11303)
  227. || (c >= 0x1133E && c <= 0x11344)
  228. || (c >= 0x11347 && c <= 0x11348)
  229. || (c >= 0x1134B && c <= 0x1134C)
  230. || c == 0x11357
  231. || (c >= 0x11362 && c <= 0x11363);
  232. case 0x11400:
  233. return (c >= 0x11435 && c <= 0x11441)
  234. || (c >= 0x11443 && c <= 0x11445)
  235. || (c >= 0x114B0 && c <= 0x114C1);
  236. case 0x11500:
  237. return (c >= 0x115AF && c <= 0x115B5)
  238. || (c >= 0x115B8 && c <= 0x115BE)
  239. || (c >= 0x115DC && c <= 0x115DD);
  240. case 0x11600:
  241. return (c >= 0x11630 && c <= 0x1163E)
  242. || c == 0x11640
  243. || (c >= 0x116AB && c <= 0x116B5);
  244. case 0x11700:
  245. return (c >= 0x1171D && c <= 0x1172A);
  246. case 0x11C00:
  247. return (c >= 0x11C2F && c <= 0x11C36)
  248. || (c >= 0x11C38 && c <= 0x11C3E)
  249. || (c >= 0x11C92 && c <= 0x11CA7)
  250. || (c >= 0x11CA9 && c <= 0x11CB6);
  251. case 0x16B00:
  252. return (c >= 0x16B30 && c <= 0x16B36);
  253. case 0x16F00:
  254. return (c >= 0x16F51 && c <= 0x16F7E);
  255. case 0x1BC00:
  256. return c == 0x1BC9E;
  257. case 0x1E000:
  258. return (c >= 0x1E000 && c <= 0x1E006)
  259. || (c >= 0x1E008 && c <= 0x1E018)
  260. || (c >= 0x1E01B && c <= 0x1E021)
  261. || (c >= 0x1E023 && c <= 0x1E024)
  262. || (c >= 0x1E026 && c <= 0x1E02A);
  263. case 0x1E900:
  264. return c == 0x1E947;
  265. case 0x1F100:
  266. return (c >= 0x01F130 && c <= 0x01F149)
  267. || (c >= 0x01F150 && c <= 0x01F169)
  268. || (c >= 0x01F170 && c <= 0x01F189);
  269. default:
  270. return 0;
  271. }
  272. }
  273. int ucd_isalnum(codepoint_t c)
  274. {
  275. switch (ucd_lookup_category(c))
  276. {
  277. case UCD_CATEGORY_Lu:
  278. case UCD_CATEGORY_Ll:
  279. case UCD_CATEGORY_Lt:
  280. case UCD_CATEGORY_Lm:
  281. case UCD_CATEGORY_Lo:
  282. case UCD_CATEGORY_Nl:
  283. case UCD_CATEGORY_Nd:
  284. case UCD_CATEGORY_No:
  285. return 1;
  286. case UCD_CATEGORY_Mn:
  287. case UCD_CATEGORY_Mc:
  288. case UCD_CATEGORY_So:
  289. return other_alphabetic_MnMcSo(c);
  290. default:
  291. return 0;
  292. }
  293. }
  294. int ucd_isalpha(codepoint_t c)
  295. {
  296. switch (ucd_lookup_category(c))
  297. {
  298. case UCD_CATEGORY_Lu:
  299. case UCD_CATEGORY_Ll:
  300. case UCD_CATEGORY_Lt:
  301. case UCD_CATEGORY_Lm:
  302. case UCD_CATEGORY_Lo:
  303. case UCD_CATEGORY_Nl:
  304. return 1;
  305. case UCD_CATEGORY_Mn:
  306. case UCD_CATEGORY_Mc:
  307. case UCD_CATEGORY_So:
  308. return other_alphabetic_MnMcSo(c);
  309. default:
  310. return 0;
  311. }
  312. }
  313. int ucd_isblank(codepoint_t c)
  314. {
  315. switch (ucd_lookup_category(c))
  316. {
  317. case UCD_CATEGORY_Zs:
  318. switch (c) // Exclude characters with the <noBreak> DispositionType
  319. {
  320. case 0x00A0: // U+00A0 : NO-BREAK SPACE
  321. case 0x2007: // U+2007 : FIGURE SPACE
  322. case 0x202F: // U+202F : NARROW NO-BREAK SPACE
  323. return 0;
  324. }
  325. return 1;
  326. case UCD_CATEGORY_Cc:
  327. return c == 0x09; // U+0009 : CHARACTER TABULATION
  328. default:
  329. return 0;
  330. }
  331. }
  332. int ucd_iscntrl(codepoint_t c)
  333. {
  334. return ucd_lookup_category(c) == UCD_CATEGORY_Cc;
  335. }
  336. int ucd_isdigit(codepoint_t c)
  337. {
  338. return (c >= 0x30 && c <= 0x39); // [0-9]
  339. }
  340. int ucd_isgraph(codepoint_t c)
  341. {
  342. switch (ucd_lookup_category(c))
  343. {
  344. case UCD_CATEGORY_Cc:
  345. case UCD_CATEGORY_Cf:
  346. case UCD_CATEGORY_Cn:
  347. case UCD_CATEGORY_Co:
  348. case UCD_CATEGORY_Cs:
  349. case UCD_CATEGORY_Zl:
  350. case UCD_CATEGORY_Zp:
  351. case UCD_CATEGORY_Zs:
  352. case UCD_CATEGORY_Ii:
  353. return 0;
  354. default:
  355. return 1;
  356. }
  357. }
  358. int ucd_islower(codepoint_t c)
  359. {
  360. switch (ucd_lookup_category(c))
  361. {
  362. case UCD_CATEGORY_Ll:
  363. return 1;
  364. case UCD_CATEGORY_Lt:
  365. return ucd_toupper(c) != c;
  366. case UCD_CATEGORY_Lo:
  367. return c == 0xAA // Other_Lowercase : FEMININE ORDINAL INDICATOR
  368. || c == 0xBA; // Other_Lowercase : MASCULINE ORDINAL INDICATOR
  369. case UCD_CATEGORY_Lm:
  370. return (c >= 0x02B0 && c <= 0x02B8) // Other_Lowercase
  371. || (c >= 0x02C0 && c <= 0x02C1) // Other_Lowercase
  372. || (c >= 0x02E0 && c <= 0x02E4) // Other_Lowercase
  373. || c == 0x037A // Other_Lowercase
  374. || (c >= 0x1D2C && c <= 0x1D6A) // Other_Lowercase
  375. || c == 0x1D78 // Other_Lowercase
  376. || (c >= 0x1D9B && c <= 0x1DBF) // Other_Lowercase
  377. || c == 0x2071 // Other_Lowercase
  378. || c == 0x207F // Other_Lowercase
  379. || (c >= 0x2090 && c <= 0x209C) // Other_Lowercase
  380. || (c >= 0x2C7C && c <= 0x2C7D) // Other_Lowercase
  381. || (c >= 0xA69C && c <= 0xA69D) // Other_Lowercase
  382. || c == 0xA770 // Other_Lowercase
  383. || (c >= 0xA7F8 && c <= 0xA7F9) // Other_Lowercase
  384. || (c >= 0xAB5C && c <= 0xAB5F); // Other_Lowercase
  385. case UCD_CATEGORY_Mn:
  386. return c == 0x0345; // Other_Lowercase : COMBINING GREEK YPOGEGRAMMENI
  387. case UCD_CATEGORY_Nl:
  388. return (c >= 0x2170 && c <= 0x217F); // Other_Lowercase
  389. case UCD_CATEGORY_So:
  390. return (c >= 0x24D0 && c <= 0x24E9); // Other_Lowercase
  391. default:
  392. return 0;
  393. }
  394. }
  395. int ucd_isprint(codepoint_t c)
  396. {
  397. switch (ucd_lookup_category(c))
  398. {
  399. case UCD_CATEGORY_Cc:
  400. case UCD_CATEGORY_Cf:
  401. case UCD_CATEGORY_Cn:
  402. case UCD_CATEGORY_Co:
  403. case UCD_CATEGORY_Cs:
  404. case UCD_CATEGORY_Ii:
  405. return 0;
  406. default:
  407. return 1;
  408. }
  409. }
  410. int ucd_ispunct(codepoint_t c)
  411. {
  412. switch (ucd_lookup_category(c))
  413. {
  414. case UCD_CATEGORY_Pc:
  415. case UCD_CATEGORY_Pd:
  416. case UCD_CATEGORY_Pe:
  417. case UCD_CATEGORY_Pf:
  418. case UCD_CATEGORY_Pi:
  419. case UCD_CATEGORY_Po:
  420. case UCD_CATEGORY_Ps:
  421. return 1;
  422. default:
  423. return 0;
  424. }
  425. }
  426. int ucd_isspace(codepoint_t c)
  427. {
  428. switch (ucd_lookup_category(c))
  429. {
  430. case UCD_CATEGORY_Zl:
  431. case UCD_CATEGORY_Zp:
  432. return 1;
  433. case UCD_CATEGORY_Zs:
  434. switch (c) // Exclude characters with the <noBreak> DispositionType
  435. {
  436. case 0x00A0: // U+00A0 : NO-BREAK SPACE
  437. case 0x2007: // U+2007 : FIGURE SPACE
  438. case 0x202F: // U+202F : NARROW NO-BREAK SPACE
  439. return 0;
  440. }
  441. return 1;
  442. case UCD_CATEGORY_Cc:
  443. switch (c) // Include control characters marked as White_Space
  444. {
  445. case 0x09: // U+0009 : CHARACTER TABULATION
  446. case 0x0A: // U+000A : LINE FEED
  447. case 0x0B: // U+000B : LINE TABULATION
  448. case 0x0C: // U+000C : FORM FEED
  449. case 0x0D: // U+000D : CARRIAGE RETURN
  450. case 0x85: // U+0085 : NEXT LINE
  451. return 1;
  452. }
  453. default:
  454. return 0;
  455. }
  456. }
  457. int ucd_isupper(codepoint_t c)
  458. {
  459. switch (ucd_lookup_category(c))
  460. {
  461. case UCD_CATEGORY_Lu:
  462. return 1;
  463. case UCD_CATEGORY_Lt:
  464. return ucd_tolower(c) != c;
  465. case UCD_CATEGORY_Nl:
  466. return (c >= 0x002160 && c <= 0x00216F); // Other_Uppercase
  467. case UCD_CATEGORY_So:
  468. return (c >= 0x0024B6 && c <= 0x0024CF) // Other_Uppercase
  469. || (c >= 0x01F130 && c <= 0x01F149) // Other_Uppercase
  470. || (c >= 0x01F150 && c <= 0x01F169) // Other_Uppercase
  471. || (c >= 0x01F170 && c <= 0x01F189); // Other_Uppercase
  472. default:
  473. return 0;
  474. }
  475. }
  476. int ucd_isxdigit(codepoint_t c)
  477. {
  478. return (c >= 0x30 && c <= 0x39) // [0-9]
  479. || (c >= 0x41 && c <= 0x46) // [A-Z]
  480. || (c >= 0x61 && c <= 0x66); // [a-z]
  481. }