eSpeak NG is an open source speech synthesizer that supports more than a hundred languages and accents.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

case.py 3.5KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798
  1. #!/usr/bin/python
  2. # Copyright (C) 2012-2016 Reece H. Dunn
  3. #
  4. # This file is part of ucd-tools.
  5. #
  6. # ucd-tools is free software: you can redistribute it and/or modify
  7. # it under the terms of the GNU General Public License as published by
  8. # the Free Software Foundation, either version 3 of the License, or
  9. # (at your option) any later version.
  10. #
  11. # ucd-tools is distributed in the hope that it will be useful,
  12. # but WITHOUT ANY WARRANTY; without even the implied warranty of
  13. # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  14. # GNU General Public License for more details.
  15. #
  16. # You should have received a copy of the GNU General Public License
  17. # along with ucd-tools. If not, see <http://www.gnu.org/licenses/>.
  18. import os
  19. import sys
  20. import ucd
  # Positional arguments: argv[1] is handed to ucd.parse_ucd_data() as the
  # data root, argv[2] is the version text used when the script is run.
  ucd_rootdir = sys.argv[1]
  ucd_version = sys.argv[2]

  # Map each code point to its (lowercase, uppercase, titlecase) triple,
  # keeping only UnicodeData records where at least one of the three
  # differs from ucd.CodePoint('0000').
  null = ucd.CodePoint('0000')
  unicode_chars = {}
  for data in ucd.parse_ucd_data(ucd_rootdir, 'UnicodeData'):
      mappings = (data['LowerCase'], data['UpperCase'], data['TitleCase'])
      if any(mapping != null for mapping in mappings):
          unicode_chars[data['CodePoint']] = mappings
  def c_file_header(version):
      """Return the fixed preamble of the generated C file.

      The preamble consists of the licence comment, the "automatically
      generated" notice, the #include lines and the declaration of
      ``struct case_conversion_entry``.  The result ends with a newline.

      version -- text substituted after "Unicode Character Data".
      """
      lines = [
          '/* Unicode Case Conversion',
          '*',
          '* Copyright (C) 2012-2016 Reece H. Dunn',
          '*',
          '* This file is part of ucd-tools.',
          '*',
          '* ucd-tools is free software: you can redistribute it and/or modify',
          '* it under the terms of the GNU General Public License as published by',
          '* the Free Software Foundation, either version 3 of the License, or',
          '* (at your option) any later version.',
          '*',
          '* ucd-tools is distributed in the hope that it will be useful,',
          '* but WITHOUT ANY WARRANTY; without even the implied warranty of',
          '* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the',
          '* GNU General Public License for more details.',
          '*',
          '* You should have received a copy of the GNU General Public License',
          '* along with ucd-tools. If not, see <http://www.gnu.org/licenses/>.',
          '*/',
          '// NOTE: This file is automatically generated from the UnicodeData.txt file in',
          # This script is case.py; the notice previously named categories.py.
          '// the Unicode Character database by the ucd-tools/tools/case.py script.',
          '#include "ucd/ucd.h"',
          '#include <stddef.h>',
          '// Unicode Character Data %s' % version,
          'struct case_conversion_entry',
          '{',
          'codepoint_t codepoint;',
          'codepoint_t uppercase;',
          'codepoint_t lowercase;',
          'codepoint_t titlecase;',
          '};',
      ]
      return '\n'.join(lines) + '\n'


  def case_conversion_table(chars):
      """Return the C definition of the ``case_conversion_data`` array.

      chars -- mapping of code point to a (lower, upper, title) tuple.

      Rows are emitted in ascending code point order because the generated
      lookup functions binary-search the array.
      """
      rows = []
      for codepoint in sorted(chars):
          lower, upper, title = chars[codepoint]
          # The C struct lists uppercase before lowercase, so swap here.
          rows.append('\t{ 0x%s, 0x%s, 0x%s, 0x%s },\n' % (codepoint, upper, lower, title))
      opening = ['static const struct case_conversion_entry case_conversion_data[] =\n', '{\n']
      return ''.join(opening + rows + ['};\n'])


  def case_lookup_function(case):
      """Return the C source of ``ucd_to<case>()``, preceded by a blank line.

      case -- 'upper', 'lower' or 'title'; selects the struct member read.

      The generated function binary-searches ``case_conversion_data`` and
      returns ``c`` unchanged when there is no entry or the mapping is 0.
      """
      return ''.join([
          '\n',
          'codepoint_t ucd_to%s(codepoint_t c)\n' % case,
          '{\n',
          '\tint begin = 0;\n',
          # `end` is an inclusive bound (the loop tests begin <= end), so it
          # must be the last valid index; using the element count let `pos`
          # reach one past the end of the array for large code points.
          '\tint end = sizeof(case_conversion_data)/sizeof(case_conversion_data[0]) - 1;\n',
          '\twhile (begin <= end)\n',
          '\t{\n',
          '\t\tint pos = (begin + end) / 2;\n',
          '\t\tconst struct case_conversion_entry *item = (case_conversion_data + pos);\n',
          '\t\tif (c == item->codepoint)\n',
          '\t\t\treturn item->%scase == 0 ? c : item->%scase;\n' % (case, case),
          '\t\telse if (c > item->codepoint)\n',
          '\t\t\tbegin = pos + 1;\n',
          '\t\telse\n',
          '\t\t\tend = pos - 1;\n',
          '\t}\n',
          '\treturn c;\n',
          '}\n',
      ])


  if __name__ == '__main__':
      # Write the complete generated C file to standard output: preamble,
      # conversion table, then one lookup function per case kind.
      sys.stdout.write(c_file_header(ucd_version))
      sys.stdout.write('\n')
      sys.stdout.write(case_conversion_table(unicode_chars))
      for case in ['upper', 'lower', 'title']:
          sys.stdout.write(case_lookup_function(case))