eSpeak NG is an open source speech synthesizer that supports more than a hundred languages and accents.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

printdata.py 2.4KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667
  1. #!/usr/bin/python
  2. # Copyright (C) 2012 Reece H. Dunn
  3. #
  4. # This file is part of ucd-tools.
  5. #
  6. # ucd-tools is free software: you can redistribute it and/or modify
  7. # it under the terms of the GNU General Public License as published by
  8. # the Free Software Foundation, either version 3 of the License, or
  9. # (at your option) any later version.
  10. #
  11. # ucd-tools is distributed in the hope that it will be useful,
  12. # but WITHOUT ANY WARRANTY; without even the implied warranty of
  13. # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  14. # GNU General Public License for more details.
  15. #
  16. # You should have received a copy of the GNU General Public License
  17. # along with ucd-tools. If not, see <http://www.gnu.org/licenses/>.
  18. import os
  19. import sys
  20. import ucd
  21. ucd_rootdir = sys.argv[1]
  22. unicode_chars = {}
  23. for data in ucd.parse_ucd_data(ucd_rootdir, 'UnicodeData'):
  24. for codepoint in data['CodePoint']:
  25. unicode_chars[codepoint] = data
  26. unicode_chars[codepoint]['Properties'] = []
  27. for data in ucd.parse_ucd_data(ucd_rootdir, 'PropList'):
  28. if data['Property'] in ['White_Space']:
  29. for codepoint in data['Range']:
  30. unicode_chars[codepoint]['Properties'].append(data['Property'])
  31. for data in ucd.parse_ucd_data(ucd_rootdir, 'Scripts'):
  32. for codepoint in data['Range']:
  33. unicode_chars[codepoint]['Script'] = data['Script']
  34. for data in ucd.parse_ucd_data('supplemental', 'Klingon'):
  35. for codepoint in data['CodePoint']:
  36. unicode_chars[codepoint] = data
  37. unicode_chars[codepoint]['Properties'] = []
  38. unicode_chars[codepoint]['UpperCase'] = ucd.CodePoint('0000')
  39. unicode_chars[codepoint]['LowerCase'] = ucd.CodePoint('0000')
  40. unicode_chars[codepoint]['TitleCase'] = ucd.CodePoint('0000')
  41. null = ucd.CodePoint('0000')
  42. if __name__ == '__main__':
  43. for codepoint in ucd.CodeRange('000000..10FFFF'):
  44. try:
  45. data = unicode_chars[codepoint]
  46. except KeyError:
  47. data = {'GeneralCategory': 'Cn', 'TitleCase': codepoint, 'UpperCase': codepoint, 'LowerCase': codepoint, 'Properties': []}
  48. try:
  49. script = data['Script']
  50. except KeyError:
  51. script = 'Zzzz'
  52. title = data['TitleCase']
  53. upper = data['UpperCase']
  54. lower = data['LowerCase']
  55. if title == null: title = codepoint
  56. if upper == null: upper = codepoint
  57. if lower == null: lower = codepoint
  58. print '%s %s %s %s %s %s %s %s' % (
  59. codepoint, script,
  60. data['GeneralCategory'][0], data['GeneralCategory'],
  61. upper, lower, title,
  62. ' '.join(data['Properties']))