eSpeak NG is an open source speech synthesizer that supports more than a hundred languages and accents.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

iana.py 2.5KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102
  1. #!/usr/bin/python
  2. # Copyright (C) 2012 Reece H. Dunn
  3. #
  4. # This file is part of ucd-tools.
  5. #
  6. # ucd-tools is free software: you can redistribute it and/or modify
  7. # it under the terms of the GNU General Public License as published by
  8. # the Free Software Foundation, either version 3 of the License, or
  9. # (at your option) any later version.
  10. #
  11. # ucd-tools is distributed in the hope that it will be useful,
  12. # but WITHOUT ANY WARRANTY; without even the implied warranty of
  13. # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  14. # GNU General Public License for more details.
  15. #
  16. # You should have received a copy of the GNU General Public License
  17. # along with ucd-tools. If not, see <http://www.gnu.org/licenses/>.
  18. import os
  19. def read_data(path, split_char=':'):
  20. with open(path) as f:
  21. for line in f:
  22. line = line.replace('\n', '')
  23. if not line.startswith('#'):
  24. yield line.split(split_char)
  25. def fold_lines(path):
  26. next_line = None
  27. with open(path) as f:
  28. for line in f:
  29. line = line.replace('\n', '')
  30. if line.startswith(' '):
  31. next_line = '%s%s' % (next_line, line[1:])
  32. continue
  33. if next_line:
  34. yield next_line
  35. next_line = line
  36. def iana_subtag_entries(path):
  37. tag = {}
  38. for line in fold_lines(path):
  39. if line == '%%':
  40. if 'Type' in tag:
  41. yield tag
  42. tag = {}
  43. continue
  44. packed = line.split(': ')
  45. key = packed[0]
  46. value = ': '.join(packed[1:])
  47. if key == 'Description':
  48. # Only select the first Description. This handles subtag codes
  49. # that have multiple descriptions (e.g. 'es' maps to "Spanish"
  50. # and "Castilian").
  51. if not key in tag.keys():
  52. tag[key] = value
  53. else:
  54. tag[key] = value
  55. yield tag
  56. typemap = {
  57. 'extlang': 'ExtLang',
  58. 'grandfathered': 'Grandfathered',
  59. 'language': 'Language',
  60. 'redundant': 'Redundant',
  61. 'region': 'Region',
  62. 'script': 'Script',
  63. 'variant': 'Variant',
  64. }
  65. scopemap = {
  66. 'collection': 'Collection',
  67. 'macrolanguage': 'MacroLanguage',
  68. 'special': 'Special',
  69. 'private-use': 'PrivateUse',
  70. }
  71. def read_iana_subtags(path):
  72. tags = {}
  73. for tag in iana_subtag_entries(path):
  74. if 'Subtag' in tag.keys():
  75. ref = tag['Subtag']
  76. del tag['Subtag']
  77. else:
  78. ref = tag['Tag']
  79. del tag['Tag']
  80. if 'Scope' in tag.keys():
  81. if tag['Type'] != 'language':
  82. raise Exception('"Scope" property unexpected for Type="%s"' % tag['Type'])
  83. tag['Type'] = scopemap[ tag['Scope'] ]
  84. del tag['Scope']
  85. else:
  86. tag['Type'] = typemap[ tag['Type'] ]
  87. if '..' not in ref: # exclude private use definitions
  88. tags[ref] = tag
  89. return tags