eSpeak NG is an open source speech synthesizer that supports more than a hundred languages and accents.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

mkdictlist 4.5KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141
  1. #!/usr/bin/python
  2. #
  3. # Copyright (C) 2011 Reece H. Dunn
  4. # Licence: GPLv3
  5. #
  6. # A script for generating the dictionary Makefile rules from the files in dictsource.
  7. import sys
  8. import os
  9. voices = set()
  10. dictionaries = {}
  11. phoneme_data = set()
  12. mbrola = set()
  13. # Map voice names to dictionaries when these do not match.
  14. special_voices = {
  15. 'bs': 'hbs',
  16. 'hr': 'hbs',
  17. 'zh-yue': 'zhy',
  18. }
  19. # Support for extended dictionaries.
  20. extended_dictionaries = {
  21. 'ru': 'ru_listx',
  22. 'zh': 'zh_listx',
  23. 'zhy': 'zhy_list',
  24. }
  25. exclude_voices = []
  26. def find_voices(path):
  27. for filename in os.listdir(path):
  28. voice_path = os.path.join(path, filename)
  29. if os.path.isdir(voice_path):
  30. if not filename in ['!v', 'mb']:
  31. find_voices(voice_path)
  32. else:
  33. if filename in special_voices.keys():
  34. voices.add(special_voices[filename])
  35. elif filename not in exclude_voices:
  36. voices.add(filename)
  37. def find_phoneme_data(path):
  38. for filename in os.listdir(path):
  39. phondata_path = os.path.join(path, filename)
  40. if filename.startswith('ph_'):
  41. phoneme_data.add(phondata_path)
  42. def find_mbrola_voices(path):
  43. for filename in os.listdir(path):
  44. mbrola.add(filename)
  45. find_voices('espeak-data/voices')
  46. find_phoneme_data('phsource')
  47. find_mbrola_voices('phsource/mbrola')
  48. for filename in os.listdir('dictsource'):
  49. if filename.endswith('_rules') or filename.endswith('_list') or filename in ['bg_listx', 'it_listx']:
  50. dic, cat = filename.split('_')
  51. if dic in voices:
  52. if not dic in dictionaries.keys():
  53. dictionaries[dic] = []
  54. dictionaries[dic].append('dictsource/%s' % filename)
  55. def write_phoneme_data_rules(f):
  56. f.write('##### phoneme data:\n')
  57. f.write('\n')
  58. f.write('espeak-data/phondata: phsource/phonemes.stamp\n')
  59. f.write('espeak-data/phondata-manifest: phsource/phonemes.stamp\n')
  60. f.write('espeak-data/phonindex: phsource/phonemes.stamp\n')
  61. f.write('espeak-data/phontab: phsource/phonemes.stamp\n')
  62. f.write('espeak-data/intonations: phsource/phonemes.stamp\n')
  63. f.write('\n')
  64. f.write('phsource/phonemes.stamp: \\\n')
  65. for phonfile in sorted(phoneme_data):
  66. f.write('\t%s \\\n' % phonfile)
  67. f.write('\tphsource/phonemes \\\n')
  68. f.write('\tsrc/espeakedit\n')
  69. f.write('\tESPEAK_DATA_PATH=$(PWD) src/espeakedit --compile && touch $@\n')
  70. f.write('\n')
  71. def write_dictionary_make_rules(f):
  72. f.write('##### dictionaries:\n')
  73. f.write('\n')
  74. f.write('dictionaries: \\\n')
  75. for n, name in enumerate(sorted(dictionaries.keys())):
  76. if not name in ['bo']: # espeak fails to read these voices
  77. if n == len(dictionaries.keys()) - 1:
  78. f.write('\tespeak-data/%s_dict\n' % name)
  79. else:
  80. f.write('\tespeak-data/%s_dict \\\n' % name)
  81. for name, files in sorted(dictionaries.items()):
  82. f.write('\n')
  83. f.write('%s: espeak-data/%s_dict\n' % (name, name))
  84. f.write('dictsource/%s_extra:\n' % name)
  85. f.write('\ttouch dictsource/%s_extra\n' % name)
  86. if name in extended_dictionaries.keys():
  87. ext = extended_dictionaries[name]
  88. f.write('dictsource/%s:\n' % ext)
  89. f.write('\tln -svf extra/%s dictsource/\n' % ext)
  90. f.write('if HAVE_%s_EXTENDED_DICTIONARY\n' % name.upper())
  91. f.write('espeak-data/%s_dict: src/espeak phsource/phonemes.stamp %s dictsource/%s_extra dictsource/%s\n' % (name, ' '.join(sorted(files)), name, ext))
  92. f.write('else\n')
  93. f.write('espeak-data/%s_dict: src/espeak-ng phsource/phonemes.stamp %s dictsource/%s_extra\n' % (name, ' '.join(sorted(files)), name))
  94. if name in extended_dictionaries.keys():
  95. f.write('endif\n')
  96. f.write('\tcd dictsource && ESPEAK_DATA_PATH=$(PWD) LD_LIBRARY_PATH=../src:${LD_LIBRARY_PATH} ../src/espeak-ng --compile=%s && cd ..\n' % name)
  97. def write_mbrola_make_rules(f):
  98. f.write('\n##### mbrola:\n')
  99. f.write('\n')
  100. f.write('mbrola: \\\n')
  101. for n, name in enumerate(sorted(mbrola)):
  102. if n == len(mbrola) - 1:
  103. f.write('\tespeak-data/mbrola_ph/%s_phtrans\n' % name)
  104. else:
  105. f.write('\tespeak-data/mbrola_ph/%s_phtrans \\\n' % name)
  106. for name in sorted(mbrola):
  107. f.write('\n')
  108. f.write('espeak-data/mbrola_ph/%s_phtrans: phsource/mbrola/%s src/espeakedit\n' % (name, name))
  109. f.write('\tmkdir -p espeak-data/mbrola_ph\n')
  110. f.write('\tESPEAK_DATA_PATH=$(PWD) src/espeakedit --compile-mbrola ${PWD}/$<\n')
  111. try:
  112. filename = sys.argv[1]
  113. except:
  114. filename = None
  115. if filename:
  116. with open(filename, 'r') as f:
  117. prelude = f.read().split('##### phoneme data:\n')[0]
  118. with open(filename, 'w') as f:
  119. f.write(prelude)
  120. write_phoneme_data_rules(f)
  121. write_dictionary_make_rules(f)
  122. write_mbrola_make_rules(f)
  123. else:
  124. write_phoneme_data_rules(sys.stdout)
  125. write_dictionary_make_rules(sys.stdout)
  126. write_mbrola_make_rules(sys.stdout)