eSpeak NG is an open source speech synthesizer that supports more than a hundred languages and accents.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

minimize-corpus.sh 697B

123456789101112131415161718192021222324252627282930313233343536
  #!/bin/bash
  #
  # minimize-corpus.sh - minimize a fuzzing corpus directory in place.
  #
  # Usage:
  #   minimize-corpus.sh <corpus-dir>
  #   minimize-corpus.sh --preserve-txt <corpus-dir>
  #
  # Runs ./synth_fuzzer with -merge=1 from <corpus-dir> into a fresh temporary
  # directory, deletes the old corpus entries, and copies the merged entries
  # back. With --preserve-txt, entries named *.txt are kept and only the other
  # files are replaced.
  #
  # Environment: exports FUZZ_VOICE=en before invoking the fuzzer
  # (presumably read by synth_fuzzer to pick the voice - confirm in its source).
  #
  # Exit status: 0 on success; 1 on bad usage, failed merge, or failed copy.
  # The corpus is only modified after the merge step has succeeded.

  # Print the usage text and exit with status 1.
  usage() {
    echo "Usage: $0 <corpus-dir>"
    echo "Usage: $0 --preserve-txt <corpus-dir> (minimize corpus but keep .txt files intact)"
    exit 1
  }

  if [[ $# -lt 1 ]]; then
    usage
  fi

  preserve_txt=0
  if [[ "$1" == "--preserve-txt" ]]; then
    preserve_txt=1
    # ${2-} keeps this safe when the directory argument was forgotten.
    CORPUS_DIR=${2-}
  else
    CORPUS_DIR=$1
  fi

  # An empty directory name would turn the cleanup below into "rm -rf /*".
  if [[ -z "$CORPUS_DIR" ]]; then
    usage
  fi
  if [[ ! -d "$CORPUS_DIR" ]]; then
    echo "Error: '$CORPUS_DIR' is not a directory" >&2
    exit 1
  fi
  # Keep a leading '-' from being parsed as an option by find or the fuzzer.
  if [[ "$CORPUS_DIR" == -* ]]; then
    CORPUS_DIR=./$CORPUS_DIR
  fi

  export FUZZ_VOICE=en
  FUZZER=./synth_fuzzer

  if ! TMP_DIR=$(mktemp -d); then
    echo "Error: could not create a temporary directory" >&2
    exit 1
  fi
  # Remove the scratch directory on every exit path.
  trap 'rm -rf -- "$TMP_DIR"' EXIT

  echo "Merging..."
  # Run the fuzzer directly: wrapping it in a command substitution would try
  # to execute whatever it prints on stdout. Abort before touching the corpus
  # if the merge fails.
  if ! "$FUZZER" -merge=1 "$TMP_DIR" "$CORPUS_DIR"; then
    echo "Error: merge failed; corpus left untouched" >&2
    exit 1
  fi

  echo "Removing old files..."
  if [[ $preserve_txt -eq 1 ]]; then
    echo " => Preserve .txt files"
    # Delete every non-directory entry whose name does not end in .txt.
    # Subdirectories themselves are left in place so .txt files inside them
    # survive.
    find "$CORPUS_DIR" ! -type d ! -name '*.txt' -exec rm -vf -- {} +
  else
    # ${CORPUS_DIR:?} aborts rather than expanding to "/*" if the variable
    # is ever empty.
    rm -rf -- "${CORPUS_DIR:?}"/*
  fi

  # "dir/." copies the directory contents and also works when it is empty.
  if ! cp -R -- "$TMP_DIR"/. "$CORPUS_DIR"/; then
    # The old corpus is already gone, so keep the merged set for recovery.
    trap - EXIT
    echo "Error: copy failed; merged corpus kept in $TMP_DIR" >&2
    exit 1
  fi

  echo "Merging done !"